kmp_sched.cpp
/*
 * kmp_sched.cpp -- static scheduling -- iteration initialization
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

/* Static scheduling initialization.

   NOTE: team->t.t_nproc is constant inside any dispatch loop; however, it
   may change between parallel regions. __kmp_max_nth is the largest value
   __kmp_nth may take, 1 is the smallest. */

#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"

#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif

#ifdef KMP_DEBUG
//-------------------------------------------------------------------------
// template for debug prints specification ( d, u, lld, llu )
char const *traits_t<int>::spec = "d";
char const *traits_t<unsigned int>::spec = "u";
char const *traits_t<long long>::spec = "lld";
char const *traits_t<unsigned long long>::spec = "llu";
char const *traits_t<long>::spec = "ld";
//-------------------------------------------------------------------------
#endif

#if KMP_STATS_ENABLED
#define KMP_STATS_LOOP_END(stat)                                               \
  {                                                                            \
    kmp_int64 t;                                                               \
    kmp_int64 u = (kmp_int64)(*pupper);                                        \
    kmp_int64 l = (kmp_int64)(*plower);                                        \
    kmp_int64 i = (kmp_int64)incr;                                             \
    if (i == 1) {                                                              \
      t = u - l + 1;                                                           \
    } else if (i == -1) {                                                      \
      t = l - u + 1;                                                           \
    } else if (i > 0) {                                                        \
      t = (u - l) / i + 1;                                                     \
    } else {                                                                   \
      t = (l - u) / (-i) + 1;                                                  \
    }                                                                          \
    KMP_COUNT_VALUE(stat, t);                                                  \
    KMP_POP_PARTITIONED_TIMER();                                               \
  }
#else
#define KMP_STATS_LOOP_END(stat) /* Nothing */
#endif

#if USE_ITT_BUILD || defined KMP_DEBUG
static ident_t loc_stub = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"};
static inline void check_loc(ident_t *&loc) {
  if (loc == NULL)
    loc = &loc_stub; // may need to report location info to ittnotify
}
#endif

template <typename T>
static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid,
                                  kmp_int32 schedtype, kmp_int32 *plastiter,
                                  T *plower, T *pupper,
                                  typename traits_t<T>::signed_t *pstride,
                                  typename traits_t<T>::signed_t incr,
                                  typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                  ,
                                  void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_LOOP_STATIC);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static);
  KMP_PUSH_PARTITIONED_TIMER(OMP_loop_static_scheduling);

  // Clear the monotonic/nonmonotonic bits (ignore them)
  schedtype = SCHEDULE_WITHOUT_MODIFIERS(schedtype);

  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  /* this all has to be changed back to TID and such.. */
  kmp_int32 gtid = global_tid;
  kmp_uint32 tid;
  kmp_uint32 nth;
  UT trip_count;
  kmp_team_t *team;
  __kmp_assert_valid_gtid(gtid);
  kmp_info_t *th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_team_info_t *team_info = NULL;
  ompt_task_info_t *task_info = NULL;
  ompt_work_t ompt_work_type = ompt_work_loop;

  static kmp_int8 warn = 0;

  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    // Only fully initialize variables needed by OMPT if OMPT is enabled.
    team_info = __ompt_get_teaminfo(0, NULL);
    task_info = __ompt_get_task_info_object(0);
    // Determine workshare type
    if (loc != NULL) {
      if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
        ompt_work_type = ompt_work_loop;
      } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
        ompt_work_type = ompt_work_sections;
      } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
        ompt_work_type = ompt_work_distribute;
      } else {
        kmp_int8 bool_res =
            KMP_COMPARE_AND_STORE_ACQ8(&warn, (kmp_int8)0, (kmp_int8)1);
        if (bool_res)
          KMP_WARNING(OmptOutdatedWorkshare);
      }
      KMP_DEBUG_ASSERT(ompt_work_type);
    }
  }
#endif

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride);
  KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid));
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s,"
        " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper,
                   *pstride, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(global_tid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
  }
  /* special handling for zero-trip loops */
  if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
    if (plastiter != NULL)
      *plastiter = FALSE;
    /* leave pupper and plower set to entire iteration space */
    *pstride = incr; /* value should never be used */
// *plower = *pupper - incr;
// let compiler bypass the illegal loop (like for(i=1;i<10;i--))
// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE
// ON A ZERO-TRIP LOOP (lower=1, upper=0, stride=1) - JPH June 23, 2009.
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s "
                              "signed?<%s>, loc = %%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec, traits_t<T>::spec);
      check_loc(loc);
      KD_TRACE(100,
               (buff, *plastiter, *plower, *pupper, *pstride, loc->psource));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  // Although there are schedule enumerations above kmp_ord_upper that are not
  // "distribute" schedules, the only useful ones are dynamic, and those cannot
  // be seen here, since this code path is executed only for static schedules.
  if (schedtype > kmp_ord_upper) {
    // we are in DISTRIBUTE construct
    schedtype += kmp_sch_static -
                 kmp_distribute_static; // AC: convert to usual schedule type
    if (th->th.th_team->t.t_serialized > 1) {
      tid = 0;
      team = th->th.th_team;
    } else {
      tid = th->th.th_team->t.t_master_tid;
      team = th->th.th_team->t.t_parent;
    }
  } else {
    tid = __kmp_tid_from_gtid(global_tid);
    team = th->th.th_team;
  }

  /* determine if "for" loop is an active worksharing construct */
  if (team->t.t_serialized) {
    /* serialized parallel, each thread executes whole iteration space */
    if (plastiter != NULL)
      *plastiter = TRUE;
    /* leave pupper and plower set to entire iteration space */
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));

#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }
  nth = team->t.t_nproc;
  if (nth == 1) {
    if (plastiter != NULL)
      *plastiter = TRUE;
    *pstride =
        (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1));
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d "
                              "lower=%%%s upper=%%%s stride = %%%s\n",
                              traits_t<T>::spec, traits_t<T>::spec,
                              traits_t<ST>::spec);
      KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
      __kmp_str_free(&buff);
    }
#endif
    KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), *pstride, codeptr);
    }
#endif
    KMP_STATS_LOOP_END(OMP_loop_static_iterations);
    return;
  }

  /* compute trip count */
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }
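  // Illustrative note (not from the original source): with *plower = 0,
  // *pupper = 9 and incr = 2, trip_count = (UT)(9 - 0) / 2 + 1 = 5, i.e.
  // iterations {0, 2, 4, 6, 8}. Casting the difference to the unsigned type
  // keeps the computation well-defined when upper - lower would overflow
  // the signed type.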

#if KMP_STATS_ENABLED
  if (KMP_MASTER_GTID(gtid)) {
    KMP_COUNT_VALUE(OMP_loop_static_total_iterations, trip_count);
  }
#endif

  if (__kmp_env_consistency_check) {
    /* tripcount overflow? */
    if (trip_count == 0 && *pupper != *plower) {
      __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo,
                            loc);
    }
  }

  /* compute remaining parameters */
  switch (schedtype) {
  case kmp_sch_static: {
    if (trip_count < nth) {
      KMP_DEBUG_ASSERT(
          __kmp_static == kmp_sch_static_greedy ||
          __kmp_static ==
              kmp_sch_static_balanced); // Unknown static scheduling type.
      if (tid < trip_count) {
        *pupper = *plower = *plower + tid * incr;
      } else {
        // set bounds so non-active threads execute no iterations
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
      if (plastiter != NULL)
        *plastiter = (tid == trip_count - 1);
    } else {
      if (__kmp_static == kmp_sch_static_balanced) {
        UT small_chunk = trip_count / nth;
        UT extras = trip_count % nth;
        *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras));
        *pupper = *plower + small_chunk * incr - (tid < extras ? 0 : incr);
        if (plastiter != NULL)
          *plastiter = (tid == nth - 1);
      } else {
        T big_chunk_inc_count =
            (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
        T old_upper = *pupper;

        KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
        // Unknown static scheduling type.

        *plower += tid * big_chunk_inc_count;
        *pupper = *plower + big_chunk_inc_count - incr;
        if (incr > 0) {
          if (*pupper < *plower)
            *pupper = traits_t<T>::max_value;
          if (plastiter != NULL)
            *plastiter = *plower <= old_upper && *pupper > old_upper - incr;
          if (*pupper > old_upper)
            *pupper = old_upper; // tracker C73258
        } else {
          if (*pupper > *plower)
            *pupper = traits_t<T>::min_value;
          if (plastiter != NULL)
            *plastiter = *plower >= old_upper && *pupper < old_upper - incr;
          if (*pupper < old_upper)
            *pupper = old_upper; // tracker C73258
        }
      }
    }
    *pstride = trip_count;
    break;
  }
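  // Illustrative note (not from the original source): for trip_count = 10,
  // nth = 4, incr = 1, the balanced split gives small_chunk = 2, extras = 2,
  // so the threads get 3, 3, 2, 2 iterations: [0,2], [3,5], [6,7], [8,9].
  // The greedy split instead uses ceil(10/4) = 3 everywhere: [0,2], [3,5],
  // [6,8], [9,9], with the last thread's upper bound clipped to old_upper.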
  case kmp_sch_static_chunked: {
    ST span;
    UT nchunks;
    if (chunk < 1)
      chunk = 1;
    else if ((UT)chunk > trip_count)
      chunk = trip_count;
    nchunks = (trip_count) / (UT)chunk + (trip_count % (UT)chunk ? 1 : 0);
    span = chunk * incr;
    if (nchunks < nth) {
      *pstride = span * nchunks;
      if (tid < nchunks) {
        *plower = *plower + (span * tid);
        *pupper = *plower + span - incr;
      } else {
        *plower = *pupper + (incr > 0 ? 1 : -1);
      }
    } else {
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
    }
    if (plastiter != NULL)
      *plastiter = (tid == (nchunks - 1) % nth);
    break;
  }
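  // Illustrative note (not from the original source): chunks are handed out
  // round-robin. For trip_count = 10, chunk = 2, nth = 3, incr = 1 there are
  // nchunks = 5 chunks and *pstride = 6; thread 0 starts at [0,1] and, after
  // advancing by the stride, reaches [6,7]; thread 1 gets [2,3] then [8,9];
  // thread 2 gets [4,5]. The last chunk (index 4) belongs to
  // tid = 4 % 3 = 1, which therefore has *plastiter set.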
  case kmp_sch_static_balanced_chunked: {
    T old_upper = *pupper;
    // round up to make sure the chunk is enough to cover all iterations
    UT span = (trip_count + nth - 1) / nth;

    // perform chunk adjustment
    chunk = (span + chunk - 1) & ~(chunk - 1);

    span = chunk * incr;
    *plower = *plower + (span * tid);
    *pupper = *plower + span - incr;
    if (incr > 0) {
      if (*pupper > old_upper)
        *pupper = old_upper;
    } else if (*pupper < old_upper)
      *pupper = old_upper;

    if (plastiter != NULL)
      *plastiter = (tid == ((trip_count - 1) / (UT)chunk));
    break;
  }
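  // Illustrative note (not from the original source): the bit trick
  // (span + chunk - 1) & ~(chunk - 1) rounds span up to a multiple of chunk
  // and is only exact when chunk is a power of two (for this schedule,
  // chunk would typically be a SIMD width). For trip_count = 100, nth = 3,
  // chunk = 8: span = ceil(100/3) = 34, rounded up to 40, so the threads
  // cover [0,39], [40,79], [80,99] (the last clipped to old_upper).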
  default:
    KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type");
    break;
  }

#if USE_ITT_BUILD
  // Report loop metadata
  if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr &&
      __kmp_forkjoin_frames_mode == 3 && th->th.th_teams_microtask == NULL &&
      team->t.t_active_level == 1) {
    kmp_uint64 cur_chunk = chunk;
    check_loc(loc);
    // Calculate chunk in case it was not specified; it is specified for
    // kmp_sch_static_chunked
    if (schedtype == kmp_sch_static) {
      cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0);
    }
    // 0 - "static" schedule
    __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk);
  }
#endif
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s "
                            "upper=%%%s stride = %%%s signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_begin, &(team_info->parallel_data),
        &(task_info->task_data), trip_count, codeptr);
  }
  if (ompt_enabled.ompt_callback_dispatch) {
    ompt_dispatch_t dispatch_type;
    ompt_data_t instance = ompt_data_none;
    ompt_dispatch_chunk_t dispatch_chunk;
    if (ompt_work_type == ompt_work_sections) {
      dispatch_type = ompt_dispatch_section;
      instance.ptr = codeptr;
    } else {
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupper, incr);
      dispatch_type = (ompt_work_type == ompt_work_distribute)
                          ? ompt_dispatch_distribute_chunk
                          : ompt_dispatch_ws_loop_chunk;
      instance.ptr = &dispatch_chunk;
    }
    ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
        &(team_info->parallel_data), &(task_info->task_data), dispatch_type,
        instance);
  }
#endif

  KMP_STATS_LOOP_END(OMP_loop_static_iterations);
  return;
}

template <typename T>
static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid,
                                       kmp_int32 schedule, kmp_int32 *plastiter,
                                       T *plower, T *pupper, T *pupperDist,
                                       typename traits_t<T>::signed_t *pstride,
                                       typename traits_t<T>::signed_t incr,
                                       typename traits_t<T>::signed_t chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                       ,
                                       void *codeptr
#endif
) {
  KMP_COUNT_BLOCK(OMP_DISTRIBUTE);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute);
  KMP_PUSH_PARTITIONED_TIMER(OMP_distribute_scheduling);
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 tid;
  kmp_uint32 nth;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride);
  KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "
        "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<ST>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100,
             (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk));
    __kmp_str_free(&buff);
  }
#endif

  if (__kmp_env_consistency_check) {
    __kmp_push_workshare(gtid, ct_pdo, loc);
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  tid = __kmp_tid_from_gtid(gtid);
  th = __kmp_threads[gtid];
  nth = th->th.th_team_nproc;
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

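  // Illustrative note (not from the original source): the distribution below
  // is two-level. The global iteration space is first split among the teams,
  // with *pupperDist receiving the upper bound of this team's portion; that
  // portion is then split among the nth threads of the team according to the
  // loop schedule, with *plower/*pupper receiving the thread's chunk.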
  // compute global trip count
  if (incr == 1) {
    trip_count = *pupper - *plower + 1;
  } else if (incr == -1) {
    trip_count = *plower - *pupper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(*pupper - *plower) / incr + 1;
  } else {
    trip_count = (UT)(*plower - *pupper) / (-incr) + 1;
  }

  *pstride = *pupper - *plower; // just in case (can be unused)
  if (trip_count <= nteams) {
    KMP_DEBUG_ASSERT(
        __kmp_static == kmp_sch_static_greedy ||
        __kmp_static ==
            kmp_sch_static_balanced); // Unknown static scheduling type.
    // only primary threads of some teams get a single iteration; other
    // threads get nothing
    if (team_id < trip_count && tid == 0) {
      *pupper = *pupperDist = *plower = *plower + team_id * incr;
    } else {
      *pupperDist = *pupper;
      *plower = *pupper + incr; // compiler should skip loop body
    }
    if (plastiter != NULL)
      *plastiter = (tid == 0 && team_id == trip_count - 1);
  } else {
    // Get the team's chunk first (each team gets at most one chunk)
    if (__kmp_static == kmp_sch_static_balanced) {
      UT chunkD = trip_count / nteams;
      UT extras = trip_count % nteams;
      *plower +=
          incr * (team_id * chunkD + (team_id < extras ? team_id : extras));
      *pupperDist = *plower + chunkD * incr - (team_id < extras ? 0 : incr);
      if (plastiter != NULL)
        *plastiter = (team_id == nteams - 1);
    } else {
      T chunk_inc_count =
          (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr;
      T upper = *pupper;
      KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
      // Unknown static scheduling type.
      *plower += team_id * chunk_inc_count;
      *pupperDist = *plower + chunk_inc_count - incr;
      // Check/correct bounds if needed
      if (incr > 0) {
        if (*pupperDist < *plower)
          *pupperDist = traits_t<T>::max_value;
        if (plastiter != NULL)
          *plastiter = *plower <= upper && *pupperDist > upper - incr;
        if (*pupperDist > upper)
          *pupperDist = upper; // tracker C73258
        if (*plower > *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      } else {
        if (*pupperDist > *plower)
          *pupperDist = traits_t<T>::min_value;
        if (plastiter != NULL)
          *plastiter = *plower >= upper && *pupperDist < upper - incr;
        if (*pupperDist < upper)
          *pupperDist = upper; // tracker C73258
        if (*plower < *pupperDist) {
          *pupper = *pupperDist; // no iterations available for the team
          goto end;
        }
      }
    }
    // Get the parallel loop chunk now (for thread)
    // compute trip count for team's chunk
    if (incr == 1) {
      trip_count = *pupperDist - *plower + 1;
    } else if (incr == -1) {
      trip_count = *plower - *pupperDist + 1;
    } else if (incr > 1) {
      // upper-lower can exceed the limit of signed type
      trip_count = (UT)(*pupperDist - *plower) / incr + 1;
    } else {
      trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1;
    }
    KMP_DEBUG_ASSERT(trip_count);
    switch (schedule) {
    case kmp_sch_static: {
      if (trip_count <= nth) {
        KMP_DEBUG_ASSERT(
            __kmp_static == kmp_sch_static_greedy ||
            __kmp_static ==
                kmp_sch_static_balanced); // Unknown static scheduling type.
        if (tid < trip_count)
          *pupper = *plower = *plower + tid * incr;
        else
          *plower = *pupper + incr; // no iterations available
        if (plastiter != NULL)
          if (*plastiter != 0 && !(tid == trip_count - 1))
            *plastiter = 0;
      } else {
        if (__kmp_static == kmp_sch_static_balanced) {
          UT chunkL = trip_count / nth;
          UT extras = trip_count % nth;
          *plower += incr * (tid * chunkL + (tid < extras ? tid : extras));
          *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr);
          if (plastiter != NULL)
            if (*plastiter != 0 && !(tid == nth - 1))
              *plastiter = 0;
        } else {
          T chunk_inc_count =
              (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr;
          T upper = *pupperDist;
          KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy);
          // Unknown static scheduling type.
          *plower += tid * chunk_inc_count;
          *pupper = *plower + chunk_inc_count - incr;
          if (incr > 0) {
            if (*pupper < *plower)
              *pupper = traits_t<T>::max_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower <= upper && *pupper > upper - incr))
                *plastiter = 0;
            if (*pupper > upper)
              *pupper = upper; // tracker C73258
          } else {
            if (*pupper > *plower)
              *pupper = traits_t<T>::min_value;
            if (plastiter != NULL)
              if (*plastiter != 0 &&
                  !(*plower >= upper && *pupper < upper - incr))
                *plastiter = 0;
            if (*pupper < upper)
              *pupper = upper; // tracker C73258
          }
        }
      }
      break;
    }
    case kmp_sch_static_chunked: {
      ST span;
      if (chunk < 1)
        chunk = 1;
      span = chunk * incr;
      *pstride = span * nth;
      *plower = *plower + (span * tid);
      *pupper = *plower + span - incr;
      if (plastiter != NULL)
        if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth))
          *plastiter = 0;
      break;
    }
    default:
      KMP_ASSERT2(0,
                  "__kmpc_dist_for_static_init: unknown loop scheduling type");
      break;
    }
  }
end:;
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format(
        "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "
        "stride=%%%s signed?<%s>\n",
        traits_t<T>::spec, traits_t<T>::spec, traits_t<T>::spec,
        traits_t<ST>::spec, traits_t<T>::spec);
    KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pupperDist, *pstride));
    __kmp_str_free(&buff);
  }
#endif
  KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work || ompt_enabled.ompt_callback_dispatch) {
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
    if (ompt_enabled.ompt_callback_work) {
      ompt_callbacks.ompt_callback(ompt_callback_work)(
          ompt_work_distribute, ompt_scope_begin, &(team_info->parallel_data),
          &(task_info->task_data), 0, codeptr);
    }
    if (ompt_enabled.ompt_callback_dispatch) {
      ompt_data_t instance = ompt_data_none;
      ompt_dispatch_chunk_t dispatch_chunk;
      OMPT_GET_DISPATCH_CHUNK(dispatch_chunk, *plower, *pupperDist, incr);
      instance.ptr = &dispatch_chunk;
      ompt_callbacks.ompt_callback(ompt_callback_dispatch)(
          &(team_info->parallel_data), &(task_info->task_data),
          ompt_dispatch_distribute_chunk, instance);
    }
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
  KMP_STATS_LOOP_END(OMP_distribute_iterations);
  return;
}

template <typename T>
static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 *p_last, T *p_lb, T *p_ub,
                                   typename traits_t<T>::signed_t *p_st,
                                   typename traits_t<T>::signed_t incr,
                                   typename traits_t<T>::signed_t chunk) {
  // The routine returns the first chunk distributed to the team and the
  // stride for computing subsequent chunks. The last-iteration flag is set
  // for the team that will execute the last iteration of the loop.
  // The routine is called for dist_schedule(static, chunk) only.
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  kmp_uint32 team_id;
  kmp_uint32 nteams;
  UT trip_count;
  T lower;
  T upper;
  ST span;
  kmp_team_t *team;
  kmp_info_t *th;

  KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st);
  KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid));
  __kmp_assert_valid_gtid(gtid);
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d "
                            "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n",
                            traits_t<T>::spec, traits_t<T>::spec,
                            traits_t<ST>::spec, traits_t<ST>::spec,
                            traits_t<T>::spec);
    KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif

  lower = *p_lb;
  upper = *p_ub;
  if (__kmp_env_consistency_check) {
    if (incr == 0) {
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
                            loc);
    }
    if (incr > 0 ? (upper < lower) : (lower < upper)) {
      // The loop is illegal.
      // Some zero-trip loops are maintained by the compiler, e.g.:
      //   for(i=10;i<0;++i) // lower >= upper - run-time check
      //   for(i=0;i>10;--i) // lower <= upper - run-time check
      //   for(i=0;i>10;++i) // incr > 0      - compile-time check
      //   for(i=10;i<0;--i) // incr < 0      - compile-time check
      // The compiler does not check the following illegal loops:
      //   for(i=0;i<10;i+=incr) // where incr<0
      //   for(i=10;i>0;i-=incr) // where incr<0
      __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc);
    }
  }
  th = __kmp_threads[gtid];
  team = th->th.th_team;
  KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct
  nteams = th->th.th_teams_size.nteams;
  team_id = team->t.t_master_tid;
  KMP_DEBUG_ASSERT(nteams == (kmp_uint32)team->t.t_parent->t.t_nproc);

  // compute trip count
  if (incr == 1) {
    trip_count = upper - lower + 1;
  } else if (incr == -1) {
    trip_count = lower - upper + 1;
  } else if (incr > 0) {
    // upper-lower can exceed the limit of signed type
    trip_count = (UT)(upper - lower) / incr + 1;
  } else {
    trip_count = (UT)(lower - upper) / (-incr) + 1;
  }
  if (chunk < 1)
    chunk = 1;
  span = chunk * incr;
  *p_st = span * nteams;
  *p_lb = lower + (span * team_id);
  *p_ub = *p_lb + span - incr;
  if (p_last != NULL)
    *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams);
  // Correct upper bound if needed
  if (incr > 0) {
    if (*p_ub < *p_lb) // overflow?
      *p_ub = traits_t<T>::max_value;
    if (*p_ub > upper)
      *p_ub = upper; // tracker C73258
  } else { // incr < 0
    if (*p_ub > *p_lb)
      *p_ub = traits_t<T>::min_value;
    if (*p_ub < upper)
      *p_ub = upper; // tracker C73258
  }
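  // Illustrative note (not from the original source): with nteams = 2,
  // chunk = 3, lower = 0, upper = 9, incr = 1: span = 3 and *p_st = 6, so
  // team 0's first chunk is [0,2] and team 1's is [3,5]; advancing by the
  // stride gives team 0 [6,8] and team 1 [9,11], the latter cut to 9 by the
  // loop's global upper bound. The last chunk has index (10 - 1) / 3 = 3,
  // so *p_last is set for team 3 % 2 = 1.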
#ifdef KMP_DEBUG
  {
    char *buff;
    // create format specifiers before the debug output
    buff =
        __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d "
                         "iter=(%%%s, %%%s, %%%s) chunk %%%s\n",
                         traits_t<T>::spec, traits_t<T>::spec,
                         traits_t<ST>::spec, traits_t<ST>::spec);
    KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk));
    __kmp_str_free(&buff);
  }
#endif
}

//------------------------------------------------------------------------------
extern "C" {
void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int32 *plower,
                              kmp_int32 *pupper, kmp_int32 *pstride,
                              kmp_int32 incr, kmp_int32 chunk) {
  __kmp_for_static_init<kmp_int32>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}
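
// Illustrative sketch (not from the original source): a compiler typically
// lowers a statically scheduled worksharing loop such as
//   #pragma omp for schedule(static)
//   for (int i = 0; i < n; ++i) body(i);
// into, roughly:
//   kmp_int32 last = 0, lower = 0, upper = n - 1, stride = 1;
//   __kmpc_for_static_init_4(&loc, gtid, kmp_sch_static, &last,
//                            &lower, &upper, &stride, 1, 1);
//   for (kmp_int32 i = lower; i <= upper; ++i) body(i);
//   __kmpc_for_static_fini(&loc, gtid);
// For chunked schedules the generated loop is doubly nested, with the outer
// loop advancing lower/upper by stride to reach the thread's next chunk.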

void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint32 *plower, kmp_uint32 *pupper,
                               kmp_int32 *pstride, kmp_int32 incr,
                               kmp_int32 chunk) {
  __kmp_for_static_init<kmp_uint32>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype,
                              kmp_int32 *plastiter, kmp_int64 *plower,
                              kmp_int64 *pupper, kmp_int64 *pstride,
                              kmp_int64 incr, kmp_int64 chunk) {
  __kmp_for_static_init<kmp_int64>(loc, gtid, schedtype, plastiter, plower,
                                   pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                   ,
                                   OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                               kmp_int32 schedtype, kmp_int32 *plastiter,
                               kmp_uint64 *plower, kmp_uint64 *pupper,
                               kmp_int64 *pstride, kmp_int64 incr,
                               kmp_int64 chunk) {
  __kmp_for_static_init<kmp_uint64>(loc, gtid, schedtype, plastiter, plower,
                                    pupper, pstride, incr, chunk
#if OMPT_SUPPORT && OMPT_OPTIONAL
                                    ,
                                    OMPT_GET_RETURN_ADDRESS(0)
#endif
  );
}

#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_CODEPTR_ARG , OMPT_GET_RETURN_ADDRESS(0)
#else
#define OMPT_CODEPTR_ARG
#endif

void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int32 *plower, kmp_int32 *pupper,
                                   kmp_int32 *pupperD, kmp_int32 *pstride,
                                   kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_int32>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint32 *plower, kmp_uint32 *pupper,
                                    kmp_uint32 *pupperD, kmp_int32 *pstride,
                                    kmp_int32 incr, kmp_int32 chunk) {
  __kmp_dist_for_static_init<kmp_uint32>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid,
                                   kmp_int32 schedule, kmp_int32 *plastiter,
                                   kmp_int64 *plower, kmp_int64 *pupper,
                                   kmp_int64 *pupperD, kmp_int64 *pstride,
                                   kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_int64>(loc, gtid, schedule, plastiter, plower,
                                        pupper, pupperD, pstride, incr,
                                        chunk OMPT_CODEPTR_ARG);
}

void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid,
                                    kmp_int32 schedule, kmp_int32 *plastiter,
                                    kmp_uint64 *plower, kmp_uint64 *pupper,
                                    kmp_uint64 *pupperD, kmp_int64 *pstride,
                                    kmp_int64 incr, kmp_int64 chunk) {
  __kmp_dist_for_static_init<kmp_uint64>(loc, gtid, schedule, plastiter, plower,
                                         pupper, pupperD, pstride, incr,
                                         chunk OMPT_CODEPTR_ARG);
}
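
// Illustrative note (not from the original source): the _dist_ entry points
// above serve composite "distribute parallel for" constructs with
// dist_schedule(static): the schedule argument selects the static schedule
// of the inner worksharing loop, *pupperD receives the upper bound of the
// team's portion, and *plower/*pupper receive the calling thread's chunk
// within that portion. dist_schedule(static, chunk) is instead handled by
// the __kmpc_team_static_init_* routines below.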

//------------------------------------------------------------------------------
// Auxiliary routines for Distribute Parallel Loop construct implementation
// Transfer call to template< type T >
// __kmp_team_static_init( ident_t *loc, int gtid,
//     int *p_last, T *lb, T *ub, ST *st, ST incr, ST chunk )

void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int32 *p_lb, kmp_int32 *p_ub,
                               kmp_int32 *p_st, kmp_int32 incr,
                               kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint32 *p_lb, kmp_uint32 *p_ub,
                                kmp_int32 *p_st, kmp_int32 incr,
                                kmp_int32 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint32>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}

void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                               kmp_int64 *p_lb, kmp_int64 *p_ub,
                               kmp_int64 *p_st, kmp_int64 incr,
                               kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_int64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                    chunk);
}

void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last,
                                kmp_uint64 *p_lb, kmp_uint64 *p_ub,
                                kmp_int64 *p_st, kmp_int64 incr,
                                kmp_int64 chunk) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  __kmp_team_static_init<kmp_uint64>(loc, gtid, p_last, p_lb, p_ub, p_st, incr,
                                     chunk);
}
} // extern "C"