19#include "kmp_collapse.h"
22#include "ompt-specific.h"
// Absolute value for signed types: negates negative values. Unsigned
// overloads below return the value unchanged (avoids the meaningless
// `val < 0` comparison on unsigned types).
template <typename T> T __kmp_abs(const T val) {
  return (val < 0) ? -val : val;
}
// Overload for unsigned 32-bit: magnitude of an unsigned value is itself.
32kmp_uint32 __kmp_abs(
const kmp_uint32 val) {
return val; }
// Overload for unsigned 64-bit: magnitude of an unsigned value is itself.
33kmp_uint64 __kmp_abs(
const kmp_uint64 val) {
return val; }
// Three-way sign of val: returns -1 for negative, 0 for zero, +1 for
// positive. The branchless form relies only on operator< so it works for
// any ordered arithmetic type T.
template <typename T> int __kmp_sign(T val) {
  return (T(0) < val) - (val < T(0));
}
// Canonicalize one loop of the nest for IV type T: reject step == 0 under
// consistency checking, rewrite "!=" into "<" or ">" based on the step's
// sign, then relax "<"/">" into inclusive "<="/">=" so later trip-count
// math can assume inclusive comparisons throughout.
// NOTE(review): this excerpt elides parts of the definition (parameter
// list, some closing braces); comments only, code untouched.
49void kmp_canonicalize_one_loop_XX(
53 if (__kmp_env_consistency_check) {
// A zero increment can never terminate the loop; report it as an error.
54 if (bounds->step == 0) {
55 __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo,
60 if (bounds->comparison == comparison_t::comp_not_eq) {
// "!=" with positive step counts upward, with negative step downward.
62 if (bounds->step > 0) {
63 bounds->comparison = comparison_t::comp_less;
65 bounds->comparison = comparison_t::comp_greater;
69 if (bounds->comparison == comparison_t::comp_less) {
74 bounds->comparison = comparison_t::comp_less_or_eq;
75 }
else if (bounds->comparison == comparison_t::comp_greater) {
77 bounds->comparison = comparison_t::comp_greater_or_eq;
// Canonicalize every loop in the nest, dispatching on the runtime
// loop_type tag to the matching template instantiation.
// NOTE(review): case bodies/breaks are elided in this excerpt.
82void kmp_canonicalize_loop_nest(
ident_t *loc,
86 for (kmp_index_t ind = 0; ind < n; ++ind) {
87 auto bounds = &(original_bounds_nest[ind]);
89 switch (bounds->loop_type) {
90 case loop_type_t::loop_type_int32:
91 kmp_canonicalize_one_loop_XX<kmp_int32>(
95 case loop_type_t::loop_type_uint32:
96 kmp_canonicalize_one_loop_XX<kmp_uint32>(
100 case loop_type_t::loop_type_int64:
101 kmp_canonicalize_one_loop_XX<kmp_int64>(
105 case loop_type_t::loop_type_uint64:
106 kmp_canonicalize_one_loop_XX<kmp_uint64>(
// Compute and cache the trip count of one canonicalized loop (comparison
// is already "<=" or ">="). An empty range yields 0; otherwise the count
// is range / |step| (plus one — the "+1" line appears elided here).
// NOTE(review): parts of this definition are elided in this excerpt.
126kmp_loop_nest_iv_t kmp_calculate_trip_count_XX(
129 if (bounds->comparison == comparison_t::comp_less_or_eq) {
// Upward loop: ub0 < lb0 means no iterations at all.
130 if (bounds->ub0 < bounds->lb0) {
133 bounds->trip_count = 0;
138 static_cast<kmp_loop_nest_iv_t
>(bounds->ub0 - bounds->lb0) /
139 __kmp_abs(bounds->step) +
142 }
else if (bounds->comparison == comparison_t::comp_greater_or_eq) {
// Downward loop: lb0 < ub0 means no iterations at all.
143 if (bounds->lb0 < bounds->ub0) {
146 bounds->trip_count = 0;
151 static_cast<kmp_loop_nest_iv_t
>(bounds->lb0 - bounds->ub0) /
152 __kmp_abs(bounds->step) +
158 return bounds->trip_count;
// Type-dispatching wrapper: compute the trip count of one loop by calling
// the template instantiation matching its runtime loop_type tag.
// NOTE(review): case arguments/breaks are elided in this excerpt.
162kmp_loop_nest_iv_t kmp_calculate_trip_count(
bounds_info_t *bounds) {
164 kmp_loop_nest_iv_t trip_count = 0;
166 switch (bounds->loop_type) {
167 case loop_type_t::loop_type_int32:
168 trip_count = kmp_calculate_trip_count_XX<kmp_int32>(
171 case loop_type_t::loop_type_uint32:
172 trip_count = kmp_calculate_trip_count_XX<kmp_uint32>(
175 case loop_type_t::loop_type_int64:
176 trip_count = kmp_calculate_trip_count_XX<kmp_int64>(
179 case loop_type_t::loop_type_uint64:
180 trip_count = kmp_calculate_trip_count_XX<kmp_uint64>(
// Normalize a raw 64-bit IV value to its declared loop IV type by
// round-tripping it through the narrower type: truncates to the type's
// width and sign-extends signed types back into the kmp_uint64 carrier.
// NOTE(review): break statements and the final return appear elided in
// this excerpt; comments only, code untouched.
195kmp_uint64 kmp_fix_iv(loop_type_t loop_iv_type, kmp_uint64 original_iv) {
198 switch (loop_iv_type) {
199 case loop_type_t::loop_type_int8:
200 res =
static_cast<kmp_uint64
>(
static_cast<kmp_int8
>(original_iv));
202 case loop_type_t::loop_type_uint8:
203 res =
static_cast<kmp_uint64
>(
static_cast<kmp_uint8
>(original_iv));
205 case loop_type_t::loop_type_int16:
206 res =
static_cast<kmp_uint64
>(
static_cast<kmp_int16
>(original_iv));
208 case loop_type_t::loop_type_uint16:
209 res =
static_cast<kmp_uint64
>(
static_cast<kmp_uint16
>(original_iv));
211 case loop_type_t::loop_type_int32:
212 res =
static_cast<kmp_uint64
>(
static_cast<kmp_int32
>(original_iv));
214 case loop_type_t::loop_type_uint32:
215 res =
static_cast<kmp_uint64
>(
static_cast<kmp_uint32
>(original_iv));
217 case loop_type_t::loop_type_int64:
218 res =
static_cast<kmp_uint64
>(
static_cast<kmp_int64
>(original_iv));
220 case loop_type_t::loop_type_uint64:
// 64-bit unsigned needs no narrowing; the value passes through as-is.
221 res =
static_cast<kmp_uint64
>(original_iv);
// Compare two IV values for equality after truncating both to the width
// of the declared loop IV type, so stale high bits in the 64-bit carrier
// do not cause false inequality.
// NOTE(review): break statements and the final return appear elided in
// this excerpt; comments only, code untouched.
232bool kmp_ivs_eq(loop_type_t loop_iv_type, kmp_uint64 original_iv1,
233 kmp_uint64 original_iv2) {
236 switch (loop_iv_type) {
237 case loop_type_t::loop_type_int8:
238 res =
static_cast<kmp_int8
>(original_iv1) ==
239 static_cast<kmp_int8
>(original_iv2);
241 case loop_type_t::loop_type_uint8:
242 res =
static_cast<kmp_uint8
>(original_iv1) ==
243 static_cast<kmp_uint8
>(original_iv2);
245 case loop_type_t::loop_type_int16:
246 res =
static_cast<kmp_int16
>(original_iv1) ==
247 static_cast<kmp_int16
>(original_iv2);
249 case loop_type_t::loop_type_uint16:
250 res =
static_cast<kmp_uint16
>(original_iv1) ==
251 static_cast<kmp_uint16
>(original_iv2);
253 case loop_type_t::loop_type_int32:
254 res =
static_cast<kmp_int32
>(original_iv1) ==
255 static_cast<kmp_int32
>(original_iv2);
257 case loop_type_t::loop_type_uint32:
258 res =
static_cast<kmp_uint32
>(original_iv1) ==
259 static_cast<kmp_uint32
>(original_iv2);
261 case loop_type_t::loop_type_int64:
262 res =
static_cast<kmp_int64
>(original_iv1) ==
263 static_cast<kmp_int64
>(original_iv2);
265 case loop_type_t::loop_type_uint64:
266 res =
static_cast<kmp_uint64
>(original_iv1) ==
267 static_cast<kmp_uint64
>(original_iv2);
// Check whether the IV at position ind is still within its (possibly
// outer-IV-dependent) upper bound ub0 + ub1 * outer_iv; the branch below
// detects the out-of-bounds case for both loop directions.
// NOTE(review): the function header is elided in this excerpt; comments
// only, code untouched.
282 const kmp_point_t original_ivs,
285 T iv =
static_cast<T
>(original_ivs[ind]);
286 T outer_iv =
static_cast<T
>(original_ivs[bounds->outer_iv]);
// Past the bound: iv above ub for "<=" loops, below ub for ">=" loops.
288 if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
289 (iv > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
290 ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
291 (iv < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
// Compute one original IV from its iteration number (or from the lower
// bound when start_with_lower_bound), normalize it to the IV type, then
// verify it still lies within the upper bound.
// NOTE(review): the function header and some lines are elided in this
// excerpt; comments only, code untouched.
304 kmp_point_t original_ivs,
305 const kmp_iterations_t iterations, kmp_index_t ind,
306 bool start_with_lower_bound,
bool checkBounds) {
309 T outer_iv =
static_cast<T
>(original_ivs[bounds->outer_iv]);
311 if (start_with_lower_bound) {
// Fresh start for this level: begin at lb0 + lb1 * outer_iv.
314 temp = bounds->lb0 + bounds->lb1 * outer_iv;
316 auto iteration = iterations[ind];
317 temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration * bounds->step;
321 original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
324 return kmp_iv_is_in_upper_bound_XX(bounds, original_ivs, ind);
// Type-dispatching wrapper for kmp_calc_one_iv_XX: selects the template
// instantiation matching the loop's runtime type tag.
// NOTE(review): the function header and trailing case arguments are
// elided in this excerpt; comments only, code untouched.
331 kmp_point_t original_ivs,
332 const kmp_iterations_t iterations, kmp_index_t ind,
333 bool start_with_lower_bound,
bool checkBounds) {
335 switch (bounds->loop_type) {
336 case loop_type_t::loop_type_int32:
337 return kmp_calc_one_iv_XX<kmp_int32>(
339 original_ivs, iterations, ind, start_with_lower_bound,
342 case loop_type_t::loop_type_uint32:
343 return kmp_calc_one_iv_XX<kmp_uint32>(
345 original_ivs, iterations, ind, start_with_lower_bound,
348 case loop_type_t::loop_type_int64:
349 return kmp_calc_one_iv_XX<kmp_int64>(
351 original_ivs, iterations, ind, start_with_lower_bound,
354 case loop_type_t::loop_type_uint64:
355 return kmp_calc_one_iv_XX<kmp_uint64>(
357 original_ivs, iterations, ind, start_with_lower_bound,
// Rectangular-nest variant: compute one original IV from its iteration
// number; no upper-bound check is needed because rectangular trip counts
// are exact.
// NOTE(review): the function header is elided in this excerpt.
373 kmp_uint64 *original_ivs,
374 const kmp_iterations_t iterations,
377 auto iteration = iterations[ind];
381 bounds->lb1 *
static_cast<T
>(original_ivs[bounds->outer_iv]) +
382 iteration * bounds->step;
385 original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
// Type-dispatching wrapper for kmp_calc_one_iv_rectang_XX.
// NOTE(review): the function header and breaks are elided in this
// excerpt; comments only, code untouched.
389 kmp_uint64 *original_ivs,
390 const kmp_iterations_t iterations,
393 switch (bounds->loop_type) {
394 case loop_type_t::loop_type_int32:
395 kmp_calc_one_iv_rectang_XX<kmp_int32>(
397 original_ivs, iterations, ind);
399 case loop_type_t::loop_type_uint32:
400 kmp_calc_one_iv_rectang_XX<kmp_uint32>(
402 original_ivs, iterations, ind);
404 case loop_type_t::loop_type_int64:
405 kmp_calc_one_iv_rectang_XX<kmp_int64>(
407 original_ivs, iterations, ind);
409 case loop_type_t::loop_type_uint64:
410 kmp_calc_one_iv_rectang_XX<kmp_uint64>(
412 original_ivs, iterations, ind);
// Public entry point (rectangular nests): canonicalize the nest, then
// multiply per-loop trip counts to produce the collapsed total iteration
// count.
// NOTE(review): parts of this definition are elided in this excerpt.
432extern "C" kmp_loop_nest_iv_t
433__kmpc_process_loop_nest_rectang(
ident_t *loc, kmp_int32 gtid,
437 kmp_canonicalize_loop_nest(loc, original_bounds_nest, n);
439 kmp_loop_nest_iv_t total = 1;
441 for (kmp_index_t ind = 0; ind < n; ++ind) {
442 auto bounds = &(original_bounds_nest[ind]);
444 kmp_loop_nest_iv_t trip_count = kmp_calculate_trip_count( bounds);
// Public entry point (rectangular nests): decompose the collapsed IV
// new_iv into per-loop iteration numbers (innermost loop first, via
// div/mod by each trip count), then recompute each original IV outermost
// first.
// NOTE(review): parts of this definition are elided in this excerpt.
461__kmpc_calc_original_ivs_rectang(
ident_t *loc, kmp_loop_nest_iv_t new_iv,
463 kmp_uint64 *original_ivs,
466 kmp_iterations_t iterations =
467 (kmp_iterations_t)__kmp_allocate(
sizeof(kmp_loop_nest_iv_t) * n);
// Walk the nest from innermost to outermost, peeling off one loop's
// iteration count per step.
470 for (kmp_index_t ind = n; ind > 0;) {
472 auto bounds = &(original_bounds_nest[ind]);
475 auto temp = new_iv / bounds->trip_count;
476 auto iteration = new_iv % bounds->trip_count;
479 iterations[ind] = iteration;
481 KMP_ASSERT(new_iv == 0);
// Now materialize original IVs outermost-first (inner IVs may depend on
// outer ones through outer_iv).
483 for (kmp_index_t ind = 0; ind < n; ++ind) {
484 auto bounds = &(original_bounds_nest[ind]);
486 kmp_calc_one_iv_rectang(bounds, original_ivs, iterations, ind);
488 __kmp_free(iterations);
// Compute the smallest/biggest values the IV of an upward ("<=") loop can
// take across the whole nest. For bounds that depend on an outer IV
// (lb1/ub1 != 0) the candidates are evaluated at the outer loop's own
// smallest/biggest span values; otherwise the loop's own lb0/ub0 are used
// and the biggest value is snapped down onto an actual iteration point.
// NOTE(review): parts of this definition are elided in this excerpt.
503void kmp_calc_span_lessoreq_XX(
504 bounds_info_internalXX_template<T> *bounds,
505 bounds_info_internal_t *bounds_nest) {
507 typedef typename traits_t<T>::unsigned_t UT;
513 auto &bbounds = bounds->b;
515 if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
// Bounds depend on an outer IV — consult that loop's computed span.
518 bounds_info_internalXX_template<T> *previous =
519 reinterpret_cast<bounds_info_internalXX_template<T> *
>(
520 &(bounds_nest[bbounds.outer_iv]));
526 span_t bound_candidate1 =
527 bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
528 span_t bound_candidate2 =
529 bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
530 if (bound_candidate1 < bound_candidate2) {
531 bounds->span_smallest = bound_candidate1;
533 bounds->span_smallest = bound_candidate2;
541 span_t bound_candidate1 =
542 bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
543 span_t bound_candidate2 =
544 bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
545 if (bound_candidate1 < bound_candidate2) {
546 bounds->span_biggest = bound_candidate2;
548 bounds->span_biggest = bound_candidate1;
// Rectangular case: span is just the loop's own bounds.
553 bounds->span_smallest = bbounds.lb0;
554 bounds->span_biggest = bbounds.ub0;
556 if (!bounds->loop_bounds_adjusted) {
// Align the biggest value to the last point actually reached by step.
560 bounds->span_biggest -=
561 (
static_cast<UT
>(bbounds.ub0 - bbounds.lb0)) % bbounds.step;
// Mirror of kmp_calc_span_lessoreq_XX for downward (">=") loops: the loop
// starts at lb0 (its biggest value) and ends at ub0 (its smallest), so
// candidate selection and the smallest/biggest roles are reversed.
// NOTE(review): parts of this definition are elided in this excerpt.
567void kmp_calc_span_greateroreq_XX(
568 bounds_info_internalXX_template<T> *bounds,
569 bounds_info_internal_t *bounds_nest) {
571 typedef typename traits_t<T>::unsigned_t UT;
577 auto &bbounds = bounds->b;
579 if ((bbounds.lb1 != 0) || (bbounds.ub1 != 0)) {
// Bounds depend on an outer IV — consult that loop's computed span.
582 bounds_info_internalXX_template<T> *previous =
583 reinterpret_cast<bounds_info_internalXX_template<T> *
>(
584 &(bounds_nest[bbounds.outer_iv]));
590 span_t bound_candidate1 =
591 bbounds.lb0 + bbounds.lb1 * previous->span_smallest;
592 span_t bound_candidate2 =
593 bbounds.lb0 + bbounds.lb1 * previous->span_biggest;
594 if (bound_candidate1 >= bound_candidate2) {
595 bounds->span_smallest = bound_candidate1;
597 bounds->span_smallest = bound_candidate2;
605 span_t bound_candidate1 =
606 bbounds.ub0 + bbounds.ub1 * previous->span_smallest;
607 span_t bound_candidate2 =
608 bbounds.ub0 + bbounds.ub1 * previous->span_biggest;
609 if (bound_candidate1 >= bound_candidate2) {
610 bounds->span_biggest = bound_candidate2;
612 bounds->span_biggest = bound_candidate1;
// Rectangular case: downward loop starts high (lb0) and ends low (ub0).
618 bounds->span_biggest = bbounds.lb0;
619 bounds->span_smallest = bbounds.ub0;
621 if (!bounds->loop_bounds_adjusted) {
// Align the biggest value to the last point actually reached by step.
625 bounds->span_biggest -=
626 (
static_cast<UT
>(bbounds.ub0 - bbounds.lb0)) % bbounds.step;
// Dispatch span calculation by loop direction; canonicalization
// guarantees only "<=" or ">=" comparisons remain at this point.
632void kmp_calc_span_XX(
633 bounds_info_internalXX_template<T> *bounds,
634 bounds_info_internal_t *bounds_nest) {
636 if (bounds->b.comparison == comparison_t::comp_less_or_eq) {
637 kmp_calc_span_lessoreq_XX( bounds, bounds_nest);
639 KMP_ASSERT(bounds->b.comparison == comparison_t::comp_greater_or_eq);
640 kmp_calc_span_greateroreq_XX( bounds, bounds_nest);
// Widen a loop whose lower and upper bounds depend differently on an
// outer IV (lb1 != ub1) into a superset rectangular-ish range: equalize
// lb1/ub1 and compensate lb0/ub0 using the outer loop's span extremes so
// no original iteration is lost. Sets loop_bounds_adjusted accordingly.
// NOTE(review): several lines of this definition (brace closures, the
// lb0/ub0 adjustments themselves) are elided in this excerpt.
651void kmp_calc_new_bounds_XX(
652 bounds_info_internalXX_template<T> *bounds,
653 bounds_info_internal_t *bounds_nest) {
655 auto &bbounds = bounds->b;
657 if (bbounds.lb1 == bbounds.ub1) {
// Bounds already move in lockstep — nothing to adjust.
659 bounds->loop_bounds_adjusted =
false;
661 bounds->loop_bounds_adjusted =
true;
663 T old_lb1 = bbounds.lb1;
664 T old_ub1 = bbounds.ub1;
666 if (__kmp_sign(old_lb1) != __kmp_sign(old_ub1)) {
// Coefficients pull in opposite directions; pick the safer one.
674 if (((old_lb1 < 0) && (old_lb1 < old_ub1)) ||
675 ((old_lb1 > 0) && (old_lb1 > old_ub1))) {
676 bbounds.lb1 = old_ub1;
678 bbounds.ub1 = old_lb1;
// Compensate the constant terms using the outer loop's span extremes.
685 bounds_info_internalXX_template<T> *previous =
686 reinterpret_cast<bounds_info_internalXX_template<T> *
>(
687 &bounds_nest[bbounds.outer_iv]);
689 if (bbounds.comparison == comparison_t::comp_less_or_eq) {
690 if (old_lb1 < bbounds.lb1) {
691 KMP_ASSERT(old_lb1 < 0);
695 T sub = (bbounds.lb1 - old_lb1) * previous->span_biggest;
698 }
else if (old_lb1 > bbounds.lb1) {
700 T add = (old_lb1 - bbounds.lb1) * previous->span_smallest;
704 if (old_ub1 > bbounds.ub1) {
705 KMP_ASSERT(old_ub1 > 0);
709 T add = (old_ub1 - bbounds.ub1) * previous->span_biggest;
711 }
else if (old_ub1 < bbounds.ub1) {
713 T sub = (bbounds.ub1 - old_ub1) * previous->span_smallest;
// Downward (">=") loops mirror the adjustment with the opposite extremes.
717 KMP_ASSERT(bbounds.comparison == comparison_t::comp_greater_or_eq);
718 if (old_lb1 < bbounds.lb1) {
719 KMP_ASSERT(old_lb1 < 0);
720 T sub = (bbounds.lb1 - old_lb1) * previous->span_smallest;
722 }
else if (old_lb1 > bbounds.lb1) {
723 T add = (old_lb1 - bbounds.lb1) * previous->span_biggest;
727 if (old_ub1 > bbounds.ub1) {
728 KMP_ASSERT(old_ub1 > 0);
729 T add = (old_ub1 - bbounds.ub1) * previous->span_smallest;
731 }
else if (old_ub1 < bbounds.ub1) {
732 T sub = (bbounds.ub1 - old_ub1) * previous->span_biggest;
// Process one loop of a non-rectangular nest: widen its bounds, compute
// its span extremes, then return its (possibly overestimated) trip count.
742kmp_loop_nest_iv_t kmp_process_one_loop_XX(
743 bounds_info_internalXX_template<T> *bounds,
744 bounds_info_internal_t *bounds_nest) {
746 kmp_calc_new_bounds_XX( bounds, bounds_nest);
747 kmp_calc_span_XX( bounds, bounds_nest);
748 return kmp_calculate_trip_count_XX( &(bounds->b));
// Process the whole (possibly non-rectangular) nest: per-loop bound
// widening + span + trip count, multiplied into an upper-bound estimate
// of the collapsed iteration count.
// NOTE(review): case tails/breaks and the final accumulation are elided
// in this excerpt; comments only, code untouched.
756kmp_loop_nest_iv_t kmp_process_loop_nest(
757 bounds_info_internal_t *bounds_nest, kmp_index_t n) {
759 kmp_loop_nest_iv_t total = 1;
761 for (kmp_index_t ind = 0; ind < n; ++ind) {
762 auto bounds = &(bounds_nest[ind]);
763 kmp_loop_nest_iv_t trip_count = 0;
765 switch (bounds->b.loop_type) {
766 case loop_type_t::loop_type_int32:
767 trip_count = kmp_process_one_loop_XX<kmp_int32>(
768 (bounds_info_internalXX_template<kmp_int32> *)(bounds),
771 case loop_type_t::loop_type_uint32:
772 trip_count = kmp_process_one_loop_XX<kmp_uint32>(
773 (bounds_info_internalXX_template<kmp_uint32> *)(bounds),
776 case loop_type_t::loop_type_int64:
777 trip_count = kmp_process_one_loop_XX<kmp_int64>(
778 (bounds_info_internalXX_template<kmp_int64> *)(bounds),
781 case loop_type_t::loop_type_uint64:
782 trip_count = kmp_process_one_loop_XX<kmp_uint64>(
783 (bounds_info_internalXX_template<kmp_uint64> *)(bounds),
// Given a concrete IV value, recover its iteration number within its own
// loop: distance from the (outer-IV-dependent) lower bound divided by
// |step|, with operand order flipped for downward loops.
// NOTE(review): the function header is elided in this excerpt.
803 const kmp_point_t original_ivs,
806 kmp_loop_nest_iv_t iterations = 0;
808 if (bounds->comparison == comparison_t::comp_less_or_eq) {
810 (
static_cast<T
>(original_ivs[ind]) - bounds->lb0 -
811 bounds->lb1 *
static_cast<T
>(original_ivs[bounds->outer_iv])) /
812 __kmp_abs(bounds->step);
814 KMP_DEBUG_ASSERT(bounds->comparison == comparison_t::comp_greater_or_eq);
// Downward loop: lower bound is the larger value, so subtract the IV.
815 iterations = (bounds->lb0 +
816 bounds->lb1 *
static_cast<T
>(original_ivs[bounds->outer_iv]) -
817 static_cast<T
>(original_ivs[ind])) /
818 __kmp_abs(bounds->step);
// Type-dispatching wrapper for kmp_calc_number_of_iterations_XX.
// NOTE(review): trailing case arguments are elided in this excerpt.
826kmp_loop_nest_iv_t kmp_calc_number_of_iterations(
const bounds_info_t *bounds,
827 const kmp_point_t original_ivs,
830 switch (bounds->loop_type) {
831 case loop_type_t::loop_type_int32:
832 return kmp_calc_number_of_iterations_XX<kmp_int32>(
835 case loop_type_t::loop_type_uint32:
836 return kmp_calc_number_of_iterations_XX<kmp_uint32>(
839 case loop_type_t::loop_type_int64:
840 return kmp_calc_number_of_iterations_XX<kmp_int64>(
843 case loop_type_t::loop_type_uint64:
844 return kmp_calc_number_of_iterations_XX<kmp_uint64>(
// Fold a point of original IVs into a single collapsed IV using
// Horner-style accumulation over the per-loop trip counts (mixed-radix
// encoding, outermost loop most significant).
861kmp_calc_new_iv_from_original_ivs(
const bounds_info_internal_t *bounds_nest,
862 const kmp_point_t original_ivs,
865 kmp_loop_nest_iv_t new_iv = 0;
867 for (kmp_index_t ind = 0; ind < n; ++ind) {
868 auto bounds = &(bounds_nest[ind].b);
870 new_iv = new_iv * bounds->trip_count +
871 kmp_calc_number_of_iterations(bounds, original_ivs, ind);
// Convert per-loop iteration numbers into concrete original IVs, starting
// at level ind. When a level overflows its bound, control backtracks:
// lengthened_ind records where a carry was applied so deeper levels
// restart from their lower bounds.
// NOTE(review): most of the carry/backtrack logic is elided in this
// excerpt; comments only, code untouched.
882bool kmp_calc_original_ivs_from_iterations(
884 kmp_point_t original_ivs,
885 kmp_iterations_t iterations, kmp_index_t ind) {
887 kmp_index_t lengthened_ind = n;
890 auto bounds = &(original_bounds_nest[ind]);
891 bool good = kmp_calc_one_iv(bounds, original_ivs, iterations,
892 ind, (lengthened_ind < ind),
true);
903 lengthened_ind = ind;
// Reset all deeper levels so they restart from their lower bounds.
904 for (kmp_index_t i = ind + 1; i < n; ++i) {
// Compute the very first point of the collapsed iteration space: zero all
// iteration numbers, then materialize the IVs (returns false if the nest
// is empty of iterations). The temporary iterations array is freed before
// returning.
// NOTE(review): parts of this definition are elided in this excerpt.
922bool kmp_calc_original_ivs_for_start(
const bounds_info_t *original_bounds_nest,
924 kmp_point_t original_ivs) {
927 kmp_iterations_t iterations =
928 (kmp_iterations_t)__kmp_allocate(
sizeof(kmp_loop_nest_iv_t) * n);
930 for (kmp_index_t ind = n; ind > 0;) {
936 bool b = kmp_calc_original_ivs_from_iterations(original_bounds_nest, n,
939 __kmp_free(iterations);
// Advance a point by one iteration: recover each level's iteration
// number from the current IVs, bump the innermost level, and rebuild the
// IVs (with carry/backtracking handled by
// kmp_calc_original_ivs_from_iterations). Returns false when the nest is
// exhausted.
// NOTE(review): the increment line itself appears elided in this excerpt.
947bool kmp_calc_next_original_ivs(
const bounds_info_t *original_bounds_nest,
948 kmp_index_t n,
const kmp_point_t original_ivs,
949 kmp_point_t next_original_ivs) {
951 kmp_iterations_t iterations =
952 (kmp_iterations_t)__kmp_allocate(
sizeof(kmp_loop_nest_iv_t) * n);
955 for (kmp_index_t ind = 0; ind < n; ++ind) {
956 auto bounds = &(original_bounds_nest[ind]);
957 iterations[ind] = kmp_calc_number_of_iterations(bounds, original_ivs, ind);
960 for (kmp_index_t ind = 0; ind < n; ++ind) {
961 next_original_ivs[ind] = original_ivs[ind];
966 kmp_index_t ind = n - 1;
969 bool b = kmp_calc_original_ivs_from_iterations(
970 original_bounds_nest, n, next_original_ivs, iterations, ind);
972 __kmp_free(iterations);
// Compute one IV of a chunk-end point. The iteration number refers to the
// widened (updated) bounds, so the result is translated back onto the
// original loop's grid (accountForStep), clamped into the original range
// when it undershoots the lower bound, and kept at-or-after the chunk's
// start point when compare_with_start. Returns whether the value is still
// within the original upper bound.
// NOTE(review): several lines of this definition are elided in this
// excerpt; comments only, code untouched.
983bool kmp_calc_one_iv_for_chunk_end_XX(
986 kmp_point_t original_ivs,
const kmp_iterations_t iterations,
987 kmp_index_t ind,
bool start_with_lower_bound,
bool compare_with_start,
988 const kmp_point_t original_ivs_start) {
996 T outer_iv =
static_cast<T
>(original_ivs[bounds->outer_iv]);
998 if (start_with_lower_bound) {
1001 temp = bounds->lb0 + bounds->lb1 * outer_iv;
1011 auto iteration = iterations[ind];
1013 auto step = bounds->step;
// Offset that realigns the widened-bounds grid with the original grid.
1016 auto accountForStep =
1017 ((bounds->lb0 + bounds->lb1 * outer_iv) -
1018 (updated_bounds->lb0 + updated_bounds->lb1 * outer_iv)) %
1021 temp = updated_bounds->lb0 + updated_bounds->lb1 * outer_iv +
1022 accountForStep + iteration * step;
// If we undershot the original lower bound, retry with a halved advance.
1024 if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
1025 (temp < (bounds->lb0 + bounds->lb1 * outer_iv))) ||
1026 ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
1027 (temp > (bounds->lb0 + bounds->lb1 * outer_iv)))) {
1030 temp = bounds->lb0 + bounds->lb1 * outer_iv + iteration / 2 * step;
1033 if (compare_with_start) {
1035 T start =
static_cast<T
>(original_ivs_start[ind]);
1037 temp = kmp_fix_iv(bounds->loop_iv_type, temp);
// Never end a chunk before its own start point.
1041 if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
1043 ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
1047 temp = start + iteration / 4 * step;
1052 original_ivs[ind] = temp = kmp_fix_iv(bounds->loop_iv_type, temp);
1054 if (((bounds->comparison == comparison_t::comp_less_or_eq) &&
1055 (temp > (bounds->ub0 + bounds->ub1 * outer_iv))) ||
1056 ((bounds->comparison == comparison_t::comp_greater_or_eq) &&
1057 (temp < (bounds->ub0 + bounds->ub1 * outer_iv)))) {
// Type-dispatching wrapper for kmp_calc_one_iv_for_chunk_end_XX.
// NOTE(review): the leading case arguments are elided in this excerpt.
1067bool kmp_calc_one_iv_for_chunk_end(
const bounds_info_t *bounds,
1069 kmp_point_t original_ivs,
1070 const kmp_iterations_t iterations,
1071 kmp_index_t ind,
bool start_with_lower_bound,
1072 bool compare_with_start,
1073 const kmp_point_t original_ivs_start) {
1075 switch (bounds->loop_type) {
1076 case loop_type_t::loop_type_int32:
1077 return kmp_calc_one_iv_for_chunk_end_XX<kmp_int32>(
1081 original_ivs, iterations, ind, start_with_lower_bound,
1082 compare_with_start, original_ivs_start);
1084 case loop_type_t::loop_type_uint32:
1085 return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint32>(
1089 original_ivs, iterations, ind, start_with_lower_bound,
1090 compare_with_start, original_ivs_start);
1092 case loop_type_t::loop_type_int64:
1093 return kmp_calc_one_iv_for_chunk_end_XX<kmp_int64>(
1097 original_ivs, iterations, ind, start_with_lower_bound,
1098 compare_with_start, original_ivs_start);
1100 case loop_type_t::loop_type_uint64:
1101 return kmp_calc_one_iv_for_chunk_end_XX<kmp_uint64>(
1105 original_ivs, iterations, ind, start_with_lower_bound,
1106 compare_with_start, original_ivs_start);
// Find the original-IV point closing a chunk: decompose new_iv against
// the widened (updated) bounds into per-level iteration numbers, then
// rebuild IVs level by level against the ORIGINAL bounds, carrying
// overflow upward (lengthened_ind) and tracking via equal_ind how long
// the candidate point still coincides with the chunk's start point so it
// never ends before it starts.
// NOTE(review): many lines of this definition are elided in this
// excerpt; comments only, code untouched.
1128bool kmp_calc_original_ivs_for_chunk_end(
1130 const bounds_info_internal_t *updated_bounds_nest,
1131 const kmp_point_t original_ivs_start, kmp_loop_nest_iv_t new_iv,
1132 kmp_point_t original_ivs) {
1135 kmp_iterations_t iterations =
1136 (kmp_iterations_t)__kmp_allocate(
sizeof(kmp_loop_nest_iv_t) * n);
// Innermost-first decomposition of the collapsed IV (widened counts).
1139 for (kmp_index_t ind = n; ind > 0;) {
1141 auto &updated_bounds = updated_bounds_nest[ind];
1144 auto new_ind = new_iv / updated_bounds.b.trip_count;
1145 auto iteration = new_iv % updated_bounds.b.trip_count;
1148 iterations[ind] = iteration;
1150 KMP_DEBUG_ASSERT(new_iv == 0);
1152 kmp_index_t lengthened_ind = n;
1153 kmp_index_t equal_ind = -1;
1156 for (kmp_index_t ind = 0; ind < n;) {
1157 auto bounds = &(original_bounds_nest[ind]);
1158 auto updated_bounds = &(updated_bounds_nest[ind].b);
1160 bool good = kmp_calc_one_iv_for_chunk_end(
1161 bounds, updated_bounds,
1162 original_ivs, iterations, ind, (lengthened_ind < ind),
1163 (equal_ind >= ind - 1), original_ivs_start);
1169 __kmp_free(iterations);
// Overflow at this level: carry into it and restart the deeper levels.
1174 ++(iterations[ind]);
1175 lengthened_ind = ind;
1176 if (equal_ind >= lengthened_ind) {
1179 equal_ind = lengthened_ind - 1;
1181 for (kmp_index_t i = ind + 1; i < n; ++i) {
// Track the longest prefix still equal to the chunk-start point.
1188 if ((equal_ind == ind - 1) &&
1189 (kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
1190 original_ivs_start[ind]))) {
1192 }
else if ((equal_ind > ind - 1) &&
1193 !(kmp_ivs_eq(bounds->loop_iv_type, original_ivs[ind],
1194 original_ivs_start[ind]))) {
1195 equal_ind = ind - 1;
1200 __kmp_free(iterations);
// Set one IV to its final value: the (outer-IV-dependent) upper bound,
// normalized to the declared IV type.
1207template <
typename T>
1209 kmp_point_t original_ivs,
1212 T temp = bounds->ub0 +
1213 bounds->ub1 *
static_cast<T
>(original_ivs[bounds->outer_iv]);
1215 original_ivs[ind] = kmp_fix_iv(bounds->loop_iv_type, temp);
// Type-dispatching wrapper for kmp_calc_one_iv_end_XX.
// NOTE(review): the function header and trailing case arguments are
// elided in this excerpt.
1219 kmp_point_t original_ivs, kmp_index_t ind) {
1221 switch (bounds->loop_type) {
1225 case loop_type_t::loop_type_int32:
1226 kmp_calc_one_iv_end_XX<kmp_int32>(
1230 case loop_type_t::loop_type_uint32:
1231 kmp_calc_one_iv_end_XX<kmp_uint32>(
1235 case loop_type_t::loop_type_int64:
1236 kmp_calc_one_iv_end_XX<kmp_int64>(
1240 case loop_type_t::loop_type_uint64:
1241 kmp_calc_one_iv_end_XX<kmp_uint64>(
// Fill original_ivs with the last point of the nest: every level takes
// its upper-bound value (outermost first, since inner bounds may depend
// on outer IVs).
1251void kmp_calc_original_ivs_for_end(
1252 const bounds_info_t *
const original_bounds_nest, kmp_index_t n,
1253 kmp_point_t original_ivs) {
1254 for (kmp_index_t ind = 0; ind < n; ++ind) {
1255 auto bounds = &(original_bounds_nest[ind]);
1256 kmp_calc_one_iv_end(bounds, original_ivs, ind);
// Main entry point for collapsed (possibly non-rectangular) worksharing
// loops: canonicalize the nest, build widened "updated" bounds, estimate
// the total iteration count, then walk chunk by chunk assigning every
// nth chunk to this thread; the thread's chunk is returned to the
// compiler as a rectangular chunk_bounds_nest (lb1/ub1 forced to 0).
// NOTE(review): a large number of lines (return statements, chunk-loop
// structure, several closing braces) are elided in this excerpt;
// comments only, code untouched.
1280__kmpc_for_collapsed_init(
ident_t *loc, kmp_int32 gtid,
1283 kmp_index_t n, kmp_int32 *plastiter) {
1285 KMP_DEBUG_ASSERT(plastiter && original_bounds_nest);
1286 KE_TRACE(10, (
"__kmpc_for_collapsed_init called (%d)\n", gtid));
1288 if (__kmp_env_consistency_check) {
1289 __kmp_push_workshare(gtid, ct_pdo, loc);
1292 kmp_canonicalize_loop_nest(loc, original_bounds_nest, n);
// Working copy of the nest whose bounds may be widened for estimation.
1294 bounds_info_internal_t *updated_bounds_nest =
1295 (bounds_info_internal_t *)__kmp_allocate(
sizeof(bounds_info_internal_t) *
1298 for (kmp_index_t i = 0; i < n; ++i) {
1299 updated_bounds_nest[i].b = original_bounds_nest[i];
// Upper-bound estimate of the collapsed iteration count.
1302 kmp_loop_nest_iv_t total =
1303 kmp_process_loop_nest( updated_bounds_nest, n);
1305 if (plastiter != NULL) {
1311 __kmp_free(updated_bounds_nest);
1316 __kmp_assert_valid_gtid(gtid);
1317 kmp_uint32 tid = __kmp_tid_from_gtid(gtid);
1319 kmp_info_t *th = __kmp_threads[gtid];
1320 kmp_team_t *team = th->th.th_team;
1321 kmp_uint32 nth = team->t.t_nproc;
1323 KMP_DEBUG_ASSERT(tid < nth);
// Scratch points: chunk start, chunk end, and the following chunk start.
1325 kmp_point_t original_ivs_start =
1326 (kmp_point_t)__kmp_allocate(
sizeof(kmp_uint64) * n);
1327 kmp_point_t original_ivs_end =
1328 (kmp_point_t)__kmp_allocate(
sizeof(kmp_uint64) * n);
1329 kmp_point_t original_ivs_next_start =
1330 (kmp_point_t)__kmp_allocate(
sizeof(kmp_uint64) * n);
// No iterations at all: clean up and bail out.
1332 if (!kmp_calc_original_ivs_for_start(original_bounds_nest, n,
1333 original_ivs_start)) {
1335 __kmp_free(updated_bounds_nest);
1336 __kmp_free(original_ivs_start);
1337 __kmp_free(original_ivs_end);
1338 __kmp_free(original_ivs_next_start);
1364 kmp_loop_nest_iv_t new_iv = kmp_calc_new_iv_from_original_ivs(
1365 updated_bounds_nest, original_ivs_start, n);
1367 bool last_iter =
false;
1373 KMP_DEBUG_ASSERT(total >= new_iv);
// Split the remaining iterations evenly; early chunks absorb remainder.
1375 kmp_loop_nest_iv_t total_left = total - new_iv;
1376 kmp_loop_nest_iv_t chunk_size = total_left / nth;
1377 kmp_loop_nest_iv_t remainder = total_left % nth;
1379 kmp_loop_nest_iv_t curr_chunk_size = chunk_size;
1381 if (remainder > 0) {
1386#if defined(KMP_DEBUG)
1387 kmp_loop_nest_iv_t new_iv_for_start = new_iv;
1390 if (curr_chunk_size > 1) {
1391 new_iv += curr_chunk_size - 1;
// Last chunk (single thread or estimate exhausted): end at nest end.
1394 if ((nth == 1) || (new_iv >= total - 1)) {
1397 kmp_calc_original_ivs_for_end(original_bounds_nest, n,
1403 if (!kmp_calc_original_ivs_for_chunk_end(original_bounds_nest, n,
1404 updated_bounds_nest,
1405 original_ivs_start, new_iv,
1406 original_ivs_end)) {
1408 kmp_calc_original_ivs_for_end(original_bounds_nest, n,
1415#if defined(KMP_DEBUG)
1416 auto new_iv_for_end = kmp_calc_new_iv_from_original_ivs(
1417 updated_bounds_nest, original_ivs_end, n);
1418 KMP_DEBUG_ASSERT(new_iv_for_end >= new_iv_for_start);
// Nest exhausted before reaching this thread's turn: nothing to run.
1421 if (last_iter && (tid != 0)) {
1424 __kmp_free(updated_bounds_nest);
1425 __kmp_free(original_ivs_start);
1426 __kmp_free(original_ivs_end);
1427 __kmp_free(original_ivs_next_start);
1436 !kmp_calc_next_original_ivs(original_bounds_nest, n, original_ivs_end,
1437 original_ivs_next_start)) {
1440 if (plastiter != NULL) {
// Report this thread's chunk as rectangular bounds (coefficients zeroed).
1446 for (kmp_index_t i = 0; i < n; ++i) {
1447 chunk_bounds_nest[i] =
1448 original_bounds_nest[i];
1449 chunk_bounds_nest[i].lb0_u64 = original_ivs_start[i];
1450 chunk_bounds_nest[i].lb1_u64 = 0;
1452 chunk_bounds_nest[i].ub0_u64 = original_ivs_end[i];
1453 chunk_bounds_nest[i].ub1_u64 = 0;
1456 __kmp_free(updated_bounds_nest);
1457 __kmp_free(original_ivs_start);
1458 __kmp_free(original_ivs_end);
1459 __kmp_free(original_ivs_next_start);
// Not our chunk: advance the start point past this chunk and continue.
1466 bool next_chunk = kmp_calc_next_original_ivs(
1467 original_bounds_nest, n, original_ivs_end, original_ivs_start);
1477 new_iv = kmp_calc_new_iv_from_original_ivs(updated_bounds_nest,
1478 original_ivs_start, n);
1481 __kmp_free(updated_bounds_nest);
1482 __kmp_free(original_ivs_start);
1483 __kmp_free(original_ivs_end);
1484 __kmp_free(original_ivs_next_start);