#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    __kmp_assign_root_init_mask();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
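// NOTE (added commentary, not part of the upstream source): __kmpc_begin and
// __kmpc_end bracket a program compiled with the Intel-style begin/end entry
// points.  By default both are close to no-ops; real startup or shutdown work
// only happens when the __kmp_ignore_mppbeg()/__kmp_ignore_mppend() checks
// above (or the KMP_INITIAL_THREAD_BIND path) ask for it.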
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
  KC_TRACE(10,
           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);
  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));

  return __kmp_entry_thread()->th.th_team->t.t_nproc;
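// NOTE (added commentary): the queries above differ in scope --
// __kmpc_global_thread_num / __kmpc_global_num_threads report the gtid and
// the total number of threads known to the runtime, while the "bound"
// variants report the calling thread's id and team size within its current
// team.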
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
    if ((*name == '/') || (*name == ';')) {
    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;
    return __kmp_par_range < 0;
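// NOTE (added commentary; the psource layout described here is an assumption
// based on how this code consumes it): the fragment above implements the
// parallel-range filtering used by __kmpc_ok_to_fork.  loc->psource is a
// semicolon-separated location record, roughly
// ";file;routine;first_line;last_line;;", so the code walks ';' separators to
// compare the file name, routine name and line number against
// __kmp_par_range_filename / __kmp_par_range_routine / __kmp_par_range_lb/ub.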
  return __kmp_entry_thread()->th.th_root->r.r_active;
                             kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_threads(loc, global_tid, num_threads);
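// NOTE (illustrative sketch, not part of the source): a compiler lowering
//   #pragma omp parallel num_threads(n)
// typically emits the push immediately before the fork, e.g.
//   kmp_int32 gtid = __kmpc_global_thread_num(&loc);
//   __kmpc_push_num_threads(&loc, gtid, n);
//   __kmpc_fork_call(&loc, /*argc=*/0, (kmpc_micro)outlined_fn);
// so the requested value applies only to the next parallel region.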
void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
                proc_bind));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    ompt_frame = &master_th->th.th_current_task->ompt_task_info.frame;
    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

    __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                    VOLATILE_CAST(microtask_t) microtask,
                    VOLATILE_CAST(launch_t) __kmp_invoke_task_func,

    __kmp_join_call(loc, gtid

  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
    KMP_POP_PARTITIONED_TIMER();
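// NOTE (illustrative sketch): __kmpc_fork_call expects the parallel region to
// have been outlined into a microtask whose first two parameters receive the
// global and bound thread ids, with the shared variables passed through the
// trailing varargs, roughly:
//
//   void outlined(kmp_int32 *gtid, kmp_int32 *btid, int *shared_x) {
//     /* body of the parallel region, using *shared_x */
//   }
//   ...
//   int x = 0;
//   __kmpc_fork_call(&loc, /*argc=*/1, (kmpc_micro)outlined, &x);
//
// The exact signature of the outlined function is compiler-specific.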
                         kmp_int32 cond, void *args) {
  int gtid = __kmp_entry_gtid();

      if (args)
        microtask(&gtid, &zero, args);
      else
        microtask(&gtid, &zero);
                           kmp_int32 num_teams, kmp_int32 num_threads) {
  KA_TRACE(20,
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
                              kmp_int32 num_teams_lb, kmp_int32 num_teams_ub,
                              kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_teams_51: enter T#%d num_teams_lb=%d"
                " num_teams_ub=%d num_threads=%d\n",
                global_tid, num_teams_lb, num_teams_ub, num_threads));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_push_num_teams_51(loc, global_tid, num_teams_lb, num_teams_ub,
                          num_threads);
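// NOTE (illustrative sketch): for
//   #pragma omp teams num_teams(n) thread_limit(t)
// a compiler typically emits
//   __kmpc_push_num_teams(&loc, gtid, n, t);
//   __kmpc_fork_teams(&loc, argc, (kmpc_micro)outlined_teams_fn, ...);
// while the OpenMP 5.1 num_teams(lb:ub) form maps to __kmpc_push_num_teams_51.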
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);
  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(
      loc, gtid, fork_context_intel, argc,
      VOLATILE_CAST(microtask_t) __kmp_teams_master,
      VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, kmp_va_addr_of(ap));
  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
    KMP_SET_THREAD_STATE(previous_state);
    KMP_POP_PARTITIONED_TIMER();
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
  __kmp_assert_valid_gtid(global_tid);
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  __kmp_serialized_parallel(loc, global_tid);
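// NOTE (added commentary): __kmpc_serialized_parallel runs a parallel region
// on the encountering thread alone (for example when an if() clause evaluates
// to false or nesting limits apply); __kmpc_end_serialized_parallel below
// unwinds the team, dispatch-buffer and task-team bookkeeping pushed for that
// serialized region.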
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

  KC_TRACE(10,
           ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  __kmp_assert_valid_gtid(global_tid);
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;
  if (task_team != NULL && (task_team->tt.tt_found_proxy_tasks ||
                            task_team->tt.tt_hidden_helper_task_encountered))
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));

    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

    dispatch_private_info_t *disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer;
    serial_team->t.t_dispatch->th_disp_buffer =
        serial_team->t.t_dispatch->th_disp_buffer->next;
    __kmp_free(disp_buffer);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    __kmp_pop_current_task_from_thread(this_thr);

    if (ompd_state & OMPD_ENABLE_BP)
      ompd_bp_parallel_end();
    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
      KA_TRACE(20,
               ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
                "team %p\n",
                global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
#if KMP_AFFINITY_SUPPORTED
    if (this_thr->th.th_team->t.t_level == 0 && __kmp_affinity.flags.reset) {
      __kmp_reset_root_init_mask(global_tid);

  if (__kmp_tasking_mode != tskm_immediate_exec) {
    KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                  "depth of serial team %p to %d\n",
                  global_tid, serial_team, serial_team->t.t_serialized));

  serial_team->t.t_level--;
  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
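// NOTE (illustrative sketch): an explicit "#pragma omp barrier" is typically
// lowered to a single call,
//   __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
// with the OMPT enter-frame bookkeeping above handled inside the runtime.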
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {
    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_master, loc, NULL);
      __kmp_check_sync(global_tid, ct_master, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
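// NOTE (illustrative sketch): "#pragma omp master" is commonly lowered as
//   if (__kmpc_master(&loc, gtid)) {
//     /* master-only code */
//     __kmpc_end_master(&loc, gtid);
//   }
// only the primary thread gets a non-zero return value, and the construct
// implies no barrier; __kmpc_masked/__kmpc_end_masked below are the
// filter-clause generalization.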
  KC_TRACE(10, ("__kmpc_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  tid = __kmp_tid_from_gtid(global_tid);

    KMP_PUSH_PARTITIONED_TIMER(OMP_masked);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_masked) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;
      ompt_callbacks.ompt_callback(ompt_callback_masked)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL, 0);
      __kmp_push_sync(global_tid, ct_masked, loc, NULL);
      __kmp_check_sync(global_tid, ct_masked, loc, NULL);
  KC_TRACE(10, ("__kmpc_end_masked: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_masked) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_masked)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
    __kmp_pop_sync(global_tid, ct_masked, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.enabled) {
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;

    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);
  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
  else
    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_ordered,
        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
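// NOTE (illustrative sketch): within a loop compiled with an ordered clause,
// the body of "#pragma omp ordered" is bracketed by
//   __kmpc_ordered(&loc, gtid);
//   /* ordered body */
//   __kmpc_end_ordered(&loc, gtid);
// which serializes iterations through the team's t_ordered dispatch state.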
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {
  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
  KA_TRACE(20,
           ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

  __kmp_itt_critical_creating(ilk->lock, loc);

  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
    __kmp_itt_critical_destroyed(ilk->lock);

  KMP_DEBUG_ASSERT(*lck != NULL);
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
          KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
          !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \
#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
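// NOTE (added commentary): the TAS macros above open-code the test-and-set
// fast path used when the lock is inlined: a relaxed load first filters out
// an obviously held lock, a compare-and-store with acquire semantics claims
// it, and only on failure does the thread fall into the yield/backoff spin
// loop.  KMP_LOCK_BUSY stores gtid + 1 so that zero can remain the "free"
// value, and release is a plain store-release back to KMP_LOCK_FREE.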
#include <sys/syscall.h>

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \
#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \
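// NOTE (added commentary): the futex variant keeps (gtid + 1) << 1 in the
// poll word and uses the low bit to record that waiters may be blocked in
// FUTEX_WAIT; the release path exchanges the word back to "free" and issues a
// FUTEX_WAKE only when that waiter bit was set, so the uncontended path never
// makes a syscall.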
static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
                                                      ident_t const *loc,
                                                      kmp_int32 gtid) {
  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;

    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
#define KMP_TSX_LOCK(seq) lockseq_##seq
#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.flags.rtm)
#define KMP_CPUINFO_RTM 0

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_queuing) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm_spin) : __kmp_user_lock_seq;

  return __kmp_user_lock_seq;
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
    switch (KMP_EXTRACT_D_TAG(user_lock)) {
      return kmp_mutex_impl_queuing;
      return kmp_mutex_impl_spin;
    case locktag_rtm_spin:
      return kmp_mutex_impl_speculative;
      return kmp_mutex_impl_none;
    ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {
  case locktag_adaptive:
  case locktag_rtm_queuing:
    return kmp_mutex_impl_speculative;
  case locktag_nested_tas:
    return kmp_mutex_impl_spin;
  case locktag_nested_futex:
  case locktag_ticket:
  case locktag_queuing:
  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;
    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {
    return kmp_mutex_impl_spin;
    return kmp_mutex_impl_queuing;
  case lk_rtm_queuing:
    return kmp_mutex_impl_speculative;
    return kmp_mutex_impl_none;
void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
                               kmp_critical_name *crit, uint32_t hint) {
  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  kmp_dyna_lockseq_t lockseq = __kmp_map_hint_to_lock(hint);

  if (KMP_IS_D_LOCK(lockseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lockseq));
    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lockseq));

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
#if KMP_USE_INLINED_TAS
    if (lockseq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (lockseq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);

    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,

    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  int locktag = KMP_EXTRACT_D_TAG(crit);

    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

#if KMP_USE_INLINED_TAS
    if (locktag == locktag_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);
#elif KMP_USE_INLINED_FUTEX
    if (locktag == locktag_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);

    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;

  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);
  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_master, loc);
  __kmp_assert_valid_gtid(global_tid);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {
      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
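// NOTE (illustrative sketch): "#pragma omp single" is typically lowered as
//   if (__kmpc_single(&loc, gtid)) {
//     /* executed by the one thread that won the race */
//     __kmpc_end_single(&loc, gtid);
//   }
//   __kmpc_barrier(&loc, gtid);  // omitted when nowait is specified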
  __kmp_assert_valid_gtid(global_tid);
  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
      ompt_work_type = ompt_work_loop;
      ompt_work_type = ompt_work_sections;
      ompt_work_type = ompt_work_distribute;
    KMP_DEBUG_ASSERT(ompt_work_type);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {
  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? true : false);

void ompc_set_nested(int flag) {
  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {
  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {
  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
void KMP_EXPAND_NAME(ompc_set_affinity_format)(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t KMP_EXPAND_NAME(ompc_get_affinity_format)(char *buffer, size_t size) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
                           format_size + 1);
void KMP_EXPAND_NAME(ompc_display_affinity)(char const *format) {
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);

  __kmp_aux_display_affinity(gtid, format);

size_t KMP_EXPAND_NAME(ompc_capture_affinity)(char *buffer, size_t buf_size,
                                              char const *format) {
  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  gtid = __kmp_get_gtid();
#if KMP_AFFINITY_SUPPORTED
  if (__kmp_threads[gtid]->th.th_team->t.t_level == 0 &&
      __kmp_affinity.flags.reset) {
    __kmp_reset_root_init_mask(gtid);

  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);

  __kmp_str_buf_free(&capture_buf);
  return num_required;
void kmpc_set_stacksize(int arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {
  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {
  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);

void kmpc_set_library(int arg) {
  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {
  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {
  if (__kmp_init_serial == FALSE && arg >= KMP_MIN_DISP_NUM_BUFF &&
      arg <= KMP_MAX_DISP_NUM_BUFF) {
    __kmp_dispatch_num_buffers = arg;

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  __kmp_assign_root_init_mask();
  return __kmp_aux_get_affinity_mask_proc(proc, mask);
                        void *cpy_data, void (*cpy_func)(void *, void *),
  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
  __kmp_assert_valid_gtid(gtid);

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

    (*cpy_func)(cpy_data, *data_ptr);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
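// NOTE (illustrative sketch; argument roles are an assumption based on the
// parameter names above): for "single copyprivate(x)" the compiler passes
// __kmpc_copyprivate the size of the broadcast data, a pointer to the
// executing thread's data, a copy helper, and a didit flag recording whether
// the caller executed the single region, roughly
//   __kmpc_copyprivate(&loc, gtid, sizeof(x), &x, copy_fn, didit);
// the two barriers above make the source data visible before the copy-out
// and keep it alive until every thread has copied it.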
  KC_TRACE(10, ("__kmpc_copyprivate_light: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {
      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);
    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
    KMP_INIT_I_LOCK(lock, seq);
    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {
  if (seq == lockseq_hle || seq == lockseq_rtm_queuing ||
      seq == lockseq_rtm_spin || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;
    seq = lockseq_nested_tas;
    seq = lockseq_nested_futex;
  case lockseq_ticket:
    seq = lockseq_nested_ticket;
  case lockseq_queuing:
    seq = lockseq_nested_queuing;
    seq = lockseq_nested_drdpa;
    seq = lockseq_nested_queuing;
  KMP_INIT_I_LOCK(lock, seq);
  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
                                uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
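// NOTE (illustrative sketch): the __kmpc_*_lock entry points mirror the
// omp_lock_t API with an extra source location and gtid; a well-formed call
// sequence against these entry points is
//   omp_lock_t l;
//   __kmpc_init_lock(&loc, gtid, (void **)&l);
//   __kmpc_set_lock(&loc, gtid, (void **)&l);
//   /* critical work */
//   __kmpc_unset_lock(&loc, gtid, (void **)&l);
//   __kmpc_destroy_lock(&loc, gtid, (void **)&l);
// the nest-lock variants below follow the same pattern with omp_nest_lock_t.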
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
    lck = (kmp_user_lock_p)user_lock;
  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring(
      (kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,

      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;

  // Fast path for a TAS lock that fits into omp_lock_t: release it inline.
  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // last release: the lock is now free
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // nesting depth decreased but the owner still holds the lock
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX && \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" path: decrement the nesting depth inline
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) +
                  sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the lock without blocking */
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_test_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
/* try to acquire the nestable lock without blocking */
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // first acquisition
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // nested acquisition by the owner
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK
  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) +
                  sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_test_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_test_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
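The nestable variants report KMP_LOCK_ACQUIRED_FIRST versus a nested
re-acquisition to OMPT. An illustrative user-level sketch of that behavior,
as a separate program using the standard nestable-lock API:

// Illustrative recursive use of an OpenMP nestable lock.
#include <omp.h>
#include <stdio.h>

static omp_nest_lock_t nlock;

static void step(int depth) {
  omp_set_nest_lock(&nlock); // the owning thread may re-acquire
  if (depth > 0)
    step(depth - 1);
  omp_unset_nest_lock(&nlock);
}

int main(void) {
  omp_init_nest_lock(&nlock);
#pragma omp parallel num_threads(2)
  step(3);
  omp_destroy_nest_lock(&nlock);
  printf("done\n");
  return 0;
}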
// Stash/fetch the reduction method chosen for the calling thread.
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid) \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {
  // this function should only be called inside the critical-section reduce
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch on direct vs. indirect lock to find the lock object and set it.
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // If the lock object fits into the critical name, use it directly;
  // otherwise fall back to a pointer stored in the critical name.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Reduction on the teams construct: temporarily swap to the parent team.
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // only the team's tid 0
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore the primary thread's team state after a teams-level reduction.
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  __kmp_type_convert(task_state, &(th->th.th_task_state));
}
kmp_int32 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                               kmp_int32 num_vars, size_t reduce_size,
                               void *reduce_data,
                               void (*reduce_func)(void *lhs_data,
                                                   void *rhs_data),
                               kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // team of one thread: no synchronization required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads pop here; __kmpc_end_reduce_nowait() is not generated for
    // the atomic case
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the primary thread pop here; only the primary thread
    // reaches __kmpc_end_reduce_nowait()
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // unexpected reduction method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
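These entry points implement the reduction clause. A user-level sketch of the
construct they serve, compiled as a separate program; the compiler typically
lowers the clause to __kmpc_reduce_nowait / __kmpc_end_reduce_nowait with a
generated reduce_func that combines the per-thread partial results:

// Illustrative user-level reduction.
#include <stdio.h>

int main(void) {
  long sum = 0;
#pragma omp parallel for reduction(+ : sum)
  for (int i = 0; i < 1000; i++)
    sum += i;
  printf("sum = %ld\n", sum); // expected: 499500
  return 0;
}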
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10,
           ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == empty_reduce_block) {

    // team of one thread: nothing to release
    OMPT_REDUCTION_END;

  } else if (packed_reduction_method == atomic_reduce_block) {

    // code generation does not emit this call for the atomic case

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread gets here; the tree reduction already finished

  } else {
    KMP_ASSERT(0); // unexpected reduction method
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
                        size_t reduce_size, void *reduce_data,
                        void (*reduce_func)(void *lhs_data, void *rhs_data),
                        kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // team of one thread: no synchronization required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames

    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all workers except the primary thread pop here; only the primary thread
    // reaches __kmpc_end_reduce()
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    KMP_ASSERT(0); // unexpected reduction method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
  __kmp_assert_valid_gtid(global_tid);

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // The blocking reduce ends with a barrier that is visible to tools.
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

    OMPT_REDUCTION_END;

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_END;

    // team of one thread: only the terminating barrier remains
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    }
    OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only the primary thread executes here and releases all other workers
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    KMP_ASSERT(0); // unexpected reduction method
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
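The teams-swap helpers above exist because a reduction on the teams construct
is combined on the parent team by each team's initial thread. A user-level
sketch of that case, as a separate illustrative program:

// Illustrative teams-level reduction.
#include <stdio.h>

int main(void) {
  int x = 0;
#pragma omp teams num_teams(4) reduction(+ : x)
  x += 1; // each team contributes 1
  printf("x = %d (number of teams)\n", x);
  return 0;
}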
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}
kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  kmp_taskdata_t *parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  __kmp_assert_valid_gtid(gtid);
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // shared buffer index for this loop
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer.
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save the address of num_done to access it later without the buffer index.
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of a dimension other than the first
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count, starting with the range of dims[0].
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use saved ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // If the shared buffer is still occupied by another loop, wait for it.
  if (idx != sh_buf->doacross_buf_idx) {
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // The first thread to arrive gets NULL from the CAS and allocates the flags;
  // later threads see 1 (initialization in progress) or the final pointer.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size =
        (size_t)trace_count / 8 + 8; // in bytes, one bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization is in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // private copy to avoid touching the shared
                              // buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number and check bounds
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_sink;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_sink;
#endif
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divide by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KMP_MB();
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int64 shft;
  size_t num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of the "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if the team is serialized
  }

  // calculate the sequential iteration number (as in "wait", but without
  // out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = (size_t)pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_dependence_t deps[num_dims];
#endif
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  deps[0].variable.value = iter_number;
  deps[0].dependence_type = ompt_dependence_type_source;
#endif
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    size_t j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
#if OMPT_SUPPORT && OMPT_OPTIONAL
    deps[i].variable.value = iter;
    deps[i].dependence_type = ompt_dependence_type_source;
#endif
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_dependences) {
    ompt_callbacks.ompt_callback(ompt_callback_dependences)(
        &(OMPT_CUR_TASK_INFO(th)->task_data), deps, (kmp_uint32)num_dims);
  }
#endif
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divide by 32
  flag = 1 << shft;
  KMP_MB();
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  __kmp_assert_valid_gtid(gtid);
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done =
      KMP_TEST_THEN_INC32((kmp_uintptr_t)(pr_buf->th_doacross_info[1])) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread; free the shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (the buffer index itself is kept forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
/* OpenMP 5.x memory-management API; these forward to the internal allocator
   with the calling thread's gtid. */
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), 0, size, allocator);
}

void *omp_aligned_alloc(size_t align, size_t size,
                        omp_allocator_handle_t allocator) {
  return __kmp_alloc(__kmp_entry_gtid(), align, size, allocator);
}

void *omp_calloc(size_t nmemb, size_t size, omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), 0, nmemb, size, allocator);
}

void *omp_aligned_calloc(size_t align, size_t nmemb, size_t size,
                         omp_allocator_handle_t allocator) {
  return __kmp_calloc(__kmp_entry_gtid(), align, nmemb, size, allocator);
}

void *omp_realloc(void *ptr, size_t size, omp_allocator_handle_t allocator,
                  omp_allocator_handle_t free_allocator) {
  return __kmp_realloc(__kmp_entry_gtid(), ptr, size, allocator,
                       free_allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  ___kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
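A user-level sketch of this allocation API, as a separate illustrative
program; omp_default_mem_alloc is one of the predefined allocators declared
in <omp.h>:

// Illustrative use of omp_aligned_alloc / omp_free.
#include <omp.h>
#include <stdio.h>

int main(void) {
  double *v = (double *)omp_aligned_alloc(64, 1024 * sizeof(double),
                                          omp_default_mem_alloc);
  if (v == NULL)
    return 1;
  v[0] = 42.0;
  printf("v[0] = %g\n", v[0]);
  omp_free(v, omp_default_mem_alloc);
  return 0;
}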
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if the runtime is not initialized
  }
  return __kmp_pause_resource(level);
}
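The user-visible counterpart is the OpenMP 5.0 resource-pause API, which is
backed by the same __kmp_pause_resource helper. An illustrative separate
program:

// Illustrative soft pause: ask the runtime to release reusable resources.
#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  { /* warm up the thread pool */ }
  int rc = omp_pause_resource_all(omp_pause_soft);
  printf("omp_pause_resource_all returned %d\n", rc); // 0 on success
  return 0;
}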
void __kmpc_error(ident_t *loc, int severity, const char *message) {
  if (!__kmp_init_serial)
    __kmp_serial_initialize();

  KMP_ASSERT(severity == severity_warning || severity == severity_fatal);

#if OMPT_SUPPORT
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_error) {
    ompt_callbacks.ompt_callback(ompt_callback_error)(
        (ompt_severity_t)severity, message, KMP_STRLEN(message),
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT

  char *src_loc;
  if (loc && loc->psource) {
    kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, false);
    src_loc =
        __kmp_str_format("%s:%d:%d", str_loc.file, str_loc.line, str_loc.col);
    __kmp_str_loc_free(&str_loc);
  } else {
    src_loc = __kmp_str_format("unknown");
  }

  if (severity == severity_warning)
    KMP_WARNING(UserDirectedWarning, src_loc, message);
  else
    KMP_FATAL(UserDirectedError, src_loc, message);

  __kmp_str_free(&src_loc);
}
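__kmpc_error is the runtime side of the OpenMP 5.1/5.2 error directive with
at(execution). A user-level sketch, as a separate illustrative program:

// Illustrative runtime warning via the error directive.
#include <stdio.h>

int main(void) {
  int bad_input = 1;
  if (bad_input) {
#pragma omp error at(execution) severity(warning) message("bad input, continuing")
  }
  printf("still running after the warning\n");
  return 0;
}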
void __kmpc_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_begin,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}

void __kmpc_end_scope(ident_t *loc, kmp_int32 gtid, void *reserved) {
  // reserved is for extension of scope directive and not used.
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && ompt_enabled.ompt_callback_work) {
    kmp_team_t *team = __kmp_threads[gtid]->th.th_team;
    int tid = __kmp_tid_from_gtid(gtid);
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_scope, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));
  }
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
}
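These two entry points only emit the OMPT work events for the OpenMP 5.1
scope construct. A user-level sketch, as a separate illustrative program:

// Illustrative scope construct with a reduction.
#include <stdio.h>

int main(void) {
  int hits = 0;
#pragma omp parallel num_threads(4)
  {
#pragma omp scope reduction(+ : hits)
    { hits += 1; }
  }
  printf("hits = %d\n", hits); // expected: 4
  return 0;
}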
#ifdef KMP_USE_VERSION_SYMBOLS
// Undefine these from omp.h so they are not translated into their ompc_
// counterparts inside the KMP_VERSION_OMPC_SYMBOL macros below.
#ifdef omp_set_affinity_format
#undef omp_set_affinity_format
#endif
#ifdef omp_get_affinity_format
#undef omp_get_affinity_format
#endif
#ifdef omp_display_affinity
#undef omp_display_affinity
#endif
#ifdef omp_capture_affinity
#undef omp_capture_affinity
#endif
KMP_VERSION_OMPC_SYMBOL(ompc_set_affinity_format, omp_set_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_get_affinity_format, omp_get_affinity_format, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_display_affinity, omp_display_affinity, 50,
                        "OMP_5.0");
KMP_VERSION_OMPC_SYMBOL(ompc_capture_affinity, omp_capture_affinity, 50,
                        "OMP_5.0");
#endif // KMP_USE_VERSION_SYMBOLS