#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      long retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_get_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      long retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#if KMP_OS_WINDOWS
    int set_process_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set process affinity operation when not capable");
      int error = 0;
      const hwloc_topology_support *support =
          hwloc_topology_get_support(__kmp_hwloc_topology);
      if (support->cpubind->set_proc_cpubind) {
        int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask,
                                       HWLOC_CPUBIND_PROCESS);
        if (retval >= 0)
          return 0;
        error = errno;
        if (abort_on_error)
          __kmp_fatal(KMP_MSG(FunctionError, "hwloc_set_cpubind()"),
                      KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
#endif
    int get_proc_group() const override {
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
        }
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity.flags.verbose) {
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
        }
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Is topology discovery possible (pu), and did all hwloc calls succeed?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // Enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // Indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
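
// Illustrative sketch (not part of the build): how the runtime typically
// drives an affinity backend through the abstract KMPAffinity interface.
// The concrete class chosen here (KMPHwlocAffinity) and the hard-coded CPU
// index are assumptions for the example only.
//
//   KMPAffinity *affinity = new KMPHwlocAffinity();
//   affinity->determine_capable("KMP_AFFINITY");
//   KMPAffinity::Mask *m = affinity->allocate_mask();
//   m->zero();
//   m->set(0); // request logical CPU 0
//   m->set_system_affinity(/*abort_on_error=*/true);
//   affinity->deallocate_mask(m);
//   delete affinity;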
#if KMP_OS_LINUX || KMP_OS_FREEBSD
#if KMP_OS_LINUX
/* The affinity system calls are used directly on Linux* OS; the expected
   syscall numbers are verified per architecture below. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_LOONGARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_RISCV64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
#elif KMP_OS_FREEBSD
#include <pthread.h>
#include <pthread_np.h>
#endif
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned long mask_t;
    typedef decltype(__kmp_affin_mask_size) mask_size_type;
    static const unsigned int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    static const mask_t ONE = 1;
    mask_size_type get_num_mask_types() const {
      return __kmp_affin_mask_size / sizeof(mask_t);
    }

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= (ONE << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & (ONE << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~(ONE << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = (mask_t)0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      mask_size_type e = get_num_mask_types();
      for (mask_size_type i = 0; i < e; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override {
      int e;
      __kmp_type_convert(get_num_mask_types() * BITS_PER_MASK_T, &e);
      return e;
    }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_getaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
#if KMP_OS_LINUX
      long retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
#elif KMP_OS_FREEBSD
      int r = pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size,
                                     reinterpret_cast<cpuset_t *>(mask));
      int retval = (r == 0 ? 0 : -1);
#endif
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_fatal(KMP_MSG(FunctionError, "pthread_setaffinity_np()"),
                    KMP_ERR(error), __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
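
// Worked example (illustrative, assuming a 64-bit unsigned long): the flat CPU
// index i is split into a word index and a bit index, so with
// BITS_PER_MASK_T == 64, CPU 70 lives in mask[70 / 64] == mask[1] at bit
// 70 % 64 == 6, i.e. set(70) performs mask[1] |= (1UL << 6).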
#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (int i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_process_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups <= 1) {
        if (!SetProcessAffinityMask(GetCurrentProcess(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
                        __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                        KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
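
// Worked example (illustrative): with 64-bit ULONG_PTR masks, one mask_t is
// kept per Windows processor group, so logical CPU 70 maps to group
// 70 / 64 == 1, bit 70 % 64 == 6, and set_system_affinity() builds a
// GROUP_AFFINITY with Group == 1 and Mask == mask[1] for that single group.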
// Describes an attribute (core type and/or efficiency) for a topology level
struct kmp_hw_attr_t {
  int core_type : 8;
  int core_eff : 8;
  unsigned valid : 1;
  unsigned reserved : 15;

  static const int UNKNOWN_CORE_EFF = -1;

  kmp_hw_attr_t()
      : core_type(KMP_HW_CORE_TYPE_UNKNOWN), core_eff(UNKNOWN_CORE_EFF),
        valid(0), reserved(0) {}
  void set_core_type(kmp_hw_core_type_t type) {
    valid = 1;
    core_type = type;
  }
  void set_core_eff(int eff) {
    valid = 1;
    core_eff = eff;
  }
  kmp_hw_core_type_t get_core_type() const {
    return (kmp_hw_core_type_t)core_type;
  }
  int get_core_eff() const { return core_eff; }
  bool is_core_type_valid() const {
    return core_type != KMP_HW_CORE_TYPE_UNKNOWN;
  }
  bool is_core_eff_valid() const { return core_eff != UNKNOWN_CORE_EFF; }
  operator bool() const { return valid; }
  void clear() {
    core_type = KMP_HW_CORE_TYPE_UNKNOWN;
    core_eff = UNKNOWN_CORE_EFF;
    valid = 0;
  }
  bool contains(const kmp_hw_attr_t &other) const {
    if (!valid && !other.valid)
      return true;
    if (valid && other.valid) {
      if (other.is_core_type_valid()) {
        if (!is_core_type_valid() || (get_core_type() != other.get_core_type()))
          return false;
      }
      if (other.is_core_eff_valid()) {
        if (!is_core_eff_valid() || (get_core_eff() != other.get_core_eff()))
          return false;
      }
      return true;
    }
    return false;
  }
  bool operator==(const kmp_hw_attr_t &rhs) const {
    return (rhs.valid == valid && rhs.core_eff == core_eff &&
            rhs.core_type == core_type);
  }
  bool operator!=(const kmp_hw_attr_t &rhs) const { return !operator==(rhs); }
};
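
// Illustrative sketch (not compiled): how attribute matching behaves. The
// KMP_HW_CORE_TYPE_CORE value used below is an assumption valid on x86 builds.
//
//   kmp_hw_attr_t want, have;
//   want.set_core_type(KMP_HW_CORE_TYPE_CORE); // constrain core type only
//   have.set_core_type(KMP_HW_CORE_TYPE_CORE);
//   have.set_core_eff(1);
//   bool ok = have.contains(want); // true: type matches, eff unconstrained
//   bool eq = (have == want);      // false: core_eff differs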

#if KMP_AFFINITY_SUPPORTED
KMP_BUILD_ASSERT(sizeof(kmp_hw_attr_t) == sizeof(kmp_affinity_attrs_t));
#endif

class kmp_hw_thread_t {
public:
  static const int UNKNOWN_ID = -1;
  static const int MULTIPLE_ID = -2;
  static int compare_ids(const void *a, const void *b);
  static int compare_compact(const void *a, const void *b);
  int ids[KMP_HW_LAST];
  int sub_ids[KMP_HW_LAST];
  bool leader;
  int os_id;
  kmp_hw_attr_t attrs;

  void print() const;
  void clear() {
    for (int i = 0; i < (int)KMP_HW_LAST; ++i)
      ids[i] = UNKNOWN_ID;
    leader = false;
    attrs.clear();
  }
};
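
// Illustrative note: ids[] is indexed by topology level, so on a three-level
// machine a hardware thread might carry, e.g., ids[0] == socket id,
// ids[1] == core id, ids[2] == thread id, with UNKNOWN_ID (-1) marking entries
// that have not been filled in. The actual level order is whatever
// kmp_topology_t records for the detected machine.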

class kmp_topology_t {
  struct flags_t {
    int uniform : 1;
    int reserved : 31;
  };

  int depth;

  // The following arrays are of size depth
  kmp_hw_t *types;
  int *ratio;
  int *count;

  // Core attribute summary
  int num_core_efficiencies;
  int num_core_types;
  kmp_hw_core_type_t core_types[KMP_HW_MAX_NUM_CORE_TYPES];

  // The hardware threads array and its size
  kmp_hw_thread_t *hw_threads;
  int num_hw_threads;

  // equivalent[type] is the topology level that the given type maps onto
  kmp_hw_t equivalent[KMP_HW_LAST];

  flags_t flags;
  int compact;

  // Insert a new topology layer after allocation
  void _insert_layer(kmp_hw_t type, const int *ids);

#if KMP_GROUP_AFFINITY
  // Insert topology information about Windows Processor groups
  void _insert_windows_proc_groups();
#endif
  void _gather_enumeration_information();
  void _remove_radix1_layers();
  void _discover_uniformity();
  void _set_last_level_cache();
  // Count cores with attribute attr, either per level above or in total
  int _get_ncores_with_attr(const kmp_hw_attr_t &attr, int above,
                            bool find_all = false) const;

public:
  // Force use of allocate() and deallocate()
  kmp_topology_t() = delete;
  kmp_topology_t(const kmp_topology_t &t) = delete;
  kmp_topology_t(kmp_topology_t &&t) = delete;
  kmp_topology_t &operator=(const kmp_topology_t &t) = delete;
  kmp_topology_t &operator=(kmp_topology_t &&t) = delete;

  static kmp_topology_t *allocate(int nproc, int ndepth, const kmp_hw_t *types);
  static void deallocate(kmp_topology_t *);

  // Accessors for the hardware threads
  kmp_hw_thread_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  const kmp_hw_thread_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < num_hw_threads);
    return hw_threads[index];
  }
  int get_num_hw_threads() const { return num_hw_threads; }
  // Sort the hardware threads by their ids at each level
  void sort_ids() {
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_ids);
  }
  bool check_ids() const;
  // Build a canonical topology from the given counts
  void canonicalize(int pkgs, int cores_per_pkg, int thr_per_core, int cores);
#if KMP_AFFINITY_SUPPORTED
  // Set the granularity for affinity settings
  void set_granularity(kmp_affinity_t &stgs) const;
#endif
  bool filter_hw_subset();
  bool is_close(int hwt1, int hwt2, int level) const;
  bool is_uniform() const { return flags.uniform; }
  // Tell which topology level a given hardware type is equivalent to
  kmp_hw_t get_equivalent_type(kmp_hw_t type) const { return equivalent[type]; }
  // Set type1 to be equivalent to type2
  void set_equivalent_type(kmp_hw_t type1, kmp_hw_t type2) {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type1);
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type2);
    kmp_hw_t real_type2 = equivalent[type2];
    if (real_type2 == KMP_HW_UNKNOWN)
      real_type2 = type2;
    equivalent[type1] = real_type2;
    // Types previously set equivalent to type1 must follow it to real_type2
    KMP_FOREACH_HW_TYPE(type) {
      if (equivalent[type] == type1) {
        equivalent[type] = real_type2;
      }
    }
  }
  // Calculate the number of level1 items per level2 item (e.g. threads/core)
  int calculate_ratio(int level1, int level2) const {
    KMP_DEBUG_ASSERT(level1 >= 0 && level1 < depth);
    KMP_DEBUG_ASSERT(level2 >= 0 && level2 < depth);
    int r = 1;
    for (int level = level1; level > level2; --level)
      r *= ratio[level];
    return r;
  }
  int get_ratio(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return ratio[level];
  }
  int get_depth() const { return depth; };
  kmp_hw_t get_type(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return types[level];
  }
  int get_level(kmp_hw_t type) const {
    KMP_DEBUG_ASSERT_VALID_HW_TYPE(type);
    int eq_type = equivalent[type];
    if (eq_type == KMP_HW_UNKNOWN)
      return -1;
    for (int i = 0; i < depth; ++i)
      if (types[i] == eq_type)
        return i;
    return -1;
  }
  int get_count(int level) const {
    KMP_DEBUG_ASSERT(level >= 0 && level < depth);
    return count[level];
  }
  // Return the total number of cores with attribute attr
  int get_ncores_with_attr(const kmp_hw_attr_t &attr) const {
    return _get_ncores_with_attr(attr, -1, /*find_all=*/true);
  }
  // Return the number of cores with attribute attr per topology level above
  int get_ncores_with_attr_per(const kmp_hw_attr_t &attr, int above) const {
    return _get_ncores_with_attr(attr, above, /*find_all=*/false);
  }
#if KMP_AFFINITY_SUPPORTED
  friend int kmp_hw_thread_t::compare_compact(const void *a, const void *b);
  void sort_compact(kmp_affinity_t &affinity) {
    compact = affinity.compact;
    qsort(hw_threads, num_hw_threads, sizeof(kmp_hw_thread_t),
          kmp_hw_thread_t::compare_compact);
  }
#endif
  void print(const char *env_var = "KMP_AFFINITY") const;
};
extern kmp_topology_t *__kmp_topology;
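
// Illustrative sketch (not compiled): walking the detected topology through
// the global __kmp_topology object once it has been built.
//
//   if (__kmp_topology) {
//     int depth = __kmp_topology->get_depth();
//     for (int level = 0; level < depth; ++level) {
//       kmp_hw_t type = __kmp_topology->get_type(level);
//       int per_parent = __kmp_topology->get_ratio(level);
//       int total = __kmp_topology->get_count(level);
//       // e.g. report __kmp_hw_get_keyword(type), per_parent, total
//     }
//   }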

class kmp_hw_subset_t {
  const static size_t MAX_ATTRS = KMP_HW_MAX_NUM_CORE_EFFS;

public:
  // Describe a machine topology item in KMP_HW_SUBSET
  struct item_t {
    kmp_hw_t type;
    int num_attrs;
    int num[MAX_ATTRS];
    int offset[MAX_ATTRS];
    kmp_hw_attr_t attr[MAX_ATTRS];
  };
  // Put parenthesis around max to avoid accidental use of Windows max macro.
  const static int USE_ALL = (std::numeric_limits<int>::max)();

private:
  int depth;
  int capacity;
  item_t *items;
  kmp_uint64 set;
  bool absolute;
  // The set must be able to handle up to KMP_HW_LAST number of layers
  KMP_BUILD_ASSERT(sizeof(set) * 8 >= KMP_HW_LAST);
  // Sort the KMP_HW_SUBSET items into topology order
  static int hw_subset_compare(const void *i1, const void *i2) {
    kmp_hw_t type1 = ((const item_t *)i1)->type;
    kmp_hw_t type2 = ((const item_t *)i2)->type;
    int level1 = __kmp_topology->get_level(type1);
    int level2 = __kmp_topology->get_level(type2);
    return level1 - level2;
  }

public:
  // Force use of allocate() and deallocate()
  kmp_hw_subset_t() = delete;
  kmp_hw_subset_t(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t(kmp_hw_subset_t &&t) = delete;
  kmp_hw_subset_t &operator=(const kmp_hw_subset_t &t) = delete;
  kmp_hw_subset_t &operator=(kmp_hw_subset_t &&t) = delete;

  static kmp_hw_subset_t *allocate() {
    int initial_capacity = 5;
    kmp_hw_subset_t *retval =
        (kmp_hw_subset_t *)__kmp_allocate(sizeof(kmp_hw_subset_t));
    retval->depth = 0;
    retval->capacity = initial_capacity;
    retval->set = 0ull;
    retval->absolute = false;
    retval->items = (item_t *)__kmp_allocate(sizeof(item_t) * initial_capacity);
    return retval;
  }
  static void deallocate(kmp_hw_subset_t *subset) {
    __kmp_free(subset->items);
    __kmp_free(subset);
  }
  void set_absolute() { absolute = true; }
  bool is_absolute() const { return absolute; }
  void push_back(int num, kmp_hw_t type, int offset, kmp_hw_attr_t attr) {
    // If an item of this type already exists, add to its attribute list
    for (int i = 0; i < depth; ++i) {
      if (items[i].type == type) {
        int idx = items[i].num_attrs++;
        if ((size_t)idx >= MAX_ATTRS)
          return;
        items[i].num[idx] = num;
        items[i].offset[idx] = offset;
        items[i].attr[idx] = attr;
        return;
      }
    }
    // Otherwise append a new item, growing the array if necessary
    if (depth == capacity - 1) {
      capacity *= 2;
      item_t *new_items = (item_t *)__kmp_allocate(sizeof(item_t) * capacity);
      for (int i = 0; i < depth; ++i)
        new_items[i] = items[i];
      __kmp_free(items);
      items = new_items;
    }
    items[depth].num_attrs = 1;
    items[depth].type = type;
    items[depth].num[0] = num;
    items[depth].offset[0] = offset;
    items[depth].attr[0] = attr;
    depth++;
    set |= (1ull << type);
  }
  int get_depth() const { return depth; }
  const item_t &at(int index) const {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  item_t &at(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    return items[index];
  }
  void remove(int index) {
    KMP_DEBUG_ASSERT(index >= 0 && index < depth);
    set &= ~(1ull << items[index].type);
    for (int j = index + 1; j < depth; ++j) {
      items[j - 1] = items[j];
    }
    depth--;
  }
  void sort() {
    KMP_DEBUG_ASSERT(__kmp_topology);
    qsort(items, depth, sizeof(item_t), hw_subset_compare);
  }
  bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); }
  void dump() const {
    printf("**********************\n");
    printf("*** kmp_hw_subset: ***\n");
    printf("* depth: %d\n", depth);
    printf("* items:\n");
    for (int i = 0; i < depth; ++i) {
      printf("  type: %s\n", __kmp_hw_get_keyword(items[i].type));
      for (int j = 0; j < items[i].num_attrs; ++j) {
        printf("    num: %d, offset: %d, attr: ", items[i].num[j],
               items[i].offset[j]);
        if (!items[i].attr[j]) {
          printf(" (none)\n");
        } else {
          printf(
              " core_type = %s, core_eff = %d\n",
              __kmp_hw_get_core_type_string(items[i].attr[j].get_core_type()),
              items[i].attr[j].get_core_eff());
        }
      }
    }
    printf("* set: 0x%llx\n", set);
    printf("* absolute: %d\n", absolute);
    printf("**********************\n");
  }
};
extern kmp_hw_subset_t *__kmp_hw_subset;
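
// Illustrative sketch (not compiled): the parser for KMP_HW_SUBSET would turn
// a value such as "2s,4c,8t" into one item per layer, roughly:
//
//   kmp_hw_attr_t no_attr; // default-constructed, i.e. no constraint
//   __kmp_hw_subset->push_back(2, KMP_HW_SOCKET, 0, no_attr);
//   __kmp_hw_subset->push_back(4, KMP_HW_CORE, 0, no_attr);
//   __kmp_hw_subset->push_back(8, KMP_HW_THREAD, 0, no_attr);
//   __kmp_hw_subset->sort(); // order items to match topology levels
//
// The actual parsing entry point lives elsewhere; this only shows how the
// container defined above is meant to be filled.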

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. It maps the machine topology (or a reasonable guess at it) onto the
   tree used by the hierarchical barrier. */
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  /** Number of levels in the hierarchy. When the machine is oversubscribed,
      levels are added, doubling the thread capacity each time. */
  kmp_uint32 maxLevels;

  /** Depth of the machine configuration hierarchy: the number of levels along
      the longest path from root to any leaf. */
  kmp_uint32 depth;
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // initialization state, see init_status
  volatile kmp_int8 resizing; // 0 = not resizing, 1 = resizing

  /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children
      the parent of a node at level i has; empty levels are set to 1.
      skipPerLevel[i] is the number of leaves spanned at level i. */
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels() {
    int hier_depth = __kmp_topology->get_depth();
    for (int i = hier_depth - 1, level = 0; i >= 0; --i, ++level) {
      numPerLevel[level] = __kmp_topology->get_ratio(i);
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    // Re-initialize fields explicitly in case the object is reused
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init levels to 1 item each
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Derive levels from the topology if available, otherwise guess
    if (__kmp_topology && __kmp_topology->get_depth() > 0) {
      deriveLevels();
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
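
  // Worked example (illustrative): for a machine with 4 packages, 4 cores per
  // package and 2 hardware threads per core, and the default maxLevels of 7,
  // init() ends up with numPerLevel = {2, 4, 4, 1, 1, 1, 1} and
  // skipPerLevel = {1, 2, 8, 32, 64, 128, 256}; skipPerLevel[i] is the number
  // of leaves spanned by a subtree rooted at level i, and the entries past the
  // real depth keep doubling to absorb oversubscription.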
  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }
      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }
      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;