14#include "kmp_affinity.h"
15#include "kmp_atomic.h"
16#include "kmp_environment.h"
21#include "kmp_settings.h"
24#include "kmp_wait_release.h"
25#include "kmp_wrapper_getpid.h"
26#include "kmp_dispatch.h"
29#include "kmp_dispatch_hier.h"
33#include "ompt-specific.h"
36#include "ompd-specific.h"
// Optional time-profiler support, compiled in only when OMP_PROFILING_SUPPORT
// is set.
39#if OMP_PROFILING_SUPPORT
40#include "llvm/Support/TimeProfiler.h"
// File-local pointer for the profile trace file; initialised to null.
41static char *ProfileTraceFile =
nullptr;
45#define KMP_USE_PRCTL 0
// Version banner strings. Each one is KMP_VERSION_PREFIX concatenated with a
// fixed description. The first is only emitted when KMP_GOMP_COMPAT is defined.
61#if defined(KMP_GOMP_COMPAT)
62char const __kmp_version_alt_comp[] =
63 KMP_VERSION_PREFIX
"alternative compiler support: yes";
66char const __kmp_version_omp_api[] =
67 KMP_VERSION_PREFIX
"API version: 5.0 (201611)";
70char const __kmp_version_lock[] =
71 KMP_VERSION_PREFIX
"lock type: run time selectable";
// Smaller of two values. Note that each argument is evaluated twice, so do not
// pass expressions with side effects.
74#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y))
// Thread descriptor object for the monitor thread (global, not file-local).
79kmp_info_t __kmp_monitor;
// Forward declarations for routines defined or used later in this file.
// NOTE(review): several prototypes below end on a comma and are immediately
// followed by the next declaration, i.e. their trailing parameters are not
// visible in this view -- consult the full file before relying on signatures.
84void __kmp_cleanup(
void);
86static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *,
int tid,
88static void __kmp_initialize_team(kmp_team_t *team,
int new_nproc,
89 kmp_internal_control_t *new_icvs,
// Place partitioning is only declared when affinity support is compiled in.
// update_master_only defaults to 0.
91#if KMP_AFFINITY_SUPPORTED
92static void __kmp_partition_places(kmp_team_t *team,
93 int update_master_only = 0);
95static void __kmp_do_serial_initialize(
void);
96void __kmp_fork_barrier(
int gtid,
int tid);
97void __kmp_join_barrier(
int gtid);
98void __kmp_setup_icv_copy(kmp_team_t *team,
int new_nproc,
99 kmp_internal_control_t *new_icvs,
ident_t *loc);
// Load-balance thread-count helper; only declared when USE_LOAD_BALANCE is
// defined.
101#ifdef USE_LOAD_BALANCE
102static int __kmp_load_balance_nproc(kmp_root_t *root,
int set_nproc);
105static int __kmp_expand_threads(
int nNeed);
107static int __kmp_unregister_root_other_thread(
int gtid);
109static void __kmp_reap_thread(kmp_info_t *thread,
int is_root);
// Insertion point into the thread pool; starts out NULL.
110kmp_info_t *__kmp_thread_pool_insert_pt = NULL;
112void __kmp_resize_dist_barrier(kmp_team_t *team,
int old_nthreads,
114void __kmp_add_threads_to_team(kmp_team_t *team,
int new_nthreads);
// Returns the global thread id (gtid) of the calling thread.
//
// The lookup strategy depends on __kmp_gtid_mode:
//   * mode >= 3: the path traced as "using TDATA";
//   * mode >= 2: keyed TLS, via __kmp_gtid_get_specific();
//   * otherwise: an internal search that compares the address of a local
//     variable against the recorded stack base/size of every entry in
//     __kmp_threads.
// If the search finds nothing, the keyed-TLS value is used and that thread's
// recorded stack bounds are refined to include the current stack address.
119int __kmp_get_global_thread_id() {
121 kmp_info_t **other_threads;
129 (
"*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n",
130 __kmp_nth, __kmp_all_nth));
137 if (!TCR_4(__kmp_init_gtid))
141 if (TCR_4(__kmp_gtid_mode) >= 3) {
142 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using TDATA\n"));
146 if (TCR_4(__kmp_gtid_mode) >= 2) {
147 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using keyed TLS\n"));
148 return __kmp_gtid_get_specific();
150 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id: using internal alg.\n"));
// The address of a local variable stands in for "where this thread's stack
// currently is".
152 stack_addr = (
char *)&stack_data;
153 other_threads = __kmp_threads;
// Scan every slot in the thread table looking for a stack range that contains
// stack_addr.
166 for (i = 0; i < __kmp_threads_capacity; i++) {
168 kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]);
172 stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize);
173 stack_base = (
char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase);
// The containment test measures downward from stack_base: the address must be
// at or below the base and within stack_size bytes of it.
177 if (stack_addr <= stack_base) {
178 size_t stack_diff = stack_base - stack_addr;
180 if (stack_diff <= stack_size) {
// Debug-only cross-check: keyed TLS is either unset (< 0) or agrees with the
// slot found by the search.
187 KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() < 0 ||
188 __kmp_gtid_get_specific() == i);
// Search failed: fall back to the keyed-TLS value.
196 (
"*** __kmp_get_global_thread_id: internal alg. failed to find "
197 "thread, using TLS\n"));
198 i = __kmp_gtid_get_specific();
209 if (!TCR_SYNC_PTR(other_threads[i]))
// If this thread's recorded stack is not allowed to grow, being outside the
// recorded range is reported as a fatal stack overflow.
214 if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) {
215 KMP_FATAL(StackOverflow, i);
// Refine the recorded stack bounds so they cover stack_addr: raise the base
// when the address lies above it, otherwise set the size from the distance
// between base and address.
// NOTE(review): the else-branch structure is not visible here -- confirm.
218 stack_base = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
219 if (stack_addr > stack_base) {
220 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr);
221 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
222 other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr -
225 TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize,
226 stack_base - stack_addr);
// When storage-map output is enabled, print the refined stack range.
230 if (__kmp_storage_map) {
231 char *stack_end = (
char *)other_threads[i]->th.th_info.ds.ds_stackbase;
232 char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize;
233 __kmp_print_storage_map_gtid(i, stack_beg, stack_end,
234 other_threads[i]->th.th_info.ds.ds_stacksize,
235 "th_%d stack (refinement)", i);
// Variant of __kmp_get_global_thread_id() that registers the caller when it
// has no gtid yet.
//
// The gtid is looked up using the same mode-dependent strategy (TDATA, keyed
// TLS, or the internal algorithm). If the result is KMP_GTID_DNE, the caller
// is treated as a new root thread: under __kmp_initz_lock the runtime either
// performs serial initialization (and re-reads the gtid from keyed TLS) or
// calls __kmp_register_root(FALSE). A debug assertion checks the final gtid is
// non-negative.
240int __kmp_get_global_thread_id_reg() {
// NOTE(review): the body of this early !__kmp_init_serial branch is not
// visible in this view.
243 if (!__kmp_init_serial) {
247 if (TCR_4(__kmp_gtid_mode) >= 3) {
248 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using TDATA\n"));
252 if (TCR_4(__kmp_gtid_mode) >= 2) {
253 KA_TRACE(1000, (
"*** __kmp_get_global_thread_id_reg: using keyed TLS\n"));
254 gtid = __kmp_gtid_get_specific();
257 (
"*** __kmp_get_global_thread_id_reg: using internal alg.\n"));
258 gtid = __kmp_get_global_thread_id();
// No gtid exists for this thread yet: register it as a new root.
262 if (gtid == KMP_GTID_DNE) {
264 (
"__kmp_get_global_thread_id_reg: Encountered new root thread. "
265 "Registering a new gtid.\n"));
266 __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
// Re-check __kmp_init_serial now that the lock is held.
267 if (!__kmp_init_serial) {
268 __kmp_do_serial_initialize();
269 gtid = __kmp_gtid_get_specific();
271 gtid = __kmp_register_root(FALSE);
273 __kmp_release_bootstrap_lock(&__kmp_initz_lock);
277 KMP_DEBUG_ASSERT(gtid >= 0);
// Optionally prints the stack range of thread `th` and checks it against the
// stacks of all other registered threads.
//
// th: thread descriptor whose recorded stack base/size are examined.
//
// When __kmp_storage_map is set, the thread's stack range is printed (labelled
// "mon" for the monitor gtid, otherwise by gtid). When __kmp_env_checks is TRUE
// and the gtid is not an uber gtid, the range is compared against every other
// entry in __kmp_threads; an overlap results in a __kmp_fatal(StackOverlap)
// call.
283void __kmp_check_stack_overlap(kmp_info_t *th) {
285 char *stack_beg = NULL;
286 char *stack_end = NULL;
289 KA_TRACE(10, (
"__kmp_check_stack_overlap: called\n"));
290 if (__kmp_storage_map) {
// The stack range is [stackbase - stacksize, stackbase).
291 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
292 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
294 gtid = __kmp_gtid_from_thread(th);
// The label says "initial" while ds_stackgrow is set and "actual" otherwise.
296 if (gtid == KMP_GTID_MONITOR) {
297 __kmp_print_storage_map_gtid(
298 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
299 "th_%s stack (%s)",
"mon",
300 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
302 __kmp_print_storage_map_gtid(
303 gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize,
304 "th_%d stack (%s)", gtid,
305 (th->th.th_info.ds.ds_stackgrow) ?
"initial" :
"actual");
311 gtid = __kmp_gtid_from_thread(th);
312 if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) {
314 (
"__kmp_check_stack_overlap: performing extensive checking\n"));
// Compute the range here if the storage-map branch above did not.
315 if (stack_beg == NULL) {
316 stack_end = (
char *)th->th.th_info.ds.ds_stackbase;
317 stack_beg = stack_end - th->th.th_info.ds.ds_stacksize;
320 for (f = 0; f < __kmp_threads_capacity; f++) {
321 kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]);
// Skip empty slots and the thread being checked.
323 if (f_th && f_th != th) {
324 char *other_stack_end =
325 (
char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase);
326 char *other_stack_beg =
327 other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize);
// Overlap test: either end of this thread's range lies strictly inside the
// other thread's range.
328 if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) ||
329 (stack_end > other_stack_beg && stack_end < other_stack_end)) {
// Print the conflicting range (when storage map is on) before the fatal error.
332 if (__kmp_storage_map)
333 __kmp_print_storage_map_gtid(
334 -1, other_stack_beg, other_stack_end,
335 (
size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize),
336 "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th));
338 __kmp_fatal(KMP_MSG(StackOverlap), KMP_HNT(ChangeStackLimit),
344 KA_TRACE(10, (
"__kmp_check_stack_overlap: returning\n"));
// NOTE(review): only the declaration of the function-local static flag is
// visible here; the loop the name implies is not shown -- confirm in the full
// file.
349void __kmp_infinite_loop(
void) {
// Function-local static flag, initialised to FALSE.
350 static int done = FALSE;
// Size in bytes of the stack buffers used to build message format strings.
357#define MAX_MESSAGE 512
// Prints one "OMP storage map" line describing the memory range [p1, p2).
//
// gtid:   thread id used for the optional data-placement output.
// p1, p2: start and end addresses of the range.
// size:   size in bytes reported for the range.
// format: printf-style description; it is spliced into the composed format
//         string and expanded with the trailing variadic arguments.
//
// Output goes to kmp_err while holding __kmp_stdio_lock. When
// KMP_PRINT_DATA_PLACEMENT is enabled, additional per-page memory-node
// information is printed for the range.
359void __kmp_print_storage_map_gtid(
int gtid,
void *p1,
void *p2,
size_t size,
360 char const *format, ...) {
361 char buffer[MAX_MESSAGE];
364 va_start(ap, format);
// First pass: format the addresses and size now, and embed the caller's format
// text via %s so that `buffer` becomes the format string for the second pass.
365 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP storage map: %p %p%8lu %s\n", p1,
366 p2, (
unsigned long)size, format);
367 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
// Second pass: expand the caller's conversion specifiers from the va_list.
368 __kmp_vprintf(kmp_err, buffer, ap);
369#if KMP_PRINT_DATA_PLACEMENT
// Placement details are only attempted when the range is consistent with
// `size`.
372 if (p1 <= p2 && (
char *)p2 - (
char *)p1 == size) {
373 if (__kmp_storage_map_verbose) {
374 node = __kmp_get_host_node(p1);
376 __kmp_storage_map_verbose = FALSE;
380 int localProc = __kmp_get_cpu_from_gtid(gtid);
382 const int page_size = KMP_GET_PAGE_SIZE();
// Round p1 down to its page start, and p2 down to the start of the page that
// holds the last byte of the range.
384 p1 = (
void *)((
size_t)p1 & ~((size_t)page_size - 1));
385 p2 = (
void *)(((
size_t)p2 - 1) & ~((
size_t)page_size - 1));
387 __kmp_printf_no_lock(
" GTID %d localNode %d\n", gtid,
390 __kmp_printf_no_lock(
" GTID %d\n", gtid);
// Advance page by page while consecutive pages report the same host node, then
// print that run as one line.
399 (
char *)p1 += page_size;
400 }
while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode);
401 __kmp_printf_no_lock(
" %p-%p memNode %d\n", last, (
char *)p1 - 1,
// Per-page output for the first (p1) and last (p2) page.
405 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p1,
406 (
char *)p1 + (page_size - 1),
407 __kmp_get_host_node(p1));
409 __kmp_printf_no_lock(
" %p-%p memNode %d\n", p2,
410 (
char *)p2 + (page_size - 1),
411 __kmp_get_host_node(p2));
417 __kmp_printf_no_lock(
" %s\n", KMP_I18N_STR(StorageMapWarning));
420 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
// Prints a printf-style warning prefixed with "OMP warning: " to kmp_err.
//
// format: printf-style format string, expanded with the variadic arguments.
//
// The visible code tests __kmp_generate_warnings against kmp_warnings_off
// before formatting.
// NOTE(review): the body of that test is not shown; presumably an early
// return -- confirm.
425void __kmp_warn(
char const *format, ...) {
426 char buffer[MAX_MESSAGE];
429 if (__kmp_generate_warnings == kmp_warnings_off) {
433 va_start(ap, format);
// Build the final format string by wrapping the caller's format text; the
// conversion specifiers are expanded later by __kmp_vprintf.
435 KMP_SNPRINTF(buffer,
sizeof(buffer),
"OMP warning: %s\n", format);
// Hold the stdio lock while printing.
436 __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
437 __kmp_vprintf(kmp_err, buffer, ap);
438 __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
// Abort path for the whole process. The visible steps are: take
// __kmp_exit_lock, dump the debug buffer if one is in use, record SIGABRT in
// the global abort flag, unregister the library, then call
// __kmp_infinite_loop() and release the lock.
// NOTE(review): lines between these steps are not visible in this view (e.g.
// any platform-specific abort call) -- confirm in the full file.
443void __kmp_abort_process() {
445 __kmp_acquire_bootstrap_lock(&__kmp_exit_lock);
447 if (__kmp_debug_buf) {
448 __kmp_dump_debug_buffer();
454 __kmp_global.g.g_abort = SIGABRT;
468 __kmp_unregister_library();
472 __kmp_infinite_loop();
473 __kmp_release_bootstrap_lock(&__kmp_exit_lock);
// Abort path for a single thread; the only visible action is a call to
// __kmp_infinite_loop().
477void __kmp_abort_thread(
void) {
480 __kmp_infinite_loop();
// Prints storage-map lines for a thread descriptor and selected members.
//
// thr:  thread descriptor to describe.
// gtid: global thread id used in the "th_%d..." labels.
//
// Each call passes a start address, an end address, a byte size, and a label.
486static void __kmp_print_thread_storage_map(kmp_info_t *thr,
int gtid) {
// The whole descriptor.
487 __kmp_print_storage_map_gtid(gtid, thr, thr + 1,
sizeof(kmp_info_t),
"th_%d",
// th_info: the range ends where the th_team member starts.
490 __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team,
491 sizeof(kmp_desc_t),
"th_%d.th_info", gtid);
// th_local: the range ends where the th_pri_head member starts.
493 __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head,
494 sizeof(kmp_local_t),
"th_%d.th_local", gtid);
// The whole barrier array, followed by the individual barrier slots.
496 __kmp_print_storage_map_gtid(
497 gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier],
498 sizeof(kmp_balign_t) * bs_last_barrier,
"th_%d.th_bar", gtid);
500 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier],
501 &thr->th.th_bar[bs_plain_barrier + 1],
502 sizeof(kmp_balign_t),
"th_%d.th_bar[plain]",
505 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier],
506 &thr->th.th_bar[bs_forkjoin_barrier + 1],
507 sizeof(kmp_balign_t),
"th_%d.th_bar[forkjoin]",
// The reduction barrier slot is only printed when KMP_FAST_REDUCTION_BARRIER
// is enabled.
510#if KMP_FAST_REDUCTION_BARRIER
511 __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier],
512 &thr->th.th_bar[bs_reduction_barrier + 1],
513 sizeof(kmp_balign_t),
"th_%d.th_bar[reduction]",
// Prints storage-map lines for a team descriptor and its per-team arrays.
//
// header:  label prefix used in the "%s_%d..." descriptions.
// team:    team descriptor to describe.
// team_id: numeric id used in the labels.
// num_thr: element count used for the t_dispatch and t_threads ranges.
//
// All lines are printed with gtid -1.
521static void __kmp_print_team_storage_map(
const char *header, kmp_team_t *team,
522 int team_id,
int num_thr) {
// Number of dispatch buffers reported: __kmp_dispatch_num_buffers when the
// team's t_max_nproc exceeds 1, otherwise 2.
523 int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2;
// The whole team descriptor.
524 __kmp_print_storage_map_gtid(-1, team, team + 1,
sizeof(kmp_team_t),
"%s_%d",
// The whole team barrier array, followed by the individual barrier slots.
527 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0],
528 &team->t.t_bar[bs_last_barrier],
529 sizeof(kmp_balign_team_t) * bs_last_barrier,
530 "%s_%d.t_bar", header, team_id);
532 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier],
533 &team->t.t_bar[bs_plain_barrier + 1],
534 sizeof(kmp_balign_team_t),
"%s_%d.t_bar[plain]",
537 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier],
538 &team->t.t_bar[bs_forkjoin_barrier + 1],
539 sizeof(kmp_balign_team_t),
540 "%s_%d.t_bar[forkjoin]", header, team_id);
// The reduction barrier slot is only printed when KMP_FAST_REDUCTION_BARRIER
// is enabled.
542#if KMP_FAST_REDUCTION_BARRIER
543 __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier],
544 &team->t.t_bar[bs_reduction_barrier + 1],
545 sizeof(kmp_balign_team_t),
546 "%s_%d.t_bar[reduction]", header, team_id);
// Per-thread dispatch structures and the thread pointer array, num_thr
// elements each.
549 __kmp_print_storage_map_gtid(
550 -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr],
551 sizeof(kmp_disp_t) * num_thr,
"%s_%d.t_dispatch", header, team_id);
553 __kmp_print_storage_map_gtid(
554 -1, &team->t.t_threads[0], &team->t.t_threads[num_thr],
555 sizeof(kmp_info_t *) * num_thr,
"%s_%d.t_threads", header, team_id);
// Shared dispatch buffers, num_disp_buff elements.
557 __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0],
558 &team->t.t_disp_buffer[num_disp_buff],
559 sizeof(dispatch_shared_info_t) * num_disp_buff,
560 "%s_%d.t_disp_buffer", header, team_id);
// Allocator start-up: calls the memkind initialiser and then the target-memory
// initialiser.
563static void __kmp_init_allocator() {
564 __kmp_init_memkind();
565 __kmp_init_target_mem();
// Allocator shutdown: calls the memkind finaliser.
567static void __kmp_fini_allocator() { __kmp_fini_memkind(); }
// Compiled only when ENABLE_LIBOMPTARGET is set; the visible body calls
// __kmp_init_target_task().
571#if ENABLE_LIBOMPTARGET
572static void __kmp_init_omptarget() {
573 __kmp_init_target_task();
// Windows DLL entry point. Traces each attach/detach notification and runs
// runtime shutdown hooks on detach.
//
// hInstDLL:    module handle (not used in the visible lines).
// fdwReason:   notification code; the visible cases are the four
//              DLL_PROCESS_* and DLL_THREAD_* values.
// lpReserved:  checked on PROCESS_DETACH.
// NOTE(review): the return statements and the switch header are not visible
// in this view.
582BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) {
587 case DLL_PROCESS_ATTACH:
588 KA_TRACE(10, (
"DllMain: PROCESS_ATTACH\n"));
592 case DLL_PROCESS_DETACH:
593 KA_TRACE(10, (
"DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific()));
// Library shutdown only runs when lpReserved is NULL. Per the Win32 DllMain
// documentation, that indicates the DLL is being unloaded rather than the
// process terminating -- TODO confirm this is the intent here.
606 if (lpReserved == NULL)
607 __kmp_internal_end_library(__kmp_gtid_get_specific());
611 case DLL_THREAD_ATTACH:
612 KA_TRACE(10, (
"DllMain: THREAD_ATTACH\n"));
// A detaching thread ends its own runtime state, identified by its keyed-TLS
// gtid.
618 case DLL_THREAD_DETACH:
619 KA_TRACE(10, (
"DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific()));
621 __kmp_internal_end_thread(__kmp_gtid_get_specific());
// Entry hook for an ordered section inside a parallel region.
//
// gtid_ref: pointer to the caller's global thread id.
// cid_ref:  not used in the visible lines.
// loc_ref:  source location passed to the consistency checker.
//
// With consistency checking on and the thread's root active, pushes a
// ct_ordered_in_parallel sync record. With BUILD_PARALLEL_ORDERED and a
// non-serialized team, waits on the team's ordered counter using this thread's
// tid.
// NOTE(review): the remaining KMP_WAIT arguments are not visible here.
632void __kmp_parallel_deo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
633 int gtid = *gtid_ref;
634#ifdef BUILD_PARALLEL_ORDERED
635 kmp_team_t *team = __kmp_team_from_gtid(gtid);
638 if (__kmp_env_consistency_check) {
639 if (__kmp_threads[gtid]->th.th_root->r.r_active)
// __kmp_push_sync takes one extra trailing argument when dynamic locks are
// enabled.
640#if KMP_USE_DYNAMIC_LOCK
641 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0);
643 __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL);
646#ifdef BUILD_PARALLEL_ORDERED
647 if (!team->t.t_serialized) {
649 KMP_WAIT(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), KMP_EQ,
// Exit hook for an ordered section inside a parallel region.
//
// gtid_ref: pointer to the caller's global thread id.
// cid_ref:  not used in the visible lines.
// loc_ref:  source location passed to the consistency checker.
//
// With consistency checking on and the thread's root active, pops the
// ct_ordered_in_parallel sync record. With BUILD_PARALLEL_ORDERED and a
// non-serialized team, stores the next tid (wrapping at t_nproc) into the
// team's ordered counter.
657void __kmp_parallel_dxo(
int *gtid_ref,
int *cid_ref,
ident_t *loc_ref) {
658 int gtid = *gtid_ref;
659#ifdef BUILD_PARALLEL_ORDERED
660 int tid = __kmp_tid_from_gtid(gtid);
661 kmp_team_t *team = __kmp_team_from_gtid(gtid);
664 if (__kmp_env_consistency_check) {
665 if (__kmp_threads[gtid]->th.th_root->r.r_active)
666 __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref);
668#ifdef BUILD_PARALLEL_ORDERED
669 if (!team->t.t_serialized) {
// Set the ordered counter to the next thread id, wrapping to 0 after the last
// thread.
674 team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc);
// Decides whether the calling thread executes a `single` construct.
//
// gtid:    global thread id of the caller.
// id_ref:  source location; stored in the thread's th_ident.
// push_ws: when non-zero and the thread wins, a ct_psingle workshare record is
//          pushed for consistency checking.
//
// Each thread advances its private this_construct counter; if the team's
// t_construct still equals the thread's previous value, the thread tries to
// move it forward with an atomic compare-and-store, and the result of that
// operation becomes `status`.
// NOTE(review): the return statement and the serialized-team branch body are
// not visible in this view.
684int __kmp_enter_single(
int gtid,
ident_t *id_ref,
int push_ws) {
// Make sure parallel initialization has run before touching team state.
689 if (!TCR_4(__kmp_init_parallel))
690 __kmp_parallel_initialize();
691 __kmp_resume_if_soft_paused();
693 th = __kmp_threads[gtid];
694 team = th->th.th_team;
697 th->th.th_ident = id_ref;
699 if (team->t.t_serialized) {
// Remember the previous per-thread construct count, then advance it.
702 kmp_int32 old_this = th->th.th_local.this_construct;
704 ++th->th.th_local.this_construct;
// The team counter is only updated if it still holds the old value, so the
// compare-and-store can succeed for at most one thread per construct.
708 if (team->t.t_construct == old_this) {
709 status = __kmp_atomic_compare_store_acq(&team->t.t_construct, old_this,
710 th->th.th_local.this_construct);
// ITT metadata is reported only when frames mode is 3, the caller is a primary
// (master) gtid outside a teams construct, and the team's active level is 1.
713 if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 &&
714 KMP_MASTER_GTID(gtid) && th->th.th_teams_microtask == NULL &&
715 team->t.t_active_level == 1) {
717 __kmp_itt_metadata_single(id_ref);
// Consistency checking: push a workshare record for the winning thread when
// requested.
// NOTE(review): the else keyword is not visible; the check call is presumably
// the alternative branch.
722 if (__kmp_env_consistency_check) {
723 if (status && push_ws) {
724 __kmp_push_workshare(gtid, ct_psingle, id_ref);
726 __kmp_check_workshare(gtid, ct_psingle, id_ref);
731 __kmp_itt_single_start(gtid);
// Called when leaving a `single` construct: signals ITT and, when consistency
// checking is enabled, pops the ct_psingle workshare record.
//
// gtid: global thread id of the caller.
737void __kmp_exit_single(
int gtid) {
739 __kmp_itt_single_end(gtid);
741 if (__kmp_env_consistency_check)
742 __kmp_pop_workshare(gtid, ct_psingle, NULL);
// Works out how many threads a new team may use, starting from set_nthreads
// and reducing it as needed.
//
// root:         root descriptor of the forking thread.
// parent_team:  team the forking thread currently belongs to.
// master_tid:   tid of the forking thread within parent_team.
// set_nthreads: requested team size.
// NOTE(review): the signature is cut off after set_nthreads in this view.
//
// Visible stages, in order:
//   1. dynamic adjustment (load balance / thread limit / random), selected by
//      __kmp_global.g.g_dynamic_mode;
//   2. the limit traced as KMP_DEVICE_THREAD_LIMIT, computed from __kmp_max_nth;
//   3. the limit traced as OMP_THREAD_LIMIT, computed from the contention
//      group's cg_thread_limit;
//   4. the capacity of the __kmp_threads array, expanded on demand.
// The recurring term (root->r.r_active ? 1 : hot team t_nproc) is subtracted
// from / added back to the thread counts in each stage.
// NOTE(review): presumably returns new_nthreads; the return statement is not
// visible.
751static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team,
752 int master_tid,
int set_nthreads,
756 KMP_DEBUG_ASSERT(__kmp_init_serial);
757 KMP_DEBUG_ASSERT(root && parent_team);
758 kmp_info_t *this_thr = parent_team->t.t_threads[master_tid];
// Stage 1: dynamic adjustment. Start from the requested size.
762 new_nthreads = set_nthreads;
763 if (!get__dynamic_2(parent_team, master_tid)) {
766#ifdef USE_LOAD_BALANCE
767 else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) {
768 new_nthreads = __kmp_load_balance_nproc(root, set_nthreads);
769 if (new_nthreads == 1) {
770 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
771 "reservation to 1 thread\n",
775 if (new_nthreads < set_nthreads) {
776 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d load balance reduced "
777 "reservation to %d threads\n",
778 master_tid, new_nthreads));
// Thread-limit mode: available processors minus running threads, plus the
// threads this root already accounts for.
782 else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) {
783 new_nthreads = __kmp_avail_proc - __kmp_nth +
784 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
785 if (new_nthreads <= 1) {
786 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
787 "reservation to 1 thread\n",
791 if (new_nthreads < set_nthreads) {
792 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d thread limit reduced "
793 "reservation to %d threads\n",
794 master_tid, new_nthreads));
796 new_nthreads = set_nthreads;
798 }
else if (__kmp_global.g.g_dynamic_mode == dynamic_random) {
// Random mode only applies to requests above 2 threads; it picks a value in
// [1, set_nthreads].
799 if (set_nthreads > 2) {
800 new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]);
801 new_nthreads = (new_nthreads % set_nthreads) + 1;
802 if (new_nthreads == 1) {
803 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
804 "reservation to 1 thread\n",
808 if (new_nthreads < set_nthreads) {
809 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d dynamic random reduced "
810 "reservation to %d threads\n",
811 master_tid, new_nthreads));
// Stage 2: global thread limit (__kmp_max_nth).
// NOTE(review): the right-hand side of this comparison is not visible.
819 if (__kmp_nth + new_nthreads -
820 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
822 int tl_nthreads = __kmp_max_nth - __kmp_nth +
823 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
824 if (tl_nthreads <= 0) {
// Warn once (guarded by __kmp_reserve_warn), and only when dynamic adjustment
// is off.
829 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
830 __kmp_reserve_warn = 1;
831 __kmp_msg(kmp_ms_warning,
832 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
833 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
835 if (tl_nthreads == 1) {
836 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT "
837 "reduced reservation to 1 thread\n",
841 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d KMP_DEVICE_THREAD_LIMIT reduced "
842 "reservation to %d threads\n",
843 master_tid, tl_nthreads));
844 new_nthreads = tl_nthreads;
// Stage 3: contention-group thread limit, read from the forking thread's
// th_cg_roots.
848 int cg_nthreads = this_thr->th.th_cg_roots->cg_nthreads;
849 int max_cg_threads = this_thr->th.th_cg_roots->cg_thread_limit;
850 if (cg_nthreads + new_nthreads -
851 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
853 int tl_nthreads = max_cg_threads - cg_nthreads +
854 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc);
855 if (tl_nthreads <= 0) {
860 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
861 __kmp_reserve_warn = 1;
862 __kmp_msg(kmp_ms_warning,
863 KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads),
864 KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null);
866 if (tl_nthreads == 1) {
867 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT "
868 "reduced reservation to 1 thread\n",
872 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d OMP_THREAD_LIMIT reduced "
873 "reservation to %d threads\n",
874 master_tid, tl_nthreads));
875 new_nthreads = tl_nthreads;
// Stage 4: capacity of the __kmp_threads array.
881 capacity = __kmp_threads_capacity;
882 if (TCR_PTR(__kmp_threads[0]) == NULL) {
// Slots for hidden helper threads that are enabled but not yet initialised are
// subtracted from the usable capacity.
888 if (__kmp_enable_hidden_helper && !TCR_4(__kmp_init_hidden_helper_threads)) {
889 capacity -= __kmp_hidden_helper_threads_num;
891 if (__kmp_nth + new_nthreads -
892 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) >
// Try to grow the thread array by the shortfall; if it cannot grow enough,
// shrink the reservation by the missing slot count.
895 int slotsRequired = __kmp_nth + new_nthreads -
896 (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) -
898 int slotsAdded = __kmp_expand_threads(slotsRequired);
899 if (slotsAdded < slotsRequired) {
901 new_nthreads -= (slotsRequired - slotsAdded);
902 KMP_ASSERT(new_nthreads >= 1);
905 if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) {
906 __kmp_reserve_warn = 1;
// The hint differs depending on whether __kmp_tp_cached is set.
907 if (__kmp_tp_cached) {
908 __kmp_msg(kmp_ms_warning,
909 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
910 KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity),
911 KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null);
913 __kmp_msg(kmp_ms_warning,
914 KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads),
915 KMP_HNT(SystemLimitOnThreads), __kmp_msg_null);
// Final trace of the outcome.
922 if (new_nthreads == 1) {
924 (
"__kmp_reserve_threads: T#%d serializing team after reclaiming "
925 "dead roots and rechecking; requested %d threads\n",
926 __kmp_get_gtid(), set_nthreads));
928 KC_TRACE(10, (
"__kmp_reserve_threads: T#%d allocating %d threads; requested"
930 __kmp_get_gtid(), new_nthreads, set_nthreads));
// Populates a team with its threads: installs the primary thread at slot 0 and
// allocates a worker for every other slot.
//
// root:               root descriptor passed to __kmp_allocate_thread.
// team:               team being populated; t_nproc gives the slot count.
// master_th:          descriptor of the forking (primary) thread.
// master_gtid:        its global thread id (debug-asserted against the
//                     current gtid).
// fork_teams_workers: when zero, place partitioning is performed for the team.
939static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
940 kmp_info_t *master_th,
int master_gtid,
941 int fork_teams_workers) {
945 KA_TRACE(10, (
"__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc));
946 KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid());
// Bind the primary thread to the team as tid 0.
950 master_th->th.th_info.ds.ds_tid = 0;
951 master_th->th.th_team = team;
952 master_th->th.th_team_nproc = team->t.t_nproc;
953 master_th->th.th_team_master = master_th;
954 master_th->th.th_team_serialized = FALSE;
955 master_th->th.th_dispatch = &team->t.t_dispatch[0];
// With nested hot teams, work out the hot-team level for this team and record
// the team in the primary thread's hot-team table if the slot is empty.
// NOTE(review): the level adjustments inside the teams-construct branches are
// not visible here.
958#if KMP_NESTED_HOT_TEAMS
960 kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams;
963 int level = team->t.t_active_level - 1;
964 if (master_th->th.th_teams_microtask) {
965 if (master_th->th.th_teams_size.nteams > 1) {
969 if (team->t.t_pkfn != (microtask_t)__kmp_teams_master &&
970 master_th->th.th_teams_level == team->t.t_level) {
975 if (level < __kmp_hot_teams_max_level) {
976 if (hot_teams[level].hot_team) {
978 KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team);
982 hot_teams[level].hot_team = team;
983 hot_teams[level].hot_team_nth = team->t.t_nproc;
990 use_hot_team = team == root->r.r_hot_team;
995 team->t.t_threads[0] = master_th;
996 __kmp_initialize_info(master_th, team, 0, master_gtid);
// Allocate workers for tids 1..t_nproc-1.
999 for (i = 1; i < team->t.t_nproc; i++) {
1002 kmp_info_t *thr = __kmp_allocate_thread(root, team, i);
1003 team->t.t_threads[i] = thr;
1004 KMP_DEBUG_ASSERT(thr);
1005 KMP_DEBUG_ASSERT(thr->th.th_team == team);
1007 KA_TRACE(20, (
"__kmp_fork_team_threads: T#%d(%d:%d) init arrived "
1008 "T#%d(%d:%d) join =%llu, plain=%llu\n",
1009 __kmp_gtid_from_tid(0, team), team->t.t_id, 0,
1010 __kmp_gtid_from_tid(i, team), team->t.t_id, i,
1011 team->t.t_bar[bs_forkjoin_barrier].b_arrived,
1012 team->t.t_bar[bs_plain_barrier].b_arrived));
// Workers inherit the primary thread's teams-construct state.
1013 thr->th.th_teams_microtask = master_th->th.th_teams_microtask;
1014 thr->th.th_teams_level = master_th->th.th_teams_level;
1015 thr->th.th_teams_size = master_th->th.th_teams_size;
// Copy the team's barrier arrival counters into the worker's barrier state.
1018 kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar;
1019 for (b = 0; b < bs_last_barrier; ++b) {
1020 balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived;
1021 KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG);
1023 balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived;
1029#if KMP_AFFINITY_SUPPORTED
1033 if (!fork_teams_workers) {
1034 __kmp_partition_places(team);
// When the fork/join gather pattern is the distributed barrier, update its
// thread count and add the threads to the team.
1038 if (team->t.t_nproc > 1 &&
1039 __kmp_barrier_gather_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1040 team->t.b->update_num_threads(team->t.t_nproc);
1041 __kmp_add_threads_to_team(team, team->t.t_nproc);
// Flag the team for affinity display if any thread's previous team size or
// nesting level differs from this team's.
1045 if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
1046 for (i = 0; i < team->t.t_nproc; i++) {
1047 kmp_info_t *thr = team->t.t_threads[i];
1048 if (thr->th.th_prev_num_threads != team->t.t_nproc ||
1049 thr->th.th_prev_level != team->t.t_level) {
1050 team->t.t_display_affinity = 1;
// x86 / x86-64 only: captures the calling thread's floating-point control
// state into the team when __kmp_inherit_fp_control is set.
//
// team: team whose t_x87_fpu_control_word, t_mxcsr and t_fp_control_saved
//       fields are updated (via KMP_CHECK_UPDATE).
1059#if KMP_ARCH_X86 || KMP_ARCH_X86_64
1063inline static void propagateFPControl(kmp_team_t *team) {
1064 if (__kmp_inherit_fp_control) {
1065 kmp_int16 x87_fpu_control_word;
// Read the current x87 control word and MXCSR, keeping only the MXCSR bits
// selected by KMP_X86_MXCSR_MASK.
1069 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1070 __kmp_store_mxcsr(&mxcsr);
1071 mxcsr &= KMP_X86_MXCSR_MASK;
1082 KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word);
1083 KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr);
// Mark the saved state as valid.
1086 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE);
// NOTE(review): presumably the branch taken when inheritance is disabled; the
// else keyword is not visible.
1090 KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE);
// Loads the team's saved floating-point control state into the hardware when
// it differs from the calling thread's current state.
//
// team: team whose saved x87 control word and MXCSR are applied. Nothing
//       happens unless __kmp_inherit_fp_control is set and the team has a
//       saved state.
1096inline static void updateHWFPControl(kmp_team_t *team) {
1097 if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) {
1100 kmp_int16 x87_fpu_control_word;
// Read the current hardware state for comparison.
1102 __kmp_store_x87_fpu_control_word(&x87_fpu_control_word);
1103 __kmp_store_mxcsr(&mxcsr);
1104 mxcsr &= KMP_X86_MXCSR_MASK;
// The x87 status word is cleared before a differing control word is loaded.
1106 if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) {
1107 __kmp_clear_x87_fpu_status_word();
1108 __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word);
1111 if (team->t.t_mxcsr != mxcsr) {
1112 __kmp_load_mxcsr(&team->t.t_mxcsr);
// No-op replacements for the two FP-control helpers.
// NOTE(review): presumably the #else branch for non-x86 targets; the
// surrounding preprocessor lines are not visible here.
1117#define propagateFPControl(x) ((void)0)
1118#define updateHWFPControl(x) ((void)0)
// Forward declaration.
// NOTE(review): the parameter list is cut off after `team` in this view; a
// later call in this file passes a third argument.
1121static void __kmp_alloc_argv_entries(
int argc, kmp_team_t *team,
// Enters a serialized parallel region: the calling thread runs the region
// alone on its serial team.
//
// loc:        source location of the construct; stored in the serial team.
// global_tid: global thread id of the caller.
//
// Two cases are visible:
//   * the thread is not yet on its serial team: the serial team (or a freshly
//     allocated one if the cached one is already in use) is set up as a
//     one-thread team nested under the current team, and becomes the thread's
//     current team;
//   * the thread is already on its serial team: the serialization count and
//     nesting level are bumped and a new dispatch buffer is pushed.
// After that come affinity display, consistency bookkeeping and the OMPT
// callbacks.
1126void __kmp_serialized_parallel(
ident_t *loc, kmp_int32 global_tid) {
1127 kmp_info_t *this_thr;
1128 kmp_team_t *serial_team;
1130 KC_TRACE(10, (
"__kmpc_serialized_parallel: called by T#%d\n", global_tid));
1137 if (!TCR_4(__kmp_init_parallel))
1138 __kmp_parallel_initialize();
1139 __kmp_resume_if_soft_paused();
1141 this_thr = __kmp_threads[global_tid];
1142 serial_team = this_thr->th.th_serial_team;
1145 KMP_DEBUG_ASSERT(serial_team);
// With tasking active, the thread's task team pointer is cleared for the
// serialized region.
1148 if (__kmp_tasking_mode != tskm_immediate_exec) {
1150 this_thr->th.th_task_team ==
1151 this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]);
1152 KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] ==
1154 KA_TRACE(20, (
"__kmpc_serialized_parallel: T#%d pushing task_team %p / "
1155 "team %p, new task_team = NULL\n",
1156 global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
1157 this_thr->th.th_task_team = NULL;
// Resolve proc_bind: an ICV of proc_bind_false wins; otherwise a default
// request falls back to the ICV value.
1160 kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind;
1161 if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1162 proc_bind = proc_bind_false;
1163 }
else if (proc_bind == proc_bind_default) {
1166 proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind;
// Reset the one-shot per-region requests (proc_bind and num_threads).
1169 this_thr->th.th_set_proc_bind = proc_bind_default;
1172 this_thr->th.th_set_nproc = 0;
// OMPT: record the enter frame and report parallel-begin, unless the thread is
// in the overhead state.
1175 ompt_data_t ompt_parallel_data = ompt_data_none;
1176 void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1177 if (ompt_enabled.enabled &&
1178 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1180 ompt_task_info_t *parent_task_info;
1181 parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
1183 parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1184 if (ompt_enabled.ompt_callback_parallel_begin) {
1187 ompt_callbacks.ompt_callback(ompt_callback_parallel_begin)(
1188 &(parent_task_info->task_data), &(parent_task_info->frame),
1189 &ompt_parallel_data, team_size,
1190 ompt_parallel_invoker_program | ompt_parallel_team, codeptr);
// Case 1: the thread is not currently running on its serial team.
1195 if (this_thr->th.th_team != serial_team) {
1197 int level = this_thr->th.th_team->t.t_level;
// The cached serial team is already serialized (in use): allocate a
// replacement one-thread team under the fork/join lock.
1199 if (serial_team->t.t_serialized) {
1202 kmp_team_t *new_team;
1204 __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1207 __kmp_allocate_team(this_thr->th.th_root, 1, 1,
1211 proc_bind, &this_thr->th.th_current_task->td_icvs,
1212 0 USE_NESTED_HOT_ARG(NULL));
1213 __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1214 KMP_ASSERT(new_team);
1217 new_team->t.t_threads[0] = this_thr;
1218 new_team->t.t_parent = this_thr->th.th_team;
1219 serial_team = new_team;
1220 this_thr->th.th_serial_team = serial_team;
1224 (
"__kmpc_serialized_parallel: T#%d allocated new serial team %p\n",
1225 global_tid, serial_team));
1233 (
"__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n",
1234 global_tid, serial_team));
// Configure the serial team as a one-thread team nested under the current
// team, then switch the thread onto it.
1238 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1239 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1240 KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team);
1241 serial_team->t.t_ident = loc;
1242 serial_team->t.t_serialized = 1;
1243 serial_team->t.t_nproc = 1;
1244 serial_team->t.t_parent = this_thr->th.th_team;
1245 serial_team->t.t_sched.sched = this_thr->th.th_team->t.t_sched.sched;
1246 this_thr->th.th_team = serial_team;
1247 serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid;
1249 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d curtask=%p\n", global_tid,
1250 this_thr->th.th_current_task));
// The current task stops executing while the region's implicit task is pushed
// as the thread's current task.
1251 KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1);
1252 this_thr->th.th_current_task->td_flags.executing = 0;
1254 __kmp_push_current_task_to_thread(this_thr, serial_team, 0);
// The new current task starts from its parent task's ICVs.
1259 copy_icvs(&this_thr->th.th_current_task->td_icvs,
1260 &this_thr->th.th_current_task->td_parent->td_icvs);
// Apply per-level nested settings for the next nesting level, when the nested
// lists are long enough.
1264 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1265 this_thr->th.th_current_task->td_icvs.nproc =
1266 __kmp_nested_nth.nth[level + 1];
1269 if (__kmp_nested_proc_bind.used &&
1270 (level + 1 < __kmp_nested_proc_bind.used)) {
1271 this_thr->th.th_current_task->td_icvs.proc_bind =
1272 __kmp_nested_proc_bind.bind_types[level + 1];
// Placeholder microtask pointer (all bits set).
1276 serial_team->t.t_pkfn = (microtask_t)(~0);
1278 this_thr->th.th_info.ds.ds_tid = 0;
1281 this_thr->th.th_team_nproc = 1;
1282 this_thr->th.th_team_master = this_thr;
1283 this_thr->th.th_team_serialized = 1;
// Nesting level increases by one; active level is inherited unchanged from the
// parent.
1285 serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1;
1286 serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level;
1287 serial_team->t.t_def_allocator = this_thr->th.th_def_allocator;
1289 propagateFPControl(serial_team);
// Allocate the dispatch buffer on first use.
1292 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1293 if (!serial_team->t.t_dispatch->th_disp_buffer) {
1294 serial_team->t.t_dispatch->th_disp_buffer =
1295 (dispatch_private_info_t *)__kmp_allocate(
1296 sizeof(dispatch_private_info_t));
1298 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
// Case 2: the thread is already on its serial team -- nest one level deeper.
1305 KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
1306 KMP_DEBUG_ASSERT(serial_team->t.t_threads);
1307 KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
1308 ++serial_team->t.t_serialized;
1309 this_thr->th.th_team_serialized = serial_team->t.t_serialized;
1312 int level = this_thr->th.th_team->t.t_level;
1315 if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) {
1316 this_thr->th.th_current_task->td_icvs.nproc =
1317 __kmp_nested_nth.nth[level + 1];
1319 serial_team->t.t_level++;
1320 KF_TRACE(10, (
"__kmpc_serialized_parallel: T#%d increasing nesting level "
1321 "of serial team %p to %d\n",
1322 global_tid, serial_team, serial_team->t.t_level));
// Push a fresh dispatch buffer onto the front of the serial team's
// dispatch-buffer list.
1325 KMP_DEBUG_ASSERT(serial_team->t.t_dispatch);
1327 dispatch_private_info_t *disp_buffer =
1328 (dispatch_private_info_t *)__kmp_allocate(
1329 sizeof(dispatch_private_info_t));
1330 disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer;
1331 serial_team->t.t_dispatch->th_disp_buffer = disp_buffer;
1333 this_thr->th.th_dispatch = serial_team->t.t_dispatch;
// Common tail: clear any pending cancellation request on the serial team.
1337 KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
// Display affinity only when the nesting level or team size changed since the
// last display, then remember the new values.
1341 if (__kmp_display_affinity) {
1342 if (this_thr->th.th_prev_level != serial_team->t.t_level ||
1343 this_thr->th.th_prev_num_threads != 1) {
1345 __kmp_aux_display_affinity(global_tid, NULL);
1346 this_thr->th.th_prev_level = serial_team->t.t_level;
1347 this_thr->th.th_prev_num_threads = 1;
1351 if (__kmp_env_consistency_check)
1352 __kmp_push_parallel(global_tid, NULL);
// OMPT: link a lightweight task team for the serialized region and report the
// implicit task begin.
1354 serial_team->t.ompt_team_info.master_return_address = codeptr;
1355 if (ompt_enabled.enabled &&
1356 this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
1357 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1358 OMPT_GET_FRAME_ADDRESS(0);
1360 ompt_lw_taskteam_t lw_taskteam;
1361 __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
1362 &ompt_parallel_data, codeptr);
1364 __ompt_lw_taskteam_link(&lw_taskteam, this_thr, 1);
1368 if (ompt_enabled.ompt_callback_implicit_task) {
1369 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1370 ompt_scope_begin, OMPT_CUR_TEAM_DATA(this_thr),
1371 OMPT_CUR_TASK_DATA(this_thr), 1, __kmp_tid_from_gtid(global_tid),
1372 ompt_task_implicit);
1373 OMPT_CUR_TASK_INFO(this_thr)->thread_num =
1374 __kmp_tid_from_gtid(global_tid);
// The exit frame is set again after linking, since the current task info now
// refers to the lightweight task team.
// NOTE(review): inferred from the link call above -- confirm.
1378 this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
1379 OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr =
1380 OMPT_GET_FRAME_ADDRESS(0);
// Returns true when all of the following hold: master_th has a teams
// microtask, `ap` is non-null, `microtask` is not __kmp_teams_master, and
// `level` equals `teams_level`.
//
// master_th:   forking thread's descriptor.
// microtask:   entry point about to be forked.
// level:       nesting level compared against teams_level.
// teams_level: level recorded for the teams construct.
// ap:          variadic argument list; only tested for being non-null.
1386static inline bool __kmp_is_fork_in_teams(kmp_info_t *master_th,
1387 microtask_t microtask,
int level,
1388 int teams_level, kmp_va_list ap) {
1389 return (master_th->th.th_teams_microtask && ap &&
1390 microtask != (microtask_t)__kmp_teams_master && level == teams_level);
// Returns true in either of two situations:
//   * `ap` is NULL and active_level is 0; or
//   * `ap` is non-null, teams_level is positive, and teams_level equals level.
//
// active_level: active nesting level of the current team.
// level:        nesting level compared against teams_level.
// teams_level:  level recorded for the teams construct.
// ap:           variadic argument list; only tested for null / non-null.
1395static inline bool __kmp_is_entering_teams(
int active_level,
int level,
1396 int teams_level, kmp_va_list ap) {
1397 return ((ap == NULL && active_level == 0) ||
1398 (ap && teams_level > 0 && teams_level == level));
// __kmp_fork_in_teams
//
// Fork helper; its name and trace messages suggest it serves a parallel
// region started from inside a teams construct (NOTE(review): inferred from
// the name only -- confirm against the caller).
//
// NOTE(review): several lines of this definition (return type, the trailing
// `ap` parameter, closing braces, else/return statements and some
// preprocessor guards) are not visible in this copy, so the comments below
// describe only the statements that can be seen.
//
// Visible steps, in textual order:
//   1. Store `loc` and the `argc` variadic arguments in parent_team.
//   2. If parent_team is the calling thread's serial team, run the microtask
//      directly via __kmp_invoke_microtask, surrounded by OMPT implicit-task
//      begin/end and parallel-end callbacks.
//   3. Publish microtask/invoker on parent_team, bump the nesting counters,
//      optionally apply master_set_numthreads, resolve proc_bind, start the
//      team with __kmp_internal_fork and run the caller's share through
//      parent_team->t.t_invoke.
//
// Parameters (as used by the visible code):
//   loc                   stored in parent_team->t.t_ident; also passed to
//                         __kmp_omp_num_threads and __kmp_internal_fork.
//   gtid                  thread id of the caller; passed to the microtask,
//                         to __kmp_tid_from_gtid and to the trace output.
//   parent_team           team that is updated and forked.
//   argc                  number of variadic arguments copied to t_argv.
//   master_th             descriptor of the calling thread.
//   root                  root whose r_in_parallel counter is incremented.
//   call_context          compared against fork_context_gnu; also fed to
//                         OMPT_INVOKER.
//   microtask             function stored in t_pkfn / invoked directly.
//   invoker               stored in parent_team->t.t_invoke.
//   master_set_numthreads requested thread count (0 means "not requested",
//                         judging by the `if (master_set_numthreads)` test).
//   level                 used as index (level + 1) into
//                         __kmp_nested_proc_bind.bind_types.
//   ompt_parallel_data,
//   return_address        forwarded to the OMPT task-team setup and to the
//                         parallel-end callback.
1405__kmp_fork_in_teams(
ident_t *loc,
int gtid, kmp_team_t *parent_team,
1406 kmp_int32 argc, kmp_info_t *master_th, kmp_root_t *root,
1407 enum fork_context_e call_context, microtask_t microtask,
1408 launch_t invoker,
int master_set_numthreads,
int level,
1410 ompt_data_t ompt_parallel_data,
void *return_address,
// Record the source location and make room for argc argument slots.
1416 parent_team->t.t_ident = loc;
1417 __kmp_alloc_argv_entries(argc, parent_team, TRUE);
1418 parent_team->t.t_argc = argc;
1419 argv = (
void **)parent_team->t.t_argv;
// Pull argc pointers from the variadic list, storing them in call order.
1420 for (i = argc - 1; i >= 0; --i) {
1421 *argv++ = va_arg(kmp_va_deref(ap),
void *);
// Case: the parent team is this thread's own serial team.
1424 if (parent_team == master_th->th.th_serial_team) {
// The serial team is expected to be serialized more than once here.
1427 KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1);
// For the GNU fork context the serialization count is decremented
// (the rest of this branch is not visible in this copy).
1429 if (call_context == fork_context_gnu) {
1432 parent_team->t.t_serialized--;
1437 parent_team->t.t_pkfn = microtask;
// OMPT bookkeeping for the directly invoked microtask.
1442 void **exit_frame_p;
1443 ompt_data_t *implicit_task_data;
1444 ompt_lw_taskteam_t lw_taskteam;
1446 if (ompt_enabled.enabled) {
// Set up the local lw_taskteam, remember where its exit-frame pointer
// lives, then link it to the thread.
1447 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1448 &ompt_parallel_data, return_address);
1449 exit_frame_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
1451 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// Report the beginning of the implicit task, using the thread number that
// was just stored in the current task info.
1455 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1456 if (ompt_enabled.ompt_callback_implicit_task) {
1457 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1458 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1459 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), implicit_task_data,
1460 1, OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1464 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
// Alternative target for exit_frame_p; presumably the OMPT-disabled
// fallback (the `else` line and the declaration of `dummy` are not visible).
1466 exit_frame_p = &dummy;
1472 parent_team->t.t_serialized--;
// Run the microtask on the calling thread with the arguments stored in
// parent_team->t.t_argv (the call's remaining arguments are not visible).
1475 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1476 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1477 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
// OMPT teardown after the microtask returned: clear the exit frame, report
// the end of the implicit task, unlink the lightweight task team, report the
// end of the parallel region and switch the thread state to overhead.
1486 if (ompt_enabled.enabled) {
1487 *exit_frame_p = NULL;
1488 OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
1489 if (ompt_enabled.ompt_callback_implicit_task) {
1490 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1491 ompt_scope_end, NULL, implicit_task_data, 1,
1492 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
// Copy the team data before unlinking so it can be passed to parallel_end.
1494 ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1495 __ompt_lw_taskteam_unlink(master_th);
1496 if (ompt_enabled.ompt_callback_parallel_end) {
1497 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1498 &ompt_parallel_data, OMPT_CUR_TASK_DATA(master_th),
1499 OMPT_INVOKER(call_context) | ompt_parallel_team, return_address);
1501 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// Publish the microtask and invoker on the team, count this region on the
// root (atomic increment) and raise the team's active/total nesting levels.
1507 parent_team->t.t_pkfn = microtask;
1508 parent_team->t.t_invoke = invoker;
1509 KMP_ATOMIC_INC(&root->r.r_in_parallel);
1510 parent_team->t.t_active_level++;
1511 parent_team->t.t_level++;
1512 parent_team->t.t_def_allocator = master_th->th.th_def_allocator;
// Remember the team's current size in the thread's teams-size record.
1519 master_th->th.th_teams_size.nth = parent_team->t.t_nproc;
// OMPT: set up and link a lightweight task team for this region
// (the tail of the init call is not visible).
1522 if (ompt_enabled.enabled) {
1523 ompt_lw_taskteam_t lw_taskteam;
1524 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data,
1526 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 1,
true);
// A requested thread count is applied only when it does not exceed the size
// recorded in th_teams_size.nth.
1531 if (master_set_numthreads) {
1532 if (master_set_numthreads <= master_th->th.th_teams_size.nth) {
1534 kmp_info_t **other_threads = parent_team->t.t_threads;
1537 int old_proc = master_th->th.th_teams_size.nth;
// With the distributed fork/join barrier pattern the barrier is resized and
// threads are (re)added to the team for the new count.
1538 if (__kmp_barrier_release_pattern[bs_forkjoin_barrier] == bp_dist_bar) {
1539 __kmp_resize_dist_barrier(parent_team, old_proc, master_set_numthreads);
1540 __kmp_add_threads_to_team(parent_team, master_set_numthreads);
// Store the new size on the team and on each of its first
// master_set_numthreads threads.
1542 parent_team->t.t_nproc = master_set_numthreads;
1543 for (i = 0; i < master_set_numthreads; ++i) {
1544 other_threads[i]->th.th_team_nproc = master_set_numthreads;
// The per-thread request is cleared after it has been looked at.
1548 master_th->th.th_set_nproc = 0;
// When __kmp_debugging is set, __kmp_omp_num_threads(loc) can replace the
// requested count (the condition guarding the assignment is not visible).
1552 if (__kmp_debugging) {
1553 int nth = __kmp_omp_num_threads(loc);
1555 master_set_numthreads = nth;
// Resolve the proc_bind policy for this region:
//   - start from the value requested on the thread (th_set_proc_bind);
//   - a proc_bind ICV of proc_bind_false forces proc_bind_false;
//   - a default request falls back to the current task's proc_bind ICV.
1561 kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind;
1563 kmp_proc_bind_t proc_bind_icv = proc_bind_default;
1564 if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) {
1565 proc_bind = proc_bind_false;
1568 if (proc_bind == proc_bind_default) {
1569 proc_bind = master_th->th.th_current_task->td_icvs.proc_bind;
// If the nested proc_bind list has an entry for level + 1 that differs from
// the current ICV, that entry becomes the ICV to hand to the team's threads.
1575 if ((level + 1 < __kmp_nested_proc_bind.used) &&
1576 (__kmp_nested_proc_bind.bind_types[level + 1] !=
1577 master_th->th.th_current_task->td_icvs.proc_bind)) {
1578 proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1];
1581 KMP_CHECK_UPDATE(parent_team->t.t_proc_bind, proc_bind);
// Write a changed proc_bind ICV into the current task of every thread
// counted by master_th->th.th_team_nproc.
1583 if (proc_bind_icv != proc_bind_default &&
1584 master_th->th.th_current_task->td_icvs.proc_bind != proc_bind_icv) {
1585 kmp_info_t **other_threads = parent_team->t.t_threads;
1586 for (i = 0; i < master_th->th.th_team_nproc; ++i) {
1587 other_threads[i]->th.th_current_task->td_icvs.proc_bind = proc_bind_icv;
// The per-thread proc_bind request is reset to the default value.
1591 master_th->th.th_set_proc_bind = proc_bind_default;
1593#if USE_ITT_BUILD && USE_ITT_NOTIFY
// ITT instrumentation: under the (partly visible) condition below one
// timestamp is stored both as the thread's frame time and the team's region
// time.
1594 if (((__itt_frame_submit_v3_ptr && __itt_get_timestamp_ptr) ||
1596 __kmp_forkjoin_frames_mode == 3 &&
1597 parent_team->t.t_active_level == 1
1598 && master_th->th.th_teams_size.nteams == 1) {
1599 kmp_uint64 tmp_time = __itt_get_timestamp();
1600 master_th->th.th_frame_time = tmp_time;
1601 parent_team->t.t_region_time = tmp_time;
// Create an ITT stack-caller id for the team; none may exist yet.
1603 if (__itt_stack_caller_create_ptr) {
1604 KMP_DEBUG_ASSERT(parent_team->t.t_stack_id == NULL);
1606 parent_team->t.t_stack_id = __kmp_itt_stack_caller_create();
1609#if KMP_AFFINITY_SUPPORTED
1610 __kmp_partition_places(parent_team);
// Start the team; trace messages bracket the internal fork.
1613 KF_TRACE(10, (
"__kmp_fork_in_teams: before internal fork: root=%p, team=%p, "
1614 "master_th=%p, gtid=%d\n",
1615 root, parent_team, master_th, gtid));
1616 __kmp_internal_fork(loc, gtid, parent_team);
1617 KF_TRACE(10, (
"__kmp_fork_in_teams: after internal fork: root=%p, team=%p, "
1618 "master_th=%p, gtid=%d\n",
1619 root, parent_team, master_th, gtid));
// The GNU fork context is special-cased here; the statement this condition
// guards is not visible in this copy.
1621 if (call_context == fork_context_gnu)
1625 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) invoke microtask = %p\n", gtid,
1626 parent_team->t.t_id, parent_team->t.t_pkfn));
// Run the calling thread's share through the team's invoker; a zero result
// is treated as a fatal assertion failure.
1628 if (!parent_team->t.t_invoke(gtid)) {
1629 KMP_ASSERT2(0,
"cannot invoke microtask for PRIMARY thread");
1631 KA_TRACE(20, (
"__kmp_fork_in_teams: T#%d(%d:0) done microtask = %p\n", gtid,
1632 parent_team->t.t_id, parent_team->t.t_pkfn));
1635 KA_TRACE(20, (
"__kmp_fork_in_teams: parallel exit T#%d\n", gtid));
// __kmp_serial_fork_call
//
// Fork helper for the case that its own trace message describes as
// "serializing parallel region": the visible code runs the microtask on the
// calling thread instead of starting other threads.
//
// NOTE(review): several lines of this definition (return type, the trailing
// `ap` parameter, some if/else lines, closing braces, return statements and
// preprocessor guards) are not visible in this copy, so the comments below
// describe only the statements that can be seen.
//
// Visible branches:
//   - call_context == fork_context_intel, which itself contains
//       * a path invoking the microtask with parent_team->t.t_argv,
//       * a path for microtask == __kmp_teams_master,
//       * a path that copies the variadic arguments and invokes the
//         microtask with `args`;
//   - call_context == fork_context_gnu, which only performs OMPT setup;
//   - a final assertion that call_context is below fork_context_last.
//
// Parameters (as used by the visible code):
//   loc                stored in the serial team's t_ident.
//   gtid               thread id of the caller; passed to the microtask, to
//                      __kmp_tid_from_gtid and to the trace output.
//   call_context       selects the branch; also fed to OMPT_INVOKER.
//   argc               number of microtask arguments.
//   microtask          function stored in t_pkfn and invoked directly.
//   invoker            stored in team->t.t_invoke on the teams-master path.
//   master_th          descriptor of the calling thread.
//   parent_team        source of t_argv on two of the paths.
//   ompt_parallel_data,
//   return_address,
//   parent_task_data   pointers to OMPT data; read for the callbacks, and
//                      *ompt_parallel_data is overwritten with the current
//                      team data before parallel_end is reported.
1642__kmp_serial_fork_call(
ident_t *loc,
int gtid,
enum fork_context_e call_context,
1643 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1644 kmp_info_t *master_th, kmp_team_t *parent_team,
1646 ompt_data_t *ompt_parallel_data,
void **return_address,
1647 ompt_data_t **parent_task_data,
// Platform-dependent section (Linux on x86 / x86_64 / ARM / AArch64); the
// lines this guard selects between are only partly visible in this copy.
1655#if KMP_OS_LINUX && \
1656 (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
// Scratch array with room for argc pointers, obtained through KMP_ALLOCA.
1659 void **args = (
void **)KMP_ALLOCA(argc *
sizeof(
void *));
1664 20, (
"__kmp_serial_fork_call: T#%d serializing parallel region\n", gtid));
// The serial team remembers which microtask it is running.
1669 master_th->th.th_serial_team->t.t_pkfn = microtask;
1672 if (call_context == fork_context_intel) {
1674 master_th->th.th_serial_team->t.t_ident = loc;
// Lower the serial team's nesting level by one; presumably undoing an
// increment made elsewhere -- TODO confirm.
1677 master_th->th.th_serial_team->t.t_level--;
// OMPT bookkeeping for the directly invoked microtask.
1682 void **exit_frame_p;
1683 ompt_task_info_t *task_info;
1684 ompt_lw_taskteam_t lw_taskteam;
1686 if (ompt_enabled.enabled) {
// Set up and link the local lw_taskteam, then take the exit-frame pointer
// from the task info that is current after linking.
1687 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1688 ompt_parallel_data, *return_address);
1690 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1692 task_info = OMPT_CUR_TASK_INFO(master_th);
1693 exit_frame_p = &(task_info->frame.exit_frame.ptr);
// Report the beginning of the implicit task.
1694 if (ompt_enabled.ompt_callback_implicit_task) {
1695 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1696 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1697 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1698 &(task_info->task_data), 1,
1699 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1703 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
// Alternative target for exit_frame_p; presumably the OMPT-disabled
// fallback (the `else` line and the declaration of `dummy` are not visible).
1705 exit_frame_p = &dummy;
// Run the microtask on the calling thread with parent_team's stored
// arguments (the call's remaining arguments are not visible).
1710 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1711 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1712 __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv
// OMPT teardown: clear the exit frame, report the end of the implicit task,
// unlink the lightweight task team, report the end of the parallel region
// and switch the thread state to overhead.
1721 if (ompt_enabled.enabled) {
1722 *exit_frame_p = NULL;
1723 if (ompt_enabled.ompt_callback_implicit_task) {
1724 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1725 ompt_scope_end, NULL, &(task_info->task_data), 1,
1726 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
// Copy the team data out to the caller before unlinking.
1728 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1729 __ompt_lw_taskteam_unlink(master_th);
1730 if (ompt_enabled.ompt_callback_parallel_end) {
1731 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1732 ompt_parallel_data, *parent_task_data,
1733 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1735 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// Path taken when the microtask is __kmp_teams_master (the `if` this
// `else if` belongs to is not visible in this copy).
1738 }
else if (microtask == (microtask_t)__kmp_teams_master) {
// The thread's current team is expected to be its serial team.
1739 KMP_DEBUG_ASSERT(master_th->th.th_team == master_th->th.th_serial_team);
1740 team = master_th->th.th_team;
1742 team->t.t_invoke = invoker;
1743 __kmp_alloc_argv_entries(argc, team, TRUE);
1744 team->t.t_argc = argc;
1745 argv = (
void **)team->t.t_argv;
// Two ways of filling argv are visible: from the variadic list, or by
// copying parent_team's argv. The condition choosing between them is not
// visible in this copy.
1747 for (i = argc - 1; i >= 0; --i)
1748 *argv++ = va_arg(kmp_va_deref(ap),
void *);
1750 for (i = 0; i < argc; ++i)
1752 argv[i] = parent_team->t.t_argv[i];
// OMPT: report the end of the current task as an initial task (fourth
// argument 0) and the end of the region flagged as ompt_parallel_league.
1760 if (ompt_enabled.enabled) {
1761 ompt_task_info_t *task_info = OMPT_CUR_TASK_INFO(master_th);
1762 if (ompt_enabled.ompt_callback_implicit_task) {
1763 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1764 ompt_scope_end, NULL, &(task_info->task_data), 0,
1765 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_initial);
1767 if (ompt_enabled.ompt_callback_parallel_end) {
1768 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1769 ompt_parallel_data, *parent_task_data,
1770 OMPT_INVOKER(call_context) | ompt_parallel_league,
1773 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// Remaining path: copy argc pointers from the variadic list through argv
// (NOTE(review): argv presumably points at `args` here; the assignment is
// not visible).
1778 for (i = argc - 1; i >= 0; --i)
1779 *argv++ = va_arg(kmp_va_deref(ap),
void *);
// OMPT bookkeeping for this path, mirroring the first path above.
1784 void **exit_frame_p;
1785 ompt_task_info_t *task_info;
1786 ompt_lw_taskteam_t lw_taskteam;
1787 ompt_data_t *implicit_task_data;
1789 if (ompt_enabled.enabled) {
1790 __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
1791 ompt_parallel_data, *return_address);
1792 __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
1794 task_info = OMPT_CUR_TASK_INFO(master_th);
1795 exit_frame_p = &(task_info->frame.exit_frame.ptr);
// Report the beginning of the implicit task; on this path the thread number
// is stored in the task info after the callback has been issued.
1798 implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
1799 if (ompt_enabled.ompt_callback_implicit_task) {
1800 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1801 ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
1802 implicit_task_data, 1, __kmp_tid_from_gtid(gtid),
1803 ompt_task_implicit);
1804 OMPT_CUR_TASK_INFO(master_th)->thread_num = __kmp_tid_from_gtid(gtid);
1808 master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
// Alternative target for exit_frame_p; presumably the OMPT-disabled
// fallback (the `else` line and the declaration of `dummy` are not visible).
1810 exit_frame_p = &dummy;
// Run the microtask on the calling thread with the copied arguments
// (the call's remaining arguments are not visible).
1815 KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
1816 KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
1817 __kmp_invoke_microtask(microtask, gtid, 0, argc, args
// OMPT teardown, same sequence as on the first path.
1826 if (ompt_enabled.enabled) {
1827 *exit_frame_p = NULL;
1828 if (ompt_enabled.ompt_callback_implicit_task) {
1829 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
1830 ompt_scope_end, NULL, &(task_info->task_data), 1,
1831 OMPT_CUR_TASK_INFO(master_th)->thread_num, ompt_task_implicit);
1834 *ompt_parallel_data = *OMPT_CUR_TEAM_DATA(master_th);
1835 __ompt_lw_taskteam_unlink(master_th);
1836 if (ompt_enabled.ompt_callback_parallel_end) {
1837 ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
1838 ompt_parallel_data, *parent_task_data,
1839 OMPT_INVOKER(call_context) | ompt_parallel_team, *return_address);
1841 master_th->th.ompt_thread_info.state = ompt_state_overhead;
// GNU fork context: the visible code does not invoke the microtask here; it
// only sets up and links an OMPT lightweight task team whose exit frame is
// cleared (the tail of the init call is not visible).
1845 }
else if (call_context == fork_context_gnu) {
1847 if (ompt_enabled.enabled) {
1848 ompt_lw_taskteam_t lwt;
1849 __ompt_lw_taskteam_init(&lwt, master_th, gtid, ompt_parallel_data,
1852 lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
1853 __ompt_lw_taskteam_link(&lwt, master_th, 1);
1859 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
// Any other fork context must still be a known enumerator.
1862 KMP_ASSERT2(call_context < fork_context_last,
1863 "__kmp_serial_fork_call: unknown fork_context parameter");
1866 KA_TRACE(20, (
"__kmp_serial_fork_call: T#%d serial exit\n", gtid));
1873int __kmp_fork_call(
ident_t *loc,
int gtid,
1874 enum fork_context_e call_context,
1875 kmp_int32 argc, microtask_t microtask, launch_t invoker,
1880 int master_this_cons;
1882 kmp_team_t *parent_team;
1883 kmp_info_t *master_th;
1887 int master_set_numthreads;
1888 int task_thread_limit = 0;
1892#if KMP_NESTED_HOT_TEAMS
1893 kmp_hot_team_ptr_t **p_hot_teams;
1896 KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
1899 KA_TRACE(20, (
"__kmp_fork_call: enter T#%d\n", gtid));
1900 if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) {
1903 void *dummy = KMP_ALLOCA(__kmp_stkpadding);
1905 if (__kmp_stkpadding > KMP_MAX_STKPADDING)
1906 __kmp_stkpadding += (short)((kmp_int64)dummy);
1912 if (!TCR_4(__kmp_init_parallel))
1913 __kmp_parallel_initialize();
1914 __kmp_resume_if_soft_paused();
1919 master_th = __kmp_threads[gtid];
1921 parent_team = master_th->th.th_team;
1922 master_tid = master_th->th.th_info.ds.ds_tid;
1923 master_this_cons = master_th->th.th_local.this_construct;
1924 root = master_th->th.th_root;
1925 master_active = root->r.r_active;
1926 master_set_numthreads = master_th->th.th_set_nproc;
1928 master_th->th.th_current_task->td_icvs.task_thread_limit;
1931 ompt_data_t ompt_parallel_data = ompt_data_none;
1932 ompt_data_t *parent_task_data;
1933 ompt_frame_t *ompt_frame;
1934 void *return_address = NULL;
1936 if (ompt_enabled.enabled) {
1937 __ompt_get_task_info_internal(0, NULL, &parent_task_data, &ompt_frame,
1939 return_address = OMPT_LOAD_RETURN_ADDRESS(gtid);