LLVM OpenMP* Runtime Library
kmp_lock.cpp
1/*
2 * kmp_lock.cpp -- lock-related functions
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#include <stddef.h>
14#include <atomic>
15
16#include "kmp.h"
17#include "kmp_i18n.h"
18#include "kmp_io.h"
19#include "kmp_itt.h"
20#include "kmp_lock.h"
21#include "kmp_wait_release.h"
22#include "kmp_wrapper_getpid.h"
23
24#if KMP_USE_FUTEX
25#include <sys/syscall.h>
26#include <unistd.h>
27// We should really include <futex.h>, but that causes compatibility problems on
28// different Linux* OS distributions that either require that you include (or
29// break when you try to include) <pci/types.h>. Since all we need is the two
30// macros below (which are part of the kernel ABI, so can't change) we just
31// define the constants here and don't include <futex.h>
32#ifndef FUTEX_WAIT
33#define FUTEX_WAIT 0
34#endif
35#ifndef FUTEX_WAKE
36#define FUTEX_WAKE 1
37#endif
38#endif
39
40/* Implement spin locks for internal library use. */
41/* The algorithm implemented is Lamport's bakery lock [1974]. */
42
43void __kmp_validate_locks(void) {
44 int i;
45 kmp_uint32 x, y;
46
47 /* Check to make sure unsigned arithmetic does wraps properly */
48 x = ~((kmp_uint32)0) - 2;
49 y = x - 2;
50
51 for (i = 0; i < 8; ++i, ++x, ++y) {
52 kmp_uint32 z = (x - y);
53 KMP_ASSERT(z == 2);
54 }
55
56 KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0);
57}
58
59/* ------------------------------------------------------------------------ */
60/* test and set locks */
61
62// For the non-nested locks, we can only assume that the first 4 bytes were
63// allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel
64// compiler only allocates a 4 byte pointer on IA-32 architecture. On
65// Windows* OS on Intel(R) 64, we can assume that all 8 bytes were allocated.
66//
67// gcc reserves >= 8 bytes for nested locks, so we can assume that the
68// entire 8 bytes were allocated for nested locks on all 64-bit platforms.
69
70static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) {
71 return KMP_LOCK_STRIP(KMP_ATOMIC_LD_RLX(&lck->lk.poll)) - 1;
72}
73
74static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) {
75 return lck->lk.depth_locked != -1;
76}
77
78__forceinline static int
79__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) {
80 KMP_MB();
81
82#ifdef USE_LOCK_PROFILE
83 kmp_uint32 curr = KMP_LOCK_STRIP(lck->lk.poll);
84 if ((curr != 0) && (curr != gtid + 1))
85 __kmp_printf("LOCK CONTENTION: %p\n", lck);
86/* else __kmp_printf( "." );*/
87#endif /* USE_LOCK_PROFILE */
88
89 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
90 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
91
92 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
93 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
94 KMP_FSYNC_ACQUIRED(lck);
95 return KMP_LOCK_ACQUIRED_FIRST;
96 }
97
98 kmp_uint32 spins;
99 kmp_uint64 time;
100 KMP_FSYNC_PREPARE(lck);
101 KMP_INIT_YIELD(spins);
102 KMP_INIT_BACKOFF(time);
103 kmp_backoff_t backoff = __kmp_spin_backoff_params;
104 do {
105#if !KMP_HAVE_UMWAIT
106 __kmp_spin_backoff(&backoff);
107#else
108 if (!__kmp_tpause_enabled)
109 __kmp_spin_backoff(&backoff);
110#endif
111 KMP_YIELD_OVERSUB_ELSE_SPIN(spins, time);
112 } while (KMP_ATOMIC_LD_RLX(&lck->lk.poll) != tas_free ||
113 !__kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy));
114 KMP_FSYNC_ACQUIRED(lck);
115 return KMP_LOCK_ACQUIRED_FIRST;
116}
117
118int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
119 int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid);
120 return retval;
121}
122
123static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck,
124 kmp_int32 gtid) {
125 char const *const func = "omp_set_lock";
126 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
127 __kmp_is_tas_lock_nestable(lck)) {
128 KMP_FATAL(LockNestableUsedAsSimple, func);
129 }
130 if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) {
131 KMP_FATAL(LockIsAlreadyOwned, func);
132 }
133 return __kmp_acquire_tas_lock(lck, gtid);
134}
135
136int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) {
137 kmp_int32 tas_free = KMP_LOCK_FREE(tas);
138 kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas);
139 if (KMP_ATOMIC_LD_RLX(&lck->lk.poll) == tas_free &&
140 __kmp_atomic_compare_store_acq(&lck->lk.poll, tas_free, tas_busy)) {
141 KMP_FSYNC_ACQUIRED(lck);
142 return TRUE;
143 }
144 return FALSE;
145}
146
147static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck,
148 kmp_int32 gtid) {
149 char const *const func = "omp_test_lock";
150 if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) &&
151 __kmp_is_tas_lock_nestable(lck)) {
152 KMP_FATAL(LockNestableUsedAsSimple, func);
153 }
154 return __kmp_test_tas_lock(lck, gtid);