Skip to content

Commit ecca8ce

Browse files
committed
std: simplify TLS key creation on Windows
1 parent 4c3f8eb commit ecca8ce

File tree

8 files changed

+93
-173
lines changed

8 files changed

+93
-173
lines changed

library/std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,7 @@
251251
#![feature(doc_notable_trait)]
252252
#![feature(dropck_eyepatch)]
253253
#![feature(exhaustive_patterns)]
254+
#![feature(inline_const)]
254255
#![feature(intra_doc_pointers)]
255256
#![cfg_attr(bootstrap, feature(label_break_value))]
256257
#![feature(lang_items)]

library/std/src/sys/sgx/thread_local_key.rs

-5
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,3 @@ pub unsafe fn get(key: Key) -> *mut u8 {
2121
pub unsafe fn destroy(key: Key) {
2222
Tls::destroy(AbiKey::from_usize(key))
2323
}
24-
25-
#[inline]
26-
pub fn requires_synchronized_create() -> bool {
27-
false
28-
}

library/std/src/sys/solid/thread_local_key.rs

-5
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,3 @@ pub unsafe fn get(_key: Key) -> *mut u8 {
1919
pub unsafe fn destroy(_key: Key) {
2020
panic!("should not be used on the solid target");
2121
}
22-
23-
#[inline]
24-
pub fn requires_synchronized_create() -> bool {
25-
panic!("should not be used on the solid target");
26-
}

library/std/src/sys/unix/thread_local_key.rs

+21-6
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,28 @@ pub type Key = libc::pthread_key_t;
66

77
#[inline]
88
pub unsafe fn create(dtor: Option<unsafe extern "C" fn(*mut u8)>) -> Key {
9+
let dtor = mem::transmute(dtor);
910
let mut key = 0;
10-
assert_eq!(libc::pthread_key_create(&mut key, mem::transmute(dtor)), 0);
11+
let r = libc::pthread_key_create(&mut key, dtor);
12+
assert_eq!(r, 0);
13+
14+
// POSIX allows the key created here to be 0, but `StaticKey` relies
15+
// on using 0 as a sentinel value to check who won the race to set the
16+
// shared TLS key. As far as I know, there is no guaranteed value that
17+
// cannot be returned as a pthread_key_create key, so there is no value
18+
// we can initialize the inner key with to prove that it has not yet
19+
// been set. Therefore, we use this small trick to ensure the returned
20+
// key is not zero.
21+
if key == 0 {
22+
let mut new = 0;
23+
// Only check the creation result after deleting the old key to avoid
24+
// leaking it.
25+
let r_c = libc::pthread_key_create(&mut new, dtor);
26+
let r_d = libc::pthread_key_delete(key);
27+
assert_eq!(r_c, 0);
28+
debug_assert_eq!(r_d, 0);
29+
key = new;
30+
}
1131
key
1232
}
1333

@@ -27,8 +47,3 @@ pub unsafe fn destroy(key: Key) {
2747
let r = libc::pthread_key_delete(key);
2848
debug_assert_eq!(r, 0);
2949
}
30-
31-
#[inline]
32-
pub fn requires_synchronized_create() -> bool {
33-
false
34-
}

library/std/src/sys/unsupported/thread_local_key.rs

-5
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,3 @@ pub unsafe fn get(_key: Key) -> *mut u8 {
1919
pub unsafe fn destroy(_key: Key) {
2020
panic!("should not be used on this target");
2121
}
22-
23-
#[inline]
24-
pub fn requires_synchronized_create() -> bool {
25-
panic!("should not be used on this target");
26-
}

library/std/src/sys/windows/c.rs

+1
Original file line numberDiff line numberDiff line change
@@ -959,6 +959,7 @@ extern "system" {
959959
pub fn TlsAlloc() -> DWORD;
960960
pub fn TlsGetValue(dwTlsIndex: DWORD) -> LPVOID;
961961
pub fn TlsSetValue(dwTlsIndex: DWORD, lpTlsvalue: LPVOID) -> BOOL;
962+
pub fn TlsFree(dwTlsIndex: DWORD) -> BOOL;
962963
pub fn GetLastError() -> DWORD;
963964
pub fn QueryPerformanceFrequency(lpFrequency: *mut LARGE_INTEGER) -> BOOL;
964965
pub fn QueryPerformanceCounter(lpPerformanceCount: *mut LARGE_INTEGER) -> BOOL;
+66-110
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
use crate::mem::ManuallyDrop;
1+
use crate::mem;
22
use crate::ptr;
3-
use crate::sync::atomic::AtomicPtr;
4-
use crate::sync::atomic::Ordering::SeqCst;
3+
use crate::sync::atomic::{
4+
compiler_fence, AtomicPtr, AtomicUsize,
5+
Ordering::{Relaxed, Release},
6+
};
57
use crate::sys::c;
68

79
pub type Key = c::DWORD;
@@ -19,106 +21,93 @@ pub type Dtor = unsafe extern "C" fn(*mut u8);
1921
// somewhere to run arbitrary code on thread termination. With this in place
2022
// we'll be able to run anything we like, including all TLS destructors!
2123
//
22-
// To accomplish this feat, we perform a number of threads, all contained
23-
// within this module:
24-
//
25-
// * All TLS destructors are tracked by *us*, not the windows runtime. This
26-
// means that we have a global list of destructors for each TLS key that
27-
// we know about.
28-
// * When a thread exits, we run over the entire list and run dtors for all
29-
// non-null keys. This attempts to match Unix semantics in this regard.
30-
//
31-
// This ends up having the overhead of using a global list, having some
32-
// locks here and there, and in general just adding some more code bloat. We
33-
// attempt to optimize runtime by forgetting keys that don't have
34-
// destructors, but this only gets us so far.
24+
// Since the maximum number of keys is 1088 [3] and key values are always lower
25+
// than 1088 [4], we can just use a static array to store the destructor functions
26+
// and use the TLS key as index. This avoids all synchronization problems
27+
// encountered with linked lists or other kinds of storage.
3528
//
3629
// For more details and nitty-gritty, see the code sections below!
3730
//
3831
// [1]: https://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
39-
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base
40-
// /threading/thread_local_storage_win.cc#L42
32+
// [2]: https://github.com/ChromiumWebApps/chromium/blob/master/base/threading/thread_local_storage_win.cc#L42
33+
// [3]: https://learn.microsoft.com/en-us/windows/win32/procthread/thread-local-storage
34+
// [4]: https://learn.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-tlssetvalue
4135

42-
// -------------------------------------------------------------------------
43-
// Native bindings
44-
//
45-
// This section is just raw bindings to the native functions that Windows
46-
// provides, There's a few extra calls to deal with destructors.
36+
static DTORS: [AtomicPtr<()>; 1088] = [const { AtomicPtr::new(ptr::null_mut()) }; 1088];
37+
// The highest key that has a destructor associated with it. Used as an
38+
// optimization so we don't need to iterate over the whole array when there
39+
// are only a few keys.
40+
static HIGHEST: AtomicUsize = AtomicUsize::new(0);
4741

4842
#[inline]
4943
pub unsafe fn create(dtor: Option<Dtor>) -> Key {
5044
let key = c::TlsAlloc();
5145
assert!(key != c::TLS_OUT_OF_INDEXES);
52-
if let Some(f) = dtor {
53-
register_dtor(key, f);
46+
47+
if let Some(dtor) = dtor {
48+
DTORS[key as usize].store(mem::transmute::<Dtor, *mut ()>(dtor), Relaxed);
49+
HIGHEST.fetch_max(key as usize, Relaxed);
50+
// If the destructors are run in a signal handler running after this
51+
// code, we need to guarantee that the changes have been performed
52+
// before the handler is triggered.
53+
compiler_fence(Release);
5454
}
55-
key
55+
56+
// Ensure that the key is always non-zero. Since key values are below
57+
// 1088, this cannot overflow.
58+
key + 1
5659
}
5760

5861
#[inline]
5962
pub unsafe fn set(key: Key, value: *mut u8) {
60-
let r = c::TlsSetValue(key, value as c::LPVOID);
63+
let r = c::TlsSetValue(key - 1, value as c::LPVOID);
6164
debug_assert!(r != 0);
6265
}
6366

6467
#[inline]
6568
pub unsafe fn get(key: Key) -> *mut u8 {
66-
c::TlsGetValue(key) as *mut u8
67-
}
68-
69-
#[inline]
70-
pub unsafe fn destroy(_key: Key) {
71-
rtabort!("can't destroy tls keys on windows")
69+
c::TlsGetValue(key - 1) as *mut u8
7270
}
7371

7472
#[inline]
75-
pub fn requires_synchronized_create() -> bool {
76-
true
77-
}
78-
79-
// -------------------------------------------------------------------------
80-
// Dtor registration
81-
//
82-
// Windows has no native support for running destructors so we manage our own
83-
// list of destructors to keep track of how to destroy keys. We then install a
84-
// callback later to get invoked whenever a thread exits, running all
85-
// appropriate destructors.
86-
//
87-
// Currently unregistration from this list is not supported. A destructor can be
88-
// registered but cannot be unregistered. There's various simplifying reasons
89-
// for doing this, the big ones being:
90-
//
91-
// 1. Currently we don't even support deallocating TLS keys, so normal operation
92-
// doesn't need to deallocate a destructor.
93-
// 2. There is no point in time where we know we can unregister a destructor
94-
// because it could always be getting run by some remote thread.
95-
//
96-
// Typically processes have a statically known set of TLS keys which is pretty
97-
// small, and we'd want to keep this memory alive for the whole process anyway
98-
// really.
99-
//
100-
// Perhaps one day we can fold the `Box` here into a static allocation,
101-
// expanding the `StaticKey` structure to contain not only a slot for the TLS
102-
// key but also a slot for the destructor queue on windows. An optimization for
103-
// another day!
104-
105-
static DTORS: AtomicPtr<Node> = AtomicPtr::new(ptr::null_mut());
106-
107-
struct Node {
108-
dtor: Dtor,
109-
key: Key,
110-
next: *mut Node,
73+
pub unsafe fn destroy(key: Key) {
74+
DTORS[(key - 1) as usize].store(ptr::null_mut(), Relaxed);
75+
let r = c::TlsFree(key - 1);
76+
// Use release ordering for the same reason as above.
77+
compiler_fence(Release);
78+
debug_assert!(r != 0);
11179
}
11280

113-
unsafe fn register_dtor(key: Key, dtor: Dtor) {
114-
let mut node = ManuallyDrop::new(Box::new(Node { key, dtor, next: ptr::null_mut() }));
81+
#[allow(dead_code)] // actually called below
82+
unsafe fn run_dtors() {
83+
let mut iterations = 5;
84+
while iterations != 0 {
85+
let mut any_run = false;
86+
// All keys have either been created by the current thread or must
87+
// have been propagated through other means of synchronization, so
88+
// we can just use relaxed ordering here and still observe all
89+
// changes relevant to us.
90+
let highest = HIGHEST.load(Relaxed);
91+
for (index, dtor) in DTORS[..highest].iter().enumerate() {
92+
let dtor = mem::transmute::<*mut (), Option<Dtor>>(dtor.load(Relaxed));
93+
if let Some(dtor) = dtor {
94+
let ptr = c::TlsGetValue(index as Key) as *mut u8;
95+
if !ptr.is_null() {
96+
let r = c::TlsSetValue(index as Key, ptr::null_mut());
97+
debug_assert!(r != 0);
98+
99+
(dtor)(ptr);
100+
any_run = true;
101+
}
102+
}
103+
}
115104

116-
let mut head = DTORS.load(SeqCst);
117-
loop {
118-
node.next = head;
119-
match DTORS.compare_exchange(head, &mut **node, SeqCst, SeqCst) {
120-
Ok(_) => return, // nothing to drop, we successfully added the node to the list
121-
Err(cur) => head = cur,
105+
iterations -= 1;
106+
// If no destructors were run, no new keys have been initialized,
107+
// so we are done. FIXME: Maybe use TLS to store the number of active
108+
// keys per thread.
109+
if !any_run {
110+
return;
122111
}
123112
}
124113
}
@@ -154,16 +143,6 @@ unsafe fn register_dtor(key: Key, dtor: Dtor) {
154143
// thread or a process "detaches" (exits). The process part happens for the
155144
// last thread and the thread part happens for any normal thread.
156145
//
157-
// # Ok, what's up with running all these destructors?
158-
//
159-
// This will likely need to be improved over time, but this function
160-
// attempts a "poor man's" destructor callback system. Once we've got a list
161-
// of what to run, we iterate over all keys, check their values, and then run
162-
// destructors if the values turn out to be non null (setting them to null just
163-
// beforehand). We do this a few times in a loop to basically match Unix
164-
// semantics. If we don't reach a fixed point after a short while then we just
165-
// inevitably leak something most likely.
166-
//
167146
// # The article mentions weird stuff about "/INCLUDE"?
168147
//
169148
// It sure does! Specifically we're talking about this quote:
@@ -213,26 +192,3 @@ unsafe extern "system" fn on_tls_callback(h: c::LPVOID, dwReason: c::DWORD, pv:
213192
#[cfg(not(target_env = "msvc"))]
214193
unsafe fn reference_tls_used() {}
215194
}
216-
217-
#[allow(dead_code)] // actually called above
218-
unsafe fn run_dtors() {
219-
let mut any_run = true;
220-
for _ in 0..5 {
221-
if !any_run {
222-
break;
223-
}
224-
any_run = false;
225-
let mut cur = DTORS.load(SeqCst);
226-
while !cur.is_null() {
227-
let ptr = c::TlsGetValue((*cur).key);
228-
229-
if !ptr.is_null() {
230-
c::TlsSetValue((*cur).key, ptr::null_mut());
231-
((*cur).dtor)(ptr as *mut _);
232-
any_run = true;
233-
}
234-
235-
cur = (*cur).next;
236-
}
237-
}
238-
}

library/std/src/sys_common/thread_local_key.rs

+4-42
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ mod tests;
5353

5454
use crate::sync::atomic::{self, AtomicUsize, Ordering};
5555
use crate::sys::thread_local_key as imp;
56-
use crate::sys_common::mutex::StaticMutex;
5756

5857
/// A type for TLS keys that are statically allocated.
5958
///
@@ -151,44 +150,9 @@ impl StaticKey {
151150
}
152151

153152
unsafe fn lazy_init(&self) -> usize {
154-
// Currently the Windows implementation of TLS is pretty hairy, and
155-
// it greatly simplifies creation if we just synchronize everything.
156-
//
157-
// Additionally a 0-index of a tls key hasn't been seen on windows, so
158-
// we just simplify the whole branch.
159-
if imp::requires_synchronized_create() {
160-
// We never call `INIT_LOCK.init()`, so it is UB to attempt to
161-
// acquire this mutex reentrantly!
162-
static INIT_LOCK: StaticMutex = StaticMutex::new();
163-
let _guard = INIT_LOCK.lock();
164-
let mut key = self.key.load(Ordering::SeqCst);
165-
if key == 0 {
166-
key = imp::create(self.dtor) as usize;
167-
self.key.store(key, Ordering::SeqCst);
168-
}
169-
rtassert!(key != 0);
170-
return key;
171-
}
172-
173-
// POSIX allows the key created here to be 0, but the compare_exchange
174-
// below relies on using 0 as a sentinel value to check who won the
175-
// race to set the shared TLS key. As far as I know, there is no
176-
// guaranteed value that cannot be returned as a posix_key_create key,
177-
// so there is no value we can initialize the inner key with to
178-
// prove that it has not yet been set. As such, we'll continue using a
179-
// value of 0, but with some gyrations to make sure we have a non-0
180-
// value returned from the creation routine.
181-
// FIXME: this is clearly a hack, and should be cleaned up.
182-
let key1 = imp::create(self.dtor);
183-
let key = if key1 != 0 {
184-
key1
185-
} else {
186-
let key2 = imp::create(self.dtor);
187-
imp::destroy(key1);
188-
key2
189-
};
190-
rtassert!(key != 0);
191-
match self.key.compare_exchange(0, key as usize, Ordering::SeqCst, Ordering::SeqCst) {
153+
let key = imp::create(self.dtor);
154+
debug_assert!(key != 0);
155+
match self.key.compare_exchange(0, key as usize, Ordering::AcqRel, Ordering::Acquire) {
192156
// The CAS succeeded, so we've created the actual key
193157
Ok(_) => key as usize,
194158
// If someone beat us to the punch, use their key instead
@@ -232,8 +196,6 @@ impl Key {
232196

233197
impl Drop for Key {
234198
fn drop(&mut self) {
235-
// Right now Windows doesn't support TLS key destruction, but this also
236-
// isn't used anywhere other than tests, so just leak the TLS key.
237-
// unsafe { imp::destroy(self.key) }
199+
unsafe { imp::destroy(self.key) }
238200
}
239201
}

0 commit comments

Comments
 (0)