1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#pragma ident	"%Z%%M%	%I%	%E% SMI"
28
29#include "lint.h"
30#include "thr_uberdata.h"
31#include <stddef.h>
32
33/*
34 * These symbols should not be exported from libc, but
35 * /lib/libm.so.2 references them.  libm needs to be fixed.
36 * Also, some older versions of the Studio compiler/debugger
37 * components reference them.  These need to be fixed, too.
38 */
39#pragma weak _thr_getspecific = thr_getspecific
40#pragma weak _thr_keycreate = thr_keycreate
41#pragma weak _thr_setspecific = thr_setspecific
42
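/*
 * 128 million keys should be enough for anyone.
 * Both the key table and each thread's value array are sized as
 * (number of keys) * sizeof (void *).  With 4-byte pointers this
 * allocates half a gigabyte of memory for the keys themselves and
 * half a gigabyte of memory for each thread that uses the largest key;
 * with 8-byte pointers both figures double.
 */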
48#define	MAX_KEYS	0x08000000U
49
50int
51thr_keycreate(thread_key_t *pkey, void (*destructor)(void *))
52{
53	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
54	void (**old_data)(void *) = NULL;
55	void (**new_data)(void *);
56	uint_t old_nkeys;
57	uint_t new_nkeys;
58
59	lmutex_lock(&tsdm->tsdm_lock);
60
61	/*
62	 * Unfortunately, pthread_getspecific() specifies that a
63	 * pthread_getspecific() on an allocated key upon which the
64	 * calling thread has not performed a pthread_setspecifc()
65	 * must return NULL.  Consider the following sequence:
66	 *
67	 *	pthread_key_create(&key);
68	 *	pthread_setspecific(key, datum);
69	 *	pthread_key_delete(&key);
70	 *	pthread_key_create(&key);
71	 *	val = pthread_getspecific(key);
72	 *
73	 * According to POSIX, if the deleted key is reused for the new
74	 * key returned by the second pthread_key_create(), then the
75	 * pthread_getspecific() in the above example must return NULL
76	 * (and not the stale datum).  The implementation is thus left
77	 * with two alternatives:
78	 *
79	 *  (1)	Reuse deleted keys.  If this is to be implemented optimally,
80	 *	it requires that pthread_key_create() somehow associate
81	 *	the value NULL with the new (reused) key for each thread.
82	 *	Keeping the hot path fast and lock-free induces substantial
83	 *	complexity on the implementation.
84	 *
85	 *  (2)	Never reuse deleted keys. This allows the pthread_getspecific()
86	 *	implementation to simply perform a check against the number
87	 *	of keys set by the calling thread, returning NULL if the
88	 *	specified key is larger than the highest set key.  This has
89	 *	the disadvantage of wasting memory (a program which simply
90	 *	loops calling pthread_key_create()/pthread_key_delete()
91	 *	will ultimately run out of memory), but permits an optimal
92	 *	pthread_getspecific() while allowing for simple key creation
93	 *	and deletion.
94	 *
95	 * All Solaris implementations have opted for (2).  Given the
96	 * ~10 years that this has been in the field, it is safe to assume
97	 * that applications don't loop creating and destroying keys; we
98	 * stick with (2).
99	 */
100	if (tsdm->tsdm_nused == (old_nkeys = tsdm->tsdm_nkeys)) {
101		/*
102		 * We need to allocate or double the number of keys.
103		 * tsdm->tsdm_nused must always be a power of two.
104		 */
105		if ((new_nkeys = (old_nkeys << 1)) == 0)
106			new_nkeys = 8;
107
108		if (new_nkeys > MAX_KEYS) {
109			lmutex_unlock(&tsdm->tsdm_lock);
110			return (EAGAIN);
111		}
112		if ((new_data = lmalloc(new_nkeys * sizeof (void *))) == NULL) {
113			lmutex_unlock(&tsdm->tsdm_lock);
114			return (ENOMEM);
115		}
116		if ((old_data = tsdm->tsdm_destro) == NULL) {
117			/* key == 0 is always invalid */
118			new_data[0] = TSD_UNALLOCATED;
119			tsdm->tsdm_nused = 1;
120		} else {
121			(void) memcpy(new_data, old_data,
122			    old_nkeys * sizeof (void *));
123		}
124		tsdm->tsdm_destro = new_data;
125		tsdm->tsdm_nkeys = new_nkeys;
126	}
127
128	*pkey = tsdm->tsdm_nused;
129	tsdm->tsdm_destro[tsdm->tsdm_nused++] = destructor;
130	lmutex_unlock(&tsdm->tsdm_lock);
131
132	if (old_data != NULL)
133		lfree(old_data, old_nkeys * sizeof (void *));
134
135	return (0);
136}
137
138#pragma weak _pthread_key_create = pthread_key_create
139int
140pthread_key_create(pthread_key_t *pkey, void (*destructor)(void *))
141{
142	return (thr_keycreate(pkey, destructor));
143}
144
145/*
146 * Same as thr_keycreate(), above, except that the key creation
147 * is performed only once.  This relies upon the fact that a key
148 * value of THR_ONCE_KEY is invalid, and requires that the key be
149 * allocated with a value of THR_ONCE_KEY before calling here.
150 * THR_ONCE_KEY and PTHREAD_ONCE_KEY_NP, defined in <thread.h>
151 * and <pthread.h> respectively, must have the same value.
152 * Example:
153 *
154 *	static pthread_key_t key = PTHREAD_ONCE_KEY_NP;
155 *	...
156 *	pthread_key_create_once_np(&key, destructor);
157 */
158#pragma weak pthread_key_create_once_np = thr_keycreate_once
159int
160thr_keycreate_once(thread_key_t *keyp, void (*destructor)(void *))
161{
162	static mutex_t key_lock = DEFAULTMUTEX;
163	thread_key_t key;
164	int error;
165
166	if (*keyp == THR_ONCE_KEY) {
167		lmutex_lock(&key_lock);
168		if (*keyp == THR_ONCE_KEY) {
169			error = thr_keycreate(&key, destructor);
170			if (error) {
171				lmutex_unlock(&key_lock);
172				return (error);
173			}
174			membar_producer();
175			*keyp = key;
176		}
177		lmutex_unlock(&key_lock);
178	}
179	membar_consumer();
180
181	return (0);
182}
183
184int
185pthread_key_delete(pthread_key_t key)
186{
187	tsd_metadata_t *tsdm = &curthread->ul_uberdata->tsd_metadata;
188
189	lmutex_lock(&tsdm->tsdm_lock);
190
191	if (key >= tsdm->tsdm_nused ||
192	    tsdm->tsdm_destro[key] == TSD_UNALLOCATED) {
193		lmutex_unlock(&tsdm->tsdm_lock);
194		return (EINVAL);
195	}
196
197	tsdm->tsdm_destro[key] = TSD_UNALLOCATED;
198	lmutex_unlock(&tsdm->tsdm_lock);
199
200	return (0);
201}
202
203/*
204 * Blessedly, the pthread_getspecific() interface is much better than the
205 * thr_getspecific() interface in that it cannot return an error status.
206 * Thus, if the key specified is bogus, pthread_getspecific()'s behavior
207 * is undefined.  As an added bonus (and as an artificat of not returning
208 * an error code), the requested datum is returned rather than stored
209 * through a parameter -- thereby avoiding the unnecessary store/load pair
210 * incurred by thr_getspecific().  Every once in a while, the Standards
211 * get it right -- but usually by accident.
212 */
213void *
214pthread_getspecific(pthread_key_t key)
215{
216	tsd_t *stsd;
217
218	/*
219	 * We are cycle-shaving in this function because some
220	 * applications make heavy use of it and one machine cycle
221	 * can make a measurable difference in performance.  This
222	 * is why we waste a little memory and allocate a NULL value
223	 * for the invalid key == 0 in curthread->ul_ftsd[0] rather
224	 * than adjusting the key by subtracting one.
225	 */
226	if (key < TSD_NFAST)
227		return (curthread->ul_ftsd[key]);
228
229	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
230		return (stsd->tsd_data[key]);
231
232	return (NULL);
233}
234
235int
236thr_getspecific(thread_key_t key, void **valuep)
237{
238	tsd_t *stsd;
239
240	/*
241	 * Amazingly, some application code (and worse, some particularly
242	 * fugly Solaris library code) _relies_ on the fact that 0 is always
243	 * an invalid key.  To preserve this semantic, 0 is never returned
244	 * as a key from thr_/pthread_key_create(); we explicitly check
245	 * for it here and return EINVAL.
246	 */
247	if (key == 0)
248		return (EINVAL);
249
250	if (key < TSD_NFAST)
251		*valuep = curthread->ul_ftsd[key];
252	else if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc)
253		*valuep = stsd->tsd_data[key];
254	else
255		*valuep = NULL;
256
257	return (0);
258}
259
260/*
261 * We call thr_setspecific_slow() when the key specified
262 * is beyond the current thread's currently allocated range.
263 * This case is in a separate function because we want
264 * the compiler to optimize for the common case.
265 */
266static int
267thr_setspecific_slow(thread_key_t key, void *value)
268{
269	ulwp_t *self = curthread;
270	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
271	tsd_t *stsd;
272	tsd_t *ntsd;
273	uint_t nkeys;
274
275	/*
276	 * It isn't necessary to grab locks in this path;
277	 * tsdm->tsdm_nused can only increase.
278	 */
279	if (key >= tsdm->tsdm_nused)
280		return (EINVAL);
281
282	/*
283	 * We would like to test (tsdm->tsdm_destro[key] == TSD_UNALLOCATED)
284	 * here but that would require acquiring tsdm->tsdm_lock and we
285	 * want to avoid locks in this path.
286	 *
287	 * We have a key which is (or at least _was_) valid.  If this key
288	 * is later deleted (or indeed, is deleted before we set the value),
289	 * we don't care; such a condition would indicate an application
290	 * race for which POSIX thankfully leaves the behavior unspecified.
291	 *
292	 * First, determine our new size.  To avoid allocating more than we
293	 * have to, continue doubling our size only until the new key fits.
294	 * stsd->tsd_nalloc must always be a power of two.
295	 */
296	nkeys = ((stsd = self->ul_stsd) != NULL)? stsd->tsd_nalloc : 8;
297	for (; key >= nkeys; nkeys <<= 1)
298		continue;
299
300	/*
301	 * Allocate the new TSD.
302	 */
303	if ((ntsd = lmalloc(nkeys * sizeof (void *))) == NULL)
304		return (ENOMEM);
305
306	if (stsd != NULL) {
307		/*
308		 * Copy the old TSD across to the new.
309		 */
310		(void) memcpy(ntsd, stsd, stsd->tsd_nalloc * sizeof (void *));
311		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
312	}
313
314	ntsd->tsd_nalloc = nkeys;
315	ntsd->tsd_data[key] = value;
316	self->ul_stsd = ntsd;
317
318	return (0);
319}
320
321int
322thr_setspecific(thread_key_t key, void *value)
323{
324	tsd_t *stsd;
325	int ret;
326	ulwp_t *self = curthread;
327
328	/*
329	 * See the comment in thr_getspecific(), above.
330	 */
331	if (key == 0)
332		return (EINVAL);
333
334	if (key < TSD_NFAST) {
335		curthread->ul_ftsd[key] = value;
336		return (0);
337	}
338
339	if ((stsd = curthread->ul_stsd) != NULL && key < stsd->tsd_nalloc) {
340		stsd->tsd_data[key] = value;
341		return (0);
342	}
343
344	/*
345	 * This is a critical region since we are dealing with memory
346	 * allocation and free. Similar protection required in tsd_free().
347	 */
348	enter_critical(self);
349	ret = thr_setspecific_slow(key, value);
350	exit_critical(self);
351	return (ret);
352}
353
354int
355pthread_setspecific(pthread_key_t key, const void *value)
356{
357	return (thr_setspecific(key, (void *)value));
358}
359
360/*
361 * Contract-private interface for java.  See PSARC/2003/159
362 *
363 * If the key falls within the TSD_NFAST range, return a non-negative
364 * offset that can be used by the caller to fetch the TSD data value
365 * directly out of the thread structure using %g7 (sparc) or %gs (x86).
366 * With the advent of TLS, %g7 and %gs are part of the ABI, even though
367 * the definition of the thread structure itself (ulwp_t) is private.
368 *
369 * We guarantee that the offset returned on sparc will fit within
370 * a SIMM13 field (that is, it is less than 2048).
371 *
372 * On failure (key is not in the TSD_NFAST range), return -1.
373 */
374ptrdiff_t
375_thr_slot_offset(thread_key_t key)
376{
377	if (key != 0 && key < TSD_NFAST)
378		return ((ptrdiff_t)offsetof(ulwp_t, ul_ftsd[key]));
379	return (-1);
380}
381
382/*
383 * This is called by _thrp_exit() to apply destructors to the thread's tsd.
384 */
385void
386tsd_exit()
387{
388	ulwp_t *self = curthread;
389	tsd_metadata_t *tsdm = &self->ul_uberdata->tsd_metadata;
390	thread_key_t key;
391	int recheck;
392	void *val;
393	void (*func)(void *);
394
395	lmutex_lock(&tsdm->tsdm_lock);
396
397	do {
398		recheck = 0;
399
400		for (key = 1; key < TSD_NFAST &&
401		    key < tsdm->tsdm_nused; key++) {
402			if ((func = tsdm->tsdm_destro[key]) != NULL &&
403			    func != TSD_UNALLOCATED &&
404			    (val = self->ul_ftsd[key]) != NULL) {
405				self->ul_ftsd[key] = NULL;
406				lmutex_unlock(&tsdm->tsdm_lock);
407				(*func)(val);
408				lmutex_lock(&tsdm->tsdm_lock);
409				recheck = 1;
410			}
411		}
412
413		if (self->ul_stsd == NULL)
414			continue;
415
416		/*
417		 * Any of these destructors could cause us to grow the number
418		 * TSD keys in the slow TSD; we cannot cache the slow TSD
419		 * pointer through this loop.
420		 */
421		for (; key < self->ul_stsd->tsd_nalloc &&
422		    key < tsdm->tsdm_nused; key++) {
423			if ((func = tsdm->tsdm_destro[key]) != NULL &&
424			    func != TSD_UNALLOCATED &&
425			    (val = self->ul_stsd->tsd_data[key]) != NULL) {
426				self->ul_stsd->tsd_data[key] = NULL;
427				lmutex_unlock(&tsdm->tsdm_lock);
428				(*func)(val);
429				lmutex_lock(&tsdm->tsdm_lock);
430				recheck = 1;
431			}
432		}
433	} while (recheck);
434
435	lmutex_unlock(&tsdm->tsdm_lock);
436
437	/*
438	 * We're done; if we have slow TSD, we need to free it.
439	 */
440	tsd_free(self);
441}
442
443void
444tsd_free(ulwp_t *ulwp)
445{
446	tsd_t *stsd;
447	ulwp_t *self = curthread;
448
449	enter_critical(self);
450	if ((stsd = ulwp->ul_stsd) != NULL)
451		lfree(stsd, stsd->tsd_nalloc * sizeof (void *));
452	ulwp->ul_stsd = NULL;
453	exit_critical(self);
454}
455