1f4b3ec61Sdh /*
2f4b3ec61Sdh * CDDL HEADER START
3f4b3ec61Sdh *
4f4b3ec61Sdh * The contents of this file are subject to the terms of the
5f4b3ec61Sdh * Common Development and Distribution License (the "License").
6f4b3ec61Sdh * You may not use this file except in compliance with the License.
7f4b3ec61Sdh *
8f4b3ec61Sdh * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9f4b3ec61Sdh * or http://www.opensolaris.org/os/licensing.
10f4b3ec61Sdh * See the License for the specific language governing permissions
11f4b3ec61Sdh * and limitations under the License.
12f4b3ec61Sdh *
13f4b3ec61Sdh * When distributing Covered Code, include this CDDL HEADER in each
14f4b3ec61Sdh * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15f4b3ec61Sdh * If applicable, add the following below this CDDL HEADER, with the
16f4b3ec61Sdh * fields enclosed by brackets "[]" replaced with your own identifying
17f4b3ec61Sdh * information: Portions Copyright [yyyy] [name of copyright owner]
18f4b3ec61Sdh *
19f4b3ec61Sdh * CDDL HEADER END
20f4b3ec61Sdh */
21f4b3ec61Sdh
22f4b3ec61Sdh /*
230a0e9771SDarren Reed * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24f4b3ec61Sdh * Use is subject to license terms.
2584fe1120SDan McDonald * Copyright (c) 2017, Joyent, Inc. All rights reserved.
26f4b3ec61Sdh */
27f4b3ec61Sdh
28f4b3ec61Sdh #include <sys/param.h>
29f4b3ec61Sdh #include <sys/sysmacros.h>
30f4b3ec61Sdh #include <sys/vm.h>
31f4b3ec61Sdh #include <sys/proc.h>
32f4b3ec61Sdh #include <sys/tuneable.h>
33f4b3ec61Sdh #include <sys/systm.h>
34f4b3ec61Sdh #include <sys/cmn_err.h>
35f4b3ec61Sdh #include <sys/debug.h>
36f4b3ec61Sdh #include <sys/sdt.h>
37f4b3ec61Sdh #include <sys/mutex.h>
38f4b3ec61Sdh #include <sys/bitmap.h>
39f4b3ec61Sdh #include <sys/atomic.h>
4084fe1120SDan McDonald #include <sys/sunddi.h>
41f4b3ec61Sdh #include <sys/kobj.h>
42f4b3ec61Sdh #include <sys/disp.h>
43f4b3ec61Sdh #include <vm/seg_kmem.h>
44f4b3ec61Sdh #include <sys/zone.h>
45f4b3ec61Sdh #include <sys/netstack.h>
46f4b3ec61Sdh
47f4b3ec61Sdh /*
48f4b3ec61Sdh * What we use so that the zones framework can tell us about new zones,
49f4b3ec61Sdh * which we use to create new stacks.
50f4b3ec61Sdh */
51f4b3ec61Sdh static zone_key_t netstack_zone_key;
52f4b3ec61Sdh
53f4b3ec61Sdh static int netstack_initialized = 0;
54f4b3ec61Sdh
55f4b3ec61Sdh /*
56f4b3ec61Sdh * Track the registered netstacks.
57f4b3ec61Sdh * The global lock protects
58f4b3ec61Sdh * - ns_reg
59f4b3ec61Sdh * - the list starting at netstack_head and following the netstack_next
60f4b3ec61Sdh * pointers.
61f4b3ec61Sdh */
62f4b3ec61Sdh static kmutex_t netstack_g_lock;
63f4b3ec61Sdh
64f4b3ec61Sdh /*
65f4b3ec61Sdh  * Registry of netstacks with their create/shutdown/destroy functions.
66f4b3ec61Sdh */
67f4b3ec61Sdh static struct netstack_registry ns_reg[NS_MAX];
68f4b3ec61Sdh
69f4b3ec61Sdh /*
70f4b3ec61Sdh * Global list of existing stacks. We use this when a new zone with
71f4b3ec61Sdh * an exclusive IP instance is created.
72f4b3ec61Sdh *
73f4b3ec61Sdh * Note that in some cases a netstack_t needs to stay around after the zone
74f4b3ec61Sdh * has gone away. This is because there might be outstanding references
75f4b3ec61Sdh * (from TCP TIME_WAIT connections, IPsec state, etc). The netstack_t data
76f4b3ec61Sdh * structure and all the foo_stack_t's hanging off of it will be cleaned up
77f4b3ec61Sdh * when the last reference to it is dropped.
78f4b3ec61Sdh * However, the same zone might be rebooted. That is handled using the
79f4b3ec61Sdh * assumption that the zones framework picks a new zoneid each time a zone
80f4b3ec61Sdh * is (re)booted. We assert for that condition in netstack_zone_create().
81f4b3ec61Sdh * Thus the old netstack_t can take its time for things to time out.
82f4b3ec61Sdh */
83f4b3ec61Sdh static netstack_t *netstack_head;
84f4b3ec61Sdh
85f4b3ec61Sdh /*
86f4b3ec61Sdh * To support kstat_create_netstack() using kstat_zone_add we need
87f4b3ec61Sdh * to track both
88f4b3ec61Sdh * - all zoneids that use the global/shared stack
89f4b3ec61Sdh * - all kstats that have been added for the shared stack
90f4b3ec61Sdh */
/* One node per zoneid that uses the global/shared stack. */
struct shared_zone_list {
	struct shared_zone_list *sz_next;	/* singly-linked list */
	zoneid_t sz_zoneid;			/* zone sharing the global stack */
};

/* One node per kstat added for the shared stack. */
struct shared_kstat_list {
	struct shared_kstat_list *sk_next;	/* singly-linked list */
	kstat_t *sk_kstat;			/* kstat registered for the shared stack */
};
100f4b3ec61Sdh
101f4b3ec61Sdh static kmutex_t netstack_shared_lock; /* protects the following two */
102f4b3ec61Sdh static struct shared_zone_list *netstack_shared_zones;
103f4b3ec61Sdh static struct shared_kstat_list *netstack_shared_kstats;
104f4b3ec61Sdh
105f4b3ec61Sdh static void *netstack_zone_create(zoneid_t zoneid);
106f4b3ec61Sdh static void netstack_zone_shutdown(zoneid_t zoneid, void *arg);
107f4b3ec61Sdh static void netstack_zone_destroy(zoneid_t zoneid, void *arg);
108f4b3ec61Sdh
109f4b3ec61Sdh static void netstack_shared_zone_add(zoneid_t zoneid);
110f4b3ec61Sdh static void netstack_shared_zone_remove(zoneid_t zoneid);
111f4b3ec61Sdh static void netstack_shared_kstat_add(kstat_t *ks);
112f4b3ec61Sdh static void netstack_shared_kstat_remove(kstat_t *ks);
113f4b3ec61Sdh
11423f4867fSnordmark typedef boolean_t applyfn_t(kmutex_t *, netstack_t *, int);
115f4b3ec61Sdh
116bd41d0a8Snordmark static void apply_all_netstacks(int, applyfn_t *);
117bd41d0a8Snordmark static void apply_all_modules(netstack_t *, applyfn_t *);
118bd41d0a8Snordmark static void apply_all_modules_reverse(netstack_t *, applyfn_t *);
119bd41d0a8Snordmark static boolean_t netstack_apply_create(kmutex_t *, netstack_t *, int);
120bd41d0a8Snordmark static boolean_t netstack_apply_shutdown(kmutex_t *, netstack_t *, int);
121bd41d0a8Snordmark static boolean_t netstack_apply_destroy(kmutex_t *, netstack_t *, int);
122bd41d0a8Snordmark static boolean_t wait_for_zone_creator(netstack_t *, kmutex_t *);
123bd41d0a8Snordmark static boolean_t wait_for_nms_inprogress(netstack_t *, nm_state_t *,
124bd41d0a8Snordmark kmutex_t *);
125bd41d0a8Snordmark
126*704ca705SDan McDonald static void netstack_hold_locked(netstack_t *);
127*704ca705SDan McDonald
12884fe1120SDan McDonald static ksema_t netstack_reap_limiter;
12984fe1120SDan McDonald /*
13084fe1120SDan McDonald * Hard-coded constant, but since this is not tunable in real-time, it seems
13184fe1120SDan McDonald * making it an /etc/system tunable is better than nothing.
13284fe1120SDan McDonald */
13384fe1120SDan McDonald uint_t netstack_outstanding_reaps = 1024;
13484fe1120SDan McDonald
/*
 * One-time initialization of the netstack framework: set up the global
 * locks, the reap limiter semaphore, and register the ZSD callbacks
 * used to create/shutdown/destroy a stack instance per zone.
 */
void
netstack_init(void)
{
	mutex_init(&netstack_g_lock, NULL, MUTEX_DEFAULT, NULL);
	mutex_init(&netstack_shared_lock, NULL, MUTEX_DEFAULT, NULL);

	/*
	 * Bound the number of concurrently outstanding reaps;
	 * netstack_outstanding_reaps is settable from /etc/system only.
	 */
	sema_init(&netstack_reap_limiter, netstack_outstanding_reaps, NULL,
	    SEMA_DRIVER, NULL);

	/* Must be set before any callback can fire (they ASSERT on it). */
	netstack_initialized = 1;

	/*
	 * We want to be informed each time a zone is created or
	 * destroyed in the kernel, so we can maintain the
	 * stack instance information.
	 */
	zone_key_create(&netstack_zone_key, netstack_zone_create,
	    netstack_zone_shutdown, netstack_zone_destroy);
}
154f4b3ec61Sdh
/*
 * Register a new module with the framework.
 * This registers interest in changes to the set of netstacks.
 * The createfn and destroyfn are required, but the shutdownfn can be
 * NULL.
 * Note that due to the current zsd implementation, when the create
 * function is called the zone isn't fully present, thus functions
 * like zone_find_by_* will fail, hence the create function can not
 * use many zones kernel functions including zcmn_err().
 */
void
netstack_register(int moduleid,
    void *(*module_create)(netstackid_t, netstack_t *),
    void (*module_shutdown)(netstackid_t, void *),
    void (*module_destroy)(netstackid_t, void *))
{
	netstack_t *ns;

	ASSERT(netstack_initialized);
	ASSERT(moduleid >= 0 && moduleid < NS_MAX);
	ASSERT(module_create != NULL);

	/*
	 * Make instances created after this point in time run the create
	 * callback.
	 */
	mutex_enter(&netstack_g_lock);
	/* A moduleid slot must be free (or fully torn down) before reuse. */
	ASSERT(ns_reg[moduleid].nr_create == NULL);
	ASSERT(ns_reg[moduleid].nr_flags == 0);
	ns_reg[moduleid].nr_create = module_create;
	ns_reg[moduleid].nr_shutdown = module_shutdown;
	ns_reg[moduleid].nr_destroy = module_destroy;
	ns_reg[moduleid].nr_flags = NRF_REGISTERED;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * netstacks which have been deleted will have NSS_CREATE_COMPLETED
	 * set, but check NSF_CLOSING to be sure.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (!(ns->netstack_flags & NSF_CLOSING) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	mutex_exit(&netstack_g_lock);

	/*
	 * At this point in time a new instance can be created or an instance
	 * can be destroyed, or some other module can register or unregister.
	 * Make sure we either run all the create functions for this moduleid
	 * or we wait for any other creators for this moduleid.
	 */
	apply_all_netstacks(moduleid, netstack_apply_create);
}
217f4b3ec61Sdh
/*
 * Unregister a module.  Guarantees that by the time we return, the
 * shutdown and destroy callbacks have run for every stack instance
 * whose create callback completed (or was in progress), and that no
 * new instance can invoke this module's callbacks.  Finally clears the
 * registry slot so the module can be loaded again later.
 */
void
netstack_unregister(int moduleid)
{
	netstack_t *ns;

	ASSERT(moduleid >= 0 && moduleid < NS_MAX);

	ASSERT(ns_reg[moduleid].nr_create != NULL);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);

	mutex_enter(&netstack_g_lock);
	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED and NSS_DESTROY_NEEDED for each of those.
	 * That ensures that when we return all the callbacks for existing
	 * instances have completed. And since we set NRF_DYING no new
	 * instances can use this module.
	 */
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		boolean_t created = B_FALSE;
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);

		/*
		 * We need to be careful here. We could actually have a netstack
		 * being created as we speak waiting for us to let go of this
		 * lock to proceed. It may have set NSS_CREATE_NEEDED, but not
		 * have gotten to the point of completing it yet. If
		 * NSS_CREATE_NEEDED, we can safely just remove it here and
		 * never create the module. However, if NSS_CREATE_INPROGRESS is
		 * set, we need to still flag this module for shutdown and
		 * deletion, just as though it had reached NSS_CREATE_COMPLETED.
		 *
		 * It is safe to do that because of two different guarantees
		 * that exist in the system. The first is that before we do a
		 * create, shutdown, or destroy, we ensure that nothing else is
		 * in progress in the system for this netstack and wait for it
		 * to complete. Secondly, because the zone is being created, we
		 * know that the following call to apply_all_netstack will block
		 * on the zone finishing its initialization.
		 */
		if (nms->nms_flags & NSS_CREATE_NEEDED)
			nms->nms_flags &= ~NSS_CREATE_NEEDED;

		if (nms->nms_flags & NSS_CREATE_INPROGRESS ||
		    nms->nms_flags & NSS_CREATE_COMPLETED)
			created = B_TRUE;

		/* Shutdown only instances whose create actually ran. */
		if (ns_reg[moduleid].nr_shutdown != NULL && created &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, moduleid);
		}
		if ((ns_reg[moduleid].nr_flags & NRF_REGISTERED) &&
		    ns_reg[moduleid].nr_destroy != NULL && created &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}
	/*
	 * Prevent any new netstack from calling the registered create
	 * function, while keeping the function pointers in place until the
	 * shutdown and destroy callbacks are complete.
	 */
	ns_reg[moduleid].nr_flags |= NRF_DYING;
	mutex_exit(&netstack_g_lock);

	/* Run the pending shutdowns, then the pending destroys, in order. */
	apply_all_netstacks(moduleid, netstack_apply_shutdown);
	apply_all_netstacks(moduleid, netstack_apply_destroy);

	/*
	 * Clear the nms_flags so that we can handle this module
	 * being loaded again.
	 * Also remove the registered functions.
	 */
	mutex_enter(&netstack_g_lock);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_REGISTERED);
	ASSERT(ns_reg[moduleid].nr_flags & NRF_DYING);
	for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
		nm_state_t *nms = &ns->netstack_m_state[moduleid];

		mutex_enter(&ns->netstack_lock);
		if (nms->nms_flags & NSS_DESTROY_COMPLETED) {
			nms->nms_flags = 0;
			DTRACE_PROBE2(netstack__destroy__done,
			    netstack_t *, ns, int, moduleid);
		}
		mutex_exit(&ns->netstack_lock);
	}

	ns_reg[moduleid].nr_create = NULL;
	ns_reg[moduleid].nr_shutdown = NULL;
	ns_reg[moduleid].nr_destroy = NULL;
	ns_reg[moduleid].nr_flags = 0;
	mutex_exit(&netstack_g_lock);
}
320f4b3ec61Sdh
/*
 * Lookup and/or allocate a netstack for this zone (ZSD create callback).
 * Shared-stack zones attach to the existing GLOBAL_NETSTACKID instance;
 * exclusive-IP zones (ZF_NET_EXCL) get a new netstack_t whose stackid
 * equals the zoneid.  Returns the netstack, which is also stored in
 * zone->zone_netstack.
 */
static void *
netstack_zone_create(zoneid_t zoneid)
{
	netstackid_t stackid;
	netstack_t *ns;
	netstack_t **nsp;
	zone_t *zone;
	int i;

	ASSERT(netstack_initialized);

	zone = zone_find_by_id_nolock(zoneid);
	ASSERT(zone != NULL);

	if (zone->zone_flags & ZF_NET_EXCL) {
		stackid = zoneid;
	} else {
		/* Look for the stack instance for the global */
		stackid = GLOBAL_NETSTACKID;
	}

	/* Allocate even if it isn't needed; simplifies locking */
	ns = (netstack_t *)kmem_zalloc(sizeof (netstack_t), KM_SLEEP);

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &((*nsp)->netstack_next)) {
		if ((*nsp)->netstack_stackid == stackid) {
			/*
			 * Should never find a pre-existing exclusive stack
			 */
			VERIFY(stackid == GLOBAL_NETSTACKID);
			kmem_free(ns, sizeof (netstack_t));
			ns = *nsp;
			mutex_enter(&ns->netstack_lock);
			ns->netstack_numzones++;
			mutex_exit(&ns->netstack_lock);
			mutex_exit(&netstack_g_lock);
			DTRACE_PROBE1(netstack__inc__numzones,
			    netstack_t *, ns);
			/* Record that we have a new shared stack zone */
			netstack_shared_zone_add(zoneid);
			zone->zone_netstack = ns;
			return (ns);
		}
	}
	/* Not found */
	mutex_init(&ns->netstack_lock, NULL, MUTEX_DEFAULT, NULL);
	cv_init(&ns->netstack_cv, NULL, CV_DEFAULT, NULL);
	/*
	 * NOTE(review): assigns zoneid rather than stackid; presumably
	 * equivalent here since the not-found case implies either an
	 * exclusive stack (stackid == zoneid) or the first creator of
	 * the shared stack -- confirm against GLOBAL_NETSTACKID.
	 */
	ns->netstack_stackid = zoneid;
	ns->netstack_numzones = 1;
	ns->netstack_refcnt = 1; /* Decremented by netstack_zone_destroy */
	ns->netstack_flags = NSF_UNINIT;
	*nsp = ns;
	zone->zone_netstack = ns;

	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a CREATE running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ns->netstack_flags |= NSF_ZONE_CREATE;

	/*
	 * Determine the set of module create functions that need to be
	 * called before we drop the lock.
	 * Set NSS_CREATE_NEEDED for each of those.
	 * Skip any with NRF_DYING set, since those are in the process of
	 * going away, by checking for flags being exactly NRF_REGISTERED.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_init(&nms->nms_cv, NULL, CV_DEFAULT, NULL);

		if ((ns_reg[i].nr_flags == NRF_REGISTERED) &&
		    (nms->nms_flags & NSS_CREATE_ALL) == 0) {
			nms->nms_flags |= NSS_CREATE_NEEDED;
			DTRACE_PROBE2(netstack__create__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	apply_all_modules(ns, netstack_apply_create);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ns->netstack_flags &= ~NSF_UNINIT;
	ASSERT(ns->netstack_flags & NSF_ZONE_CREATE);
	ns->netstack_flags &= ~NSF_ZONE_CREATE;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);

	return (ns);
}
423f4b3ec61Sdh
/*
 * ZSD shutdown callback for a zone.  For a shared stack still used by
 * other zones this is a no-op; for the last (or only) zone it schedules
 * and runs the registered shutdown callbacks in reverse moduleid order.
 */
/* ARGSUSED */
static void
netstack_zone_shutdown(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;
	int i;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	if (ns->netstack_numzones != 1) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		/* Only the shared stack can have more than one zone. */
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		return;
	}
	mutex_exit(&ns->netstack_lock);

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a SHUTDOWN running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing create callbacks to complete in moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_SHUTDOWN;

	/*
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_SHUTDOWN_NEEDED for each of those.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown function for all registered modules for this
	 * netstack.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_SHUTDOWN);
	ns->netstack_flags &= ~NSF_ZONE_SHUTDOWN;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
485f4b3ec61Sdh
/*
 * Common routine to release a zone.
 * If this was the last zone using the stack instance then prepare to
 * have the refcnt dropping to zero free the zone.
 */
/* ARGSUSED */
static void
netstack_zone_destroy(zoneid_t zoneid, void *arg)
{
	netstack_t *ns = (netstack_t *)arg;

	ASSERT(arg != NULL);

	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_numzones > 0);
	ns->netstack_numzones--;
	if (ns->netstack_numzones != 0) {
		/* Stack instance being used by other zone */
		mutex_exit(&ns->netstack_lock);
		/* Only the shared stack can be multiply referenced. */
		ASSERT(ns->netstack_stackid == GLOBAL_NETSTACKID);
		/* Record that a shared stack zone has gone away */
		netstack_shared_zone_remove(zoneid);
		return;
	}
	/*
	 * Set CLOSING so that netstack_find_by will not find it.
	 */
	ns->netstack_flags |= NSF_CLOSING;
	mutex_exit(&ns->netstack_lock);
	DTRACE_PROBE1(netstack__dec__numzones, netstack_t *, ns);
	/* No other thread can call zone_destroy for this stack */

	/*
	 * Decrease refcnt to account for the initial reference taken in
	 * netstack_zone_create().
	 */
	netstack_rele(ns);
}
523f4b3ec61Sdh
/*
 * Called when the reference count drops to zero.
 * Call the destroy functions for each registered module, running any
 * shutdown callbacks first if they were not already run (e.g. for a
 * netstack shared between multiple zones).
 */
static void
netstack_stack_inactive(netstack_t *ns)
{
	int i;

	mutex_enter(&netstack_g_lock);
	mutex_enter(&ns->netstack_lock);
	/*
	 * Mark this netstack as having a DESTROY running so
	 * any netstack_register/netstack_unregister waits for
	 * the existing destroy callbacks to complete in reverse moduleid order
	 */
	ASSERT(!(ns->netstack_flags & NSF_ZONE_INPROGRESS));
	ns->netstack_flags |= NSF_ZONE_DESTROY;
	/*
	 * If the shutdown callback wasn't called earlier (e.g., if this is
	 * a netstack shared between multiple zones), then we schedule it now.
	 *
	 * Determine the set of stacks that exist before we drop the lock.
	 * Set NSS_DESTROY_NEEDED for each of those. That
	 * ensures that when we return all the callbacks for existing
	 * instances have completed.
	 */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_shutdown != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_SHUTDOWN_ALL) == 0) {
			nms->nms_flags |= NSS_SHUTDOWN_NEEDED;
			DTRACE_PROBE2(netstack__shutdown__needed,
			    netstack_t *, ns, int, i);
		}

		if ((ns_reg[i].nr_flags & NRF_REGISTERED) &&
		    ns_reg[i].nr_destroy != NULL &&
		    (nms->nms_flags & NSS_CREATE_COMPLETED) &&
		    (nms->nms_flags & NSS_DESTROY_ALL) == 0) {
			nms->nms_flags |= NSS_DESTROY_NEEDED;
			DTRACE_PROBE2(netstack__destroy__needed,
			    netstack_t *, ns, int, i);
		}
	}
	mutex_exit(&ns->netstack_lock);
	mutex_exit(&netstack_g_lock);

	/*
	 * Call the shutdown and destroy functions for all registered modules
	 * for this netstack.
	 *
	 * Since there are some ordering dependencies between the modules we
	 * tear them down in the reverse order of what was used to create them.
	 *
	 * Since a netstack_t is never reused (when a zone is rebooted it gets
	 * a new zoneid == netstackid i.e. a new netstack_t is allocated) we
	 * leave nms_flags the way it is i.e. with NSS_DESTROY_COMPLETED set.
	 * That is different than in the netstack_unregister() case.
	 */
	apply_all_modules_reverse(ns, netstack_apply_shutdown);
	apply_all_modules_reverse(ns, netstack_apply_destroy);

	/* Tell any waiting netstack_register/netstack_unregister to proceed */
	mutex_enter(&ns->netstack_lock);
	ASSERT(ns->netstack_flags & NSF_ZONE_DESTROY);
	ns->netstack_flags &= ~NSF_ZONE_DESTROY;
	cv_broadcast(&ns->netstack_cv);
	mutex_exit(&ns->netstack_lock);
}
597bd41d0a8Snordmark
598bd41d0a8Snordmark /*
599bd41d0a8Snordmark * Apply a function to all netstacks for a particular moduleid.
600bd41d0a8Snordmark *
601bd41d0a8Snordmark * If there is any zone activity (due to a zone being created, shutdown,
602bd41d0a8Snordmark * or destroyed) we wait for that to complete before we proceed. This ensures
603bd41d0a8Snordmark * that the moduleids are processed in order when a zone is created or
604bd41d0a8Snordmark * destroyed.
605bd41d0a8Snordmark *
606bd41d0a8Snordmark * The applyfn has to drop netstack_g_lock if it does some work.
607bd41d0a8Snordmark * In that case we don't follow netstack_next,
608bd41d0a8Snordmark * even if it is possible to do so without any hazards. This is
609bd41d0a8Snordmark * because we want the design to allow for the list of netstacks threaded
610bd41d0a8Snordmark * by netstack_next to change in any arbitrary way during the time the
611bd41d0a8Snordmark * lock was dropped.
612bd41d0a8Snordmark *
613bd41d0a8Snordmark * It is safe to restart the loop at netstack_head since the applyfn
614bd41d0a8Snordmark * changes netstack_m_state as it processes things, so a subsequent
615bd41d0a8Snordmark * pass through will have no effect in applyfn, hence the loop will terminate
616bd41d0a8Snordmark * in at worst O(N^2).
617bd41d0a8Snordmark */
618bd41d0a8Snordmark static void
apply_all_netstacks(int moduleid,applyfn_t * applyfn)619bd41d0a8Snordmark apply_all_netstacks(int moduleid, applyfn_t *applyfn)
620bd41d0a8Snordmark {
621bd41d0a8Snordmark netstack_t *ns;
622bd41d0a8Snordmark
623bd41d0a8Snordmark mutex_enter(&netstack_g_lock);
624bd41d0a8Snordmark ns = netstack_head;
625bd41d0a8Snordmark while (ns != NULL) {
626bd41d0a8Snordmark if (wait_for_zone_creator(ns, &netstack_g_lock)) {
627bd41d0a8Snordmark /* Lock dropped - restart at head */
628bd41d0a8Snordmark ns = netstack_head;
629bd41d0a8Snordmark } else if ((applyfn)(&netstack_g_lock, ns, moduleid)) {
630bd41d0a8Snordmark /* Lock dropped - restart at head */
631bd41d0a8Snordmark ns = netstack_head;
632bd41d0a8Snordmark } else {
633bd41d0a8Snordmark ns = ns->netstack_next;
634bd41d0a8Snordmark }
635bd41d0a8Snordmark }
636bd41d0a8Snordmark mutex_exit(&netstack_g_lock);
637bd41d0a8Snordmark }
638bd41d0a8Snordmark
639bd41d0a8Snordmark /*
640bd41d0a8Snordmark * Apply a function to all moduleids for a particular netstack.
641bd41d0a8Snordmark *
642bd41d0a8Snordmark * Since the netstack linkage doesn't matter in this case we can
643bd41d0a8Snordmark * ignore whether the function drops the lock.
644bd41d0a8Snordmark */
645bd41d0a8Snordmark static void
apply_all_modules(netstack_t * ns,applyfn_t * applyfn)646bd41d0a8Snordmark apply_all_modules(netstack_t *ns, applyfn_t *applyfn)
647bd41d0a8Snordmark {
648bd41d0a8Snordmark int i;
649bd41d0a8Snordmark
650bd41d0a8Snordmark mutex_enter(&netstack_g_lock);
651bd41d0a8Snordmark for (i = 0; i < NS_MAX; i++) {
652bd41d0a8Snordmark /*
653bd41d0a8Snordmark * We don't care whether the lock was dropped
654bd41d0a8Snordmark * since we are not iterating over netstack_head.
655bd41d0a8Snordmark */
656bd41d0a8Snordmark (void) (applyfn)(&netstack_g_lock, ns, i);
657bd41d0a8Snordmark }
658bd41d0a8Snordmark mutex_exit(&netstack_g_lock);
659bd41d0a8Snordmark }
660bd41d0a8Snordmark
661bd41d0a8Snordmark /* Like the above but in reverse moduleid order */
662bd41d0a8Snordmark static void
apply_all_modules_reverse(netstack_t * ns,applyfn_t * applyfn)663bd41d0a8Snordmark apply_all_modules_reverse(netstack_t *ns, applyfn_t *applyfn)
664bd41d0a8Snordmark {
665bd41d0a8Snordmark int i;
666bd41d0a8Snordmark
667bd41d0a8Snordmark mutex_enter(&netstack_g_lock);
668bd41d0a8Snordmark for (i = NS_MAX-1; i >= 0; i--) {
669bd41d0a8Snordmark /*
670bd41d0a8Snordmark * We don't care whether the lock was dropped
671bd41d0a8Snordmark * since we are not iterating over netstack_head.
672bd41d0a8Snordmark */
673bd41d0a8Snordmark (void) (applyfn)(&netstack_g_lock, ns, i);
674bd41d0a8Snordmark }
675bd41d0a8Snordmark mutex_exit(&netstack_g_lock);
676f4b3ec61Sdh }
677f4b3ec61Sdh
678f4b3ec61Sdh /*
679f4b3ec61Sdh * Call the create function for the ns and moduleid if CREATE_NEEDED
680f4b3ec61Sdh * is set.
681bd41d0a8Snordmark * If some other thread gets here first and sets *_INPROGRESS, then
682bd41d0a8Snordmark * we wait for that thread to complete so that we can ensure that
683bd41d0a8Snordmark * all the callbacks are done when we've looped over all netstacks/moduleids.
684bd41d0a8Snordmark *
685bd41d0a8Snordmark * When we call the create function, we temporarily drop the netstack_lock
686bd41d0a8Snordmark * held by the caller, and return true to tell the caller it needs to
687bd41d0a8Snordmark * re-evalute the state.
688f4b3ec61Sdh */
689f4b3ec61Sdh static boolean_t
netstack_apply_create(kmutex_t * lockp,netstack_t * ns,int moduleid)690f4b3ec61Sdh netstack_apply_create(kmutex_t *lockp, netstack_t *ns, int moduleid)
691f4b3ec61Sdh {
692f4b3ec61Sdh void *result;
693f4b3ec61Sdh netstackid_t stackid;
694bd41d0a8Snordmark nm_state_t *nms = &ns->netstack_m_state[moduleid];
695bd41d0a8Snordmark boolean_t dropped = B_FALSE;
696f4b3ec61Sdh
697f4b3ec61Sdh ASSERT(MUTEX_HELD(lockp));
698f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
699bd41d0a8Snordmark
700bd41d0a8Snordmark if (wait_for_nms_inprogress(ns, nms, lockp))
701bd41d0a8Snordmark dropped = B_TRUE;
702bd41d0a8Snordmark
703bd41d0a8Snordmark if (nms->nms_flags & NSS_CREATE_NEEDED) {
704bd41d0a8Snordmark nms->nms_flags &= ~NSS_CREATE_NEEDED;
705bd41d0a8Snordmark nms->nms_flags |= NSS_CREATE_INPROGRESS;
706f4b3ec61Sdh DTRACE_PROBE2(netstack__create__inprogress,
707f4b3ec61Sdh netstack_t *, ns, int, moduleid);
708f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
709f4b3ec61Sdh mutex_exit(lockp);
710bd41d0a8Snordmark dropped = B_TRUE;
711f4b3ec61Sdh
712f4b3ec61Sdh ASSERT(ns_reg[moduleid].nr_create != NULL);
713f4b3ec61Sdh stackid = ns->netstack_stackid;
714f4b3ec61Sdh DTRACE_PROBE2(netstack__create__start,
715f4b3ec61Sdh netstackid_t, stackid,
716f4b3ec61Sdh netstack_t *, ns);
717f4b3ec61Sdh result = (ns_reg[moduleid].nr_create)(stackid, ns);
718f4b3ec61Sdh DTRACE_PROBE2(netstack__create__end,
719f4b3ec61Sdh void *, result, netstack_t *, ns);
720f4b3ec61Sdh
721f4b3ec61Sdh ASSERT(result != NULL);
722bd41d0a8Snordmark mutex_enter(lockp);
723f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
724f4b3ec61Sdh ns->netstack_modules[moduleid] = result;
725bd41d0a8Snordmark nms->nms_flags &= ~NSS_CREATE_INPROGRESS;
726bd41d0a8Snordmark nms->nms_flags |= NSS_CREATE_COMPLETED;
727bd41d0a8Snordmark cv_broadcast(&nms->nms_cv);
728f4b3ec61Sdh DTRACE_PROBE2(netstack__create__completed,
729f4b3ec61Sdh netstack_t *, ns, int, moduleid);
730f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
731bd41d0a8Snordmark return (dropped);
732f4b3ec61Sdh } else {
733f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
734bd41d0a8Snordmark return (dropped);
735f4b3ec61Sdh }
736f4b3ec61Sdh }
737f4b3ec61Sdh
738f4b3ec61Sdh /*
739f4b3ec61Sdh * Call the shutdown function for the ns and moduleid if SHUTDOWN_NEEDED
740f4b3ec61Sdh * is set.
741bd41d0a8Snordmark * If some other thread gets here first and sets *_INPROGRESS, then
742bd41d0a8Snordmark * we wait for that thread to complete so that we can ensure that
743bd41d0a8Snordmark * all the callbacks are done when we've looped over all netstacks/moduleids.
744bd41d0a8Snordmark *
745bd41d0a8Snordmark * When we call the shutdown function, we temporarily drop the netstack_lock
746bd41d0a8Snordmark * held by the caller, and return true to tell the caller it needs to
747bd41d0a8Snordmark * re-evalute the state.
748f4b3ec61Sdh */
749f4b3ec61Sdh static boolean_t
netstack_apply_shutdown(kmutex_t * lockp,netstack_t * ns,int moduleid)750f4b3ec61Sdh netstack_apply_shutdown(kmutex_t *lockp, netstack_t *ns, int moduleid)
751f4b3ec61Sdh {
752f4b3ec61Sdh netstackid_t stackid;
753f4b3ec61Sdh void * netstack_module;
754bd41d0a8Snordmark nm_state_t *nms = &ns->netstack_m_state[moduleid];
755bd41d0a8Snordmark boolean_t dropped = B_FALSE;
756f4b3ec61Sdh
757f4b3ec61Sdh ASSERT(MUTEX_HELD(lockp));
758f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
759bd41d0a8Snordmark
760bd41d0a8Snordmark if (wait_for_nms_inprogress(ns, nms, lockp))
761bd41d0a8Snordmark dropped = B_TRUE;
762bd41d0a8Snordmark
763bd41d0a8Snordmark if (nms->nms_flags & NSS_SHUTDOWN_NEEDED) {
764bd41d0a8Snordmark nms->nms_flags &= ~NSS_SHUTDOWN_NEEDED;
765bd41d0a8Snordmark nms->nms_flags |= NSS_SHUTDOWN_INPROGRESS;
766f4b3ec61Sdh DTRACE_PROBE2(netstack__shutdown__inprogress,
767f4b3ec61Sdh netstack_t *, ns, int, moduleid);
768f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
769f4b3ec61Sdh mutex_exit(lockp);
770bd41d0a8Snordmark dropped = B_TRUE;
771f4b3ec61Sdh
772f4b3ec61Sdh ASSERT(ns_reg[moduleid].nr_shutdown != NULL);
773f4b3ec61Sdh stackid = ns->netstack_stackid;
774f4b3ec61Sdh netstack_module = ns->netstack_modules[moduleid];
775f4b3ec61Sdh DTRACE_PROBE2(netstack__shutdown__start,
776f4b3ec61Sdh netstackid_t, stackid,
777f4b3ec61Sdh void *, netstack_module);
778f4b3ec61Sdh (ns_reg[moduleid].nr_shutdown)(stackid, netstack_module);
779f4b3ec61Sdh DTRACE_PROBE1(netstack__shutdown__end,
780f4b3ec61Sdh netstack_t *, ns);
781f4b3ec61Sdh
782bd41d0a8Snordmark mutex_enter(lockp);
783f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
784bd41d0a8Snordmark nms->nms_flags &= ~NSS_SHUTDOWN_INPROGRESS;
785bd41d0a8Snordmark nms->nms_flags |= NSS_SHUTDOWN_COMPLETED;
786bd41d0a8Snordmark cv_broadcast(&nms->nms_cv);
787f4b3ec61Sdh DTRACE_PROBE2(netstack__shutdown__completed,
788f4b3ec61Sdh netstack_t *, ns, int, moduleid);
789f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
790bd41d0a8Snordmark return (dropped);
791f4b3ec61Sdh } else {
792f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
793bd41d0a8Snordmark return (dropped);
794f4b3ec61Sdh }
795f4b3ec61Sdh }
796f4b3ec61Sdh
797f4b3ec61Sdh /*
798f4b3ec61Sdh * Call the destroy function for the ns and moduleid if DESTROY_NEEDED
799f4b3ec61Sdh * is set.
800bd41d0a8Snordmark * If some other thread gets here first and sets *_INPROGRESS, then
801bd41d0a8Snordmark * we wait for that thread to complete so that we can ensure that
802bd41d0a8Snordmark * all the callbacks are done when we've looped over all netstacks/moduleids.
803bd41d0a8Snordmark *
804bd41d0a8Snordmark * When we call the destroy function, we temporarily drop the netstack_lock
805bd41d0a8Snordmark * held by the caller, and return true to tell the caller it needs to
806bd41d0a8Snordmark * re-evalute the state.
807f4b3ec61Sdh */
808f4b3ec61Sdh static boolean_t
netstack_apply_destroy(kmutex_t * lockp,netstack_t * ns,int moduleid)809f4b3ec61Sdh netstack_apply_destroy(kmutex_t *lockp, netstack_t *ns, int moduleid)
810f4b3ec61Sdh {
811f4b3ec61Sdh netstackid_t stackid;
812f4b3ec61Sdh void * netstack_module;
813bd41d0a8Snordmark nm_state_t *nms = &ns->netstack_m_state[moduleid];
814bd41d0a8Snordmark boolean_t dropped = B_FALSE;
815f4b3ec61Sdh
816f4b3ec61Sdh ASSERT(MUTEX_HELD(lockp));
817f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
818bd41d0a8Snordmark
819bd41d0a8Snordmark if (wait_for_nms_inprogress(ns, nms, lockp))
820bd41d0a8Snordmark dropped = B_TRUE;
821bd41d0a8Snordmark
822bd41d0a8Snordmark if (nms->nms_flags & NSS_DESTROY_NEEDED) {
823bd41d0a8Snordmark nms->nms_flags &= ~NSS_DESTROY_NEEDED;
824bd41d0a8Snordmark nms->nms_flags |= NSS_DESTROY_INPROGRESS;
825f4b3ec61Sdh DTRACE_PROBE2(netstack__destroy__inprogress,
826f4b3ec61Sdh netstack_t *, ns, int, moduleid);
827f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
828f4b3ec61Sdh mutex_exit(lockp);
829bd41d0a8Snordmark dropped = B_TRUE;
830f4b3ec61Sdh
831f4b3ec61Sdh ASSERT(ns_reg[moduleid].nr_destroy != NULL);
832f4b3ec61Sdh stackid = ns->netstack_stackid;
833f4b3ec61Sdh netstack_module = ns->netstack_modules[moduleid];
834f4b3ec61Sdh DTRACE_PROBE2(netstack__destroy__start,
835f4b3ec61Sdh netstackid_t, stackid,
836f4b3ec61Sdh void *, netstack_module);
837f4b3ec61Sdh (ns_reg[moduleid].nr_destroy)(stackid, netstack_module);
838f4b3ec61Sdh DTRACE_PROBE1(netstack__destroy__end,
839f4b3ec61Sdh netstack_t *, ns);
840f4b3ec61Sdh
841bd41d0a8Snordmark mutex_enter(lockp);
842f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
843f4b3ec61Sdh ns->netstack_modules[moduleid] = NULL;
844bd41d0a8Snordmark nms->nms_flags &= ~NSS_DESTROY_INPROGRESS;
845bd41d0a8Snordmark nms->nms_flags |= NSS_DESTROY_COMPLETED;
846bd41d0a8Snordmark cv_broadcast(&nms->nms_cv);
847f4b3ec61Sdh DTRACE_PROBE2(netstack__destroy__completed,
848f4b3ec61Sdh netstack_t *, ns, int, moduleid);
849f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
850bd41d0a8Snordmark return (dropped);
851f4b3ec61Sdh } else {
852f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
853bd41d0a8Snordmark return (dropped);
854f4b3ec61Sdh }
855f4b3ec61Sdh }
856f4b3ec61Sdh
85723f4867fSnordmark /*
858bd41d0a8Snordmark * If somebody is creating the netstack (due to a new zone being created)
859bd41d0a8Snordmark * then we wait for them to complete. This ensures that any additional
860bd41d0a8Snordmark * netstack_register() doesn't cause the create functions to run out of
861bd41d0a8Snordmark * order.
862bd41d0a8Snordmark * Note that we do not need such a global wait in the case of the shutdown
863bd41d0a8Snordmark * and destroy callbacks, since in that case it is sufficient for both
864bd41d0a8Snordmark * threads to set NEEDED and wait for INPROGRESS to ensure ordering.
865bd41d0a8Snordmark * Returns true if lockp was temporarily dropped while waiting.
86623f4867fSnordmark */
867bd41d0a8Snordmark static boolean_t
wait_for_zone_creator(netstack_t * ns,kmutex_t * lockp)868bd41d0a8Snordmark wait_for_zone_creator(netstack_t *ns, kmutex_t *lockp)
869f4b3ec61Sdh {
870bd41d0a8Snordmark boolean_t dropped = B_FALSE;
871f4b3ec61Sdh
872bd41d0a8Snordmark mutex_enter(&ns->netstack_lock);
873bd41d0a8Snordmark while (ns->netstack_flags & NSF_ZONE_CREATE) {
874bd41d0a8Snordmark DTRACE_PROBE1(netstack__wait__zone__inprogress,
875bd41d0a8Snordmark netstack_t *, ns);
876bd41d0a8Snordmark if (lockp != NULL) {
877bd41d0a8Snordmark dropped = B_TRUE;
878bd41d0a8Snordmark mutex_exit(lockp);
879bd41d0a8Snordmark }
880bd41d0a8Snordmark cv_wait(&ns->netstack_cv, &ns->netstack_lock);
881bd41d0a8Snordmark if (lockp != NULL) {
882bd41d0a8Snordmark /* First drop netstack_lock to preserve order */
883bd41d0a8Snordmark mutex_exit(&ns->netstack_lock);
884bd41d0a8Snordmark mutex_enter(lockp);
885bd41d0a8Snordmark mutex_enter(&ns->netstack_lock);
886f4b3ec61Sdh }
887f4b3ec61Sdh }
888bd41d0a8Snordmark mutex_exit(&ns->netstack_lock);
889bd41d0a8Snordmark return (dropped);
890f4b3ec61Sdh }
891f4b3ec61Sdh
89223f4867fSnordmark /*
893bd41d0a8Snordmark * Wait for any INPROGRESS flag to be cleared for the netstack/moduleid
894bd41d0a8Snordmark * combination.
895bd41d0a8Snordmark * Returns true if lockp was temporarily dropped while waiting.
89623f4867fSnordmark */
897bd41d0a8Snordmark static boolean_t
wait_for_nms_inprogress(netstack_t * ns,nm_state_t * nms,kmutex_t * lockp)898bd41d0a8Snordmark wait_for_nms_inprogress(netstack_t *ns, nm_state_t *nms, kmutex_t *lockp)
899f4b3ec61Sdh {
900bd41d0a8Snordmark boolean_t dropped = B_FALSE;
901bd41d0a8Snordmark
902bd41d0a8Snordmark while (nms->nms_flags & NSS_ALL_INPROGRESS) {
903bd41d0a8Snordmark DTRACE_PROBE2(netstack__wait__nms__inprogress,
904bd41d0a8Snordmark netstack_t *, ns, nm_state_t *, nms);
905bd41d0a8Snordmark if (lockp != NULL) {
906bd41d0a8Snordmark dropped = B_TRUE;
907bd41d0a8Snordmark mutex_exit(lockp);
908f4b3ec61Sdh }
909bd41d0a8Snordmark cv_wait(&nms->nms_cv, &ns->netstack_lock);
910bd41d0a8Snordmark if (lockp != NULL) {
911bd41d0a8Snordmark /* First drop netstack_lock to preserve order */
912bd41d0a8Snordmark mutex_exit(&ns->netstack_lock);
913bd41d0a8Snordmark mutex_enter(lockp);
914bd41d0a8Snordmark mutex_enter(&ns->netstack_lock);
915f4b3ec61Sdh }
916f4b3ec61Sdh }
917bd41d0a8Snordmark return (dropped);
918f4b3ec61Sdh }
919f4b3ec61Sdh
920f4b3ec61Sdh /*
921f4b3ec61Sdh * Get the stack instance used in caller's zone.
922f4b3ec61Sdh * Increases the reference count, caller must do a netstack_rele.
923f4b3ec61Sdh * It can't be called after zone_destroy() has started.
924f4b3ec61Sdh */
925fd006805Snordmark netstack_t *
netstack_get_current(void)926f4b3ec61Sdh netstack_get_current(void)
927f4b3ec61Sdh {
928f4b3ec61Sdh netstack_t *ns;
929f4b3ec61Sdh
930f4b3ec61Sdh ns = curproc->p_zone->zone_netstack;
931f4b3ec61Sdh ASSERT(ns != NULL);
932*704ca705SDan McDonald return (netstack_hold_if_active(ns));
933f4b3ec61Sdh }
934f4b3ec61Sdh
935f4b3ec61Sdh /*
936f4b3ec61Sdh * Find a stack instance given the cred.
937f4b3ec61Sdh * This is used by the modules to potentially allow for a future when
938f4b3ec61Sdh * something other than the zoneid is used to determine the stack.
939f4b3ec61Sdh */
940f4b3ec61Sdh netstack_t *
netstack_find_by_cred(const cred_t * cr)941f4b3ec61Sdh netstack_find_by_cred(const cred_t *cr)
942f4b3ec61Sdh {
943f4b3ec61Sdh zoneid_t zoneid = crgetzoneid(cr);
944f4b3ec61Sdh
945f4b3ec61Sdh /* Handle the case when cr_zone is NULL */
946f4b3ec61Sdh if (zoneid == (zoneid_t)-1)
947f4b3ec61Sdh zoneid = GLOBAL_ZONEID;
948f4b3ec61Sdh
949f4b3ec61Sdh /* For performance ... */
950f4b3ec61Sdh if (curproc->p_zone->zone_id == zoneid)
951f4b3ec61Sdh return (netstack_get_current());
952f4b3ec61Sdh else
953f4b3ec61Sdh return (netstack_find_by_zoneid(zoneid));
954f4b3ec61Sdh }
955f4b3ec61Sdh
956f4b3ec61Sdh /*
957f4b3ec61Sdh * Find a stack instance given the zoneid.
958f4b3ec61Sdh * Increases the reference count if found; caller must do a
959f4b3ec61Sdh * netstack_rele().
960f4b3ec61Sdh *
961f4b3ec61Sdh * If there is no exact match then assume the shared stack instance
962f4b3ec61Sdh * matches.
963f4b3ec61Sdh *
964*704ca705SDan McDonald * Skip the uninitialized and closing ones.
965f4b3ec61Sdh */
966f4b3ec61Sdh netstack_t *
netstack_find_by_zoneid(zoneid_t zoneid)967f4b3ec61Sdh netstack_find_by_zoneid(zoneid_t zoneid)
968f4b3ec61Sdh {
969f4b3ec61Sdh netstack_t *ns;
970f4b3ec61Sdh zone_t *zone;
971f4b3ec61Sdh
972f4b3ec61Sdh zone = zone_find_by_id(zoneid);
973f4b3ec61Sdh
974f4b3ec61Sdh if (zone == NULL)
975f4b3ec61Sdh return (NULL);
976f4b3ec61Sdh
977*704ca705SDan McDonald ASSERT(zone->zone_netstack != NULL);
978*704ca705SDan McDonald ns = netstack_hold_if_active(zone->zone_netstack);
979f4b3ec61Sdh
980f4b3ec61Sdh zone_rele(zone);
981f4b3ec61Sdh return (ns);
982f4b3ec61Sdh }
983f4b3ec61Sdh
984f4b3ec61Sdh /*
985bd41d0a8Snordmark * Find a stack instance given the zoneid. Can only be called from
986bd41d0a8Snordmark * the create callback. See the comments in zone_find_by_id_nolock why
987bd41d0a8Snordmark * that limitation exists.
988bd41d0a8Snordmark *
989f4b3ec61Sdh * Increases the reference count if found; caller must do a
990f4b3ec61Sdh * netstack_rele().
991f4b3ec61Sdh *
992f4b3ec61Sdh * If there is no exact match then assume the shared stack instance
993f4b3ec61Sdh * matches.
994f4b3ec61Sdh *
995f4b3ec61Sdh * Skip the unitialized ones.
996f4b3ec61Sdh */
997f4b3ec61Sdh netstack_t *
netstack_find_by_zoneid_nolock(zoneid_t zoneid)998f4b3ec61Sdh netstack_find_by_zoneid_nolock(zoneid_t zoneid)
999f4b3ec61Sdh {
1000f4b3ec61Sdh zone_t *zone;
1001f4b3ec61Sdh
1002f4b3ec61Sdh zone = zone_find_by_id_nolock(zoneid);
1003f4b3ec61Sdh
1004f4b3ec61Sdh if (zone == NULL)
1005f4b3ec61Sdh return (NULL);
1006f4b3ec61Sdh
1007*704ca705SDan McDonald ASSERT(zone->zone_netstack != NULL);
1008bd41d0a8Snordmark /* zone_find_by_id_nolock does not have a hold on the zone */
1009*704ca705SDan McDonald return (netstack_hold_if_active(zone->zone_netstack));
1010f4b3ec61Sdh }
1011f4b3ec61Sdh
1012f4b3ec61Sdh /*
1013f4b3ec61Sdh * Find a stack instance given the stackid with exact match?
1014f4b3ec61Sdh * Increases the reference count if found; caller must do a
1015f4b3ec61Sdh * netstack_rele().
1016f4b3ec61Sdh *
1017f4b3ec61Sdh * Skip the unitialized ones.
1018f4b3ec61Sdh */
1019f4b3ec61Sdh netstack_t *
netstack_find_by_stackid(netstackid_t stackid)1020f4b3ec61Sdh netstack_find_by_stackid(netstackid_t stackid)
1021f4b3ec61Sdh {
1022f4b3ec61Sdh netstack_t *ns;
1023f4b3ec61Sdh
1024f4b3ec61Sdh mutex_enter(&netstack_g_lock);
1025f4b3ec61Sdh for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1026*704ca705SDan McDonald /* Can't use hold_if_active because of stackid check. */
1027f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
1028f4b3ec61Sdh if (ns->netstack_stackid == stackid &&
1029f4b3ec61Sdh !(ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING))) {
1030*704ca705SDan McDonald netstack_hold_locked(ns);
1031f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
1032f4b3ec61Sdh mutex_exit(&netstack_g_lock);
1033f4b3ec61Sdh return (ns);
1034f4b3ec61Sdh }
1035f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
1036f4b3ec61Sdh }
1037f4b3ec61Sdh mutex_exit(&netstack_g_lock);
1038f4b3ec61Sdh return (NULL);
1039f4b3ec61Sdh }
1040f4b3ec61Sdh
1041854956ceSBryan Cantrill boolean_t
netstack_inuse_by_stackid(netstackid_t stackid)1042854956ceSBryan Cantrill netstack_inuse_by_stackid(netstackid_t stackid)
1043854956ceSBryan Cantrill {
1044854956ceSBryan Cantrill netstack_t *ns;
1045854956ceSBryan Cantrill boolean_t rval = B_FALSE;
1046854956ceSBryan Cantrill
1047854956ceSBryan Cantrill mutex_enter(&netstack_g_lock);
1048854956ceSBryan Cantrill
1049854956ceSBryan Cantrill for (ns = netstack_head; ns != NULL; ns = ns->netstack_next) {
1050854956ceSBryan Cantrill if (ns->netstack_stackid == stackid) {
1051854956ceSBryan Cantrill rval = B_TRUE;
1052854956ceSBryan Cantrill break;
1053854956ceSBryan Cantrill }
1054854956ceSBryan Cantrill }
1055854956ceSBryan Cantrill
1056854956ceSBryan Cantrill mutex_exit(&netstack_g_lock);
1057854956ceSBryan Cantrill
1058854956ceSBryan Cantrill return (rval);
1059854956ceSBryan Cantrill }
1060854956ceSBryan Cantrill

/*
 * Taskq callback that performs the final teardown of a netstack whose
 * last reference has gone away: run the destroy callbacks, unlink it
 * from the global list, and free it.  Dispatched from netstack_rele().
 */
static void
netstack_reap(void *arg)
{
	netstack_t **nsp, *ns = (netstack_t *)arg;
	boolean_t found;
	int i;

	/*
	 * Time to call the destroy functions and free up
	 * the structure
	 */
	netstack_stack_inactive(ns);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	/* Finally remove from list of netstacks */
	mutex_enter(&netstack_g_lock);
	found = B_FALSE;
	for (nsp = &netstack_head; *nsp != NULL;
	    nsp = &(*nsp)->netstack_next) {
		if (*nsp == ns) {
			/* Unlink via the pointer-to-pointer walk */
			*nsp = ns->netstack_next;
			ns->netstack_next = NULL;
			found = B_TRUE;
			break;
		}
	}
	ASSERT(found);
	mutex_exit(&netstack_g_lock);

	/* Make sure nothing increased the references */
	ASSERT(ns->netstack_refcnt == 0);
	ASSERT(ns->netstack_numzones == 0);

	ASSERT(ns->netstack_flags & NSF_CLOSING);

	/* Tear down per-module synchronization state */
	for (i = 0; i < NS_MAX; i++) {
		nm_state_t *nms = &ns->netstack_m_state[i];

		cv_destroy(&nms->nms_cv);
	}
	mutex_destroy(&ns->netstack_lock);
	cv_destroy(&ns->netstack_cv);
	kmem_free(ns, sizeof (*ns));
	/* Allow another reap to be scheduled. */
	sema_v(&netstack_reap_limiter);
}
111184fe1120SDan McDonald
1112f4b3ec61Sdh void
netstack_rele(netstack_t * ns)1113f4b3ec61Sdh netstack_rele(netstack_t *ns)
1114f4b3ec61Sdh {
1115f4b3ec61Sdh int refcnt, numzones;
1116f4b3ec61Sdh
1117f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
1118f4b3ec61Sdh ASSERT(ns->netstack_refcnt > 0);
1119f4b3ec61Sdh ns->netstack_refcnt--;
1120f4b3ec61Sdh /*
1121f4b3ec61Sdh * As we drop the lock additional netstack_rele()s can come in
1122f4b3ec61Sdh * and decrement the refcnt to zero and free the netstack_t.
1123f4b3ec61Sdh * Store pointers in local variables and if we were not the last
1124f4b3ec61Sdh * then don't reference the netstack_t after that.
1125f4b3ec61Sdh */
1126f4b3ec61Sdh refcnt = ns->netstack_refcnt;
1127f4b3ec61Sdh numzones = ns->netstack_numzones;
1128f4b3ec61Sdh DTRACE_PROBE1(netstack__dec__ref, netstack_t *, ns);
1129f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
1130f4b3ec61Sdh
1131f4b3ec61Sdh if (refcnt == 0 && numzones == 0) {
1132f4b3ec61Sdh /*
113384fe1120SDan McDonald * Because there are possibilities of re-entrancy in various
113484fe1120SDan McDonald * netstack structures by callers, which might cause a lock up
113584fe1120SDan McDonald * due to odd reference models, or other factors, we choose to
113684fe1120SDan McDonald * schedule the actual deletion of this netstack as a deferred
113784fe1120SDan McDonald * task on the system taskq. This way, any such reference
113884fe1120SDan McDonald * models won't trip over themselves.
113984fe1120SDan McDonald *
114084fe1120SDan McDonald * Assume we aren't in a high-priority interrupt context, so
114184fe1120SDan McDonald * we can use KM_SLEEP and semaphores.
1142f4b3ec61Sdh */
114384fe1120SDan McDonald if (sema_tryp(&netstack_reap_limiter) == 0) {
114484fe1120SDan McDonald /*
114584fe1120SDan McDonald * Indicate we're slamming against a limit.
114684fe1120SDan McDonald */
114784fe1120SDan McDonald hrtime_t measurement = gethrtime();
1148bd41d0a8Snordmark
114984fe1120SDan McDonald sema_p(&netstack_reap_limiter);
115084fe1120SDan McDonald /* Capture delay in ns. */
115184fe1120SDan McDonald DTRACE_PROBE1(netstack__reap__rate__limited,
115284fe1120SDan McDonald hrtime_t, gethrtime() - measurement);
1153bd41d0a8Snordmark }
115484fe1120SDan McDonald
115584fe1120SDan McDonald /* TQ_SLEEP should prevent taskq_dispatch() from failing. */
115684fe1120SDan McDonald (void) taskq_dispatch(system_taskq, netstack_reap, ns,
115784fe1120SDan McDonald TQ_SLEEP);
1158f4b3ec61Sdh }
1159f4b3ec61Sdh }
1160f4b3ec61Sdh
1161*704ca705SDan McDonald static void
netstack_hold_locked(netstack_t * ns)1162*704ca705SDan McDonald netstack_hold_locked(netstack_t *ns)
1163*704ca705SDan McDonald {
1164*704ca705SDan McDonald ASSERT(MUTEX_HELD(&ns->netstack_lock));
1165*704ca705SDan McDonald ns->netstack_refcnt++;
1166*704ca705SDan McDonald ASSERT(ns->netstack_refcnt > 0);
1167*704ca705SDan McDonald DTRACE_PROBE1(netstack__inc__ref, netstack_t *, ns);
1168*704ca705SDan McDonald }
1169*704ca705SDan McDonald
1170*704ca705SDan McDonald /*
1171*704ca705SDan McDonald * If the passed-in netstack isn't active (i.e. it's uninitialized or closing),
1172*704ca705SDan McDonald * return NULL, otherwise return it with its reference held. Common code
1173*704ca705SDan McDonald * for many netstack_find*() functions.
1174*704ca705SDan McDonald */
1175*704ca705SDan McDonald netstack_t *
netstack_hold_if_active(netstack_t * ns)1176*704ca705SDan McDonald netstack_hold_if_active(netstack_t *ns)
1177*704ca705SDan McDonald {
1178*704ca705SDan McDonald netstack_t *retval;
1179*704ca705SDan McDonald
1180*704ca705SDan McDonald mutex_enter(&ns->netstack_lock);
1181*704ca705SDan McDonald if (ns->netstack_flags & (NSF_UNINIT | NSF_CLOSING)) {
1182*704ca705SDan McDonald retval = NULL;
1183*704ca705SDan McDonald } else {
1184*704ca705SDan McDonald netstack_hold_locked(ns);
1185*704ca705SDan McDonald retval = ns;
1186*704ca705SDan McDonald }
1187*704ca705SDan McDonald mutex_exit(&ns->netstack_lock);
1188*704ca705SDan McDonald
1189*704ca705SDan McDonald return (retval);
1190*704ca705SDan McDonald }
1191*704ca705SDan McDonald
1192f4b3ec61Sdh void
netstack_hold(netstack_t * ns)1193f4b3ec61Sdh netstack_hold(netstack_t *ns)
1194f4b3ec61Sdh {
1195f4b3ec61Sdh mutex_enter(&ns->netstack_lock);
1196*704ca705SDan McDonald netstack_hold_locked(ns);
1197f4b3ec61Sdh mutex_exit(&ns->netstack_lock);
1198f4b3ec61Sdh }
1199f4b3ec61Sdh
1200f4b3ec61Sdh /*
1201f4b3ec61Sdh * To support kstat_create_netstack() using kstat_zone_add we need
1202f4b3ec61Sdh * to track both
1203f4b3ec61Sdh * - all zoneids that use the global/shared stack
1204f4b3ec61Sdh * - all kstats that have been added for the shared stack
1205f4b3ec61Sdh */
1206f4b3ec61Sdh kstat_t *
kstat_create_netstack(char * ks_module,int ks_instance,char * ks_name,char * ks_class,uchar_t ks_type,uint_t ks_ndata,uchar_t ks_flags,netstackid_t ks_netstackid)1207f4b3ec61Sdh kstat_create_netstack(char *ks_module, int ks_instance, char *ks_name,
1208f4b3ec61Sdh char *ks_class, uchar_t ks_type, uint_t ks_ndata, uchar_t ks_flags,
1209f4b3ec61Sdh netstackid_t ks_netstackid)
1210f4b3ec61Sdh {
1211f4b3ec61Sdh kstat_t *ks;
1212f4b3ec61Sdh
1213f4b3ec61Sdh if (ks_netstackid == GLOBAL_NETSTACKID) {
1214f4b3ec61Sdh ks = kstat_create_zone(ks_module, ks_instance, ks_name,
1215f4b3ec61Sdh ks_class, ks_type, ks_ndata, ks_flags, GLOBAL_ZONEID);
1216f4b3ec61Sdh if (ks != NULL)
1217f4b3ec61Sdh netstack_shared_kstat_add(ks);
1218f4b3ec61Sdh return (ks);
1219f4b3ec61Sdh } else {
1220f4b3ec61Sdh zoneid_t zoneid = ks_netstackid;
1221f4b3ec61Sdh
1222f4b3ec61Sdh return (kstat_create_zone(ks_module, ks_instance, ks_name,
1223bd41d0a8Snordmark ks_class, ks_type, ks_ndata, ks_flags, zoneid));
1224f4b3ec61Sdh }
1225f4b3ec61Sdh }
1226f4b3ec61Sdh
1227f4b3ec61Sdh void
kstat_delete_netstack(kstat_t * ks,netstackid_t ks_netstackid)1228f4b3ec61Sdh kstat_delete_netstack(kstat_t *ks, netstackid_t ks_netstackid)
1229f4b3ec61Sdh {
1230f4b3ec61Sdh if (ks_netstackid == GLOBAL_NETSTACKID) {
1231f4b3ec61Sdh netstack_shared_kstat_remove(ks);
1232f4b3ec61Sdh }
1233f4b3ec61Sdh kstat_delete(ks);
1234f4b3ec61Sdh }
1235f4b3ec61Sdh
1236f4b3ec61Sdh static void
netstack_shared_zone_add(zoneid_t zoneid)1237f4b3ec61Sdh netstack_shared_zone_add(zoneid_t zoneid)
1238f4b3ec61Sdh {
1239f4b3ec61Sdh struct shared_zone_list *sz;
1240f4b3ec61Sdh struct shared_kstat_list *sk;
1241f4b3ec61Sdh
1242f4b3ec61Sdh sz = (struct shared_zone_list *)kmem_zalloc(sizeof (*sz), KM_SLEEP);
1243f4b3ec61Sdh sz->sz_zoneid = zoneid;
1244f4b3ec61Sdh
1245f4b3ec61Sdh /* Insert in list */
1246f4b3ec61Sdh mutex_enter(&netstack_shared_lock);
1247f4b3ec61Sdh sz->sz_next = netstack_shared_zones;
1248f4b3ec61Sdh netstack_shared_zones = sz;
1249f4b3ec61Sdh
1250f4b3ec61Sdh /*
1251f4b3ec61Sdh * Perform kstat_zone_add for each existing shared stack kstat.
1252f4b3ec61Sdh * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1253f4b3ec61Sdh */
1254f4b3ec61Sdh for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1255f4b3ec61Sdh kstat_zone_add(sk->sk_kstat, zoneid);
1256f4b3ec61Sdh }
1257f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1258f4b3ec61Sdh }
1259f4b3ec61Sdh
1260f4b3ec61Sdh static void
netstack_shared_zone_remove(zoneid_t zoneid)1261f4b3ec61Sdh netstack_shared_zone_remove(zoneid_t zoneid)
1262f4b3ec61Sdh {
1263f4b3ec61Sdh struct shared_zone_list **szp, *sz;
1264f4b3ec61Sdh struct shared_kstat_list *sk;
1265f4b3ec61Sdh
1266f4b3ec61Sdh /* Find in list */
1267f4b3ec61Sdh mutex_enter(&netstack_shared_lock);
1268f4b3ec61Sdh sz = NULL;
1269f4b3ec61Sdh for (szp = &netstack_shared_zones; *szp != NULL;
1270f4b3ec61Sdh szp = &((*szp)->sz_next)) {
1271f4b3ec61Sdh if ((*szp)->sz_zoneid == zoneid) {
1272f4b3ec61Sdh sz = *szp;
1273f4b3ec61Sdh break;
1274f4b3ec61Sdh }
1275f4b3ec61Sdh }
1276f4b3ec61Sdh /* We must find it */
1277f4b3ec61Sdh ASSERT(sz != NULL);
1278f4b3ec61Sdh *szp = sz->sz_next;
1279f4b3ec61Sdh sz->sz_next = NULL;
1280f4b3ec61Sdh
1281f4b3ec61Sdh /*
1282f4b3ec61Sdh * Perform kstat_zone_remove for each existing shared stack kstat.
1283f4b3ec61Sdh * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1284f4b3ec61Sdh */
1285f4b3ec61Sdh for (sk = netstack_shared_kstats; sk != NULL; sk = sk->sk_next) {
1286f4b3ec61Sdh kstat_zone_remove(sk->sk_kstat, zoneid);
1287f4b3ec61Sdh }
1288f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1289f4b3ec61Sdh
1290f4b3ec61Sdh kmem_free(sz, sizeof (*sz));
1291f4b3ec61Sdh }
1292f4b3ec61Sdh
1293f4b3ec61Sdh static void
netstack_shared_kstat_add(kstat_t * ks)1294f4b3ec61Sdh netstack_shared_kstat_add(kstat_t *ks)
1295f4b3ec61Sdh {
1296f4b3ec61Sdh struct shared_zone_list *sz;
1297f4b3ec61Sdh struct shared_kstat_list *sk;
1298f4b3ec61Sdh
1299f4b3ec61Sdh sk = (struct shared_kstat_list *)kmem_zalloc(sizeof (*sk), KM_SLEEP);
1300f4b3ec61Sdh sk->sk_kstat = ks;
1301f4b3ec61Sdh
1302f4b3ec61Sdh /* Insert in list */
1303f4b3ec61Sdh mutex_enter(&netstack_shared_lock);
1304f4b3ec61Sdh sk->sk_next = netstack_shared_kstats;
1305f4b3ec61Sdh netstack_shared_kstats = sk;
1306f4b3ec61Sdh
1307f4b3ec61Sdh /*
1308f4b3ec61Sdh * Perform kstat_zone_add for each existing shared stack zone.
1309f4b3ec61Sdh * Note: Holds netstack_shared_lock lock across kstat_zone_add.
1310f4b3ec61Sdh */
1311f4b3ec61Sdh for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1312f4b3ec61Sdh kstat_zone_add(ks, sz->sz_zoneid);
1313f4b3ec61Sdh }
1314f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1315f4b3ec61Sdh }
1316f4b3ec61Sdh
1317f4b3ec61Sdh static void
netstack_shared_kstat_remove(kstat_t * ks)1318f4b3ec61Sdh netstack_shared_kstat_remove(kstat_t *ks)
1319f4b3ec61Sdh {
1320f4b3ec61Sdh struct shared_zone_list *sz;
1321f4b3ec61Sdh struct shared_kstat_list **skp, *sk;
1322f4b3ec61Sdh
1323f4b3ec61Sdh /* Find in list */
1324f4b3ec61Sdh mutex_enter(&netstack_shared_lock);
1325f4b3ec61Sdh sk = NULL;
1326f4b3ec61Sdh for (skp = &netstack_shared_kstats; *skp != NULL;
1327f4b3ec61Sdh skp = &((*skp)->sk_next)) {
1328f4b3ec61Sdh if ((*skp)->sk_kstat == ks) {
1329f4b3ec61Sdh sk = *skp;
1330f4b3ec61Sdh break;
1331f4b3ec61Sdh }
1332f4b3ec61Sdh }
1333f4b3ec61Sdh /* Must find it */
1334f4b3ec61Sdh ASSERT(sk != NULL);
1335f4b3ec61Sdh *skp = sk->sk_next;
1336f4b3ec61Sdh sk->sk_next = NULL;
1337f4b3ec61Sdh
1338f4b3ec61Sdh /*
1339f4b3ec61Sdh * Perform kstat_zone_remove for each existing shared stack kstat.
1340f4b3ec61Sdh * Note: Holds netstack_shared_lock lock across kstat_zone_remove.
1341f4b3ec61Sdh */
1342f4b3ec61Sdh for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1343f4b3ec61Sdh kstat_zone_remove(ks, sz->sz_zoneid);
1344f4b3ec61Sdh }
1345f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1346f4b3ec61Sdh kmem_free(sk, sizeof (*sk));
1347f4b3ec61Sdh }
1348f4b3ec61Sdh
1349f4b3ec61Sdh /*
1350f4b3ec61Sdh * If a zoneid is part of the shared zone, return true
1351f4b3ec61Sdh */
1352f4b3ec61Sdh static boolean_t
netstack_find_shared_zoneid(zoneid_t zoneid)1353f4b3ec61Sdh netstack_find_shared_zoneid(zoneid_t zoneid)
1354f4b3ec61Sdh {
1355f4b3ec61Sdh struct shared_zone_list *sz;
1356f4b3ec61Sdh
1357f4b3ec61Sdh mutex_enter(&netstack_shared_lock);
1358f4b3ec61Sdh for (sz = netstack_shared_zones; sz != NULL; sz = sz->sz_next) {
1359f4b3ec61Sdh if (sz->sz_zoneid == zoneid) {
1360f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1361f4b3ec61Sdh return (B_TRUE);
1362f4b3ec61Sdh }
1363f4b3ec61Sdh }
1364f4b3ec61Sdh mutex_exit(&netstack_shared_lock);
1365f4b3ec61Sdh return (B_FALSE);
1366f4b3ec61Sdh }
1367f4b3ec61Sdh
1368f4b3ec61Sdh /*
1369f4b3ec61Sdh * Hide the fact that zoneids and netstackids are allocated from
1370f4b3ec61Sdh * the same space in the current implementation.
1371bd41d0a8Snordmark * We currently do not check that the stackid/zoneids are valid, since there
1372bd41d0a8Snordmark * is no need for that. But this should only be done for ids that are
1373bd41d0a8Snordmark * valid.
1374f4b3ec61Sdh */
1375f4b3ec61Sdh zoneid_t
netstackid_to_zoneid(netstackid_t stackid)1376f4b3ec61Sdh netstackid_to_zoneid(netstackid_t stackid)
1377f4b3ec61Sdh {
1378f4b3ec61Sdh return (stackid);
1379f4b3ec61Sdh }
1380f4b3ec61Sdh
1381f4b3ec61Sdh netstackid_t
zoneid_to_netstackid(zoneid_t zoneid)1382f4b3ec61Sdh zoneid_to_netstackid(zoneid_t zoneid)
1383f4b3ec61Sdh {
1384f4b3ec61Sdh if (netstack_find_shared_zoneid(zoneid))
1385f4b3ec61Sdh return (GLOBAL_ZONEID);
1386f4b3ec61Sdh else
1387f4b3ec61Sdh return (zoneid);
1388f4b3ec61Sdh }
1389f4b3ec61Sdh
13900a0e9771SDarren Reed zoneid_t
netstack_get_zoneid(netstack_t * ns)13910a0e9771SDarren Reed netstack_get_zoneid(netstack_t *ns)
13920a0e9771SDarren Reed {
13930a0e9771SDarren Reed return (netstackid_to_zoneid(ns->netstack_stackid));
13940a0e9771SDarren Reed }
13950a0e9771SDarren Reed
1396f4b3ec61Sdh /*
1397f4b3ec61Sdh * Simplistic support for walking all the handles.
1398f4b3ec61Sdh * Example usage:
1399f4b3ec61Sdh * netstack_handle_t nh;
1400f4b3ec61Sdh * netstack_t *ns;
1401f4b3ec61Sdh *
1402f4b3ec61Sdh * netstack_next_init(&nh);
1403f4b3ec61Sdh * while ((ns = netstack_next(&nh)) != NULL) {
1404f4b3ec61Sdh * do something;
1405f4b3ec61Sdh * netstack_rele(ns);
1406f4b3ec61Sdh * }
1407f4b3ec61Sdh * netstack_next_fini(&nh);
1408f4b3ec61Sdh */
1409f4b3ec61Sdh void
netstack_next_init(netstack_handle_t * handle)1410f4b3ec61Sdh netstack_next_init(netstack_handle_t *handle)
1411f4b3ec61Sdh {
1412f4b3ec61Sdh *handle = 0;
1413f4b3ec61Sdh }
1414f4b3ec61Sdh
1415f4b3ec61Sdh /* ARGSUSED */
1416f4b3ec61Sdh void
netstack_next_fini(netstack_handle_t * handle)1417f4b3ec61Sdh netstack_next_fini(netstack_handle_t *handle)
1418f4b3ec61Sdh {
1419f4b3ec61Sdh }
1420f4b3ec61Sdh
/*
 * Return the next active netstack instance for the walk started with
 * netstack_next_init(), advancing *handle past it.  The returned
 * instance has been held (netstack_hold_locked); the caller must drop
 * it with netstack_rele().  Returns NULL when the walk is exhausted.
 * Instances that are NSF_UNINIT or NSF_CLOSING are silently skipped.
 */
netstack_t *
netstack_next(netstack_handle_t *handle)
{
	netstack_t *ns;
	int i, end;

	end = *handle;
	/* Walk skipping *handle number of instances */

	/* Look if there is a matching stack instance */
	mutex_enter(&netstack_g_lock);
	ns = netstack_head;
	for (i = 0; i < end; i++) {
		if (ns == NULL)
			break;
		ns = ns->netstack_next;
	}
	/*
	 * Skip those that aren't really here (uninitialized or closing).
	 * Can't use hold_if_active because of "end" tracking.
	 */
	while (ns != NULL) {
		mutex_enter(&ns->netstack_lock);
		if ((ns->netstack_flags & (NSF_UNINIT|NSF_CLOSING)) == 0) {
			/*
			 * Record the position just past this instance and
			 * take the hold before dropping netstack_lock, so
			 * the instance cannot go away under us.
			 */
			*handle = end + 1;
			netstack_hold_locked(ns);
			mutex_exit(&ns->netstack_lock);
			break;
		}
		mutex_exit(&ns->netstack_lock);
		/* Skipped instances still count toward the walk position. */
		end++;
		ns = ns->netstack_next;
	}
	mutex_exit(&netstack_g_lock);
	return (ns);
}
1457