xref: /illumos-gate/usr/src/uts/common/io/mac/mac_flow.c (revision bbf21555)
1da14cebeSEric Cheng /*
2da14cebeSEric Cheng  * CDDL HEADER START
3da14cebeSEric Cheng  *
4da14cebeSEric Cheng  * The contents of this file are subject to the terms of the
5da14cebeSEric Cheng  * Common Development and Distribution License (the "License").
6da14cebeSEric Cheng  * You may not use this file except in compliance with the License.
7da14cebeSEric Cheng  *
8da14cebeSEric Cheng  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da14cebeSEric Cheng  * or http://www.opensolaris.org/os/licensing.
10da14cebeSEric Cheng  * See the License for the specific language governing permissions
11da14cebeSEric Cheng  * and limitations under the License.
12da14cebeSEric Cheng  *
13da14cebeSEric Cheng  * When distributing Covered Code, include this CDDL HEADER in each
14da14cebeSEric Cheng  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15da14cebeSEric Cheng  * If applicable, add the following below this CDDL HEADER, with the
16da14cebeSEric Cheng  * fields enclosed by brackets "[]" replaced with your own identifying
17da14cebeSEric Cheng  * information: Portions Copyright [yyyy] [name of copyright owner]
18da14cebeSEric Cheng  *
19da14cebeSEric Cheng  * CDDL HEADER END
20da14cebeSEric Cheng  */
21da14cebeSEric Cheng 
22da14cebeSEric Cheng /*
239820c710SBaban Kenkre  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24da14cebeSEric Cheng  * Use is subject to license terms.
25c61a1653SRyan Zezeski  * Copyright 2018 Joyent, Inc.
26da14cebeSEric Cheng  */
27da14cebeSEric Cheng 
28da14cebeSEric Cheng #include <sys/strsun.h>
29da14cebeSEric Cheng #include <sys/sdt.h>
30da14cebeSEric Cheng #include <sys/mac.h>
31da14cebeSEric Cheng #include <sys/mac_impl.h>
32da14cebeSEric Cheng #include <sys/mac_client_impl.h>
330dc2366fSVenugopal Iyer #include <sys/mac_stat.h>
34da14cebeSEric Cheng #include <sys/dls.h>
35da14cebeSEric Cheng #include <sys/dls_impl.h>
36da14cebeSEric Cheng #include <sys/mac_soft_ring.h>
37da14cebeSEric Cheng #include <sys/ethernet.h>
380dc2366fSVenugopal Iyer #include <sys/cpupart.h>
390dc2366fSVenugopal Iyer #include <sys/pool.h>
400dc2366fSVenugopal Iyer #include <sys/pool_pset.h>
41da14cebeSEric Cheng #include <sys/vlan.h>
42da14cebeSEric Cheng #include <inet/ip.h>
43da14cebeSEric Cheng #include <inet/ip6.h>
44da14cebeSEric Cheng #include <netinet/tcp.h>
45da14cebeSEric Cheng #include <netinet/udp.h>
46da14cebeSEric Cheng #include <netinet/sctp.h>
47da14cebeSEric Cheng 
/*
 * Per-flow counter snapshot. flow_stat_update() fills one of these on the
 * stack and then copies each field into the flow's named kstat, using the
 * offsets recorded in flow_stats_list.
 */
typedef struct flow_stats_s {
	uint64_t	fs_obytes;	/* outbound bytes */
	uint64_t	fs_opackets;	/* outbound packets */
	uint64_t	fs_oerrors;	/* outbound errors */
	uint64_t	fs_ibytes;	/* inbound bytes */
	uint64_t	fs_ipackets;	/* inbound packets */
	uint64_t	fs_ierrors;	/* inbound errors */
} flow_stats_t;
560dc2366fSVenugopal Iyer 
570dc2366fSVenugopal Iyer 
58da14cebeSEric Cheng /* global flow table, will be a per exclusive-zone table later */
59da14cebeSEric Cheng static mod_hash_t	*flow_hash;
60da14cebeSEric Cheng static krwlock_t	flow_tab_lock;
61da14cebeSEric Cheng 
62da14cebeSEric Cheng static kmem_cache_t	*flow_cache;
63da14cebeSEric Cheng static kmem_cache_t	*flow_tab_cache;
64da14cebeSEric Cheng static flow_ops_t	flow_l2_ops;
65da14cebeSEric Cheng 
66da14cebeSEric Cheng typedef struct {
67da14cebeSEric Cheng 	const char	*fs_name;
68da14cebeSEric Cheng 	uint_t		fs_offset;
69da14cebeSEric Cheng } flow_stats_info_t;
70da14cebeSEric Cheng 
71da14cebeSEric Cheng #define	FS_OFF(f)	(offsetof(flow_stats_t, f))
72da14cebeSEric Cheng static flow_stats_info_t flow_stats_list[] = {
730dc2366fSVenugopal Iyer 	{"rbytes",	FS_OFF(fs_ibytes)},
74da14cebeSEric Cheng 	{"ipackets",	FS_OFF(fs_ipackets)},
75da14cebeSEric Cheng 	{"ierrors",	FS_OFF(fs_ierrors)},
76da14cebeSEric Cheng 	{"obytes",	FS_OFF(fs_obytes)},
77da14cebeSEric Cheng 	{"opackets",	FS_OFF(fs_opackets)},
78da14cebeSEric Cheng 	{"oerrors",	FS_OFF(fs_oerrors)}
79da14cebeSEric Cheng };
80da14cebeSEric Cheng #define	FS_SIZE		(sizeof (flow_stats_list) / sizeof (flow_stats_info_t))
81da14cebeSEric Cheng 
82da14cebeSEric Cheng /*
83da14cebeSEric Cheng  * Checks whether a flow mask is legal.
84da14cebeSEric Cheng  */
85da14cebeSEric Cheng static flow_tab_info_t	*mac_flow_tab_info_get(flow_mask_t);
86da14cebeSEric Cheng 
87da14cebeSEric Cheng static void
flow_stat_init(kstat_named_t * knp)88da14cebeSEric Cheng flow_stat_init(kstat_named_t *knp)
89da14cebeSEric Cheng {
90da14cebeSEric Cheng 	int	i;
91da14cebeSEric Cheng 
92da14cebeSEric Cheng 	for (i = 0; i < FS_SIZE; i++, knp++) {
93da14cebeSEric Cheng 		kstat_named_init(knp, flow_stats_list[i].fs_name,
94da14cebeSEric Cheng 		    KSTAT_DATA_UINT64);
95da14cebeSEric Cheng 	}
96da14cebeSEric Cheng }
97da14cebeSEric Cheng 
98da14cebeSEric Cheng static int
flow_stat_update(kstat_t * ksp,int rw)99da14cebeSEric Cheng flow_stat_update(kstat_t *ksp, int rw)
100da14cebeSEric Cheng {
1010dc2366fSVenugopal Iyer 	flow_entry_t		*fep = ksp->ks_private;
1020dc2366fSVenugopal Iyer 	kstat_named_t		*knp = ksp->ks_data;
1030dc2366fSVenugopal Iyer 	uint64_t		*statp;
1040dc2366fSVenugopal Iyer 	int			i;
1050dc2366fSVenugopal Iyer 	mac_rx_stats_t		*mac_rx_stat;
1060dc2366fSVenugopal Iyer 	mac_tx_stats_t		*mac_tx_stat;
1070dc2366fSVenugopal Iyer 	flow_stats_t		flow_stats;
1080dc2366fSVenugopal Iyer 	mac_soft_ring_set_t	*mac_srs;
109da14cebeSEric Cheng 
110da14cebeSEric Cheng 	if (rw != KSTAT_READ)
111da14cebeSEric Cheng 		return (EACCES);
112da14cebeSEric Cheng 
1130dc2366fSVenugopal Iyer 	bzero(&flow_stats, sizeof (flow_stats_t));
1140dc2366fSVenugopal Iyer 
1150dc2366fSVenugopal Iyer 	for (i = 0; i < fep->fe_rx_srs_cnt; i++) {
1160dc2366fSVenugopal Iyer 		mac_srs = (mac_soft_ring_set_t *)fep->fe_rx_srs[i];
1170dc2366fSVenugopal Iyer 		if (mac_srs == NULL) 		/* Multicast flow */
1180dc2366fSVenugopal Iyer 			break;
1190dc2366fSVenugopal Iyer 		mac_rx_stat = &mac_srs->srs_rx.sr_stat;
1200dc2366fSVenugopal Iyer 
1210dc2366fSVenugopal Iyer 		flow_stats.fs_ibytes += mac_rx_stat->mrs_intrbytes +
1220dc2366fSVenugopal Iyer 		    mac_rx_stat->mrs_pollbytes + mac_rx_stat->mrs_lclbytes;
1230dc2366fSVenugopal Iyer 
1240dc2366fSVenugopal Iyer 		flow_stats.fs_ipackets += mac_rx_stat->mrs_intrcnt +
1250dc2366fSVenugopal Iyer 		    mac_rx_stat->mrs_pollcnt + mac_rx_stat->mrs_lclcnt;
1260dc2366fSVenugopal Iyer 
1270dc2366fSVenugopal Iyer 		flow_stats.fs_ierrors += mac_rx_stat->mrs_ierrors;
1280dc2366fSVenugopal Iyer 	}
1290dc2366fSVenugopal Iyer 
1300dc2366fSVenugopal Iyer 	mac_srs = (mac_soft_ring_set_t *)fep->fe_tx_srs;
1310dc2366fSVenugopal Iyer 	if (mac_srs == NULL) 		/* Multicast flow */
1320dc2366fSVenugopal Iyer 		goto done;
1330dc2366fSVenugopal Iyer 	mac_tx_stat = &mac_srs->srs_tx.st_stat;
1340dc2366fSVenugopal Iyer 
1350dc2366fSVenugopal Iyer 	flow_stats.fs_obytes = mac_tx_stat->mts_obytes;
1360dc2366fSVenugopal Iyer 	flow_stats.fs_opackets = mac_tx_stat->mts_opackets;
1370dc2366fSVenugopal Iyer 	flow_stats.fs_oerrors = mac_tx_stat->mts_oerrors;
1380dc2366fSVenugopal Iyer 
1390dc2366fSVenugopal Iyer done:
140da14cebeSEric Cheng 	for (i = 0; i < FS_SIZE; i++, knp++) {
141da14cebeSEric Cheng 		statp = (uint64_t *)
1420dc2366fSVenugopal Iyer 		    ((uchar_t *)&flow_stats + flow_stats_list[i].fs_offset);
143da14cebeSEric Cheng 		knp->value.ui64 = *statp;
144da14cebeSEric Cheng 	}
145da14cebeSEric Cheng 	return (0);
146da14cebeSEric Cheng }
147da14cebeSEric Cheng 
148da14cebeSEric Cheng static void
flow_stat_create(flow_entry_t * fep)149da14cebeSEric Cheng flow_stat_create(flow_entry_t *fep)
150da14cebeSEric Cheng {
151da14cebeSEric Cheng 	kstat_t		*ksp;
152da14cebeSEric Cheng 	kstat_named_t	*knp;
153da14cebeSEric Cheng 	uint_t		nstats = FS_SIZE;
154da14cebeSEric Cheng 
1552b24ab6bSSebastien Roy 	/*
1562b24ab6bSSebastien Roy 	 * Fow now, flow entries are only manipulated and visible from the
1572b24ab6bSSebastien Roy 	 * global zone.
1582b24ab6bSSebastien Roy 	 */
1592b24ab6bSSebastien Roy 	ksp = kstat_create_zone("unix", 0, (char *)fep->fe_flow_name, "flow",
1602b24ab6bSSebastien Roy 	    KSTAT_TYPE_NAMED, nstats, 0, GLOBAL_ZONEID);
161da14cebeSEric Cheng 	if (ksp == NULL)
162da14cebeSEric Cheng 		return;
163da14cebeSEric Cheng 
164da14cebeSEric Cheng 	ksp->ks_update = flow_stat_update;
165da14cebeSEric Cheng 	ksp->ks_private = fep;
166da14cebeSEric Cheng 	fep->fe_ksp = ksp;
167da14cebeSEric Cheng 
168da14cebeSEric Cheng 	knp = (kstat_named_t *)ksp->ks_data;
169da14cebeSEric Cheng 	flow_stat_init(knp);
170da14cebeSEric Cheng 	kstat_install(ksp);
171da14cebeSEric Cheng }
172da14cebeSEric Cheng 
173da14cebeSEric Cheng void
flow_stat_destroy(flow_entry_t * fep)174da14cebeSEric Cheng flow_stat_destroy(flow_entry_t *fep)
175da14cebeSEric Cheng {
176da14cebeSEric Cheng 	if (fep->fe_ksp != NULL) {
177da14cebeSEric Cheng 		kstat_delete(fep->fe_ksp);
178da14cebeSEric Cheng 		fep->fe_ksp = NULL;
179da14cebeSEric Cheng 	}
180da14cebeSEric Cheng }
181da14cebeSEric Cheng 
182da14cebeSEric Cheng /*
183da14cebeSEric Cheng  * Initialize the flow table
184da14cebeSEric Cheng  */
185da14cebeSEric Cheng void
mac_flow_init()186da14cebeSEric Cheng mac_flow_init()
187da14cebeSEric Cheng {
188da14cebeSEric Cheng 	flow_cache = kmem_cache_create("flow_entry_cache",
189da14cebeSEric Cheng 	    sizeof (flow_entry_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
190da14cebeSEric Cheng 	flow_tab_cache = kmem_cache_create("flow_tab_cache",
191da14cebeSEric Cheng 	    sizeof (flow_tab_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
192da14cebeSEric Cheng 	flow_hash = mod_hash_create_extended("flow_hash",
193da14cebeSEric Cheng 	    100, mod_hash_null_keydtor, mod_hash_null_valdtor,
194da14cebeSEric Cheng 	    mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP);
195da14cebeSEric Cheng 	rw_init(&flow_tab_lock, NULL, RW_DEFAULT, NULL);
196da14cebeSEric Cheng }
197da14cebeSEric Cheng 
198da14cebeSEric Cheng /*
199da14cebeSEric Cheng  * Cleanup and release the flow table
200da14cebeSEric Cheng  */
201da14cebeSEric Cheng void
mac_flow_fini()202da14cebeSEric Cheng mac_flow_fini()
203da14cebeSEric Cheng {
204da14cebeSEric Cheng 	kmem_cache_destroy(flow_cache);
205da14cebeSEric Cheng 	kmem_cache_destroy(flow_tab_cache);
206da14cebeSEric Cheng 	mod_hash_destroy_hash(flow_hash);
207da14cebeSEric Cheng 	rw_destroy(&flow_tab_lock);
208da14cebeSEric Cheng }
209da14cebeSEric Cheng 
210da14cebeSEric Cheng /*
211da14cebeSEric Cheng  * mac_create_flow(): create a flow_entry_t.
212da14cebeSEric Cheng  */
213da14cebeSEric Cheng int
mac_flow_create(flow_desc_t * fd,mac_resource_props_t * mrp,char * name,void * client_cookie,uint_t type,flow_entry_t ** flentp)214da14cebeSEric Cheng mac_flow_create(flow_desc_t *fd, mac_resource_props_t *mrp, char *name,
215da14cebeSEric Cheng     void *client_cookie, uint_t type, flow_entry_t **flentp)
216da14cebeSEric Cheng {
2170dc2366fSVenugopal Iyer 	flow_entry_t		*flent = *flentp;
2180dc2366fSVenugopal Iyer 	int			err = 0;
219da14cebeSEric Cheng 
220da14cebeSEric Cheng 	if (mrp != NULL) {
2210dc2366fSVenugopal Iyer 		err = mac_validate_props(NULL, mrp);
222da14cebeSEric Cheng 		if (err != 0)
223da14cebeSEric Cheng 			return (err);
224da14cebeSEric Cheng 	}
225da14cebeSEric Cheng 
226da14cebeSEric Cheng 	if (flent == NULL) {
227da14cebeSEric Cheng 		flent = kmem_cache_alloc(flow_cache, KM_SLEEP);
228da14cebeSEric Cheng 		bzero(flent, sizeof (*flent));
229da14cebeSEric Cheng 		mutex_init(&flent->fe_lock, NULL, MUTEX_DEFAULT, NULL);
230da14cebeSEric Cheng 		cv_init(&flent->fe_cv, NULL, CV_DEFAULT, NULL);
231da14cebeSEric Cheng 
232da14cebeSEric Cheng 		/* Initialize the receiver function to a safe routine */
233c61a1653SRyan Zezeski 		flent->fe_cb_fn = (flow_fn_t)mac_rx_def;
234da14cebeSEric Cheng 		flent->fe_index = -1;
235da14cebeSEric Cheng 	}
236da000602SGirish Moodalbail 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
237da14cebeSEric Cheng 
238da14cebeSEric Cheng 	/* This is an initial flow, will be configured later */
239da14cebeSEric Cheng 	if (fd == NULL) {
240da14cebeSEric Cheng 		*flentp = flent;
241da14cebeSEric Cheng 		return (0);
242da14cebeSEric Cheng 	}
243da14cebeSEric Cheng 
244da14cebeSEric Cheng 	flent->fe_client_cookie = client_cookie;
245da14cebeSEric Cheng 	flent->fe_type = type;
246da14cebeSEric Cheng 
247da14cebeSEric Cheng 	/* Save flow desc */
248da14cebeSEric Cheng 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
249da14cebeSEric Cheng 
250da14cebeSEric Cheng 	if (mrp != NULL) {
251da14cebeSEric Cheng 		/*
252da14cebeSEric Cheng 		 * We have already set fe_resource_props for a Link.
253da14cebeSEric Cheng 		 */
254da14cebeSEric Cheng 		if (type & FLOW_USER) {
255da14cebeSEric Cheng 			bcopy(mrp, &flent->fe_resource_props,
256da14cebeSEric Cheng 			    sizeof (mac_resource_props_t));
257da14cebeSEric Cheng 		}
258da14cebeSEric Cheng 		/*
259da14cebeSEric Cheng 		 * The effective resource list should reflect the priority
260da14cebeSEric Cheng 		 * that we set implicitly.
261da14cebeSEric Cheng 		 */
262da14cebeSEric Cheng 		if (!(mrp->mrp_mask & MRP_PRIORITY))
263da14cebeSEric Cheng 			mrp->mrp_mask |= MRP_PRIORITY;
264da14cebeSEric Cheng 		if (type & FLOW_USER)
265da14cebeSEric Cheng 			mrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
266da14cebeSEric Cheng 		else
267da14cebeSEric Cheng 			mrp->mrp_priority = MPL_LINK_DEFAULT;
2680dc2366fSVenugopal Iyer 		bzero(mrp->mrp_pool, MAXPATHLEN);
2690dc2366fSVenugopal Iyer 		bzero(&mrp->mrp_cpus, sizeof (mac_cpus_t));
270da14cebeSEric Cheng 		bcopy(mrp, &flent->fe_effective_props,
271da14cebeSEric Cheng 		    sizeof (mac_resource_props_t));
272da14cebeSEric Cheng 	}
273da14cebeSEric Cheng 	flow_stat_create(flent);
274da14cebeSEric Cheng 
275da14cebeSEric Cheng 	*flentp = flent;
276da14cebeSEric Cheng 	return (0);
277da14cebeSEric Cheng }
278da14cebeSEric Cheng 
279da14cebeSEric Cheng /*
280da14cebeSEric Cheng  * Validate flow entry and add it to a flow table.
281da14cebeSEric Cheng  */
282da14cebeSEric Cheng int
mac_flow_add(flow_tab_t * ft,flow_entry_t * flent)283da14cebeSEric Cheng mac_flow_add(flow_tab_t *ft, flow_entry_t *flent)
284da14cebeSEric Cheng {
285da14cebeSEric Cheng 	flow_entry_t	**headp, **p;
286da14cebeSEric Cheng 	flow_ops_t	*ops = &ft->ft_ops;
287da14cebeSEric Cheng 	flow_mask_t	mask;
288da14cebeSEric Cheng 	uint32_t	index;
289da14cebeSEric Cheng 	int		err;
290da14cebeSEric Cheng 
291da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
292da14cebeSEric Cheng 
293da14cebeSEric Cheng 	/*
294da14cebeSEric Cheng 	 * Check for invalid bits in mask.
295da14cebeSEric Cheng 	 */
296da14cebeSEric Cheng 	mask = flent->fe_flow_desc.fd_mask;
297da14cebeSEric Cheng 	if ((mask & ft->ft_mask) == 0 || (mask & ~ft->ft_mask) != 0)
298da14cebeSEric Cheng 		return (EOPNOTSUPP);
299da14cebeSEric Cheng 
300da14cebeSEric Cheng 	/*
301da14cebeSEric Cheng 	 * Validate flent.
302da14cebeSEric Cheng 	 */
303da14cebeSEric Cheng 	if ((err = ops->fo_accept_fe(ft, flent)) != 0) {
304da14cebeSEric Cheng 		DTRACE_PROBE3(accept_failed, flow_tab_t *, ft,
305da14cebeSEric Cheng 		    flow_entry_t *, flent, int, err);
306da14cebeSEric Cheng 		return (err);
307da14cebeSEric Cheng 	}
308da14cebeSEric Cheng 
309da14cebeSEric Cheng 	/*
310da14cebeSEric Cheng 	 * Flent is valid. now calculate hash and insert it
311da14cebeSEric Cheng 	 * into hash table.
312da14cebeSEric Cheng 	 */
313da14cebeSEric Cheng 	index = ops->fo_hash_fe(ft, flent);
314da14cebeSEric Cheng 
315da14cebeSEric Cheng 	/*
316da14cebeSEric Cheng 	 * We do not need a lock up until now because we were
317da14cebeSEric Cheng 	 * not accessing the flow table.
318da14cebeSEric Cheng 	 */
319da14cebeSEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
320da14cebeSEric Cheng 	headp = &ft->ft_table[index];
321da14cebeSEric Cheng 
322da14cebeSEric Cheng 	/*
323da14cebeSEric Cheng 	 * Check for duplicate flow.
324da14cebeSEric Cheng 	 */
325da14cebeSEric Cheng 	for (p = headp; *p != NULL; p = &(*p)->fe_next) {
326da14cebeSEric Cheng 		if ((*p)->fe_flow_desc.fd_mask !=
327da14cebeSEric Cheng 		    flent->fe_flow_desc.fd_mask)
328da14cebeSEric Cheng 			continue;
329da14cebeSEric Cheng 
330da14cebeSEric Cheng 		if (ft->ft_ops.fo_match_fe(ft, *p, flent)) {
331da14cebeSEric Cheng 			rw_exit(&ft->ft_lock);
332da14cebeSEric Cheng 			DTRACE_PROBE3(dup_flow, flow_tab_t *, ft,
333da14cebeSEric Cheng 			    flow_entry_t *, flent, int, err);
334da14cebeSEric Cheng 			return (EALREADY);
335da14cebeSEric Cheng 		}
336da14cebeSEric Cheng 	}
337da14cebeSEric Cheng 
338da14cebeSEric Cheng 	/*
339da14cebeSEric Cheng 	 * Insert flow to hash list.
340da14cebeSEric Cheng 	 */
341da14cebeSEric Cheng 	err = ops->fo_insert_fe(ft, headp, flent);
342da14cebeSEric Cheng 	if (err != 0) {
343da14cebeSEric Cheng 		rw_exit(&ft->ft_lock);
344da14cebeSEric Cheng 		DTRACE_PROBE3(insert_failed, flow_tab_t *, ft,
345da14cebeSEric Cheng 		    flow_entry_t *, flent, int, err);
346da14cebeSEric Cheng 		return (err);
347da14cebeSEric Cheng 	}
348da14cebeSEric Cheng 
349da14cebeSEric Cheng 	/*
350da14cebeSEric Cheng 	 * Save the hash index so it can be used by mac_flow_remove().
351da14cebeSEric Cheng 	 */
352da14cebeSEric Cheng 	flent->fe_index = (int)index;
353da14cebeSEric Cheng 
354da14cebeSEric Cheng 	/*
355da14cebeSEric Cheng 	 * Save the flow tab back reference.
356da14cebeSEric Cheng 	 */
357da14cebeSEric Cheng 	flent->fe_flow_tab = ft;
358da14cebeSEric Cheng 	FLOW_MARK(flent, FE_FLOW_TAB);
359da14cebeSEric Cheng 	ft->ft_flow_count++;
360da14cebeSEric Cheng 	rw_exit(&ft->ft_lock);
361da14cebeSEric Cheng 	return (0);
362da14cebeSEric Cheng }
363da14cebeSEric Cheng 
364da14cebeSEric Cheng /*
365da14cebeSEric Cheng  * Remove a flow from a mac client's subflow table
366da14cebeSEric Cheng  */
367da14cebeSEric Cheng void
mac_flow_rem_subflow(flow_entry_t * flent)368da14cebeSEric Cheng mac_flow_rem_subflow(flow_entry_t *flent)
369da14cebeSEric Cheng {
370da14cebeSEric Cheng 	flow_tab_t		*ft = flent->fe_flow_tab;
371da14cebeSEric Cheng 	mac_client_impl_t	*mcip = ft->ft_mcip;
3725d460eafSCathy Zhou 	mac_handle_t		mh = (mac_handle_t)ft->ft_mip;
373da14cebeSEric Cheng 
3745d460eafSCathy Zhou 	ASSERT(MAC_PERIM_HELD(mh));
375da14cebeSEric Cheng 
376da14cebeSEric Cheng 	mac_flow_remove(ft, flent, B_FALSE);
377da14cebeSEric Cheng 	if (flent->fe_mcip == NULL) {
378da14cebeSEric Cheng 		/*
379da14cebeSEric Cheng 		 * The interface is not yet plumbed and mac_client_flow_add
380da14cebeSEric Cheng 		 * was not done.
381da14cebeSEric Cheng 		 */
382da14cebeSEric Cheng 		if (FLOW_TAB_EMPTY(ft)) {
383da14cebeSEric Cheng 			mac_flow_tab_destroy(ft);
384da14cebeSEric Cheng 			mcip->mci_subflow_tab = NULL;
385da14cebeSEric Cheng 		}
3865d460eafSCathy Zhou 	} else {
3875d460eafSCathy Zhou 		mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
3885d460eafSCathy Zhou 		mac_link_flow_clean((mac_client_handle_t)mcip, flent);
389da14cebeSEric Cheng 	}
3905d460eafSCathy Zhou 	mac_fastpath_enable(mh);
391da14cebeSEric Cheng }
392da14cebeSEric Cheng 
393da14cebeSEric Cheng /*
394da14cebeSEric Cheng  * Add a flow to a mac client's subflow table and instantiate the flow
395da14cebeSEric Cheng  * in the mac by creating the associated SRSs etc.
396da14cebeSEric Cheng  */
397da14cebeSEric Cheng int
mac_flow_add_subflow(mac_client_handle_t mch,flow_entry_t * flent,boolean_t instantiate_flow)398da14cebeSEric Cheng mac_flow_add_subflow(mac_client_handle_t mch, flow_entry_t *flent,
399da14cebeSEric Cheng     boolean_t instantiate_flow)
400da14cebeSEric Cheng {
401da14cebeSEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
4025d460eafSCathy Zhou 	mac_handle_t		mh = (mac_handle_t)mcip->mci_mip;
403da14cebeSEric Cheng 	flow_tab_info_t		*ftinfo;
404da14cebeSEric Cheng 	flow_mask_t		mask;
405da14cebeSEric Cheng 	flow_tab_t		*ft;
406da14cebeSEric Cheng 	int			err;
407da14cebeSEric Cheng 	boolean_t		ft_created = B_FALSE;
408da14cebeSEric Cheng 
4095d460eafSCathy Zhou 	ASSERT(MAC_PERIM_HELD(mh));
4105d460eafSCathy Zhou 
4115d460eafSCathy Zhou 	if ((err = mac_fastpath_disable(mh)) != 0)
4125d460eafSCathy Zhou 		return (err);
413da14cebeSEric Cheng 
414da14cebeSEric Cheng 	/*
415da14cebeSEric Cheng 	 * If the subflow table exists already just add the new subflow
416da14cebeSEric Cheng 	 * to the existing table, else we create a new subflow table below.
417da14cebeSEric Cheng 	 */
418da14cebeSEric Cheng 	ft = mcip->mci_subflow_tab;
419da14cebeSEric Cheng 	if (ft == NULL) {
420da14cebeSEric Cheng 		mask = flent->fe_flow_desc.fd_mask;
421da14cebeSEric Cheng 		/*
422da14cebeSEric Cheng 		 * Try to create a new table and then add the subflow to the
423da14cebeSEric Cheng 		 * newly created subflow table
424da14cebeSEric Cheng 		 */
4255d460eafSCathy Zhou 		if ((ftinfo = mac_flow_tab_info_get(mask)) == NULL) {
4265d460eafSCathy Zhou 			mac_fastpath_enable(mh);
427da14cebeSEric Cheng 			return (EOPNOTSUPP);
4285d460eafSCathy Zhou 		}
429da14cebeSEric Cheng 
430da14cebeSEric Cheng 		mac_flow_tab_create(ftinfo->fti_ops, mask, ftinfo->fti_size,
431da14cebeSEric Cheng 		    mcip->mci_mip, &ft);
432da14cebeSEric Cheng 		ft_created = B_TRUE;
433da14cebeSEric Cheng 	}
434da14cebeSEric Cheng 
435da14cebeSEric Cheng 	err = mac_flow_add(ft, flent);
436da14cebeSEric Cheng 	if (err != 0) {
437da14cebeSEric Cheng 		if (ft_created)
438da14cebeSEric Cheng 			mac_flow_tab_destroy(ft);
4395d460eafSCathy Zhou 		mac_fastpath_enable(mh);
440da14cebeSEric Cheng 		return (err);
441da14cebeSEric Cheng 	}
442da14cebeSEric Cheng 
443da14cebeSEric Cheng 	if (instantiate_flow) {
444da14cebeSEric Cheng 		/* Now activate the flow by creating its SRSs */
445da14cebeSEric Cheng 		ASSERT(MCIP_DATAPATH_SETUP(mcip));
446da14cebeSEric Cheng 		err = mac_link_flow_init((mac_client_handle_t)mcip, flent);
447da14cebeSEric Cheng 		if (err != 0) {
448da14cebeSEric Cheng 			mac_flow_remove(ft, flent, B_FALSE);
449da14cebeSEric Cheng 			if (ft_created)
450da14cebeSEric Cheng 				mac_flow_tab_destroy(ft);
4515d460eafSCathy Zhou 			mac_fastpath_enable(mh);
452da14cebeSEric Cheng 			return (err);
453da14cebeSEric Cheng 		}
454da14cebeSEric Cheng 	} else {
455da14cebeSEric Cheng 		FLOW_MARK(flent, FE_UF_NO_DATAPATH);
456da14cebeSEric Cheng 	}
457da14cebeSEric Cheng 	if (ft_created) {
458da14cebeSEric Cheng 		ASSERT(mcip->mci_subflow_tab == NULL);
459da14cebeSEric Cheng 		ft->ft_mcip = mcip;
460da14cebeSEric Cheng 		mcip->mci_subflow_tab = ft;
461da14cebeSEric Cheng 		if (instantiate_flow)
462da14cebeSEric Cheng 			mac_client_update_classifier(mcip, B_TRUE);
463da14cebeSEric Cheng 	}
464da14cebeSEric Cheng 	return (0);
465da14cebeSEric Cheng }
466da14cebeSEric Cheng 
467da14cebeSEric Cheng /*
468da14cebeSEric Cheng  * Remove flow entry from flow table.
469da14cebeSEric Cheng  */
470da14cebeSEric Cheng void
mac_flow_remove(flow_tab_t * ft,flow_entry_t * flent,boolean_t temp)471da14cebeSEric Cheng mac_flow_remove(flow_tab_t *ft, flow_entry_t *flent, boolean_t temp)
472da14cebeSEric Cheng {
473da14cebeSEric Cheng 	flow_entry_t	**fp;
474da14cebeSEric Cheng 
475da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
476da14cebeSEric Cheng 	if (!(flent->fe_flags & FE_FLOW_TAB))
477da14cebeSEric Cheng 		return;
478da14cebeSEric Cheng 
479da14cebeSEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
480da14cebeSEric Cheng 	/*
481da14cebeSEric Cheng 	 * If this is a permanent removal from the flow table, mark it
482da14cebeSEric Cheng 	 * CONDEMNED to prevent future references. If this is a temporary
483da14cebeSEric Cheng 	 * removal from the table, say to update the flow descriptor then
484da14cebeSEric Cheng 	 * we don't mark it CONDEMNED
485da14cebeSEric Cheng 	 */
486da14cebeSEric Cheng 	if (!temp)
487da14cebeSEric Cheng 		FLOW_MARK(flent, FE_CONDEMNED);
488da14cebeSEric Cheng 	/*
489da14cebeSEric Cheng 	 * Locate the specified flent.
490da14cebeSEric Cheng 	 */
491da14cebeSEric Cheng 	fp = &ft->ft_table[flent->fe_index];
492da14cebeSEric Cheng 	while (*fp != flent)
493da14cebeSEric Cheng 		fp = &(*fp)->fe_next;
494da14cebeSEric Cheng 
495da14cebeSEric Cheng 	/*
496da14cebeSEric Cheng 	 * The flent must exist. Otherwise it's a bug.
497da14cebeSEric Cheng 	 */
498da14cebeSEric Cheng 	ASSERT(fp != NULL);
499da14cebeSEric Cheng 	*fp = flent->fe_next;
500da14cebeSEric Cheng 	flent->fe_next = NULL;
501da14cebeSEric Cheng 
502da14cebeSEric Cheng 	/*
503da14cebeSEric Cheng 	 * Reset fe_index to -1 so any attempt to call mac_flow_remove()
504da14cebeSEric Cheng 	 * on a flent that is supposed to be in the table (FE_FLOW_TAB)
505da14cebeSEric Cheng 	 * will panic.
506da14cebeSEric Cheng 	 */
507da14cebeSEric Cheng 	flent->fe_index = -1;
508da14cebeSEric Cheng 	FLOW_UNMARK(flent, FE_FLOW_TAB);
509da14cebeSEric Cheng 	ft->ft_flow_count--;
510da14cebeSEric Cheng 	rw_exit(&ft->ft_lock);
511da14cebeSEric Cheng }
512da14cebeSEric Cheng 
513da14cebeSEric Cheng /*
514da14cebeSEric Cheng  * This is the flow lookup routine used by the mac sw classifier engine.
515da14cebeSEric Cheng  */
516da14cebeSEric Cheng int
mac_flow_lookup(flow_tab_t * ft,mblk_t * mp,uint_t flags,flow_entry_t ** flentp)517da14cebeSEric Cheng mac_flow_lookup(flow_tab_t *ft, mblk_t *mp, uint_t flags, flow_entry_t **flentp)
518da14cebeSEric Cheng {
519da14cebeSEric Cheng 	flow_state_t	s;
520da14cebeSEric Cheng 	flow_entry_t	*flent;
521da14cebeSEric Cheng 	flow_ops_t	*ops = &ft->ft_ops;
522da14cebeSEric Cheng 	boolean_t	retried = B_FALSE;
523da14cebeSEric Cheng 	int		i, err;
524da14cebeSEric Cheng 
525da14cebeSEric Cheng 	s.fs_flags = flags;
526da14cebeSEric Cheng retry:
527ae6aa22aSVenugopal Iyer 	s.fs_mp = mp;
528da14cebeSEric Cheng 
529da14cebeSEric Cheng 	/*
530da14cebeSEric Cheng 	 * Walk the list of predeclared accept functions.
531da14cebeSEric Cheng 	 * Each of these would accumulate enough state to allow the next
532da14cebeSEric Cheng 	 * accept routine to make progress.
533da14cebeSEric Cheng 	 */
534da14cebeSEric Cheng 	for (i = 0; i < FLOW_MAX_ACCEPT && ops->fo_accept[i] != NULL; i++) {
535da14cebeSEric Cheng 		if ((err = (ops->fo_accept[i])(ft, &s)) != 0) {
536ae6aa22aSVenugopal Iyer 			mblk_t	*last;
537ae6aa22aSVenugopal Iyer 
538da14cebeSEric Cheng 			/*
539da14cebeSEric Cheng 			 * ENOBUFS indicates that the mp could be too short
540da14cebeSEric Cheng 			 * and may need a pullup.
541da14cebeSEric Cheng 			 */
542da14cebeSEric Cheng 			if (err != ENOBUFS || retried)
543da14cebeSEric Cheng 				return (err);
544da14cebeSEric Cheng 
545da14cebeSEric Cheng 			/*
546ae6aa22aSVenugopal Iyer 			 * The pullup is done on the last processed mblk, not
547ae6aa22aSVenugopal Iyer 			 * the starting one. pullup is not done if the mblk
548ae6aa22aSVenugopal Iyer 			 * has references or if b_cont is NULL.
549da14cebeSEric Cheng 			 */
550ae6aa22aSVenugopal Iyer 			last = s.fs_mp;
551ae6aa22aSVenugopal Iyer 			if (DB_REF(last) > 1 || last->b_cont == NULL ||
552ae6aa22aSVenugopal Iyer 			    pullupmsg(last, -1) == 0)
553da14cebeSEric Cheng 				return (EINVAL);
554da14cebeSEric Cheng 
555da14cebeSEric Cheng 			retried = B_TRUE;
556da14cebeSEric Cheng 			DTRACE_PROBE2(need_pullup, flow_tab_t *, ft,
557da14cebeSEric Cheng 			    flow_state_t *, &s);
558da14cebeSEric Cheng 			goto retry;
559da14cebeSEric Cheng 		}
560da14cebeSEric Cheng 	}
561da14cebeSEric Cheng 
562da14cebeSEric Cheng 	/*
563da14cebeSEric Cheng 	 * The packet is considered sane. We may now attempt to
564da14cebeSEric Cheng 	 * find the corresponding flent.
565da14cebeSEric Cheng 	 */
566da14cebeSEric Cheng 	rw_enter(&ft->ft_lock, RW_READER);
567da14cebeSEric Cheng 	flent = ft->ft_table[ops->fo_hash(ft, &s)];
568da14cebeSEric Cheng 	for (; flent != NULL; flent = flent->fe_next) {
569da14cebeSEric Cheng 		if (flent->fe_match(ft, flent, &s)) {
570da14cebeSEric Cheng 			FLOW_TRY_REFHOLD(flent, err);
571da14cebeSEric Cheng 			if (err != 0)
572da14cebeSEric Cheng 				continue;
573da14cebeSEric Cheng 			*flentp = flent;
574da14cebeSEric Cheng 			rw_exit(&ft->ft_lock);
575da14cebeSEric Cheng 			return (0);
576da14cebeSEric Cheng 		}
577da14cebeSEric Cheng 	}
578da14cebeSEric Cheng 	rw_exit(&ft->ft_lock);
579da14cebeSEric Cheng 	return (ENOENT);
580da14cebeSEric Cheng }
581da14cebeSEric Cheng 
582da14cebeSEric Cheng /*
583da14cebeSEric Cheng  * Walk flow table.
584da14cebeSEric Cheng  * The caller is assumed to have proper perimeter protection.
585da14cebeSEric Cheng  */
586da14cebeSEric Cheng int
mac_flow_walk_nolock(flow_tab_t * ft,int (* fn)(flow_entry_t *,void *),void * arg)587da14cebeSEric Cheng mac_flow_walk_nolock(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
588da14cebeSEric Cheng     void *arg)
589da14cebeSEric Cheng {
590da14cebeSEric Cheng 	int		err, i, cnt = 0;
591da14cebeSEric Cheng 	flow_entry_t	*flent;
592da14cebeSEric Cheng 
593da14cebeSEric Cheng 	if (ft == NULL)
594da14cebeSEric Cheng 		return (0);
595da14cebeSEric Cheng 
596da14cebeSEric Cheng 	for (i = 0; i < ft->ft_size; i++) {
597da14cebeSEric Cheng 		for (flent = ft->ft_table[i]; flent != NULL;
598da14cebeSEric Cheng 		    flent = flent->fe_next) {
599da14cebeSEric Cheng 			cnt++;
600da14cebeSEric Cheng 			err = (*fn)(flent, arg);
601da14cebeSEric Cheng 			if (err != 0)
602da14cebeSEric Cheng 				return (err);
603da14cebeSEric Cheng 		}
604da14cebeSEric Cheng 	}
605da14cebeSEric Cheng 	VERIFY(cnt == ft->ft_flow_count);
606da14cebeSEric Cheng 	return (0);
607da14cebeSEric Cheng }
608da14cebeSEric Cheng 
609da14cebeSEric Cheng /*
610da14cebeSEric Cheng  * Same as the above except a mutex is used for protection here.
611da14cebeSEric Cheng  */
612da14cebeSEric Cheng int
mac_flow_walk(flow_tab_t * ft,int (* fn)(flow_entry_t *,void *),void * arg)613da14cebeSEric Cheng mac_flow_walk(flow_tab_t *ft, int (*fn)(flow_entry_t *, void *),
614da14cebeSEric Cheng     void *arg)
615da14cebeSEric Cheng {
616da14cebeSEric Cheng 	int		err;
617da14cebeSEric Cheng 
618da14cebeSEric Cheng 	if (ft == NULL)
619da14cebeSEric Cheng 		return (0);
620da14cebeSEric Cheng 
621da14cebeSEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
622da14cebeSEric Cheng 	err = mac_flow_walk_nolock(ft, fn, arg);
623da14cebeSEric Cheng 	rw_exit(&ft->ft_lock);
624da14cebeSEric Cheng 	return (err);
625da14cebeSEric Cheng }
626da14cebeSEric Cheng 
627da14cebeSEric Cheng static boolean_t	mac_flow_clean(flow_entry_t *);
628da14cebeSEric Cheng 
629da14cebeSEric Cheng /*
630da14cebeSEric Cheng  * Destroy a flow entry. Called when the last reference on a flow is released.
631da14cebeSEric Cheng  */
632da14cebeSEric Cheng void
mac_flow_destroy(flow_entry_t * flent)633da14cebeSEric Cheng mac_flow_destroy(flow_entry_t *flent)
634da14cebeSEric Cheng {
635da14cebeSEric Cheng 	ASSERT(flent->fe_refcnt == 0);
636da14cebeSEric Cheng 
637da14cebeSEric Cheng 	if ((flent->fe_type & FLOW_USER) != 0) {
638da14cebeSEric Cheng 		ASSERT(mac_flow_clean(flent));
639da14cebeSEric Cheng 	} else {
640da14cebeSEric Cheng 		mac_flow_cleanup(flent);
641da14cebeSEric Cheng 	}
6420dc2366fSVenugopal Iyer 	mac_misc_stat_delete(flent);
643da14cebeSEric Cheng 	mutex_destroy(&flent->fe_lock);
644da14cebeSEric Cheng 	cv_destroy(&flent->fe_cv);
645da14cebeSEric Cheng 	flow_stat_destroy(flent);
646da14cebeSEric Cheng 	kmem_cache_free(flow_cache, flent);
647da14cebeSEric Cheng }
648da14cebeSEric Cheng 
649da14cebeSEric Cheng /*
650da14cebeSEric Cheng  * XXX eric
651da14cebeSEric Cheng  * The MAC_FLOW_PRIORITY checks in mac_resource_ctl_set() and
652da14cebeSEric Cheng  * mac_link_flow_modify() should really be moved/reworked into the
653da14cebeSEric Cheng  * two functions below. This would consolidate all the mac property
654da14cebeSEric Cheng  * checking in one place. I'm leaving this alone for now since it's
655da14cebeSEric Cheng  * out of scope of the new flows work.
656da14cebeSEric Cheng  */
657da14cebeSEric Cheng /* ARGSUSED */
658da14cebeSEric Cheng uint32_t
mac_flow_modify_props(flow_entry_t * flent,mac_resource_props_t * mrp)659da14cebeSEric Cheng mac_flow_modify_props(flow_entry_t *flent, mac_resource_props_t *mrp)
660da14cebeSEric Cheng {
661da14cebeSEric Cheng 	uint32_t		changed_mask = 0;
662da14cebeSEric Cheng 	mac_resource_props_t	*fmrp = &flent->fe_effective_props;
663da14cebeSEric Cheng 	int			i;
664da14cebeSEric Cheng 
665da14cebeSEric Cheng 	if ((mrp->mrp_mask & MRP_MAXBW) != 0 &&
6660dc2366fSVenugopal Iyer 	    (!(fmrp->mrp_mask & MRP_MAXBW) ||
6670dc2366fSVenugopal Iyer 	    (fmrp->mrp_maxbw != mrp->mrp_maxbw))) {
668da14cebeSEric Cheng 		changed_mask |= MRP_MAXBW;
669da14cebeSEric Cheng 		if (mrp->mrp_maxbw == MRP_MAXBW_RESETVAL) {
670da14cebeSEric Cheng 			fmrp->mrp_mask &= ~MRP_MAXBW;
6710dc2366fSVenugopal Iyer 			fmrp->mrp_maxbw = 0;
672da14cebeSEric Cheng 		} else {
673da14cebeSEric Cheng 			fmrp->mrp_mask |= MRP_MAXBW;
6740dc2366fSVenugopal Iyer 			fmrp->mrp_maxbw = mrp->mrp_maxbw;
675da14cebeSEric Cheng 		}
676da14cebeSEric Cheng 	}
677da14cebeSEric Cheng 
678da14cebeSEric Cheng 	if ((mrp->mrp_mask & MRP_PRIORITY) != 0) {
679da14cebeSEric Cheng 		if (fmrp->mrp_priority != mrp->mrp_priority)
680da14cebeSEric Cheng 			changed_mask |= MRP_PRIORITY;
681da14cebeSEric Cheng 		if (mrp->mrp_priority == MPL_RESET) {
682da14cebeSEric Cheng 			fmrp->mrp_priority = MPL_SUBFLOW_DEFAULT;
683da14cebeSEric Cheng 			fmrp->mrp_mask &= ~MRP_PRIORITY;
684da14cebeSEric Cheng 		} else {
685da14cebeSEric Cheng 			fmrp->mrp_priority = mrp->mrp_priority;
686da14cebeSEric Cheng 			fmrp->mrp_mask |= MRP_PRIORITY;
687da14cebeSEric Cheng 		}
688da14cebeSEric Cheng 	}
689da14cebeSEric Cheng 
690da14cebeSEric Cheng 	/* modify fanout */
691da14cebeSEric Cheng 	if ((mrp->mrp_mask & MRP_CPUS) != 0) {
692da14cebeSEric Cheng 		if ((fmrp->mrp_ncpus == mrp->mrp_ncpus) &&
693da14cebeSEric Cheng 		    (fmrp->mrp_fanout_mode == mrp->mrp_fanout_mode)) {
694da14cebeSEric Cheng 			for (i = 0; i < mrp->mrp_ncpus; i++) {
695da14cebeSEric Cheng 				if (mrp->mrp_cpu[i] != fmrp->mrp_cpu[i])
696da14cebeSEric Cheng 					break;
697da14cebeSEric Cheng 			}
698da14cebeSEric Cheng 			if (i == mrp->mrp_ncpus) {
699da14cebeSEric Cheng 				/*
700da14cebeSEric Cheng 				 * The new set of cpus passed is exactly
701da14cebeSEric Cheng 				 * the same as the existing set.
702da14cebeSEric Cheng 				 */
703da14cebeSEric Cheng 				return (changed_mask);
704da14cebeSEric Cheng 			}
705da14cebeSEric Cheng 		}
706da14cebeSEric Cheng 		changed_mask |= MRP_CPUS;
707da14cebeSEric Cheng 		MAC_COPY_CPUS(mrp, fmrp);
708da14cebeSEric Cheng 	}
7090dc2366fSVenugopal Iyer 
7100dc2366fSVenugopal Iyer 	/*
7110dc2366fSVenugopal Iyer 	 * Modify the rings property.
7120dc2366fSVenugopal Iyer 	 */
7130dc2366fSVenugopal Iyer 	if (mrp->mrp_mask & MRP_RX_RINGS || mrp->mrp_mask & MRP_TX_RINGS)
7140dc2366fSVenugopal Iyer 		mac_set_rings_effective(flent->fe_mcip);
7150dc2366fSVenugopal Iyer 
7160dc2366fSVenugopal Iyer 	if ((mrp->mrp_mask & MRP_POOL) != 0) {
7170dc2366fSVenugopal Iyer 		if (strcmp(fmrp->mrp_pool, mrp->mrp_pool) != 0)
7180dc2366fSVenugopal Iyer 			changed_mask |= MRP_POOL;
7190dc2366fSVenugopal Iyer 		if (strlen(mrp->mrp_pool) == 0)
7200dc2366fSVenugopal Iyer 			fmrp->mrp_mask &= ~MRP_POOL;
7210dc2366fSVenugopal Iyer 		else
7220dc2366fSVenugopal Iyer 			fmrp->mrp_mask |= MRP_POOL;
7230dc2366fSVenugopal Iyer 		(void) strncpy(fmrp->mrp_pool, mrp->mrp_pool, MAXPATHLEN);
7240dc2366fSVenugopal Iyer 	}
725da14cebeSEric Cheng 	return (changed_mask);
726da14cebeSEric Cheng }
727da14cebeSEric Cheng 
728da14cebeSEric Cheng void
mac_flow_modify(flow_tab_t * ft,flow_entry_t * flent,mac_resource_props_t * mrp)729da14cebeSEric Cheng mac_flow_modify(flow_tab_t *ft, flow_entry_t *flent, mac_resource_props_t *mrp)
730da14cebeSEric Cheng {
731da14cebeSEric Cheng 	uint32_t changed_mask;
732da14cebeSEric Cheng 	mac_client_impl_t *mcip = flent->fe_mcip;
733da14cebeSEric Cheng 	mac_resource_props_t *mcip_mrp = MCIP_RESOURCE_PROPS(mcip);
7340dc2366fSVenugopal Iyer 	mac_resource_props_t *emrp = MCIP_EFFECTIVE_PROPS(mcip);
7350dc2366fSVenugopal Iyer 	cpupart_t *cpupart = NULL;
7360dc2366fSVenugopal Iyer 	boolean_t use_default = B_FALSE;
737da14cebeSEric Cheng 
738da14cebeSEric Cheng 	ASSERT(flent != NULL);
739da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
740da14cebeSEric Cheng 
741da14cebeSEric Cheng 	rw_enter(&ft->ft_lock, RW_WRITER);
742da14cebeSEric Cheng 
743da14cebeSEric Cheng 	/* Update the cached values inside the subflow entry */
744da14cebeSEric Cheng 	changed_mask = mac_flow_modify_props(flent, mrp);
745da14cebeSEric Cheng 	rw_exit(&ft->ft_lock);
746da14cebeSEric Cheng 	/*
747da14cebeSEric Cheng 	 * Push the changed parameters to the scheduling code in the
748da14cebeSEric Cheng 	 * SRS's, to take effect right away.
749da14cebeSEric Cheng 	 */
750da14cebeSEric Cheng 	if (changed_mask & MRP_MAXBW) {
751da14cebeSEric Cheng 		mac_srs_update_bwlimit(flent, mrp);
752da14cebeSEric Cheng 		/*
753da14cebeSEric Cheng 		 * If bandwidth is changed, we may have to change
754da14cebeSEric Cheng 		 * the number of soft ring to be used for fanout.
755da14cebeSEric Cheng 		 * Call mac_flow_update_fanout() if MAC_BIND_CPU
756da14cebeSEric Cheng 		 * is not set and there is no user supplied cpu
757da14cebeSEric Cheng 		 * info. This applies only to link at this time.
758da14cebeSEric Cheng 		 */
759da14cebeSEric Cheng 		if (!(flent->fe_type & FLOW_USER) &&
760da14cebeSEric Cheng 		    !(changed_mask & MRP_CPUS) &&
761da14cebeSEric Cheng 		    !(mcip_mrp->mrp_mask & MRP_CPUS_USERSPEC)) {
762da14cebeSEric Cheng 			mac_fanout_setup(mcip, flent, mcip_mrp,
7630dc2366fSVenugopal Iyer 			    mac_rx_deliver, mcip, NULL, NULL);
764da14cebeSEric Cheng 		}
765da14cebeSEric Cheng 	}
766da14cebeSEric Cheng 	if (mrp->mrp_mask & MRP_PRIORITY)
767da14cebeSEric Cheng 		mac_flow_update_priority(mcip, flent);
768da14cebeSEric Cheng 
769da14cebeSEric Cheng 	if (changed_mask & MRP_CPUS)
7700dc2366fSVenugopal Iyer 		mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
7710dc2366fSVenugopal Iyer 		    NULL);
7720dc2366fSVenugopal Iyer 
7730dc2366fSVenugopal Iyer 	if (mrp->mrp_mask & MRP_POOL) {
7740dc2366fSVenugopal Iyer 		pool_lock();
7750dc2366fSVenugopal Iyer 		cpupart = mac_pset_find(mrp, &use_default);
7760dc2366fSVenugopal Iyer 		mac_fanout_setup(mcip, flent, mrp, mac_rx_deliver, mcip, NULL,
7770dc2366fSVenugopal Iyer 		    cpupart);
7780dc2366fSVenugopal Iyer 		mac_set_pool_effective(use_default, cpupart, mrp, emrp);
7790dc2366fSVenugopal Iyer 		pool_unlock();
7800dc2366fSVenugopal Iyer 	}
781da14cebeSEric Cheng }
782da14cebeSEric Cheng 
783da14cebeSEric Cheng /*
784da14cebeSEric Cheng  * This function waits for a certain condition to be met and is generally
785da14cebeSEric Cheng  * used before a destructive or quiescing operation.
786da14cebeSEric Cheng  */
787da14cebeSEric Cheng void
mac_flow_wait(flow_entry_t * flent,mac_flow_state_t event)788da14cebeSEric Cheng mac_flow_wait(flow_entry_t *flent, mac_flow_state_t event)
789da14cebeSEric Cheng {
790da14cebeSEric Cheng 	mutex_enter(&flent->fe_lock);
791da14cebeSEric Cheng 	flent->fe_flags |= FE_WAITER;
792da14cebeSEric Cheng 
793da14cebeSEric Cheng 	switch (event) {
794da14cebeSEric Cheng 	case FLOW_DRIVER_UPCALL:
795da14cebeSEric Cheng 		/*
796da14cebeSEric Cheng 		 * We want to make sure the driver upcalls have finished before
797da14cebeSEric Cheng 		 * we signal the Rx SRS worker to quit.
798da14cebeSEric Cheng 		 */
799da14cebeSEric Cheng 		while (flent->fe_refcnt != 1)
800da14cebeSEric Cheng 			cv_wait(&flent->fe_cv, &flent->fe_lock);
801da14cebeSEric Cheng 		break;
802da14cebeSEric Cheng 
803da14cebeSEric Cheng 	case FLOW_USER_REF:
804da14cebeSEric Cheng 		/*
805da14cebeSEric Cheng 		 * Wait for the fe_user_refcnt to drop to 0. The flow has
806da14cebeSEric Cheng 		 * been removed from the global flow hash.
807da14cebeSEric Cheng 		 */
808da14cebeSEric Cheng 		ASSERT(!(flent->fe_flags & FE_G_FLOW_HASH));
809da14cebeSEric Cheng 		while (flent->fe_user_refcnt != 0)
810da14cebeSEric Cheng 			cv_wait(&flent->fe_cv, &flent->fe_lock);
811da14cebeSEric Cheng 		break;
812da14cebeSEric Cheng 
813da14cebeSEric Cheng 	default:
814da14cebeSEric Cheng 		ASSERT(0);
815da14cebeSEric Cheng 	}
816da14cebeSEric Cheng 
817da14cebeSEric Cheng 	flent->fe_flags &= ~FE_WAITER;
818da14cebeSEric Cheng 	mutex_exit(&flent->fe_lock);
819da14cebeSEric Cheng }
820da14cebeSEric Cheng 
821da14cebeSEric Cheng static boolean_t
mac_flow_clean(flow_entry_t * flent)822da14cebeSEric Cheng mac_flow_clean(flow_entry_t *flent)
823da14cebeSEric Cheng {
824da14cebeSEric Cheng 	ASSERT(flent->fe_next == NULL);
825da14cebeSEric Cheng 	ASSERT(flent->fe_tx_srs == NULL);
826da14cebeSEric Cheng 	ASSERT(flent->fe_rx_srs_cnt == 0 && flent->fe_rx_srs[0] == NULL);
827da14cebeSEric Cheng 	ASSERT(flent->fe_mbg == NULL);
828da14cebeSEric Cheng 
829da14cebeSEric Cheng 	return (B_TRUE);
830da14cebeSEric Cheng }
831da14cebeSEric Cheng 
832da14cebeSEric Cheng void
mac_flow_cleanup(flow_entry_t * flent)833da14cebeSEric Cheng mac_flow_cleanup(flow_entry_t *flent)
834da14cebeSEric Cheng {
835da14cebeSEric Cheng 	if ((flent->fe_type & FLOW_USER) == 0) {
836da14cebeSEric Cheng 		ASSERT((flent->fe_mbg == NULL && flent->fe_mcip != NULL) ||
837da14cebeSEric Cheng 		    (flent->fe_mbg != NULL && flent->fe_mcip == NULL));
838da14cebeSEric Cheng 		ASSERT(flent->fe_refcnt == 0);
839da14cebeSEric Cheng 	} else {
840da14cebeSEric Cheng 		ASSERT(flent->fe_refcnt == 1);
841da14cebeSEric Cheng 	}
842da14cebeSEric Cheng 
843da14cebeSEric Cheng 	if (flent->fe_mbg != NULL) {
844da14cebeSEric Cheng 		ASSERT(flent->fe_tx_srs == NULL);
845da14cebeSEric Cheng 		/* This is a multicast or broadcast flow entry */
846da14cebeSEric Cheng 		mac_bcast_grp_free(flent->fe_mbg);
847da14cebeSEric Cheng 		flent->fe_mbg = NULL;
848da14cebeSEric Cheng 	}
849da14cebeSEric Cheng 
850da14cebeSEric Cheng 	if (flent->fe_tx_srs != NULL) {
851da14cebeSEric Cheng 		ASSERT(flent->fe_mbg == NULL);
852da14cebeSEric Cheng 		mac_srs_free(flent->fe_tx_srs);
853da14cebeSEric Cheng 		flent->fe_tx_srs = NULL;
854da14cebeSEric Cheng 	}
855da14cebeSEric Cheng 
856da14cebeSEric Cheng 	/*
857da14cebeSEric Cheng 	 * In the normal case fe_rx_srs_cnt is 1. However in the error case
858da14cebeSEric Cheng 	 * when mac_unicast_add fails we may not have set up any SRS
859da14cebeSEric Cheng 	 * in which case fe_rx_srs_cnt will be zero.
860da14cebeSEric Cheng 	 */
861da14cebeSEric Cheng 	if (flent->fe_rx_srs_cnt != 0) {
862da14cebeSEric Cheng 		ASSERT(flent->fe_rx_srs_cnt == 1);
863da14cebeSEric Cheng 		mac_srs_free(flent->fe_rx_srs[0]);
864da14cebeSEric Cheng 		flent->fe_rx_srs[0] = NULL;
865da14cebeSEric Cheng 		flent->fe_rx_srs_cnt = 0;
866da14cebeSEric Cheng 	}
867da14cebeSEric Cheng 	ASSERT(flent->fe_rx_srs[0] == NULL);
868da14cebeSEric Cheng }
869da14cebeSEric Cheng 
870da14cebeSEric Cheng void
mac_flow_get_desc(flow_entry_t * flent,flow_desc_t * fd)871da14cebeSEric Cheng mac_flow_get_desc(flow_entry_t *flent, flow_desc_t *fd)
872da14cebeSEric Cheng {
873da14cebeSEric Cheng 	/*
874da14cebeSEric Cheng 	 * Grab the fe_lock to see a self-consistent fe_flow_desc.
875da14cebeSEric Cheng 	 * Updates to the fe_flow_desc happen under the fe_lock
876da14cebeSEric Cheng 	 * after removing the flent from the flow table
877da14cebeSEric Cheng 	 */
878da14cebeSEric Cheng 	mutex_enter(&flent->fe_lock);
879da14cebeSEric Cheng 	bcopy(&flent->fe_flow_desc, fd, sizeof (*fd));
880da14cebeSEric Cheng 	mutex_exit(&flent->fe_lock);
881da14cebeSEric Cheng }
882da14cebeSEric Cheng 
883da14cebeSEric Cheng /*
884da14cebeSEric Cheng  * Update a field of a flow entry. The mac perimeter ensures that
885da14cebeSEric Cheng  * this is the only thread doing a modify operation on this mac end point.
886da14cebeSEric Cheng  * So the flow table can't change or disappear. The ft_lock protects access
887da14cebeSEric Cheng  * to the flow entry, and holding the lock ensures that there isn't any thread
888da14cebeSEric Cheng  * accessing the flow entry or attempting a flow table lookup. However
889da14cebeSEric Cheng  * data threads that are using the flow entry based on the old descriptor
890da14cebeSEric Cheng  * will continue to use the flow entry. If strong coherence is required
891da14cebeSEric Cheng  * then the flow will have to be quiesced before the descriptor can be
892da14cebeSEric Cheng  * changed.
893da14cebeSEric Cheng  */
894da14cebeSEric Cheng void
mac_flow_set_desc(flow_entry_t * flent,flow_desc_t * fd)895da14cebeSEric Cheng mac_flow_set_desc(flow_entry_t *flent, flow_desc_t *fd)
896da14cebeSEric Cheng {
897da14cebeSEric Cheng 	flow_tab_t	*ft = flent->fe_flow_tab;
898da14cebeSEric Cheng 	flow_desc_t	old_desc;
899da14cebeSEric Cheng 	int		err;
900da14cebeSEric Cheng 
901da14cebeSEric Cheng 	if (ft == NULL) {
902da14cebeSEric Cheng 		/*
903da14cebeSEric Cheng 		 * The flow hasn't yet been inserted into the table,
904da14cebeSEric Cheng 		 * so only the caller knows about this flow, however for
905da14cebeSEric Cheng 		 * uniformity we grab the fe_lock here.
906da14cebeSEric Cheng 		 */
907da14cebeSEric Cheng 		mutex_enter(&flent->fe_lock);
908da14cebeSEric Cheng 		bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
909da14cebeSEric Cheng 		mutex_exit(&flent->fe_lock);
910da14cebeSEric Cheng 	}
911da14cebeSEric Cheng 
912da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
913da14cebeSEric Cheng 
914da14cebeSEric Cheng 	/*
915da14cebeSEric Cheng 	 * Need to remove the flow entry from the table and reinsert it,
916da14cebeSEric Cheng 	 * into a potentially diference hash line. The hash depends on
917da14cebeSEric Cheng 	 * the new descriptor fields. However access to fe_desc itself
918da14cebeSEric Cheng 	 * is always under the fe_lock. This helps log and stat functions
919da14cebeSEric Cheng 	 * see a self-consistent fe_flow_desc.
920da14cebeSEric Cheng 	 */
921da14cebeSEric Cheng 	mac_flow_remove(ft, flent, B_TRUE);
922da14cebeSEric Cheng 	old_desc = flent->fe_flow_desc;
923da14cebeSEric Cheng 
924da14cebeSEric Cheng 	mutex_enter(&flent->fe_lock);
925da14cebeSEric Cheng 	bcopy(fd, &flent->fe_flow_desc, sizeof (*fd));
926da14cebeSEric Cheng 	mutex_exit(&flent->fe_lock);
927da14cebeSEric Cheng 
928da14cebeSEric Cheng 	if (mac_flow_add(ft, flent) != 0) {
929da14cebeSEric Cheng 		/*
930da14cebeSEric Cheng 		 * The add failed say due to an invalid flow descriptor.
931da14cebeSEric Cheng 		 * Undo the update
932da14cebeSEric Cheng 		 */
933da14cebeSEric Cheng 		flent->fe_flow_desc = old_desc;
934da14cebeSEric Cheng 		err = mac_flow_add(ft, flent);
935da14cebeSEric Cheng 		ASSERT(err == 0);
936da14cebeSEric Cheng 	}
937da14cebeSEric Cheng }
938da14cebeSEric Cheng 
939da14cebeSEric Cheng void
mac_flow_set_name(flow_entry_t * flent,const char * name)940da14cebeSEric Cheng mac_flow_set_name(flow_entry_t *flent, const char *name)
941da14cebeSEric Cheng {
942da14cebeSEric Cheng 	flow_tab_t	*ft = flent->fe_flow_tab;
943da14cebeSEric Cheng 
944da14cebeSEric Cheng 	if (ft == NULL) {
945da14cebeSEric Cheng 		/*
946da14cebeSEric Cheng 		 *  The flow hasn't yet been inserted into the table,
947da14cebeSEric Cheng 		 * so only the caller knows about this flow
948da14cebeSEric Cheng 		 */
949da000602SGirish Moodalbail 		(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
950da14cebeSEric Cheng 	} else {
951da14cebeSEric Cheng 		ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
952da14cebeSEric Cheng 	}
953da14cebeSEric Cheng 
954da14cebeSEric Cheng 	mutex_enter(&flent->fe_lock);
955da000602SGirish Moodalbail 	(void) strlcpy(flent->fe_flow_name, name, MAXFLOWNAMELEN);
956da14cebeSEric Cheng 	mutex_exit(&flent->fe_lock);
957da14cebeSEric Cheng }
958da14cebeSEric Cheng 
959da14cebeSEric Cheng /*
960da14cebeSEric Cheng  * Return the client-private cookie that was associated with
961da14cebeSEric Cheng  * the flow when it was created.
962da14cebeSEric Cheng  */
963da14cebeSEric Cheng void *
mac_flow_get_client_cookie(flow_entry_t * flent)964da14cebeSEric Cheng mac_flow_get_client_cookie(flow_entry_t *flent)
965da14cebeSEric Cheng {
966da14cebeSEric Cheng 	return (flent->fe_client_cookie);
967da14cebeSEric Cheng }
968da14cebeSEric Cheng 
969da14cebeSEric Cheng /*
970da14cebeSEric Cheng  * Forward declarations.
971da14cebeSEric Cheng  */
972da14cebeSEric Cheng static uint32_t	flow_l2_hash(flow_tab_t *, flow_state_t *);
9732b24ab6bSSebastien Roy static uint32_t	flow_l2_hash_fe(flow_tab_t *, flow_entry_t *);
974da14cebeSEric Cheng static int	flow_l2_accept(flow_tab_t *, flow_state_t *);
975da14cebeSEric Cheng static uint32_t	flow_ether_hash(flow_tab_t *, flow_state_t *);
9762b24ab6bSSebastien Roy static uint32_t	flow_ether_hash_fe(flow_tab_t *, flow_entry_t *);
977da14cebeSEric Cheng static int	flow_ether_accept(flow_tab_t *, flow_state_t *);
978da14cebeSEric Cheng 
979da14cebeSEric Cheng /*
980da14cebeSEric Cheng  * Create flow table.
981da14cebeSEric Cheng  */
982da14cebeSEric Cheng void
mac_flow_tab_create(flow_ops_t * ops,flow_mask_t mask,uint_t size,mac_impl_t * mip,flow_tab_t ** ftp)983da14cebeSEric Cheng mac_flow_tab_create(flow_ops_t *ops, flow_mask_t mask, uint_t size,
984da14cebeSEric Cheng     mac_impl_t *mip, flow_tab_t **ftp)
985da14cebeSEric Cheng {
986da14cebeSEric Cheng 	flow_tab_t	*ft;
987da14cebeSEric Cheng 	flow_ops_t	*new_ops;
988da14cebeSEric Cheng 
989da14cebeSEric Cheng 	ft = kmem_cache_alloc(flow_tab_cache, KM_SLEEP);
990da14cebeSEric Cheng 	bzero(ft, sizeof (*ft));
991da14cebeSEric Cheng 
992da14cebeSEric Cheng 	ft->ft_table = kmem_zalloc(size * sizeof (flow_entry_t *), KM_SLEEP);
993da14cebeSEric Cheng 
994da14cebeSEric Cheng 	/*
995da14cebeSEric Cheng 	 * We make a copy of the ops vector instead of just pointing to it
996da14cebeSEric Cheng 	 * because we might want to customize the ops vector on a per table
997da14cebeSEric Cheng 	 * basis (e.g. for optimization).
998da14cebeSEric Cheng 	 */
999da14cebeSEric Cheng 	new_ops = &ft->ft_ops;
1000da14cebeSEric Cheng 	bcopy(ops, new_ops, sizeof (*ops));
1001da14cebeSEric Cheng 	ft->ft_mask = mask;
1002da14cebeSEric Cheng 	ft->ft_size = size;
1003da14cebeSEric Cheng 	ft->ft_mip = mip;
1004da14cebeSEric Cheng 
1005da14cebeSEric Cheng 	/*
10062b24ab6bSSebastien Roy 	 * Optimizations for DL_ETHER media.
1007da14cebeSEric Cheng 	 */
1008da14cebeSEric Cheng 	if (mip->mi_info.mi_nativemedia == DL_ETHER) {
1009da14cebeSEric Cheng 		if (new_ops->fo_hash == flow_l2_hash)
1010da14cebeSEric Cheng 			new_ops->fo_hash = flow_ether_hash;
10112b24ab6bSSebastien Roy 		if (new_ops->fo_hash_fe == flow_l2_hash_fe)
10122b24ab6bSSebastien Roy 			new_ops->fo_hash_fe = flow_ether_hash_fe;
1013da14cebeSEric Cheng 		if (new_ops->fo_accept[0] == flow_l2_accept)
1014da14cebeSEric Cheng 			new_ops->fo_accept[0] = flow_ether_accept;
1015da14cebeSEric Cheng 	}
1016da14cebeSEric Cheng 	*ftp = ft;
1017da14cebeSEric Cheng }
1018da14cebeSEric Cheng 
1019da14cebeSEric Cheng void
mac_flow_l2tab_create(mac_impl_t * mip,flow_tab_t ** ftp)1020da14cebeSEric Cheng mac_flow_l2tab_create(mac_impl_t *mip, flow_tab_t **ftp)
1021da14cebeSEric Cheng {
1022da14cebeSEric Cheng 	mac_flow_tab_create(&flow_l2_ops, FLOW_LINK_DST | FLOW_LINK_VID,
1023da14cebeSEric Cheng 	    1024, mip, ftp);
1024da14cebeSEric Cheng }
1025da14cebeSEric Cheng 
1026da14cebeSEric Cheng /*
1027da14cebeSEric Cheng  * Destroy flow table.
1028da14cebeSEric Cheng  */
1029da14cebeSEric Cheng void
mac_flow_tab_destroy(flow_tab_t * ft)1030da14cebeSEric Cheng mac_flow_tab_destroy(flow_tab_t *ft)
1031da14cebeSEric Cheng {
1032da14cebeSEric Cheng 	if (ft == NULL)
1033da14cebeSEric Cheng 		return;
1034da14cebeSEric Cheng 
1035da14cebeSEric Cheng 	ASSERT(ft->ft_flow_count == 0);
1036da14cebeSEric Cheng 	kmem_free(ft->ft_table, ft->ft_size * sizeof (flow_entry_t *));
1037da14cebeSEric Cheng 	bzero(ft, sizeof (*ft));
1038da14cebeSEric Cheng 	kmem_cache_free(flow_tab_cache, ft);
1039da14cebeSEric Cheng }
1040da14cebeSEric Cheng 
1041da14cebeSEric Cheng /*
1042da14cebeSEric Cheng  * Add a new flow entry to the global flow hash table
1043da14cebeSEric Cheng  */
1044da14cebeSEric Cheng int
mac_flow_hash_add(flow_entry_t * flent)1045da14cebeSEric Cheng mac_flow_hash_add(flow_entry_t *flent)
1046da14cebeSEric Cheng {
1047da14cebeSEric Cheng 	int	err;
1048da14cebeSEric Cheng 
1049da14cebeSEric Cheng 	rw_enter(&flow_tab_lock, RW_WRITER);
1050da14cebeSEric Cheng 	err = mod_hash_insert(flow_hash,
1051da14cebeSEric Cheng 	    (mod_hash_key_t)flent->fe_flow_name, (mod_hash_val_t)flent);
1052da14cebeSEric Cheng 	if (err != 0) {
1053da14cebeSEric Cheng 		rw_exit(&flow_tab_lock);
1054da14cebeSEric Cheng 		return (EEXIST);
1055da14cebeSEric Cheng 	}
1056da14cebeSEric Cheng 	/* Mark as inserted into the global flow hash table */
1057da14cebeSEric Cheng 	FLOW_MARK(flent, FE_G_FLOW_HASH);
1058da14cebeSEric Cheng 	rw_exit(&flow_tab_lock);
1059da14cebeSEric Cheng 	return (err);
1060da14cebeSEric Cheng }
1061da14cebeSEric Cheng 
1062da14cebeSEric Cheng /*
1063da14cebeSEric Cheng  * Remove a flow entry from the global flow hash table
1064da14cebeSEric Cheng  */
1065da14cebeSEric Cheng void
mac_flow_hash_remove(flow_entry_t * flent)1066da14cebeSEric Cheng mac_flow_hash_remove(flow_entry_t *flent)
1067da14cebeSEric Cheng {
1068da14cebeSEric Cheng 	mod_hash_val_t	val;
1069da14cebeSEric Cheng 
1070da14cebeSEric Cheng 	rw_enter(&flow_tab_lock, RW_WRITER);
1071da14cebeSEric Cheng 	VERIFY(mod_hash_remove(flow_hash,
1072da14cebeSEric Cheng 	    (mod_hash_key_t)flent->fe_flow_name, &val) == 0);
1073da14cebeSEric Cheng 
1074da14cebeSEric Cheng 	/* Clear the mark that says inserted into the global flow hash table */
1075da14cebeSEric Cheng 	FLOW_UNMARK(flent, FE_G_FLOW_HASH);
1076da14cebeSEric Cheng 	rw_exit(&flow_tab_lock);
1077da14cebeSEric Cheng }
1078da14cebeSEric Cheng 
1079da14cebeSEric Cheng /*
1080da14cebeSEric Cheng  * Retrieve a flow entry from the global flow hash table.
1081da14cebeSEric Cheng  */
1082da14cebeSEric Cheng int
mac_flow_lookup_byname(char * name,flow_entry_t ** flentp)1083da14cebeSEric Cheng mac_flow_lookup_byname(char *name, flow_entry_t **flentp)
1084da14cebeSEric Cheng {
1085da14cebeSEric Cheng 	int		err;
1086da14cebeSEric Cheng 	flow_entry_t	*flent;
1087da14cebeSEric Cheng 
1088da14cebeSEric Cheng 	rw_enter(&flow_tab_lock, RW_READER);
1089da14cebeSEric Cheng 	err = mod_hash_find(flow_hash, (mod_hash_key_t)name,
1090da14cebeSEric Cheng 	    (mod_hash_val_t *)&flent);
1091da14cebeSEric Cheng 	if (err != 0) {
1092da14cebeSEric Cheng 		rw_exit(&flow_tab_lock);
1093da14cebeSEric Cheng 		return (ENOENT);
1094da14cebeSEric Cheng 	}
1095da14cebeSEric Cheng 	ASSERT(flent != NULL);
1096da14cebeSEric Cheng 	FLOW_USER_REFHOLD(flent);
1097da14cebeSEric Cheng 	rw_exit(&flow_tab_lock);
1098da14cebeSEric Cheng 
1099da14cebeSEric Cheng 	*flentp = flent;
1100da14cebeSEric Cheng 	return (0);
1101da14cebeSEric Cheng }
1102da14cebeSEric Cheng 
1103da14cebeSEric Cheng /*
1104da14cebeSEric Cheng  * Initialize or release mac client flows by walking the subflow table.
1105da14cebeSEric Cheng  * These are typically invoked during plumb/unplumb of links.
1106da14cebeSEric Cheng  */
1107da14cebeSEric Cheng 
1108da14cebeSEric Cheng static int
mac_link_init_flows_cb(flow_entry_t * flent,void * arg)1109da14cebeSEric Cheng mac_link_init_flows_cb(flow_entry_t *flent, void *arg)
1110da14cebeSEric Cheng {
1111da14cebeSEric Cheng 	mac_client_impl_t	*mcip = arg;
1112da14cebeSEric Cheng 
1113da14cebeSEric Cheng 	if (mac_link_flow_init(arg, flent) != 0) {
1114da14cebeSEric Cheng 		cmn_err(CE_WARN, "Failed to initialize flow '%s' on link '%s'",
1115da14cebeSEric Cheng 		    flent->fe_flow_name, mcip->mci_name);
1116da14cebeSEric Cheng 	} else {
1117da14cebeSEric Cheng 		FLOW_UNMARK(flent, FE_UF_NO_DATAPATH);
1118da14cebeSEric Cheng 	}
1119da14cebeSEric Cheng 	return (0);
1120da14cebeSEric Cheng }
1121da14cebeSEric Cheng 
1122da14cebeSEric Cheng void
mac_link_init_flows(mac_client_handle_t mch)1123da14cebeSEric Cheng mac_link_init_flows(mac_client_handle_t mch)
1124da14cebeSEric Cheng {
1125da14cebeSEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
1126da14cebeSEric Cheng 
1127da14cebeSEric Cheng 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1128da14cebeSEric Cheng 	    mac_link_init_flows_cb, mcip);
1129da14cebeSEric Cheng 	/*
1130da14cebeSEric Cheng 	 * If mac client had subflow(s) configured before plumb, change
1131da14cebeSEric Cheng 	 * function to mac_rx_srs_subflow_process and in case of hardware
1132da14cebeSEric Cheng 	 * classification, disable polling.
1133da14cebeSEric Cheng 	 */
1134da14cebeSEric Cheng 	mac_client_update_classifier(mcip, B_TRUE);
1135da14cebeSEric Cheng 
1136da14cebeSEric Cheng }
1137da14cebeSEric Cheng 
1138da14cebeSEric Cheng boolean_t
mac_link_has_flows(mac_client_handle_t mch)1139da14cebeSEric Cheng mac_link_has_flows(mac_client_handle_t mch)
1140da14cebeSEric Cheng {
1141da14cebeSEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
1142da14cebeSEric Cheng 
1143da14cebeSEric Cheng 	if (!FLOW_TAB_EMPTY(mcip->mci_subflow_tab))
1144da14cebeSEric Cheng 		return (B_TRUE);
1145da14cebeSEric Cheng 
1146da14cebeSEric Cheng 	return (B_FALSE);
1147da14cebeSEric Cheng }
1148da14cebeSEric Cheng 
1149da14cebeSEric Cheng static int
mac_link_release_flows_cb(flow_entry_t * flent,void * arg)1150da14cebeSEric Cheng mac_link_release_flows_cb(flow_entry_t *flent, void *arg)
1151da14cebeSEric Cheng {
1152da14cebeSEric Cheng 	FLOW_MARK(flent, FE_UF_NO_DATAPATH);
1153da14cebeSEric Cheng 	mac_flow_wait(flent, FLOW_DRIVER_UPCALL);
1154da14cebeSEric Cheng 	mac_link_flow_clean(arg, flent);
1155da14cebeSEric Cheng 	return (0);
1156da14cebeSEric Cheng }
1157da14cebeSEric Cheng 
1158da14cebeSEric Cheng void
mac_link_release_flows(mac_client_handle_t mch)1159da14cebeSEric Cheng mac_link_release_flows(mac_client_handle_t mch)
1160da14cebeSEric Cheng {
1161da14cebeSEric Cheng 	mac_client_impl_t	*mcip = (mac_client_impl_t *)mch;
1162da14cebeSEric Cheng 
1163da14cebeSEric Cheng 	/*
1164da14cebeSEric Cheng 	 * Change the mci_flent callback back to mac_rx_srs_process()
1165da14cebeSEric Cheng 	 * because flows are about to be deactivated.
1166da14cebeSEric Cheng 	 */
1167da14cebeSEric Cheng 	mac_client_update_classifier(mcip, B_FALSE);
1168da14cebeSEric Cheng 	(void) mac_flow_walk_nolock(mcip->mci_subflow_tab,
1169da14cebeSEric Cheng 	    mac_link_release_flows_cb, mcip);
1170da14cebeSEric Cheng }
1171da14cebeSEric Cheng 
1172da14cebeSEric Cheng void
mac_rename_flow(flow_entry_t * fep,const char * new_name)1173da14cebeSEric Cheng mac_rename_flow(flow_entry_t *fep, const char *new_name)
1174da14cebeSEric Cheng {
1175da14cebeSEric Cheng 	mac_flow_set_name(fep, new_name);
1176da14cebeSEric Cheng 	if (fep->fe_ksp != NULL) {
1177da14cebeSEric Cheng 		flow_stat_destroy(fep);
1178da14cebeSEric Cheng 		flow_stat_create(fep);
1179da14cebeSEric Cheng 	}
1180da14cebeSEric Cheng }
1181da14cebeSEric Cheng 
1182da14cebeSEric Cheng /*
1183da14cebeSEric Cheng  * mac_link_flow_init()
1184da14cebeSEric Cheng  * Internal flow interface used for allocating SRSs and related
1185da14cebeSEric Cheng  * data structures. Not meant to be used by mac clients.
1186da14cebeSEric Cheng  */
1187da14cebeSEric Cheng int
mac_link_flow_init(mac_client_handle_t mch,flow_entry_t * sub_flow)1188da14cebeSEric Cheng mac_link_flow_init(mac_client_handle_t mch, flow_entry_t *sub_flow)
1189da14cebeSEric Cheng {
1190da14cebeSEric Cheng 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
1191da14cebeSEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
1192da14cebeSEric Cheng 	int			err;
1193da14cebeSEric Cheng 
1194da14cebeSEric Cheng 	ASSERT(mch != NULL);
1195da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1196da14cebeSEric Cheng 
1197da14cebeSEric Cheng 	if ((err = mac_datapath_setup(mcip, sub_flow, SRST_FLOW)) != 0)
1198da14cebeSEric Cheng 		return (err);
1199da14cebeSEric Cheng 
1200da14cebeSEric Cheng 	sub_flow->fe_mcip = mcip;
1201da14cebeSEric Cheng 
1202da14cebeSEric Cheng 	return (0);
1203da14cebeSEric Cheng }
1204da14cebeSEric Cheng 
1205da14cebeSEric Cheng /*
1206da14cebeSEric Cheng  * mac_link_flow_add()
1207*bbf21555SRichard Lowe  * Used by flowadm(8) or kernel mac clients for creating flows.
1208da14cebeSEric Cheng  */
1209da14cebeSEric Cheng int
mac_link_flow_add(datalink_id_t linkid,char * flow_name,flow_desc_t * flow_desc,mac_resource_props_t * mrp)1210da14cebeSEric Cheng mac_link_flow_add(datalink_id_t linkid, char *flow_name,
1211da14cebeSEric Cheng     flow_desc_t *flow_desc, mac_resource_props_t *mrp)
1212da14cebeSEric Cheng {
1213da14cebeSEric Cheng 	flow_entry_t		*flent = NULL;
1214da14cebeSEric Cheng 	int			err;
1215da14cebeSEric Cheng 	dls_dl_handle_t		dlh;
1216da14cebeSEric Cheng 	dls_link_t		*dlp;
1217da14cebeSEric Cheng 	boolean_t		link_held = B_FALSE;
1218da14cebeSEric Cheng 	boolean_t		hash_added = B_FALSE;
1219da14cebeSEric Cheng 	mac_perim_handle_t	mph;
1220da14cebeSEric Cheng 
1221da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1222da14cebeSEric Cheng 	if (err == 0) {
1223da14cebeSEric Cheng 		FLOW_USER_REFRELE(flent);
1224da14cebeSEric Cheng 		return (EEXIST);
1225da14cebeSEric Cheng 	}
1226da14cebeSEric Cheng 
1227da14cebeSEric Cheng 	/*
1228da14cebeSEric Cheng 	 * First create a flow entry given the description provided
1229da14cebeSEric Cheng 	 * by the caller.
1230da14cebeSEric Cheng 	 */
1231da14cebeSEric Cheng 	err = mac_flow_create(flow_desc, mrp, flow_name, NULL,
1232da14cebeSEric Cheng 	    FLOW_USER | FLOW_OTHER, &flent);
1233da14cebeSEric Cheng 
1234da14cebeSEric Cheng 	if (err != 0)
1235da14cebeSEric Cheng 		return (err);
1236da14cebeSEric Cheng 
1237da14cebeSEric Cheng 	/*
1238da14cebeSEric Cheng 	 * We've got a local variable referencing this flow now, so we need
1239da14cebeSEric Cheng 	 * to hold it. We'll release this flow before returning.
1240da14cebeSEric Cheng 	 * All failures until we return will undo any action that may internally
1241da14cebeSEric Cheng 	 * held the flow, so the last REFRELE will assure a clean freeing
1242da14cebeSEric Cheng 	 * of resources.
1243da14cebeSEric Cheng 	 */
1244da14cebeSEric Cheng 	FLOW_REFHOLD(flent);
1245da14cebeSEric Cheng 
1246da14cebeSEric Cheng 	flent->fe_link_id = linkid;
1247da14cebeSEric Cheng 	FLOW_MARK(flent, FE_INCIPIENT);
1248da14cebeSEric Cheng 
1249da14cebeSEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
1250da14cebeSEric Cheng 	if (err != 0) {
1251da14cebeSEric Cheng 		FLOW_FINAL_REFRELE(flent);
1252da14cebeSEric Cheng 		return (err);
1253da14cebeSEric Cheng 	}
1254da14cebeSEric Cheng 
1255da14cebeSEric Cheng 	/*
1256da14cebeSEric Cheng 	 * dls will eventually be merged with mac so it's ok
1257da14cebeSEric Cheng 	 * to call dls' internal functions.
1258da14cebeSEric Cheng 	 */
1259da14cebeSEric Cheng 	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1260da14cebeSEric Cheng 	if (err != 0)
1261da14cebeSEric Cheng 		goto bail;
1262da14cebeSEric Cheng 
1263da14cebeSEric Cheng 	link_held = B_TRUE;
1264da14cebeSEric Cheng 
1265da14cebeSEric Cheng 	/*
1266da14cebeSEric Cheng 	 * Add the flow to the global flow table, this table will be per
1267da14cebeSEric Cheng 	 * exclusive zone so each zone can have its own flow namespace.
1268da14cebeSEric Cheng 	 * RFE 6625651 will fix this.
1269da14cebeSEric Cheng 	 *
1270da14cebeSEric Cheng 	 */
1271da14cebeSEric Cheng 	if ((err = mac_flow_hash_add(flent)) != 0)
1272da14cebeSEric Cheng 		goto bail;
1273da14cebeSEric Cheng 
1274da14cebeSEric Cheng 	hash_added = B_TRUE;
1275da14cebeSEric Cheng 
1276da14cebeSEric Cheng 	/*
1277da14cebeSEric Cheng 	 * do not allow flows to be configured on an anchor VNIC
1278da14cebeSEric Cheng 	 */
1279da14cebeSEric Cheng 	if (mac_capab_get(dlp->dl_mh, MAC_CAPAB_ANCHOR_VNIC, NULL)) {
1280da14cebeSEric Cheng 		err = ENOTSUP;
1281da14cebeSEric Cheng 		goto bail;
1282da14cebeSEric Cheng 	}
1283da14cebeSEric Cheng 
1284da14cebeSEric Cheng 	/*
1285da14cebeSEric Cheng 	 * Add the subflow to the subflow table. Also instantiate the flow
1286ae6aa22aSVenugopal Iyer 	 * in the mac if there is an active user (we check if the MAC client's
1287ae6aa22aSVenugopal Iyer 	 * datapath has been setup).
1288da14cebeSEric Cheng 	 */
1289ae6aa22aSVenugopal Iyer 	err = mac_flow_add_subflow(dlp->dl_mch, flent,
1290ae6aa22aSVenugopal Iyer 	    MCIP_DATAPATH_SETUP((mac_client_impl_t *)dlp->dl_mch));
1291da14cebeSEric Cheng 	if (err != 0)
1292da14cebeSEric Cheng 		goto bail;
1293da14cebeSEric Cheng 
1294da14cebeSEric Cheng 	FLOW_UNMARK(flent, FE_INCIPIENT);
1295da14cebeSEric Cheng 	dls_devnet_rele_link(dlh, dlp);
1296da14cebeSEric Cheng 	mac_perim_exit(mph);
1297da14cebeSEric Cheng 	return (0);
1298da14cebeSEric Cheng 
1299da14cebeSEric Cheng bail:
1300da14cebeSEric Cheng 	if (hash_added)
1301da14cebeSEric Cheng 		mac_flow_hash_remove(flent);
1302da14cebeSEric Cheng 
1303da14cebeSEric Cheng 	if (link_held)
1304da14cebeSEric Cheng 		dls_devnet_rele_link(dlh, dlp);
1305da14cebeSEric Cheng 
1306da14cebeSEric Cheng 	/*
1307da14cebeSEric Cheng 	 * Wait for any transient global flow hash refs to clear
1308da14cebeSEric Cheng 	 * and then release the creation reference on the flow
1309da14cebeSEric Cheng 	 */
1310da14cebeSEric Cheng 	mac_flow_wait(flent, FLOW_USER_REF);
1311da14cebeSEric Cheng 	FLOW_FINAL_REFRELE(flent);
1312da14cebeSEric Cheng 	mac_perim_exit(mph);
1313da14cebeSEric Cheng 	return (err);
1314da14cebeSEric Cheng }
1315da14cebeSEric Cheng 
1316da14cebeSEric Cheng /*
1317da14cebeSEric Cheng  * mac_link_flow_clean()
1318da14cebeSEric Cheng  * Internal flow interface used for freeing SRSs and related
1319da14cebeSEric Cheng  * data structures. Not meant to be used by mac clients.
1320da14cebeSEric Cheng  */
1321da14cebeSEric Cheng void
mac_link_flow_clean(mac_client_handle_t mch,flow_entry_t * sub_flow)1322da14cebeSEric Cheng mac_link_flow_clean(mac_client_handle_t mch, flow_entry_t *sub_flow)
1323da14cebeSEric Cheng {
1324da14cebeSEric Cheng 	mac_client_impl_t 	*mcip = (mac_client_impl_t *)mch;
1325da14cebeSEric Cheng 	mac_impl_t		*mip = mcip->mci_mip;
1326da14cebeSEric Cheng 	boolean_t		last_subflow;
1327da14cebeSEric Cheng 
1328da14cebeSEric Cheng 	ASSERT(mch != NULL);
1329da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)mip));
1330da14cebeSEric Cheng 
1331da14cebeSEric Cheng 	/*
1332da14cebeSEric Cheng 	 * This sub flow entry may fail to be fully initialized by
1333da14cebeSEric Cheng 	 * mac_link_flow_init(). If so, simply return.
1334da14cebeSEric Cheng 	 */
1335da14cebeSEric Cheng 	if (sub_flow->fe_mcip == NULL)
1336da14cebeSEric Cheng 		return;
1337da14cebeSEric Cheng 
1338da14cebeSEric Cheng 	last_subflow = FLOW_TAB_EMPTY(mcip->mci_subflow_tab);
1339da14cebeSEric Cheng 	/*
1340da14cebeSEric Cheng 	 * Tear down the data path
1341da14cebeSEric Cheng 	 */
1342da14cebeSEric Cheng 	mac_datapath_teardown(mcip, sub_flow, SRST_FLOW);
1343da14cebeSEric Cheng 	sub_flow->fe_mcip = NULL;
1344da14cebeSEric Cheng 
1345da14cebeSEric Cheng 	/*
1346da14cebeSEric Cheng 	 * Delete the SRSs associated with this subflow. If this is being
1347*bbf21555SRichard Lowe 	 * driven by flowadm(8) then the subflow will be deleted by
1348da14cebeSEric Cheng 	 * dls_rem_flow. However if this is a result of the interface being
1349da14cebeSEric Cheng 	 * unplumbed then the subflow itself won't be deleted.
1350da14cebeSEric Cheng 	 */
1351da14cebeSEric Cheng 	mac_flow_cleanup(sub_flow);
1352da14cebeSEric Cheng 
1353da14cebeSEric Cheng 	/*
1354da14cebeSEric Cheng 	 * If all the subflows are gone, renable some of the stuff
1355da14cebeSEric Cheng 	 * we disabled when adding a subflow, polling etc.
1356da14cebeSEric Cheng 	 */
1357da14cebeSEric Cheng 	if (last_subflow) {
1358da14cebeSEric Cheng 		/*
1359da14cebeSEric Cheng 		 * The subflow table itself is not protected by any locks or
1360da14cebeSEric Cheng 		 * refcnts. Hence quiesce the client upfront before clearing
1361da14cebeSEric Cheng 		 * mci_subflow_tab.
1362da14cebeSEric Cheng 		 */
1363da14cebeSEric Cheng 		mac_client_quiesce(mcip);
1364da14cebeSEric Cheng 		mac_client_update_classifier(mcip, B_FALSE);
1365da14cebeSEric Cheng 		mac_flow_tab_destroy(mcip->mci_subflow_tab);
1366da14cebeSEric Cheng 		mcip->mci_subflow_tab = NULL;
1367da14cebeSEric Cheng 		mac_client_restart(mcip);
1368da14cebeSEric Cheng 	}
1369da14cebeSEric Cheng }
1370da14cebeSEric Cheng 
1371da14cebeSEric Cheng /*
1372da14cebeSEric Cheng  * mac_link_flow_remove()
1373*bbf21555SRichard Lowe  * Used by flowadm(8) or kernel mac clients for removing flows.
1374da14cebeSEric Cheng  */
1375da14cebeSEric Cheng int
mac_link_flow_remove(char * flow_name)1376da14cebeSEric Cheng mac_link_flow_remove(char *flow_name)
1377da14cebeSEric Cheng {
1378da14cebeSEric Cheng 	flow_entry_t		*flent;
1379da14cebeSEric Cheng 	mac_perim_handle_t	mph;
1380da14cebeSEric Cheng 	int			err;
1381da14cebeSEric Cheng 	datalink_id_t		linkid;
1382da14cebeSEric Cheng 
1383da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1384da14cebeSEric Cheng 	if (err != 0)
1385da14cebeSEric Cheng 		return (err);
1386da14cebeSEric Cheng 
1387da14cebeSEric Cheng 	linkid = flent->fe_link_id;
1388da14cebeSEric Cheng 	FLOW_USER_REFRELE(flent);
1389da14cebeSEric Cheng 
1390da14cebeSEric Cheng 	/*
1391da14cebeSEric Cheng 	 * The perim must be acquired before acquiring any other references
1392da14cebeSEric Cheng 	 * to maintain the lock and perimeter hierarchy. Please note the
1393da14cebeSEric Cheng 	 * FLOW_REFRELE above.
1394da14cebeSEric Cheng 	 */
1395da14cebeSEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
1396da14cebeSEric Cheng 	if (err != 0)
1397da14cebeSEric Cheng 		return (err);
1398da14cebeSEric Cheng 
1399da14cebeSEric Cheng 	/*
1400da14cebeSEric Cheng 	 * Note the second lookup of the flow, because a concurrent thread
1401da14cebeSEric Cheng 	 * may have removed it already while we were waiting to enter the
1402da14cebeSEric Cheng 	 * link's perimeter.
1403da14cebeSEric Cheng 	 */
1404da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1405da14cebeSEric Cheng 	if (err != 0) {
1406da14cebeSEric Cheng 		mac_perim_exit(mph);
1407da14cebeSEric Cheng 		return (err);
1408da14cebeSEric Cheng 	}
1409da14cebeSEric Cheng 	FLOW_USER_REFRELE(flent);
1410da14cebeSEric Cheng 
1411da14cebeSEric Cheng 	/*
1412da14cebeSEric Cheng 	 * Remove the flow from the subflow table and deactivate the flow
1413da14cebeSEric Cheng 	 * by quiescing and removings its SRSs
1414da14cebeSEric Cheng 	 */
1415da14cebeSEric Cheng 	mac_flow_rem_subflow(flent);
1416da14cebeSEric Cheng 
1417da14cebeSEric Cheng 	/*
1418da14cebeSEric Cheng 	 * Finally, remove the flow from the global table.
1419da14cebeSEric Cheng 	 */
1420da14cebeSEric Cheng 	mac_flow_hash_remove(flent);
1421da14cebeSEric Cheng 
1422da14cebeSEric Cheng 	/*
1423da14cebeSEric Cheng 	 * Wait for any transient global flow hash refs to clear
1424da14cebeSEric Cheng 	 * and then release the creation reference on the flow
1425da14cebeSEric Cheng 	 */
1426da14cebeSEric Cheng 	mac_flow_wait(flent, FLOW_USER_REF);
1427da14cebeSEric Cheng 	FLOW_FINAL_REFRELE(flent);
1428da14cebeSEric Cheng 
1429da14cebeSEric Cheng 	mac_perim_exit(mph);
1430da14cebeSEric Cheng 
1431da14cebeSEric Cheng 	return (0);
1432da14cebeSEric Cheng }
1433da14cebeSEric Cheng 
1434da14cebeSEric Cheng /*
1435da14cebeSEric Cheng  * mac_link_flow_modify()
1436da14cebeSEric Cheng  * Modifies the properties of a flow identified by its name.
1437da14cebeSEric Cheng  */
1438da14cebeSEric Cheng int
mac_link_flow_modify(char * flow_name,mac_resource_props_t * mrp)1439da14cebeSEric Cheng mac_link_flow_modify(char *flow_name, mac_resource_props_t *mrp)
1440da14cebeSEric Cheng {
1441da14cebeSEric Cheng 	flow_entry_t		*flent;
1442da14cebeSEric Cheng 	mac_client_impl_t 	*mcip;
1443da14cebeSEric Cheng 	int			err = 0;
1444da14cebeSEric Cheng 	mac_perim_handle_t	mph;
1445da14cebeSEric Cheng 	datalink_id_t		linkid;
1446da14cebeSEric Cheng 	flow_tab_t		*flow_tab;
1447da14cebeSEric Cheng 
14480dc2366fSVenugopal Iyer 	err = mac_validate_props(NULL, mrp);
1449da14cebeSEric Cheng 	if (err != 0)
1450da14cebeSEric Cheng 		return (err);
1451da14cebeSEric Cheng 
1452da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1453da14cebeSEric Cheng 	if (err != 0)
1454da14cebeSEric Cheng 		return (err);
1455da14cebeSEric Cheng 
1456da14cebeSEric Cheng 	linkid = flent->fe_link_id;
1457da14cebeSEric Cheng 	FLOW_USER_REFRELE(flent);
1458da14cebeSEric Cheng 
1459da14cebeSEric Cheng 	/*
1460da14cebeSEric Cheng 	 * The perim must be acquired before acquiring any other references
1461da14cebeSEric Cheng 	 * to maintain the lock and perimeter hierarchy. Please note the
1462da14cebeSEric Cheng 	 * FLOW_REFRELE above.
1463da14cebeSEric Cheng 	 */
1464da14cebeSEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
1465da14cebeSEric Cheng 	if (err != 0)
1466da14cebeSEric Cheng 		return (err);
1467da14cebeSEric Cheng 
1468da14cebeSEric Cheng 	/*
1469da14cebeSEric Cheng 	 * Note the second lookup of the flow, because a concurrent thread
1470da14cebeSEric Cheng 	 * may have removed it already while we were waiting to enter the
1471da14cebeSEric Cheng 	 * link's perimeter.
1472da14cebeSEric Cheng 	 */
1473da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1474da14cebeSEric Cheng 	if (err != 0) {
1475da14cebeSEric Cheng 		mac_perim_exit(mph);
1476da14cebeSEric Cheng 		return (err);
1477da14cebeSEric Cheng 	}
1478da14cebeSEric Cheng 	FLOW_USER_REFRELE(flent);
1479da14cebeSEric Cheng 
1480da14cebeSEric Cheng 	/*
1481da14cebeSEric Cheng 	 * If this flow is attached to a MAC client, then pass the request
1482da14cebeSEric Cheng 	 * along to the client.
1483da14cebeSEric Cheng 	 * Otherwise, just update the cached values.
1484da14cebeSEric Cheng 	 */
1485da14cebeSEric Cheng 	mcip = flent->fe_mcip;
1486da14cebeSEric Cheng 	mac_update_resources(mrp, &flent->fe_resource_props, B_TRUE);
1487da14cebeSEric Cheng 	if (mcip != NULL) {
1488da14cebeSEric Cheng 		if ((flow_tab = mcip->mci_subflow_tab) == NULL) {
1489da14cebeSEric Cheng 			err = ENOENT;
1490da14cebeSEric Cheng 		} else {
1491da14cebeSEric Cheng 			mac_flow_modify(flow_tab, flent, mrp);
1492da14cebeSEric Cheng 		}
1493da14cebeSEric Cheng 	} else {
1494da14cebeSEric Cheng 		(void) mac_flow_modify_props(flent, mrp);
1495da14cebeSEric Cheng 	}
1496da14cebeSEric Cheng 
1497da14cebeSEric Cheng done:
1498da14cebeSEric Cheng 	mac_perim_exit(mph);
1499da14cebeSEric Cheng 	return (err);
1500da14cebeSEric Cheng }
1501da14cebeSEric Cheng 
1502da14cebeSEric Cheng 
1503da14cebeSEric Cheng /*
1504da14cebeSEric Cheng  * State structure and misc functions used by mac_link_flow_walk().
1505da14cebeSEric Cheng  */
1506da14cebeSEric Cheng typedef struct {
1507da14cebeSEric Cheng 	int	(*ws_func)(mac_flowinfo_t *, void *);
1508da14cebeSEric Cheng 	void	*ws_arg;
1509da14cebeSEric Cheng } flow_walk_state_t;
1510da14cebeSEric Cheng 
1511da14cebeSEric Cheng static void
mac_link_flowinfo_copy(mac_flowinfo_t * finfop,flow_entry_t * flent)1512da14cebeSEric Cheng mac_link_flowinfo_copy(mac_flowinfo_t *finfop, flow_entry_t *flent)
1513da14cebeSEric Cheng {
1514da000602SGirish Moodalbail 	(void) strlcpy(finfop->fi_flow_name, flent->fe_flow_name,
1515da000602SGirish Moodalbail 	    MAXFLOWNAMELEN);
1516da14cebeSEric Cheng 	finfop->fi_link_id = flent->fe_link_id;
1517da14cebeSEric Cheng 	finfop->fi_flow_desc = flent->fe_flow_desc;
1518da14cebeSEric Cheng 	finfop->fi_resource_props = flent->fe_resource_props;
1519da14cebeSEric Cheng }
1520da14cebeSEric Cheng 
1521da14cebeSEric Cheng static int
mac_link_flow_walk_cb(flow_entry_t * flent,void * arg)1522da14cebeSEric Cheng mac_link_flow_walk_cb(flow_entry_t *flent, void *arg)
1523da14cebeSEric Cheng {
1524da14cebeSEric Cheng 	flow_walk_state_t	*statep = arg;
15250dc2366fSVenugopal Iyer 	mac_flowinfo_t		*finfo;
15260dc2366fSVenugopal Iyer 	int			err;
1527da14cebeSEric Cheng 
15280dc2366fSVenugopal Iyer 	finfo = kmem_zalloc(sizeof (*finfo), KM_SLEEP);
15290dc2366fSVenugopal Iyer 	mac_link_flowinfo_copy(finfo, flent);
15300dc2366fSVenugopal Iyer 	err = statep->ws_func(finfo, statep->ws_arg);
15310dc2366fSVenugopal Iyer 	kmem_free(finfo, sizeof (*finfo));
15320dc2366fSVenugopal Iyer 	return (err);
1533da14cebeSEric Cheng }
1534da14cebeSEric Cheng 
1535da14cebeSEric Cheng /*
1536da14cebeSEric Cheng  * mac_link_flow_walk()
1537da14cebeSEric Cheng  * Invokes callback 'func' for all flows belonging to the specified link.
1538da14cebeSEric Cheng  */
1539da14cebeSEric Cheng int
mac_link_flow_walk(datalink_id_t linkid,int (* func)(mac_flowinfo_t *,void *),void * arg)1540da14cebeSEric Cheng mac_link_flow_walk(datalink_id_t linkid,
1541da14cebeSEric Cheng     int (*func)(mac_flowinfo_t *, void *), void *arg)
1542da14cebeSEric Cheng {
1543da14cebeSEric Cheng 	mac_client_impl_t	*mcip;
1544da14cebeSEric Cheng 	mac_perim_handle_t	mph;
1545da14cebeSEric Cheng 	flow_walk_state_t	state;
1546da14cebeSEric Cheng 	dls_dl_handle_t		dlh;
1547da14cebeSEric Cheng 	dls_link_t		*dlp;
1548da14cebeSEric Cheng 	int			err;
1549da14cebeSEric Cheng 
1550da14cebeSEric Cheng 	err = mac_perim_enter_by_linkid(linkid, &mph);
1551da14cebeSEric Cheng 	if (err != 0)
1552da14cebeSEric Cheng 		return (err);
1553da14cebeSEric Cheng 
1554da14cebeSEric Cheng 	err = dls_devnet_hold_link(linkid, &dlh, &dlp);
1555da14cebeSEric Cheng 	if (err != 0) {
1556da14cebeSEric Cheng 		mac_perim_exit(mph);
1557da14cebeSEric Cheng 		return (err);
1558da14cebeSEric Cheng 	}
1559da14cebeSEric Cheng 
1560da14cebeSEric Cheng 	mcip = (mac_client_impl_t *)dlp->dl_mch;
1561da14cebeSEric Cheng 	state.ws_func = func;
1562da14cebeSEric Cheng 	state.ws_arg = arg;
1563da14cebeSEric Cheng 
1564da14cebeSEric Cheng 	err = mac_flow_walk_nolock(mcip->mci_subflow_tab,
1565da14cebeSEric Cheng 	    mac_link_flow_walk_cb, &state);
1566da14cebeSEric Cheng 
1567da14cebeSEric Cheng 	dls_devnet_rele_link(dlh, dlp);
1568da14cebeSEric Cheng 	mac_perim_exit(mph);
1569da14cebeSEric Cheng 	return (err);
1570da14cebeSEric Cheng }
1571da14cebeSEric Cheng 
1572da14cebeSEric Cheng /*
1573da14cebeSEric Cheng  * mac_link_flow_info()
1574da14cebeSEric Cheng  * Retrieves information about a specific flow.
1575da14cebeSEric Cheng  */
1576da14cebeSEric Cheng int
mac_link_flow_info(char * flow_name,mac_flowinfo_t * finfo)1577da14cebeSEric Cheng mac_link_flow_info(char *flow_name, mac_flowinfo_t *finfo)
1578da14cebeSEric Cheng {
1579da14cebeSEric Cheng 	flow_entry_t	*flent;
1580da14cebeSEric Cheng 	int		err;
1581da14cebeSEric Cheng 
1582da14cebeSEric Cheng 	err = mac_flow_lookup_byname(flow_name, &flent);
1583da14cebeSEric Cheng 	if (err != 0)
1584da14cebeSEric Cheng 		return (err);
1585da14cebeSEric Cheng 
1586da14cebeSEric Cheng 	mac_link_flowinfo_copy(finfo, flent);
1587da14cebeSEric Cheng 	FLOW_USER_REFRELE(flent);
1588da14cebeSEric Cheng 	return (0);
1589da14cebeSEric Cheng }
1590da14cebeSEric Cheng 
/*
 * Hash an Ethernet address 'a' and VLAN id 'v' into a table of 's' buckets:
 * the last three octets of the address are summed, XORed with the VLAN id
 * and reduced modulo the table size.
 */
#define	HASH_ETHER_VID(a, v, s)						\
	((((uint32_t)(a)[3] + (uint32_t)(a)[4] + (uint32_t)(a)[5]) ^	\
	    (v)) % (s))
1596da14cebeSEric Cheng 
/*
 * Generic layer-2 address hashing function that takes an address and address
 * length as input.  This is the DJB hash function.
 *
 * 'addr' points to 'addrlen' bytes which are only read, never modified.
 * The running hash starts at 5381 and is multiplied by 33 before each byte
 * is added; the result is reduced modulo 'htsize', which must be non-zero.
 */
static uint32_t
flow_l2_addrhash(const uint8_t *addr, size_t addrlen, size_t htsize)
{
	uint32_t	hash = 5381;
	size_t		i;

	for (i = 0; i < addrlen; i++)
		hash = ((hash << 5) + hash) + addr[i];
	return (hash % htsize);
}
16112b24ab6bSSebastien Roy 
/*
 * True when the write pointer of the current mblk of flow state 's' lies
 * before 'end'.
 */
#define	PKT_TOO_SMALL(s, end) ((s)->fs_mp->b_wptr < (end))

/*
 * If 'start' sits exactly at the write pointer of the current mblk of 's',
 * move 's' on to the continuation mblk and reset 'start' to that mblk's
 * read pointer. When there is no continuation mblk, the ENCLOSING FUNCTION
 * returns EINVAL. 'start' must be a modifiable lvalue.
 *
 * Wrapped in do { } while (0) so that the macro behaves as a single
 * statement (e.g. as the body of an if with an else); every use must be
 * followed by a semicolon.
 */
#define	CHECK_AND_ADJUST_START_PTR(s, start) do {	\
	if ((s)->fs_mp->b_wptr == (start)) {		\
		mblk_t	*next = (s)->fs_mp->b_cont;	\
		if (next == NULL)			\
			return (EINVAL);		\
							\
		(s)->fs_mp = next;			\
		(start) = next->b_rptr;			\
	}						\
} while (0)
1624ae6aa22aSVenugopal Iyer 
1625da14cebeSEric Cheng /* ARGSUSED */
1626da14cebeSEric Cheng static boolean_t
flow_l2_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1627da14cebeSEric Cheng flow_l2_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1628da14cebeSEric Cheng {
1629da14cebeSEric Cheng 	flow_l2info_t		*l2 = &s->fs_l2info;
1630da14cebeSEric Cheng 	flow_desc_t		*fd = &flent->fe_flow_desc;
1631da14cebeSEric Cheng 
1632da14cebeSEric Cheng 	return (l2->l2_vid == fd->fd_vid &&
1633da14cebeSEric Cheng 	    bcmp(l2->l2_daddr, fd->fd_dst_mac, fd->fd_mac_len) == 0);
1634da14cebeSEric Cheng }
1635da14cebeSEric Cheng 
1636da14cebeSEric Cheng /*
1637da14cebeSEric Cheng  * Layer 2 hash function.
1638da14cebeSEric Cheng  * Must be paired with flow_l2_accept() within a set of flow_ops
1639da14cebeSEric Cheng  * because it assumes the dest address is already extracted.
1640da14cebeSEric Cheng  */
1641da14cebeSEric Cheng static uint32_t
flow_l2_hash(flow_tab_t * ft,flow_state_t * s)1642da14cebeSEric Cheng flow_l2_hash(flow_tab_t *ft, flow_state_t *s)
1643da14cebeSEric Cheng {
16442b24ab6bSSebastien Roy 	return (flow_l2_addrhash(s->fs_l2info.l2_daddr,
16452b24ab6bSSebastien Roy 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1646da14cebeSEric Cheng }
1647da14cebeSEric Cheng 
1648da14cebeSEric Cheng /*
1649da14cebeSEric Cheng  * This is the generic layer 2 accept function.
1650da14cebeSEric Cheng  * It makes use of mac_header_info() to extract the header length,
1651da14cebeSEric Cheng  * sap, vlan ID and destination address.
1652da14cebeSEric Cheng  */
1653da14cebeSEric Cheng static int
flow_l2_accept(flow_tab_t * ft,flow_state_t * s)1654da14cebeSEric Cheng flow_l2_accept(flow_tab_t *ft, flow_state_t *s)
1655da14cebeSEric Cheng {
1656da14cebeSEric Cheng 	boolean_t		is_ether;
1657da14cebeSEric Cheng 	flow_l2info_t		*l2 = &s->fs_l2info;
1658da14cebeSEric Cheng 	mac_header_info_t	mhi;
1659da14cebeSEric Cheng 	int			err;
1660da14cebeSEric Cheng 
1661da14cebeSEric Cheng 	is_ether = (ft->ft_mip->mi_info.mi_nativemedia == DL_ETHER);
1662da14cebeSEric Cheng 	if ((err = mac_header_info((mac_handle_t)ft->ft_mip,
1663da14cebeSEric Cheng 	    s->fs_mp, &mhi)) != 0) {
1664da14cebeSEric Cheng 		if (err == EINVAL)
1665da14cebeSEric Cheng 			err = ENOBUFS;
1666da14cebeSEric Cheng 
1667da14cebeSEric Cheng 		return (err);
1668da14cebeSEric Cheng 	}
1669da14cebeSEric Cheng 
1670da14cebeSEric Cheng 	l2->l2_start = s->fs_mp->b_rptr;
1671da14cebeSEric Cheng 	l2->l2_daddr = (uint8_t *)mhi.mhi_daddr;
1672da14cebeSEric Cheng 
1673da14cebeSEric Cheng 	if (is_ether && mhi.mhi_bindsap == ETHERTYPE_VLAN &&
1674da14cebeSEric Cheng 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1675da14cebeSEric Cheng 		struct ether_vlan_header	*evhp =
1676da14cebeSEric Cheng 		    (struct ether_vlan_header *)l2->l2_start;
1677da14cebeSEric Cheng 
1678da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1679da14cebeSEric Cheng 			return (ENOBUFS);
1680da14cebeSEric Cheng 
1681da14cebeSEric Cheng 		l2->l2_sap = ntohs(evhp->ether_type);
1682da14cebeSEric Cheng 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1683da14cebeSEric Cheng 		l2->l2_hdrsize = sizeof (*evhp);
1684da14cebeSEric Cheng 	} else {
1685da14cebeSEric Cheng 		l2->l2_sap = mhi.mhi_bindsap;
1686da14cebeSEric Cheng 		l2->l2_vid = 0;
1687da14cebeSEric Cheng 		l2->l2_hdrsize = (uint32_t)mhi.mhi_hdrsize;
1688da14cebeSEric Cheng 	}
1689da14cebeSEric Cheng 	return (0);
1690da14cebeSEric Cheng }
1691da14cebeSEric Cheng 
1692da14cebeSEric Cheng /*
1693da14cebeSEric Cheng  * flow_ether_hash()/accept() are optimized versions of flow_l2_hash()/
1694da14cebeSEric Cheng  * accept(). The notable difference is that dest address is now extracted
1695da14cebeSEric Cheng  * by hash() rather than by accept(). This saves a few memory references
1696da14cebeSEric Cheng  * for flow tables that do not care about mac addresses.
1697da14cebeSEric Cheng  */
1698da14cebeSEric Cheng static uint32_t
flow_ether_hash(flow_tab_t * ft,flow_state_t * s)1699da14cebeSEric Cheng flow_ether_hash(flow_tab_t *ft, flow_state_t *s)
1700da14cebeSEric Cheng {
1701da14cebeSEric Cheng 	flow_l2info_t			*l2 = &s->fs_l2info;
1702da14cebeSEric Cheng 	struct ether_vlan_header	*evhp;
1703da14cebeSEric Cheng 
1704da14cebeSEric Cheng 	evhp = (struct ether_vlan_header *)l2->l2_start;
1705da14cebeSEric Cheng 	l2->l2_daddr = evhp->ether_dhost.ether_addr_octet;
17062b24ab6bSSebastien Roy 	return (HASH_ETHER_VID(l2->l2_daddr, l2->l2_vid, ft->ft_size));
17072b24ab6bSSebastien Roy }
17082b24ab6bSSebastien Roy 
17092b24ab6bSSebastien Roy static uint32_t
flow_ether_hash_fe(flow_tab_t * ft,flow_entry_t * flent)17102b24ab6bSSebastien Roy flow_ether_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
17112b24ab6bSSebastien Roy {
17122b24ab6bSSebastien Roy 	flow_desc_t	*fd = &flent->fe_flow_desc;
17132b24ab6bSSebastien Roy 
17142b24ab6bSSebastien Roy 	ASSERT((fd->fd_mask & FLOW_LINK_VID) != 0 || fd->fd_vid == 0);
17152b24ab6bSSebastien Roy 	return (HASH_ETHER_VID(fd->fd_dst_mac, fd->fd_vid, ft->ft_size));
1716da14cebeSEric Cheng }
1717da14cebeSEric Cheng 
1718da14cebeSEric Cheng /* ARGSUSED */
1719da14cebeSEric Cheng static int
flow_ether_accept(flow_tab_t * ft,flow_state_t * s)1720da14cebeSEric Cheng flow_ether_accept(flow_tab_t *ft, flow_state_t *s)
1721da14cebeSEric Cheng {
1722da14cebeSEric Cheng 	flow_l2info_t			*l2 = &s->fs_l2info;
1723da14cebeSEric Cheng 	struct ether_vlan_header	*evhp;
1724da14cebeSEric Cheng 	uint16_t			sap;
1725da14cebeSEric Cheng 
1726da14cebeSEric Cheng 	evhp = (struct ether_vlan_header *)s->fs_mp->b_rptr;
1727da14cebeSEric Cheng 	l2->l2_start = (uchar_t *)evhp;
1728da14cebeSEric Cheng 
1729da14cebeSEric Cheng 	if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (struct ether_header)))
1730da14cebeSEric Cheng 		return (ENOBUFS);
1731da14cebeSEric Cheng 
1732da14cebeSEric Cheng 	if ((sap = ntohs(evhp->ether_tpid)) == ETHERTYPE_VLAN &&
1733da14cebeSEric Cheng 	    ((s->fs_flags & FLOW_IGNORE_VLAN) == 0)) {
1734da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l2->l2_start + sizeof (*evhp)))
1735da14cebeSEric Cheng 			return (ENOBUFS);
1736da14cebeSEric Cheng 
1737da14cebeSEric Cheng 		l2->l2_sap = ntohs(evhp->ether_type);
1738da14cebeSEric Cheng 		l2->l2_vid = VLAN_ID(ntohs(evhp->ether_tci));
1739da14cebeSEric Cheng 		l2->l2_hdrsize = sizeof (struct ether_vlan_header);
1740da14cebeSEric Cheng 	} else {
1741da14cebeSEric Cheng 		l2->l2_sap = sap;
1742da14cebeSEric Cheng 		l2->l2_vid = 0;
1743da14cebeSEric Cheng 		l2->l2_hdrsize = sizeof (struct ether_header);
1744da14cebeSEric Cheng 	}
1745da14cebeSEric Cheng 	return (0);
1746da14cebeSEric Cheng }
1747da14cebeSEric Cheng 
1748da14cebeSEric Cheng /*
1749da14cebeSEric Cheng  * Validates a layer 2 flow entry.
1750da14cebeSEric Cheng  */
1751da14cebeSEric Cheng static int
flow_l2_accept_fe(flow_tab_t * ft,flow_entry_t * flent)1752da14cebeSEric Cheng flow_l2_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1753da14cebeSEric Cheng {
1754da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1755da14cebeSEric Cheng 
1756da14cebeSEric Cheng 	/*
17572b24ab6bSSebastien Roy 	 * Dest address is mandatory, and 0 length addresses are not yet
17582b24ab6bSSebastien Roy 	 * supported.
1759da14cebeSEric Cheng 	 */
17602b24ab6bSSebastien Roy 	if ((fd->fd_mask & FLOW_LINK_DST) == 0 || fd->fd_mac_len == 0)
1761da14cebeSEric Cheng 		return (EINVAL);
1762da14cebeSEric Cheng 
1763da14cebeSEric Cheng 	if ((fd->fd_mask & FLOW_LINK_VID) != 0) {
1764da14cebeSEric Cheng 		/*
1765da14cebeSEric Cheng 		 * VLAN flows are only supported over ethernet macs.
1766da14cebeSEric Cheng 		 */
1767da14cebeSEric Cheng 		if (ft->ft_mip->mi_info.mi_nativemedia != DL_ETHER)
1768da14cebeSEric Cheng 			return (EINVAL);
1769da14cebeSEric Cheng 
1770da14cebeSEric Cheng 		if (fd->fd_vid == 0)
1771da14cebeSEric Cheng 			return (EINVAL);
1772da14cebeSEric Cheng 
1773da14cebeSEric Cheng 	}
1774da14cebeSEric Cheng 	flent->fe_match = flow_l2_match;
1775da14cebeSEric Cheng 	return (0);
1776da14cebeSEric Cheng }
1777da14cebeSEric Cheng 
1778da14cebeSEric Cheng /*
1779da14cebeSEric Cheng  * Calculates hash index of flow entry.
1780da14cebeSEric Cheng  */
1781da14cebeSEric Cheng static uint32_t
flow_l2_hash_fe(flow_tab_t * ft,flow_entry_t * flent)1782da14cebeSEric Cheng flow_l2_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
1783da14cebeSEric Cheng {
1784da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1785da14cebeSEric Cheng 
17862b24ab6bSSebastien Roy 	ASSERT((fd->fd_mask & FLOW_LINK_VID) == 0 && fd->fd_vid == 0);
17872b24ab6bSSebastien Roy 	return (flow_l2_addrhash(fd->fd_dst_mac,
17882b24ab6bSSebastien Roy 	    ft->ft_mip->mi_type->mt_addr_length, ft->ft_size));
1789da14cebeSEric Cheng }
1790da14cebeSEric Cheng 
1791da14cebeSEric Cheng /*
1792da14cebeSEric Cheng  * This is used for duplicate flow checking.
1793da14cebeSEric Cheng  */
1794da14cebeSEric Cheng /* ARGSUSED */
1795da14cebeSEric Cheng static boolean_t
flow_l2_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)1796da14cebeSEric Cheng flow_l2_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
1797da14cebeSEric Cheng {
1798da14cebeSEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
1799da14cebeSEric Cheng 
1800da14cebeSEric Cheng 	ASSERT(fd1->fd_mac_len == fd2->fd_mac_len && fd1->fd_mac_len != 0);
1801da14cebeSEric Cheng 	return (bcmp(&fd1->fd_dst_mac, &fd2->fd_dst_mac,
1802da14cebeSEric Cheng 	    fd1->fd_mac_len) == 0 && fd1->fd_vid == fd2->fd_vid);
1803da14cebeSEric Cheng }
1804da14cebeSEric Cheng 
1805da14cebeSEric Cheng /*
1806da14cebeSEric Cheng  * Generic flow entry insertion function.
1807da14cebeSEric Cheng  * Used by flow tables that do not have ordering requirements.
1808da14cebeSEric Cheng  */
1809da14cebeSEric Cheng /* ARGSUSED */
1810da14cebeSEric Cheng static int
flow_generic_insert_fe(flow_tab_t * ft,flow_entry_t ** headp,flow_entry_t * flent)1811da14cebeSEric Cheng flow_generic_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
1812da14cebeSEric Cheng     flow_entry_t *flent)
1813da14cebeSEric Cheng {
1814da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
1815da14cebeSEric Cheng 
1816da14cebeSEric Cheng 	if (*headp != NULL) {
1817da14cebeSEric Cheng 		ASSERT(flent->fe_next == NULL);
1818da14cebeSEric Cheng 		flent->fe_next = *headp;
1819da14cebeSEric Cheng 	}
1820da14cebeSEric Cheng 	*headp = flent;
1821da14cebeSEric Cheng 	return (0);
1822da14cebeSEric Cheng }
1823da14cebeSEric Cheng 
1824da14cebeSEric Cheng /*
1825da14cebeSEric Cheng  * IP version independent DSField matching function.
1826da14cebeSEric Cheng  */
1827da14cebeSEric Cheng /* ARGSUSED */
1828da14cebeSEric Cheng static boolean_t
flow_ip_dsfield_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1829da14cebeSEric Cheng flow_ip_dsfield_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1830da14cebeSEric Cheng {
1831da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1832da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1833da14cebeSEric Cheng 
1834da14cebeSEric Cheng 	switch (l3info->l3_version) {
1835da14cebeSEric Cheng 	case IPV4_VERSION: {
1836da14cebeSEric Cheng 		ipha_t		*ipha = (ipha_t *)l3info->l3_start;
1837da14cebeSEric Cheng 
1838da14cebeSEric Cheng 		return ((ipha->ipha_type_of_service &
1839da14cebeSEric Cheng 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
1840da14cebeSEric Cheng 	}
1841da14cebeSEric Cheng 	case IPV6_VERSION: {
1842da14cebeSEric Cheng 		ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
1843da14cebeSEric Cheng 
1844da14cebeSEric Cheng 		return ((IPV6_FLOW_TCLASS(ip6h->ip6_vcf) &
1845da14cebeSEric Cheng 		    fd->fd_dsfield_mask) == fd->fd_dsfield);
1846da14cebeSEric Cheng 	}
1847da14cebeSEric Cheng 	default:
1848da14cebeSEric Cheng 		return (B_FALSE);
1849da14cebeSEric Cheng 	}
1850da14cebeSEric Cheng }
1851da14cebeSEric Cheng 
1852da14cebeSEric Cheng /*
1853da14cebeSEric Cheng  * IP v4 and v6 address matching.
1854da14cebeSEric Cheng  * The netmask only needs to be applied on the packet but not on the
1855da14cebeSEric Cheng  * flow_desc since fd_local_addr/fd_remote_addr are premasked subnets.
1856da14cebeSEric Cheng  */
1857da14cebeSEric Cheng 
1858da14cebeSEric Cheng /* ARGSUSED */
1859da14cebeSEric Cheng static boolean_t
flow_ip_v4_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1860da14cebeSEric Cheng flow_ip_v4_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1861da14cebeSEric Cheng {
1862da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1863da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1864da14cebeSEric Cheng 	ipha_t		*ipha = (ipha_t *)l3info->l3_start;
1865da14cebeSEric Cheng 	in_addr_t	addr;
1866da14cebeSEric Cheng 
1867da14cebeSEric Cheng 	addr = (l3info->l3_dst_or_src ? ipha->ipha_dst : ipha->ipha_src);
1868da14cebeSEric Cheng 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1869da14cebeSEric Cheng 		return ((addr & V4_PART_OF_V6(fd->fd_local_netmask)) ==
1870da14cebeSEric Cheng 		    V4_PART_OF_V6(fd->fd_local_addr));
1871da14cebeSEric Cheng 	}
1872da14cebeSEric Cheng 	return ((addr & V4_PART_OF_V6(fd->fd_remote_netmask)) ==
1873da14cebeSEric Cheng 	    V4_PART_OF_V6(fd->fd_remote_addr));
1874da14cebeSEric Cheng }
1875da14cebeSEric Cheng 
1876da14cebeSEric Cheng /* ARGSUSED */
1877da14cebeSEric Cheng static boolean_t
flow_ip_v6_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1878da14cebeSEric Cheng flow_ip_v6_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1879da14cebeSEric Cheng {
1880da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1881da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1882da14cebeSEric Cheng 	ip6_t		*ip6h = (ip6_t *)l3info->l3_start;
1883da14cebeSEric Cheng 	in6_addr_t	*addrp;
1884da14cebeSEric Cheng 
1885da14cebeSEric Cheng 	addrp = (l3info->l3_dst_or_src ? &ip6h->ip6_dst : &ip6h->ip6_src);
1886da14cebeSEric Cheng 	if ((fd->fd_mask & FLOW_IP_LOCAL) != 0) {
1887da14cebeSEric Cheng 		return (V6_MASK_EQ(*addrp, fd->fd_local_netmask,
1888da14cebeSEric Cheng 		    fd->fd_local_addr));
1889da14cebeSEric Cheng 	}
1890da14cebeSEric Cheng 	return (V6_MASK_EQ(*addrp, fd->fd_remote_netmask, fd->fd_remote_addr));
1891da14cebeSEric Cheng }
1892da14cebeSEric Cheng 
1893da14cebeSEric Cheng /* ARGSUSED */
1894da14cebeSEric Cheng static boolean_t
flow_ip_proto_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)1895da14cebeSEric Cheng flow_ip_proto_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
1896da14cebeSEric Cheng {
1897da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1898da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1899da14cebeSEric Cheng 
1900da14cebeSEric Cheng 	return (l3info->l3_protocol == fd->fd_protocol);
1901da14cebeSEric Cheng }
1902da14cebeSEric Cheng 
1903da14cebeSEric Cheng static uint32_t
flow_ip_hash(flow_tab_t * ft,flow_state_t * s)1904da14cebeSEric Cheng flow_ip_hash(flow_tab_t *ft, flow_state_t *s)
1905da14cebeSEric Cheng {
1906da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1907da14cebeSEric Cheng 	flow_mask_t	mask = ft->ft_mask;
1908da14cebeSEric Cheng 
1909da14cebeSEric Cheng 	if ((mask & FLOW_IP_LOCAL) != 0) {
1910da14cebeSEric Cheng 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
1911da14cebeSEric Cheng 	} else if ((mask & FLOW_IP_REMOTE) != 0) {
1912da14cebeSEric Cheng 		l3info->l3_dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
1913da14cebeSEric Cheng 	} else if ((mask & FLOW_IP_DSFIELD) != 0) {
1914da14cebeSEric Cheng 		/*
1915da14cebeSEric Cheng 		 * DSField flents are arranged as a single list.
1916da14cebeSEric Cheng 		 */
1917da14cebeSEric Cheng 		return (0);
1918da14cebeSEric Cheng 	}
1919da14cebeSEric Cheng 	/*
1920da14cebeSEric Cheng 	 * IP addr flents are hashed into two lists, v4 or v6.
1921da14cebeSEric Cheng 	 */
1922da14cebeSEric Cheng 	ASSERT(ft->ft_size >= 2);
1923da14cebeSEric Cheng 	return ((l3info->l3_version == IPV4_VERSION) ? 0 : 1);
1924da14cebeSEric Cheng }
1925da14cebeSEric Cheng 
1926da14cebeSEric Cheng static uint32_t
flow_ip_proto_hash(flow_tab_t * ft,flow_state_t * s)1927da14cebeSEric Cheng flow_ip_proto_hash(flow_tab_t *ft, flow_state_t *s)
1928da14cebeSEric Cheng {
1929da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1930da14cebeSEric Cheng 
1931da14cebeSEric Cheng 	return (l3info->l3_protocol % ft->ft_size);
1932da14cebeSEric Cheng }
1933da14cebeSEric Cheng 
1934da14cebeSEric Cheng /* ARGSUSED */
1935da14cebeSEric Cheng static int
flow_ip_accept(flow_tab_t * ft,flow_state_t * s)1936da14cebeSEric Cheng flow_ip_accept(flow_tab_t *ft, flow_state_t *s)
1937da14cebeSEric Cheng {
1938da14cebeSEric Cheng 	flow_l2info_t	*l2info = &s->fs_l2info;
1939da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
1940da14cebeSEric Cheng 	uint16_t	sap = l2info->l2_sap;
1941da14cebeSEric Cheng 	uchar_t		*l3_start;
1942da14cebeSEric Cheng 
1943ae6aa22aSVenugopal Iyer 	l3_start = l2info->l2_start + l2info->l2_hdrsize;
1944ae6aa22aSVenugopal Iyer 
1945ae6aa22aSVenugopal Iyer 	/*
1946ae6aa22aSVenugopal Iyer 	 * Adjust start pointer if we're at the end of an mblk.
1947ae6aa22aSVenugopal Iyer 	 */
1948ae6aa22aSVenugopal Iyer 	CHECK_AND_ADJUST_START_PTR(s, l3_start);
1949ae6aa22aSVenugopal Iyer 
1950ae6aa22aSVenugopal Iyer 	l3info->l3_start = l3_start;
1951da14cebeSEric Cheng 	if (!OK_32PTR(l3_start))
1952da14cebeSEric Cheng 		return (EINVAL);
1953da14cebeSEric Cheng 
1954da14cebeSEric Cheng 	switch (sap) {
1955da14cebeSEric Cheng 	case ETHERTYPE_IP: {
1956da14cebeSEric Cheng 		ipha_t	*ipha = (ipha_t *)l3_start;
1957da14cebeSEric Cheng 
1958da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l3_start + IP_SIMPLE_HDR_LENGTH))
1959da14cebeSEric Cheng 			return (ENOBUFS);
1960da14cebeSEric Cheng 
1961da14cebeSEric Cheng 		l3info->l3_hdrsize = IPH_HDR_LENGTH(ipha);
1962da14cebeSEric Cheng 		l3info->l3_protocol = ipha->ipha_protocol;
1963da14cebeSEric Cheng 		l3info->l3_version = IPV4_VERSION;
1964da14cebeSEric Cheng 		l3info->l3_fragmented =
1965da14cebeSEric Cheng 		    IS_V4_FRAGMENT(ipha->ipha_fragment_offset_and_flags);
1966da14cebeSEric Cheng 		break;
1967da14cebeSEric Cheng 	}
1968da14cebeSEric Cheng 	case ETHERTYPE_IPV6: {
19690dc2366fSVenugopal Iyer 		ip6_t		*ip6h = (ip6_t *)l3_start;
19700dc2366fSVenugopal Iyer 		ip6_frag_t	*frag = NULL;
19710dc2366fSVenugopal Iyer 		uint16_t	ip6_hdrlen;
19720dc2366fSVenugopal Iyer 		uint8_t		nexthdr;
1973da14cebeSEric Cheng 
19740dc2366fSVenugopal Iyer 		if (!mac_ip_hdr_length_v6(ip6h, s->fs_mp->b_wptr, &ip6_hdrlen,
19750dc2366fSVenugopal Iyer 		    &nexthdr, &frag)) {
1976da14cebeSEric Cheng 			return (ENOBUFS);
1977da14cebeSEric Cheng 		}
1978da14cebeSEric Cheng 		l3info->l3_hdrsize = ip6_hdrlen;
1979da14cebeSEric Cheng 		l3info->l3_protocol = nexthdr;
1980da14cebeSEric Cheng 		l3info->l3_version = IPV6_VERSION;
19810dc2366fSVenugopal Iyer 		l3info->l3_fragmented = (frag != NULL);
1982da14cebeSEric Cheng 		break;
1983da14cebeSEric Cheng 	}
1984da14cebeSEric Cheng 	default:
1985da14cebeSEric Cheng 		return (EINVAL);
1986da14cebeSEric Cheng 	}
1987da14cebeSEric Cheng 	return (0);
1988da14cebeSEric Cheng }
1989da14cebeSEric Cheng 
1990da14cebeSEric Cheng /* ARGSUSED */
1991da14cebeSEric Cheng static int
flow_ip_proto_accept_fe(flow_tab_t * ft,flow_entry_t * flent)1992da14cebeSEric Cheng flow_ip_proto_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
1993da14cebeSEric Cheng {
1994da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
1995da14cebeSEric Cheng 
1996da14cebeSEric Cheng 	switch (fd->fd_protocol) {
1997da14cebeSEric Cheng 	case IPPROTO_TCP:
1998da14cebeSEric Cheng 	case IPPROTO_UDP:
1999da14cebeSEric Cheng 	case IPPROTO_SCTP:
2000da14cebeSEric Cheng 	case IPPROTO_ICMP:
2001da14cebeSEric Cheng 	case IPPROTO_ICMPV6:
2002da14cebeSEric Cheng 		flent->fe_match = flow_ip_proto_match;
2003da14cebeSEric Cheng 		return (0);
2004da14cebeSEric Cheng 	default:
2005da14cebeSEric Cheng 		return (EINVAL);
2006da14cebeSEric Cheng 	}
2007da14cebeSEric Cheng }
2008da14cebeSEric Cheng 
2009da14cebeSEric Cheng /* ARGSUSED */
2010da14cebeSEric Cheng static int
flow_ip_accept_fe(flow_tab_t * ft,flow_entry_t * flent)2011da14cebeSEric Cheng flow_ip_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2012da14cebeSEric Cheng {
2013da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2014da14cebeSEric Cheng 	flow_mask_t	mask;
2015da14cebeSEric Cheng 	uint8_t		version;
2016da14cebeSEric Cheng 	in6_addr_t	*addr, *netmask;
2017da14cebeSEric Cheng 
2018da14cebeSEric Cheng 	/*
2019da14cebeSEric Cheng 	 * DSField does not require a IP version.
2020da14cebeSEric Cheng 	 */
2021da14cebeSEric Cheng 	if (fd->fd_mask == FLOW_IP_DSFIELD) {
2022da14cebeSEric Cheng 		if (fd->fd_dsfield_mask == 0)
2023da14cebeSEric Cheng 			return (EINVAL);
2024da14cebeSEric Cheng 
2025da14cebeSEric Cheng 		flent->fe_match = flow_ip_dsfield_match;
2026da14cebeSEric Cheng 		return (0);
2027da14cebeSEric Cheng 	}
2028da14cebeSEric Cheng 
2029da14cebeSEric Cheng 	/*
2030da14cebeSEric Cheng 	 * IP addresses must come with a version to avoid ambiguity.
2031da14cebeSEric Cheng 	 */
2032da14cebeSEric Cheng 	if ((fd->fd_mask & FLOW_IP_VERSION) == 0)
2033da14cebeSEric Cheng 		return (EINVAL);
2034da14cebeSEric Cheng 
2035da14cebeSEric Cheng 	version = fd->fd_ipversion;
2036da14cebeSEric Cheng 	if (version != IPV4_VERSION && version != IPV6_VERSION)
2037da14cebeSEric Cheng 		return (EINVAL);
2038da14cebeSEric Cheng 
2039da14cebeSEric Cheng 	mask = fd->fd_mask & ~FLOW_IP_VERSION;
2040da14cebeSEric Cheng 	switch (mask) {
2041da14cebeSEric Cheng 	case FLOW_IP_LOCAL:
2042da14cebeSEric Cheng 		addr = &fd->fd_local_addr;
2043da14cebeSEric Cheng 		netmask = &fd->fd_local_netmask;
2044da14cebeSEric Cheng 		break;
2045da14cebeSEric Cheng 	case FLOW_IP_REMOTE:
2046da14cebeSEric Cheng 		addr = &fd->fd_remote_addr;
2047da14cebeSEric Cheng 		netmask = &fd->fd_remote_netmask;
2048da14cebeSEric Cheng 		break;
2049da14cebeSEric Cheng 	default:
2050da14cebeSEric Cheng 		return (EINVAL);
2051da14cebeSEric Cheng 	}
2052da14cebeSEric Cheng 
2053da14cebeSEric Cheng 	/*
2054da14cebeSEric Cheng 	 * Apply netmask onto specified address.
2055da14cebeSEric Cheng 	 */
2056da14cebeSEric Cheng 	V6_MASK_COPY(*addr, *netmask, *addr);
2057da14cebeSEric Cheng 	if (version == IPV4_VERSION) {
2058da14cebeSEric Cheng 		ipaddr_t	v4addr = V4_PART_OF_V6((*addr));
2059da14cebeSEric Cheng 		ipaddr_t	v4mask = V4_PART_OF_V6((*netmask));
2060da14cebeSEric Cheng 
2061da14cebeSEric Cheng 		if (v4addr == 0 || v4mask == 0)
2062da14cebeSEric Cheng 			return (EINVAL);
2063da14cebeSEric Cheng 		flent->fe_match = flow_ip_v4_match;
2064da14cebeSEric Cheng 	} else {
2065da14cebeSEric Cheng 		if (IN6_IS_ADDR_UNSPECIFIED(addr) ||
2066da14cebeSEric Cheng 		    IN6_IS_ADDR_UNSPECIFIED(netmask))
2067da14cebeSEric Cheng 			return (EINVAL);
2068da14cebeSEric Cheng 		flent->fe_match = flow_ip_v6_match;
2069da14cebeSEric Cheng 	}
2070da14cebeSEric Cheng 	return (0);
2071da14cebeSEric Cheng }
2072da14cebeSEric Cheng 
2073da14cebeSEric Cheng static uint32_t
flow_ip_proto_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2074da14cebeSEric Cheng flow_ip_proto_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2075da14cebeSEric Cheng {
2076da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2077da14cebeSEric Cheng 
2078da14cebeSEric Cheng 	return (fd->fd_protocol % ft->ft_size);
2079da14cebeSEric Cheng }
2080da14cebeSEric Cheng 
2081da14cebeSEric Cheng static uint32_t
flow_ip_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2082da14cebeSEric Cheng flow_ip_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2083da14cebeSEric Cheng {
2084da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2085da14cebeSEric Cheng 
2086da14cebeSEric Cheng 	/*
2087da14cebeSEric Cheng 	 * DSField flents are arranged as a single list.
2088da14cebeSEric Cheng 	 */
2089da14cebeSEric Cheng 	if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2090da14cebeSEric Cheng 		return (0);
2091da14cebeSEric Cheng 
2092da14cebeSEric Cheng 	/*
2093da14cebeSEric Cheng 	 * IP addr flents are hashed into two lists, v4 or v6.
2094da14cebeSEric Cheng 	 */
2095da14cebeSEric Cheng 	ASSERT(ft->ft_size >= 2);
2096da14cebeSEric Cheng 	return ((fd->fd_ipversion == IPV4_VERSION) ? 0 : 1);
2097da14cebeSEric Cheng }
2098da14cebeSEric Cheng 
2099da14cebeSEric Cheng /* ARGSUSED */
2100da14cebeSEric Cheng static boolean_t
flow_ip_proto_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2101da14cebeSEric Cheng flow_ip_proto_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2102da14cebeSEric Cheng {
2103da14cebeSEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2104da14cebeSEric Cheng 
2105da14cebeSEric Cheng 	return (fd1->fd_protocol == fd2->fd_protocol);
2106da14cebeSEric Cheng }
2107da14cebeSEric Cheng 
2108da14cebeSEric Cheng /* ARGSUSED */
2109da14cebeSEric Cheng static boolean_t
flow_ip_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2110da14cebeSEric Cheng flow_ip_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2111da14cebeSEric Cheng {
2112da14cebeSEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2113da14cebeSEric Cheng 	in6_addr_t	*a1, *m1, *a2, *m2;
2114da14cebeSEric Cheng 
2115da14cebeSEric Cheng 	ASSERT(fd1->fd_mask == fd2->fd_mask);
2116da14cebeSEric Cheng 	if (fd1->fd_mask == FLOW_IP_DSFIELD) {
2117da14cebeSEric Cheng 		return (fd1->fd_dsfield == fd2->fd_dsfield &&
2118da14cebeSEric Cheng 		    fd1->fd_dsfield_mask == fd2->fd_dsfield_mask);
2119da14cebeSEric Cheng 	}
2120da14cebeSEric Cheng 
2121da14cebeSEric Cheng 	/*
2122da14cebeSEric Cheng 	 * flow_ip_accept_fe() already validated the version.
2123da14cebeSEric Cheng 	 */
2124da14cebeSEric Cheng 	ASSERT((fd1->fd_mask & FLOW_IP_VERSION) != 0);
2125da14cebeSEric Cheng 	if (fd1->fd_ipversion != fd2->fd_ipversion)
2126da14cebeSEric Cheng 		return (B_FALSE);
2127da14cebeSEric Cheng 
2128da14cebeSEric Cheng 	switch (fd1->fd_mask & ~FLOW_IP_VERSION) {
2129da14cebeSEric Cheng 	case FLOW_IP_LOCAL:
2130da14cebeSEric Cheng 		a1 = &fd1->fd_local_addr;
2131da14cebeSEric Cheng 		m1 = &fd1->fd_local_netmask;
2132da14cebeSEric Cheng 		a2 = &fd2->fd_local_addr;
2133da14cebeSEric Cheng 		m2 = &fd2->fd_local_netmask;
2134da14cebeSEric Cheng 		break;
2135da14cebeSEric Cheng 	case FLOW_IP_REMOTE:
2136da14cebeSEric Cheng 		a1 = &fd1->fd_remote_addr;
2137da14cebeSEric Cheng 		m1 = &fd1->fd_remote_netmask;
2138da14cebeSEric Cheng 		a2 = &fd2->fd_remote_addr;
2139da14cebeSEric Cheng 		m2 = &fd2->fd_remote_netmask;
2140da14cebeSEric Cheng 		break;
2141da14cebeSEric Cheng 	default:
2142da14cebeSEric Cheng 		/*
2143da14cebeSEric Cheng 		 * This is unreachable given the checks in
2144da14cebeSEric Cheng 		 * flow_ip_accept_fe().
2145da14cebeSEric Cheng 		 */
2146da14cebeSEric Cheng 		return (B_FALSE);
2147da14cebeSEric Cheng 	}
2148da14cebeSEric Cheng 
2149da14cebeSEric Cheng 	if (fd1->fd_ipversion == IPV4_VERSION) {
2150da14cebeSEric Cheng 		return (V4_PART_OF_V6((*a1)) == V4_PART_OF_V6((*a2)) &&
2151da14cebeSEric Cheng 		    V4_PART_OF_V6((*m1)) == V4_PART_OF_V6((*m2)));
2152da14cebeSEric Cheng 
2153da14cebeSEric Cheng 	} else {
2154da14cebeSEric Cheng 		return (IN6_ARE_ADDR_EQUAL(a1, a2) &&
2155da14cebeSEric Cheng 		    IN6_ARE_ADDR_EQUAL(m1, m2));
2156da14cebeSEric Cheng 	}
2157da14cebeSEric Cheng }
2158da14cebeSEric Cheng 
2159da14cebeSEric Cheng static int
flow_ip_mask2plen(in6_addr_t * v6mask)2160da14cebeSEric Cheng flow_ip_mask2plen(in6_addr_t *v6mask)
2161da14cebeSEric Cheng {
2162da14cebeSEric Cheng 	int		bits;
2163da14cebeSEric Cheng 	int		plen = IPV6_ABITS;
2164da14cebeSEric Cheng 	int		i;
2165da14cebeSEric Cheng 
2166da14cebeSEric Cheng 	for (i = 3; i >= 0; i--) {
2167da14cebeSEric Cheng 		if (v6mask->s6_addr32[i] == 0) {
2168da14cebeSEric Cheng 			plen -= 32;
2169da14cebeSEric Cheng 			continue;
2170da14cebeSEric Cheng 		}
2171da14cebeSEric Cheng 		bits = ffs(ntohl(v6mask->s6_addr32[i])) - 1;
2172da14cebeSEric Cheng 		if (bits == 0)
2173da14cebeSEric Cheng 			break;
2174da14cebeSEric Cheng 		plen -= bits;
2175da14cebeSEric Cheng 	}
2176da14cebeSEric Cheng 	return (plen);
2177da14cebeSEric Cheng }
2178da14cebeSEric Cheng 
2179da14cebeSEric Cheng /* ARGSUSED */
2180da14cebeSEric Cheng static int
flow_ip_insert_fe(flow_tab_t * ft,flow_entry_t ** headp,flow_entry_t * flent)2181da14cebeSEric Cheng flow_ip_insert_fe(flow_tab_t *ft, flow_entry_t **headp,
2182da14cebeSEric Cheng     flow_entry_t *flent)
2183da14cebeSEric Cheng {
2184da14cebeSEric Cheng 	flow_entry_t	**p = headp;
2185da14cebeSEric Cheng 	flow_desc_t	*fd0, *fd;
2186da14cebeSEric Cheng 	in6_addr_t	*m0, *m;
2187da14cebeSEric Cheng 	int		plen0, plen;
2188da14cebeSEric Cheng 
2189da14cebeSEric Cheng 	ASSERT(MAC_PERIM_HELD((mac_handle_t)ft->ft_mip));
2190da14cebeSEric Cheng 
2191da14cebeSEric Cheng 	/*
2192da14cebeSEric Cheng 	 * No special ordering needed for dsfield.
2193da14cebeSEric Cheng 	 */
2194da14cebeSEric Cheng 	fd0 = &flent->fe_flow_desc;
2195da14cebeSEric Cheng 	if ((fd0->fd_mask & FLOW_IP_DSFIELD) != 0) {
2196da14cebeSEric Cheng 		if (*p != NULL) {
2197da14cebeSEric Cheng 			ASSERT(flent->fe_next == NULL);
2198da14cebeSEric Cheng 			flent->fe_next = *p;
2199da14cebeSEric Cheng 		}
2200da14cebeSEric Cheng 		*p = flent;
2201da14cebeSEric Cheng 		return (0);
2202da14cebeSEric Cheng 	}
2203da14cebeSEric Cheng 
2204da14cebeSEric Cheng 	/*
2205da14cebeSEric Cheng 	 * IP address flows are arranged in descending prefix length order.
2206da14cebeSEric Cheng 	 */
2207da14cebeSEric Cheng 	m0 = ((fd0->fd_mask & FLOW_IP_LOCAL) != 0) ?
2208da14cebeSEric Cheng 	    &fd0->fd_local_netmask : &fd0->fd_remote_netmask;
2209da14cebeSEric Cheng 	plen0 = flow_ip_mask2plen(m0);
2210da14cebeSEric Cheng 	ASSERT(plen0 != 0);
2211da14cebeSEric Cheng 
2212da14cebeSEric Cheng 	for (; *p != NULL; p = &(*p)->fe_next) {
2213da14cebeSEric Cheng 		fd = &(*p)->fe_flow_desc;
2214da14cebeSEric Cheng 
2215da14cebeSEric Cheng 		/*
2216da14cebeSEric Cheng 		 * Normally a dsfield flent shouldn't end up on the same
2217da14cebeSEric Cheng 		 * list as an IP address because flow tables are (for now)
2218da14cebeSEric Cheng 		 * disjoint. If we decide to support both IP and dsfield
2219da14cebeSEric Cheng 		 * in the same table in the future, this check will allow
2220da14cebeSEric Cheng 		 * for that.
2221da14cebeSEric Cheng 		 */
2222da14cebeSEric Cheng 		if ((fd->fd_mask & FLOW_IP_DSFIELD) != 0)
2223da14cebeSEric Cheng 			continue;
2224da14cebeSEric Cheng 
2225da14cebeSEric Cheng 		/*
2226da14cebeSEric Cheng 		 * We also allow for the mixing of local and remote address
2227da14cebeSEric Cheng 		 * flents within one list.
2228da14cebeSEric Cheng 		 */
2229da14cebeSEric Cheng 		m = ((fd->fd_mask & FLOW_IP_LOCAL) != 0) ?
2230da14cebeSEric Cheng 		    &fd->fd_local_netmask : &fd->fd_remote_netmask;
2231da14cebeSEric Cheng 		plen = flow_ip_mask2plen(m);
2232da14cebeSEric Cheng 
2233da14cebeSEric Cheng 		if (plen <= plen0)
2234da14cebeSEric Cheng 			break;
2235da14cebeSEric Cheng 	}
2236da14cebeSEric Cheng 	if (*p != NULL) {
2237da14cebeSEric Cheng 		ASSERT(flent->fe_next == NULL);
2238da14cebeSEric Cheng 		flent->fe_next = *p;
2239da14cebeSEric Cheng 	}
2240da14cebeSEric Cheng 	*p = flent;
2241da14cebeSEric Cheng 	return (0);
2242da14cebeSEric Cheng }
2243da14cebeSEric Cheng 
2244da14cebeSEric Cheng /*
2245da14cebeSEric Cheng  * Transport layer protocol and port matching functions.
2246da14cebeSEric Cheng  */
2247da14cebeSEric Cheng 
2248da14cebeSEric Cheng /* ARGSUSED */
2249da14cebeSEric Cheng static boolean_t
flow_transport_lport_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)2250da14cebeSEric Cheng flow_transport_lport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2251da14cebeSEric Cheng {
2252da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
2253da14cebeSEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
2254da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2255da14cebeSEric Cheng 
2256da14cebeSEric Cheng 	return (fd->fd_protocol == l3info->l3_protocol &&
2257da14cebeSEric Cheng 	    fd->fd_local_port == l4info->l4_hash_port);
2258da14cebeSEric Cheng }
2259da14cebeSEric Cheng 
2260da14cebeSEric Cheng /* ARGSUSED */
2261da14cebeSEric Cheng static boolean_t
flow_transport_rport_match(flow_tab_t * ft,flow_entry_t * flent,flow_state_t * s)2262da14cebeSEric Cheng flow_transport_rport_match(flow_tab_t *ft, flow_entry_t *flent, flow_state_t *s)
2263da14cebeSEric Cheng {
2264da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
2265da14cebeSEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
2266da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2267da14cebeSEric Cheng 
2268da14cebeSEric Cheng 	return (fd->fd_protocol == l3info->l3_protocol &&
2269da14cebeSEric Cheng 	    fd->fd_remote_port == l4info->l4_hash_port);
2270da14cebeSEric Cheng }
2271da14cebeSEric Cheng 
2272da14cebeSEric Cheng /*
2273da14cebeSEric Cheng  * Transport hash function.
2274da14cebeSEric Cheng  * Since we only support either local or remote port flows,
2275da14cebeSEric Cheng  * we only need to extract one of the ports to be used for
2276da14cebeSEric Cheng  * matching.
2277da14cebeSEric Cheng  */
2278da14cebeSEric Cheng static uint32_t
flow_transport_hash(flow_tab_t * ft,flow_state_t * s)2279da14cebeSEric Cheng flow_transport_hash(flow_tab_t *ft, flow_state_t *s)
2280da14cebeSEric Cheng {
2281da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
2282da14cebeSEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
2283da14cebeSEric Cheng 	uint8_t		proto = l3info->l3_protocol;
2284da14cebeSEric Cheng 	boolean_t	dst_or_src;
2285da14cebeSEric Cheng 
2286da14cebeSEric Cheng 	if ((ft->ft_mask & FLOW_ULP_PORT_LOCAL) != 0) {
2287da14cebeSEric Cheng 		dst_or_src = ((s->fs_flags & FLOW_INBOUND) != 0);
2288da14cebeSEric Cheng 	} else {
2289da14cebeSEric Cheng 		dst_or_src = ((s->fs_flags & FLOW_OUTBOUND) != 0);
2290da14cebeSEric Cheng 	}
2291da14cebeSEric Cheng 
2292da14cebeSEric Cheng 	l4info->l4_hash_port = dst_or_src ? l4info->l4_dst_port :
2293da14cebeSEric Cheng 	    l4info->l4_src_port;
2294da14cebeSEric Cheng 
2295da14cebeSEric Cheng 	return ((l4info->l4_hash_port ^ (proto << 4)) % ft->ft_size);
2296da14cebeSEric Cheng }
2297da14cebeSEric Cheng 
2298da14cebeSEric Cheng /*
2299da14cebeSEric Cheng  * Unlike other accept() functions above, we do not need to get the header
2300da14cebeSEric Cheng  * size because this is our highest layer so far. If we want to do support
2301da14cebeSEric Cheng  * other higher layer protocols, we would need to save the l4_hdrsize
2302da14cebeSEric Cheng  * in the code below.
2303da14cebeSEric Cheng  */
2304da14cebeSEric Cheng 
2305da14cebeSEric Cheng /* ARGSUSED */
2306da14cebeSEric Cheng static int
flow_transport_accept(flow_tab_t * ft,flow_state_t * s)2307da14cebeSEric Cheng flow_transport_accept(flow_tab_t *ft, flow_state_t *s)
2308da14cebeSEric Cheng {
2309da14cebeSEric Cheng 	flow_l3info_t	*l3info = &s->fs_l3info;
2310da14cebeSEric Cheng 	flow_l4info_t	*l4info = &s->fs_l4info;
2311da14cebeSEric Cheng 	uint8_t		proto = l3info->l3_protocol;
2312da14cebeSEric Cheng 	uchar_t		*l4_start;
2313da14cebeSEric Cheng 
2314ae6aa22aSVenugopal Iyer 	l4_start = l3info->l3_start + l3info->l3_hdrsize;
2315ae6aa22aSVenugopal Iyer 
2316ae6aa22aSVenugopal Iyer 	/*
2317ae6aa22aSVenugopal Iyer 	 * Adjust start pointer if we're at the end of an mblk.
2318ae6aa22aSVenugopal Iyer 	 */
2319ae6aa22aSVenugopal Iyer 	CHECK_AND_ADJUST_START_PTR(s, l4_start);
2320ae6aa22aSVenugopal Iyer 
2321ae6aa22aSVenugopal Iyer 	l4info->l4_start = l4_start;
2322da14cebeSEric Cheng 	if (!OK_32PTR(l4_start))
2323da14cebeSEric Cheng 		return (EINVAL);
2324da14cebeSEric Cheng 
2325da14cebeSEric Cheng 	if (l3info->l3_fragmented == B_TRUE)
2326da14cebeSEric Cheng 		return (EINVAL);
2327da14cebeSEric Cheng 
2328da14cebeSEric Cheng 	switch (proto) {
2329da14cebeSEric Cheng 	case IPPROTO_TCP: {
2330da14cebeSEric Cheng 		struct tcphdr	*tcph = (struct tcphdr *)l4_start;
2331da14cebeSEric Cheng 
2332da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*tcph)))
2333da14cebeSEric Cheng 			return (ENOBUFS);
2334da14cebeSEric Cheng 
2335da14cebeSEric Cheng 		l4info->l4_src_port = tcph->th_sport;
2336da14cebeSEric Cheng 		l4info->l4_dst_port = tcph->th_dport;
2337da14cebeSEric Cheng 		break;
2338da14cebeSEric Cheng 	}
2339da14cebeSEric Cheng 	case IPPROTO_UDP: {
2340da14cebeSEric Cheng 		struct udphdr	*udph = (struct udphdr *)l4_start;
2341da14cebeSEric Cheng 
2342da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*udph)))
2343da14cebeSEric Cheng 			return (ENOBUFS);
2344da14cebeSEric Cheng 
2345da14cebeSEric Cheng 		l4info->l4_src_port = udph->uh_sport;
2346da14cebeSEric Cheng 		l4info->l4_dst_port = udph->uh_dport;
2347da14cebeSEric Cheng 		break;
2348da14cebeSEric Cheng 	}
2349da14cebeSEric Cheng 	case IPPROTO_SCTP: {
2350da14cebeSEric Cheng 		sctp_hdr_t	*sctph = (sctp_hdr_t *)l4_start;
2351da14cebeSEric Cheng 
2352da14cebeSEric Cheng 		if (PKT_TOO_SMALL(s, l4_start + sizeof (*sctph)))
2353da14cebeSEric Cheng 			return (ENOBUFS);
2354da14cebeSEric Cheng 
2355da14cebeSEric Cheng 		l4info->l4_src_port = sctph->sh_sport;
2356da14cebeSEric Cheng 		l4info->l4_dst_port = sctph->sh_dport;
2357da14cebeSEric Cheng 		break;
2358da14cebeSEric Cheng 	}
2359da14cebeSEric Cheng 	default:
2360da14cebeSEric Cheng 		return (EINVAL);
2361da14cebeSEric Cheng 	}
2362da14cebeSEric Cheng 
2363da14cebeSEric Cheng 	return (0);
2364da14cebeSEric Cheng }
2365da14cebeSEric Cheng 
2366da14cebeSEric Cheng /*
2367da14cebeSEric Cheng  * Validates transport flow entry.
2368da14cebeSEric Cheng  * The protocol field must be present.
2369da14cebeSEric Cheng  */
2370da14cebeSEric Cheng 
2371da14cebeSEric Cheng /* ARGSUSED */
2372da14cebeSEric Cheng static int
flow_transport_accept_fe(flow_tab_t * ft,flow_entry_t * flent)2373da14cebeSEric Cheng flow_transport_accept_fe(flow_tab_t *ft, flow_entry_t *flent)
2374da14cebeSEric Cheng {
2375da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2376da14cebeSEric Cheng 	flow_mask_t	mask = fd->fd_mask;
2377da14cebeSEric Cheng 
2378da14cebeSEric Cheng 	if ((mask & FLOW_IP_PROTOCOL) == 0)
2379da14cebeSEric Cheng 		return (EINVAL);
2380da14cebeSEric Cheng 
2381da14cebeSEric Cheng 	switch (fd->fd_protocol) {
2382da14cebeSEric Cheng 	case IPPROTO_TCP:
2383da14cebeSEric Cheng 	case IPPROTO_UDP:
2384da14cebeSEric Cheng 	case IPPROTO_SCTP:
2385da14cebeSEric Cheng 		break;
2386da14cebeSEric Cheng 	default:
2387da14cebeSEric Cheng 		return (EINVAL);
2388da14cebeSEric Cheng 	}
2389da14cebeSEric Cheng 
2390da14cebeSEric Cheng 	switch (mask & ~FLOW_IP_PROTOCOL) {
2391da14cebeSEric Cheng 	case FLOW_ULP_PORT_LOCAL:
2392da14cebeSEric Cheng 		if (fd->fd_local_port == 0)
2393da14cebeSEric Cheng 			return (EINVAL);
2394da14cebeSEric Cheng 
2395da14cebeSEric Cheng 		flent->fe_match = flow_transport_lport_match;
2396da14cebeSEric Cheng 		break;
2397da14cebeSEric Cheng 	case FLOW_ULP_PORT_REMOTE:
2398da14cebeSEric Cheng 		if (fd->fd_remote_port == 0)
2399da14cebeSEric Cheng 			return (EINVAL);
2400da14cebeSEric Cheng 
2401da14cebeSEric Cheng 		flent->fe_match = flow_transport_rport_match;
2402da14cebeSEric Cheng 		break;
2403da14cebeSEric Cheng 	case 0:
2404da14cebeSEric Cheng 		/*
2405da14cebeSEric Cheng 		 * transport-only flows conflicts with our table type.
2406da14cebeSEric Cheng 		 */
2407da14cebeSEric Cheng 		return (EOPNOTSUPP);
2408da14cebeSEric Cheng 	default:
2409da14cebeSEric Cheng 		return (EINVAL);
2410da14cebeSEric Cheng 	}
2411da14cebeSEric Cheng 
2412da14cebeSEric Cheng 	return (0);
2413da14cebeSEric Cheng }
2414da14cebeSEric Cheng 
2415da14cebeSEric Cheng static uint32_t
flow_transport_hash_fe(flow_tab_t * ft,flow_entry_t * flent)2416da14cebeSEric Cheng flow_transport_hash_fe(flow_tab_t *ft, flow_entry_t *flent)
2417da14cebeSEric Cheng {
2418da14cebeSEric Cheng 	flow_desc_t	*fd = &flent->fe_flow_desc;
2419da14cebeSEric Cheng 	uint16_t	port = 0;
2420da14cebeSEric Cheng 
2421da14cebeSEric Cheng 	port = ((fd->fd_mask & FLOW_ULP_PORT_LOCAL) != 0) ?
2422da14cebeSEric Cheng 	    fd->fd_local_port : fd->fd_remote_port;
2423da14cebeSEric Cheng 
2424da14cebeSEric Cheng 	return ((port ^ (fd->fd_protocol << 4)) % ft->ft_size);
2425da14cebeSEric Cheng }
2426da14cebeSEric Cheng 
2427da14cebeSEric Cheng /* ARGSUSED */
2428da14cebeSEric Cheng static boolean_t
flow_transport_match_fe(flow_tab_t * ft,flow_entry_t * f1,flow_entry_t * f2)2429da14cebeSEric Cheng flow_transport_match_fe(flow_tab_t *ft, flow_entry_t *f1, flow_entry_t *f2)
2430da14cebeSEric Cheng {
2431da14cebeSEric Cheng 	flow_desc_t	*fd1 = &f1->fe_flow_desc, *fd2 = &f2->fe_flow_desc;
2432da14cebeSEric Cheng 
2433da14cebeSEric Cheng 	if (fd1->fd_protocol != fd2->fd_protocol)
2434da14cebeSEric Cheng 		return (B_FALSE);
2435da14cebeSEric Cheng 
2436da14cebeSEric Cheng 	if ((fd1->fd_mask & FLOW_ULP_PORT_LOCAL) != 0)
2437da14cebeSEric Cheng 		return (fd1->fd_local_port == fd2->fd_local_port);
2438da14cebeSEric Cheng 
243925ec3e3dSEric Cheng 	if ((fd1->fd_mask & FLOW_ULP_PORT_REMOTE) != 0)
244025ec3e3dSEric Cheng 		return (fd1->fd_remote_port == fd2->fd_remote_port);
244125ec3e3dSEric Cheng 
244225ec3e3dSEric Cheng 	return (B_TRUE);
2443da14cebeSEric Cheng }
2444da14cebeSEric Cheng 
2445da14cebeSEric Cheng static flow_ops_t flow_l2_ops = {
2446da14cebeSEric Cheng 	flow_l2_accept_fe,
2447da14cebeSEric Cheng 	flow_l2_hash_fe,
2448da14cebeSEric Cheng 	flow_l2_match_fe,
2449da14cebeSEric Cheng 	flow_generic_insert_fe,
2450da14cebeSEric Cheng 	flow_l2_hash,
2451da14cebeSEric Cheng 	{flow_l2_accept}
2452da14cebeSEric Cheng };
2453da14cebeSEric Cheng 
2454da14cebeSEric Cheng static flow_ops_t flow_ip_ops = {
2455da14cebeSEric Cheng 	flow_ip_accept_fe,
2456da14cebeSEric Cheng 	flow_ip_hash_fe,
2457da14cebeSEric Cheng 	flow_ip_match_fe,
2458da14cebeSEric Cheng 	flow_ip_insert_fe,
2459da14cebeSEric Cheng 	flow_ip_hash,
2460da14cebeSEric Cheng 	{flow_l2_accept, flow_ip_accept}
2461da14cebeSEric Cheng };
2462da14cebeSEric Cheng 
2463da14cebeSEric Cheng static flow_ops_t flow_ip_proto_ops = {
2464da14cebeSEric Cheng 	flow_ip_proto_accept_fe,
2465da14cebeSEric Cheng 	flow_ip_proto_hash_fe,
2466da14cebeSEric Cheng 	flow_ip_proto_match_fe,
2467da14cebeSEric Cheng 	flow_generic_insert_fe,
2468da14cebeSEric Cheng 	flow_ip_proto_hash,
2469da14cebeSEric Cheng 	{flow_l2_accept, flow_ip_accept}
2470da14cebeSEric Cheng };
2471da14cebeSEric Cheng 
2472da14cebeSEric Cheng static flow_ops_t flow_transport_ops = {
2473da14cebeSEric Cheng 	flow_transport_accept_fe,
2474da14cebeSEric Cheng 	flow_transport_hash_fe,
2475da14cebeSEric Cheng 	flow_transport_match_fe,
2476da14cebeSEric Cheng 	flow_generic_insert_fe,
2477da14cebeSEric Cheng 	flow_transport_hash,
2478da14cebeSEric Cheng 	{flow_l2_accept, flow_ip_accept, flow_transport_accept}
2479da14cebeSEric Cheng };
2480da14cebeSEric Cheng 
2481da14cebeSEric Cheng static flow_tab_info_t flow_tab_info_list[] = {
2482da14cebeSEric Cheng 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_LOCAL, 2},
2483da14cebeSEric Cheng 	{&flow_ip_ops, FLOW_IP_VERSION | FLOW_IP_REMOTE, 2},
2484da14cebeSEric Cheng 	{&flow_ip_ops, FLOW_IP_DSFIELD, 1},
2485da14cebeSEric Cheng 	{&flow_ip_proto_ops, FLOW_IP_PROTOCOL, 256},
248625ec3e3dSEric Cheng 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_LOCAL, 1024},
248725ec3e3dSEric Cheng 	{&flow_transport_ops, FLOW_IP_PROTOCOL | FLOW_ULP_PORT_REMOTE, 1024}
2488da14cebeSEric Cheng };
2489da14cebeSEric Cheng 
/*
 * Number of rows in flow_tab_info_list.
 */
#define	FLOW_MAX_TAB_INFO \
	(sizeof (flow_tab_info_list) / sizeof (flow_tab_info_list[0]))
2492da14cebeSEric Cheng 
2493da14cebeSEric Cheng static flow_tab_info_t *
mac_flow_tab_info_get(flow_mask_t mask)2494da14cebeSEric Cheng mac_flow_tab_info_get(flow_mask_t mask)
2495da14cebeSEric Cheng {
2496da14cebeSEric Cheng 	int	i;
2497da14cebeSEric Cheng 
2498da14cebeSEric Cheng 	for (i = 0; i < FLOW_MAX_TAB_INFO; i++) {
2499da14cebeSEric Cheng 		if (mask == flow_tab_info_list[i].fti_mask)
2500da14cebeSEric Cheng 			return (&flow_tab_info_list[i]);
2501da14cebeSEric Cheng 	}
2502da14cebeSEric Cheng 	return (NULL);
2503da14cebeSEric Cheng }
2504