xref: /illumos-gate/usr/src/uts/common/io/mac/mac_util.c (revision c61a1653)
1da14cebeSEric Cheng /*
2da14cebeSEric Cheng  * CDDL HEADER START
3da14cebeSEric Cheng  *
4da14cebeSEric Cheng  * The contents of this file are subject to the terms of the
5da14cebeSEric Cheng  * Common Development and Distribution License (the "License").
6da14cebeSEric Cheng  * You may not use this file except in compliance with the License.
7da14cebeSEric Cheng  *
8da14cebeSEric Cheng  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9da14cebeSEric Cheng  * or http://www.opensolaris.org/os/licensing.
10da14cebeSEric Cheng  * See the License for the specific language governing permissions
11da14cebeSEric Cheng  * and limitations under the License.
12da14cebeSEric Cheng  *
13da14cebeSEric Cheng  * When distributing Covered Code, include this CDDL HEADER in each
14da14cebeSEric Cheng  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15da14cebeSEric Cheng  * If applicable, add the following below this CDDL HEADER, with the
16da14cebeSEric Cheng  * fields enclosed by brackets "[]" replaced with your own identifying
17da14cebeSEric Cheng  * information: Portions Copyright [yyyy] [name of copyright owner]
18da14cebeSEric Cheng  *
19da14cebeSEric Cheng  * CDDL HEADER END
20da14cebeSEric Cheng  */
21da14cebeSEric Cheng /*
225cd376e8SJimmy Vetayases  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23*c61a1653SRyan Zezeski  * Copyright 2019 Joyent, Inc.
24da14cebeSEric Cheng  */
25da14cebeSEric Cheng 
26da14cebeSEric Cheng /*
27da14cebeSEric Cheng  * MAC Services Module - misc utilities
28da14cebeSEric Cheng  */
29da14cebeSEric Cheng 
30da14cebeSEric Cheng #include <sys/types.h>
31da14cebeSEric Cheng #include <sys/mac.h>
32da14cebeSEric Cheng #include <sys/mac_impl.h>
33da14cebeSEric Cheng #include <sys/mac_client_priv.h>
34da14cebeSEric Cheng #include <sys/mac_client_impl.h>
35da14cebeSEric Cheng #include <sys/mac_soft_ring.h>
36da14cebeSEric Cheng #include <sys/strsubr.h>
37da14cebeSEric Cheng #include <sys/strsun.h>
38da14cebeSEric Cheng #include <sys/vlan.h>
39da14cebeSEric Cheng #include <sys/pattr.h>
40da14cebeSEric Cheng #include <sys/pci_tools.h>
41da14cebeSEric Cheng #include <inet/ip.h>
42da14cebeSEric Cheng #include <inet/ip_impl.h>
43da14cebeSEric Cheng #include <inet/ip6.h>
44da14cebeSEric Cheng #include <sys/vtrace.h>
45da14cebeSEric Cheng #include <sys/dlpi.h>
46da14cebeSEric Cheng #include <sys/sunndi.h>
47ae6aa22aSVenugopal Iyer #include <inet/ipsec_impl.h>
48ae6aa22aSVenugopal Iyer #include <inet/sadb.h>
49ae6aa22aSVenugopal Iyer #include <inet/ipsecesp.h>
50ae6aa22aSVenugopal Iyer #include <inet/ipsecah.h>
51*c61a1653SRyan Zezeski #include <inet/tcp.h>
52*c61a1653SRyan Zezeski #include <inet/udp_impl.h>
53*c61a1653SRyan Zezeski #include <inet/sctp_ip.h>
54*c61a1653SRyan Zezeski 
55*c61a1653SRyan Zezeski /*
56*c61a1653SRyan Zezeski  * The next two functions are used for dropping packets or chains of
57*c61a1653SRyan Zezeski  * packets, respectively. We could use one function for both but
58*c61a1653SRyan Zezeski  * separating the use cases allows us to specify intent and prevent
59*c61a1653SRyan Zezeski  * dropping more data than intended.
60*c61a1653SRyan Zezeski  *
61*c61a1653SRyan Zezeski  * The purpose of these functions is to aid the debugging effort,
62*c61a1653SRyan Zezeski  * especially in production. Rather than use freemsg()/freemsgchain(),
63*c61a1653SRyan Zezeski  * it's preferable to use these functions when dropping a packet in
64*c61a1653SRyan Zezeski  * the MAC layer. These functions should only be used during
65*c61a1653SRyan Zezeski  * unexpected conditions. That is, any time a packet is dropped
66*c61a1653SRyan Zezeski  * outside of the regular, successful datapath. Consolidating all
67*c61a1653SRyan Zezeski  * drops on these functions allows the user to trace one location and
68*c61a1653SRyan Zezeski  * determine why the packet was dropped based on the msg. It also
69*c61a1653SRyan Zezeski  * allows the user to inspect the packet before it is freed. Finally,
70*c61a1653SRyan Zezeski  * it allows the user to avoid tracing freemsg()/freemsgchain() thus
71*c61a1653SRyan Zezeski  * keeping the hot path running as efficiently as possible.
72*c61a1653SRyan Zezeski  *
73*c61a1653SRyan Zezeski  * NOTE: At this time not all MAC drops are aggregated on these
74*c61a1653SRyan Zezeski  * functions; but that is the plan. This comment should be erased once
75*c61a1653SRyan Zezeski  * completed.
76*c61a1653SRyan Zezeski  */
77*c61a1653SRyan Zezeski 
78*c61a1653SRyan Zezeski /*PRINTFLIKE2*/
79*c61a1653SRyan Zezeski void
mac_drop_pkt(mblk_t * mp,const char * fmt,...)80*c61a1653SRyan Zezeski mac_drop_pkt(mblk_t *mp, const char *fmt, ...)
81*c61a1653SRyan Zezeski {
82*c61a1653SRyan Zezeski 	va_list adx;
83*c61a1653SRyan Zezeski 	char msg[128];
84*c61a1653SRyan Zezeski 	char *msgp = msg;
85*c61a1653SRyan Zezeski 
86*c61a1653SRyan Zezeski 	ASSERT3P(mp->b_next, ==, NULL);
87*c61a1653SRyan Zezeski 
88*c61a1653SRyan Zezeski 	va_start(adx, fmt);
89*c61a1653SRyan Zezeski 	(void) vsnprintf(msgp, sizeof (msg), fmt, adx);
90*c61a1653SRyan Zezeski 	va_end(adx);
91*c61a1653SRyan Zezeski 
92*c61a1653SRyan Zezeski 	DTRACE_PROBE2(mac__drop, mblk_t *, mp, char *, msgp);
93*c61a1653SRyan Zezeski 	freemsg(mp);
94*c61a1653SRyan Zezeski }
95*c61a1653SRyan Zezeski 
96*c61a1653SRyan Zezeski /*PRINTFLIKE2*/
97*c61a1653SRyan Zezeski void
mac_drop_chain(mblk_t * chain,const char * fmt,...)98*c61a1653SRyan Zezeski mac_drop_chain(mblk_t *chain, const char *fmt, ...)
99*c61a1653SRyan Zezeski {
100*c61a1653SRyan Zezeski 	va_list adx;
101*c61a1653SRyan Zezeski 	char msg[128];
102*c61a1653SRyan Zezeski 	char *msgp = msg;
103*c61a1653SRyan Zezeski 
104*c61a1653SRyan Zezeski 	va_start(adx, fmt);
105*c61a1653SRyan Zezeski 	(void) vsnprintf(msgp, sizeof (msg), fmt, adx);
106*c61a1653SRyan Zezeski 	va_end(adx);
107*c61a1653SRyan Zezeski 
108*c61a1653SRyan Zezeski 	/*
109*c61a1653SRyan Zezeski 	 * We could use freemsgchain() for the actual freeing but
110*c61a1653SRyan Zezeski 	 * since we are already walking the chain to fire the dtrace
111*c61a1653SRyan Zezeski 	 * probe we might as well free the msg here too.
112*c61a1653SRyan Zezeski 	 */
113*c61a1653SRyan Zezeski 	for (mblk_t *mp = chain, *next; mp != NULL; ) {
114*c61a1653SRyan Zezeski 		next = mp->b_next;
115*c61a1653SRyan Zezeski 		DTRACE_PROBE2(mac__drop, mblk_t *, mp, char *, msgp);
116*c61a1653SRyan Zezeski 		freemsg(mp);
117*c61a1653SRyan Zezeski 		mp = next;
118*c61a1653SRyan Zezeski 	}
119*c61a1653SRyan Zezeski }
120da14cebeSEric Cheng 
121da14cebeSEric Cheng /*
122da14cebeSEric Cheng  * Copy an mblk, preserving its hardware checksum flags.
123da14cebeSEric Cheng  */
124da14cebeSEric Cheng static mblk_t *
mac_copymsg_cksum(mblk_t * mp)125da14cebeSEric Cheng mac_copymsg_cksum(mblk_t *mp)
126da14cebeSEric Cheng {
127da14cebeSEric Cheng 	mblk_t *mp1;
128da14cebeSEric Cheng 
129da14cebeSEric Cheng 	mp1 = copymsg(mp);
130da14cebeSEric Cheng 	if (mp1 == NULL)
131da14cebeSEric Cheng 		return (NULL);
132da14cebeSEric Cheng 
133ec71f88eSPatrick Mooney 	mac_hcksum_clone(mp, mp1);
134da14cebeSEric Cheng 
135da14cebeSEric Cheng 	return (mp1);
136da14cebeSEric Cheng }
137da14cebeSEric Cheng 
138da14cebeSEric Cheng /*
139da14cebeSEric Cheng  * Copy an mblk chain, presenting the hardware checksum flags of the
140da14cebeSEric Cheng  * individual mblks.
141da14cebeSEric Cheng  */
142da14cebeSEric Cheng mblk_t *
mac_copymsgchain_cksum(mblk_t * mp)143da14cebeSEric Cheng mac_copymsgchain_cksum(mblk_t *mp)
144da14cebeSEric Cheng {
145da14cebeSEric Cheng 	mblk_t *nmp = NULL;
146da14cebeSEric Cheng 	mblk_t **nmpp = &nmp;
147da14cebeSEric Cheng 
148da14cebeSEric Cheng 	for (; mp != NULL; mp = mp->b_next) {
149da14cebeSEric Cheng 		if ((*nmpp = mac_copymsg_cksum(mp)) == NULL) {
150da14cebeSEric Cheng 			freemsgchain(nmp);
151da14cebeSEric Cheng 			return (NULL);
152da14cebeSEric Cheng 		}
153da14cebeSEric Cheng 
154da14cebeSEric Cheng 		nmpp = &((*nmpp)->b_next);
155da14cebeSEric Cheng 	}
156da14cebeSEric Cheng 
157da14cebeSEric Cheng 	return (nmp);
158da14cebeSEric Cheng }
159da14cebeSEric Cheng 
160da14cebeSEric Cheng /*
161*c61a1653SRyan Zezeski  * Calculate the ULP checksum for IPv4. Return true if the calculation
162*c61a1653SRyan Zezeski  * was successful, or false if an error occurred. If the later, place
163*c61a1653SRyan Zezeski  * an error message into '*err'.
164da14cebeSEric Cheng  */
165*c61a1653SRyan Zezeski static boolean_t
mac_sw_cksum_ipv4(mblk_t * mp,uint32_t ip_hdr_offset,ipha_t * ipha,const char ** err)166*c61a1653SRyan Zezeski mac_sw_cksum_ipv4(mblk_t *mp, uint32_t ip_hdr_offset, ipha_t *ipha,
167*c61a1653SRyan Zezeski     const char **err)
168*c61a1653SRyan Zezeski {
169*c61a1653SRyan Zezeski 	const uint8_t proto = ipha->ipha_protocol;
170*c61a1653SRyan Zezeski 	size_t len;
171*c61a1653SRyan Zezeski 	const uint32_t ip_hdr_sz = IPH_HDR_LENGTH(ipha);
172*c61a1653SRyan Zezeski 	/* ULP offset from start of L2. */
173*c61a1653SRyan Zezeski 	const uint32_t ulp_offset = ip_hdr_offset + ip_hdr_sz;
174*c61a1653SRyan Zezeski 	ipaddr_t src, dst;
175*c61a1653SRyan Zezeski 	uint32_t cksum;
176*c61a1653SRyan Zezeski 	uint16_t *up;
177*c61a1653SRyan Zezeski 
178*c61a1653SRyan Zezeski 	/*
179*c61a1653SRyan Zezeski 	 * We need a pointer to the ULP checksum. We're assuming the
180*c61a1653SRyan Zezeski 	 * ULP checksum pointer resides in the first mblk. Our native
181*c61a1653SRyan Zezeski 	 * TCP stack should always put the headers in the first mblk,
182*c61a1653SRyan Zezeski 	 * but currently we have no way to guarantee that other
183*c61a1653SRyan Zezeski 	 * clients don't spread headers (or even header fields) across
184*c61a1653SRyan Zezeski 	 * mblks.
185*c61a1653SRyan Zezeski 	 */
186*c61a1653SRyan Zezeski 	switch (proto) {
187*c61a1653SRyan Zezeski 	case IPPROTO_TCP:
188*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (tcph_t)));
189*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (tcph_t))) {
190*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain TCP header";
191*c61a1653SRyan Zezeski 			goto bail;
192*c61a1653SRyan Zezeski 		}
193*c61a1653SRyan Zezeski 
194*c61a1653SRyan Zezeski 		up = IPH_TCPH_CHECKSUMP(ipha, ip_hdr_sz);
195*c61a1653SRyan Zezeski 		cksum = IP_TCP_CSUM_COMP;
196*c61a1653SRyan Zezeski 		break;
197*c61a1653SRyan Zezeski 
198*c61a1653SRyan Zezeski 	case IPPROTO_UDP:
199*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (udpha_t)));
200*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (udpha_t))) {
201*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain UDP header";
202*c61a1653SRyan Zezeski 			goto bail;
203*c61a1653SRyan Zezeski 		}
204*c61a1653SRyan Zezeski 
205*c61a1653SRyan Zezeski 		up = IPH_UDPH_CHECKSUMP(ipha, ip_hdr_sz);
206*c61a1653SRyan Zezeski 		cksum = IP_UDP_CSUM_COMP;
207*c61a1653SRyan Zezeski 		break;
208*c61a1653SRyan Zezeski 
209*c61a1653SRyan Zezeski 	case IPPROTO_SCTP: {
210*c61a1653SRyan Zezeski 		sctp_hdr_t *sctph;
211*c61a1653SRyan Zezeski 
212*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (sctp_hdr_t)));
213*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (sctp_hdr_t))) {
214*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain SCTP header";
215*c61a1653SRyan Zezeski 			goto bail;
216*c61a1653SRyan Zezeski 		}
217*c61a1653SRyan Zezeski 
218*c61a1653SRyan Zezeski 		sctph = (sctp_hdr_t *)(mp->b_rptr + ulp_offset);
219*c61a1653SRyan Zezeski 		sctph->sh_chksum = 0;
220*c61a1653SRyan Zezeski 		sctph->sh_chksum = sctp_cksum(mp, ulp_offset);
221*c61a1653SRyan Zezeski 		return (B_TRUE);
222*c61a1653SRyan Zezeski 	}
223*c61a1653SRyan Zezeski 
224*c61a1653SRyan Zezeski 	default:
225*c61a1653SRyan Zezeski 		*err = "unexpected protocol";
226*c61a1653SRyan Zezeski 		goto bail;
227*c61a1653SRyan Zezeski 
228*c61a1653SRyan Zezeski 	}
229*c61a1653SRyan Zezeski 
230*c61a1653SRyan Zezeski 	/* Pseudo-header checksum. */
231*c61a1653SRyan Zezeski 	src = ipha->ipha_src;
232*c61a1653SRyan Zezeski 	dst = ipha->ipha_dst;
233*c61a1653SRyan Zezeski 	len = ntohs(ipha->ipha_length) - ip_hdr_sz;
234*c61a1653SRyan Zezeski 
235*c61a1653SRyan Zezeski 	cksum += (dst >> 16) + (dst & 0xFFFF) + (src >> 16) + (src & 0xFFFF);
236*c61a1653SRyan Zezeski 	cksum += htons(len);
237*c61a1653SRyan Zezeski 
238*c61a1653SRyan Zezeski 	/*
239*c61a1653SRyan Zezeski 	 * We have already accounted for the pseudo checksum above.
240*c61a1653SRyan Zezeski 	 * Make sure the ULP checksum field is zero before computing
241*c61a1653SRyan Zezeski 	 * the rest.
242*c61a1653SRyan Zezeski 	 */
243*c61a1653SRyan Zezeski 	*up = 0;
244*c61a1653SRyan Zezeski 	cksum = IP_CSUM(mp, ulp_offset, cksum);
245*c61a1653SRyan Zezeski 	*up = (uint16_t)(cksum ? cksum : ~cksum);
246*c61a1653SRyan Zezeski 
247*c61a1653SRyan Zezeski 	return (B_TRUE);
248*c61a1653SRyan Zezeski 
249*c61a1653SRyan Zezeski bail:
250*c61a1653SRyan Zezeski 	return (B_FALSE);
251*c61a1653SRyan Zezeski }
252*c61a1653SRyan Zezeski 
253*c61a1653SRyan Zezeski /*
254*c61a1653SRyan Zezeski  * Calculate the ULP checksum for IPv6. Return true if the calculation
255*c61a1653SRyan Zezeski  * was successful, or false if an error occurred. If the later, place
256*c61a1653SRyan Zezeski  * an error message into '*err'.
257*c61a1653SRyan Zezeski  */
258*c61a1653SRyan Zezeski static boolean_t
mac_sw_cksum_ipv6(mblk_t * mp,uint32_t ip_hdr_offset,const char ** err)259*c61a1653SRyan Zezeski mac_sw_cksum_ipv6(mblk_t *mp, uint32_t ip_hdr_offset, const char **err)
260da14cebeSEric Cheng {
261*c61a1653SRyan Zezeski 	ip6_t *ip6h = (ip6_t *)(mp->b_rptr + ip_hdr_offset);
262*c61a1653SRyan Zezeski 	const uint8_t proto = ip6h->ip6_nxt;
263*c61a1653SRyan Zezeski 	const uint16_t *iphs = (uint16_t *)ip6h;
264*c61a1653SRyan Zezeski 	/* ULP offset from start of L2. */
265*c61a1653SRyan Zezeski 	uint32_t ulp_offset;
266*c61a1653SRyan Zezeski 	size_t len;
267*c61a1653SRyan Zezeski 	uint32_t cksum;
268*c61a1653SRyan Zezeski 	uint16_t *up;
269*c61a1653SRyan Zezeski 	uint16_t ip_hdr_sz;
270*c61a1653SRyan Zezeski 
271*c61a1653SRyan Zezeski 	if (!ip_hdr_length_nexthdr_v6(mp, ip6h, &ip_hdr_sz, NULL)) {
272*c61a1653SRyan Zezeski 		*err = "malformed IPv6 header";
273*c61a1653SRyan Zezeski 		goto bail;
274*c61a1653SRyan Zezeski 	}
275*c61a1653SRyan Zezeski 
276*c61a1653SRyan Zezeski 	ulp_offset = ip_hdr_offset + ip_hdr_sz;
277*c61a1653SRyan Zezeski 
278*c61a1653SRyan Zezeski 	/*
279*c61a1653SRyan Zezeski 	 * We need a pointer to the ULP checksum. We're assuming the
280*c61a1653SRyan Zezeski 	 * ULP checksum pointer resides in the first mblk. Our native
281*c61a1653SRyan Zezeski 	 * TCP stack should always put the headers in the first mblk,
282*c61a1653SRyan Zezeski 	 * but currently we have no way to guarantee that other
283*c61a1653SRyan Zezeski 	 * clients don't spread headers (or even header fields) across
284*c61a1653SRyan Zezeski 	 * mblks.
285*c61a1653SRyan Zezeski 	 */
286*c61a1653SRyan Zezeski 	switch (proto) {
287*c61a1653SRyan Zezeski 	case IPPROTO_TCP:
288*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (tcph_t)));
289*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (tcph_t))) {
290*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain TCP header";
291*c61a1653SRyan Zezeski 			goto bail;
292*c61a1653SRyan Zezeski 		}
293*c61a1653SRyan Zezeski 
294*c61a1653SRyan Zezeski 		up = IPH_TCPH_CHECKSUMP(ip6h, ip_hdr_sz);
295*c61a1653SRyan Zezeski 		cksum = IP_TCP_CSUM_COMP;
296*c61a1653SRyan Zezeski 		break;
297*c61a1653SRyan Zezeski 
298*c61a1653SRyan Zezeski 	case IPPROTO_UDP:
299*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (udpha_t)));
300*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (udpha_t))) {
301*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain UDP header";
302*c61a1653SRyan Zezeski 			goto bail;
303*c61a1653SRyan Zezeski 		}
304*c61a1653SRyan Zezeski 
305*c61a1653SRyan Zezeski 		up = IPH_UDPH_CHECKSUMP(ip6h, ip_hdr_sz);
306*c61a1653SRyan Zezeski 		cksum = IP_UDP_CSUM_COMP;
307*c61a1653SRyan Zezeski 		break;
308*c61a1653SRyan Zezeski 
309*c61a1653SRyan Zezeski 	case IPPROTO_SCTP: {
310*c61a1653SRyan Zezeski 		sctp_hdr_t *sctph;
311*c61a1653SRyan Zezeski 
312*c61a1653SRyan Zezeski 		ASSERT3U(MBLKL(mp), >=, (ulp_offset + sizeof (sctp_hdr_t)));
313*c61a1653SRyan Zezeski 		if (MBLKL(mp) < (ulp_offset + sizeof (sctp_hdr_t))) {
314*c61a1653SRyan Zezeski 			*err = "mblk doesn't contain SCTP header";
315*c61a1653SRyan Zezeski 			goto bail;
316*c61a1653SRyan Zezeski 		}
317*c61a1653SRyan Zezeski 
318*c61a1653SRyan Zezeski 		sctph = (sctp_hdr_t *)(mp->b_rptr + ulp_offset);
319*c61a1653SRyan Zezeski 		/*
320*c61a1653SRyan Zezeski 		 * Zero out the checksum field to ensure proper
321*c61a1653SRyan Zezeski 		 * checksum calculation.
322*c61a1653SRyan Zezeski 		 */
323*c61a1653SRyan Zezeski 		sctph->sh_chksum = 0;
324*c61a1653SRyan Zezeski 		sctph->sh_chksum = sctp_cksum(mp, ulp_offset);
325*c61a1653SRyan Zezeski 		return (B_TRUE);
326*c61a1653SRyan Zezeski 	}
327*c61a1653SRyan Zezeski 
328*c61a1653SRyan Zezeski 	default:
329*c61a1653SRyan Zezeski 		*err = "unexpected protocol";
330*c61a1653SRyan Zezeski 		goto bail;
331*c61a1653SRyan Zezeski 	}
332*c61a1653SRyan Zezeski 
333*c61a1653SRyan Zezeski 	/*
334*c61a1653SRyan Zezeski 	 * The payload length includes the payload and the IPv6
335*c61a1653SRyan Zezeski 	 * extension headers; the idea is to subtract the extension
336*c61a1653SRyan Zezeski 	 * header length to get the real payload length.
337*c61a1653SRyan Zezeski 	 */
338*c61a1653SRyan Zezeski 	len = ntohs(ip6h->ip6_plen) - (ip_hdr_sz - IPV6_HDR_LEN);
339*c61a1653SRyan Zezeski 	cksum += len;
340*c61a1653SRyan Zezeski 
341*c61a1653SRyan Zezeski 	/*
342*c61a1653SRyan Zezeski 	 * We accumulate the pseudo header checksum in cksum; then we
343*c61a1653SRyan Zezeski 	 * call IP_CSUM to compute the checksum over the payload.
344*c61a1653SRyan Zezeski 	 */
345*c61a1653SRyan Zezeski 	cksum += iphs[4] + iphs[5] + iphs[6] + iphs[7] + iphs[8] + iphs[9] +
346*c61a1653SRyan Zezeski 	    iphs[10] + iphs[11] + iphs[12] + iphs[13] + iphs[14] + iphs[15] +
347*c61a1653SRyan Zezeski 	    iphs[16] + iphs[17] + iphs[18] + iphs[19];
348*c61a1653SRyan Zezeski 	cksum = IP_CSUM(mp, ulp_offset, cksum);
349*c61a1653SRyan Zezeski 
350*c61a1653SRyan Zezeski 	/* For UDP/IPv6 a zero UDP checksum is not allowed. Change to 0xffff */
351*c61a1653SRyan Zezeski 	if (proto == IPPROTO_UDP && cksum == 0)
352*c61a1653SRyan Zezeski 		cksum = ~cksum;
353*c61a1653SRyan Zezeski 
354*c61a1653SRyan Zezeski 	*up = (uint16_t)cksum;
355*c61a1653SRyan Zezeski 
356*c61a1653SRyan Zezeski 	return (B_TRUE);
357*c61a1653SRyan Zezeski 
358*c61a1653SRyan Zezeski bail:
359*c61a1653SRyan Zezeski 	return (B_FALSE);
360*c61a1653SRyan Zezeski }
361*c61a1653SRyan Zezeski 
362*c61a1653SRyan Zezeski /*
363*c61a1653SRyan Zezeski  * Perform software checksum on a single message, if needed. The
364*c61a1653SRyan Zezeski  * emulation performed is determined by an intersection of the mblk's
365*c61a1653SRyan Zezeski  * flags and the emul flags requested. The emul flags are documented
366*c61a1653SRyan Zezeski  * in mac.h.
367*c61a1653SRyan Zezeski  */
368*c61a1653SRyan Zezeski static mblk_t *
mac_sw_cksum(mblk_t * mp,mac_emul_t emul)369*c61a1653SRyan Zezeski mac_sw_cksum(mblk_t *mp, mac_emul_t emul)
370*c61a1653SRyan Zezeski {
371*c61a1653SRyan Zezeski 	mblk_t *skipped_hdr = NULL;
372da14cebeSEric Cheng 	uint32_t flags, start, stuff, end, value;
373*c61a1653SRyan Zezeski 	uint32_t ip_hdr_offset;
374*c61a1653SRyan Zezeski 	uint16_t etype;
375*c61a1653SRyan Zezeski 	size_t ip_hdr_sz;
376*c61a1653SRyan Zezeski 	struct ether_header *ehp;
377*c61a1653SRyan Zezeski 	const char *err = "";
378da14cebeSEric Cheng 
379*c61a1653SRyan Zezeski 	/*
380*c61a1653SRyan Zezeski 	 * This function should only be called from mac_hw_emul()
381*c61a1653SRyan Zezeski 	 * which handles mblk chains and the shared ref case.
382*c61a1653SRyan Zezeski 	 */
383*c61a1653SRyan Zezeski 	ASSERT3P(mp->b_next, ==, NULL);
384da14cebeSEric Cheng 
385*c61a1653SRyan Zezeski 	mac_hcksum_get(mp, &start, &stuff, &end, &value, NULL);
386*c61a1653SRyan Zezeski 
387*c61a1653SRyan Zezeski 	flags = DB_CKSUMFLAGS(mp);
388*c61a1653SRyan Zezeski 
389*c61a1653SRyan Zezeski 	/* Why call this if checksum emulation isn't needed? */
390*c61a1653SRyan Zezeski 	ASSERT3U(flags & (HCK_FLAGS), !=, 0);
391*c61a1653SRyan Zezeski 
392*c61a1653SRyan Zezeski 	/*
393*c61a1653SRyan Zezeski 	 * Ethernet, and optionally VLAN header. mac_hw_emul() has
394*c61a1653SRyan Zezeski 	 * already verified we have enough data to read the L2 header.
395*c61a1653SRyan Zezeski 	 */
396*c61a1653SRyan Zezeski 	ehp = (struct ether_header *)mp->b_rptr;
397*c61a1653SRyan Zezeski 	if (ntohs(ehp->ether_type) == VLAN_TPID) {
398*c61a1653SRyan Zezeski 		struct ether_vlan_header *evhp;
399*c61a1653SRyan Zezeski 
400*c61a1653SRyan Zezeski 		evhp = (struct ether_vlan_header *)mp->b_rptr;
401*c61a1653SRyan Zezeski 		etype = ntohs(evhp->ether_type);
402*c61a1653SRyan Zezeski 		ip_hdr_offset = sizeof (struct ether_vlan_header);
403*c61a1653SRyan Zezeski 	} else {
404*c61a1653SRyan Zezeski 		etype = ntohs(ehp->ether_type);
405*c61a1653SRyan Zezeski 		ip_hdr_offset = sizeof (struct ether_header);
406*c61a1653SRyan Zezeski 	}
407*c61a1653SRyan Zezeski 
408*c61a1653SRyan Zezeski 	/*
409*c61a1653SRyan Zezeski 	 * If this packet isn't IP, then leave it alone. We don't want
410*c61a1653SRyan Zezeski 	 * to affect non-IP traffic like ARP. Assume the IP header
411*c61a1653SRyan Zezeski 	 * doesn't include any options, for now. We will use the
412*c61a1653SRyan Zezeski 	 * correct size later after we know there are enough bytes to
413*c61a1653SRyan Zezeski 	 * at least fill out the basic header.
414*c61a1653SRyan Zezeski 	 */
415*c61a1653SRyan Zezeski 	switch (etype) {
416*c61a1653SRyan Zezeski 	case ETHERTYPE_IP:
417*c61a1653SRyan Zezeski 		ip_hdr_sz = sizeof (ipha_t);
418*c61a1653SRyan Zezeski 		break;
419*c61a1653SRyan Zezeski 	case ETHERTYPE_IPV6:
420*c61a1653SRyan Zezeski 		ip_hdr_sz = sizeof (ip6_t);
421*c61a1653SRyan Zezeski 		break;
422*c61a1653SRyan Zezeski 	default:
423*c61a1653SRyan Zezeski 		return (mp);
424*c61a1653SRyan Zezeski 	}
425*c61a1653SRyan Zezeski 
426*c61a1653SRyan Zezeski 	ASSERT3U(MBLKL(mp), >=, ip_hdr_offset);
427*c61a1653SRyan Zezeski 
428*c61a1653SRyan Zezeski 	/*
429*c61a1653SRyan Zezeski 	 * If the first mblk of this packet contains only the ethernet
430*c61a1653SRyan Zezeski 	 * header, skip past it for now. Packets with their data
431*c61a1653SRyan Zezeski 	 * contained in only a single mblk can then use the fastpaths
432*c61a1653SRyan Zezeski 	 * tuned to that possibility.
433*c61a1653SRyan Zezeski 	 */
434*c61a1653SRyan Zezeski 	if (MBLKL(mp) == ip_hdr_offset) {
435*c61a1653SRyan Zezeski 		ip_hdr_offset -= MBLKL(mp);
436*c61a1653SRyan Zezeski 		/* This is guaranteed by mac_hw_emul(). */
437*c61a1653SRyan Zezeski 		ASSERT3P(mp->b_cont, !=, NULL);
438*c61a1653SRyan Zezeski 		skipped_hdr = mp;
439*c61a1653SRyan Zezeski 		mp = mp->b_cont;
440*c61a1653SRyan Zezeski 	}
441*c61a1653SRyan Zezeski 
442*c61a1653SRyan Zezeski 	/*
443*c61a1653SRyan Zezeski 	 * Both full and partial checksum rely on finding the IP
444*c61a1653SRyan Zezeski 	 * header in the current mblk. Our native TCP stack honors
445*c61a1653SRyan Zezeski 	 * this assumption but it's prudent to guard our future
446*c61a1653SRyan Zezeski 	 * clients that might not honor this contract.
447*c61a1653SRyan Zezeski 	 */
448*c61a1653SRyan Zezeski 	ASSERT3U(MBLKL(mp), >=, ip_hdr_offset + ip_hdr_sz);
449*c61a1653SRyan Zezeski 	if (MBLKL(mp) < (ip_hdr_offset + ip_hdr_sz)) {
450*c61a1653SRyan Zezeski 		err = "mblk doesn't contain IP header";
451*c61a1653SRyan Zezeski 		goto bail;
452*c61a1653SRyan Zezeski 	}
453*c61a1653SRyan Zezeski 
454*c61a1653SRyan Zezeski 	/*
455*c61a1653SRyan Zezeski 	 * We are about to modify the header mblk; make sure we are
456*c61a1653SRyan Zezeski 	 * modifying our own copy. The code that follows assumes that
457*c61a1653SRyan Zezeski 	 * the IP/ULP headers exist in this mblk (and drops the
458*c61a1653SRyan Zezeski 	 * message if they don't).
459*c61a1653SRyan Zezeski 	 */
460*c61a1653SRyan Zezeski 	if (DB_REF(mp) > 1) {
461*c61a1653SRyan Zezeski 		mblk_t *tmp = copyb(mp);
462*c61a1653SRyan Zezeski 
463*c61a1653SRyan Zezeski 		if (tmp == NULL) {
464*c61a1653SRyan Zezeski 			err = "copyb failed";
465*c61a1653SRyan Zezeski 			goto bail;
466*c61a1653SRyan Zezeski 		}
467*c61a1653SRyan Zezeski 
468*c61a1653SRyan Zezeski 		if (skipped_hdr != NULL) {
469*c61a1653SRyan Zezeski 			ASSERT3P(skipped_hdr->b_cont, ==, mp);
470*c61a1653SRyan Zezeski 			skipped_hdr->b_cont = tmp;
471*c61a1653SRyan Zezeski 		}
472*c61a1653SRyan Zezeski 
473*c61a1653SRyan Zezeski 		tmp->b_cont = mp->b_cont;
474*c61a1653SRyan Zezeski 		freeb(mp);
475*c61a1653SRyan Zezeski 		mp = tmp;
476*c61a1653SRyan Zezeski 	}
477*c61a1653SRyan Zezeski 
478*c61a1653SRyan Zezeski 	if (etype == ETHERTYPE_IP) {
479*c61a1653SRyan Zezeski 		ipha_t *ipha = (ipha_t *)(mp->b_rptr + ip_hdr_offset);
480*c61a1653SRyan Zezeski 
481*c61a1653SRyan Zezeski 		if ((flags & HCK_FULLCKSUM) && (emul & MAC_HWCKSUM_EMUL)) {
482*c61a1653SRyan Zezeski 			if (!mac_sw_cksum_ipv4(mp, ip_hdr_offset, ipha, &err))
483*c61a1653SRyan Zezeski 				goto bail;
484*c61a1653SRyan Zezeski 		}
485*c61a1653SRyan Zezeski 
486*c61a1653SRyan Zezeski 		/* We always update the ULP checksum flags. */
487*c61a1653SRyan Zezeski 		if ((flags & HCK_FULLCKSUM) && (emul & MAC_HWCKSUM_EMULS)) {
488*c61a1653SRyan Zezeski 			flags &= ~HCK_FULLCKSUM;
489*c61a1653SRyan Zezeski 			flags |= HCK_FULLCKSUM_OK;
490*c61a1653SRyan Zezeski 			value = 0;
491*c61a1653SRyan Zezeski 		}
492da14cebeSEric Cheng 
493da14cebeSEric Cheng 		/*
494*c61a1653SRyan Zezeski 		 * While unlikely, it's possible to write code that
495*c61a1653SRyan Zezeski 		 * might end up calling mac_sw_cksum() twice on the
496*c61a1653SRyan Zezeski 		 * same mblk (performing both LSO and checksum
497*c61a1653SRyan Zezeski 		 * emulation in a single mblk chain loop -- the LSO
498