xref: /illumos-gate/usr/src/common/inet/inet_hash.c (revision 3e8b6b84)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2015, Joyent, Inc.
24  */
25 
26 /*
27  * Common routines usable by any part of the networking stack for hashing
28  * packets. The hashing logic originally was part of MAC, but it has more
29  * utility being usable by the rest of the broader system.
30  */
31 
32 #include <sys/types.h>
33 #include <sys/mac.h>
34 #include <sys/strsubr.h>
35 #include <sys/strsun.h>
36 #include <sys/vlan.h>
37 #include <inet/ip.h>
38 #include <inet/ip_impl.h>
39 #include <inet/ip6.h>
40 #include <sys/dlpi.h>
41 #include <sys/sunndi.h>
42 #include <inet/ipsec_impl.h>
43 #include <inet/sadb.h>
44 #include <inet/ipsecesp.h>
45 #include <inet/ipsecah.h>
46 #include <inet/inet_hash.h>
47 
48 /*
49  * Determines the IPv6 header length accounting for all the optional IPv6
50  * headers (hop-by-hop, destination, routing and fragment). The header length
51  * and next header value (a transport header) is captured.
52  *
53  * Returns B_FALSE if all the IP headers are not in the same mblk otherwise
54  * returns B_TRUE.
55  */
56 static boolean_t
inet_pkthash_ip_hdr_length_v6(ip6_t * ip6h,uint8_t * endptr,uint16_t * hdr_length,uint8_t * next_hdr,ip6_frag_t ** fragp)57 inet_pkthash_ip_hdr_length_v6(ip6_t *ip6h, uint8_t *endptr,
58     uint16_t *hdr_length,  uint8_t *next_hdr, ip6_frag_t **fragp)
59 {
60 	uint16_t length;
61 	uint_t	ehdrlen;
62 	uint8_t *whereptr;
63 	uint8_t *nexthdrp;
64 	ip6_dest_t *desthdr;
65 	ip6_rthdr_t *rthdr;
66 	ip6_frag_t *fraghdr;
67 
68 	if (((uchar_t *)ip6h + IPV6_HDR_LEN) > endptr)
69 		return (B_FALSE);
70 	ASSERT(IPH_HDR_VERSION(ip6h) == IPV6_VERSION);
71 	length = IPV6_HDR_LEN;
72 	whereptr = ((uint8_t *)&ip6h[1]); /* point to next hdr */
73 
74 	if (fragp != NULL)
75 		*fragp = NULL;
76 
77 	nexthdrp = &ip6h->ip6_nxt;
78 	while (whereptr < endptr) {
79 		/* Is there enough left for len + nexthdr? */
80 		if (whereptr + MIN_EHDR_LEN > endptr)
81 			break;
82 
83 		switch (*nexthdrp) {
84 		case IPPROTO_HOPOPTS:
85 		case IPPROTO_DSTOPTS:
86 			/* Assumes the headers are identical for hbh and dst */
87 			desthdr = (ip6_dest_t *)whereptr;
88 			ehdrlen = 8 * (desthdr->ip6d_len + 1);
89 			if ((uchar_t *)desthdr +  ehdrlen > endptr)
90 				return (B_FALSE);
91 			nexthdrp = &desthdr->ip6d_nxt;
92 			break;
93 		case IPPROTO_ROUTING:
94 			rthdr = (ip6_rthdr_t *)whereptr;
95 			ehdrlen =  8 * (rthdr->ip6r_len + 1);
96 			if ((uchar_t *)rthdr +  ehdrlen > endptr)
97 				return (B_FALSE);
98 			nexthdrp = &rthdr->ip6r_nxt;
99 			break;
100 		case IPPROTO_FRAGMENT:
101 			fraghdr = (ip6_frag_t *)whereptr;
102 			ehdrlen = sizeof (ip6_frag_t);
103 			if ((uchar_t *)&fraghdr[1] > endptr)
104 				return (B_FALSE);
105 			nexthdrp = &fraghdr->ip6f_nxt;
106 			if (fragp != NULL)
107 				*fragp = fraghdr;
108 			break;
109 		case IPPROTO_NONE:
110 			/* No next header means we're finished */
111 		default:
112 			*hdr_length = length;
113 			*next_hdr = *nexthdrp;
114 			return (B_TRUE);
115 		}
116 		length += ehdrlen;
117 		whereptr += ehdrlen;
118 		*hdr_length = length;
119 		*next_hdr = *nexthdrp;
120 	}
121 	switch (*nexthdrp) {
122 	case IPPROTO_HOPOPTS:
123 	case IPPROTO_DSTOPTS:
124 	case IPPROTO_ROUTING:
125 	case IPPROTO_FRAGMENT:
126 		/*
127 		 * If any known extension headers are still to be processed,
128 		 * the packet's malformed (or at least all the IP header(s) are
129 		 * not in the same mblk - and that should never happen.
130 		 */
131 		return (B_FALSE);
132 
133 	default:
134 		/*
135 		 * If we get here, we know that all of the IP headers were in
136 		 * the same mblk, even if the ULP header is in the next mblk.
137 		 */
138 		*hdr_length = length;
139 		*next_hdr = *nexthdrp;
140 		return (B_TRUE);
141 	}
142 }
143 
/*
 * Byte-wise XOR folds over the first 2, 4 or 6 bytes addressed by x.
 * The wider folds are built from the 2-byte fold; x is evaluated more
 * than once, so it must be free of side effects.
 */
#define	PKT_HASH_2BYTES(x)	((x)[0] ^ (x)[1])
#define	PKT_HASH_4BYTES(x)	\
	(PKT_HASH_2BYTES(x) ^ PKT_HASH_2BYTES((x) + 2))
#define	PKT_HASH_MAC(x)		\
	(PKT_HASH_4BYTES(x) ^ PKT_HASH_2BYTES((x) + 4))
147 uint64_t
inet_pkt_hash(uint_t media,mblk_t * mp,uint8_t policy)148 inet_pkt_hash(uint_t media, mblk_t *mp, uint8_t policy)
149 {
150 	struct ether_header *ehp;
151 	uint64_t hash = 0;
152 	uint16_t sap;
153 	uint_t skip_len;
154 	uint8_t proto;
155 	boolean_t ip_fragmented;
156 
157 	/*
158 	 * We may want to have one of these per MAC type plugin in the
159 	 * future. For now supports only ethernet.
160 	 */
161 	if (media != DL_ETHER)
162 		return (0L);
163 
164 	/* for now we support only outbound packets */
165 	ASSERT(IS_P2ALIGNED(mp->b_rptr, sizeof (uint16_t)));
166 	ASSERT(MBLKL(mp) >= sizeof (struct ether_header));
167 
168 	/* compute L2 hash */
169 
170 	ehp = (struct ether_header *)mp->b_rptr;
171 
172 	if ((policy & INET_PKT_HASH_L2) != 0) {
173 		uchar_t *mac_src = ehp->ether_shost.ether_addr_octet;
174 		uchar_t *mac_dst = ehp->ether_dhost.ether_addr_octet;
175 		hash = PKT_HASH_MAC(mac_src) ^ PKT_HASH_MAC(mac_dst);
176 		policy &= ~INET_PKT_HASH_L2;
177 	}
178 
179 	if (policy == 0)
180 		goto done;
181 
182 	/* skip ethernet header */
183 
184 	sap = ntohs(ehp->ether_type);
185 	if (sap == ETHERTYPE_VLAN) {
186 		struct ether_vlan_header *evhp;
187 		mblk_t *newmp = NULL;
188 
189 		skip_len = sizeof (struct ether_vlan_header);
190 		if (MBLKL(mp) < skip_len) {
191 			/* the vlan tag is the payload, pull up first */
192 			newmp = msgpullup(mp, -1);
193 			if ((newmp == NULL) || (MBLKL(newmp) < skip_len)) {
194 				goto done;
195 			}
196 			evhp = (struct ether_vlan_header *)newmp->b_rptr;
197 		} else {
198 			evhp = (struct ether_vlan_header *)mp->b_rptr;
199 		}
200 
201 		sap = ntohs(evhp->ether_type);
202 		freemsg(newmp);
203 	} else {
204 		skip_len = sizeof (struct ether_header);
205 	}
206 
207 	/* if ethernet header is in its own mblk, skip it */
208 	if (MBLKL(mp) <= skip_len) {
209 		skip_len -= MBLKL(mp);
210 		mp = mp->b_cont;
211 		if (mp == NULL)
212 			goto done;
213 	}
214 
215 	sap = (sap < ETHERTYPE_802_MIN) ? 0 : sap;
216 
217 	/* compute IP src/dst addresses hash and skip IPv{4,6} header */
218 
219 	switch (sap) {
220 	case ETHERTYPE_IP: {
221 		ipha_t *iphp;
222 
223 		/*
224 		 * If the header is not aligned or the header doesn't fit
225 		 * in the mblk, bail now. Note that this may cause packet
226 		 * reordering.
227 		 */
228 		iphp = (ipha_t *)(mp->b_rptr + skip_len);
229 		if (((unsigned char *)iphp + sizeof (ipha_t) > mp->b_wptr) ||
230 		    !OK_32PTR((char *)iphp))
231 			goto done;
232 
233 		proto = iphp->ipha_protocol;
234 		skip_len += IPH_HDR_LENGTH(iphp);
235 
236 		/* Check if the packet is fragmented. */
237 		ip_fragmented = ntohs(iphp->ipha_fragment_offset_and_flags) &
238 		    IPH_OFFSET;
239 
240 		/*
241 		 * For fragmented packets, use addresses in addition to
242 		 * the frag_id to generate the hash inorder to get
243 		 * better distribution.
244 		 */
245 		if (ip_fragmented || (policy & INET_PKT_HASH_L3) != 0) {
246 			uint8_t *ip_src = (uint8_t *)&(iphp->ipha_src);
247 			uint8_t *ip_dst = (uint8_t *)&(iphp->ipha_dst);
248 
249 			hash ^= (PKT_HASH_4BYTES(ip_src) ^
250 			    PKT_HASH_4BYTES(ip_dst));
251 			policy &= ~INET_PKT_HASH_L3;
252 		}
253 
254 		if (ip_fragmented) {
255 			uint8_t *identp = (uint8_t *)&iphp->ipha_ident;
256 			hash ^= PKT_HASH_2BYTES(identp);
257 			goto done;
258 		}
259 		break;
260 	}
261 	case ETHERTYPE_IPV6: {
262 		ip6_t *ip6hp;
263 		ip6_frag_t *frag = NULL;
264 		uint16_t hdr_length;
265 
266 		/*
267 		 * If the header is not aligned or the header doesn't fit
268 		 * in the mblk, bail now. Note that this may cause packets
269 		 * reordering.
270 		 */
271 
272 		ip6hp = (ip6_t *)(mp->b_rptr + skip_len);
273 		if (((unsigned char *)ip6hp + IPV6_HDR_LEN > mp->b_wptr) ||
274 		    !OK_32PTR((char *)ip6hp))
275 			goto done;
276 
277 		if (!inet_pkthash_ip_hdr_length_v6(ip6hp, mp->b_wptr,
278 		    &hdr_length, &proto, &frag))
279 			goto done;
280 		skip_len += hdr_length;
281 
282 		/*
283 		 * For fragmented packets, use addresses in addition to
284 		 * the frag_id to generate the hash inorder to get
285 		 * better distribution.
286 		 */
287 		if (frag != NULL || (policy & INET_PKT_HASH_L3) != 0) {
288 			uint8_t *ip_src = &(ip6hp->ip6_src.s6_addr8[12]);
289 			uint8_t *ip_dst = &(ip6hp->ip6_dst.s6_addr8[12]);
290 
291 			hash ^= (PKT_HASH_4BYTES(ip_src) ^
292 			    PKT_HASH_4BYTES(ip_dst));
293 			policy &= ~INET_PKT_HASH_L3;
294 		}
295 
296 		if (frag != NULL) {
297 			uint8_t *identp = (uint8_t *)&frag->ip6f_ident;
298 			hash ^= PKT_HASH_4BYTES(identp);
299 			goto done;
300 		}
301 		break;
302 	}
303 	default:
304 		goto done;
305 	}
306 
307 	if (policy == 0)
308 		goto done;
309 
310 	/* if ip header is in its own mblk, skip it */
311 	if (MBLKL(mp) <= skip_len) {
312 		skip_len -= MBLKL(mp);
313 		mp = mp->b_cont;
314 		if (mp == NULL)
315 			goto done;
316 	}
317 
318 	/* parse ULP header */
319 again:
320 	switch (proto) {
321 	case IPPROTO_TCP:
322 	case IPPROTO_UDP:
323 	case IPPROTO_ESP:
324 	case IPPROTO_SCTP:
325 		/*
326 		 * These Internet Protocols are intentionally designed
327 		 * for hashing from the git-go.  Port numbers are in the first
328 		 * word for transports, SPI is first for ESP.
329 		 */
330 		if (mp->b_rptr + skip_len + 4 > mp->b_wptr)
331 			goto done;
332 		hash ^= PKT_HASH_4BYTES((mp->b_rptr + skip_len));
333 		break;
334 
335 	case IPPROTO_AH: {
336 		ah_t *ah = (ah_t *)(mp->b_rptr + skip_len);
337 		uint_t ah_length = AH_TOTAL_LEN(ah);
338 
339 		if ((unsigned char *)ah + sizeof (ah_t) > mp->b_wptr)
340 			goto done;
341 
342 		proto = ah->ah_nexthdr;
343 		skip_len += ah_length;
344 
345 		/* if AH header is in its own mblk, skip it */
346 		if (MBLKL(mp) <= skip_len) {
347 			skip_len -= MBLKL(mp);
348 			mp = mp->b_cont;
349 			if (mp == NULL)
350 				goto done;
351 		}
352 
353 		goto again;
354 	}
355 	}
356 
357 done:
358 	return (hash);
359 }
360