1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  */
24 
25 #include <errno.h>
26 #include <fcntl.h>
27 #include <priv_utils.h>
28 #include <signal.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 #include <strings.h>
32 #include <sys/param.h>
33 #include <sys/stat.h>
34 #include <unistd.h>
35 #include <zone.h>
36 #include <libipadm.h>
37 #include <libdladm.h>
38 #include <libdllink.h>
39 #include <net/route.h>
40 #include <netinet/in.h>
41 #include <net/route.h>
42 #include <errno.h>
43 #include <inet/ip.h>
44 #include <string.h>
45 #include <libinetutil.h>
46 #include <unistd.h>
47 #include <libipadm_impl.h>
48 #include <sys/brand.h>
49 
/*
 * Round `a' up to a multiple of sizeof (long); a value that is not greater
 * than zero yields sizeof (long). Used below to size and step over the
 * socket addresses packed into routing socket messages.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
/*
 * All-ones 32-bit value. i_ipadm_rtioctl4() treats a masklen equal to this
 * value as a request for a host route.
 */
#define	HOST_MASK	0xffffffffU
53 
/*
 * State handed by ipadm_init_net_from_gz() to the per-datalink callback
 * i_ipadm_zone_network_attr().
 */
typedef struct ngz_walk_data_s {
	ipadm_handle_t	ngz_iph;	/* caller's libipadm handle */
	zoneid_t	ngz_zoneid;	/* zone id taken from the handle */
	char		*ngz_ifname;	/* if non-NULL, only this link */
	boolean_t	ngz_s10c;	/* i_ipadm_zone_is_s10c() result */
	ipadm_status_t  ngz_ipstatus;	/* failure seen for ngz_ifname */
	persist_cb_t	ngz_persist_if;	/* persistent-state callback */
} ngz_walk_data_t;
62 
63 /*
64  * Tell the kernel to add, delete or change a route
65  */
66 static void
67 i_ipadm_rtioctl4(int rtsock,
68     int action,			/* RTM_DELETE, etc */
69     in_addr_t dst,
70     in_addr_t gate,
71     uint_t masklen,
72     char *ifname,
73     uint8_t metric,
74     int flags)
75 {
76 	static int rt_sock_seqno = 0;
77 	struct {
78 		struct rt_msghdr w_rtm;
79 		struct sockaddr_in w_dst;
80 		struct sockaddr_in w_gate;
81 		uint8_t w_space[512];
82 	} w;
83 	struct sockaddr_in w_mask;
84 	struct sockaddr_dl w_ifp;
85 	uint8_t *cp;
86 	long cc;
87 
88 again:
89 	(void) memset(&w, 0, sizeof (w));
90 	(void) memset(&w_mask, 0, sizeof (w_mask));
91 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
92 	cp = w.w_space;
93 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
94 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
95 	w.w_rtm.rtm_version = RTM_VERSION;
96 	w.w_rtm.rtm_type = action;
97 	w.w_rtm.rtm_flags = (flags | RTF_ZONE);
98 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
99 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
100 	if (metric != 0 || action == RTM_CHANGE) {
101 		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
102 		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
103 	}
104 	w.w_dst.sin_family = AF_INET;
105 	w.w_dst.sin_addr.s_addr = dst;
106 	w.w_gate.sin_family = AF_INET;
107 	w.w_gate.sin_addr.s_addr = gate;
108 	if (masklen == HOST_MASK) {
109 		w.w_rtm.rtm_flags |= RTF_HOST;
110 	} else {
111 		struct sockaddr_storage m4;
112 
113 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
114 		w_mask.sin_family = AF_INET;
115 		if (plen2mask(masklen, AF_INET, &m4) != 0) {
116 			return;
117 		}
118 		w_mask.sin_addr = ((struct sockaddr_in *)&m4)->sin_addr;
119 		(void) memmove(cp, &w_mask, sizeof (w_mask));
120 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
121 		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
122 	}
123 	w_ifp.sdl_family = AF_LINK;
124 	w.w_rtm.rtm_addrs |= RTA_IFP;
125 	w_ifp.sdl_index = if_nametoindex(ifname);
126 	(void) memmove(cp, &w_ifp, sizeof (w_ifp));
127 	w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
128 
129 	cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
130 	if (cc < 0) {
131 		if (errno == ESRCH && (action == RTM_CHANGE ||
132 		    action == RTM_DELETE)) {
133 			if (action == RTM_CHANGE) {
134 				action = RTM_ADD;
135 				goto again;
136 			}
137 			return;
138 		}
139 		return;
140 	} else if (cc != w.w_rtm.rtm_msglen) {
141 		return;
142 	}
143 }
144 
145 static void
146 i_ipadm_rtioctl6(int rtsock,
147     int action,			/* RTM_DELETE, etc */
148     in6_addr_t dst,
149     in6_addr_t gate,
150     uint_t prefix_length,
151     char *ifname,
152     int flags)
153 {
154 	static int rt_sock_seqno = 0;
155 	struct {
156 		struct rt_msghdr w_rtm;
157 		struct sockaddr_in6 w_dst;
158 		struct sockaddr_in6 w_gate;
159 		uint8_t w_space[512];
160 	} w;
161 	struct sockaddr_in6 w_mask;
162 	struct sockaddr_dl w_ifp;
163 	uint8_t *cp;
164 	long cc;
165 
166 again:
167 	(void) memset(&w, 0, sizeof (w));
168 	(void) memset(&w_mask, 0, sizeof (w_mask));
169 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
170 	cp = w.w_space;
171 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
172 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in6));
173 	w.w_rtm.rtm_version = RTM_VERSION;
174 	w.w_rtm.rtm_type = action;
175 	w.w_rtm.rtm_flags = (flags | RTF_ZONE);
176 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
177 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
178 	w.w_dst.sin6_family = AF_INET6;
179 	w.w_dst.sin6_addr = dst;
180 	w.w_gate.sin6_family = AF_INET6;
181 	w.w_gate.sin6_addr = gate;
182 	if (prefix_length == IPV6_ABITS) {
183 		w.w_rtm.rtm_flags |= RTF_HOST;
184 	} else {
185 		struct sockaddr_storage m6;
186 
187 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
188 		w_mask.sin6_family = AF_INET6;
189 		if (plen2mask(prefix_length, AF_INET6, &m6) != 0) {
190 			return;
191 		}
192 		w_mask.sin6_addr = ((struct sockaddr_in6 *)&m6)->sin6_addr;
193 		(void) memmove(cp, &w_mask, sizeof (w_mask));
194 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in6));
195 		w.w_rtm.rtm_msglen +=
196 		    ROUNDUP_LONG(sizeof (struct sockaddr_in6));
197 	}
198 	w_ifp.sdl_family = AF_LINK;
199 	w.w_rtm.rtm_addrs |= RTA_IFP;
200 	w_ifp.sdl_index = if_nametoindex(ifname);
201 	(void) memmove(cp, &w_ifp, sizeof (w_ifp));
202 	w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
203 
204 	cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
205 	if (cc < 0) {
206 		if (errno == ESRCH && (action == RTM_CHANGE ||
207 		    action == RTM_DELETE)) {
208 			if (action == RTM_CHANGE) {
209 				action = RTM_ADD;
210 				goto again;
211 			}
212 			return;
213 		}
214 		return;
215 	} else if (cc != w.w_rtm.rtm_msglen) {
216 		return;
217 	}
218 }
219 
220 /*
221  * Return TRUE if running in a Solaris 10 Container.
222  */
223 static boolean_t
224 i_ipadm_zone_is_s10c(zoneid_t zoneid)
225 {
226 	char brand[MAXNAMELEN];
227 
228 	if (zone_getattr(zoneid, ZONE_ATTR_BRAND, brand, sizeof (brand)) < 0)
229 		return (B_FALSE);
230 	return (strcmp(brand, NATIVE_BRAND_NAME) != 0);
231 }
232 
233 /*
234  * Configure addresses on link. `buf' is a string of comma-separated
235  * IP addresses.
236  */
237 static ipadm_status_t
238 i_ipadm_ngz_addr(ipadm_handle_t iph, char *link, char *buf)
239 {
240 	ipadm_status_t ipstatus;
241 	ipadm_addrobj_t ipaddr;
242 	char *cp;
243 
244 	for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
245 		ipstatus = ipadm_create_addrobj(IPADM_ADDR_STATIC, link,
246 		    &ipaddr);
247 		if (ipstatus != IPADM_SUCCESS)
248 			return (ipstatus);
249 		/*
250 		 * ipadm_set_addr does the appropriate name resolution and
251 		 * sets up the ipadm_static_addr field.
252 		 */
253 		ipstatus = ipadm_set_addr(ipaddr, cp, AF_UNSPEC);
254 		if (ipstatus != IPADM_SUCCESS) {
255 			ipadm_destroy_addrobj(ipaddr);
256 			return (ipstatus);
257 		}
258 
259 		ipstatus = ipadm_create_addr(iph, ipaddr,
260 		    (IPADM_OPT_ACTIVE | IPADM_OPT_UP));
261 		if (ipstatus != IPADM_SUCCESS) {
262 			ipadm_destroy_addrobj(ipaddr);
263 			return (ipstatus);
264 		}
265 		ipadm_destroy_addrobj(ipaddr);
266 	}
267 	return (IPADM_SUCCESS);
268 }
269 
270 /*
271  * The (*persist_if)() will set up persistent information for the interface,
272  * based on what interface families are required, so just resolve the
273  * address and inform the callback about the linkname, and required address
274  * families.
275  */
276 static ipadm_status_t
277 i_ipadm_ngz_persist_if(char *link, char *buf,
278     void (*ngz_persist_if)(char *, boolean_t, boolean_t))
279 {
280 	char *cp, *slashp, addr[INET6_ADDRSTRLEN];
281 	ipadm_status_t ipstatus;
282 	struct sockaddr_storage ss;
283 	boolean_t v4 = B_FALSE;
284 	boolean_t v6 = B_FALSE;
285 
286 	for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
287 		/* remove the /<masklen> that's always added by zoneadmd */
288 		slashp = strchr(cp, '/');
289 		(void) strlcpy(addr, cp, (slashp - cp + 1));
290 
291 		/* resolve the address to find the family */
292 		bzero(&ss, sizeof (ss));
293 		ipstatus = i_ipadm_resolve_addr(addr, AF_UNSPEC, &ss);
294 		if (ipstatus != IPADM_SUCCESS)
295 			return (ipstatus);
296 		switch (ss.ss_family) {
297 		case AF_INET:
298 			v4 = B_TRUE;
299 			break;
300 		case AF_INET6:
301 			v6 = B_TRUE;
302 			break;
303 		default:
304 			return (IPADM_BAD_ADDR);
305 		}
306 	}
307 	(*ngz_persist_if)(link, v4, v6);
308 	return (IPADM_SUCCESS);
309 }
310 
311 static void
312 i_ipadm_create_ngz_route(int rtsock, char *link, uint8_t *buf, size_t buflen)
313 {
314 	struct in6_addr defrouter;
315 	boolean_t isv6;
316 	struct in_addr gw4;
317 	uint8_t *cp;
318 	const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
319 
320 	if (rtsock == -1)
321 		return;
322 
323 	for (cp = buf; cp < buf + buflen; cp += sizeof (defrouter)) {
324 		bcopy(cp, &defrouter, sizeof (defrouter));
325 		if (IN6_IS_ADDR_UNSPECIFIED(&defrouter))
326 			break;
327 		isv6 = !IN6_IS_ADDR_V4MAPPED(&defrouter);
328 		if (isv6) {
329 			i_ipadm_rtioctl6(rtsock, RTM_ADD, ipv6_all_zeros,
330 			    defrouter, 0, link, RTF_GATEWAY);
331 		} else {
332 			IN6_V4MAPPED_TO_INADDR(&defrouter, &gw4);
333 			i_ipadm_rtioctl4(rtsock, RTM_ADD, INADDR_ANY,
334 			    gw4.s_addr, 0, link, 0, RTF_GATEWAY);
335 		}
336 	}
337 }
338 
339 /*
340  * Wrapper function to zone_getattr() for retrieving from-gz attributes that
341  * were made availabe for exclusive IP non-global zones by zoneadmd from teh
342  * global zone.
343  */
344 static ipadm_status_t
345 i_ipadm_zone_get_network(zoneid_t zoneid, datalink_id_t linkid, int type,
346     void *buf, size_t *bufsize)
347 {
348 	zone_net_data_t *zndata;
349 
350 	zndata = calloc(1, sizeof (*zndata) + *bufsize);
351 	if (zndata == NULL)
352 		return (IPADM_NO_MEMORY);
353 	zndata->zn_type = type;
354 	zndata->zn_linkid = linkid;
355 	zndata->zn_len = *bufsize;
356 
357 	if (zone_getattr(zoneid, ZONE_ATTR_NETWORK, zndata,
358 	    sizeof (*zndata) + *bufsize) < 0) {
359 		return (ipadm_errno2status(errno));
360 	}
361 	*bufsize = zndata->zn_len;
362 	bcopy(zndata->zn_val, buf, *bufsize);
363 	return (IPADM_SUCCESS);
364 }
365 
366 /*
367  * Callback function that configures a single datalink in a non-global zone.
368  */
369 static int
370 i_ipadm_zone_network_attr(dladm_handle_t dh, datalink_id_t linkid, void *arg)
371 {
372 	ngz_walk_data_t *nwd = arg;
373 	zoneid_t zoneid = nwd->ngz_zoneid;
374 	uint8_t buf[PIPE_BUF];
375 	dladm_status_t dlstatus;
376 	ipadm_status_t ipstatus;
377 	char link[MAXLINKNAMELEN];
378 	ipadm_handle_t iph = nwd->ngz_iph;
379 	int rtsock = iph->iph_rtsock;
380 	char *ifname = nwd->ngz_ifname;
381 	boolean_t s10c = nwd->ngz_s10c;
382 	boolean_t is_ipmgmtd = (iph->iph_flags & IPH_IPMGMTD);
383 	size_t bufsize = sizeof (buf);
384 
385 	bzero(buf, bufsize);
386 	ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
387 	    ZONE_NETWORK_ADDRESS, buf, &bufsize);
388 	if (ipstatus != IPADM_SUCCESS)
389 		goto fail;
390 
391 	dlstatus = dladm_datalink_id2info(dh, linkid, NULL, NULL,
392 	    NULL, link, sizeof (link));
393 	if (dlstatus != DLADM_STATUS_OK)
394 		return (DLADM_WALK_CONTINUE);
395 
396 	/*
397 	 * if ifname has been specified, then skip interfaces that don't match
398 	 */
399 	if (ifname != NULL && strcmp(ifname, link) != 0)
400 		return (DLADM_WALK_CONTINUE);
401 
402 	/*
403 	 * Plumb the interface and configure addresses on for S10 Containers.
404 	 * We need to always do this for S10C because ipadm persistent
405 	 * configuration is not available in S10C. For ipkg zones,
406 	 * we skip the actual plumbing/configuration, but will call the
407 	 * (*ngz_persist_if)() callback to create the persistent state for the
408 	 * interface. The interface will be configured in ipkg zones when
409 	 * ipadm_enable_if() is invoked to restore persistent configuration.
410 	 */
411 	if (is_ipmgmtd && !s10c) {
412 		(void) i_ipadm_ngz_persist_if(link, (char *)buf,
413 		    nwd->ngz_persist_if);
414 		return (DLADM_WALK_CONTINUE);
415 	}
416 	ipstatus = i_ipadm_ngz_addr(iph, link, (char *)buf);
417 	if (ipstatus != IPADM_SUCCESS)
418 		goto fail;
419 
420 	/* apply any default router information.  */
421 	bufsize = sizeof (buf);
422 	bzero(buf, bufsize);
423 	ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
424 	    ZONE_NETWORK_DEFROUTER, buf, &bufsize);
425 	if (ipstatus != IPADM_SUCCESS)
426 		goto fail;
427 
428 	i_ipadm_create_ngz_route(rtsock, link, buf, bufsize);
429 
430 	return (DLADM_WALK_CONTINUE);
431 fail:
432 	if (ifname != NULL) {
433 		nwd->ngz_ipstatus = ipstatus;
434 		return (DLADM_WALK_TERMINATE);
435 	}
436 	return (DLADM_WALK_CONTINUE);
437 }
438 
439 /*
440  * ipmgmt_net_from_gz_init() initializes exclusive-IP stack non-global zones by
441  * extracting configuration that has been saved in the kernel and applying
442  * that information to the appropriate datalinks for the zone. If an ifname
443  * argument is passed in, only the selected IP interface corresponding to
444  * datalink will be initialized, otherwise all datalinks will be plumbed for IP
445  * and IP address and route information will be configured.
446  */
447 ipadm_status_t
448 ipadm_init_net_from_gz(ipadm_handle_t iph, char *ifname,
449 	void (*persist_if)(char *, boolean_t, boolean_t))
450 {
451 	ngz_walk_data_t nwd;
452 	uint64_t flags;
453 	dladm_handle_t dlh = iph->iph_dlh;
454 	datalink_id_t linkid;
455 
456 	if (iph->iph_zoneid == GLOBAL_ZONEID)
457 		return (IPADM_NOTSUP);
458 
459 	if (ifname != NULL &&
460 	    i_ipadm_get_flags(iph, ifname, AF_INET, &flags) != IPADM_SUCCESS &&
461 	    i_ipadm_get_flags(iph, ifname, AF_INET6, &flags) != IPADM_SUCCESS)
462 		return (IPADM_ENXIO);
463 
464 	if (ifname != NULL && !(flags & IFF_L3PROTECT))
465 		return (IPADM_SUCCESS); /* nothing to initialize */
466 
467 	nwd.ngz_iph = iph;
468 	nwd.ngz_zoneid = iph->iph_zoneid;
469 	nwd.ngz_ifname = ifname;
470 	nwd.ngz_persist_if = persist_if;
471 	nwd.ngz_s10c = i_ipadm_zone_is_s10c(iph->iph_zoneid);
472 	nwd.ngz_ipstatus = IPADM_SUCCESS;
473 	if (ifname != NULL) {
474 		if (dladm_name2info(dlh, ifname, &linkid, NULL, NULL,
475 		    NULL) != DLADM_STATUS_OK) {
476 			return (IPADM_ENXIO);
477 		}
478 		(void) i_ipadm_zone_network_attr(dlh, linkid, &nwd);
479 	} else {
480 		(void) dladm_walk_datalink_id(i_ipadm_zone_network_attr, dlh,
481 		    &nwd, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE,
482 		    DLADM_OPT_PERSIST);
483 	}
484 	return (nwd.ngz_ipstatus);
485 }
486