1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2019 OmniOS Community Edition (OmniOSce) Association.
24  */
25 
26 #include <errno.h>
27 #include <fcntl.h>
28 #include <priv_utils.h>
29 #include <signal.h>
30 #include <stdlib.h>
31 #include <stdio.h>
32 #include <strings.h>
33 #include <sys/param.h>
34 #include <sys/stat.h>
35 #include <unistd.h>
36 #include <zone.h>
37 #include <libipadm.h>
38 #include <libdladm.h>
39 #include <libdllink.h>
40 #include <net/route.h>
41 #include <netinet/in.h>
42 #include <net/route.h>
43 #include <errno.h>
44 #include <inet/ip.h>
45 #include <string.h>
46 #include <libinetutil.h>
47 #include <unistd.h>
48 #include <libipadm_impl.h>
49 #include <sys/brand.h>
50 
/*
 * Round `a' up to the next multiple of sizeof (long). A value that is not
 * greater than zero yields sizeof (long). Used below to size the socket
 * address entries that make up a routing socket message.
 */
#define	ROUNDUP_LONG(a) \
	((a) > 0 ? (1 + (((a) - 1) | (sizeof (long) - 1))) : sizeof (long))
/*
 * All-ones 32-bit value. i_ipadm_rtioctl4() compares its `masklen' argument
 * against this to decide whether to install a host route.
 */
#define	HOST_MASK	0xffffffffU
54 
/*
 * State shared between ipadm_init_net_from_gz() and the per-datalink
 * callback i_ipadm_zone_network_attr().
 */
typedef struct ngz_walk_data_s {
	/* ipadm handle; supplies the routing socket and the handle flags */
	ipadm_handle_t	ngz_iph;
	/* zone whose stored network attributes are being read */
	zoneid_t	ngz_zoneid;
	/* if not NULL, only the link with this name is processed */
	char		*ngz_ifname;
	/* result of i_ipadm_zone_is_s10c() for ngz_zoneid */
	boolean_t	ngz_s10c;
	/* set by the callback on failure when ngz_ifname is not NULL */
	ipadm_status_t  ngz_ipstatus;
	/* callback invoked with (link name, needs IPv4, needs IPv6) */
	persist_cb_t	ngz_persist_if;
} ngz_walk_data_t;
63 
64 /*
65  * Tell the kernel to add, delete or change a route
66  */
67 static void
i_ipadm_rtioctl4(int rtsock,int action,in_addr_t dst,in_addr_t gate,uint_t masklen,char * ifname,uint8_t metric,int flags)68 i_ipadm_rtioctl4(int rtsock,
69     int action,			/* RTM_DELETE, etc */
70     in_addr_t dst,
71     in_addr_t gate,
72     uint_t masklen,
73     char *ifname,
74     uint8_t metric,
75     int flags)
76 {
77 	static int rt_sock_seqno = 0;
78 	struct {
79 		struct rt_msghdr w_rtm;
80 		struct sockaddr_in w_dst;
81 		struct sockaddr_in w_gate;
82 		uint8_t w_space[512];
83 	} w;
84 	struct sockaddr_in w_mask;
85 	struct sockaddr_dl w_ifp;
86 	uint8_t *cp;
87 	long cc;
88 
89 again:
90 	(void) memset(&w, 0, sizeof (w));
91 	(void) memset(&w_mask, 0, sizeof (w_mask));
92 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
93 	cp = w.w_space;
94 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
95 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in));
96 	w.w_rtm.rtm_version = RTM_VERSION;
97 	w.w_rtm.rtm_type = action;
98 	w.w_rtm.rtm_flags = (flags | RTF_ZONE);
99 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
100 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
101 	if (metric != 0 || action == RTM_CHANGE) {
102 		w.w_rtm.rtm_rmx.rmx_hopcount = metric;
103 		w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
104 	}
105 	w.w_dst.sin_family = AF_INET;
106 	w.w_dst.sin_addr.s_addr = dst;
107 	w.w_gate.sin_family = AF_INET;
108 	w.w_gate.sin_addr.s_addr = gate;
109 	if (masklen == HOST_MASK) {
110 		w.w_rtm.rtm_flags |= RTF_HOST;
111 	} else {
112 		struct sockaddr_storage m4;
113 
114 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
115 		w_mask.sin_family = AF_INET;
116 		if (plen2mask(masklen, AF_INET, (struct sockaddr *)&m4) != 0) {
117 			return;
118 		}
119 		w_mask.sin_addr = ((struct sockaddr_in *)&m4)->sin_addr;
120 		(void) memmove(cp, &w_mask, sizeof (w_mask));
121 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in));
122 		w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_in));
123 	}
124 	w_ifp.sdl_family = AF_LINK;
125 	w.w_rtm.rtm_addrs |= RTA_IFP;
126 	w_ifp.sdl_index = if_nametoindex(ifname);
127 	(void) memmove(cp, &w_ifp, sizeof (w_ifp));
128 	w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
129 
130 	cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
131 	if (cc < 0) {
132 		if (errno == ESRCH && (action == RTM_CHANGE ||
133 		    action == RTM_DELETE)) {
134 			if (action == RTM_CHANGE) {
135 				action = RTM_ADD;
136 				goto again;
137 			}
138 			return;
139 		}
140 		return;
141 	} else if (cc != w.w_rtm.rtm_msglen) {
142 		return;
143 	}
144 }
145 
146 static void
i_ipadm_rtioctl6(int rtsock,int action,in6_addr_t dst,in6_addr_t gate,uint_t prefix_length,char * ifname,int flags)147 i_ipadm_rtioctl6(int rtsock,
148     int action,			/* RTM_DELETE, etc */
149     in6_addr_t dst,
150     in6_addr_t gate,
151     uint_t prefix_length,
152     char *ifname,
153     int flags)
154 {
155 	static int rt_sock_seqno = 0;
156 	struct {
157 		struct rt_msghdr w_rtm;
158 		struct sockaddr_in6 w_dst;
159 		struct sockaddr_in6 w_gate;
160 		uint8_t w_space[512];
161 	} w;
162 	struct sockaddr_in6 w_mask;
163 	struct sockaddr_dl w_ifp;
164 	uint8_t *cp;
165 	long cc;
166 
167 again:
168 	(void) memset(&w, 0, sizeof (w));
169 	(void) memset(&w_mask, 0, sizeof (w_mask));
170 	(void) memset(&w_ifp, 0, sizeof (w_ifp));
171 	cp = w.w_space;
172 	w.w_rtm.rtm_msglen = sizeof (struct rt_msghdr) +
173 	    2 * ROUNDUP_LONG(sizeof (struct sockaddr_in6));
174 	w.w_rtm.rtm_version = RTM_VERSION;
175 	w.w_rtm.rtm_type = action;
176 	w.w_rtm.rtm_flags = (flags | RTF_ZONE);
177 	w.w_rtm.rtm_seq = ++rt_sock_seqno;
178 	w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
179 	w.w_dst.sin6_family = AF_INET6;
180 	w.w_dst.sin6_addr = dst;
181 	w.w_gate.sin6_family = AF_INET6;
182 	w.w_gate.sin6_addr = gate;
183 	if (prefix_length == IPV6_ABITS) {
184 		w.w_rtm.rtm_flags |= RTF_HOST;
185 	} else {
186 		struct sockaddr_storage m6;
187 
188 		w.w_rtm.rtm_addrs |= RTA_NETMASK;
189 		w_mask.sin6_family = AF_INET6;
190 		if (plen2mask(prefix_length, AF_INET6,
191 		    (struct sockaddr *)&m6) != 0) {
192 			return;
193 		}
194 		w_mask.sin6_addr = ((struct sockaddr_in6 *)&m6)->sin6_addr;
195 		(void) memmove(cp, &w_mask, sizeof (w_mask));
196 		cp += ROUNDUP_LONG(sizeof (struct sockaddr_in6));
197 		w.w_rtm.rtm_msglen +=
198 		    ROUNDUP_LONG(sizeof (struct sockaddr_in6));
199 	}
200 	w_ifp.sdl_family = AF_LINK;
201 	w.w_rtm.rtm_addrs |= RTA_IFP;
202 	w_ifp.sdl_index = if_nametoindex(ifname);
203 	(void) memmove(cp, &w_ifp, sizeof (w_ifp));
204 	w.w_rtm.rtm_msglen += ROUNDUP_LONG(sizeof (struct sockaddr_dl));
205 
206 	cc = write(rtsock, &w, w.w_rtm.rtm_msglen);
207 	if (cc < 0) {
208 		if (errno == ESRCH && (action == RTM_CHANGE ||
209 		    action == RTM_DELETE)) {
210 			if (action == RTM_CHANGE) {
211 				action = RTM_ADD;
212 				goto again;
213 			}
214 			return;
215 		}
216 		return;
217 	} else if (cc != w.w_rtm.rtm_msglen) {
218 		return;
219 	}
220 }
221 
222 /*
223  * Return TRUE if running in a Solaris 10 Container.
224  */
225 static boolean_t
i_ipadm_zone_is_s10c(zoneid_t zoneid)226 i_ipadm_zone_is_s10c(zoneid_t zoneid)
227 {
228 	char brand[MAXNAMELEN];
229 
230 	if (zone_getattr(zoneid, ZONE_ATTR_BRAND, brand, sizeof (brand)) < 0)
231 		return (B_FALSE);
232 	return (strcmp(brand, NATIVE_BRAND_NAME) != 0);
233 }
234 
235 /*
236  * Configure addresses on link. `buf' is a string of comma-separated
237  * IP addresses.
238  */
239 static ipadm_status_t
i_ipadm_ngz_addr(ipadm_handle_t iph,char * link,char * buf)240 i_ipadm_ngz_addr(ipadm_handle_t iph, char *link, char *buf)
241 {
242 	ipadm_status_t ipstatus;
243 	ipadm_addrobj_t ipaddr;
244 	char *cp;
245 
246 	for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
247 		ipstatus = ipadm_create_addrobj(IPADM_ADDR_STATIC, link,
248 		    &ipaddr);
249 		if (ipstatus != IPADM_SUCCESS)
250 			return (ipstatus);
251 		/*
252 		 * ipadm_set_addr does the appropriate name resolution and
253 		 * sets up the ipadm_static_addr field.
254 		 */
255 		ipstatus = ipadm_set_addr(ipaddr, cp, AF_UNSPEC);
256 		if (ipstatus != IPADM_SUCCESS) {
257 			ipadm_destroy_addrobj(ipaddr);
258 			return (ipstatus);
259 		}
260 
261 		ipstatus = ipadm_create_addr(iph, ipaddr,
262 		    (IPADM_OPT_ACTIVE | IPADM_OPT_UP));
263 		if (ipstatus != IPADM_SUCCESS) {
264 			ipadm_destroy_addrobj(ipaddr);
265 			return (ipstatus);
266 		}
267 		ipadm_destroy_addrobj(ipaddr);
268 	}
269 	return (IPADM_SUCCESS);
270 }
271 
272 /*
273  * The (*persist_if)() will set up persistent information for the interface,
274  * based on what interface families are required, so just resolve the
275  * address and inform the callback about the linkname, and required address
276  * families.
277  */
278 static ipadm_status_t
i_ipadm_ngz_persist_if(char * link,char * buf,void (* ngz_persist_if)(char *,boolean_t,boolean_t))279 i_ipadm_ngz_persist_if(char *link, char *buf,
280     void (*ngz_persist_if)(char *, boolean_t, boolean_t))
281 {
282 	char *cp, *slashp, addr[INET6_ADDRSTRLEN];
283 	ipadm_status_t ipstatus;
284 	struct sockaddr_storage ss;
285 	boolean_t v4 = B_FALSE;
286 	boolean_t v6 = B_FALSE;
287 
288 	for (cp = strtok(buf, ","); cp != NULL; cp = strtok(NULL, ",")) {
289 		/* remove the /<masklen> that's always added by zoneadmd */
290 		slashp = strchr(cp, '/');
291 		(void) strlcpy(addr, cp, (slashp - cp + 1));
292 
293 		/* resolve the address to find the family */
294 		bzero(&ss, sizeof (ss));
295 		ipstatus = i_ipadm_resolve_addr(addr, AF_UNSPEC, &ss);
296 		if (ipstatus != IPADM_SUCCESS)
297 			return (ipstatus);
298 		switch (ss.ss_family) {
299 		case AF_INET:
300 			v4 = B_TRUE;
301 			break;
302 		case AF_INET6:
303 			v6 = B_TRUE;
304 			break;
305 		default:
306 			return (IPADM_BAD_ADDR);
307 		}
308 	}
309 	(*ngz_persist_if)(link, v4, v6);
310 	return (IPADM_SUCCESS);
311 }
312 
313 static void
i_ipadm_create_ngz_route(int rtsock,char * link,uint8_t * buf,size_t buflen)314 i_ipadm_create_ngz_route(int rtsock, char *link, uint8_t *buf, size_t buflen)
315 {
316 	struct in6_addr defrouter;
317 	boolean_t isv6;
318 	struct in_addr gw4;
319 	uint8_t *cp;
320 	const in6_addr_t ipv6_all_zeros = { 0, 0, 0, 0 };
321 
322 	if (rtsock == -1)
323 		return;
324 
325 	for (cp = buf; cp < buf + buflen; cp += sizeof (defrouter)) {
326 		bcopy(cp, &defrouter, sizeof (defrouter));
327 		if (IN6_IS_ADDR_UNSPECIFIED(&defrouter))
328 			break;
329 		isv6 = !IN6_IS_ADDR_V4MAPPED(&defrouter);
330 		if (isv6) {
331 			i_ipadm_rtioctl6(rtsock, RTM_ADD, ipv6_all_zeros,
332 			    defrouter, 0, link, RTF_GATEWAY);
333 		} else {
334 			IN6_V4MAPPED_TO_INADDR(&defrouter, &gw4);
335 			i_ipadm_rtioctl4(rtsock, RTM_ADD, INADDR_ANY,
336 			    gw4.s_addr, 0, link, 0, RTF_GATEWAY);
337 		}
338 	}
339 }
340 
341 /*
342  * Wrapper function to zone_getattr() for retrieving from-gz attributes that
343  * were made availabe for exclusive IP non-global zones by zoneadmd from teh
344  * global zone.
345  */
346 static ipadm_status_t
i_ipadm_zone_get_network(zoneid_t zoneid,datalink_id_t linkid,int type,void * buf,size_t * bufsize)347 i_ipadm_zone_get_network(zoneid_t zoneid, datalink_id_t linkid, int type,
348     void *buf, size_t *bufsize)
349 {
350 	zone_net_data_t *zndata;
351 	ipadm_status_t ret = IPADM_SUCCESS;
352 
353 	zndata = calloc(1, sizeof (*zndata) + *bufsize);
354 	if (zndata == NULL)
355 		return (IPADM_NO_MEMORY);
356 	zndata->zn_type = type;
357 	zndata->zn_linkid = linkid;
358 	zndata->zn_len = *bufsize;
359 
360 	if (zone_getattr(zoneid, ZONE_ATTR_NETWORK, zndata,
361 	    sizeof (*zndata) + *bufsize) < 0) {
362 		ret = ipadm_errno2status(errno);
363 		goto out;
364 	}
365 	*bufsize = zndata->zn_len;
366 	bcopy(zndata->zn_val, buf, *bufsize);
367 out:
368 	free(zndata);
369 	return (ret);
370 }
371 
372 /*
373  * Callback function that configures a single datalink in a non-global zone.
374  */
375 static int
i_ipadm_zone_network_attr(dladm_handle_t dh,datalink_id_t linkid,void * arg)376 i_ipadm_zone_network_attr(dladm_handle_t dh, datalink_id_t linkid, void *arg)
377 {
378 	ngz_walk_data_t *nwd = arg;
379 	zoneid_t zoneid = nwd->ngz_zoneid;
380 	uint8_t buf[PIPE_BUF];
381 	dladm_status_t dlstatus;
382 	ipadm_status_t ipstatus;
383 	char link[MAXLINKNAMELEN];
384 	ipadm_handle_t iph = nwd->ngz_iph;
385 	int rtsock = iph->iph_rtsock;
386 	char *ifname = nwd->ngz_ifname;
387 	boolean_t s10c = nwd->ngz_s10c;
388 	boolean_t is_ipmgmtd = (iph->iph_flags & IPH_IPMGMTD);
389 	size_t bufsize = sizeof (buf);
390 
391 	bzero(buf, bufsize);
392 	ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
393 	    ZONE_NETWORK_ADDRESS, buf, &bufsize);
394 	if (ipstatus != IPADM_SUCCESS)
395 		goto fail;
396 
397 	dlstatus = dladm_datalink_id2info(dh, linkid, NULL, NULL,
398 	    NULL, link, sizeof (link));
399 	if (dlstatus != DLADM_STATUS_OK)
400 		return (DLADM_WALK_CONTINUE);
401 
402 	/*
403 	 * if ifname has been specified, then skip interfaces that don't match
404 	 */
405 	if (ifname != NULL && strcmp(ifname, link) != 0)
406 		return (DLADM_WALK_CONTINUE);
407 
408 	/*
409 	 * Plumb the interface and configure addresses on for S10 Containers.
410 	 * We need to always do this for S10C because ipadm persistent
411 	 * configuration is not available in S10C. For ipkg zones,
412 	 * we skip the actual plumbing/configuration, but will call the
413 	 * (*ngz_persist_if)() callback to create the persistent state for the
414 	 * interface. The interface will be configured in ipkg zones when
415 	 * ipadm_enable_if() is invoked to restore persistent configuration.
416 	 */
417 	if (is_ipmgmtd && !s10c) {
418 		(void) i_ipadm_ngz_persist_if(link, (char *)buf,
419 		    nwd->ngz_persist_if);
420 		return (DLADM_WALK_CONTINUE);
421 	}
422 	ipstatus = i_ipadm_ngz_addr(iph, link, (char *)buf);
423 	if (ipstatus != IPADM_SUCCESS)
424 		goto fail;
425 
426 	/* apply any default router information.  */
427 	bufsize = sizeof (buf);
428 	bzero(buf, bufsize);
429 	ipstatus = i_ipadm_zone_get_network(zoneid, linkid,
430 	    ZONE_NETWORK_DEFROUTER, buf, &bufsize);
431 	if (ipstatus != IPADM_SUCCESS)
432 		goto fail;
433 
434 	i_ipadm_create_ngz_route(rtsock, link, buf, bufsize);
435 
436 	return (DLADM_WALK_CONTINUE);
437 fail:
438 	if (ifname != NULL) {
439 		nwd->ngz_ipstatus = ipstatus;
440 		return (DLADM_WALK_TERMINATE);
441 	}
442 	return (DLADM_WALK_CONTINUE);
443 }
444 
445 /*
446  * ipmgmt_net_from_gz_init() initializes exclusive-IP stack non-global zones by
447  * extracting configuration that has been saved in the kernel and applying
448  * that information to the appropriate datalinks for the zone. If an ifname
449  * argument is passed in, only the selected IP interface corresponding to
450  * datalink will be initialized, otherwise all datalinks will be plumbed for IP
451  * and IP address and route information will be configured.
452  */
453 ipadm_status_t
ipadm_init_net_from_gz(ipadm_handle_t iph,char * ifname,void (* persist_if)(char *,boolean_t,boolean_t))454 ipadm_init_net_from_gz(ipadm_handle_t iph, char *ifname,
455     void (*persist_if)(char *, boolean_t, boolean_t))
456 {
457 	ngz_walk_data_t nwd;
458 	uint64_t flags;
459 	dladm_handle_t dlh = iph->iph_dlh;
460 	datalink_id_t linkid;
461 
462 	if (iph->iph_zoneid == GLOBAL_ZONEID)
463 		return (IPADM_NOTSUP);
464 
465 	if (ifname != NULL &&
466 	    i_ipadm_get_flags(iph, ifname, AF_INET, &flags) != IPADM_SUCCESS &&
467 	    i_ipadm_get_flags(iph, ifname, AF_INET6, &flags) != IPADM_SUCCESS)
468 		return (IPADM_ENXIO);
469 
470 	if (ifname != NULL && !(flags & IFF_L3PROTECT))
471 		return (IPADM_SUCCESS); /* nothing to initialize */
472 
473 	nwd.ngz_iph = iph;
474 	nwd.ngz_zoneid = iph->iph_zoneid;
475 	nwd.ngz_ifname = ifname;
476 	nwd.ngz_persist_if = persist_if;
477 	nwd.ngz_s10c = i_ipadm_zone_is_s10c(iph->iph_zoneid);
478 	nwd.ngz_ipstatus = IPADM_SUCCESS;
479 	if (ifname != NULL) {
480 		if (dladm_name2info(dlh, ifname, &linkid, NULL, NULL,
481 		    NULL) != DLADM_STATUS_OK) {
482 			return (IPADM_ENXIO);
483 		}
484 		(void) i_ipadm_zone_network_attr(dlh, linkid, &nwd);
485 	} else {
486 		(void) dladm_walk_datalink_id(i_ipadm_zone_network_attr, dlh,
487 		    &nwd, DATALINK_CLASS_ALL, DATALINK_ANY_MEDIATYPE,
488 		    DLADM_OPT_PERSIST);
489 	}
490 	return (nwd.ngz_ipstatus);
491 }
492