if_ipsec.c revision c4ed3acb9f59d3498905e95d5d445c5046fdfdb6
1/*-
2 * Copyright (c) 2016-2018 Yandex LLC
3 * Copyright (c) 2016-2018 Andrey V. Elsukov <ae@FreeBSD.org>
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27
28#include <sys/cdefs.h>
29__FBSDID("$FreeBSD$");
30
31#include "opt_inet.h"
32#include "opt_inet6.h"
33
34#include <sys/param.h>
35#include <sys/systm.h>
36#include <sys/kernel.h>
37#include <sys/fnv_hash.h>
38#include <sys/jail.h>
39#include <sys/lock.h>
40#include <sys/malloc.h>
41#include <sys/mbuf.h>
42#include <sys/module.h>
43#include <sys/socket.h>
44#include <sys/sockio.h>
45#include <sys/sx.h>
46#include <sys/errno.h>
47#include <sys/sysctl.h>
48#include <sys/priv.h>
49#include <sys/proc.h>
50#include <sys/conf.h>
51
52#include <net/if.h>
53#include <net/if_var.h>
54#include <net/if_clone.h>
55#include <net/if_types.h>
56#include <net/bpf.h>
57#include <net/route.h>
58#include <net/vnet.h>
59
60#include <netinet/in.h>
61#include <netinet/in_var.h>
62#include <netinet/ip.h>
63#include <netinet/ip_encap.h>
64
65#include <netinet/ip6.h>
66#include <netinet6/in6_var.h>
67#include <netinet6/scope6_var.h>
68
69#include <netipsec/ipsec.h>
70#ifdef INET6
71#include <netipsec/ipsec6.h>
72#endif
73
74#include <net/if_ipsec.h>
75#include <netipsec/key.h>
76
77#include <security/mac/mac_framework.h>
78
/* Malloc type used for every allocation made by this driver. */
static MALLOC_DEFINE(M_IPSEC, "ipsec", "IPsec Virtual Tunnel Interface");
/* Interface name prefix; also the name the cloner is registered under. */
static const char ipsecname[] = "ipsec";

/*
 * Number of security policies kept per interface: one inbound and one
 * outbound policy for each address family compiled into the kernel.
 */
#if defined(INET) && defined(INET6)
#define	IPSEC_SPCOUNT		4
#else
#define	IPSEC_SPCOUNT		2
#endif
87
/*
 * Per-interface state of one if_ipsec(4) tunnel.
 */
struct ipsec_softc {
	struct ifnet		*ifp;	/* owning network interface */
	struct secpolicy	*sp[IPSEC_SPCOUNT]; /* in/out policy per AF */
	uint32_t		reqid;	/* 0 until a reqid has been assigned */
	u_int			family;	/* AF of the endpoints, 0 if unset */
	u_int			fibnum;	/* FIB set on transmitted mbufs */

	CK_LIST_ENTRY(ipsec_softc) idhash;	/* linkage in the reqid hash */
	CK_LIST_ENTRY(ipsec_softc) srchash;	/* linkage in the source hash */
};
98
/*
 * Lockless readers walk the hash chains inside a preemptible network
 * epoch section.  Writers call IPSEC_WAIT() after unlinking a softc and
 * before its resources are released.
 */
#define	IPSEC_RLOCK_TRACKER	struct epoch_tracker ipsec_et
#define	IPSEC_RLOCK()	epoch_enter_preempt(net_epoch_preempt, &ipsec_et)
#define	IPSEC_RUNLOCK()	epoch_exit_preempt(net_epoch_preempt, &ipsec_et)
#define	IPSEC_WAIT()	epoch_wait_preempt(net_epoch_preempt)

/*
 * Number of buckets in every hash table.  Bucket indexes are computed
 * with "& (IPSEC_HASH_SIZE - 1)", so the value has to be a power of two.
 */
#ifndef IPSEC_HASH_SIZE
#define	IPSEC_HASH_SIZE	(1 << 5)
#endif

/* Per-vnet table of softcs hashed by reqid. */
CK_LIST_HEAD(ipsec_iflist, ipsec_softc);
VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec_idhtbl) = NULL;
#define	V_ipsec_idhtbl		VNET(ipsec_idhtbl)

#ifdef INET
/* Per-vnet table of softcs hashed by IPv4 tunnel source address. */
VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec4_srchtbl) = NULL;
#define	V_ipsec4_srchtbl	VNET(ipsec4_srchtbl)
/* Handle returned by ip_encap_register_srcaddr(); set by the default vnet. */
static const struct srcaddrtab *ipsec4_srctab = NULL;
#endif

#ifdef INET6
/* Per-vnet table of softcs hashed by IPv6 tunnel source address. */
VNET_DEFINE_STATIC(struct ipsec_iflist *, ipsec6_srchtbl) = NULL;
#define	V_ipsec6_srchtbl	VNET(ipsec6_srchtbl)
/* Handle returned by ip6_encap_register_srcaddr(); set by the default vnet. */
static const struct srcaddrtab *ipsec6_srctab = NULL;
#endif
123
124static struct ipsec_iflist *
125ipsec_idhash(uint32_t id)
126{
127
128	return (&V_ipsec_idhtbl[fnv_32_buf(&id, sizeof(id),
129	    FNV1_32_INIT) & (IPSEC_HASH_SIZE - 1)]);
130}
131
132static struct ipsec_iflist *
133ipsec_srchash(const struct sockaddr *sa)
134{
135	uint32_t hval;
136
137	switch (sa->sa_family) {
138#ifdef INET
139	case AF_INET:
140		hval = fnv_32_buf(
141		    &((const struct sockaddr_in *)sa)->sin_addr.s_addr,
142		    sizeof(in_addr_t), FNV1_32_INIT);
143		return (&V_ipsec4_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]);
144#endif
145#ifdef INET6
146	case AF_INET6:
147		hval = fnv_32_buf(
148		    &((const struct sockaddr_in6 *)sa)->sin6_addr,
149		    sizeof(struct in6_addr), FNV1_32_INIT);
150		return (&V_ipsec6_srchtbl[hval & (IPSEC_HASH_SIZE - 1)]);
151#endif
152	}
153	return (NULL);
154}
155
/*
 * ipsec_ioctl_sx serializes the configuration ioctls and interface
 * destruction; the functions that modify a softc assert that it is held
 * exclusively.
 */
static struct sx ipsec_ioctl_sx;
SX_SYSINIT(ipsec_ioctl_sx, &ipsec_ioctl_sx, "ipsec_ioctl");

/* Tunnel (security policy) management; ipsec_ioctl_sx must be held. */
static int	ipsec_init_reqid(struct ipsec_softc *);
static int	ipsec_set_tunnel(struct ipsec_softc *, struct sockaddr *,
    struct sockaddr *, uint32_t);
static void	ipsec_delete_tunnel(struct ipsec_softc *);

static int	ipsec_set_addresses(struct ifnet *, struct sockaddr *,
    struct sockaddr *);
static int	ipsec_set_reqid(struct ipsec_softc *, uint32_t);
static void	ipsec_set_running(struct ipsec_softc *);

/* Source address event callback, ifnet methods and cloner callbacks. */
static void	ipsec_srcaddr(void *, const struct sockaddr *, int);
static int	ipsec_ioctl(struct ifnet *, u_long, caddr_t);
static int	ipsec_transmit(struct ifnet *, struct mbuf *);
static int	ipsec_output(struct ifnet *, struct mbuf *,
    const struct sockaddr *, struct route *);
static void	ipsec_qflush(struct ifnet *);
static int	ipsec_clone_create(struct if_clone *, int, caddr_t);
static void	ipsec_clone_destroy(struct ifnet *);

/* Per-vnet interface cloner, created in vnet_ipsec_init(). */
VNET_DEFINE_STATIC(struct if_clone *, ipsec_cloner);
#define	V_ipsec_cloner		VNET(ipsec_cloner)
183
184static int
185ipsec_clone_create(struct if_clone *ifc, int unit, caddr_t params)
186{
187	struct ipsec_softc *sc;
188	struct ifnet *ifp;
189
190	sc = malloc(sizeof(*sc), M_IPSEC, M_WAITOK | M_ZERO);
191	sc->fibnum = curthread->td_proc->p_fibnum;
192	sc->ifp = ifp = if_alloc(IFT_TUNNEL);
193	ifp->if_softc = sc;
194	if_initname(ifp, ipsecname, unit);
195
196	ifp->if_addrlen = 0;
197	ifp->if_mtu = IPSEC_MTU;
198	ifp->if_flags  = IFF_POINTOPOINT | IFF_MULTICAST;
199	ifp->if_ioctl  = ipsec_ioctl;
200	ifp->if_transmit  = ipsec_transmit;
201	ifp->if_qflush  = ipsec_qflush;
202	ifp->if_output = ipsec_output;
203	if_attach(ifp);
204	bpfattach(ifp, DLT_NULL, sizeof(uint32_t));
205
206	return (0);
207}
208
209static void
210ipsec_clone_destroy(struct ifnet *ifp)
211{
212	struct ipsec_softc *sc;
213
214	sx_xlock(&ipsec_ioctl_sx);
215	sc = ifp->if_softc;
216	ipsec_delete_tunnel(sc);
217	bpfdetach(ifp);
218	if_detach(ifp);
219	ifp->if_softc = NULL;
220	sx_xunlock(&ipsec_ioctl_sx);
221
222	IPSEC_WAIT();
223	if_free(ifp);
224	free(sc, M_IPSEC);
225}
226
227static struct ipsec_iflist *
228ipsec_hashinit(void)
229{
230	struct ipsec_iflist *hash;
231	int i;
232
233	hash = malloc(sizeof(struct ipsec_iflist) * IPSEC_HASH_SIZE,
234	    M_IPSEC, M_WAITOK);
235	for (i = 0; i < IPSEC_HASH_SIZE; i++)
236		CK_LIST_INIT(&hash[i]);
237
238	return (hash);
239}
240
241static void
242vnet_ipsec_init(const void *unused __unused)
243{
244
245	V_ipsec_idhtbl = ipsec_hashinit();
246#ifdef INET
247	V_ipsec4_srchtbl = ipsec_hashinit();
248	if (IS_DEFAULT_VNET(curvnet))
249		ipsec4_srctab = ip_encap_register_srcaddr(ipsec_srcaddr,
250		    NULL, M_WAITOK);
251#endif
252#ifdef INET6
253	V_ipsec6_srchtbl = ipsec_hashinit();
254	if (IS_DEFAULT_VNET(curvnet))
255		ipsec6_srctab = ip6_encap_register_srcaddr(ipsec_srcaddr,
256		    NULL, M_WAITOK);
257#endif
258	V_ipsec_cloner = if_clone_simple(ipsecname, ipsec_clone_create,
259	    ipsec_clone_destroy, 0);
260}
261VNET_SYSINIT(vnet_ipsec_init, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
262    vnet_ipsec_init, NULL);
263
264static void
265vnet_ipsec_uninit(const void *unused __unused)
266{
267
268	if_clone_detach(V_ipsec_cloner);
269	free(V_ipsec_idhtbl, M_IPSEC);
270#ifdef INET
271	if (IS_DEFAULT_VNET(curvnet))
272		ip_encap_unregister_srcaddr(ipsec4_srctab);
273	free(V_ipsec4_srchtbl, M_IPSEC);
274#endif
275#ifdef INET6
276	if (IS_DEFAULT_VNET(curvnet))
277		ip6_encap_unregister_srcaddr(ipsec6_srctab);
278	free(V_ipsec6_srchtbl, M_IPSEC);
279#endif
280}
281VNET_SYSUNINIT(vnet_ipsec_uninit, SI_SUB_PROTO_IFATTACHDOMAIN, SI_ORDER_ANY,
282    vnet_ipsec_uninit, NULL);
283
284static struct secpolicy *
285ipsec_getpolicy(struct ipsec_softc *sc, int dir, sa_family_t af)
286{
287
288	switch (af) {
289#ifdef INET
290	case AF_INET:
291		return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)]);
292#endif
293#ifdef INET6
294	case AF_INET6:
295		return (sc->sp[(dir == IPSEC_DIR_INBOUND ? 0: 1)
296#ifdef INET
297			+ 2
298#endif
299		]);
300#endif
301	}
302	return (NULL);
303}
304
305static struct secasindex *
306ipsec_getsaidx(struct ipsec_softc *sc, int dir, sa_family_t af)
307{
308	struct secpolicy *sp;
309
310	sp = ipsec_getpolicy(sc, dir, af);
311	if (sp == NULL)
312		return (NULL);
313	return (&sp->req[0]->saidx);
314}
315
316static int
317ipsec_transmit(struct ifnet *ifp, struct mbuf *m)
318{
319	IPSEC_RLOCK_TRACKER;
320	struct ipsec_softc *sc;
321	struct secpolicy *sp;
322	struct ip *ip;
323	uint32_t af;
324	int error;
325
326	IPSEC_RLOCK();
327#ifdef MAC
328	error = mac_ifnet_check_transmit(ifp, m);
329	if (error) {
330		m_freem(m);
331		goto err;
332	}
333#endif
334	error = ENETDOWN;
335	sc = ifp->if_softc;
336	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
337	    (ifp->if_flags & IFF_MONITOR) != 0 ||
338	    (ifp->if_flags & IFF_UP) == 0 || sc->family == 0) {
339		m_freem(m);
340		goto err;
341	}
342
343	/* Determine address family to correctly handle packet in BPF */
344	ip = mtod(m, struct ip *);
345	switch (ip->ip_v) {
346#ifdef INET
347	case IPVERSION:
348		af = AF_INET;
349		break;
350#endif
351#ifdef INET6
352	case (IPV6_VERSION >> 4):
353		af = AF_INET6;
354		break;
355#endif
356	default:
357		error = EAFNOSUPPORT;
358		m_freem(m);
359		goto err;
360	}
361
362	/*
363	 * Loop prevention.
364	 * XXX: for now just check presence of IPSEC_OUT_DONE mbuf tag.
365	 *      We can read full chain and compare destination address,
366	 *      proto and mode from xform_history with values from softc.
367	 */
368	if (m_tag_find(m, PACKET_TAG_IPSEC_OUT_DONE, NULL) != NULL) {
369		m_freem(m);
370		goto err;
371	}
372
373	sp = ipsec_getpolicy(sc, IPSEC_DIR_OUTBOUND, af);
374	key_addref(sp);
375	M_SETFIB(m, sc->fibnum);
376
377	BPF_MTAP2(ifp, &af, sizeof(af), m);
378	if_inc_counter(ifp, IFCOUNTER_OPACKETS, 1);
379	if_inc_counter(ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
380
381	switch (af) {
382#ifdef INET
383	case AF_INET:
384		error = ipsec4_process_packet(m, sp, NULL);
385		break;
386#endif
387#ifdef INET6
388	case AF_INET6:
389		error = ipsec6_process_packet(m, sp, NULL);
390		break;
391#endif
392	default:
393		panic("%s: unknown address family\n", __func__);
394	}
395err:
396	if (error != 0)
397		if_inc_counter(ifp, IFCOUNTER_OERRORS, 1);
398	IPSEC_RUNLOCK();
399	return (error);
400}
401
402static void
403ipsec_qflush(struct ifnet *ifp __unused)
404{
405
406}
407
408static int
409ipsec_output(struct ifnet *ifp, struct mbuf *m, const struct sockaddr *dst,
410	struct route *ro)
411{
412
413	return (ifp->if_transmit(ifp, m));
414}
415
416int
417ipsec_if_input(struct mbuf *m, struct secasvar *sav, uint32_t af)
418{
419	IPSEC_RLOCK_TRACKER;
420	struct secasindex *saidx;
421	struct ipsec_softc *sc;
422	struct ifnet *ifp;
423
424	if (sav->state != SADB_SASTATE_MATURE &&
425	    sav->state != SADB_SASTATE_DYING) {
426		m_freem(m);
427		return (ENETDOWN);
428	}
429
430	if (sav->sah->saidx.mode != IPSEC_MODE_TUNNEL ||
431	    sav->sah->saidx.proto != IPPROTO_ESP)
432		return (0);
433
434	IPSEC_RLOCK();
435	CK_LIST_FOREACH(sc, ipsec_idhash(sav->sah->saidx.reqid), idhash) {
436		if (sc->family == 0)
437			continue;
438		saidx = ipsec_getsaidx(sc, IPSEC_DIR_INBOUND,
439		    sav->sah->saidx.src.sa.sa_family);
440		/* SA's reqid should match reqid in SP */
441		if (saidx == NULL ||
442		    sav->sah->saidx.reqid != saidx->reqid)
443			continue;
444		/* SAH's addresses should match tunnel endpoints. */
445		if (key_sockaddrcmp(&sav->sah->saidx.dst.sa,
446		    &saidx->dst.sa, 0) != 0)
447			continue;
448		if (key_sockaddrcmp(&sav->sah->saidx.src.sa,
449		    &saidx->src.sa, 0) == 0)
450			break;
451	}
452	if (sc == NULL) {
453		IPSEC_RUNLOCK();
454		/* Tunnel was not found. Nothing to do. */
455		return (0);
456	}
457	ifp = sc->ifp;
458	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
459	    (ifp->if_flags & IFF_UP) == 0) {
460		IPSEC_RUNLOCK();
461		m_freem(m);
462		return (ENETDOWN);
463	}
464	/*
465	 * We found matching and working tunnel.
466	 * Set its ifnet as receiving interface.
467	 */
468	m->m_pkthdr.rcvif = ifp;
469
470	m_clrprotoflags(m);
471	M_SETFIB(m, ifp->if_fib);
472	BPF_MTAP2(ifp, &af, sizeof(af), m);
473	if_inc_counter(ifp, IFCOUNTER_IPACKETS, 1);
474	if_inc_counter(ifp, IFCOUNTER_IBYTES, m->m_pkthdr.len);
475	if ((ifp->if_flags & IFF_MONITOR) != 0) {
476		IPSEC_RUNLOCK();
477		m_freem(m);
478		return (ENETDOWN);
479	}
480	IPSEC_RUNLOCK();
481	return (0);
482}
483
484static int
485ipsec_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
486{
487	struct ifreq *ifr = (struct ifreq*)data;
488	struct sockaddr *dst, *src;
489	struct ipsec_softc *sc;
490	struct secasindex *saidx;
491#ifdef INET
492	struct sockaddr_in *sin = NULL;
493#endif
494#ifdef INET6
495	struct sockaddr_in6 *sin6 = NULL;
496#endif
497	uint32_t reqid;
498	int error;
499
500	switch (cmd) {
501	case SIOCSIFADDR:
502		ifp->if_flags |= IFF_UP;
503	case SIOCADDMULTI:
504	case SIOCDELMULTI:
505	case SIOCGIFMTU:
506	case SIOCSIFFLAGS:
507		return (0);
508	case SIOCSIFMTU:
509		if (ifr->ifr_mtu < IPSEC_MTU_MIN ||
510		    ifr->ifr_mtu > IPSEC_MTU_MAX)
511			return (EINVAL);
512		else
513			ifp->if_mtu = ifr->ifr_mtu;
514		return (0);
515	}
516	sx_xlock(&ipsec_ioctl_sx);
517	sc = ifp->if_softc;
518	/* Check that softc is still here */
519	if (sc == NULL) {
520		error = ENXIO;
521		goto bad;
522	}
523	error = 0;
524	switch (cmd) {
525	case SIOCSIFPHYADDR:
526#ifdef INET6
527	case SIOCSIFPHYADDR_IN6:
528#endif
529		error = EINVAL;
530		switch (cmd) {
531#ifdef INET
532		case SIOCSIFPHYADDR:
533			src = (struct sockaddr *)
534				&(((struct in_aliasreq *)data)->ifra_addr);
535			dst = (struct sockaddr *)
536				&(((struct in_aliasreq *)data)->ifra_dstaddr);
537			break;
538#endif
539#ifdef INET6
540		case SIOCSIFPHYADDR_IN6:
541			src = (struct sockaddr *)
542				&(((struct in6_aliasreq *)data)->ifra_addr);
543			dst = (struct sockaddr *)
544				&(((struct in6_aliasreq *)data)->ifra_dstaddr);
545			break;
546#endif
547		default:
548			goto bad;
549		}
550		/* sa_family must be equal */
551		if (src->sa_family != dst->sa_family ||
552		    src->sa_len != dst->sa_len)
553			goto bad;
554
555		/* validate sa_len */
556		switch (src->sa_family) {
557#ifdef INET
558		case AF_INET:
559			if (src->sa_len != sizeof(struct sockaddr_in))
560				goto bad;
561			break;
562#endif
563#ifdef INET6
564		case AF_INET6:
565			if (src->sa_len != sizeof(struct sockaddr_in6))
566				goto bad;
567			break;
568#endif
569		default:
570			error = EAFNOSUPPORT;
571			goto bad;
572		}
573		/* check sa_family looks sane for the cmd */
574		error = EAFNOSUPPORT;
575		switch (cmd) {
576#ifdef INET
577		case SIOCSIFPHYADDR:
578			if (src->sa_family == AF_INET)
579				break;
580			goto bad;
581#endif
582#ifdef INET6
583		case SIOCSIFPHYADDR_IN6:
584			if (src->sa_family == AF_INET6)
585				break;
586			goto bad;
587#endif
588		}
589		error = EADDRNOTAVAIL;
590		switch (src->sa_family) {
591#ifdef INET
592		case AF_INET:
593			if (satosin(src)->sin_addr.s_addr == INADDR_ANY ||
594			    satosin(dst)->sin_addr.s_addr == INADDR_ANY)
595				goto bad;
596			break;
597#endif
598#ifdef INET6
599		case AF_INET6:
600			if (IN6_IS_ADDR_UNSPECIFIED(
601			    &satosin6(src)->sin6_addr) ||
602			    IN6_IS_ADDR_UNSPECIFIED(
603			    &satosin6(dst)->sin6_addr))
604				goto bad;
605			/*
606			 * Check validity of the scope zone ID of the
607			 * addresses, and convert it into the kernel
608			 * internal form if necessary.
609			 */
610			error = sa6_embedscope(satosin6(src), 0);
611			if (error != 0)
612				goto bad;
613			error = sa6_embedscope(satosin6(dst), 0);
614			if (error != 0)
615				goto bad;
616#endif
617		};
618		error = ipsec_set_addresses(ifp, src, dst);
619		break;
620	case SIOCDIFPHYADDR:
621		ipsec_delete_tunnel(sc);
622		break;
623	case SIOCGIFPSRCADDR:
624	case SIOCGIFPDSTADDR:
625#ifdef INET6
626	case SIOCGIFPSRCADDR_IN6:
627	case SIOCGIFPDSTADDR_IN6:
628#endif
629		if (sc->family == 0) {
630			error = EADDRNOTAVAIL;
631			break;
632		}
633		saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family);
634		switch (cmd) {
635#ifdef INET
636		case SIOCGIFPSRCADDR:
637		case SIOCGIFPDSTADDR:
638			if (saidx->src.sa.sa_family != AF_INET) {
639				error = EADDRNOTAVAIL;
640				break;
641			}
642			sin = (struct sockaddr_in *)&ifr->ifr_addr;
643			memset(sin, 0, sizeof(*sin));
644			sin->sin_family = AF_INET;
645			sin->sin_len = sizeof(*sin);
646			break;
647#endif
648#ifdef INET6
649		case SIOCGIFPSRCADDR_IN6:
650		case SIOCGIFPDSTADDR_IN6:
651			if (saidx->src.sa.sa_family != AF_INET6) {
652				error = EADDRNOTAVAIL;
653				break;
654			}
655			sin6 = (struct sockaddr_in6 *)
656				&(((struct in6_ifreq *)data)->ifr_addr);
657			memset(sin6, 0, sizeof(*sin6));
658			sin6->sin6_family = AF_INET6;
659			sin6->sin6_len = sizeof(*sin6);
660			break;
661#endif
662		default:
663			error = EAFNOSUPPORT;
664		}
665		if (error == 0) {
666			switch (cmd) {
667#ifdef INET
668			case SIOCGIFPSRCADDR:
669				sin->sin_addr = saidx->src.sin.sin_addr;
670				break;
671			case SIOCGIFPDSTADDR:
672				sin->sin_addr = saidx->dst.sin.sin_addr;
673				break;
674#endif
675#ifdef INET6
676			case SIOCGIFPSRCADDR_IN6:
677				sin6->sin6_addr = saidx->src.sin6.sin6_addr;
678				break;
679			case SIOCGIFPDSTADDR_IN6:
680				sin6->sin6_addr = saidx->dst.sin6.sin6_addr;
681				break;
682#endif
683			}
684		}
685		if (error != 0)
686			break;
687		switch (cmd) {
688#ifdef INET
689		case SIOCGIFPSRCADDR:
690		case SIOCGIFPDSTADDR:
691			error = prison_if(curthread->td_ucred,
692			    (struct sockaddr *)sin);
693			if (error != 0)
694				memset(sin, 0, sizeof(*sin));
695			break;
696#endif
697#ifdef INET6
698		case SIOCGIFPSRCADDR_IN6:
699		case SIOCGIFPDSTADDR_IN6:
700			error = prison_if(curthread->td_ucred,
701			    (struct sockaddr *)sin6);
702			if (error == 0)
703				error = sa6_recoverscope(sin6);
704			if (error != 0)
705				memset(sin6, 0, sizeof(*sin6));
706#endif
707		}
708		break;
709	case SIOCGTUNFIB:
710		ifr->ifr_fib = sc->fibnum;
711		break;
712	case SIOCSTUNFIB:
713		if ((error = priv_check(curthread, PRIV_NET_SETIFFIB)) != 0)
714			break;
715		if (ifr->ifr_fib >= rt_numfibs)
716			error = EINVAL;
717		else
718			sc->fibnum = ifr->ifr_fib;
719		break;
720	case IPSECGREQID:
721		reqid = sc->reqid;
722		error = copyout(&reqid, ifr_data_get_ptr(ifr), sizeof(reqid));
723		break;
724	case IPSECSREQID:
725		if ((error = priv_check(curthread, PRIV_NET_SETIFCAP)) != 0)
726			break;
727		error = copyin(ifr_data_get_ptr(ifr), &reqid, sizeof(reqid));
728		if (error != 0)
729			break;
730		error = ipsec_set_reqid(sc, reqid);
731		break;
732	default:
733		error = EINVAL;
734		break;
735	}
736bad:
737	sx_xunlock(&ipsec_ioctl_sx);
738	return (error);
739}
740
741/*
742 * Check that ingress address belongs to local host.
743 */
744static void
745ipsec_set_running(struct ipsec_softc *sc)
746{
747	struct secasindex *saidx;
748	int localip;
749
750	saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family);
751	localip = 0;
752	switch (sc->family) {
753#ifdef INET
754	case AF_INET:
755		localip = in_localip(saidx->src.sin.sin_addr);
756		break;
757#endif
758#ifdef INET6
759	case AF_INET6:
760		localip = in6_localip(&saidx->src.sin6.sin6_addr);
761		break;
762#endif
763	}
764	if (localip != 0)
765		sc->ifp->if_drv_flags |= IFF_DRV_RUNNING;
766	else
767		sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
768}
769
770/*
771 * ifaddr_event handler.
772 * Clear IFF_DRV_RUNNING flag when ingress address disappears to prevent
773 * source address spoofing.
774 */
775static void
776ipsec_srcaddr(void *arg __unused, const struct sockaddr *sa,
777    int event __unused)
778{
779	struct ipsec_softc *sc;
780	struct secasindex *saidx;
781
782	MPASS(in_epoch(net_epoch_preempt));
783	CK_LIST_FOREACH(sc, ipsec_srchash(sa), srchash) {
784		if (sc->family == 0)
785			continue;
786		saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sa->sa_family);
787		if (saidx == NULL ||
788		    key_sockaddrcmp(&saidx->src.sa, sa, 0) != 0)
789			continue;
790		ipsec_set_running(sc);
791	}
792}
793
794/*
795 * Allocate new private security policies for tunneling interface.
796 * Each tunneling interface has following security policies for
797 * both AF:
798 *   0.0.0.0/0[any] 0.0.0.0/0[any] -P in \
799 *	ipsec esp/tunnel/RemoteIP-LocalIP/unique:reqid
800 *   0.0.0.0/0[any] 0.0.0.0/0[any] -P out \
801 *	ipsec esp/tunnel/LocalIP-RemoteIP/unique:reqid
802 */
803static int
804ipsec_newpolicies(struct ipsec_softc *sc, struct secpolicy *sp[IPSEC_SPCOUNT],
805    const struct sockaddr *src, const struct sockaddr *dst, uint32_t reqid)
806{
807	struct ipsecrequest *isr;
808	int i;
809
810	memset(sp, 0, sizeof(struct secpolicy *) * IPSEC_SPCOUNT);
811	for (i = 0; i < IPSEC_SPCOUNT; i++) {
812		if ((sp[i] = key_newsp()) == NULL)
813			goto fail;
814		if ((isr = ipsec_newisr()) == NULL)
815			goto fail;
816
817		sp[i]->policy = IPSEC_POLICY_IPSEC;
818		sp[i]->state = IPSEC_SPSTATE_DEAD;
819		sp[i]->req[sp[i]->tcount++] = isr;
820		sp[i]->created = time_second;
821		/* Use priority field to store if_index */
822		sp[i]->priority = sc->ifp->if_index;
823		isr->level = IPSEC_LEVEL_UNIQUE;
824		isr->saidx.proto = IPPROTO_ESP;
825		isr->saidx.mode = IPSEC_MODE_TUNNEL;
826		isr->saidx.reqid = reqid;
827		if (i % 2 == 0) {
828			sp[i]->spidx.dir = IPSEC_DIR_INBOUND;
829			bcopy(src, &isr->saidx.dst, src->sa_len);
830			bcopy(dst, &isr->saidx.src, dst->sa_len);
831		} else {
832			sp[i]->spidx.dir = IPSEC_DIR_OUTBOUND;
833			bcopy(src, &isr->saidx.src, src->sa_len);
834			bcopy(dst, &isr->saidx.dst, dst->sa_len);
835		}
836		sp[i]->spidx.ul_proto = IPSEC_ULPROTO_ANY;
837#ifdef INET
838		if (i < 2) {
839			sp[i]->spidx.src.sa.sa_family =
840			    sp[i]->spidx.dst.sa.sa_family = AF_INET;
841			sp[i]->spidx.src.sa.sa_len =
842			    sp[i]->spidx.dst.sa.sa_len =
843			    sizeof(struct sockaddr_in);
844			continue;
845		}
846#endif
847#ifdef INET6
848		sp[i]->spidx.src.sa.sa_family =
849		    sp[i]->spidx.dst.sa.sa_family = AF_INET6;
850		sp[i]->spidx.src.sa.sa_len =
851		    sp[i]->spidx.dst.sa.sa_len = sizeof(struct sockaddr_in6);
852#endif
853	}
854	return (0);
855fail:
856	for (i = 0; i < IPSEC_SPCOUNT; i++)
857		key_freesp(&sp[i]);
858	return (ENOMEM);
859}
860
861static int
862ipsec_check_reqid(uint32_t reqid)
863{
864	struct ipsec_softc *sc;
865
866	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
867	CK_LIST_FOREACH(sc, ipsec_idhash(reqid), idhash) {
868		if (sc->reqid == reqid)
869			return (EEXIST);
870	}
871	return (0);
872}
873
874/*
875 * We use key_newreqid() to automatically obtain unique reqid.
876 * Then we check that given id is unique, i.e. it is not used by
877 * another if_ipsec(4) interface. This macro limits the number of
878 * tries to get unique id.
879 */
880#define	IPSEC_REQID_TRYCNT	64
881static int
882ipsec_init_reqid(struct ipsec_softc *sc)
883{
884	uint32_t reqid;
885	int trycount;
886
887	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
888	if (sc->reqid != 0) /* already initialized */
889		return (0);
890
891	trycount = IPSEC_REQID_TRYCNT;
892	while (--trycount > 0) {
893		reqid = key_newreqid();
894		if (ipsec_check_reqid(reqid) == 0)
895			break;
896	}
897	if (trycount == 0)
898		return (EEXIST);
899	sc->reqid = reqid;
900	CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash);
901	return (0);
902}
903
904/*
905 * Set or update reqid for given tunneling interface.
906 * When specified reqid is zero, generate new one.
907 * We are protected by ioctl_sx lock from concurrent id generation.
908 * Also softc would not disappear while we hold ioctl_sx lock.
909 */
910static int
911ipsec_set_reqid(struct ipsec_softc *sc, uint32_t reqid)
912{
913	struct secasindex *saidx;
914
915	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
916
917	if (sc->reqid == reqid && reqid != 0)
918		return (0);
919
920	if (reqid != 0) {
921		/* Check that specified reqid doesn't exist */
922		if (ipsec_check_reqid(reqid) != 0)
923			return (EEXIST);
924		if (sc->reqid != 0) {
925			CK_LIST_REMOVE(sc, idhash);
926			IPSEC_WAIT();
927		}
928		sc->reqid = reqid;
929		CK_LIST_INSERT_HEAD(ipsec_idhash(reqid), sc, idhash);
930	} else {
931		/* Generate new reqid */
932		if (ipsec_init_reqid(sc) != 0)
933			return (EEXIST);
934	}
935
936	/* Tunnel isn't fully configured, just return. */
937	if (sc->family == 0)
938		return (0);
939
940	saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND, sc->family);
941	KASSERT(saidx != NULL,
942	    ("saidx is NULL, but family is %d", sc->family));
943	return (ipsec_set_tunnel(sc, &saidx->src.sa, &saidx->dst.sa,
944	    sc->reqid));
945}
946
947/*
948 * Set tunnel endpoints addresses.
949 */
950static int
951ipsec_set_addresses(struct ifnet *ifp, struct sockaddr *src,
952    struct sockaddr *dst)
953{
954	struct ipsec_softc *sc, *tsc;
955	struct secasindex *saidx;
956
957	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
958
959	sc = ifp->if_softc;
960	if (sc->family != 0) {
961		saidx = ipsec_getsaidx(sc, IPSEC_DIR_OUTBOUND,
962		    src->sa_family);
963		if (saidx != NULL && saidx->reqid == sc->reqid &&
964		    key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 &&
965		    key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0)
966			return (0); /* Nothing has been changed. */
967
968	}
969	/* Check that given addresses aren't already configured */
970	CK_LIST_FOREACH(tsc, ipsec_srchash(src), srchash) {
971		if (tsc == sc)
972			continue;
973		MPASS(tsc->family == src->sa_family);
974		saidx = ipsec_getsaidx(tsc, IPSEC_DIR_OUTBOUND, tsc->family);
975		if (key_sockaddrcmp(&saidx->src.sa, src, 0) == 0 &&
976		    key_sockaddrcmp(&saidx->dst.sa, dst, 0) == 0) {
977			/* We already have tunnel with such addresses */
978			return (EADDRNOTAVAIL);
979		}
980	}
981	/* If reqid is not set, generate new one. */
982	if (ipsec_init_reqid(sc) != 0)
983		return (EEXIST);
984	return (ipsec_set_tunnel(sc, src, dst, sc->reqid));
985}
986
987static int
988ipsec_set_tunnel(struct ipsec_softc *sc, struct sockaddr *src,
989    struct sockaddr *dst, uint32_t reqid)
990{
991	struct secpolicy *sp[IPSEC_SPCOUNT];
992	int i;
993
994	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
995
996	/* Allocate SP with new addresses. */
997	if (ipsec_newpolicies(sc, sp, src, dst, reqid) == 0) {
998		/* Add new policies to SPDB */
999		if (key_register_ifnet(sp, IPSEC_SPCOUNT) != 0) {
1000			for (i = 0; i < IPSEC_SPCOUNT; i++)
1001				key_freesp(&sp[i]);
1002			return (EAGAIN);
1003		}
1004		if (sc->family != 0)
1005			ipsec_delete_tunnel(sc);
1006		for (i = 0; i < IPSEC_SPCOUNT; i++)
1007			sc->sp[i] = sp[i];
1008		sc->family = src->sa_family;
1009		CK_LIST_INSERT_HEAD(ipsec_srchash(src), sc, srchash);
1010	} else {
1011		sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1012		return (ENOMEM);
1013	}
1014	ipsec_set_running(sc);
1015	return (0);
1016}
1017
1018static void
1019ipsec_delete_tunnel(struct ipsec_softc *sc)
1020{
1021	int i;
1022
1023	sx_assert(&ipsec_ioctl_sx, SA_XLOCKED);
1024
1025	sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
1026	if (sc->family != 0) {
1027		CK_LIST_REMOVE(sc, srchash);
1028		IPSEC_WAIT();
1029
1030		/*
1031		 * Make sure that ipsec_if_input() will not do access
1032		 * to softc's policies.
1033		 */
1034		sc->family = 0;
1035		IPSEC_WAIT();
1036
1037		key_unregister_ifnet(sc->sp, IPSEC_SPCOUNT);
1038		for (i = 0; i < IPSEC_SPCOUNT; i++)
1039			key_freesp(&sc->sp[i]);
1040	}
1041}
1042