1/*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the project nor the names of its contributors
16 *    may be used to endorse or promote products derived from this software
17 *    without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED.  IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 *	$KAME: nd6_rtr.c,v 1.111 2001/04/27 01:37:15 jinmei Exp $
32 */
33
34#include <sys/cdefs.h>
35__FBSDID("$FreeBSD$");
36
37#include "opt_inet.h"
38#include "opt_inet6.h"
39
40#include <sys/param.h>
41#include <sys/systm.h>
42#include <sys/malloc.h>
43#include <sys/mbuf.h>
44#include <sys/refcount.h>
45#include <sys/socket.h>
46#include <sys/sockio.h>
47#include <sys/time.h>
48#include <sys/kernel.h>
49#include <sys/lock.h>
50#include <sys/errno.h>
51#include <sys/rmlock.h>
52#include <sys/rwlock.h>
53#include <sys/sysctl.h>
54#include <sys/syslog.h>
55#include <sys/queue.h>
56
57#include <net/if.h>
58#include <net/if_var.h>
59#include <net/if_types.h>
60#include <net/if_dl.h>
61#include <net/route.h>
62#include <net/route_var.h>
63#include <net/radix.h>
64#include <net/vnet.h>
65
66#include <netinet/in.h>
67#include <net/if_llatbl.h>
68#include <netinet6/in6_var.h>
69#include <netinet6/in6_ifattach.h>
70#include <netinet/ip6.h>
71#include <netinet6/ip6_var.h>
72#include <netinet6/nd6.h>
73#include <netinet/icmp6.h>
74#include <netinet6/scope6_var.h>
75
76static struct nd_defrouter *defrtrlist_update(struct nd_defrouter *);
77static int prelist_update(struct nd_prefixctl *, struct nd_defrouter *,
78    struct mbuf *, int);
79static int nd6_prefix_onlink(struct nd_prefix *);
80
81TAILQ_HEAD(nd6_drhead, nd_defrouter);
82VNET_DEFINE_STATIC(struct nd6_drhead, nd6_defrouter);
83#define	V_nd6_defrouter			VNET(nd6_defrouter)
84
85VNET_DECLARE(int, nd6_recalc_reachtm_interval);
86#define	V_nd6_recalc_reachtm_interval	VNET(nd6_recalc_reachtm_interval)
87
88VNET_DEFINE_STATIC(struct ifnet *, nd6_defifp);
89VNET_DEFINE(int, nd6_defifindex);
90#define	V_nd6_defifp			VNET(nd6_defifp)
91
92VNET_DEFINE(int, ip6_use_tempaddr) = 0;
93
94VNET_DEFINE(int, ip6_desync_factor);
95VNET_DEFINE(u_int32_t, ip6_temp_preferred_lifetime) = DEF_TEMP_PREFERRED_LIFETIME;
96VNET_DEFINE(u_int32_t, ip6_temp_valid_lifetime) = DEF_TEMP_VALID_LIFETIME;
97
98VNET_DEFINE(int, ip6_temp_regen_advance) = TEMPADDR_REGEN_ADVANCE;
99
100#ifdef EXPERIMENTAL
101VNET_DEFINE(int, nd6_ignore_ipv6_only_ra) = 1;
102#endif
103
104SYSCTL_DECL(_net_inet6_icmp6);
105
106/* RTPREF_MEDIUM has to be 0! */
107#define RTPREF_HIGH	1
108#define RTPREF_MEDIUM	0
109#define RTPREF_LOW	(-1)
110#define RTPREF_RESERVED	(-2)
111#define RTPREF_INVALID	(-3)	/* internal */
112
113static void
114defrouter_ref(struct nd_defrouter *dr)
115{
116
117	refcount_acquire(&dr->refcnt);
118}
119
120void
121defrouter_rele(struct nd_defrouter *dr)
122{
123
124	if (refcount_release(&dr->refcnt))
125		free(dr, M_IP6NDP);
126}
127
128/*
129 * Remove a router from the global list and optionally stash it in a
130 * caller-supplied queue.
131 */
132static void
133defrouter_unlink(struct nd_defrouter *dr, struct nd6_drhead *drq)
134{
135
136	ND6_WLOCK_ASSERT();
137
138	TAILQ_REMOVE(&V_nd6_defrouter, dr, dr_entry);
139	V_nd6_list_genid++;
140	if (drq != NULL)
141		TAILQ_INSERT_TAIL(drq, dr, dr_entry);
142}
143
144/*
145 * Receive Router Solicitation Message - just for routers.
146 * Router solicitation/advertisement is mostly managed by userland program
147 * (rtadvd) so here we have no function like nd6_ra_output().
148 *
149 * Based on RFC 2461
150 */
151void
152nd6_rs_input(struct mbuf *m, int off, int icmp6len)
153{
154	struct ifnet *ifp;
155	struct ip6_hdr *ip6;
156	struct nd_router_solicit *nd_rs;
157	struct in6_addr saddr6;
158	union nd_opts ndopts;
159	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
160	char *lladdr;
161	int lladdrlen;
162
163	ifp = m->m_pkthdr.rcvif;
164
165	/*
166	 * Accept RS only when V_ip6_forwarding=1 and the interface has
167	 * no ND6_IFF_ACCEPT_RTADV.
168	 */
169	if (!V_ip6_forwarding || ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV)
170		goto freeit;
171
172	/* RFC 6980: Nodes MUST silently ignore fragments */
173	if(m->m_flags & M_FRAGMENTED)
174		goto freeit;
175
176	/* Sanity checks */
177	ip6 = mtod(m, struct ip6_hdr *);
178	if (ip6->ip6_hlim != 255) {
179		nd6log((LOG_ERR,
180		    "%s: invalid hlim (%d) from %s to %s on %s\n", __func__,
181		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
182		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
183		goto bad;
184	}
185
186	/*
187	 * Don't update the neighbor cache, if src = ::.
188	 * This indicates that the src has no IP address assigned yet.
189	 */
190	saddr6 = ip6->ip6_src;
191	if (IN6_IS_ADDR_UNSPECIFIED(&saddr6))
192		goto freeit;
193
194	if (m->m_len < off + icmp6len) {
195		m = m_pullup(m, off + icmp6len);
196		if (m == NULL) {
197			IP6STAT_INC(ip6s_exthdrtoolong);
198			return;
199		}
200	}
201	ip6 = mtod(m, struct ip6_hdr *);
202	nd_rs = (struct nd_router_solicit *)((caddr_t)ip6 + off);
203
204	icmp6len -= sizeof(*nd_rs);
205	nd6_option_init(nd_rs + 1, icmp6len, &ndopts);
206	if (nd6_options(&ndopts) < 0) {
207		nd6log((LOG_INFO,
208		    "%s: invalid ND option, ignored\n", __func__));
209		/* nd6_options have incremented stats */
210		goto freeit;
211	}
212
213	lladdr = NULL;
214	lladdrlen = 0;
215	if (ndopts.nd_opts_src_lladdr) {
216		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
217		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
218	}
219
220	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
221		nd6log((LOG_INFO,
222		    "%s: lladdrlen mismatch for %s (if %d, RS packet %d)\n",
223		    __func__, ip6_sprintf(ip6bufs, &saddr6),
224		    ifp->if_addrlen, lladdrlen - 2));
225		goto bad;
226	}
227
228	nd6_cache_lladdr(ifp, &saddr6, lladdr, lladdrlen, ND_ROUTER_SOLICIT, 0);
229
230 freeit:
231	m_freem(m);
232	return;
233
234 bad:
235	ICMP6STAT_INC(icp6s_badrs);
236	m_freem(m);
237}
238
239#ifdef EXPERIMENTAL
240/*
241 * An initial update routine for draft-ietf-6man-ipv6only-flag.
242 * We need to iterate over all default routers for the given
243 * interface to see whether they are all advertising the "S"
244 * (IPv6-Only) flag.  If they do set, otherwise unset, the
245 * interface flag we later use to filter on.
246 */
247static void
248defrtr_ipv6_only_ifp(struct ifnet *ifp)
249{
250	struct nd_defrouter *dr;
251	bool ipv6_only, ipv6_only_old;
252#ifdef INET
253	struct epoch_tracker et;
254	struct ifaddr *ifa;
255	bool has_ipv4_addr;
256#endif
257
258	if (V_nd6_ignore_ipv6_only_ra != 0)
259		return;
260
261	ipv6_only = true;
262	ND6_RLOCK();
263	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry)
264		if (dr->ifp == ifp &&
265		    (dr->raflags & ND_RA_FLAG_IPV6_ONLY) == 0)
266			ipv6_only = false;
267	ND6_RUNLOCK();
268
269	IF_AFDATA_WLOCK(ifp);
270	ipv6_only_old = ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY;
271	IF_AFDATA_WUNLOCK(ifp);
272
273	/* If nothing changed, we have an early exit. */
274	if (ipv6_only == ipv6_only_old)
275		return;
276
277#ifdef INET
278	/*
279	 * Should we want to set the IPV6-ONLY flag, check if the
280	 * interface has a non-0/0 and non-link-local IPv4 address
281	 * configured on it.  If it has we will assume working
282	 * IPv4 operations and will clear the interface flag.
283	 */
284	has_ipv4_addr = false;
285	if (ipv6_only) {
286		NET_EPOCH_ENTER(et);
287		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
288			if (ifa->ifa_addr->sa_family != AF_INET)
289				continue;
290			if (in_canforward(
291			    satosin(ifa->ifa_addr)->sin_addr)) {
292				has_ipv4_addr = true;
293				break;
294			}
295		}
296		NET_EPOCH_EXIT(et);
297	}
298	if (ipv6_only && has_ipv4_addr) {
299		log(LOG_NOTICE, "%s rcvd RA w/ IPv6-Only flag set but has IPv4 "
300		    "configured, ignoring IPv6-Only flag.\n", ifp->if_xname);
301		ipv6_only = false;
302	}
303#endif
304
305	IF_AFDATA_WLOCK(ifp);
306	if (ipv6_only)
307		ND_IFINFO(ifp)->flags |= ND6_IFF_IPV6_ONLY;
308	else
309		ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY;
310	IF_AFDATA_WUNLOCK(ifp);
311
312#ifdef notyet
313	/* Send notification of flag change. */
314#endif
315}
316
317static void
318defrtr_ipv6_only_ipf_down(struct ifnet *ifp)
319{
320
321	IF_AFDATA_WLOCK(ifp);
322	ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY;
323	IF_AFDATA_WUNLOCK(ifp);
324}
325#endif	/* EXPERIMENTAL */
326
327void
328nd6_ifnet_link_event(void *arg __unused, struct ifnet *ifp, int linkstate)
329{
330
331	/*
332	 * XXX-BZ we might want to trigger re-evaluation of our default router
333	 * availability. E.g., on link down the default router might be
334	 * unreachable but a different interface might still have connectivity.
335	 */
336
337#ifdef EXPERIMENTAL
338	if (linkstate == LINK_STATE_DOWN)
339		defrtr_ipv6_only_ipf_down(ifp);
340#endif
341}
342
343/*
344 * Receive Router Advertisement Message.
345 *
346 * Based on RFC 2461
347 * TODO: on-link bit on prefix information
348 * TODO: ND_RA_FLAG_{OTHER,MANAGED} processing
349 */
350void
351nd6_ra_input(struct mbuf *m, int off, int icmp6len)
352{
353	struct ifnet *ifp;
354	struct nd_ifinfo *ndi;
355	struct ip6_hdr *ip6;
356	struct nd_router_advert *nd_ra;
357	struct in6_addr saddr6;
358	struct nd_defrouter *dr;
359	union nd_opts ndopts;
360	char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN];
361	int mcast;
362
363	/*
364	 * We only accept RAs only when the per-interface flag
365	 * ND6_IFF_ACCEPT_RTADV is on the receiving interface.
366	 */
367	ifp = m->m_pkthdr.rcvif;
368	ndi = ND_IFINFO(ifp);
369	if (!(ndi->flags & ND6_IFF_ACCEPT_RTADV))
370		goto freeit;
371
372	/* RFC 6980: Nodes MUST silently ignore fragments */
373	if(m->m_flags & M_FRAGMENTED)
374		goto freeit;
375
376	ip6 = mtod(m, struct ip6_hdr *);
377	if (ip6->ip6_hlim != 255) {
378		nd6log((LOG_ERR,
379		    "%s: invalid hlim (%d) from %s to %s on %s\n", __func__,
380		    ip6->ip6_hlim, ip6_sprintf(ip6bufs, &ip6->ip6_src),
381		    ip6_sprintf(ip6bufd, &ip6->ip6_dst), if_name(ifp)));
382		goto bad;
383	}
384
385	saddr6 = ip6->ip6_src;
386	if (!IN6_IS_ADDR_LINKLOCAL(&saddr6)) {
387		nd6log((LOG_ERR,
388		    "%s: src %s is not link-local\n", __func__,
389		    ip6_sprintf(ip6bufs, &saddr6)));
390		goto bad;
391	}
392
393	if (m->m_len < off + icmp6len) {
394		m = m_pullup(m, off + icmp6len);
395		if (m == NULL) {
396			IP6STAT_INC(ip6s_exthdrtoolong);
397			return;
398		}
399	}
400	ip6 = mtod(m, struct ip6_hdr *);
401	nd_ra = (struct nd_router_advert *)((caddr_t)ip6 + off);
402
403	icmp6len -= sizeof(*nd_ra);
404	nd6_option_init(nd_ra + 1, icmp6len, &ndopts);
405	if (nd6_options(&ndopts) < 0) {
406		nd6log((LOG_INFO,
407		    "%s: invalid ND option, ignored\n", __func__));
408		/* nd6_options have incremented stats */
409		goto freeit;
410	}
411
412	mcast = 0;
413	dr = NULL;
414    {
415	struct nd_defrouter dr0;
416	u_int32_t advreachable = nd_ra->nd_ra_reachable;
417
418	/* remember if this is a multicasted advertisement */
419	if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst))
420		mcast = 1;
421
422	bzero(&dr0, sizeof(dr0));
423	dr0.rtaddr = saddr6;
424	dr0.raflags = nd_ra->nd_ra_flags_reserved;
425	/*
426	 * Effectively-disable routes from RA messages when
427	 * ND6_IFF_NO_RADR enabled on the receiving interface or
428	 * (ip6.forwarding == 1 && ip6.rfc6204w3 != 1).
429	 */
430	if (ndi->flags & ND6_IFF_NO_RADR)
431		dr0.rtlifetime = 0;
432	else if (V_ip6_forwarding && !V_ip6_rfc6204w3)
433		dr0.rtlifetime = 0;
434	else
435		dr0.rtlifetime = ntohs(nd_ra->nd_ra_router_lifetime);
436	dr0.expire = time_uptime + dr0.rtlifetime;
437	dr0.ifp = ifp;
438	/* unspecified or not? (RFC 2461 6.3.4) */
439	if (advreachable) {
440		advreachable = ntohl(advreachable);
441		if (advreachable <= MAX_REACHABLE_TIME &&
442		    ndi->basereachable != advreachable) {
443			ndi->basereachable = advreachable;
444			ndi->reachable = ND_COMPUTE_RTIME(ndi->basereachable);
445			ndi->recalctm = V_nd6_recalc_reachtm_interval; /* reset */
446		}
447	}
448	if (nd_ra->nd_ra_retransmit)
449		ndi->retrans = ntohl(nd_ra->nd_ra_retransmit);
450	if (nd_ra->nd_ra_curhoplimit) {
451		if (ndi->chlim < nd_ra->nd_ra_curhoplimit)
452			ndi->chlim = nd_ra->nd_ra_curhoplimit;
453		else if (ndi->chlim != nd_ra->nd_ra_curhoplimit) {
454			log(LOG_ERR, "RA with a lower CurHopLimit sent from "
455			    "%s on %s (current = %d, received = %d). "
456			    "Ignored.\n", ip6_sprintf(ip6bufs, &ip6->ip6_src),
457			    if_name(ifp), ndi->chlim, nd_ra->nd_ra_curhoplimit);
458		}
459	}
460	dr = defrtrlist_update(&dr0);
461#ifdef EXPERIMENTAL
462	defrtr_ipv6_only_ifp(ifp);
463#endif
464    }
465
466	/*
467	 * prefix
468	 */
469	if (ndopts.nd_opts_pi) {
470		struct nd_opt_hdr *pt;
471		struct nd_opt_prefix_info *pi = NULL;
472		struct nd_prefixctl pr;
473
474		for (pt = (struct nd_opt_hdr *)ndopts.nd_opts_pi;
475		     pt <= (struct nd_opt_hdr *)ndopts.nd_opts_pi_end;
476		     pt = (struct nd_opt_hdr *)((caddr_t)pt +
477						(pt->nd_opt_len << 3))) {
478			if (pt->nd_opt_type != ND_OPT_PREFIX_INFORMATION)
479				continue;
480			pi = (struct nd_opt_prefix_info *)pt;
481
482			if (pi->nd_opt_pi_len != 4) {
483				nd6log((LOG_INFO,
484				    "%s: invalid option len %d for prefix "
485				    "information option, ignored\n", __func__,
486				    pi->nd_opt_pi_len));
487				continue;
488			}
489
490			if (128 < pi->nd_opt_pi_prefix_len) {
491				nd6log((LOG_INFO,
492				    "%s: invalid prefix len %d for prefix "
493				    "information option, ignored\n", __func__,
494				    pi->nd_opt_pi_prefix_len));
495				continue;
496			}
497
498			if (IN6_IS_ADDR_MULTICAST(&pi->nd_opt_pi_prefix)
499			 || IN6_IS_ADDR_LINKLOCAL(&pi->nd_opt_pi_prefix)) {
500				nd6log((LOG_INFO,
501				    "%s: invalid prefix %s, ignored\n",
502				    __func__, ip6_sprintf(ip6bufs,
503					&pi->nd_opt_pi_prefix)));
504				continue;
505			}
506
507			bzero(&pr, sizeof(pr));
508			pr.ndpr_prefix.sin6_family = AF_INET6;
509			pr.ndpr_prefix.sin6_len = sizeof(pr.ndpr_prefix);
510			pr.ndpr_prefix.sin6_addr = pi->nd_opt_pi_prefix;
511			pr.ndpr_ifp = (struct ifnet *)m->m_pkthdr.rcvif;
512
513			pr.ndpr_raf_onlink = (pi->nd_opt_pi_flags_reserved &
514			    ND_OPT_PI_FLAG_ONLINK) ? 1 : 0;
515			pr.ndpr_raf_auto = (pi->nd_opt_pi_flags_reserved &
516			    ND_OPT_PI_FLAG_AUTO) ? 1 : 0;
517			pr.ndpr_plen = pi->nd_opt_pi_prefix_len;
518			pr.ndpr_vltime = ntohl(pi->nd_opt_pi_valid_time);
519			pr.ndpr_pltime = ntohl(pi->nd_opt_pi_preferred_time);
520			(void)prelist_update(&pr, dr, m, mcast);
521		}
522	}
523	if (dr != NULL) {
524		defrouter_rele(dr);
525		dr = NULL;
526	}
527
528	/*
529	 * MTU
530	 */
531	if (ndopts.nd_opts_mtu && ndopts.nd_opts_mtu->nd_opt_mtu_len == 1) {
532		u_long mtu;
533		u_long maxmtu;
534
535		mtu = (u_long)ntohl(ndopts.nd_opts_mtu->nd_opt_mtu_mtu);
536
537		/* lower bound */
538		if (mtu < IPV6_MMTU) {
539			nd6log((LOG_INFO, "%s: bogus mtu option mtu=%lu sent "
540			    "from %s, ignoring\n", __func__,
541			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src)));
542			goto skip;
543		}
544
545		/* upper bound */
546		maxmtu = (ndi->maxmtu && ndi->maxmtu < ifp->if_mtu)
547		    ? ndi->maxmtu : ifp->if_mtu;
548		if (mtu <= maxmtu) {
549			int change = (ndi->linkmtu != mtu);
550
551			ndi->linkmtu = mtu;
552			if (change) {
553				/* in6_maxmtu may change */
554				in6_setmaxmtu();
555				rt_updatemtu(ifp);
556			}
557		} else {
558			nd6log((LOG_INFO, "%s: bogus mtu=%lu sent from %s; "
559			    "exceeds maxmtu %lu, ignoring\n", __func__,
560			    mtu, ip6_sprintf(ip6bufs, &ip6->ip6_src), maxmtu));
561		}
562	}
563
564 skip:
565
566	/*
567	 * Source link layer address
568	 */
569    {
570	char *lladdr = NULL;
571	int lladdrlen = 0;
572
573	if (ndopts.nd_opts_src_lladdr) {
574		lladdr = (char *)(ndopts.nd_opts_src_lladdr + 1);
575		lladdrlen = ndopts.nd_opts_src_lladdr->nd_opt_len << 3;
576	}
577
578	if (lladdr && ((ifp->if_addrlen + 2 + 7) & ~7) != lladdrlen) {
579		nd6log((LOG_INFO,
580		    "%s: lladdrlen mismatch for %s (if %d, RA packet %d)\n",
581		    __func__, ip6_sprintf(ip6bufs, &saddr6),
582		    ifp->if_addrlen, lladdrlen - 2));
583		goto bad;
584	}
585
586	nd6_cache_lladdr(ifp, &saddr6, lladdr,
587	    lladdrlen, ND_ROUTER_ADVERT, 0);
588
589	/*
590	 * Installing a link-layer address might change the state of the
591	 * router's neighbor cache, which might also affect our on-link
592	 * detection of adveritsed prefixes.
593	 */
594	pfxlist_onlink_check();
595    }
596
597 freeit:
598	m_freem(m);
599	return;
600
601 bad:
602	ICMP6STAT_INC(icp6s_badra);
603	m_freem(m);
604}
605
606/* tell the change to user processes watching the routing socket. */
607static void
608nd6_rtmsg(int cmd, struct rtentry *rt)
609{
610
611	rt_routemsg(cmd, rt, rt->rt_ifp, 0, rt->rt_fibnum);
612}
613
614/* PFXRTR */
615static struct nd_pfxrouter *
616pfxrtr_lookup(struct nd_prefix *pr, struct nd_defrouter *dr)
617{
618	struct nd_pfxrouter *search;
619
620	ND6_LOCK_ASSERT();
621
622	LIST_FOREACH(search, &pr->ndpr_advrtrs, pfr_entry) {
623		if (search->router == dr)
624			break;
625	}
626	return (search);
627}
628
629static void
630pfxrtr_add(struct nd_prefix *pr, struct nd_defrouter *dr)
631{
632	struct nd_pfxrouter *new;
633	bool update;
634
635	ND6_UNLOCK_ASSERT();
636
637	ND6_RLOCK();
638	if (pfxrtr_lookup(pr, dr) != NULL) {
639		ND6_RUNLOCK();
640		return;
641	}
642	ND6_RUNLOCK();
643
644	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
645	if (new == NULL)
646		return;
647	defrouter_ref(dr);
648	new->router = dr;
649
650	ND6_WLOCK();
651	if (pfxrtr_lookup(pr, dr) == NULL) {
652		LIST_INSERT_HEAD(&pr->ndpr_advrtrs, new, pfr_entry);
653		update = true;
654	} else {
655		/* We lost a race to add the reference. */
656		defrouter_rele(dr);
657		free(new, M_IP6NDP);
658		update = false;
659	}
660	ND6_WUNLOCK();
661
662	if (update)
663		pfxlist_onlink_check();
664}
665
666static void
667pfxrtr_del(struct nd_pfxrouter *pfr)
668{
669
670	ND6_WLOCK_ASSERT();
671
672	LIST_REMOVE(pfr, pfr_entry);
673	defrouter_rele(pfr->router);
674	free(pfr, M_IP6NDP);
675}
676
677
678/* Default router list processing sub routines. */
679static void
680defrouter_addreq(struct nd_defrouter *new)
681{
682	struct sockaddr_in6 def, mask, gate;
683	struct rtentry *newrt = NULL;
684	int error;
685
686	bzero(&def, sizeof(def));
687	bzero(&mask, sizeof(mask));
688	bzero(&gate, sizeof(gate));
689
690	def.sin6_len = mask.sin6_len = gate.sin6_len =
691	    sizeof(struct sockaddr_in6);
692	def.sin6_family = gate.sin6_family = AF_INET6;
693	gate.sin6_addr = new->rtaddr;
694
695	error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
696	    (struct sockaddr *)&gate, (struct sockaddr *)&mask,
697	    RTF_GATEWAY, &newrt, new->ifp->if_fib);
698	if (newrt) {
699		nd6_rtmsg(RTM_ADD, newrt); /* tell user process */
700		RTFREE(newrt);
701	}
702	if (error == 0)
703		new->installed = 1;
704}
705
706/*
707 * Remove the default route for a given router.
708 * This is just a subroutine function for defrouter_select_fib(), and
709 * should not be called from anywhere else.
710 */
711static void
712defrouter_delreq(struct nd_defrouter *dr)
713{
714	struct sockaddr_in6 def, mask, gate;
715	struct rtentry *oldrt = NULL;
716
717	bzero(&def, sizeof(def));
718	bzero(&mask, sizeof(mask));
719	bzero(&gate, sizeof(gate));
720
721	def.sin6_len = mask.sin6_len = gate.sin6_len =
722	    sizeof(struct sockaddr_in6);
723	def.sin6_family = gate.sin6_family = AF_INET6;
724	gate.sin6_addr = dr->rtaddr;
725
726	in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
727	    (struct sockaddr *)&gate,
728	    (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, dr->ifp->if_fib);
729	if (oldrt) {
730		nd6_rtmsg(RTM_DELETE, oldrt);
731		RTFREE(oldrt);
732	}
733
734	dr->installed = 0;
735}
736
737static void
738defrouter_del(struct nd_defrouter *dr)
739{
740	struct nd_defrouter *deldr = NULL;
741	struct nd_prefix *pr;
742	struct nd_pfxrouter *pfxrtr;
743
744	ND6_UNLOCK_ASSERT();
745
746	/*
747	 * Flush all the routing table entries that use the router
748	 * as a next hop.
749	 */
750	if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV)
751		rt6_flush(&dr->rtaddr, dr->ifp);
752
753#ifdef EXPERIMENTAL
754	defrtr_ipv6_only_ifp(dr->ifp);
755#endif
756
757	if (dr->installed) {
758		deldr = dr;
759		defrouter_delreq(dr);
760	}
761
762	/*
763	 * Also delete all the pointers to the router in each prefix lists.
764	 */
765	ND6_WLOCK();
766	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
767		if ((pfxrtr = pfxrtr_lookup(pr, dr)) != NULL)
768			pfxrtr_del(pfxrtr);
769	}
770	ND6_WUNLOCK();
771
772	pfxlist_onlink_check();
773
774	/*
775	 * If the router is the primary one, choose a new one.
776	 * Note that defrouter_select_fib() will remove the current
777         * gateway from the routing table.
778	 */
779	if (deldr)
780		defrouter_select_fib(deldr->ifp->if_fib);
781
782	/*
783	 * Release the list reference.
784	 */
785	defrouter_rele(dr);
786}
787
788
789struct nd_defrouter *
790defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
791{
792	struct nd_defrouter *dr;
793
794	ND6_LOCK_ASSERT();
795	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry)
796		if (dr->ifp == ifp && IN6_ARE_ADDR_EQUAL(addr, &dr->rtaddr)) {
797			defrouter_ref(dr);
798			return (dr);
799		}
800	return (NULL);
801}
802
/*
 * Wrapper around defrouter_lookup_locked() that takes and releases the ND6
 * read lock itself.  A non-NULL result carries a reference for the caller.
 */
struct nd_defrouter *
defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
{
	struct nd_defrouter *found;

	ND6_RLOCK();
	found = defrouter_lookup_locked(addr, ifp);
	ND6_RUNLOCK();

	return (found);
}
813
814/*
815 * Remove all default routes from default router list.
816 */
817void
818defrouter_reset(void)
819{
820	struct nd_defrouter *dr, **dra;
821	int count, i;
822
823	count = i = 0;
824
825	/*
826	 * We can't delete routes with the ND lock held, so make a copy of the
827	 * current default router list and use that when deleting routes.
828	 */
829	ND6_RLOCK();
830	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry)
831		count++;
832	ND6_RUNLOCK();
833
834	dra = malloc(count * sizeof(*dra), M_TEMP, M_WAITOK | M_ZERO);
835
836	ND6_RLOCK();
837	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
838		if (i == count)
839			break;
840		defrouter_ref(dr);
841		dra[i++] = dr;
842	}
843	ND6_RUNLOCK();
844
845	for (i = 0; i < count && dra[i] != NULL; i++) {
846		defrouter_delreq(dra[i]);
847		defrouter_rele(dra[i]);
848	}
849	free(dra, M_TEMP);
850
851	/*
852	 * XXX should we also nuke any default routers in the kernel, by
853	 * going through them by rtalloc1()?
854	 */
855}
856
857/*
858 * Look up a matching default router list entry and remove it. Returns true if a
859 * matching entry was found, false otherwise.
860 */
861bool
862defrouter_remove(struct in6_addr *addr, struct ifnet *ifp)
863{
864	struct nd_defrouter *dr;
865
866	ND6_WLOCK();
867	dr = defrouter_lookup_locked(addr, ifp);
868	if (dr == NULL) {
869		ND6_WUNLOCK();
870		return (false);
871	}
872
873	defrouter_unlink(dr, NULL);
874	ND6_WUNLOCK();
875	defrouter_del(dr);
876	defrouter_rele(dr);
877	return (true);
878}
879
880/*
881 * for default router selection
882 * regards router-preference field as a 2-bit signed integer
883 */
884static int
885rtpref(struct nd_defrouter *dr)
886{
887	switch (dr->raflags & ND_RA_FLAG_RTPREF_MASK) {
888	case ND_RA_FLAG_RTPREF_HIGH:
889		return (RTPREF_HIGH);
890	case ND_RA_FLAG_RTPREF_MEDIUM:
891	case ND_RA_FLAG_RTPREF_RSV:
892		return (RTPREF_MEDIUM);
893	case ND_RA_FLAG_RTPREF_LOW:
894		return (RTPREF_LOW);
895	default:
896		/*
897		 * This case should never happen.  If it did, it would mean a
898		 * serious bug of kernel internal.  We thus always bark here.
899		 * Or, can we even panic?
900		 */
901		log(LOG_ERR, "rtpref: impossible RA flag %x\n", dr->raflags);
902		return (RTPREF_INVALID);
903	}
904	/* NOTREACHED */
905}
906
907/*
908 * Default Router Selection according to Section 6.3.6 of RFC 2461 and
909 * draft-ietf-ipngwg-router-selection:
910 * 1) Routers that are reachable or probably reachable should be preferred.
911 *    If we have more than one (probably) reachable router, prefer ones
912 *    with the highest router preference.
913 * 2) When no routers on the list are known to be reachable or
914 *    probably reachable, routers SHOULD be selected in a round-robin
915 *    fashion, regardless of router preference values.
916 * 3) If the Default Router List is empty, assume that all
917 *    destinations are on-link.
918 *
919 * We assume nd_defrouter is sorted by router preference value.
920 * Since the code below covers both with and without router preference cases,
921 * we do not need to classify the cases by ifdef.
922 *
923 * At this moment, we do not try to install more than one default router,
924 * even when the multipath routing is available, because we're not sure about
925 * the benefits for stub hosts comparing to the risk of making the code
926 * complicated and the possibility of introducing bugs.
927 *
928 * We maintain a single list of routers for multiple FIBs, only considering one
929 * at a time based on the receiving interface's FIB. If @fibnum is RT_ALL_FIBS,
930 * we do the whole thing multiple times.
931 */
932void
933defrouter_select_fib(int fibnum)
934{
935	struct epoch_tracker et;
936	struct nd_defrouter *dr, *selected_dr, *installed_dr;
937	struct llentry *ln = NULL;
938
939	if (fibnum == RT_ALL_FIBS) {
940		for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
941			defrouter_select_fib(fibnum);
942		}
943	}
944
945	ND6_RLOCK();
946	/*
947	 * Let's handle easy case (3) first:
948	 * If default router list is empty, there's nothing to be done.
949	 */
950	if (TAILQ_EMPTY(&V_nd6_defrouter)) {
951		ND6_RUNLOCK();
952		return;
953	}
954
955	/*
956	 * Search for a (probably) reachable router from the list.
957	 * We just pick up the first reachable one (if any), assuming that
958	 * the ordering rule of the list described in defrtrlist_update().
959	 */
960	selected_dr = installed_dr = NULL;
961	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
962		NET_EPOCH_ENTER(et);
963		if (selected_dr == NULL && dr->ifp->if_fib == fibnum &&
964		    (ln = nd6_lookup(&dr->rtaddr, 0, dr->ifp)) &&
965		    ND6_IS_LLINFO_PROBREACH(ln)) {
966			selected_dr = dr;
967			defrouter_ref(selected_dr);
968		}
969		NET_EPOCH_EXIT(et);
970		if (ln != NULL) {
971			LLE_RUNLOCK(ln);
972			ln = NULL;
973		}
974
975		if (dr->installed && dr->ifp->if_fib == fibnum) {
976			if (installed_dr == NULL) {
977				installed_dr = dr;
978				defrouter_ref(installed_dr);
979			} else {
980				/*
981				 * this should not happen.
982				 * warn for diagnosis.
983				 */
984				log(LOG_ERR, "defrouter_select_fib: more than "
985				             "one router is installed\n");
986			}
987		}
988	}
989	/*
990	 * If none of the default routers was found to be reachable,
991	 * round-robin the list regardless of preference.
992	 * Otherwise, if we have an installed router, check if the selected
993	 * (reachable) router should really be preferred to the installed one.
994	 * We only prefer the new router when the old one is not reachable
995	 * or when the new one has a really higher preference value.
996	 */
997	if (selected_dr == NULL) {
998		if (installed_dr == NULL ||
999		    TAILQ_NEXT(installed_dr, dr_entry) == NULL)
1000			dr = TAILQ_FIRST(&V_nd6_defrouter);
1001		else
1002			dr = TAILQ_NEXT(installed_dr, dr_entry);
1003
1004		/* Ensure we select a router for this FIB. */
1005		TAILQ_FOREACH_FROM(dr, &V_nd6_defrouter, dr_entry) {
1006			if (dr->ifp->if_fib == fibnum) {
1007				selected_dr = dr;
1008				defrouter_ref(selected_dr);
1009				break;
1010			}
1011		}
1012	} else if (installed_dr != NULL) {
1013		NET_EPOCH_ENTER(et);
1014		if ((ln = nd6_lookup(&installed_dr->rtaddr, 0,
1015		                     installed_dr->ifp)) &&
1016		    ND6_IS_LLINFO_PROBREACH(ln) &&
1017		    installed_dr->ifp->if_fib == fibnum &&
1018		    rtpref(selected_dr) <= rtpref(installed_dr)) {
1019			defrouter_rele(selected_dr);
1020			selected_dr = installed_dr;
1021		}
1022		NET_EPOCH_EXIT(et);
1023		if (ln != NULL)
1024			LLE_RUNLOCK(ln);
1025	}
1026	ND6_RUNLOCK();
1027
1028	/*
1029	 * If we selected a router for this FIB and it's different
1030	 * than the installed one, remove the installed router and
1031	 * install the selected one in its place.
1032	 */
1033	if (installed_dr != selected_dr) {
1034		if (installed_dr != NULL) {
1035			defrouter_delreq(installed_dr);
1036			defrouter_rele(installed_dr);
1037		}
1038		if (selected_dr != NULL)
1039			defrouter_addreq(selected_dr);
1040	}
1041	if (selected_dr != NULL)
1042		defrouter_rele(selected_dr);
1043}
1044
1045static struct nd_defrouter *
1046defrtrlist_update(struct nd_defrouter *new)
1047{
1048	struct nd_defrouter *dr, *n;
1049	uint64_t genid;
1050	int oldpref;
1051	bool writelocked;
1052
1053	if (new->rtlifetime == 0) {
1054		defrouter_remove(&new->rtaddr, new->ifp);
1055		return (NULL);
1056	}
1057
1058	ND6_RLOCK();
1059	writelocked = false;
1060restart:
1061	dr = defrouter_lookup_locked(&new->rtaddr, new->ifp);
1062	if (dr != NULL) {
1063		oldpref = rtpref(dr);
1064
1065		/* override */
1066		dr->raflags = new->raflags; /* XXX flag check */
1067		dr->rtlifetime = new->rtlifetime;
1068		dr->expire = new->expire;
1069
1070		/*
1071		 * If the preference does not change, there's no need
1072		 * to sort the entries. Also make sure the selected
1073		 * router is still installed in the kernel.
1074		 */
1075		if (dr->installed && rtpref(new) == oldpref) {
1076			if (writelocked)
1077				ND6_WUNLOCK();
1078			else
1079				ND6_RUNLOCK();
1080			return (dr);
1081		}
1082	}
1083
1084	/*
1085	 * The router needs to be reinserted into the default router
1086	 * list, so upgrade to a write lock. If that fails and the list
1087	 * has potentially changed while the lock was dropped, we'll
1088	 * redo the lookup with the write lock held.
1089	 */
1090	if (!writelocked) {
1091		writelocked = true;
1092		if (!ND6_TRY_UPGRADE()) {
1093			genid = V_nd6_list_genid;
1094			ND6_RUNLOCK();
1095			ND6_WLOCK();
1096			if (genid != V_nd6_list_genid)
1097				goto restart;
1098		}
1099	}
1100
1101	if (dr != NULL) {
1102		/*
1103		 * The preferred router may have changed, so relocate this
1104		 * router.
1105		 */
1106		TAILQ_REMOVE(&V_nd6_defrouter, dr, dr_entry);
1107		n = dr;
1108	} else {
1109		n = malloc(sizeof(*n), M_IP6NDP, M_NOWAIT | M_ZERO);
1110		if (n == NULL) {
1111			ND6_WUNLOCK();
1112			return (NULL);
1113		}
1114		memcpy(n, new, sizeof(*n));
1115		/* Initialize with an extra reference for the caller. */
1116		refcount_init(&n->refcnt, 2);
1117	}
1118
1119	/*
1120	 * Insert the new router in the Default Router List;
1121	 * The Default Router List should be in the descending order
1122	 * of router-preferece.  Routers with the same preference are
1123	 * sorted in the arriving time order.
1124	 */
1125
1126	/* insert at the end of the group */
1127	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
1128		if (rtpref(n) > rtpref(dr))
1129			break;
1130	}
1131	if (dr != NULL)
1132		TAILQ_INSERT_BEFORE(dr, n, dr_entry);
1133	else
1134		TAILQ_INSERT_TAIL(&V_nd6_defrouter, n, dr_entry);
1135	V_nd6_list_genid++;
1136	ND6_WUNLOCK();
1137
1138	defrouter_select_fib(new->ifp->if_fib);
1139
1140	return (n);
1141}
1142
1143static int
1144in6_init_prefix_ltimes(struct nd_prefix *ndpr)
1145{
1146	if (ndpr->ndpr_pltime == ND6_INFINITE_LIFETIME)
1147		ndpr->ndpr_preferred = 0;
1148	else
1149		ndpr->ndpr_preferred = time_uptime + ndpr->ndpr_pltime;
1150	if (ndpr->ndpr_vltime == ND6_INFINITE_LIFETIME)
1151		ndpr->ndpr_expire = 0;
1152	else
1153		ndpr->ndpr_expire = time_uptime + ndpr->ndpr_vltime;
1154
1155	return 0;
1156}
1157
1158static void
1159in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
1160{
1161	/* init ia6t_expire */
1162	if (lt6->ia6t_vltime == ND6_INFINITE_LIFETIME)
1163		lt6->ia6t_expire = 0;
1164	else {
1165		lt6->ia6t_expire = time_uptime;
1166		lt6->ia6t_expire += lt6->ia6t_vltime;
1167	}
1168
1169	/* init ia6t_preferred */
1170	if (lt6->ia6t_pltime == ND6_INFINITE_LIFETIME)
1171		lt6->ia6t_preferred = 0;
1172	else {
1173		lt6->ia6t_preferred = time_uptime;
1174		lt6->ia6t_preferred += lt6->ia6t_pltime;
1175	}
1176}
1177
1178static struct in6_ifaddr *
1179in6_ifadd(struct nd_prefixctl *pr, int mcast)
1180{
1181	struct ifnet *ifp = pr->ndpr_ifp;
1182	struct ifaddr *ifa;
1183	struct in6_aliasreq ifra;
1184	struct in6_ifaddr *ia, *ib;
1185	int error, plen0;
1186	struct in6_addr mask;
1187	int prefixlen = pr->ndpr_plen;
1188	int updateflags;
1189	char ip6buf[INET6_ADDRSTRLEN];
1190
1191	in6_prefixlen2mask(&mask, prefixlen);
1192
1193	/*
1194	 * find a link-local address (will be interface ID).
1195	 * Is it really mandatory? Theoretically, a global or a site-local
1196	 * address can be configured without a link-local address, if we
1197	 * have a unique interface identifier...
1198	 *
1199	 * it is not mandatory to have a link-local address, we can generate
1200	 * interface identifier on the fly.  we do this because:
1201	 * (1) it should be the easiest way to find interface identifier.
1202	 * (2) RFC2462 5.4 suggesting the use of the same interface identifier
1203	 * for multiple addresses on a single interface, and possible shortcut
1204	 * of DAD.  we omitted DAD for this reason in the past.
1205	 * (3) a user can prevent autoconfiguration of global address
1206	 * by removing link-local address by hand (this is partly because we
1207	 * don't have other way to control the use of IPv6 on an interface.
1208	 * this has been our design choice - cf. NRL's "ifconfig auto").
1209	 * (4) it is easier to manage when an interface has addresses
1210	 * with the same interface identifier, than to have multiple addresses
1211	 * with different interface identifiers.
1212	 */
1213	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp, 0); /* 0 is OK? */
1214	if (ifa)
1215		ib = (struct in6_ifaddr *)ifa;
1216	else
1217		return NULL;
1218
1219	/* prefixlen + ifidlen must be equal to 128 */
1220	plen0 = in6_mask2len(&ib->ia_prefixmask.sin6_addr, NULL);
1221	if (prefixlen != plen0) {
1222		ifa_free(ifa);
1223		nd6log((LOG_INFO,
1224		    "%s: wrong prefixlen for %s (prefix=%d ifid=%d)\n",
1225		    __func__, if_name(ifp), prefixlen, 128 - plen0));
1226		return NULL;
1227	}
1228
1229	/* make ifaddr */
1230	in6_prepare_ifra(&ifra, &pr->ndpr_prefix.sin6_addr, &mask);
1231
1232	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr, &mask);
1233	/* interface ID */
1234	ifra.ifra_addr.sin6_addr.s6_addr32[0] |=
1235	    (ib->ia_addr.sin6_addr.s6_addr32[0] & ~mask.s6_addr32[0]);
1236	ifra.ifra_addr.sin6_addr.s6_addr32[1] |=
1237	    (ib->ia_addr.sin6_addr.s6_addr32[1] & ~mask.s6_addr32[1]);
1238	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
1239	    (ib->ia_addr.sin6_addr.s6_addr32[2] & ~mask.s6_addr32[2]);
1240	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
1241	    (ib->ia_addr.sin6_addr.s6_addr32[3] & ~mask.s6_addr32[3]);
1242	ifa_free(ifa);
1243
1244	/* lifetimes. */
1245	ifra.ifra_lifetime.ia6t_vltime = pr->ndpr_vltime;
1246	ifra.ifra_lifetime.ia6t_pltime = pr->ndpr_pltime;
1247
1248	/* XXX: scope zone ID? */
1249
1250	ifra.ifra_flags |= IN6_IFF_AUTOCONF; /* obey autoconf */
1251
1252	/*
1253	 * Make sure that we do not have this address already.  This should
1254	 * usually not happen, but we can still see this case, e.g., if we
1255	 * have manually configured the exact address to be configured.
1256	 */
1257	ifa = (struct ifaddr *)in6ifa_ifpwithaddr(ifp,
1258	    &ifra.ifra_addr.sin6_addr);
1259	if (ifa != NULL) {
1260		ifa_free(ifa);
1261		/* this should be rare enough to make an explicit log */
1262		log(LOG_INFO, "in6_ifadd: %s is already configured\n",
1263		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr));
1264		return (NULL);
1265	}
1266
1267	/*
1268	 * Allocate ifaddr structure, link into chain, etc.
1269	 * If we are going to create a new address upon receiving a multicasted
1270	 * RA, we need to impose a random delay before starting DAD.
1271	 * [draft-ietf-ipv6-rfc2462bis-02.txt, Section 5.4.2]
1272	 */
1273	updateflags = 0;
1274	if (mcast)
1275		updateflags |= IN6_IFAUPDATE_DADDELAY;
1276	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0) {
1277		nd6log((LOG_ERR,
1278		    "%s: failed to make ifaddr %s on %s (errno=%d)\n", __func__,
1279		    ip6_sprintf(ip6buf, &ifra.ifra_addr.sin6_addr),
1280		    if_name(ifp), error));
1281		return (NULL);	/* ifaddr must not have been allocated. */
1282	}
1283
1284	ia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
1285	/*
1286	 * XXXRW: Assumption of non-NULLness here might not be true with
1287	 * fine-grained locking -- should we validate it?  Or just return
1288	 * earlier ifa rather than looking it up again?
1289	 */
1290	return (ia);		/* this is always non-NULL  and referenced. */
1291}
1292
1293static struct nd_prefix *
1294nd6_prefix_lookup_locked(struct nd_prefixctl *key)
1295{
1296	struct nd_prefix *search;
1297
1298	ND6_LOCK_ASSERT();
1299
1300	LIST_FOREACH(search, &V_nd_prefix, ndpr_entry) {
1301		if (key->ndpr_ifp == search->ndpr_ifp &&
1302		    key->ndpr_plen == search->ndpr_plen &&
1303		    in6_are_prefix_equal(&key->ndpr_prefix.sin6_addr,
1304		    &search->ndpr_prefix.sin6_addr, key->ndpr_plen)) {
1305			nd6_prefix_ref(search);
1306			break;
1307		}
1308	}
1309	return (search);
1310}
1311
/*
 * Look up a prefix while holding the ND6 read lock for the duration of
 * the search.  Returns whatever nd6_prefix_lookup_locked() returns.
 */
struct nd_prefix *
nd6_prefix_lookup(struct nd_prefixctl *key)
{
	struct nd_prefix *pr;

	ND6_RLOCK();
	pr = nd6_prefix_lookup_locked(key);
	ND6_RUNLOCK();

	return (pr);
}
1322
1323void
1324nd6_prefix_ref(struct nd_prefix *pr)
1325{
1326
1327	refcount_acquire(&pr->ndpr_refcnt);
1328}
1329
1330void
1331nd6_prefix_rele(struct nd_prefix *pr)
1332{
1333
1334	if (refcount_release(&pr->ndpr_refcnt)) {
1335		KASSERT(LIST_EMPTY(&pr->ndpr_advrtrs),
1336		    ("prefix %p has advertising routers", pr));
1337		free(pr, M_IP6NDP);
1338	}
1339}
1340
1341int
1342nd6_prelist_add(struct nd_prefixctl *pr, struct nd_defrouter *dr,
1343    struct nd_prefix **newp)
1344{
1345	struct nd_prefix *new;
1346	char ip6buf[INET6_ADDRSTRLEN];
1347	int error;
1348
1349	new = malloc(sizeof(*new), M_IP6NDP, M_NOWAIT | M_ZERO);
1350	if (new == NULL)
1351		return (ENOMEM);
1352	refcount_init(&new->ndpr_refcnt, newp != NULL ? 2 : 1);
1353	new->ndpr_ifp = pr->ndpr_ifp;
1354	new->ndpr_prefix = pr->ndpr_prefix;
1355	new->ndpr_plen = pr->ndpr_plen;
1356	new->ndpr_vltime = pr->ndpr_vltime;
1357	new->ndpr_pltime = pr->ndpr_pltime;
1358	new->ndpr_flags = pr->ndpr_flags;
1359	if ((error = in6_init_prefix_ltimes(new)) != 0) {
1360		free(new, M_IP6NDP);
1361		return (error);
1362	}
1363	new->ndpr_lastupdate = time_uptime;
1364
1365	/* initialization */
1366	LIST_INIT(&new->ndpr_advrtrs);
1367	in6_prefixlen2mask(&new->ndpr_mask, new->ndpr_plen);
1368	/* make prefix in the canonical form */
1369	IN6_MASK_ADDR(&new->ndpr_prefix.sin6_addr, &new->ndpr_mask);
1370
1371	ND6_WLOCK();
1372	LIST_INSERT_HEAD(&V_nd_prefix, new, ndpr_entry);
1373	V_nd6_list_genid++;
1374	ND6_WUNLOCK();
1375
1376	/* ND_OPT_PI_FLAG_ONLINK processing */
1377	if (new->ndpr_raf_onlink) {
1378		struct epoch_tracker et;
1379
1380		ND6_ONLINK_LOCK();
1381		NET_EPOCH_ENTER(et);
1382		if ((error = nd6_prefix_onlink(new)) != 0) {
1383			nd6log((LOG_ERR, "%s: failed to make the prefix %s/%d "
1384			    "on-link on %s (errno=%d)\n", __func__,
1385			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1386			    pr->ndpr_plen, if_name(pr->ndpr_ifp), error));
1387			/* proceed anyway. XXX: is it correct? */
1388		}
1389		NET_EPOCH_EXIT(et);
1390		ND6_ONLINK_UNLOCK();
1391	}
1392
1393	if (dr != NULL)
1394		pfxrtr_add(new, dr);
1395	if (newp != NULL)
1396		*newp = new;
1397	return (0);
1398}
1399
1400/*
1401 * Remove a prefix from the prefix list and optionally stash it in a
1402 * caller-provided list.
1403 *
1404 * The ND6 lock must be held.
1405 */
1406void
1407nd6_prefix_unlink(struct nd_prefix *pr, struct nd_prhead *list)
1408{
1409
1410	ND6_WLOCK_ASSERT();
1411
1412	LIST_REMOVE(pr, ndpr_entry);
1413	V_nd6_list_genid++;
1414	if (list != NULL)
1415		LIST_INSERT_HEAD(list, pr, ndpr_entry);
1416}
1417
1418/*
1419 * Free an unlinked prefix, first marking it off-link if necessary.
1420 */
1421void
1422nd6_prefix_del(struct nd_prefix *pr)
1423{
1424	struct nd_pfxrouter *pfr, *next;
1425	int e;
1426	char ip6buf[INET6_ADDRSTRLEN];
1427
1428	KASSERT(pr->ndpr_addrcnt == 0,
1429	    ("prefix %p has referencing addresses", pr));
1430	ND6_UNLOCK_ASSERT();
1431
1432	/*
1433	 * Though these flags are now meaningless, we'd rather keep the value
1434	 * of pr->ndpr_raf_onlink and pr->ndpr_raf_auto not to confuse users
1435	 * when executing "ndp -p".
1436	 */
1437	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0) {
1438		ND6_ONLINK_LOCK();
1439		if ((e = nd6_prefix_offlink(pr)) != 0) {
1440			nd6log((LOG_ERR,
1441			    "%s: failed to make the prefix %s/%d offlink on %s "
1442			    "(errno=%d)\n", __func__,
1443			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
1444			    pr->ndpr_plen, if_name(pr->ndpr_ifp), e));
1445			/* what should we do? */
1446		}
1447		ND6_ONLINK_UNLOCK();
1448	}
1449
1450	/* Release references to routers that have advertised this prefix. */
1451	ND6_WLOCK();
1452	LIST_FOREACH_SAFE(pfr, &pr->ndpr_advrtrs, pfr_entry, next)
1453		pfxrtr_del(pfr);
1454	ND6_WUNLOCK();
1455
1456	nd6_prefix_rele(pr);
1457
1458	pfxlist_onlink_check();
1459}
1460
1461static int
1462prelist_update(struct nd_prefixctl *new, struct nd_defrouter *dr,
1463    struct mbuf *m, int mcast)
1464{
1465	struct in6_ifaddr *ia6 = NULL, *ia6_match = NULL;
1466	struct ifaddr *ifa;
1467	struct ifnet *ifp = new->ndpr_ifp;
1468	struct nd_prefix *pr;
1469	int error = 0;
1470	int auth;
1471	struct in6_addrlifetime lt6_tmp;
1472	char ip6buf[INET6_ADDRSTRLEN];
1473
1474	NET_EPOCH_ASSERT();
1475
1476	auth = 0;
1477	if (m) {
1478		/*
1479		 * Authenticity for NA consists authentication for
1480		 * both IP header and IP datagrams, doesn't it ?
1481		 */
1482#if defined(M_AUTHIPHDR) && defined(M_AUTHIPDGM)
1483		auth = ((m->m_flags & M_AUTHIPHDR) &&
1484		    (m->m_flags & M_AUTHIPDGM));
1485#endif
1486	}
1487
1488	if ((pr = nd6_prefix_lookup(new)) != NULL) {
1489		/*
1490		 * nd6_prefix_lookup() ensures that pr and new have the same
1491		 * prefix on a same interface.
1492		 */
1493
1494		/*
1495		 * Update prefix information.  Note that the on-link (L) bit
1496		 * and the autonomous (A) bit should NOT be changed from 1
1497		 * to 0.
1498		 */
1499		if (new->ndpr_raf_onlink == 1)
1500			pr->ndpr_raf_onlink = 1;
1501		if (new->ndpr_raf_auto == 1)
1502			pr->ndpr_raf_auto = 1;
1503		if (new->ndpr_raf_onlink) {
1504			pr->ndpr_vltime = new->ndpr_vltime;
1505			pr->ndpr_pltime = new->ndpr_pltime;
1506			(void)in6_init_prefix_ltimes(pr); /* XXX error case? */
1507			pr->ndpr_lastupdate = time_uptime;
1508		}
1509
1510		if (new->ndpr_raf_onlink &&
1511		    (pr->ndpr_stateflags & NDPRF_ONLINK) == 0) {
1512			ND6_ONLINK_LOCK();
1513			if ((error = nd6_prefix_onlink(pr)) != 0) {
1514				nd6log((LOG_ERR,
1515				    "%s: failed to make the prefix %s/%d "
1516				    "on-link on %s (errno=%d)\n", __func__,
1517				    ip6_sprintf(ip6buf,
1518				        &pr->ndpr_prefix.sin6_addr),
1519				    pr->ndpr_plen, if_name(pr->ndpr_ifp),
1520				    error));
1521				/* proceed anyway. XXX: is it correct? */
1522			}
1523			ND6_ONLINK_UNLOCK();
1524		}
1525
1526		if (dr != NULL)
1527			pfxrtr_add(pr, dr);
1528	} else {
1529		if (new->ndpr_vltime == 0)
1530			goto end;
1531		if (new->ndpr_raf_onlink == 0 && new->ndpr_raf_auto == 0)
1532			goto end;
1533
1534		error = nd6_prelist_add(new, dr, &pr);
1535		if (error != 0) {
1536			nd6log((LOG_NOTICE, "%s: nd6_prelist_add() failed for "
1537			    "the prefix %s/%d on %s (errno=%d)\n", __func__,
1538			    ip6_sprintf(ip6buf, &new->ndpr_prefix.sin6_addr),
1539			    new->ndpr_plen, if_name(new->ndpr_ifp), error));
1540			goto end; /* we should just give up in this case. */
1541		}
1542
1543		/*
1544		 * XXX: from the ND point of view, we can ignore a prefix
1545		 * with the on-link bit being zero.  However, we need a
1546		 * prefix structure for references from autoconfigured
1547		 * addresses.  Thus, we explicitly make sure that the prefix
1548		 * itself expires now.
1549		 */
1550		if (pr->ndpr_raf_onlink == 0) {
1551			pr->ndpr_vltime = 0;
1552			pr->ndpr_pltime = 0;
1553			in6_init_prefix_ltimes(pr);
1554		}
1555	}
1556
1557	/*
1558	 * Address autoconfiguration based on Section 5.5.3 of RFC 2462.
1559	 * Note that pr must be non NULL at this point.
1560	 */
1561
1562	/* 5.5.3 (a). Ignore the prefix without the A bit set. */
1563	if (!new->ndpr_raf_auto)
1564		goto end;
1565
1566	/*
1567	 * 5.5.3 (b). the link-local prefix should have been ignored in
1568	 * nd6_ra_input.
1569	 */
1570
1571	/* 5.5.3 (c). Consistency check on lifetimes: pltime <= vltime. */
1572	if (new->ndpr_pltime > new->ndpr_vltime) {
1573		error = EINVAL;	/* XXX: won't be used */
1574		goto end;
1575	}
1576
1577	/*
1578	 * 5.5.3 (d).  If the prefix advertised is not equal to the prefix of
1579	 * an address configured by stateless autoconfiguration already in the
1580	 * list of addresses associated with the interface, and the Valid
1581	 * Lifetime is not 0, form an address.  We first check if we have
1582	 * a matching prefix.
1583	 * Note: we apply a clarification in rfc2462bis-02 here.  We only
1584	 * consider autoconfigured addresses while RFC2462 simply said
1585	 * "address".
1586	 */
1587	CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
1588		struct in6_ifaddr *ifa6;
1589		u_int32_t remaininglifetime;
1590
1591		if (ifa->ifa_addr->sa_family != AF_INET6)
1592			continue;
1593
1594		ifa6 = (struct in6_ifaddr *)ifa;
1595
1596		/*
1597		 * We only consider autoconfigured addresses as per rfc2462bis.
1598		 */
1599		if (!(ifa6->ia6_flags & IN6_IFF_AUTOCONF))
1600			continue;
1601
1602		/*
1603		 * Spec is not clear here, but I believe we should concentrate
1604		 * on unicast (i.e. not anycast) addresses.
1605		 * XXX: other ia6_flags? detached or duplicated?
1606		 */
1607		if ((ifa6->ia6_flags & IN6_IFF_ANYCAST) != 0)
1608			continue;
1609
1610		/*
1611		 * Ignore the address if it is not associated with a prefix
1612		 * or is associated with a prefix that is different from this
1613		 * one.  (pr is never NULL here)
1614		 */
1615		if (ifa6->ia6_ndpr != pr)
1616			continue;
1617
1618		if (ia6_match == NULL) /* remember the first one */
1619			ia6_match = ifa6;
1620
1621		/*
1622		 * An already autoconfigured address matched.  Now that we
1623		 * are sure there is at least one matched address, we can
1624		 * proceed to 5.5.3. (e): update the lifetimes according to the
1625		 * "two hours" rule and the privacy extension.
1626		 * We apply some clarifications in rfc2462bis:
1627		 * - use remaininglifetime instead of storedlifetime as a
1628		 *   variable name
1629		 * - remove the dead code in the "two-hour" rule
1630		 */
1631#define TWOHOUR		(120*60)
1632		lt6_tmp = ifa6->ia6_lifetime;
1633
1634		if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME)
1635			remaininglifetime = ND6_INFINITE_LIFETIME;
1636		else if (time_uptime - ifa6->ia6_updatetime >
1637			 lt6_tmp.ia6t_vltime) {
1638			/*
1639			 * The case of "invalid" address.  We should usually
1640			 * not see this case.
1641			 */
1642			remaininglifetime = 0;
1643		} else
1644			remaininglifetime = lt6_tmp.ia6t_vltime -
1645			    (time_uptime - ifa6->ia6_updatetime);
1646
1647		/* when not updating, keep the current stored lifetime. */
1648		lt6_tmp.ia6t_vltime = remaininglifetime;
1649
1650		if (TWOHOUR < new->ndpr_vltime ||
1651		    remaininglifetime < new->ndpr_vltime) {
1652			lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1653		} else if (remaininglifetime <= TWOHOUR) {
1654			if (auth) {
1655				lt6_tmp.ia6t_vltime = new->ndpr_vltime;
1656			}
1657		} else {
1658			/*
1659			 * new->ndpr_vltime <= TWOHOUR &&
1660			 * TWOHOUR < remaininglifetime
1661			 */
1662			lt6_tmp.ia6t_vltime = TWOHOUR;
1663		}
1664
1665		/* The 2 hour rule is not imposed for preferred lifetime. */
1666		lt6_tmp.ia6t_pltime = new->ndpr_pltime;
1667
1668		in6_init_address_ltimes(pr, &lt6_tmp);
1669
1670		/*
1671		 * We need to treat lifetimes for temporary addresses
1672		 * differently, according to
1673		 * draft-ietf-ipv6-privacy-addrs-v2-01.txt 3.3 (1);
1674		 * we only update the lifetimes when they are in the maximum
1675		 * intervals.
1676		 */
1677		if ((ifa6->ia6_flags & IN6_IFF_TEMPORARY) != 0) {
1678			u_int32_t maxvltime, maxpltime;
1679
1680			if (V_ip6_temp_valid_lifetime >
1681			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1682			    V_ip6_desync_factor)) {
1683				maxvltime = V_ip6_temp_valid_lifetime -
1684				    (time_uptime - ifa6->ia6_createtime) -
1685				    V_ip6_desync_factor;
1686			} else
1687				maxvltime = 0;
1688			if (V_ip6_temp_preferred_lifetime >
1689			    (u_int32_t)((time_uptime - ifa6->ia6_createtime) +
1690			    V_ip6_desync_factor)) {
1691				maxpltime = V_ip6_temp_preferred_lifetime -
1692				    (time_uptime - ifa6->ia6_createtime) -
1693				    V_ip6_desync_factor;
1694			} else
1695				maxpltime = 0;
1696
1697			if (lt6_tmp.ia6t_vltime == ND6_INFINITE_LIFETIME ||
1698			    lt6_tmp.ia6t_vltime > maxvltime) {
1699				lt6_tmp.ia6t_vltime = maxvltime;
1700			}
1701			if (lt6_tmp.ia6t_pltime == ND6_INFINITE_LIFETIME ||
1702			    lt6_tmp.ia6t_pltime > maxpltime) {
1703				lt6_tmp.ia6t_pltime = maxpltime;
1704			}
1705		}
1706		ifa6->ia6_lifetime = lt6_tmp;
1707		ifa6->ia6_updatetime = time_uptime;
1708	}
1709	if (ia6_match == NULL && new->ndpr_vltime) {
1710		int ifidlen;
1711
1712		/*
1713		 * 5.5.3 (d) (continued)
1714		 * No address matched and the valid lifetime is non-zero.
1715		 * Create a new address.
1716		 */
1717
1718		/*
1719		 * Prefix Length check:
1720		 * If the sum of the prefix length and interface identifier
1721		 * length does not equal 128 bits, the Prefix Information
1722		 * option MUST be ignored.  The length of the interface
1723		 * identifier is defined in a separate link-type specific
1724		 * document.
1725		 */
1726		ifidlen = in6_if2idlen(ifp);
1727		if (ifidlen < 0) {
1728			/* this should not happen, so we always log it. */
1729			log(LOG_ERR, "prelist_update: IFID undefined (%s)\n",
1730			    if_name(ifp));
1731			goto end;
1732		}
1733		if (ifidlen + pr->ndpr_plen != 128) {
1734			nd6log((LOG_INFO,
1735			    "%s: invalid prefixlen %d for %s, ignored\n",
1736			    __func__, pr->ndpr_plen, if_name(ifp)));
1737			goto end;
1738		}
1739
1740		if ((ia6 = in6_ifadd(new, mcast)) != NULL) {
1741			/*
1742			 * note that we should use pr (not new) for reference.
1743			 */
1744			pr->ndpr_addrcnt++;
1745			ia6->ia6_ndpr = pr;
1746
1747			/*
1748			 * RFC 3041 3.3 (2).
1749			 * When a new public address is created as described
1750			 * in RFC2462, also create a new temporary address.
1751			 *
1752			 * RFC 3041 3.5.
1753			 * When an interface connects to a new link, a new
1754			 * randomized interface identifier should be generated
1755			 * immediately together with a new set of temporary
1756			 * addresses.  Thus, we specifiy 1 as the 2nd arg of
1757			 * in6_tmpifadd().
1758			 */
1759			if (V_ip6_use_tempaddr) {
1760				int e;
1761				if ((e = in6_tmpifadd(ia6, 1, 1)) != 0) {
1762					nd6log((LOG_NOTICE, "%s: failed to "
1763					    "create a temporary address "
1764					    "(errno=%d)\n", __func__, e));
1765				}
1766			}
1767			ifa_free(&ia6->ia_ifa);
1768
1769			/*
1770			 * A newly added address might affect the status
1771			 * of other addresses, so we check and update it.
1772			 * XXX: what if address duplication happens?
1773			 */
1774			pfxlist_onlink_check();
1775		} else {
1776			/* just set an error. do not bark here. */
1777			error = EADDRNOTAVAIL; /* XXX: might be unused. */
1778		}
1779	}
1780
1781end:
1782	if (pr != NULL)
1783		nd6_prefix_rele(pr);
1784	return (error);
1785}
1786
1787/*
1788 * A supplement function used in the on-link detection below;
1789 * detect if a given prefix has a (probably) reachable advertising router.
1790 * XXX: lengthy function name...
1791 */
1792static struct nd_pfxrouter *
1793find_pfxlist_reachable_router(struct nd_prefix *pr)
1794{
1795	struct epoch_tracker et;
1796	struct nd_pfxrouter *pfxrtr;
1797	struct llentry *ln;
1798	int canreach;
1799
1800	ND6_LOCK_ASSERT();
1801
1802	NET_EPOCH_ENTER(et);
1803	LIST_FOREACH(pfxrtr, &pr->ndpr_advrtrs, pfr_entry) {
1804		ln = nd6_lookup(&pfxrtr->router->rtaddr, 0, pfxrtr->router->ifp);
1805		if (ln == NULL)
1806			continue;
1807		canreach = ND6_IS_LLINFO_PROBREACH(ln);
1808		LLE_RUNLOCK(ln);
1809		if (canreach)
1810			break;
1811	}
1812	NET_EPOCH_EXIT(et);
1813	return (pfxrtr);
1814}
1815
1816/*
1817 * Check if each prefix in the prefix list has at least one available router
1818 * that advertised the prefix (a router is "available" if its neighbor cache
1819 * entry is reachable or probably reachable).
1820 * If the check fails, the prefix may be off-link, because, for example,
1821 * we have moved from the network but the lifetime of the prefix has not
1822 * expired yet.  So we should not use the prefix if there is another prefix
1823 * that has an available router.
1824 * But, if there is no prefix that has an available router, we still regard
1825 * all the prefixes as on-link.  This is because we can't tell if all the
1826 * routers are simply dead or if we really moved from the network and there
1827 * is no router around us.
1828 */
1829void
1830pfxlist_onlink_check(void)
1831{
1832	struct nd_prefix *pr;
1833	struct in6_ifaddr *ifa;
1834	struct nd_defrouter *dr;
1835	struct nd_pfxrouter *pfxrtr = NULL;
1836	struct rm_priotracker in6_ifa_tracker;
1837	uint64_t genid;
1838	uint32_t flags;
1839
1840	ND6_ONLINK_LOCK();
1841	ND6_RLOCK();
1842
1843	/*
1844	 * Check if there is a prefix that has a reachable advertising
1845	 * router.
1846	 */
1847	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1848		if (pr->ndpr_raf_onlink && find_pfxlist_reachable_router(pr))
1849			break;
1850	}
1851
1852	/*
1853	 * If we have no such prefix, check whether we still have a router
1854	 * that does not advertise any prefixes.
1855	 */
1856	if (pr == NULL) {
1857		TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
1858			struct nd_prefix *pr0;
1859
1860			LIST_FOREACH(pr0, &V_nd_prefix, ndpr_entry) {
1861				if ((pfxrtr = pfxrtr_lookup(pr0, dr)) != NULL)
1862					break;
1863			}
1864			if (pfxrtr != NULL)
1865				break;
1866		}
1867	}
1868	if (pr != NULL || (!TAILQ_EMPTY(&V_nd6_defrouter) && pfxrtr == NULL)) {
1869		/*
1870		 * There is at least one prefix that has a reachable router,
1871		 * or at least a router which probably does not advertise
1872		 * any prefixes.  The latter would be the case when we move
1873		 * to a new link where we have a router that does not provide
1874		 * prefixes and we configure an address by hand.
1875		 * Detach prefixes which have no reachable advertising
1876		 * router, and attach other prefixes.
1877		 */
1878		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1879			/* XXX: a link-local prefix should never be detached */
1880			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1881			    pr->ndpr_raf_onlink == 0 ||
1882			    pr->ndpr_raf_auto == 0)
1883				continue;
1884
1885			if ((pr->ndpr_stateflags & NDPRF_DETACHED) == 0 &&
1886			    find_pfxlist_reachable_router(pr) == NULL)
1887				pr->ndpr_stateflags |= NDPRF_DETACHED;
1888			else if ((pr->ndpr_stateflags & NDPRF_DETACHED) != 0 &&
1889			    find_pfxlist_reachable_router(pr) != NULL)
1890				pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1891		}
1892	} else {
1893		/* there is no prefix that has a reachable router */
1894		LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1895			if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1896			    pr->ndpr_raf_onlink == 0 ||
1897			    pr->ndpr_raf_auto == 0)
1898				continue;
1899			pr->ndpr_stateflags &= ~NDPRF_DETACHED;
1900		}
1901	}
1902
1903	/*
1904	 * Remove each interface route associated with a (just) detached
1905	 * prefix, and reinstall the interface route for a (just) attached
1906	 * prefix.  Note that all attempt of reinstallation does not
1907	 * necessarily success, when a same prefix is shared among multiple
1908	 * interfaces.  Such cases will be handled in nd6_prefix_onlink,
1909	 * so we don't have to care about them.
1910	 */
1911restart:
1912	LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) {
1913		char ip6buf[INET6_ADDRSTRLEN];
1914		int e;
1915
1916		if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr) ||
1917		    pr->ndpr_raf_onlink == 0 ||
1918		    pr->ndpr_raf_auto == 0)
1919			continue;
1920
1921		flags = pr->ndpr_stateflags & (NDPRF_DETACHED | NDPRF_ONLINK);
1922		if (flags == 0 || flags == (NDPRF_DETACHED | NDPRF_ONLINK)) {
1923			genid = V_nd6_list_genid;
1924			ND6_RUNLOCK();
1925			if ((flags & NDPRF_ONLINK) != 0 &&
1926			    (e = nd6_prefix_offlink(pr)) != 0) {
1927				nd6log((LOG_ERR,
1928				    "%s: failed to make %s/%d offlink "
1929				    "(errno=%d)\n", __func__,
1930				    ip6_sprintf(ip6buf,
1931					    &pr->ndpr_prefix.sin6_addr),
1932					    pr->ndpr_plen, e));
1933			} else if ((flags & NDPRF_ONLINK) == 0 &&
1934			    (e = nd6_prefix_onlink(pr)) != 0) {
1935				nd6log((LOG_ERR,
1936				    "%s: failed to make %s/%d onlink "
1937				    "(errno=%d)\n", __func__,
1938				    ip6_sprintf(ip6buf,
1939					    &pr->ndpr_prefix.sin6_addr),
1940					    pr->ndpr_plen, e));
1941			}
1942			ND6_RLOCK();
1943			if (genid != V_nd6_list_genid)
1944				goto restart;
1945		}
1946	}
1947
1948	/*
1949	 * Changes on the prefix status might affect address status as well.
1950	 * Make sure that all addresses derived from an attached prefix are
1951	 * attached, and that all addresses derived from a detached prefix are
1952	 * detached.  Note, however, that a manually configured address should
1953	 * always be attached.
1954	 * The precise detection logic is same as the one for prefixes.
1955	 */
1956	IN6_IFADDR_RLOCK(&in6_ifa_tracker);
1957	CK_STAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1958		if (!(ifa->ia6_flags & IN6_IFF_AUTOCONF))
1959			continue;
1960
1961		if (ifa->ia6_ndpr == NULL) {
1962			/*
1963			 * This can happen when we first configure the address
1964			 * (i.e. the address exists, but the prefix does not).
1965			 * XXX: complicated relationships...
1966			 */
1967			continue;
1968		}
1969
1970		if (find_pfxlist_reachable_router(ifa->ia6_ndpr))
1971			break;
1972	}
1973	if (ifa) {
1974		CK_STAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1975			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1976				continue;
1977
1978			if (ifa->ia6_ndpr == NULL) /* XXX: see above. */
1979				continue;
1980
1981			if (find_pfxlist_reachable_router(ifa->ia6_ndpr)) {
1982				if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1983					ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1984					ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1985					nd6_dad_start((struct ifaddr *)ifa, 0);
1986				}
1987			} else {
1988				ifa->ia6_flags |= IN6_IFF_DETACHED;
1989			}
1990		}
1991	} else {
1992		CK_STAILQ_FOREACH(ifa, &V_in6_ifaddrhead, ia_link) {
1993			if ((ifa->ia6_flags & IN6_IFF_AUTOCONF) == 0)
1994				continue;
1995
1996			if (ifa->ia6_flags & IN6_IFF_DETACHED) {
1997				ifa->ia6_flags &= ~IN6_IFF_DETACHED;
1998				ifa->ia6_flags |= IN6_IFF_TENTATIVE;
1999				/* Do we need a delay in this case? */
2000				nd6_dad_start((struct ifaddr *)ifa, 0);
2001			}
2002		}
2003	}
2004	IN6_IFADDR_RUNLOCK(&in6_ifa_tracker);
2005	ND6_RUNLOCK();
2006	ND6_ONLINK_UNLOCK();
2007}
2008
2009static int
2010nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
2011{
2012	struct sockaddr_dl sdl;
2013	struct rtentry *rt;
2014	struct sockaddr_in6 mask6;
2015	u_long rtflags;
2016	int error, a_failure, fibnum, maxfib;
2017
2018	/*
2019	 * in6_ifinit() sets nd6_rtrequest to ifa_rtrequest for all ifaddrs.
2020	 * ifa->ifa_rtrequest = nd6_rtrequest;
2021	 */
2022	bzero(&mask6, sizeof(mask6));
2023	mask6.sin6_len = sizeof(mask6);
2024	mask6.sin6_addr = pr->ndpr_mask;
2025	rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
2026
2027	bzero(&sdl, sizeof(struct sockaddr_dl));
2028	sdl.sdl_len = sizeof(struct sockaddr_dl);
2029	sdl.sdl_family = AF_LINK;
2030	sdl.sdl_type = ifa->ifa_ifp->if_type;
2031	sdl.sdl_index = ifa->ifa_ifp->if_index;
2032
2033	if(V_rt_add_addr_allfibs) {
2034		fibnum = 0;
2035		maxfib = rt_numfibs;
2036	} else {
2037		fibnum = ifa->ifa_ifp->if_fib;
2038		maxfib = fibnum + 1;
2039	}
2040	a_failure = 0;
2041	for (; fibnum < maxfib; fibnum++) {
2042
2043		rt = NULL;
2044		error = in6_rtrequest(RTM_ADD,
2045		    (struct sockaddr *)&pr->ndpr_prefix, (struct sockaddr *)&sdl,
2046		    (struct sockaddr *)&mask6, rtflags, &rt, fibnum);
2047		if (error == 0) {
2048			KASSERT(rt != NULL, ("%s: in6_rtrequest return no "
2049			    "error(%d) but rt is NULL, pr=%p, ifa=%p", __func__,
2050			    error, pr, ifa));
2051			RT_LOCK(rt);
2052			nd6_rtmsg(RTM_ADD, rt);
2053			RT_UNLOCK(rt);
2054			pr->ndpr_stateflags |= NDPRF_ONLINK;
2055		} else {
2056			char ip6buf[INET6_ADDRSTRLEN];
2057			char ip6bufg[INET6_ADDRSTRLEN];
2058			char ip6bufm[INET6_ADDRSTRLEN];
2059			struct sockaddr_in6 *sin6;
2060
2061			sin6 = (struct sockaddr_in6 *)ifa->ifa_addr;
2062			nd6log((LOG_ERR, "%s: failed to add "
2063			    "route for a prefix (%s/%d) on %s, gw=%s, mask=%s, "
2064			    "flags=%lx errno = %d\n", __func__,
2065			    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
2066			    pr->ndpr_plen, if_name(pr->ndpr_ifp),
2067			    ip6_sprintf(ip6bufg, &sin6->sin6_addr),
2068			    ip6_sprintf(ip6bufm, &mask6.sin6_addr),
2069			    rtflags, error));
2070
2071			/* Save last error to return, see rtinit(). */
2072			a_failure = error;
2073		}
2074
2075		if (rt != NULL) {
2076			RT_LOCK(rt);
2077			RT_REMREF(rt);
2078			RT_UNLOCK(rt);
2079		}
2080	}
2081
2082	/* Return the last error we got. */
2083	return (a_failure);
2084}
2085
2086static int
2087nd6_prefix_onlink(struct nd_prefix *pr)
2088{
2089	struct epoch_tracker et;
2090	struct ifaddr *ifa;
2091	struct ifnet *ifp = pr->ndpr_ifp;
2092	struct nd_prefix *opr;
2093	char ip6buf[INET6_ADDRSTRLEN];
2094	int error;
2095
2096	ND6_ONLINK_LOCK_ASSERT();
2097	ND6_UNLOCK_ASSERT();
2098
2099	if ((pr->ndpr_stateflags & NDPRF_ONLINK) != 0)
2100		return (EEXIST);
2101
2102	/*
2103	 * Add the interface route associated with the prefix.  Before
2104	 * installing the route, check if there's the same prefix on another
2105	 * interface, and the prefix has already installed the interface route.
2106	 * Although such a configuration is expected to be rare, we explicitly
2107	 * allow it.
2108	 */
2109	ND6_RLOCK();
2110	LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
2111		if (opr == pr)
2112			continue;
2113
2114		if ((opr->ndpr_stateflags & NDPRF_ONLINK) == 0)
2115			continue;
2116
2117		if (!V_rt_add_addr_allfibs &&
2118		    opr->ndpr_ifp->if_fib != pr->ndpr_ifp->if_fib)
2119			continue;
2120
2121		if (opr->ndpr_plen == pr->ndpr_plen &&
2122		    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
2123		    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
2124			ND6_RUNLOCK();
2125			return (0);
2126		}
2127	}
2128	ND6_RUNLOCK();
2129
2130	/*
2131	 * We prefer link-local addresses as the associated interface address.
2132	 */
2133	/* search for a link-local addr */
2134	NET_EPOCH_ENTER(et);
2135	ifa = (struct ifaddr *)in6ifa_ifpforlinklocal(ifp,
2136	    IN6_IFF_NOTREADY | IN6_IFF_ANYCAST);
2137	if (ifa == NULL) {
2138		/* XXX: freebsd does not have ifa_ifwithaf */
2139		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
2140			if (ifa->ifa_addr->sa_family == AF_INET6) {
2141				ifa_ref(ifa);
2142				break;
2143			}
2144		}
2145		/* should we care about ia6_flags? */
2146	}
2147	NET_EPOCH_EXIT(et);
2148	if (ifa == NULL) {
2149		/*
2150		 * This can still happen, when, for example, we receive an RA
2151		 * containing a prefix with the L bit set and the A bit clear,
2152		 * after removing all IPv6 addresses on the receiving
2153		 * interface.  This should, of course, be rare though.
2154		 */
2155		nd6log((LOG_NOTICE,
2156		    "%s: failed to find any ifaddr to add route for a "
2157		    "prefix(%s/%d) on %s\n", __func__,
2158		    ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
2159		    pr->ndpr_plen, if_name(ifp)));
2160		return (0);
2161	}
2162
2163	error = nd6_prefix_onlink_rtrequest(pr, ifa);
2164
2165	if (ifa != NULL)
2166		ifa_free(ifa);
2167
2168	return (error);
2169}
2170
/*
 * Take the prefix pr off-link: delete its interface route, clear
 * NDPRF_ONLINK, and try to bring an identical prefix held by another prefix
 * entry on-link in its place.
 *
 * Returns EEXIST if pr does not have NDPRF_ONLINK set.  Otherwise returns
 * the last error reported by in6_rtrequest(RTM_DELETE) over the FIBs that
 * were walked, or 0 if every deletion succeeded.
 */
int
nd6_prefix_offlink(struct nd_prefix *pr)
{
	int error = 0;
	struct ifnet *ifp = pr->ndpr_ifp;
	struct nd_prefix *opr;
	struct sockaddr_in6 sa6, mask6;
	struct rtentry *rt;
	char ip6buf[INET6_ADDRSTRLEN];
	uint64_t genid;
	int fibnum, maxfib, a_failure;

	ND6_ONLINK_LOCK_ASSERT();
	ND6_UNLOCK_ASSERT();

	if ((pr->ndpr_stateflags & NDPRF_ONLINK) == 0)
		return (EEXIST);

	/* Build the destination and netmask sockaddrs for the prefix route. */
	bzero(&sa6, sizeof(sa6));
	sa6.sin6_family = AF_INET6;
	sa6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_prefix.sin6_addr, &sa6.sin6_addr,
	    sizeof(struct in6_addr));
	bzero(&mask6, sizeof(mask6));
	mask6.sin6_family = AF_INET6;
	/* sa6 and mask6 have the same type, so sizeof(sa6) is correct here. */
	mask6.sin6_len = sizeof(sa6);
	bcopy(&pr->ndpr_mask, &mask6.sin6_addr, sizeof(struct in6_addr));

	/*
	 * Walk every FIB when V_rt_add_addr_allfibs is set; otherwise only
	 * the FIB of the prefix's interface.
	 */
	if (V_rt_add_addr_allfibs) {
		fibnum = 0;
		maxfib = rt_numfibs;
	} else {
		fibnum = ifp->if_fib;
		maxfib = fibnum + 1;
	}

	a_failure = 0;
	for (; fibnum < maxfib; fibnum++) {
		rt = NULL;
		error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
		    (struct sockaddr *)&mask6, 0, &rt, fibnum);
		if (error == 0) {
			/* report the route deletion to the routing socket. */
			if (rt != NULL)
				nd6_rtmsg(RTM_DELETE, rt);
		} else {
			/* Save last error to return, see rtinit(). */
			a_failure = error;
		}
		if (rt != NULL) {
			RTFREE(rt);
		}
	}
	error = a_failure;
	/*
	 * From here on a_failure is reused with a different meaning: it is
	 * non-zero until nd6_prefix_onlink() succeeds for some other prefix
	 * entry below.  It therefore also stays non-zero when the route
	 * deletion failed and the recovery scan is skipped.
	 */
	a_failure = 1;
	if (error == 0) {
		pr->ndpr_stateflags &= ~NDPRF_ONLINK;

		/*
		 * There might be the same prefix on another interface,
		 * the prefix which could not be on-link just because we have
		 * the interface route (see comments in nd6_prefix_onlink).
		 * If there's one, try to make the prefix on-link on the
		 * interface.
		 */
		ND6_RLOCK();
restart:
		LIST_FOREACH(opr, &V_nd_prefix, ndpr_entry) {
			/*
			 * KAME specific: detached prefixes should not be
			 * on-link.
			 */
			if (opr == pr || (opr->ndpr_stateflags &
			    (NDPRF_ONLINK | NDPRF_DETACHED)) != 0)
				continue;

			if (opr->ndpr_plen == pr->ndpr_plen &&
			    in6_are_prefix_equal(&pr->ndpr_prefix.sin6_addr,
			    &opr->ndpr_prefix.sin6_addr, pr->ndpr_plen)) {
				int e;

				/*
				 * nd6_prefix_onlink() is called with the read
				 * lock dropped.  Record the list generation
				 * first so that a change made while unlocked
				 * can be detected afterwards.
				 */
				genid = V_nd6_list_genid;
				ND6_RUNLOCK();
				if ((e = nd6_prefix_onlink(opr)) != 0) {
					nd6log((LOG_ERR,
					    "%s: failed to recover a prefix "
					    "%s/%d from %s to %s (errno=%d)\n",
					    __func__, ip6_sprintf(ip6buf,
						&opr->ndpr_prefix.sin6_addr),
					    opr->ndpr_plen, if_name(ifp),
					    if_name(opr->ndpr_ifp), e));
				} else
					a_failure = 0;
				ND6_RLOCK();
				/*
				 * The generation changed while unlocked, so
				 * rescan the list from its head.
				 */
				if (genid != V_nd6_list_genid)
					goto restart;
			}
		}
		ND6_RUNLOCK();
	} else {
		/* XXX: can we still set the NDPRF_ONLINK flag? */
		nd6log((LOG_ERR,
		    "%s: failed to delete route: %s/%d on %s (errno=%d)\n",
		    __func__, ip6_sprintf(ip6buf, &sa6.sin6_addr),
		    pr->ndpr_plen, if_name(ifp), error));
	}

	/*
	 * No other prefix entry was brought on-link (or the route deletion
	 * failed): call lltable_prefix_free() for this prefix.
	 */
	if (a_failure)
		lltable_prefix_free(AF_INET6, (struct sockaddr *)&sa6,
		    (struct sockaddr *)&mask6, LLE_STATIC);

	return (error);
}
2284
2285/*
2286 * ia0 - corresponding public address
2287 */
2288int
2289in6_tmpifadd(const struct in6_ifaddr *ia0, int forcegen, int delay)
2290{
2291	struct ifnet *ifp = ia0->ia_ifa.ifa_ifp;
2292	struct in6_ifaddr *newia;
2293	struct in6_aliasreq ifra;
2294	int error;
2295	int trylimit = 3;	/* XXX: adhoc value */
2296	int updateflags;
2297	u_int32_t randid[2];
2298	time_t vltime0, pltime0;
2299
2300	in6_prepare_ifra(&ifra, &ia0->ia_addr.sin6_addr,
2301	    &ia0->ia_prefixmask.sin6_addr);
2302
2303	ifra.ifra_addr = ia0->ia_addr;	/* XXX: do we need this ? */
2304	/* clear the old IFID */
2305	IN6_MASK_ADDR(&ifra.ifra_addr.sin6_addr,
2306	    &ifra.ifra_prefixmask.sin6_addr);
2307
2308  again:
2309	if (in6_get_tmpifid(ifp, (u_int8_t *)randid,
2310	    (const u_int8_t *)&ia0->ia_addr.sin6_addr.s6_addr[8], forcegen)) {
2311		nd6log((LOG_NOTICE, "%s: failed to find a good random IFID\n",
2312		    __func__));
2313		return (EINVAL);
2314	}
2315	ifra.ifra_addr.sin6_addr.s6_addr32[2] |=
2316	    (randid[0] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[2]));
2317	ifra.ifra_addr.sin6_addr.s6_addr32[3] |=
2318	    (randid[1] & ~(ifra.ifra_prefixmask.sin6_addr.s6_addr32[3]));
2319
2320	/*
2321	 * in6_get_tmpifid() quite likely provided a unique interface ID.
2322	 * However, we may still have a chance to see collision, because
2323	 * there may be a time lag between generation of the ID and generation
2324	 * of the address.  So, we'll do one more sanity check.
2325	 */
2326
2327	if (in6_localip(&ifra.ifra_addr.sin6_addr) != 0) {
2328		if (trylimit-- > 0) {
2329			forcegen = 1;
2330			goto again;
2331		}
2332
2333		/* Give up.  Something strange should have happened.  */
2334		nd6log((LOG_NOTICE, "%s: failed to find a unique random IFID\n",
2335		    __func__));
2336		return (EEXIST);
2337	}
2338
2339	/*
2340	 * The Valid Lifetime is the lower of the Valid Lifetime of the
2341         * public address or TEMP_VALID_LIFETIME.
2342	 * The Preferred Lifetime is the lower of the Preferred Lifetime
2343         * of the public address or TEMP_PREFERRED_LIFETIME -
2344         * DESYNC_FACTOR.
2345	 */
2346	if (ia0->ia6_lifetime.ia6t_vltime != ND6_INFINITE_LIFETIME) {
2347		vltime0 = IFA6_IS_INVALID(ia0) ? 0 :
2348		    (ia0->ia6_lifetime.ia6t_vltime -
2349		    (time_uptime - ia0->ia6_updatetime));
2350		if (vltime0 > V_ip6_temp_valid_lifetime)
2351			vltime0 = V_ip6_temp_valid_lifetime;
2352	} else
2353		vltime0 = V_ip6_temp_valid_lifetime;
2354	if (ia0->ia6_lifetime.ia6t_pltime != ND6_INFINITE_LIFETIME) {
2355		pltime0 = IFA6_IS_DEPRECATED(ia0) ? 0 :
2356		    (ia0->ia6_lifetime.ia6t_pltime -
2357		    (time_uptime - ia0->ia6_updatetime));
2358		if (pltime0 > V_ip6_temp_preferred_lifetime - V_ip6_desync_factor){
2359			pltime0 = V_ip6_temp_preferred_lifetime -
2360			    V_ip6_desync_factor;
2361		}
2362	} else
2363		pltime0 = V_ip6_temp_preferred_lifetime - V_ip6_desync_factor;
2364	ifra.ifra_lifetime.ia6t_vltime = vltime0;
2365	ifra.ifra_lifetime.ia6t_pltime = pltime0;
2366
2367	/*
2368	 * A temporary address is created only if this calculated Preferred
2369	 * Lifetime is greater than REGEN_ADVANCE time units.
2370	 */
2371	if (ifra.ifra_lifetime.ia6t_pltime <= V_ip6_temp_regen_advance)
2372		return (0);
2373
2374	/* XXX: scope zone ID? */
2375
2376	ifra.ifra_flags |= (IN6_IFF_AUTOCONF|IN6_IFF_TEMPORARY);
2377
2378	/* allocate ifaddr structure, link into chain, etc. */
2379	updateflags = 0;
2380	if (delay)
2381		updateflags |= IN6_IFAUPDATE_DADDELAY;
2382	if ((error = in6_update_ifa(ifp, &ifra, NULL, updateflags)) != 0)
2383		return (error);
2384
2385	newia = in6ifa_ifpwithaddr(ifp, &ifra.ifra_addr.sin6_addr);
2386	if (newia == NULL) {	/* XXX: can it happen? */
2387		nd6log((LOG_ERR,
2388		    "%s: ifa update succeeded, but we got no ifaddr\n",
2389		    __func__));
2390		return (EINVAL); /* XXX */
2391	}
2392	newia->ia6_ndpr = ia0->ia6_ndpr;
2393	newia->ia6_ndpr->ndpr_addrcnt++;
2394	ifa_free(&newia->ia_ifa);
2395
2396	/*
2397	 * A newly added address might affect the status of other addresses.
2398	 * XXX: when the temporary address is generated with a new public
2399	 * address, the onlink check is redundant.  However, it would be safe
2400	 * to do the check explicitly everywhere a new address is generated,
2401	 * and, in fact, we surely need the check when we create a new
2402	 * temporary address due to deprecation of an old temporary address.
2403	 */
2404	pfxlist_onlink_check();
2405
2406	return (0);
2407}
2408
2409static int
2410rt6_deleteroute(const struct rtentry *rt, void *arg)
2411{
2412#define SIN6(s)	((struct sockaddr_in6 *)s)
2413	struct in6_addr *gate = (struct in6_addr *)arg;
2414
2415	if (rt->rt_gateway == NULL || rt->rt_gateway->sa_family != AF_INET6)
2416		return (0);
2417
2418	if (!IN6_ARE_ADDR_EQUAL(gate, &SIN6(rt->rt_gateway)->sin6_addr)) {
2419		return (0);
2420	}
2421
2422	/*
2423	 * Do not delete a static route.
2424	 * XXX: this seems to be a bit ad-hoc. Should we consider the
2425	 * 'cloned' bit instead?
2426	 */
2427	if ((rt->rt_flags & RTF_STATIC) != 0)
2428		return (0);
2429
2430	/*
2431	 * We delete only host route. This means, in particular, we don't
2432	 * delete default route.
2433	 */
2434	if ((rt->rt_flags & RTF_HOST) == 0)
2435		return (0);
2436
2437	return (1);
2438#undef SIN6
2439}
2440
2441/*
2442 * Delete all the routing table entries that use the specified gateway.
2443 * XXX: this function causes search through all entries of routing table, so
2444 * it shouldn't be called when acting as a router.
2445 */
2446void
2447rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
2448{
2449
2450	/* We'll care only link-local addresses */
2451	if (!IN6_IS_ADDR_LINKLOCAL(gateway))
2452		return;
2453
2454	/* XXX Do we really need to walk any but the default FIB? */
2455	rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
2456}
2457
2458int
2459nd6_setdefaultiface(int ifindex)
2460{
2461	int error = 0;
2462
2463	if (ifindex < 0 || V_if_index < ifindex)
2464		return (EINVAL);
2465	if (ifindex != 0 && !ifnet_byindex(ifindex))
2466		return (EINVAL);
2467
2468	if (V_nd6_defifindex != ifindex) {
2469		V_nd6_defifindex = ifindex;
2470		if (V_nd6_defifindex > 0)
2471			V_nd6_defifp = ifnet_byindex(V_nd6_defifindex);
2472		else
2473			V_nd6_defifp = NULL;
2474
2475		/*
2476		 * Our current implementation assumes one-to-one maping between
2477		 * interfaces and links, so it would be natural to use the
2478		 * default interface as the default link.
2479		 */
2480		scope6_setdefault(V_nd6_defifp);
2481	}
2482
2483	return (error);
2484}
2485
2486bool
2487nd6_defrouter_list_empty(void)
2488{
2489
2490	return (TAILQ_EMPTY(&V_nd6_defrouter));
2491}
2492
2493void
2494nd6_defrouter_timer(void)
2495{
2496	struct nd_defrouter *dr, *ndr;
2497	struct nd6_drhead drq;
2498
2499	TAILQ_INIT(&drq);
2500
2501	ND6_WLOCK();
2502	TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr)
2503		if (dr->expire && dr->expire < time_uptime)
2504			defrouter_unlink(dr, &drq);
2505	ND6_WUNLOCK();
2506
2507	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
2508		TAILQ_REMOVE(&drq, dr, dr_entry);
2509		defrouter_del(dr);
2510	}
2511}
2512
2513/*
2514 * Nuke default router list entries toward ifp.
2515 * We defer removal of default router list entries that is installed in the
2516 * routing table, in order to keep additional side effects as small as possible.
2517 */
2518void
2519nd6_defrouter_purge(struct ifnet *ifp)
2520{
2521	struct nd_defrouter *dr, *ndr;
2522	struct nd6_drhead drq;
2523
2524	TAILQ_INIT(&drq);
2525
2526	ND6_WLOCK();
2527	TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr) {
2528		if (dr->installed)
2529			continue;
2530		if (dr->ifp == ifp)
2531			defrouter_unlink(dr, &drq);
2532	}
2533	TAILQ_FOREACH_SAFE(dr, &V_nd6_defrouter, dr_entry, ndr) {
2534		if (!dr->installed)
2535			continue;
2536		if (dr->ifp == ifp)
2537			defrouter_unlink(dr, &drq);
2538	}
2539	ND6_WUNLOCK();
2540
2541	/* Delete the unlinked router objects. */
2542	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
2543		TAILQ_REMOVE(&drq, dr, dr_entry);
2544		defrouter_del(dr);
2545	}
2546}
2547
2548void
2549nd6_defrouter_flush_all(void)
2550{
2551	struct nd_defrouter *dr;
2552	struct nd6_drhead drq;
2553
2554	TAILQ_INIT(&drq);
2555
2556	ND6_WLOCK();
2557	while ((dr = TAILQ_FIRST(&V_nd6_defrouter)) != NULL)
2558		defrouter_unlink(dr, &drq);
2559	ND6_WUNLOCK();
2560
2561	while ((dr = TAILQ_FIRST(&drq)) != NULL) {
2562		TAILQ_REMOVE(&drq, dr, dr_entry);
2563		defrouter_del(dr);
2564	}
2565}
2566
2567void
2568nd6_defrouter_init(void)
2569{
2570
2571	TAILQ_INIT(&V_nd6_defrouter);
2572}
2573
2574static int
2575nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS)
2576{
2577	struct in6_defrouter d;
2578	struct nd_defrouter *dr;
2579	int error;
2580
2581	if (req->newptr != NULL)
2582		return (EPERM);
2583
2584	error = sysctl_wire_old_buffer(req, 0);
2585	if (error != 0)
2586		return (error);
2587
2588	bzero(&d, sizeof(d));
2589	d.rtaddr.sin6_family = AF_INET6;
2590	d.rtaddr.sin6_len = sizeof(d.rtaddr);
2591
2592	ND6_RLOCK();
2593	TAILQ_FOREACH(dr, &V_nd6_defrouter, dr_entry) {
2594		d.rtaddr.sin6_addr = dr->rtaddr;
2595		error = sa6_recoverscope(&d.rtaddr);
2596		if (error != 0)
2597			break;
2598		d.flags = dr->raflags;
2599		d.rtlifetime = dr->rtlifetime;
2600		d.expire = dr->expire + (time_second - time_uptime);
2601		d.if_index = dr->ifp->if_index;
2602		error = SYSCTL_OUT(req, &d, sizeof(d));
2603		if (error != 0)
2604			break;
2605	}
2606	ND6_RUNLOCK();
2607	return (error);
2608}
2609SYSCTL_PROC(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist,
2610	CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE,
2611	NULL, 0, nd6_sysctl_drlist, "S,in6_defrouter",
2612	"NDP default router list");
2613