ip_ire.c (dc3879f9) ip_ire.c (c793af95)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 41 unchanged lines hidden (view full) ---

50#include <netinet/ip6.h>
51#include <netinet/icmp6.h>
52
53#include <inet/common.h>
54#include <inet/mi.h>
55#include <inet/ip.h>
56#include <inet/ip6.h>
57#include <inet/ip_ndp.h>
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE

--- 41 unchanged lines hidden (view full) ---

50#include <netinet/ip6.h>
51#include <netinet/icmp6.h>
52
53#include <inet/common.h>
54#include <inet/mi.h>
55#include <inet/ip.h>
56#include <inet/ip6.h>
57#include <inet/ip_ndp.h>
58#include <inet/arp.h>
58#include <inet/ip_if.h>
59#include <inet/ip_ire.h>
59#include <inet/ip_if.h>
60#include <inet/ip_ire.h>
61#include <inet/ip_ftable.h>
60#include <inet/ip_rts.h>
61#include <inet/nd.h>
62
63#include <net/pfkeyv2.h>
64#include <inet/ipsec_info.h>
65#include <inet/sadb.h>
66#include <sys/kmem.h>
67#include <inet/tcp.h>
68#include <inet/ipclassifier.h>
69#include <sys/zone.h>
62#include <inet/ip_rts.h>
63#include <inet/nd.h>
64
65#include <net/pfkeyv2.h>
66#include <inet/ipsec_info.h>
67#include <inet/sadb.h>
68#include <sys/kmem.h>
69#include <inet/tcp.h>
70#include <inet/ipclassifier.h>
71#include <sys/zone.h>
70
71#include <sys/tsol/label.h>
72#include <sys/tsol/tnet.h>
73
72#include <sys/tsol/label.h>
73#include <sys/tsol/tnet.h>
74
75struct kmem_cache *rt_entry_cache;
76
77
74/*
75 * Synchronization notes:
76 *
77 * The fields of the ire_t struct are protected in the following way :
78 *
79 * ire_next/ire_ptpn
80 *
81 * - bucket lock of the respective tables (cache or forwarding tables).
82 *
78/*
79 * Synchronization notes:
80 *
81 * The fields of the ire_t struct are protected in the following way :
82 *
83 * ire_next/ire_ptpn
84 *
85 * - bucket lock of the respective tables (cache or forwarding tables).
86 *
83 * ire_fp_mp
84 * ire_dlureq_mp
85 *
86 * - ire_lock protects multiple threads updating ire_fp_mp
87 * simultaneously. Otherwise no locks are used while accessing
88 * (both read/write) both the fields.
89 *
90 * ire_mp, ire_rfq, ire_stq, ire_u *except* ire_gateway_addr[v6], ire_mask,
91 * ire_type, ire_create_time, ire_masklen, ire_ipversion, ire_flags, ire_ipif,
92 * ire_ihandle, ire_phandle, ire_nce, ire_bucket, ire_in_ill, ire_in_src_addr
93 *
94 * - Set in ire_create_v4/v6 and never changes after that. Thus,
95 * we don't need a lock whenever these fields are accessed.
96 *
97 * - ire_bucket and ire_masklen (also set in ire_create) are set in

--- 31 unchanged lines hidden (view full) ---

129 * ire_marks
130 * - bucket lock protects this.
131 *
132 * ire_ipsec_overhead/ire_ll_hdr_length
133 *
134 * - Place holder for returning the information to the upper layers
135 * when IRE_DB_REQ comes down.
136 *
87 * ire_mp, ire_rfq, ire_stq, ire_u *except* ire_gateway_addr[v6], ire_mask,
88 * ire_type, ire_create_time, ire_masklen, ire_ipversion, ire_flags, ire_ipif,
89 * ire_ihandle, ire_phandle, ire_nce, ire_bucket, ire_in_ill, ire_in_src_addr
90 *
91 * - Set in ire_create_v4/v6 and never changes after that. Thus,
92 * we don't need a lock whenever these fields are accessed.
93 *
94 * - ire_bucket and ire_masklen (also set in ire_create) are set in

--- 31 unchanged lines hidden (view full) ---

126 * ire_marks
127 * - bucket lock protects this.
128 *
129 * ire_ipsec_overhead/ire_ll_hdr_length
130 *
131 * - Place holder for returning the information to the upper layers
132 * when IRE_DB_REQ comes down.
133 *
137 * ip_ire_default_count is protected by the bucket lock of
138 * ip_forwarding_table[0][0].
139 *
140 * ipv6_ire_default_count is protected by the bucket lock of
141 * ip_forwarding_table_v6[0][0].
142 *
134 *
135 * ipv6_ire_default_count is protected by the bucket lock of
136 * ip_forwarding_table_v6[0][0].
137 *
143 * ip_ire_default_index/ipv6_ire_default_index is not protected as it
144 * is just a hint at which default gateway to use. There is nothing
138 * ipv6_ire_default_index is not protected as it is just a hint
139 * at which default gateway to use. There is nothing
145 * wrong in using the same gateway for two different connections.
146 *
147 * As we always hold the bucket locks in all the places while accessing
148 * the above values, it is natural to use them for protecting them.
149 *
150 * We have a separate cache table and forwarding table for IPv4 and IPv6.
151 * Cache table (ip_cache_table/ip_cache_table_v6) is a pointer to an
152 * array of irb_t structure and forwarding table (ip_forwarding_table/

--- 78 unchanged lines hidden (view full) ---

231 * Zones note:
232 * Walking IREs within a given zone also walks certain ires in other
233 * zones. This is done intentionally. IRE walks with a specified
234 * zoneid are used only when doing informational reports, and
235 * zone users want to see things that they can access. See block
236 * comment in ire_walk_ill_match().
237 */
238
140 * wrong in using the same gateway for two different connections.
141 *
142 * As we always hold the bucket locks in all the places while accessing
143 * the above values, it is natural to use them for protecting them.
144 *
145 * We have a separate cache table and forwarding table for IPv4 and IPv6.
146 * Cache table (ip_cache_table/ip_cache_table_v6) is a pointer to an
147 * array of irb_t structure and forwarding table (ip_forwarding_table/

--- 78 unchanged lines hidden (view full) ---

226 * Zones note:
227 * Walking IREs within a given zone also walks certain ires in other
228 * zones. This is done intentionally. IRE walks with a specified
229 * zoneid are used only when doing informational reports, and
230 * zone users want to see things that they can access. See block
231 * comment in ire_walk_ill_match().
232 */
233
239static irb_t *ip_forwarding_table[IP_MASK_TABLE_SIZE];
240/* This is dynamically allocated in ip_ire_init */
234/* This is dynamically allocated in ip_ire_init */
241static irb_t *ip_cache_table;
235irb_t *ip_cache_table;
242/* This is dynamically allocated in ire_add_mrtun */
243irb_t *ip_mrtun_table;
244
245uint32_t ire_handle = 1;
246/*
247 * ire_ft_init_lock is used while initializing ip_forwarding_table
248 * dynamically in ire_add.
249 */

--- 33 unchanged lines hidden (view full) ---

283 */
284uint32_t ip_cache_table_size = IP_CACHE_TABLE_SIZE;
285uint32_t ip6_cache_table_size = IP6_CACHE_TABLE_SIZE;
286
287/*
288 * The size of the forwarding table. We will make sure that it is a
289 * power of 2 in ip_ire_init().
290 */
236/* This is dynamically allocated in ire_add_mrtun */
237irb_t *ip_mrtun_table;
238
239uint32_t ire_handle = 1;
240/*
241 * ire_ft_init_lock is used while initializing ip_forwarding_table
242 * dynamically in ire_add.
243 */

--- 33 unchanged lines hidden (view full) ---

277 */
278uint32_t ip_cache_table_size = IP_CACHE_TABLE_SIZE;
279uint32_t ip6_cache_table_size = IP6_CACHE_TABLE_SIZE;
280
281/*
282 * The size of the forwarding table. We will make sure that it is a
283 * power of 2 in ip_ire_init().
284 */
291uint32_t ip_ftable_hash_size = IP_FTABLE_HASH_SIZE;
292uint32_t ip6_ftable_hash_size = IP6_FTABLE_HASH_SIZE;
293
294struct kmem_cache *ire_cache;
295static ire_t ire_null;
296
297ire_stats_t ire_stats_v4; /* IPv4 ire statistics */
298ire_stats_t ire_stats_v6; /* IPv6 ire statistics */
299

--- 65 unchanged lines hidden (view full) ---

365 * The ratio of memory consumed by IRE used for temporary to available
366 * memory. This is a shift factor, so 6 means the ratio 1 to 64. This
367 * value can be changed in /etc/system. 6 is a reasonable number.
368 */
369uint32_t ip_ire_mem_ratio = 6;
370/* The shift factor for CPU speed to calculate the max IRE bucket length. */
371uint32_t ip_ire_cpu_ratio = 7;
372
285uint32_t ip6_ftable_hash_size = IP6_FTABLE_HASH_SIZE;
286
287struct kmem_cache *ire_cache;
288static ire_t ire_null;
289
290ire_stats_t ire_stats_v4; /* IPv4 ire statistics */
291ire_stats_t ire_stats_v6; /* IPv6 ire statistics */
292

--- 65 unchanged lines hidden (view full) ---

358 * The ratio of memory consumed by IRE used for temporary to available
359 * memory. This is a shift factor, so 6 means the ratio 1 to 64. This
360 * value can be changed in /etc/system. 6 is a reasonable number.
361 */
362uint32_t ip_ire_mem_ratio = 6;
363/* The shift factor for CPU speed to calculate the max IRE bucket length. */
364uint32_t ip_ire_cpu_ratio = 7;
365
366typedef struct nce_clookup_s {
367 ipaddr_t ncecl_addr;
368 boolean_t ncecl_found;
369} nce_clookup_t;
370
373/*
374 * The maximum number of buckets in IRE cache table. In future, we may
375 * want to make it a dynamic hash table. For the moment, we fix the
376 * size and allocate the table in ip_ire_init() when IP is first loaded.
377 * We take into account the amount of memory a system has.
378 */
379#define IP_MAX_CACHE_TABLE_SIZE 4096
380
381static uint32_t ip_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE;
382static uint32_t ip6_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE;
383
384#define NUM_ILLS 3 /* To build the ILL list to unlock */
385
386/* Zero iulp_t for initialization. */
387const iulp_t ire_uinfo_null = { 0 };
388
389static int ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp,
371/*
372 * The maximum number of buckets in IRE cache table. In future, we may
373 * want to make it a dynamic hash table. For the moment, we fix the
374 * size and allocate the table in ip_ire_init() when IP is first loaded.
375 * We take into account the amount of memory a system has.
376 */
377#define IP_MAX_CACHE_TABLE_SIZE 4096
378
379static uint32_t ip_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE;
380static uint32_t ip6_max_cache_table_size = IP_MAX_CACHE_TABLE_SIZE;
381
382#define NUM_ILLS 3 /* To build the ILL list to unlock */
383
384/* Zero iulp_t for initialization. */
385const iulp_t ire_uinfo_null = { 0 };
386
387static int ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp,
390 ipsq_func_t func);
388 ipsq_func_t func, boolean_t);
391static int ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp,
392 ipsq_func_t func);
393static ire_t *ire_update_srcif_v4(ire_t *ire);
394static void ire_delete_v4(ire_t *ire);
389static int ire_add_srcif_v4(ire_t **ire_p, queue_t *q, mblk_t *mp,
390 ipsq_func_t func);
391static ire_t *ire_update_srcif_v4(ire_t *ire);
392static void ire_delete_v4(ire_t *ire);
395static void ire_report_ftable(ire_t *ire, char *mp);
396static void ire_report_ctable(ire_t *ire, char *mp);
397static void ire_report_mrtun_table(ire_t *ire, char *mp);
398static void ire_report_srcif_table(ire_t *ire, char *mp);
399static void ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers,
400 zoneid_t zoneid);
401static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type,
402 pfv_t func, void *arg, uchar_t vers, ill_t *ill);
393static void ire_report_ctable(ire_t *ire, char *mp);
394static void ire_report_mrtun_table(ire_t *ire, char *mp);
395static void ire_report_srcif_table(ire_t *ire, char *mp);
396static void ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers,
397 zoneid_t zoneid);
398static void ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type,
399 pfv_t func, void *arg, uchar_t vers, ill_t *ill);
403static void ire_walk_ill_tables(uint_t match_flags, uint_t ire_type,
404 pfv_t func, void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl,
405 size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, zoneid_t zoneid);
406static void ire_delete_host_redirects(ipaddr_t gateway);
407static boolean_t ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask,
408 ipaddr_t gateway, int type, const ipif_t *ipif, zoneid_t zoneid,
409 uint32_t ihandle, const ts_label_t *tsl, int match_flags);
410static void ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt);
411extern void ill_unlock_ills(ill_t **list, int cnt);
412static void ire_fastpath_list_add(ill_t *ill, ire_t *ire);
400static void ire_cache_cleanup(irb_t *irb, uint32_t threshold, int cnt);
401extern void ill_unlock_ills(ill_t **list, int cnt);
402static void ire_fastpath_list_add(ill_t *ill, ire_t *ire);
403static void ip_nce_clookup_and_delete(nce_t *nce, void *arg);
413extern void th_trace_rrecord(th_trace_t *);
414#ifdef IRE_DEBUG
415static void ire_trace_inactive(ire_t *);
416#endif
417
418/*
419 * To avoid bloating the code, we call this function instead of
420 * using the macro IRE_REFRELE. Use macro only in performance

--- 24 unchanged lines hidden (view full) ---

445 * not in this cache.
446 */
447/* ARGSUSED */
448static int
449ip_ire_constructor(void *buf, void *cdrarg, int kmflags)
450{
451 ire_t *ire = buf;
452
404extern void th_trace_rrecord(th_trace_t *);
405#ifdef IRE_DEBUG
406static void ire_trace_inactive(ire_t *);
407#endif
408
409/*
410 * To avoid bloating the code, we call this function instead of
411 * using the macro IRE_REFRELE. Use macro only in performance

--- 24 unchanged lines hidden (view full) ---

436 * not in this cache.
437 */
438/* ARGSUSED */
439static int
440ip_ire_constructor(void *buf, void *cdrarg, int kmflags)
441{
442 ire_t *ire = buf;
443
453 ire->ire_fp_mp = NULL;
454 ire->ire_dlureq_mp = NULL;
444 ire->ire_nce = NULL;
455
456 return (0);
457}
458
459/* ARGSUSED1 */
460static void
461ip_ire_destructor(void *buf, void *cdrarg)
462{
463 ire_t *ire = buf;
464
445
446 return (0);
447}
448
449/* ARGSUSED1 */
450static void
451ip_ire_destructor(void *buf, void *cdrarg)
452{
453 ire_t *ire = buf;
454
465 ASSERT(ire->ire_fp_mp == NULL);
466 ASSERT(ire->ire_dlureq_mp == NULL);
455 ASSERT(ire->ire_nce == NULL);
467}
468
469/*
470 * This function is associated with the IP_IOC_IRE_ADVISE_NO_REPLY
471 * IOCTL. It is used by TCP (or other ULPs) to supply revised information
472 * for an existing CACHED IRE.
473 */
474/* ARGSUSED */

--- 118 unchanged lines hidden (view full) ---

593 *
594 * This function does not support IPv6 since Neighbor Unreachability Detection
595 * means that negative advice like this is useless.
596 */
597/* ARGSUSED */
598int
599ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
600{
456}
457
458/*
459 * This function is associated with the IP_IOC_IRE_ADVISE_NO_REPLY
460 * IOCTL. It is used by TCP (or other ULPs) to supply revised information
461 * for an existing CACHED IRE.
462 */
463/* ARGSUSED */

--- 118 unchanged lines hidden (view full) ---

582 *
583 * This function does not support IPv6 since Neighbor Unreachability Detection
584 * means that negative advice like this is useless.
585 */
586/* ARGSUSED */
587int
588ip_ire_delete(queue_t *q, mblk_t *mp, cred_t *ioc_cr)
589{
601 uchar_t *addr_ucp;
590 uchar_t *addr_ucp;
602 ipaddr_t addr;
591 ipaddr_t addr;
603 ire_t *ire;
604 ipid_t *ipid;
605 boolean_t routing_sock_info = B_FALSE; /* Sent info? */
592 ire_t *ire;
593 ipid_t *ipid;
594 boolean_t routing_sock_info = B_FALSE; /* Sent info? */
606 zoneid_t zoneid;
595 zoneid_t zoneid;
596 ire_t *gire = NULL;
607
608 ASSERT(q->q_next == NULL);
609 zoneid = Q_TO_CONN(q)->conn_zoneid;
610
611 /*
612 * Check privilege using the ioctl credential; if it is NULL
613 * then this is a kernel message and therefore privileged.
614 */

--- 49 unchanged lines hidden (view full) ---

664 if (gethrestime_sec() <
665 ire->ire_create_time + ip_ignore_delete_time) {
666 ire_refrele(ire);
667 return (EINVAL);
668 }
669 /*
670 * Now we have a potentially dead cache entry. We need
671 * to remove it.
597
598 ASSERT(q->q_next == NULL);
599 zoneid = Q_TO_CONN(q)->conn_zoneid;
600
601 /*
602 * Check privilege using the ioctl credential; if it is NULL
603 * then this is a kernel message and therefore privileged.
604 */

--- 49 unchanged lines hidden (view full) ---

654 if (gethrestime_sec() <
655 ire->ire_create_time + ip_ignore_delete_time) {
656 ire_refrele(ire);
657 return (EINVAL);
658 }
659 /*
660 * Now we have a potentially dead cache entry. We need
661 * to remove it.
672 * If this cache entry is generated from a default route,
662 * If this cache entry is generated from a
663 * default route (i.e., ire_cmask == 0),
673 * search the default list and mark it dead and some
674 * background process will try to activate it.
675 */
676 if ((ire->ire_gateway_addr != 0) && (ire->ire_cmask == 0)) {
677 /*
678 * Make sure that we pick a different
679 * IRE_DEFAULT next time.
664 * search the default list and mark it dead and some
665 * background process will try to activate it.
666 */
667 if ((ire->ire_gateway_addr != 0) && (ire->ire_cmask == 0)) {
668 /*
669 * Make sure that we pick a different
670 * IRE_DEFAULT next time.
680 * The ip_ire_default_count tracks the number of
681 * IRE_DEFAULT entries. However, the
682 * ip_forwarding_table[0] also contains
683 * interface routes thus the count can be zero.
684 */
685 ire_t *gw_ire;
671 */
672 ire_t *gw_ire;
686 irb_t *irb_ptr;
687 irb_t *irb;
673 irb_t *irb = NULL;
674 uint_t match_flags;
688
675
689 if (((irb_ptr = ip_forwarding_table[0]) != NULL) &&
690 (irb = &irb_ptr[0])->irb_ire != NULL &&
691 ip_ire_default_count != 0) {
692 uint_t index;
676 match_flags = (MATCH_IRE_DEFAULT | MATCH_IRE_RJ_BHOLE);
693
677
678 gire = ire_ftable_lookup(ire->ire_addr,
679 ire->ire_cmask, 0, 0,
680 ire->ire_ipif, NULL, zoneid, 0, NULL, match_flags);
681
682 ip3dbg(("ire_ftable_lookup() returned gire %p\n",
683 (void *)gire));
684
685 if (gire != NULL) {
686 irb = gire->ire_bucket;
687
694 /*
695 * We grab it as writer just to serialize
696 * multiple threads trying to bump up
688 /*
689 * We grab it as writer just to serialize
690 * multiple threads trying to bump up
697 * ip_ire_default_index.
691 * irb_rr_origin
698 */
699 rw_enter(&irb->irb_lock, RW_WRITER);
692 */
693 rw_enter(&irb->irb_lock, RW_WRITER);
700 if ((gw_ire = irb->irb_ire) == NULL) {
694 if ((gw_ire = irb->irb_rr_origin) == NULL) {
701 rw_exit(&irb->irb_lock);
702 goto done;
703 }
695 rw_exit(&irb->irb_lock);
696 goto done;
697 }
704 index = ip_ire_default_index %
705 ip_ire_default_count;
706 while (index-- && gw_ire->ire_next != NULL)
707 gw_ire = gw_ire->ire_next;
708
698
699
709 /* Skip past the potentially bad gateway */
710 if (ire->ire_gateway_addr ==
711 gw_ire->ire_gateway_addr)
700 /* Skip past the potentially bad gateway */
701 if (ire->ire_gateway_addr ==
702 gw_ire->ire_gateway_addr)
712 ip_ire_default_index++;
703 irb->irb_rr_origin = gw_ire->ire_next;
713
714 rw_exit(&irb->irb_lock);
704
705 rw_exit(&irb->irb_lock);
715 }
706 }
716 }
717done:
707 }
708done:
709 if (gire != NULL)
710 IRE_REFRELE(gire);
718 /* report the bad route to routing sockets */
719 ip_rts_change(RTM_LOSING, ire->ire_addr, ire->ire_gateway_addr,
720 ire->ire_mask, ire->ire_src_addr, 0, 0, 0,
721 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA));
722 routing_sock_info = B_TRUE;
723 ire_delete(ire);
724 ire_refrele(ire);
725 }

--- 61 unchanged lines hidden (view full) ---

787 zoneid = ALL_ZONES;
788
789 ire_walk_v4(ire_report_ftable, mp->b_cont, zoneid);
790 ire_walk_v4(ire_report_ctable, mp->b_cont, zoneid);
791
792 return (0);
793}
794
711 /* report the bad route to routing sockets */
712 ip_rts_change(RTM_LOSING, ire->ire_addr, ire->ire_gateway_addr,
713 ire->ire_mask, ire->ire_src_addr, 0, 0, 0,
714 (RTA_DST | RTA_GATEWAY | RTA_NETMASK | RTA_IFA));
715 routing_sock_info = B_TRUE;
716 ire_delete(ire);
717 ire_refrele(ire);
718 }

--- 61 unchanged lines hidden (view full) ---

780 zoneid = ALL_ZONES;
781
782 ire_walk_v4(ire_report_ftable, mp->b_cont, zoneid);
783 ire_walk_v4(ire_report_ctable, mp->b_cont, zoneid);
784
785 return (0);
786}
787
795/* ire_walk routine invoked for ip_ire_report for each IRE. */
796static void
797ire_report_ftable(ire_t *ire, char *mp)
798{
799 char buf1[16];
800 char buf2[16];
801 char buf3[16];
802 char buf4[16];
803 uint_t fo_pkt_count;
804 uint_t ib_pkt_count;
805 int ref;
806 uint_t print_len, buf_len;
807
788
808 if (ire->ire_type & IRE_CACHETABLE)
809 return;
810 buf_len = ((mblk_t *)mp)->b_datap->db_lim - ((mblk_t *)mp)->b_wptr;
811 if (buf_len <= 0)
812 return;
813
814 /* Number of active references of this ire */
815 ref = ire->ire_refcnt;
816 /* "inbound" to a non local address is a forward */
817 ib_pkt_count = ire->ire_ib_pkt_count;
818 fo_pkt_count = 0;
819 if (!(ire->ire_type & (IRE_LOCAL|IRE_BROADCAST))) {
820 fo_pkt_count = ib_pkt_count;
821 ib_pkt_count = 0;
822 }
823 print_len = snprintf((char *)((mblk_t *)mp)->b_wptr, buf_len,
824 MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR MI_COL_PTRFMT_STR "%5d "
825 "%s %s %s %s %05d %05ld %06ld %08d %03d %06d %09d %09d %06d %08d "
826 "%04d %08d %08d %d/%d/%d %s\n",
827 (void *)ire, (void *)ire->ire_rfq, (void *)ire->ire_stq,
828 (int)ire->ire_zoneid,
829 ip_dot_addr(ire->ire_addr, buf1), ip_dot_addr(ire->ire_mask, buf2),
830 ip_dot_addr(ire->ire_src_addr, buf3),
831 ip_dot_addr(ire->ire_gateway_addr, buf4),
832 ire->ire_max_frag, ire->ire_uinfo.iulp_rtt,
833 ire->ire_uinfo.iulp_rtt_sd,
834 ire->ire_uinfo.iulp_ssthresh, ref,
835 ire->ire_uinfo.iulp_rtomax,
836 (ire->ire_uinfo.iulp_tstamp_ok ? 1: 0),
837 (ire->ire_uinfo.iulp_wscale_ok ? 1: 0),
838 (ire->ire_uinfo.iulp_ecn_ok ? 1: 0),
839 (ire->ire_uinfo.iulp_pmtud_ok ? 1: 0),
840 ire->ire_uinfo.iulp_sack,
841 ire->ire_uinfo.iulp_spipe, ire->ire_uinfo.iulp_rpipe,
842 ib_pkt_count, ire->ire_ob_pkt_count, fo_pkt_count,
843 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type));
844 if (print_len < buf_len) {
845 ((mblk_t *)mp)->b_wptr += print_len;
846 } else {
847 ((mblk_t *)mp)->b_wptr += buf_len;
848 }
849}
850
851/* ire_walk routine invoked for ip_ire_report for each cached IRE. */
852static void
853ire_report_ctable(ire_t *ire, char *mp)
854{
855 char buf1[16];
856 char buf2[16];
857 char buf3[16];
858 char buf4[16];

--- 234 unchanged lines hidden (view full) ---

1093/*
1094 * Send a packet using the specified IRE.
1095 * If ire_src_addr_v6 is all zero then discard the IRE after
1096 * the packet has been sent.
1097 */
1098static void
1099ire_send(queue_t *q, mblk_t *pkt, ire_t *ire)
1100{
789/* ire_walk routine invoked for ip_ire_report for each cached IRE. */
790static void
791ire_report_ctable(ire_t *ire, char *mp)
792{
793 char buf1[16];
794 char buf2[16];
795 char buf3[16];
796 char buf4[16];

--- 234 unchanged lines hidden (view full) ---

1031/*
1032 * Send a packet using the specified IRE.
1033 * If ire_src_addr_v6 is all zero then discard the IRE after
1034 * the packet has been sent.
1035 */
1036static void
1037ire_send(queue_t *q, mblk_t *pkt, ire_t *ire)
1038{
1101 mblk_t *mp;
1102 mblk_t *ipsec_mp;
1103 boolean_t is_secure;
1104 uint_t ifindex;
1105 ill_t *ill;
1106
1107 ASSERT(ire->ire_ipversion == IPV4_VERSION);
1108 ipsec_mp = pkt;
1109 is_secure = (pkt->b_datap->db_type == M_CTL);

--- 14 unchanged lines hidden (view full) ---

1124 if (ill == NULL) {
1125 pkt->b_prev = NULL;
1126 pkt->b_next = NULL;
1127 freemsg(ipsec_mp);
1128 return;
1129 }
1130 q = ill->ill_rq;
1131 pkt->b_prev = NULL;
1039 mblk_t *ipsec_mp;
1040 boolean_t is_secure;
1041 uint_t ifindex;
1042 ill_t *ill;
1043
1044 ASSERT(ire->ire_ipversion == IPV4_VERSION);
1045 ipsec_mp = pkt;
1046 is_secure = (pkt->b_datap->db_type == M_CTL);

--- 14 unchanged lines hidden (view full) ---

1061 if (ill == NULL) {
1062 pkt->b_prev = NULL;
1063 pkt->b_next = NULL;
1064 freemsg(ipsec_mp);
1065 return;
1066 }
1067 q = ill->ill_rq;
1068 pkt->b_prev = NULL;
1132 mp = allocb(0, BPRI_HI);
1133 if (mp == NULL) {
1134 ill_refrele(ill);
1135 pkt->b_next = NULL;
1136 freemsg(ipsec_mp);
1137 return;
1138 }
1139 mp->b_datap->db_type = M_BREAK;
1140 /*
1141 * This packet has not gone through IPSEC processing
1142 * and hence we should not have any IPSEC message
1143 * prepended.
1144 */
1145 ASSERT(ipsec_mp == pkt);
1069 /*
1070 * This packet has not gone through IPSEC processing
1071 * and hence we should not have any IPSEC message
1072 * prepended.
1073 */
1074 ASSERT(ipsec_mp == pkt);
1146 mp->b_cont = ipsec_mp;
1147 put(q, mp);
1075 put(q, pkt);
1148 ill_refrele(ill);
1149 } else if (pkt->b_next) {
1150 /* Packets from multicast router */
1151 pkt->b_next = NULL;
1152 /*
1153 * We never get the IPSEC_OUT while forwarding the
1154 * packet for multicast router.
1155 */

--- 127 unchanged lines hidden (view full) ---

1283
1284 /* If the packet originated externally then */
1285 if (pkt->b_prev) {
1286 ill_t *ill;
1287 /*
1288 * Extract the ifindex from b_prev (set in ip_rput_data_v6).
1289 * Look up interface to see if it still exists (it could have
1290 * been unplumbed by the time the reply came back from the
1076 ill_refrele(ill);
1077 } else if (pkt->b_next) {
1078 /* Packets from multicast router */
1079 pkt->b_next = NULL;
1080 /*
1081 * We never get the IPSEC_OUT while forwarding the
1082 * packet for multicast router.
1083 */

--- 127 unchanged lines hidden (view full) ---

1211
1212 /* If the packet originated externally then */
1213 if (pkt->b_prev) {
1214 ill_t *ill;
1215 /*
1216 * Extract the ifindex from b_prev (set in ip_rput_data_v6).
1217 * Look up interface to see if it still exists (it could have
1218 * been unplumbed by the time the reply came back from the
1291 * resolver). Unlike IPv4 there is no need for a prepended
1292 * M_BREAK since ip_rput_data_v6 does not process options
1293 * before finding an IRE.
1219 * resolver).
1294 */
1295 ifindex = (uint_t)(uintptr_t)pkt->b_prev;
1296 ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1297 NULL, NULL, NULL, NULL);
1298 if (ill == NULL) {
1299 pkt->b_prev = NULL;
1300 pkt->b_next = NULL;
1301 freemsg(ipsec_mp);

--- 210 unchanged lines hidden (view full) ---

1512 EXTRACT_PKT_MP(mp, first_mp, mctl_present);
1513 ipha = (ipha_t *)mp->b_rptr;
1514 save_mp = mp;
1515 mp = first_mp;
1516
1517 dst_ire = ire_cache_lookup(ipha->ipha_dst,
1518 ire->ire_zoneid, MBLK_GETLABEL(mp));
1519 } else {
1220 */
1221 ifindex = (uint_t)(uintptr_t)pkt->b_prev;
1222 ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1223 NULL, NULL, NULL, NULL);
1224 if (ill == NULL) {
1225 pkt->b_prev = NULL;
1226 pkt->b_next = NULL;
1227 freemsg(ipsec_mp);

--- 210 unchanged lines hidden (view full) ---

1438 EXTRACT_PKT_MP(mp, first_mp, mctl_present);
1439 ipha = (ipha_t *)mp->b_rptr;
1440 save_mp = mp;
1441 mp = first_mp;
1442
1443 dst_ire = ire_cache_lookup(ipha->ipha_dst,
1444 ire->ire_zoneid, MBLK_GETLABEL(mp));
1445 } else {
1446 ASSERT(ire->ire_ipversion == IPV6_VERSION);
1520 /*
1521 * Get a pointer to the beginning of the IPv6 header.
1522 * Ignore leading IPsec control mblks.
1523 */
1524 first_mp = mp;
1525 if (mp->b_datap->db_type == M_CTL) {
1526 mp = mp->b_cont;
1527 }

--- 32 unchanged lines hidden (view full) ---

1560 if (!(ire->ire_marks & IRE_MARK_NOADD)) {
1561 /*
1562 * Regular packets with cache bound ires and
1563 * the packets from ARP response for ires which
1564 * belong to the ire_srcif_v4 table, are here.
1565 */
1566 if (ire->ire_in_ill == NULL) {
1567 /* Add the ire */
1447 /*
1448 * Get a pointer to the beginning of the IPv6 header.
1449 * Ignore leading IPsec control mblks.
1450 */
1451 first_mp = mp;
1452 if (mp->b_datap->db_type == M_CTL) {
1453 mp = mp->b_cont;
1454 }

--- 32 unchanged lines hidden (view full) ---

1487 if (!(ire->ire_marks & IRE_MARK_NOADD)) {
1488 /*
1489 * Regular packets with cache bound ires and
1490 * the packets from ARP response for ires which
1491 * belong to the ire_srcif_v4 table, are here.
1492 */
1493 if (ire->ire_in_ill == NULL) {
1494 /* Add the ire */
1568 (void) ire_add(&ire, NULL, NULL, NULL);
1495 (void) ire_add(&ire, NULL, NULL, NULL, B_FALSE);
1569 } else {
1570 /*
1571 * This must be ARP response for ire in interface based
1572 * table. Note that we don't add them in cache table,
1573 * instead we update the existing table with dlureq_mp
1574 * information. The reverse tunnel ires do not come
1575 * here, as reverse tunnel is non-resolver interface.
1576 * XXX- another design alternative was to mark the

--- 44 unchanged lines hidden (view full) ---

1621 * TODO: in order for CGTP to work in non-global
1622 * zones, ip_newroute() must create the IRE
1623 * cache in the zone indicated by
1624 * ire->ire_zoneid.
1625 */
1626 ip_newroute(q, mp, ipha->ipha_dst, 0,
1627 (CONN_Q(q) ? Q_TO_CONN(q) : NULL));
1628 } else {
1496 } else {
1497 /*
1498 * This must be ARP response for ire in interface based
1499 * table. Note that we don't add them in cache table,
1500 * instead we update the existing table with dlureq_mp
1501 * information. The reverse tunnel ires do not come
1502 * here, as reverse tunnel is non-resolver interface.
1503 * XXX- another design alternative was to mark the

--- 44 unchanged lines hidden (view full) ---

1548 * TODO: in order for CGTP to work in non-global
1549 * zones, ip_newroute() must create the IRE
1550 * cache in the zone indicated by
1551 * ire->ire_zoneid.
1552 */
1553 ip_newroute(q, mp, ipha->ipha_dst, 0,
1554 (CONN_Q(q) ? Q_TO_CONN(q) : NULL));
1555 } else {
1556 ASSERT(ire->ire_ipversion == IPV6_VERSION);
1629 ip_newroute_v6(q, mp, &ip6h->ip6_dst, NULL,
1630 NULL, ire->ire_zoneid);
1631 }
1632 }
1633
1634 ire_refrele(ire); /* As done by ire_send(). */
1635 return;
1636 }

--- 107 unchanged lines hidden (view full) ---

1744 */
1745ire_t *
1746ire_create_mp(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway,
1747 uchar_t *in_src_addr, uint_t max_frag, mblk_t *fp_mp, queue_t *rfq,
1748 queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill,
1749 ipaddr_t cmask, uint32_t phandle, uint32_t ihandle, uint32_t flags,
1750 const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp)
1751{
1557 ip_newroute_v6(q, mp, &ip6h->ip6_dst, NULL,
1558 NULL, ire->ire_zoneid);
1559 }
1560 }
1561
1562 ire_refrele(ire); /* As done by ire_send(). */
1563 return;
1564 }

--- 107 unchanged lines hidden (view full) ---

1672 */
1673ire_t *
1674ire_create_mp(uchar_t *addr, uchar_t *mask, uchar_t *src_addr, uchar_t *gateway,
1675 uchar_t *in_src_addr, uint_t max_frag, mblk_t *fp_mp, queue_t *rfq,
1676 queue_t *stq, ushort_t type, mblk_t *dlureq_mp, ipif_t *ipif, ill_t *in_ill,
1677 ipaddr_t cmask, uint32_t phandle, uint32_t ihandle, uint32_t flags,
1678 const iulp_t *ulp_info, tsol_gc_t *gc, tsol_gcgrp_t *gcgrp)
1679{
1752 ire_t *ire;
1680 ire_t *ire, *buf;
1753 ire_t *ret_ire;
1754 mblk_t *mp;
1681 ire_t *ret_ire;
1682 mblk_t *mp;
1683 size_t bufsize;
1684 frtn_t *frtnp;
1685 ill_t *ill;
1755
1686
1756 /* Allocate the new IRE. */
1757 mp = allocb(sizeof (ire_t), BPRI_MED);
1758 if (mp == NULL) {
1687 bufsize = sizeof (ire_t) + sizeof (frtn_t);
1688 buf = kmem_alloc(bufsize, KM_NOSLEEP);
1689 if (buf == NULL) {
1759 ip1dbg(("ire_create_mp: alloc failed\n"));
1760 return (NULL);
1761 }
1690 ip1dbg(("ire_create_mp: alloc failed\n"));
1691 return (NULL);
1692 }
1693 frtnp = (frtn_t *)(buf + 1);
1694 frtnp->free_arg = (caddr_t)buf;
1695 frtnp->free_func = ire_freemblk;
1762
1696
1697 /*
1698 * Allocate the new IRE. The ire created will hold a ref on
1699 * an nce_t after ire_nce_init, and this ref must either be
1700 * (a) transferred to the ire_cache entry created when ire_add_v4
1701 * is called after successful arp resolution, or,
1702 * (b) released, when arp resolution fails
1703 * Case (b) is handled in ire_freemblk() which will be called
1704 * when mp is freed as a result of failed arp.
1705 */
1706 mp = esballoc((unsigned char *)buf, bufsize, BPRI_MED, frtnp);
1707 if (mp == NULL) {
1708 ip1dbg(("ire_create_mp: alloc failed\n"));
1709 kmem_free(buf, bufsize);
1710 return (NULL);
1711 }
1763 ire = (ire_t *)mp->b_rptr;
1764 mp->b_wptr = (uchar_t *)&ire[1];
1765
1766 /* Start clean. */
1767 *ire = ire_null;
1768 ire->ire_mp = mp;
1769 mp->b_datap->db_type = IRE_DB_TYPE;
1712 ire = (ire_t *)mp->b_rptr;
1713 mp->b_wptr = (uchar_t *)&ire[1];
1714
1715 /* Start clean. */
1716 *ire = ire_null;
1717 ire->ire_mp = mp;
1718 mp->b_datap->db_type = IRE_DB_TYPE;
1719 ire->ire_marks |= IRE_MARK_UNCACHED;
1770
1771 ret_ire = ire_init(ire, addr, mask, src_addr, gateway, in_src_addr,
1772 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, in_ill, cmask,
1773 phandle, ihandle, flags, ulp_info, gc, gcgrp);
1774
1775 if (ret_ire == NULL) {
1776 freeb(ire->ire_mp);
1777 return (NULL);
1778 }
1720
1721 ret_ire = ire_init(ire, addr, mask, src_addr, gateway, in_src_addr,
1722 NULL, fp_mp, rfq, stq, type, dlureq_mp, ipif, in_ill, cmask,
1723 phandle, ihandle, flags, ulp_info, gc, gcgrp);
1724
1725 if (ret_ire == NULL) {
1726 freeb(ire->ire_mp);
1727 return (NULL);
1728 }
1729 ill = ire_to_ill(ret_ire);
1730 ret_ire->ire_stq_ifindex = ill->ill_phyint->phyint_ifindex;
1779 ASSERT(ret_ire == ire);
1780 /*
1781 * ire_max_frag is normally zero here and is atomically set
1782 * under the irebucket lock in ire_add_v[46] except for the
1783 * case of IRE_MARK_NOADD. In that event the ire_max_frag
1784 * is non-zero here.
1785 */
1786 ire->ire_max_frag = max_frag;

--- 86 unchanged lines hidden (view full) ---

1873 if (fp_mp != NULL)
1874 freeb(fp_mp);
1875 if (dlureq_mp != NULL)
1876 freeb(dlureq_mp);
1877 return (B_FALSE);
1878 }
1879 }
1880
1731 ASSERT(ret_ire == ire);
1732 /*
1733 * ire_max_frag is normally zero here and is atomically set
1734 * under the irebucket lock in ire_add_v[46] except for the
1735 * case of IRE_MARK_NOADD. In that event the ire_max_frag
1736 * is non-zero here.
1737 */
1738 ire->ire_max_frag = max_frag;

--- 86 unchanged lines hidden (view full) ---

1825 if (fp_mp != NULL)
1826 freeb(fp_mp);
1827 if (dlureq_mp != NULL)
1828 freeb(dlureq_mp);
1829 return (B_FALSE);
1830 }
1831 }
1832
1881 ire->ire_fp_mp = fp_mp;
1882 ire->ire_dlureq_mp = dlureq_mp;
1883 ire->ire_stq = stq;
1884 ire->ire_rfq = rfq;
1885 ire->ire_type = type;
1886 ire->ire_flags = RTF_UP | flags;
1887 ire->ire_ident = TICK_TO_MSEC(lbolt);
1888 bcopy(ulp_info, &ire->ire_uinfo, sizeof (iulp_t));
1889
1890 ire->ire_tire_mark = ire->ire_ob_pkt_count + ire->ire_ib_pkt_count;

--- 24 unchanged lines hidden (view full) ---

1915 ire->ire_ipif = ipif;
1916 if (ipif != NULL) {
1917 ire->ire_ipif_seqid = ipif->ipif_seqid;
1918 ire->ire_zoneid = ipif->ipif_zoneid;
1919 } else {
1920 ire->ire_zoneid = GLOBAL_ZONEID;
1921 }
1922 ire->ire_ipversion = ipversion;
1833 ire->ire_stq = stq;
1834 ire->ire_rfq = rfq;
1835 ire->ire_type = type;
1836 ire->ire_flags = RTF_UP | flags;
1837 ire->ire_ident = TICK_TO_MSEC(lbolt);
1838 bcopy(ulp_info, &ire->ire_uinfo, sizeof (iulp_t));
1839
1840 ire->ire_tire_mark = ire->ire_ob_pkt_count + ire->ire_ib_pkt_count;

--- 24 unchanged lines hidden (view full) ---

1865 ire->ire_ipif = ipif;
1866 if (ipif != NULL) {
1867 ire->ire_ipif_seqid = ipif->ipif_seqid;
1868 ire->ire_zoneid = ipif->ipif_zoneid;
1869 } else {
1870 ire->ire_zoneid = GLOBAL_ZONEID;
1871 }
1872 ire->ire_ipversion = ipversion;
1923 ire->ire_refcnt = 1;
1924 mutex_init(&ire->ire_lock, NULL, MUTEX_DEFAULT, NULL);
1873 mutex_init(&ire->ire_lock, NULL, MUTEX_DEFAULT, NULL);
1874 if (ipversion == IPV4_VERSION) {
1875 if (ire_nce_init(ire, fp_mp, dlureq_mp) != 0) {
1876 /* some failure occurred. propagate error back */
1877 return (B_FALSE);
1878 }
1879 } else {
1880 ASSERT(ipversion == IPV6_VERSION);
1881 /*
1882 * IPv6 initializes the ire_nce in ire_add_v6,
1883 * which expects to find the ire_nce to be null when
1884 * it is called.
1885 */
1886 if (dlureq_mp)
1887 freemsg(dlureq_mp);
1888 if (fp_mp)
1889 freemsg(fp_mp);
1890 }
1891 ire->ire_refcnt = 1;
1925
1926#ifdef IRE_DEBUG
1927 bzero(ire->ire_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
1928#endif
1929
1930 return (B_TRUE);
1931}
1932

--- 133 unchanged lines hidden (view full) ---

2066 ill_t *stq_ill;
2067
2068 if ((flush_flags & FLUSH_REDIRECT_TIME) &&
2069 ire->ire_type == IRE_HOST_REDIRECT) {
2070 /* Make sure we delete the corresponding IRE_CACHE */
2071 ip1dbg(("ire_expire: all redirects\n"));
2072 ip_rts_rtmsg(RTM_DELETE, ire, 0);
2073 ire_delete(ire);
1892
1893#ifdef IRE_DEBUG
1894 bzero(ire->ire_trace, sizeof (th_trace_t *) * IP_TR_HASH_MAX);
1895#endif
1896
1897 return (B_TRUE);
1898}
1899

--- 133 unchanged lines hidden (view full) ---

2033 ill_t *stq_ill;
2034
2035 if ((flush_flags & FLUSH_REDIRECT_TIME) &&
2036 ire->ire_type == IRE_HOST_REDIRECT) {
2037 /* Make sure we delete the corresponding IRE_CACHE */
2038 ip1dbg(("ire_expire: all redirects\n"));
2039 ip_rts_rtmsg(RTM_DELETE, ire, 0);
2040 ire_delete(ire);
2041 atomic_dec_32(&ip_redirect_cnt);
2074 return;
2075 }
2076 if (ire->ire_type != IRE_CACHE)
2077 return;
2078
2079 if (flush_flags & FLUSH_ARP_TIME) {
2080 /*
2081 * Remove all IRE_CACHE.
2082 * Verify that create time is more than
2083 * ip_ire_arp_interval milliseconds ago.
2084 */
2042 return;
2043 }
2044 if (ire->ire_type != IRE_CACHE)
2045 return;
2046
2047 if (flush_flags & FLUSH_ARP_TIME) {
2048 /*
2049 * Remove all IRE_CACHE.
2050 * Verify that create time is more than
2051 * ip_ire_arp_interval milliseconds ago.
2052 */
2085 if (((uint32_t)gethrestime_sec() - ire->ire_create_time) *
2086 MILLISEC > ip_ire_arp_interval) {
2087 ip1dbg(("ire_expire: all IRE_CACHE\n"));
2053 if (NCE_EXPIRED(ire->ire_nce)) {
2088 ire_delete(ire);
2089 return;
2090 }
2091 }
2092
2093 if (ip_path_mtu_discovery && (flush_flags & FLUSH_MTU_TIME) &&
2094 (ire->ire_ipif != NULL)) {
2095 /* Increase pmtu if it is less than the interface mtu */

--- 13 unchanged lines hidden (view full) ---

2109 ire->ire_frag_flag |= IPH_DF;
2110 mutex_exit(&ire->ire_lock);
2111 }
2112}
2113
2114/*
2115 * Do fast path probing if necessary.
2116 */
2054 ire_delete(ire);
2055 return;
2056 }
2057 }
2058
2059 if (ip_path_mtu_discovery && (flush_flags & FLUSH_MTU_TIME) &&
2060 (ire->ire_ipif != NULL)) {
2061 /* Increase pmtu if it is less than the interface mtu */

--- 13 unchanged lines hidden (view full) ---

2075 ire->ire_frag_flag |= IPH_DF;
2076 mutex_exit(&ire->ire_lock);
2077 }
2078}
2079
2080/*
2081 * Do fast path probing if necessary.
2082 */
2117static void
2083void
2118ire_fastpath(ire_t *ire)
2119{
2120 ill_t *ill;
2121 int res;
2122
2084ire_fastpath(ire_t *ire)
2085{
2086 ill_t *ill;
2087 int res;
2088
2123 if (ire->ire_fp_mp != NULL || ire->ire_dlureq_mp == NULL ||
2124 (ire->ire_stq == NULL)) {
2089 if (ire->ire_nce == NULL || ire->ire_nce->nce_fp_mp != NULL ||
2090 ire->ire_nce->nce_state != ND_REACHABLE ||
2091 ire->ire_nce->nce_res_mp == NULL) {
2092
2125 /*
2126 * Already contains fastpath info or
2093 /*
2094 * Already contains fastpath info or
2127 * doesn't have DL_UNITDATA_REQ header
2095 * doesn't have DL_UNITDATA_REQ header or
2096 * is an incomplete ire in the ire table
2128 * or is a loopback broadcast ire i.e. no stq.
2129 */
2130 return;
2131 }
2132 ill = ire_to_ill(ire);
2133 if (ill == NULL)
2134 return;
2135 ire_fastpath_list_add(ill, ire);
2097 * or is a loopback broadcast ire i.e. no stq.
2098 */
2099 return;
2100 }
2101 ill = ire_to_ill(ire);
2102 if (ill == NULL)
2103 return;
2104 ire_fastpath_list_add(ill, ire);
2136 res = ill_fastpath_probe(ill, ire->ire_dlureq_mp);
2105 res = ill_fastpath_probe(ill, ire->ire_nce->nce_res_mp);
2137 /*
2138 * EAGAIN is an indication of a transient error
2139 * i.e. allocation failure etc. leave the ire in the list it will
2140 * be updated when another probe happens for another ire if not
2141 * it will be taken out of the list when the ire is deleted.
2142 */
2143 if (res != 0 && res != EAGAIN)
2144 ire_fastpath_list_delete(ill, ire);

--- 4 unchanged lines hidden (view full) ---

2149 * have a dlureq_mp that matches mp. mp->b_cont contains
2150 * the fastpath header.
2151 *
2152 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
2153 */
2154boolean_t
2155ire_fastpath_update(ire_t *ire, void *arg)
2156{
2106 /*
2107 * EAGAIN is an indication of a transient error
2108 * i.e. allocation failure etc. leave the ire in the list it will
2109 * be updated when another probe happens for another ire if not
2110 * it will be taken out of the list when the ire is deleted.
2111 */
2112 if (res != 0 && res != EAGAIN)
2113 ire_fastpath_list_delete(ill, ire);

--- 4 unchanged lines hidden (view full) ---

2118 * have a dlureq_mp that matches mp. mp->b_cont contains
2119 * the fastpath header.
2120 *
2121 * Returns TRUE if entry should be dequeued, or FALSE otherwise.
2122 */
2123boolean_t
2124ire_fastpath_update(ire_t *ire, void *arg)
2125{
2157 mblk_t *mp, *fp_mp;
2126 mblk_t *mp, *fp_mp;
2158 uchar_t *up, *up2;
2159 ptrdiff_t cmplen;
2127 uchar_t *up, *up2;
2128 ptrdiff_t cmplen;
2129 nce_t *arpce;
2160
2161 ASSERT((ire->ire_type & (IRE_CACHE | IRE_BROADCAST |
2162 IRE_MIPRTUN)) != 0);
2163
2164 /*
2165 * Already contains fastpath info or doesn't have
2130
2131 ASSERT((ire->ire_type & (IRE_CACHE | IRE_BROADCAST |
2132 IRE_MIPRTUN)) != 0);
2133
2134 /*
2135 * Already contains fastpath info or doesn't have
2166 * DL_UNITDATA_REQ header.
2136 * DL_UNITDATA_REQ header or is an incomplete ire.
2167 */
2137 */
2168 if (ire->ire_fp_mp != NULL || ire->ire_dlureq_mp == NULL)
2138 if (ire->ire_nce == NULL || ire->ire_nce->nce_res_mp == NULL ||
2139 ire->ire_nce->nce_fp_mp != NULL ||
2140 ire->ire_nce->nce_state != ND_REACHABLE)
2169 return (B_TRUE);
2170
2171 ip2dbg(("ire_fastpath_update: trying\n"));
2141 return (B_TRUE);
2142
2143 ip2dbg(("ire_fastpath_update: trying\n"));
2172 mp = (mblk_t *)arg;
2144 mp = arg;
2173 up = mp->b_rptr;
2174 cmplen = mp->b_wptr - up;
2175 /* Serialize multiple fast path updates */
2176 mutex_enter(&ire->ire_lock);
2145 up = mp->b_rptr;
2146 cmplen = mp->b_wptr - up;
2147 /* Serialize multiple fast path updates */
2148 mutex_enter(&ire->ire_lock);
2177 up2 = ire->ire_dlureq_mp->b_rptr;
2149 up2 = ire->ire_nce->nce_res_mp->b_rptr;
2178 ASSERT(cmplen >= 0);
2150 ASSERT(cmplen >= 0);
2179 if (ire->ire_dlureq_mp->b_wptr - up2 != cmplen ||
2151 if (ire->ire_nce->nce_res_mp->b_wptr - up2 != cmplen ||
2180 bcmp(up, up2, cmplen) != 0) {
2181 mutex_exit(&ire->ire_lock);
2182 /*
2183 * Don't take the ire off the fastpath list yet,
2184 * since the response may come later.
2185 */
2186 return (B_FALSE);
2187 }
2152 bcmp(up, up2, cmplen) != 0) {
2153 mutex_exit(&ire->ire_lock);
2154 /*
2155 * Don't take the ire off the fastpath list yet,
2156 * since the response may come later.
2157 */
2158 return (B_FALSE);
2159 }
2188 /* Matched - install mp as the ire_fp_mp */
2160 arpce = ire->ire_nce;
2161 /* Matched - install mp as the nce_fp_mp */
2189 ip1dbg(("ire_fastpath_update: match\n"));
2190 fp_mp = dupb(mp->b_cont);
2191 if (fp_mp) {
2192 /*
2162 ip1dbg(("ire_fastpath_update: match\n"));
2163 fp_mp = dupb(mp->b_cont);
2164 if (fp_mp) {
2165 /*
2193 * We checked ire_fp_mp above. Check it again with the
2166 * We checked nce_fp_mp above. Check it again with the
2194 * lock. Update fp_mp only if it has not been done
2195 * already.
2196 */
2167 * lock. Update fp_mp only if it has not been done
2168 * already.
2169 */
2197 if (ire->ire_fp_mp == NULL) {
2170 if (arpce->nce_fp_mp == NULL) {
2198 /*
2199 * ire_ll_hdr_length is just an optimization to
2200 * store the length. It is used to return the
2201 * fast path header length to the upper layers.
2202 */
2171 /*
2172 * ire_ll_hdr_length is just an optimization to
2173 * store the length. It is used to return the
2174 * fast path header length to the upper layers.
2175 */
2203 ire->ire_fp_mp = fp_mp;
2176 arpce->nce_fp_mp = fp_mp;
2204 ire->ire_ll_hdr_length =
2205 (uint_t)(fp_mp->b_wptr - fp_mp->b_rptr);
2206 } else {
2207 freeb(fp_mp);
2208 }
2209 }
2210 mutex_exit(&ire->ire_lock);
2211 return (B_TRUE);

--- 6 unchanged lines hidden (view full) ---

2218/* ARGSUSED */
2219void
2220ire_fastpath_flush(ire_t *ire, void *arg)
2221{
2222 ill_t *ill;
2223 int res;
2224
2225 /* No fastpath info? */
2177 ire->ire_ll_hdr_length =
2178 (uint_t)(fp_mp->b_wptr - fp_mp->b_rptr);
2179 } else {
2180 freeb(fp_mp);
2181 }
2182 }
2183 mutex_exit(&ire->ire_lock);
2184 return (B_TRUE);

--- 6 unchanged lines hidden (view full) ---

2191/* ARGSUSED */
2192void
2193ire_fastpath_flush(ire_t *ire, void *arg)
2194{
2195 ill_t *ill;
2196 int res;
2197
2198 /* No fastpath info? */
2226 if (ire->ire_fp_mp == NULL || ire->ire_dlureq_mp == NULL)
2199 if (ire->ire_nce == NULL ||
2200 ire->ire_nce->nce_fp_mp == NULL || ire->ire_nce->nce_res_mp == NULL)
2227 return;
2228
2229 /*
2230 * Just remove the IRE if it is for non-broadcast dest. Then
2231 * we will create another one which will have the correct
2232 * fastpath info.
2233 */
2234 switch (ire->ire_type) {
2235 case IRE_CACHE:
2236 ire_delete(ire);
2237 break;
2238 case IRE_MIPRTUN:
2239 case IRE_BROADCAST:
2240 /*
2241 * We can't delete the ire since it is difficult to
2242 * recreate these ire's without going through the
2201 return;
2202
2203 /*
2204 * Just remove the IRE if it is for non-broadcast dest. Then
2205 * we will create another one which will have the correct
2206 * fastpath info.
2207 */
2208 switch (ire->ire_type) {
2209 case IRE_CACHE:
2210 ire_delete(ire);
2211 break;
2212 case IRE_MIPRTUN:
2213 case IRE_BROADCAST:
2214 /*
2215 * We can't delete the ire since it is difficult to
2216 * recreate these ire's without going through the
2243 * ipif down/up dance. The ire_fp_mp is protected by the
2244 * ire_lock in the case of IRE_MIPRTUN and IRE_BROADCAST.
2245 * All access to ire_fp_mp in the case of these 2 ire types
2246 * is protected by ire_lock.
2217 * ipif down/up dance. The nce_fp_mp is protected by the
2218 * nce_lock in the case of IRE_MIPRTUN and IRE_BROADCAST.
2219 * All access to ire->ire_nce->nce_fp_mp in the case of these
2220 * 2 ire types is protected by nce_lock.
2247 */
2221 */
2248 mutex_enter(&ire->ire_lock);
2249 if (ire->ire_fp_mp != NULL) {
2250 freeb(ire->ire_fp_mp);
2251 ire->ire_fp_mp = NULL;
2252 mutex_exit(&ire->ire_lock);
2222 mutex_enter(&ire->ire_nce->nce_lock);
2223 if (ire->ire_nce->nce_fp_mp != NULL) {
2224 freeb(ire->ire_nce->nce_fp_mp);
2225 ire->ire_nce->nce_fp_mp = NULL;
2226 mutex_exit(&ire->ire_nce->nce_lock);
2253 /*
2254 * No fastpath probe if there is no stq i.e.
2255 * the case of loopback broadcast ire.
2256 */
2257 if (ire->ire_stq == NULL)
2258 break;
2259 ill = (ill_t *)((ire->ire_stq)->q_ptr);
2260 ire_fastpath_list_add(ill, ire);
2227 /*
2228 * No fastpath probe if there is no stq i.e.
2229 * the case of loopback broadcast ire.
2230 */
2231 if (ire->ire_stq == NULL)
2232 break;
2233 ill = (ill_t *)((ire->ire_stq)->q_ptr);
2234 ire_fastpath_list_add(ill, ire);
2261 res = ill_fastpath_probe(ill, ire->ire_dlureq_mp);
2235 res = ill_fastpath_probe(ill, ire->ire_nce->nce_res_mp);
2262 /*
2263 * EAGAIN is an indication of a transient error
2264 * i.e. allocation failure etc. leave the ire in the
2265 * list it will be updated when another probe happens
2266 * for another ire if not it will be taken out of the
2267 * list when the ire is deleted.
2268 */
2269 if (res != 0 && res != EAGAIN)
2270 ire_fastpath_list_delete(ill, ire);
2271 } else {
2236 /*
2237 * EAGAIN is an indication of a transient error
2238 * i.e. allocation failure etc. leave the ire in the
2239 * list it will be updated when another probe happens
2240 * for another ire if not it will be taken out of the
2241 * list when the ire is deleted.
2242 */
2243 if (res != 0 && res != EAGAIN)
2244 ire_fastpath_list_delete(ill, ire);
2245 } else {
2272 mutex_exit(&ire->ire_lock);
2246 mutex_exit(&ire->ire_nce->nce_lock);
2273 }
2274 break;
2275 default:
2276 /* This should not happen! */
2277 ip0dbg(("ire_fastpath_flush: Wrong ire type %s\n",
2278 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type)));
2279 break;
2280 }

--- 95 unchanged lines hidden (view full) ---

2376 ire_ptr = ire_ptr->ire_fastpath;
2377 }
2378 }
2379 ire->ire_fastpath = NULL;
2380done:
2381 mutex_exit(&ill->ill_lock);
2382}
2383
2247 }
2248 break;
2249 default:
2250 /* This should not happen! */
2251 ip0dbg(("ire_fastpath_flush: Wrong ire type %s\n",
2252 ip_nv_lookup(ire_nv_tbl, (int)ire->ire_type)));
2253 break;
2254 }

--- 95 unchanged lines hidden (view full) ---

2350 ire_ptr = ire_ptr->ire_fastpath;
2351 }
2352 }
2353 ire->ire_fastpath = NULL;
2354done:
2355 mutex_exit(&ill->ill_lock);
2356}
2357
2384
2385/*
2358/*
2386 * Find an IRE_INTERFACE for the multicast group.
2387 * Allows different routes for multicast addresses
2388 * in the unicast routing table (akin to 224.0.0.0 but could be more specific)
2389 * which point at different interfaces. This is used when IP_MULTICAST_IF
2390 * isn't specified (when sending) and when IP_ADD_MEMBERSHIP doesn't
2391 * specify the interface to join on.
2392 *
2393 * Supports IP_BOUND_IF by following the ipif/ill when recursing.
2394 */
2395ire_t *
2396ire_lookup_multi(ipaddr_t group, zoneid_t zoneid)
2397{
2398 ire_t *ire;
2399 ipif_t *ipif = NULL;
2400 int match_flags = MATCH_IRE_TYPE;
2401 ipaddr_t gw_addr;
2402
2403 ire = ire_ftable_lookup(group, 0, 0, 0, NULL, NULL, zoneid,
2404 0, NULL, MATCH_IRE_DEFAULT);
2405
2406 /* We search a resolvable ire in case of multirouting. */
2407 if ((ire != NULL) && (ire->ire_flags & RTF_MULTIRT)) {
2408 ire_t *cire = NULL;
2409 /*
2410 * If the route is not resolvable, the looked up ire
2411 * may be changed here. In that case, ire_multirt_lookup()
2412 * IRE_REFRELE the original ire and change it.
2413 */
2414 (void) ire_multirt_lookup(&cire, &ire, MULTIRT_CACHEGW, NULL);
2415 if (cire != NULL)
2416 ire_refrele(cire);
2417 }
2418 if (ire == NULL)
2419 return (NULL);
2420 /*
2421 * Make sure we follow ire_ipif.
2422 *
2423 * We need to determine the interface route through
2424 * which the gateway will be reached. We don't really
2425 * care which interface is picked if the interface is
2426 * part of a group.
2427 */
2428 if (ire->ire_ipif != NULL) {
2429 ipif = ire->ire_ipif;
2430 match_flags |= MATCH_IRE_ILL_GROUP;
2431 }
2432
2433 switch (ire->ire_type) {
2434 case IRE_DEFAULT:
2435 case IRE_PREFIX:
2436 case IRE_HOST:
2437 gw_addr = ire->ire_gateway_addr;
2438 ire_refrele(ire);
2439 ire = ire_ftable_lookup(gw_addr, 0, 0,
2440 IRE_INTERFACE, ipif, NULL, zoneid, 0,
2441 NULL, match_flags);
2442 return (ire);
2443 case IRE_IF_NORESOLVER:
2444 case IRE_IF_RESOLVER:
2445 return (ire);
2446 default:
2447 ire_refrele(ire);
2448 return (NULL);
2449 }
2450}
2451
2452/*
2453 * Return any local address. We use this to target ourselves
2454 * when the src address was specified as 'default'.
2455 * Preference for IRE_LOCAL entries.
2456 */
2457ire_t *
2458ire_lookup_local(zoneid_t zoneid)
2459{
2460 ire_t *ire;

--- 109 unchanged lines hidden (view full) ---

2570
2571/*
2572 * Walk a particular version. version == 0 means both v4 and v6.
2573 */
2574static void
2575ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, zoneid_t zoneid)
2576{
2577 if (vers != IPV6_VERSION) {
2359 * Return any local address. We use this to target ourselves
2360 * when the src address was specified as 'default'.
2361 * Preference for IRE_LOCAL entries.
2362 */
2363ire_t *
2364ire_lookup_local(zoneid_t zoneid)
2365{
2366 ire_t *ire;

--- 109 unchanged lines hidden (view full) ---

2476
2477/*
2478 * Walk a particular version. version == 0 means both v4 and v6.
2479 */
2480static void
2481ire_walk_ipvers(pfv_t func, void *arg, uchar_t vers, zoneid_t zoneid)
2482{
2483 if (vers != IPV6_VERSION) {
2484 /*
2485 * ip_forwarding_table variable doesn't matter for IPv4 since
2486 * ire_walk_ill_tables directly calls with the ip_ftable global
2487 */
2578 ire_walk_ill_tables(0, 0, func, arg, IP_MASK_TABLE_SIZE,
2488 ire_walk_ill_tables(0, 0, func, arg, IP_MASK_TABLE_SIZE,
2579 ip_ftable_hash_size, ip_forwarding_table,
2489 0, NULL,
2580 ip_cache_table_size, ip_cache_table, NULL, zoneid);
2581 }
2582 if (vers != IPV4_VERSION) {
2583 ire_walk_ill_tables(0, 0, func, arg, IP6_MASK_TABLE_SIZE,
2584 ip6_ftable_hash_size, ip_forwarding_table_v6,
2585 ip6_cache_table_size, ip_cache_table_v6, NULL, zoneid);
2586 }
2587}

--- 29 unchanged lines hidden (view full) ---

2617 * Walk a particular ill and version. version == 0 means both v4 and v6.
2618 */
2619static void
2620ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func,
2621 void *arg, uchar_t vers, ill_t *ill)
2622{
2623 if (vers != IPV6_VERSION) {
2624 ire_walk_ill_tables(match_flags, ire_type, func, arg,
2490 ip_cache_table_size, ip_cache_table, NULL, zoneid);
2491 }
2492 if (vers != IPV4_VERSION) {
2493 ire_walk_ill_tables(0, 0, func, arg, IP6_MASK_TABLE_SIZE,
2494 ip6_ftable_hash_size, ip_forwarding_table_v6,
2495 ip6_cache_table_size, ip_cache_table_v6, NULL, zoneid);
2496 }
2497}

--- 29 unchanged lines hidden (view full) ---

2527 * Walk a particular ill and version. version == 0 means both v4 and v6.
2528 */
2529static void
2530ire_walk_ill_ipvers(uint_t match_flags, uint_t ire_type, pfv_t func,
2531 void *arg, uchar_t vers, ill_t *ill)
2532{
2533 if (vers != IPV6_VERSION) {
2534 ire_walk_ill_tables(match_flags, ire_type, func, arg,
2625 IP_MASK_TABLE_SIZE, ip_ftable_hash_size,
2626 ip_forwarding_table, ip_cache_table_size,
2535 IP_MASK_TABLE_SIZE, 0,
2536 NULL, ip_cache_table_size,
2627 ip_cache_table, ill, ALL_ZONES);
2628 }
2629 if (vers != IPV4_VERSION) {
2630 ire_walk_ill_tables(match_flags, ire_type, func, arg,
2631 IP6_MASK_TABLE_SIZE, ip6_ftable_hash_size,
2632 ip_forwarding_table_v6, ip6_cache_table_size,
2633 ip_cache_table_v6, ill, ALL_ZONES);
2634 }
2635}
2636
2537 ip_cache_table, ill, ALL_ZONES);
2538 }
2539 if (vers != IPV4_VERSION) {
2540 ire_walk_ill_tables(match_flags, ire_type, func, arg,
2541 IP6_MASK_TABLE_SIZE, ip6_ftable_hash_size,
2542 ip_forwarding_table_v6, ip6_cache_table_size,
2543 ip_cache_table_v6, ill, ALL_ZONES);
2544 }
2545}
2546
2637static boolean_t
2547boolean_t
2638ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire,
2639 ill_t *ill, zoneid_t zoneid)
2640{
2641 ill_t *ire_stq_ill = NULL;
2642 ill_t *ire_ipif_ill = NULL;
2643 ill_group_t *ire_ill_group = NULL;
2644
2645 ASSERT(match_flags != 0 || zoneid != ALL_ZONES);

--- 125 unchanged lines hidden (view full) ---

2771 (ire_stq_ill == ill) || (ire_ipif_ill == ill) ||
2772 (ire_ill_group != NULL &&
2773 ire_ill_group == ill->ill_group))) {
2774 return (B_TRUE);
2775 }
2776 return (B_FALSE);
2777}
2778
2548ire_walk_ill_match(uint_t match_flags, uint_t ire_type, ire_t *ire,
2549 ill_t *ill, zoneid_t zoneid)
2550{
2551 ill_t *ire_stq_ill = NULL;
2552 ill_t *ire_ipif_ill = NULL;
2553 ill_group_t *ire_ill_group = NULL;
2554
2555 ASSERT(match_flags != 0 || zoneid != ALL_ZONES);

--- 125 unchanged lines hidden (view full) ---

2681 (ire_stq_ill == ill) || (ire_ipif_ill == ill) ||
2682 (ire_ill_group != NULL &&
2683 ire_ill_group == ill->ill_group))) {
2684 return (B_TRUE);
2685 }
2686 return (B_FALSE);
2687}
2688
2689int
2690rtfunc(struct radix_node *rn, void *arg)
2691{
2692 struct rtfuncarg *rtf = arg;
2693 struct rt_entry *rt;
2694 irb_t *irb;
2695 ire_t *ire;
2696 boolean_t ret;
2697
2698 rt = (struct rt_entry *)rn;
2699 ASSERT(rt != NULL);
2700 irb = &rt->rt_irb;
2701 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
2702 if ((rtf->rt_match_flags != 0) ||
2703 (rtf->rt_zoneid != ALL_ZONES)) {
2704 ret = ire_walk_ill_match(rtf->rt_match_flags,
2705 rtf->rt_ire_type, ire,
2706 rtf->rt_ill, rtf->rt_zoneid);
2707 } else
2708 ret = B_TRUE;
2709 if (ret)
2710 (*rtf->rt_func)(ire, rtf->rt_arg);
2711 }
2712 return (0);
2713}
2714
2779/*
2780 * Walk the ftable and the ctable entries that match the ill.
2781 */
2715/*
2716 * Walk the ftable and the ctable entries that match the ill.
2717 */
2782static void
2718void
2783ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func,
2784 void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl,
2785 size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, zoneid_t zoneid)
2786{
2787 irb_t *irb_ptr;
2788 irb_t *irb;
2789 ire_t *ire;
2790 int i, j;
2791 boolean_t ret;
2719ire_walk_ill_tables(uint_t match_flags, uint_t ire_type, pfv_t func,
2720 void *arg, size_t ftbl_sz, size_t htbl_sz, irb_t **ipftbl,
2721 size_t ctbl_sz, irb_t *ipctbl, ill_t *ill, zoneid_t zoneid)
2722{
2723 irb_t *irb_ptr;
2724 irb_t *irb;
2725 ire_t *ire;
2726 int i, j;
2727 boolean_t ret;
2728 struct rtfuncarg rtfarg;
2792
2793 ASSERT((!(match_flags & (MATCH_IRE_WQ | MATCH_IRE_ILL |
2794 MATCH_IRE_ILL_GROUP))) || (ill != NULL));
2795 ASSERT(!(match_flags & MATCH_IRE_TYPE) || (ire_type != 0));
2796 /*
2797 * Optimize by not looking at the forwarding table if there
2798 * is a MATCH_IRE_TYPE specified with no IRE_FORWARDTABLE
2799 * specified in ire_type.
2800 */
2801 if (!(match_flags & MATCH_IRE_TYPE) ||
2802 ((ire_type & IRE_FORWARDTABLE) != 0)) {
2729
2730 ASSERT((!(match_flags & (MATCH_IRE_WQ | MATCH_IRE_ILL |
2731 MATCH_IRE_ILL_GROUP))) || (ill != NULL));
2732 ASSERT(!(match_flags & MATCH_IRE_TYPE) || (ire_type != 0));
2733 /*
2734 * Optimize by not looking at the forwarding table if there
2735 * is a MATCH_IRE_TYPE specified with no IRE_FORWARDTABLE
2736 * specified in ire_type.
2737 */
2738 if (!(match_flags & MATCH_IRE_TYPE) ||
2739 ((ire_type & IRE_FORWARDTABLE) != 0)) {
2803 for (i = (ftbl_sz - 1); i >= 0; i--) {
2804 if ((irb_ptr = ipftbl[i]) == NULL)
2805 continue;
2806 for (j = 0; j < htbl_sz; j++) {
2807 irb = &irb_ptr[j];
2808 if (irb->irb_ire == NULL)
2740 /* knobs such that routine is called only for v6 case */
2741 if (ipftbl == ip_forwarding_table_v6) {
2742 for (i = (ftbl_sz - 1); i >= 0; i--) {
2743 if ((irb_ptr = ipftbl[i]) == NULL)
2809 continue;
2744 continue;
2810 IRB_REFHOLD(irb);
2811 for (ire = irb->irb_ire; ire != NULL;
2812 ire = ire->ire_next) {
2813 if (match_flags == 0 &&
2814 zoneid == ALL_ZONES) {
2815 ret = B_TRUE;
2816 } else {
2817 ret = ire_walk_ill_match(
2818 match_flags, ire_type,
2819 ire, ill, zoneid);
2745 for (j = 0; j < htbl_sz; j++) {
2746 irb = &irb_ptr[j];
2747 if (irb->irb_ire == NULL)
2748 continue;
2749
2750 IRB_REFHOLD(irb);
2751 for (ire = irb->irb_ire; ire != NULL;
2752 ire = ire->ire_next) {
2753 if (match_flags == 0 &&
2754 zoneid == ALL_ZONES) {
2755 ret = B_TRUE;
2756 } else {
2757 ret =
2758 ire_walk_ill_match(
2759 match_flags,
2760 ire_type, ire, ill,
2761 zoneid);
2762 }
2763 if (ret)
2764 (*func)(ire, arg);
2820 }
2765 }
2821 if (ret)
2822 (*func)(ire, arg);
2766 IRB_REFRELE(irb);
2823 }
2767 }
2824 IRB_REFRELE(irb);
2825 }
2768 }
2769 } else {
2770 (void) memset(&rtfarg, 0, sizeof (rtfarg));
2771 rtfarg.rt_func = func;
2772 rtfarg.rt_arg = arg;
2773 if (match_flags != 0) {
2774 rtfarg.rt_match_flags = match_flags;
2775 }
2776 rtfarg.rt_ire_type = ire_type;
2777 rtfarg.rt_ill = ill;
2778 rtfarg.rt_zoneid = zoneid;
2779 (void) ip_ftable->rnh_walktree(ip_ftable, rtfunc,
2780 &rtfarg);
2826 }
2827 }
2828
2829 /*
2830 * Optimize by not looking at the cache table if there
2831 * is a MATCH_IRE_TYPE specified with no IRE_CACHETABLE
2832 * specified in ire_type.
2833 */

--- 260 unchanged lines hidden (view full) ---

3094
3095 RELEASE_CONN_LOCK(q);
3096 return (error);
3097}
3098
3099/*
3100 * Add a fully initialized IRE to an appropriate table based on
3101 * ire_type.
2781 }
2782 }
2783
2784 /*
2785 * Optimize by not looking at the cache table if there
2786 * is a MATCH_IRE_TYPE specified with no IRE_CACHETABLE
2787 * specified in ire_type.
2788 */

--- 260 unchanged lines hidden (view full) ---

3049
3050 RELEASE_CONN_LOCK(q);
3051 return (error);
3052}
3053
3054/*
3055 * Add a fully initialized IRE to an appropriate table based on
3056 * ire_type.
3057 *
3058 * allow_unresolved == B_FALSE indicates a legacy code-path call
3059 * that has prohibited the addition of incomplete ire's. If this
3060 * parameter is set, and we find an nce that is in a state other
3061 * than ND_REACHABLE, we fail the add. Note that nce_state could be
3062 * something other than ND_REACHABLE if nce_reinit has just
3063 * kicked in and reset the nce.
3102 */
3103int
3064 */
3065int
3104ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func)
3066ire_add(ire_t **irep, queue_t *q, mblk_t *mp, ipsq_func_t func,
3067 boolean_t allow_unresolved)
3105{
3106 ire_t *ire1;
3107 ill_t *stq_ill = NULL;
3108 ill_t *ill;
3109 ipif_t *ipif = NULL;
3110 ill_walk_context_t ctx;
3111 ire_t *ire = *irep;
3112 int error;

--- 9 unchanged lines hidden (view full) ---

3122 /* Copy the ire to a kmem_alloc'ed area */
3123 ire1 = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
3124 if (ire1 == NULL) {
3125 ip1dbg(("ire_add: alloc failed\n"));
3126 ire_delete(ire);
3127 *irep = NULL;
3128 return (ENOMEM);
3129 }
3068{
3069 ire_t *ire1;
3070 ill_t *stq_ill = NULL;
3071 ill_t *ill;
3072 ipif_t *ipif = NULL;
3073 ill_walk_context_t ctx;
3074 ire_t *ire = *irep;
3075 int error;

--- 9 unchanged lines hidden (view full) ---

3085 /* Copy the ire to a kmem_alloc'ed area */
3086 ire1 = kmem_cache_alloc(ire_cache, KM_NOSLEEP);
3087 if (ire1 == NULL) {
3088 ip1dbg(("ire_add: alloc failed\n"));
3089 ire_delete(ire);
3090 *irep = NULL;
3091 return (ENOMEM);
3092 }
3093 ire->ire_marks &= ~IRE_MARK_UNCACHED;
3130 *ire1 = *ire;
3131 ire1->ire_mp = NULL;
3094 *ire1 = *ire;
3095 ire1->ire_mp = NULL;
3096 ire1->ire_stq_ifindex = 0;
3132 freeb(ire->ire_mp);
3133 ire = ire1;
3134 }
3135 if (ire->ire_stq != NULL)
3136 stq_ill = (ill_t *)ire->ire_stq->q_ptr;
3137
3138 if (ire->ire_type == IRE_CACHE) {
3139 /*

--- 131 unchanged lines hidden (view full) ---

3271 /*
3272 * In case ire was changed
3273 */
3274 *irep = ire;
3275 if (ire->ire_ipversion == IPV6_VERSION) {
3276 error = ire_add_v6(irep, q, mp, func);
3277 } else {
3278 if (ire->ire_in_ill == NULL)
3097 freeb(ire->ire_mp);
3098 ire = ire1;
3099 }
3100 if (ire->ire_stq != NULL)
3101 stq_ill = (ill_t *)ire->ire_stq->q_ptr;
3102
3103 if (ire->ire_type == IRE_CACHE) {
3104 /*

--- 131 unchanged lines hidden (view full) ---

3236 /*
3237 * In case ire was changed
3238 */
3239 *irep = ire;
3240 if (ire->ire_ipversion == IPV6_VERSION) {
3241 error = ire_add_v6(irep, q, mp, func);
3242 } else {
3243 if (ire->ire_in_ill == NULL)
3279 error = ire_add_v4(irep, q, mp, func);
3244 error = ire_add_v4(irep, q, mp, func, allow_unresolved);
3280 else
3281 error = ire_add_srcif_v4(irep, q, mp, func);
3282 }
3283 if (ipif != NULL)
3284 ipif_refrele(ipif);
3285 return (error);
3286}
3287

--- 5 unchanged lines hidden (view full) ---

3293 *
3294 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
3295 * and IRE_CACHE.
3296 *
3297 * NOTE : This function is called as writer though not required
3298 * by this function.
3299 */
3300static int
3245 else
3246 error = ire_add_srcif_v4(irep, q, mp, func);
3247 }
3248 if (ipif != NULL)
3249 ipif_refrele(ipif);
3250 return (error);
3251}
3252

--- 5 unchanged lines hidden (view full) ---

3258 *
3259 * The cache table contains IRE_BROADCAST/IRE_LOCAL/IRE_LOOPBACK
3260 * and IRE_CACHE.
3261 *
3262 * NOTE : This function is called as writer though not required
3263 * by this function.
3264 */
3265static int
3301ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func)
3266ire_add_v4(ire_t **ire_p, queue_t *q, mblk_t *mp, ipsq_func_t func,
3267 boolean_t allow_unresolved)
3302{
3303 ire_t *ire1;
3268{
3269 ire_t *ire1;
3304 int mask_table_index;
3305 irb_t *irb_ptr;
3306 ire_t **irep;
3307 int flags;
3308 ire_t *pire = NULL;
3309 ill_t *stq_ill;
3310 ire_t *ire = *ire_p;
3311 int error;
3270 irb_t *irb_ptr;
3271 ire_t **irep;
3272 int flags;
3273 ire_t *pire = NULL;
3274 ill_t *stq_ill;
3275 ire_t *ire = *ire_p;
3276 int error;
3277 boolean_t need_refrele = B_FALSE;
3278 nce_t *nce;
3312
3313 if (ire->ire_ipif != NULL)
3314 ASSERT(!MUTEX_HELD(&ire->ire_ipif->ipif_ill->ill_lock));
3315 if (ire->ire_stq != NULL)
3316 ASSERT(!MUTEX_HELD(
3317 &((ill_t *)(ire->ire_stq->q_ptr))->ill_lock));
3318 ASSERT(ire->ire_ipversion == IPV4_VERSION);
3319 ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */

--- 26 unchanged lines hidden (view full) ---

3346 case IRE_DEFAULT:
3347 if ((ire->ire_flags & RTF_SETSRC) == 0)
3348 ire->ire_src_addr = 0;
3349 break;
3350 case IRE_IF_RESOLVER:
3351 case IRE_IF_NORESOLVER:
3352 break;
3353 default:
3279
3280 if (ire->ire_ipif != NULL)
3281 ASSERT(!MUTEX_HELD(&ire->ire_ipif->ipif_ill->ill_lock));
3282 if (ire->ire_stq != NULL)
3283 ASSERT(!MUTEX_HELD(
3284 &((ill_t *)(ire->ire_stq->q_ptr))->ill_lock));
3285 ASSERT(ire->ire_ipversion == IPV4_VERSION);
3286 ASSERT(ire->ire_mp == NULL); /* Calls should go through ire_add */

--- 26 unchanged lines hidden (view full) ---

3313 case IRE_DEFAULT:
3314 if ((ire->ire_flags & RTF_SETSRC) == 0)
3315 ire->ire_src_addr = 0;
3316 break;
3317 case IRE_IF_RESOLVER:
3318 case IRE_IF_NORESOLVER:
3319 break;
3320 default:
3354 printf("ire_add_v4: ire %p has unrecognized IRE type (%d)\n",
3355 (void *)ire, ire->ire_type);
3321 ip0dbg(("ire_add_v4: ire %p has unrecognized IRE type (%d)\n",
3322 (void *)ire, ire->ire_type));
3356 ire_delete(ire);
3357 *ire_p = NULL;
3358 return (EINVAL);
3359 }
3360
3361 /* Make sure the address is properly masked. */
3362 ire->ire_addr &= ire->ire_mask;
3363
3323 ire_delete(ire);
3324 *ire_p = NULL;
3325 return (EINVAL);
3326 }
3327
3328 /* Make sure the address is properly masked. */
3329 ire->ire_addr &= ire->ire_mask;
3330
3364 if ((ire->ire_type & IRE_CACHETABLE) == 0) {
3365 /* IRE goes into Forward Table */
3366 mask_table_index = ire->ire_masklen;
3367 if ((ip_forwarding_table[mask_table_index]) == NULL) {
3368 irb_t *ptr;
3369 int i;
3370
3371 ptr = (irb_t *)mi_zalloc((ip_ftable_hash_size *
3372 sizeof (irb_t)));
3373 if (ptr == NULL) {
3374 ire_delete(ire);
3375 *ire_p = NULL;
3376 return (ENOMEM);
3377 }
3378 for (i = 0; i < ip_ftable_hash_size; i++) {
3379 rw_init(&ptr[i].irb_lock, NULL,
3380 RW_DEFAULT, NULL);
3381 }
3382 mutex_enter(&ire_ft_init_lock);
3383 if (ip_forwarding_table[mask_table_index] == NULL) {
3384 ip_forwarding_table[mask_table_index] = ptr;
3385 mutex_exit(&ire_ft_init_lock);
3386 } else {
3387 /*
3388 * Some other thread won the race in
3389 * initializing the forwarding table at the
3390 * same index.
3391 */
3392 mutex_exit(&ire_ft_init_lock);
3393 for (i = 0; i < ip_ftable_hash_size; i++) {
3394 rw_destroy(&ptr[i].irb_lock);
3395 }
3396 mi_free(ptr);
3397 }
3398 }
3399 irb_ptr = &(ip_forwarding_table[mask_table_index][
3400 IRE_ADDR_HASH(ire->ire_addr, ip_ftable_hash_size)]);
3401 } else {
3402 irb_ptr = &(ip_cache_table[IRE_ADDR_HASH(ire->ire_addr,
3403 ip_cache_table_size)]);
3404 }
3405 /*
3406 * ip_newroute/ip_newroute_multi are unable to prevent the deletion
3407 * of the interface route while adding an IRE_CACHE for an on-link
3408 * destination in the IRE_IF_RESOLVER case, since the ire has to
3409 * go to ARP and return. We can't do a REFHOLD on the
3410 * associated interface ire for fear of ARP freeing the message.
3411 * Here we look up the interface ire in the forwarding table and
3412 * make sure that the interface route has not been deleted.
3413 */
3414 if (ire->ire_type == IRE_CACHE && ire->ire_gateway_addr == 0 &&
3415 ((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) {
3331 /*
3332 * ip_newroute/ip_newroute_multi are unable to prevent the deletion
3333 * of the interface route while adding an IRE_CACHE for an on-link
3334 * destination in the IRE_IF_RESOLVER case, since the ire has to
3335 * go to ARP and return. We can't do a REFHOLD on the
3336 * associated interface ire for fear of ARP freeing the message.
3337 * Here we look up the interface ire in the forwarding table and
3338 * make sure that the interface route has not been deleted.
3339 */
3340 if (ire->ire_type == IRE_CACHE && ire->ire_gateway_addr == 0 &&
3341 ((ill_t *)ire->ire_stq->q_ptr)->ill_net_type == IRE_IF_RESOLVER) {
3342
3416 ASSERT(ire->ire_max_fragp == NULL);
3417 if (CLASSD(ire->ire_addr) && !(ire->ire_flags & RTF_SETSRC)) {
3418 /*
3419 * The ihandle that we used in ip_newroute_multi
3420 * comes from the interface route corresponding
3421 * to ire_ipif. Lookup here to see if it exists
3422 * still.
3423 * If the ire has a source address assigned using

--- 59 unchanged lines hidden (view full) ---

3483 * (if we get to ip_wput again) eventually we need an hidden
3484 * ire for this packet to go out. MATCH_IRE_ILL is explicitly
3485 * done below.
3486 */
3487 if (ire->ire_type == IRE_CACHE &&
3488 (ire->ire_marks & IRE_MARK_HIDDEN))
3489 flags |= (MATCH_IRE_MARK_HIDDEN);
3490 }
3343 ASSERT(ire->ire_max_fragp == NULL);
3344 if (CLASSD(ire->ire_addr) && !(ire->ire_flags & RTF_SETSRC)) {
3345 /*
3346 * The ihandle that we used in ip_newroute_multi
3347 * comes from the interface route corresponding
3348 * to ire_ipif. Lookup here to see if it exists
3349 * still.
3350 * If the ire has a source address assigned using

--- 59 unchanged lines hidden (view full) ---

3410 * (if we get to ip_wput again) eventually we need an hidden
3411 * ire for this packet to go out. MATCH_IRE_ILL is explicitly
3412 * done below.
3413 */
3414 if (ire->ire_type == IRE_CACHE &&
3415 (ire->ire_marks & IRE_MARK_HIDDEN))
3416 flags |= (MATCH_IRE_MARK_HIDDEN);
3417 }
3418 if ((ire->ire_type & IRE_CACHETABLE) == 0) {
3419 irb_ptr = ire_get_bucket(ire);
3420 need_refrele = B_TRUE;
3421 if (irb_ptr == NULL) {
3422 /*
3423 * This assumes that the ire has not added
3424 * a reference to the ipif.
3425 */
3426 ire->ire_ipif = NULL;
3427 ire_delete(ire);
3428 if (pire != NULL) {
3429 IRB_REFRELE(pire->ire_bucket);
3430 ire_refrele(pire);
3431 }
3432 *ire_p = NULL;
3433 return (EINVAL);
3434 }
3435 } else {
3436 irb_ptr = &(ip_cache_table[IRE_ADDR_HASH(ire->ire_addr,
3437 ip_cache_table_size)]);
3438 }
3491
3492 /*
3493 * Start the atomic add of the ire. Grab the ill locks,
3494 * ill_g_usesrc_lock and the bucket lock. Check for condemned
3495 *
3496 * If ipif or ill is changing ire_atomic_start() may queue the
3497 * request and return EINPROGRESS.
3439
3440 /*
3441 * Start the atomic add of the ire. Grab the ill locks,
3442 * ill_g_usesrc_lock and the bucket lock. Check for condemned
3443 *
3444 * If ipif or ill is changing ire_atomic_start() may queue the
3445 * request and return EINPROGRESS.
3446 * To avoid lock order problems, get the ndp4.ndp_g_lock.
3498 */
3447 */
3448 mutex_enter(&ndp4.ndp_g_lock);
3499 error = ire_atomic_start(irb_ptr, ire, q, mp, func);
3500 if (error != 0) {
3449 error = ire_atomic_start(irb_ptr, ire, q, mp, func);
3450 if (error != 0) {
3451 mutex_exit(&ndp4.ndp_g_lock);
3501 /*
3502 * We don't know whether it is a valid ipif or not.
3503 * So, set it to NULL. This assumes that the ire has not added
3504 * a reference to the ipif.
3505 */
3506 ire->ire_ipif = NULL;
3507 ire_delete(ire);
3508 if (pire != NULL) {
3509 IRB_REFRELE(pire->ire_bucket);
3510 ire_refrele(pire);
3511 }
3512 *ire_p = NULL;
3452 /*
3453 * We don't know whether it is a valid ipif or not.
3454 * So, set it to NULL. This assumes that the ire has not added
3455 * a reference to the ipif.
3456 */
3457 ire->ire_ipif = NULL;
3458 ire_delete(ire);
3459 if (pire != NULL) {
3460 IRB_REFRELE(pire->ire_bucket);
3461 ire_refrele(pire);
3462 }
3463 *ire_p = NULL;
3464 if (need_refrele)
3465 IRB_REFRELE(irb_ptr);
3513 return (error);
3514 }
3515 /*
3516 * To avoid creating ires having stale values for the ire_max_frag
3517 * we get the latest value atomically here. For more details
3518 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
3519 * in ip_rput_dlpi_writer
3520 */

--- 57 unchanged lines hidden (view full) ---

3578 * after adding, we return a held ire. This will
3579 * avoid a lookup in the caller again. If the callers
3580 * don't want to use it, they need to do a REFRELE.
3581 */
3582 ip1dbg(("found dup ire existing %p new %p",
3583 (void *)ire1, (void *)ire));
3584 IRE_REFHOLD(ire1);
3585 ire_atomic_end(irb_ptr, ire);
3466 return (error);
3467 }
3468 /*
3469 * To avoid creating ires having stale values for the ire_max_frag
3470 * we get the latest value atomically here. For more details
3471 * see the block comment in ip_sioctl_mtu and in DL_NOTE_SDU_CHANGE
3472 * in ip_rput_dlpi_writer
3473 */

--- 57 unchanged lines hidden (view full) ---

3531 * after adding, we return a held ire. This will
3532 * avoid a lookup in the caller again. If the callers
3533 * don't want to use it, they need to do a REFRELE.
3534 */
3535 ip1dbg(("found dup ire existing %p new %p",
3536 (void *)ire1, (void *)ire));
3537 IRE_REFHOLD(ire1);
3538 ire_atomic_end(irb_ptr, ire);
3539 mutex_exit(&ndp4.ndp_g_lock);
3586 ire_delete(ire);
3587 if (pire != NULL) {
3588 /*
3589 * Assert that it is not removed from the
3590 * list yet.
3591 */
3592 ASSERT(pire->ire_ptpn != NULL);
3593 IRB_REFRELE(pire->ire_bucket);
3594 ire_refrele(pire);
3595 }
3596 *ire_p = ire1;
3540 ire_delete(ire);
3541 if (pire != NULL) {
3542 /*
3543 * Assert that it is not removed from the
3544 * list yet.
3545 */
3546 ASSERT(pire->ire_ptpn != NULL);
3547 IRB_REFRELE(pire->ire_bucket);
3548 ire_refrele(pire);
3549 }
3550 *ire_p = ire1;
3551 if (need_refrele)
3552 IRB_REFRELE(irb_ptr);
3597 return (0);
3598 }
3599 }
3553 return (0);
3554 }
3555 }
3600
3556 if (ire->ire_type & IRE_CACHE) {
3557 ASSERT(ire->ire_stq != NULL);
3558 nce = ndp_lookup_v4(ire_to_ill(ire),
3559 ((ire->ire_gateway_addr != INADDR_ANY) ?
3560 &ire->ire_gateway_addr : &ire->ire_addr),
3561 B_TRUE);
3562 if (nce != NULL)
3563 mutex_enter(&nce->nce_lock);
3564 /*
3565 * if the nce is NCE_F_CONDEMNED, or if it is not ND_REACHABLE
3566 * and the caller has prohibited the addition of incomplete
3567 * ire's, we fail the add. Note that nce_state could be
3568 * something other than ND_REACHABLE if nce_reinit has just
3569 * kicked in and reset the nce.
3570 */
3571 if ((nce == NULL) ||
3572 (nce->nce_flags & NCE_F_CONDEMNED) ||
3573 (!allow_unresolved &&
3574 ((nce->nce_state & ND_REACHABLE) == 0))) {
3575 if (nce != NULL)
3576 mutex_exit(&nce->nce_lock);
3577 ire_atomic_end(irb_ptr, ire);
3578 mutex_exit(&ndp4.ndp_g_lock);
3579 if (nce != NULL)
3580 NCE_REFRELE(nce);
3581 DTRACE_PROBE1(ire__no__nce, ire_t *, ire);
3582 ire_delete(ire);
3583 if (pire != NULL) {
3584 IRB_REFRELE(pire->ire_bucket);
3585 ire_refrele(pire);
3586 }
3587 *ire_p = NULL;
3588 if (need_refrele)
3589 IRB_REFRELE(irb_ptr);
3590 return (EINVAL);
3591 } else {
3592 ire->ire_nce = nce;
3593 mutex_exit(&nce->nce_lock);
3594 /*
3595 * We are associating this nce to the ire, so
3596 * change the nce ref taken in ndp_lookup_v4() from
3597 * NCE_REFHOLD to NCE_REFHOLD_NOTR
3598 */
3599 NCE_REFHOLD_TO_REFHOLD_NOTR(ire->ire_nce);
3600 }
3601 }
3601 /*
3602 * Make it easy for ip_wput_ire() to hit multiple broadcast ires by
3603 * grouping identical addresses together on the hash chain. We also
3604 * don't want to send multiple copies out if there are two ills part
3605 * of the same group. Thus we group the ires with same addr and same
3606 * ill group together so that ip_wput_ire can easily skip all the
3607 * ires with same addr and same group after sending the first copy.
3608 * We do this only for IRE_BROADCASTs as ip_wput_ire is currently

--- 78 unchanged lines hidden (view full) ---

3687 while (ire->ire_addr == ire1->ire_addr) {
3688 irep = &ire1->ire_next;
3689 ire1 = *irep;
3690 if (ire1 == NULL)
3691 break;
3692 }
3693 }
3694
3602 /*
3603 * Make it easy for ip_wput_ire() to hit multiple broadcast ires by
3604 * grouping identical addresses together on the hash chain. We also
3605 * don't want to send multiple copies out if there are two ills part
3606 * of the same group. Thus we group the ires with same addr and same
3607 * ill group together so that ip_wput_ire can easily skip all the
3608 * ires with same addr and same group after sending the first copy.
3609 * We do this only for IRE_BROADCASTs as ip_wput_ire is currently

--- 78 unchanged lines hidden (view full) ---

3688 while (ire->ire_addr == ire1->ire_addr) {
3689 irep = &ire1->ire_next;
3690 ire1 = *irep;
3691 if (ire1 == NULL)
3692 break;
3693 }
3694 }
3695
3695 if (ire->ire_type == IRE_DEFAULT) {
3696 /*
3697 * We keep a count of default gateways which is used when
3698 * assigning them as routes.
3699 */
3700 ip_ire_default_count++;
3701 ASSERT(ip_ire_default_count != 0); /* Wraparound */
3702 }
3703 /* Insert at *irep */
3704 ire1 = *irep;
3705 if (ire1 != NULL)
3706 ire1->ire_ptpn = &ire->ire_next;
3707 ire->ire_next = ire1;
3708 /* Link the new one in. */
3709 ire->ire_ptpn = irep;
3710

--- 20 unchanged lines hidden (view full) ---

3731 * lookup again after we return the IRE.
3732 *
3733 * NOTE : We don't have to use atomics as this is appearing
3734 * in the list for the first time and no one else can bump
3735 * up the reference count on this yet.
3736 */
3737 IRE_REFHOLD_LOCKED(ire);
3738 BUMP_IRE_STATS(ire_stats_v4, ire_stats_inserted);
3696 /* Insert at *irep */
3697 ire1 = *irep;
3698 if (ire1 != NULL)
3699 ire1->ire_ptpn = &ire->ire_next;
3700 ire->ire_next = ire1;
3701 /* Link the new one in. */
3702 ire->ire_ptpn = irep;
3703

--- 20 unchanged lines hidden (view full) ---

3724 * lookup again after we return the IRE.
3725 *
3726 * NOTE : We don't have to use atomics as this is appearing
3727 * in the list for the first time and no one else can bump
3728 * up the reference count on this yet.
3729 */
3730 IRE_REFHOLD_LOCKED(ire);
3731 BUMP_IRE_STATS(ire_stats_v4, ire_stats_inserted);
3732
3739 irb_ptr->irb_ire_cnt++;
3733 irb_ptr->irb_ire_cnt++;
3734 if (irb_ptr->irb_marks & IRB_MARK_FTABLE)
3735 irb_ptr->irb_nire++;
3736
3740 if (ire->ire_marks & IRE_MARK_TEMPORARY)
3741 irb_ptr->irb_tmp_ire_cnt++;
3742
3743 if (ire->ire_ipif != NULL) {
3744 ire->ire_ipif->ipif_ire_cnt++;
3745 if (ire->ire_stq != NULL) {
3746 stq_ill = (ill_t *)ire->ire_stq->q_ptr;
3747 stq_ill->ill_ire_cnt++;
3748 }
3749 } else {
3750 ASSERT(ire->ire_stq == NULL);
3751 }
3752
3753 ire_atomic_end(irb_ptr, ire);
3737 if (ire->ire_marks & IRE_MARK_TEMPORARY)
3738 irb_ptr->irb_tmp_ire_cnt++;
3739
3740 if (ire->ire_ipif != NULL) {
3741 ire->ire_ipif->ipif_ire_cnt++;
3742 if (ire->ire_stq != NULL) {
3743 stq_ill = (ill_t *)ire->ire_stq->q_ptr;
3744 stq_ill->ill_ire_cnt++;
3745 }
3746 } else {
3747 ASSERT(ire->ire_stq == NULL);
3748 }
3749
3750 ire_atomic_end(irb_ptr, ire);
3751 mutex_exit(&ndp4.ndp_g_lock);
3754
3755 if (pire != NULL) {
3756 /* Assert that it is not removed from the list yet */
3757 ASSERT(pire->ire_ptpn != NULL);
3758 IRB_REFRELE(pire->ire_bucket);
3759 ire_refrele(pire);
3760 }
3761
3762 if (ire->ire_type != IRE_CACHE) {
3763 /*
3752
3753 if (pire != NULL) {
3754 /* Assert that it is not removed from the list yet */
3755 ASSERT(pire->ire_ptpn != NULL);
3756 IRB_REFRELE(pire->ire_bucket);
3757 ire_refrele(pire);
3758 }
3759
3760 if (ire->ire_type != IRE_CACHE) {
3761 /*
3764 * For ire's with with host mask see if there is an entry
3762 * For ire's with host mask see if there is an entry
3765 * in the cache. If there is one flush the whole cache as
3766 * there might be multiple entries due to RTF_MULTIRT (CGTP).
3767 * If no entry is found than there is no need to flush the
3768 * cache.
3769 */
3770 if (ire->ire_mask == IP_HOST_MASK) {
3771 ire_t *lire;
3772 lire = ire_ctable_lookup(ire->ire_addr, NULL, IRE_CACHE,

--- 11 unchanged lines hidden (view full) ---

3784 * in the list. Otherwise the fast path ack won't find the ire in
3785 * the table.
3786 */
3787 if (ire->ire_type == IRE_CACHE || ire->ire_type == IRE_BROADCAST)
3788 ire_fastpath(ire);
3789 if (ire->ire_ipif != NULL)
3790 ASSERT(!MUTEX_HELD(&ire->ire_ipif->ipif_ill->ill_lock));
3791 *ire_p = ire;
3763 * in the cache. If there is one flush the whole cache as
3764 * there might be multiple entries due to RTF_MULTIRT (CGTP).
3765 * If no entry is found then there is no need to flush the
3766 * cache.
3767 */
3768 if (ire->ire_mask == IP_HOST_MASK) {
3769 ire_t *lire;
3770 lire = ire_ctable_lookup(ire->ire_addr, NULL, IRE_CACHE,

--- 11 unchanged lines hidden (view full) ---

3782 * in the list. Otherwise the fast path ack won't find the ire in
3783 * the table.
3784 */
3785 if (ire->ire_type == IRE_CACHE || ire->ire_type == IRE_BROADCAST)
3786 ire_fastpath(ire);
3787 if (ire->ire_ipif != NULL)
3788 ASSERT(!MUTEX_HELD(&ire->ire_ipif->ipif_ill->ill_lock));
3789 *ire_p = ire;
3790 if (need_refrele) {
3791 IRB_REFRELE(irb_ptr);
3792 }
3792 return (0);
3793}
3794
3795/*
3793 return (0);
3794}
3795
3796/*
3796 * Search for all HOST REDIRECT routes that are
3797 * pointing at the specified gateway and
3798 * delete them. This routine is called only
3799 * when a default gateway is going away.
3800 */
/*
 * Walk every hash bucket of the host-route forwarding table (the table at
 * index IP_MASK_TABLE_SIZE - 1) and call ire_delete() on each
 * IRE_HOST_REDIRECT entry whose ire_gateway_addr equals 'gateway'.
 * Returns nothing; does nothing if that table has not been allocated.
 */
3801static void
3802ire_delete_host_redirects(ipaddr_t gateway)
3803{
3804 irb_t *irb_ptr;
3805 irb_t *irb;
3806 ire_t *ire;
3807 int i;
3808
3809 /* get the hash table for HOST routes */
3810 irb_ptr = ip_forwarding_table[(IP_MASK_TABLE_SIZE - 1)];
/* No table at this index means there are no host routes to scan. */
3811 if (irb_ptr == NULL)
3812 return;
3813 for (i = 0; (i < ip_ftable_hash_size); i++) {
3814 irb = &irb_ptr[i];
/*
 * Hold the bucket for the duration of the walk.
 * NOTE(review): presumably this makes ire_delete() only mark the
 * ire condemned rather than unlink it, so ire_next stays valid
 * while we iterate -- confirm against ire_delete().
 */
3815 IRB_REFHOLD(irb);
3816 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
/* Only redirect-created host routes are candidates. */
3817 if (ire->ire_type != IRE_HOST_REDIRECT)
3818 continue;
3819 if (ire->ire_gateway_addr == gateway) {
3820 ire_delete(ire);
3821 }
3822 }
/* Drop the walk hold taken above. */
3823 IRB_REFRELE(irb);
3824 }
3825}
3826
3827/*
3828 * IRB_REFRELE is the only caller of the function. ire_unlink calls to
3829 * do the final cleanup for this ire.
3830 */
3831void
3832ire_cleanup(ire_t *ire)
3833{
3834 ire_t *ire_next;
3835

--- 28 unchanged lines hidden (view full) ---

3864ire_unlink(irb_t *irb)
3865{
3866 ire_t *ire;
3867 ire_t *ire1;
3868 ire_t **ptpn;
3869 ire_t *ire_list = NULL;
3870
3871 ASSERT(RW_WRITE_HELD(&irb->irb_lock));
3797 * IRB_REFRELE is the only caller of the function. ire_unlink calls to
3798 * do the final cleanup for this ire.
3799 */
3800void
3801ire_cleanup(ire_t *ire)
3802{
3803 ire_t *ire_next;
3804

--- 28 unchanged lines hidden (view full) ---

3833ire_unlink(irb_t *irb)
3834{
3835 ire_t *ire;
3836 ire_t *ire1;
3837 ire_t **ptpn;
3838 ire_t *ire_list = NULL;
3839
3840 ASSERT(RW_WRITE_HELD(&irb->irb_lock));
3872 ASSERT(irb->irb_refcnt == 0);
3873 ASSERT(irb->irb_marks & IRE_MARK_CONDEMNED);
3841 ASSERT(((irb->irb_marks & IRB_MARK_FTABLE) && irb->irb_refcnt == 1) ||
3842 (irb->irb_refcnt == 0));
3843 ASSERT(irb->irb_marks & IRB_MARK_CONDEMNED);
3874 ASSERT(irb->irb_ire != NULL);
3875
3876 for (ire = irb->irb_ire; ire != NULL; ire = ire1) {
3877 ire1 = ire->ire_next;
3878 if (ire->ire_marks & IRE_MARK_CONDEMNED) {
3879 ptpn = ire->ire_ptpn;
3880 ire1 = ire->ire_next;
3881 if (ire1)

--- 5 unchanged lines hidden (view full) ---

3887 /*
3888 * IRE is out of the list. We need to adjust
3889 * the accounting before the caller drops
3890 * the lock.
3891 */
3892 if (ire->ire_ipversion == IPV6_VERSION) {
3893 ASSERT(ipv6_ire_default_count != 0);
3894 ipv6_ire_default_count--;
3844 ASSERT(irb->irb_ire != NULL);
3845
3846 for (ire = irb->irb_ire; ire != NULL; ire = ire1) {
3847 ire1 = ire->ire_next;
3848 if (ire->ire_marks & IRE_MARK_CONDEMNED) {
3849 ptpn = ire->ire_ptpn;
3850 ire1 = ire->ire_next;
3851 if (ire1)

--- 5 unchanged lines hidden (view full) ---

3857 /*
3858 * IRE is out of the list. We need to adjust
3859 * the accounting before the caller drops
3860 * the lock.
3861 */
3862 if (ire->ire_ipversion == IPV6_VERSION) {
3863 ASSERT(ipv6_ire_default_count != 0);
3864 ipv6_ire_default_count--;
3895 } else {
3896 ASSERT(ip_ire_default_count != 0);
3897 ip_ire_default_count--;
3898 }
3899 }
3900 /*
3901 * We need to call ire_delete_v4 or ire_delete_v6
3902 * to clean up the cache or the redirects pointing at
3903 * the default gateway. We need to drop the lock
3904 * as ire_flush_cache/ire_delete_host_redirects require
3905 * so. But we can't drop the lock, as ire_unlink needs
3906 * to atomically remove the ires from the list.
3907 * So, create a temporary list of CONDEMNED ires
3908 * for doing ire_delete_v4/ire_delete_v6 operations
3909 * later on.
3910 */
3911 ire->ire_next = ire_list;
3912 ire_list = ire;
3913 }
3914 }
3865 }
3866 }
3867 /*
3868 * We need to call ire_delete_v4 or ire_delete_v6
3869 * to clean up the cache or the redirects pointing at
3870 * the default gateway. We need to drop the lock
3871 * as ire_flush_cache/ire_delete_host_redirects require
3872 * so. But we can't drop the lock, as ire_unlink needs
3873 * to atomically remove the ires from the list.
3874 * So, create a temporary list of CONDEMNED ires
3875 * for doing ire_delete_v4/ire_delete_v6 operations
3876 * later on.
3877 */
3878 ire->ire_next = ire_list;
3879 ire_list = ire;
3880 }
3881 }
3915 ASSERT(irb->irb_refcnt == 0);
3916 irb->irb_marks &= ~IRE_MARK_CONDEMNED;
3917 ASSERT(ire_list != NULL);
3882 irb->irb_marks &= ~IRB_MARK_CONDEMNED;
3918 return (ire_list);
3919}
3920
3921/*
3922 * Delete all the cache entries with this 'addr'. When IP gets a gratuitous
3883 return (ire_list);
3884}
3885
3886/*
3887 * Delete all the cache entries with this 'addr'. When IP gets a gratuitous
3923 * ARP message on any of its interface queue, it scans the cache table and
3924 * deletes all the cache entries for that address. This function is called
3925 * from ip_arp_news in ip.c and also for ARP ioctl processing in ip_if.c.
3926 * ip_ire_clookup_and_delete returns true if it finds at least one cache entry
3927 * which is used by ip_arp_news to determine if it needs to do an ire_walk_v4.
3928 * The return value is also used for the same purpose by ARP IOCTL processing
3929 * in ip_if.c when deleting ARP entries. For SIOC*IFARP ioctls in addition to
3930 * the address, ip_if->ipif_ill also needs to be matched.
3888 * ARP message on any of its interface queues, it scans the nce table and
3889 * calls ndp_delete() for the appropriate nce. This action
3890 * also deletes all the neighbor/ire cache entries for that address.
3891 * This function is called from ip_arp_news in ip.c and also for
3892 * ARP ioctl processing in ip_if.c. ip_ire_clookup_and_delete returns
3893 * true if it finds an nce entry, which is used by ip_arp_news to determine if
3894 * it needs to do an ire_walk_v4. The return value is also used for the
3895 * same purpose by ARP IOCTL processing in ip_if.c when deleting
3896 * ARP entries. For SIOC*IFARP ioctls in addition to the address,
3897 * ip_if->ipif_ill also needs to be matched.
3931 */
3932boolean_t
3933ip_ire_clookup_and_delete(ipaddr_t addr, ipif_t *ipif)
3934{
3898 */
3899boolean_t
3900ip_ire_clookup_and_delete(ipaddr_t addr, ipif_t *ipif)
3901{
3935 irb_t *irb;
3936 ire_t *cire;
3937 ill_t *ill;
3938 boolean_t found = B_FALSE, loop_end = B_FALSE;
3902 ill_t *ill;
3903 nce_t *nce;
3939
3904
3940 irb = &ip_cache_table[IRE_ADDR_HASH(addr, ip_cache_table_size)];
3941 IRB_REFHOLD(irb);
3942 for (cire = irb->irb_ire; cire != NULL; cire = cire->ire_next) {
3943 if (cire->ire_marks & IRE_MARK_CONDEMNED)
3944 continue;
3945 if (cire->ire_addr == addr) {
3905 ill = (ipif ? ipif->ipif_ill : NULL);
3946
3906
3947 /* This signifies start of an address match */
3948 if (!loop_end)
3949 loop_end = B_TRUE;
3907 if (ill != NULL) {
3908 /*
3909 * clean up the nce (and any relevant ire's) that matches
3910 * on addr and ill.
3911 */
3912 nce = ndp_lookup_v4(ill, &addr, B_FALSE);
3913 if (nce != NULL) {
3914 ndp_delete(nce);
3915 return (B_TRUE);
3916 }
3917 } else {
3918 /*
3919 * ill is wildcard. clean up all nce's and
3920 * ire's that match on addr
3921 */
3922 nce_clookup_t cl;
3950
3923
3951 /* We are interested only in IRE_CACHEs */
3952 if (cire->ire_type == IRE_CACHE) {
3953 /* If we want a match with the ILL */
3954 if (ipif != NULL &&
3955 ((ill = ire_to_ill(cire)) == NULL ||
3956 ill != ipif->ipif_ill)) {
3957 continue;
3958 }
3959 if (!found)
3960 found = B_TRUE;
3961 ire_delete(cire);
3962 }
3963 /* End of the match */
3964 } else if (loop_end)
3965 break;
3924 cl.ncecl_addr = addr;
3925 cl.ncecl_found = B_FALSE;
3926
3927 ndp_walk_common(&ndp4, NULL,
3928 (pfi_t)ip_nce_clookup_and_delete, (uchar_t *)&cl, B_TRUE);
3929
3930 /*
3931 * ncecl_found would be set by ip_nce_clookup_and_delete if
3932 * we found a matching nce.
3933 */
3934 return (cl.ncecl_found);
3966 }
3935 }
3967 IRB_REFRELE(irb);
3936 return (B_FALSE);
3968
3937
3969 return (found);
3938}
3970
3939
3940/* Delete the supplied nce if its nce_addr matches the supplied address */
3941static void
3942ip_nce_clookup_and_delete(nce_t *nce, void *arg)
3943{
3944 nce_clookup_t *cl = (nce_clookup_t *)arg;
3945 ipaddr_t nce_addr;
3946
3947 IN6_V4MAPPED_TO_IPADDR(&nce->nce_addr, nce_addr);
3948 if (nce_addr == cl->ncecl_addr) {
3949 cl->ncecl_found = B_TRUE;
3950 /* clean up the nce (and any relevant ire's) */
3951 ndp_delete(nce);
3952 }
3971}
3972
3973/*
3953}
3954
3955/*
3956 * Clean up the radix node for this ire. Must be called by IRB_REFRELE
3957 * when there are no ire's left in the bucket. Returns TRUE if the bucket
3958 * is deleted and freed.
3959 */
/*
 * irb_inactive: try to remove the radix-tree entry that owns bucket 'irb'.
 *
 * Locking: takes the ip_ftable radix head write lock first, then
 * irb->irb_lock as writer.
 *
 * Returns B_TRUE if the entry was removed from ip_ftable and freed (the
 * bucket must not be touched afterwards), B_FALSE if the bucket is still
 * in use and was left in place.
 */
3960boolean_t
3961irb_inactive(irb_t *irb)
3962{
3963 struct rt_entry *rt;
3964 struct radix_node *rn;
3965
/*
 * The rt_entry obtained from the bucket is also used as the radix node
 * (same address; the ASSERT after rnh_deladdr checks this).
 * NOTE(review): presumably IRB2RT maps the bucket to the rt_entry that
 * embeds it -- confirm against the macro definition.
 */
3966 rt = IRB2RT(irb);
3967 rn = (struct radix_node *)rt;
3968
3969 /* first remove it from the radix tree. */
3970 RADIX_NODE_HEAD_WLOCK(ip_ftable);
3971 rw_enter(&irb->irb_lock, RW_WRITER);
/*
 * Only tear down when exactly one reference remains and no ires are
 * accounted to the bucket (irb_nire == 0).
 * NOTE(review): the single remaining reference is presumably the
 * caller's own hold -- confirm against IRB_REFRELE.
 */
3972 if (irb->irb_refcnt == 1 && irb->irb_nire == 0) {
3973 rn = ip_ftable->rnh_deladdr(rn->rn_key, rn->rn_mask,
3974 ip_ftable);
3975 DTRACE_PROBE1(irb__free, rt_t *, rt);
/* The node handed back by the delete must be the one we asked for. */
3976 ASSERT((void *)rn == (void *)rt);
3977 Free(rt, rt_entry_cache);
/*
 * No rw_exit() on this path: irb_lock is not released here because,
 * per the note below, it goes away together with rt.
 */
3978 /* irb_lock is freed */
3979 RADIX_NODE_HEAD_UNLOCK(ip_ftable);
3980 return (B_TRUE);
3981 }
/* Bucket still referenced or populated: drop both locks and leave it. */
3982 rw_exit(&irb->irb_lock);
3983 RADIX_NODE_HEAD_UNLOCK(ip_ftable);
3984 return (B_FALSE);
3985}
3986
3987/*
3974 * Delete the specified IRE.
3975 */
3976void
3977ire_delete(ire_t *ire)
3978{
3979 ire_t *ire1;
3980 ire_t **ptpn;
3981 irb_t *irb;
3982
3988 * Delete the specified IRE.
3989 */
3990void
3991ire_delete(ire_t *ire)
3992{
3993 ire_t *ire1;
3994 ire_t **ptpn;
3995 irb_t *irb;
3996
3983 /*
3984 * It was never inserted in the list. Should call REFRELE
3985 * to free this IRE.
3986 */
3987 if ((irb = ire->ire_bucket) == NULL) {
3997 if ((irb = ire->ire_bucket) == NULL) {
3998 /*
3999 * It was never inserted in the list. Should call REFRELE
4000 * to free this IRE.
4001 */
3988 IRE_REFRELE_NOTR(ire);
3989 return;
3990 }
3991
3992 rw_enter(&irb->irb_lock, RW_WRITER);
3993
4002 IRE_REFRELE_NOTR(ire);
4003 return;
4004 }
4005
4006 rw_enter(&irb->irb_lock, RW_WRITER);
4007
4008 if (irb->irb_rr_origin == ire) {
4009 irb->irb_rr_origin = NULL;
4010 }
4011
3994 /*
3995 * In case of V4 we might still be waiting for fastpath ack.
3996 */
4012 /*
4013 * In case of V4 we might still be waiting for fastpath ack.
4014 */
3997 if (ire->ire_nce == NULL && ire->ire_stq != NULL) {
4015 if (ire->ire_ipversion == IPV4_VERSION && ire->ire_stq != NULL) {
3998 ill_t *ill;
3999
4000 ill = ire_to_ill(ire);
4001 if (ill != NULL)
4002 ire_fastpath_list_delete(ill, ire);
4003 }
4004
4005 if (ire->ire_ptpn == NULL) {

--- 11 unchanged lines hidden (view full) ---

4017 * delete this ire.
4018 */
4019 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
4020 irb->irb_ire_cnt--;
4021 if (ire->ire_marks & IRE_MARK_TEMPORARY)
4022 irb->irb_tmp_ire_cnt--;
4023 ire->ire_marks |= IRE_MARK_CONDEMNED;
4024 }
4016 ill_t *ill;
4017
4018 ill = ire_to_ill(ire);
4019 if (ill != NULL)
4020 ire_fastpath_list_delete(ill, ire);
4021 }
4022
4023 if (ire->ire_ptpn == NULL) {

--- 11 unchanged lines hidden (view full) ---

4035 * delete this ire.
4036 */
4037 if (!(ire->ire_marks & IRE_MARK_CONDEMNED)) {
4038 irb->irb_ire_cnt--;
4039 if (ire->ire_marks & IRE_MARK_TEMPORARY)
4040 irb->irb_tmp_ire_cnt--;
4041 ire->ire_marks |= IRE_MARK_CONDEMNED;
4042 }
4025 irb->irb_marks |= IRE_MARK_CONDEMNED;
4043 irb->irb_marks |= IRB_MARK_CONDEMNED;
4026 rw_exit(&irb->irb_lock);
4027 return;
4028 }
4029
4030 /*
4031 * Normally to delete an ire, we walk the bucket. While we
4032 * walk the bucket, we normally bump up irb_refcnt and hence
4033 * we return from above where we mark CONDEMNED and the ire

--- 23 unchanged lines hidden (view full) ---

4057 if (ire->ire_type == IRE_DEFAULT) {
4058 /*
4059 * IRE is out of the list. We need to adjust the
4060 * accounting before we drop the lock.
4061 */
4062 if (ire->ire_ipversion == IPV6_VERSION) {
4063 ASSERT(ipv6_ire_default_count != 0);
4064 ipv6_ire_default_count--;
4044 rw_exit(&irb->irb_lock);
4045 return;
4046 }
4047
4048 /*
4049 * Normally to delete an ire, we walk the bucket. While we
4050 * walk the bucket, we normally bump up irb_refcnt and hence
4051 * we return from above where we mark CONDEMNED and the ire

--- 23 unchanged lines hidden (view full) ---

4075 if (ire->ire_type == IRE_DEFAULT) {
4076 /*
4077 * IRE is out of the list. We need to adjust the
4078 * accounting before we drop the lock.
4079 */
4080 if (ire->ire_ipversion == IPV6_VERSION) {
4081 ASSERT(ipv6_ire_default_count != 0);
4082 ipv6_ire_default_count--;
4065 } else {
4066 ASSERT(ip_ire_default_count != 0);
4067 ip_ire_default_count--;
4068 }
4069 }
4070 irb->irb_ire_cnt--;
4083 }
4084 }
4085 irb->irb_ire_cnt--;
4086
4071 if (ire->ire_marks & IRE_MARK_TEMPORARY)
4072 irb->irb_tmp_ire_cnt--;
4073 rw_exit(&irb->irb_lock);
4074
4075 if (ire->ire_ipversion == IPV6_VERSION) {
4076 ire_delete_v6(ire);
4077 } else {
4078 ire_delete_v4(ire);

--- 33 unchanged lines hidden (view full) ---

4112
4113/*
4114 * IRE_REFRELE/ire_refrele are the only caller of the function. It calls
4115 * to free the ire when the reference count goes to zero.
4116 */
4117void
4118ire_inactive(ire_t *ire)
4119{
4087 if (ire->ire_marks & IRE_MARK_TEMPORARY)
4088 irb->irb_tmp_ire_cnt--;
4089 rw_exit(&irb->irb_lock);
4090
4091 if (ire->ire_ipversion == IPV6_VERSION) {
4092 ire_delete_v6(ire);
4093 } else {
4094 ire_delete_v4(ire);

--- 33 unchanged lines hidden (view full) ---

4128
4129/*
4130 * IRE_REFRELE/ire_refrele are the only caller of the function. It calls
4131 * to free the ire when the reference count goes to zero.
4132 */
4133void
4134ire_inactive(ire_t *ire)
4135{
4120 mblk_t *mp;
4121 nce_t *nce;
4122 ill_t *ill = NULL;
4123 ill_t *stq_ill = NULL;
4124 ill_t *in_ill = NULL;
4125 ipif_t *ipif;
4126 boolean_t need_wakeup = B_FALSE;
4136 nce_t *nce;
4137 ill_t *ill = NULL;
4138 ill_t *stq_ill = NULL;
4139 ill_t *in_ill = NULL;
4140 ipif_t *ipif;
4141 boolean_t need_wakeup = B_FALSE;
4142 irb_t *irb;
4127
4128 ASSERT(ire->ire_refcnt == 0);
4129 ASSERT(ire->ire_ptpn == NULL);
4130 ASSERT(ire->ire_next == NULL);
4131
4143
4144 ASSERT(ire->ire_refcnt == 0);
4145 ASSERT(ire->ire_ptpn == NULL);
4146 ASSERT(ire->ire_next == NULL);
4147
4148 if (ire->ire_gw_secattr != NULL) {
4149 ire_gw_secattr_free(ire->ire_gw_secattr);
4150 ire->ire_gw_secattr = NULL;
4151 }
4152
4153 if (ire->ire_mp != NULL) {
4154 ASSERT(ire->ire_fastpath == NULL);
4155 ASSERT(ire->ire_bucket == NULL);
4156 mutex_destroy(&ire->ire_lock);
4157 BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed);
4158 if (ire->ire_nce != NULL)
4159 NCE_REFRELE_NOTR(ire->ire_nce);
4160 freeb(ire->ire_mp);
4161 return;
4162 }
4163
4132 if ((nce = ire->ire_nce) != NULL) {
4164 if ((nce = ire->ire_nce) != NULL) {
4133 /* Only IPv6 IRE_CACHE type has an nce */
4134 ASSERT(ire->ire_type == IRE_CACHE);
4135 ASSERT(ire->ire_ipversion == IPV6_VERSION);
4136 NCE_REFRELE_NOTR(nce);
4137 ire->ire_nce = NULL;
4138 }
4165 NCE_REFRELE_NOTR(nce);
4166 ire->ire_nce = NULL;
4167 }
4168
4139 if (ire->ire_ipif == NULL)
4140 goto end;
4141
4142 ipif = ire->ire_ipif;
4143 ill = ipif->ipif_ill;
4144
4145 if (ire->ire_bucket == NULL) {
4146 /* The ire was never inserted in the table. */

--- 104 unchanged lines hidden (view full) ---

4251 }
4252 }
4253 }
4254 }
4255end:
4256 /* This should be true for both V4 and V6 */
4257 ASSERT(ire->ire_fastpath == NULL);
4258
4169 if (ire->ire_ipif == NULL)
4170 goto end;
4171
4172 ipif = ire->ire_ipif;
4173 ill = ipif->ipif_ill;
4174
4175 if (ire->ire_bucket == NULL) {
4176 /* The ire was never inserted in the table. */

--- 104 unchanged lines hidden (view full) ---

4281 }
4282 }
4283 }
4284 }
4285end:
4286 /* This should be true for both V4 and V6 */
4287 ASSERT(ire->ire_fastpath == NULL);
4288
4259
4289 if ((ire->ire_type & IRE_FORWARDTABLE) &&
4290 (ire->ire_ipversion == IPV4_VERSION) &&
4291 ((irb = ire->ire_bucket) != NULL)) {
4292 rw_enter(&irb->irb_lock, RW_WRITER);
4293 irb->irb_nire--;
4294 /*
4295 * Instead of examining the conditions for freeing
4296 * the radix node here, we do it by calling
4297 * IRB_REFRELE which is a single point in the code
4298 * that embeds that logic. Bump up the refcnt to
4299 * be able to call IRB_REFRELE
4300 */
4301 IRB_REFHOLD_LOCKED(irb);
4302 rw_exit(&irb->irb_lock);
4303 IRB_REFRELE(irb);
4304 }
4260 ire->ire_ipif = NULL;
4261
4305 ire->ire_ipif = NULL;
4306
4262 /* Free the xmit header, and the IRE itself. */
4263 if ((mp = ire->ire_dlureq_mp) != NULL) {
4264 freeb(mp);
4265 ire->ire_dlureq_mp = NULL;
4266 }
4267
4268 if ((mp = ire->ire_fp_mp) != NULL) {
4269 freeb(mp);
4270 ire->ire_fp_mp = NULL;
4271 }
4272
4273 if (ire->ire_in_ill != NULL) {
4274 ire->ire_in_ill = NULL;
4275 }
4276
4307 if (ire->ire_in_ill != NULL) {
4308 ire->ire_in_ill = NULL;
4309 }
4310
4277 if (ire->ire_gw_secattr != NULL) {
4278 ire_gw_secattr_free(ire->ire_gw_secattr);
4279 ire->ire_gw_secattr = NULL;
4280 }
4281#ifdef IRE_DEBUG
4282 ire_trace_inactive(ire);
4283#endif
4284 mutex_destroy(&ire->ire_lock);
4285 if (ire->ire_ipversion == IPV6_VERSION) {
4286 BUMP_IRE_STATS(ire_stats_v6, ire_stats_freed);
4287 } else {
4288 BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed);
4289 }
4311#ifdef IRE_DEBUG
4312 ire_trace_inactive(ire);
4313#endif
4314 mutex_destroy(&ire->ire_lock);
4315 if (ire->ire_ipversion == IPV6_VERSION) {
4316 BUMP_IRE_STATS(ire_stats_v6, ire_stats_freed);
4317 } else {
4318 BUMP_IRE_STATS(ire_stats_v4, ire_stats_freed);
4319 }
4290 if (ire->ire_mp != NULL) {
4291 /* Still in an mblk */
4292 freeb(ire->ire_mp);
4293 } else {
4294 /* Has been allocated out of the cache */
4295 kmem_cache_free(ire_cache, ire);
4296 }
4320 ASSERT(ire->ire_mp == NULL);
4321 /* Has been allocated out of the cache */
4322 kmem_cache_free(ire_cache, ire);
4297}
4298
4299/*
4300 * ire_walk routine to delete all IRE_CACHE/IRE_HOST_REDIRECT entries
4301 * that have a given gateway address.
4302 */
4303void
4304ire_delete_cache_gw(ire_t *ire, char *cp)

--- 147 unchanged lines hidden (view full) ---

4452}
4453
4454/*
4455 * Matches the arguments passed with the values in the ire.
4456 *
4457 * Note: for match types that match using "ipif" passed in, ipif
4458 * must be checked for non-NULL before calling this routine.
4459 */
4323}
4324
4325/*
4326 * ire_walk routine to delete all IRE_CACHE/IRE_HOST_REDIRECT entries
4327 * that have a given gateway address.
4328 */
4329void
4330ire_delete_cache_gw(ire_t *ire, char *cp)

--- 147 unchanged lines hidden (view full) ---

4478}
4479
4480/*
4481 * Matches the arguments passed with the values in the ire.
4482 *
4483 * Note: for match types that match using "ipif" passed in, ipif
4484 * must be checked for non-NULL before calling this routine.
4485 */
4460static boolean_t
4486boolean_t
4461ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway,
4462 int type, const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
4463 const ts_label_t *tsl, int match_flags)
4464{
4465 ill_t *ire_ill = NULL, *dst_ill;
4466 ill_t *ipif_ill = NULL;
4467 ill_group_t *ire_ill_group = NULL;
4468 ill_group_t *ipif_ill_group = NULL;

--- 137 unchanged lines hidden (view full) ---

4606 ire->ire_marks & IRE_MARK_HIDDEN)) &&
4607 ((!(match_flags & MATCH_IRE_MARK_PRIVATE_ADDR)) ||
4608 (ire->ire_type != IRE_CACHE ||
4609 ire->ire_marks & IRE_MARK_PRIVATE_ADDR)) &&
4610 ((!(match_flags & MATCH_IRE_ILL)) ||
4611 (ire_ill == ipif_ill)) &&
4612 ((!(match_flags & MATCH_IRE_IHANDLE)) ||
4613 (ire->ire_ihandle == ihandle)) &&
4487ire_match_args(ire_t *ire, ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway,
4488 int type, const ipif_t *ipif, zoneid_t zoneid, uint32_t ihandle,
4489 const ts_label_t *tsl, int match_flags)
4490{
4491 ill_t *ire_ill = NULL, *dst_ill;
4492 ill_t *ipif_ill = NULL;
4493 ill_group_t *ire_ill_group = NULL;
4494 ill_group_t *ipif_ill_group = NULL;

--- 137 unchanged lines hidden (view full) ---

4632 ire->ire_marks & IRE_MARK_HIDDEN)) &&
4633 ((!(match_flags & MATCH_IRE_MARK_PRIVATE_ADDR)) ||
4634 (ire->ire_type != IRE_CACHE ||
4635 ire->ire_marks & IRE_MARK_PRIVATE_ADDR)) &&
4636 ((!(match_flags & MATCH_IRE_ILL)) ||
4637 (ire_ill == ipif_ill)) &&
4638 ((!(match_flags & MATCH_IRE_IHANDLE)) ||
4639 (ire->ire_ihandle == ihandle)) &&
4640 ((!(match_flags & MATCH_IRE_MASK)) ||
4641 (ire->ire_mask == mask)) &&
4614 ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
4615 (ire_ill == ipif_ill) ||
4616 (ire_ill_group != NULL &&
4617 ire_ill_group == ipif_ill_group)) &&
4618 ((!(match_flags & MATCH_IRE_SECATTR)) ||
4619 (!is_system_labeled()) ||
4620 (tsol_ire_match_gwattr(ire, tsl) == 0))) {
4621 /* We found the matched IRE */

--- 37 unchanged lines hidden (view full) ---

4659 }
4660 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
4661 ire = ire_ftable_lookup(addr, mask, gateway, type, ipif, pire,
4662 zoneid, 0, tsl, flags);
4663 }
4664 return (ire);
4665}
4666
4642 ((!(match_flags & MATCH_IRE_ILL_GROUP)) ||
4643 (ire_ill == ipif_ill) ||
4644 (ire_ill_group != NULL &&
4645 ire_ill_group == ipif_ill_group)) &&
4646 ((!(match_flags & MATCH_IRE_SECATTR)) ||
4647 (!is_system_labeled()) ||
4648 (tsol_ire_match_gwattr(ire, tsl) == 0))) {
4649 /* We found the matched IRE */

--- 37 unchanged lines hidden (view full) ---

4687 }
4688 if ((flags & MATCH_IRE_TYPE) == 0 || (type & IRE_FORWARDTABLE) != 0) {
4689 ire = ire_ftable_lookup(addr, mask, gateway, type, ipif, pire,
4690 zoneid, 0, tsl, flags);
4691 }
4692 return (ire);
4693}
4694
4667/*
4668 * Lookup a route in the forwarding table.
4669 * A specific lookup is indicated by passing the
4670 * required parameters and indicating the
4671 * match required in the flags field.
4672 *
4673 * Looking for default route can be done in three ways
4674 * 1) pass mask as 0 and set MATCH_IRE_MASK in flags field
4675 * along with other matches.
4676 * 2) pass type as IRE_DEFAULT and set MATCH_IRE_TYPE in flags
4677 * field along with other matches.
4678 * 3) if the destination and mask are passed as zeros.
4679 *
4680 * A request to return a default route if no route
4681 * is found, can be specified by setting MATCH_IRE_DEFAULT
4682 * in flags.
4683 *
4684 * It does not support recursion more than one level. It
4685 * will do recursive lookup only when the lookup maps to
4686 * a prefix or default route and MATCH_IRE_RECURSIVE flag is passed.
4687 *
4688 * If the routing table is setup to allow more than one level
4689 * of recursion, the cleaning up cache table will not work resulting
4690 * in invalid routing.
4691 *
4692 * Supports IP_BOUND_IF by following the ipif/ill when recursing.
4693 *
4694 * NOTE : When this function returns NULL, pire has already been released.
4695 * pire is valid only when this function successfully returns an
4696 * ire.
 *
 * Return value: the matching ire with a reference held on it
 * (IRE_REFHOLD is done before it is returned), or NULL when nothing
 * matched. NULL is also returned right away when MATCH_IRE_SRC,
 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is requested with a NULL ipif.
 *
 * pire: if non-NULL, *pire is cleared on entry and is only filled in
 * (with the held parent ire) on the MATCH_IRE_RECURSIVE path.
4697 */
4698ire_t *
4699ire_ftable_lookup(ipaddr_t addr, ipaddr_t mask, ipaddr_t gateway,
4700 int type, const ipif_t *ipif, ire_t **pire, zoneid_t zoneid,
4701 uint32_t ihandle, const ts_label_t *tsl, int flags)
4702{
4703 irb_t *irb_ptr;
4704 ire_t *ire = NULL;
4705 int i;
4706 ipaddr_t gw_addr;
4707
4695
4708 ASSERT(ipif == NULL || !ipif->ipif_isv6);
4709 ASSERT(!(flags & MATCH_IRE_WQ));
4710
4711 /*
4712 * When we return NULL from this function, we should make
4713 * sure that *pire is NULL so that the callers will not
4714 * wrongly REFRELE the pire.
4715 */
4716 if (pire != NULL)
4717 *pire = NULL;
4718 /*
4719 * ire_match_args() will dereference ipif if MATCH_IRE_SRC,
4720 * MATCH_IRE_ILL or MATCH_IRE_ILL_GROUP is set.
4721 */
4722 if ((flags & (MATCH_IRE_SRC | MATCH_IRE_ILL | MATCH_IRE_ILL_GROUP)) &&
4723 (ipif == NULL))
4724 return (NULL);
4725
4726 /*
4727 * If the mask is known, the lookup
4728 * is simple, if the mask is not known
4729 * we need to search.
4730 */
4731 if (flags & MATCH_IRE_MASK) {
4732 uint_t masklen;
4733
4734 masklen = ip_mask_to_plen(mask);
4735 if (ip_forwarding_table[masklen] == NULL)
4736 return (NULL);
4737 irb_ptr = &(ip_forwarding_table[masklen][
4738 IRE_ADDR_HASH(addr & mask, ip_ftable_hash_size)]);
4739 rw_enter(&irb_ptr->irb_lock, RW_READER);
4740 for (ire = irb_ptr->irb_ire; ire != NULL;
4741 ire = ire->ire_next) {
4742 if (ire->ire_marks & IRE_MARK_CONDEMNED)
4743 continue;
4744 if (ire_match_args(ire, addr, mask, gateway, type, ipif,
4745 zoneid, ihandle, tsl, flags))
4746 goto found_ire;
4747 }
4748 rw_exit(&irb_ptr->irb_lock);
4749 } else {
4750 /*
4751 * In this case we don't know the mask, we need to
4752 * search the table assuming different mask sizes.
4753 * we start with 32 bit mask, we don't allow default here.
4754 */
4755 for (i = (IP_MASK_TABLE_SIZE - 1); i > 0; i--) {
4756 ipaddr_t tmpmask;
4757
4758 if ((ip_forwarding_table[i]) == NULL)
4759 continue;
4760 tmpmask = ip_plen_to_mask(i);
4761 irb_ptr = &ip_forwarding_table[i][
4762 IRE_ADDR_HASH(addr & tmpmask,
4763 ip_ftable_hash_size)];
4764 rw_enter(&irb_ptr->irb_lock, RW_READER);
4765 for (ire = irb_ptr->irb_ire; ire != NULL;
4766 ire = ire->ire_next) {
4767 if (ire->ire_marks & IRE_MARK_CONDEMNED)
4768 continue;
4769 if (ire_match_args(ire, addr, ire->ire_mask,
4770 gateway, type, ipif, zoneid, ihandle,
4771 tsl, flags))
4772 goto found_ire;
4773 }
4774 rw_exit(&irb_ptr->irb_lock);
4775 }
4776 }
4777 /*
4778 * We come here if no route has yet been found.
4779 *
4780 * Handle the case where default route is
4781 * requested by specifying type as one of the possible
4782 * types that can have a zero mask (IRE_DEFAULT and IRE_INTERFACE).
4783 *
4784 * If MATCH_IRE_MASK is specified, then the appropriate default route
4785 * would have been found above if it exists so it isn't looked up here.
4786 * If MATCH_IRE_DEFAULT was also specified, then a default route will be
4787 * searched for later.
4788 */
4789 if ((flags & (MATCH_IRE_TYPE | MATCH_IRE_MASK)) == MATCH_IRE_TYPE &&
4790 (type & (IRE_DEFAULT | IRE_INTERFACE))) {
4791 if ((ip_forwarding_table[0])) {
4792 /* addr & mask is zero for defaults */
4793 irb_ptr = &ip_forwarding_table[0][
4794 IRE_ADDR_HASH(0, ip_ftable_hash_size)];
4795 rw_enter(&irb_ptr->irb_lock, RW_READER);
4796 for (ire = irb_ptr->irb_ire; ire != NULL;
4797 ire = ire->ire_next) {
4798 if (ire->ire_marks & IRE_MARK_CONDEMNED)
4799 continue;
4800 if (ire_match_args(ire, addr, (ipaddr_t)0,
4801 gateway, type, ipif, zoneid, ihandle,
4802 tsl, flags))
4803 goto found_ire;
4804 }
4805 rw_exit(&irb_ptr->irb_lock);
4806 }
4807 }
4808 /*
4809 * we come here only if no route is found.
4810 * see if the default route can be used which is allowed
4811 * only if the default matching criteria is specified.
4812 * The ip_ire_default_count tracks the number of IRE_DEFAULT
4813 * entries. However, the ip_forwarding_table[0] also contains
4814 * interface routes thus the count can be zero.
4815 */
4816 if ((flags & (MATCH_IRE_DEFAULT | MATCH_IRE_MASK)) ==
4817 MATCH_IRE_DEFAULT) {
4818 ire_t *ire_origin;
4819 uint_t g_index;
4820 uint_t index;
4821
4822 if (ip_forwarding_table[0] == NULL)
4823 return (NULL);
4824 irb_ptr = &(ip_forwarding_table[0])[0];
4825
4826 /*
4827 * Keep a tab on the bucket while looking at the IRE_DEFAULT
4828 * entries. We need to keep track of a particular IRE
4829 * (ire_origin) so this ensures that it will not be unlinked
4830 * from the hash list during the recursive lookup below.
4831 */
4832 IRB_REFHOLD(irb_ptr);
4833 ire = irb_ptr->irb_ire;
4834 if (ire == NULL) {
4835 IRB_REFRELE(irb_ptr);
4836 return (NULL);
4837 }
4838
4839 /*
4840 * Get the index first, since it can be changed by other
4841 * threads. Then get to the right default route skipping
4842 * default interface routes if any. As we hold a reference on
4843 * the IRE bucket, ip_ire_default_count can only increase so we
4844 * can't reach the end of the hash list unexpectedly.
4845 */
4846 if (ip_ire_default_count != 0) {
4847 g_index = ip_ire_default_index++;
4848 index = g_index % ip_ire_default_count;
4849 while (index != 0) {
4850 if (!(ire->ire_type & IRE_INTERFACE))
4851 index--;
4852 ire = ire->ire_next;
4853 }
4854 ASSERT(ire != NULL);
4855 } else {
4856 /*
4857 * No default routes, so we only have default interface
4858 * routes: don't enter the first loop.
4859 */
4860 ire = NULL;
4861 }
4862
4863 /*
4864 * Round-robin the default routers list looking for a route that
4865 * matches the passed in parameters. If we can't find a default
4866 * route (IRE_DEFAULT), look for interface default routes.
4867 * We start with the ire we found above and we walk the hash
4868 * list until we're back where we started, see
4869 * ire_get_next_default_ire(). It doesn't matter if default
4870 * routes are added or deleted by other threads - we know this
4871 * ire will stay in the list because we hold a reference on the
4872 * ire bucket.
4873 * NB: if we only have interface default routes, ire is NULL so
4874 * we don't even enter this loop (see above).
4875 */
4876 ire_origin = ire;
4877 for (; ire != NULL;
4878 ire = ire_get_next_default_ire(ire, ire_origin)) {
4879
4880 if (ire_match_args(ire, addr, (ipaddr_t)0,
4881 gateway, type, ipif, zoneid, ihandle, tsl, flags)) {
4882 int match_flags = 0;
4883 ire_t *rire;
4884
4885 /*
4886 * The potentially expensive call to
4887 * ire_route_lookup() is avoided when we have
4888 * only one default route.
4889 */
4890 if (ip_ire_default_count == 1 ||
4891 zoneid == ALL_ZONES) {
4892 IRE_REFHOLD(ire);
4893 IRB_REFRELE(irb_ptr);
4894 goto found_ire_held;
4895 }
4896 /*
4897 * When we're in a local zone, we're only
4898 * interested in default routers that are
4899 * reachable through ipifs within our zone.
4900 */
4901 if (ire->ire_ipif != NULL) {
4902 match_flags |= MATCH_IRE_ILL_GROUP;
4903 }
4904 rire = ire_route_lookup(ire->ire_gateway_addr,
4905 0, 0, 0, ire->ire_ipif, NULL, zoneid, tsl,
4906 match_flags);
4907 if (rire != NULL) {
 /* rire was only a reachability probe; keep ire instead. */
4908 ire_refrele(rire);
4909 IRE_REFHOLD(ire);
4910 IRB_REFRELE(irb_ptr);
4911 goto found_ire_held;
4912 }
4913 }
4914 }
4915 /*
4916 * Either there are no default routes or we could not
4917 * find a default route. Look for an interface default
4918 * route matching the args passed in. No round robin
4919 * here. Just pick the right one.
4920 */
4921 for (ire = irb_ptr->irb_ire; ire != NULL;
4922 ire = ire->ire_next) {
4923
4924 if (!(ire->ire_type & IRE_INTERFACE))
4925 continue;
4926
4927 if (ire->ire_marks & IRE_MARK_CONDEMNED)
4928 continue;
4929
4930 if (ire_match_args(ire, addr, (ipaddr_t)0,
4931 gateway, type, ipif, zoneid, ihandle, tsl,
4932 flags)) {
4933 IRE_REFHOLD(ire);
4934 IRB_REFRELE(irb_ptr);
4935 goto found_ire_held;
4936 }
4937 }
4938 IRB_REFRELE(irb_ptr);
4939 }
4940 ASSERT(ire == NULL);
4941 return (NULL);
 /*
 * Every "goto found_ire" above is taken from inside a bucket walk,
 * i.e. with irb_ptr->irb_lock still held as reader. Take the
 * reference on the ire first and only then drop the bucket lock.
 */
4942found_ire:
4943 ASSERT((ire->ire_marks & IRE_MARK_CONDEMNED) == 0);
4944 IRE_REFHOLD(ire);
4945 rw_exit(&irb_ptr->irb_lock);
4946
 /*
 * From here on ire carries a reference and no bucket lock or bucket
 * reference is held by this function.
 */
4947found_ire_held:
4948 ASSERT(ire->ire_type != IRE_MIPRTUN && ire->ire_in_ill == NULL);
4949 if ((flags & MATCH_IRE_RJ_BHOLE) &&
4950 (ire->ire_flags & (RTF_BLACKHOLE | RTF_REJECT))) {
4951 return (ire);
4952 }
4953 /*
4954 * At this point, IRE that was found must be an IRE_FORWARDTABLE
4955 * type. If this is a recursive lookup and an IRE_INTERFACE type was
4956 * found, return that. If it was some other IRE_FORWARDTABLE type of
4957 * IRE (one of the prefix types), then it is necessary to fill in the
4958 * parent IRE pointed to by pire, and then lookup the gateway address of
4959 * the parent. For backwards compatibility, if this lookup returns an
4960 * IRE other than an IRE_CACHETABLE or IRE_INTERFACE, then one more level
4961 * of lookup is done.
4962 */
4963 if (flags & MATCH_IRE_RECURSIVE) {
4964 const ipif_t *gw_ipif;
4965 int match_flags = MATCH_IRE_DSTONLY;
4966 ire_t *save_ire;
4967
4968 if (ire->ire_type & IRE_INTERFACE)
4969 return (ire);
4970 if (pire != NULL)
4971 *pire = ire;
4972 /*
4973 * If we can't find an IRE_INTERFACE or the caller has not
4974 * asked for pire, we need to REFRELE the save_ire.
4975 */
4976 save_ire = ire;
4977
4978 /*
4979 * Currently MATCH_IRE_ILL is never used with
4980 * (MATCH_IRE_RECURSIVE | MATCH_IRE_DEFAULT) while
4981 * sending out packets as MATCH_IRE_ILL is used only
4982 * for communicating with on-link hosts. We can't assert
4983 * that here as RTM_GET calls this function with
4984 * MATCH_IRE_ILL | MATCH_IRE_DEFAULT | MATCH_IRE_RECURSIVE.
4985 * We have already used the MATCH_IRE_ILL in determining
4986 * the right prefix route at this point. To match the
4987 * behavior of how we locate routes while sending out
4988 * packets, we don't want to use MATCH_IRE_ILL below
4989 * while locating the interface route.
4990 */
4991 if (ire->ire_ipif != NULL)
4992 match_flags |= MATCH_IRE_ILL_GROUP;
4993
4994 ire = ire_route_lookup(ire->ire_gateway_addr, 0, 0, 0,
4995 ire->ire_ipif, NULL, zoneid, tsl, match_flags);
4996 if (ire == NULL) {
4997 /*
4998 * Do not release the parent ire if MATCH_IRE_PARENT
4999 * is set. Also return it via ire.
5000 */
5001 if (flags & MATCH_IRE_PARENT) {
5002 if (pire != NULL) {
5003 /*
5004 * Need an extra REFHOLD, if the parent
5005 * ire is returned via both ire and
5006 * pire.
5007 */
5008 IRE_REFHOLD(save_ire);
5009 }
5010 ire = save_ire;
5011 } else {
5012 ire_refrele(save_ire);
5013 if (pire != NULL)
5014 *pire = NULL;
5015 }
5016 return (ire);
5017 }
5018 if (ire->ire_type & (IRE_CACHETABLE | IRE_INTERFACE)) {
5019 /*
5020 * If the caller did not ask for pire, release
5021 * it now.
5022 */
5023 if (pire == NULL) {
5024 ire_refrele(save_ire);
5025 }
5026 return (ire);
5027 }
 /*
 * Second (and last) level: the gateway lookup returned some
 * other ire type, so retry restricted to cache/interface
 * types. Copy what is needed out of ire before releasing it.
 */
5028 match_flags |= MATCH_IRE_TYPE;
5029 gw_addr = ire->ire_gateway_addr;
5030 gw_ipif = ire->ire_ipif;
5031 ire_refrele(ire);
5032 ire = ire_route_lookup(gw_addr, 0, 0,
5033 (IRE_CACHETABLE | IRE_INTERFACE), gw_ipif, NULL, zoneid,
5034 tsl, match_flags);
5035 if (ire == NULL) {
5036 /*
5037 * Do not release the parent ire if MATCH_IRE_PARENT
5038 * is set. Also return it via ire.
5039 */
5040 if (flags & MATCH_IRE_PARENT) {
5041 if (pire != NULL) {
5042 /*
5043 * Need an extra REFHOLD, if the
5044 * parent ire is returned via both
5045 * ire and pire.
5046 */
5047 IRE_REFHOLD(save_ire);
5048 }
5049 ire = save_ire;
5050 } else {
5051 ire_refrele(save_ire);
5052 if (pire != NULL)
5053 *pire = NULL;
5054 }
5055 return (ire);
5056 } else if (pire == NULL) {
5057 /*
5058 * If the caller did not ask for pire, release
5059 * it now.
5060 */
5061 ire_refrele(save_ire);
5062 }
5063 return (ire);
5064 }
 /* Non-recursive lookup: *pire was cleared on entry and never set. */
5065 ASSERT(pire == NULL || *pire == NULL);
5066 return (ire);
5067}
5068
5069/*
5070 * Delete the IRE cache for the gateway and all IRE caches whose
5071 * ire_gateway_addr points to this gateway, and allow them to
5072 * be created on demand by ip_newroute.
5073 */
5074void
5075ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid)
5076{

--- 182 unchanged lines hidden (view full) ---

5259
5260 match_flags |= MATCH_IRE_IHANDLE;
5261 ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE,
5262 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, NULL, match_flags);
5263 return (ire);
5264}
5265
5266/*
4696/*
4697 * Delete the IRE cache for the gateway and all IRE caches whose
4698 * ire_gateway_addr points to this gateway, and allow them to
4699 * be created on demand by ip_newroute.
4700 */
4701void
4702ire_clookup_delete_cache_gw(ipaddr_t addr, zoneid_t zoneid)
4703{

--- 182 unchanged lines hidden (view full) ---

4886
4887 match_flags |= MATCH_IRE_IHANDLE;
4888 ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE,
4889 gw_ipif, NULL, ALL_ZONES, cire->ire_ihandle, NULL, match_flags);
4890 return (ire);
4891}
4892
4893/*
5267 * Locate the interface ire that is tied to the cache ire 'cire' via
5268 * cire->ire_ihandle.
5269 *
5270 * We are trying to create the cache ire for an onlink destn. or
5271 * gateway in 'cire'. We are called from ire_add_v4() in the IRE_IF_RESOLVER
5272 * case, after the ire has come back from ARP.
5273 */
5274ire_t *
5275ire_ihandle_lookup_onlink(ire_t *cire)
5276{
5277 ire_t *ire;
5278 int match_flags;
5279 int i;
5280 int j;
5281 irb_t *irb_ptr;
5282
 /*
 * Returns an IRE_INTERFACE ire whose ire_ihandle equals
 * cire->ire_ihandle, or NULL if none is found. On both success
 * paths the ire is returned with a reference held: by
 * ire_ftable_lookup() for the direct lookup, and by the explicit
 * IRE_REFHOLD in the brute-force scan.
 */
5283 ASSERT(cire != NULL);
5284
5285 /*
5286 * We don't need to specify the zoneid to ire_ftable_lookup() below
5287 * because the ihandle refers to an ipif which can be in only one zone.
5288 */
5289 match_flags = MATCH_IRE_TYPE | MATCH_IRE_IHANDLE | MATCH_IRE_MASK;
5290 /*
5291 * We know that the mask of the interface ire equals cire->ire_cmask.
5292 * (When ip_newroute() created 'cire' for an on-link destination it set
5293 * its cmask from the interface ire's mask)
5294 */
5295 ire = ire_ftable_lookup(cire->ire_addr, cire->ire_cmask, 0,
5296 IRE_INTERFACE, NULL, NULL, ALL_ZONES, cire->ire_ihandle,
5297 NULL, match_flags);
5298 if (ire != NULL)
5299 return (ire);
5300 /*
5301 * If we didn't find an interface ire above, we can't declare failure.
5302 * For backwards compatibility, we need to support prefix routes
5303 * pointing to next hop gateways that are not on-link.
5304 *
5305 * In the resolver/noresolver case, ip_newroute() thinks it is creating
5306 * the cache ire for an onlink destination in 'cire'. But 'cire' is
5307 * not actually onlink, because ire_ftable_lookup() cheated it, by
5308 * doing ire_route_lookup() twice and returning an interface ire.
5309 *
5310 * Eg. default - gw1 (line 1)
5311 * gw1 - gw2 (line 2)
5312 * gw2 - hme0 (line 3)
5313 *
5314 * In the above example, ip_newroute() tried to create the cache ire
5315 * 'cire' for gw1, based on the interface route in line 3. The
5316 * ire_ftable_lookup() above fails, because there is no interface route
5317 * to reach gw1. (it is gw2). We fall thru below.
5318 *
5319 * Do a brute force search based on the ihandle in a subset of the
5320 * forwarding tables, corresponding to cire->ire_cmask. Otherwise
5321 * things become very complex, since we don't have 'pire' in this
5322 * case. (Also note that this method is not possible in the offlink
5323 * case because we don't know the mask)
5324 */
5325 i = ip_mask_to_plen(cire->ire_cmask);
5326 if ((ip_forwarding_table[i]) == NULL)
5327 return (NULL);
 /*
 * Walk every hash bucket of that prefix length. Each bucket is
 * scanned under its reader lock and condemned entries are skipped.
 */
5328 for (j = 0; j < ip_ftable_hash_size; j++) {
5329 irb_ptr = &ip_forwarding_table[i][j];
5330 rw_enter(&irb_ptr->irb_lock, RW_READER);
5331 for (ire = irb_ptr->irb_ire; ire != NULL;
5332 ire = ire->ire_next) {
5333 if (ire->ire_marks & IRE_MARK_CONDEMNED)
5334 continue;
5335 if ((ire->ire_type & IRE_INTERFACE) &&
5336 (ire->ire_ihandle == cire->ire_ihandle)) {
 /* Hold the ire before dropping the bucket lock. */
5337 IRE_REFHOLD(ire);
5338 rw_exit(&irb_ptr->irb_lock);
5339 return (ire);
5340 }
5341 }
5342 rw_exit(&irb_ptr->irb_lock);
5343 }
5344 return (NULL);
5345}
5346
5347/*
5348 * ire_mrtun_lookup() is called by ip_rput() when packet is to be
5349 * tunneled through reverse tunnel. This is only supported for
5350 * IPv4 packets
5351 */
5352
5353ire_t *
5354ire_mrtun_lookup(ipaddr_t srcaddr, ill_t *ill)
5355{

--- 186 unchanged lines hidden (view full) ---

5542ip_ire_init()
5543{
5544 int i;
5545
5546 mutex_init(&ire_ft_init_lock, NULL, MUTEX_DEFAULT, 0);
5547 mutex_init(&ire_handle_lock, NULL, MUTEX_DEFAULT, NULL);
5548 mutex_init(&ire_mrtun_lock, NULL, MUTEX_DEFAULT, NULL);
5549 mutex_init(&ire_srcif_table_lock, NULL, MUTEX_DEFAULT, NULL);
4894 * ire_mrtun_lookup() is called by ip_rput() when packet is to be
4895 * tunneled through reverse tunnel. This is only supported for
4896 * IPv4 packets
4897 */
4898
4899ire_t *
4900ire_mrtun_lookup(ipaddr_t srcaddr, ill_t *ill)
4901{

--- 186 unchanged lines hidden (view full) ---

5088ip_ire_init()
5089{
5090 int i;
5091
5092 mutex_init(&ire_ft_init_lock, NULL, MUTEX_DEFAULT, 0);
5093 mutex_init(&ire_handle_lock, NULL, MUTEX_DEFAULT, NULL);
5094 mutex_init(&ire_mrtun_lock, NULL, MUTEX_DEFAULT, NULL);
5095 mutex_init(&ire_srcif_table_lock, NULL, MUTEX_DEFAULT, NULL);
5096 mutex_init(&ndp4.ndp_g_lock, NULL, MUTEX_DEFAULT, NULL);
5550
5097
5098 rn_init();
5099 (void) rn_inithead((void **)&ip_ftable, 32);
5100 /*
5101 * mark kernel ip ftable with RNF_SUNW_FT flag.
5102 */
5103 ip_ftable->rnh_treetop->rn_flags |= RNF_SUNW_FT;
5104 rt_entry_cache = kmem_cache_create("rt_entry",
5105 sizeof (struct rt_entry), 0, NULL, NULL, NULL, NULL, NULL, 0);
5106
5551 /* Calculate the IPv4 cache table size. */
5552 ip_cache_table_size = MAX(ip_cache_table_size,
5553 ((kmem_avail() >> ip_ire_mem_ratio) / sizeof (ire_t) /
5554 ip_ire_max_bucket_cnt));
5555 if (ip_cache_table_size > ip_max_cache_table_size)
5556 ip_cache_table_size = ip_max_cache_table_size;
5557 /*
5558 * Make sure that the table size is always a power of 2. The

--- 43 unchanged lines hidden (view full) ---

5602 * populated by ip_rt_add if reverse tunnel is created
5603 */
5604 ip_mrtun_table = NULL;
5605
5606 /*
5607 * Make sure that the forwarding table size is a power of 2.
5608 * The IRE*_ADDR_HASH() macros depend on that.
5609 */
5107 /* Calculate the IPv4 cache table size. */
5108 ip_cache_table_size = MAX(ip_cache_table_size,
5109 ((kmem_avail() >> ip_ire_mem_ratio) / sizeof (ire_t) /
5110 ip_ire_max_bucket_cnt));
5111 if (ip_cache_table_size > ip_max_cache_table_size)
5112 ip_cache_table_size = ip_max_cache_table_size;
5113 /*
5114 * Make sure that the table size is always a power of 2. The

--- 43 unchanged lines hidden (view full) ---

5158 * populated by ip_rt_add if reverse tunnel is created
5159 */
5160 ip_mrtun_table = NULL;
5161
5162 /*
5163 * Make sure that the forwarding table size is a power of 2.
5164 * The IRE*_ADDR_HASH() macros depend on that.
5165 */
5610 power2_roundup(&ip_ftable_hash_size);
5611 power2_roundup(&ip6_ftable_hash_size);
5612}
5613
5614void
5615ip_ire_fini()
5616{
5617 int i;
5618
5619 mutex_destroy(&ire_ft_init_lock);
5620 mutex_destroy(&ire_handle_lock);
5166 power2_roundup(&ip6_ftable_hash_size);
5167}
5168
5169void
5170ip_ire_fini()
5171{
5172 int i;
5173
5174 mutex_destroy(&ire_ft_init_lock);
5175 mutex_destroy(&ire_handle_lock);
5176 mutex_destroy(&ndp4.ndp_g_lock);
5621
5177
5178 rn_fini();
5179 RADIX_NODE_HEAD_DESTROY(ip_ftable);
5180 kmem_cache_destroy(rt_entry_cache);
5181
5622 for (i = 0; i < ip_cache_table_size; i++) {
5623 rw_destroy(&ip_cache_table[i].irb_lock);
5624 }
5625 kmem_free(ip_cache_table, ip_cache_table_size * sizeof (irb_t));
5626
5627 for (i = 0; i < ip6_cache_table_size; i++) {
5628 rw_destroy(&ip_cache_table_v6[i].irb_lock);
5629 }

--- 91 unchanged lines hidden (view full) ---

5721 return (0);
5722 }
5723 }
5724
5725 /* Atomically set the ire_max_frag */
5726 max_frag = *ire->ire_max_fragp;
5727 ire->ire_max_fragp = NULL;
5728 ire->ire_max_frag = MIN(max_frag, IP_MAXPACKET);
5182 for (i = 0; i < ip_cache_table_size; i++) {
5183 rw_destroy(&ip_cache_table[i].irb_lock);
5184 }
5185 kmem_free(ip_cache_table, ip_cache_table_size * sizeof (irb_t));
5186
5187 for (i = 0; i < ip6_cache_table_size; i++) {
5188 rw_destroy(&ip_cache_table_v6[i].irb_lock);
5189 }

--- 91 unchanged lines hidden (view full) ---

5281 return (0);
5282 }
5283 }
5284
5285 /* Atomically set the ire_max_frag */
5286 max_frag = *ire->ire_max_fragp;
5287 ire->ire_max_fragp = NULL;
5288 ire->ire_max_frag = MIN(max_frag, IP_MAXPACKET);
5729
5289 ASSERT(ire->ire_type != IRE_CACHE);
5730 irep = (ire_t **)irb_ptr;
5731 if (*irep != NULL) {
5732 /* Find the last ire which matches ire_in_src_addr */
5733 ire1 = *irep;
5734 while (ire1->ire_in_src_addr == ire->ire_in_src_addr) {
5735 irep = &ire1->ire_next;
5736 ire1 = *irep;
5737 if (ire1 == NULL)

--- 170 unchanged lines hidden (view full) ---

5908
5909 ire = *ire_p;
5910 ASSERT(ire->ire_in_ill != NULL);
5911 ASSERT(ire->ire_ipversion == IPV4_VERSION);
5912 ASSERT(ire->ire_type == IRE_IF_NORESOLVER ||
5913 ire->ire_type == IRE_IF_RESOLVER);
5914
5915 ire->ire_mask = IP_HOST_MASK;
5290 irep = (ire_t **)irb_ptr;
5291 if (*irep != NULL) {
5292 /* Find the last ire which matches ire_in_src_addr */
5293 ire1 = *irep;
5294 while (ire1->ire_in_src_addr == ire->ire_in_src_addr) {
5295 irep = &ire1->ire_next;
5296 ire1 = *irep;
5297 if (ire1 == NULL)

--- 170 unchanged lines hidden (view full) ---

5468
5469 ire = *ire_p;
5470 ASSERT(ire->ire_in_ill != NULL);
5471 ASSERT(ire->ire_ipversion == IPV4_VERSION);
5472 ASSERT(ire->ire_type == IRE_IF_NORESOLVER ||
5473 ire->ire_type == IRE_IF_RESOLVER);
5474
5475 ire->ire_mask = IP_HOST_MASK;
5916 /* Update ire_dlureq_mp with NULL value upon creation */
5476 /*
5477 * Update ire_nce->nce_res_mp with NULL value upon creation;
5478 * first free the default res_mp created by ire_nce_init.
5479 */
5480 freeb(ire->ire_nce->nce_res_mp);
5917 if (ire->ire_type == IRE_IF_RESOLVER) {
5918 /*
5919 * assign NULL now, it will be updated
5920 * with correct value upon returning from
5921 * ARP
5922 */
5481 if (ire->ire_type == IRE_IF_RESOLVER) {
5482 /*
5483 * assign NULL now, it will be updated
5484 * with correct value upon returning from
5485 * ARP
5486 */
5923 ire->ire_dlureq_mp = NULL;
5487 ire->ire_nce->nce_res_mp = NULL;
5924 } else {
5488 } else {
5925 ire->ire_dlureq_mp = ill_dlur_gen(NULL,
5489 ire->ire_nce->nce_res_mp = ill_dlur_gen(NULL,
5926 ire->ire_ipif->ipif_ill->ill_phys_addr_length,
5927 ire->ire_ipif->ipif_ill->ill_sap,
5928 ire->ire_ipif->ipif_ill->ill_sap_length);
5929 }
5930 /* Make sure the address is properly masked. */
5931 ire->ire_addr &= ire->ire_mask;
5932
5933 ASSERT(ire->ire_max_fragp != NULL);

--- 133 unchanged lines hidden (view full) ---

6067 int error;
6068
6069 ASSERT(ire->ire_type != IRE_MIPRTUN &&
6070 ire->ire_ipif->ipif_net_type == IRE_IF_RESOLVER);
6071 ASSERT(ire->ire_ipversion == IPV4_VERSION);
6072
6073 /*
6074 * This ire is from ARP. Update
5490 ire->ire_ipif->ipif_ill->ill_phys_addr_length,
5491 ire->ire_ipif->ipif_ill->ill_sap,
5492 ire->ire_ipif->ipif_ill->ill_sap_length);
5493 }
5494 /* Make sure the address is properly masked. */
5495 ire->ire_addr &= ire->ire_mask;
5496
5497 ASSERT(ire->ire_max_fragp != NULL);

--- 133 unchanged lines hidden (view full) ---

5631 int error;
5632
5633 ASSERT(ire->ire_type != IRE_MIPRTUN &&
5634 ire->ire_ipif->ipif_net_type == IRE_IF_RESOLVER);
5635 ASSERT(ire->ire_ipversion == IPV4_VERSION);
5636
5637 /*
5638 * This ire is from ARP. Update
6075 * ire_dlureq_mp info
5639 * ire_nce->nce_res_mp info
6076 */
6077 ire1 = ire_srcif_table_lookup(ire->ire_addr,
6078 IRE_IF_RESOLVER, ire->ire_ipif,
6079 ire->ire_in_ill,
6080 MATCH_IRE_ILL | MATCH_IRE_TYPE);
6081 if (ire1 == NULL) {
6082 /* Mobile node registration expired ? */
6083 ire_delete(ire);

--- 18 unchanged lines hidden (view full) ---

6102 return (NULL);
6103 }
6104 ASSERT(ire->ire_max_fragp == NULL);
6105 ire->ire_max_frag = ire1->ire_max_frag;
6106 /*
6107 * Update resolver information and
6108 * send-to queue.
6109 */
5640 */
5641 ire1 = ire_srcif_table_lookup(ire->ire_addr,
5642 IRE_IF_RESOLVER, ire->ire_ipif,
5643 ire->ire_in_ill,
5644 MATCH_IRE_ILL | MATCH_IRE_TYPE);
5645 if (ire1 == NULL) {
5646 /* Mobile node registration expired ? */
5647 ire_delete(ire);

--- 18 unchanged lines hidden (view full) ---

5666 return (NULL);
5667 }
5668 ASSERT(ire->ire_max_fragp == NULL);
5669 ire->ire_max_frag = ire1->ire_max_frag;
5670 /*
5671 * Update resolver information and
5672 * send-to queue.
5673 */
6110 ASSERT(ire->ire_dlureq_mp != NULL);
6111 ire1->ire_dlureq_mp = copyb(ire->ire_dlureq_mp);
6112 if (ire1->ire_dlureq_mp == NULL) {
5674 ASSERT(ire->ire_nce->nce_res_mp != NULL);
5675 ire1->ire_nce->nce_res_mp = copyb(ire->ire_nce->nce_res_mp);
5676 if (ire1->ire_nce->nce_res_mp == NULL) {
6113 ip0dbg(("ire_update_srcif: copyb failed\n"));
6114 ire_refrele(ire1);
6115 ire_refrele(ire);
6116 ire_atomic_end(irb, ire1);
6117 return (NULL);
6118 }
6119 ire1->ire_stq = ire->ire_stq;
6120
5677 ip0dbg(("ire_update_srcif: copyb failed\n"));
5678 ire_refrele(ire1);
5679 ire_refrele(ire);
5680 ire_atomic_end(irb, ire1);
5681 return (NULL);
5682 }
5683 ire1->ire_stq = ire->ire_stq;
5684
6121 ASSERT(ire->ire_fp_mp == NULL);
5685 ASSERT(ire->ire_nce->nce_fp_mp == NULL);
6122
6123 ire_atomic_end(irb, ire1);
6124 ire_refrele(ire1);
6125 /* Return the passed ire */
6126 return (ire); /* Update done */
6127}
6128
6129

--- 290 unchanged lines hidden (view full) ---

6420 * give the top priority to this ire and exit the
6421 * loop.
6422 * This is typically the case when an ARP reply
6423 * is processed through ip_wput_nondata().
6424 */
6425 if ((flags & MULTIRT_CACHEGW) &&
6426 (gw_ire != NULL) &&
6427 (gw_ire->ire_type & IRE_CACHETABLE)) {
5686
5687 ire_atomic_end(irb, ire1);
5688 ire_refrele(ire1);
5689 /* Return the passed ire */
5690 return (ire); /* Update done */
5691}
5692
5693

--- 290 unchanged lines hidden (view full) ---

5984 * give the top priority to this ire and exit the
5985 * loop.
5986 * This is typically the case when an ARP reply
5987 * is processed through ip_wput_nondata().
5988 */
5989 if ((flags & MULTIRT_CACHEGW) &&
5990 (gw_ire != NULL) &&
5991 (gw_ire->ire_type & IRE_CACHETABLE)) {
5992 ASSERT(gw_ire->ire_nce == NULL ||
5993 gw_ire->ire_nce->nce_state == ND_REACHABLE);
6428 /*
6429 * Release the resolver associated to the
6430 * previous candidate best ire, if any.
6431 */
6432 if (best_cire != NULL) {
6433 ire_refrele(best_cire);
6434 ASSERT(best_fire != NULL);
6435 }

--- 110 unchanged lines hidden (view full) ---

6546 NULL, NULL, ALL_ZONES, 0, tsl,
6547 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
6548 MATCH_IRE_SECATTR);
6549
6550 /* No resolver for the gateway; we skip this ire. */
6551 if (gw_ire == NULL) {
6552 continue;
6553 }
5994 /*
5995 * Release the resolver associated to the
5996 * previous candidate best ire, if any.
5997 */
5998 if (best_cire != NULL) {
5999 ire_refrele(best_cire);
6000 ASSERT(best_fire != NULL);
6001 }

--- 110 unchanged lines hidden (view full) ---

6112 NULL, NULL, ALL_ZONES, 0, tsl,
6113 MATCH_IRE_RECURSIVE | MATCH_IRE_TYPE |
6114 MATCH_IRE_SECATTR);
6115
6116 /* No resolver for the gateway; we skip this ire. */
6117 if (gw_ire == NULL) {
6118 continue;
6119 }
6120 ASSERT(gw_ire->ire_nce == NULL ||
6121 gw_ire->ire_nce->nce_state == ND_REACHABLE);
6554
6555 if (first_cire != NULL) {
6556
6557 IRB_REFHOLD(cirb);
6558 /*
6559 * For all IRE_CACHE ires for that
6560 * destination.
6561 */

--- 119 unchanged lines hidden (view full) ---

6681 "*ire_arg %p\n",
6682 (void *)*fire_arg, (void *)*ire_arg));
6683
6684 /* No resolvable route. */
6685 return (B_FALSE);
6686}
6687
6688/*
6122
6123 if (first_cire != NULL) {
6124
6125 IRB_REFHOLD(cirb);
6126 /*
6127 * For all IRE_CACHE ires for that
6128 * destination.
6129 */

--- 119 unchanged lines hidden (view full) ---

6249 "*ire_arg %p\n",
6250 (void *)*fire_arg, (void *)*ire_arg));
6251
6252 /* No resolvable route. */
6253 return (B_FALSE);
6254}
6255
6256/*
6689 * Find an IRE_OFFSUBNET IRE entry for the multicast address 'group'
6690 * that goes through 'ipif'. As a fallback, a route that goes through
6691 * ipif->ipif_ill can be returned.
6692 */
6693ire_t *
6694ipif_lookup_multi_ire(ipif_t *ipif, ipaddr_t group)
6695{
6696 ire_t *ire;
6697 ire_t *save_ire = NULL;
6698 ire_t *gw_ire;
6699 irb_t *irb;
6700 ipaddr_t gw_addr;
6701 int match_flags = MATCH_IRE_TYPE | MATCH_IRE_ILL;
6702
6703 ASSERT(CLASSD(group));
6704
6705 ire = ire_ftable_lookup(group, 0, 0, 0, NULL, NULL, ALL_ZONES, 0,
6706 NULL, MATCH_IRE_DEFAULT);
6707
6708 if (ire == NULL)
6709 return (NULL);
6710
6711 irb = ire->ire_bucket;
6712 ASSERT(irb);
6713
6714 IRB_REFHOLD(irb);
6715 ire_refrele(ire);
6716 for (ire = irb->irb_ire; ire != NULL; ire = ire->ire_next) {
6717 if (ire->ire_addr != group ||
6718 (ipif->ipif_zoneid != ire->ire_zoneid &&
6719 ire->ire_zoneid != ALL_ZONES)) {
6720 continue;
6721 }
6722
6723 switch (ire->ire_type) {
6724 case IRE_DEFAULT:
6725 case IRE_PREFIX:
6726 case IRE_HOST:
6727 gw_addr = ire->ire_gateway_addr;
6728 gw_ire = ire_ftable_lookup(gw_addr, 0, 0, IRE_INTERFACE,
6729 ipif, NULL, ALL_ZONES, 0, NULL, match_flags);
6730
6731 if (gw_ire != NULL) {
6732 if (save_ire != NULL) {
6733 ire_refrele(save_ire);
6734 }
6735 IRE_REFHOLD(ire);
6736 if (gw_ire->ire_ipif == ipif) {
6737 ire_refrele(gw_ire);
6738
6739 IRB_REFRELE(irb);
6740 return (ire);
6741 }
6742 ire_refrele(gw_ire);
6743 save_ire = ire;
6744 }
6745 break;
6746 case IRE_IF_NORESOLVER:
6747 case IRE_IF_RESOLVER:
6748 if (ire->ire_ipif == ipif) {
6749 if (save_ire != NULL) {
6750 ire_refrele(save_ire);
6751 }
6752 IRE_REFHOLD(ire);
6753
6754 IRB_REFRELE(irb);
6755 return (ire);
6756 }
6757 break;
6758 }
6759 }
6760 IRB_REFRELE(irb);
6761
6762 return (save_ire);
6763}
6764
6765/*
6766 * The purpose of the next two functions is to provide some external access to
6767 * routing/l2 lookup functionality while hiding the implementation of routing
6768 * and interface data structures (IRE/ILL). Thus, interfaces are passed/
6769 * returned by name instead of by ILL reference. These functions are used by
6770 * IP Filter.
6771 * Return a link layer header suitable for an IP packet being sent to the
6772 * dst_addr IP address. The interface associated with the route is put into
6773 * ifname, which must be a buffer of LIFNAMSIZ bytes. The dst_addr is the
6774 * packet's ultimate destination address, not a router address.
6775 *
6776 * This function is used when the caller wants to know the outbound interface
6777 * and MAC header for a packet given only the address.
6778 */
6779mblk_t *
6780ip_nexthop_route(const struct sockaddr *target, char *ifname)
6781{
6782 struct nce_s *nce;
6783 ire_t *dir;
6784 ill_t *ill;
6257 * The purpose of the next two functions is to provide some external access to
6258 * routing/l2 lookup functionality while hiding the implementation of routing
6259 * and interface data structures (IRE/ILL). Thus, interfaces are passed/
6260 * returned by name instead of by ILL reference. These functions are used by
6261 * IP Filter.
6262 * Return a link layer header suitable for an IP packet being sent to the
6263 * dst_addr IP address. The interface associated with the route is put into
6264 * ifname, which must be a buffer of LIFNAMSIZ bytes. The dst_addr is the
6265 * packet's ultimate destination address, not a router address.
6266 *
6267 * This function is used when the caller wants to know the outbound interface
6268 * and MAC header for a packet given only the address.
6269 */
6270mblk_t *
6271ip_nexthop_route(const struct sockaddr *target, char *ifname)
6272{
6273 struct nce_s *nce;
6274 ire_t *dir;
6275 ill_t *ill;
6785 mblk_t *mp;
6276 mblk_t *mp, *tmp_mp;
6786
6787 /* parameter sanity */
6788 if (ifname == NULL || target == NULL)
6789 return (NULL);
6790
6791 /* Find the route entry, if it exists. */
6792 switch (target->sa_family) {
6793 case AF_INET:

--- 14 unchanged lines hidden (view full) ---

6808 dir = NULL;
6809 }
6810 break;
6811 default:
6812 dir = NULL;
6813 break;
6814 }
6815
6277
6278 /* parameter sanity */
6279 if (ifname == NULL || target == NULL)
6280 return (NULL);
6281
6282 /* Find the route entry, if it exists. */
6283 switch (target->sa_family) {
6284 case AF_INET:

--- 14 unchanged lines hidden (view full) ---

6299 dir = NULL;
6300 }
6301 break;
6302 default:
6303 dir = NULL;
6304 break;
6305 }
6306
6816
6817 if (dir == NULL)
6307 if (dir == NULL) {
6818 return (NULL);
6308 return (NULL);
6309 }
6819
6820 /* Map the IRE to an ILL so we can fill in ifname. */
6821 ill = ire_to_ill(dir);
6822 if (ill == NULL) {
6823 ire_refrele(dir);
6824 return (NULL);
6825 }
6826 (void) strncpy(ifname, ill->ill_name, LIFNAMSIZ);
6827
6310
6311 /* Map the IRE to an ILL so we can fill in ifname. */
6312 ill = ire_to_ill(dir);
6313 if (ill == NULL) {
6314 ire_refrele(dir);
6315 return (NULL);
6316 }
6317 (void) strncpy(ifname, ill->ill_name, LIFNAMSIZ);
6318
6319 if ((dir->ire_type & (IRE_CACHE|IRE_BROADCAST)) == 0) {
6320 mp = copyb(ill->ill_resolver_mp);
6321 ire_refrele(dir);
6322 return (mp);
6323 }
6324
6828 /* Return a copy of the header to the caller. */
6829 switch (target->sa_family) {
6830 case AF_INET :
6325 /* Return a copy of the header to the caller. */
6326 switch (target->sa_family) {
6327 case AF_INET :
6831 if (dir->ire_fp_mp != NULL) {
6832 if ((mp = dupb(dir->ire_fp_mp)) == NULL)
6833 mp = copyb(dir->ire_fp_mp);
6834 } else if (dir->ire_dlureq_mp != NULL) {
6835 if ((mp = dupb(dir->ire_dlureq_mp)) == NULL)
6836 mp = copyb(dir->ire_dlureq_mp);
6328 if (dir->ire_nce != NULL &&
6329 dir->ire_nce->nce_state == ND_REACHABLE) {
6330 if (dir->ire_nce->nce_fp_mp != NULL)
6331 tmp_mp = dir->ire_nce->nce_fp_mp;
6332 else
6333 tmp_mp = dir->ire_nce->nce_fp_mp;
6334 if ((mp = dupb(tmp_mp)) == NULL)
6335 mp = copyb(tmp_mp);
6837 } else {
6336 } else {
6838 mp = NULL;
6337 mp = copyb(ill->ill_resolver_mp);
6839 }
6840 break;
6841 case AF_INET6 :
6842 nce = dir->ire_nce;
6843 if (nce->nce_fp_mp != NULL) {
6844 if ((mp = dupb(nce->nce_fp_mp)) == NULL)
6845 mp = copyb(nce->nce_fp_mp);
6846 } else if (nce->nce_res_mp != NULL) {

--- 76 unchanged lines hidden (view full) ---

6923 switch (target->sa_family) {
6924 case AF_INET:
6925 dir = ire_route_lookup(
6926 ((struct sockaddr_in *)target)->sin_addr.s_addr,
6927 0xffffffff,
6928 0, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL,
6929 MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|
6930 MATCH_IRE_RECURSIVE|MATCH_IRE_IPIF);
6338 }
6339 break;
6340 case AF_INET6 :
6341 nce = dir->ire_nce;
6342 if (nce->nce_fp_mp != NULL) {
6343 if ((mp = dupb(nce->nce_fp_mp)) == NULL)
6344 mp = copyb(nce->nce_fp_mp);
6345 } else if (nce->nce_res_mp != NULL) {

--- 76 unchanged lines hidden (view full) ---

6422 switch (target->sa_family) {
6423 case AF_INET:
6424 dir = ire_route_lookup(
6425 ((struct sockaddr_in *)target)->sin_addr.s_addr,
6426 0xffffffff,
6427 0, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL,
6428 MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|
6429 MATCH_IRE_RECURSIVE|MATCH_IRE_IPIF);
6430 if ((dir != NULL) && dir->ire_nce != NULL &&
6431 dir->ire_nce->nce_state != ND_REACHABLE) {
6432 ire_refrele(dir);
6433 dir = NULL;
6434 }
6931 break;
6932 case AF_INET6:
6933 dir = ire_route_lookup_v6(
6934 &((struct sockaddr_in6 *)target)->sin6_addr, NULL,
6935 0, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL,
6936 MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|
6937 MATCH_IRE_RECURSIVE|MATCH_IRE_IPIF);
6938 if ((dir != NULL) && (dir->ire_nce == NULL)) {
6939 ire_refrele(dir);
6940 dir = NULL;
6941 }
6942 break;
6943 default:
6944 dir = NULL;
6945 break;
6946 }
6947
6435 break;
6436 case AF_INET6:
6437 dir = ire_route_lookup_v6(
6438 &((struct sockaddr_in6 *)target)->sin6_addr, NULL,
6439 0, 0, ill->ill_ipif, NULL, ALL_ZONES, NULL,
6440 MATCH_IRE_DSTONLY|MATCH_IRE_DEFAULT|
6441 MATCH_IRE_RECURSIVE|MATCH_IRE_IPIF);
6442 if ((dir != NULL) && (dir->ire_nce == NULL)) {
6443 ire_refrele(dir);
6444 dir = NULL;
6445 }
6446 break;
6447 default:
6448 dir = NULL;
6449 break;
6450 }
6451
6948 ill_refrele(ill);
6949
6950 if (dir == NULL)
6452 if (dir == NULL) {
6951 return (NULL);
6453 return (NULL);
6454 }
6952
6455
6456 if ((dir->ire_type & (IRE_CACHE|IRE_BROADCAST)) == 0) {
6457 mp = copyb(ill->ill_resolver_mp);
6458 ill_refrele(ill);
6459 ire_refrele(dir);
6460 return (mp);
6461 }
6462
6953 /* Return a copy of the header to the caller. */
6954 switch (target->sa_family) {
6955 case AF_INET :
6463 /* Return a copy of the header to the caller. */
6464 switch (target->sa_family) {
6465 case AF_INET :
6956 if (dir->ire_fp_mp != NULL) {
6957 if ((mp = dupb(dir->ire_fp_mp)) == NULL)
6958 mp = copyb(dir->ire_fp_mp);
6959 } else if (dir->ire_dlureq_mp != NULL) {
6960 if ((mp = dupb(dir->ire_dlureq_mp)) == NULL)
6961 mp = copyb(dir->ire_dlureq_mp);
6466 if (dir->ire_nce->nce_fp_mp != NULL) {
6467 if ((mp = dupb(dir->ire_nce->nce_fp_mp)) == NULL)
6468 mp = copyb(dir->ire_nce->nce_fp_mp);
6469 } else if (dir->ire_nce->nce_res_mp != NULL) {
6470 if ((mp = dupb(dir->ire_nce->nce_res_mp)) == NULL)
6471 mp = copyb(dir->ire_nce->nce_res_mp);
6962 } else {
6472 } else {
6963 mp = NULL;
6473 mp = copyb(ill->ill_resolver_mp);
6964 }
6965 break;
6966 case AF_INET6 :
6967 nce = dir->ire_nce;
6968 if (nce->nce_fp_mp != NULL) {
6969 if ((mp = dupb(nce->nce_fp_mp)) == NULL)
6970 mp = copyb(nce->nce_fp_mp);
6971 } else if (nce->nce_res_mp != NULL) {
6972 if ((mp = dupb(nce->nce_res_mp)) == NULL)
6973 mp = copyb(nce->nce_res_mp);
6974 } else {
6975 mp = NULL;
6976 }
6977 break;
6978 }
6979
6980 ire_refrele(dir);
6474 }
6475 break;
6476 case AF_INET6 :
6477 nce = dir->ire_nce;
6478 if (nce->nce_fp_mp != NULL) {
6479 if ((mp = dupb(nce->nce_fp_mp)) == NULL)
6480 mp = copyb(nce->nce_fp_mp);
6481 } else if (nce->nce_res_mp != NULL) {
6482 if ((mp = dupb(nce->nce_res_mp)) == NULL)
6483 mp = copyb(nce->nce_res_mp);
6484 } else {
6485 mp = NULL;
6486 }
6487 break;
6488 }
6489
6490 ire_refrele(dir);
6491 ill_refrele(ill);
6981 return (mp);
6982}
6983
6984/*
6985 * IRE iterator for inbound and loopback broadcast processing.
6986 * Given an IRE_BROADCAST ire, walk the ires with the same destination
6987 * address, but skip over the passed-in ire. Returns the next ire without
6988 * a hold - assumes that the caller holds a reference on the IRE bucket.

--- 41 unchanged lines hidden (view full) ---

7030 /* skip over deleted IREs */
7031 continue;
7032 }
7033 return (curr);
7034 }
7035 return (NULL);
7036}
7037
6492 return (mp);
6493}
6494
6495/*
6496 * IRE iterator for inbound and loopback broadcast processing.
6497 * Given an IRE_BROADCAST ire, walk the ires with the same destination
6498 * address, but skip over the passed-in ire. Returns the next ire without
6499 * a hold - assumes that the caller holds a reference on the IRE bucket.

--- 41 unchanged lines hidden (view full) ---

6541 /* skip over deleted IREs */
6542 continue;
6543 }
6544 return (curr);
6545 }
6546 return (NULL);
6547}
6548
7038/*
7039 * IRE iterator used by ire_ftable_lookup[_v6]() to process multiple default
7040 * routes. Given a starting point in the hash list (ire_origin), walk the IREs
7041 * in the bucket skipping default interface routes and deleted entries.
7042 * Returns the next IRE (unheld), or NULL when we're back to the starting point.
7043 * Assumes that the caller holds a reference on the IRE bucket.
7044 */
7045ire_t *
7046ire_get_next_default_ire(ire_t *ire, ire_t *ire_origin)
7047{
7048 ASSERT(ire_origin->ire_bucket != NULL);
7049 ASSERT(ire != NULL);
7050
7051 do {
7052 ire = ire->ire_next;
7053 if (ire == NULL)
7054 ire = ire_origin->ire_bucket->irb_ire;
7055 if (ire == ire_origin)
7056 return (NULL);
7057 } while ((ire->ire_type & IRE_INTERFACE) ||
7058 (ire->ire_marks & IRE_MARK_CONDEMNED));
7059 ASSERT(ire != NULL);
7060 return (ire);
7061}
7062
7063#ifdef IRE_DEBUG
7064th_trace_t *
7065th_trace_ire_lookup(ire_t *ire)
7066{
7067 int bucket_id;
7068 th_trace_t *th_trace;
7069
7070 ASSERT(MUTEX_HELD(&ire->ire_lock));

--- 125 unchanged lines hidden (view full) ---

7196 }
7197 ASSERT(th_trace->th_refcnt == 0);
7198
7199 ire_trace_free(th_trace);
7200 mutex_exit(&ire->ire_lock);
7201}
7202
7203#endif
6549#ifdef IRE_DEBUG
6550th_trace_t *
6551th_trace_ire_lookup(ire_t *ire)
6552{
6553 int bucket_id;
6554 th_trace_t *th_trace;
6555
6556 ASSERT(MUTEX_HELD(&ire->ire_lock));

--- 125 unchanged lines hidden (view full) ---

6682 }
6683 ASSERT(th_trace->th_refcnt == 0);
6684
6685 ire_trace_free(th_trace);
6686 mutex_exit(&ire->ire_lock);
6687}
6688
6689#endif
6690
6691/*
6692 * Generate a message chain with an arp request to resolve the in_ire.
6693 * It is assumed that in_ire itself is currently in the ire cache table,
6694 * so we create a fake_ire filled with enough information about ire_addr etc.
6695 * to retrieve in_ire when the DL_UNITDATA response from the resolver
6696 * comes back. The fake_ire itself is created by calling esballoc with
6697 * the fr_rtnp (free routine) set to ire_freemblk. This routine will be
6698 * invoked when the mblk containing fake_ire is freed.
6699 */
6700void
6701ire_arpresolve(ire_t *in_ire, ill_t *dst_ill)
6702{
6703 areq_t *areq;
6704 ipaddr_t *addrp;
6705 mblk_t *ire_mp, *dlureq_mp;
6706 ire_t *ire, *buf;
6707 size_t bufsize;
6708 frtn_t *frtnp;
6709 ill_t *ill;
6710
6711 /*
6712 * Construct message chain for the resolver
6713 * of the form:
6714 * ARP_REQ_MBLK-->IRE_MBLK
6715 *
6716 * NOTE : If the response does not
6717 * come back, ARP frees the packet. For this reason,
6718 * we can't REFHOLD the bucket of save_ire to prevent
6719 * deletions. We may not be able to REFRELE the bucket
6720 * if the response never comes back. Thus, before
6721 * adding the ire, ire_add_v4 will make sure that the
6722 * interface route does not get deleted. This is the
6723 * only case unlike ip_newroute_v6, ip_newroute_ipif_v6
6724 * where we can always prevent deletions because of
6725 * the synchronous nature of adding IRES i.e
6726 * ire_add_then_send is called after creating the IRE.
6727 */
6728
6729 /*
6730 * We use esballoc to allocate the second part(the ire_t size mblk)
6731 * of the message chain depicted above. THis mblk will be freed
6732 * by arp when there is a timeout, and otherwise passed to IP
6733 * and IP will * free it after processing the ARP response.
6734 */
6735
6736 bufsize = sizeof (ire_t) + sizeof (frtn_t);
6737 buf = kmem_alloc(bufsize, KM_NOSLEEP);
6738 if (buf == NULL) {
6739 ip1dbg(("ire_arpresolver:alloc buffer failed\n "));
6740 return;
6741 }
6742 frtnp = (frtn_t *)(buf + 1);
6743 frtnp->free_arg = (caddr_t)buf;
6744 frtnp->free_func = ire_freemblk;
6745
6746 ire_mp = esballoc((unsigned char *)buf, bufsize, BPRI_MED, frtnp);
6747
6748 if (ire_mp == NULL) {
6749 ip1dbg(("ire_arpresolve: esballoc failed\n"));
6750 kmem_free(buf, bufsize);
6751 return;
6752 }
6753 ASSERT(in_ire->ire_nce != NULL);
6754 dlureq_mp = copyb(dst_ill->ill_resolver_mp);
6755 if (dlureq_mp == NULL) {
6756 kmem_free(buf, bufsize);
6757 return;
6758 }
6759
6760 ire_mp->b_datap->db_type = IRE_ARPRESOLVE_TYPE;
6761 ire = (ire_t *)buf;
6762 /*
6763 * keep enough info in the fake ire so that we can pull up
6764 * the incomplete ire (in_ire) after result comes back from
6765 * arp and make it complete.
6766 */
6767 *ire = ire_null;
6768 ire->ire_u = in_ire->ire_u;
6769 ire->ire_ipif_seqid = in_ire->ire_ipif_seqid;
6770 ire->ire_ipif = in_ire->ire_ipif;
6771 ire->ire_stq = in_ire->ire_stq;
6772 ill = ire_to_ill(ire);
6773 ire->ire_stq_ifindex = ill->ill_phyint->phyint_ifindex;
6774 ire->ire_zoneid = in_ire->ire_zoneid;
6775 /*
6776 * ire_freemblk will be called when ire_mp is freed, both for
6777 * successful and failed arp resolution. IRE_MARK_UNCACHED will be set
6778 * when the arp resolution failed.
6779 */
6780 ire->ire_marks |= IRE_MARK_UNCACHED;
6781 ire->ire_mp = ire_mp;
6782 ire_mp->b_wptr = (uchar_t *)&ire[1];
6783 ire_mp->b_cont = NULL;
6784 ASSERT(dlureq_mp != NULL);
6785 linkb(dlureq_mp, ire_mp);
6786
6787 /*
6788 * Fill in the source and dest addrs for the resolver.
6789 * NOTE: this depends on memory layouts imposed by
6790 * ill_init().
6791 */
6792 areq = (areq_t *)dlureq_mp->b_rptr;
6793 addrp = (ipaddr_t *)((char *)areq + areq->areq_sender_addr_offset);
6794 *addrp = ire->ire_src_addr;
6795
6796 addrp = (ipaddr_t *)((char *)areq + areq->areq_target_addr_offset);
6797 if (ire->ire_gateway_addr != INADDR_ANY) {
6798 *addrp = ire->ire_gateway_addr;
6799 } else {
6800 *addrp = ire->ire_addr;
6801 }
6802
6803 /* Up to the resolver. */
6804 if (canputnext(dst_ill->ill_rq)) {
6805 putnext(dst_ill->ill_rq, dlureq_mp);
6806 } else {
6807 /* Prepare for cleanup */
6808 freemsg(dlureq_mp);
6809 }
6810}
6811
6812/*
6813 * Esballoc free function for AR_ENTRY_QUERY request to clean up any
6814 * unresolved ire_t and/or nce_t structures when ARP resolution fails.
6815 *
6816 * This function can be called by ARP via free routine for ire_mp or
6817 * by IPv4(both host and forwarding path) via ire_delete
6818 * in case ARP resolution fails.
6819 * NOTE: Since IP is MT, ARP can call into IP but not vice versa
6820 * (for IP to talk to ARP, it still has to send AR* messages).
6821 *
6822 * Note that the ARP/IP merge should replace the functioanlity by providing
6823 * direct function calls to clean up unresolved entries in ire/nce lists.
6824 */
6825void
6826ire_freemblk(ire_t *ire_mp)
6827{
6828 nce_t *nce = NULL;
6829 ill_t *ill;
6830
6831 ASSERT(ire_mp != NULL);
6832
6833 if ((ire_mp->ire_addr == NULL) && (ire_mp->ire_gateway_addr == NULL)) {
6834 ip1dbg(("ire_freemblk(0x%p) ire_addr is NULL\n",
6835 (void *)ire_mp));
6836 goto cleanup;
6837 }
6838 if ((ire_mp->ire_marks & IRE_MARK_UNCACHED) == 0) {
6839 goto cleanup; /* everything succeeded. just free and return */
6840 }
6841
6842 /*
6843 * the arp information corresponding to this ire_mp was not
6844 * transferred to a ire_cache entry. Need
6845 * to clean up incomplete ire's and nce, if necessary.
6846 */
6847 ASSERT(ire_mp->ire_stq != NULL);
6848 ASSERT(ire_mp->ire_stq_ifindex != 0);
6849 /*
6850 * Get any nce's corresponding to this ire_mp. We first have to
6851 * make sure that the ill is still around.
6852 */
6853 ill = ill_lookup_on_ifindex(ire_mp->ire_stq_ifindex, B_FALSE,
6854 NULL, NULL, NULL, NULL);
6855 if (ill == NULL || (ire_mp->ire_stq != ill->ill_wq) ||
6856 (ill->ill_state_flags & ILL_CONDEMNED)) {
6857 /*
6858 * ill went away. no nce to clean up.
6859 * Note that the ill_state_flags could be set to
6860 * ILL_CONDEMNED after this point, but if we know
6861 * that it is CONDEMNED now, we just bail out quickly.
6862 */
6863 if (ill != NULL)
6864 ill_refrele(ill);
6865 goto cleanup;
6866 }
6867 nce = ndp_lookup_v4(ill,
6868 ((ire_mp->ire_gateway_addr != INADDR_ANY) ?
6869 &ire_mp->ire_gateway_addr : &ire_mp->ire_addr),
6870 B_FALSE);
6871 ill_refrele(ill);
6872
6873 if ((nce != NULL) && (nce->nce_state != ND_REACHABLE)) {
6874 /*
6875 * some incomplete nce was found.
6876 */
6877 DTRACE_PROBE2(ire__freemblk__arp__resolv__fail,
6878 nce_t *, nce, ire_t *, ire_mp);
6879 /*
6880 * Send the icmp_unreachable messages for the queued mblks in
6881 * ire->ire_nce->nce_qd_mp, since ARP resolution failed
6882 * for this ire
6883 */
6884 arp_resolv_failed(nce);
6885 /*
6886 * Delete the nce and clean up all ire's pointing at this nce
6887 * in the cachetable
6888 */
6889 ndp_delete(nce);
6890 }
6891 if (nce != NULL)
6892 NCE_REFRELE(nce); /* release the ref taken by ndp_lookup_v4 */
6893
6894cleanup:
6895 /*
6896 * Get rid of the ire buffer
6897 * We call kmem_free here(instead of ire_delete()), since
6898 * this is the freeb's callback.
6899 */
6900 kmem_free(ire_mp, sizeof (ire_t) + sizeof (frtn_t));
6901}
6902
6903
6904/*
6905 * create the neighbor cache entry nce_t for IRE_CACHE and
6906 * non-loopback IRE_BROADCAST ire's. Note that IRE_BROADCAST
6907 * (non-loopback) entries have the nce_res_mp set to the
6908 * template passed in (generated from ill_bcast_mp); IRE_CACHE ire's
6909 * contain the information for the nexthop (ire_gateway_addr) in the
6910 * case of indirect routes, and for the dst itself (ire_addr) in the
6911 * case of direct routes, with the nce_res_mp containing a template
6912 * DL_UNITDATA request.
6913 *
6914 * This function always consumes res_mp and fp_mp.
6915 *
6916 * The actual association of the ire_nce to the nce created here is
6917 * typically done in ire_add_v4 for IRE_CACHE entries. Exceptions
6918 * to this rule are SO_DONTROUTE ire's (IRE_MARK_NO_ADD), for which
6919 * the ire_nce assignment is done in ire_add_then_send, and mobile-ip
6920 * where the assignment is done in ire_add_mrtun().
6921 */
6922int
6923ire_nce_init(ire_t *ire, mblk_t *fp_mp, mblk_t *res_mp)
6924{
6925 in_addr_t addr4, mask4;
6926 int err;
6927 nce_t *arpce = NULL;
6928 ill_t *ire_ill;
6929 uint16_t nce_state, nce_flags;
6930
6931 if (ire->ire_stq == NULL) {
6932 if (res_mp)
6933 freemsg(res_mp);
6934 if (fp_mp)
6935 freemsg(fp_mp);
6936 return (0); /* no need to create nce for local/loopback */
6937 }
6938
6939 mask4 = IP_HOST_MASK;
6940 switch (ire->ire_type) {
6941 case IRE_CACHE:
6942 if (ire->ire_gateway_addr != INADDR_ANY)
6943 addr4 = ire->ire_gateway_addr; /* 'G' route */
6944 else
6945 addr4 = ire->ire_addr; /* direct route */
6946 break;
6947 case IRE_BROADCAST:
6948 addr4 = ire->ire_addr;
6949 break;
6950 default:
6951 if (res_mp)
6952 freemsg(res_mp);
6953 if (fp_mp)
6954 freemsg(fp_mp);
6955 return (0);
6956 }
6957
6958 /*
6959 * ire_ipif is picked based on RTF_SETSRC, usesrc etc.
6960 * rules in ire_forward_src_ipif. We want the dlureq_mp
6961 * for the outgoing interface, which we get from the ire_stq.
6962 */
6963 ire_ill = ire_to_ill(ire);
6964
6965 /*
6966 * if we are creating an nce for the first time, and this is
6967 * a NORESOLVER interface, atomically create the nce in the
6968 * REACHABLE state; else create it in the ND_INITIAL state.
6969 */
6970 if (ire_ill->ill_net_type == IRE_IF_NORESOLVER) {
6971 nce_state = ND_REACHABLE;
6972 nce_flags = NCE_F_PERMANENT;
6973 } else {
6974 if (fp_mp != NULL)
6975 nce_state = ND_REACHABLE;
6976 else
6977 nce_state = ND_INITIAL;
6978 nce_flags = 0;
6979 }
6980
6981 err = ndp_lookup_then_add(ire_ill, NULL,
6982 &addr4, &mask4, NULL, 0, nce_flags, nce_state, &arpce,
6983 fp_mp, res_mp);
6984
6985 ip1dbg(("ire 0x%p addr 0x%lx mask 0x%lx type 0x%x; "
6986 "found nce 0x%p err %d\n", (void *)ire, (ulong_t)addr4,
6987 (ulong_t)mask4, ire->ire_type, (void *)arpce, err));
6988
6989 switch (err) {
6990 case 0:
6991 break;
6992 case EEXIST:
6993 /*
6994 * return a pointer to an existing nce_t;
6995 * note that the ire-nce mapping is many-one, i.e.,
6996 * multiple ire's could point to the same nce_t;
6997 */
6998 if (fp_mp != NULL) {
6999 freemsg(fp_mp);
7000 }
7001 if (res_mp != NULL) {
7002 freemsg(res_mp);
7003 }
7004 break;
7005 default:
7006 DTRACE_PROBE2(nce__init__fail, ill_t *, ire_ill, int, err);
7007 if (res_mp)
7008 freemsg(res_mp);
7009 if (fp_mp)
7010 freemsg(fp_mp);
7011 return (EINVAL);
7012 }
7013#if DEBUG
7014 /*
7015 * if an nce_fp_mp was passed in, we should be picking up an
7016 * existing nce_t in the ND_REACHABLE state.
7017 */
7018 mutex_enter(&arpce->nce_lock);
7019 ASSERT(arpce->nce_fp_mp == NULL || arpce->nce_state == ND_REACHABLE);
7020 mutex_exit(&arpce->nce_lock);
7021#endif
7022 if (ire->ire_type == IRE_BROADCAST) {
7023 /*
7024 * Two bcast ires are created for each interface;
7025 * 1. loopback copy (which does not have an
7026 * ire_stq, and therefore has no ire_nce), and,
7027 * 2. the non-loopback copy, which has the nce_res_mp
7028 * initialized to a copy of the ill_bcast_mp, and
7029 * is marked as ND_REACHABLE at this point.
7030 * This nce does not undergo any further state changes,
7031 * and exists as long as the interface is plumbed.
7032 * Note: we do the ire_nce assignment here for IRE_BROADCAST
7033 * because some functions like ill_mark_bcast() inline the
7034 * ire_add functionality;
7035 */
7036 mutex_enter(&arpce->nce_lock);
7037 arpce->nce_state = ND_REACHABLE;
7038 arpce->nce_flags |= NCE_F_PERMANENT;
7039 arpce->nce_last = TICK_TO_MSEC(lbolt64);
7040 ire->ire_nce = arpce;
7041 mutex_exit(&arpce->nce_lock);
7042 /*
7043 * We are associating this nce to the ire,
7044 * so change the nce ref taken in
7045 * ndp_lookup_then_add_v4() from
7046 * NCE_REFHOLD to NCE_REFHOLD_NOTR
7047 */
7048 NCE_REFHOLD_TO_REFHOLD_NOTR(ire->ire_nce);
7049 } else {
7050 if (NCE_EXPIRED(arpce))
7051 arpce = nce_reinit(arpce);
7052 if (arpce != NULL) {
7053 /*
7054 * We are not using this nce_t just yet so release
7055 * the ref taken in ndp_lookup_then_add_v4()
7056 */
7057 NCE_REFRELE(arpce);
7058 } else {
7059 ip0dbg(("can't reinit arpce for ill 0x%p;\n",
7060 (void *)ire_ill));
7061 }
7062 }
7063 return (0);
7064}