/* * This file and its contents are supplied under the terms of the * Common Development and Distribution License ("CDDL"), version 1.0. * You may only use this file in accordance with the terms of version * 1.0 of the CDDL. * * A full copy of the text of the CDDL should have accompanied this * source. A copy of the CDDL is also available via the Internet at * http://www.illumos.org/license/CDDL. */ /* * This file is part of the Chelsio T4 support code. * * Copyright (C) 2010-2013 Chelsio Communications. All rights reserved. * * This program is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the LICENSE file included in this * release for licensing terms and conditions. */ #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include "common/common.h" #include "common/t4_msg.h" #include "common/t4_regs.h" #include "common/t4_regs_values.h" #include "t4_l2t.h" /* identifies sync vs async L2T_WRITE_REQs */ #define S_SYNC_WR 12 #define V_SYNC_WR(x) ((x) << S_SYNC_WR) #define F_SYNC_WR V_SYNC_WR(1) #define VLAN_NONE 0xfff /* * jhash.h: Jenkins hash support. * * Copyright (C) 1996 Bob Jenkins (bob_jenkins@burtleburtle.net) * * http://burtleburtle.net/bob/hash/ * * These are the credits from Bob's sources: * * lookup2.c, by Bob Jenkins, December 1996, Public Domain. * hash(), hash2(), hash3, and mix() are externally useful functions. * Routines to test the hash are included if SELF_TEST is defined. * You can use this free for any purpose. It has no warranty. */ /* NOTE: Arguments are modified. 
*/ #define __jhash_mix(a, b, c) \ { \ a -= b; a -= c; a ^= (c>>13); \ b -= c; b -= a; b ^= (a<<8); \ c -= a; c -= b; c ^= (b>>13); \ a -= b; a -= c; a ^= (c>>12); \ b -= c; b -= a; b ^= (a<<16); \ c -= a; c -= b; c ^= (b>>5); \ a -= b; a -= c; a ^= (c>>3); \ b -= c; b -= a; b ^= (a<<10); \ c -= a; c -= b; c ^= (b>>15); \ } /* The golden ration: an arbitrary value */ #define JHASH_GOLDEN_RATIO 0x9e3779b9 /* * A special ultra-optimized versions that knows they are hashing exactly * 3, 2 or 1 word(s). * * NOTE: In partilar the "c += length; __jhash_mix(a,b,c);" normally * done at the end is not done here. */ static inline u32 jhash_3words(u32 a, u32 b, u32 c, u32 initval) { a += JHASH_GOLDEN_RATIO; b += JHASH_GOLDEN_RATIO; c += initval; __jhash_mix(a, b, c); return (c); } static inline u32 jhash_2words(u32 a, u32 b, u32 initval) { return (jhash_3words(a, b, 0, initval)); } #if defined(__GNUC__) #define likely(x) __builtin_expect((x), 1) #define unlikely(x) __builtin_expect((x), 0) #else #define likely(x) (x) #define unlikely(x) (x) #endif /* defined(__GNUC__) */ enum { L2T_STATE_VALID, /* entry is up to date */ L2T_STATE_STALE, /* entry may be used but needs revalidation */ L2T_STATE_RESOLVING, /* entry needs address resolution */ L2T_STATE_SYNC_WRITE, /* synchronous write of entry underway */ /* when state is one of the below the entry is not hashed */ L2T_STATE_SWITCHING, /* entry is being used by a switching filter */ L2T_STATE_UNUSED /* entry not in use */ }; struct l2t_data { krwlock_t lock; u_int l2t_size; volatile uint_t nfree; /* number of free entries */ struct l2t_entry *rover; /* starting point for next allocation */ struct l2t_entry l2tab[]; }; #define VLAN_NONE 0xfff #define SA(x) ((struct sockaddr *)(x)) #define SIN(x) ((struct sockaddr_in *)(x)) #define SINADDR(x) (SIN(x)->sin_addr.s_addr) #define atomic_read(x) atomic_add_int_nv(x, 0) #ifdef TCP_OFFLOAD_ENABLE /* * Allocate a free L2T entry. * Must be called with l2t_data.lockatomic_load_acq_int held. 
*/ static struct l2t_entry * alloc_l2e(struct l2t_data *d) { struct l2t_entry *end, *e, **p; ASSERT(rw_write_held(&d->lock)); if (!atomic_read(&d->nfree)) return (NULL); /* there's definitely a free entry */ for (e = d->rover, end = &d->l2tab[d->l2t_size]; e != end; ++e) if (atomic_read(&e->refcnt) == 0) goto found; for (e = d->l2tab; atomic_read(&e->refcnt); ++e) /* */; found: d->rover = e + 1; atomic_dec_uint(&d->nfree); /* * The entry we found may be an inactive entry that is * presently in the hash table. We need to remove it. */ if (e->state < L2T_STATE_SWITCHING) { for (p = &d->l2tab[e->hash].first; *p; p = &(*p)->next) { if (*p == e) { *p = e->next; e->next = NULL; break; } } } e->state = L2T_STATE_UNUSED; return (e); } /* * Write an L2T entry. Must be called with the entry locked. * The write may be synchronous or asynchronous. */ static int write_l2e(adapter_t *sc, struct l2t_entry *e, int sync) { mblk_t *m; struct cpl_l2t_write_req *req; int idx = e->idx + sc->vres.l2t.start; ASSERT(MUTEX_HELD(&e->lock)); if ((m = allocb(sizeof (*req), BPRI_HI)) == NULL) return (ENOMEM); /* LINTED: E_BAD_PTR_CAST_ALIGN */ req = (struct cpl_l2t_write_req *)m->b_wptr; /* LINTED: E_CONSTANT_CONDITION */ INIT_TP_WR(req, 0); OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, idx | V_SYNC_WR(sync) | V_TID_QID(sc->sge.fwq.abs_id))); req->params = htons(V_L2T_W_PORT(e->lport) | V_L2T_W_NOREPLY(!sync)); req->l2t_idx = htons(idx); req->vlan = htons(e->vlan); (void) memcpy(req->dst_mac, e->dmac, sizeof (req->dst_mac)); m->b_wptr += sizeof (*req); (void) t4_mgmt_tx(sc, m); if (sync && e->state != L2T_STATE_SWITCHING) e->state = L2T_STATE_SYNC_WRITE; return (0); } #endif struct l2t_data * t4_init_l2t(struct adapter *sc) { int i, l2t_size; struct l2t_data *d; l2t_size = sc->vres.l2t.size; if(l2t_size < 1) return (NULL); d = kmem_zalloc(sizeof(*d) + l2t_size * sizeof (struct l2t_entry), KM_SLEEP); if (!d) return (NULL); d->l2t_size = l2t_size; d->rover = d->l2tab; (void) 
atomic_swap_uint(&d->nfree, l2t_size); rw_init(&d->lock, NULL, RW_DRIVER, NULL); for (i = 0; i < l2t_size; i++) { /* LINTED: E_ASSIGN_NARROW_CONV */ d->l2tab[i].idx = i; d->l2tab[i].state = L2T_STATE_UNUSED; mutex_init(&d->l2tab[i].lock, NULL, MUTEX_DRIVER, NULL); (void) atomic_swap_uint(&d->l2tab[i].refcnt, 0); } #ifdef TCP_OFFLOAD_ENABLE (void) t4_register_cpl_handler(sc, CPL_L2T_WRITE_RPL, do_l2t_write_rpl); #endif return (d); } int t4_free_l2t(struct l2t_data *d) { int i; for (i = 0; i < L2T_SIZE; i++) mutex_destroy(&d->l2tab[i].lock); rw_destroy(&d->lock); kmem_free(d, sizeof (*d)); return (0); } #ifdef TCP_OFFLOAD_ENABLE static inline void l2t_hold(struct l2t_data *d, struct l2t_entry *e) { if (atomic_inc_uint_nv(&e->refcnt) == 1) /* 0 -> 1 transition */ atomic_dec_uint(&d->nfree); } /* * To avoid having to check address families we do not allow v4 and v6 * neighbors to be on the same hash chain. We keep v4 entries in the first * half of available hash buckets and v6 in the second. */ enum { L2T_SZ_HALF = L2T_SIZE / 2, L2T_HASH_MASK = L2T_SZ_HALF - 1 }; static inline unsigned int arp_hash(const uint32_t *key, int ifindex) { return (jhash_2words(*key, ifindex, 0) & L2T_HASH_MASK); } static inline unsigned int ipv6_hash(const uint32_t *key, int ifindex) { uint32_t xor = key[0] ^ key[1] ^ key[2] ^ key[3]; return (L2T_SZ_HALF + (jhash_2words(xor, ifindex, 0) & L2T_HASH_MASK)); } static inline unsigned int addr_hash(const uint32_t *addr, int addr_len, int ifindex) { return (addr_len == 4 ? arp_hash(addr, ifindex) : ipv6_hash(addr, ifindex)); } /* * Checks if an L2T entry is for the given IP/IPv6 address. It does not check * whether the L2T entry and the address are of the same address family. * Callers ensure an address is only checked against L2T entries of the same * family, something made trivial by the separation of IP and IPv6 hash chains * mentioned above. 
 * Returns 0 if there's a match, non-zero otherwise.
 */
static inline int
addreq(const struct l2t_entry *e, const uint32_t *addr)
{
	/* OR of XORs: zero iff every word of the address matches. */
	if (e->v6 != 0)
		return ((e->addr[0] ^ addr[0]) | (e->addr[1] ^ addr[1]) |
		    (e->addr[2] ^ addr[2]) | (e->addr[3] ^ addr[3]));
	return (e->addr[0] ^ addr[0]);
}

/*
 * Add a packet to an L2T entry's queue of packets awaiting resolution.
 * Must be called with the entry's lock held.
 */
static inline void
arpq_enqueue(struct l2t_entry *e, mblk_t *m)
{
	ASSERT(MUTEX_HELD(&e->lock));
	ASSERT(m->b_next == NULL);

	/* Append to the tail of the singly-linked b_next chain. */
	if (e->arpq_head != NULL)
		e->arpq_tail->b_next = m;
	else
		e->arpq_head = m;
	e->arpq_tail = m;
}

/*
 * Send a packet through the L2T entry 'e'.  If the entry is VALID or STALE
 * the packet goes straight out; if it is still resolving (or waiting for a
 * synchronous HW write) the packet is queued on the entry and address
 * resolution is (re)initiated via ip2mac().
 *
 * Only IPv4 entries are supported here (the v6 case asserts).
 * Always returns 0.
 */
int
t4_l2t_send(struct adapter *sc, mblk_t *m, struct l2t_entry *e)
{
	sin_t *sin;
	ip2mac_t ip2m;

	if (e->v6 != 0)
		ASSERT(0);

again:
	switch (e->state) {
	case L2T_STATE_STALE:	/* entry is stale, kick off revalidation */
	/* Fall through */
	case L2T_STATE_VALID:	/* fast-path, send the packet on */
		(void) t4_wrq_tx(sc, MBUF_EQ(m), m);
		return (0);

	case L2T_STATE_RESOLVING:
	case L2T_STATE_SYNC_WRITE:
		mutex_enter(&e->lock);
		if (e->state != L2T_STATE_SYNC_WRITE &&
		    e->state != L2T_STATE_RESOLVING) {
			/* state changed by the time we got here */
			mutex_exit(&e->lock);
			goto again;
		}
		arpq_enqueue(e, m);
		mutex_exit(&e->lock);

		/* Build the IPv4 query address for ip2mac(). */
		bzero(&ip2m, sizeof (ip2m));
		sin = (sin_t *)&ip2m.ip2mac_pa;
		sin->sin_family = AF_INET;
		sin->sin_addr.s_addr = e->in_addr;
		ip2m.ip2mac_ifindex = e->ifindex;

		/*
		 * NOTE(review): e->state is re-read here without the entry
		 * lock; a concurrent state change between the unlock above
		 * and this test is possible.
		 */
		if (e->state == L2T_STATE_RESOLVING) {
			/*
			 * t4_l2t_update is the completion callback; when
			 * the answer is already available (err == 0) it is
			 * invoked synchronously here.  EINPROGRESS and any
			 * other error are unexpected (asserted).
			 */
			(void) ip2mac(IP2MAC_RESOLVE, &ip2m, t4_l2t_update,
			    e, 0);
			if (ip2m.ip2mac_err == EINPROGRESS)
				ASSERT(0);
			else if (ip2m.ip2mac_err == 0)
				t4_l2t_update(&ip2m, e);
			else
				ASSERT(0);
		}
	}

	return (0);
}

/*
 * Called when an L2T entry has no more users.  The entry is left in the hash
 * table since it is likely to be reused but we also bump nfree to indicate
 * that the entry can be reallocated for a different neighbor.  We also drop
 * the existing neighbor reference in case the neighbor is going away and is
 * waiting on our reference.
* * Because entries can be reallocated to other neighbors once their ref count * drops to 0 we need to take the entry's lock to avoid races with a new * incarnation. */ static void t4_l2e_free(struct l2t_entry *e) { struct l2t_data *d; mutex_enter(&e->lock); /* LINTED: E_NOP_IF_STMT */ if (atomic_read(&e->refcnt) == 0) { /* hasn't been recycled */ /* * Don't need to worry about the arpq, an L2T entry can't be * released if any packets are waiting for resolution as we * need to be able to communicate with the device to close a * connection. */ } mutex_exit(&e->lock); d = __containerof(e, struct l2t_data, l2tab[e->idx]); atomic_inc_uint(&d->nfree); } void t4_l2t_release(struct l2t_entry *e) { if (atomic_dec_uint_nv(&e->refcnt) == 0) t4_l2e_free(e); } /* ARGSUSED */ int do_l2t_write_rpl(struct sge_iq *iq, const struct rss_header *rss, mblk_t *m) { struct adapter *sc = iq->adapter; const struct cpl_l2t_write_rpl *rpl = (const void *)(rss + 1); unsigned int tid = GET_TID(rpl); unsigned int idx = tid % L2T_SIZE; if (likely(rpl->status != CPL_ERR_NONE)) { cxgb_printf(sc->dip, CE_WARN, "Unexpected L2T_WRITE_RPL status %u for entry %u", rpl->status, idx); return (-EINVAL); } return (0); } /* * The TOE wants an L2 table entry that it can use to reach the next hop over * the specified port. Produce such an entry - create one if needed. * * Note that the ifnet could be a pseudo-device like if_vlan, if_lagg, etc. on * top of the real cxgbe interface. 
 */
struct l2t_entry *
t4_l2t_get(struct port_info *pi, conn_t *connp)
{
	struct l2t_entry *e;
	struct l2t_data *d = pi->adapter->l2t;
	int addr_len;
	uint32_t *addr;
	int hash;
	/* Physical interface index of the ill underlying this connection. */
	int index =
	    connp->conn_ixa->ixa_ire->ire_ill->ill_phyint->phyint_ifindex;
	unsigned int smt_idx = pi->port_id;

	/* Only the IPv4 foreign address is used here, so addr_len is 4. */
	addr = (uint32_t *)&connp->conn_faddr_v4;
	addr_len = sizeof (connp->conn_faddr_v4);

	hash = addr_hash(addr, addr_len, index);

	rw_enter(&d->lock, RW_WRITER);

	/* Reuse an existing entry for the same (address, SMT index). */
	for (e = d->l2tab[hash].first; e; e = e->next) {
		if (!addreq(e, addr) && e->smt_idx == smt_idx) {
			l2t_hold(d, e);
			goto done;
		}
	}

	/* Need to allocate a new entry */
	e = alloc_l2e(d);
	if (e != NULL) {
		mutex_enter(&e->lock); /* avoid race with t4_l2t_free */
		e->state = L2T_STATE_RESOLVING;
		(void) memcpy(e->addr, addr, addr_len);
		e->in_addr = connp->conn_faddr_v4;
		e->ifindex = index;
		/* LINTED: E_ASSIGN_NARROW_CONV */
		e->smt_idx = smt_idx;
		/* LINTED: E_ASSIGN_NARROW_CONV */
		e->hash = hash;
		e->lport = pi->lport;
		e->arpq_head = e->arpq_tail = NULL;
		/* addr_len is always 4 above, so v6 is always false here. */
		e->v6 = (addr_len == 16);
		e->sc = pi->adapter;
		(void) atomic_swap_uint(&e->refcnt, 1);
		e->vlan = VLAN_NONE;
		/* Link at the head of the hash chain. */
		e->next = d->l2tab[hash].first;
		d->l2tab[hash].first = e;
		mutex_exit(&e->lock);
	} else {
		/* Table exhausted; callers do not handle NULL gracefully. */
		ASSERT(0);
	}

done:
	rw_exit(&d->lock);
	return (e);
}

/*
 * Called when the host's neighbor layer makes a change to some entry that is
 * loaded into the HW L2 table.
 *
 * Invoked either as the ip2mac() completion callback or synchronously from
 * t4_l2t_send() when resolution completed immediately; 'arg' is the
 * l2t_entry registered with the query.
 */
void
t4_l2t_update(ip2mac_t *ip2macp, void *arg)
{
	struct l2t_entry *e = (struct l2t_entry *)arg;
	struct adapter *sc = e->sc;
	uchar_t *cp;

	/*
	 * NOTE(review): on non-DEBUG kernels execution continues past this
	 * point even on error, using whatever is in ip2mac_ha.
	 */
	if (ip2macp->ip2mac_err != 0) {
		ASSERT(0); /* Don't know what to do. Needs to be investigated */
	}

	mutex_enter(&e->lock);
	if (atomic_read(&e->refcnt) != 0)
		goto found;
	/* No users: mark stale so the next sender revalidates. */
	e->state = L2T_STATE_STALE;
	mutex_exit(&e->lock);

	/* The TOE has no interest in this LLE */
	return;

found:
	/*
	 * NOTE(review): this re-tests the refcnt already checked above
	 * under the same lock hold; the second test is redundant but
	 * harmless.
	 */
	if (atomic_read(&e->refcnt) != 0) {
		/* Entry is referenced by at least 1 offloaded connection. */
		cp = (uchar_t *)LLADDR(&ip2macp->ip2mac_ha);
		bcopy(cp, e->dmac, 6);

		/* Push the resolved MAC to the HW L2 table (sync write). */
		(void) write_l2e(sc, e, 1);
		e->state = L2T_STATE_VALID;
	}
	mutex_exit(&e->lock);
}
#endif