1/**************************************************************************
2SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3
4Copyright (c) 2007-2009, Chelsio Inc.
5All rights reserved.
6
7Redistribution and use in source and binary forms, with or without
8modification, are permitted provided that the following conditions are met:
9
10 1. Redistributions of source code must retain the above copyright notice,
11    this list of conditions and the following disclaimer.
12
13 2. Neither the name of the Chelsio Corporation nor the names of its
14    contributors may be used to endorse or promote products derived from
15    this software without specific prior written permission.
16
17THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27POSSIBILITY OF SUCH DAMAGE.
28
29***************************************************************************/
30
31#include <sys/cdefs.h>
32__FBSDID("$FreeBSD$");
33
34#include "opt_inet.h"
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/kernel.h>
39#include <sys/bus.h>
40#include <sys/module.h>
41#include <sys/pciio.h>
42#include <sys/conf.h>
43#include <machine/bus.h>
44#include <machine/resource.h>
45#include <sys/ktr.h>
46#include <sys/rman.h>
47#include <sys/ioccom.h>
48#include <sys/mbuf.h>
49#include <sys/linker.h>
50#include <sys/firmware.h>
51#include <sys/socket.h>
52#include <sys/sockio.h>
53#include <sys/smp.h>
54#include <sys/sysctl.h>
55#include <sys/syslog.h>
56#include <sys/queue.h>
57#include <sys/taskqueue.h>
58#include <sys/proc.h>
59
60#include <net/bpf.h>
61#include <net/debugnet.h>
62#include <net/ethernet.h>
63#include <net/if.h>
64#include <net/if_var.h>
65#include <net/if_arp.h>
66#include <net/if_dl.h>
67#include <net/if_media.h>
68#include <net/if_types.h>
69#include <net/if_vlan_var.h>
70
71#include <netinet/in_systm.h>
72#include <netinet/in.h>
73#include <netinet/if_ether.h>
#include <netinet/ip.h>
76#include <netinet/tcp.h>
77#include <netinet/udp.h>
78
79#include <dev/pci/pcireg.h>
80#include <dev/pci/pcivar.h>
81#include <dev/pci/pci_private.h>
82
83#include <cxgb_include.h>
84
85#ifdef PRIV_SUPPORTED
86#include <sys/priv.h>
87#endif
88
89static int cxgb_setup_interrupts(adapter_t *);
90static void cxgb_teardown_interrupts(adapter_t *);
91static void cxgb_init(void *);
92static int cxgb_init_locked(struct port_info *);
93static int cxgb_uninit_locked(struct port_info *);
94static int cxgb_uninit_synchronized(struct port_info *);
95static int cxgb_ioctl(struct ifnet *, unsigned long, caddr_t);
96static int cxgb_media_change(struct ifnet *);
97static int cxgb_ifm_type(int);
98static void cxgb_build_medialist(struct port_info *);
99static void cxgb_media_status(struct ifnet *, struct ifmediareq *);
100static uint64_t cxgb_get_counter(struct ifnet *, ift_counter);
101static int setup_sge_qsets(adapter_t *);
102static void cxgb_async_intr(void *);
103static void cxgb_tick_handler(void *, int);
104static void cxgb_tick(void *);
105static void link_check_callout(void *);
106static void check_link_status(void *, int);
107static void setup_rss(adapter_t *sc);
108static int alloc_filters(struct adapter *);
109static int setup_hw_filters(struct adapter *);
110static int set_filter(struct adapter *, int, const struct filter_info *);
111static inline void mk_set_tcb_field(struct cpl_set_tcb_field *, unsigned int,
112    unsigned int, u64, u64);
113static inline void set_tcb_field_ulp(struct cpl_set_tcb_field *, unsigned int,
114    unsigned int, u64, u64);
115#ifdef TCP_OFFLOAD
116static int cpl_not_handled(struct sge_qset *, struct rsp_desc *, struct mbuf *);
117#endif
118
119/* Attachment glue for the PCI controller end of the device.  Each port of
120 * the device is attached separately, as defined later.
121 */
122static int cxgb_controller_probe(device_t);
123static int cxgb_controller_attach(device_t);
124static int cxgb_controller_detach(device_t);
125static void cxgb_free(struct adapter *);
126static __inline void reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
127    unsigned int end);
128static void cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf);
129static int cxgb_get_regs_len(void);
130static void touch_bars(device_t dev);
131static void cxgb_update_mac_settings(struct port_info *p);
132#ifdef TCP_OFFLOAD
133static int toe_capability(struct port_info *, int);
134#endif
135
136/* Table for probing the cards.  The desc field isn't actually used */
137struct cxgb_ident {
138	uint16_t	vendor;
139	uint16_t	device;
140	int		index;
141	char		*desc;
142} cxgb_identifiers[] = {
143	{PCI_VENDOR_ID_CHELSIO, 0x0020, 0, "PE9000"},
144	{PCI_VENDOR_ID_CHELSIO, 0x0021, 1, "T302E"},
145	{PCI_VENDOR_ID_CHELSIO, 0x0022, 2, "T310E"},
146	{PCI_VENDOR_ID_CHELSIO, 0x0023, 3, "T320X"},
147	{PCI_VENDOR_ID_CHELSIO, 0x0024, 1, "T302X"},
148	{PCI_VENDOR_ID_CHELSIO, 0x0025, 3, "T320E"},
149	{PCI_VENDOR_ID_CHELSIO, 0x0026, 2, "T310X"},
150	{PCI_VENDOR_ID_CHELSIO, 0x0030, 2, "T3B10"},
151	{PCI_VENDOR_ID_CHELSIO, 0x0031, 3, "T3B20"},
152	{PCI_VENDOR_ID_CHELSIO, 0x0032, 1, "T3B02"},
153	{PCI_VENDOR_ID_CHELSIO, 0x0033, 4, "T3B04"},
154	{PCI_VENDOR_ID_CHELSIO, 0x0035, 6, "T3C10"},
155	{PCI_VENDOR_ID_CHELSIO, 0x0036, 3, "S320E-CR"},
156	{PCI_VENDOR_ID_CHELSIO, 0x0037, 7, "N320E-G2"},
157	{0, 0, 0, NULL}
158};
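
/*
 * The index field selects the entry in the shared adapter_info table that
 * cxgb_get_adapter_info() fetches via t3_get_adapter_info(); vendor and
 * device are matched against the PCI IDs in cxgb_get_ident().
 */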
159
160static device_method_t cxgb_controller_methods[] = {
161	DEVMETHOD(device_probe,		cxgb_controller_probe),
162	DEVMETHOD(device_attach,	cxgb_controller_attach),
163	DEVMETHOD(device_detach,	cxgb_controller_detach),
164
165	DEVMETHOD_END
166};
167
168static driver_t cxgb_controller_driver = {
169	"cxgbc",
170	cxgb_controller_methods,
171	sizeof(struct adapter)
172};
173
174static int cxgbc_mod_event(module_t, int, void *);
175static devclass_t	cxgb_controller_devclass;
176DRIVER_MODULE(cxgbc, pci, cxgb_controller_driver, cxgb_controller_devclass,
177    cxgbc_mod_event, 0);
178MODULE_PNP_INFO("U16:vendor;U16:device", pci, cxgbc, cxgb_identifiers,
179    nitems(cxgb_identifiers) - 1);
180MODULE_VERSION(cxgbc, 1);
181MODULE_DEPEND(cxgbc, firmware, 1, 1, 1);
182
183/*
184 * Attachment glue for the ports.  Attachment is done directly to the
185 * controller device.
186 */
187static int cxgb_port_probe(device_t);
188static int cxgb_port_attach(device_t);
189static int cxgb_port_detach(device_t);
190
191static device_method_t cxgb_port_methods[] = {
192	DEVMETHOD(device_probe,		cxgb_port_probe),
193	DEVMETHOD(device_attach,	cxgb_port_attach),
194	DEVMETHOD(device_detach,	cxgb_port_detach),
195	{ 0, 0 }
196};
197
198static driver_t cxgb_port_driver = {
199	"cxgb",
200	cxgb_port_methods,
201	0
202};
203
204static d_ioctl_t cxgb_extension_ioctl;
205static d_open_t cxgb_extension_open;
206static d_close_t cxgb_extension_close;
207
208static struct cdevsw cxgb_cdevsw = {
209       .d_version =    D_VERSION,
210       .d_flags =      0,
211       .d_open =       cxgb_extension_open,
212       .d_close =      cxgb_extension_close,
213       .d_ioctl =      cxgb_extension_ioctl,
214       .d_name =       "cxgb",
215};
216
217static devclass_t	cxgb_port_devclass;
218DRIVER_MODULE(cxgb, cxgbc, cxgb_port_driver, cxgb_port_devclass, 0, 0);
219MODULE_VERSION(cxgb, 1);
220
221DEBUGNET_DEFINE(cxgb);
222
223static struct mtx t3_list_lock;
224static SLIST_HEAD(, adapter) t3_list;
225#ifdef TCP_OFFLOAD
226static struct mtx t3_uld_list_lock;
227static SLIST_HEAD(, uld_info) t3_uld_list;
228#endif
229
230/*
231 * The driver uses the best interrupt scheme available on a platform in the
232 * order MSI-X, MSI, legacy pin interrupts.  This parameter determines which
233 * of these schemes the driver may consider as follows:
234 *
235 * msi = 2: choose from among all three options
 * msi = 1: only consider MSI and pin interrupts
237 * msi = 0: force pin interrupts
238 */
239static int msi_allowed = 2;
240
241SYSCTL_NODE(_hw, OID_AUTO, cxgb, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
242    "CXGB driver parameters");
243SYSCTL_INT(_hw_cxgb, OID_AUTO, msi_allowed, CTLFLAG_RDTUN, &msi_allowed, 0,
244    "MSI-X, MSI, INTx selector");
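
/*
 * msi_allowed is a loader tunable (CTLFLAG_RDTUN); for example, setting
 * hw.cxgb.msi_allowed="1" in /boot/loader.conf restricts the driver to
 * MSI or INTx before it attaches.
 */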
245
246/*
247 * The driver uses an auto-queue algorithm by default.
248 * To disable it and force a single queue-set per port, use multiq = 0
249 */
250static int multiq = 1;
251SYSCTL_INT(_hw_cxgb, OID_AUTO, multiq, CTLFLAG_RDTUN, &multiq, 0,
252    "use min(ncpus/ports, 8) queue-sets per port");
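
/*
 * multiq is likewise a boot-time tunable: hw.cxgb.multiq="0" in
 * loader.conf forces a single queue set per port even when MSI-X and
 * multiple CPUs are available.
 */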
253
254/*
255 * By default the driver will not update the firmware unless
 * it was compiled against a newer version.
 */
259static int force_fw_update = 0;
260SYSCTL_INT(_hw_cxgb, OID_AUTO, force_fw_update, CTLFLAG_RDTUN, &force_fw_update, 0,
261    "update firmware even if up to date");
262
263int cxgb_use_16k_clusters = -1;
264SYSCTL_INT(_hw_cxgb, OID_AUTO, use_16k_clusters, CTLFLAG_RDTUN,
    &cxgb_use_16k_clusters, 0, "use 16kB clusters for the jumbo queue");
266
267static int nfilters = -1;
268SYSCTL_INT(_hw_cxgb, OID_AUTO, nfilters, CTLFLAG_RDTUN,
269    &nfilters, 0, "max number of entries in the filter table");
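
/*
 * With the default of -1, cxgb_up() sizes the filter table from the MC5
 * TCAM (t3_mc5_size() - MC5_MIN_TIDS); any other positive value is capped
 * at that maximum.
 */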
270
271enum {
272	MAX_TXQ_ENTRIES      = 16384,
273	MAX_CTRL_TXQ_ENTRIES = 1024,
274	MAX_RSPQ_ENTRIES     = 16384,
275	MAX_RX_BUFFERS       = 16384,
276	MAX_RX_JUMBO_BUFFERS = 16384,
277	MIN_TXQ_ENTRIES      = 4,
278	MIN_CTRL_TXQ_ENTRIES = 4,
279	MIN_RSPQ_ENTRIES     = 32,
280	MIN_FL_ENTRIES       = 32,
281	MIN_FL_JUMBO_ENTRIES = 32
282};
283
284struct filter_info {
285	u32 sip;
286	u32 sip_mask;
287	u32 dip;
288	u16 sport;
289	u16 dport;
290	u32 vlan:12;
291	u32 vlan_prio:3;
292	u32 mac_hit:1;
293	u32 mac_idx:4;
294	u32 mac_vld:1;
295	u32 pkt_type:2;
296	u32 report_filter_id:1;
297	u32 pass:1;
298	u32 rss:1;
299	u32 qset:3;
300	u32 locked:1;
301	u32 valid:1;
302};
303
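/*
 * Sketch of a filter entry with hypothetical values: .dip = 0x0a000001 and
 * .dport = 80 describe the match (10.0.0.1:80), .qset = 2 selects the
 * receive queue set, and .pass/.valid mark the entry as an active pass
 * rule.  Entries are normally installed through the driver's
 * character-device ioctls (cxgb_extension_ioctl) and pushed to the MC5
 * TCAM by set_filter().
 */
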
304enum { FILTER_NO_VLAN_PRI = 7 };
305
306#define EEPROM_MAGIC 0x38E2F10C
307
308#define PORT_MASK ((1 << MAX_NPORTS) - 1)
309
static int set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset);

314static __inline char
315t3rev2char(struct adapter *adapter)
316{
317	char rev = 'z';
318
319	switch(adapter->params.rev) {
320	case T3_REV_A:
321		rev = 'a';
322		break;
323	case T3_REV_B:
324	case T3_REV_B2:
325		rev = 'b';
326		break;
327	case T3_REV_C:
328		rev = 'c';
329		break;
330	}
331	return rev;
332}
333
334static struct cxgb_ident *
335cxgb_get_ident(device_t dev)
336{
337	struct cxgb_ident *id;
338
339	for (id = cxgb_identifiers; id->desc != NULL; id++) {
340		if ((id->vendor == pci_get_vendor(dev)) &&
341		    (id->device == pci_get_device(dev))) {
342			return (id);
343		}
344	}
345	return (NULL);
346}
347
348static const struct adapter_info *
349cxgb_get_adapter_info(device_t dev)
350{
351	struct cxgb_ident *id;
352	const struct adapter_info *ai;
353
354	id = cxgb_get_ident(dev);
355	if (id == NULL)
356		return (NULL);
357
358	ai = t3_get_adapter_info(id->index);
359
360	return (ai);
361}
362
363static int
364cxgb_controller_probe(device_t dev)
365{
366	const struct adapter_info *ai;
367	char *ports, buf[80];
368	int nports;
369
370	ai = cxgb_get_adapter_info(dev);
371	if (ai == NULL)
372		return (ENXIO);
373
374	nports = ai->nports0 + ai->nports1;
375	if (nports == 1)
376		ports = "port";
377	else
378		ports = "ports";
379
380	snprintf(buf, sizeof(buf), "%s, %d %s", ai->desc, nports, ports);
381	device_set_desc_copy(dev, buf);
382	return (BUS_PROBE_DEFAULT);
383}
384
385#define FW_FNAME "cxgb_t3fw"
386#define TPEEPROM_NAME "cxgb_t3%c_tp_eeprom"
387#define TPSRAM_NAME "cxgb_t3%c_protocol_sram"
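
/*
 * These are firmware(9) image names.  FW_FNAME is resolved by
 * firmware_get() in upgrade_fw() below; the TP EEPROM/SRAM names take the
 * chip revision from t3rev2char(), e.g. "cxgb_t3c_protocol_sram" on a
 * T3C part.
 */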
388
389static int
390upgrade_fw(adapter_t *sc)
391{
392	const struct firmware *fw;
393	int status;
394	u32 vers;
395
396	if ((fw = firmware_get(FW_FNAME)) == NULL)  {
397		device_printf(sc->dev, "Could not find firmware image %s\n", FW_FNAME);
398		return (ENOENT);
399	} else
400		device_printf(sc->dev, "installing firmware on card\n");
401	status = t3_load_fw(sc, (const uint8_t *)fw->data, fw->datasize);
402
403	if (status != 0) {
404		device_printf(sc->dev, "failed to install firmware: %d\n",
405		    status);
406	} else {
407		t3_get_fw_version(sc, &vers);
408		snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
409		    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
410		    G_FW_VERSION_MICRO(vers));
411	}
412
413	firmware_put(fw, FIRMWARE_UNLOAD);
414
415	return (status);
416}
417
418/*
419 * The cxgb_controller_attach function is responsible for the initial
420 * bringup of the device.  Its responsibilities include:
421 *
422 *  1. Determine if the device supports MSI or MSI-X.
423 *  2. Allocate bus resources so that we can access the Base Address Register
424 *  3. Create and initialize mutexes for the controller and its control
425 *     logic such as SGE and MDIO.
426 *  4. Call hardware specific setup routine for the adapter as a whole.
427 *  5. Allocate the BAR for doing MSI-X.
428 *  6. Setup the line interrupt iff MSI-X is not supported.
429 *  7. Create the driver's taskq.
430 *  8. Start one task queue service thread.
431 *  9. Check if the firmware and SRAM are up-to-date.  They will be
432 *     auto-updated later (before FULL_INIT_DONE), if required.
433 * 10. Create a child device for each MAC (port)
434 * 11. Initialize T3 private state.
435 * 12. Trigger the LED
436 * 13. Setup offload iff supported.
437 * 14. Reset/restart the tick callout.
438 * 15. Attach sysctls
439 *
 * NOTE: Any modification of or deviation from this sequence MUST be
 * reflected in the comment above.  Deviating from the sequence itself can
 * cause problems under various error conditions, including link flapping.
443 */
444static int
445cxgb_controller_attach(device_t dev)
446{
447	device_t child;
448	const struct adapter_info *ai;
449	struct adapter *sc;
450	int i, error = 0;
451	uint32_t vers;
452	int port_qsets = 1;
453	int msi_needed, reg;
454	char buf[80];
455
456	sc = device_get_softc(dev);
457	sc->dev = dev;
458	sc->msi_count = 0;
459	ai = cxgb_get_adapter_info(dev);
460
461	snprintf(sc->lockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb controller lock %d",
462	    device_get_unit(dev));
463	ADAPTER_LOCK_INIT(sc, sc->lockbuf);
464
465	snprintf(sc->reglockbuf, ADAPTER_LOCK_NAME_LEN, "SGE reg lock %d",
466	    device_get_unit(dev));
467	snprintf(sc->mdiolockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb mdio lock %d",
468	    device_get_unit(dev));
469	snprintf(sc->elmerlockbuf, ADAPTER_LOCK_NAME_LEN, "cxgb elmer lock %d",
470	    device_get_unit(dev));
471
472	MTX_INIT(&sc->sge.reg_lock, sc->reglockbuf, NULL, MTX_SPIN);
473	MTX_INIT(&sc->mdio_lock, sc->mdiolockbuf, NULL, MTX_DEF);
474	MTX_INIT(&sc->elmer_lock, sc->elmerlockbuf, NULL, MTX_DEF);
475
476	mtx_lock(&t3_list_lock);
477	SLIST_INSERT_HEAD(&t3_list, sc, link);
478	mtx_unlock(&t3_list_lock);
479
	/* Find the PCIe link width and set max read request to 4KB. */
481	if (pci_find_cap(dev, PCIY_EXPRESS, &reg) == 0) {
482		uint16_t lnk;
483
484		lnk = pci_read_config(dev, reg + PCIER_LINK_STA, 2);
485		sc->link_width = (lnk & PCIEM_LINK_STA_WIDTH) >> 4;
486		if (sc->link_width < 8 &&
487		    (ai->caps & SUPPORTED_10000baseT_Full)) {
488			device_printf(sc->dev,
489			    "PCIe x%d Link, expect reduced performance\n",
490			    sc->link_width);
491		}
492
493		pci_set_max_read_req(dev, 4096);
494	}
495
496	touch_bars(dev);
497	pci_enable_busmaster(dev);
498	/*
499	 * Allocate the registers and make them available to the driver.
500	 * The registers that we care about for NIC mode are in BAR 0
501	 */
502	sc->regs_rid = PCIR_BAR(0);
503	if ((sc->regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
504	    &sc->regs_rid, RF_ACTIVE)) == NULL) {
505		device_printf(dev, "Cannot allocate BAR region 0\n");
506		error = ENXIO;
507		goto out;
508	}
509
510	sc->bt = rman_get_bustag(sc->regs_res);
511	sc->bh = rman_get_bushandle(sc->regs_res);
512	sc->mmio_len = rman_get_size(sc->regs_res);
513
514	for (i = 0; i < MAX_NPORTS; i++)
515		sc->port[i].adapter = sc;
516
517	if (t3_prep_adapter(sc, ai, 1) < 0) {
518		printf("prep adapter failed\n");
519		error = ENODEV;
520		goto out;
521	}
522
523	sc->udbs_rid = PCIR_BAR(2);
524	sc->udbs_res = NULL;
525	if (is_offload(sc) &&
526	    ((sc->udbs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
527		   &sc->udbs_rid, RF_ACTIVE)) == NULL)) {
		device_printf(dev, "Cannot allocate BAR region 2\n");
529		error = ENXIO;
530		goto out;
531	}
532
	/* Allocate the BAR for doing MSI-X.  If it succeeds, try to allocate
534	 * enough messages for the queue sets.  If that fails, try falling
535	 * back to MSI.  If that fails, then try falling back to the legacy
536	 * interrupt pin model.
537	 */
538	sc->msix_regs_rid = 0x20;
539	if ((msi_allowed >= 2) &&
540	    (sc->msix_regs_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
541	    &sc->msix_regs_rid, RF_ACTIVE)) != NULL) {
542
543		if (multiq)
544			port_qsets = min(SGE_QSETS/sc->params.nports, mp_ncpus);
545		msi_needed = sc->msi_count = sc->params.nports * port_qsets + 1;
546
547		if (pci_msix_count(dev) == 0 ||
548		    (error = pci_alloc_msix(dev, &sc->msi_count)) != 0 ||
549		    sc->msi_count != msi_needed) {
550			device_printf(dev, "alloc msix failed - "
551				      "msi_count=%d, msi_needed=%d, err=%d; "
552				      "will try MSI\n", sc->msi_count,
553				      msi_needed, error);
554			sc->msi_count = 0;
555			port_qsets = 1;
556			pci_release_msi(dev);
557			bus_release_resource(dev, SYS_RES_MEMORY,
558			    sc->msix_regs_rid, sc->msix_regs_res);
559			sc->msix_regs_res = NULL;
560		} else {
561			sc->flags |= USING_MSIX;
562			sc->cxgb_intr = cxgb_async_intr;
563			device_printf(dev,
564				      "using MSI-X interrupts (%u vectors)\n",
565				      sc->msi_count);
566		}
567	}
568
569	if ((msi_allowed >= 1) && (sc->msi_count == 0)) {
570		sc->msi_count = 1;
571		if ((error = pci_alloc_msi(dev, &sc->msi_count)) != 0) {
572			device_printf(dev, "alloc msi failed - "
573				      "err=%d; will try INTx\n", error);
574			sc->msi_count = 0;
575			port_qsets = 1;
576			pci_release_msi(dev);
577		} else {
578			sc->flags |= USING_MSI;
579			sc->cxgb_intr = t3_intr_msi;
580			device_printf(dev, "using MSI interrupts\n");
581		}
582	}
583	if (sc->msi_count == 0) {
584		device_printf(dev, "using line interrupts\n");
585		sc->cxgb_intr = t3b_intr;
586	}
587
588	/* Create a private taskqueue thread for handling driver events */
589	sc->tq = taskqueue_create("cxgb_taskq", M_NOWAIT,
590	    taskqueue_thread_enqueue, &sc->tq);
591	if (sc->tq == NULL) {
		device_printf(dev, "failed to allocate controller task queue\n");
		error = ENOMEM;
		goto out;
594	}
595
596	taskqueue_start_threads(&sc->tq, 1, PI_NET, "%s taskq",
597	    device_get_nameunit(dev));
598	TASK_INIT(&sc->tick_task, 0, cxgb_tick_handler, sc);
599
600
601	/* Create a periodic callout for checking adapter status */
602	callout_init(&sc->cxgb_tick_ch, 1);
603
604	if (t3_check_fw_version(sc) < 0 || force_fw_update) {
605		/*
606		 * Warn user that a firmware update will be attempted in init.
607		 */
608		device_printf(dev, "firmware needs to be updated to version %d.%d.%d\n",
609		    FW_VERSION_MAJOR, FW_VERSION_MINOR, FW_VERSION_MICRO);
610		sc->flags &= ~FW_UPTODATE;
611	} else {
612		sc->flags |= FW_UPTODATE;
613	}
614
615	if (t3_check_tpsram_version(sc) < 0) {
616		/*
617		 * Warn user that a firmware update will be attempted in init.
618		 */
619		device_printf(dev, "SRAM needs to be updated to version %c-%d.%d.%d\n",
620		    t3rev2char(sc), TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
621		sc->flags &= ~TPS_UPTODATE;
622	} else {
623		sc->flags |= TPS_UPTODATE;
624	}
625
626	/*
627	 * Create a child device for each MAC.  The ethernet attachment
628	 * will be done in these children.
629	 */
630	for (i = 0; i < (sc)->params.nports; i++) {
631		struct port_info *pi;
632
633		if ((child = device_add_child(dev, "cxgb", -1)) == NULL) {
634			device_printf(dev, "failed to add child port\n");
635			error = EINVAL;
636			goto out;
637		}
638		pi = &sc->port[i];
639		pi->adapter = sc;
640		pi->nqsets = port_qsets;
641		pi->first_qset = i*port_qsets;
642		pi->port_id = i;
643		pi->tx_chan = i >= ai->nports0;
644		pi->txpkt_intf = pi->tx_chan ? 2 * (i - ai->nports0) + 1 : 2 * i;
645		sc->rxpkt_map[pi->txpkt_intf] = i;
646		sc->port[i].tx_chan = i >= ai->nports0;
647		sc->portdev[i] = child;
648		device_set_softc(child, pi);
649	}
650	if ((error = bus_generic_attach(dev)) != 0)
651		goto out;
652
653	/* initialize sge private state */
654	t3_sge_init_adapter(sc);
655
656	t3_led_ready(sc);
657
658	error = t3_get_fw_version(sc, &vers);
659	if (error)
660		goto out;
661
662	snprintf(&sc->fw_version[0], sizeof(sc->fw_version), "%d.%d.%d",
663	    G_FW_VERSION_MAJOR(vers), G_FW_VERSION_MINOR(vers),
664	    G_FW_VERSION_MICRO(vers));
665
666	snprintf(buf, sizeof(buf), "%s %sNIC\t E/C: %s S/N: %s",
667		 ai->desc, is_offload(sc) ? "R" : "",
668		 sc->params.vpd.ec, sc->params.vpd.sn);
669	device_set_desc_copy(dev, buf);
670
671	snprintf(&sc->port_types[0], sizeof(sc->port_types), "%x%x%x%x",
672		 sc->params.vpd.port_type[0], sc->params.vpd.port_type[1],
673		 sc->params.vpd.port_type[2], sc->params.vpd.port_type[3]);
674
675	device_printf(sc->dev, "Firmware Version %s\n", &sc->fw_version[0]);
676	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
677	t3_add_attach_sysctls(sc);
678
679#ifdef TCP_OFFLOAD
680	for (i = 0; i < NUM_CPL_HANDLERS; i++)
681		sc->cpl_handler[i] = cpl_not_handled;
682#endif
683
684	t3_intr_clear(sc);
685	error = cxgb_setup_interrupts(sc);
686out:
687	if (error)
688		cxgb_free(sc);
689
690	return (error);
691}
692
693/*
 * The cxgb_controller_detach routine is called when the device is
 * unloaded from the system.
696 */
697
698static int
699cxgb_controller_detach(device_t dev)
700{
701	struct adapter *sc;
702
703	sc = device_get_softc(dev);
704
705	cxgb_free(sc);
706
707	return (0);
708}
709
710/*
 * cxgb_free() is called by the cxgb_controller_detach() routine to tear
 * down the structures that were built up in cxgb_controller_attach(), and
 * should be the final piece of work done when fully unloading the driver.
 * Its responsibilities include:
 *
717 *  1. Shutting down the threads started by the cxgb_controller_attach()
718 *     routine.
719 *  2. Stopping the lower level device and all callouts (cxgb_down_locked()).
720 *  3. Detaching all of the port devices created during the
721 *     cxgb_controller_attach() routine.
722 *  4. Removing the device children created via cxgb_controller_attach().
723 *  5. Releasing PCI resources associated with the device.
724 *  6. Turning off the offload support, iff it was turned on.
725 *  7. Destroying the mutexes created in cxgb_controller_attach().
726 *
727 */
728static void
729cxgb_free(struct adapter *sc)
730{
731	int i, nqsets = 0;
732
733	ADAPTER_LOCK(sc);
734	sc->flags |= CXGB_SHUTDOWN;
735	ADAPTER_UNLOCK(sc);
736
737	/*
738	 * Make sure all child devices are gone.
739	 */
740	bus_generic_detach(sc->dev);
741	for (i = 0; i < (sc)->params.nports; i++) {
742		if (sc->portdev[i] &&
743		    device_delete_child(sc->dev, sc->portdev[i]) != 0)
744			device_printf(sc->dev, "failed to delete child port\n");
745		nqsets += sc->port[i].nqsets;
746	}
747
748	/*
749	 * At this point, it is as if cxgb_port_detach has run on all ports, and
750	 * cxgb_down has run on the adapter.  All interrupts have been silenced,
751	 * all open devices have been closed.
752	 */
753	KASSERT(sc->open_device_map == 0, ("%s: device(s) still open (%x)",
754					   __func__, sc->open_device_map));
755	for (i = 0; i < sc->params.nports; i++) {
756		KASSERT(sc->port[i].ifp == NULL, ("%s: port %i undead!",
757						  __func__, i));
758	}
759
760	/*
761	 * Finish off the adapter's callouts.
762	 */
763	callout_drain(&sc->cxgb_tick_ch);
764	callout_drain(&sc->sge_timer_ch);
765
766	/*
767	 * Release resources grabbed under FULL_INIT_DONE by cxgb_up.  The
768	 * sysctls are cleaned up by the kernel linker.
769	 */
770	if (sc->flags & FULL_INIT_DONE) {
771 		t3_free_sge_resources(sc, nqsets);
772 		sc->flags &= ~FULL_INIT_DONE;
773 	}
774
775	/*
776	 * Release all interrupt resources.
777	 */
778	cxgb_teardown_interrupts(sc);
779	if (sc->flags & (USING_MSI | USING_MSIX)) {
780		device_printf(sc->dev, "releasing msi message(s)\n");
781		pci_release_msi(sc->dev);
782	} else {
783		device_printf(sc->dev, "no msi message to release\n");
784	}
785
786	if (sc->msix_regs_res != NULL) {
787		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->msix_regs_rid,
788		    sc->msix_regs_res);
789	}
790
791	/*
792	 * Free the adapter's taskqueue.
793	 */
794	if (sc->tq != NULL) {
795		taskqueue_free(sc->tq);
796		sc->tq = NULL;
797	}
798
799	free(sc->filters, M_DEVBUF);
800	t3_sge_free(sc);
801
802	if (sc->udbs_res != NULL)
803		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->udbs_rid,
804		    sc->udbs_res);
805
806	if (sc->regs_res != NULL)
807		bus_release_resource(sc->dev, SYS_RES_MEMORY, sc->regs_rid,
808		    sc->regs_res);
809
810	MTX_DESTROY(&sc->mdio_lock);
811	MTX_DESTROY(&sc->sge.reg_lock);
812	MTX_DESTROY(&sc->elmer_lock);
813	mtx_lock(&t3_list_lock);
814	SLIST_REMOVE(&t3_list, sc, adapter, link);
815	mtx_unlock(&t3_list_lock);
816	ADAPTER_LOCK_DEINIT(sc);
817}
818
819/**
820 *	setup_sge_qsets - configure SGE Tx/Rx/response queues
821 *	@sc: the controller softc
822 *
823 *	Determines how many sets of SGE queues to use and initializes them.
824 *	We support multiple queue sets per port if we have MSI-X, otherwise
825 *	just one queue set per port.
826 */
827static int
828setup_sge_qsets(adapter_t *sc)
829{
830	int i, j, err, irq_idx = 0, qset_idx = 0;
831	u_int ntxq = SGE_TXQ_PER_SET;
832
833	if ((err = t3_sge_alloc(sc)) != 0) {
834		device_printf(sc->dev, "t3_sge_alloc returned %d\n", err);
835		return (err);
836	}
837
838	if (sc->params.rev > 0 && !(sc->flags & USING_MSI))
839		irq_idx = -1;
840
841	for (i = 0; i < (sc)->params.nports; i++) {
842		struct port_info *pi = &sc->port[i];
843
844		for (j = 0; j < pi->nqsets; j++, qset_idx++) {
845			err = t3_sge_alloc_qset(sc, qset_idx, (sc)->params.nports,
846			    (sc->flags & USING_MSIX) ? qset_idx + 1 : irq_idx,
847			    &sc->params.sge.qset[qset_idx], ntxq, pi);
848			if (err) {
849				t3_free_sge_resources(sc, qset_idx);
850				device_printf(sc->dev,
851				    "t3_sge_alloc_qset failed with %d\n", err);
852				return (err);
853			}
854		}
855	}
856
857	sc->nqsets = qset_idx;
858
859	return (0);
860}
861
862static void
863cxgb_teardown_interrupts(adapter_t *sc)
864{
865	int i;
866
867	for (i = 0; i < SGE_QSETS; i++) {
868		if (sc->msix_intr_tag[i] == NULL) {
			/* Should have been set up fully or not at all */
871			KASSERT(sc->msix_irq_res[i] == NULL &&
872				sc->msix_irq_rid[i] == 0,
873				("%s: half-done interrupt (%d).", __func__, i));
874
875			continue;
876		}
877
878		bus_teardown_intr(sc->dev, sc->msix_irq_res[i],
879				  sc->msix_intr_tag[i]);
880		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->msix_irq_rid[i],
881				     sc->msix_irq_res[i]);
882
883		sc->msix_irq_res[i] = sc->msix_intr_tag[i] = NULL;
884		sc->msix_irq_rid[i] = 0;
885	}
886
887	if (sc->intr_tag) {
888		KASSERT(sc->irq_res != NULL,
889			("%s: half-done interrupt.", __func__));
890
891		bus_teardown_intr(sc->dev, sc->irq_res, sc->intr_tag);
892		bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
893				     sc->irq_res);
894
895		sc->irq_res = sc->intr_tag = NULL;
896		sc->irq_rid = 0;
897	}
898}
899
900static int
901cxgb_setup_interrupts(adapter_t *sc)
902{
903	struct resource *res;
904	void *tag;
905	int i, rid, err, intr_flag = sc->flags & (USING_MSI | USING_MSIX);
906
907	sc->irq_rid = intr_flag ? 1 : 0;
908	sc->irq_res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &sc->irq_rid,
909					     RF_SHAREABLE | RF_ACTIVE);
910	if (sc->irq_res == NULL) {
911		device_printf(sc->dev, "Cannot allocate interrupt (%x, %u)\n",
912			      intr_flag, sc->irq_rid);
913		err = EINVAL;
914		sc->irq_rid = 0;
915	} else {
916		err = bus_setup_intr(sc->dev, sc->irq_res,
917		    INTR_MPSAFE | INTR_TYPE_NET, NULL,
918		    sc->cxgb_intr, sc, &sc->intr_tag);
919
920		if (err) {
921			device_printf(sc->dev,
922				      "Cannot set up interrupt (%x, %u, %d)\n",
923				      intr_flag, sc->irq_rid, err);
924			bus_release_resource(sc->dev, SYS_RES_IRQ, sc->irq_rid,
925					     sc->irq_res);
926			sc->irq_res = sc->intr_tag = NULL;
927			sc->irq_rid = 0;
928		}
929	}
930
931	/* That's all for INTx or MSI */
932	if (!(intr_flag & USING_MSIX) || err)
933		return (err);
934
935	bus_describe_intr(sc->dev, sc->irq_res, sc->intr_tag, "err");
936	for (i = 0; i < sc->msi_count - 1; i++) {
937		rid = i + 2;
938		res = bus_alloc_resource_any(sc->dev, SYS_RES_IRQ, &rid,
939					     RF_SHAREABLE | RF_ACTIVE);
940		if (res == NULL) {
941			device_printf(sc->dev, "Cannot allocate interrupt "
942				      "for message %d\n", rid);
943			err = EINVAL;
944			break;
945		}
946
947		err = bus_setup_intr(sc->dev, res, INTR_MPSAFE | INTR_TYPE_NET,
948				     NULL, t3_intr_msix, &sc->sge.qs[i], &tag);
949		if (err) {
950			device_printf(sc->dev, "Cannot set up interrupt "
951				      "for message %d (%d)\n", rid, err);
952			bus_release_resource(sc->dev, SYS_RES_IRQ, rid, res);
953			break;
954		}
955
956		sc->msix_irq_rid[i] = rid;
957		sc->msix_irq_res[i] = res;
958		sc->msix_intr_tag[i] = tag;
959		bus_describe_intr(sc->dev, res, tag, "qs%d", i);
960	}
961
962	if (err)
963		cxgb_teardown_interrupts(sc);
964
965	return (err);
966}
967
968
969static int
970cxgb_port_probe(device_t dev)
971{
972	struct port_info *p;
973	char buf[80];
974	const char *desc;
975
976	p = device_get_softc(dev);
977	desc = p->phy.desc;
978	snprintf(buf, sizeof(buf), "Port %d %s", p->port_id, desc);
979	device_set_desc_copy(dev, buf);
980	return (0);
981}
982
983
984static int
985cxgb_makedev(struct port_info *pi)
986{
987
988	pi->port_cdev = make_dev(&cxgb_cdevsw, pi->ifp->if_dunit,
989	    UID_ROOT, GID_WHEEL, 0600, "%s", if_name(pi->ifp));
990
991	if (pi->port_cdev == NULL)
992		return (ENOMEM);
993
994	pi->port_cdev->si_drv1 = (void *)pi;
995
996	return (0);
997}
998
999#define CXGB_CAP (IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_MTU | IFCAP_HWCSUM | \
1000    IFCAP_VLAN_HWCSUM | IFCAP_TSO | IFCAP_JUMBO_MTU | IFCAP_LRO | \
1001    IFCAP_VLAN_HWTSO | IFCAP_LINKSTATE | IFCAP_HWCSUM_IPV6)
1002#define CXGB_CAP_ENABLE CXGB_CAP
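
/*
 * Everything in CXGB_CAP is also enabled by default.  Individual features
 * can be toggled at runtime with ifconfig(8), e.g. "ifconfig cxgb0 -tso"
 * or "ifconfig cxgb0 -txcsum"; the SIOCSIFCAP handling in cxgb_ioctl()
 * below keeps if_hwassist and the TSO/checksum dependencies consistent.
 */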
1003
1004static int
1005cxgb_port_attach(device_t dev)
1006{
1007	struct port_info *p;
1008	struct ifnet *ifp;
1009	int err;
1010	struct adapter *sc;
1011
1012	p = device_get_softc(dev);
1013	sc = p->adapter;
1014	snprintf(p->lockbuf, PORT_NAME_LEN, "cxgb port lock %d:%d",
1015	    device_get_unit(device_get_parent(dev)), p->port_id);
1016	PORT_LOCK_INIT(p, p->lockbuf);
1017
1018	callout_init(&p->link_check_ch, 1);
1019	TASK_INIT(&p->link_check_task, 0, check_link_status, p);
1020
1021	/* Allocate an ifnet object and set it up */
1022	ifp = p->ifp = if_alloc(IFT_ETHER);
1023	if (ifp == NULL) {
1024		device_printf(dev, "Cannot allocate ifnet\n");
1025		return (ENOMEM);
1026	}
1027
1028	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1029	ifp->if_init = cxgb_init;
1030	ifp->if_softc = p;
1031	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1032	ifp->if_ioctl = cxgb_ioctl;
1033	ifp->if_transmit = cxgb_transmit;
1034	ifp->if_qflush = cxgb_qflush;
1035	ifp->if_get_counter = cxgb_get_counter;
1036
1037	ifp->if_capabilities = CXGB_CAP;
1038#ifdef TCP_OFFLOAD
1039	if (is_offload(sc))
1040		ifp->if_capabilities |= IFCAP_TOE4;
1041#endif
1042	ifp->if_capenable = CXGB_CAP_ENABLE;
1043	ifp->if_hwassist = CSUM_TCP | CSUM_UDP | CSUM_IP | CSUM_TSO |
1044	    CSUM_UDP_IPV6 | CSUM_TCP_IPV6;
1045
1046	/*
1047	 * Disable TSO on 4-port - it isn't supported by the firmware.
1048	 */
1049	if (sc->params.nports > 2) {
1050		ifp->if_capabilities &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1051		ifp->if_capenable &= ~(IFCAP_TSO | IFCAP_VLAN_HWTSO);
1052		ifp->if_hwassist &= ~CSUM_TSO;
1053	}
1054
1055	ether_ifattach(ifp, p->hw_addr);
1056
1057	/* Attach driver debugnet methods. */
1058	DEBUGNET_SET(ifp, cxgb);
1059
1060#ifdef DEFAULT_JUMBO
1061	if (sc->params.nports <= 2)
1062		ifp->if_mtu = ETHERMTU_JUMBO;
1063#endif
1064	if ((err = cxgb_makedev(p)) != 0) {
1065		printf("makedev failed %d\n", err);
1066		return (err);
1067	}
1068
1069	/* Create a list of media supported by this port */
1070	ifmedia_init(&p->media, IFM_IMASK, cxgb_media_change,
1071	    cxgb_media_status);
1072	cxgb_build_medialist(p);
1073
1074	t3_sge_init_port(p);
1075
1076	return (err);
1077}
1078
1079/*
1080 * cxgb_port_detach() is called via the device_detach methods when
1081 * cxgb_free() calls the bus_generic_detach.  It is responsible for
1082 * removing the device from the view of the kernel, i.e. from all
1083 * interfaces lists etc.  This routine is only called when the driver is
1084 * being unloaded, not when the link goes down.
1085 */
1086static int
1087cxgb_port_detach(device_t dev)
1088{
1089	struct port_info *p;
1090	struct adapter *sc;
1091	int i;
1092
1093	p = device_get_softc(dev);
1094	sc = p->adapter;
1095
1096	/* Tell cxgb_ioctl and if_init that the port is going away */
1097	ADAPTER_LOCK(sc);
1098	SET_DOOMED(p);
1099	wakeup(&sc->flags);
1100	while (IS_BUSY(sc))
1101		mtx_sleep(&sc->flags, &sc->lock, 0, "cxgbdtch", 0);
1102	SET_BUSY(sc);
1103	ADAPTER_UNLOCK(sc);
1104
1105	if (p->port_cdev != NULL)
1106		destroy_dev(p->port_cdev);
1107
1108	cxgb_uninit_synchronized(p);
1109	ether_ifdetach(p->ifp);
1110
1111	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1112		struct sge_qset *qs = &sc->sge.qs[i];
1113		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1114
1115		callout_drain(&txq->txq_watchdog);
1116		callout_drain(&txq->txq_timer);
1117	}
1118
1119	PORT_LOCK_DEINIT(p);
1120	if_free(p->ifp);
1121	p->ifp = NULL;
1122
1123	ADAPTER_LOCK(sc);
1124	CLR_BUSY(sc);
1125	wakeup_one(&sc->flags);
1126	ADAPTER_UNLOCK(sc);
1127	return (0);
1128}
1129
1130void
1131t3_fatal_err(struct adapter *sc)
1132{
1133	u_int fw_status[4];
1134
1135	if (sc->flags & FULL_INIT_DONE) {
1136		t3_sge_stop(sc);
1137		t3_write_reg(sc, A_XGM_TX_CTRL, 0);
1138		t3_write_reg(sc, A_XGM_RX_CTRL, 0);
1139		t3_write_reg(sc, XGM_REG(A_XGM_TX_CTRL, 1), 0);
1140		t3_write_reg(sc, XGM_REG(A_XGM_RX_CTRL, 1), 0);
1141		t3_intr_disable(sc);
1142	}
	device_printf(sc->dev,
	    "encountered fatal error, operation suspended\n");
	if (!t3_cim_ctl_blk_read(sc, 0xa0, 4, fw_status))
		device_printf(sc->dev, "FW status: 0x%x, 0x%x, 0x%x, 0x%x\n",
1146		    fw_status[0], fw_status[1], fw_status[2], fw_status[3]);
1147}
1148
1149int
1150t3_os_find_pci_capability(adapter_t *sc, int cap)
1151{
1152	device_t dev;
1153	struct pci_devinfo *dinfo;
1154	pcicfgregs *cfg;
1155	uint32_t status;
1156	uint8_t ptr;
1157
1158	dev = sc->dev;
1159	dinfo = device_get_ivars(dev);
1160	cfg = &dinfo->cfg;
1161
1162	status = pci_read_config(dev, PCIR_STATUS, 2);
1163	if (!(status & PCIM_STATUS_CAPPRESENT))
1164		return (0);
1165
1166	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1167	case 0:
1168	case 1:
1169		ptr = PCIR_CAP_PTR;
1170		break;
1171	case 2:
1172		ptr = PCIR_CAP_PTR_2;
1173		break;
1174	default:
1175		return (0);
1176		break;
1177	}
1178	ptr = pci_read_config(dev, ptr, 1);
1179
1180	while (ptr != 0) {
1181		if (pci_read_config(dev, ptr + PCICAP_ID, 1) == cap)
1182			return (ptr);
1183		ptr = pci_read_config(dev, ptr + PCICAP_NEXTPTR, 1);
1184	}
1185
1186	return (0);
1187}
1188
1189int
1190t3_os_pci_save_state(struct adapter *sc)
1191{
1192	device_t dev;
1193	struct pci_devinfo *dinfo;
1194
1195	dev = sc->dev;
1196	dinfo = device_get_ivars(dev);
1197
1198	pci_cfg_save(dev, dinfo, 0);
1199	return (0);
1200}
1201
1202int
1203t3_os_pci_restore_state(struct adapter *sc)
1204{
1205	device_t dev;
1206	struct pci_devinfo *dinfo;
1207
1208	dev = sc->dev;
1209	dinfo = device_get_ivars(dev);
1210
1211	pci_cfg_restore(dev, dinfo);
1212	return (0);
1213}
1214
1215/**
1216 *	t3_os_link_changed - handle link status changes
 *	@adapter: the adapter associated with the link change
 *	@port_id: the port index whose link status has changed
 *	@link_status: the new status of the link
 *	@speed: the new speed setting
 *	@duplex: the new duplex setting
 *	@fc: the new flow-control setting
 *	@mac_was_reset: non-zero if the MAC was reset and its settings need
 *	    to be reapplied
1223 *
1224 *	This is the OS-dependent handler for link status changes.  The OS
1225 *	neutral handler takes care of most of the processing for these events,
1226 *	then calls this handler for any OS-specific processing.
1227 */
1228void
1229t3_os_link_changed(adapter_t *adapter, int port_id, int link_status, int speed,
1230     int duplex, int fc, int mac_was_reset)
1231{
1232	struct port_info *pi = &adapter->port[port_id];
1233	struct ifnet *ifp = pi->ifp;
1234
1235	/* no race with detach, so ifp should always be good */
1236	KASSERT(ifp, ("%s: if detached.", __func__));
1237
1238	/* Reapply mac settings if they were lost due to a reset */
1239	if (mac_was_reset) {
1240		PORT_LOCK(pi);
1241		cxgb_update_mac_settings(pi);
1242		PORT_UNLOCK(pi);
1243	}
1244
1245	if (link_status) {
1246		ifp->if_baudrate = IF_Mbps(speed);
1247		if_link_state_change(ifp, LINK_STATE_UP);
1248	} else
1249		if_link_state_change(ifp, LINK_STATE_DOWN);
1250}
1251
1252/**
1253 *	t3_os_phymod_changed - handle PHY module changes
 *	@adap: the adapter whose PHY module changed
 *	@port_id: the port index whose PHY module changed
1256 *
1257 *	This is the OS-dependent handler for PHY module changes.  It is
1258 *	invoked when a PHY module is removed or inserted for any OS-specific
1259 *	processing.
1260 */
void
t3_os_phymod_changed(struct adapter *adap, int port_id)
1262{
1263	static const char *mod_str[] = {
1264		NULL, "SR", "LR", "LRM", "TWINAX", "TWINAX-L", "unknown"
1265	};
1266	struct port_info *pi = &adap->port[port_id];
1267	int mod = pi->phy.modtype;
1268
1269	if (mod != pi->media.ifm_cur->ifm_data)
1270		cxgb_build_medialist(pi);
1271
1272	if (mod == phy_modtype_none)
1273		if_printf(pi->ifp, "PHY module unplugged\n");
1274	else {
1275		KASSERT(mod < ARRAY_SIZE(mod_str),
1276			("invalid PHY module type %d", mod));
1277		if_printf(pi->ifp, "%s PHY module inserted\n", mod_str[mod]);
1278	}
1279}
1280
1281void
1282t3_os_set_hw_addr(adapter_t *adapter, int port_idx, u8 hw_addr[])
1283{
1284
1285	/*
	 * The ifnet might not be allocated before this gets called, as this
	 * is called early on in attach by t3_prep_adapter, so save the
	 * address off in the port structure.
1289	 */
1290	if (cxgb_debug)
1291		printf("set_hw_addr on idx %d addr %6D\n", port_idx, hw_addr, ":");
1292	bcopy(hw_addr, adapter->port[port_idx].hw_addr, ETHER_ADDR_LEN);
1293}
1294
1295/*
1296 * Programs the XGMAC based on the settings in the ifnet.  These settings
1297 * include MTU, MAC address, mcast addresses, etc.
1298 */
1299static void
1300cxgb_update_mac_settings(struct port_info *p)
1301{
1302	struct ifnet *ifp = p->ifp;
1303	struct t3_rx_mode rm;
1304	struct cmac *mac = &p->mac;
1305	int mtu, hwtagging;
1306
1307	PORT_LOCK_ASSERT_OWNED(p);
1308
1309	bcopy(IF_LLADDR(ifp), p->hw_addr, ETHER_ADDR_LEN);
1310
1311	mtu = ifp->if_mtu;
1312	if (ifp->if_capenable & IFCAP_VLAN_MTU)
1313		mtu += ETHER_VLAN_ENCAP_LEN;
1314
1315	hwtagging = (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) != 0;
1316
1317	t3_mac_set_mtu(mac, mtu);
1318	t3_set_vlan_accel(p->adapter, 1 << p->tx_chan, hwtagging);
1319	t3_mac_set_address(mac, 0, p->hw_addr);
1320	t3_init_rx_mode(&rm, p);
1321	t3_mac_set_rx_mode(mac, &rm);
1322}
1323
1324
1325static int
1326await_mgmt_replies(struct adapter *adap, unsigned long init_cnt,
1327			      unsigned long n)
1328{
1329	int attempts = 5;
1330
1331	while (adap->sge.qs[0].rspq.offload_pkts < init_cnt + n) {
1332		if (!--attempts)
1333			return (ETIMEDOUT);
1334		t3_os_sleep(10);
1335	}
1336	return 0;
1337}
1338
1339static int
1340init_tp_parity(struct adapter *adap)
1341{
1342	int i;
1343	struct mbuf *m;
1344	struct cpl_set_tcb_field *greq;
1345	unsigned long cnt = adap->sge.qs[0].rspq.offload_pkts;
1346
1347	t3_tp_set_offload_mode(adap, 1);
1348
1349	for (i = 0; i < 16; i++) {
1350		struct cpl_smt_write_req *req;
1351
1352		m = m_gethdr(M_WAITOK, MT_DATA);
1353		req = mtod(m, struct cpl_smt_write_req *);
1354		m->m_len = m->m_pkthdr.len = sizeof(*req);
1355		memset(req, 0, sizeof(*req));
1356		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1357		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SMT_WRITE_REQ, i));
1358		req->iff = i;
1359		t3_mgmt_tx(adap, m);
1360	}
1361
1362	for (i = 0; i < 2048; i++) {
1363		struct cpl_l2t_write_req *req;
1364
1365		m = m_gethdr(M_WAITOK, MT_DATA);
1366		req = mtod(m, struct cpl_l2t_write_req *);
1367		m->m_len = m->m_pkthdr.len = sizeof(*req);
1368		memset(req, 0, sizeof(*req));
1369		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1370		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_L2T_WRITE_REQ, i));
1371		req->params = htonl(V_L2T_W_IDX(i));
1372		t3_mgmt_tx(adap, m);
1373	}
1374
1375	for (i = 0; i < 2048; i++) {
1376		struct cpl_rte_write_req *req;
1377
1378		m = m_gethdr(M_WAITOK, MT_DATA);
1379		req = mtod(m, struct cpl_rte_write_req *);
1380		m->m_len = m->m_pkthdr.len = sizeof(*req);
1381		memset(req, 0, sizeof(*req));
1382		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1383		OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RTE_WRITE_REQ, i));
1384		req->l2t_idx = htonl(V_L2T_W_IDX(i));
1385		t3_mgmt_tx(adap, m);
1386	}
1387
1388	m = m_gethdr(M_WAITOK, MT_DATA);
1389	greq = mtod(m, struct cpl_set_tcb_field *);
1390	m->m_len = m->m_pkthdr.len = sizeof(*greq);
1391	memset(greq, 0, sizeof(*greq));
1392	greq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
1393	OPCODE_TID(greq) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, 0));
1394	greq->mask = htobe64(1);
1395	t3_mgmt_tx(adap, m);
1396
1397	i = await_mgmt_replies(adap, cnt, 16 + 2048 + 2048 + 1);
1398	t3_tp_set_offload_mode(adap, 0);
1399	return (i);
1400}
1401
1402/**
1403 *	setup_rss - configure Receive Side Steering (per-queue connection demux)
1404 *	@adap: the adapter
1405 *
1406 *	Sets up RSS to distribute packets to multiple receive queues.  We
1407 *	configure the RSS CPU lookup table to distribute to the number of HW
1408 *	receive queues, and the response queue lookup table to narrow that
1409 *	down to the response queues actually configured for each port.
1410 *	We always configure the RSS mapping for two ports since the mapping
1411 *	table has plenty of entries.
1412 */
1413static void
1414setup_rss(adapter_t *adap)
1415{
1416	int i;
1417	u_int nq[2];
1418	uint8_t cpus[SGE_QSETS + 1];
1419	uint16_t rspq_map[RSS_TABLE_SIZE];
1420
1421	for (i = 0; i < SGE_QSETS; ++i)
1422		cpus[i] = i;
1423	cpus[SGE_QSETS] = 0xff;
1424
1425	nq[0] = nq[1] = 0;
1426	for_each_port(adap, i) {
1427		const struct port_info *pi = adap2pinfo(adap, i);
1428
1429		nq[pi->tx_chan] += pi->nqsets;
1430	}
1431	for (i = 0; i < RSS_TABLE_SIZE / 2; ++i) {
1432		rspq_map[i] = nq[0] ? i % nq[0] : 0;
1433		rspq_map[i + RSS_TABLE_SIZE / 2] = nq[1] ? i % nq[1] + nq[0] : 0;
1434	}
1435
1436	/* Calculate the reverse RSS map table */
1437	for (i = 0; i < SGE_QSETS; ++i)
1438		adap->rrss_map[i] = 0xff;
1439	for (i = 0; i < RSS_TABLE_SIZE; ++i)
1440		if (adap->rrss_map[rspq_map[i]] == 0xff)
1441			adap->rrss_map[rspq_map[i]] = i;
1442
1443	t3_config_rss(adap, F_RQFEEDBACKENABLE | F_TNLLKPEN | F_TNLMAPEN |
1444		      F_TNLPRTEN | F_TNL2TUPEN | F_TNL4TUPEN | F_OFDMAPEN |
1445	              F_RRCPLMAPEN | V_RRCPLCPUSIZE(6) | F_HASHTOEPLITZ,
1446	              cpus, rspq_map);
}

1449static void
1450send_pktsched_cmd(struct adapter *adap, int sched, int qidx, int lo,
1451			      int hi, int port)
1452{
1453	struct mbuf *m;
1454	struct mngt_pktsched_wr *req;
1455
1456	m = m_gethdr(M_NOWAIT, MT_DATA);
1457	if (m) {
1458		req = mtod(m, struct mngt_pktsched_wr *);
1459		req->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_MNGT));
1460		req->mngt_opcode = FW_MNGTOPCODE_PKTSCHED_SET;
1461		req->sched = sched;
1462		req->idx = qidx;
1463		req->min = lo;
1464		req->max = hi;
1465		req->binding = port;
1466		m->m_len = m->m_pkthdr.len = sizeof(*req);
1467		t3_mgmt_tx(adap, m);
1468	}
1469}
1470
1471static void
1472bind_qsets(adapter_t *sc)
1473{
1474	int i, j;
1475
1476	for (i = 0; i < (sc)->params.nports; ++i) {
1477		const struct port_info *pi = adap2pinfo(sc, i);
1478
1479		for (j = 0; j < pi->nqsets; ++j) {
1480			send_pktsched_cmd(sc, 1, pi->first_qset + j, -1,
1481					  -1, pi->tx_chan);
1482
1483		}
1484	}
1485}
1486
1487static void
1488update_tpeeprom(struct adapter *adap)
1489{
1490	const struct firmware *tpeeprom;
1491
1492	uint32_t version;
1493	unsigned int major, minor;
1494	int ret, len;
1495	char rev, name[32];
1496
1497	t3_seeprom_read(adap, TP_SRAM_OFFSET, &version);
1498
1499	major = G_TP_VERSION_MAJOR(version);
1500	minor = G_TP_VERSION_MINOR(version);
1501	if (major == TP_VERSION_MAJOR  && minor == TP_VERSION_MINOR)
1502		return;
1503
1504	rev = t3rev2char(adap);
1505	snprintf(name, sizeof(name), TPEEPROM_NAME, rev);
1506
1507	tpeeprom = firmware_get(name);
1508	if (tpeeprom == NULL) {
1509		device_printf(adap->dev,
1510			      "could not load TP EEPROM: unable to load %s\n",
1511			      name);
1512		return;
1513	}
1514
1515	len = tpeeprom->datasize - 4;
1516
1517	ret = t3_check_tpsram(adap, tpeeprom->data, tpeeprom->datasize);
1518	if (ret)
1519		goto release_tpeeprom;
1520
1521	if (len != TP_SRAM_LEN) {
1522		device_printf(adap->dev,
1523			      "%s length is wrong len=%d expected=%d\n", name,
1524			      len, TP_SRAM_LEN);
		goto release_tpeeprom;
1526	}
1527
1528	ret = set_eeprom(&adap->port[0], tpeeprom->data, tpeeprom->datasize,
1529	    TP_SRAM_OFFSET);
1530
1531	if (!ret) {
1532		device_printf(adap->dev,
1533			"Protocol SRAM image updated in EEPROM to %d.%d.%d\n",
1534			 TP_VERSION_MAJOR, TP_VERSION_MINOR, TP_VERSION_MICRO);
1535	} else
1536		device_printf(adap->dev,
1537			      "Protocol SRAM image update in EEPROM failed\n");
1538
1539release_tpeeprom:
1540	firmware_put(tpeeprom, FIRMWARE_UNLOAD);
1541
1542	return;
1543}
1544
1545static int
1546update_tpsram(struct adapter *adap)
1547{
1548	const struct firmware *tpsram;
1549	int ret;
1550	char rev, name[32];
1551
1552	rev = t3rev2char(adap);
1553	snprintf(name, sizeof(name), TPSRAM_NAME, rev);
1554
1555	update_tpeeprom(adap);
1556
1557	tpsram = firmware_get(name);
	if (tpsram == NULL) {
1559		device_printf(adap->dev, "could not load TP SRAM\n");
1560		return (EINVAL);
1561	} else
1562		device_printf(adap->dev, "updating TP SRAM\n");
1563
1564	ret = t3_check_tpsram(adap, tpsram->data, tpsram->datasize);
1565	if (ret)
1566		goto release_tpsram;
1567
1568	ret = t3_set_proto_sram(adap, tpsram->data);
1569	if (ret)
1570		device_printf(adap->dev, "loading protocol SRAM failed\n");
1571
1572release_tpsram:
1573	firmware_put(tpsram, FIRMWARE_UNLOAD);
1574
1575	return ret;
1576}
1577
1578/**
1579 *	cxgb_up - enable the adapter
1580 *	@adap: adapter being enabled
1581 *
1582 *	Called when the first port is enabled, this function performs the
1583 *	actions necessary to make an adapter operational, such as completing
1584 *	the initialization of HW modules, and enabling interrupts.
1585 */
1586static int
1587cxgb_up(struct adapter *sc)
1588{
1589	int err = 0;
1590	unsigned int mxf = t3_mc5_size(&sc->mc5) - MC5_MIN_TIDS;
1591
1592	KASSERT(sc->open_device_map == 0, ("%s: device(s) already open (%x)",
1593					   __func__, sc->open_device_map));
1594
1595	if ((sc->flags & FULL_INIT_DONE) == 0) {
1596
1597		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1598
1599		if ((sc->flags & FW_UPTODATE) == 0)
1600			if ((err = upgrade_fw(sc)))
1601				goto out;
1602
1603		if ((sc->flags & TPS_UPTODATE) == 0)
1604			if ((err = update_tpsram(sc)))
1605				goto out;
1606
1607		if (is_offload(sc) && nfilters != 0) {
1608			sc->params.mc5.nservers = 0;
1609
1610			if (nfilters < 0)
1611				sc->params.mc5.nfilters = mxf;
1612			else
1613				sc->params.mc5.nfilters = min(nfilters, mxf);
1614		}
1615
1616		err = t3_init_hw(sc, 0);
1617		if (err)
1618			goto out;
1619
1620		t3_set_reg_field(sc, A_TP_PARA_REG5, 0, F_RXDDPOFFINIT);
1621		t3_write_reg(sc, A_ULPRX_TDDP_PSZ, V_HPZ0(PAGE_SHIFT - 12));
1622
1623		err = setup_sge_qsets(sc);
1624		if (err)
1625			goto out;
1626
1627		alloc_filters(sc);
1628		setup_rss(sc);
1629
1630		t3_add_configured_sysctls(sc);
1631		sc->flags |= FULL_INIT_DONE;
1632	}
1633
1634	t3_intr_clear(sc);
1635	t3_sge_start(sc);
1636	t3_intr_enable(sc);
1637
1638	if (sc->params.rev >= T3_REV_C && !(sc->flags & TP_PARITY_INIT) &&
1639	    is_offload(sc) && init_tp_parity(sc) == 0)
1640		sc->flags |= TP_PARITY_INIT;
1641
1642	if (sc->flags & TP_PARITY_INIT) {
1643		t3_write_reg(sc, A_TP_INT_CAUSE, F_CMCACHEPERR | F_ARPLUTPERR);
1644		t3_write_reg(sc, A_TP_INT_ENABLE, 0x7fbfffff);
1645	}
1646
1647	if (!(sc->flags & QUEUES_BOUND)) {
1648		bind_qsets(sc);
1649		setup_hw_filters(sc);
1650		sc->flags |= QUEUES_BOUND;
1651	}
1652
1653	t3_sge_reset_adapter(sc);
1654out:
1655	return (err);
1656}
1657
1658/*
1659 * Called when the last open device is closed.  Does NOT undo all of cxgb_up's
1660 * work.  Specifically, the resources grabbed under FULL_INIT_DONE are released
1661 * during controller_detach, not here.
1662 */
1663static void
1664cxgb_down(struct adapter *sc)
1665{
1666	t3_sge_stop(sc);
1667	t3_intr_disable(sc);
1668}
1669
1670/*
1671 * if_init for cxgb ports.
1672 */
1673static void
1674cxgb_init(void *arg)
1675{
1676	struct port_info *p = arg;
1677	struct adapter *sc = p->adapter;
1678
1679	ADAPTER_LOCK(sc);
1680	cxgb_init_locked(p); /* releases adapter lock */
1681	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1682}
1683
1684static int
1685cxgb_init_locked(struct port_info *p)
1686{
1687	struct adapter *sc = p->adapter;
1688	struct ifnet *ifp = p->ifp;
1689	struct cmac *mac = &p->mac;
1690	int i, rc = 0, may_sleep = 0, gave_up_lock = 0;
1691
1692	ADAPTER_LOCK_ASSERT_OWNED(sc);
1693
1694	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1695		gave_up_lock = 1;
1696		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbinit", 0)) {
1697			rc = EINTR;
1698			goto done;
1699		}
1700	}
1701	if (IS_DOOMED(p)) {
1702		rc = ENXIO;
1703		goto done;
1704	}
1705	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1706
1707	/*
1708	 * The code that runs during one-time adapter initialization can sleep
1709	 * so it's important not to hold any locks across it.
1710	 */
1711	may_sleep = sc->flags & FULL_INIT_DONE ? 0 : 1;
1712
1713	if (may_sleep) {
1714		SET_BUSY(sc);
1715		gave_up_lock = 1;
1716		ADAPTER_UNLOCK(sc);
1717	}
1718
1719	if (sc->open_device_map == 0 && ((rc = cxgb_up(sc)) != 0))
		goto done;
1721
1722	PORT_LOCK(p);
1723	if (isset(&sc->open_device_map, p->port_id) &&
1724	    (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
1725		PORT_UNLOCK(p);
1726		goto done;
1727	}
1728	t3_port_intr_enable(sc, p->port_id);
1729	if (!mac->multiport)
1730		t3_mac_init(mac);
1731	cxgb_update_mac_settings(p);
1732	t3_link_start(&p->phy, mac, &p->link_config);
1733	t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
1734	ifp->if_drv_flags |= IFF_DRV_RUNNING;
1735	ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
1736	PORT_UNLOCK(p);
1737
1738	for (i = p->first_qset; i < p->first_qset + p->nqsets; i++) {
1739		struct sge_qset *qs = &sc->sge.qs[i];
1740		struct sge_txq *txq = &qs->txq[TXQ_ETH];
1741
1742		callout_reset_on(&txq->txq_watchdog, hz, cxgb_tx_watchdog, qs,
1743				 txq->txq_watchdog.c_cpu);
1744	}
1745
1746	/* all ok */
1747	setbit(&sc->open_device_map, p->port_id);
1748	callout_reset(&p->link_check_ch,
1749	    p->phy.caps & SUPPORTED_LINK_IRQ ?  hz * 3 : hz / 4,
1750	    link_check_callout, p);
1751
1752done:
1753	if (may_sleep) {
1754		ADAPTER_LOCK(sc);
1755		KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1756		CLR_BUSY(sc);
1757	}
1758	if (gave_up_lock)
1759		wakeup_one(&sc->flags);
1760	ADAPTER_UNLOCK(sc);
1761	return (rc);
1762}
1763
1764static int
1765cxgb_uninit_locked(struct port_info *p)
1766{
1767	struct adapter *sc = p->adapter;
1768	int rc;
1769
1770	ADAPTER_LOCK_ASSERT_OWNED(sc);
1771
1772	while (!IS_DOOMED(p) && IS_BUSY(sc)) {
1773		if (mtx_sleep(&sc->flags, &sc->lock, PCATCH, "cxgbunin", 0)) {
1774			rc = EINTR;
1775			goto done;
1776		}
1777	}
1778	if (IS_DOOMED(p)) {
1779		rc = ENXIO;
1780		goto done;
1781	}
1782	KASSERT(!IS_BUSY(sc), ("%s: controller busy.", __func__));
1783	SET_BUSY(sc);
1784	ADAPTER_UNLOCK(sc);
1785
1786	rc = cxgb_uninit_synchronized(p);
1787
1788	ADAPTER_LOCK(sc);
1789	KASSERT(IS_BUSY(sc), ("%s: controller not busy.", __func__));
1790	CLR_BUSY(sc);
1791	wakeup_one(&sc->flags);
1792done:
1793	ADAPTER_UNLOCK(sc);
1794	return (rc);
1795}
1796
1797/*
1798 * Called on "ifconfig down", and from port_detach
1799 */
1800static int
1801cxgb_uninit_synchronized(struct port_info *pi)
1802{
1803	struct adapter *sc = pi->adapter;
1804	struct ifnet *ifp = pi->ifp;
1805
1806	/*
1807	 * taskqueue_drain may cause a deadlock if the adapter lock is held.
1808	 */
1809	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1810
1811	/*
1812	 * Clear this port's bit from the open device map, and then drain all
1813	 * the tasks that can access/manipulate this port's port_info or ifp.
1814	 * We disable this port's interrupts here and so the slow/ext
1815	 * interrupt tasks won't be enqueued.  The tick task will continue to
1816	 * be enqueued every second but the runs after this drain will not see
1817	 * this port in the open device map.
1818	 *
1819	 * A well behaved task must take open_device_map into account and ignore
1820	 * ports that are not open.
1821	 */
1822	clrbit(&sc->open_device_map, pi->port_id);
1823	t3_port_intr_disable(sc, pi->port_id);
1824	taskqueue_drain(sc->tq, &sc->slow_intr_task);
1825	taskqueue_drain(sc->tq, &sc->tick_task);
1826
1827	callout_drain(&pi->link_check_ch);
1828	taskqueue_drain(sc->tq, &pi->link_check_task);
1829
1830	PORT_LOCK(pi);
1831	ifp->if_drv_flags &= ~(IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
1832
1833	/* disable pause frames */
1834	t3_set_reg_field(sc, A_XGM_TX_CFG + pi->mac.offset, F_TXPAUSEEN, 0);
1835
1836	/* Reset RX FIFO HWM */
1837	t3_set_reg_field(sc, A_XGM_RXFIFO_CFG +  pi->mac.offset,
1838			 V_RXFIFOPAUSEHWM(M_RXFIFOPAUSEHWM), 0);
1839
1840	DELAY(100 * 1000);
1841
1842	/* Wait for TXFIFO empty */
1843	t3_wait_op_done(sc, A_XGM_TXFIFO_CFG + pi->mac.offset,
1844			F_TXFIFO_EMPTY, 1, 20, 5);
1845
1846	DELAY(100 * 1000);
1847	t3_mac_disable(&pi->mac, MAC_DIRECTION_RX);
1848
1849	pi->phy.ops->power_down(&pi->phy, 1);
1850
1851	PORT_UNLOCK(pi);
1852
1853	pi->link_config.link_ok = 0;
1854	t3_os_link_changed(sc, pi->port_id, 0, 0, 0, 0, 0);
1855
1856	if (sc->open_device_map == 0)
1857		cxgb_down(pi->adapter);
1858
1859	return (0);
1860}
1861
1862/*
1863 * Mark lro enabled or disabled in all qsets for this port
1864 */
1865static int
1866cxgb_set_lro(struct port_info *p, int enabled)
1867{
1868	int i;
1869	struct adapter *adp = p->adapter;
1870	struct sge_qset *q;
1871
1872	for (i = 0; i < p->nqsets; i++) {
1873		q = &adp->sge.qs[p->first_qset + i];
1874		q->lro.enabled = (enabled != 0);
1875	}
1876	return (0);
1877}
1878
1879static int
1880cxgb_ioctl(struct ifnet *ifp, unsigned long command, caddr_t data)
1881{
1882	struct port_info *p = ifp->if_softc;
1883	struct adapter *sc = p->adapter;
1884	struct ifreq *ifr = (struct ifreq *)data;
1885	int flags, error = 0, mtu;
1886	uint32_t mask;
1887
1888	switch (command) {
1889	case SIOCSIFMTU:
1890		ADAPTER_LOCK(sc);
1891		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1892		if (error) {
1893fail:
1894			ADAPTER_UNLOCK(sc);
1895			return (error);
1896		}
1897
1898		mtu = ifr->ifr_mtu;
1899		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO)) {
1900			error = EINVAL;
1901		} else {
1902			ifp->if_mtu = mtu;
1903			PORT_LOCK(p);
1904			cxgb_update_mac_settings(p);
1905			PORT_UNLOCK(p);
1906		}
1907		ADAPTER_UNLOCK(sc);
1908		break;
1909	case SIOCSIFFLAGS:
1910		ADAPTER_LOCK(sc);
1911		if (IS_DOOMED(p)) {
1912			error = ENXIO;
1913			goto fail;
1914		}
1915		if (ifp->if_flags & IFF_UP) {
1916			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1917				flags = p->if_flags;
1918				if (((ifp->if_flags ^ flags) & IFF_PROMISC) ||
1919				    ((ifp->if_flags ^ flags) & IFF_ALLMULTI)) {
1920					if (IS_BUSY(sc)) {
1921						error = EBUSY;
1922						goto fail;
1923					}
1924					PORT_LOCK(p);
1925					cxgb_update_mac_settings(p);
1926					PORT_UNLOCK(p);
1927				}
1928				ADAPTER_UNLOCK(sc);
1929			} else
1930				error = cxgb_init_locked(p);
1931			p->if_flags = ifp->if_flags;
1932		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
1933			error = cxgb_uninit_locked(p);
1934		else
1935			ADAPTER_UNLOCK(sc);
1936
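		/* cxgb_init_locked and cxgb_uninit_locked drop the adapter lock. */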
1937		ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
1938		break;
1939	case SIOCADDMULTI:
1940	case SIOCDELMULTI:
1941		ADAPTER_LOCK(sc);
1942		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1943		if (error)
1944			goto fail;
1945
1946		if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
1947			PORT_LOCK(p);
1948			cxgb_update_mac_settings(p);
1949			PORT_UNLOCK(p);
1950		}
1951		ADAPTER_UNLOCK(sc);
1952
1953		break;
1954	case SIOCSIFCAP:
1955		ADAPTER_LOCK(sc);
1956		error = IS_DOOMED(p) ? ENXIO : (IS_BUSY(sc) ? EBUSY : 0);
1957		if (error)
1958			goto fail;
1959
1960		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
1961		if (mask & IFCAP_TXCSUM) {
1962			ifp->if_capenable ^= IFCAP_TXCSUM;
1963			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
1964
1965			if (IFCAP_TSO4 & ifp->if_capenable &&
1966			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1967				mask &= ~IFCAP_TSO4;
1968				ifp->if_capenable &= ~IFCAP_TSO4;
1969				if_printf(ifp,
1970				    "tso4 disabled due to -txcsum.\n");
1971			}
1972		}
1973		if (mask & IFCAP_TXCSUM_IPV6) {
1974			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
1975			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
1976
1977			if (IFCAP_TSO6 & ifp->if_capenable &&
1978			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
1979				mask &= ~IFCAP_TSO6;
1980				ifp->if_capenable &= ~IFCAP_TSO6;
1981				if_printf(ifp,
1982				    "tso6 disabled due to -txcsum6.\n");
1983			}
1984		}
1985		if (mask & IFCAP_RXCSUM)
1986			ifp->if_capenable ^= IFCAP_RXCSUM;
1987		if (mask & IFCAP_RXCSUM_IPV6)
1988			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
1989
1990		/*
1991		 * Note that we leave CSUM_TSO alone (it is always set).  The
1992		 * kernel takes both IFCAP_TSOx and CSUM_TSO into account before
1993		 * sending a TSO request our way, so it's sufficient to toggle
1994		 * IFCAP_TSOx only.
1995		 */
1996		if (mask & IFCAP_TSO4) {
1997			if (!(IFCAP_TSO4 & ifp->if_capenable) &&
1998			    !(IFCAP_TXCSUM & ifp->if_capenable)) {
1999				if_printf(ifp, "enable txcsum first.\n");
2000				error = EAGAIN;
2001				goto fail;
2002			}
2003			ifp->if_capenable ^= IFCAP_TSO4;
2004		}
2005		if (mask & IFCAP_TSO6) {
2006			if (!(IFCAP_TSO6 & ifp->if_capenable) &&
2007			    !(IFCAP_TXCSUM_IPV6 & ifp->if_capenable)) {
2008				if_printf(ifp, "enable txcsum6 first.\n");
2009				error = EAGAIN;
2010				goto fail;
2011			}
2012			ifp->if_capenable ^= IFCAP_TSO6;
2013		}
2014		if (mask & IFCAP_LRO) {
2015			ifp->if_capenable ^= IFCAP_LRO;
2016
2017			/* Safe to do this even if cxgb_up not called yet */
2018			cxgb_set_lro(p, ifp->if_capenable & IFCAP_LRO);
2019		}
2020#ifdef TCP_OFFLOAD
2021		if (mask & IFCAP_TOE4) {
2022			int enable = (ifp->if_capenable ^ mask) & IFCAP_TOE4;
2023
2024			error = toe_capability(p, enable);
2025			if (error == 0)
2026				ifp->if_capenable ^= mask;
2027		}
2028#endif
2029		if (mask & IFCAP_VLAN_HWTAGGING) {
2030			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;
2031			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2032				PORT_LOCK(p);
2033				cxgb_update_mac_settings(p);
2034				PORT_UNLOCK(p);
2035			}
2036		}
2037		if (mask & IFCAP_VLAN_MTU) {
2038			ifp->if_capenable ^= IFCAP_VLAN_MTU;
2039			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
2040				PORT_LOCK(p);
2041				cxgb_update_mac_settings(p);
2042				PORT_UNLOCK(p);
2043			}
2044		}
2045		if (mask & IFCAP_VLAN_HWTSO)
2046			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
2047		if (mask & IFCAP_VLAN_HWCSUM)
2048			ifp->if_capenable ^= IFCAP_VLAN_HWCSUM;
2049
2050#ifdef VLAN_CAPABILITIES
2051		VLAN_CAPABILITIES(ifp);
2052#endif
2053		ADAPTER_UNLOCK(sc);
2054		break;
2055	case SIOCSIFMEDIA:
2056	case SIOCGIFMEDIA:
2057		error = ifmedia_ioctl(ifp, ifr, &p->media, command);
2058		break;
2059	default:
2060		error = ether_ioctl(ifp, command, data);
2061	}
2062
2063	return (error);
2064}
2065
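/*
 * ifconfig cannot be used to force a particular media; the media is
 * dictated by the PHY/module and is reported via cxgb_media_status.
 */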
2066static int
2067cxgb_media_change(struct ifnet *ifp)
2068{
2069	return (EOPNOTSUPP);
2070}
2071
2072/*
2073 * Translates phy->modtype to the correct Ethernet media subtype.
2074 */
2075static int
2076cxgb_ifm_type(int mod)
2077{
2078	switch (mod) {
2079	case phy_modtype_sr:
2080		return (IFM_10G_SR);
2081	case phy_modtype_lr:
2082		return (IFM_10G_LR);
2083	case phy_modtype_lrm:
2084		return (IFM_10G_LRM);
2085	case phy_modtype_twinax:
2086		return (IFM_10G_TWINAX);
2087	case phy_modtype_twinax_long:
2088		return (IFM_10G_TWINAX_LONG);
2089	case phy_modtype_none:
2090		return (IFM_NONE);
2091	case phy_modtype_unknown:
2092		return (IFM_UNKNOWN);
2093	}
2094
2095	KASSERT(0, ("%s: modtype %d unknown", __func__, mod));
2096	return (IFM_UNKNOWN);
2097}
2098
2099/*
2100 * Rebuilds the ifmedia list for this port, and sets the current media.
2101 */
2102static void
2103cxgb_build_medialist(struct port_info *p)
2104{
2105	struct cphy *phy = &p->phy;
2106	struct ifmedia *media = &p->media;
2107	int mod = phy->modtype;
2108	int m = IFM_ETHER | IFM_FDX;
2109
2110	PORT_LOCK(p);
2111
2112	ifmedia_removeall(media);
2113	if (phy->caps & SUPPORTED_TP && phy->caps & SUPPORTED_Autoneg) {
2114		/* Copper (RJ45) */
2115
2116		if (phy->caps & SUPPORTED_10000baseT_Full)
2117			ifmedia_add(media, m | IFM_10G_T, mod, NULL);
2118
2119		if (phy->caps & SUPPORTED_1000baseT_Full)
2120			ifmedia_add(media, m | IFM_1000_T, mod, NULL);
2121
2122		if (phy->caps & SUPPORTED_100baseT_Full)
2123			ifmedia_add(media, m | IFM_100_TX, mod, NULL);
2124
2125		if (phy->caps & SUPPORTED_10baseT_Full)
2126			ifmedia_add(media, m | IFM_10_T, mod, NULL);
2127
2128		ifmedia_add(media, IFM_ETHER | IFM_AUTO, mod, NULL);
2129		ifmedia_set(media, IFM_ETHER | IFM_AUTO);
2130
2131	} else if (phy->caps & SUPPORTED_TP) {
2132		/* Copper (CX4) */
2133
2134		KASSERT(phy->caps & SUPPORTED_10000baseT_Full,
2135			("%s: unexpected cap 0x%x", __func__, phy->caps));
2136
2137		ifmedia_add(media, m | IFM_10G_CX4, mod, NULL);
2138		ifmedia_set(media, m | IFM_10G_CX4);
2139
2140	} else if (phy->caps & SUPPORTED_FIBRE &&
2141		   phy->caps & SUPPORTED_10000baseT_Full) {
2142		/* 10G optical (but includes SFP+ twinax) */
2143
2144		m |= cxgb_ifm_type(mod);
2145		if (IFM_SUBTYPE(m) == IFM_NONE)
2146			m &= ~IFM_FDX;
2147
2148		ifmedia_add(media, m, mod, NULL);
2149		ifmedia_set(media, m);
2150
2151	} else if (phy->caps & SUPPORTED_FIBRE &&
2152		   phy->caps & SUPPORTED_1000baseT_Full) {
2153		/* 1G optical */
2154
2155		/* XXX: Lie and claim to be SX, could actually be any 1G-X */
2156		ifmedia_add(media, m | IFM_1000_SX, mod, NULL);
2157		ifmedia_set(media, m | IFM_1000_SX);
2158
2159	} else {
2160		KASSERT(0, ("%s: don't know how to handle 0x%x.", __func__,
2161			    phy->caps));
2162	}
2163
2164	PORT_UNLOCK(p);
2165}
2166
2167static void
2168cxgb_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
2169{
2170	struct port_info *p = ifp->if_softc;
2171	struct ifmedia_entry *cur = p->media.ifm_cur;
2172	int speed = p->link_config.speed;
2173
2174	if (cur->ifm_data != p->phy.modtype) {
2175		cxgb_build_medialist(p);
2176		cur = p->media.ifm_cur;
2177	}
2178
2179	ifmr->ifm_status = IFM_AVALID;
2180	if (!p->link_config.link_ok)
2181		return;
2182
2183	ifmr->ifm_status |= IFM_ACTIVE;
2184
2185	/*
2186	 * active and current will differ iff current media is autoselect.  That
2187	 * can happen only for copper RJ45.
2188	 */
2189	if (IFM_SUBTYPE(cur->ifm_media) != IFM_AUTO)
2190		return;
2191	KASSERT(p->phy.caps & SUPPORTED_TP && p->phy.caps & SUPPORTED_Autoneg,
2192		("%s: unexpected PHY caps 0x%x", __func__, p->phy.caps));
2193
2194	ifmr->ifm_active = IFM_ETHER | IFM_FDX;
2195	if (speed == SPEED_10000)
2196		ifmr->ifm_active |= IFM_10G_T;
2197	else if (speed == SPEED_1000)
2198		ifmr->ifm_active |= IFM_1000_T;
2199	else if (speed == SPEED_100)
2200		ifmr->ifm_active |= IFM_100_TX;
2201	else if (speed == SPEED_10)
2202		ifmr->ifm_active |= IFM_10_T;
2203	else
2204		KASSERT(0, ("%s: link up but speed unknown (%u)", __func__,
2205			    speed));
2206}
2207
2208static uint64_t
2209cxgb_get_counter(struct ifnet *ifp, ift_counter c)
2210{
2211	struct port_info *pi = ifp->if_softc;
2212	struct adapter *sc = pi->adapter;
2213	struct cmac *mac = &pi->mac;
2214	struct mac_stats *mstats = &mac->stats;
2215
2216	cxgb_refresh_stats(pi);
2217
2218	switch (c) {
2219	case IFCOUNTER_IPACKETS:
2220		return (mstats->rx_frames);
2221
2222	case IFCOUNTER_IERRORS:
2223		return (mstats->rx_jabber + mstats->rx_data_errs +
2224		    mstats->rx_sequence_errs + mstats->rx_runt +
2225		    mstats->rx_too_long + mstats->rx_mac_internal_errs +
2226		    mstats->rx_short + mstats->rx_fcs_errs);
2227
2228	case IFCOUNTER_OPACKETS:
2229		return (mstats->tx_frames);
2230
2231	case IFCOUNTER_OERRORS:
2232		return (mstats->tx_excess_collisions + mstats->tx_underrun +
2233		    mstats->tx_len_errs + mstats->tx_mac_internal_errs +
2234		    mstats->tx_excess_deferral + mstats->tx_fcs_errs);
2235
2236	case IFCOUNTER_COLLISIONS:
2237		return (mstats->tx_total_collisions);
2238
2239	case IFCOUNTER_IBYTES:
2240		return (mstats->rx_octets);
2241
2242	case IFCOUNTER_OBYTES:
2243		return (mstats->tx_octets);
2244
2245	case IFCOUNTER_IMCASTS:
2246		return (mstats->rx_mcast_frames);
2247
2248	case IFCOUNTER_OMCASTS:
2249		return (mstats->tx_mcast_frames);
2250
2251	case IFCOUNTER_IQDROPS:
2252		return (mstats->rx_cong_drops);
2253
2254	case IFCOUNTER_OQDROPS: {
2255		int i;
2256		uint64_t drops;
2257
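		/* Sum the software drops recorded by each tx queue's buf_ring. */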
2258		drops = 0;
2259		if (sc->flags & FULL_INIT_DONE) {
2260			for (i = pi->first_qset; i < pi->first_qset + pi->nqsets; i++)
2261				drops += sc->sge.qs[i].txq[TXQ_ETH].txq_mr->br_drops;
2262		}
2263
2264		return (drops);
2265
2266	}
2267
2268	default:
2269		return (if_get_counter_default(ifp, c));
2270	}
2271}
2272
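/*
 * Interrupt handler for slow (non-data) events: mask further PL interrupts,
 * flush the write with a read-back, and defer the real work to the
 * slow_intr_task on the adapter's taskqueue.
 */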
2273static void
2274cxgb_async_intr(void *data)
2275{
2276	adapter_t *sc = data;
2277
2278	t3_write_reg(sc, A_PL_INT_ENABLE0, 0);
2279	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
2280	taskqueue_enqueue(sc->tq, &sc->slow_intr_task);
2281}
2282
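/*
 * Callout handler that enqueues the port's link_check_task, but only while
 * the port is still open.
 */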
2283static void
2284link_check_callout(void *arg)
2285{
2286	struct port_info *pi = arg;
2287	struct adapter *sc = pi->adapter;
2288
2289	if (!isset(&sc->open_device_map, pi->port_id))
2290		return;
2291
2292	taskqueue_enqueue(sc->tq, &pi->link_check_task);
2293}
2294
2295static void
2296check_link_status(void *arg, int pending)
2297{
2298	struct port_info *pi = arg;
2299	struct adapter *sc = pi->adapter;
2300
2301	if (!isset(&sc->open_device_map, pi->port_id))
2302		return;
2303
2304	t3_link_changed(sc, pi->port_id);
2305
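	/*
	 * Keep polling if the link is down, a link fault is pending, or the
	 * PHY cannot raise a link interrupt; otherwise the interrupt will
	 * trigger the next check.
	 */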
2306	if (pi->link_fault || !(pi->phy.caps & SUPPORTED_LINK_IRQ) ||
2307	    pi->link_config.link_ok == 0)
2308		callout_reset(&pi->link_check_ch, hz, link_check_callout, pi);
2309}
2310
2311void
2312t3_os_link_intr(struct port_info *pi)
2313{
2314	/*
2315	 * Schedule a link check in the near future.  If the link is flapping
2316	 * rapidly we'll keep resetting the callout and delaying the check until
2317	 * things stabilize a bit.
2318	 */
2319	callout_reset(&pi->link_check_ch, hz / 4, link_check_callout, pi);
2320}
2321
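/*
 * Watchdog for the MACs on T3 rev B2 adapters.  A return of 1 from
 * t3b2_mac_watchdog_task means the MAC was toggled; 2 means it was reset,
 * in which case the MAC settings, link, and port interrupts are restored.
 */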
2322static void
2323check_t3b2_mac(struct adapter *sc)
2324{
2325	int i;
2326
2327	if (sc->flags & CXGB_SHUTDOWN)
2328		return;
2329
2330	for_each_port(sc, i) {
2331		struct port_info *p = &sc->port[i];
2332		int status;
2333#ifdef INVARIANTS
2334		struct ifnet *ifp = p->ifp;
2335#endif
2336
2337		if (!isset(&sc->open_device_map, p->port_id) || p->link_fault ||
2338		    !p->link_config.link_ok)
2339			continue;
2340
2341		KASSERT(ifp->if_drv_flags & IFF_DRV_RUNNING,
2342			("%s: state mismatch (drv_flags %x, device_map %x)",
2343			 __func__, ifp->if_drv_flags, sc->open_device_map));
2344
2345		PORT_LOCK(p);
2346		status = t3b2_mac_watchdog_task(&p->mac);
2347		if (status == 1)
2348			p->mac.stats.num_toggled++;
2349		else if (status == 2) {
2350			struct cmac *mac = &p->mac;
2351
2352			cxgb_update_mac_settings(p);
2353			t3_link_start(&p->phy, mac, &p->link_config);
2354			t3_mac_enable(mac, MAC_DIRECTION_RX | MAC_DIRECTION_TX);
2355			t3_port_intr_enable(sc, p->port_id);
2356			p->mac.stats.num_resets++;
2357		}
2358		PORT_UNLOCK(p);
2359	}
2360}
2361
2362static void
2363cxgb_tick(void *arg)
2364{
2365	adapter_t *sc = (adapter_t *)arg;
2366
2367	if (sc->flags & CXGB_SHUTDOWN)
2368		return;
2369
2370	taskqueue_enqueue(sc->tq, &sc->tick_task);
2371	callout_reset(&sc->cxgb_tick_ch, hz, cxgb_tick, sc);
2372}
2373
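/*
 * Refresh this port's MAC statistics from hardware, rate-limited to once
 * every 250ms.
 */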
2374void
2375cxgb_refresh_stats(struct port_info *pi)
2376{
2377	struct timeval tv;
2378	const struct timeval interval = {0, 250000};    /* 250ms */
2379
2380	getmicrotime(&tv);
2381	timevalsub(&tv, &interval);
2382	if (timevalcmp(&tv, &pi->last_refreshed, <))
2383		return;
2384
2385	PORT_LOCK(pi);
2386	t3_mac_update_stats(&pi->mac);
2387	PORT_UNLOCK(pi);
2388	getmicrotime(&pi->last_refreshed);
2389}
2390
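/*
 * Once-a-second housekeeping: run the T3B2 MAC watchdog when applicable,
 * count starved response queues and empty free lists, refresh per-port MAC
 * statistics, and record RX FIFO overflows.
 */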
2391static void
2392cxgb_tick_handler(void *arg, int count)
2393{
2394	adapter_t *sc = (adapter_t *)arg;
2395	const struct adapter_params *p = &sc->params;
2396	int i;
2397	uint32_t cause, reset;
2398
2399	if (sc->flags & CXGB_SHUTDOWN || !(sc->flags & FULL_INIT_DONE))
2400		return;
2401
2402	if (p->rev == T3_REV_B2 && p->nports < 4 && sc->open_device_map)
2403		check_t3b2_mac(sc);
2404
2405	cause = t3_read_reg(sc, A_SG_INT_CAUSE) & (F_RSPQSTARVE | F_FLEMPTY);
2406	if (cause) {
2407		struct sge_qset *qs = &sc->sge.qs[0];
2408		uint32_t mask, v;
2409
2410		v = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS) & ~0xff00;
2411
2412		mask = 1;
2413		for (i = 0; i < SGE_QSETS; i++) {
2414			if (v & mask)
2415				qs[i].rspq.starved++;
2416			mask <<= 1;
2417		}
2418
2419		mask <<= SGE_QSETS; /* skip RSPQXDISABLED */
2420
2421		for (i = 0; i < SGE_QSETS * 2; i++) {
2422			if (v & mask) {
2423				qs[i / 2].fl[i % 2].empty++;
2424			}
2425			mask <<= 1;
2426		}
2427
2428		/* clear */
2429		t3_write_reg(sc, A_SG_RSPQ_FL_STATUS, v);
2430		t3_write_reg(sc, A_SG_INT_CAUSE, cause);
2431	}
2432
2433	for (i = 0; i < sc->params.nports; i++) {
2434		struct port_info *pi = &sc->port[i];
2435		struct cmac *mac = &pi->mac;
2436
2437		if (!isset(&sc->open_device_map, pi->port_id))
2438			continue;
2439
2440		cxgb_refresh_stats(pi);
2441
2442		if (mac->multiport)
2443			continue;
2444
2445		/* Count rx fifo overflows, once per second */
2446		cause = t3_read_reg(sc, A_XGM_INT_CAUSE + mac->offset);
2447		reset = 0;
2448		if (cause & F_RXFIFO_OVERFLOW) {
2449			mac->stats.rx_fifo_ovfl++;
2450			reset |= F_RXFIFO_OVERFLOW;
2451		}
2452		t3_write_reg(sc, A_XGM_INT_CAUSE + mac->offset, reset);
2453	}
2454}
2455
2456static void
2457touch_bars(device_t dev)
2458{
2459	/*
2460	 * Don't enable yet; this Linux-style placeholder is compiled out.
2461	 */
2462#if !defined(__LP64__) && 0
2463	u32 v;
2464
2465	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_1, &v);
2466	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_1, v);
2467	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_3, &v);
2468	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_3, v);
2469	pci_read_config_dword(pdev, PCI_BASE_ADDRESS_5, &v);
2470	pci_write_config_dword(pdev, PCI_BASE_ADDRESS_5, v);
2471#endif
2472}
2473
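/*
 * Write len bytes of data to the EEPROM at the given offset.  Unaligned
 * head and tail words are handled with a read-modify-write, and write
 * protection is lifted only for the duration of the update.
 */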
2474static int
2475set_eeprom(struct port_info *pi, const uint8_t *data, int len, int offset)
2476{
2477	uint8_t *buf;
2478	int err = 0;
2479	u32 aligned_offset, aligned_len, *p;
2480	struct adapter *adapter = pi->adapter;
2481
2482
2483	aligned_offset = offset & ~3;
2484	aligned_len = (len + (offset & 3) + 3) & ~3;
2485
2486	if (aligned_offset != offset || aligned_len != len) {
2487		buf = malloc(aligned_len, M_DEVBUF, M_WAITOK|M_ZERO);
2488		if (!buf)
2489			return (ENOMEM);
2490		err = t3_seeprom_read(adapter, aligned_offset, (u32 *)buf);
2491		if (!err && aligned_len > 4)
2492			err = t3_seeprom_read(adapter,
2493					      aligned_offset + aligned_len - 4,
2494					      (u32 *)&buf[aligned_len - 4]);
2495		if (err)
2496			goto out;
2497		memcpy(buf + (offset & 3), data, len);
2498	} else
2499		buf = (uint8_t *)(uintptr_t)data;
2500
2501	err = t3_seeprom_wp(adapter, 0);
2502	if (err)
2503		goto out;
2504
2505	for (p = (u32 *)buf; !err && aligned_len; aligned_len -= 4, p++) {
2506		err = t3_seeprom_write(adapter, aligned_offset, *p);
2507		aligned_offset += 4;
2508	}
2509
2510	if (!err)
2511		err = t3_seeprom_wp(adapter, 1);
2512out:
2513	if (buf != data)
2514		free(buf, M_DEVBUF);
2515	return (err);
2516}
2517
2518
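/*
 * Range check for ioctl parameters where a negative value means "leave this
 * setting unchanged"; such values always pass.
 */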
2519static int
2520in_range(int val, int lo, int hi)
2521{
2522	return val < 0 || (val <= hi && val >= lo);
2523}
2524
2525static int
2526cxgb_extension_open(struct cdev *dev, int flags, int fmt, struct thread *td)
2527{
2528	return (0);
2529}
2530
2531static int
2532cxgb_extension_close(struct cdev *dev, int flags, int fmt, struct thread *td)
2533{
2534	return (0);
2535}
2536
2537static int
2538cxgb_extension_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data,
2539    int fflag, struct thread *td)
2540{
2541	int mmd, error = 0;
2542	struct port_info *pi = dev->si_drv1;
2543	adapter_t *sc = pi->adapter;
2544
2545#ifdef PRIV_SUPPORTED
2546	if (priv_check(td, PRIV_DRIVER)) {
2547		if (cxgb_debug)
2548			printf("user does not have access to privileged ioctls\n");
2549		return (EPERM);
2550	}
2551#else
2552	if (suser(td)) {
2553		if (cxgb_debug)
2554			printf("user does not have access to privileged ioctls\n");
2555		return (EPERM);
2556	}
2557#endif
2558
2559	switch (cmd) {
2560	case CHELSIO_GET_MIIREG: {
2561		uint32_t val;
2562		struct cphy *phy = &pi->phy;
2563		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2564
2565		if (!phy->mdio_read)
2566			return (EOPNOTSUPP);
2567		if (is_10G(sc)) {
2568			mmd = mid->phy_id >> 8;
2569			if (!mmd)
2570				mmd = MDIO_DEV_PCS;
2571			else if (mmd > MDIO_DEV_VEND2)
2572				return (EINVAL);
2573
2574			error = phy->mdio_read(sc, mid->phy_id & 0x1f, mmd,
2575					     mid->reg_num, &val);
2576		} else
2577			error = phy->mdio_read(sc, mid->phy_id & 0x1f, 0,
2578					     mid->reg_num & 0x1f, &val);
2579		if (error == 0)
2580			mid->val_out = val;
2581		break;
2582	}
2583	case CHELSIO_SET_MIIREG: {
2584		struct cphy *phy = &pi->phy;
2585		struct ch_mii_data *mid = (struct ch_mii_data *)data;
2586
2587		if (!phy->mdio_write)
2588			return (EOPNOTSUPP);
2589		if (is_10G(sc)) {
2590			mmd = mid->phy_id >> 8;
2591			if (!mmd)
2592				mmd = MDIO_DEV_PCS;
2593			else if (mmd > MDIO_DEV_VEND2)
2594				return (EINVAL);
2595
2596			error = phy->mdio_write(sc, mid->phy_id & 0x1f,
2597					      mmd, mid->reg_num, mid->val_in);
2598		} else
2599			error = phy->mdio_write(sc, mid->phy_id & 0x1f, 0,
2600					      mid->reg_num & 0x1f,
2601					      mid->val_in);
2602		break;
2603	}
2604	case CHELSIO_SETREG: {
2605		struct ch_reg *edata = (struct ch_reg *)data;
2606		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2607			return (EFAULT);
2608		t3_write_reg(sc, edata->addr, edata->val);
2609		break;
2610	}
2611	case CHELSIO_GETREG: {
2612		struct ch_reg *edata = (struct ch_reg *)data;
2613		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
2614			return (EFAULT);
2615		edata->val = t3_read_reg(sc, edata->addr);
2616		break;
2617	}
2618	case CHELSIO_GET_SGE_CONTEXT: {
2619		struct ch_cntxt *ecntxt = (struct ch_cntxt *)data;
2620		mtx_lock_spin(&sc->sge.reg_lock);
2621		switch (ecntxt->cntxt_type) {
2622		case CNTXT_TYPE_EGRESS:
2623			error = -t3_sge_read_ecntxt(sc, ecntxt->cntxt_id,
2624			    ecntxt->data);
2625			break;
2626		case CNTXT_TYPE_FL:
2627			error = -t3_sge_read_fl(sc, ecntxt->cntxt_id,
2628			    ecntxt->data);
2629			break;
2630		case CNTXT_TYPE_RSP:
2631			error = -t3_sge_read_rspq(sc, ecntxt->cntxt_id,
2632			    ecntxt->data);
2633			break;
2634		case CNTXT_TYPE_CQ:
2635			error = -t3_sge_read_cq(sc, ecntxt->cntxt_id,
2636			    ecntxt->data);
2637			break;
2638		default:
2639			error = EINVAL;
2640			break;
2641		}
2642		mtx_unlock_spin(&sc->sge.reg_lock);
2643		break;
2644	}
2645	case CHELSIO_GET_SGE_DESC: {
2646		struct ch_desc *edesc = (struct ch_desc *)data;
2647		int ret;
2648		if (edesc->queue_num >= SGE_QSETS * 6)
2649			return (EINVAL);
2650		ret = t3_get_desc(&sc->sge.qs[edesc->queue_num / 6],
2651		    edesc->queue_num % 6, edesc->idx, edesc->data);
2652		if (ret < 0)
2653			return (EINVAL);
2654		edesc->size = ret;
2655		break;
2656	}
2657	case CHELSIO_GET_QSET_PARAMS: {
2658		struct qset_params *q;
2659		struct ch_qset_params *t = (struct ch_qset_params *)data;
2660		int q1 = pi->first_qset;
2661		int nqsets = pi->nqsets;
2662		int i;
2663
2664		if (t->qset_idx >= nqsets)
2665			return (EINVAL);
2666
2667		i = q1 + t->qset_idx;
2668		q = &sc->params.sge.qset[i];
2669		t->rspq_size   = q->rspq_size;
2670		t->txq_size[0] = q->txq_size[0];
2671		t->txq_size[1] = q->txq_size[1];
2672		t->txq_size[2] = q->txq_size[2];
2673		t->fl_size[0]  = q->fl_size;
2674		t->fl_size[1]  = q->jumbo_size;
2675		t->polling     = q->polling;
2676		t->lro         = q->lro;
2677		t->intr_lat    = q->coalesce_usecs;
2678		t->cong_thres  = q->cong_thres;
2679		t->qnum        = i;
2680
2681		if ((sc->flags & FULL_INIT_DONE) == 0)
2682			t->vector = 0;
2683		else if (sc->flags & USING_MSIX)
2684			t->vector = rman_get_start(sc->msix_irq_res[i]);
2685		else
2686			t->vector = rman_get_start(sc->irq_res);
2687
2688		break;
2689	}
2690	case CHELSIO_GET_QSET_NUM: {
2691		struct ch_reg *edata = (struct ch_reg *)data;
2692		edata->val = pi->nqsets;
2693		break;
2694	}
2695	case CHELSIO_LOAD_FW: {
2696		uint8_t *fw_data;
2697		uint32_t vers;
2698		struct ch_mem_range *t = (struct ch_mem_range *)data;
2699
2700		/*
2701		 * You're allowed to load firmware only before FULL_INIT_DONE.
2702		 *
2703		 * FW_UPTODATE is also set so the rest of the initialization
2704		 * will not overwrite what was loaded here.  This gives you the
2705		 * flexibility to load any firmware (and maybe shoot yourself in
2706		 * the foot).
2707		 */
2708
2709		ADAPTER_LOCK(sc);
2710		if (sc->open_device_map || sc->flags & FULL_INIT_DONE) {
2711			ADAPTER_UNLOCK(sc);
2712			return (EBUSY);
2713		}
2714
2715		fw_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2716		if (!fw_data)
2717			error = ENOMEM;
2718		else
2719			error = copyin(t->buf, fw_data, t->len);
2720
2721		if (!error)
2722			error = -t3_load_fw(sc, fw_data, t->len);
2723
2724		if (t3_get_fw_version(sc, &vers) == 0) {
2725			snprintf(&sc->fw_version[0], sizeof(sc->fw_version),
2726			    "%d.%d.%d", G_FW_VERSION_MAJOR(vers),
2727			    G_FW_VERSION_MINOR(vers), G_FW_VERSION_MICRO(vers));
2728		}
2729
2730		if (!error)
2731			sc->flags |= FW_UPTODATE;
2732
2733		free(fw_data, M_DEVBUF);
2734		ADAPTER_UNLOCK(sc);
2735		break;
2736	}
2737	case CHELSIO_LOAD_BOOT: {
2738		uint8_t *boot_data;
2739		struct ch_mem_range *t = (struct ch_mem_range *)data;
2740
2741		boot_data = malloc(t->len, M_DEVBUF, M_NOWAIT);
2742		if (!boot_data)
2743			return (ENOMEM);
2744
2745		error = copyin(t->buf, boot_data, t->len);
2746		if (!error)
2747			error = -t3_load_boot(sc, boot_data, t->len);
2748
2749		free(boot_data, M_DEVBUF);
2750		break;
2751	}
2752	case CHELSIO_GET_PM: {
2753		struct ch_pm *m = (struct ch_pm *)data;
2754		struct tp_params *p = &sc->params.tp;
2755
2756		if (!is_offload(sc))
2757			return (EOPNOTSUPP);
2758
2759		m->tx_pg_sz = p->tx_pg_size;
2760		m->tx_num_pg = p->tx_num_pgs;
2761		m->rx_pg_sz  = p->rx_pg_size;
2762		m->rx_num_pg = p->rx_num_pgs;
2763		m->pm_total  = p->pmtx_size + p->chan_rx_size * p->nchan;
2764
2765		break;
2766	}
2767	case CHELSIO_SET_PM: {
2768		struct ch_pm *m = (struct ch_pm *)data;
2769		struct tp_params *p = &sc->params.tp;
2770
2771		if (!is_offload(sc))
2772			return (EOPNOTSUPP);
2773		if (sc->flags & FULL_INIT_DONE)
2774			return (EBUSY);
2775
2776		if (!m->rx_pg_sz || (m->rx_pg_sz & (m->rx_pg_sz - 1)) ||
2777		    !m->tx_pg_sz || (m->tx_pg_sz & (m->tx_pg_sz - 1)))
2778			return (EINVAL);	/* not power of 2 */
2779		if (!(m->rx_pg_sz & 0x14000))
2780			return (EINVAL);	/* not 16KB or 64KB */
2781		if (!(m->tx_pg_sz & 0x1554000))
2782			return (EINVAL);	/* not a power of 4 from 16KB to 16MB */
2783		if (m->tx_num_pg == -1)
2784			m->tx_num_pg = p->tx_num_pgs;
2785		if (m->rx_num_pg == -1)
2786			m->rx_num_pg = p->rx_num_pgs;
2787		if (m->tx_num_pg % 24 || m->rx_num_pg % 24)
2788			return (EINVAL);
2789		if (m->rx_num_pg * m->rx_pg_sz > p->chan_rx_size ||
2790		    m->tx_num_pg * m->tx_pg_sz > p->chan_tx_size)
2791			return (EINVAL);
2792
2793		p->rx_pg_size = m->rx_pg_sz;
2794		p->tx_pg_size = m->tx_pg_sz;
2795		p->rx_num_pgs = m->rx_num_pg;
2796		p->tx_num_pgs = m->tx_num_pg;
2797		break;
2798	}
2799	case CHELSIO_SETMTUTAB: {
2800		struct ch_mtus *m = (struct ch_mtus *)data;
2801		int i;
2802
2803		if (!is_offload(sc))
2804			return (EOPNOTSUPP);
2805		if (offload_running(sc))
2806			return (EBUSY);
2807		if (m->nmtus != NMTUS)
2808			return (EINVAL);
2809		if (m->mtus[0] < 81)         /* accommodate SACK */
2810			return (EINVAL);
2811
2812		/*
2813		 * MTUs must be in ascending order
2814		 */
2815		for (i = 1; i < NMTUS; ++i)
2816			if (m->mtus[i] < m->mtus[i - 1])
2817				return (EINVAL);
2818
2819		memcpy(sc->params.mtus, m->mtus, sizeof(sc->params.mtus));
2820		break;
2821	}
2822	case CHELSIO_GETMTUTAB: {
2823		struct ch_mtus *m = (struct ch_mtus *)data;
2824
2825		if (!is_offload(sc))
2826			return (EOPNOTSUPP);
2827
2828		memcpy(m->mtus, sc->params.mtus, sizeof(m->mtus));
2829		m->nmtus = NMTUS;
2830		break;
2831	}
2832	case CHELSIO_GET_MEM: {
2833		struct ch_mem_range *t = (struct ch_mem_range *)data;
2834		struct mc7 *mem;
2835		uint8_t *useraddr;
2836		u64 buf[32];
2837
2838		/*
2839		 * Use these to avoid modifying len/addr in the return
2840		 * struct
2841		 */
2842		uint32_t len = t->len, addr = t->addr;
2843
2844		if (!is_offload(sc))
2845			return (EOPNOTSUPP);
2846		if (!(sc->flags & FULL_INIT_DONE))
2847			return (EIO);         /* need the memory controllers */
2848		if ((addr & 0x7) || (len & 0x7))
2849			return (EINVAL);
2850		if (t->mem_id == MEM_CM)
2851			mem = &sc->cm;
2852		else if (t->mem_id == MEM_PMRX)
2853			mem = &sc->pmrx;
2854		else if (t->mem_id == MEM_PMTX)
2855			mem = &sc->pmtx;
2856		else
2857			return (EINVAL);
2858
2859		/*
2860		 * Version scheme:
2861		 * bits 0..9: chip version
2862		 * bits 10..15: chip revision
2863		 */
2864		t->version = 3 | (sc->params.rev << 10);
2865
2866		/*
2867		 * Read 256 bytes at a time as len can be large and we don't
2868		 * want to use huge intermediate buffers.
2869		 */
2870		useraddr = (uint8_t *)t->buf;
2871		while (len) {
2872			unsigned int chunk = min(len, sizeof(buf));
2873
2874			error = t3_mc7_bd_read(mem, addr / 8, chunk / 8, buf);
2875			if (error)
2876				return (-error);
2877			if (copyout(buf, useraddr, chunk))
2878				return (EFAULT);
2879			useraddr += chunk;
2880			addr += chunk;
2881			len -= chunk;
2882		}
2883		break;
2884	}
2885	case CHELSIO_READ_TCAM_WORD: {
2886		struct ch_tcam_word *t = (struct ch_tcam_word *)data;
2887
2888		if (!is_offload(sc))
2889			return (EOPNOTSUPP);
2890		if (!(sc->flags & FULL_INIT_DONE))
2891			return (EIO);         /* need MC5 */
2892		return (-t3_read_mc5_range(&sc->mc5, t->addr, 1, t->buf));
2893		break;
2894	}
2895	case CHELSIO_SET_TRACE_FILTER: {
2896		struct ch_trace *t = (struct ch_trace *)data;
2897		const struct trace_params *tp;
2898
2899		tp = (const struct trace_params *)&t->sip;
2900		if (t->config_tx)
2901			t3_config_trace_filter(sc, tp, 0, t->invert_match,
2902					       t->trace_tx);
2903		if (t->config_rx)
2904			t3_config_trace_filter(sc, tp, 1, t->invert_match,
2905					       t->trace_rx);
2906		break;
2907	}
2908	case CHELSIO_SET_PKTSCHED: {
2909		struct ch_pktsched_params *p = (struct ch_pktsched_params *)data;
2910		if (sc->open_device_map == 0)
2911			return (EAGAIN);
2912		send_pktsched_cmd(sc, p->sched, p->idx, p->min, p->max,
2913		    p->binding);
2914		break;
2915	}
2916	case CHELSIO_IFCONF_GETREGS: {
2917		struct ch_ifconf_regs *regs = (struct ch_ifconf_regs *)data;
2918		int reglen = cxgb_get_regs_len();
2919		uint8_t *buf = malloc(reglen, M_DEVBUF, M_NOWAIT);
2920		if (buf == NULL) {
2921			return (ENOMEM);
2922		}
2923		if (regs->len > reglen)
2924			regs->len = reglen;
2925		else if (regs->len < reglen)
2926			error = ENOBUFS;
2927
2928		if (!error) {
2929			cxgb_get_regs(sc, regs, buf);
2930			error = copyout(buf, regs->data, reglen);
2931		}
2932		free(buf, M_DEVBUF);
2933
2934		break;
2935	}
2936	case CHELSIO_SET_HW_SCHED: {
2937		struct ch_hw_sched *t = (struct ch_hw_sched *)data;
2938		unsigned int ticks_per_usec = core_ticks_per_usec(sc);
2939
2940		if ((sc->flags & FULL_INIT_DONE) == 0)
2941			return (EAGAIN);       /* need TP to be initialized */
2942		if (t->sched >= NTX_SCHED || !in_range(t->mode, 0, 1) ||
2943		    !in_range(t->channel, 0, 1) ||
2944		    !in_range(t->kbps, 0, 10000000) ||
2945		    !in_range(t->class_ipg, 0, 10000 * 65535 / ticks_per_usec) ||
2946		    !in_range(t->flow_ipg, 0,
2947			      dack_ticks_to_usec(sc, 0x7ff)))
2948			return (EINVAL);
2949
2950		if (t->kbps >= 0) {
2951			error = t3_config_sched(sc, t->kbps, t->sched);
2952			if (error < 0)
2953				return (-error);
2954		}
2955		if (t->class_ipg >= 0)
2956			t3_set_sched_ipg(sc, t->sched, t->class_ipg);
2957		if (t->flow_ipg >= 0) {
2958			t->flow_ipg *= 1000;     /* us -> ns */
2959			t3_set_pace_tbl(sc, &t->flow_ipg, t->sched, 1);
2960		}
2961		if (t->mode >= 0) {
2962			int bit = 1 << (S_TX_MOD_TIMER_MODE + t->sched);
2963
2964			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2965					 bit, t->mode ? bit : 0);
2966		}
2967		if (t->channel >= 0)
2968			t3_set_reg_field(sc, A_TP_TX_MOD_QUEUE_REQ_MAP,
2969					 1 << t->sched, t->channel << t->sched);
2970		break;
2971	}
2972	case CHELSIO_GET_EEPROM: {
2973		int i;
2974		struct ch_eeprom *e = (struct ch_eeprom *)data;
2975		uint8_t *buf;
2976
2977		if (e->offset & 3 || e->offset >= EEPROMSIZE ||
2978		    e->len > EEPROMSIZE || e->offset + e->len > EEPROMSIZE) {
2979			return (EINVAL);
2980		}
2981
2982		buf = malloc(EEPROMSIZE, M_DEVBUF, M_NOWAIT);
2983		if (buf == NULL) {
2984			return (ENOMEM);
2985		}
2986		e->magic = EEPROM_MAGIC;
2987		for (i = e->offset & ~3; !error && i < e->offset + e->len; i += 4)
2988			error = -t3_seeprom_read(sc, i, (uint32_t *)&buf[i]);
2989
2990		if (!error)
2991			error = copyout(buf + e->offset, e->data, e->len);
2992
2993		free(buf, M_DEVBUF);
2994		break;
2995	}
2996	case CHELSIO_CLEAR_STATS: {
2997		if (!(sc->flags & FULL_INIT_DONE))
2998			return (EAGAIN);
2999
3000		PORT_LOCK(pi);
3001		t3_mac_update_stats(&pi->mac);
3002		memset(&pi->mac.stats, 0, sizeof(pi->mac.stats));
3003		PORT_UNLOCK(pi);
3004		break;
3005	}
3006	case CHELSIO_GET_UP_LA: {
3007		struct ch_up_la *la = (struct ch_up_la *)data;
3008		uint8_t *buf = malloc(LA_BUFSIZE, M_DEVBUF, M_NOWAIT);
3009		if (buf == NULL) {
3010			return (ENOMEM);
3011		}
3012		if (la->bufsize < LA_BUFSIZE)
3013			error = ENOBUFS;
3014
3015		if (!error)
3016			error = -t3_get_up_la(sc, &la->stopped, &la->idx,
3017					      &la->bufsize, buf);
3018		if (!error)
3019			error = copyout(buf, la->data, la->bufsize);
3020
3021		free(buf, M_DEVBUF);
3022		break;
3023	}
3024	case CHELSIO_GET_UP_IOQS: {
3025		struct ch_up_ioqs *ioqs = (struct ch_up_ioqs *)data;
3026		uint8_t *buf = malloc(IOQS_BUFSIZE, M_DEVBUF, M_NOWAIT);
3027		uint32_t *v;
3028
3029		if (buf == NULL) {
3030			return (ENOMEM);
3031		}
3032		if (ioqs->bufsize < IOQS_BUFSIZE)
3033			error = ENOBUFS;
3034
3035		if (!error)
3036			error = -t3_get_up_ioqs(sc, &ioqs->bufsize, buf);
3037
3038		if (!error) {
3039			v = (uint32_t *)buf;
3040
3041			ioqs->ioq_rx_enable = *v++;
3042			ioqs->ioq_tx_enable = *v++;
3043			ioqs->ioq_rx_status = *v++;
3044			ioqs->ioq_tx_status = *v++;
3045
3046			error = copyout(v, ioqs->data, ioqs->bufsize);
3047		}
3048
3049		free(buf, M_DEVBUF);
3050		break;
3051	}
3052	case CHELSIO_SET_FILTER: {
3053		struct ch_filter *f = (struct ch_filter *)data;
3054		struct filter_info *p;
3055		unsigned int nfilters = sc->params.mc5.nfilters;
3056
3057		if (!is_offload(sc))
3058			return (EOPNOTSUPP);	/* No TCAM */
3059		if (!(sc->flags & FULL_INIT_DONE))
3060			return (EAGAIN);	/* mc5 not setup yet */
3061		if (nfilters == 0)
3062			return (EBUSY);		/* TOE will use TCAM */
3063
3064		/* sanity checks */
3065		if (f->filter_id >= nfilters ||
3066		    (f->val.dip && f->mask.dip != 0xffffffff) ||
3067		    (f->val.sport && f->mask.sport != 0xffff) ||
3068		    (f->val.dport && f->mask.dport != 0xffff) ||
3069		    (f->val.vlan && f->mask.vlan != 0xfff) ||
3070		    (f->val.vlan_prio &&
3071			f->mask.vlan_prio != FILTER_NO_VLAN_PRI) ||
3072		    (f->mac_addr_idx != 0xffff && f->mac_addr_idx > 15) ||
3073		    f->qset >= SGE_QSETS ||
3074		    sc->rrss_map[f->qset] >= RSS_TABLE_SIZE)
3075			return (EINVAL);
3076
3077		/* Was allocated with M_WAITOK */
3078		KASSERT(sc->filters, ("filter table NULL\n"));
3079
3080		p = &sc->filters[f->filter_id];
3081		if (p->locked)
3082			return (EPERM);
3083
3084		bzero(p, sizeof(*p));
3085		p->sip = f->val.sip;
3086		p->sip_mask = f->mask.sip;
3087		p->dip = f->val.dip;
3088		p->sport = f->val.sport;
3089		p->dport = f->val.dport;
3090		p->vlan = f->mask.vlan ? f->val.vlan : 0xfff;
3091		p->vlan_prio = f->mask.vlan_prio ? (f->val.vlan_prio & 6) :
3092		    FILTER_NO_VLAN_PRI;
3093		p->mac_hit = f->mac_hit;
3094		p->mac_vld = f->mac_addr_idx != 0xffff;
3095		p->mac_idx = f->mac_addr_idx;
3096		p->pkt_type = f->proto;
3097		p->report_filter_id = f->want_filter_id;
3098		p->pass = f->pass;
3099		p->rss = f->rss;
3100		p->qset = f->qset;
3101
3102		error = set_filter(sc, f->filter_id, p);
3103		if (error == 0)
3104			p->valid = 1;
3105		break;
3106	}
3107	case CHELSIO_DEL_FILTER: {
3108		struct ch_filter *f = (struct ch_filter *)data;
3109		struct filter_info *p;
3110		unsigned int nfilters = sc->params.mc5.nfilters;
3111
3112		if (!is_offload(sc))
3113			return (EOPNOTSUPP);
3114		if (!(sc->flags & FULL_INIT_DONE))
3115			return (EAGAIN);
3116		if (nfilters == 0 || sc->filters == NULL)
3117			return (EINVAL);
3118		if (f->filter_id >= nfilters)
3119			return (EINVAL);
3120
3121		p = &sc->filters[f->filter_id];
3122		if (p->locked)
3123			return (EPERM);
3124		if (!p->valid)
3125			return (EFAULT); /* Read "Bad address" as "Bad index" */
3126
3127		bzero(p, sizeof(*p));
3128		p->sip = p->sip_mask = 0xffffffff;
3129		p->vlan = 0xfff;
3130		p->vlan_prio = FILTER_NO_VLAN_PRI;
3131		p->pkt_type = 1;
3132		error = set_filter(sc, f->filter_id, p);
3133		break;
3134	}
3135	case CHELSIO_GET_FILTER: {
3136		struct ch_filter *f = (struct ch_filter *)data;
3137		struct filter_info *p;
3138		unsigned int i, nfilters = sc->params.mc5.nfilters;
3139
3140		if (!is_offload(sc))
3141			return (EOPNOTSUPP);
3142		if (!(sc->flags & FULL_INIT_DONE))
3143			return (EAGAIN);
3144		if (nfilters == 0 || sc->filters == NULL)
3145			return (EINVAL);
3146
3147		i = f->filter_id == 0xffffffff ? 0 : f->filter_id + 1;
3148		for (; i < nfilters; i++) {
3149			p = &sc->filters[i];
3150			if (!p->valid)
3151				continue;
3152
3153			bzero(f, sizeof(*f));
3154
3155			f->filter_id = i;
3156			f->val.sip = p->sip;
3157			f->mask.sip = p->sip_mask;
3158			f->val.dip = p->dip;
3159			f->mask.dip = p->dip ? 0xffffffff : 0;
3160			f->val.sport = p->sport;
3161			f->mask.sport = p->sport ? 0xffff : 0;
3162			f->val.dport = p->dport;
3163			f->mask.dport = p->dport ? 0xffff : 0;
3164			f->val.vlan = p->vlan == 0xfff ? 0 : p->vlan;
3165			f->mask.vlan = p->vlan == 0xfff ? 0 : 0xfff;
3166			f->val.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3167			    0 : p->vlan_prio;
3168			f->mask.vlan_prio = p->vlan_prio == FILTER_NO_VLAN_PRI ?
3169			    0 : FILTER_NO_VLAN_PRI;
3170			f->mac_hit = p->mac_hit;
3171			f->mac_addr_idx = p->mac_vld ? p->mac_idx : 0xffff;
3172			f->proto = p->pkt_type;
3173			f->want_filter_id = p->report_filter_id;
3174			f->pass = p->pass;
3175			f->rss = p->rss;
3176			f->qset = p->qset;
3177
3178			break;
3179		}
3180
3181		if (i == nfilters)
3182			f->filter_id = 0xffffffff;
3183		break;
3184	}
3185	default:
3186		return (EOPNOTSUPP);
3187		break;
3188	}
3189
3190	return (error);
3191}
3192
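/*
 * Copy the registers in [start, end] into buf at the same offsets they
 * occupy in the register map.
 */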
3193static __inline void
3194reg_block_dump(struct adapter *ap, uint8_t *buf, unsigned int start,
3195    unsigned int end)
3196{
3197	uint32_t *p = (uint32_t *)(buf + start);
3198
3199	for ( ; start <= end; start += sizeof(uint32_t))
3200		*p++ = t3_read_reg(ap, start);
3201}
3202
3203#define T3_REGMAP_SIZE (3 * 1024)
3204static int
3205cxgb_get_regs_len(void)
3206{
3207	return (T3_REGMAP_SIZE);
3208}
3209
3210static void
3211cxgb_get_regs(adapter_t *sc, struct ch_ifconf_regs *regs, uint8_t *buf)
3212{
3213
3214	/*
3215	 * Version scheme:
3216	 * bits 0..9: chip version
3217	 * bits 10..15: chip revision
3218	 * bit 31: set for PCIe cards
3219	 */
3220	regs->version = 3 | (sc->params.rev << 10) | (is_pcie(sc) << 31);
3221
3222	/*
3223	 * We skip the MAC statistics registers because they are clear-on-read.
3224	 * Also reading multi-register stats would need to synchronize with the
3225	 * periodic mac stats accumulation.  Hard to justify the complexity.
3226	 */
3227	memset(buf, 0, cxgb_get_regs_len());
3228	reg_block_dump(sc, buf, 0, A_SG_RSPQ_CREDIT_RETURN);
3229	reg_block_dump(sc, buf, A_SG_HI_DRB_HI_THRSH, A_ULPRX_PBL_ULIMIT);
3230	reg_block_dump(sc, buf, A_ULPTX_CONFIG, A_MPS_INT_CAUSE);
3231	reg_block_dump(sc, buf, A_CPL_SWITCH_CNTRL, A_CPL_MAP_TBL_DATA);
3232	reg_block_dump(sc, buf, A_SMB_GLOBAL_TIME_CFG, A_XGM_SERDES_STAT3);
3233	reg_block_dump(sc, buf, A_XGM_SERDES_STATUS0,
3234		       XGM_REG(A_XGM_SERDES_STAT3, 1));
3235	reg_block_dump(sc, buf, XGM_REG(A_XGM_SERDES_STATUS0, 1),
3236		       XGM_REG(A_XGM_RX_SPI4_SOP_EOP_CNT, 1));
3237}
3238
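/*
 * Allocate the software filter table.  The last entry is pre-set as a
 * locked, valid pass-to-RSS filter; setup_hw_filters pushes locked entries
 * to the hardware once filters are enabled.
 */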
3239static int
3240alloc_filters(struct adapter *sc)
3241{
3242	struct filter_info *p;
3243	unsigned int nfilters = sc->params.mc5.nfilters;
3244
3245	if (nfilters == 0)
3246		return (0);
3247
3248	p = malloc(sizeof(*p) * nfilters, M_DEVBUF, M_WAITOK | M_ZERO);
3249	sc->filters = p;
3250
3251	p = &sc->filters[nfilters - 1];
3252	p->vlan = 0xfff;
3253	p->vlan_prio = FILTER_NO_VLAN_PRI;
3254	p->pass = p->rss = p->valid = p->locked = 1;
3255
3256	return (0);
3257}
3258
3259static int
3260setup_hw_filters(struct adapter *sc)
3261{
3262	int i, rc;
3263	unsigned int nfilters = sc->params.mc5.nfilters;
3264
3265	if (!sc->filters)
3266		return (0);
3267
3268	t3_enable_filters(sc);
3269
3270	for (i = rc = 0; i < nfilters && !rc; i++) {
3271		if (sc->filters[i].locked)
3272			rc = set_filter(sc, i, &sc->filters[i]);
3273	}
3274
3275	return (rc);
3276}
3277
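/*
 * Program one filter in the TCAM.  The request is a ULP_TXPKT work request
 * carrying a CPL_PASS_OPEN_REQ and two CPL_SET_TCB_FIELDs, sent on the
 * management queue.  A pass (non-RSS) filter needs one more
 * CPL_SET_TCB_FIELD to steer hits to the chosen response queue.
 */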
3278static int
3279set_filter(struct adapter *sc, int id, const struct filter_info *f)
3280{
3281	int len;
3282	struct mbuf *m;
3283	struct ulp_txpkt *txpkt;
3284	struct work_request_hdr *wr;
3285	struct cpl_pass_open_req *oreq;
3286	struct cpl_set_tcb_field *sreq;
3287
3288	len = sizeof(*wr) + sizeof(*oreq) + 2 * sizeof(*sreq);
3289	KASSERT(len <= MHLEN, ("filter request too big for an mbuf"));
3290
3291	id += t3_mc5_size(&sc->mc5) - sc->params.mc5.nroutes -
3292	      sc->params.mc5.nfilters;
3293
3294	m = m_gethdr(M_WAITOK, MT_DATA);
3295	m->m_len = m->m_pkthdr.len = len;
3296	bzero(mtod(m, char *), len);
3297
3298	wr = mtod(m, struct work_request_hdr *);
3299	wr->wrh_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC);
3300
3301	oreq = (struct cpl_pass_open_req *)(wr + 1);
3302	txpkt = (struct ulp_txpkt *)oreq;
3303	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3304	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*oreq) / 8));
3305	OPCODE_TID(oreq) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, id));
3306	oreq->local_port = htons(f->dport);
3307	oreq->peer_port = htons(f->sport);
3308	oreq->local_ip = htonl(f->dip);
3309	oreq->peer_ip = htonl(f->sip);
3310	oreq->peer_netmask = htonl(f->sip_mask);
3311	oreq->opt0h = 0;
3312	oreq->opt0l = htonl(F_NO_OFFLOAD);
3313	oreq->opt1 = htonl(V_MAC_MATCH_VALID(f->mac_vld) |
3314			 V_CONN_POLICY(CPL_CONN_POLICY_FILTER) |
3315			 V_VLAN_PRI(f->vlan_prio >> 1) |
3316			 V_VLAN_PRI_VALID(f->vlan_prio != FILTER_NO_VLAN_PRI) |
3317			 V_PKT_TYPE(f->pkt_type) | V_OPT1_VLAN(f->vlan) |
3318			 V_MAC_MATCH(f->mac_idx | (f->mac_hit << 4)));
3319
3320	sreq = (struct cpl_set_tcb_field *)(oreq + 1);
3321	set_tcb_field_ulp(sreq, id, 1, 0x1800808000ULL,
3322			  (f->report_filter_id << 15) | (1 << 23) |
3323			  ((u64)f->pass << 35) | ((u64)!f->rss << 36));
3324	set_tcb_field_ulp(sreq + 1, id, 0, 0xffffffff, (2 << 19) | 1);
3325	t3_mgmt_tx(sc, m);
3326
3327	if (f->pass && !f->rss) {
3328		len = sizeof(*sreq);
3329		m = m_gethdr(M_WAITOK, MT_DATA);
3330		m->m_len = m->m_pkthdr.len = len;
3331		bzero(mtod(m, char *), len);
3332		sreq = mtod(m, struct cpl_set_tcb_field *);
3333		sreq->wr.wrh_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
3334		mk_set_tcb_field(sreq, id, 25, 0x3f80000,
3335				 (u64)sc->rrss_map[f->qset] << 19);
3336		t3_mgmt_tx(sc, m);
3337	}
3338	return (0);
3339}
3340
3341static inline void
3342mk_set_tcb_field(struct cpl_set_tcb_field *req, unsigned int tid,
3343    unsigned int word, u64 mask, u64 val)
3344{
3345	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid));
3346	req->reply = V_NO_REPLY(1);
3347	req->cpu_idx = 0;
3348	req->word = htons(word);
3349	req->mask = htobe64(mask);
3350	req->val = htobe64(val);
3351}
3352
3353static inline void
3354set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid,
3355    unsigned int word, u64 mask, u64 val)
3356{
3357	struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req;
3358
3359	txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT));
3360	txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8));
3361	mk_set_tcb_field(req, tid, word, mask, val);
3362}
3363
3364void
3365t3_iterate(void (*func)(struct adapter *, void *), void *arg)
3366{
3367	struct adapter *sc;
3368
3369	mtx_lock(&t3_list_lock);
3370	SLIST_FOREACH(sc, &t3_list, link) {
3371		/*
3372		 * func should not make any assumptions about what state sc is
3373		 * in - the only guarantee is that sc->sc_lock is a valid lock.
3374		 */
3375		func(sc, arg);
3376	}
3377	mtx_unlock(&t3_list_lock);
3378}
3379
3380#ifdef TCP_OFFLOAD
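/*
 * Enable or disable TOE on a port.  Enabling activates the TOM upper layer
 * driver on first use (and opportunistically iWARP); disabling merely clears
 * the port's bit in the offload map.
 */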
3381static int
3382toe_capability(struct port_info *pi, int enable)
3383{
3384	int rc;
3385	struct adapter *sc = pi->adapter;
3386
3387	ADAPTER_LOCK_ASSERT_OWNED(sc);
3388
3389	if (!is_offload(sc))
3390		return (ENODEV);
3391
3392	if (enable) {
3393		if (!(sc->flags & FULL_INIT_DONE)) {
3394			log(LOG_WARNING,
3395			    "You must enable a cxgb interface first\n");
3396			return (EAGAIN);
3397		}
3398
3399		if (isset(&sc->offload_map, pi->port_id))
3400			return (0);
3401
3402		if (!(sc->flags & TOM_INIT_DONE)) {
3403			rc = t3_activate_uld(sc, ULD_TOM);
3404			if (rc == EAGAIN) {
3405				log(LOG_WARNING,
3406				    "You must kldload t3_tom.ko before trying "
3407				    "to enable TOE on a cxgb interface.\n");
3408			}
3409			if (rc != 0)
3410				return (rc);
3411			KASSERT(sc->tom_softc != NULL,
3412			    ("%s: TOM activated but softc NULL", __func__));
3413			KASSERT(sc->flags & TOM_INIT_DONE,
3414			    ("%s: TOM activated but flag not set", __func__));
3415		}
3416
3417		setbit(&sc->offload_map, pi->port_id);
3418
3419		/*
3420		 * XXX: Temporary code to allow iWARP to be enabled when TOE is
3421		 * enabled on any port.  Need to figure out how to enable,
3422		 * disable, load, and unload iWARP cleanly.
3423		 */
3424		if (!isset(&sc->offload_map, MAX_NPORTS) &&
3425		    t3_activate_uld(sc, ULD_IWARP) == 0)
3426			setbit(&sc->offload_map, MAX_NPORTS);
3427	} else {
3428		if (!isset(&sc->offload_map, pi->port_id))
3429			return (0);
3430
3431		KASSERT(sc->flags & TOM_INIT_DONE,
3432		    ("%s: TOM never initialized?", __func__));
3433		clrbit(&sc->offload_map, pi->port_id);
3434	}
3435
3436	return (0);
3437}
3438
3439/*
3440 * Add an upper layer driver to the global list.
3441 */
3442int
3443t3_register_uld(struct uld_info *ui)
3444{
3445	int rc = 0;
3446	struct uld_info *u;
3447
3448	mtx_lock(&t3_uld_list_lock);
3449	SLIST_FOREACH(u, &t3_uld_list, link) {
3450		if (u->uld_id == ui->uld_id) {
3451			rc = EEXIST;
3452			goto done;
3453		}
3454	}
3455
3456	SLIST_INSERT_HEAD(&t3_uld_list, ui, link);
3457	ui->refcount = 0;
3458done:
3459	mtx_unlock(&t3_uld_list_lock);
3460	return (rc);
3461}
3462
3463int
3464t3_unregister_uld(struct uld_info *ui)
3465{
3466	int rc = EINVAL;
3467	struct uld_info *u;
3468
3469	mtx_lock(&t3_uld_list_lock);
3470
3471	SLIST_FOREACH(u, &t3_uld_list, link) {
3472		if (u == ui) {
3473			if (ui->refcount > 0) {
3474				rc = EBUSY;
3475				goto done;
3476			}
3477
3478			SLIST_REMOVE(&t3_uld_list, ui, uld_info, link);
3479			rc = 0;
3480			goto done;
3481		}
3482	}
3483done:
3484	mtx_unlock(&t3_uld_list_lock);
3485	return (rc);
3486}
3487
3488int
3489t3_activate_uld(struct adapter *sc, int id)
3490{
3491	int rc = EAGAIN;
3492	struct uld_info *ui;
3493
3494	mtx_lock(&t3_uld_list_lock);
3495
3496	SLIST_FOREACH(ui, &t3_uld_list, link) {
3497		if (ui->uld_id == id) {
3498			rc = ui->activate(sc);
3499			if (rc == 0)
3500				ui->refcount++;
3501			goto done;
3502		}
3503	}
3504done:
3505	mtx_unlock(&t3_uld_list_lock);
3506
3507	return (rc);
3508}
3509
3510int
3511t3_deactivate_uld(struct adapter *sc, int id)
3512{
3513	int rc = EINVAL;
3514	struct uld_info *ui;
3515
3516	mtx_lock(&t3_uld_list_lock);
3517
3518	SLIST_FOREACH(ui, &t3_uld_list, link) {
3519		if (ui->uld_id == id) {
3520			rc = ui->deactivate(sc);
3521			if (rc == 0)
3522				ui->refcount--;
3523			goto done;
3524		}
3525	}
3526done:
3527	mtx_unlock(&t3_uld_list_lock);
3528
3529	return (rc);
3530}
3531
3532static int
3533cpl_not_handled(struct sge_qset *qs __unused, struct rsp_desc *r __unused,
3534    struct mbuf *m)
3535{
3536	m_freem(m);
3537	return (EDOOFUS);
3538}
3539
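/*
 * Install the handler for a CPL opcode; a NULL handler restores the default
 * cpl_not_handled.  The table slot is updated with an atomic release store.
 */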
3540int
3541t3_register_cpl_handler(struct adapter *sc, int opcode, cpl_handler_t h)
3542{
3543	uintptr_t *loc, new;
3544
3545	if (opcode >= NUM_CPL_HANDLERS)
3546		return (EINVAL);
3547
3548	new = h ? (uintptr_t)h : (uintptr_t)cpl_not_handled;
3549	loc = (uintptr_t *) &sc->cpl_handler[opcode];
3550	atomic_store_rel_ptr(loc, new);
3551
3552	return (0);
3553}
3554#endif
3555
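/*
 * Module load/unload handler: set up the global adapter (and, with
 * TCP_OFFLOAD, ULD) lists on load, and refuse to unload while either list
 * is non-empty.
 */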
3556static int
3557cxgbc_mod_event(module_t mod, int cmd, void *arg)
3558{
3559	int rc = 0;
3560
3561	switch (cmd) {
3562	case MOD_LOAD:
3563		mtx_init(&t3_list_lock, "T3 adapters", 0, MTX_DEF);
3564		SLIST_INIT(&t3_list);
3565#ifdef TCP_OFFLOAD
3566		mtx_init(&t3_uld_list_lock, "T3 ULDs", 0, MTX_DEF);
3567		SLIST_INIT(&t3_uld_list);
3568#endif
3569		break;
3570
3571	case MOD_UNLOAD:
3572#ifdef TCP_OFFLOAD
3573		mtx_lock(&t3_uld_list_lock);
3574		if (!SLIST_EMPTY(&t3_uld_list)) {
3575			rc = EBUSY;
3576			mtx_unlock(&t3_uld_list_lock);
3577			break;
3578		}
3579		mtx_unlock(&t3_uld_list_lock);
3580		mtx_destroy(&t3_uld_list_lock);
3581#endif
3582		mtx_lock(&t3_list_lock);
3583		if (!SLIST_EMPTY(&t3_list)) {
3584			rc = EBUSY;
3585			mtx_unlock(&t3_list_lock);
3586			break;
3587		}
3588		mtx_unlock(&t3_list_lock);
3589		mtx_destroy(&t3_list_lock);
3590		break;
3591	}
3592
3593	return (rc);
3594}
3595
3596#ifdef DEBUGNET
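/*
 * debugnet support: report queue geometry to the framework, reset the free
 * list zones and disable LRO when a debugnet cycle starts, and provide
 * minimal transmit and poll hooks.
 */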
3597static void
3598cxgb_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize)
3599{
3600	struct port_info *pi;
3601	adapter_t *adap;
3602
3603	pi = if_getsoftc(ifp);
3604	adap = pi->adapter;
3605	ADAPTER_LOCK(adap);
3606	*nrxr = adap->nqsets;
3607	*ncl = adap->sge.qs[0].fl[1].size;
3608	*clsize = adap->sge.qs[0].fl[1].buf_size;
3609	ADAPTER_UNLOCK(adap);
3610}
3611
3612static void
3613cxgb_debugnet_event(struct ifnet *ifp, enum debugnet_ev event)
3614{
3615	struct port_info *pi;
3616	struct sge_qset *qs;
3617	int i;
3618
3619	pi = if_getsoftc(ifp);
3620	if (event == DEBUGNET_START)
3621		for (i = 0; i < pi->adapter->nqsets; i++) {
3622			qs = &pi->adapter->sge.qs[i];
3623
3624			/* Need to reinit after debugnet_mbuf_start(). */
3625			qs->fl[0].zone = zone_pack;
3626			qs->fl[1].zone = zone_clust;
3627			qs->lro.enabled = 0;
3628		}
3629}
3630
3631static int
3632cxgb_debugnet_transmit(struct ifnet *ifp, struct mbuf *m)
3633{
3634	struct port_info *pi;
3635	struct sge_qset *qs;
3636
3637	pi = if_getsoftc(ifp);
3638	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
3639	    IFF_DRV_RUNNING)
3640		return (ENOENT);
3641
3642	qs = &pi->adapter->sge.qs[pi->first_qset];
3643	return (cxgb_debugnet_encap(qs, &m));
3644}
3645
3646static int
3647cxgb_debugnet_poll(struct ifnet *ifp, int count)
3648{
3649	struct port_info *pi;
3650	adapter_t *adap;
3651	int i;
3652
3653	pi = if_getsoftc(ifp);
3654	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0)
3655		return (ENOENT);
3656
3657	adap = pi->adapter;
3658	for (i = 0; i < adap->nqsets; i++)
3659		(void)cxgb_debugnet_poll_rx(adap, &adap->sge.qs[i]);
3660	(void)cxgb_debugnet_poll_tx(&adap->sge.qs[pi->first_qset]);
3661	return (0);
3662}
3663#endif /* DEBUGNET */
3664