xref: /illumos-gate/usr/src/uts/common/io/mlxcx/mlxcx.c (revision 5014e1fa)
1ebb7c6fdSAlex Wilson /*
2ebb7c6fdSAlex Wilson  * This file and its contents are supplied under the terms of the
3ebb7c6fdSAlex Wilson  * Common Development and Distribution License ("CDDL"), version 1.0.
4ebb7c6fdSAlex Wilson  * You may only use this file in accordance with the terms of version
5ebb7c6fdSAlex Wilson  * 1.0 of the CDDL.
6ebb7c6fdSAlex Wilson  *
7ebb7c6fdSAlex Wilson  * A full copy of the text of the CDDL should have accompanied this
8ebb7c6fdSAlex Wilson  * source.  A copy of the CDDL is also available via the Internet at
9ebb7c6fdSAlex Wilson  * http://www.illumos.org/license/CDDL.
10ebb7c6fdSAlex Wilson  */
11ebb7c6fdSAlex Wilson 
12ebb7c6fdSAlex Wilson /*
13*5014e1faSAlex Wilson  * Copyright 2023, The University of Queensland
14ebb7c6fdSAlex Wilson  * Copyright (c) 2018, Joyent, Inc.
1583b3f06fSJason King  * Copyright 2023 RackTop Systems, Inc.
1685e4aa97SDan McDonald  * Copyright 2023 MNX Cloud, Inc.
17ebb7c6fdSAlex Wilson  */
18ebb7c6fdSAlex Wilson 
19ebb7c6fdSAlex Wilson /*
20ebb7c6fdSAlex Wilson  * Mellanox Connect-X 4/5/6 driver.
21ebb7c6fdSAlex Wilson  */
22ebb7c6fdSAlex Wilson 
23ebb7c6fdSAlex Wilson /*
2485e4aa97SDan McDonald  * The PRM for this family of parts was freely available at:
2585e4aa97SDan McDonald  *
26ebb7c6fdSAlex Wilson  * https://www.mellanox.com/related-docs/user_manuals/ \
27ebb7c6fdSAlex Wilson  *   Ethernet_Adapters_Programming_Manual.pdf
2885e4aa97SDan McDonald  *
2985e4aa97SDan McDonald  * but has since disappeared.
30ebb7c6fdSAlex Wilson  */
31ebb7c6fdSAlex Wilson /*
32ebb7c6fdSAlex Wilson  * ConnectX glossary
33ebb7c6fdSAlex Wilson  * -----------------
34ebb7c6fdSAlex Wilson  *
35ebb7c6fdSAlex Wilson  * WR		Work Request: something we've asked the hardware to do by
36ebb7c6fdSAlex Wilson  *		creating a Work Queue Entry (WQE), e.g. send or recv a packet
37ebb7c6fdSAlex Wilson  *
38ebb7c6fdSAlex Wilson  * WQE		Work Queue Entry: a descriptor on a work queue descriptor ring
39ebb7c6fdSAlex Wilson  *
40ebb7c6fdSAlex Wilson  * WQ		Work Queue: a descriptor ring that we can place WQEs on, usually
41ebb7c6fdSAlex Wilson  *		either a Send Queue (SQ) or Receive Queue (RQ). Different WQ
42ebb7c6fdSAlex Wilson  *		types have different WQE structures, different commands for
43ebb7c6fdSAlex Wilson  *		creating and destroying them, etc, but share a common context
44ebb7c6fdSAlex Wilson  *		structure, counter setup and state graph.
45ebb7c6fdSAlex Wilson  * SQ		Send Queue, a specific type of WQ that sends packets
46ebb7c6fdSAlex Wilson  * RQ		Receive Queue, a specific type of WQ that receives packets
47ebb7c6fdSAlex Wilson  *
48ebb7c6fdSAlex Wilson  * CQ		Completion Queue: completion of WRs from a WQ are reported to
49ebb7c6fdSAlex Wilson  *		one of these, as a CQE on its entry ring.
50ebb7c6fdSAlex Wilson  * CQE		Completion Queue Entry: an entry in a CQ ring. Contains error
51ebb7c6fdSAlex Wilson  *		info, as well as packet size, the ID of the WQ, and the index
52ebb7c6fdSAlex Wilson  *		of the WQE which completed. Does not contain any packet data.
53ebb7c6fdSAlex Wilson  *
54ebb7c6fdSAlex Wilson  * EQ		Event Queue: a ring of event structs from the hardware informing
55ebb7c6fdSAlex Wilson  *		us when particular events happen. Many events can point at a
56ebb7c6fdSAlex Wilson  *		particular CQ which we should then go look at.
57ebb7c6fdSAlex Wilson  * EQE		Event Queue Entry: an entry on the EQ ring
58ebb7c6fdSAlex Wilson  *
59ebb7c6fdSAlex Wilson  * UAR		User Access Region, a page of the device's PCI BAR which is
60ebb7c6fdSAlex Wilson  *		tied to particular EQ/CQ/WQ sets and contains doorbells to
61ebb7c6fdSAlex Wilson  *		ring to arm them for interrupts or wake them up for new work
62ebb7c6fdSAlex Wilson  *
63ebb7c6fdSAlex Wilson  * RQT		RQ Table, a collection of indexed RQs used to refer to the group
64ebb7c6fdSAlex Wilson  *		as a single unit (for e.g. hashing/RSS).
65ebb7c6fdSAlex Wilson  *
66ebb7c6fdSAlex Wilson  * TIR		Transport Interface Receive, a bucket of resources for the
67ebb7c6fdSAlex Wilson  *		reception of packets. TIRs have to point at either a single RQ
68ebb7c6fdSAlex Wilson  *		or a table of RQs (RQT). They then serve as a target for flow
69ebb7c6fdSAlex Wilson  *		table entries (FEs). TIRs that point at an RQT also contain the
70ebb7c6fdSAlex Wilson  *		settings for hashing for RSS.
71ebb7c6fdSAlex Wilson  *
72ebb7c6fdSAlex Wilson  * TIS		Transport Interface Send, a bucket of resources associated with
73ebb7c6fdSAlex Wilson  *		the transmission of packets. In particular, the temporary
74ebb7c6fdSAlex Wilson  *		resources used for LSO internally in the card are accounted to
75ebb7c6fdSAlex Wilson  *		a TIS.
76ebb7c6fdSAlex Wilson  *
77ebb7c6fdSAlex Wilson  * FT		Flow Table, a collection of FEs and FGs that can be referred to
78ebb7c6fdSAlex Wilson  *		as a single entity (e.g. used as a target from another flow
79ebb7c6fdSAlex Wilson  *		entry or set as the "root" table to handle incoming or outgoing
80ebb7c6fdSAlex Wilson  *		packets). Packets arriving at a FT are matched against the
81ebb7c6fdSAlex Wilson  *		FEs in the table until either one matches with a terminating
82ebb7c6fdSAlex Wilson  *		action or all FEs are exhausted (it's first-match-wins but with
83ebb7c6fdSAlex Wilson  *		some actions that are non-terminal, like counting actions).
84ebb7c6fdSAlex Wilson  *
85ebb7c6fdSAlex Wilson  * FG		Flow Group, a group of FEs which share a common "mask" (i.e.
86ebb7c6fdSAlex Wilson  *		they match on the same attributes of packets coming into the
87ebb7c6fdSAlex Wilson  *		flow).
88ebb7c6fdSAlex Wilson  *
89ebb7c6fdSAlex Wilson  * FE		Flow Entry, an individual set of values to match against
90ebb7c6fdSAlex Wilson  *		packets entering the flow table, combined with an action to
91ebb7c6fdSAlex Wilson  *		take upon a successful match. The action we use most is
92ebb7c6fdSAlex Wilson  *		"forward", which sends the packets to a TIR or another flow
93ebb7c6fdSAlex Wilson  *		table and then stops further processing within the FE's FT.
94ebb7c6fdSAlex Wilson  *
95ebb7c6fdSAlex Wilson  * lkey/mkey	A reference to something similar to a page table but in the
96ebb7c6fdSAlex Wilson  *		device's internal onboard MMU. Since Connect-X parts double as
97ebb7c6fdSAlex Wilson  *		IB cards (lots of RDMA) they have extensive onboard memory mgmt
98ebb7c6fdSAlex Wilson  *		features which we try very hard not to use. For our WQEs we use
99ebb7c6fdSAlex Wilson  *		the "reserved" lkey, which is a special value which indicates
100ebb7c6fdSAlex Wilson  *		that addresses we give are linear addresses and should not be
101ebb7c6fdSAlex Wilson  *		translated.
102ebb7c6fdSAlex Wilson  *
103ebb7c6fdSAlex Wilson  * PD		Protection Domain, an IB concept. We have to allocate one to
104ebb7c6fdSAlex Wilson  *		provide as a parameter for new WQs, but we don't do anything
105ebb7c6fdSAlex Wilson  *		with it.
106ebb7c6fdSAlex Wilson  *
107ebb7c6fdSAlex Wilson  * TDOM/TD	Transport Domain, an IB concept. We allocate one in order to
108ebb7c6fdSAlex Wilson  *		provide it as a parameter to TIR/TIS creation, but we don't do
109ebb7c6fdSAlex Wilson  *		anything with it.
110ebb7c6fdSAlex Wilson  */
111ebb7c6fdSAlex Wilson /*
112ebb7c6fdSAlex Wilson  *
113ebb7c6fdSAlex Wilson  * Data flow overview
114ebb7c6fdSAlex Wilson  * ------------------
115ebb7c6fdSAlex Wilson  *
116ebb7c6fdSAlex Wilson  * This driver is a MAC ring-enabled driver which maps rings to send and recv
117ebb7c6fdSAlex Wilson  * queues in hardware on the device.
118ebb7c6fdSAlex Wilson  *
119ebb7c6fdSAlex Wilson  * Each SQ and RQ is set up to report to its own individual CQ, to ensure
120ebb7c6fdSAlex Wilson  * sufficient space, and simplify the logic needed to work out which buffer
121ebb7c6fdSAlex Wilson  * was completed.
122ebb7c6fdSAlex Wilson  *
123ebb7c6fdSAlex Wilson  * The CQs are then round-robin allocated onto EQs, of which we set up one per
124ebb7c6fdSAlex Wilson  * interrupt that the system gives us for the device. Normally this means we
125ebb7c6fdSAlex Wilson  * have 8 EQs.
126ebb7c6fdSAlex Wilson  *
127ebb7c6fdSAlex Wilson  * When we have >= 8 EQs available, we try to allocate only RX or only TX
128ebb7c6fdSAlex Wilson  * CQs on each one. The EQs are chosen for RX and TX in an alternating fashion.
129ebb7c6fdSAlex Wilson  *
130ebb7c6fdSAlex Wilson  * EQ #0 is reserved for all event types other than completion events, and has
131ebb7c6fdSAlex Wilson  * no CQs associated with it at any time. EQs #1 and upwards are only used for
132ebb7c6fdSAlex Wilson  * handling CQ completion events.
133ebb7c6fdSAlex Wilson  *
134ebb7c6fdSAlex Wilson  * +------+     +------+           +------+        +---------+
135ebb7c6fdSAlex Wilson  * | SQ 0 |---->| CQ 0 |-----+     | EQ 0 |------> | MSI-X 0 |     mlxcx_intr_0
136ebb7c6fdSAlex Wilson  * +------+     +------+     |     +------+        +---------+
137ebb7c6fdSAlex Wilson  *                           |
138ebb7c6fdSAlex Wilson  * +------+     +------+     |
139ebb7c6fdSAlex Wilson  * | SQ 1 |---->| CQ 1 |---+ |     +------+
140ebb7c6fdSAlex Wilson  * +------+     +------+   | +---> |      |
141ebb7c6fdSAlex Wilson  *                         |       |      |
142ebb7c6fdSAlex Wilson  * +------+     +------+   |       | EQ 1 |        +---------+
143ebb7c6fdSAlex Wilson  * | SQ 2 |---->| CQ 2 |---------> |      |------> | MSI-X 1 |     mlxcx_intr_n
144ebb7c6fdSAlex Wilson  * +------+     +------+   | +---> |      |        +---------+
145ebb7c6fdSAlex Wilson  *                         | |     +------+
146ebb7c6fdSAlex Wilson  *                         | |
147ebb7c6fdSAlex Wilson  *   ...                   | |
148ebb7c6fdSAlex Wilson  *                         | |     +------+
149ebb7c6fdSAlex Wilson  * +------+     +------+   +-----> |      |
150ebb7c6fdSAlex Wilson  * | RQ 0 |---->| CQ 3 |---------> |      |        +---------+
151ebb7c6fdSAlex Wilson  * +------+     +------+     |     | EQ 2 |------> | MSI-X 2 |     mlxcx_intr_n
152ebb7c6fdSAlex Wilson  *                           |     |      |        +---------+
153ebb7c6fdSAlex Wilson  * +------+     +------+     | +-> |      |
154ebb7c6fdSAlex Wilson  * | RQ 1 |---->| CQ 4 |-----+ |   +------+
155ebb7c6fdSAlex Wilson  * +------+     +------+       |
156ebb7c6fdSAlex Wilson  *                             |     ....
157ebb7c6fdSAlex Wilson  * +------+     +------+       |
158ebb7c6fdSAlex Wilson  * | RQ 2 |---->| CQ 5 |-------+
159ebb7c6fdSAlex Wilson  * +------+     +------+
160ebb7c6fdSAlex Wilson  *
161ebb7c6fdSAlex Wilson  *   ... (note this diagram does not show RX-only or TX-only EQs)
162ebb7c6fdSAlex Wilson  *
163ebb7c6fdSAlex Wilson  * For TX, we advertise all of the SQs we create as plain rings to MAC with
164ebb7c6fdSAlex Wilson  * no TX groups. This puts MAC in "virtual group" mode where it will allocate
165ebb7c6fdSAlex Wilson  * and use the rings as it sees fit.
166ebb7c6fdSAlex Wilson  *
167ebb7c6fdSAlex Wilson  * For RX, we advertise actual groups in order to make use of hardware
168ebb7c6fdSAlex Wilson  * classification.
169ebb7c6fdSAlex Wilson  *
170ebb7c6fdSAlex Wilson  * The hardware classification we use is based around Flow Tables, and we
171ebb7c6fdSAlex Wilson  * currently ignore all of the eswitch features of the card. The NIC VPORT
172ebb7c6fdSAlex Wilson  * is always set to promisc mode so that the eswitch sends us all of the
173ebb7c6fdSAlex Wilson  * traffic that arrives on the NIC, and we use flow entries to manage
174ebb7c6fdSAlex Wilson  * everything.
175ebb7c6fdSAlex Wilson  *
176ebb7c6fdSAlex Wilson  * We use 2 layers of flow tables for classification: traffic arrives at the
177ebb7c6fdSAlex Wilson  * root RX flow table which contains MAC address filters. Those then send
178ebb7c6fdSAlex Wilson  * matched traffic to the per-group L1 VLAN filter tables which contain VLAN
179ebb7c6fdSAlex Wilson  * presence and VID filters.
180ebb7c6fdSAlex Wilson  *
181ebb7c6fdSAlex Wilson  * Since these parts only support doing RSS hashing on a single protocol at a
182ebb7c6fdSAlex Wilson  * time, we have to use a third layer of flow tables as well to break traffic
183ebb7c6fdSAlex Wilson  * down by L4 and L3 protocol (TCPv6, TCPv4, UDPv6, UDPv4, IPv6, IPv4 etc)
184ebb7c6fdSAlex Wilson  * so that it can be sent to the appropriate TIR for hashing.
185ebb7c6fdSAlex Wilson  *
186ebb7c6fdSAlex Wilson  * Incoming packets
187ebb7c6fdSAlex Wilson  *        +           +---------+      +---------+
188ebb7c6fdSAlex Wilson  *        |        +->| group 0 |      | group 0 |
189ebb7c6fdSAlex Wilson  *        |        |  | vlan ft |  +-->| hash ft |
190ebb7c6fdSAlex Wilson  *        v        |  |   L1    |  |   |   L2    |
191ebb7c6fdSAlex Wilson  *   +----+----+   |  +---------+  |   +---------+    +-----+    +-----+------+
192ebb7c6fdSAlex Wilson  *   | eswitch |   |  |         |  |   |  TCPv6  |--->| TIR |--->|     |  RQ0 |
193ebb7c6fdSAlex Wilson  *   +----+----+   |  |         |  |   +---------+    +-----+    |     +------+
194ebb7c6fdSAlex Wilson  *        |        |  |         |  |   |  UDPv6  |--->| TIR |--->|     |  RQ1 |
195ebb7c6fdSAlex Wilson  *        |        |  |         |  |   +---------+    +-----+    |     +------+
196ebb7c6fdSAlex Wilson  *        |        |  |         |  |   |  TCPv4  |--->| TIR |--->|     |  RQ2 |
197ebb7c6fdSAlex Wilson  *        v        |  |         |  |   +---------+    +-----+    | RQT +------+
198ebb7c6fdSAlex Wilson  *   +----+----+   |  +---------+  |   |  UDPv4  |--->| TIR |--->|     |  ... |
199ebb7c6fdSAlex Wilson  *   | root rx |   |  | default |--+   +---------+    +-----+    |     |      |
200ebb7c6fdSAlex Wilson  *   | flow tb |   |  +---------+  |   |  IPv6   |--->| TIR |--->|     |      |
201ebb7c6fdSAlex Wilson  *   |    L0   |   |  | promisc |--+   +---------+    +-----+    |     |      |
202ebb7c6fdSAlex Wilson  *   +---------+   |  +---------+  ^   |  IPv4   |--->| TIR |--->|     |      |
203ebb7c6fdSAlex Wilson  *   |  bcast  |---|---------------+   +---------+    +-----+    +-----+------+
204ebb7c6fdSAlex Wilson  *   +---------+   |               ^   |  other  |-+
205ebb7c6fdSAlex Wilson  *   |  MAC 0  |---+               |   +---------+ |  +-----+    +-----+
206ebb7c6fdSAlex Wilson  *   +---------+                   |               +->| TIR |--->| RQ0 |
207ebb7c6fdSAlex Wilson  *   |  MAC 1  |-+                 |                  +-----+    +-----+
208ebb7c6fdSAlex Wilson  *   +---------+ | +---------------+
209ebb7c6fdSAlex Wilson  *   |  MAC 2  |-+ |               ^
210ebb7c6fdSAlex Wilson  *   +---------+ | |               |
211ebb7c6fdSAlex Wilson  *   |  MAC 3  |-+ |  +---------+  |   +---------+
212ebb7c6fdSAlex Wilson  *   +---------+ | |  | group 1 |  |   | group 1 |
213ebb7c6fdSAlex Wilson  *   |  .....  | +--->| vlan ft |  | +>| hash ft |
214ebb7c6fdSAlex Wilson  *   |         |   |  |   L1    |  | | |   L2    |
215ebb7c6fdSAlex Wilson  *   +---------+   |  +---------+  | | +---------+    +-----+    +-----+------+
216ebb7c6fdSAlex Wilson  *   | promisc |---+  | VLAN 0  |----+ |  TCPv6  |--->| TIR |--->|     |  RQ3 |
217ebb7c6fdSAlex Wilson  *   +---------+      +---------+  |   +---------+    +-----+    |     +------+
218ebb7c6fdSAlex Wilson  *                    |  .....  |  |   |  UDPv6  |--->| TIR |--->|     |  RQ4 |
219ebb7c6fdSAlex Wilson  *                    |         |  |   +---------+    +-----+    |     +------+
220ebb7c6fdSAlex Wilson  *                    |         |  |   |  TCPv4  |--->| TIR |--->|     |  RQ5 |
221ebb7c6fdSAlex Wilson  *                    |         |  |   +---------+    +-----+    | RQT +------+
222ebb7c6fdSAlex Wilson  *                    +---------+  |   |  UDPv4  |--->| TIR |--->|     |  ... |
223ebb7c6fdSAlex Wilson  *                    |         |  |   +---------+    +-----+    |     |      |
224ebb7c6fdSAlex Wilson  *                    +---------+  |   |  IPv6   |--->| TIR |--->|     |      |
225ebb7c6fdSAlex Wilson  *                    | promisc |--+   +---------+    +-----+    |     |      |
226ebb7c6fdSAlex Wilson  *                    +---------+      |  IPv4   |--->| TIR |--->|     |      |
227ebb7c6fdSAlex Wilson  *                                     +---------+    +-----+    +-----+------+
228ebb7c6fdSAlex Wilson  *                                     |  other  |-+
229ebb7c6fdSAlex Wilson  *                                     +---------+ |
230ebb7c6fdSAlex Wilson  *                      .......                    |  +-----+    +-----+
231ebb7c6fdSAlex Wilson  *                                                 +->| TIR |--->| RQ3 |
232ebb7c6fdSAlex Wilson  *                                                    +-----+    +-----+
233ebb7c6fdSAlex Wilson  *
234ebb7c6fdSAlex Wilson  * Note that the "promisc" flow entries are only set/enabled when promisc
235ebb7c6fdSAlex Wilson  * mode is enabled for the NIC. All promisc flow entries point directly at
236ebb7c6fdSAlex Wilson  * group 0's hashing flowtable (so all promisc-only traffic lands on group 0,
237ebb7c6fdSAlex Wilson  * the "default group" in MAC).
238ebb7c6fdSAlex Wilson  *
239ebb7c6fdSAlex Wilson  * The "default" entry in the L1 VLAN filter flow tables is used when there
240ebb7c6fdSAlex Wilson  * are no VLANs set for the group, to accept any traffic regardless of tag. It
241ebb7c6fdSAlex Wilson  * is deleted as soon as a VLAN filter is added (and re-instated if the
242ebb7c6fdSAlex Wilson  * last VLAN filter is removed).
243ebb7c6fdSAlex Wilson  *
244ebb7c6fdSAlex Wilson  * The actual descriptor ring structures for RX on Connect-X4 don't contain any
245ebb7c6fdSAlex Wilson  * space for packet data (they're a collection of scatter pointers only). TX
246ebb7c6fdSAlex Wilson  * descriptors contain some space for "inline headers" (and the card requires
247ebb7c6fdSAlex Wilson  * us to put at least the L2 Ethernet headers there for the eswitch to look at)
248ebb7c6fdSAlex Wilson  * but all the rest of the data comes from the gather pointers.
249ebb7c6fdSAlex Wilson  *
250ebb7c6fdSAlex Wilson  * When we get completions back they simply contain the ring index number of
251ebb7c6fdSAlex Wilson  * the WR (work request) which completed. So, we manage the buffers for actual
252ebb7c6fdSAlex Wilson  * packet data completely independently of the descriptors in this driver. When
253ebb7c6fdSAlex Wilson  * a WR is enqueued in a WQE (work queue entry), we stamp the packet data buffer
254ebb7c6fdSAlex Wilson  * with the WQE index that we put it at, and therefore don't have to look at
255ebb7c6fdSAlex Wilson  * the original descriptor at all when handling completions.
256ebb7c6fdSAlex Wilson  *
257ebb7c6fdSAlex Wilson  * For RX, we create sufficient packet data buffers to fill 150% of the
258ebb7c6fdSAlex Wilson  * available descriptors for each ring. These all are pre-set-up for DMA and
259ebb7c6fdSAlex Wilson  * have an mblk_t associated with them (with desballoc()).
260ebb7c6fdSAlex Wilson  *
261ebb7c6fdSAlex Wilson  * For TX we either borrow the mblk's memory and DMA bind it (if the packet is
262ebb7c6fdSAlex Wilson  * large enough), or we copy it into a pre-allocated buffer set up in the same
263ebb7c6fdSAlex Wilson  * way as for RX.
264ebb7c6fdSAlex Wilson  */
265ebb7c6fdSAlex Wilson 
266ebb7c6fdSAlex Wilson /*
267ebb7c6fdSAlex Wilson  * Buffer lifecycle: RX
268ebb7c6fdSAlex Wilson  * --------------------
269ebb7c6fdSAlex Wilson  *
270ebb7c6fdSAlex Wilson  * The lifecycle of an mlxcx_buffer_t (packet buffer) used for RX is pretty
271ebb7c6fdSAlex Wilson  * straightforward.
272ebb7c6fdSAlex Wilson  *
273ebb7c6fdSAlex Wilson  * It is created (and has all its memory allocated) at the time of starting up
274ebb7c6fdSAlex Wilson  * the RX ring it belongs to. Then it is placed on the "free" list in the
275ebb7c6fdSAlex Wilson  * mlxcx_buffer_shard_t associated with its RQ. When mlxcx_rq_refill() wants
276ebb7c6fdSAlex Wilson  * more buffers to add to the RQ, it takes one off and marks it as "on WQ"
277ebb7c6fdSAlex Wilson  * before making a WQE for it.
278ebb7c6fdSAlex Wilson  *
279ebb7c6fdSAlex Wilson  * After a completion event occurs, the packet is either discarded (and the
28019325e87SPaul Winder  * buffer_t returned to the free list), or it is readied for loaning to MAC
28119325e87SPaul Winder  * and placed on the "loaned" list in the mlxcx_buffer_shard_t.
282ebb7c6fdSAlex Wilson  *
283ebb7c6fdSAlex Wilson  * Once MAC and the rest of the system have finished with the packet, they call
28419325e87SPaul Winder  * freemsg() on its mblk, which will call mlxcx_buf_mp_return. At this point
28519325e87SPaul Winder  * the fate of the buffer_t is determined by the state of the
28619325e87SPaul Winder  * mlxcx_buffer_shard_t. When the shard is in its normal state the buffer_t
28719325e87SPaul Winder  * will be returned to the free list, potentially to be recycled and used
28819325e87SPaul Winder  * again. But if the shard is draining (e.g. after a ring stop) there will be
28919325e87SPaul Winder  * no recycling and the buffer_t is immediately destroyed.
290ebb7c6fdSAlex Wilson  *
291ebb7c6fdSAlex Wilson  * At detach/teardown time, buffers are only ever destroyed from the free list.
292ebb7c6fdSAlex Wilson  *
293ebb7c6fdSAlex Wilson  *
294ebb7c6fdSAlex Wilson  *                         +
295ebb7c6fdSAlex Wilson  *                         |
296ebb7c6fdSAlex Wilson  *                         | mlxcx_buf_create
297ebb7c6fdSAlex Wilson  *                         |
298ebb7c6fdSAlex Wilson  *                         v
299ebb7c6fdSAlex Wilson  *                    +----+----+
300ebb7c6fdSAlex Wilson  *                    | created |
30119325e87SPaul Winder  *                    +----+----+                        +------+
30219325e87SPaul Winder  *                         |                             | dead |
30319325e87SPaul Winder  *                         |                             +------+
30419325e87SPaul Winder  *                         | mlxcx_buf_return                ^
30519325e87SPaul Winder  *                         |                                 |
30619325e87SPaul Winder  *                         v                                 | mlxcx_buf_destroy
30719325e87SPaul Winder  * mlxcx_buf_destroy  +----+----+          +-----------+     |
30819325e87SPaul Winder  *          +---------|  free   |<------no-| draining? |-yes-+
30919325e87SPaul Winder  *          |         +----+----+          +-----------+
31019325e87SPaul Winder  *          |              |                     ^
311ebb7c6fdSAlex Wilson  *          |              |                     |
31219325e87SPaul Winder  *          v              | mlxcx_buf_take      | mlxcx_buf_return
313ebb7c6fdSAlex Wilson  *      +---+--+           v                     |
314ebb7c6fdSAlex Wilson  *      | dead |       +---+---+                 |
315ebb7c6fdSAlex Wilson  *      +------+       | on WQ |- - - - - - - - >O
316ebb7c6fdSAlex Wilson  *                     +---+---+                 ^
317ebb7c6fdSAlex Wilson  *                         |                     |
318ebb7c6fdSAlex Wilson  *                         |                     |
319ebb7c6fdSAlex Wilson  *                         | mlxcx_buf_loan      | mlxcx_buf_mp_return
320ebb7c6fdSAlex Wilson  *                         v                     |
321ebb7c6fdSAlex Wilson  *                 +-------+--------+            |
322ebb7c6fdSAlex Wilson  *                 | on loan to MAC |----------->O
323ebb7c6fdSAlex Wilson  *                 +----------------+  freemsg()
324ebb7c6fdSAlex Wilson  *
325ebb7c6fdSAlex Wilson  */
326ebb7c6fdSAlex Wilson 
327ebb7c6fdSAlex Wilson /*
328ebb7c6fdSAlex Wilson  * Buffer lifecycle: TX
329ebb7c6fdSAlex Wilson  * --------------------
330ebb7c6fdSAlex Wilson  *
331ebb7c6fdSAlex Wilson  * mlxcx_buffer_ts used for TX are divided into two kinds: regular buffers, and
332ebb7c6fdSAlex Wilson  * "foreign" buffers.
333ebb7c6fdSAlex Wilson  *
334ebb7c6fdSAlex Wilson  * The former have their memory allocated and DMA bound by this driver, while
335ebb7c6fdSAlex Wilson  * the latter (the "foreign" buffers) are on loan from MAC. Their memory is
336ebb7c6fdSAlex Wilson  * not owned by us, though we do DMA bind it (and take responsibility for
337ebb7c6fdSAlex Wilson  * un-binding it when we're done with them).
338ebb7c6fdSAlex Wilson  *
339ebb7c6fdSAlex Wilson  * We use separate mlxcx_buf_shard_ts for foreign and local buffers on each
340ebb7c6fdSAlex Wilson  * SQ. Thus, there is a separate free list and mutex for each kind.
341ebb7c6fdSAlex Wilson  *
342ebb7c6fdSAlex Wilson  * Since a TX packet might consist of multiple mblks, we translate each mblk
343ebb7c6fdSAlex Wilson  * into exactly one buffer_t. The buffer_ts are chained together in the same
344ebb7c6fdSAlex Wilson  * order as the mblks, using the mlb_tx_chain/mlb_tx_chain_entry list_t.
345ebb7c6fdSAlex Wilson  *
346ebb7c6fdSAlex Wilson  * Each chain of TX buffers may consist of foreign or driver buffers, in any
347ebb7c6fdSAlex Wilson  * mixture.
348ebb7c6fdSAlex Wilson  *
349ebb7c6fdSAlex Wilson  * The head of a TX buffer chain has mlb_tx_head == itself, which distinguishes
350ebb7c6fdSAlex Wilson  * it from the rest of the chain buffers.
351ebb7c6fdSAlex Wilson  *
352ebb7c6fdSAlex Wilson  * TX buffer chains are always returned to the free list by
353ebb7c6fdSAlex Wilson  * mlxcx_buf_return_chain(), which takes care of walking the mlb_tx_chain and
354ebb7c6fdSAlex Wilson  * freeing all of the members.
355ebb7c6fdSAlex Wilson  *
356ebb7c6fdSAlex Wilson  * We only call freemsg() once, on the head of the TX buffer chain's original
357ebb7c6fdSAlex Wilson  * mblk. This is true whether we copied it or bound it in a foreign buffer.
358ebb7c6fdSAlex Wilson  */
359ebb7c6fdSAlex Wilson 
360ebb7c6fdSAlex Wilson /*
361ebb7c6fdSAlex Wilson  * Startup and command interface
362ebb7c6fdSAlex Wilson  * -----------------------------
363ebb7c6fdSAlex Wilson  *
364ebb7c6fdSAlex Wilson  * The command interface is the primary way in which we give control orders to
365ebb7c6fdSAlex Wilson  * the hardware (e.g. actions like "create this queue" or "delete this flow
366ebb7c6fdSAlex Wilson  * entry"). The command interface is never used to transmit or receive packets
367ebb7c6fdSAlex Wilson  * -- that takes place only on the queues that are set up through it.
368ebb7c6fdSAlex Wilson  *
369ebb7c6fdSAlex Wilson  * In mlxcx_cmd.c we implement our use of the command interface on top of a
3705f0e3176SPaul Winder  * simple taskq. As commands are submitted from the taskq they choose a
3715f0e3176SPaul Winder  * "slot"; if there are no free slots then execution of the command will
3725f0e3176SPaul Winder  * be paused until one is free. The hardware permits up to 32 independent
3735f0e3176SPaul Winder  * slots for concurrent command execution.
3745f0e3176SPaul Winder  *
3755f0e3176SPaul Winder  * Before interrupts are enabled, command completion is polled; once
3765f0e3176SPaul Winder  * interrupts are up, command completions become asynchronous and are
3775f0e3176SPaul Winder  * wired to EQ 0. A caveat to this is that commands cannot be submitted
3785f0e3176SPaul Winder  * directly from EQ 0's completion handler, and any processing resulting from
3795f0e3176SPaul Winder  * an asynchronous event which requires further use of the command interface
3805f0e3176SPaul Winder  * is posted through a taskq.
381ebb7c6fdSAlex Wilson  *
382ebb7c6fdSAlex Wilson  * The startup/attach process for this card involves a bunch of different steps
383ebb7c6fdSAlex Wilson  * which are summarised pretty well in the PRM. We have to send a number of
384ebb7c6fdSAlex Wilson  * commands which do different things to start the card up, give it some pages
385ebb7c6fdSAlex Wilson  * of our own memory for it to use, then start creating all the entities that
386ebb7c6fdSAlex Wilson  * we need to use like EQs, CQs, WQs, as well as their dependencies like PDs
387ebb7c6fdSAlex Wilson  * and TDoms.
388ebb7c6fdSAlex Wilson  */
389ebb7c6fdSAlex Wilson 
390ebb7c6fdSAlex Wilson /*
391ebb7c6fdSAlex Wilson  * UARs
392ebb7c6fdSAlex Wilson  * ----
393ebb7c6fdSAlex Wilson  *
394ebb7c6fdSAlex Wilson  * The pages of the PCI BAR other than the first few are reserved for use as
395ebb7c6fdSAlex Wilson  * "UAR" sections in this device. Each UAR section can be used as a set of
396ebb7c6fdSAlex Wilson  * doorbells for our queues.
397ebb7c6fdSAlex Wilson  *
398ebb7c6fdSAlex Wilson  * Currently we just make one single UAR for all of our queues. It doesn't
399ebb7c6fdSAlex Wilson  * seem to be a major limitation yet.
400ebb7c6fdSAlex Wilson  *
401ebb7c6fdSAlex Wilson  * When we're sending packets through an SQ, the PRM is not awfully clear about
402ebb7c6fdSAlex Wilson  * exactly how we're meant to use the first 16 bytes of the Blueflame buffers
403ebb7c6fdSAlex Wilson  * (it's clear on the pattern of alternation you're expected to use between
404ebb7c6fdSAlex Wilson  * even and odd for Blueflame sends, but not for regular doorbells).
405ebb7c6fdSAlex Wilson  *
406ebb7c6fdSAlex Wilson  * Currently we don't do the even-odd alternating pattern for ordinary
407ebb7c6fdSAlex Wilson  * doorbells, and we don't use Blueflame at all. This seems to work fine, at
408ebb7c6fdSAlex Wilson  * least on Connect-X4 Lx.
409ebb7c6fdSAlex Wilson  */
410ebb7c6fdSAlex Wilson 
411ebb7c6fdSAlex Wilson /*
412ebb7c6fdSAlex Wilson  * Lock ordering
413ebb7c6fdSAlex Wilson  * -------------
414ebb7c6fdSAlex Wilson  *
415ebb7c6fdSAlex Wilson  * Interrupt side:
416ebb7c6fdSAlex Wilson  *
417ebb7c6fdSAlex Wilson  *  - mleq_mtx
4180207f820SPaul Winder  *    - mlcq_arm_mtx
4190207f820SPaul Winder  *      - mlcq_mtx
4200207f820SPaul Winder  *        - mlcq_bufbmtx
4210207f820SPaul Winder  *        - mlwq_mtx
4220207f820SPaul Winder  *          - mlbs_mtx
423ebb7c6fdSAlex Wilson  *    - mlp_mtx
424ebb7c6fdSAlex Wilson  *
425ebb7c6fdSAlex Wilson  * GLD side:
426ebb7c6fdSAlex Wilson  *
427ebb7c6fdSAlex Wilson  *  - mlp_mtx
428ebb7c6fdSAlex Wilson  *    - mlg_mtx
429ebb7c6fdSAlex Wilson  *      - mlg_*.mlft_mtx
430ebb7c6fdSAlex Wilson  *    - mlp_*.mlft_mtx
431ebb7c6fdSAlex Wilson  *    - mlwq_mtx
432ebb7c6fdSAlex Wilson  *      - mlbs_mtx
433ebb7c6fdSAlex Wilson  *      - mlcq_bufbmtx
434ebb7c6fdSAlex Wilson  *  - mleq_mtx
4350207f820SPaul Winder  *    - mlcq_arm_mtx
4360207f820SPaul Winder  *      - mlcq_mtx
437ebb7c6fdSAlex Wilson  *
438ebb7c6fdSAlex Wilson  */
439ebb7c6fdSAlex Wilson 
440ebb7c6fdSAlex Wilson #include <sys/modctl.h>
441ebb7c6fdSAlex Wilson #include <sys/conf.h>
442ebb7c6fdSAlex Wilson #include <sys/devops.h>
443ebb7c6fdSAlex Wilson #include <sys/sysmacros.h>
444ebb7c6fdSAlex Wilson #include <sys/time.h>
44583b3f06fSJason King #include <sys/pci.h>
446ebb7c6fdSAlex Wilson #include <sys/mac_provider.h>
447ebb7c6fdSAlex Wilson 
448ebb7c6fdSAlex Wilson #include <mlxcx.h>
449ebb7c6fdSAlex Wilson 
/*
 * Compile-time check that 2^MLXCX_RX_HASH_FT_SIZE_SHIFT is at least
 * MLXCX_TIRS_PER_GROUP. NOTE(review): presumably this guarantees that a
 * group's hash flow table has room for one entry per TIR in the group --
 * confirm against the flow table setup code.
 */
450ebb7c6fdSAlex Wilson CTASSERT((1 << MLXCX_RX_HASH_FT_SIZE_SHIFT) >= MLXCX_TIRS_PER_GROUP);
451ebb7c6fdSAlex Wilson 
452ebb7c6fdSAlex Wilson #define	MLXCX_MODULE_NAME	"mlxcx"
453ebb7c6fdSAlex Wilson /*
454ebb7c6fdSAlex Wilson  * We give this to the firmware, so it has to be in a fixed format that it
455ebb7c6fdSAlex Wilson  * understands.
456ebb7c6fdSAlex Wilson  */
457ebb7c6fdSAlex Wilson #define	MLXCX_DRIVER_VERSION	"illumos,mlxcx,1.0.0,1,000,000000"
458ebb7c6fdSAlex Wilson 
459ebb7c6fdSAlex Wilson /*
460ebb7c6fdSAlex Wilson  * Firmware may take a while to reclaim pages. Try a set number of times.
 *
 * mlxcx_reclaim_delay is expressed in microseconds (50ms by default) and
 * mlxcx_reclaim_tries is the number of attempts (100 by default), so the
 * defaults bound the total wait at 100 * 50ms = 5000ms.
 *
 * NOTE(review): both are non-static globals, presumably so that they can
 * be tuned without rebuilding the driver -- confirm.
461ebb7c6fdSAlex Wilson  */
462ebb7c6fdSAlex Wilson clock_t mlxcx_reclaim_delay = 1000 * 50; /* 50 ms in us */
463ebb7c6fdSAlex Wilson uint_t mlxcx_reclaim_tries = 100; /* Wait at most 5000ms */
464ebb7c6fdSAlex Wilson 
/*
 * NOTE(review): presumably the anchor for the per-instance soft state
 * (ddi_soft_state_*() family) -- confirm against the attach/_init code,
 * which is not visible here.
 */
465ebb7c6fdSAlex Wilson static void *mlxcx_softstate;
466ebb7c6fdSAlex Wilson 
467ebb7c6fdSAlex Wilson /*
468ebb7c6fdSAlex Wilson  * Fault detection thresholds.
 *
 * Both start out at their compile-time defaults (the *_DFLT macros, which
 * are defined outside this file).
469ebb7c6fdSAlex Wilson  */
470ebb7c6fdSAlex Wilson uint_t mlxcx_doorbell_tries = MLXCX_DOORBELL_TRIES_DFLT;
471ebb7c6fdSAlex Wilson uint_t mlxcx_stuck_intr_count = MLXCX_STUCK_INTR_COUNT_DFLT;
472ebb7c6fdSAlex Wilson 
473ebb7c6fdSAlex Wilson static void
mlxcx_load_prop_defaults(mlxcx_t * mlxp)47422d05228SPaul Winder mlxcx_load_prop_defaults(mlxcx_t *mlxp)
475ebb7c6fdSAlex Wilson {
476ebb7c6fdSAlex Wilson 	mlxcx_drv_props_t *p = &mlxp->mlx_props;
47722d05228SPaul Winder 	mlxcx_port_t *port = &mlxp->mlx_ports[0];
47822d05228SPaul Winder 
47922d05228SPaul Winder 	VERIFY((mlxp->mlx_attach & MLXCX_ATTACH_PORTS) != 0);
48022d05228SPaul Winder 	VERIFY((mlxp->mlx_attach & (MLXCX_ATTACH_CQS | MLXCX_ATTACH_WQS)) == 0);
48122d05228SPaul Winder 
48222d05228SPaul Winder 	/*
48322d05228SPaul Winder 	 * Currently we have different queue size defaults for two
48422d05228SPaul Winder 	 * categories of queues. One set for devices which support a
48522d05228SPaul Winder 	 * maximum speed of 10Gb/s, and another for those above that.
48622d05228SPaul Winder 	 */
48722d05228SPaul Winder 	if ((port->mlp_max_proto & (MLXCX_PROTO_25G | MLXCX_PROTO_40G |
48885e4aa97SDan McDonald 	    MLXCX_PROTO_50G | MLXCX_PROTO_100G)) != 0 ||
48985e4aa97SDan McDonald 	    (port->mlp_ext_max_proto & (MLXCX_EXTPROTO_25G |
49085e4aa97SDan McDonald 	    MLXCX_EXTPROTO_40G | MLXCX_EXTPROTO_50G | MLXCX_EXTPROTO_100G |
49185e4aa97SDan McDonald 	    MLXCX_EXTPROTO_200G | MLXCX_EXTPROTO_400G)) != 0) {
49222d05228SPaul Winder 		p->mldp_cq_size_shift_default = MLXCX_CQ_SIZE_SHIFT_25G;
49322d05228SPaul Winder 		p->mldp_rq_size_shift_default = MLXCX_RQ_SIZE_SHIFT_25G;
49422d05228SPaul Winder 		p->mldp_sq_size_shift_default = MLXCX_SQ_SIZE_SHIFT_25G;
49522d05228SPaul Winder 	} else if ((port->mlp_max_proto & (MLXCX_PROTO_100M | MLXCX_PROTO_1G |
49685e4aa97SDan McDonald 	    MLXCX_PROTO_10G)) != 0 ||
49785e4aa97SDan McDonald 	    (port->mlp_ext_max_proto & (MLXCX_EXTPROTO_100M |
49885e4aa97SDan McDonald 	    MLXCX_EXTPROTO_5G | MLXCX_EXTPROTO_1G | MLXCX_EXTPROTO_10G)) != 0) {
49922d05228SPaul Winder 		p->mldp_cq_size_shift_default = MLXCX_CQ_SIZE_SHIFT_DFLT;
50022d05228SPaul Winder 		p->mldp_rq_size_shift_default = MLXCX_RQ_SIZE_SHIFT_DFLT;
50122d05228SPaul Winder 		p->mldp_sq_size_shift_default = MLXCX_SQ_SIZE_SHIFT_DFLT;
50222d05228SPaul Winder 	} else {
50322d05228SPaul Winder 		mlxcx_warn(mlxp, "Encountered a port with a speed we don't "
50422d05228SPaul Winder 		    "recognize. Proto: 0x%x", port->mlp_max_proto);
50522d05228SPaul Winder 		p->mldp_cq_size_shift_default = MLXCX_CQ_SIZE_SHIFT_DFLT;
50622d05228SPaul Winder 		p->mldp_rq_size_shift_default = MLXCX_RQ_SIZE_SHIFT_DFLT;
50722d05228SPaul Winder 		p->mldp_sq_size_shift_default = MLXCX_SQ_SIZE_SHIFT_DFLT;
50822d05228SPaul Winder 	}
50922d05228SPaul Winder }
51022d05228SPaul Winder 
51122d05228SPaul Winder /*
51222d05228SPaul Winder  * Properties which may have different defaults based on hardware
51322d05228SPaul Winder  * characteristics.
51422d05228SPaul Winder  */
51522d05228SPaul Winder static void
mlxcx_load_model_props(mlxcx_t * mlxp)51622d05228SPaul Winder mlxcx_load_model_props(mlxcx_t *mlxp)
51722d05228SPaul Winder {
51822d05228SPaul Winder 	mlxcx_drv_props_t *p = &mlxp->mlx_props;
51922d05228SPaul Winder 
52022d05228SPaul Winder 	mlxcx_load_prop_defaults(mlxp);
521ebb7c6fdSAlex Wilson 
522ebb7c6fdSAlex Wilson 	p->mldp_cq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
523ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cq_size_shift",
52422d05228SPaul Winder 	    p->mldp_cq_size_shift_default);
525ebb7c6fdSAlex Wilson 	p->mldp_sq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
526ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "sq_size_shift",
52722d05228SPaul Winder 	    p->mldp_sq_size_shift_default);
528ebb7c6fdSAlex Wilson 	p->mldp_rq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
529ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rq_size_shift",
53022d05228SPaul Winder 	    p->mldp_rq_size_shift_default);
53122d05228SPaul Winder }
53222d05228SPaul Winder 
53322d05228SPaul Winder static void
mlxcx_load_props(mlxcx_t * mlxp)53422d05228SPaul Winder mlxcx_load_props(mlxcx_t *mlxp)
53522d05228SPaul Winder {
53622d05228SPaul Winder 	mlxcx_drv_props_t *p = &mlxp->mlx_props;
537ebb7c6fdSAlex Wilson 
53822d05228SPaul Winder 	p->mldp_eq_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
53922d05228SPaul Winder 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "eq_size_shift",
54022d05228SPaul Winder 	    MLXCX_EQ_SIZE_SHIFT_DFLT);
541ebb7c6fdSAlex Wilson 	p->mldp_cqemod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
542ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_period_usec",
543ebb7c6fdSAlex Wilson 	    MLXCX_CQEMOD_PERIOD_USEC_DFLT);
544ebb7c6fdSAlex Wilson 	p->mldp_cqemod_count = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
545ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "cqemod_count",
546ebb7c6fdSAlex Wilson 	    MLXCX_CQEMOD_COUNT_DFLT);
547ebb7c6fdSAlex Wilson 	p->mldp_intrmod_period_usec = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
548ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "intrmod_period_usec",
549ebb7c6fdSAlex Wilson 	    MLXCX_INTRMOD_PERIOD_USEC_DFLT);
550ebb7c6fdSAlex Wilson 
551ebb7c6fdSAlex Wilson 	p->mldp_tx_ngroups = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
552ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_ngroups",
553ebb7c6fdSAlex Wilson 	    MLXCX_TX_NGROUPS_DFLT);
554ebb7c6fdSAlex Wilson 	p->mldp_tx_nrings_per_group = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
555ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_nrings_per_group",
556ebb7c6fdSAlex Wilson 	    MLXCX_TX_NRINGS_PER_GROUP_DFLT);
557ebb7c6fdSAlex Wilson 
558ebb7c6fdSAlex Wilson 	p->mldp_rx_ngroups_large = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
559ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_large",
560ebb7c6fdSAlex Wilson 	    MLXCX_RX_NGROUPS_LARGE_DFLT);
561ebb7c6fdSAlex Wilson 	p->mldp_rx_ngroups_small = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
562ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_ngroups_small",
563ebb7c6fdSAlex Wilson 	    MLXCX_RX_NGROUPS_SMALL_DFLT);
564ebb7c6fdSAlex Wilson 	p->mldp_rx_nrings_per_large_group = ddi_getprop(DDI_DEV_T_ANY,
565ebb7c6fdSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
566ebb7c6fdSAlex Wilson 	    "rx_nrings_per_large_group", MLXCX_RX_NRINGS_PER_LARGE_GROUP_DFLT);
567ebb7c6fdSAlex Wilson 	p->mldp_rx_nrings_per_small_group = ddi_getprop(DDI_DEV_T_ANY,
568ebb7c6fdSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
569ebb7c6fdSAlex Wilson 	    "rx_nrings_per_small_group", MLXCX_RX_NRINGS_PER_SMALL_GROUP_DFLT);
570ebb7c6fdSAlex Wilson 
571ebb7c6fdSAlex Wilson 	p->mldp_ftbl_root_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
572ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_root_size_shift",
573ebb7c6fdSAlex Wilson 	    MLXCX_FTBL_ROOT_SIZE_SHIFT_DFLT);
574ebb7c6fdSAlex Wilson 
575ebb7c6fdSAlex Wilson 	p->mldp_tx_bind_threshold = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
576ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "tx_bind_threshold",
577ebb7c6fdSAlex Wilson 	    MLXCX_TX_BIND_THRESHOLD_DFLT);
578ebb7c6fdSAlex Wilson 
579ebb7c6fdSAlex Wilson 	p->mldp_ftbl_vlan_size_shift = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
580ebb7c6fdSAlex Wilson 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "ftbl_vlan_size_shift",
581ebb7c6fdSAlex Wilson 	    MLXCX_FTBL_VLAN_SIZE_SHIFT_DFLT);
582ebb7c6fdSAlex Wilson 
583ebb7c6fdSAlex Wilson 	p->mldp_eq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
584ebb7c6fdSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
585ebb7c6fdSAlex Wilson 	    "eq_check_interval_sec", MLXCX_EQ_CHECK_INTERVAL_SEC_DFLT);
586ebb7c6fdSAlex Wilson 	p->mldp_cq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
587ebb7c6fdSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
588ebb7c6fdSAlex Wilson 	    "cq_check_interval_sec", MLXCX_CQ_CHECK_INTERVAL_SEC_DFLT);
589ebb7c6fdSAlex Wilson 	p->mldp_wq_check_interval_sec = ddi_getprop(DDI_DEV_T_ANY,
590ebb7c6fdSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
591ebb7c6fdSAlex Wilson 	    "wq_check_interval_sec", MLXCX_WQ_CHECK_INTERVAL_SEC_DFLT);
59222d05228SPaul Winder 
59322d05228SPaul Winder 	p->mldp_rx_per_cq = ddi_getprop(DDI_DEV_T_ANY, mlxp->mlx_dip,
59422d05228SPaul Winder 	    DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS, "rx_limit_per_completion",
59522d05228SPaul Winder 	    MLXCX_RX_PER_CQ_DEFAULT);
59622d05228SPaul Winder 
59722d05228SPaul Winder 	if (p->mldp_rx_per_cq < MLXCX_RX_PER_CQ_MIN ||
59822d05228SPaul Winder 	    p->mldp_rx_per_cq > MLXCX_RX_PER_CQ_MAX) {
59922d05228SPaul Winder 		mlxcx_warn(mlxp, "!rx_limit_per_completion = %u is "
60022d05228SPaul Winder 		    "out of range. Defaulting to: %d. Valid values are from "
60122d05228SPaul Winder 		    "%d to %d", p->mldp_rx_per_cq, MLXCX_RX_PER_CQ_DEFAULT,
60222d05228SPaul Winder 		    MLXCX_RX_PER_CQ_MIN, MLXCX_RX_PER_CQ_MAX);
60322d05228SPaul Winder 		p->mldp_rx_per_cq = MLXCX_RX_PER_CQ_DEFAULT;
60422d05228SPaul Winder 	}
605*5014e1faSAlex Wilson 
606*5014e1faSAlex Wilson 	p->mldp_rx_p50_loan_min_size = ddi_getprop(DDI_DEV_T_ANY,
607*5014e1faSAlex Wilson 	    mlxp->mlx_dip, DDI_PROP_CANSLEEP | DDI_PROP_DONTPASS,
608*5014e1faSAlex Wilson 	    "rx_p50_loan_min_size", MLXCX_P50_LOAN_MIN_SIZE_DFLT);
609ebb7c6fdSAlex Wilson }
610ebb7c6fdSAlex Wilson 
611ebb7c6fdSAlex Wilson void
mlxcx_note(mlxcx_t * mlxp,const char * fmt,...)612ebb7c6fdSAlex Wilson mlxcx_note(mlxcx_t *mlxp, const char *fmt, ...)
613ebb7c6fdSAlex Wilson {
614ebb7c6fdSAlex Wilson 	va_list ap;
615ebb7c6fdSAlex Wilson 
616ebb7c6fdSAlex Wilson 	va_start(ap, fmt);
617ebb7c6fdSAlex Wilson 	if (mlxp != NULL && mlxp->mlx_dip != NULL) {
618ebb7c6fdSAlex Wilson 		vdev_err(mlxp->mlx_dip, CE_NOTE, fmt, ap);
619ebb7c6fdSAlex Wilson 	} else {
620ebb7c6fdSAlex Wilson 		vcmn_err(CE_NOTE, fmt, ap);
621ebb7c6fdSAlex Wilson 	}
622ebb7c6fdSAlex Wilson 	va_end(ap);
623ebb7c6fdSAlex Wilson }
624ebb7c6fdSAlex Wilson 
625ebb7c6fdSAlex Wilson void
mlxcx_warn(mlxcx_t * mlxp,const char * fmt,...)626ebb7c6fdSAlex Wilson mlxcx_warn(mlxcx_t *mlxp, const char *fmt, ...)
627ebb7c6fdSAlex Wilson {
628ebb7c6fdSAlex Wilson 	va_list ap;
629ebb7c6fdSAlex Wilson 
630ebb7c6fdSAlex Wilson 	va_start(ap, fmt);
631ebb7c6fdSAlex Wilson 	if (mlxp != NULL && mlxp->mlx_dip != NULL) {
632ebb7c6fdSAlex Wilson 		vdev_err(mlxp->mlx_dip, CE_WARN, fmt, ap);
633ebb7c6fdSAlex Wilson 	} else {
634ebb7c6fdSAlex Wilson 		vcmn_err(CE_WARN, fmt, ap);
635ebb7c6fdSAlex Wilson 	}
636ebb7c6fdSAlex Wilson 	va_end(ap);
637ebb7c6fdSAlex Wilson }
638ebb7c6fdSAlex Wilson 
639ebb7c6fdSAlex Wilson void
mlxcx_panic(mlxcx_t * mlxp,const char * fmt,...)640ebb7c6fdSAlex Wilson mlxcx_panic(mlxcx_t *mlxp, const char *fmt, ...)
641ebb7c6fdSAlex Wilson {
642ebb7c6fdSAlex Wilson 	va_list ap;
643ebb7c6fdSAlex Wilson 
644ebb7c6fdSAlex Wilson 	va_start(ap, fmt);
645ebb7c6fdSAlex Wilson 	if (mlxp != NULL && mlxp->mlx_dip != NULL) {
646ebb7c6fdSAlex Wilson 		vdev_err(mlxp->mlx_dip, CE_PANIC, fmt, ap);
647ebb7c6fdSAlex Wilson 	} else {
648ebb7c6fdSAlex Wilson 		vcmn_err(CE_PANIC, fmt, ap);
649ebb7c6fdSAlex Wilson 	}
650ebb7c6fdSAlex Wilson 	va_end(ap);
651ebb7c6fdSAlex Wilson }
652ebb7c6fdSAlex Wilson 
653ebb7c6fdSAlex Wilson uint16_t
mlxcx_get16(mlxcx_t * mlxp,uintptr_t off)654ebb7c6fdSAlex Wilson mlxcx_get16(mlxcx_t *mlxp, uintptr_t off)
655ebb7c6fdSAlex Wilson {
656ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
657ebb7c6fdSAlex Wilson 	return (ddi_get16(mlxp->mlx_regs_handle, (void *)addr));
658ebb7c6fdSAlex Wilson }
659ebb7c6fdSAlex Wilson 
660ebb7c6fdSAlex Wilson uint32_t
mlxcx_get32(mlxcx_t * mlxp,uintptr_t off)661ebb7c6fdSAlex Wilson mlxcx_get32(mlxcx_t *mlxp, uintptr_t off)
662ebb7c6fdSAlex Wilson {
663ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
664ebb7c6fdSAlex Wilson 	return (ddi_get32(mlxp->mlx_regs_handle, (void *)addr));
665ebb7c6fdSAlex Wilson }
666ebb7c6fdSAlex Wilson 
667ebb7c6fdSAlex Wilson uint64_t
mlxcx_get64(mlxcx_t * mlxp,uintptr_t off)668ebb7c6fdSAlex Wilson mlxcx_get64(mlxcx_t *mlxp, uintptr_t off)
669ebb7c6fdSAlex Wilson {
670ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
671ebb7c6fdSAlex Wilson 	return (ddi_get64(mlxp->mlx_regs_handle, (void *)addr));
672ebb7c6fdSAlex Wilson }
673ebb7c6fdSAlex Wilson 
674ebb7c6fdSAlex Wilson void
mlxcx_put32(mlxcx_t * mlxp,uintptr_t off,uint32_t val)675ebb7c6fdSAlex Wilson mlxcx_put32(mlxcx_t *mlxp, uintptr_t off, uint32_t val)
676ebb7c6fdSAlex Wilson {
677ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
678ebb7c6fdSAlex Wilson 	ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
679ebb7c6fdSAlex Wilson }
680ebb7c6fdSAlex Wilson 
681ebb7c6fdSAlex Wilson void
mlxcx_put64(mlxcx_t * mlxp,uintptr_t off,uint64_t val)682ebb7c6fdSAlex Wilson mlxcx_put64(mlxcx_t *mlxp, uintptr_t off, uint64_t val)
683ebb7c6fdSAlex Wilson {
684ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlxp->mlx_regs_base;
685ebb7c6fdSAlex Wilson 	ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
686ebb7c6fdSAlex Wilson }
687ebb7c6fdSAlex Wilson 
688ebb7c6fdSAlex Wilson void
mlxcx_uar_put32(mlxcx_t * mlxp,mlxcx_uar_t * mlu,uintptr_t off,uint32_t val)689ebb7c6fdSAlex Wilson mlxcx_uar_put32(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint32_t val)
690ebb7c6fdSAlex Wilson {
691ebb7c6fdSAlex Wilson 	/*
692ebb7c6fdSAlex Wilson 	 * The UAR is always inside the first BAR, which we mapped as
693ebb7c6fdSAlex Wilson 	 * mlx_regs
694ebb7c6fdSAlex Wilson 	 */
695ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
696ebb7c6fdSAlex Wilson 	    (uintptr_t)mlxp->mlx_regs_base;
697ebb7c6fdSAlex Wilson 	ddi_put32(mlxp->mlx_regs_handle, (void *)addr, val);
698ebb7c6fdSAlex Wilson }
699ebb7c6fdSAlex Wilson 
700ebb7c6fdSAlex Wilson void
mlxcx_uar_put64(mlxcx_t * mlxp,mlxcx_uar_t * mlu,uintptr_t off,uint64_t val)701ebb7c6fdSAlex Wilson mlxcx_uar_put64(mlxcx_t *mlxp, mlxcx_uar_t *mlu, uintptr_t off, uint64_t val)
702ebb7c6fdSAlex Wilson {
703ebb7c6fdSAlex Wilson 	uintptr_t addr = off + (uintptr_t)mlu->mlu_base +
704ebb7c6fdSAlex Wilson 	    (uintptr_t)mlxp->mlx_regs_base;
705ebb7c6fdSAlex Wilson 	ddi_put64(mlxp->mlx_regs_handle, (void *)addr, val);
706ebb7c6fdSAlex Wilson }
707ebb7c6fdSAlex Wilson 
708ebb7c6fdSAlex Wilson static void
mlxcx_fm_fini(mlxcx_t * mlxp)709ebb7c6fdSAlex Wilson mlxcx_fm_fini(mlxcx_t *mlxp)
710ebb7c6fdSAlex Wilson {
711ebb7c6fdSAlex Wilson 	if (mlxp->mlx_fm_caps == 0)
712ebb7c6fdSAlex Wilson 		return;
713ebb7c6fdSAlex Wilson 
714ebb7c6fdSAlex Wilson 	if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps))
715ebb7c6fdSAlex Wilson 		ddi_fm_handler_unregister(mlxp->mlx_dip);
716ebb7c6fdSAlex Wilson 
717ebb7c6fdSAlex Wilson 	if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) ||
718ebb7c6fdSAlex Wilson 	    DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps))
719ebb7c6fdSAlex Wilson 		pci_ereport_teardown(mlxp->mlx_dip);
720ebb7c6fdSAlex Wilson 
721ebb7c6fdSAlex Wilson 	ddi_fm_fini(mlxp->mlx_dip);
722ebb7c6fdSAlex Wilson 
723ebb7c6fdSAlex Wilson 	mlxp->mlx_fm_caps = 0;
724ebb7c6fdSAlex Wilson }
725ebb7c6fdSAlex Wilson 
726ebb7c6fdSAlex Wilson void
mlxcx_fm_ereport(mlxcx_t * mlxp,const char * detail)727ebb7c6fdSAlex Wilson mlxcx_fm_ereport(mlxcx_t *mlxp, const char *detail)
728ebb7c6fdSAlex Wilson {
729ebb7c6fdSAlex Wilson 	uint64_t ena;
730ebb7c6fdSAlex Wilson 	char buf[FM_MAX_CLASS];
731ebb7c6fdSAlex Wilson 
732ebb7c6fdSAlex Wilson 	if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
733ebb7c6fdSAlex Wilson 		return;
734ebb7c6fdSAlex Wilson 
735ebb7c6fdSAlex Wilson 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s", DDI_FM_DEVICE, detail);
736ebb7c6fdSAlex Wilson 	ena = fm_ena_generate(0, FM_ENA_FMT1);
737ebb7c6fdSAlex Wilson 	ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
738ebb7c6fdSAlex Wilson 	    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
739ebb7c6fdSAlex Wilson 	    NULL);
740ebb7c6fdSAlex Wilson }
741ebb7c6fdSAlex Wilson 
742ebb7c6fdSAlex Wilson static int
mlxcx_fm_errcb(dev_info_t * dip,ddi_fm_error_t * err,const void * arg)743ebb7c6fdSAlex Wilson mlxcx_fm_errcb(dev_info_t *dip, ddi_fm_error_t *err, const void *arg)
744ebb7c6fdSAlex Wilson {
745ebb7c6fdSAlex Wilson 	/*
746ebb7c6fdSAlex Wilson 	 * as the driver can always deal with an error in any dma or
747ebb7c6fdSAlex Wilson 	 * access handle, we can just return the fme_status value.
748ebb7c6fdSAlex Wilson 	 */
749ebb7c6fdSAlex Wilson 	pci_ereport_post(dip, err, NULL);
750ebb7c6fdSAlex Wilson 	return (err->fme_status);
751ebb7c6fdSAlex Wilson }
752ebb7c6fdSAlex Wilson 
753ebb7c6fdSAlex Wilson static void
mlxcx_fm_init(mlxcx_t * mlxp)754ebb7c6fdSAlex Wilson mlxcx_fm_init(mlxcx_t *mlxp)
755ebb7c6fdSAlex Wilson {
756ebb7c6fdSAlex Wilson 	ddi_iblock_cookie_t iblk;
757ebb7c6fdSAlex Wilson 	int def = DDI_FM_EREPORT_CAPABLE | DDI_FM_ACCCHK_CAPABLE |
758ebb7c6fdSAlex Wilson 	    DDI_FM_DMACHK_CAPABLE | DDI_FM_ERRCB_CAPABLE;
759ebb7c6fdSAlex Wilson 
760ebb7c6fdSAlex Wilson 	mlxp->mlx_fm_caps = ddi_prop_get_int(DDI_DEV_T_ANY, mlxp->mlx_dip,
761ebb7c6fdSAlex Wilson 	    DDI_PROP_DONTPASS, "fm_capable", def);
762ebb7c6fdSAlex Wilson 
763ebb7c6fdSAlex Wilson 	if (mlxp->mlx_fm_caps < 0) {
764ebb7c6fdSAlex Wilson 		mlxp->mlx_fm_caps = 0;
765ebb7c6fdSAlex Wilson 	}
766ebb7c6fdSAlex Wilson 	mlxp->mlx_fm_caps &= def;
767ebb7c6fdSAlex Wilson 
768ebb7c6fdSAlex Wilson 	if (mlxp->mlx_fm_caps == 0)
769ebb7c6fdSAlex Wilson 		return;
770ebb7c6fdSAlex Wilson 
771ebb7c6fdSAlex Wilson 	ddi_fm_init(mlxp->mlx_dip, &mlxp->mlx_fm_caps, &iblk);
772ebb7c6fdSAlex Wilson 	if (DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps) ||
773ebb7c6fdSAlex Wilson 	    DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) {
774ebb7c6fdSAlex Wilson 		pci_ereport_setup(mlxp->mlx_dip);
775ebb7c6fdSAlex Wilson 	}
776ebb7c6fdSAlex Wilson 	if (DDI_FM_ERRCB_CAP(mlxp->mlx_fm_caps)) {
777ebb7c6fdSAlex Wilson 		ddi_fm_handler_register(mlxp->mlx_dip, mlxcx_fm_errcb,
778ebb7c6fdSAlex Wilson 		    (void *)mlxp);
779ebb7c6fdSAlex Wilson 	}
780ebb7c6fdSAlex Wilson }
781ebb7c6fdSAlex Wilson 
782ebb7c6fdSAlex Wilson static void
mlxcx_mlbs_teardown(mlxcx_t * mlxp,mlxcx_buf_shard_t * s)783ebb7c6fdSAlex Wilson mlxcx_mlbs_teardown(mlxcx_t *mlxp, mlxcx_buf_shard_t *s)
784ebb7c6fdSAlex Wilson {
785ebb7c6fdSAlex Wilson 	mlxcx_buffer_t *buf;
786ebb7c6fdSAlex Wilson 
787ebb7c6fdSAlex Wilson 	mutex_enter(&s->mlbs_mtx);
78819325e87SPaul Winder 
789ebb7c6fdSAlex Wilson 	while (!list_is_empty(&s->mlbs_busy))
790ebb7c6fdSAlex Wilson 		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
79119325e87SPaul Winder 
79219325e87SPaul Winder 	while (!list_is_empty(&s->mlbs_loaned))
79319325e87SPaul Winder 		cv_wait(&s->mlbs_free_nonempty, &s->mlbs_mtx);
79419325e87SPaul Winder 
79519325e87SPaul Winder 	while ((buf = list_head(&s->mlbs_free)) != NULL)
796ebb7c6fdSAlex Wilson 		mlxcx_buf_destroy(mlxp, buf);
79719325e87SPaul Winder 
798ebb7c6fdSAlex Wilson 	list_destroy(&s->mlbs_free);
799ebb7c6fdSAlex Wilson 	list_destroy(&s->mlbs_busy);
80019325e87SPaul Winder 	list_destroy(&s->mlbs_loaned);
801ebb7c6fdSAlex Wilson 	mutex_exit(&s->mlbs_mtx);
802ebb7c6fdSAlex Wilson 
803ebb7c6fdSAlex Wilson 	cv_destroy(&s->mlbs_free_nonempty);
804ebb7c6fdSAlex Wilson 	mutex_destroy(&s->mlbs_mtx);
805ebb7c6fdSAlex Wilson }
806ebb7c6fdSAlex Wilson 
807ebb7c6fdSAlex Wilson static void
mlxcx_teardown_bufs(mlxcx_t * mlxp)808ebb7c6fdSAlex Wilson mlxcx_teardown_bufs(mlxcx_t *mlxp)
809ebb7c6fdSAlex Wilson {
810ebb7c6fdSAlex Wilson 	mlxcx_buf_shard_t *s;
811ebb7c6fdSAlex Wilson 
812ebb7c6fdSAlex Wilson 	while ((s = list_remove_head(&mlxp->mlx_buf_shards)) != NULL) {
813ebb7c6fdSAlex Wilson 		mlxcx_mlbs_teardown(mlxp, s);
814ebb7c6fdSAlex Wilson 		kmem_free(s, sizeof (mlxcx_buf_shard_t));
815ebb7c6fdSAlex Wilson 	}
816ebb7c6fdSAlex Wilson 	list_destroy(&mlxp->mlx_buf_shards);
817ebb7c6fdSAlex Wilson 
818ebb7c6fdSAlex Wilson 	kmem_cache_destroy(mlxp->mlx_bufs_cache);
819ebb7c6fdSAlex Wilson }
820ebb7c6fdSAlex Wilson 
821ebb7c6fdSAlex Wilson static void
mlxcx_teardown_pages(mlxcx_t * mlxp)822ebb7c6fdSAlex Wilson mlxcx_teardown_pages(mlxcx_t *mlxp)
823ebb7c6fdSAlex Wilson {
824ebb7c6fdSAlex Wilson 	uint_t nzeros = 0;
8255f0e3176SPaul Winder 	uint64_t *pas;
8265f0e3176SPaul Winder 
8275f0e3176SPaul Winder 	pas = kmem_alloc(sizeof (*pas) * MLXCX_MANAGE_PAGES_MAX_PAGES,
8285f0e3176SPaul Winder 	    KM_SLEEP);
829ebb7c6fdSAlex Wilson 
830ebb7c6fdSAlex Wilson 	mutex_enter(&mlxp->mlx_pagemtx);
831ebb7c6fdSAlex Wilson 
832ebb7c6fdSAlex Wilson 	while (mlxp->mlx_npages > 0) {
833ebb7c6fdSAlex Wilson 		int32_t req, ret;
834ebb7c6fdSAlex Wilson 
835ebb7c6fdSAlex Wilson 		ASSERT0(avl_is_empty(&mlxp->mlx_pages));
836ebb7c6fdSAlex Wilson 		req = MIN(mlxp->mlx_npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
837ebb7c6fdSAlex Wilson 
838ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_return_pages(mlxp, req, pas, &ret)) {
839ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "hardware refused to return pages, "
840ebb7c6fdSAlex Wilson 			    "leaking %u remaining pages", mlxp->mlx_npages);
841ebb7c6fdSAlex Wilson 			goto out;
842ebb7c6fdSAlex Wilson 		}
843ebb7c6fdSAlex Wilson 
844ebb7c6fdSAlex Wilson 		for (int32_t i = 0; i < ret; i++) {
845ebb7c6fdSAlex Wilson 			mlxcx_dev_page_t *mdp, probe;
846ebb7c6fdSAlex Wilson 			bzero(&probe, sizeof (probe));
847ebb7c6fdSAlex Wilson 			probe.mxdp_pa = pas[i];
848ebb7c6fdSAlex Wilson 
849ebb7c6fdSAlex Wilson 			mdp = avl_find(&mlxp->mlx_pages, &probe, NULL);
850ebb7c6fdSAlex Wilson 
851ebb7c6fdSAlex Wilson 			if (mdp != NULL) {
852ebb7c6fdSAlex Wilson 				avl_remove(&mlxp->mlx_pages, mdp);
853ebb7c6fdSAlex Wilson 				mlxp->mlx_npages--;
854ebb7c6fdSAlex Wilson 				mlxcx_dma_free(&mdp->mxdp_dma);
855ebb7c6fdSAlex Wilson 				kmem_free(mdp, sizeof (mlxcx_dev_page_t));
856ebb7c6fdSAlex Wilson 			} else {
857ebb7c6fdSAlex Wilson 				mlxcx_panic(mlxp, "hardware returned a page "
858ebb7c6fdSAlex Wilson 				    "with PA 0x%" PRIx64 " but we have no "
859ebb7c6fdSAlex Wilson 				    "record of giving out such a page", pas[i]);
860ebb7c6fdSAlex Wilson 			}
861ebb7c6fdSAlex Wilson 		}
862ebb7c6fdSAlex Wilson 
863ebb7c6fdSAlex Wilson 		/*
864ebb7c6fdSAlex Wilson 		 * If no pages were returned, note that fact.
865ebb7c6fdSAlex Wilson 		 */
866ebb7c6fdSAlex Wilson 		if (ret == 0) {
867ebb7c6fdSAlex Wilson 			nzeros++;
868ebb7c6fdSAlex Wilson 			if (nzeros > mlxcx_reclaim_tries) {
869ebb7c6fdSAlex Wilson 				mlxcx_warn(mlxp, "hardware refused to return "
870ebb7c6fdSAlex Wilson 				    "pages, leaking %u remaining pages",
871ebb7c6fdSAlex Wilson 				    mlxp->mlx_npages);
872ebb7c6fdSAlex Wilson 				goto out;
873ebb7c6fdSAlex Wilson 			}
874ebb7c6fdSAlex Wilson 			delay(drv_usectohz(mlxcx_reclaim_delay));
875ebb7c6fdSAlex Wilson 		}
876ebb7c6fdSAlex Wilson 	}
877ebb7c6fdSAlex Wilson 
878ebb7c6fdSAlex Wilson 	avl_destroy(&mlxp->mlx_pages);
879ebb7c6fdSAlex Wilson 
880ebb7c6fdSAlex Wilson out:
881ebb7c6fdSAlex Wilson 	mutex_exit(&mlxp->mlx_pagemtx);
882ebb7c6fdSAlex Wilson 	mutex_destroy(&mlxp->mlx_pagemtx);
8835f0e3176SPaul Winder 
8845f0e3176SPaul Winder 	kmem_free(pas, sizeof (*pas) * MLXCX_MANAGE_PAGES_MAX_PAGES);
885ebb7c6fdSAlex Wilson }
886ebb7c6fdSAlex Wilson 
887ebb7c6fdSAlex Wilson static boolean_t
mlxcx_eq_alloc_dma(mlxcx_t * mlxp,mlxcx_event_queue_t * mleq)888ebb7c6fdSAlex Wilson mlxcx_eq_alloc_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
889ebb7c6fdSAlex Wilson {
890ebb7c6fdSAlex Wilson 	ddi_device_acc_attr_t acc;
891ebb7c6fdSAlex Wilson 	ddi_dma_attr_t attr;
892ebb7c6fdSAlex Wilson 	boolean_t ret;
893ebb7c6fdSAlex Wilson 	size_t sz, i;
894ebb7c6fdSAlex Wilson 
895ebb7c6fdSAlex Wilson 	VERIFY0(mleq->mleq_state & MLXCX_EQ_ALLOC);
896ebb7c6fdSAlex Wilson 
897ebb7c6fdSAlex Wilson 	mleq->mleq_entshift = mlxp->mlx_props.mldp_eq_size_shift;
898ebb7c6fdSAlex Wilson 	mleq->mleq_nents = (1 << mleq->mleq_entshift);
899ebb7c6fdSAlex Wilson 	sz = mleq->mleq_nents * sizeof (mlxcx_eventq_ent_t);
900ebb7c6fdSAlex Wilson 	ASSERT3U(sz & (MLXCX_HW_PAGE_SIZE - 1), ==, 0);
901ebb7c6fdSAlex Wilson 
902ebb7c6fdSAlex Wilson 	mlxcx_dma_acc_attr(mlxp, &acc);
903ebb7c6fdSAlex Wilson 	mlxcx_dma_queue_attr(mlxp, &attr);
904ebb7c6fdSAlex Wilson 
905ebb7c6fdSAlex Wilson 	ret = mlxcx_dma_alloc(mlxp, &mleq->mleq_dma, &attr, &acc,
906ebb7c6fdSAlex Wilson 	    B_TRUE, sz, B_TRUE);
907ebb7c6fdSAlex Wilson 	if (!ret) {
908ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to allocate EQ memory");
909ebb7c6fdSAlex Wilson 		return (B_FALSE);
910ebb7c6fdSAlex Wilson 	}
911ebb7c6fdSAlex Wilson 
912ebb7c6fdSAlex Wilson 	mleq->mleq_ent = (mlxcx_eventq_ent_t *)mleq->mleq_dma.mxdb_va;
913ebb7c6fdSAlex Wilson 
914ebb7c6fdSAlex Wilson 	for (i = 0; i < mleq->mleq_nents; ++i)
915ebb7c6fdSAlex Wilson 		mleq->mleq_ent[i].mleqe_owner = MLXCX_EQ_OWNER_INIT;
916ebb7c6fdSAlex Wilson 
917ebb7c6fdSAlex Wilson 	mleq->mleq_state |= MLXCX_EQ_ALLOC;
918ebb7c6fdSAlex Wilson 
919ebb7c6fdSAlex Wilson 	return (B_TRUE);
920ebb7c6fdSAlex Wilson }
921ebb7c6fdSAlex Wilson 
922ebb7c6fdSAlex Wilson static void
mlxcx_eq_rele_dma(mlxcx_t * mlxp,mlxcx_event_queue_t * mleq)923ebb7c6fdSAlex Wilson mlxcx_eq_rele_dma(mlxcx_t *mlxp, mlxcx_event_queue_t *mleq)
924ebb7c6fdSAlex Wilson {
925ebb7c6fdSAlex Wilson 	VERIFY(mleq->mleq_state & MLXCX_EQ_ALLOC);
926ebb7c6fdSAlex Wilson 	if (mleq->mleq_state & MLXCX_EQ_CREATED)
927ebb7c6fdSAlex Wilson 		VERIFY(mleq->mleq_state & MLXCX_EQ_DESTROYED);
928ebb7c6fdSAlex Wilson 
929ebb7c6fdSAlex Wilson 	mlxcx_dma_free(&mleq->mleq_dma);
930ebb7c6fdSAlex Wilson 	mleq->mleq_ent = NULL;
931ebb7c6fdSAlex Wilson 
932ebb7c6fdSAlex Wilson 	mleq->mleq_state &= ~MLXCX_EQ_ALLOC;
933ebb7c6fdSAlex Wilson }
934ebb7c6fdSAlex Wilson 
935ebb7c6fdSAlex Wilson void
mlxcx_teardown_flow_table(mlxcx_t * mlxp,mlxcx_flow_table_t * ft)936ebb7c6fdSAlex Wilson mlxcx_teardown_flow_table(mlxcx_t *mlxp, mlxcx_flow_table_t *ft)
937ebb7c6fdSAlex Wilson {
938ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg;
939ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
940ebb7c6fdSAlex Wilson 	int i;
941ebb7c6fdSAlex Wilson 
942ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&ft->mlft_mtx));
943ebb7c6fdSAlex Wilson 
944ebb7c6fdSAlex Wilson 	for (i = ft->mlft_nents - 1; i >= 0; --i) {
945ebb7c6fdSAlex Wilson 		fe = &ft->mlft_ent[i];
946ebb7c6fdSAlex Wilson 		if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
947ebb7c6fdSAlex Wilson 			if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
948ebb7c6fdSAlex Wilson 				mlxcx_panic(mlxp, "failed to delete flow "
949ebb7c6fdSAlex Wilson 				    "entry %u on table %u", i,
950ebb7c6fdSAlex Wilson 				    ft->mlft_num);
951ebb7c6fdSAlex Wilson 			}
952ebb7c6fdSAlex Wilson 		}
953ebb7c6fdSAlex Wilson 	}
954ebb7c6fdSAlex Wilson 
955ebb7c6fdSAlex Wilson 	while ((fg = list_remove_head(&ft->mlft_groups)) != NULL) {
956ebb7c6fdSAlex Wilson 		if (fg->mlfg_state & MLXCX_FLOW_GROUP_CREATED &&
957ebb7c6fdSAlex Wilson 		    !(fg->mlfg_state & MLXCX_FLOW_GROUP_DESTROYED)) {
958ebb7c6fdSAlex Wilson 			if (!mlxcx_cmd_destroy_flow_group(mlxp, fg)) {
959ebb7c6fdSAlex Wilson 				mlxcx_panic(mlxp, "failed to destroy flow "
960ebb7c6fdSAlex Wilson 				    "group %u", fg->mlfg_num);
961ebb7c6fdSAlex Wilson 			}
962ebb7c6fdSAlex Wilson 		}
963ebb7c6fdSAlex Wilson 		kmem_free(fg, sizeof (mlxcx_flow_group_t));
964ebb7c6fdSAlex Wilson 	}
965ebb7c6fdSAlex Wilson 	list_destroy(&ft->mlft_groups);
966ebb7c6fdSAlex Wilson 	if (ft->mlft_state & MLXCX_FLOW_TABLE_CREATED &&
967ebb7c6fdSAlex Wilson 	    !(ft->mlft_state & MLXCX_FLOW_TABLE_DESTROYED)) {
968ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_destroy_flow_table(mlxp, ft)) {
969ebb7c6fdSAlex Wilson 			mlxcx_panic(mlxp, "failed to destroy flow table %u",
970ebb7c6fdSAlex Wilson 			    ft->mlft_num);
971ebb7c6fdSAlex Wilson 		}
972ebb7c6fdSAlex Wilson 	}
973ebb7c6fdSAlex Wilson 	kmem_free(ft->mlft_ent, ft->mlft_entsize);
974ebb7c6fdSAlex Wilson 	ft->mlft_ent = NULL;
975ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
976ebb7c6fdSAlex Wilson 	mutex_destroy(&ft->mlft_mtx);
977ebb7c6fdSAlex Wilson 	kmem_free(ft, sizeof (mlxcx_flow_table_t));
978ebb7c6fdSAlex Wilson }
979ebb7c6fdSAlex Wilson 
980ebb7c6fdSAlex Wilson static void
mlxcx_teardown_ports(mlxcx_t * mlxp)981ebb7c6fdSAlex Wilson mlxcx_teardown_ports(mlxcx_t *mlxp)
982ebb7c6fdSAlex Wilson {
983ebb7c6fdSAlex Wilson 	uint_t i;
984ebb7c6fdSAlex Wilson 	mlxcx_port_t *p;
985ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft;
986ebb7c6fdSAlex Wilson 
987ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_nports; ++i) {
988ebb7c6fdSAlex Wilson 		p = &mlxp->mlx_ports[i];
989ebb7c6fdSAlex Wilson 		if (!(p->mlp_init & MLXCX_PORT_INIT))
990ebb7c6fdSAlex Wilson 			continue;
991ebb7c6fdSAlex Wilson 		mutex_enter(&p->mlp_mtx);
992ebb7c6fdSAlex Wilson 		if ((ft = p->mlp_rx_flow) != NULL) {
993ebb7c6fdSAlex Wilson 			mutex_enter(&ft->mlft_mtx);
994ebb7c6fdSAlex Wilson 			/*
995ebb7c6fdSAlex Wilson 			 * teardown_flow_table() will destroy the mutex, so
996ebb7c6fdSAlex Wilson 			 * we don't release it here.
997ebb7c6fdSAlex Wilson 			 */
998ebb7c6fdSAlex Wilson 			mlxcx_teardown_flow_table(mlxp, ft);
999ebb7c6fdSAlex Wilson 		}
1000ebb7c6fdSAlex Wilson 		mutex_exit(&p->mlp_mtx);
1001ebb7c6fdSAlex Wilson 		mutex_destroy(&p->mlp_mtx);
10025f0e3176SPaul Winder 		mutex_destroy(&p->mlx_port_event.mla_mtx);
10035f0e3176SPaul Winder 		p->mlx_port_event.mla_mlx = NULL;
10045f0e3176SPaul Winder 		p->mlx_port_event.mla_port = NULL;
1005ebb7c6fdSAlex Wilson 		p->mlp_init &= ~MLXCX_PORT_INIT;
1006ebb7c6fdSAlex Wilson 	}
1007ebb7c6fdSAlex Wilson 
1008ebb7c6fdSAlex Wilson 	kmem_free(mlxp->mlx_ports, mlxp->mlx_ports_size);
1009ebb7c6fdSAlex Wilson 	mlxp->mlx_ports = NULL;
1010ebb7c6fdSAlex Wilson }
1011ebb7c6fdSAlex Wilson 
1012ebb7c6fdSAlex Wilson static void
mlxcx_teardown_wqs(mlxcx_t * mlxp)1013ebb7c6fdSAlex Wilson mlxcx_teardown_wqs(mlxcx_t *mlxp)
1014ebb7c6fdSAlex Wilson {
1015ebb7c6fdSAlex Wilson 	mlxcx_work_queue_t *mlwq;
1016ebb7c6fdSAlex Wilson 
1017ebb7c6fdSAlex Wilson 	while ((mlwq = list_head(&mlxp->mlx_wqs)) != NULL) {
1018ebb7c6fdSAlex Wilson 		mlxcx_wq_teardown(mlxp, mlwq);
1019ebb7c6fdSAlex Wilson 	}
1020ebb7c6fdSAlex Wilson 	list_destroy(&mlxp->mlx_wqs);
1021ebb7c6fdSAlex Wilson }
1022ebb7c6fdSAlex Wilson 
1023ebb7c6fdSAlex Wilson static void
mlxcx_teardown_cqs(mlxcx_t * mlxp)1024ebb7c6fdSAlex Wilson mlxcx_teardown_cqs(mlxcx_t *mlxp)
1025ebb7c6fdSAlex Wilson {
1026ebb7c6fdSAlex Wilson 	mlxcx_completion_queue_t *mlcq;
1027ebb7c6fdSAlex Wilson 
1028ebb7c6fdSAlex Wilson 	while ((mlcq = list_head(&mlxp->mlx_cqs)) != NULL) {
1029ebb7c6fdSAlex Wilson 		mlxcx_cq_teardown(mlxp, mlcq);
1030ebb7c6fdSAlex Wilson 	}
1031ebb7c6fdSAlex Wilson 	list_destroy(&mlxp->mlx_cqs);
1032ebb7c6fdSAlex Wilson }
1033ebb7c6fdSAlex Wilson 
1034ebb7c6fdSAlex Wilson static void
mlxcx_teardown_eqs(mlxcx_t * mlxp)1035ebb7c6fdSAlex Wilson mlxcx_teardown_eqs(mlxcx_t *mlxp)
1036ebb7c6fdSAlex Wilson {
1037ebb7c6fdSAlex Wilson 	mlxcx_event_queue_t *mleq;
1038ebb7c6fdSAlex Wilson 	uint_t i;
1039ebb7c6fdSAlex Wilson 
1040ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
1041ebb7c6fdSAlex Wilson 		mleq = &mlxp->mlx_eqs[i];
1042ebb7c6fdSAlex Wilson 		mutex_enter(&mleq->mleq_mtx);
1043ebb7c6fdSAlex Wilson 		if ((mleq->mleq_state & MLXCX_EQ_CREATED) &&
1044ebb7c6fdSAlex Wilson 		    !(mleq->mleq_state & MLXCX_EQ_DESTROYED)) {
1045ebb7c6fdSAlex Wilson 			if (!mlxcx_cmd_destroy_eq(mlxp, mleq)) {
1046ebb7c6fdSAlex Wilson 				mlxcx_warn(mlxp, "failed to destroy "
1047ebb7c6fdSAlex Wilson 				    "event queue idx %u eqn %u",
1048ebb7c6fdSAlex Wilson 				    i, mleq->mleq_num);
1049ebb7c6fdSAlex Wilson 			}
1050ebb7c6fdSAlex Wilson 		}
1051ebb7c6fdSAlex Wilson 		if (mleq->mleq_state & MLXCX_EQ_ALLOC) {
1052ebb7c6fdSAlex Wilson 			mlxcx_eq_rele_dma(mlxp, mleq);
1053ebb7c6fdSAlex Wilson 		}
1054ebb7c6fdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
1055ebb7c6fdSAlex Wilson 	}
1056ebb7c6fdSAlex Wilson }
1057ebb7c6fdSAlex Wilson 
1058ebb7c6fdSAlex Wilson static void
mlxcx_teardown_checktimers(mlxcx_t * mlxp)1059ebb7c6fdSAlex Wilson mlxcx_teardown_checktimers(mlxcx_t *mlxp)
1060ebb7c6fdSAlex Wilson {
1061ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0)
1062ebb7c6fdSAlex Wilson 		ddi_periodic_delete(mlxp->mlx_eq_checktimer);
1063ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0)
1064ebb7c6fdSAlex Wilson 		ddi_periodic_delete(mlxp->mlx_cq_checktimer);
1065ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0)
1066ebb7c6fdSAlex Wilson 		ddi_periodic_delete(mlxp->mlx_wq_checktimer);
1067ebb7c6fdSAlex Wilson }
1068ebb7c6fdSAlex Wilson 
1069ebb7c6fdSAlex Wilson static void
mlxcx_teardown(mlxcx_t * mlxp)1070ebb7c6fdSAlex Wilson mlxcx_teardown(mlxcx_t *mlxp)
1071ebb7c6fdSAlex Wilson {
1072ebb7c6fdSAlex Wilson 	uint_t i;
1073ebb7c6fdSAlex Wilson 	dev_info_t *dip = mlxp->mlx_dip;
1074ebb7c6fdSAlex Wilson 
10750207f820SPaul Winder 	if (mlxp->mlx_attach & MLXCX_ATTACH_INTRS) {
10760207f820SPaul Winder 		/*
10770207f820SPaul Winder 		 * Disable interrupts and let any active vectors quiesce.
10780207f820SPaul Winder 		 */
10790207f820SPaul Winder 		mlxcx_intr_disable(mlxp);
10800207f820SPaul Winder 	}
10810207f820SPaul Winder 
10821718c316SRobert Mustacchi 	if (mlxp->mlx_attach & MLXCX_ATTACH_SENSORS) {
10831718c316SRobert Mustacchi 		mlxcx_teardown_sensors(mlxp);
10841718c316SRobert Mustacchi 		mlxp->mlx_attach &= ~MLXCX_ATTACH_SENSORS;
10851718c316SRobert Mustacchi 	}
10861718c316SRobert Mustacchi 
1087ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_CHKTIMERS) {
1088ebb7c6fdSAlex Wilson 		mlxcx_teardown_checktimers(mlxp);
1089ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_CHKTIMERS;
1090ebb7c6fdSAlex Wilson 	}
1091ebb7c6fdSAlex Wilson 
10925f0e3176SPaul Winder 	if (mlxp->mlx_attach & MLXCX_ATTACH_GROUPS) {
10935f0e3176SPaul Winder 		mlxcx_teardown_groups(mlxp);
10945f0e3176SPaul Winder 		mlxp->mlx_attach &= ~MLXCX_ATTACH_GROUPS;
10955f0e3176SPaul Winder 	}
10965f0e3176SPaul Winder 
1097ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_WQS) {
1098ebb7c6fdSAlex Wilson 		mlxcx_teardown_wqs(mlxp);
1099ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_WQS;
1100ebb7c6fdSAlex Wilson 	}
1101ebb7c6fdSAlex Wilson 
1102ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_CQS) {
1103ebb7c6fdSAlex Wilson 		mlxcx_teardown_cqs(mlxp);
1104ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_CQS;
1105ebb7c6fdSAlex Wilson 	}
1106ebb7c6fdSAlex Wilson 
1107ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_BUFS) {
1108ebb7c6fdSAlex Wilson 		mlxcx_teardown_bufs(mlxp);
1109ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_BUFS;
1110ebb7c6fdSAlex Wilson 	}
1111ebb7c6fdSAlex Wilson 
1112ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_PORTS) {
1113ebb7c6fdSAlex Wilson 		mlxcx_teardown_ports(mlxp);
1114ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_PORTS;
1115ebb7c6fdSAlex Wilson 	}
1116ebb7c6fdSAlex Wilson 
1117ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_INTRS) {
1118ebb7c6fdSAlex Wilson 		mlxcx_teardown_eqs(mlxp);
1119ebb7c6fdSAlex Wilson 		mlxcx_intr_teardown(mlxp);
1120ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_INTRS;
1121ebb7c6fdSAlex Wilson 	}
1122ebb7c6fdSAlex Wilson 
1123ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_UAR_PD_TD) {
1124ebb7c6fdSAlex Wilson 		if (mlxp->mlx_uar.mlu_allocated) {
1125ebb7c6fdSAlex Wilson 			if (!mlxcx_cmd_dealloc_uar(mlxp, &mlxp->mlx_uar)) {
1126ebb7c6fdSAlex Wilson 				mlxcx_warn(mlxp, "failed to release UAR");
1127ebb7c6fdSAlex Wilson 			}
1128ebb7c6fdSAlex Wilson 			for (i = 0; i < MLXCX_BF_PER_UAR; ++i)
1129ebb7c6fdSAlex Wilson 				mutex_destroy(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx);
1130ebb7c6fdSAlex Wilson 		}
1131ebb7c6fdSAlex Wilson 		if (mlxp->mlx_pd.mlpd_allocated &&
1132ebb7c6fdSAlex Wilson 		    !mlxcx_cmd_dealloc_pd(mlxp, &mlxp->mlx_pd)) {
1133ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "failed to release PD");
1134ebb7c6fdSAlex Wilson 		}
1135ebb7c6fdSAlex Wilson 		if (mlxp->mlx_tdom.mltd_allocated &&
1136ebb7c6fdSAlex Wilson 		    !mlxcx_cmd_dealloc_tdom(mlxp, &mlxp->mlx_tdom)) {
1137ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "failed to release TDOM");
1138ebb7c6fdSAlex Wilson 		}
1139ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_UAR_PD_TD;
1140ebb7c6fdSAlex Wilson 	}
1141ebb7c6fdSAlex Wilson 
1142ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_INIT_HCA) {
1143ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_teardown_hca(mlxp)) {
1144ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "failed to send teardown HCA "
1145ebb7c6fdSAlex Wilson 			    "command during device detach");
1146ebb7c6fdSAlex Wilson 		}
1147ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_INIT_HCA;
1148ebb7c6fdSAlex Wilson 	}
1149ebb7c6fdSAlex Wilson 
1150ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_PAGE_LIST) {
1151ebb7c6fdSAlex Wilson 		mlxcx_teardown_pages(mlxp);
1152ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_PAGE_LIST;
1153ebb7c6fdSAlex Wilson 	}
1154ebb7c6fdSAlex Wilson 
11555f0e3176SPaul Winder 	if (mlxp->mlx_attach & MLXCX_ATTACH_ASYNC_TQ) {
11565f0e3176SPaul Winder 		for (i = 0; i <= MLXCX_FUNC_ID_MAX; i++) {
11575f0e3176SPaul Winder 			mlxp->mlx_npages_req[i].mla_mlx = NULL;
11585f0e3176SPaul Winder 			mutex_destroy(&mlxp->mlx_npages_req[i].mla_mtx);
11595f0e3176SPaul Winder 		}
11605f0e3176SPaul Winder 		taskq_destroy(mlxp->mlx_async_tq);
11615f0e3176SPaul Winder 		mlxp->mlx_async_tq = NULL;
11625f0e3176SPaul Winder 		mlxp->mlx_attach &= ~MLXCX_ATTACH_ASYNC_TQ;
11635f0e3176SPaul Winder 	}
11645f0e3176SPaul Winder 
1165ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_ENABLE_HCA) {
1166ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_disable_hca(mlxp)) {
1167ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "failed to send DISABLE HCA command "
1168ebb7c6fdSAlex Wilson 			    "during device detach");
1169ebb7c6fdSAlex Wilson 		}
1170ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_ENABLE_HCA;
1171ebb7c6fdSAlex Wilson 	}
1172ebb7c6fdSAlex Wilson 
1173ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_CMD) {
1174ebb7c6fdSAlex Wilson 		mlxcx_cmd_queue_fini(mlxp);
1175ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_CMD;
1176ebb7c6fdSAlex Wilson 	}
1177ebb7c6fdSAlex Wilson 
1178ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_CAPS) {
1179ebb7c6fdSAlex Wilson 		kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t));
1180ebb7c6fdSAlex Wilson 		mlxp->mlx_caps = NULL;
1181ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_CAPS;
1182ebb7c6fdSAlex Wilson 	}
1183ebb7c6fdSAlex Wilson 
1184ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_REGS) {
1185ebb7c6fdSAlex Wilson 		ddi_regs_map_free(&mlxp->mlx_regs_handle);
1186ebb7c6fdSAlex Wilson 		mlxp->mlx_regs_handle = NULL;
1187ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_REGS;
1188ebb7c6fdSAlex Wilson 	}
1189ebb7c6fdSAlex Wilson 
1190ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_PCI_CONFIG) {
1191ebb7c6fdSAlex Wilson 		pci_config_teardown(&mlxp->mlx_cfg_handle);
1192ebb7c6fdSAlex Wilson 		mlxp->mlx_cfg_handle = NULL;
1193ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_PCI_CONFIG;
1194ebb7c6fdSAlex Wilson 	}
1195ebb7c6fdSAlex Wilson 
1196ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_FM) {
1197ebb7c6fdSAlex Wilson 		mlxcx_fm_fini(mlxp);
1198ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_FM;
1199ebb7c6fdSAlex Wilson 	}
1200ebb7c6fdSAlex Wilson 
1201ebb7c6fdSAlex Wilson 	VERIFY3S(mlxp->mlx_attach, ==, 0);
1202ebb7c6fdSAlex Wilson 	ddi_soft_state_free(mlxcx_softstate, mlxp->mlx_inst);
1203ebb7c6fdSAlex Wilson 	ddi_set_driver_private(dip, NULL);
1204ebb7c6fdSAlex Wilson }
1205ebb7c6fdSAlex Wilson 
120683b3f06fSJason King static void
mlxcx_get_model(mlxcx_t * mlxp)120783b3f06fSJason King mlxcx_get_model(mlxcx_t *mlxp)
120883b3f06fSJason King {
120983b3f06fSJason King 	uint16_t venid;
121083b3f06fSJason King 	uint16_t devid;
121183b3f06fSJason King 
121283b3f06fSJason King 	venid = pci_config_get16(mlxp->mlx_cfg_handle, PCI_CONF_VENID);
121383b3f06fSJason King 	if (venid != MLXCX_VENDOR_ID) {
121483b3f06fSJason King 		/* Currently, all supported cards have a Mellanox vendor id. */
121583b3f06fSJason King 		mlxp->mlx_type = MLXCX_DEV_UNKNOWN;
121683b3f06fSJason King 		return;
121783b3f06fSJason King 	}
121883b3f06fSJason King 
121983b3f06fSJason King 	devid = pci_config_get16(mlxp->mlx_cfg_handle, PCI_CONF_DEVID);
122083b3f06fSJason King 	switch (devid) {
122183b3f06fSJason King 	case MLXCX_CX4_DEVID:
122283b3f06fSJason King 	case MLXCX_CX4_VF_DEVID:
122383b3f06fSJason King 	case MLXCX_CX4_LX_VF_DEVID:
122483b3f06fSJason King 		mlxp->mlx_type = MLXCX_DEV_CX4;
122583b3f06fSJason King 		break;
122683b3f06fSJason King 	case MLXCX_CX5_DEVID:
122783b3f06fSJason King 	case MLXCX_CX5_VF_DEVID:
122883b3f06fSJason King 	case MLXCX_CX5_EX_DEVID:
122983b3f06fSJason King 	case MLXCX_CX5_EX_VF_DEVID:
123083b3f06fSJason King 	case MLXCX_CX5_GEN_VF_DEVID:
123183b3f06fSJason King 		mlxp->mlx_type = MLXCX_DEV_CX5;
123283b3f06fSJason King 		break;
123383b3f06fSJason King 	case MLXCX_CX6_DEVID:
123483b3f06fSJason King 	case MLXCX_CX6_VF_DEVID:
123583b3f06fSJason King 	case MLXCX_CX6_DF_DEVID:
123683b3f06fSJason King 	case MLXCX_CX6_LX_DEVID:
123783b3f06fSJason King 		mlxp->mlx_type = MLXCX_DEV_CX6;
123883b3f06fSJason King 		break;
123983b3f06fSJason King 	default:
124083b3f06fSJason King 		mlxp->mlx_type = MLXCX_DEV_UNKNOWN;
124183b3f06fSJason King 	}
124283b3f06fSJason King }
124383b3f06fSJason King 
1244ebb7c6fdSAlex Wilson static boolean_t
mlxcx_regs_map(mlxcx_t * mlxp)1245ebb7c6fdSAlex Wilson mlxcx_regs_map(mlxcx_t *mlxp)
1246ebb7c6fdSAlex Wilson {
1247ebb7c6fdSAlex Wilson 	off_t memsize;
1248ebb7c6fdSAlex Wilson 	int ret;
1249ebb7c6fdSAlex Wilson 	ddi_device_acc_attr_t da;
1250ebb7c6fdSAlex Wilson 
1251ebb7c6fdSAlex Wilson 	if (ddi_dev_regsize(mlxp->mlx_dip, MLXCX_REG_NUMBER, &memsize) !=
1252ebb7c6fdSAlex Wilson 	    DDI_SUCCESS) {
1253ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to get register set size");
1254ebb7c6fdSAlex Wilson 		return (B_FALSE);
1255ebb7c6fdSAlex Wilson 	}
1256ebb7c6fdSAlex Wilson 
1257ebb7c6fdSAlex Wilson 	/*
1258ebb7c6fdSAlex Wilson 	 * All data in the main BAR is kept in big-endian even though it's a PCI
1259ebb7c6fdSAlex Wilson 	 * device.
1260ebb7c6fdSAlex Wilson 	 */
1261ebb7c6fdSAlex Wilson 	bzero(&da, sizeof (ddi_device_acc_attr_t));
1262ebb7c6fdSAlex Wilson 	da.devacc_attr_version = DDI_DEVICE_ATTR_V0;
1263ebb7c6fdSAlex Wilson 	da.devacc_attr_endian_flags = DDI_STRUCTURE_BE_ACC;
1264ebb7c6fdSAlex Wilson 	da.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
1265ebb7c6fdSAlex Wilson 	if (DDI_FM_ACC_ERR_CAP(mlxp->mlx_fm_caps)) {
1266ebb7c6fdSAlex Wilson 		da.devacc_attr_access = DDI_FLAGERR_ACC;
1267ebb7c6fdSAlex Wilson 	} else {
1268ebb7c6fdSAlex Wilson 		da.devacc_attr_access = DDI_DEFAULT_ACC;
1269ebb7c6fdSAlex Wilson 	}
1270ebb7c6fdSAlex Wilson 
1271ebb7c6fdSAlex Wilson 	ret = ddi_regs_map_setup(mlxp->mlx_dip, MLXCX_REG_NUMBER,
1272ebb7c6fdSAlex Wilson 	    &mlxp->mlx_regs_base, 0, memsize, &da, &mlxp->mlx_regs_handle);
1273ebb7c6fdSAlex Wilson 
1274ebb7c6fdSAlex Wilson 	if (ret != DDI_SUCCESS) {
1275ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to map device registers: %d", ret);
1276ebb7c6fdSAlex Wilson 		return (B_FALSE);
1277ebb7c6fdSAlex Wilson 	}
1278ebb7c6fdSAlex Wilson 
1279ebb7c6fdSAlex Wilson 	return (B_TRUE);
1280ebb7c6fdSAlex Wilson }
1281ebb7c6fdSAlex Wilson 
1282ebb7c6fdSAlex Wilson static boolean_t
mlxcx_check_issi(mlxcx_t * mlxp)1283ebb7c6fdSAlex Wilson mlxcx_check_issi(mlxcx_t *mlxp)
1284ebb7c6fdSAlex Wilson {
1285ebb7c6fdSAlex Wilson 	uint32_t issi;
1286ebb7c6fdSAlex Wilson 
1287ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_issi(mlxp, &issi)) {
1288ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to get ISSI");
1289ebb7c6fdSAlex Wilson 		return (B_FALSE);
1290ebb7c6fdSAlex Wilson 	}
1291ebb7c6fdSAlex Wilson 
1292ebb7c6fdSAlex Wilson 	if ((issi & (1 << MLXCX_CURRENT_ISSI)) == 0) {
1293ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "hardware does not support software ISSI, "
1294ebb7c6fdSAlex Wilson 		    "hw vector 0x%x, sw version %u", issi, MLXCX_CURRENT_ISSI);
1295ebb7c6fdSAlex Wilson 		return (B_FALSE);
1296ebb7c6fdSAlex Wilson 	}
1297ebb7c6fdSAlex Wilson 
1298ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_set_issi(mlxp, MLXCX_CURRENT_ISSI)) {
1299ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to set ISSI to %u",
1300ebb7c6fdSAlex Wilson 		    MLXCX_CURRENT_ISSI);
1301ebb7c6fdSAlex Wilson 		return (B_FALSE);
1302ebb7c6fdSAlex Wilson 	}
1303ebb7c6fdSAlex Wilson 
1304ebb7c6fdSAlex Wilson 	return (B_TRUE);
1305ebb7c6fdSAlex Wilson }
1306ebb7c6fdSAlex Wilson 
1307ebb7c6fdSAlex Wilson boolean_t
mlxcx_give_pages(mlxcx_t * mlxp,int32_t npages,int32_t * ngiven)13085f0e3176SPaul Winder mlxcx_give_pages(mlxcx_t *mlxp, int32_t npages, int32_t *ngiven)
1309ebb7c6fdSAlex Wilson {
1310ebb7c6fdSAlex Wilson 	ddi_device_acc_attr_t acc;
1311ebb7c6fdSAlex Wilson 	ddi_dma_attr_t attr;
1312ebb7c6fdSAlex Wilson 	int32_t i;
1313ebb7c6fdSAlex Wilson 	list_t plist;
1314ebb7c6fdSAlex Wilson 	mlxcx_dev_page_t *mdp;
13155f0e3176SPaul Winder 	mlxcx_dev_page_t **pages;
1316ebb7c6fdSAlex Wilson 	const ddi_dma_cookie_t *ck;
1317ebb7c6fdSAlex Wilson 
1318ebb7c6fdSAlex Wilson 	/*
1319ebb7c6fdSAlex Wilson 	 * If there are no pages required, then we're done here.
1320ebb7c6fdSAlex Wilson 	 */
1321ebb7c6fdSAlex Wilson 	if (npages <= 0) {
13225f0e3176SPaul Winder 		*ngiven = 0;
1323ebb7c6fdSAlex Wilson 		return (B_TRUE);
1324ebb7c6fdSAlex Wilson 	}
1325ebb7c6fdSAlex Wilson 
13265f0e3176SPaul Winder 	npages = MIN(npages, MLXCX_MANAGE_PAGES_MAX_PAGES);
13275f0e3176SPaul Winder 
13285f0e3176SPaul Winder 	pages = kmem_alloc(sizeof (*pages) * npages, KM_SLEEP);
13295f0e3176SPaul Winder 
1330ebb7c6fdSAlex Wilson 	list_create(&plist, sizeof (mlxcx_dev_page_t),
1331ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_dev_page_t, mxdp_list));
1332ebb7c6fdSAlex Wilson 
1333ebb7c6fdSAlex Wilson 	for (i = 0; i < npages; i++) {
1334ebb7c6fdSAlex Wilson 		mdp = kmem_zalloc(sizeof (mlxcx_dev_page_t), KM_SLEEP);
1335ebb7c6fdSAlex Wilson 		mlxcx_dma_acc_attr(mlxp, &acc);
1336ebb7c6fdSAlex Wilson 		mlxcx_dma_page_attr(mlxp, &attr);
1337ebb7c6fdSAlex Wilson 		if (!mlxcx_dma_alloc(mlxp, &mdp->mxdp_dma, &attr, &acc,
1338ebb7c6fdSAlex Wilson 		    B_TRUE, MLXCX_HW_PAGE_SIZE, B_TRUE)) {
1339ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "failed to allocate 4k page %u/%u", i,
1340ebb7c6fdSAlex Wilson 			    npages);
1341ebb7c6fdSAlex Wilson 			kmem_free(mdp, sizeof (mlxcx_dev_page_t));
1342ebb7c6fdSAlex Wilson 			goto cleanup_npages;
1343ebb7c6fdSAlex Wilson 		}
1344ebb7c6fdSAlex Wilson 		ck = mlxcx_dma_cookie_one(&mdp->mxdp_dma);
1345ebb7c6fdSAlex Wilson 		mdp->mxdp_pa = ck->dmac_laddress;
1346ebb7c6fdSAlex Wilson 
1347ebb7c6fdSAlex Wilson 		list_insert_tail(&plist, mdp);
1348ebb7c6fdSAlex Wilson 	}
1349ebb7c6fdSAlex Wilson 
1350ebb7c6fdSAlex Wilson 	/*
1351ebb7c6fdSAlex Wilson 	 * Now that all of the pages have been allocated, given them to hardware
1352ebb7c6fdSAlex Wilson 	 * in chunks.
1353ebb7c6fdSAlex Wilson 	 */
13545f0e3176SPaul Winder 	for (i = 0; i < npages; i++) {
13555f0e3176SPaul Winder 		pages[i] = list_remove_head(&plist);
13565f0e3176SPaul Winder 	}
1357ebb7c6fdSAlex Wilson 
13585f0e3176SPaul Winder 	if (!mlxcx_cmd_give_pages(mlxp,
13595f0e3176SPaul Winder 	    MLXCX_MANAGE_PAGES_OPMOD_GIVE_PAGES, npages, pages)) {
13605f0e3176SPaul Winder 		mlxcx_warn(mlxp, "!hardware refused our gift of %u "
13615f0e3176SPaul Winder 		    "pages!", npages);
13625f0e3176SPaul Winder 		for (i = 0; i < npages; i++) {
13635f0e3176SPaul Winder 			list_insert_tail(&plist, pages[i]);
1364ebb7c6fdSAlex Wilson 		}
13655f0e3176SPaul Winder 		goto cleanup_npages;
13665f0e3176SPaul Winder 	}
1367ebb7c6fdSAlex Wilson 
13685f0e3176SPaul Winder 	mutex_enter(&mlxp->mlx_pagemtx);
13695f0e3176SPaul Winder 	for (i = 0; i < npages; i++) {
13705f0e3176SPaul Winder 		avl_add(&mlxp->mlx_pages, pages[i]);
1371ebb7c6fdSAlex Wilson 	}
13725f0e3176SPaul Winder 	mlxp->mlx_npages += npages;
13735f0e3176SPaul Winder 	mutex_exit(&mlxp->mlx_pagemtx);
1374ebb7c6fdSAlex Wilson 
1375ebb7c6fdSAlex Wilson 	list_destroy(&plist);
13765f0e3176SPaul Winder 	kmem_free(pages, sizeof (*pages) * npages);
13775f0e3176SPaul Winder 
13785f0e3176SPaul Winder 	*ngiven = npages;
1379ebb7c6fdSAlex Wilson 
1380ebb7c6fdSAlex Wilson 	return (B_TRUE);
1381ebb7c6fdSAlex Wilson 
1382ebb7c6fdSAlex Wilson cleanup_npages:
13835f0e3176SPaul Winder 	kmem_free(pages, sizeof (*pages) * npages);
1384ebb7c6fdSAlex Wilson 	while ((mdp = list_remove_head(&plist)) != NULL) {
1385ebb7c6fdSAlex Wilson 		mlxcx_dma_free(&mdp->mxdp_dma);
1386ebb7c6fdSAlex Wilson 		kmem_free(mdp, sizeof (mlxcx_dev_page_t));
1387ebb7c6fdSAlex Wilson 	}
1388ebb7c6fdSAlex Wilson 	list_destroy(&plist);
1389ebb7c6fdSAlex Wilson 	return (B_FALSE);
1390ebb7c6fdSAlex Wilson }
1391ebb7c6fdSAlex Wilson 
1392ebb7c6fdSAlex Wilson static boolean_t
mlxcx_init_pages(mlxcx_t * mlxp,uint_t type)1393ebb7c6fdSAlex Wilson mlxcx_init_pages(mlxcx_t *mlxp, uint_t type)
1394ebb7c6fdSAlex Wilson {
13955f0e3176SPaul Winder 	int32_t npages, given;
1396ebb7c6fdSAlex Wilson 
1397ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_pages(mlxp, type, &npages)) {
1398ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to determine boot pages");
1399ebb7c6fdSAlex Wilson 		return (B_FALSE);
1400ebb7c6fdSAlex Wilson 	}
1401ebb7c6fdSAlex Wilson 
14025f0e3176SPaul Winder 	while (npages > 0) {
14035f0e3176SPaul Winder 		if (!mlxcx_give_pages(mlxp, npages, &given))
14045f0e3176SPaul Winder 			return (B_FALSE);
14055f0e3176SPaul Winder 
14065f0e3176SPaul Winder 		npages -= given;
14075f0e3176SPaul Winder 	}
14085f0e3176SPaul Winder 
14095f0e3176SPaul Winder 	return (B_TRUE);
1410ebb7c6fdSAlex Wilson }
1411ebb7c6fdSAlex Wilson 
1412ebb7c6fdSAlex Wilson static int
mlxcx_bufs_cache_constr(void * arg,void * cookie,int kmflags)1413ebb7c6fdSAlex Wilson mlxcx_bufs_cache_constr(void *arg, void *cookie, int kmflags)
1414ebb7c6fdSAlex Wilson {
1415ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp = cookie;
1416ebb7c6fdSAlex Wilson 	mlxcx_buffer_t *b = arg;
1417ebb7c6fdSAlex Wilson 
1418ebb7c6fdSAlex Wilson 	bzero(b, sizeof (mlxcx_buffer_t));
1419ebb7c6fdSAlex Wilson 	b->mlb_mlx = mlxp;
1420ebb7c6fdSAlex Wilson 	b->mlb_state = MLXCX_BUFFER_INIT;
1421ebb7c6fdSAlex Wilson 	list_create(&b->mlb_tx_chain, sizeof (mlxcx_buffer_t),
1422ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_buffer_t, mlb_tx_chain_entry));
1423ebb7c6fdSAlex Wilson 
1424ebb7c6fdSAlex Wilson 	return (0);
1425ebb7c6fdSAlex Wilson }
1426ebb7c6fdSAlex Wilson 
1427ebb7c6fdSAlex Wilson static void
mlxcx_bufs_cache_destr(void * arg,void * cookie)1428ebb7c6fdSAlex Wilson mlxcx_bufs_cache_destr(void *arg, void *cookie)
1429ebb7c6fdSAlex Wilson {
1430ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp = cookie;
1431ebb7c6fdSAlex Wilson 	mlxcx_buffer_t *b = arg;
1432ebb7c6fdSAlex Wilson 	VERIFY3P(b->mlb_mlx, ==, mlxp);
1433ebb7c6fdSAlex Wilson 	VERIFY(b->mlb_state == MLXCX_BUFFER_INIT);
1434ebb7c6fdSAlex Wilson 	list_destroy(&b->mlb_tx_chain);
1435ebb7c6fdSAlex Wilson }
1436ebb7c6fdSAlex Wilson 
1437ebb7c6fdSAlex Wilson mlxcx_buf_shard_t *
mlxcx_mlbs_create(mlxcx_t * mlxp)1438ebb7c6fdSAlex Wilson mlxcx_mlbs_create(mlxcx_t *mlxp)
1439ebb7c6fdSAlex Wilson {
1440ebb7c6fdSAlex Wilson 	mlxcx_buf_shard_t *s;
1441ebb7c6fdSAlex Wilson 
1442ebb7c6fdSAlex Wilson 	s = kmem_zalloc(sizeof (mlxcx_buf_shard_t), KM_SLEEP);
1443ebb7c6fdSAlex Wilson 
1444ebb7c6fdSAlex Wilson 	mutex_init(&s->mlbs_mtx, NULL, MUTEX_DRIVER,
1445ebb7c6fdSAlex Wilson 	    DDI_INTR_PRI(mlxp->mlx_intr_pri));
1446ebb7c6fdSAlex Wilson 	list_create(&s->mlbs_busy, sizeof (mlxcx_buffer_t),
1447ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_buffer_t, mlb_entry));
1448ebb7c6fdSAlex Wilson 	list_create(&s->mlbs_free, sizeof (mlxcx_buffer_t),
1449ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_buffer_t, mlb_entry));
145019325e87SPaul Winder 	list_create(&s->mlbs_loaned, sizeof (mlxcx_buffer_t),
145119325e87SPaul Winder 	    offsetof(mlxcx_buffer_t, mlb_entry));
1452ebb7c6fdSAlex Wilson 	cv_init(&s->mlbs_free_nonempty, NULL, CV_DRIVER, NULL);
1453ebb7c6fdSAlex Wilson 
1454ebb7c6fdSAlex Wilson 	list_insert_tail(&mlxp->mlx_buf_shards, s);
1455ebb7c6fdSAlex Wilson 
1456ebb7c6fdSAlex Wilson 	return (s);
1457ebb7c6fdSAlex Wilson }
1458ebb7c6fdSAlex Wilson 
1459ebb7c6fdSAlex Wilson static boolean_t
mlxcx_setup_bufs(mlxcx_t * mlxp)1460ebb7c6fdSAlex Wilson mlxcx_setup_bufs(mlxcx_t *mlxp)
1461ebb7c6fdSAlex Wilson {
1462ebb7c6fdSAlex Wilson 	char namebuf[KSTAT_STRLEN];
1463ebb7c6fdSAlex Wilson 
1464ebb7c6fdSAlex Wilson 	(void) snprintf(namebuf, KSTAT_STRLEN, "mlxcx%d_bufs_cache",
1465ebb7c6fdSAlex Wilson 	    ddi_get_instance(mlxp->mlx_dip));
1466ebb7c6fdSAlex Wilson 	mlxp->mlx_bufs_cache = kmem_cache_create(namebuf,
1467ebb7c6fdSAlex Wilson 	    sizeof (mlxcx_buffer_t), sizeof (uint64_t),
1468ebb7c6fdSAlex Wilson 	    mlxcx_bufs_cache_constr, mlxcx_bufs_cache_destr,
1469ebb7c6fdSAlex Wilson 	    NULL, mlxp, NULL, 0);
1470ebb7c6fdSAlex Wilson 
1471ebb7c6fdSAlex Wilson 	list_create(&mlxp->mlx_buf_shards, sizeof (mlxcx_buf_shard_t),
1472ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_buf_shard_t, mlbs_entry));
1473ebb7c6fdSAlex Wilson 
1474ebb7c6fdSAlex Wilson 	return (B_TRUE);
1475ebb7c6fdSAlex Wilson }
1476ebb7c6fdSAlex Wilson 
1477ebb7c6fdSAlex Wilson static void
mlxcx_fm_qstate_ereport(mlxcx_t * mlxp,const char * qtype,uint32_t qnum,const char * state,uint8_t statenum)1478ebb7c6fdSAlex Wilson mlxcx_fm_qstate_ereport(mlxcx_t *mlxp, const char *qtype, uint32_t qnum,
1479ebb7c6fdSAlex Wilson     const char *state, uint8_t statenum)
1480ebb7c6fdSAlex Wilson {
1481ebb7c6fdSAlex Wilson 	uint64_t ena;
1482ebb7c6fdSAlex Wilson 	char buf[FM_MAX_CLASS];
1483ebb7c6fdSAlex Wilson 
1484ebb7c6fdSAlex Wilson 	if (!DDI_FM_EREPORT_CAP(mlxp->mlx_fm_caps))
1485ebb7c6fdSAlex Wilson 		return;
1486ebb7c6fdSAlex Wilson 
1487ebb7c6fdSAlex Wilson 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s",
1488ebb7c6fdSAlex Wilson 	    MLXCX_FM_SERVICE_MLXCX, "qstate.err");
1489ebb7c6fdSAlex Wilson 	ena = fm_ena_generate(0, FM_ENA_FMT1);
1490ebb7c6fdSAlex Wilson 
1491ebb7c6fdSAlex Wilson 	ddi_fm_ereport_post(mlxp->mlx_dip, buf, ena, DDI_NOSLEEP,
1492ebb7c6fdSAlex Wilson 	    FM_VERSION, DATA_TYPE_UINT8, FM_EREPORT_VERS0,
1493ebb7c6fdSAlex Wilson 	    "state", DATA_TYPE_STRING, state,
1494ebb7c6fdSAlex Wilson 	    "state_num", DATA_TYPE_UINT8, statenum,
1495ebb7c6fdSAlex Wilson 	    "qtype", DATA_TYPE_STRING, qtype,
1496ebb7c6fdSAlex Wilson 	    "qnum", DATA_TYPE_UINT32, qnum,
1497ebb7c6fdSAlex Wilson 	    NULL);
1498ebb7c6fdSAlex Wilson 	ddi_fm_service_impact(mlxp->mlx_dip, DDI_SERVICE_DEGRADED);
1499ebb7c6fdSAlex Wilson }
1500ebb7c6fdSAlex Wilson 
/*
 * The following set of routines monitors the health of the event,
 * completion and work queues. They run infrequently, peeking at the
 * structs to catch stalls and inconsistent state.
 *
 * They peek at the structs *without* acquiring locks - we don't want
 * to impede the flow of data. Driver start up and shutdown semantics
 * guarantee that the structs are present and won't disappear underneath
 * these routines.
 *
 * Besides peeking at active data in the structs, the routines store some
 * values for comparison on their next invocation. To maintain the
 * integrity of the saved values, those values are only ever modified
 * within these routines.
 */
1516ebb7c6fdSAlex Wilson static void
mlxcx_eq_check(void * arg)1517ebb7c6fdSAlex Wilson mlxcx_eq_check(void *arg)
1518ebb7c6fdSAlex Wilson {
1519ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp = (mlxcx_t *)arg;
1520ebb7c6fdSAlex Wilson 	mlxcx_event_queue_t *eq;
1521ebb7c6fdSAlex Wilson 	mlxcx_eventq_ctx_t ctx;
1522ebb7c6fdSAlex Wilson 	const char *str;
1523ebb7c6fdSAlex Wilson 
1524ebb7c6fdSAlex Wilson 	uint_t i;
1525ebb7c6fdSAlex Wilson 
1526ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_intr_count; ++i) {
1527ebb7c6fdSAlex Wilson 		eq = &mlxp->mlx_eqs[i];
15285f0e3176SPaul Winder 
15295f0e3176SPaul Winder 		if ((eq->mleq_state & MLXCX_EQ_CREATED) == 0)
1530ebb7c6fdSAlex Wilson 			continue;
15315f0e3176SPaul Winder 
15325f0e3176SPaul Winder 		/*
15335f0e3176SPaul Winder 		 * If the event queue was successfully created in the HCA,
15345f0e3176SPaul Winder 		 * then initialization and shutdown sequences guarantee
15355f0e3176SPaul Winder 		 * the queue exists.
15365f0e3176SPaul Winder 		 */
15375f0e3176SPaul Winder 		ASSERT0(eq->mleq_state & MLXCX_EQ_DESTROYED);
15385f0e3176SPaul Winder 
15395f0e3176SPaul Winder 		if (!mlxcx_cmd_query_eq(mlxp, eq, &ctx))
1540ebb7c6fdSAlex Wilson 			continue;
1541ebb7c6fdSAlex Wilson 
1542ebb7c6fdSAlex Wilson 		str = "???";
1543ebb7c6fdSAlex Wilson 		switch (ctx.mleqc_status) {
1544ebb7c6fdSAlex Wilson 		case MLXCX_EQ_STATUS_OK:
1545ebb7c6fdSAlex Wilson 			break;
1546ebb7c6fdSAlex Wilson 		case MLXCX_EQ_STATUS_WRITE_FAILURE:
1547ebb7c6fdSAlex Wilson 			str = "WRITE_FAILURE";
1548ebb7c6fdSAlex Wilson 			break;
1549ebb7c6fdSAlex Wilson 		}
15505f0e3176SPaul Winder 
1551ebb7c6fdSAlex Wilson 		if (ctx.mleqc_status != MLXCX_EQ_STATUS_OK) {
1552ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "event",
1553ebb7c6fdSAlex Wilson 			    eq->mleq_num, str, ctx.mleqc_status);
1554ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "EQ %u is in bad status: %x (%s)",
1555ebb7c6fdSAlex Wilson 			    eq->mleq_intr_index, ctx.mleqc_status, str);
1556ebb7c6fdSAlex Wilson 		}
1557ebb7c6fdSAlex Wilson 
1558ebb7c6fdSAlex Wilson 		if (ctx.mleqc_state != MLXCX_EQ_ST_ARMED &&
1559ebb7c6fdSAlex Wilson 		    (eq->mleq_state & MLXCX_EQ_ARMED)) {
1560ebb7c6fdSAlex Wilson 			if (eq->mleq_cc == eq->mleq_check_disarm_cc &&
1561ebb7c6fdSAlex Wilson 			    ++eq->mleq_check_disarm_cnt >= 3) {
1562ebb7c6fdSAlex Wilson 				mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL);
1563ebb7c6fdSAlex Wilson 				mlxcx_warn(mlxp, "EQ %u isn't armed",
1564ebb7c6fdSAlex Wilson 				    eq->mleq_intr_index);
1565ebb7c6fdSAlex Wilson 			}
1566ebb7c6fdSAlex Wilson 			eq->mleq_check_disarm_cc = eq->mleq_cc;
1567ebb7c6fdSAlex Wilson 		} else {
1568ebb7c6fdSAlex Wilson 			eq->mleq_check_disarm_cc = 0;
1569ebb7c6fdSAlex Wilson 			eq->mleq_check_disarm_cnt = 0;
1570ebb7c6fdSAlex Wilson 		}
1571ebb7c6fdSAlex Wilson 	}
1572ebb7c6fdSAlex Wilson }
1573ebb7c6fdSAlex Wilson 
1574ebb7c6fdSAlex Wilson static void
mlxcx_cq_check(void * arg)1575ebb7c6fdSAlex Wilson mlxcx_cq_check(void *arg)
1576ebb7c6fdSAlex Wilson {
1577ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp = (mlxcx_t *)arg;
1578ebb7c6fdSAlex Wilson 	mlxcx_completion_queue_t *cq;
1579ebb7c6fdSAlex Wilson 	mlxcx_completionq_ctx_t ctx;
1580ebb7c6fdSAlex Wilson 	const char *str, *type;
1581ebb7c6fdSAlex Wilson 	uint_t v;
1582ebb7c6fdSAlex Wilson 
1583ebb7c6fdSAlex Wilson 	for (cq = list_head(&mlxp->mlx_cqs); cq != NULL;
1584ebb7c6fdSAlex Wilson 	    cq = list_next(&mlxp->mlx_cqs, cq)) {
15855f0e3176SPaul Winder 
15865f0e3176SPaul Winder 		if ((cq->mlcq_state & MLXCX_CQ_CREATED) == 0)
1587ebb7c6fdSAlex Wilson 			continue;
15885f0e3176SPaul Winder 
15895f0e3176SPaul Winder 		/*
15905f0e3176SPaul Winder 		 * If the completion queue was successfully created in the HCA,
15915f0e3176SPaul Winder 		 * then initialization and shutdown sequences guarantee
15925f0e3176SPaul Winder 		 * the queue exists.
15935f0e3176SPaul Winder 		 */
15945f0e3176SPaul Winder 		ASSERT0(cq->mlcq_state & MLXCX_CQ_DESTROYED);
15955f0e3176SPaul Winder 		ASSERT0(cq->mlcq_state & MLXCX_CQ_TEARDOWN);
15965f0e3176SPaul Winder 
15975f0e3176SPaul Winder 		if (cq->mlcq_fm_repd_qstate)
1598ebb7c6fdSAlex Wilson 			continue;
15995f0e3176SPaul Winder 
16005f0e3176SPaul Winder 		if (!mlxcx_cmd_query_cq(mlxp, cq, &ctx))
1601ebb7c6fdSAlex Wilson 			continue;
16025f0e3176SPaul Winder 
1603ebb7c6fdSAlex Wilson 		if (cq->mlcq_wq != NULL) {
1604ebb7c6fdSAlex Wilson 			mlxcx_work_queue_t *wq = cq->mlcq_wq;
1605ebb7c6fdSAlex Wilson 			if (wq->mlwq_type == MLXCX_WQ_TYPE_RECVQ)
1606ebb7c6fdSAlex Wilson 				type = "rx ";
1607ebb7c6fdSAlex Wilson 			else if (wq->mlwq_type == MLXCX_WQ_TYPE_SENDQ)
1608ebb7c6fdSAlex Wilson 				type = "tx ";
1609ebb7c6fdSAlex Wilson 			else
1610ebb7c6fdSAlex Wilson 				type = "";
1611ebb7c6fdSAlex Wilson 		} else {
1612ebb7c6fdSAlex Wilson 			type = "";
1613ebb7c6fdSAlex Wilson 		}
1614ebb7c6fdSAlex Wilson 
1615ebb7c6fdSAlex Wilson 		str = "???";
1616ebb7c6fdSAlex Wilson 		v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATUS);
1617ebb7c6fdSAlex Wilson 		switch (v) {
1618ebb7c6fdSAlex Wilson 		case MLXCX_CQC_STATUS_OK:
1619ebb7c6fdSAlex Wilson 			break;
1620ebb7c6fdSAlex Wilson 		case MLXCX_CQC_STATUS_OVERFLOW:
1621ebb7c6fdSAlex Wilson 			str = "OVERFLOW";
1622ebb7c6fdSAlex Wilson 			break;
1623ebb7c6fdSAlex Wilson 		case MLXCX_CQC_STATUS_WRITE_FAIL:
1624ebb7c6fdSAlex Wilson 			str = "WRITE_FAIL";
1625ebb7c6fdSAlex Wilson 			break;
1626ebb7c6fdSAlex Wilson 		case MLXCX_CQC_STATUS_INVALID:
1627ebb7c6fdSAlex Wilson 			str = "INVALID";
1628ebb7c6fdSAlex Wilson 			break;
1629ebb7c6fdSAlex Wilson 		}
16305f0e3176SPaul Winder 
1631ebb7c6fdSAlex Wilson 		if (v != MLXCX_CQC_STATUS_OK) {
1632ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "completion",
1633ebb7c6fdSAlex Wilson 			    cq->mlcq_num, str, v);
1634ebb7c6fdSAlex Wilson 			mlxcx_warn(mlxp, "%sCQ 0x%x is in bad status: %x (%s)",
1635ebb7c6fdSAlex Wilson 			    type, cq->mlcq_num, v, str);
1636ebb7c6fdSAlex Wilson 			cq->mlcq_fm_repd_qstate = B_TRUE;
1637ebb7c6fdSAlex Wilson 		}
1638ebb7c6fdSAlex Wilson 
1639ebb7c6fdSAlex Wilson 		v = get_bits32(ctx.mlcqc_flags, MLXCX_CQ_CTX_STATE);
1640ebb7c6fdSAlex Wilson 		if (v != MLXCX_CQC_STATE_ARMED &&
1641ebb7c6fdSAlex Wilson 		    (cq->mlcq_state & MLXCX_CQ_ARMED) &&
1642ebb7c6fdSAlex Wilson 		    !(cq->mlcq_state & MLXCX_CQ_POLLING)) {
1643ebb7c6fdSAlex Wilson 			if (cq->mlcq_cc == cq->mlcq_check_disarm_cc &&
1644ebb7c6fdSAlex Wilson 			    ++cq->mlcq_check_disarm_cnt >= 3) {
1645ebb7c6fdSAlex Wilson 				mlxcx_fm_ereport(mlxp, DDI_FM_DEVICE_STALL);
1646ebb7c6fdSAlex Wilson 				mlxcx_warn(mlxp, "%sCQ 0x%x (%p) isn't armed",
1647ebb7c6fdSAlex Wilson 				    type, cq->mlcq_num, cq);
1648ebb7c6fdSAlex Wilson 			}
1649ebb7c6fdSAlex Wilson 			cq->mlcq_check_disarm_cc = cq->mlcq_cc;
1650ebb7c6fdSAlex Wilson 		} else {
1651ebb7c6fdSAlex Wilson 			cq->mlcq_check_disarm_cnt = 0;
1652ebb7c6fdSAlex Wilson 			cq->mlcq_check_disarm_cc = 0;
1653ebb7c6fdSAlex Wilson 		}
1654ebb7c6fdSAlex Wilson 	}
1655ebb7c6fdSAlex Wilson }
1656ebb7c6fdSAlex Wilson 
1657ebb7c6fdSAlex Wilson void
mlxcx_check_sq(mlxcx_t * mlxp,mlxcx_work_queue_t * sq)1658ebb7c6fdSAlex Wilson mlxcx_check_sq(mlxcx_t *mlxp, mlxcx_work_queue_t *sq)
1659ebb7c6fdSAlex Wilson {
1660ebb7c6fdSAlex Wilson 	mlxcx_sq_ctx_t ctx;
1661ebb7c6fdSAlex Wilson 	mlxcx_sq_state_t state;
1662ebb7c6fdSAlex Wilson 
1663ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_sq(mlxp, sq, &ctx))
1664ebb7c6fdSAlex Wilson 		return;
1665ebb7c6fdSAlex Wilson 
1666ebb7c6fdSAlex Wilson 	ASSERT3U(from_be24(ctx.mlsqc_cqn), ==, sq->mlwq_cq->mlcq_num);
1667ebb7c6fdSAlex Wilson 	state = get_bits32(ctx.mlsqc_flags, MLXCX_SQ_STATE);
1668ebb7c6fdSAlex Wilson 	switch (state) {
1669ebb7c6fdSAlex Wilson 	case MLXCX_SQ_STATE_RST:
1670ebb7c6fdSAlex Wilson 		if (sq->mlwq_state & MLXCX_WQ_STARTED) {
1671ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "send",
1672ebb7c6fdSAlex Wilson 			    sq->mlwq_num, "RST", state);
1673ebb7c6fdSAlex Wilson 			sq->mlwq_fm_repd_qstate = B_TRUE;
1674ebb7c6fdSAlex Wilson 		}
1675ebb7c6fdSAlex Wilson 		break;
1676ebb7c6fdSAlex Wilson 	case MLXCX_SQ_STATE_RDY:
1677ebb7c6fdSAlex Wilson 		if (!(sq->mlwq_state & MLXCX_WQ_STARTED)) {
1678ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "send",
1679ebb7c6fdSAlex Wilson 			    sq->mlwq_num, "RDY", state);
1680ebb7c6fdSAlex Wilson 			sq->mlwq_fm_repd_qstate = B_TRUE;
1681ebb7c6fdSAlex Wilson 		}
1682ebb7c6fdSAlex Wilson 		break;
1683ebb7c6fdSAlex Wilson 	case MLXCX_SQ_STATE_ERR:
1684ebb7c6fdSAlex Wilson 		mlxcx_fm_qstate_ereport(mlxp, "send",
1685ebb7c6fdSAlex Wilson 		    sq->mlwq_num, "ERR", state);
1686ebb7c6fdSAlex Wilson 		sq->mlwq_fm_repd_qstate = B_TRUE;
1687ebb7c6fdSAlex Wilson 		break;
1688ebb7c6fdSAlex Wilson 	default:
1689ebb7c6fdSAlex Wilson 		mlxcx_fm_qstate_ereport(mlxp, "send",
1690ebb7c6fdSAlex Wilson 		    sq->mlwq_num, "???", state);
1691ebb7c6fdSAlex Wilson 		sq->mlwq_fm_repd_qstate = B_TRUE;
1692ebb7c6fdSAlex Wilson 		break;
1693ebb7c6fdSAlex Wilson 	}
1694ebb7c6fdSAlex Wilson }
1695ebb7c6fdSAlex Wilson 
1696ebb7c6fdSAlex Wilson void
mlxcx_check_rq(mlxcx_t * mlxp,mlxcx_work_queue_t * rq)1697ebb7c6fdSAlex Wilson mlxcx_check_rq(mlxcx_t *mlxp, mlxcx_work_queue_t *rq)
1698ebb7c6fdSAlex Wilson {
1699ebb7c6fdSAlex Wilson 	mlxcx_rq_ctx_t ctx;
1700ebb7c6fdSAlex Wilson 	mlxcx_rq_state_t state;
1701ebb7c6fdSAlex Wilson 
1702ebb7c6fdSAlex Wilson 
1703ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_rq(mlxp, rq, &ctx))
1704ebb7c6fdSAlex Wilson 		return;
1705ebb7c6fdSAlex Wilson 
1706ebb7c6fdSAlex Wilson 	ASSERT3U(from_be24(ctx.mlrqc_cqn), ==, rq->mlwq_cq->mlcq_num);
1707ebb7c6fdSAlex Wilson 	state = get_bits32(ctx.mlrqc_flags, MLXCX_RQ_STATE);
1708ebb7c6fdSAlex Wilson 	switch (state) {
1709ebb7c6fdSAlex Wilson 	case MLXCX_RQ_STATE_RST:
1710ebb7c6fdSAlex Wilson 		if (rq->mlwq_state & MLXCX_WQ_STARTED) {
1711ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "receive",
1712ebb7c6fdSAlex Wilson 			    rq->mlwq_num, "RST", state);
1713ebb7c6fdSAlex Wilson 			rq->mlwq_fm_repd_qstate = B_TRUE;
1714ebb7c6fdSAlex Wilson 		}
1715ebb7c6fdSAlex Wilson 		break;
1716ebb7c6fdSAlex Wilson 	case MLXCX_RQ_STATE_RDY:
1717ebb7c6fdSAlex Wilson 		if (!(rq->mlwq_state & MLXCX_WQ_STARTED)) {
1718ebb7c6fdSAlex Wilson 			mlxcx_fm_qstate_ereport(mlxp, "receive",
1719ebb7c6fdSAlex Wilson 			    rq->mlwq_num, "RDY", state);
1720ebb7c6fdSAlex Wilson 			rq->mlwq_fm_repd_qstate = B_TRUE;
1721ebb7c6fdSAlex Wilson 		}
1722ebb7c6fdSAlex Wilson 		break;
1723ebb7c6fdSAlex Wilson 	case MLXCX_RQ_STATE_ERR:
1724ebb7c6fdSAlex Wilson 		mlxcx_fm_qstate_ereport(mlxp, "receive",
1725ebb7c6fdSAlex Wilson 		    rq->mlwq_num, "ERR", state);
1726ebb7c6fdSAlex Wilson 		rq->mlwq_fm_repd_qstate = B_TRUE;
1727ebb7c6fdSAlex Wilson 		break;
1728ebb7c6fdSAlex Wilson 	default:
1729ebb7c6fdSAlex Wilson 		mlxcx_fm_qstate_ereport(mlxp, "receive",
1730ebb7c6fdSAlex Wilson 		    rq->mlwq_num, "???", state);
1731ebb7c6fdSAlex Wilson 		rq->mlwq_fm_repd_qstate = B_TRUE;
1732ebb7c6fdSAlex Wilson 		break;
1733ebb7c6fdSAlex Wilson 	}
1734ebb7c6fdSAlex Wilson }
1735ebb7c6fdSAlex Wilson 
1736ebb7c6fdSAlex Wilson static void
mlxcx_wq_check(void * arg)1737ebb7c6fdSAlex Wilson mlxcx_wq_check(void *arg)
1738ebb7c6fdSAlex Wilson {
1739ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp = (mlxcx_t *)arg;
1740ebb7c6fdSAlex Wilson 	mlxcx_work_queue_t *wq;
1741ebb7c6fdSAlex Wilson 
1742ebb7c6fdSAlex Wilson 	for (wq = list_head(&mlxp->mlx_wqs); wq != NULL;
1743ebb7c6fdSAlex Wilson 	    wq = list_next(&mlxp->mlx_wqs, wq)) {
17445f0e3176SPaul Winder 
17455f0e3176SPaul Winder 		if ((wq->mlwq_state & MLXCX_WQ_CREATED) == 0)
1746ebb7c6fdSAlex Wilson 			continue;
17475f0e3176SPaul Winder 
17485f0e3176SPaul Winder 		/*
17495f0e3176SPaul Winder 		 * If the work queue was successfully created in the HCA,
17505f0e3176SPaul Winder 		 * then initialization and shutdown sequences guarantee
17515f0e3176SPaul Winder 		 * the queue exists.
17525f0e3176SPaul Winder 		 */
17535f0e3176SPaul Winder 		ASSERT0(wq->mlwq_state & MLXCX_WQ_DESTROYED);
17545f0e3176SPaul Winder 		ASSERT0(wq->mlwq_state & MLXCX_WQ_TEARDOWN);
17555f0e3176SPaul Winder 
17565f0e3176SPaul Winder 		if (wq->mlwq_fm_repd_qstate)
1757ebb7c6fdSAlex Wilson 			continue;
17585f0e3176SPaul Winder 
1759ebb7c6fdSAlex Wilson 		switch (wq->mlwq_type) {
1760ebb7c6fdSAlex Wilson 		case MLXCX_WQ_TYPE_SENDQ:
1761ebb7c6fdSAlex Wilson 			mlxcx_check_sq(mlxp, wq);
1762ebb7c6fdSAlex Wilson 			break;
1763ebb7c6fdSAlex Wilson 		case MLXCX_WQ_TYPE_RECVQ:
1764ebb7c6fdSAlex Wilson 			mlxcx_check_rq(mlxp, wq);
1765ebb7c6fdSAlex Wilson 			break;
1766ebb7c6fdSAlex Wilson 		}
1767ebb7c6fdSAlex Wilson 	}
1768ebb7c6fdSAlex Wilson }
1769ebb7c6fdSAlex Wilson 
1770ebb7c6fdSAlex Wilson static boolean_t
mlxcx_setup_checktimers(mlxcx_t * mlxp)1771ebb7c6fdSAlex Wilson mlxcx_setup_checktimers(mlxcx_t *mlxp)
1772ebb7c6fdSAlex Wilson {
1773ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_eq_check_interval_sec > 0) {
1774ebb7c6fdSAlex Wilson 		mlxp->mlx_eq_checktimer = ddi_periodic_add(mlxcx_eq_check, mlxp,
1775ebb7c6fdSAlex Wilson 		    mlxp->mlx_props.mldp_eq_check_interval_sec * NANOSEC,
1776ebb7c6fdSAlex Wilson 		    DDI_IPL_0);
1777ebb7c6fdSAlex Wilson 	}
1778ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_cq_check_interval_sec > 0) {
1779ebb7c6fdSAlex Wilson 		mlxp->mlx_cq_checktimer = ddi_periodic_add(mlxcx_cq_check, mlxp,
1780ebb7c6fdSAlex Wilson 		    mlxp->mlx_props.mldp_cq_check_interval_sec * NANOSEC,
1781ebb7c6fdSAlex Wilson 		    DDI_IPL_0);
1782ebb7c6fdSAlex Wilson 	}
1783ebb7c6fdSAlex Wilson 	if (mlxp->mlx_props.mldp_wq_check_interval_sec > 0) {
1784ebb7c6fdSAlex Wilson 		mlxp->mlx_wq_checktimer = ddi_periodic_add(mlxcx_wq_check, mlxp,
1785ebb7c6fdSAlex Wilson 		    mlxp->mlx_props.mldp_wq_check_interval_sec * NANOSEC,
1786ebb7c6fdSAlex Wilson 		    DDI_IPL_0);
1787ebb7c6fdSAlex Wilson 	}
1788ebb7c6fdSAlex Wilson 	return (B_TRUE);
1789ebb7c6fdSAlex Wilson }
1790ebb7c6fdSAlex Wilson 
1791ebb7c6fdSAlex Wilson int
mlxcx_dmac_fe_compare(const void * arg0,const void * arg1)1792ebb7c6fdSAlex Wilson mlxcx_dmac_fe_compare(const void *arg0, const void *arg1)
1793ebb7c6fdSAlex Wilson {
1794ebb7c6fdSAlex Wilson 	const mlxcx_flow_entry_t *left = arg0;
1795ebb7c6fdSAlex Wilson 	const mlxcx_flow_entry_t *right = arg1;
1796ebb7c6fdSAlex Wilson 	int bcmpr;
1797ebb7c6fdSAlex Wilson 
1798ebb7c6fdSAlex Wilson 	bcmpr = memcmp(left->mlfe_dmac, right->mlfe_dmac,
1799ebb7c6fdSAlex Wilson 	    sizeof (left->mlfe_dmac));
1800ebb7c6fdSAlex Wilson 	if (bcmpr < 0)
1801ebb7c6fdSAlex Wilson 		return (-1);
1802ebb7c6fdSAlex Wilson 	if (bcmpr > 0)
1803ebb7c6fdSAlex Wilson 		return (1);
1804ebb7c6fdSAlex Wilson 	if (left->mlfe_vid < right->mlfe_vid)
1805ebb7c6fdSAlex Wilson 		return (-1);
1806ebb7c6fdSAlex Wilson 	if (left->mlfe_vid > right->mlfe_vid)
1807ebb7c6fdSAlex Wilson 		return (1);
1808ebb7c6fdSAlex Wilson 	return (0);
1809ebb7c6fdSAlex Wilson }
1810ebb7c6fdSAlex Wilson 
1811ebb7c6fdSAlex Wilson int
mlxcx_grmac_compare(const void * arg0,const void * arg1)1812ebb7c6fdSAlex Wilson mlxcx_grmac_compare(const void *arg0, const void *arg1)
1813ebb7c6fdSAlex Wilson {
1814ebb7c6fdSAlex Wilson 	const mlxcx_group_mac_t *left = arg0;
1815ebb7c6fdSAlex Wilson 	const mlxcx_group_mac_t *right = arg1;
1816ebb7c6fdSAlex Wilson 	int bcmpr;
1817ebb7c6fdSAlex Wilson 
1818ebb7c6fdSAlex Wilson 	bcmpr = memcmp(left->mlgm_mac, right->mlgm_mac,
1819ebb7c6fdSAlex Wilson 	    sizeof (left->mlgm_mac));
1820ebb7c6fdSAlex Wilson 	if (bcmpr < 0)
1821ebb7c6fdSAlex Wilson 		return (-1);
1822ebb7c6fdSAlex Wilson 	if (bcmpr > 0)
1823ebb7c6fdSAlex Wilson 		return (1);
1824ebb7c6fdSAlex Wilson 	return (0);
1825ebb7c6fdSAlex Wilson }
1826ebb7c6fdSAlex Wilson 
1827ebb7c6fdSAlex Wilson int
mlxcx_page_compare(const void * arg0,const void * arg1)1828ebb7c6fdSAlex Wilson mlxcx_page_compare(const void *arg0, const void *arg1)
1829ebb7c6fdSAlex Wilson {
1830ebb7c6fdSAlex Wilson 	const mlxcx_dev_page_t *p0 = arg0;
1831ebb7c6fdSAlex Wilson 	const mlxcx_dev_page_t *p1 = arg1;
1832ebb7c6fdSAlex Wilson 
1833ebb7c6fdSAlex Wilson 	if (p0->mxdp_pa < p1->mxdp_pa)
1834ebb7c6fdSAlex Wilson 		return (-1);
1835ebb7c6fdSAlex Wilson 	if (p0->mxdp_pa > p1->mxdp_pa)
1836ebb7c6fdSAlex Wilson 		return (1);
1837ebb7c6fdSAlex Wilson 	return (0);
1838ebb7c6fdSAlex Wilson }
1839ebb7c6fdSAlex Wilson 
1840ebb7c6fdSAlex Wilson static boolean_t
mlxcx_setup_ports(mlxcx_t * mlxp)1841ebb7c6fdSAlex Wilson mlxcx_setup_ports(mlxcx_t *mlxp)
1842ebb7c6fdSAlex Wilson {
1843ebb7c6fdSAlex Wilson 	uint_t i, j;
1844ebb7c6fdSAlex Wilson 	mlxcx_port_t *p;
1845ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft;
1846ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg;
1847ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
1848ebb7c6fdSAlex Wilson 
1849ebb7c6fdSAlex Wilson 	VERIFY3U(mlxp->mlx_nports, >, 0);
1850ebb7c6fdSAlex Wilson 	mlxp->mlx_ports_size = mlxp->mlx_nports * sizeof (mlxcx_port_t);
1851ebb7c6fdSAlex Wilson 	mlxp->mlx_ports = kmem_zalloc(mlxp->mlx_ports_size, KM_SLEEP);
1852ebb7c6fdSAlex Wilson 
1853ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_nports; ++i) {
1854ebb7c6fdSAlex Wilson 		p = &mlxp->mlx_ports[i];
1855ebb7c6fdSAlex Wilson 		p->mlp_num = i;
18565f0e3176SPaul Winder 		p->mlx_port_event.mla_mlx = mlxp;
18575f0e3176SPaul Winder 		p->mlx_port_event.mla_port = p;
18585f0e3176SPaul Winder 		mutex_init(&p->mlx_port_event.mla_mtx, NULL,
1859e1447ca9SPaul Winder 		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
1860ebb7c6fdSAlex Wilson 		p->mlp_init |= MLXCX_PORT_INIT;
1861ebb7c6fdSAlex Wilson 		mutex_init(&p->mlp_mtx, NULL, MUTEX_DRIVER,
1862ebb7c6fdSAlex Wilson 		    DDI_INTR_PRI(mlxp->mlx_intr_pri));
1863ebb7c6fdSAlex Wilson 		mutex_enter(&p->mlp_mtx);
1864ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_query_nic_vport_ctx(mlxp, p)) {
1865ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1866ebb7c6fdSAlex Wilson 			goto err;
1867ebb7c6fdSAlex Wilson 		}
1868ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_query_port_mtu(mlxp, p)) {
1869ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1870ebb7c6fdSAlex Wilson 			goto err;
1871ebb7c6fdSAlex Wilson 		}
1872ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_query_port_status(mlxp, p)) {
1873ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1874ebb7c6fdSAlex Wilson 			goto err;
1875ebb7c6fdSAlex Wilson 		}
1876ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_query_port_speed(mlxp, p)) {
1877ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1878ebb7c6fdSAlex Wilson 			goto err;
1879ebb7c6fdSAlex Wilson 		}
1880ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_modify_nic_vport_ctx(mlxp, p,
1881ebb7c6fdSAlex Wilson 		    MLXCX_MODIFY_NIC_VPORT_CTX_PROMISC)) {
1882ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1883ebb7c6fdSAlex Wilson 			goto err;
1884ebb7c6fdSAlex Wilson 		}
1885d77e6e0fSPaul Winder 		if (!mlxcx_cmd_query_port_fec(mlxp, p)) {
1886d77e6e0fSPaul Winder 			mutex_exit(&p->mlp_mtx);
1887d77e6e0fSPaul Winder 			goto err;
1888d77e6e0fSPaul Winder 		}
1889d77e6e0fSPaul Winder 		p->mlp_fec_requested = LINK_FEC_AUTO;
1890ebb7c6fdSAlex Wilson 
1891ebb7c6fdSAlex Wilson 		mutex_exit(&p->mlp_mtx);
1892ebb7c6fdSAlex Wilson 	}
1893ebb7c6fdSAlex Wilson 
1894ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_nports; ++i) {
1895ebb7c6fdSAlex Wilson 		p = &mlxp->mlx_ports[i];
1896ebb7c6fdSAlex Wilson 		mutex_enter(&p->mlp_mtx);
1897ebb7c6fdSAlex Wilson 		p->mlp_rx_flow = (ft = kmem_zalloc(sizeof (mlxcx_flow_table_t),
1898ebb7c6fdSAlex Wilson 		    KM_SLEEP));
1899ebb7c6fdSAlex Wilson 		mutex_init(&ft->mlft_mtx, NULL, MUTEX_DRIVER,
1900ebb7c6fdSAlex Wilson 		    DDI_INTR_PRI(mlxp->mlx_intr_pri));
1901ebb7c6fdSAlex Wilson 
1902ebb7c6fdSAlex Wilson 		mutex_enter(&ft->mlft_mtx);
1903ebb7c6fdSAlex Wilson 
1904ebb7c6fdSAlex Wilson 		ft->mlft_type = MLXCX_FLOW_TABLE_NIC_RX;
1905ebb7c6fdSAlex Wilson 		ft->mlft_port = p;
1906ebb7c6fdSAlex Wilson 		ft->mlft_entshift = mlxp->mlx_props.mldp_ftbl_root_size_shift;
1907ebb7c6fdSAlex Wilson 		if (ft->mlft_entshift > mlxp->mlx_caps->mlc_max_rx_ft_shift)
1908ebb7c6fdSAlex Wilson 			ft->mlft_entshift = mlxp->mlx_caps->mlc_max_rx_ft_shift;
1909ebb7c6fdSAlex Wilson 		ft->mlft_nents = (1 << ft->mlft_entshift);
1910ebb7c6fdSAlex Wilson 		ft->mlft_entsize = ft->mlft_nents * sizeof (mlxcx_flow_entry_t);
1911ebb7c6fdSAlex Wilson 		ft->mlft_ent = kmem_zalloc(ft->mlft_entsize, KM_SLEEP);
1912ebb7c6fdSAlex Wilson 		list_create(&ft->mlft_groups, sizeof (mlxcx_flow_group_t),
1913ebb7c6fdSAlex Wilson 		    offsetof(mlxcx_flow_group_t, mlfg_entry));
1914ebb7c6fdSAlex Wilson 
1915ebb7c6fdSAlex Wilson 		for (j = 0; j < ft->mlft_nents; ++j) {
1916ebb7c6fdSAlex Wilson 			ft->mlft_ent[j].mlfe_table = ft;
1917ebb7c6fdSAlex Wilson 			ft->mlft_ent[j].mlfe_index = j;
1918ebb7c6fdSAlex Wilson 		}
1919ebb7c6fdSAlex Wilson 
1920ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_create_flow_table(mlxp, ft)) {
1921ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
1922ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1923ebb7c6fdSAlex Wilson 			goto err;
1924ebb7c6fdSAlex Wilson 		}
1925ebb7c6fdSAlex Wilson 
1926ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_set_flow_table_root(mlxp, ft)) {
1927ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
1928ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1929ebb7c6fdSAlex Wilson 			goto err;
1930ebb7c6fdSAlex Wilson 		}
1931ebb7c6fdSAlex Wilson 
1932ebb7c6fdSAlex Wilson 		/*
1933ebb7c6fdSAlex Wilson 		 * We match broadcast at the top of the root flow table, then
1934ebb7c6fdSAlex Wilson 		 * all multicast/unicast MACs, then the promisc entry is down
1935ebb7c6fdSAlex Wilson 		 * the very bottom.
1936ebb7c6fdSAlex Wilson 		 *
1937ebb7c6fdSAlex Wilson 		 * This way when promisc is on, that entry simply catches any
1938ebb7c6fdSAlex Wilson 		 * remaining traffic that earlier flows haven't matched.
1939ebb7c6fdSAlex Wilson 		 */
1940ebb7c6fdSAlex Wilson 		fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
1941ebb7c6fdSAlex Wilson 		list_insert_tail(&ft->mlft_groups, fg);
1942ebb7c6fdSAlex Wilson 		fg->mlfg_table = ft;
1943ebb7c6fdSAlex Wilson 		fg->mlfg_size = 1;
1944ebb7c6fdSAlex Wilson 		fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC;
1945ebb7c6fdSAlex Wilson 		if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
1946ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
1947ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1948ebb7c6fdSAlex Wilson 			goto err;
1949ebb7c6fdSAlex Wilson 		}
1950ebb7c6fdSAlex Wilson 		p->mlp_bcast = fg;
1951ebb7c6fdSAlex Wilson 		fe = list_head(&fg->mlfg_entries);
1952ebb7c6fdSAlex Wilson 		fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
1953ebb7c6fdSAlex Wilson 		(void) memset(fe->mlfe_dmac, 0xff, sizeof (fe->mlfe_dmac));
1954ebb7c6fdSAlex Wilson 		fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
1955ebb7c6fdSAlex Wilson 
1956ebb7c6fdSAlex Wilson 		fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
1957ebb7c6fdSAlex Wilson 		list_insert_tail(&ft->mlft_groups, fg);
1958ebb7c6fdSAlex Wilson 		fg->mlfg_table = ft;
1959ebb7c6fdSAlex Wilson 		fg->mlfg_size = ft->mlft_nents - 2;
1960ebb7c6fdSAlex Wilson 		fg->mlfg_mask |= MLXCX_FLOW_MATCH_DMAC;
1961ebb7c6fdSAlex Wilson 		if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
1962ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
1963ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1964ebb7c6fdSAlex Wilson 			goto err;
1965ebb7c6fdSAlex Wilson 		}
1966ebb7c6fdSAlex Wilson 		p->mlp_umcast = fg;
1967ebb7c6fdSAlex Wilson 
1968ebb7c6fdSAlex Wilson 		fg = kmem_zalloc(sizeof (mlxcx_flow_group_t), KM_SLEEP);
1969ebb7c6fdSAlex Wilson 		list_insert_tail(&ft->mlft_groups, fg);
1970ebb7c6fdSAlex Wilson 		fg->mlfg_table = ft;
1971ebb7c6fdSAlex Wilson 		fg->mlfg_size = 1;
1972ebb7c6fdSAlex Wilson 		if (!mlxcx_setup_flow_group(mlxp, ft, fg)) {
1973ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
1974ebb7c6fdSAlex Wilson 			mutex_exit(&p->mlp_mtx);
1975ebb7c6fdSAlex Wilson 			goto err;
1976ebb7c6fdSAlex Wilson 		}
1977ebb7c6fdSAlex Wilson 		p->mlp_promisc = fg;
1978ebb7c6fdSAlex Wilson 		fe = list_head(&fg->mlfg_entries);
1979ebb7c6fdSAlex Wilson 		fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
1980ebb7c6fdSAlex Wilson 		fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
1981ebb7c6fdSAlex Wilson 
1982ebb7c6fdSAlex Wilson 		avl_create(&p->mlp_dmac_fe, mlxcx_dmac_fe_compare,
1983ebb7c6fdSAlex Wilson 		    sizeof (mlxcx_flow_entry_t), offsetof(mlxcx_flow_entry_t,
1984ebb7c6fdSAlex Wilson 		    mlfe_dmac_entry));
1985ebb7c6fdSAlex Wilson 
1986ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
1987ebb7c6fdSAlex Wilson 		mutex_exit(&p->mlp_mtx);
1988ebb7c6fdSAlex Wilson 	}
1989ebb7c6fdSAlex Wilson 
1990ebb7c6fdSAlex Wilson 	return (B_TRUE);
1991ebb7c6fdSAlex Wilson 
1992ebb7c6fdSAlex Wilson err:
1993ebb7c6fdSAlex Wilson 	mlxcx_teardown_ports(mlxp);
1994ebb7c6fdSAlex Wilson 	return (B_FALSE);
1995ebb7c6fdSAlex Wilson }
1996ebb7c6fdSAlex Wilson 
1997ebb7c6fdSAlex Wilson void
mlxcx_remove_all_vlan_entries(mlxcx_t * mlxp,mlxcx_ring_group_t * g)1998ebb7c6fdSAlex Wilson mlxcx_remove_all_vlan_entries(mlxcx_t *mlxp, mlxcx_ring_group_t *g)
1999ebb7c6fdSAlex Wilson {
2000ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
2001ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
2002ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
2003ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2004ebb7c6fdSAlex Wilson 	mlxcx_group_vlan_t *v;
2005ebb7c6fdSAlex Wilson 
2006ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&g->mlg_mtx));
2007ebb7c6fdSAlex Wilson 
2008ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2009ebb7c6fdSAlex Wilson 
2010ebb7c6fdSAlex Wilson 	if (!list_is_empty(&g->mlg_rx_vlans)) {
2011ebb7c6fdSAlex Wilson 		fe = list_head(&dfg->mlfg_entries);
2012ebb7c6fdSAlex Wilson 		(void) mlxcx_cmd_set_flow_table_entry(mlxp, fe);
2013ebb7c6fdSAlex Wilson 	}
2014ebb7c6fdSAlex Wilson 
2015ebb7c6fdSAlex Wilson 	while ((v = list_remove_head(&g->mlg_rx_vlans)) != NULL) {
2016ebb7c6fdSAlex Wilson 		fe = v->mlgv_fe;
2017ebb7c6fdSAlex Wilson 		ASSERT3P(fe->mlfe_table, ==, ft);
2018ebb7c6fdSAlex Wilson 		ASSERT3P(fe->mlfe_group, ==, fg);
2019ebb7c6fdSAlex Wilson 		kmem_free(v, sizeof (mlxcx_group_vlan_t));
2020ebb7c6fdSAlex Wilson 
2021ebb7c6fdSAlex Wilson 		(void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
2022ebb7c6fdSAlex Wilson 		fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2023ebb7c6fdSAlex Wilson 	}
2024ebb7c6fdSAlex Wilson 
2025ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2026ebb7c6fdSAlex Wilson }
2027ebb7c6fdSAlex Wilson 
2028ebb7c6fdSAlex Wilson boolean_t
mlxcx_remove_vlan_entry(mlxcx_t * mlxp,mlxcx_ring_group_t * g,boolean_t tagged,uint16_t vid)2029ebb7c6fdSAlex Wilson mlxcx_remove_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g,
2030ebb7c6fdSAlex Wilson     boolean_t tagged, uint16_t vid)
2031ebb7c6fdSAlex Wilson {
2032ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
2033ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
2034ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
2035ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2036ebb7c6fdSAlex Wilson 	mlxcx_group_vlan_t *v;
2037ebb7c6fdSAlex Wilson 	boolean_t found = B_FALSE;
2038ebb7c6fdSAlex Wilson 
2039ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&g->mlg_mtx));
2040ebb7c6fdSAlex Wilson 
2041ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2042ebb7c6fdSAlex Wilson 
2043ebb7c6fdSAlex Wilson 	for (v = list_head(&g->mlg_rx_vlans); v != NULL;
2044ebb7c6fdSAlex Wilson 	    v = list_next(&g->mlg_rx_vlans, v)) {
2045ebb7c6fdSAlex Wilson 		if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) {
2046ebb7c6fdSAlex Wilson 			found = B_TRUE;
2047ebb7c6fdSAlex Wilson 			break;
2048ebb7c6fdSAlex Wilson 		}
2049ebb7c6fdSAlex Wilson 	}
2050ebb7c6fdSAlex Wilson 	if (!found) {
2051ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2052ebb7c6fdSAlex Wilson 		return (B_FALSE);
2053ebb7c6fdSAlex Wilson 	}
2054ebb7c6fdSAlex Wilson 
2055ebb7c6fdSAlex Wilson 	list_remove(&g->mlg_rx_vlans, v);
2056ebb7c6fdSAlex Wilson 
2057ebb7c6fdSAlex Wilson 	/*
2058ebb7c6fdSAlex Wilson 	 * If this is the last VLAN entry, we have to go back to accepting
2059ebb7c6fdSAlex Wilson 	 * any VLAN (which means re-enabling the default entry).
2060ebb7c6fdSAlex Wilson 	 *
2061ebb7c6fdSAlex Wilson 	 * Do this before we remove the flow entry for the last specific
2062ebb7c6fdSAlex Wilson 	 * VLAN so that we don't lose any traffic in the transition.
2063ebb7c6fdSAlex Wilson 	 */
2064ebb7c6fdSAlex Wilson 	if (list_is_empty(&g->mlg_rx_vlans)) {
2065ebb7c6fdSAlex Wilson 		fe = list_head(&dfg->mlfg_entries);
2066ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
2067ebb7c6fdSAlex Wilson 			list_insert_tail(&g->mlg_rx_vlans, v);
2068ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
2069ebb7c6fdSAlex Wilson 			return (B_FALSE);
2070ebb7c6fdSAlex Wilson 		}
2071ebb7c6fdSAlex Wilson 	}
2072ebb7c6fdSAlex Wilson 
2073ebb7c6fdSAlex Wilson 	fe = v->mlgv_fe;
2074ebb7c6fdSAlex Wilson 	ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED);
2075ebb7c6fdSAlex Wilson 	ASSERT(fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED);
2076ebb7c6fdSAlex Wilson 	ASSERT3P(fe->mlfe_table, ==, ft);
2077ebb7c6fdSAlex Wilson 	ASSERT3P(fe->mlfe_group, ==, fg);
2078ebb7c6fdSAlex Wilson 
2079ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_delete_flow_table_entry(mlxp, fe)) {
2080ebb7c6fdSAlex Wilson 		list_insert_tail(&g->mlg_rx_vlans, v);
2081ebb7c6fdSAlex Wilson 		fe = list_head(&dfg->mlfg_entries);
2082ebb7c6fdSAlex Wilson 		if (fe->mlfe_state & MLXCX_FLOW_ENTRY_CREATED) {
2083ebb7c6fdSAlex Wilson 			(void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
2084ebb7c6fdSAlex Wilson 		}
2085ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2086ebb7c6fdSAlex Wilson 		return (B_FALSE);
2087ebb7c6fdSAlex Wilson 	}
2088ebb7c6fdSAlex Wilson 
2089ebb7c6fdSAlex Wilson 	fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2090ebb7c6fdSAlex Wilson 
2091ebb7c6fdSAlex Wilson 	kmem_free(v, sizeof (mlxcx_group_vlan_t));
2092ebb7c6fdSAlex Wilson 
2093ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2094ebb7c6fdSAlex Wilson 	return (B_TRUE);
2095ebb7c6fdSAlex Wilson }
2096ebb7c6fdSAlex Wilson 
2097ebb7c6fdSAlex Wilson boolean_t
mlxcx_add_vlan_entry(mlxcx_t * mlxp,mlxcx_ring_group_t * g,boolean_t tagged,uint16_t vid)2098ebb7c6fdSAlex Wilson mlxcx_add_vlan_entry(mlxcx_t *mlxp, mlxcx_ring_group_t *g, boolean_t tagged,
2099ebb7c6fdSAlex Wilson     uint16_t vid)
2100ebb7c6fdSAlex Wilson {
2101ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = g->mlg_rx_vlan_ft;
2102ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg = g->mlg_rx_vlan_fg;
2103ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *dfg = g->mlg_rx_vlan_def_fg;
2104ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2105ebb7c6fdSAlex Wilson 	mlxcx_group_vlan_t *v;
2106ebb7c6fdSAlex Wilson 	boolean_t found = B_FALSE;
2107ebb7c6fdSAlex Wilson 	boolean_t first = B_FALSE;
2108ebb7c6fdSAlex Wilson 
2109ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&g->mlg_mtx));
2110ebb7c6fdSAlex Wilson 
2111ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2112ebb7c6fdSAlex Wilson 
2113ebb7c6fdSAlex Wilson 	for (v = list_head(&g->mlg_rx_vlans); v != NULL;
2114ebb7c6fdSAlex Wilson 	    v = list_next(&g->mlg_rx_vlans, v)) {
2115ebb7c6fdSAlex Wilson 		if (v->mlgv_tagged == tagged && v->mlgv_vid == vid) {
2116ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
2117ebb7c6fdSAlex Wilson 			return (B_TRUE);
2118ebb7c6fdSAlex Wilson 		}
2119ebb7c6fdSAlex Wilson 	}
2120ebb7c6fdSAlex Wilson 	if (list_is_empty(&g->mlg_rx_vlans))
2121ebb7c6fdSAlex Wilson 		first = B_TRUE;
2122ebb7c6fdSAlex Wilson 
2123ebb7c6fdSAlex Wilson 	for (fe = list_head(&fg->mlfg_entries); fe != NULL;
2124ebb7c6fdSAlex Wilson 	    fe = list_next(&fg->mlfg_entries, fe)) {
2125ebb7c6fdSAlex Wilson 		if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) {
2126ebb7c6fdSAlex Wilson 			found = B_TRUE;
2127ebb7c6fdSAlex Wilson 			break;
2128ebb7c6fdSAlex Wilson 		}
2129ebb7c6fdSAlex Wilson 	}
2130ebb7c6fdSAlex Wilson 	if (!found) {
2131ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2132ebb7c6fdSAlex Wilson 		return (B_FALSE);
2133ebb7c6fdSAlex Wilson 	}
2134ebb7c6fdSAlex Wilson 
2135ebb7c6fdSAlex Wilson 	v = kmem_zalloc(sizeof (mlxcx_group_vlan_t), KM_SLEEP);
2136ebb7c6fdSAlex Wilson 	v->mlgv_fe = fe;
2137ebb7c6fdSAlex Wilson 	v->mlgv_tagged = tagged;
2138ebb7c6fdSAlex Wilson 	v->mlgv_vid = vid;
2139ebb7c6fdSAlex Wilson 
2140ebb7c6fdSAlex Wilson 	fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED;
2141ebb7c6fdSAlex Wilson 	fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
2142ebb7c6fdSAlex Wilson 	fe->mlfe_vid = vid;
2143ebb7c6fdSAlex Wilson 	if (tagged) {
2144ebb7c6fdSAlex Wilson 		fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_CVLAN;
2145ebb7c6fdSAlex Wilson 	} else {
2146ebb7c6fdSAlex Wilson 		fe->mlfe_vlan_type = MLXCX_VLAN_TYPE_NONE;
2147ebb7c6fdSAlex Wilson 	}
2148ebb7c6fdSAlex Wilson 
2149ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
2150ebb7c6fdSAlex Wilson 		fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
2151ebb7c6fdSAlex Wilson 		fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2152ebb7c6fdSAlex Wilson 		kmem_free(v, sizeof (mlxcx_group_vlan_t));
2153ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2154ebb7c6fdSAlex Wilson 		return (B_FALSE);
2155ebb7c6fdSAlex Wilson 	}
2156ebb7c6fdSAlex Wilson 
2157ebb7c6fdSAlex Wilson 	list_insert_tail(&g->mlg_rx_vlans, v);
2158ebb7c6fdSAlex Wilson 
2159ebb7c6fdSAlex Wilson 	/*
2160ebb7c6fdSAlex Wilson 	 * If the vlan list was empty for this group before adding this one,
2161ebb7c6fdSAlex Wilson 	 * then we no longer want the "default" entry to allow all VLANs
2162ebb7c6fdSAlex Wilson 	 * through.
2163ebb7c6fdSAlex Wilson 	 */
2164ebb7c6fdSAlex Wilson 	if (first) {
2165ebb7c6fdSAlex Wilson 		fe = list_head(&dfg->mlfg_entries);
2166ebb7c6fdSAlex Wilson 		(void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
2167ebb7c6fdSAlex Wilson 	}
2168ebb7c6fdSAlex Wilson 
2169ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2170ebb7c6fdSAlex Wilson 	return (B_TRUE);
2171ebb7c6fdSAlex Wilson }
2172ebb7c6fdSAlex Wilson 
2173ebb7c6fdSAlex Wilson void
mlxcx_remove_all_umcast_entries(mlxcx_t * mlxp,mlxcx_port_t * port,mlxcx_ring_group_t * group)2174ebb7c6fdSAlex Wilson mlxcx_remove_all_umcast_entries(mlxcx_t *mlxp, mlxcx_port_t *port,
2175ebb7c6fdSAlex Wilson     mlxcx_ring_group_t *group)
2176ebb7c6fdSAlex Wilson {
2177ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2178ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = port->mlp_rx_flow;
2179ebb7c6fdSAlex Wilson 	mlxcx_group_mac_t *gm, *ngm;
2180ebb7c6fdSAlex Wilson 
2181ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&port->mlp_mtx));
2182ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&group->mlg_mtx));
2183ebb7c6fdSAlex Wilson 
2184ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2185ebb7c6fdSAlex Wilson 
2186ebb7c6fdSAlex Wilson 	gm = avl_first(&group->mlg_rx_macs);
2187ebb7c6fdSAlex Wilson 	for (; gm != NULL; gm = ngm) {
2188ebb7c6fdSAlex Wilson 		ngm = AVL_NEXT(&group->mlg_rx_macs, gm);
2189ebb7c6fdSAlex Wilson 
2190ebb7c6fdSAlex Wilson 		ASSERT3P(gm->mlgm_group, ==, group);
2191ebb7c6fdSAlex Wilson 		fe = gm->mlgm_fe;
2192ebb7c6fdSAlex Wilson 		ASSERT3P(fe->mlfe_table, ==, ft);
2193ebb7c6fdSAlex Wilson 
2194ebb7c6fdSAlex Wilson 		avl_remove(&group->mlg_rx_macs, gm);
2195ebb7c6fdSAlex Wilson 		list_remove(&fe->mlfe_ring_groups, gm);
2196ebb7c6fdSAlex Wilson 		kmem_free(gm, sizeof (mlxcx_group_mac_t));
2197ebb7c6fdSAlex Wilson 
2198ebb7c6fdSAlex Wilson 		fe->mlfe_ndest = 0;
2199ebb7c6fdSAlex Wilson 		for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL;
2200ebb7c6fdSAlex Wilson 		    gm = list_next(&fe->mlfe_ring_groups, gm)) {
2201ebb7c6fdSAlex Wilson 			fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow =
2202ebb7c6fdSAlex Wilson 			    gm->mlgm_group->mlg_rx_vlan_ft;
2203ebb7c6fdSAlex Wilson 		}
2204ebb7c6fdSAlex Wilson 		fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
2205ebb7c6fdSAlex Wilson 
2206ebb7c6fdSAlex Wilson 		if (fe->mlfe_ndest > 0) {
2207ebb7c6fdSAlex Wilson 			(void) mlxcx_cmd_set_flow_table_entry(mlxp, fe);
2208ebb7c6fdSAlex Wilson 			continue;
2209ebb7c6fdSAlex Wilson 		}
2210ebb7c6fdSAlex Wilson 
2211ebb7c6fdSAlex Wilson 		/*
2212ebb7c6fdSAlex Wilson 		 * There are no more ring groups left for this MAC (it wasn't
2213ebb7c6fdSAlex Wilson 		 * attached to any other groups since ndest == 0), so clean up
2214ebb7c6fdSAlex Wilson 		 * its flow entry.
2215ebb7c6fdSAlex Wilson 		 */
2216ebb7c6fdSAlex Wilson 		avl_remove(&port->mlp_dmac_fe, fe);
2217ebb7c6fdSAlex Wilson 		(void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
2218ebb7c6fdSAlex Wilson 		list_destroy(&fe->mlfe_ring_groups);
2219ebb7c6fdSAlex Wilson 		fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2220ebb7c6fdSAlex Wilson 	}
2221ebb7c6fdSAlex Wilson 
2222ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2223ebb7c6fdSAlex Wilson }
2224ebb7c6fdSAlex Wilson 
2225ebb7c6fdSAlex Wilson boolean_t
mlxcx_remove_umcast_entry(mlxcx_t * mlxp,mlxcx_port_t * port,mlxcx_ring_group_t * group,const uint8_t * macaddr)2226ebb7c6fdSAlex Wilson mlxcx_remove_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port,
2227ebb7c6fdSAlex Wilson     mlxcx_ring_group_t *group, const uint8_t *macaddr)
2228ebb7c6fdSAlex Wilson {
2229ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2230ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = port->mlp_rx_flow;
2231ebb7c6fdSAlex Wilson 	mlxcx_group_mac_t *gm, probe;
2232ebb7c6fdSAlex Wilson 
2233ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&port->mlp_mtx));
2234ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&group->mlg_mtx));
2235ebb7c6fdSAlex Wilson 
2236ebb7c6fdSAlex Wilson 	bzero(&probe, sizeof (probe));
2237ebb7c6fdSAlex Wilson 	bcopy(macaddr, probe.mlgm_mac, sizeof (probe.mlgm_mac));
2238ebb7c6fdSAlex Wilson 
2239ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2240ebb7c6fdSAlex Wilson 
2241ebb7c6fdSAlex Wilson 	gm = avl_find(&group->mlg_rx_macs, &probe, NULL);
2242ebb7c6fdSAlex Wilson 	if (gm == NULL) {
2243ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2244ebb7c6fdSAlex Wilson 		return (B_FALSE);
2245ebb7c6fdSAlex Wilson 	}
2246ebb7c6fdSAlex Wilson 	ASSERT3P(gm->mlgm_group, ==, group);
2247ebb7c6fdSAlex Wilson 	ASSERT0(bcmp(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac)));
2248ebb7c6fdSAlex Wilson 
2249ebb7c6fdSAlex Wilson 	fe = gm->mlgm_fe;
2250ebb7c6fdSAlex Wilson 	ASSERT3P(fe->mlfe_table, ==, ft);
2251ebb7c6fdSAlex Wilson 	ASSERT0(bcmp(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac)));
2252ebb7c6fdSAlex Wilson 
2253ebb7c6fdSAlex Wilson 	list_remove(&fe->mlfe_ring_groups, gm);
2254ebb7c6fdSAlex Wilson 	avl_remove(&group->mlg_rx_macs, gm);
2255ebb7c6fdSAlex Wilson 	kmem_free(gm, sizeof (mlxcx_group_mac_t));
2256ebb7c6fdSAlex Wilson 
2257ebb7c6fdSAlex Wilson 	fe->mlfe_ndest = 0;
2258ebb7c6fdSAlex Wilson 	for (gm = list_head(&fe->mlfe_ring_groups); gm != NULL;
2259ebb7c6fdSAlex Wilson 	    gm = list_next(&fe->mlfe_ring_groups, gm)) {
2260ebb7c6fdSAlex Wilson 		fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow =
2261ebb7c6fdSAlex Wilson 		    gm->mlgm_group->mlg_rx_vlan_ft;
2262ebb7c6fdSAlex Wilson 	}
2263ebb7c6fdSAlex Wilson 	fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
2264ebb7c6fdSAlex Wilson 
2265ebb7c6fdSAlex Wilson 	if (fe->mlfe_ndest > 0) {
2266ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
2267ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
2268ebb7c6fdSAlex Wilson 			return (B_FALSE);
2269ebb7c6fdSAlex Wilson 		}
2270ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2271ebb7c6fdSAlex Wilson 		return (B_TRUE);
2272ebb7c6fdSAlex Wilson 	}
2273ebb7c6fdSAlex Wilson 
2274ebb7c6fdSAlex Wilson 	/*
2275ebb7c6fdSAlex Wilson 	 * There are no more ring groups left for this MAC (it wasn't attached
2276ebb7c6fdSAlex Wilson 	 * to any other groups since ndest == 0), so clean up its flow entry.
2277ebb7c6fdSAlex Wilson 	 */
2278ebb7c6fdSAlex Wilson 	avl_remove(&port->mlp_dmac_fe, fe);
2279ebb7c6fdSAlex Wilson 	(void) mlxcx_cmd_delete_flow_table_entry(mlxp, fe);
2280ebb7c6fdSAlex Wilson 	list_destroy(&fe->mlfe_ring_groups);
2281ebb7c6fdSAlex Wilson 
2282ebb7c6fdSAlex Wilson 	fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2283ebb7c6fdSAlex Wilson 
2284ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2285ebb7c6fdSAlex Wilson 
2286ebb7c6fdSAlex Wilson 	return (B_TRUE);
2287ebb7c6fdSAlex Wilson }
2288ebb7c6fdSAlex Wilson 
2289ebb7c6fdSAlex Wilson boolean_t
mlxcx_add_umcast_entry(mlxcx_t * mlxp,mlxcx_port_t * port,mlxcx_ring_group_t * group,const uint8_t * macaddr)2290ebb7c6fdSAlex Wilson mlxcx_add_umcast_entry(mlxcx_t *mlxp, mlxcx_port_t *port,
2291ebb7c6fdSAlex Wilson     mlxcx_ring_group_t *group, const uint8_t *macaddr)
2292ebb7c6fdSAlex Wilson {
2293ebb7c6fdSAlex Wilson 	mlxcx_flow_group_t *fg;
2294ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe, probe;
2295ebb7c6fdSAlex Wilson 	mlxcx_flow_table_t *ft = port->mlp_rx_flow;
2296ebb7c6fdSAlex Wilson 	mlxcx_group_mac_t *gm;
2297ebb7c6fdSAlex Wilson 	boolean_t found = B_FALSE;
2298ebb7c6fdSAlex Wilson 
2299ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&port->mlp_mtx));
2300ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&group->mlg_mtx));
2301ebb7c6fdSAlex Wilson 
2302ebb7c6fdSAlex Wilson 	bzero(&probe, sizeof (probe));
2303ebb7c6fdSAlex Wilson 	bcopy(macaddr, probe.mlfe_dmac, sizeof (probe.mlfe_dmac));
2304ebb7c6fdSAlex Wilson 
2305ebb7c6fdSAlex Wilson 	mutex_enter(&ft->mlft_mtx);
2306ebb7c6fdSAlex Wilson 
2307ebb7c6fdSAlex Wilson 	fe = avl_find(&port->mlp_dmac_fe, &probe, NULL);
2308ebb7c6fdSAlex Wilson 
2309ebb7c6fdSAlex Wilson 	if (fe == NULL) {
2310ebb7c6fdSAlex Wilson 		fg = port->mlp_umcast;
2311ebb7c6fdSAlex Wilson 		for (fe = list_head(&fg->mlfg_entries); fe != NULL;
2312ebb7c6fdSAlex Wilson 		    fe = list_next(&fg->mlfg_entries, fe)) {
2313ebb7c6fdSAlex Wilson 			if (!(fe->mlfe_state & MLXCX_FLOW_ENTRY_RESERVED)) {
2314ebb7c6fdSAlex Wilson 				found = B_TRUE;
2315ebb7c6fdSAlex Wilson 				break;
2316ebb7c6fdSAlex Wilson 			}
2317ebb7c6fdSAlex Wilson 		}
2318ebb7c6fdSAlex Wilson 		if (!found) {
2319ebb7c6fdSAlex Wilson 			mutex_exit(&ft->mlft_mtx);
2320ebb7c6fdSAlex Wilson 			return (B_FALSE);
2321ebb7c6fdSAlex Wilson 		}
2322ebb7c6fdSAlex Wilson 		list_create(&fe->mlfe_ring_groups, sizeof (mlxcx_group_mac_t),
2323ebb7c6fdSAlex Wilson 		    offsetof(mlxcx_group_mac_t, mlgm_fe_entry));
2324ebb7c6fdSAlex Wilson 		fe->mlfe_state |= MLXCX_FLOW_ENTRY_RESERVED;
2325ebb7c6fdSAlex Wilson 		fe->mlfe_action = MLXCX_FLOW_ACTION_FORWARD;
2326ebb7c6fdSAlex Wilson 		bcopy(macaddr, fe->mlfe_dmac, sizeof (fe->mlfe_dmac));
2327ebb7c6fdSAlex Wilson 
2328ebb7c6fdSAlex Wilson 		avl_add(&port->mlp_dmac_fe, fe);
2329ebb7c6fdSAlex Wilson 	}
2330ebb7c6fdSAlex Wilson 
2331ebb7c6fdSAlex Wilson 	fe->mlfe_dest[fe->mlfe_ndest++].mlfed_flow = group->mlg_rx_vlan_ft;
2332ebb7c6fdSAlex Wilson 	fe->mlfe_state |= MLXCX_FLOW_ENTRY_DIRTY;
2333ebb7c6fdSAlex Wilson 
2334ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_set_flow_table_entry(mlxp, fe)) {
2335ebb7c6fdSAlex Wilson 		fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_DIRTY;
2336ebb7c6fdSAlex Wilson 		if (--fe->mlfe_ndest == 0) {
2337ebb7c6fdSAlex Wilson 			fe->mlfe_state &= ~MLXCX_FLOW_ENTRY_RESERVED;
2338ebb7c6fdSAlex Wilson 		}
2339ebb7c6fdSAlex Wilson 		mutex_exit(&ft->mlft_mtx);
2340ebb7c6fdSAlex Wilson 		return (B_FALSE);
2341ebb7c6fdSAlex Wilson 	}
2342ebb7c6fdSAlex Wilson 
2343ebb7c6fdSAlex Wilson 	gm = kmem_zalloc(sizeof (mlxcx_group_mac_t), KM_SLEEP);
2344ebb7c6fdSAlex Wilson 	gm->mlgm_group = group;
2345ebb7c6fdSAlex Wilson 	gm->mlgm_fe = fe;
2346ebb7c6fdSAlex Wilson 	bcopy(macaddr, gm->mlgm_mac, sizeof (gm->mlgm_mac));
2347ebb7c6fdSAlex Wilson 	avl_add(&group->mlg_rx_macs, gm);
2348ebb7c6fdSAlex Wilson 	list_insert_tail(&fe->mlfe_ring_groups, gm);
2349ebb7c6fdSAlex Wilson 
2350ebb7c6fdSAlex Wilson 	mutex_exit(&ft->mlft_mtx);
2351ebb7c6fdSAlex Wilson 
2352ebb7c6fdSAlex Wilson 	return (B_TRUE);
2353ebb7c6fdSAlex Wilson }
2354ebb7c6fdSAlex Wilson 
2355ebb7c6fdSAlex Wilson boolean_t
mlxcx_setup_flow_group(mlxcx_t * mlxp,mlxcx_flow_table_t * ft,mlxcx_flow_group_t * fg)2356ebb7c6fdSAlex Wilson mlxcx_setup_flow_group(mlxcx_t *mlxp, mlxcx_flow_table_t *ft,
2357ebb7c6fdSAlex Wilson     mlxcx_flow_group_t *fg)
2358ebb7c6fdSAlex Wilson {
2359ebb7c6fdSAlex Wilson 	mlxcx_flow_entry_t *fe;
2360ebb7c6fdSAlex Wilson 	uint_t i, idx;
2361ebb7c6fdSAlex Wilson 
2362ebb7c6fdSAlex Wilson 	ASSERT(mutex_owned(&ft->mlft_mtx));
2363ebb7c6fdSAlex Wilson 	ASSERT(ft->mlft_state & MLXCX_FLOW_TABLE_CREATED);
2364ebb7c6fdSAlex Wilson 	ASSERT3P(fg->mlfg_table, ==, ft);
2365ebb7c6fdSAlex Wilson 
2366ebb7c6fdSAlex Wilson 	if (ft->mlft_next_ent + fg->mlfg_size > ft->mlft_nents)
2367ebb7c6fdSAlex Wilson 		return (B_FALSE);
2368ebb7c6fdSAlex Wilson 	fg->mlfg_start_idx = ft->mlft_next_ent;
2369ebb7c6fdSAlex Wilson 
2370ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_create_flow_group(mlxp, fg)) {
2371ebb7c6fdSAlex Wilson 		return (B_FALSE);
2372ebb7c6fdSAlex Wilson 	}
2373ebb7c6fdSAlex Wilson 
2374ebb7c6fdSAlex Wilson 	list_create(&fg->mlfg_entries, sizeof (mlxcx_flow_entry_t),
2375ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_flow_entry_t, mlfe_group_entry));
2376ebb7c6fdSAlex Wilson 	for (i = 0; i < fg->mlfg_size; ++i) {
2377ebb7c6fdSAlex Wilson 		idx = fg->mlfg_start_idx + i;
2378ebb7c6fdSAlex Wilson 		fe = &ft->mlft_ent[idx];
2379ebb7c6fdSAlex Wilson 		fe->mlfe_group = fg;
2380ebb7c6fdSAlex Wilson 		list_insert_tail(&fg->mlfg_entries, fe);
2381ebb7c6fdSAlex Wilson 	}
2382ebb7c6fdSAlex Wilson 	fg->mlfg_avail = fg->mlfg_size;
2383ebb7c6fdSAlex Wilson 	ft->mlft_next_ent += fg->mlfg_size;
2384ebb7c6fdSAlex Wilson 
2385ebb7c6fdSAlex Wilson 	return (B_TRUE);
2386ebb7c6fdSAlex Wilson }
2387ebb7c6fdSAlex Wilson 
2388ebb7c6fdSAlex Wilson static boolean_t
mlxcx_setup_eq(mlxcx_t * mlxp,uint_t vec,uint64_t events)23895f0e3176SPaul Winder mlxcx_setup_eq(mlxcx_t *mlxp, uint_t vec, uint64_t events)
2390ebb7c6fdSAlex Wilson {
23915f0e3176SPaul Winder 	mlxcx_event_queue_t *mleq = &mlxp->mlx_eqs[vec];
2392ebb7c6fdSAlex Wilson 
2393ebb7c6fdSAlex Wilson 	mutex_enter(&mleq->mleq_mtx);
2394ebb7c6fdSAlex Wilson 	if (!mlxcx_eq_alloc_dma(mlxp, mleq)) {
2395ebb7c6fdSAlex Wilson 		/* mlxcx_teardown_eqs() will clean this up */
2396ebb7c6fdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
2397ebb7c6fdSAlex Wilson 		return (B_FALSE);
2398ebb7c6fdSAlex Wilson 	}
2399ebb7c6fdSAlex Wilson 	mleq->mleq_mlx = mlxp;
2400ebb7c6fdSAlex Wilson 	mleq->mleq_uar = &mlxp->mlx_uar;
24015f0e3176SPaul Winder 	mleq->mleq_events = events;
24025f0e3176SPaul Winder 	mleq->mleq_intr_index = vec;
24035f0e3176SPaul Winder 
2404ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_create_eq(mlxp, mleq)) {
2405ebb7c6fdSAlex Wilson 		/* mlxcx_teardown_eqs() will clean this up */
2406ebb7c6fdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
2407ebb7c6fdSAlex Wilson 		return (B_FALSE);
2408ebb7c6fdSAlex Wilson 	}
24090207f820SPaul Winder 
24105f0e3176SPaul Winder 	if (ddi_intr_enable(mlxp->mlx_intr_handles[vec]) != DDI_SUCCESS) {
2411ebb7c6fdSAlex Wilson 		/*
2412ebb7c6fdSAlex Wilson 		 * mlxcx_teardown_eqs() will handle calling cmd_destroy_eq and
2413ebb7c6fdSAlex Wilson 		 * eq_rele_dma
2414ebb7c6fdSAlex Wilson 		 */
2415ebb7c6fdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
2416ebb7c6fdSAlex Wilson 		return (B_FALSE);
2417ebb7c6fdSAlex Wilson 	}
24180207f820SPaul Winder 	mleq->mleq_state |= MLXCX_EQ_INTR_ENABLED;
241980d1a7bdSAlex Wilson 	mleq->mleq_state |= MLXCX_EQ_ATTACHING;
2420ebb7c6fdSAlex Wilson 	mlxcx_arm_eq(mlxp, mleq);
2421ebb7c6fdSAlex Wilson 	mutex_exit(&mleq->mleq_mtx);
24225f0e3176SPaul Winder 
2423ebb7c6fdSAlex Wilson 	return (B_TRUE);
2424ebb7c6fdSAlex Wilson }
2425ebb7c6fdSAlex Wilson 
242680d1a7bdSAlex Wilson static void
mlxcx_eq_set_attached(mlxcx_t * mlxp)242780d1a7bdSAlex Wilson mlxcx_eq_set_attached(mlxcx_t *mlxp)
242880d1a7bdSAlex Wilson {
242980d1a7bdSAlex Wilson 	uint_t vec;
243080d1a7bdSAlex Wilson 	mlxcx_event_queue_t *mleq;
243180d1a7bdSAlex Wilson 
243280d1a7bdSAlex Wilson 	for (vec = 0; vec < mlxp->mlx_intr_count; ++vec) {
243380d1a7bdSAlex Wilson 		mleq = &mlxp->mlx_eqs[vec];
243480d1a7bdSAlex Wilson 
243580d1a7bdSAlex Wilson 		mutex_enter(&mleq->mleq_mtx);
243680d1a7bdSAlex Wilson 		mleq->mleq_state &= ~MLXCX_EQ_ATTACHING;
243780d1a7bdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
243880d1a7bdSAlex Wilson 	}
243980d1a7bdSAlex Wilson }
244080d1a7bdSAlex Wilson 
24415f0e3176SPaul Winder static boolean_t
mlxcx_setup_async_eqs(mlxcx_t * mlxp)24425f0e3176SPaul Winder mlxcx_setup_async_eqs(mlxcx_t *mlxp)
24435f0e3176SPaul Winder {
24445f0e3176SPaul Winder 	boolean_t ret;
24455f0e3176SPaul Winder 
24465f0e3176SPaul Winder 	ret = mlxcx_setup_eq(mlxp, 0,
24475f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_CMD_COMPLETION) |
24485f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_PAGE_REQUEST) |
24495f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_PORT_STATE) |
24505f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_INTERNAL_ERROR) |
24515f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_PORT_MODULE) |
24525f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_SENDQ_DRAIN) |
24535f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_LAST_WQE) |
24545f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_CQ_ERROR) |
24555f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_WQ_CATASTROPHE) |
24565f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_PAGE_FAULT) |
24575f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_WQ_INVALID_REQ) |
24585f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_WQ_ACCESS_VIOL) |
24595f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_NIC_VPORT) |
24605f0e3176SPaul Winder 	    (1ULL << MLXCX_EVENT_DOORBELL_CONGEST));
24615f0e3176SPaul Winder 
24625f0e3176SPaul Winder 	if (ret)
24635f0e3176SPaul Winder 		mlxcx_cmd_eq_enable(mlxp);
24645f0e3176SPaul Winder 
24655f0e3176SPaul Winder 	return (ret);
24665f0e3176SPaul Winder }
24675f0e3176SPaul Winder 
2468ebb7c6fdSAlex Wilson int
mlxcx_cq_compare(const void * arg0,const void * arg1)2469ebb7c6fdSAlex Wilson mlxcx_cq_compare(const void *arg0, const void *arg1)
2470ebb7c6fdSAlex Wilson {
2471ebb7c6fdSAlex Wilson 	const mlxcx_completion_queue_t *left = arg0;
2472ebb7c6fdSAlex Wilson 	const mlxcx_completion_queue_t *right = arg1;
2473ebb7c6fdSAlex Wilson 
2474ebb7c6fdSAlex Wilson 	if (left->mlcq_num < right->mlcq_num) {
2475ebb7c6fdSAlex Wilson 		return (-1);
2476ebb7c6fdSAlex Wilson 	}
2477ebb7c6fdSAlex Wilson 	if (left->mlcq_num > right->mlcq_num) {
2478ebb7c6fdSAlex Wilson 		return (1);
2479ebb7c6fdSAlex Wilson 	}
2480ebb7c6fdSAlex Wilson 	return (0);
2481ebb7c6fdSAlex Wilson }
2482ebb7c6fdSAlex Wilson 
2483ebb7c6fdSAlex Wilson static boolean_t
mlxcx_setup_eqs(mlxcx_t * mlxp)2484ebb7c6fdSAlex Wilson mlxcx_setup_eqs(mlxcx_t *mlxp)
2485ebb7c6fdSAlex Wilson {
2486ebb7c6fdSAlex Wilson 	uint_t i;
2487ebb7c6fdSAlex Wilson 	mlxcx_event_queue_t *mleq;
2488ebb7c6fdSAlex Wilson 
2489ebb7c6fdSAlex Wilson 	ASSERT3S(mlxp->mlx_intr_count, >, 0);
2490ebb7c6fdSAlex Wilson 
24915f0e3176SPaul Winder 	for (i = mlxp->mlx_intr_cq0; i < mlxp->mlx_intr_count; ++i) {
2492ebb7c6fdSAlex Wilson 		mleq = &mlxp->mlx_eqs[i];
2493ebb7c6fdSAlex Wilson 		mutex_enter(&mleq->mleq_mtx);
2494ebb7c6fdSAlex Wilson 		if (!mlxcx_eq_alloc_dma(mlxp, mleq)) {
2495ebb7c6fdSAlex Wilson 			mutex_exit(&mleq->mleq_mtx);
2496ebb7c6fdSAlex Wilson 			return (B_FALSE);
2497ebb7c6fdSAlex Wilson 		}
2498ebb7c6fdSAlex Wilson 		mleq->mleq_uar = &mlxp->mlx_uar;
2499ebb7c6fdSAlex Wilson 		if (!mlxcx_cmd_create_eq(mlxp, mleq)) {
2500ebb7c6fdSAlex Wilson 			/* mlxcx_teardown() will handle calling eq_rele_dma */
2501ebb7c6fdSAlex Wilson 			mutex_exit(&mleq->mleq_mtx);
2502ebb7c6fdSAlex Wilson 			return (B_FALSE);
2503ebb7c6fdSAlex Wilson 		}
2504ebb7c6fdSAlex Wilson 		if (mlxp->mlx_props.mldp_intrmod_period_usec != 0 &&
2505ebb7c6fdSAlex Wilson 		    !mlxcx_cmd_set_int_mod(mlxp, i,
2506ebb7c6fdSAlex Wilson 		    mlxp->mlx_props.mldp_intrmod_period_usec)) {
2507ebb7c6fdSAlex Wilson 			mutex_exit(&mleq->mleq_mtx);
2508ebb7c6fdSAlex Wilson 			return (B_FALSE);
2509ebb7c6fdSAlex Wilson 		}
2510ebb7c6fdSAlex Wilson 		if (ddi_intr_enable(mlxp->mlx_intr_handles[i]) != DDI_SUCCESS) {
2511ebb7c6fdSAlex Wilson 			mutex_exit(&mleq->mleq_mtx);
2512ebb7c6fdSAlex Wilson 			return (B_FALSE);
2513ebb7c6fdSAlex Wilson 		}
25140207f820SPaul Winder 		mleq->mleq_state |= MLXCX_EQ_INTR_ENABLED;
2515ebb7c6fdSAlex Wilson 		mlxcx_arm_eq(mlxp, mleq);
2516ebb7c6fdSAlex Wilson 		mutex_exit(&mleq->mleq_mtx);
2517ebb7c6fdSAlex Wilson 	}
2518ebb7c6fdSAlex Wilson 
25195f0e3176SPaul Winder 	mlxp->mlx_next_eq = mlxp->mlx_intr_cq0;
2520ebb7c6fdSAlex Wilson 
2521ebb7c6fdSAlex Wilson 	return (B_TRUE);
2522ebb7c6fdSAlex Wilson }
2523ebb7c6fdSAlex Wilson 
252485e4aa97SDan McDonald /*
252585e4aa97SDan McDonald  * A more recent ConnectX part will have the Port CApability Mask register.
252685e4aa97SDan McDonald  * Explore it and note things here.
252785e4aa97SDan McDonald  */
252885e4aa97SDan McDonald static void
mlxcx_explore_pcam(mlxcx_t * mlxp,mlxcx_caps_t * c)252985e4aa97SDan McDonald mlxcx_explore_pcam(mlxcx_t *mlxp, mlxcx_caps_t *c)
253085e4aa97SDan McDonald {
253185e4aa97SDan McDonald 	mlxcx_register_data_t data;
253285e4aa97SDan McDonald 	mlxcx_reg_pcam_t *pcam = &data.mlrd_pcam;
253385e4aa97SDan McDonald 
253485e4aa97SDan McDonald 	ASSERT(c->mlc_pcam);
253585e4aa97SDan McDonald 	bzero(&data, sizeof (data));
253685e4aa97SDan McDonald 
253785e4aa97SDan McDonald 	/*
253885e4aa97SDan McDonald 	 * Okay, so we have access the the Ports CApability Mask (PCAM).
253985e4aa97SDan McDonald 	 * There are various things we need to check about it.
254085e4aa97SDan McDonald 	 */
254185e4aa97SDan McDonald 
254285e4aa97SDan McDonald 	VERIFY(mlxcx_cmd_access_register(mlxp, MLXCX_CMD_ACCESS_REGISTER_READ,
254385e4aa97SDan McDonald 	    MLXCX_REG_PCAM, &data));
254485e4aa97SDan McDonald 
254585e4aa97SDan McDonald 	/*
254685e4aa97SDan McDonald 	 * NOTE: These ASSERT()s may change in future mlxcx(4D) parts.
254785e4aa97SDan McDonald 	 * As of now, only 0 is valid, and 1-255 are reserved.  A future part
254885e4aa97SDan McDonald 	 * may return non-zero in these fields.
254985e4aa97SDan McDonald 	 */
255085e4aa97SDan McDonald 	ASSERT0(pcam->mlrd_pcam_feature_group);
255185e4aa97SDan McDonald 	ASSERT0(pcam->mlrd_pcam_access_reg_group);
255285e4aa97SDan McDonald 
255385e4aa97SDan McDonald 	c->mlc_ext_ptys = get_bit64(pcam->mlrd_pcam_feature_cap_mask_low,
255485e4aa97SDan McDonald 	    MLXCX_PCAM_LOW_FFLAGS_PTYS_EXTENDED);
255585e4aa97SDan McDonald }
255685e4aa97SDan McDonald 
2557ebb7c6fdSAlex Wilson /*
2558ebb7c6fdSAlex Wilson  * Snapshot all of the hardware capabilities that we care about and then modify
2559ebb7c6fdSAlex Wilson  * the HCA capabilities to get things moving.
2560ebb7c6fdSAlex Wilson  */
2561ebb7c6fdSAlex Wilson static boolean_t
mlxcx_init_caps(mlxcx_t * mlxp)2562ebb7c6fdSAlex Wilson mlxcx_init_caps(mlxcx_t *mlxp)
2563ebb7c6fdSAlex Wilson {
2564ebb7c6fdSAlex Wilson 	mlxcx_caps_t *c;
2565ebb7c6fdSAlex Wilson 
2566ebb7c6fdSAlex Wilson 	mlxp->mlx_caps = c = kmem_zalloc(sizeof (mlxcx_caps_t), KM_SLEEP);
2567ebb7c6fdSAlex Wilson 
2568ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL,
2569ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_hca_cur)) {
2570ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain current HCA general caps");
2571ebb7c6fdSAlex Wilson 	}
2572ebb7c6fdSAlex Wilson 
2573ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_GENERAL,
2574ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_MAX, &c->mlc_hca_max)) {
2575ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain maximum HCA general caps");
2576ebb7c6fdSAlex Wilson 	}
2577ebb7c6fdSAlex Wilson 
2578ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET,
2579ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_ether_cur)) {
2580ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain current HCA eth caps");
2581ebb7c6fdSAlex Wilson 	}
2582ebb7c6fdSAlex Wilson 
2583ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_ETHERNET,
2584ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_MAX, &c->mlc_ether_max)) {
2585ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain maximum HCA eth caps");
2586ebb7c6fdSAlex Wilson 	}
2587ebb7c6fdSAlex Wilson 
2588ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW,
2589ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_CURRENT, &c->mlc_nic_flow_cur)) {
2590ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain current HCA flow caps");
2591ebb7c6fdSAlex Wilson 	}
2592ebb7c6fdSAlex Wilson 
2593ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_hca_cap(mlxp, MLXCX_HCA_CAP_NIC_FLOW,
2594ebb7c6fdSAlex Wilson 	    MLXCX_HCA_CAP_MODE_MAX, &c->mlc_nic_flow_max)) {
2595ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to obtain maximum HCA flow caps");
2596ebb7c6fdSAlex Wilson 	}
2597ebb7c6fdSAlex Wilson 
2598ebb7c6fdSAlex Wilson 	/*
2599ebb7c6fdSAlex Wilson 	 * Check the caps meet our requirements.
2600ebb7c6fdSAlex Wilson 	 */
2601ebb7c6fdSAlex Wilson 	const mlxcx_hca_cap_general_caps_t *gen = &c->mlc_hca_cur.mhc_general;
2602ebb7c6fdSAlex Wilson 
2603ebb7c6fdSAlex Wilson 	if (gen->mlcap_general_log_pg_sz != 12) {
2604ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "!hardware has page size != 4k "
2605ebb7c6fdSAlex Wilson 		    "(log_pg_sz = %u)", (uint_t)gen->mlcap_general_log_pg_sz);
2606ebb7c6fdSAlex Wilson 		goto err;
2607ebb7c6fdSAlex Wilson 	}
2608ebb7c6fdSAlex Wilson 	if (gen->mlcap_general_cqe_version != 1) {
2609ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "!hardware does not support CQE v1 "
2610ebb7c6fdSAlex Wilson 		    "(cqe_ver = %u)", (uint_t)gen->mlcap_general_cqe_version);
2611ebb7c6fdSAlex Wilson 		goto err;
2612ebb7c6fdSAlex Wilson 	}
2613ebb7c6fdSAlex Wilson 	if (gen->mlcap_general_port_type !=
2614ebb7c6fdSAlex Wilson 	    MLXCX_CAP_GENERAL_PORT_TYPE_ETHERNET) {
2615ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "!hardware has non-ethernet ports");
2616ebb7c6fdSAlex Wilson 		goto err;
2617ebb7c6fdSAlex Wilson 	}
2618ebb7c6fdSAlex Wilson 	mlxp->mlx_nports = gen->mlcap_general_num_ports;
2619ebb7c6fdSAlex Wilson 	mlxp->mlx_max_sdu = (1 << (gen->mlcap_general_log_max_msg & 0x1F));
2620ebb7c6fdSAlex Wilson 
262183b3f06fSJason King 	if (mlxp->mlx_type >= MLXCX_DEV_CX5 &&
262283b3f06fSJason King 	    get_bit16(gen->mlcap_general_flags_c,
262385e4aa97SDan McDonald 	    MLXCX_CAP_GENERAL_FLAGS_C_PCAM_REG)) {
262485e4aa97SDan McDonald 		c->mlc_pcam = B_TRUE;
262585e4aa97SDan McDonald 	}
262685e4aa97SDan McDonald 
2627ebb7c6fdSAlex Wilson 	c->mlc_max_tir = (1 << gen->mlcap_general_log_max_tir);
2628ebb7c6fdSAlex Wilson 
2629ebb7c6fdSAlex Wilson 	c->mlc_checksum = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags,
2630ebb7c6fdSAlex Wilson 	    MLXCX_ETH_CAP_CSUM_CAP);
2631ebb7c6fdSAlex Wilson 	c->mlc_vxlan = get_bit32(c->mlc_ether_cur.mhc_eth.mlcap_eth_flags,
2632ebb7c6fdSAlex Wilson 	    MLXCX_ETH_CAP_TUNNEL_STATELESS_VXLAN);
2633ebb7c6fdSAlex Wilson 
2634ebb7c6fdSAlex Wilson 	c->mlc_max_lso_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth.
2635ebb7c6fdSAlex Wilson 	    mlcap_eth_flags, MLXCX_ETH_CAP_MAX_LSO_CAP));
2636ebb7c6fdSAlex Wilson 	if (c->mlc_max_lso_size == 1) {
2637ebb7c6fdSAlex Wilson 		c->mlc_max_lso_size = 0;
2638ebb7c6fdSAlex Wilson 		c->mlc_lso = B_FALSE;
2639ebb7c6fdSAlex Wilson 	} else {
2640ebb7c6fdSAlex Wilson 		c->mlc_lso = B_TRUE;
2641ebb7c6fdSAlex Wilson 	}
2642ebb7c6fdSAlex Wilson 
2643ebb7c6fdSAlex Wilson 	c->mlc_max_rqt_size = (1 << get_bits32(c->mlc_ether_cur.mhc_eth.
2644ebb7c6fdSAlex Wilson 	    mlcap_eth_flags, MLXCX_ETH_CAP_RSS_IND_TBL_CAP));
2645ebb7c6fdSAlex Wilson 
2646ebb7c6fdSAlex Wilson 	if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
2647ebb7c6fdSAlex Wilson 	    mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_SUPPORT)) {
2648ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "!hardware does not support rx flow tables");
2649ebb7c6fdSAlex Wilson 		goto err;
2650ebb7c6fdSAlex Wilson 	}
2651ebb7c6fdSAlex Wilson 	if (!get_bit32(c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
2652ebb7c6fdSAlex Wilson 	    mlcap_flow_prop_flags, MLXCX_FLOW_CAP_PROPS_MODIFY)) {
2653ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "!hardware does not support modifying rx "
2654ebb7c6fdSAlex Wilson 		    "flow table entries");
2655ebb7c6fdSAlex Wilson 		goto err;
2656ebb7c6fdSAlex Wilson 	}
2657ebb7c6fdSAlex Wilson 
2658ebb7c6fdSAlex Wilson 	c->mlc_max_rx_ft_shift = c->mlc_nic_flow_cur.mhc_flow.mlcap_flow_nic_rx.
2659ebb7c6fdSAlex Wilson 	    mlcap_flow_prop_log_max_ft_size;
2660ebb7c6fdSAlex Wilson 	c->mlc_max_rx_flows = (1 << c->mlc_nic_flow_cur.mhc_flow.
2661ebb7c6fdSAlex Wilson 	    mlcap_flow_nic_rx.mlcap_flow_prop_log_max_flow);
26625f0e3176SPaul Winder 	c->mlc_max_rx_ft = (1 << c->mlc_nic_flow_cur.mhc_flow.
26635f0e3176SPaul Winder 	    mlcap_flow_nic_rx.mlcap_flow_prop_log_max_ft_num);
2664ebb7c6fdSAlex Wilson 	c->mlc_max_rx_fe_dest = (1 << c->mlc_nic_flow_cur.mhc_flow.
2665ebb7c6fdSAlex Wilson 	    mlcap_flow_nic_rx.mlcap_flow_prop_log_max_destination);
2666ebb7c6fdSAlex Wilson 
2667ebb7c6fdSAlex Wilson 	return (B_TRUE);
2668ebb7c6fdSAlex Wilson 
2669ebb7c6fdSAlex Wilson err:
2670ebb7c6fdSAlex Wilson 	kmem_free(mlxp->mlx_caps, sizeof (mlxcx_caps_t));
2671ebb7c6fdSAlex Wilson 	return (B_FALSE);
2672ebb7c6fdSAlex Wilson }
2673ebb7c6fdSAlex Wilson 
2674ebb7c6fdSAlex Wilson static int
mlxcx_detach(dev_info_t * dip,ddi_detach_cmd_t cmd)2675ebb7c6fdSAlex Wilson mlxcx_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2676ebb7c6fdSAlex Wilson {
2677ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp;
2678ebb7c6fdSAlex Wilson 
2679ebb7c6fdSAlex Wilson 	if (cmd != DDI_DETACH)
2680ebb7c6fdSAlex Wilson 		return (DDI_FAILURE);
2681ebb7c6fdSAlex Wilson 
2682ebb7c6fdSAlex Wilson 	mlxp = ddi_get_driver_private(dip);
2683ebb7c6fdSAlex Wilson 	if (mlxp == NULL) {
2684ebb7c6fdSAlex Wilson 		mlxcx_warn(NULL, "asked to detach, but missing instance "
2685ebb7c6fdSAlex Wilson 		    "private data");
2686ebb7c6fdSAlex Wilson 		return (DDI_FAILURE);
2687ebb7c6fdSAlex Wilson 	}
2688ebb7c6fdSAlex Wilson 
2689ebb7c6fdSAlex Wilson 	if (mlxp->mlx_attach & MLXCX_ATTACH_MAC_HDL) {
2690ebb7c6fdSAlex Wilson 		if (mac_unregister(mlxp->mlx_mac_hdl) != DDI_SUCCESS) {
2691ebb7c6fdSAlex Wilson 			return (DDI_FAILURE);
2692ebb7c6fdSAlex Wilson 		}
2693ebb7c6fdSAlex Wilson 		mlxp->mlx_attach &= ~MLXCX_ATTACH_MAC_HDL;
2694ebb7c6fdSAlex Wilson 	}
2695ebb7c6fdSAlex Wilson 
2696ebb7c6fdSAlex Wilson 	mlxcx_teardown(mlxp);
2697ebb7c6fdSAlex Wilson 	return (DDI_SUCCESS);
2698ebb7c6fdSAlex Wilson }
2699ebb7c6fdSAlex Wilson 
2700ebb7c6fdSAlex Wilson static size_t
mlxcx_calc_rx_ngroups(mlxcx_t * mlxp)2701ebb7c6fdSAlex Wilson mlxcx_calc_rx_ngroups(mlxcx_t *mlxp)
2702ebb7c6fdSAlex Wilson {
2703ebb7c6fdSAlex Wilson 	size_t ngroups = mlxp->mlx_props.mldp_rx_ngroups_large +
2704ebb7c6fdSAlex Wilson 	    mlxp->mlx_props.mldp_rx_ngroups_small;
2705ebb7c6fdSAlex Wilson 	size_t tirlim, flowlim, gflowlim;
2706ebb7c6fdSAlex Wilson 
2707ebb7c6fdSAlex Wilson 	tirlim = mlxp->mlx_caps->mlc_max_tir / MLXCX_TIRS_PER_GROUP;
2708ebb7c6fdSAlex Wilson 	if (tirlim < ngroups) {
2709ebb7c6fdSAlex Wilson 		mlxcx_note(mlxp, "limiting number of rx groups to %u based "
2710ebb7c6fdSAlex Wilson 		    "on number of TIRs available", tirlim);
2711ebb7c6fdSAlex Wilson 		ngroups = tirlim;
2712ebb7c6fdSAlex Wilson 	}
2713ebb7c6fdSAlex Wilson 
2714ebb7c6fdSAlex Wilson 	flowlim = (1 << mlxp->mlx_caps->mlc_max_rx_ft_shift) - 2;
2715ebb7c6fdSAlex Wilson 	if (flowlim < ngroups) {
2716ebb7c6fdSAlex Wilson 		mlxcx_note(mlxp, "limiting number of rx groups to %u based "
2717ebb7c6fdSAlex Wilson 		    "on max size of RX flow tables", flowlim);
2718ebb7c6fdSAlex Wilson 		ngroups = flowlim;
2719ebb7c6fdSAlex Wilson 	}
2720ebb7c6fdSAlex Wilson 
27215f0e3176SPaul Winder 	/*
27225f0e3176SPaul Winder 	 * Restrict the number of groups not to exceed the max flow
27235f0e3176SPaul Winder 	 * table number from the devices capabilities.
27245f0e3176SPaul Winder 	 * There is one root table entry per port and 2 entries per
27255f0e3176SPaul Winder 	 * group.
27265f0e3176SPaul Winder 	 */
27275f0e3176SPaul Winder 	flowlim = (mlxp->mlx_caps->mlc_max_rx_ft - mlxp->mlx_nports) / 2;
27285f0e3176SPaul Winder 	if (flowlim < ngroups) {
27295f0e3176SPaul Winder 		mlxcx_note(mlxp, "limiting number of rx groups to %u based "
27305f0e3176SPaul Winder 		    "on max number of RX flow tables",
27315f0e3176SPaul Winder 		    flowlim);
27325f0e3176SPaul Winder 		ngroups = flowlim;
27335f0e3176SPaul Winder 	}
27345f0e3176SPaul Winder 
2735ebb7c6fdSAlex Wilson 	do {
2736ebb7c6fdSAlex Wilson 		gflowlim = mlxp->mlx_caps->mlc_max_rx_flows - 16 * ngroups - 2;
2737ebb7c6fdSAlex Wilson 		if (gflowlim < ngroups) {
2738ebb7c6fdSAlex Wilson 			mlxcx_note(mlxp, "limiting number of rx groups to %u "
2739ebb7c6fdSAlex Wilson 			    "based on max total RX flows", gflowlim);
2740ebb7c6fdSAlex Wilson 			--ngroups;
2741ebb7c6fdSAlex Wilson 		}
2742ebb7c6fdSAlex Wilson 	} while (gflowlim < ngroups);
2743ebb7c6fdSAlex Wilson 
2744ebb7c6fdSAlex Wilson 	return (ngroups);
2745ebb7c6fdSAlex Wilson }
2746ebb7c6fdSAlex Wilson 
2747ebb7c6fdSAlex Wilson static int
mlxcx_attach(dev_info_t * dip,ddi_attach_cmd_t cmd)2748ebb7c6fdSAlex Wilson mlxcx_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2749ebb7c6fdSAlex Wilson {
2750ebb7c6fdSAlex Wilson 	mlxcx_t *mlxp;
27515f0e3176SPaul Winder 	char tq_name[TASKQ_NAMELEN];
2752ebb7c6fdSAlex Wilson 	uint_t i;
2753ebb7c6fdSAlex Wilson 	int inst, ret;
2754ebb7c6fdSAlex Wilson 
2755ebb7c6fdSAlex Wilson 	if (cmd != DDI_ATTACH)
2756ebb7c6fdSAlex Wilson 		return (DDI_FAILURE);
2757ebb7c6fdSAlex Wilson 
2758ebb7c6fdSAlex Wilson 	inst = ddi_get_instance(dip);
2759ebb7c6fdSAlex Wilson 	ret = ddi_soft_state_zalloc(mlxcx_softstate, inst);
2760ebb7c6fdSAlex Wilson 	if (ret != 0)
2761ebb7c6fdSAlex Wilson 		return (ret);
2762ebb7c6fdSAlex Wilson 
2763ebb7c6fdSAlex Wilson 	mlxp = ddi_get_soft_state(mlxcx_softstate, inst);
2764ebb7c6fdSAlex Wilson 	if (mlxp == NULL)
2765ebb7c6fdSAlex Wilson 		return (DDI_FAILURE);
2766ebb7c6fdSAlex Wilson 	mlxp->mlx_dip = dip;
2767ebb7c6fdSAlex Wilson 	mlxp->mlx_inst = inst;
2768ebb7c6fdSAlex Wilson 	ddi_set_driver_private(dip, mlxp);
2769ebb7c6fdSAlex Wilson 
2770ebb7c6fdSAlex Wilson 	mlxcx_load_props(mlxp);
2771ebb7c6fdSAlex Wilson 
2772ebb7c6fdSAlex Wilson 	mlxcx_fm_init(mlxp);
2773ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_FM;
2774ebb7c6fdSAlex Wilson 
2775ebb7c6fdSAlex Wilson 	if (pci_config_setup(mlxp->mlx_dip, &mlxp->mlx_cfg_handle) !=
2776ebb7c6fdSAlex Wilson 	    DDI_SUCCESS) {
2777ebb7c6fdSAlex Wilson 		mlxcx_warn(mlxp, "failed to initial PCI config space");
2778ebb7c6fdSAlex Wilson 		goto err;
2779ebb7c6fdSAlex Wilson 	}
278083b3f06fSJason King 	mlxcx_get_model(mlxp);
2781ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_PCI_CONFIG;
2782ebb7c6fdSAlex Wilson 
2783ebb7c6fdSAlex Wilson 	if (!mlxcx_regs_map(mlxp)) {
2784ebb7c6fdSAlex Wilson 		goto err;
2785ebb7c6fdSAlex Wilson 	}
2786ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_REGS;
2787ebb7c6fdSAlex Wilson 
2788ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_queue_init(mlxp)) {
2789ebb7c6fdSAlex Wilson 		goto err;
2790ebb7c6fdSAlex Wilson 	}
2791ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_CMD;
2792ebb7c6fdSAlex Wilson 
2793ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_enable_hca(mlxp)) {
2794ebb7c6fdSAlex Wilson 		goto err;
2795ebb7c6fdSAlex Wilson 	}
2796ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_ENABLE_HCA;
2797ebb7c6fdSAlex Wilson 
2798ebb7c6fdSAlex Wilson 	if (!mlxcx_check_issi(mlxp)) {
2799ebb7c6fdSAlex Wilson 		goto err;
2800ebb7c6fdSAlex Wilson 	}
2801ebb7c6fdSAlex Wilson 
2802ebb7c6fdSAlex Wilson 	/*
2803ebb7c6fdSAlex Wilson 	 * We have to get our interrupts now so we know what priority to
2804ebb7c6fdSAlex Wilson 	 * create pagemtx with.
2805ebb7c6fdSAlex Wilson 	 */
2806ebb7c6fdSAlex Wilson 	if (!mlxcx_intr_setup(mlxp)) {
2807ebb7c6fdSAlex Wilson 		goto err;
2808ebb7c6fdSAlex Wilson 	}
2809ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_INTRS;
2810ebb7c6fdSAlex Wilson 
2811ebb7c6fdSAlex Wilson 	mutex_init(&mlxp->mlx_pagemtx, NULL, MUTEX_DRIVER,
2812ebb7c6fdSAlex Wilson 	    DDI_INTR_PRI(mlxp->mlx_intr_pri));
2813ebb7c6fdSAlex Wilson 	avl_create(&mlxp->mlx_pages, mlxcx_page_compare,
2814ebb7c6fdSAlex Wilson 	    sizeof (mlxcx_dev_page_t), offsetof(mlxcx_dev_page_t, mxdp_tree));
2815ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_PAGE_LIST;
2816ebb7c6fdSAlex Wilson 
28175f0e3176SPaul Winder 	/*
28185f0e3176SPaul Winder 	 * Taskq for asynchronous events which may interact with the HCA
28195f0e3176SPaul Winder 	 * via the command interface. Single threaded FIFO.
28205f0e3176SPaul Winder 	 */
28215f0e3176SPaul Winder 	(void) snprintf(tq_name, sizeof (tq_name), "%s_async_%d",
28225f0e3176SPaul Winder 	    ddi_driver_name(mlxp->mlx_dip), mlxp->mlx_inst);
28235f0e3176SPaul Winder 	mlxp->mlx_async_tq = taskq_create(tq_name, 1, minclsyspri, 1, INT_MAX,
28245f0e3176SPaul Winder 	    TASKQ_PREPOPULATE);
28255f0e3176SPaul Winder 	/*
28265f0e3176SPaul Winder 	 * Initialize any pre-allocated taskq param structs.
28275f0e3176SPaul Winder 	 */
28285f0e3176SPaul Winder 	for (i = 0; i <= MLXCX_FUNC_ID_MAX; i++) {
28295f0e3176SPaul Winder 		mlxp->mlx_npages_req[i].mla_mlx = mlxp;
28305f0e3176SPaul Winder 		mutex_init(&mlxp->mlx_npages_req[i].mla_mtx, NULL,
2831e1447ca9SPaul Winder 		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_async_intr_pri));
28325f0e3176SPaul Winder 	}
28335f0e3176SPaul Winder 	mlxp->mlx_attach |= MLXCX_ATTACH_ASYNC_TQ;
28345f0e3176SPaul Winder 
2835ebb7c6fdSAlex Wilson 	if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_BOOT)) {
2836ebb7c6fdSAlex Wilson 		goto err;
2837ebb7c6fdSAlex Wilson 	}
2838ebb7c6fdSAlex Wilson 
2839ebb7c6fdSAlex Wilson 	if (!mlxcx_init_caps(mlxp)) {
2840ebb7c6fdSAlex Wilson 		goto err;
2841ebb7c6fdSAlex Wilson 	}
2842ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_CAPS;
2843ebb7c6fdSAlex Wilson 
2844ebb7c6fdSAlex Wilson 	if (!mlxcx_init_pages(mlxp, MLXCX_QUERY_PAGES_OPMOD_INIT)) {
2845ebb7c6fdSAlex Wilson 		goto err;
2846ebb7c6fdSAlex Wilson 	}
2847ebb7c6fdSAlex Wilson 
2848ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_init_hca(mlxp)) {
2849ebb7c6fdSAlex Wilson 		goto err;
2850ebb7c6fdSAlex Wilson 	}
2851ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_INIT_HCA;
2852ebb7c6fdSAlex Wilson 
2853ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_set_driver_version(mlxp, MLXCX_DRIVER_VERSION)) {
2854ebb7c6fdSAlex Wilson 		goto err;
2855ebb7c6fdSAlex Wilson 	}
2856ebb7c6fdSAlex Wilson 
285783b3f06fSJason King 	if (mlxp->mlx_caps->mlc_pcam) {
285883b3f06fSJason King 		mlxcx_explore_pcam(mlxp, mlxp->mlx_caps);
285983b3f06fSJason King 	}
286083b3f06fSJason King 
2861ebb7c6fdSAlex Wilson 	/*
2862ebb7c6fdSAlex Wilson 	 * The User Access Region (UAR) is needed so we can ring EQ and CQ
2863ebb7c6fdSAlex Wilson 	 * doorbells.
2864ebb7c6fdSAlex Wilson 	 */
2865ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_alloc_uar(mlxp, &mlxp->mlx_uar)) {
2866ebb7c6fdSAlex Wilson 		goto err;
2867ebb7c6fdSAlex Wilson 	}
2868ebb7c6fdSAlex Wilson 	for (i = 0; i < MLXCX_BF_PER_UAR; ++i) {
2869ebb7c6fdSAlex Wilson 		mutex_init(&mlxp->mlx_uar.mlu_bf[i].mbf_mtx, NULL,
2870ebb7c6fdSAlex Wilson 		    MUTEX_DRIVER, DDI_INTR_PRI(mlxp->mlx_intr_pri));
2871ebb7c6fdSAlex Wilson 	}
2872ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_UAR_PD_TD;
2873ebb7c6fdSAlex Wilson 
2874ebb7c6fdSAlex Wilson 	/*
28755f0e3176SPaul Winder 	 * Set up asynchronous event queue which handles control type events
28765f0e3176SPaul Winder 	 * like PAGE_REQUEST and CMD completion events.
2877ebb7c6fdSAlex Wilson 	 *
287880d1a7bdSAlex Wilson 	 * This will enable and arm the interrupt on EQ 0. Note that only page
287980d1a7bdSAlex Wilson 	 * reqs and cmd completions will be handled until we call
288080d1a7bdSAlex Wilson 	 * mlxcx_eq_set_attached further down (this way we don't need an extra
288180d1a7bdSAlex Wilson 	 * set of locks over the mlxcx_t sub-structs not allocated yet)
2882ebb7c6fdSAlex Wilson 	 */
28835f0e3176SPaul Winder 	if (!mlxcx_setup_async_eqs(mlxp)) {
2884ebb7c6fdSAlex Wilson 		goto err;
2885ebb7c6fdSAlex Wilson 	}
2886ebb7c6fdSAlex Wilson 
2887ebb7c6fdSAlex Wilson 	/*
2888ebb7c6fdSAlex Wilson 	 * Allocate a protection and transport domain. These don't really do
2889ebb7c6fdSAlex Wilson 	 * anything for us (they're IB concepts), but we need to give their
2890ebb7c6fdSAlex Wilson 	 * ID numbers in other commands.
2891ebb7c6fdSAlex Wilson 	 */
2892ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_alloc_pd(mlxp, &mlxp->mlx_pd)) {
2893ebb7c6fdSAlex Wilson 		goto err;
2894ebb7c6fdSAlex Wilson 	}
2895ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_alloc_tdom(mlxp, &mlxp->mlx_tdom)) {
2896ebb7c6fdSAlex Wilson 		goto err;
2897ebb7c6fdSAlex Wilson 	}
2898ebb7c6fdSAlex Wilson 	/*
2899ebb7c6fdSAlex Wilson 	 * Fetch the "reserved" lkey that lets us give linear addresses in
2900ebb7c6fdSAlex Wilson 	 * work queue entries, rather than having to mess with the NIC's
2901ebb7c6fdSAlex Wilson 	 * internal MMU.
2902ebb7c6fdSAlex Wilson 	 */
2903ebb7c6fdSAlex Wilson 	if (!mlxcx_cmd_query_special_ctxs(mlxp)) {
2904ebb7c6fdSAlex Wilson 		goto err;
2905ebb7c6fdSAlex Wilson 	}
2906ebb7c6fdSAlex Wilson 
2907ebb7c6fdSAlex Wilson 	/*
2908ebb7c6fdSAlex Wilson 	 * Query our port information and current state, populate the
2909ebb7c6fdSAlex Wilson 	 * mlxcx_port_t structs.
2910ebb7c6fdSAlex Wilson 	 *
2911ebb7c6fdSAlex Wilson 	 * This also sets up the root flow tables and flow groups.
2912ebb7c6fdSAlex Wilson 	 */
2913ebb7c6fdSAlex Wilson 	if (!mlxcx_setup_ports(mlxp)) {
2914ebb7c6fdSAlex Wilson 		goto err;
2915ebb7c6fdSAlex Wilson 	}
2916ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_PORTS;
2917ebb7c6fdSAlex Wilson 
291822d05228SPaul Winder 	mlxcx_load_model_props(mlxp);
291922d05228SPaul Winder 
2920ebb7c6fdSAlex Wilson 	/*
2921ebb7c6fdSAlex Wilson 	 * Set up, enable and arm the rest of the interrupt EQs which will
2922ebb7c6fdSAlex Wilson 	 * service events from CQs.
2923ebb7c6fdSAlex Wilson 	 *
2924ebb7c6fdSAlex Wilson 	 * The MLXCX_ATTACH_INTRS flag covers checking if these need to be
2925ebb7c6fdSAlex Wilson 	 * cleaned up.
2926ebb7c6fdSAlex Wilson 	 */
2927ebb7c6fdSAlex Wilson 	if (!mlxcx_setup_eqs(mlxp)) {
2928ebb7c6fdSAlex Wilson 		goto err;
2929ebb7c6fdSAlex Wilson 	}
2930ebb7c6fdSAlex Wilson 
2931ebb7c6fdSAlex Wilson 	/* Completion queues */
2932ebb7c6fdSAlex Wilson 	list_create(&mlxp->mlx_cqs, sizeof (mlxcx_completion_queue_t),
2933ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_completion_queue_t, mlcq_entry));
2934ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_CQS;
2935ebb7c6fdSAlex Wilson 
2936ebb7c6fdSAlex Wilson 	/* Work queues (send queues, receive queues) */
2937ebb7c6fdSAlex Wilson 	list_create(&mlxp->mlx_wqs, sizeof (mlxcx_work_queue_t),
2938ebb7c6fdSAlex Wilson 	    offsetof(mlxcx_work_queue_t, mlwq_entry));
2939ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_WQS;
2940ebb7c6fdSAlex Wilson 
2941ebb7c6fdSAlex Wilson 	/*
2942ebb7c6fdSAlex Wilson 	 * Construct our arrays of mlxcx_ring_group_ts, which represent the
2943ebb7c6fdSAlex Wilson 	 * "groups" we advertise to MAC.
2944ebb7c6fdSAlex Wilson 	 */
2945ebb7c6fdSAlex Wilson 	mlxp->mlx_rx_ngroups = mlxcx_calc_rx_ngroups(mlxp);
2946ebb7c6fdSAlex Wilson 	mlxp->mlx_rx_groups_size = mlxp->mlx_rx_ngroups *
2947ebb7c6fdSAlex Wilson 	    sizeof (mlxcx_ring_group_t);
2948ebb7c6fdSAlex Wilson 	mlxp->mlx_rx_groups = kmem_zalloc(mlxp->mlx_rx_groups_size, KM_SLEEP);
2949ebb7c6fdSAlex Wilson 
2950ebb7c6fdSAlex Wilson 	mlxp->mlx_tx_ngroups = mlxp->mlx_props.mldp_tx_ngroups;
2951ebb7c6fdSAlex Wilson 	mlxp->mlx_tx_groups_size = mlxp->mlx_tx_ngroups *
2952ebb7c6fdSAlex Wilson 	    sizeof (mlxcx_ring_group_t);
2953ebb7c6fdSAlex Wilson 	mlxp->mlx_tx_groups = kmem_zalloc(mlxp->mlx_tx_groups_size, KM_SLEEP);
2954ebb7c6fdSAlex Wilson 
2955ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_GROUPS;
2956ebb7c6fdSAlex Wilson 
2957ebb7c6fdSAlex Wilson 	/*
2958ebb7c6fdSAlex Wilson 	 * Sets up the free/busy buffers list for keeping track of packet
2959ebb7c6fdSAlex Wilson 	 * buffers.
2960ebb7c6fdSAlex Wilson 	 */
2961ebb7c6fdSAlex Wilson 	if (!mlxcx_setup_bufs(mlxp))
2962ebb7c6fdSAlex Wilson 		goto err;
2963ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_BUFS;
2964ebb7c6fdSAlex Wilson 
2965ebb7c6fdSAlex Wilson 	/*
2966ebb7c6fdSAlex Wilson 	 * Before we tell MAC about our rings/groups, we need to do enough
2967ebb7c6fdSAlex Wilson 	 * setup on them to be sure about the numbers and configuration that
2968ebb7c6fdSAlex Wilson 	 * we have. This will do basically everything short of allocating
2969ebb7c6fdSAlex Wilson 	 * packet buffers and starting the rings up.
2970ebb7c6fdSAlex Wilson 	 */
2971ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_tx_ngroups; ++i) {
2972ebb7c6fdSAlex Wilson 		if (!mlxcx_tx_group_setup(mlxp, &mlxp->mlx_tx_groups[i]))
2973ebb7c6fdSAlex Wilson 			goto err;
2974ebb7c6fdSAlex Wilson 	}
2975ebb7c6fdSAlex Wilson 	for (i = 0; i < mlxp->mlx_rx_ngroups; ++i) {
2976ebb7c6fdSAlex Wilson 		if (!mlxcx_rx_group_setup(mlxp, &mlxp->mlx_rx_groups[i]))
2977ebb7c6fdSAlex Wilson 			goto err;
2978ebb7c6fdSAlex Wilson 	}
2979ebb7c6fdSAlex Wilson 
29805f0e3176SPaul Winder 	/*
29815f0e3176SPaul Winder 	 * Set up periodic fault check timers which check the queue states,
29825f0e3176SPaul Winder 	 * set up should be after all the queues have been initialized and
29835f0e3176SPaul Winder 	 * consequently the teardown of timers must happen before
29845f0e3176SPaul Winder 	 * queue teardown.
29855f0e3176SPaul Winder 	 */
29865f0e3176SPaul Winder 	if (!mlxcx_setup_checktimers(mlxp)) {
29875f0e3176SPaul Winder 		goto err;
29885f0e3176SPaul Winder 	}
29895f0e3176SPaul Winder 	mlxp->mlx_attach |= MLXCX_ATTACH_CHKTIMERS;
29905f0e3176SPaul Winder 
299115174c59SRobert Mustacchi 	/*
299215174c59SRobert Mustacchi 	 * Some devices may not have a working temperature sensor; however,
299315174c59SRobert Mustacchi 	 * there isn't a great way for us to know. We shouldn't fail attach if
299415174c59SRobert Mustacchi 	 * this doesn't work.
299515174c59SRobert Mustacchi 	 */
299615174c59SRobert Mustacchi 	if (mlxcx_setup_sensors(mlxp)) {
299715174c59SRobert Mustacchi 		mlxp->mlx_attach |= MLXCX_ATTACH_SENSORS;
29981718c316SRobert Mustacchi 	}
29991718c316SRobert Mustacchi 
3000ebb7c6fdSAlex Wilson 	/*
3001ebb7c6fdSAlex Wilson 	 * Finally, tell MAC that we exist!
3002ebb7c6fdSAlex Wilson 	 */
3003ebb7c6fdSAlex Wilson 	if (!mlxcx_register_mac(mlxp)) {
3004ebb7c6fdSAlex Wilson 		goto err;
3005ebb7c6fdSAlex Wilson 	}
3006ebb7c6fdSAlex Wilson 	mlxp->mlx_attach |= MLXCX_ATTACH_MAC_HDL;
3007ebb7c6fdSAlex Wilson 
300880d1a7bdSAlex Wilson 	/*
300980d1a7bdSAlex Wilson 	 * This tells the interrupt handlers they can start processing events
301080d1a7bdSAlex Wilson 	 * other than cmd completions and page requests.
301180d1a7bdSAlex Wilson 	 */
301280d1a7bdSAlex Wilson 	mlxcx_eq_set_attached(mlxp);
301380d1a7bdSAlex Wilson 
3014ebb7c6fdSAlex Wilson 	return (DDI_SUCCESS);
3015ebb7c6fdSAlex Wilson 
3016ebb7c6fdSAlex Wilson err:
3017ebb7c6fdSAlex Wilson 	mlxcx_teardown(mlxp);
3018ebb7c6fdSAlex Wilson 	return (DDI_FAILURE);
3019ebb7c6fdSAlex Wilson }
3020ebb7c6fdSAlex Wilson 
3021ebb7c6fdSAlex Wilson static struct cb_ops mlxcx_cb_ops = {
3022ebb7c6fdSAlex Wilson 	.cb_open = nulldev,
3023ebb7c6fdSAlex Wilson 	.cb_close = nulldev,
3024ebb7c6fdSAlex Wilson 	.cb_strategy = nodev,
3025ebb7c6fdSAlex Wilson 	.cb_print = nodev,
3026ebb7c6fdSAlex Wilson 	.cb_dump = nodev,
3027ebb7c6fdSAlex Wilson 	.cb_read = nodev,
3028ebb7c6fdSAlex Wilson 	.cb_write = nodev,
3029ebb7c6fdSAlex Wilson 	.cb_ioctl = nodev,
3030ebb7c6fdSAlex Wilson 	.cb_devmap = nodev,
3031ebb7c6fdSAlex Wilson 	.cb_mmap = nodev,
3032ebb7c6fdSAlex Wilson 	.cb_segmap = nodev,
3033ebb7c6fdSAlex Wilson 	.cb_chpoll = nochpoll,
3034ebb7c6fdSAlex Wilson 	.cb_prop_op = ddi_prop_op,
3035ebb7c6fdSAlex Wilson 	.cb_flag = D_MP,
3036ebb7c6fdSAlex Wilson 	.cb_rev = CB_REV,
3037ebb7c6fdSAlex Wilson 	.cb_aread = nodev,
3038ebb7c6fdSAlex Wilson 	.cb_awrite = nodev
3039ebb7c6fdSAlex Wilson };
3040ebb7c6fdSAlex Wilson 
3041ebb7c6fdSAlex Wilson static struct dev_ops mlxcx_dev_ops = {
3042ebb7c6fdSAlex Wilson 	.devo_rev = DEVO_REV,
3043ebb7c6fdSAlex Wilson 	.devo_refcnt = 0,
3044ebb7c6fdSAlex Wilson 	.devo_getinfo = NULL,
3045ebb7c6fdSAlex Wilson 	.devo_identify = nulldev,
3046ebb7c6fdSAlex Wilson 	.devo_probe = nulldev,
3047ebb7c6fdSAlex Wilson 	.devo_attach = mlxcx_attach,
3048ebb7c6fdSAlex Wilson 	.devo_detach = mlxcx_detach,
3049ebb7c6fdSAlex Wilson 	.devo_reset = nodev,
3050ebb7c6fdSAlex Wilson 	.devo_quiesce = ddi_quiesce_not_supported,
3051ebb7c6fdSAlex Wilson 	.devo_cb_ops = &mlxcx_cb_ops
3052ebb7c6fdSAlex Wilson };
3053ebb7c6fdSAlex Wilson 
3054ebb7c6fdSAlex Wilson static struct modldrv mlxcx_modldrv = {
3055ebb7c6fdSAlex Wilson 	.drv_modops = &mod_driverops,
3056ebb7c6fdSAlex Wilson 	.drv_linkinfo = "Mellanox Connect-X 4/5/6",
3057ebb7c6fdSAlex Wilson 	.drv_dev_ops = &mlxcx_dev_ops
3058ebb7c6fdSAlex Wilson };
3059ebb7c6fdSAlex Wilson 
3060ebb7c6fdSAlex Wilson static struct modlinkage mlxcx_modlinkage = {
3061ebb7c6fdSAlex Wilson 	.ml_rev = MODREV_1,
3062ebb7c6fdSAlex Wilson 	.ml_linkage = { &mlxcx_modldrv, NULL }
3063ebb7c6fdSAlex Wilson };
3064ebb7c6fdSAlex Wilson 
3065ebb7c6fdSAlex Wilson int
_init(void)3066ebb7c6fdSAlex Wilson _init(void)
3067ebb7c6fdSAlex Wilson {
3068ebb7c6fdSAlex Wilson 	int ret;
3069ebb7c6fdSAlex Wilson 
3070ebb7c6fdSAlex Wilson 	ret = ddi_soft_state_init(&mlxcx_softstate, sizeof (mlxcx_t), 0);
3071ebb7c6fdSAlex Wilson 	if (ret != 0) {
3072ebb7c6fdSAlex Wilson 		return (ret);
3073ebb7c6fdSAlex Wilson 	}
3074ebb7c6fdSAlex Wilson 
3075ebb7c6fdSAlex Wilson 	mac_init_ops(&mlxcx_dev_ops, MLXCX_MODULE_NAME);
3076ebb7c6fdSAlex Wilson 
3077ebb7c6fdSAlex Wilson 	if ((ret = mod_install(&mlxcx_modlinkage)) != DDI_SUCCESS) {
3078ebb7c6fdSAlex Wilson 		mac_fini_ops(&mlxcx_dev_ops);
3079ebb7c6fdSAlex Wilson 		ddi_soft_state_fini(&mlxcx_softstate);
3080ebb7c6fdSAlex Wilson 		return (ret);
3081ebb7c6fdSAlex Wilson 	}
3082ebb7c6fdSAlex Wilson 
3083ebb7c6fdSAlex Wilson 	return (DDI_SUCCESS);
3084ebb7c6fdSAlex Wilson }
3085ebb7c6fdSAlex Wilson 
3086ebb7c6fdSAlex Wilson int
_info(struct modinfo * modinfop)3087ebb7c6fdSAlex Wilson _info(struct modinfo *modinfop)
3088ebb7c6fdSAlex Wilson {
3089ebb7c6fdSAlex Wilson 	return (mod_info(&mlxcx_modlinkage, modinfop));
3090ebb7c6fdSAlex Wilson }
3091ebb7c6fdSAlex Wilson 
3092ebb7c6fdSAlex Wilson int
_fini(void)3093ebb7c6fdSAlex Wilson _fini(void)
3094ebb7c6fdSAlex Wilson {
3095ebb7c6fdSAlex Wilson 	int ret;
3096ebb7c6fdSAlex Wilson 
3097ebb7c6fdSAlex Wilson 	if ((ret = mod_remove(&mlxcx_modlinkage)) != DDI_SUCCESS) {
3098ebb7c6fdSAlex Wilson 		return (ret);
3099ebb7c6fdSAlex Wilson 	}
3100ebb7c6fdSAlex Wilson 
3101ebb7c6fdSAlex Wilson 	mac_fini_ops(&mlxcx_dev_ops);
3102ebb7c6fdSAlex Wilson 
3103ebb7c6fdSAlex Wilson 	ddi_soft_state_fini(&mlxcx_softstate);
3104ebb7c6fdSAlex Wilson 
3105ebb7c6fdSAlex Wilson 	return (DDI_SUCCESS);
3106ebb7c6fdSAlex Wilson }
3107