1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * hermon_cfg.c
29  *    Hermon Configuration Profile Routines
30  *
31  *    Implements the routines necessary for initializing and (later) tearing
32  *    down the list of Hermon configuration information.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 
42 #include <sys/ib/adapters/hermon/hermon.h>
43 
44 /*
45  * Below are the elements that make up the Hermon configuration profile.
46  * For advanced users who wish to alter these values, this can be done via
47  * the /etc/system file. By default, values are assigned to the number of
48  * supported resources, either from the HCA's reported capacities or by
49  * a by-design limit in the driver.
50  */
51 
52 /* Number of supported QPs, CQs and SRQs */
53 uint32_t hermon_log_num_qp		= HERMON_NUM_QP_SHIFT;
54 uint32_t hermon_log_num_cq		= HERMON_NUM_CQ_SHIFT;
55 uint32_t hermon_log_num_srq		= HERMON_NUM_SRQ_SHIFT;
56 
57 /* Number of supported SGL per WQE for SQ/RQ, and for SRQ */
58 /* XXX use the same for all queues if limitation in srq.h is resolved */
59 uint32_t hermon_wqe_max_sgl		= HERMON_NUM_SGL_PER_WQE;
60 uint32_t hermon_srq_max_sgl		= HERMON_SRQ_MAX_SGL;
61 
62 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
63 uint32_t hermon_log_num_rdb_per_qp	= HERMON_LOG_NUM_RDB_PER_QP;
64 
65 /*
66  * Number of multicast groups (MCGs), number of QP per MCG, and the number
67  * of entries (from the total number) in the multicast group "hash table"
68  */
69 uint32_t hermon_log_num_mcg		= HERMON_NUM_MCG_SHIFT;
70 uint32_t hermon_num_qp_per_mcg		= HERMON_NUM_QP_PER_MCG;
71 uint32_t hermon_log_num_mcg_hash		= HERMON_NUM_MCG_HASH_SHIFT;
72 
73 /* Number of UD AVs */
74 uint32_t hermon_log_num_ah		= HERMON_NUM_AH_SHIFT;
75 
76 /* Number of EQs and their default size */
77 uint32_t hermon_log_num_eq		= HERMON_NUM_EQ_SHIFT;
78 uint32_t hermon_log_eq_sz		= HERMON_DEFAULT_EQ_SZ_SHIFT;
79 
80 /*
81  * Number of supported MPTs, MTTs and also the maximum MPT size.
82  */
83 uint32_t hermon_log_num_mtt		= HERMON_NUM_MTT_SHIFT;
84 uint32_t hermon_log_num_dmpt		= HERMON_NUM_DMPT_SHIFT;
85 uint32_t hermon_log_max_mrw_sz		= HERMON_MAX_MEM_MPT_SHIFT;
86 
87 /*
88  * Number of supported UAR (User Access Regions) for this HCA.
89  * We could in the future read in uar_sz from devlim, and thus
90  * derive the number of UAR. Since this is derived from PAGESIZE,
91  * however, this means that x86 systems would have twice as many
92  * UARs as SPARC systems. Therefore for consistency's sake, we will
93  * just use 1024 pages, which is the maximum on SPARC systems.
94  */
95 uint32_t hermon_log_num_uar		= HERMON_NUM_UAR_SHIFT;
96 
97 /*
98  * Number of remaps allowed for FMR before a sync is required.  This value
99  * determines how many times we can fmr_deregister() before the underlying fmr
100  * framework places the region to wait for an MTT_SYNC operation, cleaning up
101  * the old mappings.
102  */
103 uint32_t hermon_fmr_num_remaps		= HERMON_FMR_MAX_REMAPS;
104 
105 /*
106  * Number of supported Hermon mailboxes ("In" and "Out") and their maximum
107  * sizes, respectively
108  */
109 uint32_t hermon_log_num_inmbox		= HERMON_NUM_MAILBOXES_SHIFT;
110 uint32_t hermon_log_num_outmbox		= HERMON_NUM_MAILBOXES_SHIFT;
111 uint32_t hermon_log_inmbox_size		= HERMON_MBOX_SIZE_SHIFT;
112 uint32_t hermon_log_outmbox_size	= HERMON_MBOX_SIZE_SHIFT;
113 uint32_t hermon_log_num_intr_inmbox	= HERMON_NUM_INTR_MAILBOXES_SHIFT;
114 uint32_t hermon_log_num_intr_outmbox	= HERMON_NUM_INTR_MAILBOXES_SHIFT;
115 
116 /* Number of supported Protection Domains (PD) */
117 uint32_t hermon_log_num_pd		= HERMON_NUM_PD_SHIFT;
118 
119 /*
120  * Number of total supported PKeys per PKey table (i.e.
121  * per port).  Also the number of SGID per GID table.
122  */
123 uint32_t hermon_log_max_pkeytbl		= HERMON_NUM_PKEYTBL_SHIFT;
124 uint32_t hermon_log_max_gidtbl		= HERMON_NUM_GIDTBL_SHIFT;
125 
126 /* Maximum supported MTU and portwidth */
127 uint32_t hermon_max_mtu			= HERMON_MAX_MTU;
128 uint32_t hermon_max_port_width		= HERMON_MAX_PORT_WIDTH;
129 
130 /* Number of supported Virtual Lanes (VL) */
131 uint32_t hermon_max_vlcap		= HERMON_MAX_VLCAP;
132 
133 /*
134  * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
135  * QP1, respectively.
136  */
137 uint32_t hermon_qp0_agents_in_fw	= 0;
138 uint32_t hermon_qp1_agents_in_fw	= 0;
139 
140 /*
141  * Whether DMA mappings should bypass the PCI IOMMU or not.
142  * hermon_iommu_bypass is a global setting for all memory addresses.
143  */
144 uint32_t hermon_iommu_bypass		= 1;
145 
146 /*
147  * Whether *DATA* buffers should be bound w/ Relaxed Ordering (RO) turned on
148  * via the SW workaround (HCAs don't support RO in HW).  Defaulted on,
149  * though care must be taken w/ some Userland clients that *MAY* have
150  * peeked in the data to understand when data xfer was done - MPI does
151  * as an efficiency
152  */
153 
154 uint32_t hermon_kernel_data_ro		= HERMON_RO_ENABLED;	/* default */
155 uint32_t hermon_user_data_ro		= HERMON_RO_ENABLED;	/* default */
156 
157 /*
158  * Whether Hermon should use MSI (Message Signaled Interrupts), if available.
159  * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
160  */
161 uint32_t hermon_use_msi_if_avail	= 1;
162 
163 /*
164  * This is a patchable variable that determines the time we will wait after
165  * initiating SW reset before we do our first read from Hermon config space.
166  * If this value is set too small (less than the default 100ms), it is
167  * possible for Hermon hardware to be unready to respond to the config cycle
168  * reads.  This could cause master abort on the PCI bridge.  Note: If
169  * "hermon_sw_reset_delay" is set to zero, then no software reset of the Hermon
170  * device will be attempted.
171  */
172 uint32_t hermon_sw_reset_delay		= HERMON_SW_RESET_DELAY;
173 
174 /*
175  * These are patchable variables for hermon command polling. The poll_delay is
176  * the number of usec to wait in-between calls to poll the 'go' bit.  The
177  * poll_max is the total number of usec to loop in waiting for the 'go' bit to
178  * clear.
179  */
180 uint32_t hermon_cmd_poll_delay		= HERMON_CMD_POLL_DELAY;
181 uint32_t hermon_cmd_poll_max		= HERMON_CMD_POLL_MAX;
182 
183 /*
184  * This is a patchable variable that determines the frequency with which
185  * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
186  * set in at least every 2^hermon_qp_ackreq_freq packets (but at least once
187  * per message, i.e. in the last packet).  Tuning this value can increase
188  * IB fabric utilization by cutting down on the number of unnecessary ACKs.
189  */
190 uint32_t hermon_qp_ackreq_freq		= HERMON_QP_ACKREQ_FREQ;
191 
192 static void hermon_cfg_wqe_sizes(hermon_state_t *state,
193     hermon_cfg_profile_t *cp);
194 #ifdef __sparc
195 static void hermon_check_iommu_bypass(hermon_state_t *state,
196     hermon_cfg_profile_t *cp);
197 #endif
198 
199 /*
200  * hermon_cfg_profile_init_phase1()
201  *    Context: Only called from attach() path context
202  */
203 int
204 hermon_cfg_profile_init_phase1(hermon_state_t *state)
205 {
206 	hermon_cfg_profile_t	*cp;
207 
208 	/*
209 	 * Allocate space for the configuration profile structure
210 	 */
211 	cp = (hermon_cfg_profile_t *)kmem_zalloc(sizeof (hermon_cfg_profile_t),
212 	    KM_SLEEP);
213 
214 	/*
215 	 * Common to all profiles.
216 	 */
217 	cp->cp_qp0_agents_in_fw		= hermon_qp0_agents_in_fw;
218 	cp->cp_qp1_agents_in_fw		= hermon_qp1_agents_in_fw;
219 	cp->cp_sw_reset_delay		= hermon_sw_reset_delay;
220 	cp->cp_cmd_poll_delay		= hermon_cmd_poll_delay;
221 	cp->cp_cmd_poll_max		= hermon_cmd_poll_max;
222 	cp->cp_ackreq_freq		= hermon_qp_ackreq_freq;
223 	cp->cp_fmr_max_remaps		= hermon_fmr_num_remaps;
224 
225 	/*
226 	 * Although most of the configuration is enabled in "phase2" of the
227 	 * cfg_profile_init, we have to setup the OUT mailboxes soon, since
228 	 * they are used immediately after this "phase1" completes, to run the
229 	 * firmware and get the device limits, which we'll need for 'phase2'.
230 	 * That's done in rsrc_init_phase1, called shortly after we do this
231 	 * and the sw reset - see hermon.c
232 	 */
233 	if (state->hs_cfg_profile_setting == HERMON_CFG_MEMFREE) {
234 		cp->cp_log_num_outmbox		= hermon_log_num_outmbox;
235 		cp->cp_log_outmbox_size		= hermon_log_outmbox_size;
236 		cp->cp_log_num_inmbox		= hermon_log_num_inmbox;
237 		cp->cp_log_inmbox_size		= hermon_log_inmbox_size;
238 		cp->cp_log_num_intr_inmbox	= hermon_log_num_intr_inmbox;
239 		cp->cp_log_num_intr_outmbox	= hermon_log_num_intr_outmbox;
240 
241 	} else {
242 		return (DDI_FAILURE);
243 	}
244 
245 	/* Attach the configuration profile to Hermon softstate */
246 	state->hs_cfg_profile = cp;
247 
248 	return (DDI_SUCCESS);
249 }
250 
251 /*
252  * hermon_cfg_profile_init_phase2()
253  *    Context: Only called from attach() path context
254  */
255 int
256 hermon_cfg_profile_init_phase2(hermon_state_t *state)
257 {
258 	hermon_cfg_profile_t	*cp;
259 	hermon_hw_querydevlim_t	*devlim;
260 	hermon_hw_query_port_t	*port;
261 	uint32_t		num, size;
262 	int			i;
263 
264 	/* Read in the device limits */
265 	devlim = &state->hs_devlim;
266 	/* and the port information */
267 	port = &state->hs_queryport;
268 
269 	/* Read the configuration profile */
270 	cp = state->hs_cfg_profile;
271 
272 	/*
273 	 * We configure all Hermon HCAs with the same profile, which
274 	 * is based upon the default value assignments above. If we want to
275 	 * add additional profiles in the future, they can be added here.
276 	 * Note the reference to "Memfree" is a holdover from Arbel/Sinai
277 	 */
278 	if (state->hs_cfg_profile_setting != HERMON_CFG_MEMFREE) {
279 		return (DDI_FAILURE);
280 	}
281 
282 	/*
283 	 * Note for most configuration parameters, we use the lesser of our
284 	 * desired configuration value or the device-defined maximum value.
285 	 */
286 	cp->cp_log_num_mtt	= min(hermon_log_num_mtt, devlim->log_max_mtt);
287 	cp->cp_log_num_dmpt = min(hermon_log_num_dmpt, devlim->log_max_dmpt);
288 	cp->cp_log_num_cmpt	= HERMON_LOG_CMPT_PER_TYPE + 2;	/* times 4, */
289 								/* per PRM */
290 	cp->cp_log_max_mrw_sz	= min(hermon_log_max_mrw_sz,
291 	    devlim->log_max_mrw_sz);
292 	cp->cp_log_num_pd	= min(hermon_log_num_pd, devlim->log_max_pd);
293 	cp->cp_log_num_qp	= min(hermon_log_num_qp, devlim->log_max_qp);
294 	cp->cp_log_num_cq	= min(hermon_log_num_cq, devlim->log_max_cq);
295 	cp->cp_log_num_srq	= min(hermon_log_num_srq, devlim->log_max_srq);
296 	cp->cp_log_num_eq	= min(hermon_log_num_eq, devlim->log_max_eq);
297 	cp->cp_log_eq_sz	= min(hermon_log_eq_sz, devlim->log_max_eq_sz);
298 	cp->cp_log_num_rdb	= cp->cp_log_num_qp +
299 	    min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
300 	cp->cp_hca_max_rdma_in_qp = cp->cp_hca_max_rdma_out_qp =
301 	    1 << min(hermon_log_num_rdb_per_qp, devlim->log_max_ra_req_qp);
302 	cp->cp_num_qp_per_mcg	= min(hermon_num_qp_per_mcg,
303 	    1 << devlim->log_max_qp_mcg);
304 	cp->cp_log_num_mcg 	= min(hermon_log_num_mcg, devlim->log_max_mcg);
305 	cp->cp_log_num_mcg_hash	= hermon_log_num_mcg_hash;
306 
307 	/* until srq_resize is debugged, disable it */
308 	cp->cp_srq_resize_enabled = 0;
309 
310 	/* cp->cp_log_num_uar	= hermon_log_num_uar; */
311 	/*
312 	 * now, we HAVE to calculate the number of UAR pages, so that we can
313 	 * get the blueflame stuff correct as well
314 	 */
315 
316 	size = devlim->log_max_uar_sz;
317 	/* 1MB (2^^20) times size (2^^size) / sparc_pg (2^^13) */
318 	num = (20 + size) - 13;		/* XXX - consider using PAGESHIFT */
319 	if (devlim->blu_flm)
320 		num -= 1;	/* if blueflame, only half the size for UARs */
321 	cp->cp_log_num_uar	= min(hermon_log_num_uar, num);
322 
323 
324 	/* while we're at it, calculate the index of the kernel uar page */
325 	/* either the reserved uar's or 128, whichever is smaller */
326 	state->hs_kernel_uar_index = (devlim->num_rsvd_uar > 128) ?
327 	    devlim->num_rsvd_uar : 128;
328 
329 	cp->cp_log_max_pkeytbl	= port->log_max_pkey;
330 
331 	cp->cp_log_max_qp_sz	= devlim->log_max_qp_sz;
332 	cp->cp_log_max_cq_sz	= devlim->log_max_cq_sz;
333 	cp->cp_log_max_srq_sz	= devlim->log_max_srq_sz;
334 	cp->cp_log_max_gidtbl	= port->log_max_gid;
335 	cp->cp_max_mtu		= port->ib_mtu;	/* XXX now from query_port */
336 	cp->cp_max_port_width	= port->ib_port_wid;  /* now from query_port */
337 	cp->cp_max_vlcap	= port->max_vl;
338 	cp->cp_num_ports	= devlim->num_ports;
339 	cp->cp_log_num_ah	= hermon_log_num_ah;
340 
341 	/* allocate variable sized arrays */
342 	for (i = 0; i < HERMON_MAX_PORTS; i++) {
343 		state->hs_pkey[i] = kmem_zalloc((1 << cp->cp_log_max_pkeytbl) *
344 		    sizeof (ib_pkey_t), KM_SLEEP);
345 		state->hs_guid[i] = kmem_zalloc((1 << cp->cp_log_max_gidtbl) *
346 		    sizeof (ib_guid_t), KM_SLEEP);
347 	}
348 
349 	/* Determine WQE sizes from requested max SGLs */
350 	hermon_cfg_wqe_sizes(state, cp);
351 
352 	/*
353 	 * Set IOMMU bypass or not.  Ensure consistency of flags with
354 	 * architecture type.
355 	 */
356 #ifdef __sparc
357 	if (hermon_iommu_bypass == 1) {
358 		hermon_check_iommu_bypass(state, cp);
359 	} else {
360 		cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
361 	}
362 #else
363 	cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
364 #endif
365 
366 	/* Set whether to use MSIs or not */
367 	cp->cp_use_msi_if_avail = hermon_use_msi_if_avail;
368 
369 	return (DDI_SUCCESS);
370 }
371 
372 
373 /*
374  * hermon_cfg_profile_fini()
375  *    Context: Only called from attach() and/or detach() path contexts
376  */
377 void
378 hermon_cfg_profile_fini(hermon_state_t *state)
379 {
380 	/*
381 	 * Free up the space for configuration profile
382 	 */
383 	kmem_free(state->hs_cfg_profile, sizeof (hermon_cfg_profile_t));
384 }
385 
386 
387 /*
388  * hermon_cfg_wqe_sizes()
389  *    Context: Only called from attach() path context
390  */
391 static void
392 hermon_cfg_wqe_sizes(hermon_state_t *state, hermon_cfg_profile_t *cp)
393 {
394 	uint_t	max_size, log2;
395 	uint_t	max_sgl, real_max_sgl;
396 
397 	/*
398 	 * Get the requested maximum number SGL per WQE from the Hermon
399 	 * patchable variable
400 	 */
401 	max_sgl = hermon_wqe_max_sgl;
402 
403 	/*
404 	 * Use requested maximum number of SGL to calculate the max descriptor
405 	 * size (while guaranteeing that the descriptor size is a power-of-2
406 	 * cachelines).  We have to use the calculation for QP1 MLX transport
407 	 * because the possibility that we might need to inline a GRH, along
408 	 * with all the other headers and alignment restrictions, sets the
409 	 * maximum for the number of SGLs that we can advertise support for.
410 	 */
411 	max_size = (HERMON_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
412 	log2 = highbit(max_size);
413 	if ((max_size & (max_size - 1)) == 0) {
414 		log2 = log2 - 1;
415 	}
416 	max_size = (1 << log2);
417 
418 	max_size = min(max_size, state->hs_devlim.max_desc_sz_sq);
419 
420 	/*
421 	 * Then use the calculated max descriptor size to determine the "real"
422 	 * maximum SGL (the number beyond which we would roll over to the next
423 	 * power-of-2).
424 	 */
425 	real_max_sgl = (max_size - HERMON_QP_WQE_MLX_QP1_HDRS) >> 4;
426 
427 	/* Then save away this configuration information */
428 	cp->cp_wqe_max_sgl	= max_sgl;
429 	cp->cp_wqe_real_max_sgl = real_max_sgl;
430 
431 	/* SRQ SGL gets set to it's own patchable variable value */
432 	cp->cp_srq_max_sgl		= hermon_srq_max_sgl;
433 }
434 
#ifdef __sparc
/*
 * hermon_cfg_disable_data_ro()
 *    Context: Only called from attach() path context
 *
 *    Turns the Relaxed Ordering workaround off for both kernel and user
 *    data buffers.
 */
static void
hermon_cfg_disable_data_ro(void)
{
	hermon_kernel_data_ro = HERMON_RO_DISABLED;
	hermon_user_data_ro = HERMON_RO_DISABLED;
}

/*
 * hermon_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Decides whether IOMMU bypass can be used and stores the answer in
 *    cp->cp_iommu_bypass.  It does so by attempting a small trial DMA
 *    allocation and bind with the "force physical" and "relaxed ordering"
 *    attributes set; if any step other than the attribute check fails,
 *    Relaxed Ordering is disabled for data buffers.
 *
 *    XXX This is a DMA allocation routine outside the normal
 *	  path. FMA hardening will not like this.
 */
static void
hermon_check_iommu_bypass(hermon_state_t *state, hermon_cfg_profile_t *cp)
{
	ddi_dma_attr_t		attr;
	ddi_dma_handle_t	hdl;
	ddi_acc_handle_t	acc;
	ddi_dma_cookie_t	cookie;
	caddr_t			buf;
	size_t			buflen;
	uint_t			ncookies;
	int			rc;

	/* Start from the driver's usual attributes, then force bypass */
	hermon_dma_attr_init(state, &attr);
	attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL |
	    DDI_DMA_RELAXED_ORDERING;

	/*
	 * This function is only called when IOMMU bypass was requested, so
	 * the outcome of ddi_dma_alloc_handle() decides the mapping mode.
	 * DDI_DMA_BADATTR means bypass is not possible with our PCI bridge
	 * parent, and we fall back to non-bypass (ie: normal) mapping.
	 */
	rc = ddi_dma_alloc_handle(state->hs_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &hdl);
	if (rc == DDI_DMA_BADATTR) {
		cp->cp_iommu_bypass = HERMON_BINDMEM_NORMAL;
		return;
	}
	if (rc != DDI_SUCCESS) {
		/* Failed somehow: keep bypass, but give up on RO */
		hermon_cfg_disable_data_ro();
		cp->cp_iommu_bypass = HERMON_BINDMEM_BYPASS;
		return;
	}
	cp->cp_iommu_bypass = HERMON_BINDMEM_BYPASS;

	/* Trial allocation of a small buffer against the new handle */
	rc = ddi_dma_mem_alloc(hdl, 256, &state->hs_reg_accattr,
	    DDI_DMA_CONSISTENT, DDI_DMA_SLEEP, NULL, &buf, &buflen, &acc);
	if (rc != DDI_SUCCESS) {
		/* Failed somehow: nothing to free except the handle */
		hermon_cfg_disable_data_ro();
		goto free_handle;
	}

	/* Trial bind; anything short of a full mapping costs us RO */
	rc = ddi_dma_addr_bind_handle(hdl, NULL, buf, buflen, DDI_DMA_RDWR,
	    DDI_DMA_SLEEP, NULL, &cookie, &ncookies);
	if (rc != DDI_DMA_MAPPED) {
		hermon_cfg_disable_data_ro();
	} else {
		(void) ddi_dma_unbind_handle(hdl);
	}

	ddi_dma_mem_free(&acc);
free_handle:
	ddi_dma_free_handle(&hdl);
}
#endif
507