xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/tavor/tavor_cfg.c (revision de710d24d2fae4468e64da999e1d952a247f142c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_cfg.c
29  *    Tavor Configuration Profile Routines
30  *
31  *    Implements the routines necessary for initializing and (later) tearing
32  *    down the list of Tavor configuration information.
33  */
34 
35 #include <sys/sysmacros.h>
36 #include <sys/types.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/modctl.h>
41 #include <sys/bitmap.h>
42 
43 #include <sys/ib/adapters/tavor/tavor.h>
44 
45 /* Set to enable alternative configurations: 0 = automatic config, 1 = manual */
46 uint32_t tavor_alt_config_enable	= 0;
47 
48 /* Number of supported QPs and their maximum size */
49 uint32_t tavor_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
50 uint32_t tavor_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
51 
52 /* Number of supported SGL per WQE */
53 uint32_t tavor_wqe_max_sgl		= TAVOR_NUM_WQE_SGL;
54 
55 /* Number of supported CQs and their maximum size */
56 uint32_t tavor_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
57 uint32_t tavor_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
58 
59 /* Select to enable SRQ or not; NOTE: 0 for disabled, 1 for enabled */
60 uint32_t tavor_srq_enable		= 1;
61 
62 /* Number of supported SRQs and their maximum size */
63 uint32_t tavor_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
64 uint32_t tavor_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
65 uint32_t tavor_srq_max_sgl		= TAVOR_SRQ_MAX_SGL;
66 
67 /* Default size for all EQs */
68 uint32_t tavor_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
69 
70 /* Number of supported RDB (for incoming RDMA Read/Atomic) */
71 uint32_t tavor_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
72 
73 /*
74  * Number of support multicast groups, number of QP per multicast group, and
75  * the number of entries (from the total number) in the multicast group "hash
76  * table"
77  */
78 uint32_t tavor_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
79 uint32_t tavor_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
80 uint32_t tavor_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
81 
82 /*
83  * Number of supported MPTs (memory regions and windows) and their maximum
84  * size.  Also the number of MTT per "MTT segment" (see tavor_mr.h for more
85  * details)
86  */
87 uint32_t tavor_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
88 uint32_t tavor_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
89 uint32_t tavor_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
90 
91 /*
92  * Number of supported Tavor mailboxes ("In" and "Out") and their maximum
93  * sizes, respectively
94  */
95 uint32_t tavor_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
96 uint32_t tavor_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
97 uint32_t tavor_log_num_intr_inmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
98 uint32_t tavor_log_num_intr_outmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
99 uint32_t tavor_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
100 uint32_t tavor_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
101 
102 /* Number of supported UAR pages */
103 uint32_t tavor_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
104 
105 /* Number of supported Protection Domains (PD) */
106 uint32_t tavor_log_num_pd		= TAVOR_NUM_PD_SHIFT;
107 
108 /* Number of supported Address Handles (AH) */
109 uint32_t tavor_log_num_ah		= TAVOR_NUM_AH_SHIFT;
110 
111 /*
112  * Number of total supported PKeys per PKey table (i.e.
113  * per port).  Also the number of SGID per GID table.
114  */
115 uint32_t tavor_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
116 uint32_t tavor_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
117 
118 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
119 uint32_t tavor_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
120 uint32_t tavor_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
121 
122 /* Maximum supported MTU and portwidth */
123 uint32_t tavor_max_mtu			= TAVOR_MAX_MTU;
124 uint32_t tavor_max_port_width		= TAVOR_MAX_PORT_WIDTH;
125 
126 /* Number of supported Virtual Lanes (VL) */
127 uint32_t tavor_max_vlcap		= TAVOR_MAX_VLCAP;
128 
129 /* Number of supported ports (1 or 2) */
130 uint32_t tavor_num_ports		= TAVOR_NUM_PORTS;
131 
132 /*
133  * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
134  * QP1, respectively.
135  */
136 uint32_t tavor_qp0_agents_in_fw		= 1;
137 uint32_t tavor_qp1_agents_in_fw		= 0;
138 
139 /*
140  * Whether DMA mappings should be made with DDI_DMA_STREAMING or with
141  * DDI_DMA_CONSISTENT mode.  Note: 0 for "streaming", 1 for "consistent"
142  */
143 uint32_t tavor_streaming_consistent	= 1;
144 
145 /*
146  * For DMA mappings made with DDI_DMA_CONSISTENT, this flag determines
147  * whether to override the necessity for calls to ddi_dma_sync().
148  */
149 uint32_t tavor_consistent_syncoverride  = 0;
150 
151 /*
152  * Whether DMA mappings should bypass the PCI IOMMU or not.
153  * tavor_iommu_bypass is a global setting for all memory addresses.  However,
154  * if set to BYPASS, memory attempted to be registered for streaming (ie:
155  * NON-COHERENT) will necessarily turn off BYPASS for that registration.  To
156  * instead disable streaming in this situation the
157  * 'tavor_disable_streaming_on_bypass' can be set to 1.  This setting will
158  * change the memory mapping to be implicitly consistent (ie: COHERENT), and
159  * will still perform the iommu BYPASS operation.
160  */
161 uint32_t tavor_iommu_bypass		= 1;
162 uint32_t tavor_disable_streaming_on_bypass = 0;
163 
164 /*
165  * Whether QP work queues should be allocated from system memory or
166  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
167  */
168 uint32_t tavor_qp_wq_inddr		= 0;
169 
170 /*
171  * Whether SRQ work queues should be allocated from system memory or
172  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
173  */
174 uint32_t tavor_srq_wq_inddr		= 0;
175 
176 /*
177  * Whether Tavor should use MSI (Message Signaled Interrupts), if available.
178  * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
179  */
180 uint32_t tavor_use_msi_if_avail		= 1;
181 
182 /*
183  * This is a patchable variable that determines the time we will wait after
184  * initiating SW reset before we do our first read from Tavor config space.
185  * If this value is set too small (less than the default 100ms), it is
186  * possible for Tavor hardware to be unready to respond to the config cycle
187  * reads.  This could cause master abort on the PCI bridge.  Note: If
188  * "tavor_sw_reset_delay" is set to zero, then no software reset of the Tavor
189  * device will be attempted.
190  */
191 uint32_t tavor_sw_reset_delay		= TAVOR_SW_RESET_DELAY;
192 
193 /*
194  * These are patchable variables for tavor command polling. The poll_delay is
195  * the number of usec to wait in-between calls to poll the 'go' bit.  The
196  * poll_max is the total number of usec to loop in waiting for the 'go' bit to
197  * clear.
198  */
199 uint32_t tavor_cmd_poll_delay		= TAVOR_CMD_POLL_DELAY;
200 uint32_t tavor_cmd_poll_max		= TAVOR_CMD_POLL_MAX;
201 
202 /*
203  * This is a patchable variable that determines the frequency with which
204  * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
205  * set in at least every 2^tavor_qp_ackreq_freq packets (but at least once
206  * per message, i.e. in the last packet).  Tuning this value can increase
207  * IB fabric utilization by cutting down on the number of unnecessary ACKs.
208  */
209 uint32_t tavor_qp_ackreq_freq		= TAVOR_QP_ACKREQ_FREQ;
210 
211 /*
212  * This is a patchable variable that determines the default value for the
213  * maximum number of outstanding split transactions.  The number of
214  * outstanding split transations (i.e. PCI reads) has an affect on device
215  * throughput.  The value here should not be modified as it defines the
216  * default (least common denominator - one (1) PCI read) behavior that is
217  * guaranteed to work, regardless of how the Tavor firmware has been
218  * initialized.  The format for this variable is the same as the corresponding
219  * field in the "PCI-X Command Register".
220  */
221 #ifdef	__sparc
222 /*
223  * Default SPARC platforms to be 1 outstanding PCI read.
224  */
225 int tavor_max_out_splt_trans	= 0;
226 #else
227 /*
228  * Default non-SPARC platforms to be the default as set in tavor firmware
229  * number of outstanding PCI reads.
230  */
231 int tavor_max_out_splt_trans	= -1;
232 #endif
233 
234 /*
235  * This is a patchable variable that determines the default value for the
236  * maximum size of PCI read burst.  This maximum size has an affect on
237  * device throughput.  The value here should not be modified as it defines
238  * the default (least common denominator - 512B read) behavior that is
239  * guaranteed to work, regardless of how the Tavor device has been
240  * initialized.  The format for this variable is the same as the corresponding
241  * field in the "PCI-X Command Register".
242  */
243 #ifdef	__sparc
244 /*
245  * Default SPARC platforms to be 512B read.
246  */
247 int tavor_max_mem_rd_byte_cnt	= 0;
248 static void tavor_check_iommu_bypass(tavor_state_t *state,
249     tavor_cfg_profile_t *cp);
250 #else
251 /*
252  * Default non-SPARC platforms to be the default as set in tavor firmware.
253  *
254  */
255 int tavor_max_mem_rd_byte_cnt	= -1;
256 #endif
257 
258 static void tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp);
259 static void tavor_cfg_prop_lookup(tavor_state_t *state,
260     tavor_cfg_profile_t *cp);
261 
262 /*
263  * tavor_cfg_profile_init_phase1()
264  *    Context: Only called from attach() path context
265  */
266 int
267 tavor_cfg_profile_init_phase1(tavor_state_t *state)
268 {
269 	tavor_cfg_profile_t	*cp;
270 
271 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase1);
272 
273 	/*
274 	 * Allocate space for the configuration profile structure
275 	 */
276 	cp = (tavor_cfg_profile_t *)kmem_zalloc(sizeof (tavor_cfg_profile_t),
277 	    KM_SLEEP);
278 
279 	cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
280 	cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
281 	cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
282 	cp->cp_cmd_poll_delay		= tavor_cmd_poll_delay;
283 	cp->cp_cmd_poll_max		= tavor_cmd_poll_max;
284 	cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
285 	cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
286 	cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
287 	cp->cp_srq_enable		= tavor_srq_enable;
288 	cp->cp_fmr_enable		= 0;
289 	cp->cp_fmr_max_remaps		= 0;
290 
291 	/*
292 	 * Although most of the configuration is enabled in "phase2" of the
293 	 * cfg_profile_init, we have to setup the OUT mailboxes here, since
294 	 * they are used immediately after this "phase1" completes.  Check for
295 	 * alt_config_enable, and set the values appropriately.  Otherwise, the
296 	 * config profile is setup using the values based on the dimm size.
297 	 * While it is expected that the mailbox size and number will remain
298 	 * the same independent of dimm size, we separate it out here anyway
299 	 * for completeness.
300 	 *
301 	 * We have to setup SRQ settings here because MOD_STAT_CFG must be
302 	 * called before our call to QUERY_DEVLIM.  If SRQ is enabled, then we
303 	 * must enable it in the firmware so that the phase2 settings will have
304 	 * the right device limits.
305 	 */
306 	if (tavor_alt_config_enable) {
307 		cp->cp_log_num_outmbox		= tavor_log_num_outmbox;
308 		cp->cp_log_num_intr_outmbox	= tavor_log_num_intr_outmbox;
309 		cp->cp_log_outmbox_size		= tavor_log_outmbox_size;
310 		cp->cp_log_num_inmbox		= tavor_log_num_inmbox;
311 		cp->cp_log_num_intr_inmbox	= tavor_log_num_intr_inmbox;
312 		cp->cp_log_inmbox_size		= tavor_log_inmbox_size;
313 		cp->cp_log_num_srq		= tavor_log_num_srq;
314 		cp->cp_log_max_srq_sz		= tavor_log_max_srq_sz;
315 
316 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
317 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
318 		cp->cp_log_num_intr_outmbox	=
319 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
320 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
321 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
322 		cp->cp_log_num_intr_inmbox	=
323 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
324 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
325 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_256;
326 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
327 
328 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
329 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
330 		cp->cp_log_num_intr_outmbox	=
331 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
332 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
333 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
334 		cp->cp_log_num_intr_inmbox	=
335 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
336 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
337 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
338 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
339 
340 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
341 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
342 		cp->cp_log_num_intr_outmbox	=
343 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
344 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
345 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
346 		cp->cp_log_num_intr_inmbox	=
347 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
348 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
349 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_MIN;
350 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT_MIN;
351 
352 	} else {
353 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
354 		    TAVOR_TNF_ERROR, "");
355 		return (DDI_FAILURE);
356 	}
357 
358 	/*
359 	 * Set default DMA mapping mode.  Ensure consistency of flags
360 	 * with both architecture type and other configuration flags.
361 	 */
362 	if (tavor_streaming_consistent == 0) {
363 #ifdef	__sparc
364 		cp->cp_streaming_consistent = DDI_DMA_STREAMING;
365 
366 		/* Can't do both "streaming" and IOMMU bypass */
367 		if (tavor_iommu_bypass != 0) {
368 			TNF_PROBE_0(tavor_cfg_profile_streamingbypass_fail,
369 			    TAVOR_TNF_ERROR, "");
370 			kmem_free(cp, sizeof (tavor_cfg_profile_t));
371 			return (DDI_FAILURE);
372 		}
373 #else
374 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
375 #endif
376 	} else {
377 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
378 	}
379 
380 	/* Determine whether to override ddi_dma_sync() */
381 	cp->cp_consistent_syncoverride = tavor_consistent_syncoverride;
382 
383 	/* Attach the configuration profile to Tavor softstate */
384 	state->ts_cfg_profile = cp;
385 
386 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase1);
387 	return (DDI_SUCCESS);
388 }
389 
390 /*
391  * tavor_cfg_profile_init_phase2()
392  *    Context: Only called from attach() path context
393  */
394 int
395 tavor_cfg_profile_init_phase2(tavor_state_t *state)
396 {
397 	tavor_cfg_profile_t	*cp;
398 
399 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase2);
400 
401 	/* Read the configuration profile from Tavor softstate */
402 	cp = state->ts_cfg_profile;
403 
404 	/*
405 	 * Verify the config profile setting.  The 'setting' should already be
406 	 * set, during a call to ddi_dev_regsize() to get the size of DDR
407 	 * memory, or during a fallback to a smaller supported size.  If it is
408 	 * not set, we should not have reached this 'phase2'.  So we assert
409 	 * here.
410 	 */
411 	ASSERT(state->ts_cfg_profile_setting != 0);
412 
413 	/*
414 	 * The automatic configuration override is the
415 	 * 'tavor_alt_config_enable' variable.  If this is set, we no longer
416 	 * use the DIMM size to enable the correct profile.  Instead, all of
417 	 * the tavor config options at the top of this file are used directly.
418 	 *
419 	 * This allows customization for a user who knows what they are doing
420 	 * to set tavor configuration values manually.
421 	 *
422 	 * If this variable is 0, we do automatic config for both 128MB and
423 	 * 256MB DIMM sizes.
424 	 */
425 	if (tavor_alt_config_enable) {
426 		/*
427 		 * Initialize the configuration profile
428 		 */
429 		cp->cp_log_num_qp		= tavor_log_num_qp;
430 		cp->cp_log_max_qp_sz		= tavor_log_max_qp_sz;
431 
432 		/* Determine WQE sizes from requested max SGLs */
433 		tavor_cfg_wqe_sizes(cp);
434 
435 		cp->cp_log_num_cq		= tavor_log_num_cq;
436 		cp->cp_log_max_cq_sz		= tavor_log_max_cq_sz;
437 		cp->cp_log_default_eq_sz	= tavor_log_default_eq_sz;
438 		cp->cp_log_num_rdb		= tavor_log_num_rdb;
439 		cp->cp_log_num_mcg		= tavor_log_num_mcg;
440 		cp->cp_num_qp_per_mcg		= tavor_num_qp_per_mcg;
441 		cp->cp_log_num_mcg_hash		= tavor_log_num_mcg_hash;
442 		cp->cp_log_num_mpt		= tavor_log_num_mpt;
443 		cp->cp_log_max_mrw_sz		= tavor_log_max_mrw_sz;
444 		cp->cp_log_num_mttseg		= tavor_log_num_mttseg;
445 		cp->cp_log_num_uar		= tavor_log_num_uar;
446 		cp->cp_log_num_pd		= tavor_log_num_pd;
447 		cp->cp_log_num_ah		= tavor_log_num_ah;
448 		cp->cp_log_max_pkeytbl		= tavor_log_max_pkeytbl;
449 		cp->cp_log_max_gidtbl		= tavor_log_max_gidtbl;
450 		cp->cp_hca_max_rdma_in_qp	= tavor_hca_max_rdma_in_qp;
451 		cp->cp_hca_max_rdma_out_qp	= tavor_hca_max_rdma_out_qp;
452 		cp->cp_max_mtu			= tavor_max_mtu;
453 		cp->cp_max_port_width		= tavor_max_port_width;
454 		cp->cp_max_vlcap		= tavor_max_vlcap;
455 		cp->cp_num_ports		= tavor_num_ports;
456 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
457 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
458 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
459 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
460 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
461 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
462 
463 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
464 		/*
465 		 * Initialize the configuration profile
466 		 */
467 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_256;
468 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
469 
470 		/* Determine WQE sizes from requested max SGLs */
471 		tavor_cfg_wqe_sizes(cp);
472 
473 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_256;
474 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
475 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
476 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_256;
477 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
478 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
479 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
480 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_256;
481 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_256;
482 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
483 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
484 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
485 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
486 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
487 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
488 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
489 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
490 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
491 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
492 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
493 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
494 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
495 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
496 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
497 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
498 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
499 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
500 
501 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
502 		/*
503 		 * Initialize the configuration profile
504 		 */
505 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
506 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
507 
508 		/* Determine WQE sizes from requested max SGLs */
509 		tavor_cfg_wqe_sizes(cp);
510 
511 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
512 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
513 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
514 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
515 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
516 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
517 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
518 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
519 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
520 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
521 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
522 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
523 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
524 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
525 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
526 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
527 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
528 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
529 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
530 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
531 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
532 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
533 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
534 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
535 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
536 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
537 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
538 
539 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
540 		/*
541 		 * Initialize the configuration profile for minimal footprint.
542 		 */
543 
544 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_MIN;
545 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT_MIN;
546 
547 		/* Determine WQE sizes from requested max SGLs */
548 		tavor_cfg_wqe_sizes(cp);
549 
550 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_MIN;
551 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT_MIN;
552 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
553 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_MIN;
554 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT_MIN;
555 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG_MIN;
556 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT_MIN;
557 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_MIN;
558 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_MIN;
559 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT_MIN;
560 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT_MIN;
561 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
562 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT_MIN;
563 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
564 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
565 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
566 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
567 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
568 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
569 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
570 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
571 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
572 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
573 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
574 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
575 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
576 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
577 
578 	} else {
579 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
580 		    TAVOR_TNF_ERROR, "");
581 		return (DDI_FAILURE);
582 	}
583 
584 	/*
585 	 * Set IOMMU bypass or not.  Ensure consistency of flags with
586 	 * architecture type.
587 	 */
588 #ifdef __sparc
589 	if (tavor_iommu_bypass == 1) {
590 		tavor_check_iommu_bypass(state, cp);
591 	} else {
592 		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
593 		cp->cp_disable_streaming_on_bypass = 0;
594 	}
595 #else
596 	cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
597 	cp->cp_disable_streaming_on_bypass = 0;
598 #endif
599 	/* Set whether QP WQEs will be in DDR or not */
600 	cp->cp_qp_wq_inddr = (tavor_qp_wq_inddr == 0) ?
601 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
602 
603 	/* Set whether SRQ WQEs will be in DDR or not */
604 	cp->cp_srq_wq_inddr = (tavor_srq_wq_inddr == 0) ?
605 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
606 
607 	cp->cp_use_msi_if_avail = tavor_use_msi_if_avail;
608 
609 	/* Determine additional configuration from optional properties */
610 	tavor_cfg_prop_lookup(state, cp);
611 
612 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase2);
613 	return (DDI_SUCCESS);
614 }
615 
616 
617 /*
618  * tavor_cfg_profile_fini()
619  *    Context: Only called from attach() and/or detach() path contexts
620  */
621 void
622 tavor_cfg_profile_fini(tavor_state_t *state)
623 {
624 	TAVOR_TNF_ENTER(tavor_cfg_profile_fini);
625 
626 	/*
627 	 * Free up the space for configuration profile
628 	 */
629 	kmem_free(state->ts_cfg_profile, sizeof (tavor_cfg_profile_t));
630 
631 	TAVOR_TNF_EXIT(tavor_cfg_profile_fini);
632 }
633 
634 
635 /*
636  * tavor_cfg_wqe_sizes()
637  *    Context: Only called from attach() path context
638  */
639 static void
640 tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp)
641 {
642 	uint_t	max_size, log2;
643 	uint_t	max_sgl, real_max_sgl;
644 
645 	/*
646 	 * Get the requested maximum number SGL per WQE from the Tavor
647 	 * patchable variable
648 	 */
649 	max_sgl = tavor_wqe_max_sgl;
650 
651 	/*
652 	 * Use requested maximum number of SGL to calculate the max descriptor
653 	 * size (while guaranteeing that the descriptor size is a power-of-2
654 	 * cachelines).  We have to use the calculation for QP1 MLX transport
655 	 * because the possibility that we might need to inline a GRH, along
656 	 * with all the other headers and alignment restrictions, sets the
657 	 * maximum for the number of SGLs that we can advertise support for.
658 	 */
659 	max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
660 	log2 = highbit(max_size);
661 	if (ISP2(max_size)) {
662 		log2 = log2 - 1;
663 	}
664 	max_size = (1 << log2);
665 
666 	/*
667 	 * Now clip the maximum descriptor size based on Tavor HW maximum
668 	 */
669 	max_size = min(max_size, TAVOR_QP_WQE_MAX_SIZE);
670 
671 	/*
672 	 * Then use the calculated max descriptor size to determine the "real"
673 	 * maximum SGL (the number beyond which we would roll over to the next
674 	 * power-of-2).
675 	 */
676 	real_max_sgl = (max_size - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
677 
678 	/* Then save away this configuration information */
679 	cp->cp_wqe_max_sgl	= max_sgl;
680 	cp->cp_wqe_real_max_sgl = real_max_sgl;
681 
682 	/* SRQ SGL gets set to it's own patchable variable value */
683 	cp->cp_srq_max_sgl		= tavor_srq_max_sgl;
684 }
685 
686 
687 /*
688  * tavor_cfg_prop_lookup()
689  *    Context: Only called from attach() path context
690  */
691 static void
692 tavor_cfg_prop_lookup(tavor_state_t *state, tavor_cfg_profile_t *cp)
693 {
694 	uint_t		num_ports, nelementsp;
695 	uchar_t		*datap;
696 	int		status;
697 
698 	/*
699 	 * Read the property defining the number of Tavor ports to
700 	 * support.  If the property is undefined or invalid, then return.
701 	 * We return here assuming also that OBP is not supposed to be setting
702 	 * up other properties in this case (eg: HCA plugin cards).  But if
703 	 * this property is valid, then we print out a message for the other
704 	 * properties to show an OBP error.
705 	 */
706 	num_ports = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
707 	    DDI_PROP_DONTPASS, "#ports", 0);
708 	if ((num_ports > TAVOR_NUM_PORTS) || (num_ports == 0)) {
709 		return;
710 	}
711 	cp->cp_num_ports   = num_ports;
712 
713 	/*
714 	 * The system image guid is not currently supported in the 1275
715 	 * binding.  So we leave this commented out for now.
716 	 */
717 #ifdef SUPPORTED_IN_1275_BINDING
718 	/*
719 	 * Read the property defining the value to use later to override the
720 	 * default SystemImageGUID (in firmware).  If the property is
721 	 * undefined, then return.
722 	 */
723 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
724 	    DDI_PROP_DONTPASS, "system-image-guid", &datap, &nelementsp);
725 	if (status == DDI_PROP_SUCCESS) {
726 		cp->cp_sysimgguid = ((uint64_t *)datap)[0];
727 		ddi_prop_free(datap);
728 	} else {
729 		cmn_err(CE_NOTE,
730 		    "Unable to read OBP system-image-guid property");
731 	}
732 #endif
733 
734 	/*
735 	 * Read the property defining the value to use later to override
736 	 * the default SystemImageGUID (in firmware).  If the property is
737 	 * undefined, then return.
738 	 */
739 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
740 	    DDI_PROP_DONTPASS, "node-guid", &datap, &nelementsp);
741 	if (status == DDI_PROP_SUCCESS) {
742 		cp->cp_nodeguid = ((uint64_t *)datap)[0];
743 		ddi_prop_free(datap);
744 	} else {
745 		cmn_err(CE_NOTE, "Unable to read OBP node-guid property");
746 	}
747 
748 	/*
749 	 * Using the value for the number of ports (above) read the properties
750 	 * used to later to override the default PortGUIDs for each Tavor port.
751 	 * If either of these properties are undefined, then return.
752 	 */
753 	if (num_ports == TAVOR_NUM_PORTS) {
754 		status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY,
755 		    state->ts_dip, DDI_PROP_DONTPASS, "port-2-guid", &datap,
756 		    &nelementsp);
757 		if (status == DDI_PROP_SUCCESS) {
758 			cp->cp_portguid[1] = ((uint64_t *)datap)[0];
759 			ddi_prop_free(datap);
760 		} else {
761 			cmn_err(CE_NOTE,
762 			    "Unable to read OBP port-2-guid property");
763 		}
764 	}
765 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
766 	    DDI_PROP_DONTPASS, "port-1-guid", &datap, &nelementsp);
767 	if (status == DDI_PROP_SUCCESS) {
768 		cp->cp_portguid[0] = ((uint64_t *)datap)[0];
769 		ddi_prop_free(datap);
770 	} else {
771 		cmn_err(CE_NOTE, "Unable to read OBP port-1-guid property");
772 	}
773 }
774 
#ifdef __sparc
/*
 * tavor_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Probes whether IOMMU bypass can be used by attempting to allocate a
 *    DMA handle with DDI_DMA_FORCE_PHYSICAL set, and records the outcome in
 *    the configuration profile.  The handle is only a probe; it is freed
 *    again if the allocation succeeded.
 */
static void
tavor_check_iommu_bypass(tavor_state_t *state, tavor_cfg_profile_t *cp)
{
	ddi_dma_attr_t		attr;
	ddi_dma_handle_t	probe_hdl;
	int			rc;

	/* Start from the Tavor DMA attributes, then force physical mapping */
	tavor_dma_attr_init(&attr);
	attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;

	rc = ddi_dma_alloc_handle(state->ts_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &probe_hdl);

	/*
	 * DDI_DMA_BADATTR means that IOMMU bypass is not possible with our
	 * PCI bridge parent (certain versions of Tomatillo, for example, do
	 * not support IOMMU bypass).  Fall back to non-bypass (ie: normal)
	 * mapping in that case.
	 */
	if (rc == DDI_DMA_BADATTR) {
		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
		cp->cp_disable_streaming_on_bypass = 0;
		return;
	}

	/* The probe handle is not needed beyond this point */
	if (rc == DDI_SUCCESS) {
		ddi_dma_free_handle(&probe_hdl);
	}

	/*
	 * This function is only reached when iommu bypass was requested in
	 * the config profile, so any status other than DDI_DMA_BADATTR
	 * results in bypass being configured.
	 */
	cp->cp_iommu_bypass = TAVOR_BINDMEM_BYPASS;
	cp->cp_disable_streaming_on_bypass = tavor_disable_streaming_on_bypass;
}
#endif
817