xref: /illumos-gate/usr/src/uts/common/io/ib/adapters/tavor/tavor_cfg.c (revision c7facc54c4abed9e554ff80225311e6b7048d3c9)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_cfg.c
29  *    Tavor Configuration Profile Routines
30  *
31  *    Implements the routines necessary for initializing and (later) tearing
32  *    down the list of Tavor configuration information.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 
42 #include <sys/ib/adapters/tavor/tavor.h>
43 
/*
 * Set to enable alternative configurations: 0 = automatic config, 1 = manual.
 * When non-zero, the profile init routines in this file copy the patchable
 * variables below into the configuration profile instead of choosing values
 * based on the DDR size profile setting.
 */
uint32_t tavor_alt_config_enable	= 0;

/* Number of supported QPs and their maximum size */
uint32_t tavor_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
uint32_t tavor_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;

/*
 * Number of supported SGL per WQE.  This is the requested maximum; see
 * tavor_cfg_wqe_sizes() for how the "real" maximum is derived from it.
 */
uint32_t tavor_wqe_max_sgl		= TAVOR_NUM_WQE_SGL;

/* Number of supported CQs and their maximum size */
uint32_t tavor_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
uint32_t tavor_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;

/* Select to enable SRQ or not; NOTE: 0 for disabled, 1 for enabled */
uint32_t tavor_srq_enable		= 1;

/*
 * Number of supported SRQs, their maximum size, and the number of SGL per
 * SRQ WQE
 */
uint32_t tavor_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
uint32_t tavor_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
uint32_t tavor_srq_max_sgl		= TAVOR_SRQ_MAX_SGL;

/* Default size for all EQs */
uint32_t tavor_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;

/* Number of supported RDB (for incoming RDMA Read/Atomic) */
uint32_t tavor_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;

/*
 * Number of supported multicast groups, number of QP per multicast group, and
 * the number of entries (from the total number) in the multicast group "hash
 * table"
 */
uint32_t tavor_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
uint32_t tavor_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
uint32_t tavor_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;

/*
 * Number of supported MPTs (memory regions and windows) and their maximum
 * size.  Also the number of MTT per "MTT segment" (see tavor_mr.h for more
 * details)
 */
uint32_t tavor_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
uint32_t tavor_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
uint32_t tavor_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;

/*
 * Number of supported Tavor mailboxes ("In" and "Out") and their maximum
 * sizes, respectively.  The "intr" variants are separate counts that are
 * stored in their own configuration profile fields.
 */
uint32_t tavor_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
uint32_t tavor_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
uint32_t tavor_log_num_intr_inmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
uint32_t tavor_log_num_intr_outmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
uint32_t tavor_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
uint32_t tavor_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;

/* Number of supported UAR pages */
uint32_t tavor_log_num_uar		= TAVOR_NUM_UAR_SHIFT;

/* Number of supported Protection Domains (PD) */
uint32_t tavor_log_num_pd		= TAVOR_NUM_PD_SHIFT;

/* Number of supported Address Handles (AH) */
uint32_t tavor_log_num_ah		= TAVOR_NUM_AH_SHIFT;

/*
 * Number of total supported PKeys per PKey table (i.e.
 * per port).  Also the number of SGID per GID table.
 */
uint32_t tavor_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
uint32_t tavor_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;

/* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
uint32_t tavor_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
uint32_t tavor_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;

/* Maximum supported MTU and port width */
uint32_t tavor_max_mtu			= TAVOR_MAX_MTU;
uint32_t tavor_max_port_width		= TAVOR_MAX_PORT_WIDTH;

/* Number of supported Virtual Lanes (VL) */
uint32_t tavor_max_vlcap		= TAVOR_MAX_VLCAP;

/* Number of supported ports (1 or 2) */
uint32_t tavor_num_ports		= TAVOR_NUM_PORTS;

/*
 * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
 * QP1, respectively.
 */
uint32_t tavor_qp0_agents_in_fw		= 1;
uint32_t tavor_qp1_agents_in_fw		= 0;

/*
 * Whether DMA mappings should be made with DDI_DMA_STREAMING or with
 * DDI_DMA_CONSISTENT mode.  Note: 0 for "streaming", 1 for "consistent".
 * "Streaming" is only honored on SPARC, and only when tavor_iommu_bypass is
 * zero (phase1 profile init fails otherwise); other platforms always use
 * "consistent".
 */
uint32_t tavor_streaming_consistent	= 1;

/*
 * For DMA mappings made with DDI_DMA_CONSISTENT, this flag determines
 * whether to override the necessity for calls to ddi_dma_sync().
 */
uint32_t tavor_consistent_syncoverride  = 0;

/*
 * Whether DMA mappings should bypass the PCI IOMMU or not.
 * tavor_iommu_bypass is a global setting for all memory addresses.  However,
 * if set to BYPASS, memory attempted to be registered for streaming (ie:
 * NON-COHERENT) will necessarily turn off BYPASS for that registration.  To
 * instead disable streaming in this situation the
 * 'tavor_disable_streaming_on_bypass' can be set to 1.  This setting will
 * change the memory mapping to be implicitly consistent (ie: COHERENT), and
 * will still perform the iommu BYPASS operation.
 *
 * Both settings are only consulted on SPARC; on other platforms the
 * configuration profile is always set up for normal (non-bypass) mapping.
 */
uint32_t tavor_iommu_bypass		= 1;
uint32_t tavor_disable_streaming_on_bypass = 0;

/*
 * Whether QP work queues should be allocated from system memory or
 * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
 */
uint32_t tavor_qp_wq_inddr		= 0;

/*
 * Whether SRQ work queues should be allocated from system memory or
 * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
 */
uint32_t tavor_srq_wq_inddr		= 0;

/*
 * Whether Tavor should use MSI (Message Signaled Interrupts), if available.
 * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
 */
uint32_t tavor_use_msi_if_avail		= 1;

/*
 * This is a patchable variable that determines the time we will wait after
 * initiating SW reset before we do our first read from Tavor config space.
 * If this value is set too small (less than the default 100ms), it is
 * possible for Tavor hardware to be unready to respond to the config cycle
 * reads.  This could cause master abort on the PCI bridge.  Note: If
 * "tavor_sw_reset_delay" is set to zero, then no software reset of the Tavor
 * device will be attempted.
 */
uint32_t tavor_sw_reset_delay		= TAVOR_SW_RESET_DELAY;

/*
 * These are patchable variables for tavor command polling. The poll_delay is
 * the number of usec to wait in-between calls to poll the 'go' bit.  The
 * poll_max is the total number of usec to loop in waiting for the 'go' bit to
 * clear.
 */
uint32_t tavor_cmd_poll_delay		= TAVOR_CMD_POLL_DELAY;
uint32_t tavor_cmd_poll_max		= TAVOR_CMD_POLL_MAX;

/*
 * This is a patchable variable that determines the frequency with which
 * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
 * set in at least every 2^tavor_qp_ackreq_freq packets (but at least once
 * per message, i.e. in the last packet).  Tuning this value can increase
 * IB fabric utilization by cutting down on the number of unnecessary ACKs.
 */
uint32_t tavor_qp_ackreq_freq		= TAVOR_QP_ACKREQ_FREQ;
209 
210 /*
211  * This is a patchable variable that determines the default value for the
212  * maximum number of outstanding split transactions.  The number of
213  * outstanding split transations (i.e. PCI reads) has an affect on device
214  * throughput.  The value here should not be modified as it defines the
215  * default (least common denominator - one (1) PCI read) behavior that is
216  * guaranteed to work, regardless of how the Tavor firmware has been
217  * initialized.  The format for this variable is the same as the corresponding
218  * field in the "PCI-X Command Register".
219  */
220 #ifdef	__sparc
221 /*
222  * Default SPARC platforms to be 1 outstanding PCI read.
223  */
224 int tavor_max_out_splt_trans	= 0;
225 #else
226 /*
227  * Default non-SPARC platforms to be the default as set in tavor firmware
228  * number of outstanding PCI reads.
229  */
230 int tavor_max_out_splt_trans	= -1;
231 #endif
232 
233 /*
234  * This is a patchable variable that determines the default value for the
235  * maximum size of PCI read burst.  This maximum size has an affect on
236  * device throughput.  The value here should not be modified as it defines
237  * the default (least common denominator - 512B read) behavior that is
238  * guaranteed to work, regardless of how the Tavor device has been
239  * initialized.  The format for this variable is the same as the corresponding
240  * field in the "PCI-X Command Register".
241  */
242 #ifdef	__sparc
243 /*
244  * Default SPARC platforms to be 512B read.
245  */
246 int tavor_max_mem_rd_byte_cnt	= 0;
247 static void tavor_check_iommu_bypass(tavor_state_t *state,
248     tavor_cfg_profile_t *cp);
249 #else
250 /*
251  * Default non-SPARC platforms to be the default as set in tavor firmware.
252  *
253  */
254 int tavor_max_mem_rd_byte_cnt	= -1;
255 #endif
256 
257 static void tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp);
258 static void tavor_cfg_prop_lookup(tavor_state_t *state,
259     tavor_cfg_profile_t *cp);
260 
261 /*
262  * tavor_cfg_profile_init_phase1()
263  *    Context: Only called from attach() path context
264  */
265 int
266 tavor_cfg_profile_init_phase1(tavor_state_t *state)
267 {
268 	tavor_cfg_profile_t	*cp;
269 
270 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase1);
271 
272 	/*
273 	 * Allocate space for the configuration profile structure
274 	 */
275 	cp = (tavor_cfg_profile_t *)kmem_zalloc(sizeof (tavor_cfg_profile_t),
276 	    KM_SLEEP);
277 
278 	cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
279 	cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
280 	cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
281 	cp->cp_cmd_poll_delay		= tavor_cmd_poll_delay;
282 	cp->cp_cmd_poll_max		= tavor_cmd_poll_max;
283 	cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
284 	cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
285 	cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
286 	cp->cp_srq_enable		= tavor_srq_enable;
287 	cp->cp_fmr_enable		= 0;
288 	cp->cp_fmr_max_remaps		= 0;
289 
290 	/*
291 	 * Although most of the configuration is enabled in "phase2" of the
292 	 * cfg_profile_init, we have to setup the OUT mailboxes here, since
293 	 * they are used immediately after this "phase1" completes.  Check for
294 	 * alt_config_enable, and set the values appropriately.  Otherwise, the
295 	 * config profile is setup using the values based on the dimm size.
296 	 * While it is expected that the mailbox size and number will remain
297 	 * the same independent of dimm size, we separate it out here anyway
298 	 * for completeness.
299 	 *
300 	 * We have to setup SRQ settings here because MOD_STAT_CFG must be
301 	 * called before our call to QUERY_DEVLIM.  If SRQ is enabled, then we
302 	 * must enable it in the firmware so that the phase2 settings will have
303 	 * the right device limits.
304 	 */
305 	if (tavor_alt_config_enable) {
306 		cp->cp_log_num_outmbox		= tavor_log_num_outmbox;
307 		cp->cp_log_num_intr_outmbox	= tavor_log_num_intr_outmbox;
308 		cp->cp_log_outmbox_size		= tavor_log_outmbox_size;
309 		cp->cp_log_num_inmbox		= tavor_log_num_inmbox;
310 		cp->cp_log_num_intr_inmbox	= tavor_log_num_intr_inmbox;
311 		cp->cp_log_inmbox_size		= tavor_log_inmbox_size;
312 		cp->cp_log_num_srq		= tavor_log_num_srq;
313 		cp->cp_log_max_srq_sz		= tavor_log_max_srq_sz;
314 
315 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
316 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
317 		cp->cp_log_num_intr_outmbox	=
318 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
319 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
320 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
321 		cp->cp_log_num_intr_inmbox	=
322 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
323 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
324 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_256;
325 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
326 
327 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
328 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
329 		cp->cp_log_num_intr_outmbox	=
330 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
331 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
332 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
333 		cp->cp_log_num_intr_inmbox	=
334 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
335 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
336 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
337 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
338 
339 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
340 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
341 		cp->cp_log_num_intr_outmbox	=
342 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
343 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
344 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
345 		cp->cp_log_num_intr_inmbox	=
346 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
347 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
348 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_MIN;
349 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT_MIN;
350 
351 	} else {
352 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
353 		    TAVOR_TNF_ERROR, "");
354 		return (DDI_FAILURE);
355 	}
356 
357 	/*
358 	 * Set default DMA mapping mode.  Ensure consistency of flags
359 	 * with both architecture type and other configuration flags.
360 	 */
361 	if (tavor_streaming_consistent == 0) {
362 #ifdef	__sparc
363 		cp->cp_streaming_consistent = DDI_DMA_STREAMING;
364 
365 		/* Can't do both "streaming" and IOMMU bypass */
366 		if (tavor_iommu_bypass != 0) {
367 			TNF_PROBE_0(tavor_cfg_profile_streamingbypass_fail,
368 			    TAVOR_TNF_ERROR, "");
369 			kmem_free(cp, sizeof (tavor_cfg_profile_t));
370 			return (DDI_FAILURE);
371 		}
372 #else
373 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
374 #endif
375 	} else {
376 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
377 	}
378 
379 	/* Determine whether to override ddi_dma_sync() */
380 	cp->cp_consistent_syncoverride = tavor_consistent_syncoverride;
381 
382 	/* Attach the configuration profile to Tavor softstate */
383 	state->ts_cfg_profile = cp;
384 
385 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase1);
386 	return (DDI_SUCCESS);
387 }
388 
389 /*
390  * tavor_cfg_profile_init_phase2()
391  *    Context: Only called from attach() path context
392  */
393 int
394 tavor_cfg_profile_init_phase2(tavor_state_t *state)
395 {
396 	tavor_cfg_profile_t	*cp;
397 
398 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase2);
399 
400 	/* Read the configuration profile from Tavor softstate */
401 	cp = state->ts_cfg_profile;
402 
403 	/*
404 	 * Verify the config profile setting.  The 'setting' should already be
405 	 * set, during a call to ddi_dev_regsize() to get the size of DDR
406 	 * memory, or during a fallback to a smaller supported size.  If it is
407 	 * not set, we should not have reached this 'phase2'.  So we assert
408 	 * here.
409 	 */
410 	ASSERT(state->ts_cfg_profile_setting != 0);
411 
412 	/*
413 	 * The automatic configuration override is the
414 	 * 'tavor_alt_config_enable' variable.  If this is set, we no longer
415 	 * use the DIMM size to enable the correct profile.  Instead, all of
416 	 * the tavor config options at the top of this file are used directly.
417 	 *
418 	 * This allows customization for a user who knows what they are doing
419 	 * to set tavor configuration values manually.
420 	 *
421 	 * If this variable is 0, we do automatic config for both 128MB and
422 	 * 256MB DIMM sizes.
423 	 */
424 	if (tavor_alt_config_enable) {
425 		/*
426 		 * Initialize the configuration profile
427 		 */
428 		cp->cp_log_num_qp		= tavor_log_num_qp;
429 		cp->cp_log_max_qp_sz		= tavor_log_max_qp_sz;
430 
431 		/* Determine WQE sizes from requested max SGLs */
432 		tavor_cfg_wqe_sizes(cp);
433 
434 		cp->cp_log_num_cq		= tavor_log_num_cq;
435 		cp->cp_log_max_cq_sz		= tavor_log_max_cq_sz;
436 		cp->cp_log_default_eq_sz	= tavor_log_default_eq_sz;
437 		cp->cp_log_num_rdb		= tavor_log_num_rdb;
438 		cp->cp_log_num_mcg		= tavor_log_num_mcg;
439 		cp->cp_num_qp_per_mcg		= tavor_num_qp_per_mcg;
440 		cp->cp_log_num_mcg_hash		= tavor_log_num_mcg_hash;
441 		cp->cp_log_num_mpt		= tavor_log_num_mpt;
442 		cp->cp_log_max_mrw_sz		= tavor_log_max_mrw_sz;
443 		cp->cp_log_num_mttseg		= tavor_log_num_mttseg;
444 		cp->cp_log_num_uar		= tavor_log_num_uar;
445 		cp->cp_log_num_pd		= tavor_log_num_pd;
446 		cp->cp_log_num_ah		= tavor_log_num_ah;
447 		cp->cp_log_max_pkeytbl		= tavor_log_max_pkeytbl;
448 		cp->cp_log_max_gidtbl		= tavor_log_max_gidtbl;
449 		cp->cp_hca_max_rdma_in_qp	= tavor_hca_max_rdma_in_qp;
450 		cp->cp_hca_max_rdma_out_qp	= tavor_hca_max_rdma_out_qp;
451 		cp->cp_max_mtu			= tavor_max_mtu;
452 		cp->cp_max_port_width		= tavor_max_port_width;
453 		cp->cp_max_vlcap		= tavor_max_vlcap;
454 		cp->cp_num_ports		= tavor_num_ports;
455 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
456 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
457 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
458 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
459 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
460 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
461 
462 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
463 		/*
464 		 * Initialize the configuration profile
465 		 */
466 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_256;
467 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
468 
469 		/* Determine WQE sizes from requested max SGLs */
470 		tavor_cfg_wqe_sizes(cp);
471 
472 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_256;
473 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
474 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
475 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_256;
476 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
477 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
478 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
479 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_256;
480 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_256;
481 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
482 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
483 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
484 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
485 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
486 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
487 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
488 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
489 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
490 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
491 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
492 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
493 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
494 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
495 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
496 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
497 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
498 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
499 
500 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
501 		/*
502 		 * Initialize the configuration profile
503 		 */
504 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
505 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
506 
507 		/* Determine WQE sizes from requested max SGLs */
508 		tavor_cfg_wqe_sizes(cp);
509 
510 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
511 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
512 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
513 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
514 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
515 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
516 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
517 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
518 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
519 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
520 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
521 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
522 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
523 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
524 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
525 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
526 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
527 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
528 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
529 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
530 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
531 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
532 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
533 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
534 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
535 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
536 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
537 
538 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
539 		/*
540 		 * Initialize the configuration profile for minimal footprint.
541 		 */
542 
543 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_MIN;
544 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT_MIN;
545 
546 		/* Determine WQE sizes from requested max SGLs */
547 		tavor_cfg_wqe_sizes(cp);
548 
549 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_MIN;
550 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT_MIN;
551 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
552 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_MIN;
553 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT_MIN;
554 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG_MIN;
555 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT_MIN;
556 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_MIN;
557 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_MIN;
558 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT_MIN;
559 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT_MIN;
560 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
561 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT_MIN;
562 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
563 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
564 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
565 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
566 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
567 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
568 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
569 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
570 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
571 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
572 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
573 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
574 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
575 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
576 
577 	} else {
578 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
579 		    TAVOR_TNF_ERROR, "");
580 		return (DDI_FAILURE);
581 	}
582 
583 	/*
584 	 * Set IOMMU bypass or not.  Ensure consistency of flags with
585 	 * architecture type.
586 	 */
587 #ifdef __sparc
588 	if (tavor_iommu_bypass == 1) {
589 		tavor_check_iommu_bypass(state, cp);
590 	} else {
591 		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
592 		cp->cp_disable_streaming_on_bypass = 0;
593 	}
594 #else
595 	cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
596 	cp->cp_disable_streaming_on_bypass = 0;
597 #endif
598 	/* Set whether QP WQEs will be in DDR or not */
599 	cp->cp_qp_wq_inddr = (tavor_qp_wq_inddr == 0) ?
600 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
601 
602 	/* Set whether SRQ WQEs will be in DDR or not */
603 	cp->cp_srq_wq_inddr = (tavor_srq_wq_inddr == 0) ?
604 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
605 
606 	cp->cp_use_msi_if_avail = tavor_use_msi_if_avail;
607 
608 	/* Determine additional configuration from optional properties */
609 	tavor_cfg_prop_lookup(state, cp);
610 
611 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase2);
612 	return (DDI_SUCCESS);
613 }
614 
615 
616 /*
617  * tavor_cfg_profile_fini()
618  *    Context: Only called from attach() and/or detach() path contexts
619  */
620 void
621 tavor_cfg_profile_fini(tavor_state_t *state)
622 {
623 	TAVOR_TNF_ENTER(tavor_cfg_profile_fini);
624 
625 	/*
626 	 * Free up the space for configuration profile
627 	 */
628 	kmem_free(state->ts_cfg_profile, sizeof (tavor_cfg_profile_t));
629 
630 	TAVOR_TNF_EXIT(tavor_cfg_profile_fini);
631 }
632 
633 
634 /*
635  * tavor_cfg_wqe_sizes()
636  *    Context: Only called from attach() path context
637  */
638 static void
639 tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp)
640 {
641 	uint_t	max_size, log2;
642 	uint_t	max_sgl, real_max_sgl;
643 
644 	/*
645 	 * Get the requested maximum number SGL per WQE from the Tavor
646 	 * patchable variable
647 	 */
648 	max_sgl = tavor_wqe_max_sgl;
649 
650 	/*
651 	 * Use requested maximum number of SGL to calculate the max descriptor
652 	 * size (while guaranteeing that the descriptor size is a power-of-2
653 	 * cachelines).  We have to use the calculation for QP1 MLX transport
654 	 * because the possibility that we might need to inline a GRH, along
655 	 * with all the other headers and alignment restrictions, sets the
656 	 * maximum for the number of SGLs that we can advertise support for.
657 	 */
658 	max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
659 	log2 = highbit(max_size);
660 	if ((max_size & (max_size - 1)) == 0) {
661 		log2 = log2 - 1;
662 	}
663 	max_size = (1 << log2);
664 
665 	/*
666 	 * Now clip the maximum descriptor size based on Tavor HW maximum
667 	 */
668 	max_size = min(max_size, TAVOR_QP_WQE_MAX_SIZE);
669 
670 	/*
671 	 * Then use the calculated max descriptor size to determine the "real"
672 	 * maximum SGL (the number beyond which we would roll over to the next
673 	 * power-of-2).
674 	 */
675 	real_max_sgl = (max_size - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
676 
677 	/* Then save away this configuration information */
678 	cp->cp_wqe_max_sgl	= max_sgl;
679 	cp->cp_wqe_real_max_sgl = real_max_sgl;
680 
681 	/* SRQ SGL gets set to it's own patchable variable value */
682 	cp->cp_srq_max_sgl		= tavor_srq_max_sgl;
683 }
684 
685 
686 /*
687  * tavor_cfg_prop_lookup()
688  *    Context: Only called from attach() path context
689  */
690 static void
691 tavor_cfg_prop_lookup(tavor_state_t *state, tavor_cfg_profile_t *cp)
692 {
693 	uint_t		num_ports, nelementsp;
694 	uchar_t		*datap;
695 	int		status;
696 
697 	/*
698 	 * Read the property defining the number of Tavor ports to
699 	 * support.  If the property is undefined or invalid, then return.
700 	 * We return here assuming also that OBP is not supposed to be setting
701 	 * up other properties in this case (eg: HCA plugin cards).  But if
702 	 * this property is valid, then we print out a message for the other
703 	 * properties to show an OBP error.
704 	 */
705 	num_ports = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
706 	    DDI_PROP_DONTPASS, "#ports", 0);
707 	if ((num_ports > TAVOR_NUM_PORTS) || (num_ports == 0)) {
708 		return;
709 	}
710 	cp->cp_num_ports   = num_ports;
711 
712 	/*
713 	 * The system image guid is not currently supported in the 1275
714 	 * binding.  So we leave this commented out for now.
715 	 */
716 #ifdef SUPPORTED_IN_1275_BINDING
717 	/*
718 	 * Read the property defining the value to use later to override the
719 	 * default SystemImageGUID (in firmware).  If the property is
720 	 * undefined, then return.
721 	 */
722 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
723 	    DDI_PROP_DONTPASS, "system-image-guid", &datap, &nelementsp);
724 	if (status == DDI_PROP_SUCCESS) {
725 		cp->cp_sysimgguid = ((uint64_t *)datap)[0];
726 		ddi_prop_free(datap);
727 	} else {
728 		cmn_err(CE_NOTE,
729 		    "Unable to read OBP system-image-guid property");
730 	}
731 #endif
732 
733 	/*
734 	 * Read the property defining the value to use later to override
735 	 * the default SystemImageGUID (in firmware).  If the property is
736 	 * undefined, then return.
737 	 */
738 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
739 	    DDI_PROP_DONTPASS, "node-guid", &datap, &nelementsp);
740 	if (status == DDI_PROP_SUCCESS) {
741 		cp->cp_nodeguid = ((uint64_t *)datap)[0];
742 		ddi_prop_free(datap);
743 	} else {
744 		cmn_err(CE_NOTE, "Unable to read OBP node-guid property");
745 	}
746 
747 	/*
748 	 * Using the value for the number of ports (above) read the properties
749 	 * used to later to override the default PortGUIDs for each Tavor port.
750 	 * If either of these properties are undefined, then return.
751 	 */
752 	if (num_ports == TAVOR_NUM_PORTS) {
753 		status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY,
754 		    state->ts_dip, DDI_PROP_DONTPASS, "port-2-guid", &datap,
755 		    &nelementsp);
756 		if (status == DDI_PROP_SUCCESS) {
757 			cp->cp_portguid[1] = ((uint64_t *)datap)[0];
758 			ddi_prop_free(datap);
759 		} else {
760 			cmn_err(CE_NOTE,
761 			    "Unable to read OBP port-2-guid property");
762 		}
763 	}
764 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
765 	    DDI_PROP_DONTPASS, "port-1-guid", &datap, &nelementsp);
766 	if (status == DDI_PROP_SUCCESS) {
767 		cp->cp_portguid[0] = ((uint64_t *)datap)[0];
768 		ddi_prop_free(datap);
769 	} else {
770 		cmn_err(CE_NOTE, "Unable to read OBP port-1-guid property");
771 	}
772 }
773 
#ifdef __sparc
/*
 * tavor_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Probes whether IOMMU bypass can be used and records the resulting
 *    bind mode (and streaming-on-bypass policy) in the configuration
 *    profile.  Only called when bypass was requested.
 */
static void
tavor_check_iommu_bypass(tavor_state_t *state, tavor_cfg_profile_t *cp)
{
	ddi_dma_attr_t		bypass_attr;
	ddi_dma_handle_t	probe_hdl;
	int			rc;

	/* Start from the default Tavor DMA attributes... */
	tavor_dma_attr_init(&bypass_attr);

	/* ...and request IOMMU bypass (Force Physical) */
	bypass_attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;

	/*
	 * Try to allocate a DMA handle with the bypass attributes.  If
	 * ddi_dma_alloc_handle() returns DDI_DMA_BADATTR then it is not
	 * possible to use IOMMU bypass with our PCI bridge parent.  For
	 * example, certain versions of Tomatillo do not support IOMMU bypass.
	 */
	rc = ddi_dma_alloc_handle(state->ts_dip, &bypass_attr,
	    DDI_DMA_SLEEP, NULL, &probe_hdl);

	/* The handle was only a probe; release it if we actually got one */
	if (rc == DDI_SUCCESS) {
		ddi_dma_free_handle(&probe_hdl);
	}

	/*
	 * Any result other than DDI_DMA_BADATTR selects bypass; only an
	 * explicit rejection of the attributes falls back to normal (ie:
	 * non-bypass) mapping.
	 */
	if (rc != DDI_DMA_BADATTR) {
		cp->cp_iommu_bypass = TAVOR_BINDMEM_BYPASS;
		cp->cp_disable_streaming_on_bypass =
		    tavor_disable_streaming_on_bypass;
	} else {
		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
		cp->cp_disable_streaming_on_bypass = 0;
	}
}
#endif
816