1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_cfg.c
29  *    Tavor Configuration Profile Routines
30  *
31  *    Implements the routines necessary for initializing and (later) tearing
32  *    down the list of Tavor configuration information.
33  */
34 
35 #include <sys/types.h>
36 #include <sys/conf.h>
37 #include <sys/ddi.h>
38 #include <sys/sunddi.h>
39 #include <sys/modctl.h>
40 #include <sys/bitmap.h>
41 
42 #include <sys/ib/adapters/tavor/tavor.h>
43 
44 /* Set to enable alternative configurations: 0 = automatic config, 1 = manual */
45 uint32_t tavor_alt_config_enable	= 0;
46 
47 /* Number of supported QPs and their maximum size */
48 uint32_t tavor_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
49 uint32_t tavor_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
50 
51 /* Number of supported SGL per WQE */
52 uint32_t tavor_wqe_max_sgl		= TAVOR_NUM_WQE_SGL;
53 
54 /* Number of supported CQs and their maximum size */
55 uint32_t tavor_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
56 uint32_t tavor_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
57 
58 /* Select to enable SRQ or not; NOTE: 0 for disabled, 1 for enabled */
59 uint32_t tavor_srq_enable		= 1;
60 
61 /* Select to enable FMR or not; NOTE: 0 for disabled, 1 for enabled */
62 uint32_t tavor_fmr_enable		= 1;
63 
64 /* Number of supported SRQs and their maximum size */
65 uint32_t tavor_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
66 uint32_t tavor_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
67 uint32_t tavor_srq_max_sgl		= TAVOR_SRQ_MAX_SGL;
68 
69 /* Default size for all EQs */
70 uint32_t tavor_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
71 
72 /* Number of supported RDB (for incoming RDMA Read/Atomic) */
73 uint32_t tavor_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
74 
75 /*
76  * Number of support multicast groups, number of QP per multicast group, and
77  * the number of entries (from the total number) in the multicast group "hash
78  * table"
79  */
80 uint32_t tavor_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
81 uint32_t tavor_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
82 uint32_t tavor_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
83 
84 /*
85  * Number of supported MPTs (memory regions and windows) and their maximum
86  * size.  Also the number of MTT per "MTT segment" (see tavor_mr.h for more
87  * details)
88  */
89 uint32_t tavor_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
90 uint32_t tavor_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
91 uint32_t tavor_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
92 
93 /*
94  * Number of remaps allowed for FMR before a sync is required.  This value
95  * determines how many times we can fmr_deregister() before the underlying fmr
96  * framework places the region to wait for an MTT_SYNC operation, cleaning up
97  * the old mappings.
98  */
99 uint32_t tavor_fmr_num_remaps		= TAVOR_FMR_MAX_REMAPS;
100 
101 /*
102  * Number of supported Tavor mailboxes ("In" and "Out") and their maximum
103  * sizes, respectively
104  */
105 uint32_t tavor_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
106 uint32_t tavor_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
107 uint32_t tavor_log_num_intr_inmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
108 uint32_t tavor_log_num_intr_outmbox	= TAVOR_NUM_INTR_MAILBOXES_SHIFT;
109 uint32_t tavor_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
110 uint32_t tavor_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
111 
112 /* Number of supported UAR pages */
113 uint32_t tavor_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
114 
115 /* Number of supported Protection Domains (PD) */
116 uint32_t tavor_log_num_pd		= TAVOR_NUM_PD_SHIFT;
117 
118 /* Number of supported Address Handles (AH) */
119 uint32_t tavor_log_num_ah		= TAVOR_NUM_AH_SHIFT;
120 
121 /*
122  * Number of total supported PKeys per PKey table (i.e.
123  * per port).  Also the number of SGID per GID table.
124  */
125 uint32_t tavor_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
126 uint32_t tavor_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
127 
128 /* Maximum "responder resources" (in) and "initiator depth" (out) per QP */
129 uint32_t tavor_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
130 uint32_t tavor_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
131 
132 /* Maximum supported MTU and portwidth */
133 uint32_t tavor_max_mtu			= TAVOR_MAX_MTU;
134 uint32_t tavor_max_port_width		= TAVOR_MAX_PORT_WIDTH;
135 
136 /* Number of supported Virtual Lanes (VL) */
137 uint32_t tavor_max_vlcap		= TAVOR_MAX_VLCAP;
138 
139 /* Number of supported ports (1 or 2) */
140 uint32_t tavor_num_ports		= TAVOR_NUM_PORTS;
141 
142 /*
143  * Whether or not to use the built-in (i.e. in firmware) agents for QP0 and
144  * QP1, respectively.
145  */
146 uint32_t tavor_qp0_agents_in_fw		= 1;
147 uint32_t tavor_qp1_agents_in_fw		= 0;
148 
149 /*
150  * Whether DMA mappings should be made with DDI_DMA_STREAMING or with
151  * DDI_DMA_CONSISTENT mode.  Note: 0 for "streaming", 1 for "consistent"
152  */
153 uint32_t tavor_streaming_consistent	= 1;
154 
155 /*
156  * For DMA mappings made with DDI_DMA_CONSISTENT, this flag determines
157  * whether to override the necessity for calls to ddi_dma_sync().
158  */
159 uint32_t tavor_consistent_syncoverride  = 0;
160 
161 /*
162  * Whether DMA mappings should bypass the PCI IOMMU or not.
163  * tavor_iommu_bypass is a global setting for all memory addresses.  However,
164  * if set to BYPASS, memory attempted to be registered for streaming (ie:
165  * NON-COHERENT) will necessarily turn off BYPASS for that registration.  To
166  * instead disable streaming in this situation the
167  * 'tavor_disable_streaming_on_bypass' can be set to 1.  This setting will
168  * change the memory mapping to be implicitly consistent (ie: COHERENT), and
169  * will still perform the iommu BYPASS operation.
170  */
171 uint32_t tavor_iommu_bypass		= 1;
172 uint32_t tavor_disable_streaming_on_bypass = 0;
173 
174 /*
175  * Whether QP work queues should be allocated from system memory or
176  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
177  */
178 uint32_t tavor_qp_wq_inddr		= 0;
179 
180 /*
181  * Whether SRQ work queues should be allocated from system memory or
182  * from Tavor DDR memory.  Note: 0 for system memory, 1 for DDR memory
183  */
184 uint32_t tavor_srq_wq_inddr		= 0;
185 
186 /*
187  * Whether Tavor should use MSI (Message Signaled Interrupts), if available.
188  * Note: 0 indicates 'legacy interrupt', 1 indicates MSI (if available)
189  */
190 uint32_t tavor_use_msi_if_avail		= 1;
191 
192 /*
193  * This is a patchable variable that determines the time we will wait after
194  * initiating SW reset before we do our first read from Tavor config space.
195  * If this value is set too small (less than the default 100ms), it is
196  * possible for Tavor hardware to be unready to respond to the config cycle
197  * reads.  This could cause master abort on the PCI bridge.  Note: If
198  * "tavor_sw_reset_delay" is set to zero, then no software reset of the Tavor
199  * device will be attempted.
200  */
201 uint32_t tavor_sw_reset_delay		= TAVOR_SW_RESET_DELAY;
202 
203 /*
204  * These are patchable variables for tavor command polling. The poll_delay is
205  * the number of usec to wait in-between calls to poll the 'go' bit.  The
206  * poll_max is the total number of usec to loop in waiting for the 'go' bit to
207  * clear.
208  */
209 uint32_t tavor_cmd_poll_delay		= TAVOR_CMD_POLL_DELAY;
210 uint32_t tavor_cmd_poll_max		= TAVOR_CMD_POLL_MAX;
211 
212 /*
213  * This is a patchable variable that determines the frequency with which
214  * the AckReq bit will be set in outgoing RC packets.  The AckReq bit will be
215  * set in at least every 2^tavor_qp_ackreq_freq packets (but at least once
216  * per message, i.e. in the last packet).  Tuning this value can increase
217  * IB fabric utilization by cutting down on the number of unnecessary ACKs.
218  */
219 uint32_t tavor_qp_ackreq_freq		= TAVOR_QP_ACKREQ_FREQ;
220 
221 /*
222  * This is a patchable variable that determines the default value for the
223  * maximum number of outstanding split transactions.  The number of
224  * outstanding split transations (i.e. PCI reads) has an affect on device
225  * throughput.  The value here should not be modified as it defines the
226  * default (least common denominator - one (1) PCI read) behavior that is
227  * guaranteed to work, regardless of how the Tavor firmware has been
228  * initialized.  The format for this variable is the same as the corresponding
229  * field in the "PCI-X Command Register".
230  */
231 #ifdef	__sparc
232 /*
233  * Default SPARC platforms to be 1 outstanding PCI read.
234  */
235 int tavor_max_out_splt_trans	= 0;
236 #else
237 /*
238  * Default non-SPARC platforms to be the default as set in tavor firmware
239  * number of outstanding PCI reads.
240  */
241 int tavor_max_out_splt_trans	= -1;
242 #endif
243 
244 /*
245  * This is a patchable variable that determines the default value for the
246  * maximum size of PCI read burst.  This maximum size has an affect on
247  * device throughput.  The value here should not be modified as it defines
248  * the default (least common denominator - 512B read) behavior that is
249  * guaranteed to work, regardless of how the Tavor device has been
250  * initialized.  The format for this variable is the same as the corresponding
251  * field in the "PCI-X Command Register".
252  */
253 #ifdef	__sparc
254 /*
255  * Default SPARC platforms to be 512B read.
256  */
257 int tavor_max_mem_rd_byte_cnt	= 0;
258 static void tavor_check_iommu_bypass(tavor_state_t *state,
259     tavor_cfg_profile_t *cp);
260 #else
261 /*
262  * Default non-SPARC platforms to be the default as set in tavor firmware.
263  *
264  */
265 int tavor_max_mem_rd_byte_cnt	= -1;
266 #endif
267 
268 static void tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp);
269 static void tavor_cfg_prop_lookup(tavor_state_t *state,
270     tavor_cfg_profile_t *cp);
271 
272 /*
273  * tavor_cfg_profile_init_phase1()
274  *    Context: Only called from attach() path context
275  */
276 int
277 tavor_cfg_profile_init_phase1(tavor_state_t *state)
278 {
279 	tavor_cfg_profile_t	*cp;
280 
281 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase1);
282 
283 	/*
284 	 * Allocate space for the configuration profile structure
285 	 */
286 	cp = (tavor_cfg_profile_t *)kmem_zalloc(sizeof (tavor_cfg_profile_t),
287 	    KM_SLEEP);
288 
289 	cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
290 	cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
291 	cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
292 	cp->cp_cmd_poll_delay		= tavor_cmd_poll_delay;
293 	cp->cp_cmd_poll_max		= tavor_cmd_poll_max;
294 	cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
295 	cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
296 	cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
297 	cp->cp_srq_enable		= tavor_srq_enable;
298 	cp->cp_fmr_enable		= tavor_fmr_enable;
299 	cp->cp_fmr_max_remaps		= tavor_fmr_num_remaps;
300 
301 	/*
302 	 * Although most of the configuration is enabled in "phase2" of the
303 	 * cfg_profile_init, we have to setup the OUT mailboxes here, since
304 	 * they are used immediately after this "phase1" completes.  Check for
305 	 * alt_config_enable, and set the values appropriately.  Otherwise, the
306 	 * config profile is setup using the values based on the dimm size.
307 	 * While it is expected that the mailbox size and number will remain
308 	 * the same independent of dimm size, we separate it out here anyway
309 	 * for completeness.
310 	 *
311 	 * We have to setup SRQ settings here because MOD_STAT_CFG must be
312 	 * called before our call to QUERY_DEVLIM.  If SRQ is enabled, then we
313 	 * must enable it in the firmware so that the phase2 settings will have
314 	 * the right device limits.
315 	 */
316 	if (tavor_alt_config_enable) {
317 		cp->cp_log_num_outmbox		= tavor_log_num_outmbox;
318 		cp->cp_log_num_intr_outmbox	= tavor_log_num_intr_outmbox;
319 		cp->cp_log_outmbox_size		= tavor_log_outmbox_size;
320 		cp->cp_log_num_inmbox		= tavor_log_num_inmbox;
321 		cp->cp_log_num_intr_inmbox	= tavor_log_num_intr_inmbox;
322 		cp->cp_log_inmbox_size		= tavor_log_inmbox_size;
323 		cp->cp_log_num_srq		= tavor_log_num_srq;
324 		cp->cp_log_max_srq_sz		= tavor_log_max_srq_sz;
325 
326 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
327 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
328 		cp->cp_log_num_intr_outmbox	=
329 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
330 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
331 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
332 		cp->cp_log_num_intr_inmbox	=
333 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
334 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
335 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_256;
336 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
337 
338 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
339 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
340 		cp->cp_log_num_intr_outmbox	=
341 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
342 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
343 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
344 		cp->cp_log_num_intr_inmbox	=
345 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
346 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
347 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_128;
348 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT;
349 
350 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
351 		cp->cp_log_num_outmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
352 		cp->cp_log_num_intr_outmbox	=
353 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
354 		cp->cp_log_outmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
355 		cp->cp_log_num_inmbox		= TAVOR_NUM_MAILBOXES_SHIFT;
356 		cp->cp_log_num_intr_inmbox	=
357 		    TAVOR_NUM_INTR_MAILBOXES_SHIFT;
358 		cp->cp_log_inmbox_size		= TAVOR_MBOX_SIZE_SHIFT;
359 		cp->cp_log_num_srq		= TAVOR_NUM_SRQ_SHIFT_MIN;
360 		cp->cp_log_max_srq_sz		= TAVOR_SRQ_SZ_SHIFT_MIN;
361 
362 	} else {
363 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
364 		    TAVOR_TNF_ERROR, "");
365 		return (DDI_FAILURE);
366 	}
367 
368 	/*
369 	 * Set default DMA mapping mode.  Ensure consistency of flags
370 	 * with both architecture type and other configuration flags.
371 	 */
372 	if (tavor_streaming_consistent == 0) {
373 #ifdef	__sparc
374 		cp->cp_streaming_consistent = DDI_DMA_STREAMING;
375 
376 		/* Can't do both "streaming" and IOMMU bypass */
377 		if (tavor_iommu_bypass != 0) {
378 			TNF_PROBE_0(tavor_cfg_profile_streamingbypass_fail,
379 			    TAVOR_TNF_ERROR, "");
380 			kmem_free(cp, sizeof (tavor_cfg_profile_t));
381 			return (DDI_FAILURE);
382 		}
383 #else
384 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
385 #endif
386 	} else {
387 		cp->cp_streaming_consistent = DDI_DMA_CONSISTENT;
388 	}
389 
390 	/* Determine whether to override ddi_dma_sync() */
391 	cp->cp_consistent_syncoverride = tavor_consistent_syncoverride;
392 
393 	/* Attach the configuration profile to Tavor softstate */
394 	state->ts_cfg_profile = cp;
395 
396 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase1);
397 	return (DDI_SUCCESS);
398 }
399 
400 /*
401  * tavor_cfg_profile_init_phase2()
402  *    Context: Only called from attach() path context
403  */
404 int
405 tavor_cfg_profile_init_phase2(tavor_state_t *state)
406 {
407 	tavor_cfg_profile_t	*cp;
408 
409 	TAVOR_TNF_ENTER(tavor_cfg_profile_init_phase2);
410 
411 	/* Read the configuration profile from Tavor softstate */
412 	cp = state->ts_cfg_profile;
413 
414 	/*
415 	 * Verify the config profile setting.  The 'setting' should already be
416 	 * set, during a call to ddi_dev_regsize() to get the size of DDR
417 	 * memory, or during a fallback to a smaller supported size.  If it is
418 	 * not set, we should not have reached this 'phase2'.  So we assert
419 	 * here.
420 	 */
421 	ASSERT(state->ts_cfg_profile_setting != 0);
422 
423 	/*
424 	 * The automatic configuration override is the
425 	 * 'tavor_alt_config_enable' variable.  If this is set, we no longer
426 	 * use the DIMM size to enable the correct profile.  Instead, all of
427 	 * the tavor config options at the top of this file are used directly.
428 	 *
429 	 * This allows customization for a user who knows what they are doing
430 	 * to set tavor configuration values manually.
431 	 *
432 	 * If this variable is 0, we do automatic config for both 128MB and
433 	 * 256MB DIMM sizes.
434 	 */
435 	if (tavor_alt_config_enable) {
436 		/*
437 		 * Initialize the configuration profile
438 		 */
439 		cp->cp_log_num_qp		= tavor_log_num_qp;
440 		cp->cp_log_max_qp_sz		= tavor_log_max_qp_sz;
441 
442 		/* Determine WQE sizes from requested max SGLs */
443 		tavor_cfg_wqe_sizes(cp);
444 
445 		cp->cp_log_num_cq		= tavor_log_num_cq;
446 		cp->cp_log_max_cq_sz		= tavor_log_max_cq_sz;
447 		cp->cp_log_default_eq_sz	= tavor_log_default_eq_sz;
448 		cp->cp_log_num_rdb		= tavor_log_num_rdb;
449 		cp->cp_log_num_mcg		= tavor_log_num_mcg;
450 		cp->cp_num_qp_per_mcg		= tavor_num_qp_per_mcg;
451 		cp->cp_log_num_mcg_hash		= tavor_log_num_mcg_hash;
452 		cp->cp_log_num_mpt		= tavor_log_num_mpt;
453 		cp->cp_log_max_mrw_sz		= tavor_log_max_mrw_sz;
454 		cp->cp_log_num_mttseg		= tavor_log_num_mttseg;
455 		cp->cp_log_num_uar		= tavor_log_num_uar;
456 		cp->cp_log_num_pd		= tavor_log_num_pd;
457 		cp->cp_log_num_ah		= tavor_log_num_ah;
458 		cp->cp_log_max_pkeytbl		= tavor_log_max_pkeytbl;
459 		cp->cp_log_max_gidtbl		= tavor_log_max_gidtbl;
460 		cp->cp_hca_max_rdma_in_qp	= tavor_hca_max_rdma_in_qp;
461 		cp->cp_hca_max_rdma_out_qp	= tavor_hca_max_rdma_out_qp;
462 		cp->cp_max_mtu			= tavor_max_mtu;
463 		cp->cp_max_port_width		= tavor_max_port_width;
464 		cp->cp_max_vlcap		= tavor_max_vlcap;
465 		cp->cp_num_ports		= tavor_num_ports;
466 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
467 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
468 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
469 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
470 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
471 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
472 
473 	} else if (state->ts_cfg_profile_setting >= TAVOR_DDR_SIZE_256) {
474 		/*
475 		 * Initialize the configuration profile
476 		 */
477 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_256;
478 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
479 
480 		/* Determine WQE sizes from requested max SGLs */
481 		tavor_cfg_wqe_sizes(cp);
482 
483 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_256;
484 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
485 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
486 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_256;
487 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
488 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
489 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
490 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_256;
491 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_256;
492 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
493 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
494 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
495 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
496 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
497 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
498 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
499 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
500 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
501 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
502 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
503 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
504 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
505 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
506 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
507 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
508 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
509 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
510 
511 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_128) {
512 		/*
513 		 * Initialize the configuration profile
514 		 */
515 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_128;
516 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT;
517 
518 		/* Determine WQE sizes from requested max SGLs */
519 		tavor_cfg_wqe_sizes(cp);
520 
521 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_128;
522 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT;
523 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
524 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_128;
525 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT;
526 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG;
527 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT;
528 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_128;
529 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_128;
530 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT;
531 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT;
532 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
533 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT;
534 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
535 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
536 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
537 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
538 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
539 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
540 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
541 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
542 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
543 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
544 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
545 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
546 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
547 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
548 
549 	} else if (state->ts_cfg_profile_setting == TAVOR_DDR_SIZE_MIN) {
550 		/*
551 		 * Initialize the configuration profile for minimal footprint.
552 		 */
553 
554 		cp->cp_log_num_qp		= TAVOR_NUM_QP_SHIFT_MIN;
555 		cp->cp_log_max_qp_sz		= TAVOR_QP_SZ_SHIFT_MIN;
556 
557 		/* Determine WQE sizes from requested max SGLs */
558 		tavor_cfg_wqe_sizes(cp);
559 
560 		cp->cp_log_num_cq		= TAVOR_NUM_CQ_SHIFT_MIN;
561 		cp->cp_log_max_cq_sz		= TAVOR_CQ_SZ_SHIFT_MIN;
562 		cp->cp_log_default_eq_sz	= TAVOR_DEFAULT_EQ_SZ_SHIFT;
563 		cp->cp_log_num_rdb		= TAVOR_NUM_RDB_SHIFT_MIN;
564 		cp->cp_log_num_mcg		= TAVOR_NUM_MCG_SHIFT_MIN;
565 		cp->cp_num_qp_per_mcg		= TAVOR_NUM_QP_PER_MCG_MIN;
566 		cp->cp_log_num_mcg_hash		= TAVOR_NUM_MCG_HASH_SHIFT_MIN;
567 		cp->cp_log_num_mpt		= TAVOR_NUM_MPT_SHIFT_MIN;
568 		cp->cp_log_max_mrw_sz		= TAVOR_MAX_MEM_MPT_SHIFT_MIN;
569 		cp->cp_log_num_mttseg		= TAVOR_NUM_MTTSEG_SHIFT_MIN;
570 		cp->cp_log_num_uar		= TAVOR_NUM_UAR_SHIFT_MIN;
571 		cp->cp_log_num_pd		= TAVOR_NUM_PD_SHIFT;
572 		cp->cp_log_num_ah		= TAVOR_NUM_AH_SHIFT_MIN;
573 		cp->cp_log_max_pkeytbl		= TAVOR_NUM_PKEYTBL_SHIFT;
574 		cp->cp_log_max_gidtbl		= TAVOR_NUM_GIDTBL_SHIFT;
575 		cp->cp_hca_max_rdma_in_qp	= TAVOR_HCA_MAX_RDMA_IN_QP;
576 		cp->cp_hca_max_rdma_out_qp	= TAVOR_HCA_MAX_RDMA_OUT_QP;
577 		cp->cp_max_mtu			= TAVOR_MAX_MTU;
578 		cp->cp_max_port_width		= TAVOR_MAX_PORT_WIDTH;
579 		cp->cp_max_vlcap		= TAVOR_MAX_VLCAP;
580 		cp->cp_num_ports		= TAVOR_NUM_PORTS;
581 		cp->cp_qp0_agents_in_fw		= tavor_qp0_agents_in_fw;
582 		cp->cp_qp1_agents_in_fw		= tavor_qp1_agents_in_fw;
583 		cp->cp_sw_reset_delay		= tavor_sw_reset_delay;
584 		cp->cp_ackreq_freq		= tavor_qp_ackreq_freq;
585 		cp->cp_max_out_splt_trans	= tavor_max_out_splt_trans;
586 		cp->cp_max_mem_rd_byte_cnt	= tavor_max_mem_rd_byte_cnt;
587 
588 	} else {
589 		TNF_PROBE_0(tavor_cfg_profile_invalid_dimmsz_fail,
590 		    TAVOR_TNF_ERROR, "");
591 		return (DDI_FAILURE);
592 	}
593 
594 	/*
595 	 * Set IOMMU bypass or not.  Ensure consistency of flags with
596 	 * architecture type.
597 	 */
598 #ifdef __sparc
599 	if (tavor_iommu_bypass == 1) {
600 		tavor_check_iommu_bypass(state, cp);
601 	} else {
602 		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
603 		cp->cp_disable_streaming_on_bypass = 0;
604 	}
605 #else
606 	cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
607 	cp->cp_disable_streaming_on_bypass = 0;
608 #endif
609 	/* Set whether QP WQEs will be in DDR or not */
610 	cp->cp_qp_wq_inddr = (tavor_qp_wq_inddr == 0) ?
611 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
612 
613 	/* Set whether SRQ WQEs will be in DDR or not */
614 	cp->cp_srq_wq_inddr = (tavor_srq_wq_inddr == 0) ?
615 	    TAVOR_QUEUE_LOCATION_NORMAL : TAVOR_QUEUE_LOCATION_INDDR;
616 
617 	cp->cp_use_msi_if_avail = tavor_use_msi_if_avail;
618 
619 	/* Determine additional configuration from optional properties */
620 	tavor_cfg_prop_lookup(state, cp);
621 
622 	TAVOR_TNF_EXIT(tavor_cfg_profile_init_phase2);
623 	return (DDI_SUCCESS);
624 }
625 
626 
627 /*
628  * tavor_cfg_profile_fini()
629  *    Context: Only called from attach() and/or detach() path contexts
630  */
631 void
632 tavor_cfg_profile_fini(tavor_state_t *state)
633 {
634 	TAVOR_TNF_ENTER(tavor_cfg_profile_fini);
635 
636 	/*
637 	 * Free up the space for configuration profile
638 	 */
639 	kmem_free(state->ts_cfg_profile, sizeof (tavor_cfg_profile_t));
640 
641 	TAVOR_TNF_EXIT(tavor_cfg_profile_fini);
642 }
643 
644 
645 /*
646  * tavor_cfg_wqe_sizes()
647  *    Context: Only called from attach() path context
648  */
649 static void
650 tavor_cfg_wqe_sizes(tavor_cfg_profile_t *cp)
651 {
652 	uint_t	max_size, log2;
653 	uint_t	max_sgl, real_max_sgl;
654 
655 	/*
656 	 * Get the requested maximum number SGL per WQE from the Tavor
657 	 * patchable variable
658 	 */
659 	max_sgl = tavor_wqe_max_sgl;
660 
661 	/*
662 	 * Use requested maximum number of SGL to calculate the max descriptor
663 	 * size (while guaranteeing that the descriptor size is a power-of-2
664 	 * cachelines).  We have to use the calculation for QP1 MLX transport
665 	 * because the possibility that we might need to inline a GRH, along
666 	 * with all the other headers and alignment restrictions, sets the
667 	 * maximum for the number of SGLs that we can advertise support for.
668 	 */
669 	max_size = (TAVOR_QP_WQE_MLX_QP1_HDRS + (max_sgl << 4));
670 	log2 = highbit(max_size);
671 	if ((max_size & (max_size - 1)) == 0) {
672 		log2 = log2 - 1;
673 	}
674 	max_size = (1 << log2);
675 
676 	/*
677 	 * Now clip the maximum descriptor size based on Tavor HW maximum
678 	 */
679 	max_size = min(max_size, TAVOR_QP_WQE_MAX_SIZE);
680 
681 	/*
682 	 * Then use the calculated max descriptor size to determine the "real"
683 	 * maximum SGL (the number beyond which we would roll over to the next
684 	 * power-of-2).
685 	 */
686 	real_max_sgl = (max_size - TAVOR_QP_WQE_MLX_QP1_HDRS) >> 4;
687 
688 	/* Then save away this configuration information */
689 	cp->cp_wqe_max_sgl	= max_sgl;
690 	cp->cp_wqe_real_max_sgl = real_max_sgl;
691 
692 	/* SRQ SGL gets set to it's own patchable variable value */
693 	cp->cp_srq_max_sgl		= tavor_srq_max_sgl;
694 }
695 
696 
697 /*
698  * tavor_cfg_prop_lookup()
699  *    Context: Only called from attach() path context
700  */
701 static void
702 tavor_cfg_prop_lookup(tavor_state_t *state, tavor_cfg_profile_t *cp)
703 {
704 	uint_t		num_ports, nelementsp;
705 	uchar_t		*datap;
706 	int		status;
707 
708 	/*
709 	 * Read the property defining the number of Tavor ports to
710 	 * support.  If the property is undefined or invalid, then return.
711 	 * We return here assuming also that OBP is not supposed to be setting
712 	 * up other properties in this case (eg: HCA plugin cards).  But if
713 	 * this property is valid, then we print out a message for the other
714 	 * properties to show an OBP error.
715 	 */
716 	num_ports = ddi_prop_get_int(DDI_DEV_T_ANY, state->ts_dip,
717 	    DDI_PROP_DONTPASS, "#ports", 0);
718 	if ((num_ports > TAVOR_NUM_PORTS) || (num_ports == 0)) {
719 		return;
720 	}
721 	cp->cp_num_ports   = num_ports;
722 
723 	/*
724 	 * The system image guid is not currently supported in the 1275
725 	 * binding.  So we leave this commented out for now.
726 	 */
727 #ifdef SUPPORTED_IN_1275_BINDING
728 	/*
729 	 * Read the property defining the value to use later to override the
730 	 * default SystemImageGUID (in firmware).  If the property is
731 	 * undefined, then return.
732 	 */
733 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
734 	    DDI_PROP_DONTPASS, "system-image-guid", &datap, &nelementsp);
735 	if (status == DDI_PROP_SUCCESS) {
736 		cp->cp_sysimgguid = ((uint64_t *)datap)[0];
737 		ddi_prop_free(datap);
738 	} else {
739 		cmn_err(CE_NOTE,
740 		    "Unable to read OBP system-image-guid property");
741 	}
742 #endif
743 
744 	/*
745 	 * Read the property defining the value to use later to override
746 	 * the default SystemImageGUID (in firmware).  If the property is
747 	 * undefined, then return.
748 	 */
749 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
750 	    DDI_PROP_DONTPASS, "node-guid", &datap, &nelementsp);
751 	if (status == DDI_PROP_SUCCESS) {
752 		cp->cp_nodeguid = ((uint64_t *)datap)[0];
753 		ddi_prop_free(datap);
754 	} else {
755 		cmn_err(CE_NOTE, "Unable to read OBP node-guid property");
756 	}
757 
758 	/*
759 	 * Using the value for the number of ports (above) read the properties
760 	 * used to later to override the default PortGUIDs for each Tavor port.
761 	 * If either of these properties are undefined, then return.
762 	 */
763 	if (num_ports == TAVOR_NUM_PORTS) {
764 		status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY,
765 		    state->ts_dip, DDI_PROP_DONTPASS, "port-2-guid", &datap,
766 		    &nelementsp);
767 		if (status == DDI_PROP_SUCCESS) {
768 			cp->cp_portguid[1] = ((uint64_t *)datap)[0];
769 			ddi_prop_free(datap);
770 		} else {
771 			cmn_err(CE_NOTE,
772 			    "Unable to read OBP port-2-guid property");
773 		}
774 	}
775 	status = ddi_prop_lookup_byte_array(DDI_DEV_T_ANY, state->ts_dip,
776 	    DDI_PROP_DONTPASS, "port-1-guid", &datap, &nelementsp);
777 	if (status == DDI_PROP_SUCCESS) {
778 		cp->cp_portguid[0] = ((uint64_t *)datap)[0];
779 		ddi_prop_free(datap);
780 	} else {
781 		cmn_err(CE_NOTE, "Unable to read OBP port-1-guid property");
782 	}
783 }
784 
#ifdef __sparc
/*
 * tavor_check_iommu_bypass()
 *    Context: Only called from attach() path context
 *
 *    Probes whether IOMMU bypass can be used and records the outcome in the
 *    configuration profile (cp_iommu_bypass and
 *    cp_disable_streaming_on_bypass).
 */
static void
tavor_check_iommu_bypass(tavor_state_t *state, tavor_cfg_profile_t *cp)
{
	ddi_dma_attr_t		attr;
	ddi_dma_handle_t	probe_hdl;
	int			rc;

	/*
	 * Start from the attributes set up by tavor_dma_attr_init() and ask
	 * for IOMMU bypass (Force Physical) on top of them.
	 */
	tavor_dma_attr_init(&attr);
	attr.dma_attr_flags = DDI_DMA_FORCE_PHYSICAL;

	/*
	 * Try to allocate a DMA handle with those attributes.  A return of
	 * DDI_DMA_BADATTR means it is not possible to use IOMMU bypass with
	 * our PCI bridge parent (for example, certain versions of Tomatillo
	 * do not support it), so fall back to normal mapping.
	 */
	rc = ddi_dma_alloc_handle(state->ts_dip, &attr, DDI_DMA_SLEEP, NULL,
	    &probe_hdl);
	if (rc == DDI_DMA_BADATTR) {
		cp->cp_iommu_bypass = TAVOR_BINDMEM_NORMAL;
		cp->cp_disable_streaming_on_bypass = 0;
		return;
	}

	/* The handle was only a probe; release it if we actually got one */
	if (rc == DDI_SUCCESS) {
		ddi_dma_free_handle(&probe_hdl);
	}

	/*
	 * This function is only called when bypass was requested in the
	 * config profile, so any status other than DDI_DMA_BADATTR selects
	 * bypass.
	 */
	cp->cp_iommu_bypass = TAVOR_BINDMEM_BYPASS;
	cp->cp_disable_streaming_on_bypass = tavor_disable_streaming_on_bypass;
}
#endif
827