1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * tavor_stats.c
29  *    Tavor IB Performance Statistics routines
30  *
 *    Implements all the routines necessary for setting up, querying, and
 *    (later) tearing down all the kstats needed to implement the
 *    interfaces that provide busstat(8) access.
34  */
35 
36 #include <sys/types.h>
37 #include <sys/conf.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40 #include <sys/modctl.h>
41 
42 #include <sys/ib/adapters/tavor/tavor.h>
43 
44 static kstat_t *tavor_kstat_picN_create(tavor_state_t *state, int num_pic,
45     int num_evt, tavor_ks_mask_t *ev_array);
46 static kstat_t *tavor_kstat_cntr_create(tavor_state_t *state, int num_pic,
47     int (*update)(kstat_t *, int));
48 static int tavor_kstat_cntr_update(kstat_t *ksp, int rw);
49 
50 void tavor_kstat_perfcntr64_create(tavor_state_t *state, uint_t port_num);
51 static int tavor_kstat_perfcntr64_read(tavor_state_t *state, uint_t port,
52     int reset);
53 static void tavor_kstat_perfcntr64_thread_exit(tavor_ks_info_t *ksi);
54 static int tavor_kstat_perfcntr64_update(kstat_t *ksp, int rw);
55 
56 /*
57  * Tavor IB Performance Events structure
58  *    This structure is read-only and is used to setup the individual kstats
59  *    and to initialize the tki_ib_perfcnt[] array for each Tavor instance.
60  */
61 tavor_ks_mask_t tavor_ib_perfcnt_list[TAVOR_CNTR_NUMENTRIES] = {
62 	{"port_xmit_data", TAVOR_HW_PMEG_PORTXMITDATA_OFFSET,
63 	    0, 0xFFFFFFFF, 0, 0},
64 	{"port_recv_data", TAVOR_HW_PMEG_PORTRECVDATA_OFFSET,
65 	    0, 0xFFFFFFFF, 0, 0},
66 	{"port_xmit_pkts", TAVOR_HW_PMEG_PORTXMITPKTS_OFFSET,
67 	    0, 0xFFFFFFFF, 0, 0},
68 	{"port_recv_pkts", TAVOR_HW_PMEG_PORTRECVPKTS_OFFSET,
69 	    0, 0xFFFFFFFF, 0, 0},
70 	{"port_recv_err", TAVOR_HW_PMEG_PORTRECVERR_OFFSET,
71 	    0, 0xFFFF, 0, 0},
72 	{"port_xmit_discards", TAVOR_HW_PMEG_PORTXMITDISCARD_OFFSET,
73 	    0, 0xFFFF, 0, 0},
74 	{"vl15_dropped", TAVOR_HW_PMEG_VL15DROPPED_OFFSET,
75 	    0, 0xFFFF, 0, 0},
76 	{"port_xmit_wait", TAVOR_HW_PMEG_PORTXMITWAIT_OFFSET,
77 	    0, 0xFFFFFFFF, 0, 0},
78 	{"port_recv_remote_phys_err", TAVOR_HW_PMEG_PORTRECVREMPHYSERR_OFFSET,
79 	    0, 0xFFFF, 0, 0},
80 	{"port_xmit_constraint_err", TAVOR_HW_PMEG_PORTXMITCONSTERR_OFFSET,
81 	    0, 0xFF, 0, 0},
82 	{"port_recv_constraint_err", TAVOR_HW_PMEG_PORTRECVCONSTERR_OFFSET,
83 	    0, 0xFF, 0, 0},
84 	{"symbol_err_counter", TAVOR_HW_PMEG_SYMBOLERRCNT_OFFSET,
85 	    0, 0xFFFF, 0, 0},
86 	{"link_err_recovery_cnt", TAVOR_HW_PMEG_LINKERRRECOVERCNT_OFFSET,
87 	    0, 0xFFFF, 0, 0},
88 	{"link_downed_cnt", TAVOR_HW_PMEG_LINKDOWNEDCNT_OFFSET,
89 	    16, 0xFFFF, 0, 0},
90 	{"excessive_buffer_overruns", TAVOR_HW_PMEG_EXCESSBUFOVERRUN_OFFSET,
91 	    0, 0xF, 0, 0},
92 	{"local_link_integrity_err", TAVOR_HW_PMEG_LOCALLINKINTERR_OFFSET,
93 	    8, 0xF, 0, 0},
94 	{"clear_pic", 0, 0, 0, 0}
95 };
96 
/*
 * Return the maximum of (x) and (y).  Both arguments may be evaluated more
 * than once, so they must be free of side effects.
 */
#define	MAX(x, y)	(((x) > (y)) ? (x) : (y))

/*
 * Set (x) to the maximum of (x) and (y).  The body is wrapped in
 * do { } while (0) so that "SET_TO_MAX(a, b);" is exactly one statement and
 * can safely be used as the unbraced body of an if/else.  Both arguments may
 * be evaluated more than once, so they must be free of side effects.
 */
#define	SET_TO_MAX(x, y)	\
do {				\
	if ((x) < (y))		\
		(x) = (y);	\
} while (0)
110 
111 /*
112  * tavor_kstat_init()
113  *    Context: Only called from attach() path context
114  */
115 int
116 tavor_kstat_init(tavor_state_t *state)
117 {
118 	tavor_ks_info_t		*ksi;
119 	uint_t			numports;
120 	int			i;
121 
122 	TAVOR_TNF_ENTER(tavor_kstat_init);
123 
124 	/* Allocate a kstat info structure */
125 	ksi = (tavor_ks_info_t *)kmem_zalloc(sizeof (tavor_ks_info_t),
126 	    KM_SLEEP);
127 	if (ksi == NULL) {
128 		TNF_PROBE_0(tavor_kstat_init_kma_fail, TAVOR_TNF_ERROR, "");
129 		TAVOR_TNF_EXIT(tavor_kstat_init);
130 		return (DDI_FAILURE);
131 	}
132 	state->ts_ks_info = ksi;
133 
134 	/*
135 	 * Create as many "pic" and perfcntr64 kstats as we have IB ports.
136 	 * Enable all of the events specified in the "tavor_ib_perfcnt_list"
137 	 * structure.
138 	 */
139 	numports = state->ts_cfg_profile->cp_num_ports;
140 	for (i = 0; i < numports; i++) {
141 		ksi->tki_picN_ksp[i] = tavor_kstat_picN_create(state, i,
142 		    TAVOR_CNTR_NUMENTRIES, tavor_ib_perfcnt_list);
143 		if (ksi->tki_picN_ksp[i] == NULL) {
144 			TNF_PROBE_0(tavor_kstat_init_picN_fail,
145 			    TAVOR_TNF_ERROR, "");
146 			goto kstat_init_fail;
147 		}
148 
149 		tavor_kstat_perfcntr64_create(state, i + 1);
150 		if (ksi->tki_perfcntr64[i].tki64_ksp == NULL) {
151 			goto kstat_init_fail;
152 		}
153 	}
154 
155 	/* Create the "counters" kstat too */
156 	ksi->tki_cntr_ksp = tavor_kstat_cntr_create(state, numports,
157 	    tavor_kstat_cntr_update);
158 	if (ksi->tki_cntr_ksp == NULL) {
159 		TNF_PROBE_0(tavor_kstat_init_cntr_fail, TAVOR_TNF_ERROR, "");
160 		goto kstat_init_fail;
161 	}
162 
163 	/* Initialize the control register and initial counter values */
164 	ksi->tki_pcr  = 0;
165 	ksi->tki_pic0 = 0;
166 	ksi->tki_pic1 = 0;
167 
168 	/*
169 	 * Initialize the Tavor tki_ib_perfcnt[] array values using the
170 	 * default values in tavor_ib_perfcnt_list[]
171 	 */
172 	for (i = 0; i < TAVOR_CNTR_NUMENTRIES; i++) {
173 		ksi->tki_ib_perfcnt[i] = tavor_ib_perfcnt_list[i];
174 	}
175 
176 	mutex_init(&ksi->tki_perfcntr64_lock, NULL, MUTEX_DRIVER, NULL);
177 	cv_init(&ksi->tki_perfcntr64_cv, NULL, CV_DRIVER, NULL);
178 
179 	TAVOR_TNF_EXIT(tavor_kstat_init);
180 	return (DDI_SUCCESS);
181 
182 
183 kstat_init_fail:
184 
185 	/* Delete all the previously created kstats */
186 	if (ksi->tki_cntr_ksp != NULL) {
187 		kstat_delete(ksi->tki_cntr_ksp);
188 	}
189 	for (i = 0; i < numports; i++) {
190 		if (ksi->tki_picN_ksp[i] != NULL) {
191 			kstat_delete(ksi->tki_picN_ksp[i]);
192 		}
193 		if (ksi->tki_perfcntr64[i].tki64_ksp != NULL) {
194 			kstat_delete(ksi->tki_perfcntr64[i].tki64_ksp);
195 		}
196 	}
197 
198 	/* Free the kstat info structure */
199 	kmem_free(ksi, sizeof (tavor_ks_info_t));
200 
201 	TAVOR_TNF_EXIT(tavor_kstat_init);
202 	return (DDI_FAILURE);
203 }
204 
205 
206 /*
207  * tavor_kstat_init()
208  *    Context: Only called from attach() and/or detach() path contexts
209  */
210 void
211 tavor_kstat_fini(tavor_state_t *state)
212 {
213 	tavor_ks_info_t		*ksi;
214 	uint_t			numports;
215 	int			i;
216 
217 	TAVOR_TNF_ENTER(tavor_kstat_fini);
218 
219 	/* Get pointer to kstat info */
220 	ksi = state->ts_ks_info;
221 
222 	/*
223 	 * Signal the perfcntr64_update_thread to exit and wait until the
224 	 * thread exits.
225 	 */
226 	mutex_enter(&ksi->tki_perfcntr64_lock);
227 	tavor_kstat_perfcntr64_thread_exit(ksi);
228 	mutex_exit(&ksi->tki_perfcntr64_lock);
229 
230 	/* Delete all the "pic" and perfcntr64 kstats (one per port) */
231 	numports = state->ts_cfg_profile->cp_num_ports;
232 	for (i = 0; i < numports; i++) {
233 		if (ksi->tki_picN_ksp[i] != NULL) {
234 			kstat_delete(ksi->tki_picN_ksp[i]);
235 		}
236 		if (ksi->tki_perfcntr64[i].tki64_ksp != NULL) {
237 			kstat_delete(ksi->tki_perfcntr64[i].tki64_ksp);
238 		}
239 	}
240 
241 	/* Delete the "counter" kstats (one per port) */
242 	kstat_delete(ksi->tki_cntr_ksp);
243 
244 	cv_destroy(&ksi->tki_perfcntr64_cv);
245 	mutex_destroy(&ksi->tki_perfcntr64_lock);
246 
247 	/* Free the kstat info structure */
248 	kmem_free(ksi, sizeof (tavor_ks_info_t));
249 
250 	TAVOR_TNF_EXIT(tavor_kstat_fini);
251 }
252 
253 
254 /*
255  * tavor_kstat_picN_create()
256  *    Context: Only called from attach() path context
257  */
258 static kstat_t *
259 tavor_kstat_picN_create(tavor_state_t *state, int num_pic, int num_evt,
260     tavor_ks_mask_t *ev_array)
261 {
262 	kstat_t			*picN_ksp;
263 	struct kstat_named	*pic_named_data;
264 	int			drv_instance, i;
265 	char			*drv_name;
266 	char			pic_name[16];
267 
268 	TAVOR_TNF_ENTER(tavor_kstat_picN_create);
269 
270 	/*
271 	 * Create the "picN" kstat.  In the steps, below we will attach
272 	 * all of our named event types to it.
273 	 */
274 	drv_name = (char *)ddi_driver_name(state->ts_dip);
275 	drv_instance = ddi_get_instance(state->ts_dip);
276 	(void) sprintf(pic_name, "pic%d", num_pic);
277 	picN_ksp = kstat_create(drv_name, drv_instance, pic_name, "bus",
278 	    KSTAT_TYPE_NAMED, num_evt, 0);
279 	if (picN_ksp == NULL) {
280 		TNF_PROBE_0(tavor_kstat_picN_create_kstat_fail,
281 		    TAVOR_TNF_ERROR, "");
282 		TAVOR_TNF_EXIT(tavor_kstat_picN_create);
283 		return (NULL);
284 	}
285 	pic_named_data = (struct kstat_named *)(picN_ksp->ks_data);
286 
287 	/*
288 	 * Write event names and their associated pcr masks. The last entry
289 	 * in the array (clear_pic) is added separately below (as its pic
290 	 * value must be inverted).
291 	 */
292 	for (i = 0; i < num_evt - 1; i++) {
293 		pic_named_data[i].value.ui64 =
294 		    ((uint64_t)i << (num_pic * TAVOR_CNTR_SIZE));
295 		kstat_named_init(&pic_named_data[i], ev_array[i].ks_evt_name,
296 		    KSTAT_DATA_UINT64);
297 	}
298 
299 	/* Add the "clear_pic" entry */
300 	pic_named_data[i].value.ui64 =
301 	    ~((uint64_t)TAVOR_CNTR_MASK << (num_pic * TAVOR_CNTR_SIZE));
302 	kstat_named_init(&pic_named_data[i], ev_array[i].ks_evt_name,
303 	    KSTAT_DATA_UINT64);
304 
305 	/* Install the kstat */
306 	kstat_install(picN_ksp);
307 
308 	TAVOR_TNF_EXIT(tavor_kstat_picN_create);
309 	return (picN_ksp);
310 }
311 
312 
313 /*
314  * tavor_kstat_cntr_create()
315  *    Context: Only called from attach() path context
316  */
317 static kstat_t *
318 tavor_kstat_cntr_create(tavor_state_t *state, int num_pic,
319     int (*update)(kstat_t *, int))
320 {
321 	struct kstat		*cntr_ksp;
322 	struct kstat_named	*cntr_named_data;
323 	int			drv_instance, i;
324 	char			*drv_name;
325 	char			pic_str[16];
326 
327 	TAVOR_TNF_ENTER(tavor_kstat_cntr_create);
328 
329 	/*
330 	 * Create the "counters" kstat.  In the steps, below we will attach
331 	 * all of our "pic" to it.   Note:  The size of this kstat is
332 	 * num_pic + 1 because it also contains the "%pcr".
333 	 */
334 	drv_name = (char *)ddi_driver_name(state->ts_dip);
335 	drv_instance = ddi_get_instance(state->ts_dip);
336 	cntr_ksp = kstat_create(drv_name, drv_instance, "counters", "bus",
337 	    KSTAT_TYPE_NAMED, num_pic + 1, KSTAT_FLAG_WRITABLE);
338 	if (cntr_ksp == NULL) {
339 		TNF_PROBE_0(tavor_kstat_picN_create_kstat_fail,
340 		    TAVOR_TNF_ERROR, "");
341 		TAVOR_TNF_EXIT(tavor_kstat_cntr_create);
342 		return (NULL);
343 	}
344 	cntr_named_data = (struct kstat_named *)(cntr_ksp->ks_data);
345 
346 	/*
347 	 * Initialize the named kstats (for the "pcr" and for the
348 	 * individual "pic" kstats)
349 	 */
350 	kstat_named_init(&cntr_named_data[0], "pcr", KSTAT_DATA_UINT64);
351 	for (i = 0; i < num_pic; i++) {
352 		(void) sprintf(pic_str, "pic%d", i);
353 		kstat_named_init(&cntr_named_data[i+1], pic_str,
354 		    KSTAT_DATA_UINT64);
355 	}
356 
357 	/*
358 	 * Store the Tavor softstate pointer in the kstat's private field so
359 	 * that it is available to the update function.
360 	 */
361 	cntr_ksp->ks_private = (void *)state;
362 	cntr_ksp->ks_update  = update;
363 
364 	/* Install the kstat */
365 	kstat_install(cntr_ksp);
366 
367 	TAVOR_TNF_ENTER(tavor_kstat_cntr_create);
368 	return (cntr_ksp);
369 }
370 
371 
372 /*
373  * tavor_kstat_cntr_update()
374  *    Context: Called from the kstat context
375  */
376 static int
377 tavor_kstat_cntr_update(kstat_t *ksp, int rw)
378 {
379 	tavor_state_t		*state;
380 	tavor_ks_mask_t		*ib_perf;
381 	tavor_ks_info_t		*ksi;
382 	struct kstat_named	*data;
383 	uint64_t		offset, pcr;
384 	uint32_t		pic0, pic1, tmp;
385 	uint32_t		shift, mask, oldval;
386 	uint_t			numports, indx;
387 
388 	TAVOR_TNF_ENTER(tavor_kstat_cntr_update);
389 
390 	/*
391 	 * Extract the Tavor softstate pointer, kstat data, pointer to the
392 	 * kstat info structure, and pointer to the tki_ib_perfcnt[] array
393 	 * from the input parameters.  Note: For warlock purposes, these
394 	 * parameters are all accessed only in this routine and are,
395 	 * therefore, protected by the lock used by the kstat framework.
396 	 */
397 	state	= ksp->ks_private;
398 	data	= (struct kstat_named *)(ksp->ks_data);
399 	ksi	= state->ts_ks_info;
400 	ib_perf = &ksi->tki_ib_perfcnt[0];
401 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ksi))
402 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*data))
403 	_NOTE(NOW_INVISIBLE_TO_OTHER_THREADS(*ib_perf))
404 
405 	/*
406 	 * Depending on whether we are reading the "pic" counters or
407 	 * writing the "pcr" control register, we need to handle and
408 	 * fill in the kstat data appropriately.
409 	 *
410 	 * If this is a write to the "pcr", then extract the value from
411 	 * the kstat data and store it in the kstat info structure.
412 	 *
413 	 * Otherwise, if this is a read of the "pic" counter(s), then
414 	 * extract the register offset, size, and mask values from the
415 	 * ib_perf[] array.  Then read the corresponding register and store
416 	 * it into the kstat data.  Note:  We only read/fill in pic1 if more
417 	 * than one port is configured.
418 	 */
419 	numports = state->ts_cfg_profile->cp_num_ports;
420 	if (rw == KSTAT_WRITE) {
421 		/* Update the stored "pcr" value */
422 		ksi->tki_pcr = data[0].value.ui64;
423 		TAVOR_TNF_EXIT(tavor_kstat_cntr_update);
424 		return (0);
425 	} else {
426 		/*
427 		 * Get the current "pcr" value and extract the lower
428 		 * portion (corresponding to the counters for "pic0")
429 		 */
430 		pcr  = ksi->tki_pcr;
431 		indx = pcr & TAVOR_CNTR_MASK;
432 		data[0].value.ui64 = pcr;
433 
434 		/*
435 		 * Fill in the "pic0" counter, corresponding to port 1.
436 		 * This involves reading in the current value in the register
437 		 * and calculating how many events have happened since this
438 		 * register was last polled.  Then we save away the current
439 		 * value for the counter and increment the "pic0" total by
440 		 * the number of new events.
441 		 */
442 		offset = ib_perf[indx].ks_reg_offset;
443 		shift  = ib_perf[indx].ks_reg_shift;
444 		mask   = ib_perf[indx].ks_reg_mask;
445 		oldval = ib_perf[indx].ks_old_pic0;
446 
447 		pic0   = ddi_get32(state->ts_reg_cmdhdl, (uint32_t *)
448 		    (uintptr_t)((uintptr_t)state->ts_reg_cmd_baseaddr +
449 		    offset));
450 		tmp = ((pic0 >> shift) & mask);
451 
452 		ib_perf[indx].ks_old_pic0 = tmp;
453 
454 		tmp = tmp - oldval;
455 		ksi->tki_pic0 += tmp;
456 		data[1].value.ui64 = ksi->tki_pic0;
457 
458 		/*
459 		 * If necessary, fill in the "pic1" counter for port 2.
460 		 * This works the same as above except that we extract the
461 		 * upper bits (corresponding to the counters for "pic1")
462 		 */
463 		if (numports == TAVOR_NUM_PORTS) {
464 			indx   = pcr >> TAVOR_CNTR_SIZE;
465 			offset = ib_perf[indx].ks_reg_offset;
466 			shift  = ib_perf[indx].ks_reg_shift;
467 			mask   = ib_perf[indx].ks_reg_mask;
468 			oldval = ib_perf[indx].ks_old_pic1;
469 
470 			pic1   = ddi_get32(state->ts_reg_cmdhdl, (uint32_t *)
471 			    (uintptr_t)((uintptr_t)state->ts_reg_cmd_baseaddr +
472 			    offset + TAVOR_HW_PORT_SIZE));
473 			tmp = ((pic1 >> shift) & mask);
474 
475 			ib_perf[indx].ks_old_pic1 = tmp;
476 
477 			tmp = tmp - oldval;
478 			ksi->tki_pic1 += tmp;
479 			data[2].value.ui64 = ksi->tki_pic1;
480 		}
481 
482 		TAVOR_TNF_EXIT(tavor_kstat_cntr_update);
483 		return (0);
484 	}
485 }
486 
487 /*
488  * 64 bit kstats for performance counters:
489  *
490  * Since the hardware as of now does not support 64 bit performance counters,
491  * we maintain 64 bit performance counters in software using the 32 bit
492  * hardware counters.
493  *
494  * We create a thread that, every one second, reads the values of 32 bit
495  * hardware counters and adds them to the 64 bit software counters. Immediately
496  * after reading, it resets the 32 bit hardware counters to zero (so that they
497  * start counting from zero again). At any time the current value of a counter
498  * is going to be the sum of the 64 bit software counter and the 32 bit
499  * hardware counter.
500  *
501  * Since this work need not be done if there is no consumer, by default
502  * we do not maintain 64 bit software counters. To enable this the consumer
 * needs to write a non-zero value to the "enable" component of the
 * perf_counters kstat. Writing zero to this component will disable this work.
505  *
506  * If performance monitor is enabled in subnet manager, the SM could
507  * periodically reset the hardware counters by sending perf-MADs. So only
508  * one of either our software 64 bit counters or the SM performance monitor
509  * could be enabled at the same time. However, if both of them are enabled at
510  * the same time we still do our best by keeping track of the values of the
511  * last read 32 bit hardware counters. If the current read of a 32 bit hardware
512  * counter is less than the last read of the counter, we ignore the current
513  * value and go with the last read value.
514  */
515 
516 /*
517  * tavor_kstat_perfcntr64_create()
518  *    Context: Only called from attach() path context
519  *
520  * Create "port#/perf_counters" kstat for the specified port number.
521  */
522 void
523 tavor_kstat_perfcntr64_create(tavor_state_t *state, uint_t port_num)
524 {
525 	tavor_ks_info_t		*ksi = state->ts_ks_info;
526 	struct kstat		*cntr_ksp;
527 	struct kstat_named	*cntr_named_data;
528 	int			drv_instance;
529 	char			*drv_name;
530 	char			kname[32];
531 
532 	ASSERT(port_num != 0);
533 
534 	drv_name = (char *)ddi_driver_name(state->ts_dip);
535 	drv_instance = ddi_get_instance(state->ts_dip);
536 	(void) snprintf(kname, sizeof (kname), "port%u/perf_counters",
537 	    port_num);
538 	cntr_ksp = kstat_create(drv_name, drv_instance, kname, "ib",
539 	    KSTAT_TYPE_NAMED, TAVOR_PERFCNTR64_NUM_COUNTERS,
540 	    KSTAT_FLAG_WRITABLE);
541 	if (cntr_ksp == NULL) {
542 		return;
543 	}
544 	cntr_named_data = (struct kstat_named *)(cntr_ksp->ks_data);
545 
546 	kstat_named_init(&cntr_named_data[TAVOR_PERFCNTR64_ENABLE_IDX],
547 	    "enable", KSTAT_DATA_UINT32);
548 	kstat_named_init(&cntr_named_data[TAVOR_PERFCNTR64_XMIT_DATA_IDX],
549 	    "xmit_data", KSTAT_DATA_UINT64);
550 	kstat_named_init(&cntr_named_data[TAVOR_PERFCNTR64_RECV_DATA_IDX],
551 	    "recv_data", KSTAT_DATA_UINT64);
552 	kstat_named_init(&cntr_named_data[TAVOR_PERFCNTR64_XMIT_PKTS_IDX],
553 	    "xmit_pkts", KSTAT_DATA_UINT64);
554 	kstat_named_init(&cntr_named_data[TAVOR_PERFCNTR64_RECV_PKTS_IDX],
555 	    "recv_pkts", KSTAT_DATA_UINT64);
556 
557 	ksi->tki_perfcntr64[port_num - 1].tki64_ksp = cntr_ksp;
558 	ksi->tki_perfcntr64[port_num - 1].tki64_port_num = port_num;
559 	ksi->tki_perfcntr64[port_num - 1].tki64_state = state;
560 
561 	cntr_ksp->ks_private = &ksi->tki_perfcntr64[port_num - 1];
562 	cntr_ksp->ks_update  = tavor_kstat_perfcntr64_update;
563 
564 	/* Install the kstat */
565 	kstat_install(cntr_ksp);
566 }
567 
568 /*
569  * tavor_kstat_perfcntr64_read()
570  *
571  * Read the values of 32 bit hardware counters.
572  *
573  * If reset is true, reset the 32 bit hardware counters. Add the values of the
574  * 32 bit hardware counters to the 64 bit software counters.
575  *
576  * If reset is false, just save the values read from the 32 bit hardware
577  * counters in tki64_last_read[].
578  *
579  * See the general comment on the 64 bit performance counters
580  * regarding the use of last read 32 bit hardware counter values.
581  */
582 static int
583 tavor_kstat_perfcntr64_read(tavor_state_t *state, uint_t port, int reset)
584 {
585 	tavor_ks_info_t	*ksi = state->ts_ks_info;
586 	tavor_perfcntr64_ks_info_t *ksi64 = &ksi->tki_perfcntr64[port - 1];
587 	int			status, i;
588 	uint32_t		tmp;
589 	tavor_hw_sm_perfcntr_t	sm_perfcntr;
590 
591 	ASSERT(MUTEX_HELD(&ksi->tki_perfcntr64_lock));
592 	ASSERT(port != 0);
593 
594 	/* read the 32 bit hardware counters */
595 	status = tavor_getperfcntr_cmd_post(state, port,
596 	    TAVOR_CMD_NOSLEEP_SPIN, &sm_perfcntr, 0);
597 	if (status != TAVOR_CMD_SUCCESS) {
598 		return (status);
599 	}
600 
601 	if (reset) {
602 		/* reset the hardware counters */
603 		status = tavor_getperfcntr_cmd_post(state, port,
604 		    TAVOR_CMD_NOSLEEP_SPIN, NULL, 1);
605 		if (status != TAVOR_CMD_SUCCESS) {
606 			return (status);
607 		}
608 
609 		/*
610 		 * Update 64 bit software counters
611 		 */
612 		tmp = MAX(sm_perfcntr.portxmdata,
613 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_DATA_IDX]);
614 		ksi64->tki64_counters[TAVOR_PERFCNTR64_XMIT_DATA_IDX] += tmp;
615 
616 		tmp = MAX(sm_perfcntr.portrcdata,
617 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_DATA_IDX]);
618 		ksi64->tki64_counters[TAVOR_PERFCNTR64_RECV_DATA_IDX] += tmp;
619 
620 		tmp = MAX(sm_perfcntr.portxmpkts,
621 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_PKTS_IDX]);
622 		ksi64->tki64_counters[TAVOR_PERFCNTR64_XMIT_PKTS_IDX] += tmp;
623 
624 		tmp = MAX(sm_perfcntr.portrcpkts,
625 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_PKTS_IDX]);
626 		ksi64->tki64_counters[TAVOR_PERFCNTR64_RECV_PKTS_IDX] += tmp;
627 
628 		for (i = 0; i < TAVOR_PERFCNTR64_NUM_COUNTERS; i++)
629 			ksi64->tki64_last_read[i] = 0;
630 
631 	} else {
632 		/*
633 		 * Update ksi64->tki64_last_read[]
634 		 */
635 		SET_TO_MAX(
636 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_DATA_IDX],
637 		    sm_perfcntr.portxmdata);
638 
639 		SET_TO_MAX(
640 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_DATA_IDX],
641 		    sm_perfcntr.portrcdata);
642 
643 		SET_TO_MAX(
644 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_PKTS_IDX],
645 		    sm_perfcntr.portxmpkts);
646 
647 		SET_TO_MAX(
648 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_PKTS_IDX],
649 		    sm_perfcntr.portrcpkts);
650 	}
651 
652 	return (TAVOR_CMD_SUCCESS);
653 }
654 
655 /*
656  * tavor_kstat_perfcntr64_update_thread()
657  *    Context: Entry point for a kernel thread
658  *
659  * Maintain 64 bit performance counters in software using the 32 bit
660  * hardware counters.
661  */
662 static void
663 tavor_kstat_perfcntr64_update_thread(void *arg)
664 {
665 	tavor_state_t		*state = (tavor_state_t *)arg;
666 	tavor_ks_info_t		*ksi = state->ts_ks_info;
667 	uint_t			i;
668 
669 	mutex_enter(&ksi->tki_perfcntr64_lock);
670 	/*
671 	 * Every one second update the values 64 bit software counters
672 	 * for all ports. Exit if TAVOR_PERFCNTR64_THREAD_EXIT flag is set.
673 	 */
674 	while (!(ksi->tki_perfcntr64_flags & TAVOR_PERFCNTR64_THREAD_EXIT)) {
675 		for (i = 0; i < state->ts_cfg_profile->cp_num_ports; i++) {
676 			if (ksi->tki_perfcntr64[i].tki64_enabled) {
677 				(void) tavor_kstat_perfcntr64_read(state,
678 				    i + 1, 1);
679 			}
680 		}
681 		/* sleep for a second */
682 		(void) cv_timedwait(&ksi->tki_perfcntr64_cv,
683 		    &ksi->tki_perfcntr64_lock,
684 		    ddi_get_lbolt() + drv_usectohz(1000000));
685 	}
686 	ksi->tki_perfcntr64_flags = 0;
687 	mutex_exit(&ksi->tki_perfcntr64_lock);
688 }
689 
690 /*
691  * tavor_kstat_perfcntr64_thread_create()
692  *    Context: Called from the kstat context
693  *
694  * Create a thread that maintains 64 bit performance counters in software.
695  */
696 static void
697 tavor_kstat_perfcntr64_thread_create(tavor_state_t *state)
698 {
699 	tavor_ks_info_t	*ksi = state->ts_ks_info;
700 	kthread_t		*thr;
701 
702 	ASSERT(MUTEX_HELD(&ksi->tki_perfcntr64_lock));
703 
704 	/*
705 	 * One thread per tavor instance. Don't create a thread if already
706 	 * created.
707 	 */
708 	if (!(ksi->tki_perfcntr64_flags & TAVOR_PERFCNTR64_THREAD_CREATED)) {
709 		thr = thread_create(NULL, 0,
710 		    tavor_kstat_perfcntr64_update_thread,
711 		    state, 0, &p0, TS_RUN, minclsyspri);
712 		ksi->tki_perfcntr64_thread_id = thr->t_did;
713 		ksi->tki_perfcntr64_flags |= TAVOR_PERFCNTR64_THREAD_CREATED;
714 	}
715 }
716 
717 /*
718  * tavor_kstat_perfcntr64_thread_exit()
719  *    Context: Called from attach, detach or kstat context
720  */
721 static void
722 tavor_kstat_perfcntr64_thread_exit(tavor_ks_info_t *ksi)
723 {
724 	kt_did_t	tid;
725 
726 	ASSERT(MUTEX_HELD(&ksi->tki_perfcntr64_lock));
727 
728 	if (ksi->tki_perfcntr64_flags & TAVOR_PERFCNTR64_THREAD_CREATED) {
729 		/*
730 		 * Signal the thread to exit and wait until the thread exits.
731 		 */
732 		ksi->tki_perfcntr64_flags |= TAVOR_PERFCNTR64_THREAD_EXIT;
733 		tid = ksi->tki_perfcntr64_thread_id;
734 		cv_signal(&ksi->tki_perfcntr64_cv);
735 
736 		mutex_exit(&ksi->tki_perfcntr64_lock);
737 		thread_join(tid);
738 		mutex_enter(&ksi->tki_perfcntr64_lock);
739 	}
740 }
741 
742 /*
743  * tavor_kstat_perfcntr64_update()
744  *    Context: Called from the kstat context
745  *
746  * See the general comment on 64 bit kstats for performance counters:
747  */
748 static int
749 tavor_kstat_perfcntr64_update(kstat_t *ksp, int rw)
750 {
751 	tavor_state_t			*state;
752 	struct kstat_named		*data;
753 	tavor_ks_info_t		*ksi;
754 	tavor_perfcntr64_ks_info_t	*ksi64;
755 	int				i, thr_exit;
756 
757 	ksi64	= ksp->ks_private;
758 	state	= ksi64->tki64_state;
759 	ksi	= state->ts_ks_info;
760 	data	= (struct kstat_named *)(ksp->ks_data);
761 
762 	mutex_enter(&ksi->tki_perfcntr64_lock);
763 
764 	/*
765 	 * 64 bit performance counters maintained by the software is not
766 	 * enabled by default. Enable them upon a writing a non-zero value
767 	 * to "enable" kstat. Disable them upon a writing zero to the
768 	 * "enable" kstat.
769 	 */
770 	if (rw == KSTAT_WRITE) {
771 		if (data[TAVOR_PERFCNTR64_ENABLE_IDX].value.ui32) {
772 			if (ksi64->tki64_enabled == 0) {
773 				/*
774 				 * Reset the hardware counters to ensure that
775 				 * the hardware counter doesn't max out
776 				 * (and hence stop counting) before we get
777 				 * a chance to reset the counter in
778 				 * tavor_kstat_perfcntr64_update_thread.
779 				 */
780 				if (tavor_getperfcntr_cmd_post(state,
781 				    ksi64->tki64_port_num,
782 				    TAVOR_CMD_NOSLEEP_SPIN, NULL, 1) !=
783 				    TAVOR_CMD_SUCCESS) {
784 					mutex_exit(&ksi->tki_perfcntr64_lock);
785 					return (EIO);
786 				}
787 
788 				/* Enable 64 bit software counters */
789 				ksi64->tki64_enabled = 1;
790 				for (i = 0;
791 				    i < TAVOR_PERFCNTR64_NUM_COUNTERS; i++) {
792 					ksi64->tki64_counters[i] = 0;
793 					ksi64->tki64_last_read[i] = 0;
794 				}
795 				tavor_kstat_perfcntr64_thread_create(state);
796 			}
797 
798 		} else if (ksi64->tki64_enabled) {
799 			/* Disable 64 bit software counters */
800 			ksi64->tki64_enabled = 0;
801 			thr_exit = 1;
802 			for (i = 0; i < state->ts_cfg_profile->cp_num_ports;
803 			    i++) {
804 				if (ksi->tki_perfcntr64[i].tki64_enabled) {
805 					thr_exit = 0;
806 					break;
807 				}
808 			}
809 			if (thr_exit)
810 				tavor_kstat_perfcntr64_thread_exit(ksi);
811 		}
812 	} else if (ksi64->tki64_enabled) {
813 		/*
814 		 * Read the counters and update kstats.
815 		 */
816 		if (tavor_kstat_perfcntr64_read(state, ksi64->tki64_port_num,
817 		    0) != TAVOR_CMD_SUCCESS) {
818 			mutex_exit(&ksi->tki_perfcntr64_lock);
819 			return (EIO);
820 		}
821 
822 		data[TAVOR_PERFCNTR64_ENABLE_IDX].value.ui32 = 1;
823 
824 		data[TAVOR_PERFCNTR64_XMIT_DATA_IDX].value.ui64 =
825 		    ksi64->tki64_counters[TAVOR_PERFCNTR64_XMIT_DATA_IDX] +
826 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_DATA_IDX];
827 
828 		data[TAVOR_PERFCNTR64_RECV_DATA_IDX].value.ui64 =
829 		    ksi64->tki64_counters[TAVOR_PERFCNTR64_RECV_DATA_IDX] +
830 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_DATA_IDX];
831 
832 		data[TAVOR_PERFCNTR64_XMIT_PKTS_IDX].value.ui64 =
833 		    ksi64->tki64_counters[TAVOR_PERFCNTR64_XMIT_PKTS_IDX] +
834 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_XMIT_PKTS_IDX];
835 
836 		data[TAVOR_PERFCNTR64_RECV_PKTS_IDX].value.ui64 =
837 		    ksi64->tki64_counters[TAVOR_PERFCNTR64_RECV_PKTS_IDX] +
838 		    ksi64->tki64_last_read[TAVOR_PERFCNTR64_RECV_PKTS_IDX];
839 
840 	} else {
841 		/* return 0 in kstats if not enabled */
842 		data[TAVOR_PERFCNTR64_ENABLE_IDX].value.ui32 = 0;
843 		for (i = 1; i < TAVOR_PERFCNTR64_NUM_COUNTERS; i++)
844 			data[i].value.ui64 = 0;
845 	}
846 
847 	mutex_exit(&ksi->tki_perfcntr64_lock);
848 	return (0);
849 }
850