1*fec509a0Sgm /*
2*fec509a0Sgm  * CDDL HEADER START
3*fec509a0Sgm  *
4*fec509a0Sgm  * The contents of this file are subject to the terms of the
5*fec509a0Sgm  * Common Development and Distribution License (the "License").
6*fec509a0Sgm  * You may not use this file except in compliance with the License.
7*fec509a0Sgm  *
8*fec509a0Sgm  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*fec509a0Sgm  * or http://www.opensolaris.org/os/licensing.
10*fec509a0Sgm  * See the License for the specific language governing permissions
11*fec509a0Sgm  * and limitations under the License.
12*fec509a0Sgm  *
13*fec509a0Sgm  * When distributing Covered Code, include this CDDL HEADER in each
14*fec509a0Sgm  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*fec509a0Sgm  * If applicable, add the following below this CDDL HEADER, with the
16*fec509a0Sgm  * fields enclosed by brackets "[]" replaced with your own identifying
17*fec509a0Sgm  * information: Portions Copyright [yyyy] [name of copyright owner]
18*fec509a0Sgm  *
19*fec509a0Sgm  * CDDL HEADER END
20*fec509a0Sgm  */
21*fec509a0Sgm /*
22*fec509a0Sgm  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23*fec509a0Sgm  * Use is subject to license terms.
24*fec509a0Sgm  */
25*fec509a0Sgm 
26*fec509a0Sgm #pragma ident	"%Z%%M%	%I%	%E% SMI"
27*fec509a0Sgm 
28*fec509a0Sgm 
29*fec509a0Sgm #include <sys/ddi.h>
30*fec509a0Sgm #include <sys/errno.h>
31*fec509a0Sgm #include <sys/types.h>
32*fec509a0Sgm #include <sys/n2rng.h>
33*fec509a0Sgm #include <sys/int_types.h>
34*fec509a0Sgm 
35*fec509a0Sgm 
36*fec509a0Sgm /*
37*fec509a0Sgm  * This whole file is really doing floating point type stuff, and
38*fec509a0Sgm  * would be quite simple in user space.  But since we are in the
39*fec509a0Sgm  * kernel, (a) we can't use floating point, and (b) we don't have a
40*fec509a0Sgm  * math library.
41*fec509a0Sgm  */
42*fec509a0Sgm 
43*fec509a0Sgm /* used inside msb */
44*fec509a0Sgm #define	MSBSTEP(word, shift, counter)  \
45*fec509a0Sgm if (word & (~0ULL << shift)) {	       \
46*fec509a0Sgm 	word >>= shift;		       \
47*fec509a0Sgm 	counter += shift;	       \
48*fec509a0Sgm }
49*fec509a0Sgm 
50*fec509a0Sgm /*
51*fec509a0Sgm  * returns the position of the MSB of x.  The 1 bit is position 0.  An
52*fec509a0Sgm  * all zero arg returns -1.
53*fec509a0Sgm  */
54*fec509a0Sgm static int
msb(uint64_t x)55*fec509a0Sgm msb(uint64_t x)
56*fec509a0Sgm {
57*fec509a0Sgm 	int		bit;
58*fec509a0Sgm 
59*fec509a0Sgm 	if (x == 0) {
60*fec509a0Sgm 		return (-1);
61*fec509a0Sgm 	}
62*fec509a0Sgm 
63*fec509a0Sgm 	bit = 0;
64*fec509a0Sgm 	MSBSTEP(x, 32, bit);
65*fec509a0Sgm 	MSBSTEP(x, 16, bit);
66*fec509a0Sgm 	MSBSTEP(x, 8, bit);
67*fec509a0Sgm 	MSBSTEP(x, 4, bit);
68*fec509a0Sgm 	MSBSTEP(x, 2, bit);
69*fec509a0Sgm 	MSBSTEP(x, 1, bit);
70*fec509a0Sgm 
71*fec509a0Sgm 	return (bit);
72*fec509a0Sgm }
73*fec509a0Sgm 
74*fec509a0Sgm /*
75*fec509a0Sgm  * lg2 computes 2^(LOG_VAL_SCALE) * log2(x/2^LOG_ARG_SCALE), where ^
76*fec509a0Sgm  * is exponentiation.
77*fec509a0Sgm  *
78*fec509a0Sgm  * The following conditions must be satisfied: LOG_VAL_SCALE <= 62,
79*fec509a0Sgm  * LOG_VAL_SCALE + log2(maxarg) < 64, LOG_VAL_SCALE >= 0,
80*fec509a0Sgm  * LOG_ARG_SCALE <= 63.  Recommended LOG_VAL_SCALE is 57, which is the
81*fec509a0Sgm  * largest value such that overflow is impossible.
82*fec509a0Sgm  */
83*fec509a0Sgm static int64_t
lg2(uint64_t x)84*fec509a0Sgm lg2(uint64_t x)
85*fec509a0Sgm {
86*fec509a0Sgm 	/*
87*fec509a0Sgm 	 * logtable[i-1] == round(2^63 * log2(2^i/(2^i - 1))), where ^
88*fec509a0Sgm 	 * is exponentiation.
89*fec509a0Sgm 	 */
90*fec509a0Sgm 	static const uint64_t logtable[] = {
91*fec509a0Sgm 		9223372036854775808ULL, 3828045265094622256ULL,
92*fec509a0Sgm 		1776837224931603046ULL, 858782676832593460ULL,
93*fec509a0Sgm 		422464469962470743ULL, 209555718266071751ULL,
94*fec509a0Sgm 		104365343613858422ULL, 52080352580344565ULL,
95*fec509a0Sgm 		26014696649359209ULL, 13000990870918027ULL,
96*fec509a0Sgm 		6498907625079429ULL, 3249057053828501ULL,
97*fec509a0Sgm 		1624429361456373ULL, 812189892390238ULL,
98*fec509a0Sgm 		406088749488886ULL, 203042825615163ULL,
99*fec509a0Sgm 		101521025531171ULL, 50760415947221ULL,
100*fec509a0Sgm 		25380183769112ULL, 12690085833443ULL,
101*fec509a0Sgm 		6345041403945ULL, 3172520323778ULL,
102*fec509a0Sgm 		1586260067341ULL, 793130010033ULL,
103*fec509a0Sgm 		396564999107ULL, 198282498076ULL,
104*fec509a0Sgm 		99141248669ULL, 49570624242ULL,
105*fec509a0Sgm 		24785312098ULL, 12392656043ULL, 6196328020ULL, 3098164010ULL,
106*fec509a0Sgm 		1549082005ULL, 774541002ULL, 387270501ULL, 193635251ULL,
107*fec509a0Sgm 		96817625ULL, 48408813ULL, 24204406ULL, 12102203ULL, 6051102ULL,
108*fec509a0Sgm 		3025551ULL, 1512775ULL, 756388ULL, 378194ULL, 189097ULL,
109*fec509a0Sgm 		94548ULL, 47274ULL, 23637ULL, 11819ULL, 5909ULL, 2955ULL,
110*fec509a0Sgm 		1477ULL, 739ULL, 369ULL, 185ULL, 92ULL, 46ULL, 23ULL,
111*fec509a0Sgm 		12ULL, 6ULL, 3ULL, 1ULL
112*fec509a0Sgm 	};
113*fec509a0Sgm 
114*fec509a0Sgm 	uint64_t	xx;
115*fec509a0Sgm 	uint64_t	logx;
116*fec509a0Sgm 	uint64_t	tmp;
117*fec509a0Sgm 	int		i;
118*fec509a0Sgm 
119*fec509a0Sgm 	if (x == 0) {
120*fec509a0Sgm 		return (-INT64_MAX - 1);
121*fec509a0Sgm 	}
122*fec509a0Sgm 
123*fec509a0Sgm 	/*
124*fec509a0Sgm 	 * Invariant: log2(xx) + logx == log2(x).  This is true at the after
125*fec509a0Sgm 	 * the normalization.  At each adjustment step we multiply xx by
126*fec509a0Sgm 	 * (2^i-1)/2^i, which effectively decreases log2(xx) by
127*fec509a0Sgm 	 * log2(2^i/(2^i-1)), and a the same time, we add table[i], which
128*fec509a0Sgm 	 * equals log2(2^i/(2^i-1)), to logx.  By induction the invariant is
129*fec509a0Sgm 	 * true at the end.  At the end xx==1, so log2(xx)==0, and thus
130*fec509a0Sgm 	 * logx=log2(x);
131*fec509a0Sgm 	 */
132*fec509a0Sgm 	/* Normalize */
133*fec509a0Sgm 	i = msb(x); /* use i in computing preshift */
134*fec509a0Sgm 	if (i - LOG_ARG_SCALE > 0) {
135*fec509a0Sgm 		xx = x >> (i - LOG_ARG_SCALE);
136*fec509a0Sgm 	} else {
137*fec509a0Sgm 		xx = x << (LOG_ARG_SCALE - i);
138*fec509a0Sgm 	}
139*fec509a0Sgm 	logx = (int64_t)(i - LOG_ARG_SCALE) << LOG_VAL_SCALE;
140*fec509a0Sgm 
141*fec509a0Sgm 	for (i = 1; i <= LOG_ARG_SCALE;	 i++) {
142*fec509a0Sgm 		/* 1ULL << (i-1) is rounding */
143*fec509a0Sgm 		while ((tmp = xx - ((xx + (1ULL << (i-1))) >> i)) >=
144*fec509a0Sgm 		    1ULL << LOG_ARG_SCALE) {
145*fec509a0Sgm 			xx = tmp;
146*fec509a0Sgm 			/* 1ULL << (63 - LOG_VAL_SCALE -1) is rounding */
147*fec509a0Sgm 			logx += (logtable[i-1] +
148*fec509a0Sgm 			    (1ULL << (63 - LOG_VAL_SCALE - 1))) >>
149*fec509a0Sgm 			    (63 - LOG_VAL_SCALE);
150*fec509a0Sgm 		}
151*fec509a0Sgm 	}
152*fec509a0Sgm 
153*fec509a0Sgm 	return (logx);
154*fec509a0Sgm }
155*fec509a0Sgm 
156*fec509a0Sgm 
157*fec509a0Sgm 
158*fec509a0Sgm /*
159*fec509a0Sgm  * The EXCHANGE macro swaps entries j & k if necessary so that
160*fec509a0Sgm  * data[j] <= data[k].
161*fec509a0Sgm  *
162*fec509a0Sgm  * If OBLIVIOUS is defined, no branches are used.  This would allow
163*fec509a0Sgm  * this algorithm to be used by the CPU manufacturing people who run
164*fec509a0Sgm  * on a tester that requires the exact same instruction address stream
165*fec509a0Sgm  * on every test. (It's a bit slower with OBLIVIOUS defined.)
166*fec509a0Sgm  */
167*fec509a0Sgm #ifdef OBLIVIOUS
168*fec509a0Sgm #define	EXCHANGE(j, k)			\
169*fec509a0Sgm 	{				\
170*fec509a0Sgm 		uint64_t tmp, mask;	\
171*fec509a0Sgm 		mask = (uint64_t)(((int64_t)(data[k] - data[j])) >> 63); \
172*fec509a0Sgm 		tmp = data[j] + data[k];			\
173*fec509a0Sgm 		data[j] = data[k] & mask | data[j] & ~mask;	\
174*fec509a0Sgm 		data[k] = tmp - data[j];			\
175*fec509a0Sgm 	}
176*fec509a0Sgm #else
177*fec509a0Sgm #define	EXCHANGE(j, k)				\
178*fec509a0Sgm 	{					\
179*fec509a0Sgm 		uint64_t tmp;			\
180*fec509a0Sgm 		if (data[j] > data[k]) {	\
181*fec509a0Sgm 			tmp = data[j];		\
182*fec509a0Sgm 			data[j] = data[k];	\
183*fec509a0Sgm 			data[k] = tmp;		\
184*fec509a0Sgm 		}				\
185*fec509a0Sgm 	}
186*fec509a0Sgm #endif
187*fec509a0Sgm 
188*fec509a0Sgm 
189*fec509a0Sgm 
190*fec509a0Sgm /*
191*fec509a0Sgm  * This is a Batcher sort from Knuth v. 3.  There is no flow control
192*fec509a0Sgm  * that depends on the values being sorted, except in the EXCHANGE
193*fec509a0Sgm  * step, but that can be made oblivious to the data values, too, by
194*fec509a0Sgm  * setting OBLIVIOUS.  So this code could be using in chip testers
195*fec509a0Sgm  * that require fixed flow through a test.
196*fec509a0Sgm  *
197*fec509a0Sgm  * This is presently hard-coded for sorting uint64_t values.
198*fec509a0Sgm  */
199*fec509a0Sgm void
n2rng_sort(uint64_t * data,int log2_size)200*fec509a0Sgm n2rng_sort(uint64_t *data, int log2_size)
201*fec509a0Sgm {
202*fec509a0Sgm 	int p, q, d, r, i;
203*fec509a0Sgm 
204*fec509a0Sgm 	for (p = 1 << (log2_size - 1); p > 0; p >>= 1) {
205*fec509a0Sgm 		d = p;
206*fec509a0Sgm 		r = 0;
207*fec509a0Sgm 		for (q = 1 << (log2_size - 1); q >= p; q >>= 1) {
208*fec509a0Sgm 			for (i = 0; i + d < (1 << log2_size); i++) {
209*fec509a0Sgm 				if ((i & p) == r) {
210*fec509a0Sgm 					EXCHANGE(i, i+d);
211*fec509a0Sgm 				}
212*fec509a0Sgm 			}
213*fec509a0Sgm 			d = q - p;
214*fec509a0Sgm 			r = p;
215*fec509a0Sgm 		}
216*fec509a0Sgm 	}
217*fec509a0Sgm }
218*fec509a0Sgm 
219*fec509a0Sgm 
220*fec509a0Sgm /*
221*fec509a0Sgm  * Computes several measures of entropy per word: Renyi H0 (log2 of
222*fec509a0Sgm  * number of distinct symbols), Renyi H1 (Shannon),
223*fec509a0Sgm  * Renyi H2 (-log2 of sum(P_i^2)), and
224*fec509a0Sgm  * Renyi H-infinity (min).  The results are coded as H *
225*fec509a0Sgm  * 2^LOG_VAL_SCALE).  The samples array is modified by sorting in
226*fec509a0Sgm  * place.
227*fec509a0Sgm  *
228*fec509a0Sgm  * None if this is really valid, since it requres that the block
229*fec509a0Sgm  * length be at least as long as the largest non-approximately-zero
230*fec509a0Sgm  * coefficient in the autocorrelation function, and that the number
231*fec509a0Sgm  * of samples be much larger than 2^longest_block_length_in_bits.
232*fec509a0Sgm  * But we hope that bigger is better, even when it is invalid.
233*fec509a0Sgm  */
234*fec509a0Sgm void
n2rng_renyi_entropy(uint64_t * samples,int lg2samples,n2rng_osc_perf_t * entp)235*fec509a0Sgm n2rng_renyi_entropy(uint64_t *samples, int lg2samples, n2rng_osc_perf_t *entp)
236*fec509a0Sgm {
237*fec509a0Sgm 	size_t i;
238*fec509a0Sgm 	uint64_t cv = samples[0]; /* current value */
239*fec509a0Sgm 	size_t count = 1;
240*fec509a0Sgm 	size_t numdistinct = 0;
241*fec509a0Sgm 	size_t largestcount = 0;
242*fec509a0Sgm 	uint64_t shannonsum = 0;
243*fec509a0Sgm 	uint64_t sqsum = 0;
244*fec509a0Sgm 
245*fec509a0Sgm 	n2rng_sort(samples, lg2samples);
246*fec509a0Sgm 
247*fec509a0Sgm 	for (i = 1; i < (1 << lg2samples); i++) {
248*fec509a0Sgm 		if (samples[i] != cv) {
249*fec509a0Sgm 			numdistinct++;
250*fec509a0Sgm 			if (count > largestcount) {
251*fec509a0Sgm 				largestcount = count;
252*fec509a0Sgm 			}
253*fec509a0Sgm #ifdef COMPUTE_SHANNON_ENTROPY
254*fec509a0Sgm 			shannonsum -= (count * (lg2(count) +
255*fec509a0Sgm 			    ((int64_t)(LOG_ARG_SCALE - lg2samples) <<
256*fec509a0Sgm 			    LOG_VAL_SCALE))) >> lg2samples;
257*fec509a0Sgm #endif /* COMPUTE_SHANNON_ENTROPY */
258*fec509a0Sgm 			sqsum += count * count;
259*fec509a0Sgm 			count = 1;
260*fec509a0Sgm 			cv = samples[i];
261*fec509a0Sgm 		} else {
262*fec509a0Sgm 			count++;
263*fec509a0Sgm 		}
264*fec509a0Sgm 	}
265*fec509a0Sgm 	/* process last block */
266*fec509a0Sgm 	numdistinct++;
267*fec509a0Sgm 	if (count > largestcount) {
268*fec509a0Sgm 		largestcount = count;
269*fec509a0Sgm 	}
270*fec509a0Sgm #ifdef COMPUTE_SHANNON_ENTROPY
271*fec509a0Sgm 	shannonsum -= (count * (lg2(count) +
272*fec509a0Sgm 	    ((int64_t)(LOG_ARG_SCALE - lg2samples) << LOG_VAL_SCALE))) >>
273*fec509a0Sgm 	    lg2samples;
274*fec509a0Sgm #endif /* COMPUTE_SHANNON_ENTROPY */
275*fec509a0Sgm 	sqsum += count * count;
276*fec509a0Sgm 
277*fec509a0Sgm 	entp->numvals = numdistinct;
278*fec509a0Sgm 	/* H1 is shannon entropy: -sum(p_i * log2(p_i)) */
279*fec509a0Sgm 	entp->H1 = shannonsum / 64;
280*fec509a0Sgm 	/* H2 is -log2(sum p_i^2) */
281*fec509a0Sgm 	entp->H2 = -(lg2(sqsum) +
282*fec509a0Sgm 	    ((int64_t)(LOG_ARG_SCALE - 2 * lg2samples) << LOG_VAL_SCALE)) / 64;
283*fec509a0Sgm 	/* Hinf = -log2(highest_probability) */
284*fec509a0Sgm 	entp->Hinf = -(lg2(largestcount) +
285*fec509a0Sgm 	    ((int64_t)(LOG_ARG_SCALE - lg2samples) << LOG_VAL_SCALE)) / 64;
286*fec509a0Sgm }
287