io/n2rng/n2rng_entp_algs.c

*fec509a0Sgm/*
*fec509a0Sgm * CDDL HEADER START
*fec509a0Sgm *
*fec509a0Sgm * The contents of this file are subject to the terms of the
*fec509a0Sgm * Common Development and Distribution License (the "License").
*fec509a0Sgm * You may not use this file except in compliance with the License.
*fec509a0Sgm *
*fec509a0Sgm * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
*fec509a0Sgm * or http://www.opensolaris.org/os/licensing.
*fec509a0Sgm * See the License for the specific language governing permissions
*fec509a0Sgm * and limitations under the License.
*fec509a0Sgm *
*fec509a0Sgm * When distributing Covered Code, include this CDDL HEADER in each
*fec509a0Sgm * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
*fec509a0Sgm * If applicable, add the following below this CDDL HEADER, with the
*fec509a0Sgm * fields enclosed by brackets "[]" replaced with your own identifying
*fec509a0Sgm * information: Portions Copyright [yyyy] [name of copyright owner]
*fec509a0Sgm *
*fec509a0Sgm * CDDL HEADER END
*fec509a0Sgm */
*fec509a0Sgm/*
*fec509a0Sgm * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
*fec509a0Sgm * Use is subject to license terms.
*fec509a0Sgm */
*fec509a0Sgm
*fec509a0Sgm#pragma ident	"%Z%%M%	%I%	%E% SMI"
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm#include <sys/ddi.h>
*fec509a0Sgm#include <sys/errno.h>
*fec509a0Sgm#include <sys/types.h>
*fec509a0Sgm#include <sys/n2rng.h>
*fec509a0Sgm#include <sys/int_types.h>
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * This whole file is really doing floating point type stuff, and
*fec509a0Sgm * would be quite simple in user space.  But since we are in the
*fec509a0Sgm * kernel, (a) we can't use floating point, and (b) we don't have a
*fec509a0Sgm * math library.
*fec509a0Sgm */
*fec509a0Sgm
*fec509a0Sgm/* used inside msb */
*fec509a0Sgm#define	MSBSTEP(word, shift, counter)  \
*fec509a0Sgmif (word & (~0ULL << shift)) {	       \
*fec509a0Sgm	word >>= shift;		       \
*fec509a0Sgm	counter += shift;	       \
*fec509a0Sgm}
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * returns the position of the MSB of x.  The 1 bit is position 0.  An
*fec509a0Sgm * all zero arg returns -1.
*fec509a0Sgm */
*fec509a0Sgmstatic int
*fec509a0Sgmmsb(uint64_t x)
*fec509a0Sgm{
*fec509a0Sgm	int		bit;
*fec509a0Sgm
*fec509a0Sgm	if (x == 0) {
*fec509a0Sgm		return (-1);
*fec509a0Sgm	}
*fec509a0Sgm
*fec509a0Sgm	bit = 0;
*fec509a0Sgm	MSBSTEP(x, 32, bit);
*fec509a0Sgm	MSBSTEP(x, 16, bit);
*fec509a0Sgm	MSBSTEP(x, 8, bit);
*fec509a0Sgm	MSBSTEP(x, 4, bit);
*fec509a0Sgm	MSBSTEP(x, 2, bit);
*fec509a0Sgm	MSBSTEP(x, 1, bit);
*fec509a0Sgm
*fec509a0Sgm	return (bit);
*fec509a0Sgm}
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * lg2 computes 2^(LOG_VAL_SCALE) * log2(x/2^LOG_ARG_SCALE), where ^
*fec509a0Sgm * is exponentiation.
*fec509a0Sgm *
*fec509a0Sgm * The following conditions must be satisfied: LOG_VAL_SCALE <= 62,
*fec509a0Sgm * LOG_VAL_SCALE + log2(maxarg) < 64, LOG_VAL_SCALE >= 0,
*fec509a0Sgm * LOG_ARG_SCALE <= 63.  Recommended LOG_VAL_SCALE is 57, which is the
*fec509a0Sgm * largest value such that overflow is impossible.
*fec509a0Sgm */
*fec509a0Sgmstatic int64_t
*fec509a0Sgmlg2(uint64_t x)
*fec509a0Sgm{
*fec509a0Sgm	/*
*fec509a0Sgm	 * logtable[i-1] == round(2^63 * log2(2^i/(2^i - 1))), where ^
*fec509a0Sgm	 * is exponentiation.
*fec509a0Sgm	 */
*fec509a0Sgm	static const uint64_t logtable[] = {
*fec509a0Sgm		9223372036854775808ULL, 3828045265094622256ULL,
*fec509a0Sgm		1776837224931603046ULL, 858782676832593460ULL,
*fec509a0Sgm		422464469962470743ULL, 209555718266071751ULL,
*fec509a0Sgm		104365343613858422ULL, 52080352580344565ULL,
*fec509a0Sgm		26014696649359209ULL, 13000990870918027ULL,
*fec509a0Sgm		6498907625079429ULL, 3249057053828501ULL,
*fec509a0Sgm		1624429361456373ULL, 812189892390238ULL,
*fec509a0Sgm		406088749488886ULL, 203042825615163ULL,
*fec509a0Sgm		101521025531171ULL, 50760415947221ULL,
*fec509a0Sgm		25380183769112ULL, 12690085833443ULL,
*fec509a0Sgm		6345041403945ULL, 3172520323778ULL,
*fec509a0Sgm		1586260067341ULL, 793130010033ULL,
*fec509a0Sgm		396564999107ULL, 198282498076ULL,
*fec509a0Sgm		99141248669ULL, 49570624242ULL,
*fec509a0Sgm		24785312098ULL, 12392656043ULL, 6196328020ULL, 3098164010ULL,
*fec509a0Sgm		1549082005ULL, 774541002ULL, 387270501ULL, 193635251ULL,
*fec509a0Sgm		96817625ULL, 48408813ULL, 24204406ULL, 12102203ULL, 6051102ULL,
*fec509a0Sgm		3025551ULL, 1512775ULL, 756388ULL, 378194ULL, 189097ULL,
*fec509a0Sgm		94548ULL, 47274ULL, 23637ULL, 11819ULL, 5909ULL, 2955ULL,
*fec509a0Sgm		1477ULL, 739ULL, 369ULL, 185ULL, 92ULL, 46ULL, 23ULL,
*fec509a0Sgm		12ULL, 6ULL, 3ULL, 1ULL
*fec509a0Sgm	};
*fec509a0Sgm
*fec509a0Sgm	uint64_t	xx;
*fec509a0Sgm	uint64_t	logx;
*fec509a0Sgm	uint64_t	tmp;
*fec509a0Sgm	int		i;
*fec509a0Sgm
*fec509a0Sgm	if (x == 0) {
*fec509a0Sgm		return (-INT64_MAX - 1);
*fec509a0Sgm	}
*fec509a0Sgm
*fec509a0Sgm	/*
*fec509a0Sgm	 * Invariant: log2(xx) + logx == log2(x).  This is true at the after
*fec509a0Sgm	 * the normalization.  At each adjustment step we multiply xx by
*fec509a0Sgm	 * (2^i-1)/2^i, which effectively decreases log2(xx) by
*fec509a0Sgm	 * log2(2^i/(2^i-1)), and a the same time, we add table[i], which
*fec509a0Sgm	 * equals log2(2^i/(2^i-1)), to logx.  By induction the invariant is
*fec509a0Sgm	 * true at the end.  At the end xx==1, so log2(xx)==0, and thus
*fec509a0Sgm	 * logx=log2(x);
*fec509a0Sgm	 */
*fec509a0Sgm	/* Normalize */
*fec509a0Sgm	i = msb(x); /* use i in computing preshift */
*fec509a0Sgm	if (i - LOG_ARG_SCALE > 0) {
*fec509a0Sgm		xx = x >> (i - LOG_ARG_SCALE);
*fec509a0Sgm	} else {
*fec509a0Sgm		xx = x << (LOG_ARG_SCALE - i);
*fec509a0Sgm	}
*fec509a0Sgm	logx = (int64_t)(i - LOG_ARG_SCALE) << LOG_VAL_SCALE;
*fec509a0Sgm
*fec509a0Sgm	for (i = 1; i <= LOG_ARG_SCALE;	 i++) {
*fec509a0Sgm		/* 1ULL << (i-1) is rounding */
*fec509a0Sgm		while ((tmp = xx - ((xx + (1ULL << (i-1))) >> i)) >=
*fec509a0Sgm		    1ULL << LOG_ARG_SCALE) {
*fec509a0Sgm			xx = tmp;
*fec509a0Sgm			/* 1ULL << (63 - LOG_VAL_SCALE -1) is rounding */
*fec509a0Sgm			logx += (logtable[i-1] +
*fec509a0Sgm			    (1ULL << (63 - LOG_VAL_SCALE - 1))) >>
*fec509a0Sgm			    (63 - LOG_VAL_SCALE);
*fec509a0Sgm		}
*fec509a0Sgm	}
*fec509a0Sgm
*fec509a0Sgm	return (logx);
*fec509a0Sgm}
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * The EXCHANGE macro swaps entries j & k if necessary so that
*fec509a0Sgm * data[j] <= data[k].
*fec509a0Sgm *
*fec509a0Sgm * If OBLIVIOUS is defined, no branches are used.  This would allow
*fec509a0Sgm * this algorithm to be used by the CPU manufacturing people who run
*fec509a0Sgm * on a tester that requires the exact same instruction address stream
*fec509a0Sgm * on every test. (It's a bit slower with OBLIVIOUS defined.)
*fec509a0Sgm */
*fec509a0Sgm#ifdef OBLIVIOUS
*fec509a0Sgm#define	EXCHANGE(j, k)			\
*fec509a0Sgm	{				\
*fec509a0Sgm		uint64_t tmp, mask;	\
*fec509a0Sgm		mask = (uint64_t)(((int64_t)(data[k] - data[j])) >> 63); \
*fec509a0Sgm		tmp = data[j] + data[k];			\
*fec509a0Sgm		data[j] = data[k] & mask | data[j] & ~mask;	\
*fec509a0Sgm		data[k] = tmp - data[j];			\
*fec509a0Sgm	}
*fec509a0Sgm#else
*fec509a0Sgm#define	EXCHANGE(j, k)				\
*fec509a0Sgm	{					\
*fec509a0Sgm		uint64_t tmp;			\
*fec509a0Sgm		if (data[j] > data[k]) {	\
*fec509a0Sgm			tmp = data[j];		\
*fec509a0Sgm			data[j] = data[k];	\
*fec509a0Sgm			data[k] = tmp;		\
*fec509a0Sgm		}				\
*fec509a0Sgm	}
*fec509a0Sgm#endif
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * This is a Batcher sort from Knuth v. 3.  There is no flow control
*fec509a0Sgm * that depends on the values being sorted, except in the EXCHANGE
*fec509a0Sgm * step, but that can be made oblivious to the data values, too, by
*fec509a0Sgm * setting OBLIVIOUS.  So this code could be using in chip testers
*fec509a0Sgm * that require fixed flow through a test.
*fec509a0Sgm *
*fec509a0Sgm * This is presently hard-coded for sorting uint64_t values.
*fec509a0Sgm */
*fec509a0Sgmvoid
*fec509a0Sgmn2rng_sort(uint64_t *data, int log2_size)
*fec509a0Sgm{
*fec509a0Sgm	int p, q, d, r, i;
*fec509a0Sgm
*fec509a0Sgm	for (p = 1 << (log2_size - 1); p > 0; p >>= 1) {
*fec509a0Sgm		d = p;
*fec509a0Sgm		r = 0;
*fec509a0Sgm		for (q = 1 << (log2_size - 1); q >= p; q >>= 1) {
*fec509a0Sgm			for (i = 0; i + d < (1 << log2_size); i++) {
*fec509a0Sgm				if ((i & p) == r) {
*fec509a0Sgm					EXCHANGE(i, i+d);
*fec509a0Sgm				}
*fec509a0Sgm			}
*fec509a0Sgm			d = q - p;
*fec509a0Sgm			r = p;
*fec509a0Sgm		}
*fec509a0Sgm	}
*fec509a0Sgm}
*fec509a0Sgm
*fec509a0Sgm
*fec509a0Sgm/*
*fec509a0Sgm * Computes several measures of entropy per word: Renyi H0 (log2 of
*fec509a0Sgm * number of distinct symbols), Renyi H1 (Shannon),
*fec509a0Sgm * Renyi H2 (-log2 of sum(P_i^2)), and
*fec509a0Sgm * Renyi H-infinity (min).  The results are coded as H *
*fec509a0Sgm * 2^LOG_VAL_SCALE).  The samples array is modified by sorting in
*fec509a0Sgm * place.
*fec509a0Sgm *
*fec509a0Sgm * None if this is really valid, since it requres that the block
*fec509a0Sgm * length be at least as long as the largest non-approximately-zero
*fec509a0Sgm * coefficient in the autocorrelation function, and that the number
*fec509a0Sgm * of samples be much larger than 2^longest_block_length_in_bits.
*fec509a0Sgm * But we hope that bigger is better, even when it is invalid.
*fec509a0Sgm */
*fec509a0Sgmvoid
*fec509a0Sgmn2rng_renyi_entropy(uint64_t *samples, int lg2samples, n2rng_osc_perf_t *entp)
*fec509a0Sgm{
*fec509a0Sgm	size_t i;
*fec509a0Sgm	uint64_t cv = samples[0]; /* current value */
*fec509a0Sgm	size_t count = 1;
*fec509a0Sgm	size_t numdistinct = 0;
*fec509a0Sgm	size_t largestcount = 0;
*fec509a0Sgm	uint64_t shannonsum = 0;
*fec509a0Sgm	uint64_t sqsum = 0;
*fec509a0Sgm
*fec509a0Sgm	n2rng_sort(samples, lg2samples);
*fec509a0Sgm
*fec509a0Sgm	for (i = 1; i < (1 << lg2samples); i++) {
*fec509a0Sgm		if (samples[i] != cv) {
*fec509a0Sgm			numdistinct++;
*fec509a0Sgm			if (count > largestcount) {
*fec509a0Sgm				largestcount = count;
*fec509a0Sgm			}
*fec509a0Sgm#ifdef COMPUTE_SHANNON_ENTROPY
*fec509a0Sgm			shannonsum -= (count * (lg2(count) +
*fec509a0Sgm			    ((int64_t)(LOG_ARG_SCALE - lg2samples) <<
*fec509a0Sgm			    LOG_VAL_SCALE))) >> lg2samples;
*fec509a0Sgm#endif /* COMPUTE_SHANNON_ENTROPY */
*fec509a0Sgm			sqsum += count * count;
*fec509a0Sgm			count = 1;
*fec509a0Sgm			cv = samples[i];
*fec509a0Sgm		} else {
*fec509a0Sgm			count++;
*fec509a0Sgm		}
*fec509a0Sgm	}
*fec509a0Sgm	/* process last block */
*fec509a0Sgm	numdistinct++;
*fec509a0Sgm	if (count > largestcount) {
*fec509a0Sgm		largestcount = count;
*fec509a0Sgm	}
*fec509a0Sgm#ifdef COMPUTE_SHANNON_ENTROPY
*fec509a0Sgm	shannonsum -= (count * (lg2(count) +
*fec509a0Sgm	    ((int64_t)(LOG_ARG_SCALE - lg2samples) << LOG_VAL_SCALE))) >>
*fec509a0Sgm	    lg2samples;
*fec509a0Sgm#endif /* COMPUTE_SHANNON_ENTROPY */
*fec509a0Sgm	sqsum += count * count;
*fec509a0Sgm
*fec509a0Sgm	entp->numvals = numdistinct;
*fec509a0Sgm	/* H1 is shannon entropy: -sum(p_i * log2(p_i)) */
*fec509a0Sgm	entp->H1 = shannonsum / 64;
*fec509a0Sgm	/* H2 is -log2(sum p_i^2) */
*fec509a0Sgm	entp->H2 = -(lg2(sqsum) +
*fec509a0Sgm	    ((int64_t)(LOG_ARG_SCALE - 2 * lg2samples) << LOG_VAL_SCALE)) / 64;
*fec509a0Sgm	/* Hinf = -log2(highest_probability) */
*fec509a0Sgm	entp->Hinf = -(lg2(largestcount) +
*fec509a0Sgm	    ((int64_t)(LOG_ARG_SCALE - lg2samples) << LOG_VAL_SCALE)) / 64;
*fec509a0Sgm}