awk/gnu/randtest.sh

# THIS PURPOSELY DOES NOT HAVE A !# LINE !!!!
#
# Date: Mon, 9 Sep 2013 14:49:43 -0700
# From: Bob Jewett <jewett@bill.scs.agilent.com>
# Message-Id: <201309092149.r89Lnh94010909@bill.scs.agilent.com>
# To: arnold@skeeve.com
# Subject: Re: [bug-gawk] Bug in random() in builtin.c
#
# Hi Arnold,
#
# Attached below is a script that tests gawk for this particular
# rand() problem.  The pair-wise combinations show a strong
# autocorrelation for a delay of 31 pairs of rand() samples.
#
# The script prints out the measured autocorrelation for a record
# of NSAMPLES pairs.  It also prints a fail message at the end if
# it fails.
#
# If you want to see the autocorrelation values, there is a print
# statement that if uncommented will save them to a file.
#
# Please let me know if the mailer screws up the transfer or
# if you have any questions about the test.
#
# Best regards,
# Bob
#
# -------------- test_pair_power_autocorrelation -----------------------
#
#!/bin/ksh

#GAWK=/bin/gawk

if [ -z "$AWK" ]; then
    printf '$AWK must be set\n' >&2
    exit 1
fi

# ADR: Get GAWK from the environment.
# Additional note: This wants ksh/bash for the use of $RANDOM below to
# seed the generator. However, shells that don't provide it won't be
# a problem since gawk will then seed the generator with the time of day,
# as srand() will be called without an argument.

# large NSAMPLES and NRUNS will bring any correlation out of the noise better
NSAMPLES=1024; MAX_ALLOWED_SIGMA=5; NRUNS=50;

$AWK 'BEGIN{
    srand('$RANDOM');
    nsamples=('$NSAMPLES');
    max_allowed_sigma=('$MAX_ALLOWED_SIGMA');
    nruns=('$NRUNS');
    for(tau=0;tau<nsamples/2;tau++) corr[tau]=0;

    for(run=0;run<nruns;run++) {
	sum=0;

	# Fill an array with a sequence of samples that are a
	# function of pairs of rand() values.

	for(i=0;i<nsamples;i++) {
	   samp[i]=((rand()-0.5)*(rand()-0.5))^2;
	   sum=sum+samp[i];
	   }

	# Subtract off the mean of the sequence:

	mean=sum/nsamples;
	for(i=0;i<nsamples;i++) samp[i]=samp[i]-mean;

	# Calculate an autocorrelation function on the sequence.
	# Because the values of rand() should be independent, there
	# should be no peaks in the autocorrelation.

	for(tau=0;tau<nsamples/2;tau++) {
	    sum=0;
	    for(i=0;i<nsamples/2;i++) sum=sum+samp[i]*samp[i+tau];
	    corr[tau]=corr[tau]+sum;
	    }

	}
    # Normalize the autocorrelation to the tau=0 value.

    max_corr=corr[0];
    for(tau=0;tau<nsamples/2;tau++) corr[tau]=corr[tau]/max_corr;

    # OPTIONALLY Print out the autocorrelation values:

    # for(tau=0;tau<nsamples/2;tau++) print tau, corr[tau] > "pairpower_corr.data";

    # Calculate the sigma for the non-zero tau values:

    power_sum=0;

    for(tau=1;tau<nsamples/2;tau++) power_sum=power_sum+(corr[tau])^2;

    sigma=sqrt(power_sum/(nsamples/2-1));

    # See if any of the correlations exceed a reasonable number of sigma:

    passed=1;
    for(tau=1;tau<nsamples/2;tau++) {
	if ( abs(corr[tau])/sigma > max_allowed_sigma ) {
	    print "Tau=", tau ", Autocorr=", corr[tau]/sigma, "sigma";
	    passed=0;
	    }
        }
    if(!passed) {
	print "Test failed."
	exit(1);
        }
    else exit (0);
    }

function abs(abs_input) { return(sqrt(abs_input^2)) ; }
'