1#To: bug-gnu-utils@gnu.org
2#cc: arnold@gnu.org
3#Subject: Possible bug in GNU Awk 3.0.4
4#Date: Wed, 24 Nov 1999 21:47:24 +0000
5#From: Daniel Elphick <de397@ecs.soton.ac.uk>
6#Message-Id: <E11qkG4-0000l0-00@cameron>
7#
8#This is a multipart MIME message.
9#
10#--==_Exmh_-11192982200
11#Content-Type: text/plain; charset=us-ascii
12#
13#
14#When I use the attached awk script unique on the attached data file, it
15#reports that all 4 lines of the data are the same. Using mawk it correctly
16#reports that there are no repeats.
17#
18#I don't know if there are limits on the size of associative array keys for the
19#purposes of reliable indexing but if there is then it is not (obviously)
20#documented.
21#
22#
23#--==_Exmh_-11192982200
24#Content-Type: text/plain ; name="data"; charset=us-ascii
25#Content-Description: data
26#Content-Disposition: attachment; filename="data"
27#
28#322322111111112232231111
29#322322111111112213223111
30#322322111111112211132231
31#322322111111112211113223
32#
33#--==_Exmh_-11192982200
34#Content-Type: text/plain ; name="unique"; charset=us-ascii
35#Content-Description: unique
36#Content-Disposition: attachment; filename="unique"
37#
# Main rule, run for every input record.
# a[] maps each distinct record's text to the record number (NR) at which it
# was first seen; count tallies all records, repeat_count tallies repeats.
{
	count++

	if (!($0 in a)) {
		# First sighting: remember where this record appeared.
		a[$0] = NR
	} else {
		# Seen before: report the earlier record number and count the repeat.
		printf "line %d has been seen before at line %d\n", NR, a[$0]
		repeat_count++
	}
}
# Summary: print the number of repeated records and the percentage of all
# records that were repeats.
END {
	# awk arithmetic is floating point, so no cast is needed before dividing.
	# Guard the division: on empty input count is unset (0), and dividing by
	# zero is a fatal error in awk; report 0% in that case instead.
	printf("%d %f%%\n", repeat_count, (count ? repeat_count / count * 100 : 0))
}
54#
55#--==_Exmh_-11192982200--
56