# From the Gawk Manual, modified by a bug fix and by removal of punctuation.
#
# Reads text, collects the distinct lowercased words it contains, and writes
# every word longer than 10 characters, plus a "<count> long words" summary
# line, through an external sort command.

# The invoker may preset SORT (e.g. with -v) to substitute another sort
# command; otherwise a locale-independent byte-order sort is used.
BEGIN {
	if (!SORT) {
		SORT = "LC_ALL=C sort"
	}
}

# For each field, remember the first run of lowercase letters and hyphens.
# match() reports the leftmost such run, so anything after the first
# non-matching character in a field (punctuation, digits, ...) is ignored.
{
	for (field = 1; field <= NF; field++) {
		lowered = tolower($field)
		if (match(lowered, /([[:lower:]]|-)+/)) {
			# RSTART is the same position that match() returned.
			seen[substr(lowered, RSTART, RLENGTH)] = 1
		}
	}
}

# Emit every distinct word longer than 10 characters and count them.
END {
	long_count = 0
	for (word in seen) {
		if (length(word) <= 10)
			continue
		long_count++
		print word | SORT
	}
	# The summary line goes down the same pipe, so it is sorted together
	# with the words rather than being printed after them.
	print long_count, "long words" | SORT
	# Closing the pipe lets the sort command finish and flush its output.
	close(SORT)
}