### ====================================================================
###  @Awk-file{
###     author          = "Nelson H. F. Beebe",
###     version         = "1.00",
###     date            = "09 October 1996",
###     time            = "15:57:06 MDT",
###     filename        = "journal-toc.awk",
###     address         = "Center for Scientific Computing
###                        Department of Mathematics
###                        University of Utah
###                        Salt Lake City, UT 84112
###                        USA",
###     telephone       = "+1 801 581 5254",
###     FAX             = "+1 801 581 4148",
###     URL             = "http://www.math.utah.edu/~beebe",
###     checksum        = "25092 977 3357 26493",
###     email           = "beebe@math.utah.edu (Internet)",
###     codetable       = "ISO/ASCII",
###     keywords        = "BibTeX, bibliography, HTML, journal table of
###                        contents",
###     supported       = "yes",
###     docstring       = "Create a journal cover table of contents from
###                        <at>Article{...} entries in a journal BibTeX
###                        .bib file for checking the bibliography
###                        database against the actual journal covers.
###                        The output can be either plain text, or HTML.
###
###                        Usage:
###                            bibclean -max-width 0 BibTeX-file(s) | \
###                                bibsort -byvolume | \
###                                awk -f journal-toc.awk \
###                                    [-v HTML=nnn] [-v INDENT=nnn] \
###                                    [-v BIBFILEURL=url] >foo.toc
###
###                            or if the bibliography is already sorted
###                            by volume,
###
###                            bibclean -max-width 0 BibTeX-file(s) | \
###                                awk -f journal-toc.awk \
###                                    [-v HTML=nnn] [-v INDENT=nnn] \
###                                    [-v BIBFILEURL=url] >foo.toc
###
###                        A non-zero value of the command-line option,
###                        HTML=nnn, results in HTML output instead of
###                        the default plain ASCII text (corresponding
###                        to HTML=0).
###
###                        The INDENT=nnn command-line option specifies
###                        the number of blanks to indent each logical
###                        level of HTML.  The default is INDENT=4.
###                        INDENT=0 suppresses indentation.  The INDENT
###                        option has no effect when the default HTML=0
###                        (plain text output) option is in effect.
###
###                        When HTML output is selected, the
###                        BIBFILEURL=url command-line option provides a
###                        way to request hypertext links from table of
###                        contents page numbers to the complete BibTeX
###                        entry for the article.  These links are
###                        created by appending a sharp (#) and the
###                        citation label to the BIBFILEURL value, which
###                        conforms with the practice of
###                        bibtex-to-html.awk.
###
###                        The HTML output form may be useful as a more
###                        compact representation of journal article
###                        bibliography data than the original BibTeX
###                        file provides.  Of course, the
###                        table-of-contents format provides less
###                        information, and is considerably more
###                        troublesome for a computer program to parse.
###
###                        When URL key values are provided, they will
###                        be used to create hypertext links around
###                        article titles.  This supports journals that
###                        provide article contents on the World-Wide
###                        Web.
###
###                        For parsing simplicity, this program requires
###                        that BibTeX
###
###                            key = "value"
###
###                        and
###
###                            @String{name = "value"}
###
###                        specifications be entirely contained on
###                        single lines, which is readily provided by
###                        the `bibclean -max-width 0' filter.  It also
###                        requires that bibliography entries begin and
###                        end at the start of a line, and that
###                        quotation marks, rather than balanced braces,
###                        delimit string values.  This is a
###                        conventional format that again can be
###                        guaranteed by bibclean.
###
###                        This program requires `new' awk, as described
###                        in the book
###
###                            Alfred V. Aho, Brian W. Kernighan, and
###                            Peter J. Weinberger,
###                            ``The AWK Programming Language'',
###                            Addison-Wesley (1988), ISBN
###                            0-201-07981-X,
###
###                        such as provided by programs named (GNU)
###                        gawk, nawk, and recent AT&T awk.
###
###                        The checksum field above contains a CRC-16
###                        checksum as the first value, followed by the
###                        equivalent of the standard UNIX wc (word
###                        count) utility output of lines, words, and
###                        characters.
###                        This is produced by Robert
###                        Solovay's checksum utility.",
###  }
### ====================================================================

# Main pattern/action rules.  Each rule ends with `next', so at most one
# rule fires per input line and the order of the rules matters: the
# specific @String/@Preamble/@Article patterns must precede the generic
# /^ *@/ pattern.

BEGIN                                           { initialize() }

/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/            { do_String(); next }

/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/        { next }

/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/            { do_Article(); next }

/^ *@/                                          { do_Other(); next }

/^ *author *= *\"/                              { do_author(); next }

/^ *journal *= */                               { do_journal(); next }

/^ *volume *= *\"/                              { do_volume(); next }

/^ *number *= *\"/                              { do_number(); next }

/^ *year *= *\"/                                { do_year(); next }

/^ *month *= */                                 { do_month(); next }

/^ *title *= *\"/                               { do_title(); next }

/^ *pages *= *\"/                               { do_pages(); next }

/^ *URL *= *\"/                                 { do_URL(); next }

/^ *\} *$/                                      { if (In_Article) do_end_entry(); next }

END                                             { terminate() }


########################################################################
# NB: The programming conventions for variables in this program are:   #
#       UPPERCASE               global constants and user options      #
#       Initialuppercase        global variables                       #
#       lowercase               local variables                        #
# Any deviation is an error!                                           #
########################################################################


# Start a new @Article entry: remember its citation label (the text
# between the opening brace and the trailing comma on the current line)
# and clear every per-article field.
function do_Article()
{
    In_Article = 1

    Citation_label = $0
    sub(/^[^\{]*\{/,"",Citation_label)
    sub(/ *, *$/,"",Citation_label)

    Author = ""
    Title = ""
    Journal = ""
    Volume = ""
    Number = ""
    Month = ""
    Year = ""
    Pages = ""
    Url = ""
}


# Record the author list of the current entry, converted for output.
function do_author()
{
    Author = TeX_to_HTML(get_value($0))
}


# Emit the table-of-contents line(s) for the article that just ended.
# Every author but the last gets a line of its own ("Name and"); the
# last author shares a line with the title and page numbers.
function do_end_entry( k,n,parts)
{
    n = split(Author,parts," and ")

    # A change of volume starts a new issue even when the issue number
    # is unchanged (e.g., journals without issue numbers).
    if ((Last_number != Number) || (Last_volume != Volume))
        do_new_issue()

    for (k = 1; k < n; ++k)
        print_toc_line(parts[k] " and", "", "")

    Title_prefix = html_begin_title()
    Title_suffix = html_end_title()
    if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
        print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
    else # need to split long title over multiple lines
        do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())

    # The entry is finished: a later line holding only a close brace
    # (such as the end of a multi-line @Preamble) must not emit this
    # article a second time.
    In_Article = 0
}


# Record the journal name, expanding an @String abbreviation if the
# value is not a quoted string.
function do_journal()
{
    if ($0 ~ /[=] *"/) # have journal = "quoted journal name",
        Journal = get_value($0)
    else # have journal = journal-abbreviation,
    {
        Journal = get_abbrev($0)
        if (Journal in String) # replace abbrev by its expansion
            Journal = String[Journal]
    }
    gsub(/\\-/,"",Journal) # remove discretionary hyphens
}


# Print a title that is too long for one line as several lines.  The
# author appears only on the first line and the pages only on the last.
function do_long_title(author,title,pages, last_title,n)
{
    title = trim(title) # discard leading and trailing space
    while (length(title) > 0)
    {
        n = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
        last_title = substr(title,1,n)
        title = substr(title,n+1)
        sub(/^ +/,"",title) # discard any leading space
        print_toc_line(author, last_title, (length(title) == 0) ? pages : "")
        author = ""
    }
}


# Record the month.  Compound values such as  jan # "\slash " # feb  or
# jan # "--" # feb  are normalized to "jan / feb", and each part that
# is a key of Month_expansion is replaced by its expansion.
function do_month( k,n,parts)
{
    Month = ($0 ~ /[=] *"/) ? get_value($0) : get_abbrev($0)
    gsub(/[\"]/,"",Month)
    gsub(/ *# *\\slash *# */," / ",Month)
    gsub(/ *# *-+ *# */," / ",Month)
    n = split(Month,parts," */ *")
    Month = ""
    for (k = 1; k <= n; ++k)
        Month = Month ((k > 1) ? " / " : "") \
            ((parts[k] in Month_expansion) ? Month_expansion[parts[k]] : parts[k])
}


# Emit the heading for a new journal issue and remember which
# volume/number is now current.
function do_new_issue( volume_changed)
{
    volume_changed = (Last_volume != Volume)
    Last_number = Number
    Last_volume = Volume        # tracked in both output modes

    if (HTML)
    {
        if (volume_changed)
            print_line(prefix(2) "<BR>")
        html_end_toc()
        html_begin_issue()
        print_line(prefix(2) Journal "<BR>")
    }
    else
    {
        print_line("")
        print_line(Journal)
    }

    print_line(strip_html(vol_no_month_year()))

    if (HTML)
    {
        html_end_issue()
        html_toc_entry()
        html_begin_toc()
    }
    else
        print_line("")
}


# Record the issue number.
function do_number()
{
    Number = get_value($0)
}


# Any entry type other than @Article, @String, or @Preamble: ignore its
# contents.
function do_Other()
{
    In_Article = 0
}


# Record the page range, dropping an unknown ending page ("--??").
function do_pages()
{
    Pages = get_value($0)
    sub(/--[?][?]/,"",Pages)
}


# Record an @String{name = "value"} definition in String[name].
function do_String()
{
    sub(/^[^\{]*\{/,"",$0) # discard up to and including open brace
    sub(/\} *$/,"",$0) # discard trailing close brace and any following whitespace
    String[get_key($0)] = get_value($0)
}


# Record the article title, converted for output.
function do_title()
{
    Title = TeX_to_HTML(get_value($0))
}


# Record the article URL; when several are listed (separated by commas
# or semicolons), only the first is kept.
function do_URL( parts)
{
    Url = get_value($0)
    split(Url,parts,"[,;]") # in case we have multiple URLs
    Url = trim(parts[1])
}


# Record the volume number.
function do_volume()
{
    Volume = get_value($0)
}


# Record the publication year.
function do_year()
{
    Year = get_value($0)
}


function get_abbrev(s)
{ # return abbrev from ``key = abbrev,''
    sub(/^[^=]*= */,"",s) # discard text up to start of non-blank value
    sub(/ *,? *$/,"",s) # discard trailing optional whitespace,
                        # optional comma, and optional space
    return (s)
}


function get_key(s)
{ # return key from ``key = "value",''
    sub(/^ */,"",s) # discard leading space
    sub(/ *=.*$/,"",s) # discard everything after key

    return (s)
}


function get_value(s)
{ # return value from ``key = "value",''
    sub(/^[^\"]*\" */,"",s) # discard text up to start of non-blank value
    sub(/ *\",? *$/,"",s) # discard trailing optional whitespace, quote,
                          # optional comma, and optional space
    return (s)
}


# Return s with braced TeX accent control sequences, such as {\'e},
# replaced by the corresponding ISO Latin 1 SGML entity (&eacute;).
#
# In each replacement string, "\\&" reaches gsub() as \& and therefore
# produces a literal ampersand rather than a copy of the matched text.
function html_accents(s)
{
    if (index(s,"\\") > 0) # important optimization
    {
        # Convert common lower-case accented letters according to the
        # table on p. 169 of Peter Flynn's ``The World Wide Web
        # Handbook'', International Thomson Computer Press, 1995, ISBN
        # 1-85032-205-8.  The official table of ISO Latin 1 SGML
        # entities used in HTML can be found in the file
        # /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
        # may differ).

        gsub(/\{\\`a\}/,    "\\&agrave;", s)
        gsub(/\{\\'a\}/,    "\\&aacute;", s)
        gsub(/\{\\[\^]a\}/, "\\&acirc;",  s)
        gsub(/\{\\~a\}/,    "\\&atilde;", s)
        gsub(/\{\\\"a\}/,   "\\&auml;",   s)
        gsub(/\{\\aa\}/,    "\\&aring;",  s)
        gsub(/\{\\ae\}/,    "\\&aelig;",  s)

        gsub(/\{\\c\{c\}\}/,"\\&ccedil;", s)

        gsub(/\{\\`e\}/,    "\\&egrave;", s)
        gsub(/\{\\'e\}/,    "\\&eacute;", s)
        gsub(/\{\\[\^]e\}/, "\\&ecirc;",  s)
        gsub(/\{\\\"e\}/,   "\\&euml;",   s)

        gsub(/\{\\`i\}/,    "\\&igrave;", s)
        gsub(/\{\\'i\}/,    "\\&iacute;", s)
        gsub(/\{\\[\^]i\}/, "\\&icirc;",  s)
        gsub(/\{\\\"i\}/,   "\\&iuml;",   s)

        # ignore eth and thorn

        gsub(/\{\\~n\}/,    "\\&ntilde;", s)

        gsub(/\{\\`o\}/,    "\\&ograve;", s)
        gsub(/\{\\'o\}/,    "\\&oacute;", s)
        gsub(/\{\\[\^]o\}/, "\\&ocirc;",  s)
        gsub(/\{\\~o\}/,    "\\&otilde;", s)
        gsub(/\{\\\"o\}/,   "\\&ouml;",   s)
        gsub(/\{\\o\}/,     "\\&oslash;", s)

        gsub(/\{\\`u\}/,    "\\&ugrave;", s)
        gsub(/\{\\'u\}/,    "\\&uacute;", s)
        gsub(/\{\\[\^]u\}/, "\\&ucirc;",  s)
        gsub(/\{\\\"u\}/,   "\\&uuml;",   s)

        gsub(/\{\\'y\}/,    "\\&yacute;", s)
        gsub(/\{\\\"y\}/,   "\\&yuml;",   s)

        # Now do the same for upper-case accents

        gsub(/\{\\`A\}/,    "\\&Agrave;", s)
        gsub(/\{\\'A\}/,    "\\&Aacute;", s)
        gsub(/\{\\[\^]A\}/, "\\&Acirc;",  s)
        gsub(/\{\\~A\}/,    "\\&Atilde;", s)
        gsub(/\{\\\"A\}/,   "\\&Auml;",   s)
        gsub(/\{\\AA\}/,    "\\&Aring;",  s)
        gsub(/\{\\AE\}/,    "\\&AElig;",  s)

        gsub(/\{\\c\{C\}\}/,"\\&Ccedil;", s)

        gsub(/\{\\`E\}/,    "\\&Egrave;", s)
        gsub(/\{\\'E\}/,    "\\&Eacute;", s)
        gsub(/\{\\[\^]E\}/, "\\&Ecirc;",  s)
        gsub(/\{\\\"E\}/,   "\\&Euml;",   s)

        gsub(/\{\\`I\}/,    "\\&Igrave;", s)
        gsub(/\{\\'I\}/,    "\\&Iacute;", s)
        gsub(/\{\\[\^]I\}/, "\\&Icirc;",  s)
        gsub(/\{\\\"I\}/,   "\\&Iuml;",   s)

        # ignore eth and thorn

        gsub(/\{\\~N\}/,    "\\&Ntilde;", s)

        gsub(/\{\\`O\}/,    "\\&Ograve;", s)
        gsub(/\{\\'O\}/,    "\\&Oacute;", s)
        gsub(/\{\\[\^]O\}/, "\\&Ocirc;",  s)
        gsub(/\{\\~O\}/,    "\\&Otilde;", s)
        gsub(/\{\\\"O\}/,   "\\&Ouml;",   s)
        gsub(/\{\\O\}/,     "\\&Oslash;", s)

        gsub(/\{\\`U\}/,    "\\&Ugrave;", s)
        gsub(/\{\\'U\}/,    "\\&Uacute;", s)
        gsub(/\{\\[\^]U\}/, "\\&Ucirc;",  s)
        gsub(/\{\\\"U\}/,   "\\&Uuml;",   s)

        gsub(/\{\\'Y\}/,    "\\&Yacute;", s)

        gsub(/\{\\ss\}/,    "\\&szlig;",  s)

        # Others not mentioned in Flynn's book
        gsub(/\{\\'\\i\}/,"\\&iacute;", s)
        gsub(/\{\\'\\j\}/,"j", s)
    }
    return (s)
}


# Open the <H1> heading and named anchor for a new issue.
function html_begin_issue()
{
    print_line("")
    print_line(prefix(2) "<HR>")
    print_line("")
    print_line(prefix(2) "<H1>")
    print_line(prefix(3) "<A NAME=\"" html_label() "\">")
}


# Return the opening anchor that links the page numbers to the BibTeX
# entry, or "" when not producing HTML or no BIBFILEURL was given.
function html_begin_pages()
{
    return ((HTML && (BIBFILEURL != "")) ? ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">") : "")
}


# Open a <PRE> environment and remember that it is open.
function html_begin_pre()
{
    In_PRE = 1
    print_line("<PRE>")
}


# Return the opening anchor that links the title to the article URL, or
# "" when not producing HTML or the entry has no URL.
function html_begin_title()
{
    return ((HTML && (Url != "")) ? ("<A HREF=\"" Url "\">") : "")
}


# Begin the table of contents of an issue, closing any one still open.
function html_begin_toc()
{
    html_end_toc()
    html_begin_pre()
}


# Print the saved body lines Body[1..BodyLines].
function html_body( k)
{
    for (k = 1; k <= BodyLines; ++k)
        print Body[k]
}

function html_breakpoint(title,maxlength, break_after,k)
{
    # Return the largest character position in title AFTER which we
    # can break the title across lines, without exceeding maxlength
    # visible characters.
    if (html_length(title) > maxlength) # then need to split title across lines
    {
        # In the presence of HTML markup, the initialization of
        # k here is complicated, because we need to advance it
        # until html_length(title) is at least maxlength,
        # without invoking the expensive html_length() function
        # too frequently.  The need to split the title makes the
        # alternative of delayed insertion of HTML markup much
        # more complicated.
        break_after = 0
        for (k = min(maxlength,length(title)); k < length(title); ++k)
        {
            if (substr(title,k+1,1) == " ")
            { # could break after position k
                if (html_length(substr(title,1,k)) <= maxlength)
                    break_after = k
                else # advanced too far, retreat back to last break_after
                    break
            }
        }
        if (break_after == 0) # no breakpoint found by forward scan
        { # so switch to backward scan
            for (k = min(maxlength,length(title)) - 1; \
                (k > 0) && (substr(title,k+1,1) != " "); --k)
                ; # find space at which to break title
            if (k < 1) # no break point found
                k = length(title) # so must print entire string
        }
        else
            k = break_after
    }
    else # title fits on one line
        k = length(title)
    return (k)
}



# Close the anchor and <H1> heading opened by html_begin_issue().
function html_end_issue()
{
    print_line(prefix(3) "</A>")
    print_line(prefix(2) "</H1>")
}


# Return the closing tag matching html_begin_pages().
function html_end_pages()
{
    return ((HTML && (BIBFILEURL != "")) ? "</A>" : "")
}


# Close the <PRE> environment if one is open.
function html_end_pre()
{
    if (In_PRE)
    {
        print_line("</PRE>")
        In_PRE = 0
    }
}


# Return the closing tag matching html_begin_title().
function html_end_title()
{
    return ((HTML && (Url != "")) ? "</A>" : "")
}


# End the table of contents of the current issue.
function html_end_toc()
{
    html_end_pre()
}


# Convert the first braced group in s according to Font_decl_map
# (declaration form, {\xxx ...}) or Font_cmd_map (command form,
# \xxx{...}), then recurse on the text after the group.  A map value of
# "toupper" upper-cases the argument, any other non-empty value wraps
# it in <value>...</value>, and an empty value leaves the bare argument.
# Returns s unchanged when it has no open brace or the first group
# matches neither map.
#
# NOTE(review): the matching below implies the map keys include the
# leading backslash of the control word -- confirm where the maps are
# initialized.
function html_fonts(s, arg,control_word,k,level,n,open_brace)
{
    open_brace = index(s,"{")
    if (open_brace > 0) # important optimization
    {
        level = 1
        for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
        {
            if (substr(s,k,1) == "{")
                level++
            else if (substr(s,k,1) == "}")
                level--
        }

        # {...} is now found at open_brace ... (k-1)
        for (control_word in Font_decl_map) # look for {\xxx ...}
        {
            if (substr(s,open_brace+1,length(control_word)+1) ~ \
                ("\\" control_word "[^A-Za-z]"))
            {
                n = open_brace + 1 + length(control_word)
                arg = trim(substr(s,n,k - n))
                if (Font_decl_map[control_word] == "toupper") # arg -> ARG
                    arg = toupper(arg)
                else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
                    arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
                return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
            }
        }
        for (control_word in Font_cmd_map) # look for \xxx{...}
        {
            if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
                ("\\" control_word))
            {
                n = open_brace + 1
                arg = trim(substr(s,n,k - n))
                if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
                    arg = toupper(arg)
                else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
                    arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
                n = open_brace - length(control_word) - 1
                return (substr(s,1,n) arg html_fonts(substr(s,k)))
            }
        }
    }
    return (s)
}


# Print the HTML prologue: provenance comments (user, host, and date,
# obtained from the environment and from external commands), the
# DOCTYPE, the <HEAD> section, and the opening <BODY> tag.
function html_header( cmd)
{
    USER = ENVIRON["USER"]
    if (USER == "")
        USER = ENVIRON["LOGNAME"]
    if (USER == "")
        USER = "????"

    "hostname" | getline HOSTNAME
    close("hostname")
    "date" | getline DATE
    close("date")

    # USER is spliced into single-quoted shell text below, so skip the
    # lookup if it contains a quote that would end the quoting.
    if (USER !~ /'/)
    {
        cmd = "ypcat passwd | grep '^" USER ":' | awk -F: '{print $5}'"
        cmd | getline PERSONAL_NAME
        close(cmd)
        if (PERSONAL_NAME == "")
        {
            cmd = "grep '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
            cmd | getline PERSONAL_NAME
            close(cmd)
        }
    }


    print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
    print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
    print "<!-- on " DATE " -->"
    print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
    print ""
    print ""
    print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
    print ""
    print "<HTML>"
    print prefix(1) "<HEAD>"
    print prefix(2) "<TITLE>"
    print prefix(3) Journal
    print prefix(2) "</TITLE>"
    print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
    print prefix(1) "</HEAD>"
    print ""
    print prefix(1) "<BODY>"
}


# Return the anchor name for the current issue, built from the volume,
# number, month, and year, with every character outside the permitted
# set (alphanumerics and  ( ) : , ; . / -) removed.
function html_label( label)
{
    label = Volume "(" Number "):" Month ":" Year
    # gsub(/[^A-Za-z0-9():,;.\/\-]/,"",label)
    gsub(/[^[:alnum:]():,;.\/\-]/,"",label)
    return (label)
}


function html_length(s)
{ # Return visible length of s, ignoring any HTML markup
    if (HTML)
    {
        gsub(/<\/?[^>]*>/,"",s) # remove SGML tags
        # gsub(/&[A-Za-z0-9]+;/,"",s) # remove SGML entities
        gsub(/&[[:alnum:]]+;/,"",s) # remove SGML entities
    }
    return (length(s))
}


# Print the heading and the accumulated list of links to the issues.
function html_toc()
{
    print prefix(2) "<H1>"
    print prefix(3) "Table of contents for issues of " Journal
    print prefix(2) "</H1>"
    print HTML_TOC
}


# Append a link for the current issue to the accumulated HTML_TOC text.
function html_toc_entry()
{
    HTML_TOC = HTML_TOC " <A HREF=\"#" html_label() "\">"
    HTML_TOC = HTML_TOC vol_no_month_year()
    HTML_TOC = HTML_TOC "</A><BR>" "\n"
}


function
html_trailer() 702*e6d6c189SCody Peter Mello{ 703*e6d6c189SCody Peter Mello html_end_pre() 704*e6d6c189SCody Peter Mello print prefix(1) "</BODY>" 705*e6d6c189SCody Peter Mello print "</HTML>" 706*e6d6c189SCody Peter Mello} 707*e6d6c189SCody Peter Mello 708*e6d6c189SCody Peter Mello 709*e6d6c189SCody Peter Mellofunction initialize() 710*e6d6c189SCody Peter Mello{ 711*e6d6c189SCody Peter Mello # NB: Update these when the program changes 712*e6d6c189SCody Peter Mello VERSION_DATE = "[09-Oct-1996]" 713*e6d6c189SCody Peter Mello VERSION_NUMBER = "1.00" 714*e6d6c189SCody Peter Mello 715*e6d6c189SCody Peter Mello HTML = (HTML == "") ? 0 : (0 + HTML) 716*e6d6c189SCody Peter Mello 717*e6d6c189SCody Peter Mello if (INDENT == "") 718*e6d6c189SCody Peter Mello INDENT = 4 719*e6d6c189SCody Peter Mello 720*e6d6c189SCody Peter Mello if (HTML == 0) 721*e6d6c189SCody Peter Mello INDENT = 0 # indentation suppressed in ASCII mode 722*e6d6c189SCody Peter Mello 723*e6d6c189SCody Peter Mello LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ." 724*e6d6c189SCody Peter Mello 725*e6d6c189SCody Peter Mello MAX_TITLE_CHARS = 36 # 36 produces a 79-char output line when there is 726*e6d6c189SCody Peter Mello # just an initial page number. If this is 727*e6d6c189SCody Peter Mello # increased, the LEADERS string may need to be 728*e6d6c189SCody Peter Mello # lengthened. 729*e6d6c189SCody Peter Mello 730*e6d6c189SCody Peter Mello MIN_LEADERS = 4 # Minimum number of characters from LEADERS 731*e6d6c189SCody Peter Mello # required when leaders are used. The total 732*e6d6c189SCody Peter Mello # number of characters that can appear in a 733*e6d6c189SCody Peter Mello # title line is MAX_TITLE_CHARS + MIN_LEADERS. 734*e6d6c189SCody Peter Mello # Leaders are omitted when the title length is 735*e6d6c189SCody Peter Mello # between MAX_TITLE_CHARS and this sum. 
736*e6d6c189SCody Peter Mello 737*e6d6c189SCody Peter Mello MIN_LEADERS_SPACE = " " # must be at least MIN_LEADERS characters long 738*e6d6c189SCody Peter Mello 739*e6d6c189SCody Peter Mello Month_expansion["jan"] = "January" 740*e6d6c189SCody Peter Mello Month_expansion["feb"] = "February" 741*e6d6c189SCody Peter Mello Month_expansion["mar"] = "March" 742*e6d6c189SCody Peter Mello Month_expansion["apr"] = "April" 743*e6d6c189SCody Peter Mello Month_expansion["may"] = "May" 744*e6d6c189SCody Peter Mello Month_expansion["jun"] = "June" 745*e6d6c189SCody Peter Mello Month_expansion["jul"] = "July" 746*e6d6c189SCody Peter Mello Month_expansion["aug"] = "August" 747*e6d6c189SCody Peter Mello Month_expansion["sep"] = "September" 748*e6d6c189SCody Peter Mello Month_expansion["oct"] = "October" 749*e6d6c189SCody Peter Mello Month_expansion["nov"] = "November" 750*e6d6c189SCody Peter Mello Month_expansion["dec"] = "December" 751*e6d6c189SCody Peter Mello 752*e6d6c189SCody Peter Mello Font_cmd_map["\\emph"] = "EM" 753*e6d6c189SCody Peter Mello Font_cmd_map["\\textbf"] = "B" 754*e6d6c189SCody Peter Mello Font_cmd_map["\\textit"] = "I" 755*e6d6c189SCody Peter Mello Font_cmd_map["\\textmd"] = "" 756*e6d6c189SCody Peter Mello Font_cmd_map["\\textrm"] = "" 757*e6d6c189SCody Peter Mello Font_cmd_map["\\textsc"] = "toupper" 758*e6d6c189SCody Peter Mello Font_cmd_map["\\textsl"] = "I" 759*e6d6c189SCody Peter Mello Font_cmd_map["\\texttt"] = "t" 760*e6d6c189SCody Peter Mello Font_cmd_map["\\textup"] = "" 761*e6d6c189SCody Peter Mello 762*e6d6c189SCody Peter Mello Font_decl_map["\\bf"] = "B" 763*e6d6c189SCody Peter Mello Font_decl_map["\\em"] = "EM" 764*e6d6c189SCody Peter Mello Font_decl_map["\\it"] = "I" 765*e6d6c189SCody Peter Mello Font_decl_map["\\rm"] = "" 766*e6d6c189SCody Peter Mello Font_decl_map["\\sc"] = "toupper" 767*e6d6c189SCody Peter Mello Font_decl_map["\\sf"] = "" 768*e6d6c189SCody Peter Mello Font_decl_map["\\tt"] = "TT" 769*e6d6c189SCody Peter Mello 
Font_decl_map["\\itshape"] = "I" 770*e6d6c189SCody Peter Mello Font_decl_map["\\upshape"] = "" 771*e6d6c189SCody Peter Mello Font_decl_map["\\slshape"] = "I" 772*e6d6c189SCody Peter Mello Font_decl_map["\\scshape"] = "toupper" 773*e6d6c189SCody Peter Mello Font_decl_map["\\mdseries"] = "" 774*e6d6c189SCody Peter Mello Font_decl_map["\\bfseries"] = "B" 775*e6d6c189SCody Peter Mello Font_decl_map["\\rmfamily"] = "" 776*e6d6c189SCody Peter Mello Font_decl_map["\\sffamily"] = "" 777*e6d6c189SCody Peter Mello Font_decl_map["\\ttfamily"] = "TT" 778*e6d6c189SCody Peter Mello} 779*e6d6c189SCody Peter Mello 780*e6d6c189SCody Peter Mellofunction min(a,b) 781*e6d6c189SCody Peter Mello{ 782*e6d6c189SCody Peter Mello return (a < b) ? a : b 783*e6d6c189SCody Peter Mello} 784*e6d6c189SCody Peter Mello 785*e6d6c189SCody Peter Mello 786*e6d6c189SCody Peter Mellofunction prefix(level) 787*e6d6c189SCody Peter Mello{ 788*e6d6c189SCody Peter Mello # Return a prefix of up to 60 blanks 789*e6d6c189SCody Peter Mello 790*e6d6c189SCody Peter Mello if (In_PRE) 791*e6d6c189SCody Peter Mello return ("") 792*e6d6c189SCody Peter Mello else 793*e6d6c189SCody Peter Mello return (substr(" ", \ 794*e6d6c189SCody Peter Mello 1, INDENT * level)) 795*e6d6c189SCody Peter Mello} 796*e6d6c189SCody Peter Mello 797*e6d6c189SCody Peter Mello 798*e6d6c189SCody Peter Mellofunction print_line(line) 799*e6d6c189SCody Peter Mello{ 800*e6d6c189SCody Peter Mello if (HTML) # must buffer in memory so that we can accumulate TOC 801*e6d6c189SCody Peter Mello Body[++BodyLines] = line 802*e6d6c189SCody Peter Mello else 803*e6d6c189SCody Peter Mello print line 804*e6d6c189SCody Peter Mello} 805*e6d6c189SCody Peter Mello 806*e6d6c189SCody Peter Mello 807*e6d6c189SCody Peter Mellofunction print_toc_line(author,title,pages, extra,leaders,n,t) 808*e6d6c189SCody Peter Mello{ 809*e6d6c189SCody Peter Mello # When we have a multiline title, the hypertext link goes only 810*e6d6c189SCody Peter Mello # on the first line. 
A multiline hypertext link looks awful 811*e6d6c189SCody Peter Mello # because of long underlines under the leading indentation. 812*e6d6c189SCody Peter Mello 813*e6d6c189SCody Peter Mello if (pages == "") # then no leaders needed in title lines other than last one 814*e6d6c189SCody Peter Mello t = sprintf("%31s %s%s%s", author, Title_prefix, title, Title_suffix) 815*e6d6c189SCody Peter Mello else # last title line, with page number 816*e6d6c189SCody Peter Mello { 817*e6d6c189SCody Peter Mello n = html_length(title) # potentially expensive 818*e6d6c189SCody Peter Mello extra = n % 2 # extra space for aligned leader dots 819*e6d6c189SCody Peter Mello if (n <= MAX_TITLE_CHARS) # then need leaders 820*e6d6c189SCody Peter Mello leaders = substr(LEADERS, 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - \ 821*e6d6c189SCody Peter Mello min(MAX_TITLE_CHARS,n)) 822*e6d6c189SCody Peter Mello else # title (almost) fills line, so no leaders 823*e6d6c189SCody Peter Mello leaders = substr(MIN_LEADERS_SPACE,1, \ 824*e6d6c189SCody Peter Mello (MAX_TITLE_CHARS + MIN_LEADERS - extra - n)) 825*e6d6c189SCody Peter Mello t = sprintf("%31s %s%s%s%s%s %4s", \ 826*e6d6c189SCody Peter Mello author, Title_prefix, title, Title_suffix, \ 827*e6d6c189SCody Peter Mello (extra ? " " : ""), leaders, pages) 828*e6d6c189SCody Peter Mello } 829*e6d6c189SCody Peter Mello 830*e6d6c189SCody Peter Mello Title_prefix = "" # forget any hypertext 831*e6d6c189SCody Peter Mello Title_suffix = "" # link material 832*e6d6c189SCody Peter Mello 833*e6d6c189SCody Peter Mello # Efficency note: an earlier version accumulated the body in a 834*e6d6c189SCody Peter Mello # single scalar like this: "Body = Body t". Profiling revealed 835*e6d6c189SCody Peter Mello # this statement as the major hot spot, and the change to array 836*e6d6c189SCody Peter Mello # storage made the program more than twice as fast. 
This 837*e6d6c189SCody Peter Mello # suggests that awk might benefit from an optimization of 838*e6d6c189SCody Peter Mello # "s = s t" that uses realloc() instead of malloc(). 839*e6d6c189SCody Peter Mello if (HTML) 840*e6d6c189SCody Peter Mello Body[++BodyLines] = t 841*e6d6c189SCody Peter Mello else 842*e6d6c189SCody Peter Mello print t 843*e6d6c189SCody Peter Mello} 844*e6d6c189SCody Peter Mello 845*e6d6c189SCody Peter Mello 846*e6d6c189SCody Peter Mellofunction protect_SGML_characters(s) 847*e6d6c189SCody Peter Mello{ 848*e6d6c189SCody Peter Mello gsub(/&/,"\\&",s) # NB: this one MUST be first 849*e6d6c189SCody Peter Mello gsub(/</,"\\<",s) 850*e6d6c189SCody Peter Mello gsub(/>/,"\\>",s) 851*e6d6c189SCody Peter Mello gsub(/\"/,"\\"",s) 852*e6d6c189SCody Peter Mello return (s) 853*e6d6c189SCody Peter Mello} 854*e6d6c189SCody Peter Mello 855*e6d6c189SCody Peter Mello 856*e6d6c189SCody Peter Mellofunction strip_braces(s, k) 857*e6d6c189SCody Peter Mello{ # strip non-backslashed braces from s and return the result 858*e6d6c189SCody Peter Mello 859*e6d6c189SCody Peter Mello return (strip_char(strip_char(s,"{"),"}")) 860*e6d6c189SCody Peter Mello} 861*e6d6c189SCody Peter Mello 862*e6d6c189SCody Peter Mello 863*e6d6c189SCody Peter Mellofunction strip_char(s,c, k) 864*e6d6c189SCody Peter Mello{ # strip non-backslashed instances of c from s, and return the result 865*e6d6c189SCody Peter Mello k = index(s,c) 866*e6d6c189SCody Peter Mello if (k > 0) # then found the character 867*e6d6c189SCody Peter Mello { 868*e6d6c189SCody Peter Mello if (substr(s,k-1,1) != "\\") # then not backslashed char 869*e6d6c189SCody Peter Mello s = substr(s,1,k-1) strip_char(substr(s,k+1),c) # so remove it (recursively) 870*e6d6c189SCody Peter Mello else # preserve backslashed char 871*e6d6c189SCody Peter Mello s = substr(s,1,k) strip_char(s,k+1,c) 872*e6d6c189SCody Peter Mello } 873*e6d6c189SCody Peter Mello return (s) 874*e6d6c189SCody Peter Mello} 875*e6d6c189SCody Peter Mello 


# strip_html(s): return s with every SGML/HTML tag removed.
function strip_html(s)
{
    gsub(/<\/?[^>]*>/,"",s)
    return (s)
}


# terminate(): in HTML mode, close any open <PRE> and then write the whole
# document: header, table of contents, body, trailer.  Does nothing in
# plain text mode.
function terminate()
{
    if (HTML)
    {
        html_end_pre()

        HTML = 0        # NB: stop line buffering
        html_header()
        html_toc()
        html_body()
        html_trailer()
    }
}


# TeX_to_HTML(s): convert a TeX string for output.  The text is split at
# dollar signs; non-math parts are passed through TeX_to_HTML_nonmath() and
# have their braces stripped, math parts go through TeX_to_HTML_math().
# Remaining parameters are local variables.
function TeX_to_HTML(s, k,n,parts)
{
    # First convert the SGML reserved characters > < " to SGML entities
    # (TeX ampersands, written \&, are handled by the per-part helpers).
    # In a gsub() replacement string "\\&" stands for a literal ampersand.
    if (HTML)
    {
        gsub(/>/, "\\&gt;", s)
        gsub(/</, "\\&lt;", s)
        gsub(/"/, "\\&quot;", s)
    }

    # With triple dollars, splitting at single dollars yields two empty
    # parts on each side of display math, so its body still lands in an
    # even-numbered (math) slot.
    gsub(/[$][$]/,"$$$",s)      # change display math to triple dollars for split
    n = split(s,parts,/[$]/)    # split into non-math (odd) and math (even) parts

    s = ""
    for (k = 1; k <= n; ++k)    # unbrace non-math part, leaving math mode intact
        s = s ((k > 1) ? "$" : "") \
              ((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
                         TeX_to_HTML_math(parts[k]))

    gsub(/[$][$][$]/,"$$",s)    # restore display math

    return (s)
}


function TeX_to_HTML_math(s)
{
    # Mostly a dummy for now, but HTML 3 could support some math translation

    gsub(/\\&/,"\\&amp;",s)     # reduce TeX ampersands to SGML entities

    return (s)
}


# TeX_to_HTML_nonmath(s): convert TeX markup in a non-math text fragment.
# In HTML mode accents and font changes are translated to HTML; in plain
# text mode the TeX markup is discarded.  Strings without a backslash are
# returned unchanged.
function TeX_to_HTML_nonmath(s)
{
    if (index(s,"\\") > 0)              # important optimization
    {
        gsub(/\\slash +/,"/",s)         # replace TeX slashes with conventional ones
        gsub(/ *\\emdash +/," --- ",s)  # replace BibNet emdashes with conventional ones
        gsub(/\\%/,"%",s)               # reduce TeX percents to conventional ones
        gsub(/\\[$]/,"$",s)             # reduce TeX dollars to conventional ones
        gsub(/\\#/,"#",s)               # reduce TeX sharps to conventional ones

        if (HTML)                       # translate TeX markup to HTML
        {
            gsub(/\\&/,"\\&amp;",s)     # reduce TeX ampersands to SGML entities
            s = html_accents(s)
            s = html_fonts(s)
        }
        else                            # plain ASCII text output: discard all TeX markup
        {
            gsub(/\\\&/, "\\&", s)      # reduce TeX ampersands to conventional ones

            #gsub(/\\[a-z][a-z] +/,"",s)                # remove TeX font changes
            gsub(/\\[[:lower:]][[:lower:]] +/,"",s)     # remove TeX font changes
            #gsub(/\\[^A-Za-z]/,"",s)                   # remove remaining TeX control symbols
            gsub(/\\[^[:alpha:]]/,"",s)                 # remove remaining TeX control symbols
        }
    }
    return (s)
}


# trim(s): return s without leading and trailing blanks and tabs.
function trim(s)
{
    gsub(/^[ \t]+/,"",s)
    gsub(/[ \t]+$/,"",s)
    return (s)
}


# vol_no_month_year(): return the issue heading text built from the globals
# Volume, Number, Month and Year, each passed through wrap().
function vol_no_month_year()
{
    return ("Volume " wrap(Volume) ", Number " wrap(Number) ", " wrap(Month) ", " wrap(Year))
}


# wrap(value): return value enclosed in <STRONG>...</STRONG> in HTML mode,
# or value itself in plain text mode.
function wrap(value)
{
    return (HTML ? ("<STRONG>" value "</STRONG>") : value)
}