1*e6d6c189SCody Peter Mello### ====================================================================
2*e6d6c189SCody Peter Mello###  @Awk-file{
3*e6d6c189SCody Peter Mello###     author          = "Nelson H. F. Beebe",
4*e6d6c189SCody Peter Mello###     version         = "1.00",
5*e6d6c189SCody Peter Mello###     date            = "09 October 1996",
6*e6d6c189SCody Peter Mello###     time            = "15:57:06 MDT",
7*e6d6c189SCody Peter Mello###     filename        = "journal-toc.awk",
8*e6d6c189SCody Peter Mello###     address         = "Center for Scientific Computing
9*e6d6c189SCody Peter Mello###                        Department of Mathematics
10*e6d6c189SCody Peter Mello###                        University of Utah
11*e6d6c189SCody Peter Mello###                        Salt Lake City, UT 84112
12*e6d6c189SCody Peter Mello###                        USA",
13*e6d6c189SCody Peter Mello###     telephone       = "+1 801 581 5254",
14*e6d6c189SCody Peter Mello###     FAX             = "+1 801 581 4148",
15*e6d6c189SCody Peter Mello###     URL             = "http://www.math.utah.edu/~beebe",
16*e6d6c189SCody Peter Mello###     checksum        = "25092 977 3357 26493",
17*e6d6c189SCody Peter Mello###     email           = "beebe@math.utah.edu (Internet)",
18*e6d6c189SCody Peter Mello###     codetable       = "ISO/ASCII",
19*e6d6c189SCody Peter Mello###     keywords        = "BibTeX, bibliography, HTML, journal table of
20*e6d6c189SCody Peter Mello###                        contents",
21*e6d6c189SCody Peter Mello###     supported       = "yes",
22*e6d6c189SCody Peter Mello###     docstring       = "Create a journal cover table of contents from
23*e6d6c189SCody Peter Mello###                        <at>Article{...} entries in a journal BibTeX
24*e6d6c189SCody Peter Mello###                        .bib file for checking the bibliography
25*e6d6c189SCody Peter Mello###                        database against the actual journal covers.
26*e6d6c189SCody Peter Mello###                        The output can be either plain text, or HTML.
27*e6d6c189SCody Peter Mello###
28*e6d6c189SCody Peter Mello###                        Usage:
29*e6d6c189SCody Peter Mello###                            bibclean -max-width 0 BibTeX-file(s) | \
30*e6d6c189SCody Peter Mello###                                bibsort -byvolume | \
31*e6d6c189SCody Peter Mello###                                awk -f journal-toc.awk \
32*e6d6c189SCody Peter Mello###                                    [-v HTML=nnn] [-v INDENT=nnn] \
33*e6d6c189SCody Peter Mello###                                    [-v BIBFILEURL=url] >foo.toc
34*e6d6c189SCody Peter Mello###
35*e6d6c189SCody Peter Mello###                            or if the bibliography is already sorted
36*e6d6c189SCody Peter Mello###                            by volume,
37*e6d6c189SCody Peter Mello###
38*e6d6c189SCody Peter Mello###                            bibclean -max-width 0 BibTeX-file(s) | \
39*e6d6c189SCody Peter Mello###                                awk -f journal-toc.awk \
40*e6d6c189SCody Peter Mello###                                    [-v HTML=nnn] [-v INDENT=nnn] \
41*e6d6c189SCody Peter Mello###                                    [-v BIBFILEURL=url] >foo.toc
42*e6d6c189SCody Peter Mello###
43*e6d6c189SCody Peter Mello###                        A non-zero value of the command-line option,
44*e6d6c189SCody Peter Mello###                        HTML=nnn, results in HTML output instead of
45*e6d6c189SCody Peter Mello###                        the default plain ASCII text (corresponding
###                        to HTML=0).
47*e6d6c189SCody Peter Mello###
48*e6d6c189SCody Peter Mello###                        The INDENT=nnn command-line option specifies
49*e6d6c189SCody Peter Mello###                        the number of blanks to indent each logical
50*e6d6c189SCody Peter Mello###                        level of HTML.  The default is INDENT=4.
51*e6d6c189SCody Peter Mello###                        INDENT=0 suppresses indentation.  The INDENT
52*e6d6c189SCody Peter Mello###                        option has no effect when the default HTML=0
53*e6d6c189SCody Peter Mello###                        (plain text output) option is in effect.
54*e6d6c189SCody Peter Mello###
55*e6d6c189SCody Peter Mello###                        When HTML output is selected, the
56*e6d6c189SCody Peter Mello###                        BIBFILEURL=url command-line option provides a
57*e6d6c189SCody Peter Mello###                        way to request hypertext links from table of
58*e6d6c189SCody Peter Mello###                        contents page numbers to the complete BibTeX
59*e6d6c189SCody Peter Mello###                        entry for the article.  These links are
60*e6d6c189SCody Peter Mello###                        created by appending a sharp (#) and the
61*e6d6c189SCody Peter Mello###                        citation label to the BIBFILEURL value, which
62*e6d6c189SCody Peter Mello###                        conforms with the practice of
63*e6d6c189SCody Peter Mello###                        bibtex-to-html.awk.
64*e6d6c189SCody Peter Mello###
65*e6d6c189SCody Peter Mello###                        The HTML output form may be useful as a more
66*e6d6c189SCody Peter Mello###                        compact representation of journal article
67*e6d6c189SCody Peter Mello###                        bibliography data than the original BibTeX
68*e6d6c189SCody Peter Mello###                        file provides.  Of course, the
69*e6d6c189SCody Peter Mello###                        table-of-contents format provides less
70*e6d6c189SCody Peter Mello###                        information, and is considerably more
71*e6d6c189SCody Peter Mello###                        troublesome for a computer program to parse.
72*e6d6c189SCody Peter Mello###
73*e6d6c189SCody Peter Mello###                        When URL key values are provided, they will
74*e6d6c189SCody Peter Mello###                        be used to create hypertext links around
75*e6d6c189SCody Peter Mello###                        article titles.  This supports journals that
76*e6d6c189SCody Peter Mello###                        provide article contents on the World-Wide
77*e6d6c189SCody Peter Mello###                        Web.
78*e6d6c189SCody Peter Mello###
79*e6d6c189SCody Peter Mello###                        For parsing simplicity, this program requires
80*e6d6c189SCody Peter Mello###                        that BibTeX
81*e6d6c189SCody Peter Mello###
82*e6d6c189SCody Peter Mello###                            key = "value"
83*e6d6c189SCody Peter Mello###
84*e6d6c189SCody Peter Mello###                        and
85*e6d6c189SCody Peter Mello###
86*e6d6c189SCody Peter Mello###                            @String{name = "value"}
87*e6d6c189SCody Peter Mello###
88*e6d6c189SCody Peter Mello###                        specifications be entirely contained on
89*e6d6c189SCody Peter Mello###                        single lines, which is readily provided by
90*e6d6c189SCody Peter Mello###                        the `bibclean -max-width 0' filter.  It also
91*e6d6c189SCody Peter Mello###                        requires that bibliography entries begin and
92*e6d6c189SCody Peter Mello###                        end at the start of a line, and that
93*e6d6c189SCody Peter Mello###                        quotation marks, rather than balanced braces,
94*e6d6c189SCody Peter Mello###                        delimit string values.  This is a
95*e6d6c189SCody Peter Mello###                        conventional format that again can be
96*e6d6c189SCody Peter Mello###                        guaranteed by bibclean.
97*e6d6c189SCody Peter Mello###
98*e6d6c189SCody Peter Mello###                        This program requires `new' awk, as described
99*e6d6c189SCody Peter Mello###                        in the book
100*e6d6c189SCody Peter Mello###
101*e6d6c189SCody Peter Mello###                            Alfred V. Aho, Brian W. Kernighan, and
102*e6d6c189SCody Peter Mello###                            Peter J. Weinberger,
103*e6d6c189SCody Peter Mello###                            ``The AWK Programming Language'',
104*e6d6c189SCody Peter Mello###                            Addison-Wesley (1988), ISBN
105*e6d6c189SCody Peter Mello###                            0-201-07981-X,
106*e6d6c189SCody Peter Mello###
107*e6d6c189SCody Peter Mello###                        such as provided by programs named (GNU)
108*e6d6c189SCody Peter Mello###                        gawk, nawk, and recent AT&T awk.
109*e6d6c189SCody Peter Mello###
110*e6d6c189SCody Peter Mello###                        The checksum field above contains a CRC-16
111*e6d6c189SCody Peter Mello###                        checksum as the first value, followed by the
112*e6d6c189SCody Peter Mello###                        equivalent of the standard UNIX wc (word
113*e6d6c189SCody Peter Mello###                        count) utility output of lines, words, and
114*e6d6c189SCody Peter Mello###                        characters.  This is produced by Robert
115*e6d6c189SCody Peter Mello###                        Solovay's checksum utility.",
116*e6d6c189SCody Peter Mello###  }
117*e6d6c189SCody Peter Mello### ====================================================================
118*e6d6c189SCody Peter Mello
# Top-level dispatch.  Each input line is tested against the patterns
# below in the order given.  Every pattern action ends with `next', so
# at most one of them runs for any one line.

BEGIN {
	initialize()
}

# @String{name = "value"} abbreviation definition
/^ *@ *[Ss][Tt][Rr][Ii][Nn][Gg] *\{/ {
	do_String()
	next
}

# @Preamble lines are ignored
/^ *@ *[Pp][Rr][Ee][Aa][Mm][Bb][Ll][Ee]/ {
	next
}

# start of an @Article entry
/^ *@ *[Aa][Rr][Tt][Ii][Cc][Ll][Ee]/ {
	do_Article()
	next
}

# start of any other kind of @ entry
/^ *@/ {
	do_Other()
	next
}

# key = "value" lines of interest
/^ *author *= *\"/ {
	do_author()
	next
}

/^ *journal *= */ {
	do_journal()
	next
}

/^ *volume *= *\"/ {
	do_volume()
	next
}

/^ *number *= *\"/ {
	do_number()
	next
}

/^ *year *= *\"/ {
	do_year()
	next
}

/^ *month *= */ {
	do_month()
	next
}

/^ *title *= *\"/ {
	do_title()
	next
}

/^ *pages *= *\"/ {
	do_pages()
	next
}

/^ *URL *= *\"/ {
	do_URL()
	next
}

# a closing brace alone on a line ends the current entry
/^ *} *$/ {
	if (In_Article)
		do_end_entry()
	next
}

END {
	terminate()
}
150*e6d6c189SCody Peter Mello
151*e6d6c189SCody Peter Mello
152*e6d6c189SCody Peter Mello########################################################################
153*e6d6c189SCody Peter Mello# NB: The programming conventions for variables in this program are:   #
154*e6d6c189SCody Peter Mello#	UPPERCASE		global constants and user options      #
155*e6d6c189SCody Peter Mello#	Initialuppercase	global variables                       #
156*e6d6c189SCody Peter Mello#	lowercase		local variables                        #
157*e6d6c189SCody Peter Mello# Any deviation is an error!                                           #
158*e6d6c189SCody Peter Mello########################################################################
159*e6d6c189SCody Peter Mello
160*e6d6c189SCody Peter Mello
function do_Article()
{
	# Begin a new @Article entry: note that we are inside an article,
	# extract the citation label from the current line, and clear every
	# per-article field left over from the previous entry.
	In_Article = 1

	Citation_label = $0
	sub(/^[^\{]*\{/,"",Citation_label)	# drop text through the first open brace
	sub(/ *, *$/,"",Citation_label)		# drop trailing comma and blanks

	Author = Title = Journal = Volume = Number = Month = Year = Pages = Url = ""
}
179*e6d6c189SCody Peter Mello
180*e6d6c189SCody Peter Mello
function do_author( value)
{
	# Store the author field of the current line in Author, after
	# passing its quoted value through TeX_to_HTML().
	value = get_value($0)
	Author = TeX_to_HTML(value)
}
185*e6d6c189SCody Peter Mello
186*e6d6c189SCody Peter Mello
function do_end_entry( k,n,parts)
{
	# Called when the closing brace of an @Article entry is seen: hand
	# the collected author, title and page data to print_toc_line() (or
	# to do_long_title() when the title is too long for one line).
	#
	# The author list is split at " and "; every author except the last
	# is passed on its own, followed by " and", with empty title and
	# pages.  The last author accompanies the title and the pages.

	n = split(Author,parts," and ")
	if (Last_number != Number)	# first article of a new issue
		do_new_issue()
	for (k = 1; k < n; ++k)
		print_toc_line(parts[k] " and", "", "")

	# Globals set here for the title markup; their consumer is not
	# visible in this function (presumably print_toc_line -- confirm).
	Title_prefix = html_begin_title()
	Title_suffix = html_end_title()

	if (html_length(Title) <= (MAX_TITLE_CHARS + MIN_LEADERS)) # complete title fits on line
		print_toc_line(parts[n], Title, html_begin_pages() Pages html_end_pages())
	else			# need to split long title over multiple lines
		do_long_title(parts[n], Title, html_begin_pages() Pages html_end_pages())

	# The entry is finished.  Clear the flag so that a later line
	# holding only a closing brace that does not end an @Article (for
	# example the last line of a multi-line @Preamble, whose rule does
	# not reset the flag) cannot emit this same article again.
	In_Article = 0
}
201*e6d6c189SCody Peter Mello
202*e6d6c189SCody Peter Mello
function do_journal()
{
	# Set Journal from the current line, which holds either
	#	journal = "quoted journal name",
	# or
	#	journal = journal-abbreviation,
	# An abbreviation that was defined by an earlier @String entry is
	# replaced by its expansion.
	if ($0 !~ /[=] *"/)
	{
		Journal = get_abbrev($0)
		if (Journal in String)
			Journal = String[Journal]
	}
	else
		Journal = get_value($0)

	gsub(/\\-/,"",Journal)	# TeX discretionary hyphens are not wanted
}
215*e6d6c189SCody Peter Mello
216*e6d6c189SCody Peter Mello
function do_long_title(author,title,pages, piece,cut)
{
	# Print a title that is too long for one line as several lines.
	# The author is passed only with the first piece, and the pages
	# only with the last piece.
	title = trim(title)			# no leading or trailing space
	while (length(title) > 0)
	{
		cut = html_breakpoint(title,MAX_TITLE_CHARS+MIN_LEADERS)
		piece = substr(title,1,cut)
		title = substr(title,cut+1)
		sub(/^ +/,"",title)		# next piece must not start with space
		if (length(title) == 0)		# this was the final piece
			print_toc_line(author, piece, pages)
		else
			print_toc_line(author, piece, "")
		author = ""
	}
}
230*e6d6c189SCody Peter Mello
231*e6d6c189SCody Peter Mello
function do_month( k,n,parts)
{
	# Set Month from the current line.  The value may be quoted, or an
	# abbreviation, or several of either joined by # "\slash" # or by
	# # "-" #; each part found in Month_expansion is replaced by its
	# expansion, and the parts are rejoined with " / ".
	if ($0 ~ /[=] *"/)
		Month = get_value($0)
	else
		Month = get_abbrev($0)

	gsub(/[\"]/,"",Month)
	gsub(/ *# *\\slash *# */," / ",Month)
	gsub(/ *# *-+ *# */," / ",Month)

	n = split(Month,parts," */ *")
	Month = ""
	for (k = 1; k <= n; ++k)
	{
		if (k > 1)
			Month = Month " / "
		if (parts[k] in Month_expansion)
			Month = Month Month_expansion[parts[k]]
		else
			Month = Month parts[k]
	}
}
244*e6d6c189SCody Peter Mello
245*e6d6c189SCody Peter Mello
function do_new_issue()
{
	# Print the heading for a new journal issue: the journal name
	# followed by the text returned by vol_no_month_year().  In HTML
	# mode the heading is wrapped by html_begin_issue() and
	# html_end_issue(), and a <BR> line is printed first whenever the
	# volume changes.
	Last_number = Number

	if (!HTML)
	{			# plain text heading, set off by blank lines
		print_line("")
		print_line(Journal)
		print_line(strip_html(vol_no_month_year()))
		print_line("")
		return
	}

	if (Last_volume != Volume)
	{
		Last_volume = Volume
		print_line(prefix(2) "<BR>")
	}
	html_end_toc()
	html_begin_issue()
	print_line(prefix(2) Journal "<BR>")
	print_line(strip_html(vol_no_month_year()))
	html_end_issue()
	html_toc_entry()
	html_begin_toc()
}
277*e6d6c189SCody Peter Mello
278*e6d6c189SCody Peter Mello
function do_number( value)
{
	# Store the quoted value of the current number = "..." line.
	value = get_value($0)
	Number = value
}
283*e6d6c189SCody Peter Mello
284*e6d6c189SCody Peter Mello
function do_Other()
{
	# An @ entry that is neither @String, @Preamble nor @Article has
	# started, so we are no longer inside an article.
	In_Article = 0
}
289*e6d6c189SCody Peter Mello
290*e6d6c189SCody Peter Mello
function do_pages( range)
{
	# Store the quoted value of the current pages = "..." line, with
	# the first "--??" (unknown ending page) removed.
	range = get_value($0)
	sub(/--[?][?]/,"",range)
	Pages = range
}
296*e6d6c189SCody Peter Mello
297*e6d6c189SCody Peter Mello
function do_String( name)
{
	# Record an abbreviation from a one-line @String{name = "value"}
	# entry in the String[] table.  The current record is edited in
	# place.
	sub(/^[^\{]*\{/,"")	# remove everything through the open brace
	sub(/\} *$/,"")		# remove the closing brace and trailing blanks
	name = get_key($0)
	String[name] = get_value($0)
}
304*e6d6c189SCody Peter Mello
305*e6d6c189SCody Peter Mello
function do_title( value)
{
	# Store the title field of the current line in Title, after
	# passing its quoted value through TeX_to_HTML().
	value = get_value($0)
	Title = TeX_to_HTML(value)
}
310*e6d6c189SCody Peter Mello
311*e6d6c189SCody Peter Mello
function do_URL( parts)
{
	# Store the URL of the current URL = "..." line in Url.  When the
	# value lists several URLs separated by commas or semicolons, only
	# the first one is kept.
	split(get_value($0),parts,"[,;]")
	Url = trim(parts[1])
}
318*e6d6c189SCody Peter Mello
319*e6d6c189SCody Peter Mello
function do_volume( value)
{
	# Store the quoted value of the current volume = "..." line.
	value = get_value($0)
	Volume = value
}
324*e6d6c189SCody Peter Mello
325*e6d6c189SCody Peter Mello
function do_year( value)
{
	# Store the quoted value of the current year = "..." line.
	value = get_value($0)
	Year = value
}
330*e6d6c189SCody Peter Mello
331*e6d6c189SCody Peter Mello
function get_abbrev(s)
{
	# Return the unquoted abbreviation from a line of the form
	#	key = abbrev,
	sub(/^[^=]*= */,"",s)	# strip the key, the equals sign, and blanks after it
	sub(/ *,? *$/,"",s)	# strip trailing blanks and the optional final comma
	return s
}
339*e6d6c189SCody Peter Mello
340*e6d6c189SCody Peter Mello
function get_key(s)
{
	# Return the key from a line of the form
	#	key = "value",
	sub(/^ */,"",s)		# strip leading blanks
	sub(/ *=.*$/,"",s)	# strip everything from the equals sign onward

	return s
}
348*e6d6c189SCody Peter Mello
349*e6d6c189SCody Peter Mello
function get_value(s)
{
	# Return the value, without its quotation marks, from a line of
	# the form
	#	key = "value",
	sub(/^[^\"]*\" */,"",s)	# strip text through the opening quote, and blanks after it
	sub(/ *\",? *$/,"",s)	# strip blanks, the closing quote, and the optional
				# comma and blanks that end the line
	return s
}
357*e6d6c189SCody Peter Mello
358*e6d6c189SCody Peter Mello
function html_accents(s)
{
	# Return s with braced TeX accent sequences, such as {\'e} or
	# {\"O}, replaced by the corresponding HTML (ISO Latin 1) character
	# entities, such as &eacute; or &Ouml;.
	#
	# Each replacement string is written "\\&name;": awk reads that as
	# \&name;, and gsub() then inserts a literal ampersand instead of
	# the matched text.
	#
	# The grave accent patterns must contain a literal backquote after
	# the backslash ({\`a}).  Braces are escaped in every pattern so
	# that no awk can mistake them for an interval expression.

	if (index(s,"\\") > 0)			# important optimization
	{
		# Convert common lower-case accented letters according to the
		# table on p. 169 of Peter Flynn's ``The World Wide Web
		# Handbook'', International Thomson Computer Press, 1995, ISBN
		# 1-85032-205-8.  The official table of ISO Latin 1 SGML
		# entities used in HTML can be found in the file
		# /usr/local/lib/html-check/lib/ISOlat1.sgml (your path
		# may differ).

		gsub(/\{\\`a\}/,	"\\&agrave;",	s)
		gsub(/\{\\'a\}/,	"\\&aacute;",	s)
		gsub(/\{\\[\^]a\}/,	"\\&acirc;",	s)
		gsub(/\{\\~a\}/,	"\\&atilde;",	s)
		gsub(/\{\\\"a\}/,	"\\&auml;",	s)
		gsub(/\{\\aa\}/,	"\\&aring;",	s)
		gsub(/\{\\ae\}/,	"\\&aelig;",	s)

		gsub(/\{\\c\{c\}\}/,	"\\&ccedil;",	s)

		gsub(/\{\\`e\}/,	"\\&egrave;",	s)
		gsub(/\{\\'e\}/,	"\\&eacute;",	s)
		gsub(/\{\\[\^]e\}/,	"\\&ecirc;",	s)
		gsub(/\{\\\"e\}/,	"\\&euml;",	s)

		gsub(/\{\\`i\}/,	"\\&igrave;",	s)
		gsub(/\{\\'i\}/,	"\\&iacute;",	s)
		gsub(/\{\\[\^]i\}/,	"\\&icirc;",	s)
		gsub(/\{\\\"i\}/,	"\\&iuml;",	s)

		# ignore eth and thorn

		gsub(/\{\\~n\}/,	"\\&ntilde;",	s)

		gsub(/\{\\`o\}/,	"\\&ograve;",	s)
		gsub(/\{\\'o\}/,	"\\&oacute;",	s)
		gsub(/\{\\[\^]o\}/,	"\\&ocirc;",	s)
		gsub(/\{\\~o\}/,	"\\&otilde;",	s)
		gsub(/\{\\\"o\}/,	"\\&ouml;",	s)
		gsub(/\{\\o\}/,		"\\&oslash;",	s)

		gsub(/\{\\`u\}/,	"\\&ugrave;",	s)
		gsub(/\{\\'u\}/,	"\\&uacute;",	s)
		gsub(/\{\\[\^]u\}/,	"\\&ucirc;",	s)
		gsub(/\{\\\"u\}/,	"\\&uuml;",	s)

		gsub(/\{\\'y\}/,	"\\&yacute;",	s)
		gsub(/\{\\\"y\}/,	"\\&yuml;",	s)

		# Now do the same for upper-case accents

		gsub(/\{\\`A\}/,	"\\&Agrave;",	s)
		gsub(/\{\\'A\}/,	"\\&Aacute;",	s)
		gsub(/\{\\[\^]A\}/,	"\\&Acirc;",	s)
		gsub(/\{\\~A\}/,	"\\&Atilde;",	s)
		gsub(/\{\\\"A\}/,	"\\&Auml;",	s)
		gsub(/\{\\AA\}/,	"\\&Aring;",	s)
		gsub(/\{\\AE\}/,	"\\&AElig;",	s)

		gsub(/\{\\c\{C\}\}/,	"\\&Ccedil;",	s)

		gsub(/\{\\`E\}/,	"\\&Egrave;",	s)
		gsub(/\{\\'E\}/,	"\\&Eacute;",	s)
		gsub(/\{\\[\^]E\}/,	"\\&Ecirc;",	s)
		gsub(/\{\\\"E\}/,	"\\&Euml;",	s)

		gsub(/\{\\`I\}/,	"\\&Igrave;",	s)
		gsub(/\{\\'I\}/,	"\\&Iacute;",	s)
		gsub(/\{\\[\^]I\}/,	"\\&Icirc;",	s)
		gsub(/\{\\\"I\}/,	"\\&Iuml;",	s)

		# ignore eth and thorn

		gsub(/\{\\~N\}/,	"\\&Ntilde;",	s)

		gsub(/\{\\`O\}/,	"\\&Ograve;",	s)
		gsub(/\{\\'O\}/,	"\\&Oacute;",	s)
		gsub(/\{\\[\^]O\}/,	"\\&Ocirc;",	s)
		gsub(/\{\\~O\}/,	"\\&Otilde;",	s)
		gsub(/\{\\\"O\}/,	"\\&Ouml;",	s)
		gsub(/\{\\O\}/,		"\\&Oslash;",	s)

		gsub(/\{\\`U\}/,	"\\&Ugrave;",	s)
		gsub(/\{\\'U\}/,	"\\&Uacute;",	s)
		gsub(/\{\\[\^]U\}/,	"\\&Ucirc;",	s)
		gsub(/\{\\\"U\}/,	"\\&Uuml;",	s)

		gsub(/\{\\'Y\}/,	"\\&Yacute;",	s)

		gsub(/\{\\ss\}/,	"\\&szlig;",	s)

		# Others not mentioned in Flynn's book
		gsub(/\{\\'\\i\}/,	"\\&iacute;",	s)	# acute on dotless i
		gsub(/\{\\'\\j\}/,	"j",		s)	# no entity: plain j
	}
	return (s)
}
458*e6d6c189SCody Peter Mello
459*e6d6c189SCody Peter Mello
function html_begin_issue( anchor)
{
	# Print the lines that open an issue heading: a horizontal rule,
	# then <H1> and a named anchor whose name comes from html_label().
	print_line("")
	print_line(prefix(2) "<HR>")
	print_line("")
	print_line(prefix(2) "<H1>")
	anchor = prefix(3) "<A NAME=\"" html_label() "\">"
	print_line(anchor)
}
468*e6d6c189SCody Peter Mello
469*e6d6c189SCody Peter Mello
function html_begin_pages()
{
	# Return the opening tag of a link from the page numbers to the
	# article's BibTeX entry (BIBFILEURL, "#", citation label), or an
	# empty string when not producing HTML or no BIBFILEURL was given.
	if (HTML && (BIBFILEURL != ""))
		return ("<A HREF=\"" BIBFILEURL "#" Citation_label "\">")
	return ("")
}
474*e6d6c189SCody Peter Mello
475*e6d6c189SCody Peter Mello
function html_begin_pre()
{
	# Open a <PRE> environment, setting In_PRE so that html_end_pre()
	# knows there is one to close.
	In_PRE = 1
	print_line("<PRE>")
}
481*e6d6c189SCody Peter Mello
482*e6d6c189SCody Peter Mello
function html_begin_title()
{
	# Return the opening tag of a link to the article's URL, or an
	# empty string when not producing HTML or the article has no URL.
	if (HTML && (Url != ""))
		return ("<A HREF=\"" Url "\">")
	return ("")
}
487*e6d6c189SCody Peter Mello
488*e6d6c189SCody Peter Mello
function html_begin_toc()
{
	# Start a table-of-contents section: end any section still open,
	# then open a new <PRE> environment.
	html_end_toc()
	html_begin_pre()
}
494*e6d6c189SCody Peter Mello
495*e6d6c189SCody Peter Mello
function html_body( k)
{
	# Print the saved lines Body[1] .. Body[BodyLines] in order.
	k = 1
	while (k <= BodyLines)
	{
		print Body[k]
		++k
	}
}
501*e6d6c189SCody Peter Mello
function html_breakpoint(title,maxlength, best,pos,start,total)
{
	# Return the largest character position in title AFTER which the
	# title can be broken across lines without exceeding maxlength
	# visible characters (as measured by html_length()).  When the
	# whole title fits, or no space can be found to break at, the
	# length of the title is returned.

	total = length(title)
	if (html_length(title) <= maxlength)	# title fits on one line
		return (total)

	# Forward scan.  HTML markup takes up characters that are not
	# visible, so the break may lie beyond position maxlength.  To
	# limit calls of the expensive html_length() function, only
	# positions followed by a space are measured.
	start = min(maxlength,total)
	best = 0
	for (pos = start; pos < total; ++pos)
	{
		if (substr(title,pos+1,1) != " ")
			continue		# cannot break here
		if (html_length(substr(title,1,pos)) > maxlength)
			break			# gone too far: keep the last good break
		best = pos
	}
	if (best != 0)
		return (best)

	# Backward scan: look for the nearest earlier space.
	pos = start - 1
	while ((pos > 0) && (substr(title,pos+1,1) != " "))
		--pos
	if (pos < 1)				# no break point found
		return (total)			# so must print entire string
	return (pos)
}
542*e6d6c189SCody Peter Mello
543*e6d6c189SCody Peter Mello
544*e6d6c189SCody Peter Mello
function html_end_issue()
{
	# Print the closing tags that match html_begin_issue(): first the
	# anchor, then the heading.
	print_line(prefix(3) "</A>")
	print_line(prefix(2) "</H1>")
}
550*e6d6c189SCody Peter Mello
551*e6d6c189SCody Peter Mello
function html_end_pages()
{
	# Return the closing tag for the link opened by html_begin_pages(),
	# or an empty string when that function opens no link.
	if (HTML && (BIBFILEURL != ""))
		return ("</A>")
	return ("")
}
556*e6d6c189SCody Peter Mello
557*e6d6c189SCody Peter Mello
function html_end_pre()
{
	# Close a <PRE> environment if one is currently open (In_PRE
	# nonzero), and record that it is closed.  Does nothing otherwise.
	if (!In_PRE)
		return

	print_line("</PRE>")
	In_PRE = 0
}
566*e6d6c189SCody Peter Mello
567*e6d6c189SCody Peter Mello
function html_end_title()
{
	# Return the closing anchor tag for a title link.  The tag is
	# produced only when HTML output is enabled and Url is non-empty;
	# otherwise the result is the empty string.
	if (HTML && (Url != ""))
		return ("</A>")
	return ("")
}
572*e6d6c189SCody Peter Mello
573*e6d6c189SCody Peter Mello
function html_end_toc()
{
	# Finishing a table of contents only requires closing any <PRE>
	# environment that is still open.
	html_end_pre()
}
578*e6d6c189SCody Peter Mello
579*e6d6c189SCody Peter Mello
function html_fonts(s, arg,control_word,k,level,n,open_brace)
{
	# Translate TeX font changes in s to HTML markup and return the
	# result.  Two forms are recognized:
	#
	#	{\xxx ...}	with \xxx a key of Font_decl_map
	#	\xxx{...}	with \xxx a key of Font_cmd_map
	#
	# A map value of "toupper" upper-cases the argument, an empty
	# value leaves the argument bare, and any other value is used as
	# the name of the HTML tag wrapped around the argument.
	#
	# The extracted argument still ends with the closing brace of its
	# group (when there is one); TeX_to_HTML() removes the remaining
	# braces with strip_braces() afterwards.
	#
	# arg, control_word, k, level, n, and open_brace are locals.

	open_brace = index(s,"{")
	if (open_brace == 0)			# important optimization:
		return (s)			# no brace group, nothing to do

	# Find the end of the brace group: on exit, k is one past the
	# matching close brace, or length(s)+1 if the group is unclosed.
	level = 1
	for (k = open_brace + 1; (level != 0) && (k <= length(s)); ++k)
	{
		if (substr(s,k,1) == "{")
			level++
		else if (substr(s,k,1) == "}")
			level--
	}

	# {...} is now found at open_brace ... (k-1)
	for (control_word in Font_decl_map)	# look for {\xxx ...}
	{
		# The map keys begin with a backslash, so prefixing another
		# one yields a regexp that matches the literal control word;
		# the trailing bracket expression rejects longer names such
		# as \itshape when looking for \it.
		if (substr(s,open_brace+1,length(control_word)+1) ~ \
			("\\" control_word "[^A-Za-z]"))
		{
			n = open_brace + 1 + length(control_word)
			arg = trim(substr(s,n,k - n))
			if (Font_decl_map[control_word] == "toupper") # arg -> ARG
				arg = toupper(arg)
			else if (Font_decl_map[control_word] != "") # arg -> <TAG>arg</TAG>
				arg = "<" Font_decl_map[control_word] ">" arg "</" Font_decl_map[control_word] ">"
			return (substr(s,1,open_brace-1) arg html_fonts(substr(s,k)))
		}
	}
	for (control_word in Font_cmd_map)	# look for \xxx{...}
	{
		if (substr(s,open_brace - length(control_word),length(control_word)) ~ \
			("\\" control_word))
		{
			n = open_brace + 1
			arg = trim(substr(s,n,k - n))
			if (Font_cmd_map[control_word] == "toupper") # arg -> ARG
				arg = toupper(arg)
			else if (Font_cmd_map[control_word] != "") # arg -> <TAG>arg</TAG>
				arg = "<" Font_cmd_map[control_word] ">" arg "</" Font_cmd_map[control_word] ">"
			n = open_brace - length(control_word) - 1 # last char before \xxx
			return (substr(s,1,n) arg html_fonts(substr(s,k)))
		}
	}

	# This brace group carries no font command (e.g., a BibTeX
	# case-protecting {Word}).  Keep the text up to and including its
	# open brace and continue with what follows, so that font changes
	# nested inside this group or appearing later in s are still
	# translated instead of being silently skipped.
	return (substr(s,1,open_brace) html_fonts(substr(s,open_brace+1)))
}
627*e6d6c189SCody Peter Mello
628*e6d6c189SCody Peter Mello
function html_header( cmd)
{
	# Print the HTML prologue: provenance comments, DOCTYPE, and the
	# <HEAD> section, ending with the opening <BODY> tag.
	#
	# Sets the globals USER, HOSTNAME, DATE, and PERSONAL_NAME from
	# the environment and from external commands.  cmd is a local.

	USER = ENVIRON["USER"]
	if (USER == "")
	    USER = ENVIRON["LOGNAME"]
	if (USER == "")
	    USER = "????"

	# Each command pipe is closed after its single read so that no
	# child process or file descriptor is left open.
	cmd = "hostname"
	cmd | getline HOSTNAME
	close(cmd)

	cmd = "date"
	cmd | getline DATE
	close(cmd)

	# Try the NIS password map first; ypcat's diagnostics are
	# discarded because many hosts have no NIS, in which case the
	# local password file is consulted instead.
	cmd = "ypcat passwd 2>/dev/null | grep '^" USER ":' | awk -F: '{print $5}'"
	cmd | getline PERSONAL_NAME
	close(cmd)
	if (PERSONAL_NAME == "")
	{
	    cmd = "grep  '^" USER ":' /etc/passwd | awk -F: '{print $5}'"
	    cmd | getline PERSONAL_NAME
	    close(cmd)
	}


	print "<!-- WARNING: Do NOT edit this file.  It was converted from -->"
	print "<!-- BibTeX format to HTML by journal-toc.awk version " VERSION_NUMBER " " VERSION_DATE " -->"
	print "<!-- on " DATE " -->"
	print "<!-- for " PERSONAL_NAME " (" USER "@" HOSTNAME ") -->"
	print ""
	print ""
	print "<!DOCTYPE HTML public \"-//IETF//DTD HTML//EN\">"
	print ""
	print "<HTML>"
	print prefix(1) "<HEAD>"
	print prefix(2) "<TITLE>"
	print prefix(3)  Journal
	print prefix(2) "</TITLE>"
	print prefix(2) "<LINK REV=\"made\" HREF=\"mailto:" USER "@" HOSTNAME "\">"
	print prefix(1) "</HEAD>"
	print ""
	print prefix(1) "<BODY>"
}
661*e6d6c189SCody Peter Mello
662*e6d6c189SCody Peter Mello
function html_label( anchor)
{
	# Build the anchor name for the current issue from the globals
	# Volume, Number, Month, and Year, in the form
	# Volume(Number):Month:Year, and then delete every character that
	# is not alphanumeric or one of ( ) : , ; . / -
	#
	# anchor is a local.

	anchor = Volume "(" Number "):" Month ":" Year
	gsub(/[^[:alnum:]():,;.\/\-]/,"",anchor)
	return (anchor)
}
670*e6d6c189SCody Peter Mello
671*e6d6c189SCody Peter Mello
function html_length(s)
{
	# Return the visible length of s.  In HTML mode, SGML tags and
	# SGML entities are deleted before measuring; otherwise the
	# length of s itself is returned.

	if (!HTML)
		return (length(s))

	gsub(/<\/?[^>]*>/,"",s)		# drop SGML tags
	gsub(/&[[:alnum:]]+;/,"",s)	# drop SGML entities
	return (length(s))
}
682*e6d6c189SCody Peter Mello
683*e6d6c189SCody Peter Mello
function html_toc()
{
	# Print the heading of the issue table of contents, followed by
	# the entries accumulated in the global HTML_TOC.

	print prefix(2) "<H1>"
	print prefix(3) "Table of contents for issues of " Journal
	print prefix(2) "</H1>"

	print HTML_TOC
}
691*e6d6c189SCody Peter Mello
692*e6d6c189SCody Peter Mello
function html_toc_entry()
{
	# Append one line to the global HTML_TOC: a link whose target is
	# the label of the current issue and whose text is the issue's
	# volume/number/month/year description, followed by a line break.

	HTML_TOC = HTML_TOC "        <A HREF=\"#" html_label() "\">" \
		vol_no_month_year() "</A><BR>" "\n"
}
699*e6d6c189SCody Peter Mello
700*e6d6c189SCody Peter Mello
function html_trailer()
{
	# Finish the HTML document: close any open <PRE> environment,
	# then print the closing BODY and HTML tags.

	html_end_pre()

	print prefix(1) "</BODY>"
	print "</HTML>"
}
707*e6d6c189SCody Peter Mello
708*e6d6c189SCody Peter Mello
function initialize()
{
	# Set up the program's global constants and lookup tables, and
	# normalize the user-settable variables HTML and INDENT.

	# NB: Update these when the program changes
	VERSION_DATE = "[09-Oct-1996]"
	VERSION_NUMBER = "1.00"

	# An unset HTML means plain-text output; anything else is
	# forced to a number.
	HTML = (HTML == "") ? 0 : (0 + HTML)

	if (INDENT == "")
		INDENT = 4

	if (HTML == 0)
		INDENT = 0	# indentation suppressed in ASCII mode

	LEADERS = " . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . . ."

	MAX_TITLE_CHARS = 36	# 36 produces a 79-char output line when there is
				# just an initial page number.  If this is
				# increased, the LEADERS string may need to be
				# lengthened.

	MIN_LEADERS = 4		# Minimum number of characters from LEADERS
				# required when leaders are used.  The total
				# number of characters that can appear in a
				# title line is MAX_TITLE_CHARS + MIN_LEADERS.
				# Leaders are omitted when the title length is
				# between MAX_TITLE_CHARS and this sum.

	MIN_LEADERS_SPACE = "        "	# must be at least MIN_LEADERS characters long

	# Three-letter month abbreviations and their full names
	Month_expansion["jan"]	= "January"
	Month_expansion["feb"]	= "February"
	Month_expansion["mar"]	= "March"
	Month_expansion["apr"]	= "April"
	Month_expansion["may"]	= "May"
	Month_expansion["jun"]	= "June"
	Month_expansion["jul"]	= "July"
	Month_expansion["aug"]	= "August"
	Month_expansion["sep"]	= "September"
	Month_expansion["oct"]	= "October"
	Month_expansion["nov"]	= "November"
	Month_expansion["dec"]	= "December"

	# Font commands taking an argument, \xxx{...}.  The value is the
	# HTML tag name to wrap around the argument, "toupper" to
	# upper-case it, or "" to leave it unmarked.
	Font_cmd_map["\\emph"]		= "EM"
	Font_cmd_map["\\textbf"]	= "B"
	Font_cmd_map["\\textit"]	= "I"
	Font_cmd_map["\\textmd"]	= ""
	Font_cmd_map["\\textrm"]	= ""
	Font_cmd_map["\\textsc"]	= "toupper"
	Font_cmd_map["\\textsl"]	= "I"
	Font_cmd_map["\\texttt"]	= "TT"	# typewriter, same tag as \tt and \ttfamily
	Font_cmd_map["\\textup"]	= ""

	# Font declarations used inside a group, {\xxx ...}, with the
	# same value conventions as Font_cmd_map.
	Font_decl_map["\\bf"]		= "B"
	Font_decl_map["\\em"]		= "EM"
	Font_decl_map["\\it"]		= "I"
	Font_decl_map["\\rm"]		= ""
	Font_decl_map["\\sc"]		= "toupper"
	Font_decl_map["\\sf"]		= ""
	Font_decl_map["\\tt"]		= "TT"
	Font_decl_map["\\itshape"]	= "I"
	Font_decl_map["\\upshape"]	= ""
	Font_decl_map["\\slshape"]	= "I"
	Font_decl_map["\\scshape"]	= "toupper"
	Font_decl_map["\\mdseries"]	= ""
	Font_decl_map["\\bfseries"]	= "B"
	Font_decl_map["\\rmfamily"]	= ""
	Font_decl_map["\\sffamily"]	= ""
	Font_decl_map["\\ttfamily"]	= "TT"
}
779*e6d6c189SCody Peter Mello
function min(a,b)
{
	# Return a if it compares less than b, and b otherwise.
	if (a < b)
		return (a)
	return (b)
}
784*e6d6c189SCody Peter Mello
785*e6d6c189SCody Peter Mello
function prefix(level)
{
	# Return the indentation for the given nesting level: INDENT
	# blanks per level, taken from a fixed string of blanks (so at
	# most 60).  Inside a <PRE> environment (In_PRE nonzero) no
	# indentation is produced.

	if (In_PRE)
		return ("")

	return (substr("                                                            ", \
		1, INDENT * level))
}
796*e6d6c189SCody Peter Mello
797*e6d6c189SCody Peter Mello
function print_line(line)
{
	# Output one line.  In plain-text mode the line is printed at
	# once; in HTML mode it is appended to the Body array instead,
	# because the body must be held in memory while the table of
	# contents is accumulated.

	if (!HTML)
	{
		print line
		return
	}

	Body[++BodyLines] = line
}
805*e6d6c189SCody Peter Mello
806*e6d6c189SCody Peter Mello
function print_toc_line(author,title,pages, extra,leaders,n,t)
{
	# Format and output one table-of-contents line made of a
	# right-justified author field, the title (wrapped in
	# Title_prefix/Title_suffix), and, when pages is non-empty, filler
	# plus the page number.
	#
	# When we have a multiline title, the hypertext link goes only
	# on the first line.  A multiline hypertext link looks awful
	# because of long underlines under the leading indentation.
	# Title_prefix and Title_suffix are therefore cleared after use.
	#
	# extra, leaders, n, and t are locals.

	if (pages != "")			# last title line, with page number
	{
		n = html_length(title)		# potentially expensive
		extra = n % 2			# extra space for aligned leader dots

		# The filler has the same width either way; it is drawn from
		# the leader dots when the title fits in MAX_TITLE_CHARS, and
		# from blanks when the title (almost) fills the line.
		leaders = substr((n <= MAX_TITLE_CHARS) ? LEADERS : MIN_LEADERS_SPACE, \
				 1, MAX_TITLE_CHARS + MIN_LEADERS - extra - n)

		t = sprintf("%31s   %s%s%s%s%s %4s", \
			    author, Title_prefix, title, Title_suffix, \
			    (extra ? " " : ""), leaders, pages)
	}
	else		# no leaders needed in title lines other than last one
		t = sprintf("%31s   %s%s%s", author, Title_prefix, title, Title_suffix)

	Title_prefix = ""	# forget any hypertext
	Title_suffix = ""	# link material

	# Efficiency note: an earlier version accumulated the body in a
	# single scalar like this: "Body = Body t".  Profiling revealed
	# that statement as the major hot spot, and the change to array
	# storage made the program more than twice as fast.  print_line()
	# appends to the Body array in HTML mode and prints otherwise.
	print_line(t)
}
844*e6d6c189SCody Peter Mello
845*e6d6c189SCody Peter Mello
function protect_SGML_characters(s)
{
	# Return s with the four SGML reserved characters & < > "
	# replaced by the corresponding entities.

	# NB: the ampersand MUST be handled first, because the other
	# replacements introduce ampersands of their own.
	gsub(/&/,"\\&amp;",s)

	gsub(/</,"\\&lt;",s)
	gsub(/>/,"\\&gt;",s)
	gsub(/\"/,"\\&quot;",s)

	return (s)
}
854*e6d6c189SCody Peter Mello
855*e6d6c189SCody Peter Mello
function strip_braces(s, k)
{
	# Return s with its braces removed by strip_char(): first the
	# open braces, then the close braces.

	s = strip_char(s,"{")
	return (strip_char(s,"}"))
}
861*e6d6c189SCody Peter Mello
862*e6d6c189SCody Peter Mello
function strip_char(s,c, k,out)
{
	# Strip non-backslashed instances of the character c from s, and
	# return the result.  Instances of c that are immediately preceded
	# by a backslash are preserved, together with the backslash.
	#
	# The earlier recursive version passed its arguments in the wrong
	# order when it met a backslashed instance, which duplicated the
	# string and left later instances unstripped.
	#
	# k and out are locals.

	out = ""
	while ((k = index(s,c)) > 0)
	{
		if ((k > 1) && (substr(s,k-1,1) == "\\"))
			out = out substr(s,1,k)		# backslashed: keep it
		else
			out = out substr(s,1,k-1)	# plain: drop it
		s = substr(s,k+1)			# continue after this instance
	}
	return (out s)
}
875*e6d6c189SCody Peter Mello
876*e6d6c189SCody Peter Mello
function strip_html(s)
{
	# Return s with everything of the form <...> or </...> deleted.

	gsub(/<\/?[^>]*>/,"",s)

	return (s)
}
882*e6d6c189SCody Peter Mello
883*e6d6c189SCody Peter Mello
function terminate()
{
	# End-of-run processing.  Nothing is needed for plain-text output.
	# In HTML mode, finish the buffered body and then write the
	# complete document: header, table of contents, body, trailer.

	if (!HTML)
		return

	html_end_pre()		# still buffered, since HTML is nonzero here

	HTML = 0		# NB: stop line buffering
	html_header()
	html_toc()
	html_body()
	html_trailer()
}
897*e6d6c189SCody Peter Mello
898*e6d6c189SCody Peter Mello
function TeX_to_HTML(s, k,n,parts)
{
	# Convert the TeX text s and return the result.  The text is
	# split at dollar signs into alternating non-math and math parts;
	# non-math parts go through TeX_to_HTML_nonmath() and have their
	# braces stripped, while math parts go through TeX_to_HTML_math()
	# and keep their braces.
	#
	# k, n, and parts are locals.

	# First convert the SGML reserved characters to SGML entities.
	# The ampersand is not converted here: the TeX form \& is handled
	# by the math and non-math converters.
	if (HTML)
	{
	    gsub(/>/,	"\\&gt;",	s)
	    gsub(/</,	"\\&lt;",	s)
	    gsub(/"/,	"\\&quot;",	s)
	}

	# Change display math ($$...$$) to triple dollars for the split.
	# Splitting a "$$" pair at single dollars would shift the enclosed
	# text into an odd-numbered (non-math) part; three dollars produce
	# an even number of empty parts in between and so keep the
	# enclosed text in an even-numbered (math) part.
	gsub(/[$][$]/,"$$$",s)
	n = split(s,parts,/[$]/)# split into non-math (odd) and math (even) parts

	s = ""
	for (k = 1; k <= n; ++k) # unbrace non-math part, leaving math mode intact
		s = s ((k > 1) ? "$" : "") \
			((k % 2) ? strip_braces(TeX_to_HTML_nonmath(parts[k])) : \
			TeX_to_HTML_math(parts[k]))

	gsub(/[$][$][$]/,"$$",s) # restore display math

	return (s)
}
922*e6d6c189SCody Peter Mello
923*e6d6c189SCody Peter Mello
function TeX_to_HTML_math(s)
{
	# Convert a math-mode fragment.  Mostly a dummy for now (HTML 3
	# could support some math translation): the only change made is
	# to turn TeX ampersands (\&) into the SGML entity &amp;.

	gsub(/\\&/,"\\&amp;",s)

	return (s)
}
932*e6d6c189SCody Peter Mello
933*e6d6c189SCody Peter Mello
function TeX_to_HTML_nonmath(s)
{
	# Convert a non-math fragment.  Text without a backslash is
	# returned as is.  Otherwise a few TeX control sequences are
	# reduced to ordinary characters, after which the remaining
	# markup is translated to HTML (HTML mode) or discarded
	# (plain-text mode).

	if (index(s,"\\") == 0)			# important optimization
		return (s)

	gsub(/\\slash +/,"/",s)		# TeX slashes -> conventional ones
	gsub(/ *\\emdash +/," --- ",s)	# BibNet emdashes -> conventional ones
	gsub(/\\%/,"%",s)		# TeX percents -> conventional ones
	gsub(/\\[$]/,"$",s)		# TeX dollars -> conventional ones
	gsub(/\\#/,"#",s)		# TeX sharps -> conventional ones

	if (!HTML)		# plain ASCII text output: discard all TeX markup
	{
		gsub(/\\\&/, "\\&", s)	# TeX ampersands -> conventional ones
		gsub(/\\[[:lower:]][[:lower:]] +/,"",s) # remove TeX font changes
		gsub(/\\[^[:alpha:]]/,"",s) # remove remaining TeX control symbols
		return (s)
	}

	# HTML output: translate TeX markup to HTML
	gsub(/\\&/,"\\&amp;",s)		# TeX ampersands -> SGML entities
	s = html_accents(s)
	return (html_fonts(s))
}
962*e6d6c189SCody Peter Mello
963*e6d6c189SCody Peter Mello
function trim(s)
{
	# Return s without its leading and trailing blanks and tabs.
	# Each pattern is anchored, so it can match at most once.

	sub(/^[ \t]+/,"",s)
	sub(/[ \t]+$/,"",s)
	return (s)
}
970*e6d6c189SCody Peter Mello
971*e6d6c189SCody Peter Mello
function vol_no_month_year()
{
	# Return the description of the current issue built from the
	# globals Volume, Number, Month, and Year, each passed through
	# wrap().

	return ("Volume " wrap(Volume) \
		",  Number " wrap(Number) \
		", " wrap(Month) \
		", " wrap(Year))
}
976*e6d6c189SCody Peter Mello
977*e6d6c189SCody Peter Mello
function wrap(value)
{
	# Return value enclosed in <STRONG>...</STRONG> in HTML mode, and
	# value itself otherwise.

	if (HTML)
		return ("<STRONG>" value "</STRONG>")
	return (value)
}
982