1#! /usr/bin/python
2#
3# CDDL HEADER START
4#
5# The contents of this file are subject to the terms of the
6# Common Development and Distribution License (the "License").
7# You may not use this file except in compliance with the License.
8#
9# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10# or http://www.opensolaris.org/os/licensing.
11# See the License for the specific language governing permissions
12# and limitations under the License.
13#
14# When distributing Covered Code, include this CDDL HEADER in each
15# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16# If applicable, add the following below this CDDL HEADER, with the
17# fields enclosed by brackets "[]" replaced with your own identifying
18# information: Portions Copyright [yyyy] [name of copyright owner]
19#
20# CDDL HEADER END
21#
22
23#
24# Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
25# Use is subject to license terms.
26#
27
28# Copyright 2008, 2010, Richard Lowe
29
30#
31# Check that header files conform to our standards
32#
33# Standards for all header files (lenient):
34#
35#       1) Begin with a comment containing a copyright message
36#
37#       2) Enclosed in a guard of the form:
38#
39#          #ifndef GUARD
40#          #define GUARD
41#          #endif /* [!]GUARD */
42#
43#          The preferred form is without the bang character, but either is
44#          acceptable.
45#
46#       3) Has a valid ident declaration
47#
48# Additional standards for system header files:
49#
50#       1) The file guard must take the form '_FILENAME_H[_]', where FILENAME
51#          matches the basename of the file.  If it is installed in a
52#          subdirectory, it must be of the form _DIR_FILENAME_H.  The form
53#          without the trailing underscore is preferred.
54#
55#       2) All #include directives must use the <> form.
56#
57#       3) If the header file contains anything besides comments and
58#          preprocessor directives, then it must be enclosed in a C++ guard of
59#          the form:
60#
61#          #ifdef __cplusplus
62#          extern "C" {
63#          #endif
64#
65#          #ifdef __cplusplus
66#          }
67#          #endif
68#
69
70import re, os, sys
71from onbld.Checks.Copyright import is_copyright
72
class HeaderFile(object):
	"""Reader that hands out the logical lines of a header file.

	Blank lines are dropped, line endings are stripped and backslash
	continuations are joined.  While comments are being skipped the
	reader also records whether any comment line was accepted by
	is_copyright().

	Attributes:
	    file: the object lines are read from.  It is iterated with a
	        for loop on every call, so it must keep its position
	        between calls (as an open file does).
	    filename: name available to callers for diagnostics.
	    lenient: when true, skipcomments() also skips // comments.
	    lineno: number of physical lines read so far.
	    has_copyright: True once a comment line was accepted by
	        is_copyright().
	    eof: True once the underlying file has been exhausted.
	"""

	def __init__(self, fh, filename=None, lenient=False):
		"""Wrap the open file fh.

		filename overrides the name recorded for the file; when it
		is empty or None, fh.name is used instead.
		"""
		self.file = fh
		self.lenient = lenient
		self.lineno = 0
		self.has_copyright = False
		self.eof = False

		if filename:
			self.filename = filename
		else:
			self.filename = fh.name

	def getline(self):
		"""Return the next non-blank logical line, without its line ending.

		A line ending in a backslash is joined (backslash removed)
		with whatever the next call to getline() returns, which is
		the next non-blank line.  At end of file self.eof is set and
		'' is returned; further calls keep returning ''.
		"""
		for line in self.file:
			self.lineno += 1
			if not line or line.isspace():
				continue

			line = line.rstrip('\r\n')

			# Recursively join continuation lines
			if line.endswith('\\'):
				line = line[:-1] + self.getline()

			return line

		# The loop ran dry without returning: nothing left to read
		self.eof = True
		return ''

	def _note_copyright(self, line):
		"""Remember whether the comment line looks like a copyright."""
		#
		# We explicitly exclude the form used in the
		# CDDL header rather than attempting to craft
		# a match for every possibly valid copyright
		# notice
		#
		if is_copyright(line):
			self.has_copyright = True

	def skipcomments(self, curline=None):
		"""Skip comments and return the first line that is not one.

		Processing starts with curline when it is given (and not
		empty), otherwise with the next line of the file.  C style
		comments are always skipped; C++ style comments only when
		self.lenient is set.  A line that opens a C comment is
		treated as comment in its entirety, as is the line that
		closes it.  Every line of a C comment is checked for a
		copyright notice.

		Returns '' when the end of the file is reached, including
		the case of a comment that is never closed.
		"""
		line = curline or self.getline()
		while line:
			# When lenient, allow C++ comments
			if self.lenient and re.search(r'^\s*//', line):
				line = self.getline()
				continue

			if not re.search(r'^\s*/\*', line):
				return line

			#
			# Consume the body of the comment.  getline() returns
			# '' for ever once the file is exhausted, so the loop
			# must also stop on an empty line or an unterminated
			# comment would never let us out.
			#
			while line and not re.search(r'\*/', line):
				self._note_copyright(line)
				line = self.getline()

			if not line:
				# Comment still open at end of file
				break

			# The closing line may itself carry the notice
			self._note_copyright(line)
			line = self.getline()

		return line
133
134
def err(stream, msg, hdr):
	"""Write one diagnostic about the header hdr to stream.

	The message is prefixed with the file name and, as long as the
	end of the file has not been reached, with the current line
	number.  Once hdr.eof is set the line number is left out.
	"""
	if hdr.eof:
		text = "%s: %s\n" % (hdr.filename, msg)
	else:
		text = "%s: line %d: %s\n" % (hdr.filename, hdr.lineno, msg)

	stream.write(text)
141
142
#
# Patterns for the ident strings we accept, one entry per spelling.
# Each entry must carry its own enclosing parentheses: the entries are
# glued together with '|' below and would otherwise bleed into each other.
#
idents = [
	# SCCS, keywords still in their literal (unexpanded) form
	r'((\%Z\%(\%M\%)\t\%I\%|\%W\%)\t\%E\% SMI)',
	# SCCS, keywords expanded: @(#)file.h <tab> version <tab> yy/mm/dd SMI
	r'(@\(#\)(\w[-\.\w]+\.h)\t\d+\.\d+(\.\d+\.\d+)?\t\d\d/\d\d/\d\d SMI)',
]

# A single compiled alternation over all of the spellings above
IDENT = re.compile('(' + '|'.join(idents) + ')')
154
155
def hdrchk(fh, filename=None, lenient=False, output=sys.stderr):
	"""Check one header against the standards listed at the top of this file.

	Arguments:
	    fh: open header file; it is read line by line.
	    filename: name to report in diagnostics and to derive the
	        expected guard name from.  Defaults to fh.name.
	    lenient: apply only the lenient rules.  C++ style comments are
	        tolerated, a valid #pragma ident may precede the header
	        guard, and the guard name, the form of #include
	        directives and the presence of a C++ guard are not
	        enforced.
	    output: object with a write() method that receives the
	        diagnostics, one per line.  The default is bound to
	        sys.stderr when this module is loaded.

	Returns 0 if nothing was reported, 1 otherwise.
	"""
	found_ident = False
	guard = None
	ret = 0

	hdr = HeaderFile(fh, filename=filename, lenient=lenient)

	#
	# Step 1:
	#
	# Headers must begin with a comment containing a copyright notice.  We
	# don't validate the contents of the copyright, only that it exists
	#
	line = hdr.skipcomments()

	if not hdr.has_copyright:
		err(output, "Missing copyright in opening comment", hdr)
		ret = 1

	#
	# Step 2:
	#
	# For application header files only, allow the ident string to appear
	# before the header guard.
	if lenient and line.startswith("#pragma ident") and IDENT.search(line):
		found_ident = True
		line = hdr.skipcomments()

	#
	# Step 3: Header guards
	#
	match = re.search(r'^#ifndef\s([a-zA-Z0-9_]+)$', line)
	if not match:
		# The offending line is not consumed; the later steps look at
		# it again.
		err(output, "Invalid or missing header guard", hdr)
		ret = 1
	else:
		guard = match.group(1)

		if not lenient:
			guardname = os.path.basename(hdr.filename)

			#
			# If we aren't being lenient, validate the name of the
			# guard
			#

			guardname = guardname.upper()
			guardname = guardname.replace('.', '_').replace('-', '_')
			guardname = guardname.replace('+', "_PLUS")

			#
			# The name comes from the file system, so quote it: a
			# stray regex metacharacter must neither blow up the
			# match nor loosen it.
			#
			if not re.search(r'^_.*%s[_]?$' % re.escape(guardname),
			    guard):
				err(output, "Header guard does not match "
				    "suggested style (_FILEPATH_H_)", hdr)
				ret = 1

		line = hdr.getline()
		if not re.search(r'#define\s%s$' % guard, line):
			err(output, "Invalid header guard", hdr)
			ret = 1
			if not line:
				line = hdr.skipcomments()
		else:
			line = hdr.skipcomments()


	#
	# Step 4: ident string
	#
	# We allow both the keyword and extracted versions
	#
	if (not found_ident and line.startswith("#pragma ident") and
	    not IDENT.search(line)):
		err(output, "Invalid #pragma ident", hdr)
		ret = 1
	else:
		# Only has an effect if the current line opens a comment
		line = hdr.skipcomments(line)

	#
	# Main processing loop
	#
	in_cplusplus = False
	found_endguard = False
	found_cplusplus = False
	found_code = False

	# Neither pattern changes while the body of the header is scanned
	cplusplus_re = re.compile(r'^#ifdef\s__cplusplus$')

	#
	# Without an opening guard there is no name to look for, and
	# nothing can count as its matching #endif.
	#
	endguard_re = None
	if guard is not None:
		endguard_re = re.compile(r'^#endif\s/\* [!]?%s \*/$' % guard)

	while line:
		if not (line.startswith('#') or line.startswith('using')):
			#
			# Anything that is neither a preprocessor directive
			# nor a 'using' line counts as code.  Comments are not
			# skipped here, so they count as code as well.
			#
			found_code = True
			line = hdr.getline()
			continue

		match = re.search(r'^#include(.*)$', line)
		if match:
			#
			# For system files, make sure #includes are of the form:
			# '#include <file>'
			#
			if not lenient and not re.search(r'\s<.*>',
			    match.group(1)):
				err(output, "Bad include", hdr)
				ret = 1
		elif not in_cplusplus and cplusplus_re.search(line):
			#
			# Start of C++ header guard.
			# Make sure it is of the form:
			#
			# #ifdef __cplusplus
			# extern "C" {
			# #endif
			#
			line = hdr.getline()
			if line == 'extern "C" {':
				line = hdr.getline()
				if line != '#endif':
					err(output, "Bad __cplusplus clause",
					    hdr)
					ret = 1
				else:
					in_cplusplus = True
					found_cplusplus = True
			else:
				# Some other use of __cplusplus: examine the
				# line just read like any other.
				continue
		elif in_cplusplus and cplusplus_re.search(line):
			#
			# End of C++ header guard.  Make sure it is of the form:
			#
			# #ifdef __cplusplus
			# }
			# #endif
			#
			line = hdr.getline()
			if line == '}':
				line = hdr.getline()
				if line != '#endif':
					err(output, "Bad __cplusplus clause",
					    hdr)
					ret = 1
				else:
					in_cplusplus = False
			else:
				# Not the closing clause: examine the line
				# just read like any other.
				continue
		elif endguard_re is not None and endguard_re.search(line):
			#
			# Ending header guard
			#
			found_endguard = True

		line = hdr.skipcomments()

	#
	# Check for missing end clauses
	#
	if (not lenient) and (not found_cplusplus) and found_code:
		err(output, "Missing __cplusplus guard", hdr)
		ret = 1

	if in_cplusplus:
		err(output, "Missing closing #ifdef __cplusplus", hdr)
		ret = 1

	if not found_endguard:
		err(output, "Missing or invalid ending header guard", hdr)
		ret = 1

	return ret
322