xref: /illumos-gate/usr/src/lib/libscf/common/scf_type.c (revision 7257d1b4d25bfac0c802847390e98a464fd787ac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <repcache_protocol.h>
30 #include "scf_type.h"
31 #include <errno.h>
32 #include <libgen.h>
33 #include <libscf_priv.h>
34 #include <stdlib.h>
35 #include <string.h>
36 
/*
 * Byte-wide bit masks used to pick apart UTF-8 bytes:
 * UTF8_TOP_N(n) has the n most significant bits of a byte set,
 * UTF8_BOTTOM_N(n) has the n least significant bits set.
 */
#define	UTF8_TOP_N(n)		(0xff & ~(0xff >> (n)))
#define	UTF8_BOTTOM_N(n)	((1 << (n)) - 1)
42 
43 /*
44  * The first byte of an n-byte UTF8 encoded character looks like:
45  *
46  *	n	bits
47  *
48  *	1	0xxxxxxx
49  *	2	110xxxxx
50  *	3	1110xxxx
51  *	4	11110xxx
52  *	5	111110xx
53  *	6	1111110x
54  *
55  * Continuation bytes are 10xxxxxx.
56  */
57 
#define	UTF8_MAX_BYTES	6

/*
 * Number of payload bits in an n-byte UTF-8 encoding.  A one-byte
 * encoding carries 7 bits; a multi-byte encoding carries (7 - n) bits in
 * the first byte plus 6 bits for each additional byte.
 *
 * The whole expansion is parenthesized so that the conditional expression
 * cannot be split apart by operators surrounding the macro invocation.
 */
#define	UTF8_BITS(n)	/* 1 <= n <= 6 */			\
	(((n) == 1) ? 7 :					\
	(7 - (n) + 6 * ((n) - 1)))
67 
/* non-zero when c is a complete one-byte character: 0xxxxxxx */
#define	UTF8_SINGLE_BYTE(c)	(!((c) & UTF8_TOP_N(1)))

/*
 * non-zero when c is the head byte of an n-byte sequence, i.e. its top n
 * bits are set and the following bit is clear.  (2 <= n <= 6)
 */
#define	UTF8_HEAD_CHECK(c, n)						\
	(UTF8_TOP_N(n) == ((c) & UTF8_TOP_N((n) + 1)))

/* the payload ('x') bits of the head byte of an n-byte sequence */
#define	UTF8_HEAD_VALUE(c, n)		/* 2 <= n <= 6 */		\
	(UTF8_BOTTOM_N(7 - (n)) & (c))

/* non-zero when c is a continuation byte: 10xxxxxx */
#define	UTF8_CONT_CHECK(c)						\
	(UTF8_TOP_N(1) == ((c) & UTF8_TOP_N(2)))

/*
 * shifts the value accumulated so far up by 6 bits and merges in the
 * 6 payload bits of continuation byte c
 */
#define	UTF8_VALUE_UPDATE(v, c)						\
	(((c) & UTF8_BOTTOM_N(6)) | ((v) << 6))
85 
86 /*
87  * URI components
88  */
89 
#define	URI_COMPONENT_COUNT	5

/* index of each URI component; there are URI_COMPONENT_COUNT of them */
enum {
	URI_SCHEME = 0,			/* URI scheme */
	URI_AUTHORITY = 1,		/* URI authority */
	URI_PATH = 2,			/* URI path */
	URI_QUERY = 3,			/* URI query */
	URI_FRAGMENT = 4		/* URI fragment */
};
99 
100 static int
101 valid_utf8(const char *str_arg)
102 {
103 	const char *str = str_arg;
104 	uint_t c;
105 	uint32_t v;
106 	int i, n;
107 
108 	while ((c = *str++) != 0) {
109 		if (UTF8_SINGLE_BYTE(c))
110 			continue;	/* ascii */
111 
112 		for (n = 2; n <= UTF8_MAX_BYTES; n++)
113 			if (UTF8_HEAD_CHECK(c, n))
114 				break;
115 
116 		if (n > UTF8_MAX_BYTES)
117 			return (0);		/* invalid head byte */
118 
119 		v = UTF8_HEAD_VALUE(c, n);
120 
121 		for (i = 1; i < n; i++) {
122 			c = *str++;
123 			if (!UTF8_CONT_CHECK(c))
124 				return (0);	/* invalid byte */
125 
126 			v = UTF8_VALUE_UPDATE(v, c);
127 		}
128 
129 		/*
130 		 * if v could have been encoded in the next smallest
131 		 * encoding, the string is not well-formed UTF-8.
132 		 */
133 		if ((v >> (UTF8_BITS(n - 1))) == 0)
134 			return (0);
135 	}
136 
137 	/*
138 	 * we've reached the end of the string -- make sure it is short enough
139 	 */
140 	return ((str - str_arg) < REP_PROTOCOL_VALUE_LEN);
141 }
142 
143 static int
144 valid_string(const char *str)
145 {
146 	return (strlen(str) < REP_PROTOCOL_VALUE_LEN);
147 }
148 
149 static int
150 valid_opaque(const char *str_arg)
151 {
152 	const char *str = str_arg;
153 	uint_t c;
154 	ptrdiff_t len;
155 
156 	while ((c = *str++) != 0)
157 		if ((c < '0' || c > '9') && (c < 'a' || c > 'f') &&
158 		    (c < 'A' || c > 'F'))
159 			return (0);		/* not hex digit */
160 
161 	len = (str - str_arg) - 1;		/* not counting NIL byte */
162 	return ((len % 2) == 0 && len / 2 < REP_PROTOCOL_VALUE_LEN);
163 }
164 
165 /*
166  * Return 1 if the supplied parameter is a conformant URI (as defined
167  * by RFC 2396), 0 otherwise.
168  */
169 static int
170 valid_uri(const char *str)
171 {
172 	/*
173 	 * URI Regular Expression. Compiled with regcmp(1).
174 	 *
175 	 * ^(([^:/?#]+:){0,1})$0(//([^/?#]*)$1){0,1}([^?#]*)$2
176 	 * (?([^#]*)$3){0,1}(#(.*)$4){0,1}
177 	 */
178 	char exp[] = {
179 		040, 074, 00, 060, 012, 0126, 05, 072, 057, 077, 043, 024,
180 		072, 057, 00, 00, 01, 014, 00, 00, 060, 020, 024, 057,
181 		024, 057, 074, 01, 0125, 04, 057, 077, 043, 014, 01, 01,
182 		057, 01, 00, 01, 074, 02, 0125, 03, 077, 043, 014, 02,
183 		02, 060, 014, 024, 077, 074, 03, 0125, 02, 043, 014, 03,
184 		03, 057, 02, 00, 01, 060, 012, 024, 043, 074, 04, 021,
185 		014, 04, 04, 057, 03, 00, 01, 064, 00,
186 		0};
187 	char uri[URI_COMPONENT_COUNT][REP_PROTOCOL_VALUE_LEN];
188 
189 	/*
190 	 * If the string is too long, then the URI cannot be valid. Also,
191 	 * this protects against buffer overflow attacks on the uri array.
192 	 */
193 	if (strlen(str) >= REP_PROTOCOL_VALUE_LEN)
194 		return (0);
195 
196 	if (regex(exp, str, uri[URI_SCHEME], uri[URI_AUTHORITY], uri[URI_PATH],
197 	    uri[URI_QUERY], uri[URI_FRAGMENT]) == NULL) {
198 		return (0);
199 	}
200 	/*
201 	 * To be a valid URI, the length of the URI_PATH must not be zero
202 	 */
203 	if (strlen(uri[URI_PATH]) == 0) {
204 		return (0);
205 	}
206 	return (1);
207 }
208 
209 /*
210  * Return 1 if the supplied parameter is a conformant fmri, 0
211  * otherwise.
212  */
213 static int
214 valid_fmri(const char *str)
215 {
216 	int ret;
217 	char fmri[REP_PROTOCOL_VALUE_LEN] = { 0 };
218 
219 	/*
220 	 * Try to parse the fmri, if we can parse it then it
221 	 * must be syntactically correct. Work on a copy of
222 	 * the fmri since the parsing process can modify the
223 	 * supplied string.
224 	 */
225 	if (strlcpy(fmri, str, sizeof (fmri)) >= sizeof (fmri))
226 		return (0);
227 
228 	ret = ! scf_parse_fmri(fmri, NULL, NULL, NULL, NULL, NULL, NULL);
229 
230 	return (ret);
231 }
232 
233 rep_protocol_value_type_t
234 scf_proto_underlying_type(rep_protocol_value_type_t t)
235 {
236 	switch (t) {
237 	case REP_PROTOCOL_TYPE_BOOLEAN:
238 	case REP_PROTOCOL_TYPE_COUNT:
239 	case REP_PROTOCOL_TYPE_INTEGER:
240 	case REP_PROTOCOL_TYPE_TIME:
241 	case REP_PROTOCOL_TYPE_STRING:
242 	case REP_PROTOCOL_TYPE_OPAQUE:
243 		return (t);
244 
245 	case REP_PROTOCOL_SUBTYPE_USTRING:
246 		return (REP_PROTOCOL_TYPE_STRING);
247 
248 	case REP_PROTOCOL_SUBTYPE_URI:
249 		return (REP_PROTOCOL_SUBTYPE_USTRING);
250 	case REP_PROTOCOL_SUBTYPE_FMRI:
251 		return (REP_PROTOCOL_SUBTYPE_URI);
252 
253 	case REP_PROTOCOL_SUBTYPE_HOST:
254 		return (REP_PROTOCOL_SUBTYPE_USTRING);
255 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
256 		return (REP_PROTOCOL_SUBTYPE_HOST);
257 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
258 		return (REP_PROTOCOL_SUBTYPE_HOST);
259 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
260 		return (REP_PROTOCOL_SUBTYPE_HOST);
261 
262 	case REP_PROTOCOL_TYPE_INVALID:
263 	default:
264 		return (REP_PROTOCOL_TYPE_INVALID);
265 	}
266 }
267 
268 int
269 scf_is_compatible_type(rep_protocol_value_type_t base,
270     rep_protocol_value_type_t new)
271 {
272 	rep_protocol_value_type_t t, cur;
273 
274 	if (base == REP_PROTOCOL_TYPE_INVALID)
275 		return (0);
276 
277 	if (base == new)
278 		return (1);
279 
280 	for (t = new; t != (cur = scf_proto_underlying_type(t)); t = cur) {
281 		if (cur == REP_PROTOCOL_TYPE_INVALID)
282 			return (0);
283 		if (cur == base)
284 			return (1);		/* base is parent of new */
285 	}
286 	return (0);
287 }
288 
289 static int
290 valid_encoded_value(rep_protocol_value_type_t t, const char *v)
291 {
292 	char *p;
293 	ulong_t ns;
294 
295 	switch (t) {
296 	case REP_PROTOCOL_TYPE_BOOLEAN:
297 		return ((*v == '0' || *v == '1') && v[1] == 0);
298 
299 	case REP_PROTOCOL_TYPE_COUNT:
300 		errno = 0;
301 		if (strtoull(v, &p, 10) != 0 && *v == '0')
302 			return (0);
303 		return (errno == 0 && p != v && *p == 0);
304 
305 	case REP_PROTOCOL_TYPE_INTEGER:
306 		errno = 0;
307 		if (strtoll(v, &p, 10) != 0 && *v == '0')
308 			return (0);
309 		return (errno == 0 && p != v && *p == 0);
310 
311 	case REP_PROTOCOL_TYPE_TIME:
312 		errno = 0;
313 		(void) strtoll(v, &p, 10);
314 		if (errno != 0 || p == v || (*p != 0 && *p != '.'))
315 			return (0);
316 		if (*p == '.') {
317 			v = p + 1;
318 			errno = 0;
319 			ns = strtoul(v, &p, 10);
320 
321 			/* must be exactly 9 digits */
322 			if ((p - v) != 9 || errno != 0 || *p != 0)
323 				return (0);
324 			if (ns >= NANOSEC)
325 				return (0);
326 		}
327 		return (1);
328 
329 	case REP_PROTOCOL_TYPE_STRING:
330 		return (valid_string(v));
331 
332 	case REP_PROTOCOL_TYPE_OPAQUE:
333 		return (valid_opaque(v));
334 
335 	/*
336 	 * The remaining types are subtypes -- because of the way
337 	 * scf_validate_encoded_value() works, we can rely on the fact
338 	 * that v is a valid example of our base type.  We only have to
339 	 * check our own additional restrictions.
340 	 */
341 	case REP_PROTOCOL_SUBTYPE_USTRING:
342 		return (valid_utf8(v));
343 
344 	case REP_PROTOCOL_SUBTYPE_URI:
345 		return (valid_uri(v));
346 
347 	case REP_PROTOCOL_SUBTYPE_FMRI:
348 		return (valid_fmri(v));
349 
350 	case REP_PROTOCOL_SUBTYPE_HOST:
351 		return (valid_encoded_value(REP_PROTOCOL_SUBTYPE_HOSTNAME, v) ||
352 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V4, v) ||
353 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V6, v));
354 
355 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
356 		/* XXX check for valid hostname */
357 		return (valid_utf8(v));
358 
359 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
360 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
361 		/* XXX check for valid netaddr */
362 		return (valid_utf8(v));
363 
364 	case REP_PROTOCOL_TYPE_INVALID:
365 	default:
366 		return (0);
367 	}
368 }
369 
370 int
371 scf_validate_encoded_value(rep_protocol_value_type_t t, const char *v)
372 {
373 	rep_protocol_value_type_t base, cur;
374 
375 	base = scf_proto_underlying_type(t);
376 	while ((cur = scf_proto_underlying_type(base)) != base)
377 		base = cur;
378 
379 	if (base != t && !valid_encoded_value(base, v))
380 		return (0);
381 
382 	return (valid_encoded_value(t, v));
383 }
384