xref: /illumos-gate/usr/src/lib/libscf/common/scf_type.c (revision a574db851cdc636fc3939b68e80d79fe7fbd57f2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #ifndef NATIVE_BUILD
30 #include "c_synonyms.h"
31 #endif
32 
33 #include <repcache_protocol.h>
34 #include "scf_type.h"
35 #include <errno.h>
36 #include <libgen.h>
37 #include <libscf_priv.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
/*
 * Single-byte bit masks used by the UTF-8 helpers below.
 */
#define	UTF8_TOP_N(n) \
	(0xff ^ (0xff >> (n)))		/* top N bits set */

#define	UTF8_BOTTOM_N(n) \
	((1 << (n)) - 1)		/* bottom N bits set */

/*
 * The first byte of an n-byte UTF8 encoded character looks like:
 *
 *	n	bits
 *
 *	1	0xxxxxxx
 *	2	110xxxxx
 *	3	1110xxxx
 *	4	11110xxx
 *	5	111110xx
 *	6	1111110x
 *
 * Continuation bytes are 10xxxxxx.
 */

#define	UTF8_MAX_BYTES	6

/*
 * number of bits in an n-byte UTF-8 encoding.  for multi-byte encodings,
 * You get (7 - n) bits in the first byte, and 6 bits for each additional byte.
 *
 * The whole expansion is parenthesized so that the conditional expression
 * cannot be split apart by operators surrounding the macro invocation.
 */
#define	UTF8_BITS(n)	/* 1 <= n <= 6 */			\
	(((n) == 1) ? 7 :					\
	(7 - (n) + 6 * ((n) - 1)))

/* true if c is a complete one-byte (ASCII) character */
#define	UTF8_SINGLE_BYTE(c) \
	(((c) & UTF8_TOP_N(1)) == 0)	/* 0xxxxxxx */

/* true if c is the head byte of an n-byte sequence: n one-bits, then a zero */
#define	UTF8_HEAD_CHECK(c, n)		/* 2 <= n <= 6 */		\
	(((c) & UTF8_TOP_N((n) + 1)) == UTF8_TOP_N(n))

/* payload bits carried by the head byte of an n-byte sequence */
#define	UTF8_HEAD_VALUE(c, n)		/* 2 <= n <= 6 */		\
	((c) & UTF8_BOTTOM_N(7 - (n)))	/* 'x' mask */

/* true if c is a continuation byte */
#define	UTF8_CONT_CHECK(c) \
	(((c) & UTF8_TOP_N(2)) == UTF8_TOP_N(1))	/* 10xxxxxx */

/*
 * adds in the 6 new bits from a continuation byte
 */
#define	UTF8_VALUE_UPDATE(v, c) \
	(((v) << 6) | ((c) & UTF8_BOTTOM_N(6)))
89 
/*
 * URI components
 *
 * Each enumerator selects one sub-match buffer in valid_uri();
 * URI_COMPONENT_COUNT is the number of such buffers.
 */

#define	URI_COMPONENT_COUNT	5

enum {
	URI_SCHEME = 0,			/* scheme */
	URI_AUTHORITY = 1,		/* authority */
	URI_PATH = 2,			/* path */
	URI_QUERY = 3,			/* query */
	URI_FRAGMENT = 4		/* fragment */
};
103 
104 static int
105 valid_utf8(const char *str_arg)
106 {
107 	const char *str = str_arg;
108 	uint_t c;
109 	uint32_t v;
110 	int i, n;
111 
112 	while ((c = *str++) != 0) {
113 		if (UTF8_SINGLE_BYTE(c))
114 			continue;	/* ascii */
115 
116 		for (n = 2; n <= UTF8_MAX_BYTES; n++)
117 			if (UTF8_HEAD_CHECK(c, n))
118 				break;
119 
120 		if (n > UTF8_MAX_BYTES)
121 			return (0);		/* invalid head byte */
122 
123 		v = UTF8_HEAD_VALUE(c, n);
124 
125 		for (i = 1; i < n; i++) {
126 			c = *str++;
127 			if (!UTF8_CONT_CHECK(c))
128 				return (0);	/* invalid byte */
129 
130 			v = UTF8_VALUE_UPDATE(v, c);
131 		}
132 
133 		/*
134 		 * if v could have been encoded in the next smallest
135 		 * encoding, the string is not well-formed UTF-8.
136 		 */
137 		if ((v >> (UTF8_BITS(n - 1))) == 0)
138 			return (0);
139 	}
140 
141 	/*
142 	 * we've reached the end of the string -- make sure it is short enough
143 	 */
144 	return ((str - str_arg) < REP_PROTOCOL_VALUE_LEN);
145 }
146 
147 static int
148 valid_string(const char *str)
149 {
150 	return (strlen(str) < REP_PROTOCOL_VALUE_LEN);
151 }
152 
153 static int
154 valid_opaque(const char *str_arg)
155 {
156 	const char *str = str_arg;
157 	uint_t c;
158 	ptrdiff_t len;
159 
160 	while ((c = *str++) != 0)
161 		if ((c < '0' || c > '9') && (c < 'a' || c > 'f') &&
162 		    (c < 'A' || c > 'F'))
163 			return (0);		/* not hex digit */
164 
165 	len = (str - str_arg) - 1;		/* not counting NIL byte */
166 	return ((len % 2) == 0 && len / 2 < REP_PROTOCOL_VALUE_LEN);
167 }
168 
169 /*
170  * Return 1 if the supplied parameter is a conformant URI (as defined
171  * by RFC 2396), 0 otherwise.
172  */
173 static int
174 valid_uri(const char *str)
175 {
176 	/*
177 	 * URI Regular Expression. Compiled with regcmp(1).
178 	 *
179 	 * ^(([^:/?#]+:){0,1})$0(//([^/?#]*)$1){0,1}([^?#]*)$2
180 	 * (?([^#]*)$3){0,1}(#(.*)$4){0,1}
181 	 */
182 	char exp[] = {
183 		040, 074, 00, 060, 012, 0126, 05, 072, 057, 077, 043, 024,
184 		072, 057, 00, 00, 01, 014, 00, 00, 060, 020, 024, 057,
185 		024, 057, 074, 01, 0125, 04, 057, 077, 043, 014, 01, 01,
186 		057, 01, 00, 01, 074, 02, 0125, 03, 077, 043, 014, 02,
187 		02, 060, 014, 024, 077, 074, 03, 0125, 02, 043, 014, 03,
188 		03, 057, 02, 00, 01, 060, 012, 024, 043, 074, 04, 021,
189 		014, 04, 04, 057, 03, 00, 01, 064, 00,
190 		0};
191 	char uri[URI_COMPONENT_COUNT][REP_PROTOCOL_VALUE_LEN];
192 
193 	/*
194 	 * If the string is too long, then the URI cannot be valid. Also,
195 	 * this protects against buffer overflow attacks on the uri array.
196 	 */
197 	if (strlen(str) >= REP_PROTOCOL_VALUE_LEN)
198 		return (0);
199 
200 	if (regex(exp, str, uri[URI_SCHEME], uri[URI_AUTHORITY], uri[URI_PATH],
201 	    uri[URI_QUERY], uri[URI_FRAGMENT]) == NULL) {
202 		return (0);
203 	}
204 	/*
205 	 * To be a valid URI, the length of the URI_PATH must not be zero
206 	 */
207 	if (strlen(uri[URI_PATH]) == 0) {
208 		return (0);
209 	}
210 	return (1);
211 }
212 
213 /*
214  * Return 1 if the supplied parameter is a conformant fmri, 0
215  * otherwise.
216  */
217 static int
218 valid_fmri(const char *str)
219 {
220 	int ret;
221 	char fmri[REP_PROTOCOL_VALUE_LEN] = { 0 };
222 
223 	/*
224 	 * Try to parse the fmri, if we can parse it then it
225 	 * must be syntactically correct. Work on a copy of
226 	 * the fmri since the parsing process can modify the
227 	 * supplied string.
228 	 */
229 	if (strlcpy(fmri, str, sizeof (fmri)) >= sizeof (fmri))
230 		return (0);
231 
232 	ret = ! scf_parse_fmri(fmri, NULL, NULL, NULL, NULL, NULL, NULL);
233 
234 	return (ret);
235 }
236 
237 rep_protocol_value_type_t
238 scf_proto_underlying_type(rep_protocol_value_type_t t)
239 {
240 	switch (t) {
241 	case REP_PROTOCOL_TYPE_BOOLEAN:
242 	case REP_PROTOCOL_TYPE_COUNT:
243 	case REP_PROTOCOL_TYPE_INTEGER:
244 	case REP_PROTOCOL_TYPE_TIME:
245 	case REP_PROTOCOL_TYPE_STRING:
246 	case REP_PROTOCOL_TYPE_OPAQUE:
247 		return (t);
248 
249 	case REP_PROTOCOL_SUBTYPE_USTRING:
250 		return (REP_PROTOCOL_TYPE_STRING);
251 
252 	case REP_PROTOCOL_SUBTYPE_URI:
253 		return (REP_PROTOCOL_SUBTYPE_USTRING);
254 	case REP_PROTOCOL_SUBTYPE_FMRI:
255 		return (REP_PROTOCOL_SUBTYPE_URI);
256 
257 	case REP_PROTOCOL_SUBTYPE_HOST:
258 		return (REP_PROTOCOL_SUBTYPE_USTRING);
259 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
260 		return (REP_PROTOCOL_SUBTYPE_HOST);
261 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
262 		return (REP_PROTOCOL_SUBTYPE_HOST);
263 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
264 		return (REP_PROTOCOL_SUBTYPE_HOST);
265 
266 	case REP_PROTOCOL_TYPE_INVALID:
267 	default:
268 		return (REP_PROTOCOL_TYPE_INVALID);
269 	}
270 }
271 
272 int
273 scf_is_compatible_type(rep_protocol_value_type_t base,
274     rep_protocol_value_type_t new)
275 {
276 	rep_protocol_value_type_t t, cur;
277 
278 	if (base == REP_PROTOCOL_TYPE_INVALID)
279 		return (0);
280 
281 	if (base == new)
282 		return (1);
283 
284 	for (t = new; t != (cur = scf_proto_underlying_type(t)); t = cur) {
285 		if (cur == REP_PROTOCOL_TYPE_INVALID)
286 			return (0);
287 		if (cur == base)
288 			return (1);		/* base is parent of new */
289 	}
290 	return (0);
291 }
292 
293 static int
294 valid_encoded_value(rep_protocol_value_type_t t, const char *v)
295 {
296 	char *p;
297 	ulong_t ns;
298 
299 	switch (t) {
300 	case REP_PROTOCOL_TYPE_BOOLEAN:
301 		return ((*v == '0' || *v == '1') && v[1] == 0);
302 
303 	case REP_PROTOCOL_TYPE_COUNT:
304 		errno = 0;
305 		if (strtoull(v, &p, 10) != 0 && *v == '0')
306 			return (0);
307 		return (errno == 0 && p != v && *p == 0);
308 
309 	case REP_PROTOCOL_TYPE_INTEGER:
310 		errno = 0;
311 		if (strtoll(v, &p, 10) != 0 && *v == '0')
312 			return (0);
313 		return (errno == 0 && p != v && *p == 0);
314 
315 	case REP_PROTOCOL_TYPE_TIME:
316 		errno = 0;
317 		(void) strtoll(v, &p, 10);
318 		if (errno != 0 || p == v || (*p != 0 && *p != '.'))
319 			return (0);
320 		if (*p == '.') {
321 			v = p + 1;
322 			errno = 0;
323 			ns = strtoul(v, &p, 10);
324 
325 			/* must be exactly 9 digits */
326 			if ((p - v) != 9 || errno != 0 || *p != 0)
327 				return (0);
328 			if (ns >= NANOSEC)
329 				return (0);
330 		}
331 		return (1);
332 
333 	case REP_PROTOCOL_TYPE_STRING:
334 		return (valid_string(v));
335 
336 	case REP_PROTOCOL_TYPE_OPAQUE:
337 		return (valid_opaque(v));
338 
339 	/*
340 	 * The remaining types are subtypes -- because of the way
341 	 * scf_validate_encoded_value() works, we can rely on the fact
342 	 * that v is a valid example of our base type.  We only have to
343 	 * check our own additional restrictions.
344 	 */
345 	case REP_PROTOCOL_SUBTYPE_USTRING:
346 		return (valid_utf8(v));
347 
348 	case REP_PROTOCOL_SUBTYPE_URI:
349 		return (valid_uri(v));
350 
351 	case REP_PROTOCOL_SUBTYPE_FMRI:
352 		return (valid_fmri(v));
353 
354 	case REP_PROTOCOL_SUBTYPE_HOST:
355 		return (valid_encoded_value(REP_PROTOCOL_SUBTYPE_HOSTNAME, v) ||
356 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V4, v) ||
357 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V6, v));
358 
359 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
360 		/* XXX check for valid hostname */
361 		return (valid_utf8(v));
362 
363 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
364 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
365 		/* XXX check for valid netaddr */
366 		return (valid_utf8(v));
367 
368 	case REP_PROTOCOL_TYPE_INVALID:
369 	default:
370 		return (0);
371 	}
372 }
373 
374 int
375 scf_validate_encoded_value(rep_protocol_value_type_t t, const char *v)
376 {
377 	rep_protocol_value_type_t base, cur;
378 
379 	base = scf_proto_underlying_type(t);
380 	while ((cur = scf_proto_underlying_type(base)) != base)
381 		base = cur;
382 
383 	if (base != t && !valid_encoded_value(base, v))
384 		return (0);
385 
386 	return (valid_encoded_value(t, v));
387 }
388