xref: /illumos-gate/usr/src/lib/libscf/common/scf_type.c (revision 7c478bd95313f5f23a4c958a745db2134aa03244)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <repcache_protocol.h>
30 #include "scf_type.h"
31 
32 #include <errno.h>
33 #include <libgen.h>
34 #include <libscf_priv.h>
35 #include <stdlib.h>
36 #include <string.h>
37 
/*
 * Byte masks used by the UTF-8 helpers below.  Both are meant to be applied
 * to a single byte, so n is expected to be in the range 0..8.
 */
#define	UTF8_TOP_N(n) \
	(0xff ^ (0xff >> (n)))		/* top N bits set */

#define	UTF8_BOTTOM_N(n) \
	((1 << (n)) - 1)		/* bottom N bits set */

/*
 * The first byte of an n-byte UTF8 encoded character looks like:
 *
 *	n	bits
 *
 *	1	0xxxxxxx
 *	2	110xxxxx
 *	3	1110xxxx
 *	4	11110xxx
 *	5	111110xx
 *	6	1111110x
 *
 * Continuation bytes are 10xxxxxx.
 */

#define	UTF8_MAX_BYTES	6

/*
 * Number of value bits in an n-byte UTF-8 encoding.  A single byte carries
 * 7 bits; for multi-byte encodings, you get (7 - n) bits in the first byte,
 * and 6 bits for each additional byte.
 *
 * The whole expansion is parenthesized so that the conditional operator
 * cannot capture neighbouring operands when the macro is used inside a
 * larger expression.
 */
#define	UTF8_BITS(n)	/* 1 <= n <= 6 */			\
	(((n) == 1) ? 7 :					\
	(7 - (n) + 6 * ((n) - 1)))

/* true if c is a complete one-byte (7-bit) character */
#define	UTF8_SINGLE_BYTE(c) \
	(((c) & UTF8_TOP_N(1)) == 0)	/* 0xxxxxxx */

/* true if c is the head byte of an n-byte sequence: n one bits, then a zero */
#define	UTF8_HEAD_CHECK(c, n)		/* 2 <= n <= 6 */		\
	(((c) & UTF8_TOP_N((n) + 1)) == UTF8_TOP_N(n))

/* the value bits carried by the head byte of an n-byte sequence */
#define	UTF8_HEAD_VALUE(c, n)		/* 2 <= n <= 6 */		\
	((c) & UTF8_BOTTOM_N(7 - (n)))	/* 'x' mask */

/* true if c is a continuation byte */
#define	UTF8_CONT_CHECK(c) \
	(((c) & UTF8_TOP_N(2)) == UTF8_TOP_N(1))	/* 10xxxxxx */

/*
 * adds in the 6 new bits from a continuation byte
 */
#define	UTF8_VALUE_UPDATE(v, c) \
	(((v) << 6) | ((c) & UTF8_BOTTOM_N(6)))
86 
87 /*
88  * URI components
89  */
90 
/* number of entries in the component enumeration below */
#define	URI_COMPONENT_COUNT	5

/*
 * Index of each URI component; the values are consecutive, starting at
 * zero, so they can be used directly as array subscripts.
 */
enum {
	URI_SCHEME	= 0,		/* URI scheme */
	URI_AUTHORITY	= 1,		/* URI authority */
	URI_PATH	= 2,		/* URI path */
	URI_QUERY	= 3,		/* URI query */
	URI_FRAGMENT	= 4		/* URI fragment */
};
100 
101 static int
102 valid_utf8(const char *str_arg)
103 {
104 	const char *str = str_arg;
105 	uint_t c;
106 	uint32_t v;
107 	int i, n;
108 
109 	while ((c = *str++) != 0) {
110 		if (UTF8_SINGLE_BYTE(c))
111 			continue;	/* ascii */
112 
113 		for (n = 2; n <= UTF8_MAX_BYTES; n++)
114 			if (UTF8_HEAD_CHECK(c, n))
115 				break;
116 
117 		if (n > UTF8_MAX_BYTES)
118 			return (0);		/* invalid head byte */
119 
120 		v = UTF8_HEAD_VALUE(c, n);
121 
122 		for (i = 1; i < n; i++) {
123 			c = *str++;
124 			if (!UTF8_CONT_CHECK(c))
125 				return (0);	/* invalid byte */
126 
127 			v = UTF8_VALUE_UPDATE(v, c);
128 		}
129 
130 		/*
131 		 * if v could have been encoded in the next smallest
132 		 * encoding, the string is not well-formed UTF-8.
133 		 */
134 		if ((v >> (UTF8_BITS(n - 1))) == 0)
135 			return (0);
136 	}
137 
138 	/*
139 	 * we've reached the end of the string -- make sure it is short enough
140 	 */
141 	return ((str - str_arg) < REP_PROTOCOL_VALUE_LEN);
142 }
143 
144 static int
145 valid_string(const char *str)
146 {
147 	return (strlen(str) < REP_PROTOCOL_VALUE_LEN);
148 }
149 
150 static int
151 valid_opaque(const char *str_arg)
152 {
153 	const char *str = str_arg;
154 	uint_t c;
155 	ptrdiff_t len;
156 
157 	while ((c = *str++) != 0)
158 		if ((c < '0' || c > '9') && (c < 'a' || c > 'f') &&
159 		    (c < 'A' || c > 'F'))
160 			return (0);		/* not hex digit */
161 
162 	len = (str - str_arg) - 1;		/* not counting NIL byte */
163 	return ((len % 2) == 0 && len / 2 < REP_PROTOCOL_VALUE_LEN);
164 }
165 
166 /*
167  * Return 1 if the supplied parameter is a conformant URI (as defined
168  * by RFC 2396), 0 otherwise.
169  */
170 static int
171 valid_uri(const char *str)
172 {
173 	/*
174 	 * URI Regular Expression. Compiled with regcmp(1).
175 	 *
176 	 * ^(([^:/?#]+:){0,1})$0(//([^/?#]*)$1){0,1}([^?#]*)$2
177 	 * (?([^#]*)$3){0,1}(#(.*)$4){0,1}
178 	 */
179 	char exp[] = {
180 		040, 074, 00, 060, 012, 0126, 05, 072, 057, 077, 043, 024,
181 		072, 057, 00, 00, 01, 014, 00, 00, 060, 020, 024, 057,
182 		024, 057, 074, 01, 0125, 04, 057, 077, 043, 014, 01, 01,
183 		057, 01, 00, 01, 074, 02, 0125, 03, 077, 043, 014, 02,
184 		02, 060, 014, 024, 077, 074, 03, 0125, 02, 043, 014, 03,
185 		03, 057, 02, 00, 01, 060, 012, 024, 043, 074, 04, 021,
186 		014, 04, 04, 057, 03, 00, 01, 064, 00,
187 		0};
188 	char uri[URI_COMPONENT_COUNT][REP_PROTOCOL_VALUE_LEN];
189 
190 	/*
191 	 * If the string is too long, then the URI cannot be valid. Also,
192 	 * this protects against buffer overflow attacks on the uri array.
193 	 */
194 	if (strlen(str) >= REP_PROTOCOL_VALUE_LEN)
195 		return (0);
196 
197 	if (regex(exp, str, uri[URI_SCHEME], uri[URI_AUTHORITY], uri[URI_PATH],
198 		uri[URI_QUERY], uri[URI_FRAGMENT]) == NULL) {
199 		return (0);
200 	}
201 	/*
202 	 * To be a valid URI, the length of the URI_PATH must not be zero
203 	 */
204 	if (strlen(uri[URI_PATH]) == 0) {
205 		return (0);
206 	}
207 	return (1);
208 }
209 
210 /*
211  * Return 1 if the supplied parameter is a conformant fmri, 0
212  * otherwise.
213  */
214 static int
215 valid_fmri(const char *str)
216 {
217 	int ret;
218 	char fmri[REP_PROTOCOL_VALUE_LEN] = { 0 };
219 
220 	/*
221 	 * Try to parse the fmri, if we can parse it then it
222 	 * must be syntactically correct. Work on a copy of
223 	 * the fmri since the parsing process can modify the
224 	 * supplied string.
225 	 */
226 	if (strlcpy(fmri, str, sizeof (fmri)) >= sizeof (fmri))
227 		return (0);
228 
229 	ret = ! scf_parse_fmri(fmri, NULL, NULL, NULL, NULL, NULL, NULL);
230 
231 	return (ret);
232 }
233 
234 rep_protocol_value_type_t
235 scf_proto_underlying_type(rep_protocol_value_type_t t)
236 {
237 	switch (t) {
238 	case REP_PROTOCOL_TYPE_BOOLEAN:
239 	case REP_PROTOCOL_TYPE_COUNT:
240 	case REP_PROTOCOL_TYPE_INTEGER:
241 	case REP_PROTOCOL_TYPE_TIME:
242 	case REP_PROTOCOL_TYPE_STRING:
243 	case REP_PROTOCOL_TYPE_OPAQUE:
244 		return (t);
245 
246 	case REP_PROTOCOL_SUBTYPE_USTRING:
247 		return (REP_PROTOCOL_TYPE_STRING);
248 
249 	case REP_PROTOCOL_SUBTYPE_URI:
250 		return (REP_PROTOCOL_SUBTYPE_USTRING);
251 	case REP_PROTOCOL_SUBTYPE_FMRI:
252 		return (REP_PROTOCOL_SUBTYPE_URI);
253 
254 	case REP_PROTOCOL_SUBTYPE_HOST:
255 		return (REP_PROTOCOL_SUBTYPE_USTRING);
256 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
257 		return (REP_PROTOCOL_SUBTYPE_HOST);
258 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
259 		return (REP_PROTOCOL_SUBTYPE_HOST);
260 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
261 		return (REP_PROTOCOL_SUBTYPE_HOST);
262 
263 	case REP_PROTOCOL_TYPE_INVALID:
264 	default:
265 		return (REP_PROTOCOL_TYPE_INVALID);
266 	}
267 }
268 
269 int
270 scf_is_compatible_type(rep_protocol_value_type_t base,
271     rep_protocol_value_type_t new)
272 {
273 	rep_protocol_value_type_t t, cur;
274 
275 	if (base == REP_PROTOCOL_TYPE_INVALID)
276 		return (0);
277 
278 	if (base == new)
279 		return (1);
280 
281 	for (t = new; t != (cur = scf_proto_underlying_type(t)); t = cur) {
282 		if (cur == REP_PROTOCOL_TYPE_INVALID)
283 			return (0);
284 		if (cur == base)
285 			return (1);		/* base is parent of new */
286 	}
287 	return (0);
288 }
289 
290 static int
291 valid_encoded_value(rep_protocol_value_type_t t, const char *v)
292 {
293 	char *p;
294 	ulong_t ns;
295 
296 	switch (t) {
297 	case REP_PROTOCOL_TYPE_BOOLEAN:
298 		return ((*v == '0' || *v == '1') && v[1] == 0);
299 
300 	case REP_PROTOCOL_TYPE_COUNT:
301 		errno = 0;
302 		if (strtoull(v, &p, 10) != 0 && *v == '0')
303 			return (0);
304 		return (errno == 0 && p != v && *p == 0);
305 
306 	case REP_PROTOCOL_TYPE_INTEGER:
307 		errno = 0;
308 		if (strtoll(v, &p, 10) != 0 && *v == '0')
309 			return (0);
310 		return (errno == 0 && p != v && *p == 0);
311 
312 	case REP_PROTOCOL_TYPE_TIME:
313 		errno = 0;
314 		(void) strtoll(v, &p, 10);
315 		if (errno != 0 || p == v || (*p != 0 && *p != '.'))
316 			return (0);
317 		if (*p == '.') {
318 			v = p + 1;
319 			errno = 0;
320 			ns = strtoul(v, &p, 10);
321 
322 			/* must be exactly 9 digits */
323 			if ((p - v) != 9 || errno != 0 || *p != 0)
324 				return (0);
325 			if (ns >= NANOSEC)
326 				return (0);
327 		}
328 		return (1);
329 
330 	case REP_PROTOCOL_TYPE_STRING:
331 		return (valid_string(v));
332 
333 	case REP_PROTOCOL_TYPE_OPAQUE:
334 		return (valid_opaque(v));
335 
336 	/*
337 	 * The remaining types are subtypes -- because of the way
338 	 * scf_validate_encoded_value() works, we can rely on the fact
339 	 * that v is a valid example of our base type.  We only have to
340 	 * check our own additional restrictions.
341 	 */
342 	case REP_PROTOCOL_SUBTYPE_USTRING:
343 		return (valid_utf8(v));
344 
345 	case REP_PROTOCOL_SUBTYPE_URI:
346 		return (valid_uri(v));
347 
348 	case REP_PROTOCOL_SUBTYPE_FMRI:
349 		return (valid_fmri(v));
350 
351 	case REP_PROTOCOL_SUBTYPE_HOST:
352 		return (valid_encoded_value(REP_PROTOCOL_SUBTYPE_HOSTNAME, v) ||
353 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V4, v) ||
354 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V6, v));
355 
356 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
357 		/* XXX check for valid hostname */
358 		return (valid_utf8(v));
359 
360 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
361 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
362 		/* XXX check for valid netaddr */
363 		return (valid_utf8(v));
364 
365 	case REP_PROTOCOL_TYPE_INVALID:
366 	default:
367 		return (0);
368 	}
369 }
370 
371 int
372 scf_validate_encoded_value(rep_protocol_value_type_t t, const char *v)
373 {
374 	rep_protocol_value_type_t base, cur;
375 
376 	base = scf_proto_underlying_type(t);
377 	while ((cur = scf_proto_underlying_type(base)) != base)
378 		base = cur;
379 
380 	if (base != t && !valid_encoded_value(base, v))
381 		return (0);
382 
383 	return (valid_encoded_value(t, v));
384 }
385