xref: /illumos-gate/usr/src/lib/libscf/common/scf_type.c (revision 870ad75a2b67a92c3449d93b4fef8a0baa982b4a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <repcache_protocol.h>
28 #include "scf_type.h"
29 #include <errno.h>
30 #include <libgen.h>
31 #include <libscf_priv.h>
32 #include <stdlib.h>
33 #include <string.h>
34 
/*
 * Bit masks used to take UTF-8 bytes apart.
 */

/* mask with the top n bits of a byte set */
#define	UTF8_TOP_N(n) \
	(~(0xff >> (n)) & 0xff)

/* mask with the bottom n bits set */
#define	UTF8_BOTTOM_N(n) \
	((1 << (n)) - 1)
40 
41 /*
42  * The first byte of an n-byte UTF8 encoded character looks like:
43  *
44  *	n	bits
45  *
46  *	1	0xxxxxxx
47  *	2	110xxxxx
48  *	3	1110xxxx
49  *	4	11110xxx
50  *	5	111110xx
51  *	6	1111110x
52  *
 * Continuation bytes are 10xxxxxx.
54  */
55 
/* longest encoding, in bytes, accepted for a single character */
#define	UTF8_MAX_BYTES	6

/*
 * Number of payload bits in an n-byte UTF-8 encoding.  A one-byte encoding
 * carries 7 bits; for multi-byte encodings you get (7 - n) bits in the
 * first byte and 6 bits for each additional byte.
 *
 * The whole expansion is parenthesized so that operators surrounding an
 * invocation cannot bind to only part of the conditional expression.
 */
#define	UTF8_BITS(n)	/* 1 <= n <= 6 */			\
	(((n) == 1) ? 7 :					\
	(7 - (n) + 6 * ((n) - 1)))
65 
/* true if c is a complete one-byte character: 0xxxxxxx */
#define	UTF8_SINGLE_BYTE(c) \
	(!((c) & UTF8_TOP_N(1)))

/*
 * true if c is the head byte of an n-byte sequence, i.e. its top n bits
 * are set and the bit after them is clear.
 */
#define	UTF8_HEAD_CHECK(c, n)		/* 2 <= n <= 6 */		\
	(UTF8_TOP_N(n) == ((c) & UTF8_TOP_N((n) + 1)))

/* the value ('x') bits carried by the head byte of an n-byte sequence */
#define	UTF8_HEAD_VALUE(c, n)		/* 2 <= n <= 6 */		\
	(UTF8_BOTTOM_N(7 - (n)) & (c))

/* true if c is a continuation byte: 10xxxxxx */
#define	UTF8_CONT_CHECK(c) \
	(UTF8_TOP_N(1) == ((c) & UTF8_TOP_N(2)))

/*
 * shifts the value accumulated so far up by 6 bits and merges in the 6
 * value bits of continuation byte c
 */
#define	UTF8_VALUE_UPDATE(v, c) \
	(((c) & UTF8_BOTTOM_N(6)) | ((v) << 6))
83 
/*
 * URI components.  The enumerators index the per-component buffers used
 * when a URI is split up; URI_COMPONENT_COUNT is the number of buffers.
 */

enum {
	URI_SCHEME = 0,			/* URI scheme */
	URI_AUTHORITY = 1,		/* URI authority */
	URI_PATH = 2,			/* URI path */
	URI_QUERY = 3,			/* URI query */
	URI_FRAGMENT = 4		/* URI fragment */
};

#define	URI_COMPONENT_COUNT	5
97 
98 static int
99 valid_utf8(const char *str_arg)
100 {
101 	const char *str = str_arg;
102 	uint_t c;
103 	uint32_t v;
104 	int i, n;
105 
106 	while ((c = *str++) != 0) {
107 		if (UTF8_SINGLE_BYTE(c))
108 			continue;	/* ascii */
109 
110 		for (n = 2; n <= UTF8_MAX_BYTES; n++)
111 			if (UTF8_HEAD_CHECK(c, n))
112 				break;
113 
114 		if (n > UTF8_MAX_BYTES)
115 			return (0);		/* invalid head byte */
116 
117 		v = UTF8_HEAD_VALUE(c, n);
118 
119 		for (i = 1; i < n; i++) {
120 			c = *str++;
121 			if (!UTF8_CONT_CHECK(c))
122 				return (0);	/* invalid byte */
123 
124 			v = UTF8_VALUE_UPDATE(v, c);
125 		}
126 
127 		/*
128 		 * if v could have been encoded in the next smallest
129 		 * encoding, the string is not well-formed UTF-8.
130 		 */
131 		if ((v >> (UTF8_BITS(n - 1))) == 0)
132 			return (0);
133 	}
134 
135 	/*
136 	 * we've reached the end of the string -- make sure it is short enough
137 	 */
138 	return ((str - str_arg) < REP_PROTOCOL_VALUE_LEN);
139 }
140 
141 static int
142 valid_string(const char *str)
143 {
144 	return (strlen(str) < REP_PROTOCOL_VALUE_LEN);
145 }
146 
147 static int
148 valid_opaque(const char *str_arg)
149 {
150 	const char *str = str_arg;
151 	uint_t c;
152 	ptrdiff_t len;
153 
154 	while ((c = *str++) != 0)
155 		if ((c < '0' || c > '9') && (c < 'a' || c > 'f') &&
156 		    (c < 'A' || c > 'F'))
157 			return (0);		/* not hex digit */
158 
159 	len = (str - str_arg) - 1;		/* not counting NIL byte */
160 	return ((len % 2) == 0 && len / 2 < REP_PROTOCOL_VALUE_LEN);
161 }
162 
163 /*
164  * Return 1 if the supplied parameter is a conformant URI (as defined
165  * by RFC 2396), 0 otherwise.
166  */
167 static int
168 valid_uri(const char *str)
169 {
170 	/*
171 	 * URI Regular Expression. Compiled with regcmp(1).
172 	 *
173 	 * ^(([^:/?#]+:){0,1})$0(//([^/?#]*)$1){0,1}([^?#]*)$2
174 	 * (?([^#]*)$3){0,1}(#(.*)$4){0,1}
175 	 */
176 	char exp[] = {
177 		040, 074, 00, 060, 012, 0126, 05, 072, 057, 077, 043, 024,
178 		072, 057, 00, 00, 01, 014, 00, 00, 060, 020, 024, 057,
179 		024, 057, 074, 01, 0125, 04, 057, 077, 043, 014, 01, 01,
180 		057, 01, 00, 01, 074, 02, 0125, 03, 077, 043, 014, 02,
181 		02, 060, 014, 024, 077, 074, 03, 0125, 02, 043, 014, 03,
182 		03, 057, 02, 00, 01, 060, 012, 024, 043, 074, 04, 021,
183 		014, 04, 04, 057, 03, 00, 01, 064, 00,
184 		0};
185 	char uri[URI_COMPONENT_COUNT][REP_PROTOCOL_VALUE_LEN];
186 
187 	/*
188 	 * If the string is too long, then the URI cannot be valid. Also,
189 	 * this protects against buffer overflow attacks on the uri array.
190 	 */
191 	if (strlen(str) >= REP_PROTOCOL_VALUE_LEN)
192 		return (0);
193 
194 	if (regex(exp, str, uri[URI_SCHEME], uri[URI_AUTHORITY], uri[URI_PATH],
195 	    uri[URI_QUERY], uri[URI_FRAGMENT]) == NULL) {
196 		return (0);
197 	}
198 	/*
199 	 * To be a valid URI, the length of the URI_PATH must not be zero
200 	 */
201 	if (strlen(uri[URI_PATH]) == 0) {
202 		return (0);
203 	}
204 	return (1);
205 }
206 
207 /*
208  * Return 1 if the supplied parameter is a conformant fmri, 0
209  * otherwise.
210  */
211 static int
212 valid_fmri(const char *str)
213 {
214 	int ret;
215 	char fmri[REP_PROTOCOL_VALUE_LEN] = { 0 };
216 
217 	/*
218 	 * Try to parse the fmri, if we can parse it then it
219 	 * must be syntactically correct. Work on a copy of
220 	 * the fmri since the parsing process can modify the
221 	 * supplied string.
222 	 */
223 	if (strlcpy(fmri, str, sizeof (fmri)) >= sizeof (fmri))
224 		return (0);
225 
226 	ret = ! scf_parse_fmri(fmri, NULL, NULL, NULL, NULL, NULL, NULL);
227 
228 	return (ret);
229 }
230 
231 rep_protocol_value_type_t
232 scf_proto_underlying_type(rep_protocol_value_type_t t)
233 {
234 	switch (t) {
235 	case REP_PROTOCOL_TYPE_BOOLEAN:
236 	case REP_PROTOCOL_TYPE_COUNT:
237 	case REP_PROTOCOL_TYPE_INTEGER:
238 	case REP_PROTOCOL_TYPE_TIME:
239 	case REP_PROTOCOL_TYPE_STRING:
240 	case REP_PROTOCOL_TYPE_OPAQUE:
241 		return (t);
242 
243 	case REP_PROTOCOL_SUBTYPE_USTRING:
244 		return (REP_PROTOCOL_TYPE_STRING);
245 
246 	case REP_PROTOCOL_SUBTYPE_URI:
247 		return (REP_PROTOCOL_SUBTYPE_USTRING);
248 	case REP_PROTOCOL_SUBTYPE_FMRI:
249 		return (REP_PROTOCOL_SUBTYPE_URI);
250 
251 	case REP_PROTOCOL_SUBTYPE_HOST:
252 		return (REP_PROTOCOL_SUBTYPE_USTRING);
253 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
254 		return (REP_PROTOCOL_SUBTYPE_HOST);
255 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
256 		return (REP_PROTOCOL_SUBTYPE_HOST);
257 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
258 		return (REP_PROTOCOL_SUBTYPE_HOST);
259 
260 	case REP_PROTOCOL_TYPE_INVALID:
261 	default:
262 		return (REP_PROTOCOL_TYPE_INVALID);
263 	}
264 }
265 
266 int
267 scf_is_compatible_protocol_type(rep_protocol_value_type_t base,
268     rep_protocol_value_type_t new)
269 {
270 	rep_protocol_value_type_t t, cur;
271 
272 	if (base == REP_PROTOCOL_TYPE_INVALID)
273 		return (0);
274 
275 	if (base == new)
276 		return (1);
277 
278 	for (t = new; t != (cur = scf_proto_underlying_type(t)); t = cur) {
279 		if (cur == REP_PROTOCOL_TYPE_INVALID)
280 			return (0);
281 		if (cur == base)
282 			return (1);		/* base is parent of new */
283 	}
284 	return (0);
285 }
286 
287 static int
288 valid_encoded_value(rep_protocol_value_type_t t, const char *v)
289 {
290 	char *p;
291 	ulong_t ns;
292 
293 	switch (t) {
294 	case REP_PROTOCOL_TYPE_BOOLEAN:
295 		return ((*v == '0' || *v == '1') && v[1] == 0);
296 
297 	case REP_PROTOCOL_TYPE_COUNT:
298 		errno = 0;
299 		if (strtoull(v, &p, 10) != 0 && *v == '0')
300 			return (0);
301 		return (errno == 0 && p != v && *p == 0);
302 
303 	case REP_PROTOCOL_TYPE_INTEGER:
304 		errno = 0;
305 		if (strtoll(v, &p, 10) != 0 && *v == '0')
306 			return (0);
307 		return (errno == 0 && p != v && *p == 0);
308 
309 	case REP_PROTOCOL_TYPE_TIME:
310 		errno = 0;
311 		(void) strtoll(v, &p, 10);
312 		if (errno != 0 || p == v || (*p != 0 && *p != '.'))
313 			return (0);
314 		if (*p == '.') {
315 			v = p + 1;
316 			errno = 0;
317 			ns = strtoul(v, &p, 10);
318 
319 			/* must be exactly 9 digits */
320 			if ((p - v) != 9 || errno != 0 || *p != 0)
321 				return (0);
322 			if (ns >= NANOSEC)
323 				return (0);
324 		}
325 		return (1);
326 
327 	case REP_PROTOCOL_TYPE_STRING:
328 		return (valid_string(v));
329 
330 	case REP_PROTOCOL_TYPE_OPAQUE:
331 		return (valid_opaque(v));
332 
333 	/*
334 	 * The remaining types are subtypes -- because of the way
335 	 * scf_validate_encoded_value() works, we can rely on the fact
336 	 * that v is a valid example of our base type.  We only have to
337 	 * check our own additional restrictions.
338 	 */
339 	case REP_PROTOCOL_SUBTYPE_USTRING:
340 		return (valid_utf8(v));
341 
342 	case REP_PROTOCOL_SUBTYPE_URI:
343 		return (valid_uri(v));
344 
345 	case REP_PROTOCOL_SUBTYPE_FMRI:
346 		return (valid_fmri(v));
347 
348 	case REP_PROTOCOL_SUBTYPE_HOST:
349 		return (valid_encoded_value(REP_PROTOCOL_SUBTYPE_HOSTNAME, v) ||
350 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V4, v) ||
351 		    valid_encoded_value(REP_PROTOCOL_SUBTYPE_NETADDR_V6, v));
352 
353 	case REP_PROTOCOL_SUBTYPE_HOSTNAME:
354 		/* XXX check for valid hostname */
355 		return (valid_utf8(v));
356 
357 	case REP_PROTOCOL_SUBTYPE_NETADDR_V4:
358 	case REP_PROTOCOL_SUBTYPE_NETADDR_V6:
359 		/* XXX check for valid netaddr */
360 		return (valid_utf8(v));
361 
362 	case REP_PROTOCOL_TYPE_INVALID:
363 	default:
364 		return (0);
365 	}
366 }
367 
368 int
369 scf_validate_encoded_value(rep_protocol_value_type_t t, const char *v)
370 {
371 	rep_protocol_value_type_t base, cur;
372 
373 	base = scf_proto_underlying_type(t);
374 	while ((cur = scf_proto_underlying_type(base)) != base)
375 		base = cur;
376 
377 	if (base != t && !valid_encoded_value(base, v))
378 		return (0);
379 
380 	return (valid_encoded_value(t, v));
381 }
382