xref: /illumos-gate/usr/src/tools/smatch/src/c2xml.c (revision c85f09cc)
11f5207b7SJohn Levon /*
21f5207b7SJohn Levon  * Sparse c2xml
31f5207b7SJohn Levon  *
41f5207b7SJohn Levon  * Dumps the parse tree as an xml document
51f5207b7SJohn Levon  *
61f5207b7SJohn Levon  * Copyright (C) 2007 Rob Taylor
71f5207b7SJohn Levon  *
81f5207b7SJohn Levon  * Permission is hereby granted, free of charge, to any person obtaining a copy
91f5207b7SJohn Levon  * of this software and associated documentation files (the "Software"), to deal
101f5207b7SJohn Levon  * in the Software without restriction, including without limitation the rights
111f5207b7SJohn Levon  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
121f5207b7SJohn Levon  * copies of the Software, and to permit persons to whom the Software is
131f5207b7SJohn Levon  * furnished to do so, subject to the following conditions:
141f5207b7SJohn Levon  *
151f5207b7SJohn Levon  * The above copyright notice and this permission notice shall be included in
161f5207b7SJohn Levon  * all copies or substantial portions of the Software.
171f5207b7SJohn Levon  *
181f5207b7SJohn Levon  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
191f5207b7SJohn Levon  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
201f5207b7SJohn Levon  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
211f5207b7SJohn Levon  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
221f5207b7SJohn Levon  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
231f5207b7SJohn Levon  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
241f5207b7SJohn Levon  * THE SOFTWARE.
251f5207b7SJohn Levon  */
261f5207b7SJohn Levon #include <stdlib.h>
271f5207b7SJohn Levon #include <stdio.h>
281f5207b7SJohn Levon #include <string.h>
291f5207b7SJohn Levon #include <unistd.h>
301f5207b7SJohn Levon #include <fcntl.h>
311f5207b7SJohn Levon #include <assert.h>
321f5207b7SJohn Levon #include <libxml/parser.h>
331f5207b7SJohn Levon #include <libxml/tree.h>
341f5207b7SJohn Levon 
351f5207b7SJohn Levon #include "expression.h"
361f5207b7SJohn Levon #include "parse.h"
371f5207b7SJohn Levon #include "scope.h"
381f5207b7SJohn Levon #include "symbol.h"
391f5207b7SJohn Levon 
401f5207b7SJohn Levon static xmlDocPtr doc = NULL;       /* document pointer */
411f5207b7SJohn Levon static xmlNodePtr root_node = NULL;/* root node pointer */
421f5207b7SJohn Levon static int idcount = 0;
431f5207b7SJohn Levon 
441f5207b7SJohn Levon static void examine_symbol(struct symbol *sym, xmlNodePtr node);
451f5207b7SJohn Levon 
newProp(xmlNodePtr node,const char * name,const char * value)461f5207b7SJohn Levon static xmlAttrPtr newProp(xmlNodePtr node, const char *name, const char *value)
471f5207b7SJohn Levon {
481f5207b7SJohn Levon 	return xmlNewProp(node, BAD_CAST name, BAD_CAST value);
491f5207b7SJohn Levon }
501f5207b7SJohn Levon 
newNumProp(xmlNodePtr node,const char * name,int value)511f5207b7SJohn Levon static xmlAttrPtr newNumProp(xmlNodePtr node, const char *name, int value)
521f5207b7SJohn Levon {
531f5207b7SJohn Levon 	char buf[256];
541f5207b7SJohn Levon 	snprintf(buf, 256, "%d", value);
551f5207b7SJohn Levon 	return newProp(node, name, buf);
561f5207b7SJohn Levon }
571f5207b7SJohn Levon 
newIdProp(xmlNodePtr node,const char * name,unsigned int id)581f5207b7SJohn Levon static xmlAttrPtr newIdProp(xmlNodePtr node, const char *name, unsigned int id)
591f5207b7SJohn Levon {
601f5207b7SJohn Levon 	char buf[256];
611f5207b7SJohn Levon 	snprintf(buf, 256, "_%d", id);
621f5207b7SJohn Levon 	return newProp(node, name, buf);
631f5207b7SJohn Levon }
641f5207b7SJohn Levon 
/*
 * Create a <symbol> element under parent describing sym.
 *
 * The element receives:
 *   - "type"                    the caller-supplied name
 *   - "id"                      "_<idcount>"; idcount is then incremented
 *   - "ident"                   only when the symbol has an identifier
 *   - "file"                    stream_name() of the symbol's start position
 *   - "start-line", "start-col"
 *   - "end-line", "end-col"     only when sym->endpos.type is non-zero
 *   - "end-file"                only when the end is in a different stream
 *
 * The new node is also stored in sym->aux, which examine_symbol() uses
 * both as its "already visited" marker and to look up the node's id.
 *
 * All three arguments must be non-NULL.  Returns the new node.
 */
static xmlNodePtr new_sym_node(struct symbol *sym, const char *name, xmlNodePtr parent)
{
	xmlNodePtr node;
	const char *ident;

	/* Validate before the first dereference of sym. */
	assert(name != NULL);
	assert(sym != NULL);
	assert(parent != NULL);

	ident = show_ident(sym->ident);

	node = xmlNewChild(parent, NULL, BAD_CAST "symbol", NULL);

	newProp(node, "type", name);

	newIdProp(node, "id", idcount);

	if (sym->ident && ident)
		newProp(node, "ident", ident);
	newProp(node, "file", stream_name(sym->pos.stream));

	newNumProp(node, "start-line", sym->pos.line);
	newNumProp(node, "start-col", sym->pos.pos);

	if (sym->endpos.type) {
		newNumProp(node, "end-line", sym->endpos.line);
		newNumProp(node, "end-col", sym->endpos.pos);
		if (sym->pos.stream != sym->endpos.stream)
			newProp(node, "end-file", stream_name(sym->endpos.stream));
	}
	sym->aux = node;

	idcount++;

	return node;
}
991f5207b7SJohn Levon 
/*
 * Run examine_symbol() on every entry of list, passing node through as
 * the parent element.
 */
static inline void examine_members(struct symbol_list *list, xmlNodePtr node)
{
	struct symbol *member;

	FOR_EACH_PTR(list, member) {
		examine_symbol(member, node);
	} END_FOR_EACH_PTR(member);
}
1081f5207b7SJohn Levon 
/*
 * Emit one attribute (value "1") on node for every modifier bit set in
 * sym->ctype.modifiers that has a name in the table below.
 *
 * Only symbols in NS_SYMBOL are processed; anything else is ignored.
 */
static void examine_modifiers(struct symbol *sym, xmlNodePtr node)
{
	/*
	 * Attribute name for each bit position (index == bit number).
	 * A NULL entry means no attribute is emitted for that bit.
	 * NOTE(review): presumably mirrors the modifier bit order declared
	 * in symbol.h -- confirm when that header changes.
	 */
	static const char *const modifiers[] = {
			"auto",
			"register",
			"static",
			"extern",
			"const",
			"volatile",
			"signed",
			"unsigned",
			"char",
			"short",
			"long",
			"long-long",
			"typedef",
			NULL,
			NULL,
			NULL,
			NULL,
			NULL,
			"inline",
			"addressable",
			"nocast",
			"noderef",
			"accessed",
			"toplevel",
			"label",
			"assigned",
			"type-type",
			"safe",
			"user-type",
			"force",
			"explicitly-signed",
			"bitwise"};
	const unsigned int nbits = sizeof modifiers / sizeof modifiers[0];
	unsigned int i;

	if (sym->namespace != NS_SYMBOL)
		return;

	/*
	 * Walk the named bits.  The mask is built from an unsigned long so
	 * that bit 31 is a well-defined shift and does not sign-extend into
	 * the upper bits of a wider modifiers word.
	 */
	for (i = 0; i < nbits; i++) {
		if ((sym->ctype.modifiers & (1UL << i)) && modifiers[i])
			newProp(node, modifiers[i], "1");
	}
}
1561f5207b7SJohn Levon 
1571f5207b7SJohn Levon static void
examine_layout(struct symbol * sym,xmlNodePtr node)1581f5207b7SJohn Levon examine_layout(struct symbol *sym, xmlNodePtr node)
1591f5207b7SJohn Levon {
1601f5207b7SJohn Levon 	examine_symbol_type(sym);
1611f5207b7SJohn Levon 
1621f5207b7SJohn Levon 	newNumProp(node, "bit-size", sym->bit_size);
1631f5207b7SJohn Levon 	newNumProp(node, "alignment", sym->ctype.alignment);
1641f5207b7SJohn Levon 	newNumProp(node, "offset", sym->offset);
1651f5207b7SJohn Levon 	if (is_bitfield_type(sym)) {
1661f5207b7SJohn Levon 		newNumProp(node, "bit-offset", sym->bit_offset);
1671f5207b7SJohn Levon 	}
1681f5207b7SJohn Levon }
1691f5207b7SJohn Levon 
/*
 * Emit a <symbol> element for sym under node, then descend into whatever
 * it references.
 *
 * Nothing is emitted when sym is NULL, when it has already been visited
 * (sym->aux is set by new_sym_node()), or when its identifier is marked
 * reserved.
 *
 * Base types: if builtin_typename() knows the base type, its name is
 * stored in "base-type-builtin".  Otherwise the base type is emitted
 * under root_node (if not already visited) and the "id" of its node is
 * copied into "base-type".
 *
 * Struct/union members and function arguments are emitted as children
 * of the new element.
 */
static void examine_symbol(struct symbol *sym, xmlNodePtr node)
{
	xmlNodePtr child = NULL;
	struct symbol *base_sym;
	const char *base;
	int array_size;

	if (!sym)
		return;
	if (sym->aux)		/* already visited */
		return;

	if (sym->ident && sym->ident->reserved)
		return;

	child = new_sym_node(sym, get_type_name(sym->type), node);
	examine_modifiers(sym, child);
	examine_layout(sym, child);

	base_sym = sym->ctype.base_type;
	if (base_sym) {
		base = builtin_typename(base_sym);
		if (base == NULL) {
			xmlChar *base_id;

			/* No-op when the base type was already visited. */
			examine_symbol(base_sym, root_node);

			/*
			 * xmlGetProp() returns a freshly allocated copy and
			 * xmlNewProp() copies it again, so the copy must be
			 * released here.
			 */
			base_id = xmlGetProp((xmlNodePtr)base_sym->aux, BAD_CAST "id");
			xmlNewProp(child, BAD_CAST "base-type", base_id);
			if (base_id)
				xmlFree(base_id);
		} else {
			newProp(child, "base-type-builtin", base);
		}
	}
	if (sym->array_size) {
		/* TODO: modify get_expression_value to give error return */
		array_size = get_expression_value(sym->array_size);
		newNumProp(child, "array-size", array_size);
	}

	switch (sym->type) {
	case SYM_STRUCT:
	case SYM_UNION:
		examine_members(sym->symbol_list, child);
		break;
	case SYM_FN:
		examine_members(sym->arguments, child);
		break;
	case SYM_UNINITIALIZED:
		newProp(child, "base-type-builtin", builtin_typename(sym));
		break;
	default:
		break;
	}
}
2221f5207b7SJohn Levon 
get_expansion_end(struct token * token)2231f5207b7SJohn Levon static struct position *get_expansion_end (struct token *token)
2241f5207b7SJohn Levon {
2251f5207b7SJohn Levon 	struct token *p1, *p2;
2261f5207b7SJohn Levon 
2271f5207b7SJohn Levon 	for (p1=NULL, p2=NULL;
2281f5207b7SJohn Levon 	     !eof_token(token);
2291f5207b7SJohn Levon 	     p2 = p1, p1 = token, token = token->next);
2301f5207b7SJohn Levon 
2311f5207b7SJohn Levon 	if (p2)
2321f5207b7SJohn Levon 		return &(p2->pos);
2331f5207b7SJohn Levon 	else
2341f5207b7SJohn Levon 		return NULL;
2351f5207b7SJohn Levon }
2361f5207b7SJohn Levon 
/*
 * Emit a <symbol type="macro"> element for sym under node.
 *
 * sym->endpos is filled in first, from get_expansion_end(); when that
 * yields nothing the start position is reused.
 */
static void examine_macro(struct symbol *sym, xmlNodePtr node)
{
	/* this should probably go in the main codebase */
	struct position *end = get_expansion_end(sym->expansion);

	sym->endpos = end ? *end : sym->pos;

	new_sym_node(sym, "macro", node);
}
2501f5207b7SJohn Levon 
examine_namespace(struct symbol * sym)2511f5207b7SJohn Levon static void examine_namespace(struct symbol *sym)
2521f5207b7SJohn Levon {
2531f5207b7SJohn Levon 	if (sym->ident && sym->ident->reserved)
2541f5207b7SJohn Levon 		return;
2551f5207b7SJohn Levon 
2561f5207b7SJohn Levon 	switch(sym->namespace) {
2571f5207b7SJohn Levon 	case NS_MACRO:
2581f5207b7SJohn Levon 		examine_macro(sym, root_node);
2591f5207b7SJohn Levon 		break;
2601f5207b7SJohn Levon 	case NS_TYPEDEF:
2611f5207b7SJohn Levon 	case NS_STRUCT:
2621f5207b7SJohn Levon 	case NS_SYMBOL:
2631f5207b7SJohn Levon 		examine_symbol(sym, root_node);
2641f5207b7SJohn Levon 		break;
2651f5207b7SJohn Levon 	case NS_NONE:
2661f5207b7SJohn Levon 	case NS_LABEL:
2671f5207b7SJohn Levon 	case NS_ITERATOR:
2681f5207b7SJohn Levon 	case NS_UNDEF:
2691f5207b7SJohn Levon 	case NS_PREPROCESSOR:
2701f5207b7SJohn Levon 	case NS_KEYWORD:
2711f5207b7SJohn Levon 		break;
2721f5207b7SJohn Levon 	default:
2731f5207b7SJohn Levon 		die("Unrecognised namespace type %d",sym->namespace);
2741f5207b7SJohn Levon 	}
2751f5207b7SJohn Levon 
2761f5207b7SJohn Levon }
2771f5207b7SJohn Levon 
get_stream_id(const char * name)2781f5207b7SJohn Levon static int get_stream_id (const char *name)
2791f5207b7SJohn Levon {
2801f5207b7SJohn Levon 	int i;
2811f5207b7SJohn Levon 	for (i=0; i<input_stream_nr; i++) {
2821f5207b7SJohn Levon 		if (strcmp(name, stream_name(i))==0)
2831f5207b7SJohn Levon 			return i;
2841f5207b7SJohn Levon 	}
2851f5207b7SJohn Levon 	return -1;
2861f5207b7SJohn Levon }
2871f5207b7SJohn Levon 
examine_symbol_list(const char * file,struct symbol_list * list)2881f5207b7SJohn Levon static inline void examine_symbol_list(const char *file, struct symbol_list *list)
2891f5207b7SJohn Levon {
2901f5207b7SJohn Levon 	struct symbol *sym;
2911f5207b7SJohn Levon 	int stream_id = get_stream_id (file);
2921f5207b7SJohn Levon 
2931f5207b7SJohn Levon 	if (!list)
2941f5207b7SJohn Levon 		return;
2951f5207b7SJohn Levon 	FOR_EACH_PTR(list, sym) {
2961f5207b7SJohn Levon 		if (sym->pos.stream == stream_id)
2971f5207b7SJohn Levon 			examine_namespace(sym);
2981f5207b7SJohn Levon 	} END_FOR_EACH_PTR(sym);
2991f5207b7SJohn Levon }
3001f5207b7SJohn Levon 
main(int argc,char ** argv)3011f5207b7SJohn Levon int main(int argc, char **argv)
3021f5207b7SJohn Levon {
3031f5207b7SJohn Levon 	struct string_list *filelist = NULL;
3041f5207b7SJohn Levon 	struct symbol_list *symlist = NULL;
3051f5207b7SJohn Levon 	char *file;
3061f5207b7SJohn Levon 
3071f5207b7SJohn Levon 	doc = xmlNewDoc(BAD_CAST "1.0");
3081f5207b7SJohn Levon 	root_node = xmlNewNode(NULL, BAD_CAST "parse");
3091f5207b7SJohn Levon 	xmlDocSetRootElement(doc, root_node);
3101f5207b7SJohn Levon 
3111f5207b7SJohn Levon /* - A DTD is probably unnecessary for something like this
3121f5207b7SJohn Levon 
3131f5207b7SJohn Levon 	dtd = xmlCreateIntSubset(doc, "parse", "http://www.kernel.org/pub/software/devel/sparse/parse.dtd" NULL, "parse.dtd");
3141f5207b7SJohn Levon 
3151f5207b7SJohn Levon 	ns = xmlNewNs (root_node, "http://www.kernel.org/pub/software/devel/sparse/parse.dtd", NULL);
3161f5207b7SJohn Levon 
3171f5207b7SJohn Levon 	xmlSetNs(root_node, ns);
3181f5207b7SJohn Levon */
3191f5207b7SJohn Levon 	symlist = sparse_initialize(argc, argv, &filelist);
3201f5207b7SJohn Levon 
321*c85f09ccSJohn Levon 	FOR_EACH_PTR(filelist, file) {
3221f5207b7SJohn Levon 		examine_symbol_list(file, symlist);
3231f5207b7SJohn Levon 		sparse_keep_tokens(file);
3241f5207b7SJohn Levon 		examine_symbol_list(file, file_scope->symbols);
3251f5207b7SJohn Levon 		examine_symbol_list(file, global_scope->symbols);
326*c85f09ccSJohn Levon 	} END_FOR_EACH_PTR(file);
3271f5207b7SJohn Levon 
3281f5207b7SJohn Levon 
3291f5207b7SJohn Levon 	xmlSaveFormatFileEnc("-", doc, "UTF-8", 1);
3301f5207b7SJohn Levon 	xmlFreeDoc(doc);
3311f5207b7SJohn Levon 	xmlCleanupParser();
3321f5207b7SJohn Levon 
3331f5207b7SJohn Levon 	return 0;
3341f5207b7SJohn Levon }
335