1 files changed, 531 insertions, 0 deletions
diff --git a/src/zcompile/zlexer.l b/src/zcompile/zlexer.l
new file mode 100644
index 0000000..c6c01fb
--- /dev/null
+++ b/src/zcompile/zlexer.l
@@ -0,0 +1,531 @@
+%{
+/*!
+ * \file zlexer.l
+ *
+ * \author minor modifications by Jan Kadlec <jan.kadlec@nic.cz>,
+ *         most of the code by NLnet Labs
+ *         Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ *
+ * \brief lexical analyzer for (DNS) zone files.
+ *
+ * \addtogroup zoneparser
+ * @{
+ */
+
+/*
+ * Copyright (c) 2001-2011, NLnet Labs. All rights reserved.
+ *
+ * This software is open source.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * Neither the name of the NLNET LABS nor the names of its contributors may
+ * be used to endorse or promote products derived from this software without
+ * specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+//#include "common.h"
+
+#include <ctype.h>
+#include <errno.h>
+#include <string.h>
+#include <strings.h>
+#include <assert.h>
+
+#include "zcompile/zcompile.h"
+#include "libknot/dname.h"
+#include "zcompile/parser-descriptor.h"
+#include "zparser.h"
+
+#define YY_NO_INPUT
+
+/* Utils */
+/*
+ * Diagnostic reporters, defined outside this file.  Both are called in this
+ * file with printf-style format strings.
+ */
+extern void zc_error(const char *fmt, ...);
+extern void zc_warning(const char *fmt, ...);
+
+/*!
+ * \brief Removes leading and trailing whitespace from a string, in place.
+ *
+ * Whitespace is whatever isspace() reports.  The remaining characters are
+ * moved to the front of the buffer, so \a str still points at the result.
+ * A string that is empty or consists only of whitespace becomes "".
+ *
+ * \param str NUL-terminated string to trim; must not be NULL.
+ */
+void strip_string(char *str)
+{
+	char *start = str;
+	/* One past the last character kept; never moves below 'start'. */
+	char *end = str + strlen(str);
+	size_t kept;
+
+	/*
+	 * The <ctype.h> classifiers are only defined for values representable
+	 * as unsigned char (or EOF), so plain char must be converted first.
+	 */
+	while (isspace((unsigned char)*start)) {
+		++start;
+	}
+	while (end > start && isspace((unsigned char)end[-1])) {
+		--end;
+	}
+
+	kept = (size_t)(end - start);
+	if (start != str) {
+		memmove(str, start, kept);
+	}
+	str[kept] = '\0';
+}
+
+/*!
+ * \brief Converts one hexadecimal digit character to its numeric value.
+ *
+ * Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  Any other character is treated as a
+ * programming error and aborts the process.
+ *
+ * \param ch Character to convert.
+ *
+ * \return Value of the digit, 0 to 15.
+ */
+int hexdigit_to_int(char ch)
+{
+	static const char lower[] = "0123456789abcdef";
+	static const char upper[] = "0123456789ABCDEF";
+	/* Search only the 16 digits, so '\0' never matches the terminator. */
+	const size_t ndigits = sizeof(lower) - 1;
+	const char *hit;
+
+	hit = memchr(lower, ch, ndigits);
+	if (hit != NULL) {
+		return (int)(hit - lower);
+	}
+
+	hit = memchr(upper, ch, ndigits);
+	if (hit != NULL) {
+		return (int)(hit - upper);
+	}
+
+	abort();
+}
+
+/*
+ * TTL string parser, defined outside this file.  parse_token() accepts its
+ * result as a TTL only when *endptr points at the terminating NUL afterwards.
+ */
+extern uint32_t strtottl(const char *nptr, const char **endptr);
+
+#define YY_NO_UNPUT
+/* Capacity of include_stack[] / zparser_stack[], i.e. the $INCLUDE depth limit. */
+#define MAXINCLUDES 10
+
+/*
+ * Lets rule actions say "scanner"; presumably yyscanner is the name flex gives
+ * the reentrant scanner handle inside the generated function - TODO confirm.
+ */
+#define scanner yyscanner
+extern int zp_lex(YYSTYPE *lvalp, void *scanner);
+
+/*
+ * LEXOUT((fmt, ...)) traces tokens via printf when the "#if 0" below is
+ * flipped to 1; otherwise it expands to nothing.  The doubled parentheses
+ * carry the whole printf argument list as a single macro argument.
+ */
+#if 0
+#define LEXOUT(s)  printf s /* used ONLY when debugging */
+#else
+#define LEXOUT(s)
+#endif
+
+/*
+ * Position of the scanner within the current resource record line.  The state
+ * decides how parse_token() and the whitespace rule classify the next token.
+ */
+enum lexer_state {
+	/* Initial state; re-entered after a newline outside parentheses. */
+	EXPECT_OWNER,
+	/* Set by parse_token() when a token arrives while EXPECT_OWNER. */
+	PARSING_OWNER,
+	/*
+	 * Entered on whitespace after the owner (or on leading whitespace);
+	 * tokens are tried as RR type, then class, then TTL.
+	 */
+	PARSING_TTL_CLASS_TYPE,
+	/* Entered once an RR type was recognised, or after $TTL / $ORIGIN. */
+	PARSING_RDATA
+};
+
+/*
+ * $INCLUDE bookkeeping.  Slot i holds the flex buffer and the parser fields
+ * (filename, line, origin) of the file that was being read when the i-th
+ * nested include was opened.  include_stack_ptr is the number of slots in use.
+ * These are file-scope statics, so they are shared by every scanner instance.
+ */
+static YY_BUFFER_STATE include_stack[MAXINCLUDES];
+static zparser_type zparser_stack[MAXINCLUDES];
+static int include_stack_ptr = 0;
+
+/* Helpers defined after the rules section. */
+static void pop_parser_state(void *scanner);
+static void push_parser_state(FILE *input, void *scanner);
+static int parse_token(void *scanner, int token, char *in_str,
+                       enum lexer_state *lexer_state);
+
+
+/*!
+ * \todo does not compile
+ *
+ * Fallback used only when flex itself does not define yy_set_bol().  It
+ * refers to yy_current_buffer and to a two-argument yy_create_buffer(),
+ * whereas the rest of this file passes a scanner handle to
+ * yy_create_buffer(); that mismatch is presumably why the todo says it does
+ * not compile.
+ */
+#ifndef yy_set_bol // compat definition, for flex 2.4.6
+#define yy_set_bol(at_bol) \
+{ \
+	if (!yy_current_buffer ) \
+		yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \
+	yy_current_buffer->yy_ch_buf[0] = ((at_bol)?'\n':' '); \
+}
+#endif
+
+%}
+
+%option nounput
+%option reentrant bison-bridge
+%option prefix = "zp_"
+%option outfile = "lex.yy.c"
+
+/*
+ * Named patterns used by the rules below.
+ *   SPACE    a blank or a tab
+ *   NEWLINE  a single LF or a single CR (a CR LF pair matches twice)
+ *   ZONESTR  one character of an unquoted word: anything except blank, tab,
+ *            LF, CR, parentheses, semicolon, dot, double quote and dollar
+ *   BIT      one bitlabel character: anything except a closing bracket and
+ *            newline, or a backslash followed by any character but newline
+ *   ANY      one quoted-string character: anything except double quote,
+ *            newline and backslash, or a backslash escape as for BIT
+ */
+SPACE   [ \t]
+LETTER  [a-zA-Z]
+NEWLINE [\n\r]
+ZONESTR [^ \t\n\r();.\"\$]
+DOLLAR  \$
+COMMENT ;
+DOT     \.
+BIT     [^\]\n]|\\.
+ANY     [^\"\n\\]|\\.
+
+/* Exclusive start conditions: only rules tagged with the state are active. */
+%x	incl bitlabel quotedstring
+
+%%
+	/*
+	 * Scanner state, kept in statics so that it survives between calls.
+	 * paren_open is 1 between "(" and ")"; lexer_state tracks the position
+	 * within the current record (see enum lexer_state).  Being static,
+	 * both are shared by all scanner instances.
+	 */
+	static int paren_open = 0;
+	static enum lexer_state lexer_state = EXPECT_OWNER;
+
+	/* A ';' (optionally preceded by blanks) starts a comment up to end of line. */
+{SPACE}*{COMMENT}.*	/* ignore */
+	/*
+	 * $TTL and $ORIGIN at the start of a line.  The state jumps straight
+	 * to PARSING_RDATA, so parse_token() does not try to read the argument
+	 * as a type, class or TTL.
+	 */
+^{DOLLAR}TTL            { lexer_state = PARSING_RDATA; return DOLLAR_TTL; }
+^{DOLLAR}ORIGIN         { lexer_state = PARSING_RDATA; return DOLLAR_ORIGIN; }
+
+ /*
+  * Handle $INCLUDE directives.  See
+  * http://dinosaur.compilertools.net/flex/flex_12.html#SEC12.
+  *
+  * "$INCLUDE" at the start of a line only switches to the exclusive incl
+  * start condition; the <incl> rules then consume the rest of the line.
+  * The first pair of <incl> rules handles a directive with nothing after it.
+  */
+^{DOLLAR}INCLUDE        {
+	BEGIN(incl);
+}
+<incl>\n 		|
+<incl><<EOF>>		{
+	/*
+	 * The error flag is saved before reporting and written back at the
+	 * end, so this action leaves parser->error_occurred at its previous
+	 * value (presumably zc_error() would otherwise set it - TODO confirm).
+	 */
+	int error_occurred = parser->error_occurred;
+	BEGIN(INITIAL);
+	zc_error("missing file name in $INCLUDE directive");
+	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
+	++parser->line;
+	parser->error_occurred = error_occurred;
+}
+<incl>.+ 		{
+	/*
+	 * Remainder of an "$INCLUDE" line: "<file> [<origin>] [; comment]".
+	 * Opens <file> and pushes it on the include stack so that scanning
+	 * continues inside it.  Problems are reported through zc_error() and
+	 * the directive is then skipped.
+	 */
+	char *tmp;
+	/*! \todo pointer to origin. */
+	void *origin = parser->origin;
+	/* domain_type *origin = parser->origin; */
+	/* Saved here and written back at the end of the action. */
+	int error_occurred = parser->error_occurred;
+
+	BEGIN(INITIAL);
+	if (include_stack_ptr >= MAXINCLUDES ) {
+		zc_error("includes nested too deeply, skipped (>%d)",
+			 MAXINCLUDES);
+	} else {
+		FILE *input;
+
+		/*
+		 * Remove trailing comment.  A comment starts at the FIRST ';'
+		 * on the line; cutting at the last one would leave part of a
+		 * comment that itself contains a ';' in the file name.
+		 */
+		tmp = strchr(yytext, ';');
+		if (tmp) {
+			*tmp = '\0';
+		}
+		strip_string(yytext);
+
+		/* Parse origin for include file.  */
+		tmp = strrchr(yytext, ' ');
+		if (!tmp) {
+			tmp = strrchr(yytext, '\t');
+		}
+		if (tmp) {
+			/* split the original yytext */
+			*tmp = '\0';
+			strip_string(yytext);
+
+			/*! \todo knot_dname_new_from_wire() (dname.h)
+			 *        which knot_node to pass as node?
+			 */
+			/*
+			 * NOTE(review): on success the dname is neither used
+			 * nor released here - looks like a leak until the todo
+			 * below is resolved; confirm against the dname API.
+			 */
+			knot_dname_t *dname;
+			dname = knot_dname_new_from_wire((uint8_t*)tmp + 1,
+							   strlen(tmp + 1),
+							   NULL);
+			if (!dname) {
+				zc_error("incorrect include origin '%s'",
+					 tmp + 1);
+			} else {
+				/*! \todo insert to zonedb. */
+				/* origin = domain_table_insert(
+					parser->db->domains, dname); */
+			}
+		}
+
+		if (strlen(yytext) == 0) {
+			zc_error("missing file name in $INCLUDE directive");
+		} else if (!(input = fopen(yytext, "r"))) {
+			/*
+			 * strerror() always returns the message string.  The
+			 * return type of strerror_r() differs between the XSI
+			 * (int) and GNU (char *) variants, so its result must
+			 * not be passed to "%s".
+			 */
+			zc_error("cannot open include file '%s': %s",
+				 yytext, strerror(errno));
+		} else {
+			/* Initialize parser for include file.  */
+			char *filename = strdup(yytext);
+			if (filename == NULL) {
+				/* Out of memory: skip the include. */
+				ERR_ALLOC_FAILED;
+				fclose(input);
+			} else {
+				/* Destroys yytext.  */
+				push_parser_state(input, scanner);
+				parser->filename = filename;
+				parser->line = 1;
+				parser->origin = origin;
+				lexer_state = EXPECT_OWNER;
+			}
+		}
+	}
+
+	parser->error_occurred = error_occurred;
+}
+<INITIAL><<EOF>>	{
+	/*
+	 * End of the current input.  Inside an include, close the file and
+	 * resume the including file.  At top level, hand the grammar one
+	 * synthetic NL first, then terminate on the next call.
+	 */
+	yy_set_bol(1); /* Set beginning of line, so "^" rules match.  */
+	if (include_stack_ptr == 0) {
+	// from: http://stackoverflow.com/questions/1756275/bison-end-of-file
+		/*
+		 * The flag is cleared again when scanning terminates, so a
+		 * later top-level input in the same process also gets its
+		 * final NL.
+		 */
+		static int nl_sent = 0;
+		if (nl_sent) {
+			nl_sent = 0;
+			yyterminate();
+		}
+		nl_sent = 1;
+		return NL;
+	} else {
+		fclose(yyin);
+		pop_parser_state(scanner);
+	}
+}
+	/* Any other "$word" at the start of a line: warn, then keep scanning. */
+^{DOLLAR}{LETTER}+	{ zc_warning("Unknown directive: %s", yytext); }
+	/* ".", "@" and "\#" go through parse_token() with their own token codes. */
+{DOT}	{
+	LEXOUT((". "));
+	return parse_token(scanner, '.', yytext, &lexer_state);
+}
+@	{
+	LEXOUT(("@ "));
+	return parse_token(scanner, '@', yytext, &lexer_state);
+}
+\\#	{
+	LEXOUT(("\\# "));
+	return parse_token(scanner, URR, yytext, &lexer_state);
+}
+	/*
+	 * Line end.  Outside parentheses it ends the record (NL) and the next
+	 * token is expected to be an owner; inside parentheses it is reported
+	 * as plain separator space (SP).
+	 * NOTE(review): NEWLINE matches one character, so a CR LF pair runs
+	 * this action twice and counts two lines - confirm whether intended.
+	 */
+{NEWLINE}	{
+	++parser->line;
+	if (!paren_open) {
+		lexer_state = EXPECT_OWNER;
+		LEXOUT(("NL\n"));
+		return NL;
+	} else {
+		LEXOUT(("SP "));
+		return SP;
+	}
+}
+	/*
+	 * Parentheses are reported as SP and toggle paren_open.  Nesting, or a
+	 * ")" without a "(", is reported and ends scanning.
+	 */
+\(	{
+	if (paren_open) {
+		zc_error("nested parentheses");
+		yyterminate();
+	}
+	LEXOUT(("( "));
+	paren_open = 1;
+	return SP;
+}
+\)	{
+	if (!paren_open) {
+		zc_error("closing parentheses without opening parentheses");
+		yyterminate();
+	}
+	LEXOUT((") "));
+	paren_open = 0;
+	return SP;
+}
+	/*
+	 * Blanks.  Seen while still expecting an owner (and outside
+	 * parentheses) the line starts with whitespace, which is reported as
+	 * PREV; otherwise it is SP.  Either way whitespace after the owner
+	 * position moves the state on to PARSING_TTL_CLASS_TYPE.
+	 */
+{SPACE}+	{
+	if (!paren_open && lexer_state == EXPECT_OWNER) {
+		lexer_state = PARSING_TTL_CLASS_TYPE;
+		LEXOUT(("PREV "));
+		return PREV;
+	}
+	if (lexer_state == PARSING_OWNER) {
+		lexer_state = PARSING_TTL_CLASS_TYPE;
+	}
+	LEXOUT(("SP "));
+	return SP;
+}
+
+	/*
+	 * Bitlabels.  Strip leading and ending brackets.
+	 *
+	 * The opening "\[" is matched by its own rule and discarded.  Inside
+	 * the bitlabel state every piece is kept with yymore(), so yytext
+	 * accumulates the label; when "]" arrives it is the last character of
+	 * yytext and is overwritten with NUL before the token is built.
+	 */
+\\\[			{ BEGIN(bitlabel); }
+<bitlabel><<EOF>>	{
+	zc_error("EOF inside bitlabel");
+	BEGIN(INITIAL);
+}
+<bitlabel>{BIT}*	{ yymore(); }
+<bitlabel>\n		{ ++parser->line; yymore(); }
+<bitlabel>\]		{
+	BEGIN(INITIAL);
+	yytext[yyleng - 1] = '\0';
+	return parse_token(scanner, BITLAB, yytext, &lexer_state);
+}
+
+	/*
+	 * Quoted strings.  Strip leading and ending quotes.
+	 *
+	 * Same scheme as bitlabels: the opening quote is discarded, the body
+	 * (which may span lines) is accumulated with yymore(), and the closing
+	 * quote is replaced by NUL.  The result is returned as an STR token.
+	 */
+\"			{ BEGIN(quotedstring); LEXOUT(("\" ")); }
+<quotedstring><<EOF>> 	{
+	zc_error("EOF inside quoted string");
+	BEGIN(INITIAL);
+}
+<quotedstring>{ANY}*	{ LEXOUT(("STR ")); yymore(); }
+<quotedstring>\n 	{ ++parser->line; yymore(); }
+<quotedstring>\" {
+	LEXOUT(("\" "));
+	BEGIN(INITIAL);
+	yytext[yyleng - 1] = '\0';
+	return parse_token(scanner, STR, yytext, &lexer_state);
+}
+
+({ZONESTR}|\\.|\\\n)+ {
+	/* Any allowed word, including backslash escapes.  */
+	return parse_token(scanner, STR, yytext, &lexer_state);
+}
+. {
+	/*
+	 * Catch-all for a character no other rule accepts.  The byte is taken
+	 * as unsigned char so that values above 127 are reported as 128..255
+	 * rather than as negative numbers.
+	 */
+	unsigned char ch = (unsigned char) yytext[0];
+	zc_error("unknown character '%c' (\\%03d) seen - is this a zonefile?",
+		 (int) ch, (int) ch);
+}
+%%
+
+/*
+ * Looks "word" up as an RR type name via parser_rrtype_from_string().
+ * On a match the type value is stored in *type and the token recorded in the
+ * type's descriptor is returned; otherwise 0 is returned and *type is left
+ * untouched.
+ */
+static int
+rrtype_to_token(const char *word, uint16_t *type)
+{
+	parser_rrtype_descriptor_t *descriptor;
+	const uint16_t rrtype = parser_rrtype_from_string(word);
+
+	if (rrtype == 0) {
+		/* Not an RR type. */
+		return 0;
+	}
+
+	descriptor = parser_rrtype_descriptor_by_type(rrtype);
+	*type = rrtype;
+
+	/*! \todo entry should return associated token.
+		  see nsd/dns.c */
+	return descriptor->token;
+}
+
+
+/*
+ * Remove backslash escapes from the input, in place.  See RFC 1035,
+ * section 5.1.
+ *
+ *   \DDD  three decimal digits with value 0..255 become that single byte;
+ *         a larger value is reported and handled as "\D" followed by "DD"
+ *   \X    any other character X is kept without the backslash
+ *   \     a backslash at the very end is reported and dropped
+ *
+ * The result is never longer than the input and is NUL-terminated.
+ *
+ * text:    NUL-terminated string, rewritten in place.
+ * returns: length of the rewritten string.  The string may contain embedded
+ *          NUL bytes (from \000), so this can differ from strlen().
+ */
+static size_t
+zoctet(char *text)
+{
+	const char *s = text;	/* read position */
+	char *p = text;		/* write position, never ahead of s */
+
+	while (*s != '\0') {
+		assert(p <= s);
+		if (s[0] != '\\') {
+			/* Ordinary character.  */
+			*p++ = *s++;
+		} else if (isdigit((unsigned char)s[1]) &&
+		           isdigit((unsigned char)s[2]) &&
+		           isdigit((unsigned char)s[3])) {
+			/*
+			 * \DDD escape.  The digit tests short-circuit, so
+			 * nothing past the terminating NUL is read.
+			 */
+			int val = (s[1] - '0') * 100 +
+			          (s[2] - '0') * 10 +
+			          (s[3] - '0');
+			if (val <= 255) {
+				*p++ = (char)val;
+				s += 4;
+			} else {
+				zc_warning("text escape \\DDD overflow");
+				/* Keep the first digit; the rest is rescanned. */
+				*p++ = s[1];
+				s += 2;
+			}
+		} else if (s[1] != '\0') {
+			/* \X where X is any character, keep X.  */
+			*p++ = s[1];
+			s += 2;
+		} else {
+			/* Trailing backslash, ignore it.  */
+			zc_warning("trailing backslash ignored");
+			++s;
+		}
+	}
+	*p = '\0';
+	return (size_t)(p - text);
+}
+
+/*
+ * Classifies the text just matched by the scanner and fills in *yylval.
+ *
+ * While the scanner is in PARSING_TTL_CLASS_TYPE the text is tried, in this
+ * order, as an RR type (state moves to PARSING_RDATA), an RR class (T_RRCLASS)
+ * and a TTL (T_TTL).  In every other case, and when none of the three fits,
+ * yytext is copied, unescaped with zoctet() and returned as `token` with the
+ * copy and its length in yylval->data.
+ *
+ * scanner:     reentrant scanner handle (gives access to yytext / yylval).
+ * token:       token code to return for plain data.
+ * in_str:      matched text used for the type/class/TTL lookups.
+ * lexer_state: scanner state; updated in place.
+ *
+ * Returns the token code, or NO_MEM when the copy cannot be allocated.
+ */
+static int parse_token(void *scanner, int token, char *in_str,
+                       enum lexer_state *lexer_state)
+{
+	struct yyguts_t *yyg = (struct yyguts_t *)scanner;
+	char *copy = NULL;
+
+	switch (*lexer_state) {
+	case EXPECT_OWNER:
+		*lexer_state = PARSING_OWNER;
+		break;
+	case PARSING_TTL_CLASS_TYPE: {
+		const char *ttl_end;
+		uint16_t rrclass;
+		int rr_token;
+
+		/* type */
+		rr_token = rrtype_to_token(in_str, &yylval->type);
+		if (rr_token != 0) {
+			*lexer_state = PARSING_RDATA;
+			LEXOUT(("%d[%s] ", rr_token, in_str));
+			return rr_token;
+		}
+
+		/* class */
+		rrclass = parser_rrclass_from_string(in_str);
+		if (rrclass != 0) {
+			yylval->rclass = rrclass;
+			LEXOUT(("CLASS "));
+			return T_RRCLASS;
+		}
+
+		/* ttl: accepted only when the whole string was consumed */
+		yylval->ttl = strtottl(in_str, &ttl_end);
+		if (*ttl_end == '\0') {
+			LEXOUT(("TTL "));
+			return T_TTL;
+		}
+		break;
+	}
+	default:
+		break;
+	}
+
+	/* Plain data: hand the grammar an unescaped private copy. */
+	copy = strdup(yytext);
+	if (copy == NULL) {
+		/* Out of memory */
+		ERR_ALLOC_FAILED;
+		return NO_MEM;
+	}
+
+	yylval->data.len = zoctet(copy);
+	yylval->data.str = copy;
+	assert(yylval->data.str != NULL);
+
+	/*
+	 * For these three spellings the copy is released again and data.str
+	 * points at a string literal instead, while data.len keeps the length
+	 * computed above.  Note that the first test looks at the raw yytext
+	 * and the other two at the unescaped copy.
+	 * NOTE(review): the consumer has to know not to free these literals -
+	 * verify against the grammar.
+	 */
+	if (strcmp(yytext, ".") == 0) {
+		free(copy);
+		yylval->data.str=".";
+	} else if (strcmp(copy, "@") == 0) {
+		free(copy);
+		yylval->data.str="@";
+	} else if (strcmp(copy, "\\#") == 0) {
+		free(copy);
+		yylval->data.str="\\#";
+	}
+
+	LEXOUT(("%d[%s] ", token, in_str));
+	return token;
+}
+
+/*
+ * Enters an included file: records the current file's filename, line and
+ * origin together with its flex buffer in the next free include-stack slot,
+ * then makes a new buffer reading from `input` the current one.
+ *
+ * The caller is responsible for checking include_stack_ptr against
+ * MAXINCLUDES before calling.
+ */
+static void push_parser_state(FILE *input, void *scanner)
+{
+	struct yyguts_t *yyg = (struct yyguts_t *)scanner;
+	zparser_type *saved = &zparser_stack[include_stack_ptr];
+	YY_BUFFER_STATE included;
+
+	saved->filename = parser->filename;
+	saved->line = parser->line;
+	saved->origin = parser->origin;
+	include_stack[include_stack_ptr] = YY_CURRENT_BUFFER;
+
+	included = yy_create_buffer(input, YY_BUF_SIZE, scanner);
+	yy_switch_to_buffer(included, scanner);
+
+	++include_stack_ptr;
+}
+
+/*
+ * Leaves an included file: restores filename, line and origin of the
+ * including file from the top include-stack slot, deletes the flex buffer of
+ * the file just finished and switches back to the saved buffer.
+ *
+ * Must only be called while at least one include is active.
+ *
+ * NOTE(review): parser->filename of the finished include (a strdup() copy
+ * made by the $INCLUDE rule) is overwritten here without being freed;
+ * confirm whether anything else still refers to it before releasing it.
+ */
+static void pop_parser_state(void *scanner)
+{
+	struct yyguts_t *yyg = (struct yyguts_t *)scanner;
+
+	/* Popping an empty stack would index the arrays at -1. */
+	assert(include_stack_ptr > 0);
+	--include_stack_ptr;
+
+	parser->filename = zparser_stack[include_stack_ptr].filename;
+	parser->line = zparser_stack[include_stack_ptr].line;
+	parser->origin = zparser_stack[include_stack_ptr].origin;
+
+	yy_delete_buffer(YY_CURRENT_BUFFER, scanner);
+	yy_switch_to_buffer(include_stack[include_stack_ptr], scanner);
+}