From 80b5233e184625c9720c435490c464a21aff2cad Mon Sep 17 00:00:00 2001 From: "ben.franksen" <ben.franksen@online.de> Date: Wed, 10 Mar 2010 01:45:50 +0000 Subject: [PATCH] replaced lex by re2c for lexer generation; almost working --- src/snc/Makefile | 12 +- src/snc/parse.c | 5 +- src/snc/snc_lex.l | 17 +- src/snc/snc_main.c | 81 ++------- src/snc/snc_main.h | 6 +- src/snc/snl.lem | 296 +++++++++++++++------------------ src/snc/snl.re | 407 +++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 583 insertions(+), 241 deletions(-) create mode 100644 src/snc/snl.re diff --git a/src/snc/Makefile b/src/snc/Makefile index 6b6182a2..b9245db8 100644 --- a/src/snc/Makefile +++ b/src/snc/Makefile @@ -4,13 +4,14 @@ include $(TOP)/configure/CONFIG #---------------------------------------- # ADD MACRO DEFINITIONS AFTER THIS LINE -#USE_LEMON = 1 +USE_LEMON = 1 # Internal debug control #USR_CPPFLAGS = -DDEBUG # YACC options (generate y.tab.h; generate y.output) YACCOPT = -d -v +LEXOPT += -s -d # snc product PROD_HOST = snc @@ -21,7 +22,8 @@ endif snc_LIBS += $(EPICS_BASE_IOC_LIBS) ifdef USE_LEMON USR_CFLAGS += -DUSE_LEMON -snc_SRCS += snc_lex.l snl.c +snc_SRCS += snl.c +snc_SRCS += snl_lex.re else snc_SRCS += snc.c endif @@ -66,7 +68,11 @@ snl.c snl.h: snl.lem snl.lt $(LEMON) %.lt: ../%.lt $(CP) $< . -snc_lex$(OBJ) snc_lex.c: snl.h +vpath %.re . .. 
+ +snl_lex.c: snl.re snl.h + re2c -d -o $@ $< + else snc.c: snc_lex.c endif diff --git a/src/snc/parse.c b/src/snc/parse.c index 7780aff6..7ef71c00 100644 --- a/src/snc/parse.c +++ b/src/snc/parse.c @@ -854,10 +854,7 @@ Expr *expression( ep->value = value; ep->left = left; ep->right = right; - if (type == E_TEXT) - ep->line_num = globals->c_line_num; - else - ep->line_num = globals->line_num; + ep->line_num = globals->prev_line_num; ep->src_file = globals->src_file; return ep; diff --git a/src/snc/snc_lex.l b/src/snc/snc_lex.l index 1544e7fa..5dcb6be2 100644 --- a/src/snc/snc_lex.l +++ b/src/snc/snc_lex.l @@ -51,13 +51,22 @@ #define TRUE 1 #define FALSE 0 #endif -/*#define DEBUG 1*/ + #ifdef DEBUG -#define RETURN(param) { fprintf(stderr, "return(" #param ")\n"); \ - return(param); } +#define RETURN(param) {\ + fprintf(stderr, "return(" #param ")\n");\ + globals->prev_line_num = tok_line_num;\ + tok_line_num = globals->line_num;\ + return(param);\ + } #else -#define RETURN(param) return(param) +#define RETURN(param) {\ + globals->prev_line_num = tok_line_num;\ + tok_line_num = globals->line_num;\ + return(param);\ + } #endif + #define STR_BFR_SIZE 1000 static char strBfr[STR_BFR_SIZE]; /* holding place for strings */ diff --git a/src/snc/snc_main.c b/src/snc/snc_main.c index c508d791..fa59964e 100644 --- a/src/snc/snc_main.c +++ b/src/snc/snc_main.c @@ -39,7 +39,7 @@ extern char *sncVersion; /* snc version and date created */ -extern void compile(void); /* defined in snc.y */ +extern void compile(void); /* defined in snl.re */ static Options default_options = { @@ -70,25 +70,10 @@ static void get_in_file(char *s); static void get_out_file(char *s); static void print_usage(void); -/*+************************************************************************ -* NAME: main -* -* CALLING SEQUENCE -* type argument I/O description -* ------------------------------------------------------------- -* int argc I arg count -* char *argv[] I array of ptrs to args -* -* RETURNS: 
n/a -* -* FUNCTION: Program entry. -* -* NOTES: The streams stdin and stdout are redirected to files named in the -* command parameters. This accomodates the use by lex of stdin for input -* and permits printf() to be used for output. Stderr is not redirected. -* -* This routine calls yyparse(), which never returns. -*-*************************************************************************/ +/* The streams stdin and stdout are redirected to files named in the + command parameters. This accommodates the use by the lexer of stdin for input + and permits printf() to be used for output. +*/ int main(int argc, char *argv[]) { FILE *infp, *outfp; @@ -126,51 +111,9 @@ int main(int argc, char *argv[]) return 0; /* never reached */ } -#ifdef USE_LEMON -#include "token.h" - -extern void parser( - void *yyp, /* The parser */ - int yymajor, /* The major token code number */ - Token yyminor, /* The value for the token */ - int line_num -); -extern void *parserAlloc(void *(*mallocProc)(size_t)); -void parserFree( - void *p, /* The parser to be deleted */ - void (*freeProc)(void*) /* Function used to reclaim memory */ -); - -void compile(void) -{ - int tok; - void *pParser = parserAlloc(malloc); - do - { - tok = yylex(); - parser(pParser, tok, yylval, globals->c_line_num); - } - while (tok); - parserFree(pParser, free); -} -#endif - -/*+************************************************************************ -* NAME: get_args -* -* CALLING SEQUENCE -* type argument I/O description -* ----------------------------------------------------------- -* int argc I number of arguments -* char *argv[] I shell command arguments -* RETURNS: n/a -* -* FUNCTION: Get the shell command arguments. -* -* NOTES: If "*.s" is input file then "*.c" is the output file. Otherwise, -* ".c" is appended to the input file to form the output file name. -* Sets the globals in_file[] and out_file[]. 
-*-*************************************************************************/ +/* If "*.s" is input file then "*.c" is the output file. Otherwise, + ".c" is appended to the input file to form the output file name. + Sets the globals in_file[] and out_file[]. */ static void get_args(int argc, char *argv[]) { char *s; @@ -331,8 +274,10 @@ void parse_error(const char *format, ...) report_location(globals->src_file, globals->line_num); va_start(args, format); - report(format, args); + vfprintf(stderr, format, args); va_end(args); + + fprintf(stderr, "\n"); } void report_location(const char *src_file, int line_num) @@ -348,8 +293,10 @@ void report_with_location( report_location(src_file, line_num); va_start(args, format); - report(format, args); + vfprintf(stderr, format, args); va_end(args); + + fprintf(stderr, "\n"); } void report(const char *format, ...) diff --git a/src/snc/snc_main.h b/src/snc/snc_main.h index c12a5f12..44580948 100644 --- a/src/snc/snc_main.h +++ b/src/snc/snc_main.h @@ -17,9 +17,9 @@ typedef struct options Options; struct globals { - char *src_file; /* ptr to (effective) source file name */ - int line_num; /* current src file and line number */ - int c_line_num; /* current src file and line number for c code */ + char *src_file; /* current source file name */ + int line_num; /* current line number */ + int prev_line_num; /* line number for previous token */ Options *options; /* compile & run-time options */ }; typedef struct globals Globals; diff --git a/src/snc/snl.lem b/src/snc/snl.lem index bac19648..089673ea 100644 --- a/src/snc/snl.lem +++ b/src/snc/snl.lem @@ -1,14 +1,15 @@ %include { #include <stdlib.h> #include <string.h> -#include "token.h" +#include <assert.h> + +#include "parse.h" #include "snc_main.h" -#include "assert.h" } %name parser -%extra_argument { int line_num } +// %extra_argument { Globals *globals } %parse_failure { parse_error("giving up"); @@ -20,10 +21,10 @@ exit(1); } -%token_type { Token } +%token_type { char* } 
%default_type { Expr* } -/* +/* Standard C operator table Primary Expression Operators () [] . -> expr++ expr-- left-to-right Unary Operators * & + - ! ~ ++expr --expr (typecast) sizeof() right-to-left Binary Operators * / % left-to-right @@ -41,52 +42,49 @@ Comma , left-to-right */ -/* PRE_OP and POST_OP are pseudo tokens, only for precedence */ +// PRE and POST are pseudo tokens; they exist only for the +// precedence declaration. // We do not support the comma operator, except // in for(;;), where it is built-in. // %left COMMA. -%right EQUAL PLUS_EQUAL MINUS_EQUAL AND_EQUAL OR_EQUAL - DIV_EQUAL MULT_EQUAL MODULO_EQUAL LEFT_EQUAL RIGHT_EQUAL XOR_EQUAL. +%right EQUAL ADDEQ SUBEQ ANDEQ OREQ + DIVEQ MULEQ MODEQ LSHEQ RSHEQ XOREQ. %right QUESTION COLON. -%left OR. -%left AND. -%left BIT_OR. -%left BIT_XOR. +%left OROR. +%left ANDAND. +%left VBAR. +%left CARET. %left AMPERSAND. %left EQ NE. %left GT GE LE LT. -%left L_SHIFT R_SHIFT. -%left PLUS MINUS. -%left ASTERISK SLASH MODULO. -%right NOT AUTO_INCR AUTO_DECR PRE_OP. -%left L_SQ_BRACKET R_SQ_BRACKET POINTER PERIOD POST_OP. -// As we do not support indirect calls, L_PAREN R_PAREN are not listed here */ - -%nonassoc BAD_CHAR DEBUG_PRINT. +%left LSHIFT RSHIFT. +%left ADD SUB. +%left ASTERISK SLASH MOD. +%right NOT INCR DECR PRE. +%left LBRACKET RBRACKET POINTER PERIOD POST. +// LPAREN RPAREN not listed, we do not support indirect calls. program ::= -// pp_codes program_name(pn) program_param(pp) definitions(ds) global_entry_code(en) state_sets(ss) global_exit_code(ex) -// pp_codes c_codes(cc). { program(pn,pp,ds,en,ss,ex,cc); } %type program_name {char*} -program_name(p) ::= PROGRAM NAME(n). { p = n.str; } +program_name(p) ::= PROGRAM NAME(n). { p = n; } %type program_param {char*} program_param(p) ::= - L_PAREN STRING(s) R_PAREN. { p = s.str; } -program_param(p) ::= . { p = ""; } + LPAREN STRCON(x) RPAREN. { p = x; } +program_param(p) ::= . { p = 0; } definitions(p) ::= definitions(xs) definition(x). 
{ p = link_expr(xs, x); @@ -100,41 +98,37 @@ definition(p) ::= syncq(x). { p = x; } definition(p) ::= decl(x). { p = x; } definition(p) ::= option. { p = 0; } definition(p) ::= c_code(x). { p = x; } -// definition(p) ::= pp_code. { p = 0; } - -// definition(p) ::= error SEMI_COLON. { p = 0; snc_err("expected definition"); } -assign(p) ::= ASSIGN NAME(v) to STRING(t) SEMI_COLON. { - p = expression(E_ASSIGN, v.str, 0, expression(E_STRING, t.str, 0, 0)); +assign(p) ::= ASSIGN NAME(v) to string(t) SEMICOLON. { + p = expression(E_ASSIGN, v, 0, t); } -assign(p) ::= ASSIGN NAME(v) subscript(s) to STRING(t) SEMI_COLON. { - p = expression(E_ASSIGN, v.str, expression(E_CONST, s, 0, 0), - expression(E_STRING, t.str, 0, 0)); +assign(p) ::= ASSIGN NAME(v) subscript(s) to string(t) SEMICOLON. { + p = expression(E_ASSIGN, v, expression(E_CONST, s, 0, 0), t); } -assign(p) ::= ASSIGN NAME(v) to L_BRACKET strings(ss) R_BRACKET SEMI_COLON. { - p = expression(E_ASSIGN, v.str, 0, ss); +assign(p) ::= ASSIGN NAME(v) to LBRACE strings(ss) RBRACE SEMICOLON. { + p = expression(E_ASSIGN, v, 0, ss); } -strings(p) ::= strings(xs) COMMA STRING(x). { - p = link_expr(xs, expression(E_STRING, x.str, 0, 0)); +strings(p) ::= strings(xs) COMMA string(x). { + p = link_expr(xs, x); } -strings(p) ::= STRING(x). { - p = expression(E_STRING, x.str, 0, 0); +strings(p) ::= string(x). { + p = x; } -monitor(p) ::= MONITOR NAME(v) opt_subscript(s) SEMI_COLON. { - p = expression(E_MONITOR, v.str, s, 0); +monitor(p) ::= MONITOR NAME(v) opt_subscript(s) SEMICOLON. { + p = expression(E_MONITOR, v, s, 0); } -sync(p) ::= SYNC NAME(v) opt_subscript(s) to NAME(ev) SEMI_COLON. { - p = expression(E_SYNC, v.str, s, expression(E_X, ev.str, 0, 0)); +sync(p) ::= SYNC NAME(v) opt_subscript(s) to NAME(ev) SEMICOLON. { + p = expression(E_SYNC, v, s, expression(E_X, ev, 0, 0)); } -syncq(p) ::= SYNCQ NAME(v) opt_subscript(s) to NAME(ev) syncq_size(n) SEMI_COLON. 
{ - p = expression(E_SYNCQ, v.str, s, expression(E_X, ev.str, n, 0)); +syncq(p) ::= SYNCQ NAME(v) opt_subscript(s) to NAME(ev) syncq_size(n) SEMICOLON. { + p = expression(E_SYNCQ, v, s, expression(E_X, ev, n, 0)); } -syncq_size(p) ::= INTNUM(n). { p = expression(E_CONST, n.str, 0, 0); } +syncq_size(p) ::= INTCON(n). { p = expression(E_CONST, n, 0, 0); } syncq_size(p) ::= . { p = 0; } to ::= TO. @@ -144,24 +138,24 @@ opt_subscript(p) ::= subscript(s). { p = expression(E_CONST, s, 0, 0); } opt_subscript(p) ::= . { p = 0; } %type subscript {char*} -subscript(p) ::= L_SQ_BRACKET INTNUM(n) R_SQ_BRACKET. { p = n.str; } - -decl(p) ::= type(t) NAME(v) SEMI_COLON. - { p = declaration(t, VC_SIMPLE, v.str, NULL, NULL, NULL); } -decl(p) ::= type(t) NAME(v) EQUAL number(n) SEMI_COLON. - { p = declaration(t, VC_SIMPLE, v.str, NULL, NULL, n); } -decl(p) ::= type(t) NAME(v) subscript(s) SEMI_COLON. - { p = declaration(t, VC_ARRAY1, v.str, s, NULL, NULL); } -decl(p) ::= type(t) NAME(v) subscript(s1) subscript(s2) SEMI_COLON. - { p = declaration(t, VC_ARRAY2, v.str, s1, s2, NULL); } -decl(p) ::= type(t) ASTERISK NAME(v) SEMI_COLON. - { p = declaration(t, VC_POINTER, v.str, NULL, NULL, NULL); } -decl(p) ::= type(t) ASTERISK NAME(v) subscript(s) SEMI_COLON. - { p = declaration(t, VC_ARRAYP, v.str, s, NULL, NULL); } +subscript(p) ::= LBRACKET INTCON(n) RBRACKET. { p = n; } + +decl(p) ::= type(t) NAME(v) SEMICOLON. + { p = declaration(t, VC_SIMPLE, v, NULL, NULL, NULL); } +decl(p) ::= type(t) NAME(v) EQUAL number(n) SEMICOLON. + { p = declaration(t, VC_SIMPLE, v, NULL, NULL, n); } +decl(p) ::= type(t) NAME(v) subscript(s) SEMICOLON. + { p = declaration(t, VC_ARRAY1, v, s, NULL, NULL); } +decl(p) ::= type(t) NAME(v) subscript(s1) subscript(s2) SEMICOLON. + { p = declaration(t, VC_ARRAY2, v, s1, s2, NULL); } +decl(p) ::= type(t) ASTERISK NAME(v) SEMICOLON. + { p = declaration(t, VC_POINTER, v, NULL, NULL, NULL); } +decl(p) ::= type(t) ASTERISK NAME(v) subscript(s) SEMICOLON. 
+ { p = declaration(t, VC_ARRAYP, v, s, NULL, NULL); } %type number {char*} -number(p) ::= INTNUM(x). { p = x.str; } -number(p) ::= FPNUM(x). { p = x.str; } +number(p) ::= INTCON(x). { p = x; } +number(p) ::= FPCON(x). { p = x; } %type type {int} type(p) ::= CHAR. { p = V_CHAR; } @@ -174,12 +168,11 @@ type(p) ::= UNSIGNED INT. { p = V_UINT; } type(p) ::= UNSIGNED LONG. { p = V_ULONG; } type(p) ::= FLOAT. { p = V_FLOAT; } type(p) ::= DOUBLE. { p = V_DOUBLE; } -type(p) ::= STRING_DECL. { p = V_STRING; } +type(p) ::= STRING. { p = V_STRING; } type(p) ::= EVFLAG. { p = V_EVFLAG; } -/* option +/-<option>; e.g. option +a; */ -option ::= OPTION PLUS NAME(n) SEMI_COLON. { option_stmt(n.str, 1); } -option ::= OPTION MINUS NAME(n) SEMI_COLON. { option_stmt(n.str, 0); } +option ::= OPTION ADD NAME(n) SEMICOLON. { option_stmt(n, 1); } +option ::= OPTION SUB NAME(n) SEMICOLON. { option_stmt(n, 0); } // State sets and states @@ -192,26 +185,25 @@ global_exit_code(p) ::= . { p = 0; } state_sets(p) ::= state_sets(xs) state_set(x). { p = link_expr(xs, x); } state_sets(p) ::= state_set(x). { p = x; } -state_set(p) ::= STATE_SET NAME(n) L_BRACKET states(xs) R_BRACKET. { - p = expression(E_SS, n.str, xs, 0); +state_set(p) ::= SS NAME(n) LBRACE states(xs) RBRACE. { + p = expression(E_SS, n, xs, 0); } -// state_set(p) ::= pp_code. { p = 0; } states(p) ::= states(xs) state(x). { p = link_expr(xs, x); } states(p) ::= state(x). { p = x; } -state(p) ::= STATE NAME(n) L_BRACKET state_options(os) state_blocks(xs) R_BRACKET. - { p = expression(E_STATE, n.str, xs, os); } +state(p) ::= STATE NAME(n) LBRACE state_options(os) state_blocks(xs) RBRACE. + { p = expression(E_STATE, n, xs, os); } state_options(p) ::= state_options(xs) state_option(x). { p = link_expr(xs, x); } state_options(p) ::= . { p = 0; } -state_option(p) ::= OPTION state_option_value(v) NAME(n) SEMI_COLON. - { p = expression(E_OPTION, n.str, v, 0); } +state_option(p) ::= OPTION state_option_value(v) NAME(n) SEMICOLON. 
+ { p = expression(E_OPTION, n, v, 0); } -state_option_value(p) ::= PLUS. { p = expression(E_X, "+", 0, 0); } -state_option_value(p) ::= MINUS. { p = expression(E_X, "-", 0, 0); } +state_option_value(p) ::= ADD. { p = expression(E_X, "+", 0, 0); } +state_option_value(p) ::= SUB. { p = expression(E_X, "-", 0, 0); } state_blocks(p) ::= entries(xs) transitions(ts) exits(ys). { p = link_expr(link_expr(xs, ts), ys); } @@ -229,88 +221,85 @@ exit(p) ::= EXIT block(xs). { p = expression(E_EXIT, "", 0, xs); } transitions(p) ::= transitions(xs) transition(x). { p = link_expr(xs, x); } transitions(p) ::= transition(x). { p = x; } -transition(p) ::= WHEN L_PAREN opt_expr(c) R_PAREN block(xs) STATE NAME(n). - { p = expression(E_WHEN, n.str, c, xs); } +transition(p) ::= WHEN LPAREN opt_expr(c) RPAREN block(xs) STATE NAME(n). + { p = expression(E_WHEN, n, c, xs); } -block(p) ::= L_BRACKET statements(xs) R_BRACKET.{ p = xs; } +block(p) ::= LBRACE statements(xs) RBRACE. { p = xs; } // Statements statements(p) ::= statements(xs) statement(x). { p = link_expr(xs, x); } statements(p) ::= . { p = 0; } -// statement(p) ::= pp_code. { p = 0; } -statement(p) ::= BREAK SEMI_COLON. { p = expression(E_BREAK, "", 0, 0); } +statement(p) ::= BREAK SEMICOLON. { p = expression(E_BREAK, "", 0, 0); } statement(p) ::= c_code(x). { p = x; } statement(p) ::= block(xs). { p = expression(E_CMPND, "",xs, 0); } -statement(p) ::= IF L_PAREN expr(c) R_PAREN statement(x). +statement(p) ::= IF LPAREN expr(c) RPAREN statement(x). { p = expression(E_IF, "", c, x); } statement(p) ::= ELSE statement(x). { p = expression(E_ELSE, "", x, 0); } -statement(p) ::= WHILE L_PAREN expr(c) R_PAREN statement(x). +statement(p) ::= WHILE LPAREN expr(c) RPAREN statement(x). { p = expression(E_WHILE, "", c, x); } statement(p) ::= for_statement(x). { p = x; } -statement(p) ::= opt_expr(x) SEMI_COLON. { p = expression(E_STMT, "", x, 0); } +statement(p) ::= opt_expr(x) SEMICOLON. 
{ p = expression(E_STMT, "", x, 0); } for_statement(p) ::= - FOR L_PAREN - exprs(init) SEMI_COLON opt_expr(cond) SEMI_COLON exprs(iter) - R_PAREN statement(x). { p = expression(E_FOR, "", + FOR LPAREN + exprs(init) SEMICOLON opt_expr(cond) SEMICOLON exprs(iter) + RPAREN statement(x). { p = expression(E_FOR, "", expression(E_X, "", init, cond), expression(E_X, "", iter, x)); } // Expressions - // Atomic expr(p) ::= number(x). { p = expression(E_CONST, x, 0, 0); } -expr(p) ::= STRING(x). { p = expression(E_STRING, x.str, 0, 0); } -expr(p) ::= NAME(v). { p = expression(E_VAR, v.str, 0, 0); } +expr(p) ::= string(x). { p = x; } +expr(p) ::= NAME(v). { p = expression(E_VAR, v, 0, 0); } // Parenthesized -expr(p) ::= L_PAREN expr(x) R_PAREN. { p = expression(E_PAREN, "", x, 0); } +expr(p) ::= LPAREN expr(x) RPAREN. { p = expression(E_PAREN, "", x, 0); } // Primary Expression and Unary Postfix Operators -expr(p) ::= NAME(f) L_PAREN exprs(xs) R_PAREN. [POST_OP] - { p = expression(E_FUNC, f.str, xs, 0); } -expr(p) ::= EXIT L_PAREN exprs(xs) R_PAREN. [POST_OP] - { p = expression(E_FUNC, "exit", xs, 0); } -expr(p) ::= expr(x) L_SQ_BRACKET expr(y) R_SQ_BRACKET. [POST_OP] +expr(p) ::= NAME(f) LPAREN exprs(xs) RPAREN. [POST] + { p = expression(E_FUNC, f, xs, 0); } +expr(p) ::= EXIT LPAREN exprs(xs) RPAREN. [POST]{ p = expression(E_FUNC, "exit", xs, 0); } +expr(p) ::= expr(x) LBRACKET expr(y) RBRACKET. [POST] { p = expression(E_SUBSCR, "", x, y); } -expr(p) ::= expr(x) PERIOD expr(y). [POST_OP] { p = expression(E_BINOP, "." , x, y); } -expr(p) ::= expr(x) POINTER expr(y). [POST_OP] { p = expression(E_BINOP, "->", x, y); } -expr(p) ::= expr(x) AUTO_INCR. [POST_OP] { p = expression(E_POST, "++", x, 0); } -expr(p) ::= expr(x) AUTO_DECR. [POST_OP] { p = expression(E_POST, "--", x, 0); } +expr(p) ::= expr(x) PERIOD expr(y). [POST] { p = expression(E_BINOP, "." , x, y); } +expr(p) ::= expr(x) POINTER expr(y). [POST] { p = expression(E_BINOP, "->", x, y); } +expr(p) ::= expr(x) INCR. 
[POST] { p = expression(E_POST, "++", x, 0); } +expr(p) ::= expr(x) DECR. [POST] { p = expression(E_POST, "--", x, 0); } // Unary Prefix Operators -expr(p) ::= PLUS expr(x). [PRE_OP] { p = expression(E_PRE, "+", x, 0); } -expr(p) ::= MINUS expr(x). [PRE_OP] { p = expression(E_PRE, "-", x, 0); } -expr(p) ::= ASTERISK expr(x). [PRE_OP] { p = expression(E_PRE, "*", x, 0); } -expr(p) ::= AMPERSAND expr(x). [PRE_OP] { p = expression(E_PRE, "&", x, 0); } -expr(p) ::= NOT expr(x). [PRE_OP] { p = expression(E_PRE, "!", x, 0); } -expr(p) ::= COMPLEMENT expr(x). [PRE_OP] { p = expression(E_PRE, "~", x, 0); } -expr(p) ::= AUTO_INCR expr(x). [PRE_OP] { p = expression(E_PRE, "++", x, 0); } -expr(p) ::= AUTO_DECR expr(x). [PRE_OP] { p = expression(E_PRE, "--", x, 0); } +expr(p) ::= ADD expr(x). [PRE] { p = expression(E_PRE, "+", x, 0); } +expr(p) ::= SUB expr(x). [PRE] { p = expression(E_PRE, "-", x, 0); } +expr(p) ::= ASTERISK expr(x). [PRE] { p = expression(E_PRE, "*", x, 0); } +expr(p) ::= AMPERSAND expr(x). [PRE] { p = expression(E_PRE, "&", x, 0); } +expr(p) ::= NOT expr(x). [PRE] { p = expression(E_PRE, "!", x, 0); } +expr(p) ::= TILDE expr(x). [PRE] { p = expression(E_PRE, "~", x, 0); } +expr(p) ::= INCR expr(x). [PRE] { p = expression(E_PRE, "++", x, 0); } +expr(p) ::= DECR expr(x). [PRE] { p = expression(E_PRE, "--", x, 0); } // Binary Operators, left-to-right -expr(p) ::= expr(x) MINUS expr(y). { p = expression(E_BINOP, "-", x, y); } -expr(p) ::= expr(x) PLUS expr(y). { p = expression(E_BINOP, "+", x, y); } -expr(p) ::= expr(x) ASTERISK expr(y). { p = expression(E_BINOP, "*", x, y); } -expr(p) ::= expr(x) SLASH expr(y). { p = expression(E_BINOP, "/", x, y); } -expr(p) ::= expr(x) GT expr(y). { p = expression(E_BINOP, ">", x, y); } -expr(p) ::= expr(x) GE expr(y). { p = expression(E_BINOP, ">=", x, y); } -expr(p) ::= expr(x) EQ expr(y). { p = expression(E_BINOP, "==", x, y); } -expr(p) ::= expr(x) NE expr(y). 
{ p = expression(E_BINOP, "!=", x, y); } -expr(p) ::= expr(x) LE expr(y). { p = expression(E_BINOP, "<=", x, y); } -expr(p) ::= expr(x) LT expr(y). { p = expression(E_BINOP, "<" , x, y); } -expr(p) ::= expr(x) OR expr(y). { p = expression(E_BINOP, "||", x, y); } -expr(p) ::= expr(x) AND expr(y). { p = expression(E_BINOP, "&&", x, y); } -expr(p) ::= expr(x) L_SHIFT expr(y). { p = expression(E_BINOP, "<<", x, y); } -expr(p) ::= expr(x) R_SHIFT expr(y). { p = expression(E_BINOP, ">>", x, y); } -expr(p) ::= expr(x) BIT_OR expr(y). { p = expression(E_BINOP, "|", x, y); } -expr(p) ::= expr(x) BIT_XOR expr(y). { p = expression(E_BINOP, "^", x, y); } -expr(p) ::= expr(x) AMPERSAND expr(y). { p = expression(E_BINOP, "&", x, y); } -expr(p) ::= expr(x) MODULO expr(y). { p = expression(E_BINOP, "%", x, y); } +expr(p) ::= expr(x) SUB expr(y). { p = expression(E_BINOP, "-", x, y); } +expr(p) ::= expr(x) ADD expr(y).{ p = expression(E_BINOP, "+", x, y); } +expr(p) ::= expr(x) ASTERISK expr(y).{ p = expression(E_BINOP, "*", x, y); } +expr(p) ::= expr(x) SLASH expr(y).{ p = expression(E_BINOP, "/", x, y); } +expr(p) ::= expr(x) GT expr(y).{ p = expression(E_BINOP, ">", x, y); } +expr(p) ::= expr(x) GE expr(y).{ p = expression(E_BINOP, ">=", x, y); } +expr(p) ::= expr(x) EQ expr(y).{ p = expression(E_BINOP, "==", x, y); } +expr(p) ::= expr(x) NE expr(y).{ p = expression(E_BINOP, "!=", x, y); } +expr(p) ::= expr(x) LE expr(y).{ p = expression(E_BINOP, "<=", x, y); } +expr(p) ::= expr(x) LT expr(y).{ p = expression(E_BINOP, "<" , x, y); } +expr(p) ::= expr(x) OROR expr(y).{ p = expression(E_BINOP, "||", x, y); } +expr(p) ::= expr(x) ANDAND expr(y).{ p = expression(E_BINOP, "&&", x, y); } +expr(p) ::= expr(x) LSHIFT expr(y).{ p = expression(E_BINOP, "<<", x, y); } +expr(p) ::= expr(x) RSHIFT expr(y).{ p = expression(E_BINOP, ">>", x, y); } +expr(p) ::= expr(x) VBAR expr(y).{ p = expression(E_BINOP, "|", x, y); } +expr(p) ::= expr(x) CARET expr(y).{ p = expression(E_BINOP, "^", x, y); } 
+expr(p) ::= expr(x) AMPERSAND expr(y).{ p = expression(E_BINOP, "&", x, y); } +expr(p) ::= expr(x) MOD expr(y).{ p = expression(E_BINOP, "%", x, y); } // Ternary Operator, right-to-left expr(p) ::= expr(x) QUESTION expr(y) COLON expr(z). { @@ -318,46 +307,33 @@ expr(p) ::= expr(x) QUESTION expr(y) COLON expr(z). { } // Assignment Operators, right-to-left -expr(p) ::= expr(x) EQUAL expr(y). { p = expression(E_BINOP, "=" , x, y); } -expr(p) ::= expr(x) PLUS_EQUAL expr(y). { p = expression(E_BINOP, "+=" , x, y); } -expr(p) ::= expr(x) MINUS_EQUAL expr(y). { p = expression(E_BINOP, "-=" , x, y); } -expr(p) ::= expr(x) AND_EQUAL expr(y). { p = expression(E_BINOP, "&=" , x, y); } -expr(p) ::= expr(x) OR_EQUAL expr(y). { p = expression(E_BINOP, "|=" , x, y); } -expr(p) ::= expr(x) DIV_EQUAL expr(y). { p = expression(E_BINOP, "/=" , x, y); } -expr(p) ::= expr(x) MULT_EQUAL expr(y). { p = expression(E_BINOP, "*=" , x, y); } -expr(p) ::= expr(x) MODULO_EQUAL expr(y). { p = expression(E_BINOP, "%=" , x, y); } -expr(p) ::= expr(x) LEFT_EQUAL expr(y). { p = expression(E_BINOP, "<<=", x, y); } -expr(p) ::= expr(x) RIGHT_EQUAL expr(y). { p = expression(E_BINOP, ">>=", x, y); } -expr(p) ::= expr(x) XOR_EQUAL expr(y). 
{ p = expression(E_BINOP, "^=" , x, y); } +expr(p) ::= expr(x) EQUAL expr(y).{ p = expression(E_BINOP, "=" , x, y); } +expr(p) ::= expr(x) ADDEQ expr(y).{ p = expression(E_BINOP, "+=" , x, y); } +expr(p) ::= expr(x) SUBEQ expr(y).{ p = expression(E_BINOP, "-=" , x, y); } +expr(p) ::= expr(x) ANDEQ expr(y).{ p = expression(E_BINOP, "&=" , x, y); } +expr(p) ::= expr(x) OREQ expr(y).{ p = expression(E_BINOP, "|=" , x, y); } +expr(p) ::= expr(x) DIVEQ expr(y).{ p = expression(E_BINOP, "/=" , x, y); } +expr(p) ::= expr(x) MULEQ expr(y).{ p = expression(E_BINOP, "*=" , x, y); } +expr(p) ::= expr(x) MODEQ expr(y).{ p = expression(E_BINOP, "%=" , x, y); } +expr(p) ::= expr(x) LSHEQ expr(y).{ p = expression(E_BINOP, "<<=", x, y); } +expr(p) ::= expr(x) RSHEQ expr(y).{ p = expression(E_BINOP, ">>=", x, y); } +expr(p) ::= expr(x) XOREQ expr(y).{ p = expression(E_BINOP, "^=" , x, y); } // Comma, left-to-right, not supported -// expr(p) ::= expr(x) COMMA expr(y). { p = expression(E_BINOP, "," , x, y); } +// expr(p) ::= expr(x) COMMA expr(y).{ p = expression(E_BINOP, "," , x, y); } -opt_expr(p) ::= expr(x). { p = x; } -opt_expr(p) ::= . { p = 0; } +opt_expr(p) ::= expr(x). { p = x; } +opt_expr(p) ::= . { p = 0; } -exprs(p) ::= exprs(xs) COMMA expr(x). { p = link_expr(xs, x); } -exprs(p) ::= expr(x). { p = x; } -exprs(p) ::= . { p = 0; } +exprs(p) ::= exprs(xs) COMMA expr(x). { p = link_expr(xs, x); } +exprs(p) ::= expr(x). { p = x; } +exprs(p) ::= . { p = 0; } -// Literal (C) code - -c_codes(p) ::= c_codes(xs) c_code(x). { p = link_expr(xs, x); } -c_codes(p) ::= . { p = 0; } +string(p) ::= STRCON(x). { p = expression(E_STRING, x, 0, 0); } -c_code(p) ::= C_STMT(x). { p = c_code(x.str, line_num); } - -// Preprocessor code +// Literal (C) code -// pp_codes ::= pp_codes pp_code. -// pp_codes ::= . +c_codes(p) ::= c_codes(xs) c_code(x). { p = link_expr(xs, x); } +c_codes(p) ::= . { p = 0; } -// /* pre-processor code (e.g. 
# 1 "test.st") */ -// pp_code ::= PP_SYMBOL INTNUM(line) STRING(fname) CR. { - // globals->line_num = atoi(line.str); - // globals->src_file = fname.str; -// } -// pp_code ::= PP_SYMBOL INTNUM(line) CR. { - // globals->line_num = atoi(line.str); -// } -// pp_code ::= PP_SYMBOL STRING CR. /* Silently consume #pragma lines */ +c_code(p) ::= CCODE(x). { p = expression(E_TEXT, x, 0, 0); } diff --git a/src/snc/snl.re b/src/snc/snl.re new file mode 100644 index 00000000..30f7eab7 --- /dev/null +++ b/src/snc/snl.re @@ -0,0 +1,407 @@ +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <stdarg.h> + +#include "snl.h" +#include "snc_main.h" + +#define EOI 0 + +typedef unsigned int uint; +typedef unsigned char uchar; + +#define BSIZE 8192 + +#define YYCTYPE uchar +#define YYCURSOR cursor +#define YYLIMIT s->lim +#define YYMARKER s->ptr +#define YYFILL cursor = fill(s, cursor); +#define YYDEBUG(state, current) fprintf(stderr, "state = %d, current = %c\n", state, current); + +#define RET(i) {s->cur = cursor; return i;} + +typedef struct Scanner { + int fd; /* file descriptor */ + uchar *bot; /* pointer to bottom (start) of buffer */ + uchar *tok; /* pointer to start of current token */ + uchar *end; /* pointer to end of token (or 0, then use cur) */ + uchar *ptr; /* marker for backtracking (always > tok) */ + uchar *cur; /* saved scan position between calls to scan() */ + uchar *lim; /* pointer to one position after last read char */ + uchar *top; /* pointer to (one after) top of allocated buffer */ + uchar *eof; /* pointer to (one after) last char in file (or 0) */ + char *file; /* source file name */ + uint line; /* line number */ +} Scanner; + +static void scan_report(Scanner *s, const char *format, ...) 
+{ + va_list args; + + report_location(s->file, s->line); + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); +} + +/* +From the re2c docs: + + The generated code "calls" YYFILL(n) when the buffer needs (re)filling: at + least n additional characters should be provided. YYFILL(n) should adjust + YYCURSOR, YYLIMIT, YYMARKER and YYCTXMARKER as needed. Note that for typical + programming languages n will be the length of the longest keyword plus one. + +We also add a '\n' byte at the end of the file as sentinel. +*/ +static uchar *fill(Scanner *s, uchar *cursor) { + /* does not touch s->cur, instead works with argument cursor */ + if (!s->eof) { + uint read_cnt; /* number of bytes read */ + uint garbage = s->tok - s->bot; /* number of garbage bytes */ + uint valid = s->lim - s->tok; /* number of still valid bytes to copy */ + uchar *token = s->tok; /* start of valid bytes */ + uint space = (s->top - s->lim) + garbage; + /* remaining space after garbage collection */ + int need_alloc = space < BSIZE; /* do we need to allocate a new buffer? 
*/ + + /* anything below s->tok is garbage, collect it */ + if (garbage) { + if (!need_alloc) { + /* shift valid buffer content down to bottom of buffer */ + memcpy(s->bot, token, valid); + } + /* adjust pointers */ + s->tok = s->bot; /* same as s->tok -= garbage */ + s->ptr -= garbage; + cursor -= garbage; + s->lim -= garbage; + /* invariant: s->bot, s->top, s->eof, s->lim - s->tok */ + } + /* increase the buffer size if necessary, ensuring that we have + at least BSIZE bytes of free space to fill (after s->lim) */ + if (need_alloc) { + uchar *buf = (uchar*) malloc((s->lim - s->bot + BSIZE)*sizeof(uchar)); + memcpy(buf, token, valid); + s->tok = buf; + s->ptr = &buf[s->ptr - s->bot]; + cursor = &buf[cursor - s->bot]; + s->lim = &buf[s->lim - s->bot]; + s->top = s->lim + BSIZE; + free(s->bot); + s->bot = buf; + } + /* fill the buffer, starting at s->lim, by reading a chunk of + BSIZE bytes (or less if eof is encountered) */ + if ((read_cnt = read(s->fd, (char*) s->lim, BSIZE)) != BSIZE) { + s->eof = &s->lim[read_cnt]; + /* insert sentinel and increase s->eof */ + *(s->eof)++ = '\n'; + } + s->lim += read_cnt; /* adjust limit */ + } + return cursor; +} + +/*!re2c + re2c:yyfill:parameter = 0; + + ANY = .|"\n"; + SPC = [ \t]; + OCT = [0-7]; + DEC = [0-9]; + LET = [a-zA-Z_]; + HEX = [a-fA-F0-9]; + EXP = [Ee] [+-]? 
DEC+; + FS = [fFlL]; + IS = [uUlL]*; + ESC = [\\] ([abfnrtv?'"\\] | "x" HEX+ | OCT+); +*/ + +static int scan(Scanner *s) { + uchar *cursor = s->cur; + uchar *str_end = 0; + + s->end = 0; +snl: + s->tok = cursor; + +/*!re2c + "\n" { + if(cursor == s->eof) RET(EOI); + s->line++; + goto snl; + } + ["] { + s->tok = cursor; + goto string_const; + } + "/*" { goto comment; } + "#" SPC* { + s->tok = cursor; + goto line_marker; + } + "%{" { + s->tok = cursor; + goto c_code; + } + ("%%" .*) { + s->tok += 2; + RET(CCODE); + } + "assign" { RET(ASSIGN); } + "break" { RET(BREAK); } + "char" { RET(CHAR); } + "double" { RET(DOUBLE); } + "else" { RET(ELSE); } + "entry" { RET(ENTRY); } + "evflag" { RET(EVFLAG); } + "exit" { RET(EXIT); } + "float" { RET(FLOAT); } + "for" { RET(FOR); } + "if" { RET(IF); } + "int" { RET(INT); } + "long" { RET(LONG); } + "monitor" { RET(MONITOR); } + "option" { RET(OPTION); } + "program" { RET(PROGRAM); } + "short" { RET(SHORT); } + "ss" { RET(SS); } + "state" { RET(STATE); } + "string" { RET(STRING); } + "syncQ" { RET(SYNCQ); } + "sync" { RET(SYNC); } + "to" { RET(TO); } + "unsigned" { RET(UNSIGNED); } + "when" { RET(WHEN); } + "while" { RET(WHILE); } + "TRUE" { RET(INTCON); } + "FALSE" { RET(INTCON); } + "ASYNC" { RET(INTCON); } + "SYNC" { RET(INTCON); } + LET (LET|DEC)* { RET(NAME); } + ("0" [xX] HEX+ IS?) | ("0" DEC+ IS?) | (DEC+ IS?) | (['] (ESC|ANY\[\n\\'])* [']) + { RET(INTCON); } + + (DEC+ EXP FS?) | (DEC* "." DEC+ EXP? FS?) | (DEC+ "." DEC* EXP? FS?) 
+ { RET(FPCON); } + + ">>=" { RET(RSHEQ); } + "<<=" { RET(LSHEQ); } + "+=" { RET(ADDEQ); } + "-=" { RET(SUBEQ); } + "*=" { RET(MULEQ); } + "/=" { RET(DIVEQ); } + "%=" { RET(MODEQ); } + "&=" { RET(ANDEQ); } + "^=" { RET(XOREQ); } + "|=" { RET(OREQ); } + ">>" { RET(RSHIFT); } + "<<" { RET(LSHIFT); } + "++" { RET(INCR); } + "--" { RET(DECR); } + "->" { RET(POINTER); } + "&&" { RET(ANDAND); } + "||" { RET(OROR); } + "<=" { RET(LE); } + ">=" { RET(GE); } + "==" { RET(EQ); } + "!=" { RET(NE); } + ";" { RET(SEMICOLON); } + "{" { RET(LBRACE); } + "}" { RET(RBRACE); } + "," { RET(COMMA); } + ":" { RET(COLON); } + "=" { RET(EQUAL); } + "(" { RET(LPAREN); } + ")" { RET(RPAREN); } + "[" { RET(LBRACKET); } + "]" { RET(RBRACKET); } + "." { RET(PERIOD); } + "&" { RET(AMPERSAND); } + "!" { RET(NOT); } + "~" { RET(TILDE); } + "-" { RET(SUB); } + "+" { RET(ADD); } + "*" { RET(ASTERISK); } + "/" { RET(SLASH); } + "%" { RET(MOD); } + "<" { RET(LT); } + ">" { RET(GT); } + "^" { RET(CARET); } + "|" { RET(VBAR); } + "?" 
{ RET(QUESTION); } + [ \t\v\f]+ { goto snl; } + ANY { scan_report(s, "invalid character\n"); RET(EOI); } +*/ + +string_const: +/*!re2c + (ESC | [^"\n\\])* + { goto string_const; } + ["] { + str_end = cursor - 1; + goto string_cat; + } + ANY { scan_report(s, "invalid character in string constant\n"); RET(EOI); } +*/ + +string_cat: +/*!re2c + SPC+ { goto string_cat; } + "\n" { + if (cursor == s->eof) { + s->end = str_end; + cursor -= 1; + RET(STRCON); + } + s->line++; + goto string_cat; + } + ["] { + uint len = str_end - s->tok; + memmove(cursor - len, s->tok, len); + s->tok = cursor - len; + goto string_const; + } + ANY { + s->end = str_end; + cursor -= 1; + RET(STRCON); + } +*/ + +line_marker: +/*!re2c + DEC+SPC* { + s->line = atoi((char*)s->tok) - 1; + s->tok = cursor; + goto line_marker_str; + } + ANY { goto line_marker_skip; } +*/ + +line_marker_str: +/*!re2c + (["] (ESC|ANY\[\n\\"])* ["]) + { + cursor[-1] = 0; + if (!s->file) { + s->file = strdup((char *)(s->tok + 1)); + } else if (s->file && strcmp((char*)s->tok, s->file) != 0) { + free(s->file); + s->file = strdup((char *)(s->tok + 1)); + } + goto line_marker_skip; + } + "\n" { + if (cursor == s->eof) { + s->end = str_end; + cursor -= 1; + RET(STRCON); + } + s->line++; + goto string_cat; + } + . { goto line_marker_skip; } +*/ + +line_marker_skip: +/*!re2c + .* { goto snl; } + "\n" { cursor -= 1; goto snl;} +*/ + +comment: +/*!re2c + "*/" { goto snl; } + . { goto comment; } + "\n" { + if (cursor == s->eof) { + scan_report(s, "at eof: unterminated comment\n"); + RET(EOI); + } + s->tok = cursor; + s->line++; + goto comment; + } +*/ + +c_code: +/*!re2c + "}%" { + s->end = cursor - 2; + RET(CCODE); + } + . 
{ goto c_code; } + "\n" { + if (cursor == s->eof) { + scan_report(s, "at eof: unterminated literal c-code section\n"); + RET(EOI); + } + s->line++; + goto c_code; + } +*/ +} + +#ifdef TEST_LEXER +int main() { + Scanner s; + int t; + memset((char*) &s, 0, sizeof(s)); + s.fd = 0; + s.cur = fill(&s, s.cur); + s.line = 1; + while( (t = scan(&s)) != EOI) { + if (!s.end) s.end = s.cur; + printf("%s:%d: %2d\t£%.*s£\n", s.file, s.line, t, s.end - s.tok, s.tok); + } + close(s.fd); +} +#else + +extern void parser( + void *yyp, /* the parser */ + int yymajor, /* the major token code number */ + char *yyminor /* the value for the token */ +); +extern void *parserAlloc(void *(*mallocProc)(size_t)); +void parserFree( + void *p, /* the parser to be deleted */ + void (*freeProc)(void*) /* function used to reclaim memory */ +); + +void compile(void) +{ + Scanner s; + int t; + char *x; + + bzero(&s, sizeof(s)); + s.cur = fill(&s, s.cur); /* otherwise scanner crashes in debug mode */ + s.line = 1; + + void *pParser = parserAlloc(malloc); + do + { + globals->prev_line_num = s.line; + t = scan(&s); + globals->src_file = s.file; + globals->line_num = s.line; + + if (!s.end) s.end = s.cur; + scan_report(&s,"%2d\t£%.*s£\n", t, s.end - s.tok, s.tok); + x = malloc(s.end - s.tok + 1); + memcpy(x,s.tok,s.end - s.tok); + x[s.end - s.tok] = 0; + parser(pParser, t, x); + } + while (t); + parserFree(pParser, free); +} + +#endif -- GitLab