2017-04-16 3 views
0

Je voudrais comprendre pourquoi BISON concatène deux jetons dans la règle suivante. Grammaire BISON + FLEX : pourquoi les jetons sont-ils concaténés ?

stmt: 
    declaration     { ... } 
    | assignment    { ... } 
    | exp      { ... } 
    | ID ';' <-- this rule  { ...  
           fprintf(stderr, "\n my id is '%s'", $1); 
           ... 

Si vous regardez la sortie, vous verrez ce que je veux dire. J'exécute mon analyseur et je saisis les caractères ab; dans le programme. Selon ma grammaire Bison, cela devrait être analysé comme un ID suivi d'un ;. Et, dans une certaine mesure, c'est bien ce qui se passe.

Cependant, lorsque j'essaie d'utiliser la variable $1 de la règle ID ';', le programme m'affiche ab; au lieu de ab.

l'exécution du programme

ab;         <-- this my input to the program 

#TOKEN 294[ID] yytext -> ab 
Next token is token "identifier" (1.1:) 
Shifting token "identifier" (1.1:) 
Entering state 5 
Reading a token: 
#TOKEN 59[] yytext -> ; 
Next token is token ';' (1.1:) 
Shifting token ';' (1.1:) 
Entering state 16 
Reducing stack by rule 6 (line 133): 
    $1 = token "identifier" (1.1:)  <-- first token which is 'ab' 
    $2 = token ';' (1.1:)    <-- second token which is ';' 

[stmt] 4: 
my id is 'ab;'      <-- the issue! This should be 'ab' not 'ab;' 
ERROR: No such ID ab; found 
-> $$ = nterm stmt (1.1:) 
Stack now 0 1 
Entering state 10 
Reducing stack by rule 2 (line 126): 
    $1 = nterm prog (1.1:) 
    $2 = nterm stmt (1.1:) 
-> $$ = nterm prog (1.1:) 
Stack now 0 
Entering state 1 
Reading a token: 

grammaire

%{ 
#include <stdio.h> 
#include <string> 
#include <map> 
#include <math.h> 
#include "noname-parse.h" 
#include "noname-types.h" 

extern int yylex(void); 
extern void yyerror(const char *error_msg); 
extern void division_by_zero(YYLTYPE &yylloc); 


std::map<std::string, symrec*> symbol_table; 
std::map<std::string, symrec*>::iterator symbol_table_it; 
%} 

////////////////////////////////////////////////// 
///////////* Bison declarations. */////////////// 
////////////////////////////////////////////////// 

%union { 

    char* id_v; 
    double double_v; 
    long long_v; 

    symrecv symrecv; 
    char* error_msg; 
}; 

%{ 

    bool symbol_exist(const char* key) { 
    std::string skey = key; 
    symbol_table_it = symbol_table.find(skey); 
    return (symbol_table_it != symbol_table.end()); 
    } 

    void symbol_insert(const char* key, symrecv symrecv) { 
    std::string skey = key; 
    symbol_table[skey] = symrecv; 
    } 

    symrecv symbol_retrieve(const char* key) { 
    std::string skey = key; 
    return symbol_table[skey]; 
    } 

    void print_stmt(symrecv sym) { 

    if (sym->type == TYPE_LONG) { 
     fprintf(stderr, "%d", sym->value.intv); 

    } else if (sym->type == TYPE_DOUBLE) { 
     fprintf(stderr, "%lf", sym->value.doublev); 

    } else { 
     fprintf(stderr, "print not implemented for type %d", sym->type); 
    } 
    } 
%} 

%token LINE_BREAK   "line_break"    
// %token ';'    "stmt_sep"   
%token LETTER    "letter"   
%token DIGIT     "digit"   
%token DIGITS    "digits"   
%token DARROW    "darrow"   
%token ELSE     "else"  
%token FALSE     "false"   
%token IF     "if"  
%token IN     "in"  
%token LET     "let"  
%token LOOP     "loop"  
%token THEN     "then"  
%token WHILE     "while"   
%token BREAK     "break"   
%token CASE     "case"  
%token NEW     "new"  
%token NOT     "not"  
%token TRUE     "true"  
%token NEWLINE    "newline"   
%token NOTNEWLINE   "notnewline"    
%token WHITESPACE   "whitespace"    
%token LE     "le"  
%token ASSIGN    "assign"   
%token NULLCH    "nullch"   
%token BACKSLASH    "backslash"    
%token STAR     "star"  
%token NOTSTAR    "notstar"   
%token LEFTPAREN    "leftparen"    
%token NOTLEFTPAREN   "notleftparen"    
%token RIGHTPAREN   "rightparen"    
%token NOTRIGHTPAREN   "notrightparen"     
%token LINE_COMMENT   "line_comment"    
%token START_COMMENT   "start_comment"     
%token END_COMMENT   "end_comment"    
%token QUOTES    "quotes"   
%token ERROR     "error" 

%token <id_v> ID    "identifier" 
%token <double_v> DOUBLE  "double" 
%token <long_v> LONG   "long" 
%type <symrecv> assignment "assignment" 
%type <symrecv> declaration "declaration" 
%type <symrecv> exp   "expression" 
%type <symrecv> stmt  "statement" 

%left '-' '+' 
%left '*' '/' 
%left LET ID 
%right '^'  /* exponentiation */ 
%precedence NEG /* negation--unary minus */ 

%start prog 

%% 

////////////////////////////////////////////////// 
///////////* The grammar follows. */////////////// 
////////////////////////////////////////////////// 

prog: 
    %empty 
    | prog stmt 
; 

stmt: 
    declaration  { fprintf(stderr, "\n[stmt] 2: "); print_stmt($1); } 
    | assignment  { fprintf(stderr, "\n[stmt] 3: "); print_stmt($1); } 
    | exp    { fprintf(stderr, "\n[stmt] 1: "); print_stmt($1); } 
    | ID ';'   { fprintf(stderr, "\n[stmt] 4: "); 

    fprintf(stderr, "\n my id is '%s'", $1); 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (!symbol_exist($1)) { 

     char buf[1024]; 
     sprintf(buf, "No such ID %s found", $1); 
     yyerror(buf); 

    } else { 

     $$->name = $1; 
     $$->value.doublev = symbol_retrieve($1)->value.doublev; 
     printf("\nID %s -> %lf", $1, $$->value.doublev); 
    } 
    } 
    | error   { printf("%d:%d", @1.first_column, @1.last_column); } 
; 

assignment: 
    ID ASSIGN exp ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (!symbol_exist($1)) { 

     char buf[1024]; 
     sprintf(buf, "No such ID %s found", $1); 
     yyerror(buf); 

    } else { 

     $$->name = $1; 
     $$->type = $3->type; 
     $$->value.doublev = $3->value.doublev; 
     symbol_insert($1, $$); 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[assignment]"); 
    } 
    } 
    | LET ID ASSIGN exp ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (symbol_exist($2)) { 

     char buf[1024]; 
     sprintf(buf, "Cannot redefine ID %s", $2); 
     yyerror(buf); 

    } else { 

     $$->name = $2; 
     $$->type = $4->type; 
     $$->value.doublev = $4->value.doublev; 
     symbol_insert($2, $$); 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[assignment]"); 
    } 
    } 
; 

declaration: 
    LET ID ';' { 

    $$ = (symrec *) malloc (sizeof (symrec)); 

    if (symbol_exist($2)) { 

     char buf[1024]; 
     sprintf(buf, "Cannot redefine ID %s", $2); 
     yyerror(buf); 

    } else { 

     $$->name = $2; 
     // $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     symbol_insert($2, $$); 
     // $$->value.doublev = symbol_table_it->second->value.doublev; 
     // printf("\nID %s -> %lf", $1, $$->value.doublev); 
     printf("\n[declaration]"); 
    } 
    } 
; 

exp: 
    LONG { 
    $$ = (symrec *) malloc (sizeof (symrec)); 
    $$->name = (char*) "__annon"; 
    $$->type = TYPE_LONG; 
    $$->value.intv = $1; 
    printf("\nexp %ld", $1); 
    } 
    | DOUBLE { 
    $$ = (symrec *) malloc (sizeof (symrec)); 
    $$->name = (char*) "__annon"; 
    $$->type = TYPE_DOUBLE; 
    $$->value.doublev = $1; 
    printf("\nexp %lf", $1); 
    } 
    | exp '+' exp  { 
     // $$ = $1 + $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev + $3->value.doublev; 
     printf("\nexp + exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '-' exp  { 
     // $$ = $1 - $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev - $3->value.doublev; 
     printf("\nexp - exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '*' exp  { 
     // $$ = $1 * $3; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 
     $$->value.doublev = $1->value.doublev * $3->value.doublev; 
     printf("\nexp * exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | exp '/' exp { 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type == TYPE_DOUBLE || $3->type == TYPE_DOUBLE ? TYPE_DOUBLE : $1->type; 

     if ($3->value.doublev) { 
     // $$ = $1/$3; 
     $$->value.doublev = $1->value.doublev/$3->value.doublev; 
     } else { 
     // $$ = $1; 
     $$->value.doublev = $1->value.doublev; 
     division_by_zero(@3); 
     } 
     printf("\nexp/exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | '-' exp %prec NEG { 
     /** 
     * The %prec simply instructs Bison that the rule ‘| '-' exp’ 
     * has the same precedence as NEG—in this case the next-to-highest 
     */ 
     // $$ = -($2->value.doublev); 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $2->type; 
     $$->value.doublev = -$2->value.doublev; 
     printf("\nexp^exp %lf", $2->value.doublev); 
    } 
    | exp '^' exp  { 
     //$$ = pow($1->value.doublev, $3->value.doublev); 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $1->type; 
     $$->value.doublev = pow($1->value.doublev, $3->value.doublev); 
     printf("\nexp^exp %lf %lf", $1->value.doublev, $3->value.doublev); 
    } 
    | '(' exp ')'  { 
     // $$ = $2->value.doublev; 
     $$ = (symrec *) malloc (sizeof (symrec)); 
     $$->name = (char*) "__annon"; 
     $$->type = $2->type; 
     $$->value.doublev = $2->value.doublev; 
     printf("\n(exp) %lf", $2->value.doublev); 
    } 
    | error     { printf("\nERROR on exp rule"); } 
    ; 
%% 

lexer

%{ 
    #include "stdio.h" 
    #include "stdlib.h" 
    #include "string.h" 
    #include "lexer-utilities.h" 
    #include "noname-parse.h" 
    #include "noname-types.h" 

    int num_lines = 0, num_chars = 0; 
    extern YYSTYPE yylval; 
    extern void yyerror(char const *s); 

    extern int curr_lineno; 
    extern int verbose_flag; 

    unsigned int comment = 0; 
%} 

%option noyywrap 
    // %option noyywrap nounput batch debug yylineno 
    // %option warn noyywrap nodefault yylineno reentrant bison-bridge 

%x COMMENT 
%x STRING 

LINE_BREAK  \n 
LETTER   [a-zA-Z] 
ALPHA   [a-zA-Z$_] 
DIGIT   [0-9] 
DIGITS   {DIGIT}+ 
LONG   {DIGIT}+ 
DOUBLE   {DIGIT}+(\.{DIGIT}+)? 
ID    {ALPHA}({ALPHA}|{DIGIT})* 

ELSE   [eE][lL][sS][eE] 
FALSE   f[aA][lL][sS][eE] 
IF    [iI][fF] 
IN    [iI][nN] 
LET    [lL][eE][tT] 
LOOP   [lL][oO][oO][pP] 
THEN   [tT][hH][eE][nN] 
WHILE   [wW][hH][iI][lL][eE] 
BREAK   [bB][rR][eE][aA][kK] 
CASE   [cC][aA][sS][eE] 
NEW    [nN][eE][wW] 
NOT    [nN][oO][tT] 
TRUE   t[rR][uU][eE] 
NEWLINE   [\n] 
NOTNEWLINE  [^\n] 
WHITESPACE  [ \t\r\f\v]+ 
ASSIGN   = 
LE    <= 
DARROW   => 
NULLCH   [\0] 
BACKSLASH  [\\] 
STAR   [*] 
NOTSTAR   [^*] 
LEFTPAREN  [(] 
NOTLEFTPAREN [^(] 
RIGHTPAREN  [)] 
NOTRIGHTPAREN [^)] 

LINE_COMMENT "--" 
START_COMMENT "/*" 
END_COMMENT  "*/" 

QUOTES   \" 


%% 

{LINE_BREAK}     { 
            ++num_chars; 
            ++num_lines; 
           } 

{START_COMMENT} { 
    comment++; 
    BEGIN(COMMENT); 
} 

<COMMENT><<EOF>> { 
    yylval.error_msg = "EOF in comment"; 
    BEGIN(INITIAL); 
    return (ERROR); 
} 

<COMMENT>{BACKSLASH}(.|{NEWLINE}) { 
    backslash_common(); 
}; 

<COMMENT>{BACKSLASH}    ; 

<COMMENT>{START_COMMENT} { 
    comment++; 
} 

<COMMENT>{END_COMMENT} { 
    comment--; 
    if (comment == 0) { 
    BEGIN(INITIAL); 
    } 
} 

<COMMENT>.      { ++num_chars; } 

<INITIAL>{END_COMMENT} { 
    yylval.error_msg = "Unmatched */"; 
    return (ERROR); 
} 

<*>{WHITESPACE}     { ++num_chars; } 
<INITIAL>{ASSIGN}    { return (ASSIGN); } 
<INITIAL>{ELSE}     { return (ELSE); } 
<INITIAL>{IF}     { return (IF); } 
<INITIAL>{IN}     { return (IN); } 
<INITIAL>{LET}     { return (LET); } 
<INITIAL>{THEN}     { return (THEN); } 
<INITIAL>{WHILE}     { return (WHILE); } 
<INITIAL>{CASE}     { return (CASE); } 
<INITIAL>{NEW}     { return (NEW); } 
<INITIAL>{NOT}     { return (NOT); } 
<INITIAL>{ID}  { 
    yylval.id_v = yytext; 
    return (ID); } 
<INITIAL>{LONG}  { 
    yylval.long_v = atoi(yytext); 
    return (LONG); } 
<INITIAL>{DOUBLE} { 
    yylval.double_v = atof(yytext); 
    return (DOUBLE); } 

<INITIAL>","      { return int(','); } 
<INITIAL>":"      { return int(':'); } 
<INITIAL>"{"      { return int('{'); } 
<INITIAL>"}"      { return int('}'); } 
<INITIAL>"+"      { return int('+'); } 
<INITIAL>"-"      { return int('-'); } 
<INITIAL>"*"      { return int('*'); } 
<INITIAL>"/"      { return int('/'); } 
<INITIAL>"<"      { return int('<'); } 
<INITIAL>"~"      { return int('~'); } 
<INITIAL>"."      { return int('.'); } 
<INITIAL>"@"      { return int('@'); } 
<INITIAL>"("      { return int('('); } 
<INITIAL>")"      { return int(')'); } 
<INITIAL>"&"      { return int('&'); } 
<INITIAL>";"      { return int(';'); } 

<INITIAL>. { 
    printf("lexer error '%s'", yytext); 
    yylval.error_msg = yytext; return 0; 
    } 

%% 

Répondre

1

Cette action flex est incorrecte :

yylval.id_v = yytext; 

yytext pointe vers un tampon de travail interne. Son contenu change à chaque appel du scanner. Donc, si vous voulez conserver la chaîne qui compose le jeton, vous devez la copier dans votre propre espace de stockage, par exemple avec strdup. (N'oubliez pas de libérer la mémoire allouée lorsque vous n'en avez plus besoin.)