2016-11-05 1 views
-2

Voici mon interpréteur actuel, écrit en Python, qui utilise des règles d'analyse pour lire des entrées puis afficher la valeur des expressions. L'interpréteur fonctionne bien, mais je voudrais remplacer certaines de mes règles de grammaire actuelles par de nouvelles règles. Jusqu'à présent, je n'ai réussi à mettre en place qu'une partie des changements de grammaire voulus. Comment réécrire/modifier la structure grammaticale de mon interpréteur actuel ? Je ne suis pas très à l'aise avec Python.

Ce sont les changements que je veux faire de ma grammaire actuelle:

# <stmt-list> ::= empty | <stmt> <stmt-list> 
to 
# <stmt_list> ::= <stmt> | <stmt> <stmt_list> 


# <factor> ::= id | intnum | (<expr>) 
to 
# <base> ::= (<expr>) | id | number 


<stmt> ::= id = <expr> ; | print <expr>; 
to 
<stmt> ::= id = <expr> ; | iprint <expr> ; | rprint <expr> ; 

Je ne sais pas non plus comment mettre en œuvre les nouvelles règles de grammaire ci-dessous dans mon interpréteur ; il est possible que j'en aie déjà une partie ?

<prog> ::= <decl_list> <stmt_list> 
<decl-list> ::= <decl> | <decl> <decl_list> 
<decl> ::= <type> <id_list> ; 
<type> ::= int | real 
<id_list> ::= id | id {, <id_list>} 

Ceci est mon code actuel pour ma grammaire actuelle:

import sys 

# Symbol table shared by the parser functions: maps a variable name to its
# (value, type) tuple.  No `global` statement is needed here: at module level
# it has no effect, it only matters inside a function body.
varTable = {}

# Lexer/parser state shared through the `global` declarations in the
# functions below.  main() re-initialises it before parsing; defining it here
# makes the names exist as soon as the module is loaded.
itProgram = None   # iterator over the characters of the program text
nextChar = ""      # one-character look-ahead
nextToken = ""     # kind of the current token ("ID", "INT", "SEMI", ...)
nextLex = ""       # text of the current token
flagEof = False    # set by funcLex() once the end of the input is reached
strStmt = ""       # text of the statement being read, echoed at each ';'

def main():
    """Interpret the program file named by the first command-line argument.

    Resets the shared lexer state, reads the whole file, primes the
    one-character look-ahead, loads the first token and hands control to
    stmtList().  Prints a message and returns when the argument is missing
    or the file cannot be opened.
    """
    global itProgram, nextToken, nextChar, nextLex, flagEof, strStmt
    nextToken = ""
    nextChar = ""
    flagEof = False
    strStmt = ""

    try:
        fileProgram = open(sys.argv[1], "rt")
    except IndexError:
        print("Missing input file!")
        return
    except IOError:
        print("Could not open '" + sys.argv[1] + "'!")
        return

    # Close the file as soon as its content has been read.
    with fileProgram:
        strProgram = fileProgram.read()

    itProgram = iter(strProgram)
    # next() with a default works on Python 2 and 3 and yields "" for an
    # empty program instead of raising StopIteration.
    nextChar = next(itProgram, "")

    # Load the first token before starting the parse.
    funcLex()

    stmtList()

# Tokens made of exactly one character, keyed by that character.
_SINGLE_CHAR_TOKENS = {
    "(": "LPARA",
    ")": "RPARA",
    "+": "ADD",
    "-": "SUB",
    "*": "MULT",
    "/": "DIV",
    "=": "ASSIGN",
    ";": "SEMI",
}


def funcLex():
    """Scan the next token of the program into the shared lexer state.

    Sets nextToken to the token kind and nextLex to its text, and leaves
    nextChar on the first character after the token ("" once the input is
    exhausted).  At the end of the input nextToken is "EMPTY" and flagEof is
    set.  A character that starts no token is consumed and reported as
    "UNKNOWN".  Every lexeme is appended to strStmt; when the token is ";"
    the accumulated statement is printed and strStmt is reset.
    """
    global itProgram, nextToken, nextLex, nextChar, flagEof, strStmt
    nextToken = ""
    nextLex = ""

    # next() with a default returns "" at the end of the input, so a token
    # that ends exactly at the end of the file is still classified below.
    while nextChar.isspace():
        nextChar = next(itProgram, "")

    if nextChar == "":
        nextToken = "EMPTY"
        flagEof = True
    elif nextChar in _SINGLE_CHAR_TOKENS:
        nextToken = _SINGLE_CHAR_TOKENS[nextChar]
        nextLex = nextChar
        nextChar = next(itProgram, "")
    elif nextChar.isalpha():
        # Identifier or keyword: a letter followed by letters/digits.
        while nextChar.isalnum():
            nextLex += nextChar
            nextChar = next(itProgram, "")
        nextToken = "PRINT" if nextLex == "print" else "ID"
    elif nextChar.isdigit():
        # Number: digits, optionally followed by one "." and more digits.
        # Letters or a second "." end the number instead of producing a
        # lexeme that int()/float() would reject later.
        while nextChar.isdigit():
            nextLex += nextChar
            nextChar = next(itProgram, "")
        if nextChar == ".":
            nextLex += nextChar
            nextChar = next(itProgram, "")
            while nextChar.isdigit():
                nextLex += nextChar
                nextChar = next(itProgram, "")
            nextToken = "FLOAT"
        else:
            nextToken = "INT"
    else:
        # Consume the offending character so it shows up in the echoed
        # statement and the lexer cannot get stuck on it.
        nextToken = "UNKNOWN"
        nextLex = nextChar
        nextChar = next(itProgram, "")

    strStmt = strStmt + nextLex + " "
    if nextToken == "SEMI":
        print(strStmt)
        strStmt = ""

# <stmt-list> ::= empty | <stmt> <stmt-list> 
def stmtList():
    """Run statements one after another until the "EMPTY" token is reached.

    When the very first token is already "EMPTY", a notice about the empty
    file is printed instead.
    """
    global nextToken

    if nextToken == "EMPTY":
        print(">>> Empty .tiny file.")
        return

    while nextToken != "EMPTY":
        stmt()

# <stmt> ::= id = <expr> ; | 
#   print <expr> ; 
def stmt():
    """Parse and execute one statement: an assignment or a print.

    An assignment evaluates the expression and stores it under the variable
    name through lookupVarTable(); a print statement shows the value
    prefixed with ">>> ".  Both must end with ";".  Any problem is reported
    through printError().
    """
    global nextToken, nextLex

    if nextToken == "ID":
        varName = nextLex
        funcLex()
        if nextToken != "ASSIGN":
            # Without this check "x;" was silently accepted and "x 5;" was
            # reported as a missing ';'.
            printError("<stmt> missing '='")
            return
        funcLex()
        result = expr()
        if result[1] == "UNKNOWN":
            printError("undefined variable.")
            return
        lookupVarTable(varName, result[0], result[1])
    elif nextToken == "PRINT":
        funcLex()
        result = expr()
        if result[1] == "UNKNOWN":
            printError("undefined variable.")
            return
        # Only show the value when the statement is properly terminated;
        # otherwise the check below reports the missing ';'.
        if nextToken == "SEMI":
            print(">>> " + str(result[0]))
    else:
        printError("<stmt> syntax error.")
        return

    if nextToken == "SEMI":
        funcLex()
    else:
        printError("<stmt> missing ';'")

# <expr> ::= <term> { + <term> | - <term> } 
def expr():
    """Evaluate a chain of terms joined by "ADD" / "SUB" tokens.

    Returns the running (value, type) tuple.  An operand typed "UNKNOWN" or
    two operands of different types are reported through printError().
    """
    global nextToken, nextLex

    total = term()

    while nextToken in ("ADD", "SUB"):
        isAddition = nextToken == "ADD"
        funcLex()
        operand = term()
        # Variable is not defined
        if "UNKNOWN" in (total[1], operand[1]):
            printError("Undefined variable!")
        if total[1] != operand[1]:  # type mismatch
            printError("Type mismatch!")
        elif isAddition:
            total = (total[0] + operand[0], total[1])
        else:
            total = (total[0] - operand[0], total[1])

    return total

# <term> ::= <factor> { * <factor> |/<factor> } 
def term():
    """Evaluate a chain of factors joined by "MULT" / "DIV" tokens.

    Returns the running (value, type) tuple.  An operand typed "UNKNOWN",
    two operands of different types, or a division by zero are reported
    through printError().
    """
    global nextToken, nextLex

    lResult = factor()

    while nextToken == "MULT" or nextToken == "DIV":
        operator = nextToken
        funcLex()
        rResult = factor()
        # Variable is not defined
        if lResult[1] == "UNKNOWN" or rResult[1] == "UNKNOWN":
            printError("Undefined variable!")
        if lResult[1] != rResult[1]:  # type mismatch
            printError("Type mismatch!")
        elif operator == "MULT":
            lResult = (lResult[0] * rResult[0], lResult[1])
        elif rResult[0] == 0:
            # Report the error like every other one instead of letting a
            # ZeroDivisionError traceback escape.
            printError("Division by zero!")
        elif lResult[1] == "INT":
            # Floor division keeps an "INT" result an integer on both
            # Python 2 and Python 3.
            lResult = (lResult[0] // rResult[0], lResult[1])
        else:
            lResult = (lResult[0] / rResult[0], lResult[1])

    return lResult

# <factor> ::= id | intnum | (<expr>) 
def factor():
    """Evaluate an identifier, a number or a parenthesised expression.

    Returns a (value, type) tuple.  An identifier is looked up through
    lookupVarTable(), which gives back (0, "UNKNOWN") when it is not stored.
    A token that cannot start a factor, or a missing ")", is reported
    through printError().
    """
    global nextToken, nextLex

    if nextToken == "ID":
        result = lookupVarTable(nextLex, 0, "UNKNOWN")
        funcLex()
    elif nextToken == "INT":
        result = (int(nextLex), "INT")
        funcLex()
    elif nextToken == "FLOAT":
        result = (float(nextLex), "FLOAT")
        funcLex()
    elif nextToken == "LPARA":
        funcLex()
        result = expr()
        if nextToken == "RPARA":
            funcLex()
        else:
            printError("<factor>")
    else:
        # Previously `result` stayed unbound here and the function died with
        # an UnboundLocalError (e.g. on "x = ;").
        printError("<factor> syntax error.")
        result = (0, "UNKNOWN")

    return result

def printError(strMessage):
    """Print the statement read so far and an error message, then exit.

    strMessage is shown after the ">>> Error: " prefix.  The process ends
    with exit status 1 so that callers of the script can detect the failure.
    """
    global strStmt

    if strStmt != "":
        print(strStmt)

    print(">>> Error: " + strMessage)
    # sys.exit() is always available, unlike the exit() helper that the site
    # module installs for interactive sessions.
    sys.exit(1)

def lookupVarTable(varName, varValue, varType):
    """Store or fetch a variable in varTable, depending on varType.

    With any varType other than "UNKNOWN" the pair (varValue, varType) is
    stored under varName and returned.  With "UNKNOWN" the stored pair is
    returned when varName is present, otherwise (varValue, varType) itself.
    """
    if varType == "UNKNOWN":
        # Read access: fall back to the caller's placeholder pair.
        return varTable.get(varName, (varValue, varType))

    varTable[varName] = (varValue, varType)
    return varTable[varName]

# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__": 
    main() 
+0

Le mot-clé `global` n'est utile que dans le corps des fonctions. Voir aussi : [FAQ - Quelles sont les règles pour les variables locales et globales en Python ?](https://docs.python.org/3.6/faq/programming.html#what-are-the-rules-for-local-and-global-variables-in-python) et http://stackoverflow.com/a/423596/1513933 –

+0

Ces variables doivent être définies au niveau du module : `itProgram = None`, `nextToken = ""`, `nextChar = ""`, `nextLex = ""`, `flagEof = False`, `strStmt = ""` –

+0

Selon le [guide de style PEP 8](https://www.python.org/dev/peps/pep-0008/#prescriptive-naming-conventions), vous devriez utiliser le snake_case pour les variables et les fonctions. –

Répondre

1

Vous devriez envisager d'utiliser Antlr, il y a un port Python.

En attendant, voici comment vous pouvez concevoir votre lexer :

def parser_file(file_obj):
    """Yield every item of every line obtained by iterating *file_obj*.

    For a text file object this produces its characters one at a time,
    in reading order.
    """
    for current_line in file_obj:
        for symbol in current_line:
            yield symbol


# Single-character tokens, keyed by the character.
mapping = {
    '(': 'LPARA',
    ')': 'RPARA',
    '+': 'ADD',
    '-': 'SUB',
    '*': 'MULT',  # "MULT" is the name the parser's term() tests for
    '/': 'DIV',
    '=': 'ASSIGN',
    ';': 'SEMI',
}


def lexer(chars):
    """Yield ``(token, lexeme)`` pairs for the characters in *chars*.

    *chars* is any iterable of one-character strings.  Whitespace is
    skipped.  Tokens are the single-character ones listed in ``mapping``,
    "PRINT" for the word ``print``, "ID" for any other word, and
    "INT" / "FLOAT" for numbers.  The generator simply ends when the input
    is exhausted (an empty input yields nothing).

    Raises:
        SyntaxError: on a character that starts no token.
    """
    it_char = iter(chars)

    # next() with a default returns "" at the end of the input.  Letting
    # StopIteration escape from a generator body is a RuntimeError since
    # PEP 479, and on older versions it silently dropped the last token.
    char = next(it_char, "")

    while True:
        # skip spaces
        while char.isspace():
            char = next(it_char, "")

        # end of input
        if char == "":
            return

        if char in mapping:
            # simple one-character token
            yield mapping[char], char
            char = next(it_char, "")
        elif char.isalpha():
            # keyword or identifier: a letter followed by letters/digits
            lex = char
            char = next(it_char, "")
            while char.isalnum():
                lex += char
                char = next(it_char, "")
            yield ("PRINT" if lex == "print" else "ID"), lex
        elif char.isdigit():
            # number: digits, optionally followed by "." and more digits
            lex = char
            char = next(it_char, "")
            while char.isdigit():
                lex += char
                char = next(it_char, "")
            if char == ".":
                lex += char
                char = next(it_char, "")
                while char.isdigit():
                    lex += char
                    char = next(it_char, "")
                yield "FLOAT", lex
            else:
                yield "INT", lex
        else:
            raise SyntaxError(char)

Pour l'utiliser, vous pouvez procéder comme suit :

import io

# The backslash must be the last character of the line so that the sample
# text starts directly at "10".
content = """\
10 + 12.5/18
(8 + 3.14)
"""

# The lexer works on text characters, so the sample is wrapped in a text
# stream.  On Python 3, io.BytesIO rejects a str, and iterating bytes gives
# integers rather than characters.
file_obj = io.StringIO(content)

for token in lexer(parser_file(file_obj)):
    print(token)

Vous obtenez:

('INT', '10') 
('ADD', '+') 
('FLOAT', '12.5') 
('DIV', '/') 
('INT', '18') 
('LPARA', '(') 
('INT', '8') 
('ADD', '+') 
('FLOAT', '3.14') 
('RPARA', ')') 

Vous pouvez utiliser un vrai fichier bien sûr.

Pour votre analyseur : utilisez une pile pour construire l'arbre de syntaxe abstraite, puis évaluez-le. Je suis désolé, c'est trop long à expliquer ici et cela sort du cadre de SO ; pensez à poster sur Code Review.