LISP解释器(Bison和Flex编程)

时间:2018-03-05 19:47:19

标签: c lisp bison flex-lexer

我正在尝试使用Bison和Flex编写一个LISP解释器。大部分所需的功能已经实现,但是目前每个函数只能接受2个操作数,例如(+ 2 2)或(* 2 2)。我需要它支持2个以上的操作数,例如(+ 1 2 3 4 5 6 7 8 9)。如能提供任何帮助,我将不胜感激。

除了我的C代码和头文件之外,我还包含了我的Bison和Flex文件。

LISP.y代码

%{
#include "lisptab.h"
%}

/* Semantic value carried by tokens and non-terminals. */
%union
{
   double dval;                      /* numeric literal value (NUMBER) */
   char *sval;                       /* heap-allocated lexeme text (FUNC, TYPE, SYMBOL) */
   struct ast_node *astNode;         /* expression tree node (s_expr) */
   struct symbol_ast_node* symNode;  /* let binding or list of bindings */
};

/* Tokens whose lexeme text is passed on as a string. */
%token <sval> FUNC
%token <sval> TYPE
%token <sval> SYMBOL
/* Numeric literal, already converted to double by the lexer. */
%token <dval> NUMBER
/* Tokens that carry no semantic value. */
%token LPAREN RPAREN EOL QUIT LET COND

/* Semantic value types of the non-terminals. */
%type <astNode> s_expr
%type <symNode> let_elem
%type <symNode> let_list
%%

/* Top level: print the first prompt, then for each expression terminated by
   EOL evaluate it, print the value, free the tree and print the next prompt.
   The record returned by eval() is not freed here. */
program:/* empty */ {
                       printf("> ");
                    }
        | program s_expr EOL
          {
          //printf("yacc: program expr\n");
          printf("\n%lf", eval($2)->value);
          freeNode($2);
          printf("\n> ");
          }
          ;

/* An s-expression: a number, a symbol reference, a built-in function applied
   to one or two operands, a three-way conditional, a let block, or the quit
   command. Each alternative yields an AST node in $$. */
s_expr:
        NUMBER
        {
          $$ = number($1);
        }

        | SYMBOL
        {
          $$ = symbol($1);
        }
        | LPAREN FUNC s_expr RPAREN
        {
          /* unary form: the second operand is absent */
          $$ = function($2, $3, 0);
        }
        | LPAREN FUNC s_expr s_expr RPAREN
        {
          $$ = function($2, $3, $4);
        }
        | LPAREN COND s_expr s_expr s_expr RPAREN
        {
          /* (cond test if-true if-false) */
          $$ = condition($3, $4, $5);
        }
        | LPAREN LPAREN LET let_list RPAREN s_expr RPAREN
        {
          /* ((let bindings...) body) */
          $$ = let($4, $6);
        }
        | QUIT
        {
          exit(0);
        }

        | error
        {
          /* Give the erroneous expression a well-defined (empty) value.
             Without this, $$ is indeterminate and the program rule would pass
             a garbage pointer to eval() and freeNode(); both accept NULL. */
          $$ = NULL;
        }
        ;

/* One or more let bindings. Each further binding is handed to let_list()
   together with the list built so far; let_list() returns the new list head. */
let_list:
        let_elem
        {
          $$ = let_list($1, NULL);
        }

        | let_list let_elem
        {
            $$ = let_list($2, $1);
        }
        ;

/* A single binding: (type name expr) or, without a declared type, (name expr).
   The untyped form passes NULL as the type to let_elem(). */
let_elem:
        LPAREN TYPE SYMBOL s_expr RPAREN
        {
          $$ = let_elem($2, $3, $4);
        }

        | LPAREN SYMBOL s_expr RPAREN
        {
          $$ = let_elem(NULL, $2, $3);
        }
        ;

%%

LISP.l代码

%option noyywrap

%{
    #include "lisptab.h"
%}

/* Named patterns used by the rules section. */
digit [0-9]
/* Optionally signed integer or decimal literal. */
number [+-]?{digit}+(\.{digit}+)?
/* Built-in function names. "/" is accepted alongside "div" because the name
   table in resolveFunc() spells division as "/". */
func "neg"|"abs"|"+"|"-"|"*"|"div"|"/"|"^"|"max"|"min"|"hypot"|"print"|"EQ"|"LT"|"GT"|"GE"|"LE"|"NE"
/* Type keywords allowed in a let binding. */
type "integer"|"real"
quit "(stop)"
letter [a-zA-Z]
/* Identifiers consist of letters only. */
symbol {letter}+

%%

{number} {
                /* numeric literal: hand its value to the parser as a double */
                yylval.dval = strtod(yytext, NULL);
                return NUMBER;
            }

"let"       {
                return LET;
            }

"(stop)"      {
                return QUIT;
            }
"cond"      {
                return COND;
            }

{func}      {
                /* copy the lexeme; +1 leaves room for the terminating NUL */
                yylval.sval = malloc(strlen(yytext) + 1);
                if (yylval.sval == NULL) {
                    yyerror("out of memory");
                    exit(1);
                }
                strcpy(yylval.sval, yytext);
                return FUNC;
            }

{type}      {
                /* copy the lexeme; +1 leaves room for the terminating NUL */
                yylval.sval = malloc(strlen(yytext) + 1);
                if (yylval.sval == NULL) {
                    yyerror("out of memory");
                    exit(1);
                }
                strcpy(yylval.sval, yytext);
                return TYPE;
            }

{symbol}    {
                /* copy the lexeme; +1 leaves room for the terminating NUL */
                yylval.sval = malloc(strlen(yytext) + 1);
                if (yylval.sval == NULL) {
                    yyerror("out of memory");
                    exit(1);
                }
                strcpy(yylval.sval, yytext);
                return SYMBOL;
            }

"("         {
                return LPAREN;
            }
")"         {
                return RPAREN;
            }
"\n"        {
                /* a newline ends the current top-level expression */
                return EOL;
            }

[ \t]       ; /* skip whitespace */

.           ; /* silently ignore any other character */

%%

LISP.c

#include <math.h>
#include <string.h>
#include "lisptab.h"

// Innermost scope currently active; updated by enterScope()/leaveScope() and
// searched (then its parents) by getType()/getSymbol(). NULL when no scope is active.
SCOPE_NODE* currentScope;

// Program entry point: run the parser, then report success.
int main(void)
{
  (void) yyparse();  // the parse status is intentionally not propagated
  return 0;
}

// Error callback used by the parser and the AST builders:
// writes the message followed by a newline to stderr.
void yyerror(char *s)
{
  fputs(s, stderr);
  fputc('\n', stderr);
}

// Find the declared type of `name`.
// Scopes are searched from the current one outwards through their parents;
// entries without a type (INVALID) are skipped. Returns INVALID when no typed
// entry with that name exists.
SYMBOL_TYPE getType(char *name)
{
  SCOPE_NODE *scope;
  SYMBOL_AST_NODE *entry;

  for (scope = currentScope; scope != NULL; scope = scope->parent)
  {
    for (entry = scope->symbols; entry != NULL; entry = entry->next)
    {
      if (strcmp(entry->name, name) == 0 && entry->type != INVALID)
        return entry->type;
    }
  }

  return INVALID;
}

// Look up the symbol-table entry for `name`.
// Scopes are searched from the current one outwards through their parents and
// the first entry with a matching name is returned.
// If the name is not bound in any scope (or no scope is active), an error is
// printed and the program exits. The check used to be attached to the
// "no active scope" branch only, so a miss inside an existing scope returned
// NULL to a caller that dereferences the result.
SYMBOL_AST_NODE* getSymbol(char *name)
{
  SCOPE_NODE *scope;
  SYMBOL_AST_NODE *entry;

  for (scope = currentScope; scope != NULL; scope = scope->parent)
  {
    for (entry = scope->symbols; entry != NULL; entry = entry->next)
    {
      if (strcmp(entry->name, name) == 0)
        return entry;
    }
  }

  printf("ERROR: Undeclared variable <%s> used\n", name);
  exit(1);

  return NULL;  // not reached; keeps compilers without noreturn knowledge quiet
}

void leaveScope()
{
  if(currentScope)
    currentScope = currentScope->parent;
}

// Push `newScope`: link it to the scope that was current so far, then make it current.
void enterScope(SCOPE_NODE* newScope)
{
  SCOPE_NODE *enclosing = currentScope;

  newScope->parent = enclosing;
  currentScope = newScope;
}

// Map a function name to its index in the name table below.
// "div" is accepted as an alias for "/" because the lexer's function pattern
// spells division "div". Returns the table index, or -1 after reporting
// "invalid function" for an unknown name.
int resolveFunc(char *func)
{
  static const char *const funcs[] = { "neg", "abs", "hypot", "+", "-", "*", "/", "^", "max", "min", "print", "EQ", "LT", "GT", "GE", "LE", "NE" };
  const size_t count = sizeof funcs / sizeof funcs[0];
  size_t i;

  if (strcmp(func, "div") == 0)
    func = "/";

  for (i = 0; i < count; i++)
  {
    if (strcmp(funcs[i], func) == 0)
      return (int) i;
  }

  yyerror("invalid function"); // paranoic -- should never happen
  return -1;
}

// Map a type keyword to its numeric code: "integer" -> 0, "real" -> 1.
// Returns -1 for NULL or any other string. The table is walked by element
// count; the previous sentinel-based loop had no sentinel in the table and
// read past the end of the array for unknown names.
int resolveType(char* type)
{
  static const char *const types[] = { "integer", "real" };
  const size_t count = sizeof types / sizeof types[0];
  size_t i;

  if (type == NULL)
    return -1;

  for (i = 0; i < count; i++)
  {
    if (strcmp(types[i], type) == 0)
      return (int) i;
  }
  return -1;
}

// Create a LET node: a fresh scope holding `symbols`, plus the body `s_expr`
// to be evaluated inside that scope.
// Both allocations are checked; on failure the error is reported and the
// program exits instead of writing through a NULL pointer.
AST_NODE *let(SYMBOL_AST_NODE *symbols, AST_NODE *s_expr)
{
  SCOPE_NODE *localScope = malloc(sizeof *localScope);
  // AST_NODE already contains the union of all variants, so one AST_NODE suffices
  AST_NODE *p = malloc(sizeof *p);

  if (localScope == NULL || p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  localScope->symbols = symbols;
  localScope->parent = NULL;  // linked to the enclosing scope by enterScope()

  // put scope and s_expr into the let node and return it
  p->type = LET_TYPE;
  p->data.let.scope = localScope;
  p->data.let.s_expr = s_expr;

  return p;
}

// Add `symbol` to the front of the binding list `let_list` and return the new head.
// If a binding with the same name already exists, an error is printed, the
// existing binding takes over the new value expression, and the unchanged list
// head is returned. The rejected node and its name string are released here
// (they used to be leaked); its value expression is now owned by the existing
// binding.
SYMBOL_AST_NODE* let_list(SYMBOL_AST_NODE *symbol, SYMBOL_AST_NODE *let_list)
{
  SYMBOL_AST_NODE *current;

  if (symbol == NULL)
    return let_list;

  // check for duplicates
  for (current = let_list; current != NULL; current = current->next)
  {
    if (strcmp(symbol->name, current->name) == 0)
    {
      printf("ERROR: redeclaration of variable <%s> attempted\n", symbol->name);
      current->value = symbol->value;
      free(symbol->name);
      free(symbol);
      return let_list;
    }
  }

  // no duplicate: the new symbol becomes the head of the list
  symbol->next = let_list;
  return symbol;
}

// Create a symbol-table entry binding `symbol` to the expression `s_expr`.
// `type` is a type keyword resolved via resolveType(), or NULL for an untyped
// binding (stored as INVALID). The entry takes over the `symbol` string and
// `s_expr` pointers as given. Exits after reporting the error when memory
// cannot be allocated.
SYMBOL_AST_NODE *let_elem(char* type, char* symbol, AST_NODE *s_expr)
{
  SYMBOL_AST_NODE *p = malloc(sizeof *p);

  if (p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  if (type)
    p->type = resolveType(type);
  else
    p->type = INVALID;
  p->name = symbol;
  p->value = s_expr;
  p->next = NULL;  // linked into a list later by let_list()
  return p;
}

// Create an AST node referring to the symbol `name`.
// Only the name is known at this point; the remaining symbol fields start out
// zeroed and are filled in from the symbol table when the node is evaluated.
// Exits after reporting the error when memory cannot be allocated.
AST_NODE *symbol(char* name)
{
  // calloc so that the not-yet-resolved fields are not left indeterminate
  AST_NODE *p = calloc(1, sizeof *p);

  if (p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  p->type = SYM;
  p->data.symbol.name = name;
  return p;
}

// Create an AST node for a numeric literal. Literals are always tagged REAL.
// Exits after reporting the error when memory cannot be allocated.
AST_NODE *number(double value)
{
  // AST_NODE already contains the union of all variants
  AST_NODE *p = malloc(sizeof *p);

  if (p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  p->type = NUM_TYPE;
  p->data.number.type = REAL;
  p->data.number.value = value;
  return p;
}

// Create an AST node for a conditional: `condition` selects between `ifTrue`
// and `ifFalse` when the node is evaluated.
// Exits after reporting the error when memory cannot be allocated.
AST_NODE *condition(AST_NODE* condition, AST_NODE* ifTrue, AST_NODE* ifFalse)
{
  // AST_NODE already contains the union of all variants
  AST_NODE *p = malloc(sizeof *p);

  if (p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  p->type = COND_TYPE;
  p->data.condition.condition = condition;
  p->data.condition.true_expr = ifTrue;
  p->data.condition.false_expr = ifFalse;

  return p;
}

// Create an AST node applying the built-in `funcName` to `op1` and `op2`.
// `op2` is NULL for the one-operand form. The node keeps the `funcName`
// pointer; freeNode() later frees it together with both operands.
// Exits after reporting the error when memory cannot be allocated.
AST_NODE *function(char *funcName, AST_NODE *op1, AST_NODE *op2)
{
  // AST_NODE already contains the union of all variants
  AST_NODE *p = malloc(sizeof *p);

  if (p == NULL)
  {
    yyerror("out of memory");
    exit(1);
  }

  p->type = FUNC_TYPE;
  p->data.function.name = funcName;
  p->data.function.op1 = op1;
  p->data.function.op2 = op2;

  return p;
}

// Release an AST node. NULL is accepted and ignored.
// For function nodes the name string and both operand subtrees are released
// first; every other node kind releases only the node itself.
// NOTE(review): children of conditional and let nodes are not released here.
void freeNode(AST_NODE *p)
{
  if (p == NULL)
    return;

  switch (p->type)
  {
    case FUNC_TYPE:
      free(p->data.function.name);
      freeNode(p->data.function.op1);
      freeNode(p->data.function.op2);
      break;
    default:
      break;
  }

  free(p);
}

// Allocate a zero-filled result record (type INTEGER, value 0).
// Prints a message and exits when memory cannot be allocated.
static NUMBER_AST_NODE *newResult(void)
{
  NUMBER_AST_NODE *r = calloc(1, sizeof *r);

  if (r == NULL)
  {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  return r;
}

// Evaluate the expression tree rooted at `p`.
//
// Returns a freshly allocated record holding the value and its type
// (INTEGER or REAL). The record never aliases the tree, so the caller may
// free() it independently of freeNode(). A NULL tree, an unknown function
// name or an unknown node type yields a zeroed record (INTEGER 0).
//
// Fixes relative to the previous version:
//  - hypot used the first operand twice;
//  - neg/abs changed a literal node in place, so evaluating the same
//    subtree twice (e.g. through a let-bound symbol) flipped the sign;
//  - three records were allocated up front on every call and then leaked;
//  - an unbound symbol dereferenced a NULL lookup result;
//  - an unknown function name or node type returned uninitialised memory.
NUMBER_AST_NODE* eval(AST_NODE *p)
{
  NUMBER_AST_NODE* result = newResult();
  NUMBER_AST_NODE* op1 = NULL;   // owned intermediate results, freed before returning
  NUMBER_AST_NODE* op2 = NULL;

  if (!p)
    return result;

  if (p->type == NUM_TYPE)
  {
    *result = p->data.number;
    if (result->type == INTEGER)
      result->value = round(result->value);
  }

  else if (p->type == FUNC_TYPE)
  {
    int funcType = resolveFunc(p->data.function.name);
    int bothInt = 0;

    // resolveFunc() has already reported the unknown name
    if (funcType < 0)
      return result;

    // operands are evaluated left to right; unary functions never touch op2
    op1 = eval(p->data.function.op1);
    if (funcType != NEG && funcType != ABS && funcType != PRINT)
    {
      op2 = eval(p->data.function.op2);
      bothInt = (op1->type == INTEGER) && (op2->type == INTEGER);
    }

    switch (funcType)
    {
      //unary instructions
      case NEG:
        result->type = op1->type;
        result->value = -op1->value;
        break;
      case ABS:
        result->type = op1->type;
        result->value = fabs(op1->value);
        break;
      case PRINT:
        *result = *op1;
        if (result->type == INTEGER)
          printf("%d", (int)result->value);
        else printf("%.2lf", result->value);
        break;

      //arithmetic operations
      case ADD:
        result->type = bothInt ? INTEGER : REAL;
        result->value = op1->value + op2->value;
        break;
      case SUB:
        result->type = bothInt ? INTEGER : REAL;
        result->value = op1->value - op2->value;
        if (bothInt)
          result->value = round(result->value);
        break;
      case MULT:
        result->type = bothInt ? INTEGER : REAL;
        result->value = op1->value * op2->value;
        break;
      case DIV:
        result->type = bothInt ? INTEGER : REAL;
        result->value = op1->value / op2->value;
        if (bothInt)
          result->value = round(result->value);
        break;

      // other binary operations
      case POW:
        result->type = bothInt ? INTEGER : REAL;
        result->value = pow(op1->value, op2->value);
        if (bothInt)
          result->value = round(result->value);
        break;
      case MAX:
        result->type = bothInt ? INTEGER : REAL;
        result->value = fmax(op1->value, op2->value);
        break;
      case MIN:
        result->type = bothInt ? INTEGER : REAL;
        result->value = fmin(op1->value, op2->value);
        break;
      case HYPOT:
        result->type = bothInt ? INTEGER : REAL;
        result->value = hypot(op1->value, op2->value);
        break;

      // comparisons yield INTEGER 1 (true) or 0 (false)
      case EQUAL:
        result->type = INTEGER;
        result->value = op1->value == op2->value ? 1 : 0;
        break;
      case SMALLER:
        result->type = INTEGER;
        result->value = op1->value < op2->value ? 1 : 0;
        break;
      case LARGER:
        result->type = INTEGER;
        result->value = op1->value > op2->value ? 1 : 0;
        break;
      case GREATEREQUAL:
        result->type = INTEGER;
        result->value = op1->value >= op2->value ? 1 : 0;
        break;
      case LESSEQUAL:
        result->type = INTEGER;
        result->value = op1->value <= op2->value ? 1 : 0;
        break;
      case NOTEQUAL:
        result->type = INTEGER;
        result->value = op1->value != op2->value ? 1 : 0;
        break;
      default:
        // not a known function code: keep the zeroed result
        break;
    }
  }

  else if (p->type == LET_TYPE)
  {
    //enter the new scope, evaluate the body, then leave
    enterScope(p->data.let.scope);
    op1 = eval(p->data.let.s_expr);
    leaveScope();
    *result = *op1;
  }

  else if (p->type == SYM)
  {
    // Check the lookup first so an unbound name is reported instead of
    // dereferencing a NULL result.
    if (getSymbol(p->data.symbol.name) == NULL)
    {
      printf("ERROR: undeclared variable <%s> used\n", p->data.symbol.name);
      exit(1);
    }
    // copy the symbol-table entry into this node
    p->data.symbol = *getSymbol(p->data.symbol.name);

    // if there was no type declared, check with previously declared values
    if (p->data.symbol.type == INVALID)
      p->data.symbol.type = getType(p->data.symbol.name);

    // if theres still no type, error
    if (p->data.symbol.type == INVALID)
    {
      printf("ERROR: undeclared variable <%s> used\n", p->data.symbol.name);
      exit(1);
    }

    op1 = eval(p->data.symbol.value);
    if (p->data.symbol.type == INTEGER)
    {
      result->type = INTEGER;
      // a fractional value assigned to an integer variable is rounded, with a warning
      if (fmod(op1->value, 1) != 0)
        printf("WARNING: incompatible type assignment for variable <%s>\n", p->data.symbol.name);
      result->value = round(op1->value);
    }
    else
    {
      result->type = REAL;
      result->value = op1->value;
    }
  }

  else if (p->type == COND_TYPE)
  {
    // any non-zero test value selects the true branch; only that branch is evaluated
    op1 = eval(p->data.condition.condition);
    op2 = eval(op1->value ? p->data.condition.true_expr : p->data.condition.false_expr);
    *result = *op2;
  }

  free(op1);
  free(op2);
  return result;
}

LISP.h

#ifndef __lisp_h_
#define __lisp_h_

#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <math.h>

#include "y.tab.h"

int yyparse(void);
int yylex(void);
void yyerror(char *);

// Value type of a number or symbol. INVALID (-1) is used for a binding
// declared without a type and as the "not found" result of getType().
typedef enum { INVALID=-1, INTEGER, REAL } SYMBOL_TYPE;
// Discriminator telling which member of AST_NODE's union is in use.
typedef enum { NUM_TYPE, FUNC_TYPE, LET_TYPE, SYM, COND_TYPE } AST_NODE_TYPE;
// Built-in function codes. The order matches the name table in resolveFunc(),
// whose return value (the table index) is switched on with these constants.
typedef enum { NEG = 0, ABS, HYPOT, ADD, SUB, MULT, DIV, POW, MAX, MIN, PRINT, EQUAL, SMALLER, LARGER, GREATEREQUAL, LESSEQUAL, NOTEQUAL } FUNC_NAMES;

// One lexical scope created by a let block.
typedef struct scope_node
{
   struct symbol_ast_node* symbols;  // head of this scope's list of bindings
   struct scope_node* parent;        // enclosing scope; set by enterScope()
} SCOPE_NODE;

// A numeric value together with its type; also the result record of eval().
typedef struct
{
    SYMBOL_TYPE type;  // INTEGER or REAL
    double value;      // stored as double for both types
} NUMBER_AST_NODE;

// Payload of a conditional node.
typedef struct
{
    struct ast_node *condition;   // test expression
    struct ast_node *true_expr;   // evaluated when the test value is non-zero
    struct ast_node *false_expr;  // evaluated when the test value is zero
} COND_AST_NODE;

// Payload of a function-application node.
typedef struct function_ast_node
{
   char *name;            // function name as scanned; freed by freeNode()
   struct ast_node *op1;  // first operand
   struct ast_node *op2;  // second operand, or NULL for the one-operand form
} FUNCTION_AST_NODE;

// A let binding (symbol-table entry); also the payload of a symbol-reference node.
typedef struct symbol_ast_node
{
   char* name;                    // symbol name
   SYMBOL_TYPE type;              // declared type, or INVALID when none was given
   struct ast_node* value;        // expression bound to the name
   struct symbol_ast_node* next;  // next binding in the same scope
} SYMBOL_AST_NODE;

// Payload of a let node.
typedef struct let_ast_node
{
   SCOPE_NODE* scope;        // scope holding the bindings introduced by the let
   struct ast_node* s_expr;  // body evaluated inside that scope
} LET_AST_NODE;

// Generic expression-tree node: a type tag plus the payload for that node kind.
typedef struct ast_node
{
   AST_NODE_TYPE type;  // selects the active member of `data`
   union
   {
      NUMBER_AST_NODE number;      // NUM_TYPE
      FUNCTION_AST_NODE function;  // FUNC_TYPE
      LET_AST_NODE let;            // LET_TYPE
      SYMBOL_AST_NODE symbol;      // SYM
      COND_AST_NODE condition;     // COND_TYPE
   } data;
} AST_NODE;

// functions for creating ast_nodes (called from the grammar actions)
AST_NODE* number(double value);
AST_NODE* function(char *funcName, AST_NODE *op1, AST_NODE *op2);
AST_NODE* let(SYMBOL_AST_NODE *symbols, AST_NODE *s_expr);
SYMBOL_AST_NODE* let_list(SYMBOL_AST_NODE *symbol, SYMBOL_AST_NODE *let_list);
SYMBOL_AST_NODE* let_elem(char* type, char* symbol, AST_NODE* s_expr);
AST_NODE *condition(AST_NODE* condition, AST_NODE* ifTrue, AST_NODE* ifFalse);
AST_NODE* symbol(char *name);

// functions for doing stuff with the symbol table
// NOTE(review): no definition of getSymbolValue appears in the visible LISP.c — confirm it is still needed
double getSymbolValue(char* name);
void leaveScope();
void enterScope(SCOPE_NODE* newScope);

// functions for other stuff
// eval computes the value of an expression tree; freeNode releases a tree
NUMBER_AST_NODE* eval(AST_NODE *ast);
void freeNode(AST_NODE *p);

#endif

1 个答案:

答案 0 :(得分:0)

LPAREN FUNC s_expr s_expr RPAREN这条规则定义的是只接受两个参数的函数。您当然可以为三个、四个乃至更多参数分别再写一条类似的规则,但通常不会这样做。

相反,你想让一个函数获取一个s表达式列表:

s_expr_list: s_expr | s_expr_list s_expr ;

然后你只有LPAREN FUNC s_expr_list RPAREN