Erlang中的递归下降解析器

时间:2016-03-25 03:44:12

标签: parsing recursion erlang

我试图在Erlang中构建一个RDP。到目前为止,我已经阅读并处理了一个令牌文件,我将把它传递给函数,如[2,6,3,7,3,2,4,6,3,2,4,4,99](样本)输入应该工作)我需要确保通过从默认规则[bexp0]转换为一些匹配的终端列表来导出每个字符(或一组)。

get_terminal_list() ->
    [1,2,3,4,5,6,7,8,9,10,11,12,99].

get_prod_list() ->
[bexp0,bexp,bexp1,bterm].

get_sym_list(Prod) ->
    case Prod of
        bexp0 -> [[bexp,99]];
        bexp  -> [[bterm,bexp1]];
        bexp1 -> [[5,bexp,bexp1],[6,bexp,bexp1]];
        bterm -> [[3,bexp,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]]
    end.

get_sym_list显示正在使用的语法 - 其中每个int代表一个终端字符,每个子列表是一个集合,即bterm-> [[7,bterm]]意味着bterm可以变成终端' 7'其次是非终端' bterm'

现在我正在以某种方式实现这一点:

Check if first set of rule has some terminal
 if so, check which side, reduce list of tokens from same side until first occurrence of this token,  
     parse this new list (w/o token) with rest of the set of this rule (also without the matched terminal). 
       return {success|failure, list_of_tokens, list_of_rules}  
   if success -> check with new list_of_tokens, default_rule
   if failure, check with old list_of_tokens, and new list_of_rules.

我假设如果规则列表为空,将达到最终状态 - 因此我们已经耗尽了所有可能的生产,因此无效,或者 令牌列表为空,因此我们将每个令牌/令牌集匹配到某个规则

1 个答案:

答案 0 :(得分:2)

这可能会做你想要的:

-module(parse).
-export([parse1/0, parse1/1, parse2/0, parse2/1]).

parse1() ->
    parse([bexp], [2,6,3,7,3,2,4,6,3,2,4,4,99], fun get_sym_list1/1).

parse1(Input) ->
    parse([bexp], Input, fun get_sym_list1/1).


parse2() ->
    parse([bexp0], [2,6,3,7,3,2,4,6,3,2,4,4,99], fun get_sym_list2/1).

parse2(Input) ->
    parse([bexp0], Input, fun get_sym_list2/1).


parse([], [], _) ->
    true;
parse([], _, _) ->
    false;
parse([X | TX], [X | TY], Fun) ->
    io:format("+ Current:~w\tTokens:~w Input:~w~n", [X, TX, TY]),
    parse(TX, TY, Fun);
parse([X | TX], Input, Fun) ->
    io:format("  Current:~w\tTokens:~w Input:~w~n", [X, TX, Input]),
    case lists:member(X, get_terminal_list()) of
        true -> false;
        false -> lists:any(fun(T) -> parse(T ++ TX, Input, Fun) end, Fun(X))
    end.

get_terminal_list() ->
    [1,2,3,4,5,6,7,8,9,10,11,12,99].

get_sym_list1(Prod) ->
    case Prod of
        bexp    -> [[bexp1],[bterm],[bterm,bexp2]];
        bexp1   -> [[99]];
        bexp2   -> [[5,bterm,bexp2],[6,bterm,bexp2]];
        bterm   -> [[bfactor],[7,bterm]];
        bfactor -> [[3,bexp,4],[bconst],[2,10,1],[2,12,1],[2,11,1],[2]];
        bconst  -> [[8],[9]]
    end.

get_sym_list2(Prod) ->
    case Prod of
        bexp0 -> [[bterm,bexp1]];
        bexp  -> [[bterm,bexp1]];
        bexp1 -> [[5,u1],[6,bexp,bexp1],[99]];
        bterm -> [[u1,4],[2],[8],[9],[2,10,1],[2,12,1],[2,11,1],[7,bterm]];
        u1    -> [[3,bexp]]
    end.

然而,看起来语法或输入列表是不正确的,因为据我所知,旧语法和新语法都不会解析输入。它似乎工作正常,因为它将解析像这样的输入:

41> parse:parse2([2,6,8,5,3,bterm,5,3,9,99,99]).
  Current:bexp0 Tokens:[] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:bterm Tokens:[bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
  Current:3     Tokens:[bexp,4,bexp1] Input:[2,6,8,5,3,bterm,5,3,9,99,99]
+ Current:2     Tokens:[bexp1] Input:[6,8,5,3,bterm,5,3,9,99,99]
  Current:bexp1 Tokens:[] Input:[6,8,5,3,bterm,5,3,9,99,99]
  Current:5     Tokens:[u1] Input:[6,8,5,3,bterm,5,3,9,99,99]
+ Current:6     Tokens:[bexp,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:bterm Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:3     Tokens:[bexp,4,bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
  Current:2     Tokens:[bexp1,bexp1] Input:[8,5,3,bterm,5,3,9,99,99]
+ Current:8     Tokens:[bexp1,bexp1] Input:[5,3,bterm,5,3,9,99,99]
  Current:bexp1 Tokens:[bexp1] Input:[5,3,bterm,5,3,9,99,99]
+ Current:5     Tokens:[u1,bexp1] Input:[3,bterm,5,3,9,99,99]
  Current:u1    Tokens:[bexp1] Input:[3,bterm,5,3,9,99,99]
+ Current:3     Tokens:[bexp,bexp1] Input:[bterm,5,3,9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[bterm,5,3,9,99,99]
+ Current:bterm Tokens:[bexp1,bexp1] Input:[5,3,9,99,99]
  Current:bexp1 Tokens:[bexp1] Input:[5,3,9,99,99]
+ Current:5     Tokens:[u1,bexp1] Input:[3,9,99,99]
  Current:u1    Tokens:[bexp1] Input:[3,9,99,99]
+ Current:3     Tokens:[bexp,bexp1] Input:[9,99,99]
  Current:bexp  Tokens:[bexp1] Input:[9,99,99]
  Current:bterm Tokens:[bexp1,bexp1] Input:[9,99,99]
  Current:u1    Tokens:[4,bexp1,bexp1] Input:[9,99,99]
  Current:3     Tokens:[bexp,4,bexp1,bexp1] Input:[9,99,99]
  Current:2     Tokens:[bexp1,bexp1] Input:[9,99,99]
  Current:8     Tokens:[bexp1,bexp1] Input:[9,99,99]
+ Current:9     Tokens:[bexp1,bexp1] Input:[99,99]
  Current:bexp1 Tokens:[bexp1] Input:[99,99]
  Current:5     Tokens:[u1,bexp1] Input:[99,99]
  Current:6     Tokens:[bexp,bexp1,bexp1] Input:[99,99]
+ Current:99    Tokens:[bexp1] Input:[99]
  Current:bexp1 Tokens:[] Input:[99]
  Current:5     Tokens:[u1] Input:[99]
  Current:6     Tokens:[bexp,bexp1] Input:[99]
+ Current:99    Tokens:[] Input:[]
true

BTW true表示已解析输入,而false表示尚未解析。

相关问题