构建预测解析器

时间:2014-09-07 02:41:50

标签: java parsing

我正在为 Cocol/R 的部分产生式(production)构建一个预测解析器。

这是我正在使用的语法:

ScannerSpecification =
[ "CHARACTERS" { SetDecl } ]
[ "KEYWORDS" { KeywordDecl } ]
[ "TOKENS" { TokenDecl } ]
{ WhiteSpaceDecl }.
SetDecl = ident '=' Set '.'.
Set = BasicSet { ('+'|'-') BasicSet }.
BasicSet = string | ident | Char [ ".." Char ].
Char = char | "CHR" '(' number ')'.

花括号({})表示重复 0 次或多次;方括号([])表示出现 0 次或 1 次。

这是输入:

COMPILER Test

CHARACTERS
letter = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".
digit = "0123456789".
hexdigit = digit+"ABCDEF".

这是我现在写的代码:

    /**
     * Top-level parse routine: reads the next input line, loads its first token into
     * {@code lookahead}, and then matches the sequence
     * "COMPILER" ident ScannerSpecification "END" ident ".".
     *
     * If there is no input line, or the line read contains no tokens, an error
     * message and the current line count are printed and nothing is parsed.
     */
    public void Cocol( ){
        // Try to position the tokenizer on the first line of input.
        boolean firstTokenAvailable = false;
        if (in.hasNextLine()) {
            currentLine = in.nextLine();
            numLines++;
            tokenizer = new StringTokenizer(currentLine, " ");
            firstTokenAvailable = tokenizer.hasMoreElements();
        }

        // Both "no line" and "empty line" report the same error.
        if (!firstTokenAvailable) {
            System.out.println("ERROR: Expected:"+"COMPILER"+"But Found: "+" ");
            System.out.println("Line :"+numLines);
            return;
        }

        lookahead = tokenizer.nextToken();
        match("COMPILER", false);
        match("ident", false);
        ScannerSpecification();
        match("END", false);
        match("ident", false);
        match(".", false);
    }

/**
 * Parses the ScannerSpecification production:
 *
 * <pre>
 * ScannerSpecification =
 *   [ "CHARACTERS" { SetDecl } ]
 *   [ "KEYWORDS" { KeywordDecl } ]
 *   [ "TOKENS" { TokenDecl } ]
 *   { WhiteSpaceDecl }.
 * </pre>
 *
 * Only the CHARACTERS section is implemented so far. The unquoted trailing "." in
 * the grammar is the EBNF rule terminator, not an input token, so this method
 * does not consume a "." of its own (each SetDecl consumes its own '.').
 */
public void ScannerSpecification(){
    // Optional section: "CHARACTERS" followed by zero or more SetDecl.
    if (lookahead.equals("CHARACTERS")) {
        match("CHARACTERS", false);
        // SetDecl(true) only peeks at the lookahead; SetDecl(false) actually consumes.
        while (SetDecl(true)) {
            if (!SetDecl(false)) {
                // A malformed declaration was already reported by match(); stop here
                // instead of looping again on a lookahead that may never advance
                // (match() leaves lookahead unchanged once the input is exhausted).
                break;
            }
        }
    }
    if (lookahead.equals("KEYWORDS")) {
        // TODO: KEYWORDS section ({ KeywordDecl }) is not implemented yet.
    }

    // TODO: TOKENS section and { WhiteSpaceDecl } are not implemented yet.
}

/**
 * Handles the production {@code SetDecl = ident '=' Set '.'}.
 *
 * @param backtrack when {@code true}, only checks whether the current lookahead
 *                  is an ident (nothing is consumed); when {@code false}, matches
 *                  the full declaration, consuming tokens as it goes
 * @return {@code true} if the peek (backtrack mode) or the full declaration
 *         (normal mode) matched
 */
public boolean SetDecl(boolean backtrack){
    // Peek mode: a SetDecl can start here only if the lookahead is an ident.
    if (backtrack) {
        return match("ident", true);
    }

    // Real parse: short-circuits at the first element that fails to match.
    return match("ident", false)
            && match("=", false)
            && Set(false)
            && match(".", false);
}

public boolean match(String terminal,boolean backtrack){

    AFN afn = new AFN(terminal);
    boolean result;
    if(terminal.equals("number")){
        result = automataNumber.simularAFN(lookahead, automataNumber.inicial, conjuntoSimbolos);

    }
    else if(terminal.equals("ident")){
        result = automataident.simularAFN(lookahead,automataident.inicial,conjuntoSimbolos);

    }
    else if(terminal.equals("string")){
       result =  automataString.simularAFN(lookahead,automataString.inicial,conjuntoSimbolos);

    }   
    else if(terminal.equals("char")){
        result = automataChar.simularAFN(lookahead,automataChar.inicial,conjuntoSimbolos);

    }
    else{
        if(this.lookahead.equals(terminal)){
            result= true;
        }
        else{

            result= false;
        }
    }
    //Verificando si se esta haciendo backtrack o es un match "real"
    if(backtrack==false&&result==true){
        if(tokenizer.hasMoreElements()){
            lookahead = tokenizer.nextToken();
        }
        else if(in.hasNextLine()){
            do{
                currentLine =in.nextLine();
                numLines++;
            } while (currentLine.equals(""));
            tokenizer = new StringTokenizer(currentLine, " ");
            lookahead = tokenizer.nextToken();
        }   
    }
    else if(backtrack==false&&result==false){
        System.out.println("ERROR: Expected:"+terminal+"But Found: "+lookahead);
        //Encontrando linea
        try {
            ArrayList<Integer> line = findLineOf(lookahead);
            System.out.println("Near Line: "+line);
        } catch (Exception ex) {
            Logger.getLogger(ScannerCocol.class.getName()).log(Level.SEVERE, null, ex);
        }
    }
    return result;

我遇到的问题出在 SetDecl 产生式上。因为我以空格作为分隔符,每次读取输入直到遇到下一个空格为止,所以在解析完 ident“letter”之后,得到的记号(token)是:

"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".

其中连末尾的 '.' 也包含在内。不出所料,我得到了一个错误,因为我本应只读到字符串结尾的 '"' 为止。我实在不知道,当终结符之间没有空格时,如何让解析器识别应该在何处停止读取。如果删除 ident 和 '=' 符号之间的空格,我也会遇到类似的问题。

任何人都可以帮我解决这个问题吗?

谢谢

0 个答案:

没有答案