时间:2010-11-11 03:25:58

标签: f# parser-combinators

我最近一直在使用FParsec,我发现缺少通用解析器对我来说是一个主要的障碍。我对这个小库的目标是简单性以及对通用输入的支持。你能想到有什么补充可以改进它,或者有什么地方特别糟糕吗?

open LazyList 

/// Parser state: the remaining input as a lazy list of 'a elements, paired
/// with an arbitrary value of type 'b carried alongside it.
type State<'a, 'b> (input:LazyList<'a>, data:'b) =
    /// The lazy list of input elements held by this state.
    member self.Input = input
    /// The extra value supplied when the state was constructed.
    member self.Data = data

/// Outcome of running a parser against a State<'a, 'b>.
type Result<'a, 'b, 'c> =
/// The parser produced a value of type 'c, together with a state.
| Success of 'c * State<'a, 'b>
/// The parser failed; carries a message string and a state.
| Failure of string * State<'a, 'b>

/// A parser is a function from a State to a Result carrying a value of type 'c.
type Parser<'a,'b, 'c> = State<'a, 'b> -> Result<'a, 'b, 'c>

/// Monadic bind: runs `left`; on success feeds its value and resulting state
/// to the parser built by `right`. On failure the message is kept but the
/// reported state is the one this combinator was originally called with.
let (>>=) left right state =
    match left state with
    | Failure (message, _) -> Result<'a, 'b, 'd>.Failure (message, state)
    | Success (value, rest) -> right value rest

/// Ordered choice: returns the result of `left` if it succeeds, otherwise
/// runs `right` against the same starting state.
let (<|>) left right state =
    let first = left state
    match first with
    | Failure _ -> right state
    | Success _ -> first

/// Map: runs `parser` and applies `transform` to the value of a successful
/// result. A failure keeps its message but reports the starting state.
let (|>>) parser transform state =
    match parser state with
    | Failure (message, _) -> Failure (message, state)
    | Success (value, rest) -> Success (transform value, rest)

/// Label: runs `parser`; a success is passed through untouched, while any
/// failure is replaced by one carrying `errorMessage` and the starting state.
let (<?>) parser errorMessage state =
    let outcome = parser state
    match outcome with
    | Failure _ -> Failure (errorMessage, state)
    | Success _ -> outcome

/// Computation-expression builder that lets parsers be written with
/// `let!` / `return` syntax on top of the `>>=` combinator.
type ParseMonad() =
    /// Sequences parser `f` with the parser-producing continuation `g`.
    member this.Bind (f, g) = f >>= g
    /// A parser that succeeds with `x` and hands back the state `s` it was given.
    member this.Return x s = Success(x, s)
    /// A parser that fails with an empty message and hands back the state `s`.
    member this.Zero () s = Failure("", s)
    /// Defers construction of the wrapped parser until it is actually run
    /// against a state. Calling `f()` eagerly here would evaluate the builder
    /// body while the parser is being defined, which defeats the purpose of
    /// Delay and can recurse without end for self-referential parsers.
    member this.Delay (f:unit -> Parser<_,_,_>) = fun state -> f () state

// Builder instance; enables the `parse { ... }` computation-expression syntax.
let parse = ParseMonad()

回溯

令人惊讶的是,实现您所描述的内容并不需要太多代码。它有点粗糙,但似乎运作得很好。

/// Backtracking bind over multi-result parsers.
/// For every successful alternative produced by `left`, runs the parser built
/// by `right` and keeps its first successful result (if any). Failed
/// alternatives are skipped. Returns the collected successes as a list, so an
/// empty list means no combination of alternatives succeeded.
let (>>=) left right state =
    seq {
        for res in left state do
            match res with
            | Success (value, rest) ->
                // Keep only the first successful continuation for this alternative.
                let firstSuccess =
                    right value rest
                    |> List.tryFind (fun candidate ->
                        match candidate with
                        | Success _ -> true
                        | Failure _ -> false)
                match firstSuccess with
                | Some result -> yield result
                | None -> ()
            | Failure _ ->
                // A failed alternative contributes nothing. Without this case the
                // match is incomplete and raises MatchFailureException at runtime.
                ()
    } |> Seq.toList

/// Backtracking choice: runs both parsers against the same state and returns
/// the results of `left` followed by the results of `right`.
let (<|>) left right state =
    let fromLeft = left state
    let fromRight = right state
    List.append fromLeft fromRight

回溯第2部分

调整了代码,改用惰性列表和尾调用优化的递归。

/// Backtracking bind over parsers that return a lazy list of results.
/// Walks the alternatives produced by `left`; for each successful one it runs
/// the parser built by `right` and stops at the first success found. The
/// result is a lazy list holding that single success, or an empty lazy list
/// when no alternative leads to one.
let (>>=) left right state =
    // First Success in `results` wrapped in a one-element list, or empty if none.
    let rec firstSuccess results =
        match results with
        | Nil -> LazyList.empty<Result<'a, 'b, 'd>>
        | Cons ((Success _ as hit), _) -> LazyList.ofList [hit]
        | Cons (Failure _, rest) -> firstSuccess rest
    // Tries each alternative of the left parser in order until one continues successfully.
    let rec tryAlternatives alternatives =
        match alternatives with
        | Nil -> LazyList.empty<Result<'a, 'b, 'd>>
        | Cons (Failure _, rest) -> tryAlternatives rest
        | Cons (Success (value, next), rest) ->
            match firstSuccess (right value next) with
            | Nil -> tryAlternatives rest
            | found -> found
    tryAlternatives (left state)

/// Lazy backtracking choice: the results of `left` followed by the results of
/// `right`, with each parser wrapped in `LazyList.delayed` so it is only run
/// when its part of the list is demanded.
let (<|>) (left:Parser<'a, 'b, 'c>) (right:Parser<'a, 'b, 'c>) state =
    let fromLeft = LazyList.delayed (fun () -> left state)
    let fromRight = LazyList.delayed (fun () -> right state)
    LazyList.append fromLeft fromRight

1 个答案:

答案 0 :(得分:2)

我认为你需要做的一个重要的设计决定是,你是否想要在解析器中支持回溯(我不太记得解析理论了,但这可能决定了你的解析器能够处理的语言类型)。

回溯。在您的实现中,解析器可能会失败(Failure情况)或只生成一个结果(Success情况)。另一种选择是生成零个或多个结果(例如,将结果表示为seq<'c>)。对不起,如果这是你已经考虑过的事情:-),但无论如何......

不同之处在于您的解析器始终遵循第一个可能的选项。例如,如果您编写如下内容:

// Fragment of a parser computation expression (`str` is presumably a parser
// that matches the given literal string; it is not defined here).
// Without backtracking, the first branch `str "ab"` is chosen for input "abcd".
let! s1 = (str "ab" <|> str "a")
// ...so only "cd" is left and this parser fails unless the choice above is
// revisited with its second branch `str "a"`.
let! s2 = str "bcd"

使用您的实现,输入“abcd”将失败。它将选择<|>运算符的第一个分支,然后处理前两个字符,序列中的下一个解析器将失败。基于序列的实现将能够回溯并跟踪<|>中的第二条路径并解析输入。

Combine。我想到的另一点是,您还可以在解析器的计算表达式构建器中添加 Combine 成员。这有点微妙(因为您需要理解计算表达式是如何被翻译的),但有时很有用。如果您添加:

// Builder members to add to the parser computation builder.
// Combine joins two parsers with the choice operator `<|>`.
member x.Combine(a, b) = a <|> b
// ReturnFrom (used by `return!`) yields the given parser unchanged.
member x.ReturnFrom(p) = p

然后你可以很好地编写递归解析器:

// Parses `p` one or more times, accumulating results in `acc` (newest first)
// and returning them via List.rev once no further `p` can be parsed.
// Relies on the builder having Combine (for the two consecutive returns) and
// ReturnFrom (for `return!`).
// NOTE(review): the builder is written `parser` here, while the question's code
// names its builder instance `parse` — confirm which name is in scope.
let rec many p acc = 
  parser { let! r = p                  // Parse 'p' at least once
           return! many p (r::acc)     // Try parsing 'p' multiple times
           return r::acc |> List.rev } // If that fails, return the results collected so far