试图了解如何在OCaml中使用哈希表实现Tries

时间:2017-05-03 01:08:50

标签: ocaml hashtable trie

我试图了解如何在OCaml中使用哈希表实现Tries。这是MOOC"OCaml函数式编程入门"(https://www.fun-mooc.fr/)中的练习W06 04。

如果有人能帮助我理解如何使用命令式哈希表来实现递归Tries,我真的很感激。

课程已经结束,我只想了解。

这就是我正在尝试的(模块类型GenericTrie和模块Trie的模板,我们必须实例化Hashtbl.Make仿函数并实现empty,lookup和insert):

(** Signature of a trie whose nodes keep their children in a table. *)
module type GenericTrie = sig
  (** Table type used for a node's children; abstract in this signature
      (presumably keyed by characters, going by its name). *)
  type 'a char_table
  (** A node: an optional value paired with the table of child nodes. *)
  type 'a trie = Trie of 'a option * 'a trie char_table
  (** [empty ()] builds a trie from nothing. *)
  val empty : unit -> 'a trie
  (** [insert trie w v] takes a trie, a word and a value and returns a
      trie (intended: the trie in which [w] is bound to [v]). *)
  val insert : 'a trie -> string -> 'a -> 'a trie
  (** [lookup trie w] returns the optional value found for the word [w]. *)
  val lookup : 'a trie -> string -> 'a option
end

(* Key module handed to [Hashtbl.Make]: keys are characters, compared with
   structural equality and hashed with the generic [Hashtbl.hash]. *)
module CharHashedType = struct
  type t = char
  let equal = ( = )
  let hash = Hashtbl.hash
end

(* Hash tables whose keys are characters. *)
module CharHashtbl = Hashtbl.Make (CharHashedType)

(* Template from the question: a trie whose child tables are
   [CharHashtbl.t] hash tables. *)
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t =
  struct
    type 'a char_table = 'a CharHashtbl.t
    (* A node: the optional value stored there and the table of children. *)
    type 'a trie = Trie of 'a option * 'a trie char_table

    (* [empty ()] builds a node with no value and a newly created table. *)
    let empty () =
      Trie (None, CharHashtbl.create 10)
    ;;

    (* [lookup trie w] follows the characters of [w] starting from [trie]
       and returns the optional value of the node reached.
       Raises [Not_found] when a character of [w] has no entry in the
       table of the node being visited. *)
    let lookup trie w =
      let len = String.length w in
      let rec lookup' trie' idx =
        (* Every character has been consumed: answer with this node's value. *)
        if idx >= len then let Trie (x, _) = trie' in x
        else
          let Trie (_, table) = trie' in
          if CharHashtbl.mem table w.[idx] then
            lookup' (CharHashtbl.find table w.[idx]) (idx + 1)
          else raise Not_found
      in
      lookup' trie 0
    ;;

    (* [insert] has no body here: it is the part the question asks about. *)
    let insert trie w v =

    ;;
  end

1 个答案:

答案 0 :(得分:3)

这与我在另一个问题中给出的答案(here)基本相同,唯一的区别是这里用的不是Map而是Hashtbl。

所以,根据我的理解,你的trie把每个string关联到一个'a option,这意味着你可以存储结果字符串,布尔值或其他任何内容,但我们并不关心它具体是什么。

首先,我会写

(* A trie node as a record: [value] is the optional value stored at this
   node, [children] is the table of child nodes. *)
type 'a trie = {value : 'a option;
                children : 'a trie char_table}

因为在只需要一个构造函数的情况下还使用构造函数,我觉得很奇怪,而且记录类型在后面会更方便:

(* Trie implementation whose child tables are [CharHashtbl.t] hash tables. *)
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
  type 'a char_table = 'a CharHashtbl.t
  (* A node: an optional value plus the table of its child nodes. *)
  type 'a trie = {value : 'a option;
                  children : 'a trie char_table}

  (* [empty ()] returns a node holding no value, with a newly created table
     of children (created with an initial size of 10). *)
  let empty () =
    let children = CharHashtbl.create 10 in
    {value = None; children}
  ;;


  (* [lookup trie w] walks [trie] along the characters of [w] and returns
     the value stored at the node reached (which may itself be [None]).
     When some character of [w] has no corresponding child, the result is
     [None]: an absent word is reported through the [option] result rather
     than by letting [Not_found] escape. *)
  let lookup trie w =
    let len = String.length w in
    let rec lookup' {value; children} idx =
      if idx >= len then value
      else
        (* [find] already raises [Not_found] on a missing key, so no prior
           [mem] test is needed. The handler covers only the [find] call;
           the recursive call in the value branch stays a tail call. *)
        match CharHashtbl.find children w.[idx] with
        | child -> lookup' child (idx + 1)
        | exception Not_found -> None
    in
    lookup' trie 0
  ;;

我简化了lookup(请注意,写成if Module.mem ... then Module.find ... else raise Not_found严格等同于直接写Module.find ...)。

那么,insert呢?算法非常简单:

  • 如果我已经走完字符串的最后一个字母,就把当前节点与值相关联
  • 否则,要么已经有一个子节点与当前字母相关联,我就递归地进入这个分支,要么没有,我需要先创建这个分支。

在OCaml中,它给出了:

  (* [insert trie w v] binds the word [w] to [v] and returns the resulting
     node.

     For a non-empty [w] the child tables reachable from [trie] are updated
     in place and [trie] itself is returned. For the empty word a new record
     is returned that carries [Some v] and shares [trie]'s table of
     children; the argument record is left as it was. *)
  let insert trie w v =
    let len = String.length w in
    let rec aux trie idx =
      if idx >= len then {trie with value = Some v}
      else
        let char = w.[idx] in
        (* Reuse the existing branch for this character, or start a new one. *)
        let child =
          match CharHashtbl.find trie.children char with
          | child -> child
          | exception Not_found -> empty ()
        in
        let child' = aux child (idx + 1) in
        (* [replace], not [add]: [add] keeps the previous binding hidden
           underneath the new one, so every insertion would stack one more
           binding per character on the path, and [iter] would then visit
           all of them. *)
        CharHashtbl.replace trie.children char child';
        trie
    in aux trie 0
  ;;

作为旁注,我想指出一个事实,即混合持久性和可变数据类型真的很奇怪。在这种情况下,我的偏好是:

(** Signature of a trie that is updated in place. *)
module type GenericTrie = sig
  (** Table type used for a node's children; abstract in this signature
      (presumably keyed by characters, going by its name). *)
  type 'a char_table
  (** A node: [value] is the optional value stored at the node and can be
      overwritten in place; [children] is the table of child nodes. *)
  type 'a trie = {mutable value : 'a option;
                  children : 'a trie char_table}
  (** [empty ()] builds a trie from nothing. *)
  val empty : unit -> 'a trie
  (** [insert trie w v] takes a trie, a word and a value and returns
      [unit]: the trie is modified rather than rebuilt. *)
  val insert : 'a trie -> string -> 'a -> unit
  (** [lookup trie w] returns the optional value found for the word [w]. *)
  val lookup : 'a trie -> string -> 'a option
end;;

(* Key module handed to [Hashtbl.Make]: keys are characters, compared with
   structural equality and hashed with the generic [Hashtbl.hash]. *)
module CharHashedType = struct
  type t = char
  let equal = ( = )
  let hash = Hashtbl.hash
end;;

(* Hash tables whose keys are characters. *)
module CharHashtbl = Hashtbl.Make (CharHashedType)

(* Trie updated in place: each node stores an optional value in a mutable
   field and keeps its children in a [CharHashtbl.t] keyed by the next
   character of the word. *)
module Trie : GenericTrie with type 'a char_table = 'a CharHashtbl.t = struct
  type 'a char_table = 'a CharHashtbl.t
  type 'a trie = {mutable value : 'a option;
                  children : 'a trie char_table}

  (* [empty ()] returns a node holding no value, with a newly created table
     of children. *)
  let empty () =
    {value = None; children = CharHashtbl.create 10}
  ;;

  (* [lookup trie w] walks [trie] along the characters of [w] and returns
     the value stored at the node reached (which may itself be [None]).
     When some character of [w] has no corresponding child, the result is
     [None]: an absent word is reported through the [option] result rather
     than by letting [Not_found] escape. *)
  let lookup trie w =
    let len = String.length w in
    let rec lookup' {value; children} idx =
      if idx >= len then value
      else
        (* The handler covers only the [find] call; the recursive call in
           the value branch stays a tail call. *)
        match CharHashtbl.find children w.[idx] with
        | child -> lookup' child (idx + 1)
        | exception Not_found -> None
    in
    lookup' trie 0
  ;;

  (* [insert trie w v] binds the word [w] to [v] by mutating [trie]:
     missing nodes along the path are created and registered in their
     parent's table, then the value of the final node is overwritten. *)
  let insert trie w v =
    let len = String.length w in
    let rec aux trie idx =
      if idx >= len then trie.value <- Some v
      else
        let char = w.[idx] in
        let child =
          match CharHashtbl.find trie.children char with
          | child -> child
          | exception Not_found ->
            (* No branch for this character yet: create it and register it
               once. [add] cannot stack a duplicate here because the key
               was just found to be absent. *)
            let child = empty () in
            CharHashtbl.add trie.children char child;
            child
        in
        aux child (idx + 1)
    in aux trie 0
  ;;
end;;

在这两种情况下,让我们用这个函数打印它:

(* The trie used by the demo below; its values are strings. *)
let t : string Trie.trie = Trie.empty ();;

(* [pp fmt node] prints a trie of strings on [fmt] inside a vertical box
   indented by 1: first the node's own value ("None" or "Some <s>"), then,
   for every binding of its table of children, the character followed by
   the child printed recursively. *)
let rec pp fmt node =
  let print_binding letter subtrie =
    Format.fprintf fmt "@ %c@ %a" letter pp subtrie
  in
  Format.fprintf fmt "@[<v 1>";
  begin match node.Trie.value with
    | Some s -> Format.fprintf fmt "Some %s" s
    | None -> Format.fprintf fmt "None"
  end;
  CharHashtbl.iter print_binding node.Trie.children;
  Format.fprintf fmt "@]"

(* Insert three words, each bound to itself, then print the whole trie. *)
let () =
  (* [ignore] discards whatever [insert] returns, so the demo type-checks
     whether [insert] yields [unit] or a trie. *)
  List.iter
    (fun word -> ignore (Trie.insert t word word))
    ["foo"; "fol"; "far"];
  Format.printf "%a@." pp t

我有这个输出:

None
 f
 None
  o
  None
   o
   Some foo
   l
   Some fol
  a
  None
   r
   Some far