open Common open Format type refined_machine_state = BasicState of Tokeniser_states.machine_state | EOFDataState let prettyStateName = function BasicState s -> Tokeniser_states.prettyStateName s | EOFDataState -> "EOFDataState" type refined_action = BasicAction of Tokeniser_spec.tokeniser_action | SwitchMachineState of refined_machine_state let partialEvaluate p expr = let rec f e = match p e with None -> g e | Some false -> Some false | Some true -> Some true and g = function Tokeniser_spec.AND(a, b) -> (match f a, f b with Some true, Some true -> Some true | Some false, _ -> Some false | _, Some false -> Some false | _ -> None) | Tokeniser_spec.OR(a, b) -> (match f a, f b with Some false, Some false -> Some false | Some true, _ -> Some true | _, Some true -> Some true | _ -> None) | Tokeniser_spec.NOT a -> (match f a with Some false -> Some true | Some true -> Some false | None -> None) | _ -> None in f expr let printMachineStateDot ff algorithm = fprintf ff "digraph G {\n"; fprintf ff "graph [ overlap=false, size=\"30,20\" ];\n"; fprintf ff "node [ color=lightgrey, fontname=\"Dejavu Sans\" ];\n"; let edges = ref [] in let nodes = ref [] in List.iter (fun (machineState, consume, steps) -> let machineState = BasicState machineState in nodes := machineState :: !nodes; List.iter (fun (matcher, actions) -> let target = ref machineState in List.iter (function SwitchMachineState ms -> target := ms | _ -> ()) actions; edges := (machineState, !target, List.mem (BasicAction Tokeniser_spec.ParseError) actions, ( match partialEvaluate (function Tokeniser_spec.IsConsumedCharacterEOF -> Some false | _ -> None) matcher with Some false -> true | _ -> false )) :: !edges; ) steps; ) algorithm; List.iter (fun ms -> fprintf ff "%s [ color=black ];\n" ms ) (unique (List.map prettyStateName !nodes)); let namedEdges = unique (List.map (fun (ms1, ms2, err, eof) -> prettyStateName ms1, prettyStateName ms2, err, eof) !edges) in List.iter (function ms1, ms2, err, eof -> fprintf ff "%s -> %s%s;\n" ms1 ms2 (if eof then " [color=blue]" else if err then " [color=red]" else "") ) namedEdges; fprintf ff "}\n" let switchStates from into = let rec f = function (SwitchMachineState s)::acts when s = from -> (SwitchMachineState into)::(f acts) | act::acts -> act::(f acts) | [] -> [] in f let refineInit algorithm = List.map (fun (state, consume, steps) -> state, consume, List.map (fun (matcher, actions) -> matcher, List.map (function Tokeniser_spec.SwitchMachineState s -> SwitchMachineState (BasicState s) | a -> BasicAction a ) actions ) steps ) algorithm let separateEOFDataState algorithm = List.map (fun (state, consume, steps) -> state, consume, List.map (fun (matcher, actions) -> match partialEvaluate (function Tokeniser_spec.IsConsumedCharacterEOF -> Some false | _ -> None) matcher with Some false -> matcher, switchStates (BasicState Tokeniser_states.DataState) EOFDataState actions | _ -> matcher, actions ) steps ) algorithm let printInsertionModeDot ff algorithm modes string_of_insertion_mode = fprintf ff "digraph G {\n"; fprintf ff "graph [ dpi=72 ];\n"; fprintf ff "node [ color=lightgrey, fontname=\"sans-serif\" ];\n"; fprintf ff "edge [ labelfloat=false, fontsize=12.0, fontname=\"sans-serif\" ];\n"; List.iter (fun mode -> fprintf ff "%s [ color=black ];\n" (string_of_insertion_mode mode)) modes; List.iter (fun mode -> let name = string_of_insertion_mode mode in List.iter (fun (matcher, cmds) -> let label = Str.global_replace (Str.regexp "\\(................\\) ") "\\1\\n" (Treeconstructor_spec.string_of_token_match matcher) in let rec f n err = function | _ when n > 10 -> () | Treeconstructor_spec.SetInsertionMode target :: cs -> fprintf ff "%s -> %s [color=%s, label=\"%s\"];\n" name (string_of_insertion_mode target) (if err then "red" else "black") label; f n err cs | Treeconstructor_spec.ReprocessAsIf target :: cs -> fprintf ff "%s -> %s [color=%s, label=\"%s\"];\n" name (string_of_insertion_mode target) (if err then "red" else "blue") label; f n err cs | Treeconstructor_spec.ReprocessWithFosteringAsIf target :: cs -> fprintf ff "%s -> %s [color=%s, label=\"%s\"];\n" name (string_of_insertion_mode target) (if err then "red" else "blue") label; f n err cs | Treeconstructor_spec.ResetInsertionModeAppropriately :: cs-> fprintf ff "%s -> %s [color=%s, label=\"%s\"];\n" name "\"(appropriate)\"" (if err then "red" else "black") label; f n err cs | Treeconstructor_spec.If (_, a, b) :: cs -> f n err a; f n err b; f n err cs | Treeconstructor_spec.ParseError _ :: cs -> f n true cs; | Treeconstructor_spec.ActAsIfStartTag t :: cs -> f (n+1) err ( let (matcher', cmds') = List.find (function | (Treeconstructor_spec.StartTag ts, _) when List.mem t ts -> true | (Treeconstructor_spec.AnyStartTag, _) -> true | (Treeconstructor_spec.Anything, _) -> true | _ -> false) (algorithm mode) in cmds') | Treeconstructor_spec.ActAsIfEndTag t :: cs -> f (n+1) err ( let (matcher', cmds') = List.find (function | (Treeconstructor_spec.EndTag ts, _) when List.mem t ts -> true | (Treeconstructor_spec.AnyEndTag, _) -> true | (Treeconstructor_spec.Anything, _) -> true | _ -> false) (algorithm mode) in cmds') | _::cs -> f n err cs | [] -> () in f 0 false cmds ) (algorithm mode); ) modes; fprintf ff "}\n" let _ = let out = open_out "graph_states.dot" in printMachineStateDot (formatter_of_out_channel out) (separateEOFDataState (refineInit Tokeniser_spec.tokeniserAlgorithm)); close_out out; let out = open_out "graph_modes.dot" in printInsertionModeDot (formatter_of_out_channel out) Treeconstructor_spec.insertionModeAlgorithm Treeconstructor_spec.insertionModes Treeconstructor_spec.string_of_insertion_mode; close_out out;