open Common open Tokeniser_spec open Tokeniser_states module Code = Lang_base (* The tokeniser algorithm gets progressively refined into more useful versions. The refined_* types are an intermediate stage, before code generation. *) type refined_matcher = | AND' of refined_matcher list | OR' of refined_matcher list | NOT' of refined_matcher | BasicMatcher of tokeniser_matcher | True | False type refined_action = | BasicAction of tokeniser_action | Comment of string | ParseErrorWithMessage of string | Annotate of string type refined_step = | BasicStep of refined_matcher * refined_action list | CompoundStep of refined_matcher * refined_step list | SwitchStep of (int list * refined_step) list (* (matched chars or [] if default, actions); ... *) let rec resultantMachineState prev = function (* TODO: don't duplicate this from elsewhere *) | BasicAction (SwitchMachineState ms) :: acts -> resultantMachineState ms acts | _::acts -> resultantMachineState prev acts | [] -> prev let partialEvaluate p expr = let rec f e = match p e with None -> g e | Some false -> Some false | Some true -> Some true and g = function | AND' xs -> let fxs = List.map f xs in (if List.exists (fun a -> match a with Some false -> true | _ -> false) fxs then Some false else if List.for_all (fun a -> match a with Some true -> true | _ -> false) fxs then Some true else None) | OR' xs -> let fxs = List.map f xs in (if List.exists (fun a -> match a with Some true -> true | _ -> false) fxs then Some true else if List.for_all (fun a -> match a with Some false -> true | _ -> false) fxs then Some false else None) | NOT' x -> (match f x with | Some b -> Some (not b) | None -> None) | _ -> None in f expr (* Flatten nested ANDs/ORs (e.g. "a && (b && c)" -> "a && b && c") *) let rec flattenExpr = let rec flattenOR = function | (OR' x) :: xs -> flattenOR x @ flattenOR xs | x::xs -> flattenExpr x :: flattenOR xs | [] -> [] in let rec flattenAND = function | (AND' x) :: xs -> flattenAND x @ flattenAND xs | x::xs -> flattenExpr x :: flattenAND xs | [] -> [] in function | OR' xs -> OR' (flattenOR xs) | AND' xs -> AND' (flattenAND xs) | x -> x (* Remove "false || ...", "true && ...", etc *) let rec removeConstants = function | OR' xs -> let xs' = List.filter (function False -> false | _ -> true) (List.map removeConstants xs) in if List.exists (function True -> true | _ -> false) xs' then True else if List.length xs' = 0 then False else OR' xs' | AND' xs -> let xs' = List.filter (function True -> false | _ -> true) (List.map removeConstants xs) in if List.exists (function False -> true | _ -> false) xs' then False else if List.length xs' = 0 then True else AND' xs' | NOT' True -> False | NOT' False -> True | NOT' x -> let x' = removeConstants x in if x = x' then NOT' x else removeConstants (NOT' x) | x -> x let rec prepareForMatcher = function (* Precompute the values of some functions, to avoid calling them multiple times - this returns a list of steps that must be executed (duplicates will be removed later) *) | AND' ts -> List.concat (List.map prepareForMatcher ts) | OR' ts -> List.concat (List.map prepareForMatcher ts) | NOT' a -> prepareForMatcher a | BasicMatcher IsEndOfCData -> [Code.InitVar ("bool", "isEndOfCData_", Code.Call (Code.Method "isEndOfCData", []))] (* TODO: this is only really true if there are no state changes, which is incorrect if we start doing more complex code transformations *) | _ -> [] let stmt_of_basic_action = function | SwitchMachineState ms -> Code.Assign (Code.Var "machineState", Code.Const (string_of_machine_state ms)) | SetEscapeFlag ef -> Code.Assign (Code.Var "escapeFlag", Code.Bool ef) | UnconsumeCharacter -> Code.CallStmt (Code.Method "reconsumeCharacter", [Code.Var "currentCharacter"]) (* TODO: verify that the current character is never used until the next consume, else it'll be duplicated *) | EmitConsumedCharacter -> Code.CallStmt (Code.Method "emitCharacterToken", [Code.Var "currentCharacter"]) | EmitCharacter c -> Code.CallStmt (Code.Method "emitCharacterToken", [Code.Int c]) | EmitEOFToken -> Code.StmtList [ Code.CallStmt (Code.Method "emitEOFToken", []); Code.Return None ] | ConsumeCharacter -> Code.Assign (Code.Var "currentCharacter", Code.Call (Code.Method "consumeCharacter", [])) | ConsumeAndEmitCharRef c -> Code.CallStmt (Code.Method "consumeAndEmitCharRef", [Code.Int (match c with Some n -> n | _ -> 0)]) | ConsumeAndAppendCharRefToAttributeValue c -> Code.CallStmt (Code.Method "consumeAndAppendCharRef", [Code.Int (match c with Some n -> n | _ -> 0)]) | CreateStartTagToken -> Code.CallStmt (Code.Method "createStartTagToken", []) | CreateEndTagToken -> Code.CallStmt (Code.Method "createEndTagToken", []) | CreateTagTokenAttribute -> Code.CallStmt (Code.Method "createTagTokenAttribute", []) | CreateCommentToken -> Code.CallStmt (Code.Method "createCommentToken", []) | CreateDoctypeToken -> Code.CallStmt (Code.Method "createDoctypeToken", []) | EmitCurrentTagToken -> Code.CallStmt (Code.Method "emitCurrentTagToken", []) | EmitCurrentCommentToken -> Code.CallStmt (Code.Method "emitCurrentCommentToken", []) | EmitCurrentDoctypeToken -> Code.CallStmt (Code.Method "emitCurrentDoctypeToken", []) | HandleDuplicateAttributes -> Code.CallStmt (Code.Method "handleDuplicateAttributes", []) | AppendToTagTokenName -> Code.CallStmt (Code.Method "appendToTagTokenName", [Code.Var "currentCharacter"]) | AppendToTagTokenNameLowercase -> Code.CallStmt (Code.Method "appendToTagTokenName", [Code.Add [Code.Var "currentCharacter"; Code.Int 0x0020]]) | AppendToTagTokenAttributeName -> Code.CallStmt (Code.Method "appendToTagTokenAttributeName", [Code.Var "currentCharacter"]) | AppendToTagTokenAttributeNameLowercase -> Code.CallStmt (Code.Method "appendToTagTokenAttributeName", [Code.Add [Code.Var "currentCharacter"; Code.Int 0x0020]]) | AppendToTagTokenAttributeValue -> Code.CallStmt (Code.Method "appendToTagTokenAttributeValue", [Code.Var "currentCharacter"]) | AppendAmpersandToTagTokenAttributeValue -> Code.CallStmt (Code.Method "appendToTagTokenAttributeValue", [Code.Int 0x0026]) | SetTagTokenSelfClosingFlag -> Code.CallStmt (Code.Method "setTagTokenSelfClosingFlag", []) | AppendToCommentToken -> Code.CallStmt (Code.Method "appendToCommentToken", [Code.Var "currentCharacter"]) | AppendHyphenToCommentToken -> Code.CallStmt (Code.Method "appendToCommentToken", [Code.Int 0x002D]) | AppendToDoctypeTokenName -> Code.CallStmt (Code.Method "appendToDoctypeTokenName", [Code.Var "currentCharacter"]) | AppendToDoctypeTokenNameLowercase -> Code.CallStmt (Code.Method "appendToDoctypeTokenNameLowercase", [Code.Var "currentCharacter"]) | AppendToDoctypeTokenPubId -> Code.CallStmt (Code.Method "appendToDoctypeTokenPubId", [Code.Var "currentCharacter"]) | AppendToDoctypeTokenSysId -> Code.CallStmt (Code.Method "appendToDoctypeTokenSysId", [Code.Var "currentCharacter"]) | SetDoctypeTokenIncorrect -> Code.CallStmt (Code.Method "setDoctypeTokenIncorrect", []) | SetDoctypeTokenPubIdEmpty -> Code.CallStmt (Code.Method "setDoctypeTokenPubIdEmpty", []) | SetDoctypeTokenSysIdEmpty -> Code.CallStmt (Code.Method "setDoctypeTokenSysIdEmpty", []) | ParseError -> Code.CallStmt (Code.Method "parseError", []) | ParseErrorIfEndTagWithAttributes -> Code.CallStmt (Code.Method "parseErrorIfEndTagWithAttributes", []) | ParseErrorIfEndTagWithSelfClosing -> Code.CallStmt (Code.Method "parseErrorIfEndTagWithSelfClosing", []) let rec stmt_of_action = function | BasicAction a -> stmt_of_basic_action a | Comment msg -> Code.Comment msg | ParseErrorWithMessage msg -> Code.CallStmt (Code.Method "parseError", [Code.Str msg]) | Annotate msg -> Code.CallStmt (Code.Method "annotate", [Code.Str msg]) let rec convertMatcher = function | AND (AND _ as a, x) -> (match convertMatcher a with AND' xs -> AND' (xs @ [convertMatcher x]) | y -> AND' [y; convertMatcher x]) | OR (OR _ as a, x) -> (match convertMatcher a with OR' xs -> OR' (xs @ [convertMatcher x]) | y -> OR' [y; convertMatcher x]) | AND (x, y) -> AND' [ convertMatcher x; convertMatcher y ] | OR (x, y) -> OR' [ convertMatcher x; convertMatcher y ] | NOT x -> NOT' (convertMatcher x) | NotYetHandled -> True | x -> BasicMatcher x (* Do the initial conversion from the spec algorithm into the refined format *) let refineInit algorithm = List.map (fun (ms, consume, steps) -> ms, consume, [], List.map (fun (matcher, actions) -> BasicStep (convertMatcher matcher, List.map (fun a -> BasicAction a) actions) ) steps ) algorithm (* Change parse errors to say e.g. "Unexpected '<' in WhateverState" *) let refineParseErrorMessages algorithm = let requiresCharacterEOF m = ( match partialEvaluate (function BasicMatcher IsConsumedCharacterEOF -> Some false | _ -> None) m with | Some false -> true | _ -> false ) in let requiresCharacter m c = ( match partialEvaluate (function BasicMatcher (IsConsumedCharacter c') when c = c' -> Some false | _ -> None) m with | Some false -> true | _ -> false ) in List.map (fun (ms, consume, preps, steps) -> ms, consume, preps, List.map (function BasicStep (matcher, actions) -> let prefix = if requiresCharacter matcher 0x003E then "Character '>'" else if requiresCharacter matcher 0x003F then "Character '?'" else if requiresCharacter matcher 0x002D then "Character '-'" else if requiresCharacter matcher 0x002F then "Character '/'" else if requiresCharacterEOF matcher then "EOF" else "Unexpected character" in BasicStep (matcher, List.map (function | BasicAction ParseError -> ParseErrorWithMessage (prefix ^ " in " ^ string_of_machine_state ms) | a -> a ) actions) ) steps ) algorithm (* Add in any necessary code to prepare before the matcher expressions (usually for caching expensive values) *) let refinePreparations algorithm = List.map (fun (ms, consume, preps, steps) -> ms, consume, unique (preps @ List.concat (List.map (function BasicStep (m, a) -> prepareForMatcher m) steps)), steps ) algorithm (* Convert steps like "if (a && b && c) ... else if (a && !(b && c)) ..." into "if (a) { if (b && c) ... else ... }" *) let refineCompound algorithm = let rec convertSteps = function | BasicStep (AND' (a0::b0s), s0) :: BasicStep (AND' (a1::b1::[]), s1) :: ss when a0 = a1 && b1 = NOT' (AND' b0s) -> CompoundStep (a0, [ BasicStep (AND' b0s, s0); BasicStep (b1, s1) ]) :: convertSteps ss | BasicStep (AND' (a0::b0::[]), s0) :: BasicStep (AND' (a1::b1::[]), s1) :: ss when a0 = a1 && b1 = NOT' b0 -> CompoundStep (a0, [ BasicStep (b0, s0); BasicStep (b1, s1) ]) :: convertSteps ss | BasicStep (m, a) :: ss -> BasicStep (m, a) :: convertSteps ss | [] -> [] in List.map (fun (ms, consume, preps, steps) -> ms, consume, preps, convertSteps steps ) algorithm (* Convert steps like "if (c == 'a') ... else if (c == 'b' || c == 'c') ... else ..." into "switch (c) { case 'a': ...; case 'b': case 'c': ...; default: ... }" *) (* TODO: This doesn't seem to be particularly worthwhile in practice *) let refineCharacterSwitch algorithm = let rec isCharacterMatcher = function | BasicMatcher (IsConsumedCharacter c) -> true | BasicMatcher IsConsumedCharacterEOF -> true | OR' xs when List.for_all isCharacterMatcher xs -> true | True -> true | _ -> false in let rec extractCharacters = function | BasicMatcher (IsConsumedCharacter c) -> [c] | BasicMatcher IsConsumedCharacterEOF -> [0] | OR' xs when List.for_all isCharacterMatcher xs -> List.concat (List.map extractCharacters xs) | True -> [] | _ -> failwith "Broken" in let convertSteps steps = if not (List.for_all (function BasicStep (m, a) -> isCharacterMatcher m | CompoundStep (m, ss) -> isCharacterMatcher m) steps) then steps else [ SwitchStep (List.map (function | BasicStep (m, a) -> extractCharacters m, BasicStep (True, a) | CompoundStep (m, ss) -> extractCharacters m, CompoundStep (True, ss) ) steps) ] in List.map (fun (ms, consume, preps, steps) -> ms, consume, preps, convertSteps steps ) algorithm let refineToCode_BasicMatcher = function | IsContentModel cm -> Code.Eq(Code.Var "contentModel", Code.Const (string_of_content_model cm)) | IsConsumedCharacter c -> Code.Eq(Code.Var "currentCharacter", Code.Int c) | IsConsumedCharacterEOF -> Code.Eq(Code.Var "currentCharacter", Code.Var "Char_EOF") | IsConsumedCharacterInRange (c0, c1) -> Code.And [Code.Gte(Code.Var "currentCharacter", Code.Int c0); Code.Lte(Code.Var "currentCharacter", Code.Int c1)] | IsChar1 c -> Code.Eq(Code.Call(Code.Method "getOldCharacter", [Code.Int 1]), Code.Int c) | IsChar2 c -> Code.Eq(Code.Call(Code.Method "getOldCharacter", [Code.Int 2]), Code.Int c) | IsChar3 c -> Code.Eq(Code.Call(Code.Method "getOldCharacter", [Code.Int 3]), Code.Int c) | IsEndOfCData -> Code.LocalVar "isEndOfCData_" | IsStartOfComment -> Code.Call(Code.Method "isFollowedBy", [Code.Str "--"]) | IsDoctype -> Code.Call(Code.Method "isFollowedBy", [Code.Str "doctype"]) | IsPublic -> Code.Call(Code.Method "isFollowedBy", [Code.Str "public"]) | IsSystem -> Code.Call(Code.Method "isFollowedBy", [Code.Str "system"]) | IsEscapeFlag -> Code.Var "escapeFlag" | NotYetHandled -> Code.Bool true | _ -> failwith "Invalid matcher expression" let rec refineToCodeExpr = function | AND' es -> Code.And (List.map refineToCodeExpr es) | OR' es -> Code.Or (List.map refineToCodeExpr es) | NOT' e -> Code.Not (refineToCodeExpr e) | True -> Code.Bool true | False -> Code.Bool false | BasicMatcher m -> refineToCode_BasicMatcher m let rec refineToCodeSteps steps = let rec removeRedundancy = function (* (TODO: do this at an earlier stage) *) | Some (Code.If (c0, s0, Some (Code.If (c1, s1, None)))) when c1 = Code.Not c0 -> Some (Code.If (c0, s0, Some (Code.Block s1))) | Some(Code.If (c, s, e)) -> Some (Code.If (c, s, removeRedundancy e)) | e -> e in Code.Optional ( removeRedundancy ( List.fold_right ( fun step els -> Some (refineToCodeStep els step) ) steps None ) ) and refineToCodeStep els = let rec f = function | BasicStep (m, actions) -> List.map (function a -> stmt_of_action a) actions | CompoundStep (m, substeps) -> [ refineToCodeSteps substeps ] | SwitchStep cases -> failwith "Broken" in function | BasicStep (matcher, actions) as s -> Code.If (refineToCodeExpr matcher, f s, els) | CompoundStep (matcher, substeps) as s -> Code.If (refineToCodeExpr matcher, f s, els) | SwitchStep cases -> Code.Switch (Code.Var "currentCharacter", List.map (fun (cs, step) -> List.map (fun c -> Code.Int c) cs, f step @ [ Code.Break ]) cases) let refineToCode algorithm = List.map (fun (ms, consume, preps, steps) -> ms, consume, preps, refineToCodeSteps steps ) algorithm (* Convert unscoped global variable name (like "x") into object-scoped name ("this.x") *) let moveGlobalsIntoObject code = let rec expr = function | Code.And es -> Code.And (List.map expr es) | Code.Or es -> Code.Or (List.map expr es) | Code.Not e -> Code.Not (expr e) | Code.Add es -> Code.Add (List.map expr es) | Code.Call (n, es) -> Code.Call (expr n, List.map expr es) | Code.Eq (e0, e1) -> Code.Eq (expr e0, expr e1) | Code.Lte (e0, e1) -> Code.Lte (expr e0, expr e1) | Code.Gte (e0, e1) -> Code.Gte (expr e0, expr e1) | Code.Var s -> Code.Var ("this." ^ s) | Code.Method s -> Code.Method ("this." ^ s) | e -> e and stmt = function | Code.Assign (n, e) -> Code.Assign (expr n, expr e) | Code.Block s -> Code.Block (List.map stmt s) | Code.CallStmt (n, es) -> Code.CallStmt (expr n, List.map expr es) | Code.If (cond, body, els) -> Code.If (expr cond, List.map stmt body, match els with Some s -> Some (stmt s) | None -> None) | Code.InitVar (t, n, e) -> Code.InitVar (t, n, expr e) | Code.Optional (Some s) -> Code.Optional (Some (stmt s)) | Code.StmtList ss -> Code.StmtList (List.map stmt ss) | Code.Switch (e, cases) -> Code.Switch (expr e, List.map (fun (es, b) -> List.map expr es, List.map stmt b) cases) | Code.While (c, b) -> Code.While (c, List.map stmt b) | s -> s in stmt code (* Add annotation calls to each step, for profiling purposes *) let refineStepAnnotation algorithm = List.map (fun (ms, consume, preps, steps) -> ms, consume, preps, List.map (function BasicStep (matcher, actions) -> BasicStep (matcher, Annotate (string_of_machine_state ms ^ ": " ^ Lang_cpp.printExpr (refineToCodeExpr matcher)) :: actions) ) steps ) algorithm (* When there is a "stay in the same state" case, assume that it's likely to repeat that step for quite a while, and so optimise it by doing an inner loop over the characters in that range *) (* TODO: this doesn't actually seem particularly useful at the moment, so it's unused and doesn't really do anything now *) let refineInnerLoops algorithm = List.map (fun (ms, consume, preps, steps) -> let rec f conditions = function BasicStep (matcher, actions) as step :: steps -> (if resultantMachineState ms actions = ms then BasicStep (matcher, actions @ [Comment ("loop until " ^ Lang_cpp.printExpr (refineToCodeExpr (removeConstants (flattenExpr (OR' [conditions; NOT' matcher])))))]) else step) :: f (OR'[conditions; matcher]) steps | [] -> [] in ms, consume, preps, (if consume then f False steps else steps) ) algorithm let codeMainLoop_Switch algorithm = Code.While (Code.Bool true, [ Code.Switch (Code.Var "machineState", List.map (fun (ms, consume, preps, steps) -> [ Code.Const (string_of_machine_state ms) ], (if consume then [stmt_of_basic_action ConsumeCharacter] else []) @ preps @ [ steps; Code.Break ] ) algorithm )]) let codeMainLoop_Table algorithm = Code.StmtList ( List.map (fun (ms, consume, preps, steps) -> Code.Assign ( Code.Var (string_of_machine_state ms), Code.AnonFunction ([], (if consume then [stmt_of_basic_action ConsumeCharacter] else []) @ preps @ [ steps; Code.Return None ] ) ) ) algorithm ) let printCPP algorithm = let entities = List.sort (fun (an, av) (bn, bv) -> compare an bn) Entities.charRefTable in Lang_cpp.printStmt (Code.StmtList [ Code.InitVar ("static const char*", "entityNames[]", Code.Array (List.map (fun (n,v) -> Code.Str n) entities)); Code.Newline; Code.InitVar ("static int", "entityValues[]", Code.Array (List.map (fun (n,v) -> Code.HexInt v) entities)); Code.Newline; Code.RawStmt "class Tokeniser"; Code.RawStmt "{"; (* (this is ugly but it makes the C++ class layout work correctly) *) Code.RawStmt "public:"; Code.Newline; Code.Enum ("MachineState", List.map string_of_machine_state enumerate_machine_state); Code.Enum ("ContentModel", List.map string_of_content_model enumerate_content_model); Code.RawStmt "private:"; Code.Function ("void", "run", [], [codeMainLoop_Switch algorithm]); Code.Newline; Code.Function ("Char", "lookupCharacter", ["int", "n"], [ Code.Switch (Code.Var "n", List.map (fun (a, b) -> [Code.HexInt a], [Code.Return (Some (Code.HexInt b))]) Entities.charRefMappingTable); Code.Return (Some (Code.Int 0)); ]); ]) let entityRegexps = (* Regexp-based entity matching requires best (longest) match first *) let f es = String.concat "|" (List.stable_sort (fun a b -> compare (String.length b) (String.length a)) es) in let entities = List.sort (fun (an, av) (bn, bv) -> compare an bn) Entities.charRefTable in "^(" ^ f (List.map (fun (n,v) -> n) entities) ^ ")", (* In attributes, accept non-semicolon names as long as they're not followed by an alphanumeric *) "^(?:" ^ f (List.filter (fun n -> n.[String.length n - 1] <> ';') (List.map (fun (n,v) -> n) entities)) ^ ")(?:[^0-9A-Za-z]|$)", (* and a couple of extra tables *) Code.Hash (List.map (fun (n, v) -> Code.Str n, Code.HexInt v) entities), Code.Hash (List.map (fun (a, b) -> Code.HexInt a, Code.HexInt b) Entities.charRefMappingTable) let printJS algorithm = let a, b, c, d = entityRegexps in Lang_js.printStmt (Code.StmtList [ Code.Assign (Code.Var "Tokeniser.prototype.entityNameMatch", Code.Regexp a); Code.Newline; Code.Assign (Code.Var "Tokeniser.prototype.entityNameMatchAttr", Code.Regexp b); Code.Newline; Code.Assign (Code.Var "Tokeniser.prototype.entityNameValues", c); Code.Newline; Code.Assign (Code.Var "Tokeniser.prototype.entityMap", d); Code.Newline; Code.Enum ("MachineState", List.map string_of_machine_state enumerate_machine_state); Code.Newline; Code.Enum ("ContentModel", List.map string_of_content_model enumerate_content_model); Code.Newline; Code.Assign (Code.Var "Tokeniser.prototype.run", Code.AnonFunction ([], [moveGlobalsIntoObject (codeMainLoop_Switch algorithm)])) ]) let printPerl algorithm = let a, b, c, d = entityRegexps in Lang_perl.printStmt (Code.StmtList ([ Code.RawStmt "package TokeniserImpl;"; Code.RawStmt "use strict;"; Code.RawStmt "use warnings;"; Code.Newline; Code.RawStmt "my ($machineState, $contentModel, $Char_EOF, $currentCharacter, $escapeFlag);"; Code.Newline; Code.InitVar ("our", "entityNameMatch", Code.Regexp a); Code.Newline; Code.InitVar ("our", "entityNameMatchAttr", Code.Regexp b); Code.Newline; Code.InitVar ("our", "entityNameValues", c); Code.Newline; Code.InitVar ("our", "entityMap", d); Code.Newline; Code.Enum ("ContentModel", List.map string_of_content_model enumerate_content_model); Code.Newline; Code.StmtList (List.map (fun ms -> Code.DeclVar ("", string_of_machine_state ms)) enumerate_machine_state); Code.Newline; codeMainLoop_Table algorithm; Code.RawStmt "sub init { $machineState = $DataState; $contentModel = $PCDATA; $Char_EOF = 0; $escapeFlag = 0 }"; Code.RawStmt "sub step { $machineState->(); return $machineState; }"; ])) let printPython algorithm = Lang_perl.printStmt (Code.StmtList ([ Code.RawStmt "package TokeniserImpl;"; Code.RawStmt "use strict;"; Code.RawStmt "use warnings;"; Code.Newline; Code.RawStmt "my ($machineState, $contentModel, $Char_EOF, $currentCharacter, $escapeFlag);"; Code.Newline; Code.Enum ("ContentModel", List.map string_of_content_model enumerate_content_model); Code.Newline; Code.StmtList (List.map (fun ms -> Code.DeclVar ("", string_of_machine_state ms)) enumerate_machine_state); Code.Newline; codeMainLoop_Table algorithm; Code.RawStmt "sub init { $machineState = $DataState; $contentModel = $PCDATA; $Char_EOF = 0; $escapeFlag = 0 }"; Code.RawStmt "sub step { $machineState->(); return $machineState; }"; ])) ;; let a = refineInit Tokeniser_spec.tokeniserAlgorithm in let a = refineParseErrorMessages a in (*let a = refineStepAnnotation a in*) (*let a = refineInnerLoops a in*) (* (there isn't any point in this bit) *) let a = refinePreparations a in let a = refineCompound a in (* Generate the different languages in slightly different ways: *) (* ( let a = refineCharacterSwitch a in (* (is there any point in this bit?) *) let a = refineToCode a in let out = open_out "tokeniser_auto.js" in output_string out (printJS a); close_out out; ); ( let a = refineToCode a in let out = open_out "tokeniser_auto.cpp" in output_string out (printCPP a); close_out out; let out = open_out "tokeniser_auto.pl" in output_string out (printPerl a); close_out out; ); *) ( let a = refineToCode a in let out = open_out "tokeniser_auto.py" in output_string out (printPython a); close_out out; );