open Tokeniser_states

(* Constructed from r2548 of the HTML5 spec *)

(** Conditions that a tokeniser rule can test.  Matchers are plain data:
    this file only builds them, it never evaluates them. *)
type tokeniser_matcher =
  | AND of tokeniser_matcher * tokeniser_matcher
  | OR of tokeniser_matcher * tokeniser_matcher
  | NOT of tokeniser_matcher
  | IsContentModel of content_model
  | IsConsumedCharacter of int
  | IsConsumedCharacterEOF
  | IsConsumedCharacterInRange of int * int (* inclusive of both end points *)
  | IsChar1 of int (* one before the currently-consumed character *)
  | IsChar2 of int (* two before etc *)
  | IsChar3 of int
  (* complex conditions *)
  | IsEndOfCData
  | IsStartOfComment (* consumed = '-', next = "-" *)
  | IsDoctype (* consumed = 'D', next = "OCTYPE" *)
  | IsPublic (* consumed = 'P', next = "UBLIC" *)
  | IsSystem (* consumed = 'S', next = "YSTEM" *)
  | IsEscapeFlag
  | NotYetHandled

(* Infix shorthands for building matchers.

   Both symbols fall into the same OCaml precedence class (the one shared by
   comparison-like operators starting with '&' or '|') and associate to the
   left.  Unlike the built-in boolean operators, [&&^] therefore does NOT bind
   tighter than [||^]: [a ||^ b &&^ c] means [(a ||^ b) &&^ c]. *)
let ( &&^ ) lhs rhs = AND (lhs, rhs)
let ( ||^ ) lhs rhs = OR (lhs, rhs)

(** Steps that a tokeniser rule can request.  Like matchers these are plain
    data; their interpretation lives outside this file. *)
type tokeniser_action =
  | SwitchMachineState of machine_state
  | SetEscapeFlag of bool
  | UnconsumeCharacter
  | EmitConsumedCharacter
  | EmitCharacter of int
  | EmitEOFToken
  | ConsumeCharacter
  | ConsumeAndEmitCharRef of int option
    (* TODO: split into consume-from-CharRef + emit-consumed ? *)
  | ConsumeAndAppendCharRefToAttributeValue of int option (* ditto *)
  | CreateStartTagToken
  | CreateEndTagToken
  | CreateTagTokenAttribute
  | CreateCommentToken
  | CreateDoctypeToken
  | EmitCurrentTagToken
  | EmitCurrentCommentToken
  | EmitCurrentDoctypeToken
  | HandleDuplicateAttributes
  | AppendToTagTokenName
  | AppendToTagTokenNameLowercase
  | AppendToTagTokenAttributeName
  | AppendToTagTokenAttributeNameLowercase
  | AppendToTagTokenAttributeValue
  | AppendAmpersandToTagTokenAttributeValue
  | SetTagTokenSelfClosingFlag
  | AppendToCommentToken
  | AppendHyphenToCommentToken
  | AppendToDoctypeTokenName
  | AppendToDoctypeTokenNameLowercase
  | AppendToDoctypeTokenPubId
  | AppendToDoctypeTokenSysId
  | SetDoctypeTokenIncorrect
  | SetDoctypeTokenPubIdEmpty
  | SetDoctypeTokenSysIdEmpty
  | ParseError
  (* Added as a post-processing step through the whole algorithm: *)
  | ParseErrorIfEndTagWithAttributes
  | ParseErrorIfEndTagWithSelfClosing

(** The tokeniser described as a table: one
    [(state, flag, [ (matcher, actions); ... ])] entry per machine state.

    NOTE(review): the boolean looks like "consume a character before testing
    the rules" (it is [false] only for the character-reference states and for
    [BogusCommentState]) -- confirm against the code that interprets the table.
    NOTE(review): rules are presumably tried top to bottom with
    [NotYetHandled] acting as the fallback -- confirm likewise.

    The value finally bound is the raw table with two end-tag checks inserted
    in front of every [EmitCurrentTagToken] action.

    Code points used below:
      0x0009 TAB, 0x000A LF, 0x000C FF, 0x0020 SPACE,
      0x0021 EXCLAMATION MARK, 0x0022 QUOTATION MARK, 0x0026 AMPERSAND,
      0x0027 APOSTROPHE, 0x002D HYPHEN-MINUS, 0x002F SOLIDUS,
      0x003C LESS-THAN, 0x003D EQUALS, 0x003E GREATER-THAN,
      0x003F QUESTION MARK, 0x0041-0x005A A-Z, 0x0061-0x007A a-z *)
let tokeniserAlgorithm =
  (* ---- matchers that recur throughout the table ---- *)
  let whitespace =
    (IsConsumedCharacter 0x0009)
    ||^ (IsConsumedCharacter 0x000A)
    ||^ (IsConsumedCharacter 0x000C)
    ||^ (IsConsumedCharacter 0x0020)
  in
  let uppercase = IsConsumedCharacterInRange (0x0041, 0x005A) in
  let lowercase = IsConsumedCharacterInRange (0x0061, 0x007A) in
  let any_quote = (IsConsumedCharacter 0x0022) ||^ (IsConsumedCharacter 0x0027) in
  let quote_or_equals = any_quote ||^ (IsConsumedCharacter 0x003D) in
  let pcdata = IsContentModel PCDATA in
  let rcdata_or_cdata = (IsContentModel RCDATA) ||^ (IsContentModel CDATA) in
  let pcdata_or_cdata_end = pcdata ||^ IsEndOfCData in

  (* ---- action lists that recur throughout the table ---- *)
  let goto state = [ SwitchMachineState state ] in
  let emit_tag = [ EmitCurrentTagToken; SwitchMachineState DataState ] in
  let eof_in_tag =
    [ ParseError; EmitCurrentTagToken; UnconsumeCharacter;
      SwitchMachineState DataState ]
  in
  let new_attribute =
    [ CreateTagTokenAttribute; AppendToTagTokenAttributeName;
      SwitchMachineState AttributeNameState ]
  in
  let new_attribute_lowercase =
    [ CreateTagTokenAttribute; AppendToTagTokenAttributeNameLowercase;
      SwitchMachineState AttributeNameState ]
  in
  let emit_comment = [ EmitCurrentCommentToken; SwitchMachineState DataState ] in
  let error_emit_comment =
    [ ParseError; EmitCurrentCommentToken; SwitchMachineState DataState ]
  in
  let eof_in_comment =
    [ ParseError; EmitCurrentCommentToken; UnconsumeCharacter;
      SwitchMachineState DataState ]
  in
  let hyphen_then_char =
    [ AppendHyphenToCommentToken; AppendToCommentToken;
      SwitchMachineState CommentState ]
  in
  let emit_doctype = [ EmitCurrentDoctypeToken; SwitchMachineState DataState ] in
  let error_emit_incorrect_doctype =
    [ ParseError; SetDoctypeTokenIncorrect; EmitCurrentDoctypeToken;
      SwitchMachineState DataState ]
  in
  let eof_in_doctype =
    [ ParseError; SetDoctypeTokenIncorrect; EmitCurrentDoctypeToken;
      UnconsumeCharacter; SwitchMachineState DataState ]
  in
  let bogus_doctype =
    [ ParseError; SetDoctypeTokenIncorrect;
      SwitchMachineState BogusDoctypeState ]
  in

  let states = [
    (DataState, true, [
      (* Parenthesised explicitly; this is how the unparenthesised
         [x ||^ y &&^ z] form parses anyway. *)
      ( (IsConsumedCharacter 0x0026)
        &&^ ((pcdata ||^ (IsContentModel RCDATA)) &&^ (NOT IsEscapeFlag)),
        goto CharRefDataState );
      ( (IsConsumedCharacter 0x002D)
        &&^ rcdata_or_cdata
        &&^ (NOT IsEscapeFlag)
        &&^ (IsChar1 0x002D)
        &&^ (IsChar2 0x0021)
        &&^ (IsChar3 0x003C),
        [ SetEscapeFlag true; EmitConsumedCharacter ] );
      ( (IsConsumedCharacter 0x003C) &&^ pcdata,
        goto TagOpenState );
      ( (IsConsumedCharacter 0x003C) &&^ rcdata_or_cdata &&^ (NOT IsEscapeFlag),
        goto TagOpenState );
      ( (IsConsumedCharacter 0x003E)
        &&^ rcdata_or_cdata
        &&^ IsEscapeFlag
        &&^ (IsChar1 0x002D)
        &&^ (IsChar2 0x002D),
        [ SetEscapeFlag false; EmitConsumedCharacter ] );
      ( IsConsumedCharacterEOF,
        [ EmitEOFToken ] );
      ( NotYetHandled,
        [ EmitConsumedCharacter ] );
    ]);

    (CharRefDataState, false, [
      ( NotYetHandled,
        [ ConsumeAndEmitCharRef None; SwitchMachineState DataState ] );
    ]);

    (TagOpenState, true, [
      ( rcdata_or_cdata &&^ (IsConsumedCharacter 0x002F),
        goto CloseTagOpenState );
      ( rcdata_or_cdata &&^ NOT (IsConsumedCharacter 0x002F),
        [ EmitCharacter 0x003C; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( pcdata &&^ (IsConsumedCharacter 0x0021),
        goto MarkupDeclarationOpenState );
      ( pcdata &&^ (IsConsumedCharacter 0x002F),
        goto CloseTagOpenState );
      ( pcdata &&^ uppercase,
        [ CreateStartTagToken; AppendToTagTokenNameLowercase;
          SwitchMachineState TagNameState ] );
      ( pcdata &&^ lowercase,
        [ CreateStartTagToken; AppendToTagTokenName;
          SwitchMachineState TagNameState ] );
      ( pcdata &&^ (IsConsumedCharacter 0x003E),
        [ ParseError; EmitCharacter 0x003C; EmitCharacter 0x003E;
          SwitchMachineState DataState ] );
      ( pcdata &&^ (IsConsumedCharacter 0x003F),
        [ ParseError; SwitchMachineState BogusCommentState ] );
      ( pcdata &&^ NotYetHandled,
        [ ParseError; EmitCharacter 0x003C; UnconsumeCharacter;
          SwitchMachineState DataState ] );
    ]);

    (CloseTagOpenState, true, [
      (* TODO: could split this into two states, to be more efficient? *)
      ( rcdata_or_cdata &&^ NOT IsEndOfCData,
        [ EmitCharacter 0x003C; EmitCharacter 0x002F; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( pcdata_or_cdata_end &&^ uppercase,
        [ CreateEndTagToken; AppendToTagTokenNameLowercase;
          SwitchMachineState TagNameState ] );
      ( pcdata_or_cdata_end &&^ lowercase,
        [ CreateEndTagToken; AppendToTagTokenName;
          SwitchMachineState TagNameState ] );
      ( pcdata_or_cdata_end &&^ (IsConsumedCharacter 0x003E),
        [ ParseError; SwitchMachineState DataState ] );
      ( pcdata_or_cdata_end &&^ IsConsumedCharacterEOF,
        [ ParseError; EmitCharacter 0x003C; EmitCharacter 0x002F;
          UnconsumeCharacter; SwitchMachineState DataState ] );
      ( pcdata_or_cdata_end &&^ NotYetHandled,
        [ ParseError; SwitchMachineState BogusCommentState ] );
    ]);

    (TagNameState, true, [
      ( whitespace, goto BeforeAttributeNameState );
      ( IsConsumedCharacter 0x002F, goto SelfClosingStartTagState );
      ( IsConsumedCharacter 0x003E, emit_tag );
      ( uppercase, [ AppendToTagTokenNameLowercase ] );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, [ AppendToTagTokenName ] );
    ]);

    (BeforeAttributeNameState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x002F, goto SelfClosingStartTagState );
      ( IsConsumedCharacter 0x003E, emit_tag );
      ( uppercase, new_attribute_lowercase );
      ( quote_or_equals, ParseError :: new_attribute );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, new_attribute );
    ]);

    (AttributeNameState, true, [
      ( whitespace,
        [ HandleDuplicateAttributes;
          SwitchMachineState AfterAttributeNameState ] );
      ( IsConsumedCharacter 0x002F,
        [ HandleDuplicateAttributes;
          SwitchMachineState SelfClosingStartTagState ] );
      ( IsConsumedCharacter 0x003D,
        [ HandleDuplicateAttributes;
          SwitchMachineState BeforeAttributeValueState ] );
      ( IsConsumedCharacter 0x003E,
        HandleDuplicateAttributes :: emit_tag );
      ( uppercase,
        [ AppendToTagTokenAttributeNameLowercase ] );
      ( any_quote,
        [ ParseError; AppendToTagTokenAttributeName ] );
      ( IsConsumedCharacterEOF,
        [ ParseError; HandleDuplicateAttributes; EmitCurrentTagToken;
          UnconsumeCharacter; SwitchMachineState DataState ] );
      ( NotYetHandled,
        [ AppendToTagTokenAttributeName ] );
    ]);

    (AfterAttributeNameState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x002F, goto SelfClosingStartTagState );
      ( IsConsumedCharacter 0x003D, goto BeforeAttributeValueState );
      ( IsConsumedCharacter 0x003E, emit_tag );
      ( uppercase, new_attribute_lowercase );
      ( any_quote, ParseError :: new_attribute );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, new_attribute );
    ]);

    (BeforeAttributeValueState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x0022,
        goto AttributeValueDoubleQuotedState );
      ( IsConsumedCharacter 0x0026,
        [ UnconsumeCharacter; SwitchMachineState AttributeValueUnquotedState ] );
      ( IsConsumedCharacter 0x0027,
        goto AttributeValueSingleQuotedState );
      ( IsConsumedCharacter 0x003E,
        ParseError :: emit_tag );
      ( IsConsumedCharacter 0x003D,
        [ ParseError; AppendToTagTokenAttributeValue;
          SwitchMachineState AttributeValueUnquotedState ] );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled,
        [ AppendToTagTokenAttributeValue;
          SwitchMachineState AttributeValueUnquotedState ] );
    ]);

    (AttributeValueDoubleQuotedState, true, [
      ( IsConsumedCharacter 0x0022, goto AfterAttributeValueQuotedState );
      ( IsConsumedCharacter 0x0026,
        goto CharRefInAttributeValueState_DoubleQuoted );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, [ AppendToTagTokenAttributeValue ] );
    ]);

    (AttributeValueSingleQuotedState, true, [
      ( IsConsumedCharacter 0x0027, goto AfterAttributeValueQuotedState );
      ( IsConsumedCharacter 0x0026,
        goto CharRefInAttributeValueState_SingleQuoted );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, [ AppendToTagTokenAttributeValue ] );
    ]);

    (AttributeValueUnquotedState, true, [
      ( whitespace, goto BeforeAttributeNameState );
      ( IsConsumedCharacter 0x0026,
        goto CharRefInAttributeValueState_Unquoted );
      ( IsConsumedCharacter 0x003E, emit_tag );
      ( quote_or_equals, [ ParseError; AppendToTagTokenAttributeValue ] );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled, [ AppendToTagTokenAttributeValue ] );
    ]);

    (CharRefInAttributeValueState_DoubleQuoted, false, [
      ( NotYetHandled,
        [ ConsumeAndAppendCharRefToAttributeValue (Some 0x0022);
          SwitchMachineState AttributeValueDoubleQuotedState ] );
    ]);

    (CharRefInAttributeValueState_SingleQuoted, false, [
      ( NotYetHandled,
        [ ConsumeAndAppendCharRefToAttributeValue (Some 0x0027);
          SwitchMachineState AttributeValueSingleQuotedState ] );
    ]);

    (CharRefInAttributeValueState_Unquoted, false, [
      ( NotYetHandled,
        [ ConsumeAndAppendCharRefToAttributeValue None;
          SwitchMachineState AttributeValueUnquotedState ] );
    ]);

    (AfterAttributeValueQuotedState, true, [
      ( whitespace, goto BeforeAttributeNameState );
      ( IsConsumedCharacter 0x002F, goto SelfClosingStartTagState );
      ( IsConsumedCharacter 0x003E, emit_tag );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled,
        [ ParseError; UnconsumeCharacter;
          SwitchMachineState BeforeAttributeNameState ] );
    ]);

    (SelfClosingStartTagState, true, [
      ( IsConsumedCharacter 0x003E,
        SetTagTokenSelfClosingFlag :: emit_tag );
      ( IsConsumedCharacterEOF, eof_in_tag );
      ( NotYetHandled,
        [ ParseError; UnconsumeCharacter;
          SwitchMachineState BeforeAttributeNameState ] );
    ]);

    (BogusCommentState, false, [
      ( IsConsumedCharacterEOF,
        [ CreateCommentToken; EmitCurrentCommentToken; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( IsConsumedCharacter 0x003E,
        CreateCommentToken :: emit_comment );
      ( NotYetHandled,
        [ CreateCommentToken; AppendToCommentToken;
          SwitchMachineState BogusCommentState_Continue ] );
    ]);

    (BogusCommentState_Continue, true, [
      ( IsConsumedCharacterEOF,
        [ EmitCurrentCommentToken; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( IsConsumedCharacter 0x003E, emit_comment );
      ( NotYetHandled, [ AppendToCommentToken ] );
    ]);

    (MarkupDeclarationOpenState, true, [
      ( IsStartOfComment,
        [ ConsumeCharacter; CreateCommentToken;
          SwitchMachineState CommentStartState ] );
      ( (NOT IsStartOfComment) &&^ IsDoctype,
        [ ConsumeCharacter; ConsumeCharacter; ConsumeCharacter;
          ConsumeCharacter; ConsumeCharacter; ConsumeCharacter;
          SwitchMachineState DoctypeState ] );
      (* TODO: foreign content / CDATA *)
      ( NotYetHandled,
        [ ParseError; SwitchMachineState BogusCommentState ] );
    ]);

    (CommentStartState, true, [
      ( IsConsumedCharacter 0x002D, goto CommentStartDashState );
      ( IsConsumedCharacter 0x003E, error_emit_comment );
      ( IsConsumedCharacterEOF, eof_in_comment );
      ( NotYetHandled,
        [ AppendToCommentToken; SwitchMachineState CommentState ] );
    ]);

    (CommentStartDashState, true, [
      ( IsConsumedCharacter 0x002D, goto CommentEndState );
      ( IsConsumedCharacter 0x003E, error_emit_comment );
      ( IsConsumedCharacterEOF, eof_in_comment );
      ( NotYetHandled, hyphen_then_char );
    ]);

    (CommentState, true, [
      ( IsConsumedCharacter 0x002D, goto CommentEndDashState );
      ( IsConsumedCharacterEOF, eof_in_comment );
      ( NotYetHandled, [ AppendToCommentToken ] );
    ]);

    (CommentEndDashState, true, [
      ( IsConsumedCharacter 0x002D, goto CommentEndState );
      ( IsConsumedCharacterEOF, eof_in_comment );
      ( NotYetHandled, hyphen_then_char );
    ]);

    (CommentEndState, true, [
      ( IsConsumedCharacter 0x003E, emit_comment );
      ( IsConsumedCharacter 0x002D,
        [ ParseError; AppendHyphenToCommentToken ] );
      ( IsConsumedCharacterEOF, eof_in_comment );
      ( NotYetHandled,
        ParseError :: AppendHyphenToCommentToken :: hyphen_then_char );
    ]);

    (DoctypeState, true, [
      ( whitespace, goto BeforeDoctypeNameState );
      ( NotYetHandled,
        [ ParseError; UnconsumeCharacter;
          SwitchMachineState BeforeDoctypeNameState ] );
    ]);

    (BeforeDoctypeNameState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x003E,
        [ ParseError; CreateDoctypeToken; SetDoctypeTokenIncorrect;
          EmitCurrentDoctypeToken; SwitchMachineState DataState ] );
      ( uppercase,
        [ CreateDoctypeToken; AppendToDoctypeTokenNameLowercase;
          SwitchMachineState DoctypeNameState ] );
      ( IsConsumedCharacterEOF,
        [ ParseError; CreateDoctypeToken; SetDoctypeTokenIncorrect;
          EmitCurrentDoctypeToken; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( NotYetHandled,
        [ CreateDoctypeToken; AppendToDoctypeTokenName;
          SwitchMachineState DoctypeNameState ] );
    ]);

    (DoctypeNameState, true, [
      ( whitespace, goto AfterDoctypeNameState );
      ( IsConsumedCharacter 0x003E, emit_doctype );
      ( uppercase, [ AppendToDoctypeTokenNameLowercase ] );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, [ AppendToDoctypeTokenName ] );
    ]);

    (AfterDoctypeNameState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x003E, emit_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( IsPublic,
        [ ConsumeCharacter; ConsumeCharacter; ConsumeCharacter;
          ConsumeCharacter; ConsumeCharacter;
          SwitchMachineState BeforeDoctypePublicIdentifierState ] );
      ( IsSystem,
        [ ConsumeCharacter; ConsumeCharacter; ConsumeCharacter;
          ConsumeCharacter; ConsumeCharacter;
          SwitchMachineState BeforeDoctypeSystemIdentifierState ] );
      ( NotYetHandled, bogus_doctype );
    ]);

    (BeforeDoctypePublicIdentifierState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x0022,
        [ SetDoctypeTokenPubIdEmpty;
          SwitchMachineState DoctypePublicIdentifierDoubleQuotedState ] );
      ( IsConsumedCharacter 0x0027,
        [ SetDoctypeTokenPubIdEmpty;
          SwitchMachineState DoctypePublicIdentifierSingleQuotedState ] );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, bogus_doctype );
    ]);

    (DoctypePublicIdentifierDoubleQuotedState, true, [
      ( IsConsumedCharacter 0x0022, goto AfterDoctypePublicIdentifierState );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, [ AppendToDoctypeTokenPubId ] );
    ]);

    (DoctypePublicIdentifierSingleQuotedState, true, [
      ( IsConsumedCharacter 0x0027, goto AfterDoctypePublicIdentifierState );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, [ AppendToDoctypeTokenPubId ] );
    ]);

    (AfterDoctypePublicIdentifierState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x0022,
        [ SetDoctypeTokenSysIdEmpty;
          SwitchMachineState DoctypeSystemIdentifierDoubleQuotedState ] );
      ( IsConsumedCharacter 0x0027,
        [ SetDoctypeTokenSysIdEmpty;
          SwitchMachineState DoctypeSystemIdentifierSingleQuotedState ] );
      ( IsConsumedCharacter 0x003E, emit_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, bogus_doctype );
    ]);

    (BeforeDoctypeSystemIdentifierState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x0022,
        [ SetDoctypeTokenSysIdEmpty;
          SwitchMachineState DoctypeSystemIdentifierDoubleQuotedState ] );
      ( IsConsumedCharacter 0x0027,
        [ SetDoctypeTokenSysIdEmpty;
          SwitchMachineState DoctypeSystemIdentifierSingleQuotedState ] );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, bogus_doctype );
    ]);

    (DoctypeSystemIdentifierDoubleQuotedState, true, [
      ( IsConsumedCharacter 0x0022, goto AfterDoctypeSystemIdentifierState );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, [ AppendToDoctypeTokenSysId ] );
    ]);

    (DoctypeSystemIdentifierSingleQuotedState, true, [
      ( IsConsumedCharacter 0x0027, goto AfterDoctypeSystemIdentifierState );
      ( IsConsumedCharacter 0x003E, error_emit_incorrect_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      ( NotYetHandled, [ AppendToDoctypeTokenSysId ] );
    ]);

    (AfterDoctypeSystemIdentifierState, true, [
      ( whitespace, [ ] );
      ( IsConsumedCharacter 0x003E, emit_doctype );
      ( IsConsumedCharacterEOF, eof_in_doctype );
      (* Unlike the other doctype fallbacks, this one does not mark the
         token incorrect. *)
      ( NotYetHandled,
        [ ParseError; SwitchMachineState BogusDoctypeState ] );
    ]);

    (BogusDoctypeState, true, [
      ( IsConsumedCharacter 0x003E, emit_doctype );
      ( IsConsumedCharacterEOF,
        [ EmitCurrentDoctypeToken; UnconsumeCharacter;
          SwitchMachineState DataState ] );
      ( NotYetHandled, [ ] );
    ]);
  ] in

  (* "When an end tag token is emitted with attributes, that is a parse error"
     -- so every EmitCurrentTagToken gets the two conditional parse-error
     checks placed immediately in front of it; all other actions are kept
     as they are, in their original order. *)
  let add_end_tag_checks actions =
    List.fold_right
      (fun action rest ->
        match action with
        | EmitCurrentTagToken ->
          ParseErrorIfEndTagWithAttributes
          :: ParseErrorIfEndTagWithSelfClosing
          :: EmitCurrentTagToken
          :: rest
        | other -> other :: rest)
      actions
      []
    (* TODO - "When an end tag token is emitted, the content model flag must
       be switched to the PCDATA state." *)
  in
  List.map
    (fun (state, consume, rules) ->
      ( state,
        consume,
        List.map
          (fun (matcher, actions) -> (matcher, add_end_tag_checks actions))
          rules ))
    states