/**
 * Tokeniser state holder.
 *
 * Tokens are collected in `tokenStream`. Runs of character data are buffered
 * in `characterToken` and flushed as a single ["Character", text] entry the
 * next time any other token is emitted.
 *
 * NOTE(review): `DataState` and `PCDATA` are not defined in this section of
 * the file; they are presumably the initial state-machine state and content
 * model - confirm against their definitions.
 */
function Tokeniser() {
  // Sentinel character code returned once the input is exhausted.
  this.Char_EOF = 0;
  this.machineState = DataState;
  this.contentModel = PCDATA;
  this.inputStreamPos = 0;
  this.tokenStream = [];
  this.characterToken = "";
  this.currentToken = null;
  this.currentCharacter = this.Char_EOF;
}

/**
 * Normalises `str`, stores it as the input stream, calls `this.run()` and
 * returns the token stream.
 *
 * Normalisation: CRLF and lone CR become LF; every U+0000 becomes U+FFFD and
 * records one parse error.
 *
 * @param {string} str - raw input text
 * @returns {Array} this.tokenStream
 */
Tokeniser.prototype.tokenise = function (str) {
  // The replace callback is not invoked with the tokeniser as `this`, so keep
  // an explicit reference in order to report the parse error on this instance.
  var self = this;
  var normalised = str.replace(/\r\n?/g, "\n").replace(/\0/g, function () {
    self.parseError();
    return "\uFFFD";
  });
  this.inputStream = normalised;
  // `run` is defined outside this section of the file.
  this.run();
  return this.tokenStream;
};

/**
 * Appends `t` to the token stream, first flushing any buffered character data
 * as a ["Character", text] token so ordering is preserved.
 *
 * @param {*} t - token to append
 */
Tokeniser.prototype.emitToken = function (t) {
  if (this.characterToken.length) {
    this.tokenStream.push(["Character", this.characterToken]);
    this.characterToken = "";
  }
  this.tokenStream.push(t);
};

/** Steps the input position back by one so the last character is read again. */
Tokeniser.prototype.reconsumeCharacter = function () {
  --this.inputStreamPos;
};

/**
 * Buffers one character of text content.
 *
 * @param {number} c - character code; values above 0xFFFF are treated as
 *   Unicode code points and stored as a UTF-16 surrogate pair
 *   (String.fromCharCode alone would truncate them to 16 bits).
 */
Tokeniser.prototype.emitCharacterToken = function (c) {
  if (c > 0xFFFF) {
    var offset = c - 0x10000;
    this.characterToken += String.fromCharCode(
      0xD800 + (offset >> 10),
      0xDC00 + (offset & 0x3FF)
    );
  } else {
    this.characterToken += String.fromCharCode(c);
  }
};

/** Emits the "EOF" token (flushing buffered character data first). */
Tokeniser.prototype.emitEOFToken = function () {
  this.emitToken("EOF");
};

/**
 * Reads the next character code from the input stream and advances the
 * position. The position is advanced even at end of input, so that
 * reconsumeCharacter() stays symmetric.
 *
 * @returns {number} UTF-16 code unit, or Char_EOF when past the end
 */
Tokeniser.prototype.consumeCharacter = function () {
  var c;
  if (this.inputStreamPos >= this.inputStream.length) {
    c = this.Char_EOF;
  } else {
    c = this.inputStream.charCodeAt(this.inputStreamPos);
  }
  this.inputStreamPos++;
  return c;
};

/** Intentionally empty placeholder. */
Tokeniser.prototype.consumeAndEmitEntity = function () { };

/** Intentionally empty placeholder. */
Tokeniser.prototype.consumeAndAppendEntity = function () { };

/** Starts a new start-tag token: ["StartTag", name, attributes, flag]. */
Tokeniser.prototype.createStartTagToken = function () {
  this.currentToken = ["StartTag", "", [], false];
};

/** Starts a new end-tag token: ["EndTag", name, attributes, flag]. */
Tokeniser.prototype.createEndTagToken = function () {
  this.currentToken = ["EndTag", "", [], false];
};

/**
 * Adds an empty [name, value] attribute to the current tag token and resets
 * slot 3 of the token to false.
 */
Tokeniser.prototype.createTagTokenAttribute = function () {
  this.currentToken[2].push(["", ""]);
  this.currentToken[3] = false;
};

/** Starts a new comment token: ["Comment", data]. */
Tokeniser.prototype.createCommentToken = function () {
  this.currentToken = ["Comment", ""];
};

/** Starts a new doctype token: ["DOCTYPE", "", null, null, true]. */
Tokeniser.prototype.createDoctypeToken = function () {
  this.currentToken = ["DOCTYPE", "", null, null, true];
};

/** Emits the current (tag) token. */
Tokeniser.prototype.emitCurrentTagToken = function () {
  this.emitToken(this.currentToken);
};

/** Emits the current (comment) token. */
Tokeniser.prototype.emitCurrentCommentToken = function () {
  this.emitToken(this.currentToken);
};
/** Emits the current (doctype) token. */
Tokeniser.prototype.emitCurrentDoctypeToken = function () {
  this.emitToken(this.currentToken);
};

/**
 * Checks whether the most recently added attribute of the current tag token
 * repeats the name of an earlier attribute. If so: records a parse error,
 * sets slot 3 of the token to true (which makes
 * appendToTagTokenAttributeValue ignore further value characters) and removes
 * the duplicate attribute.
 */
Tokeniser.prototype.handleDuplicateAttributes = function () {
  var attributes = this.currentToken[2];
  var lastIndex = attributes.length - 1;
  var newName = attributes[lastIndex][0];
  for (var i = 0; i < lastIndex; ++i) {
    if (attributes[i][0] === newName) {
      this.parseError();
      this.currentToken[3] = true;
      attributes.pop();
      return;
    }
  }
};

/** Appends character code `c` to the current tag token's name. */
Tokeniser.prototype.appendToTagTokenName = function (c) {
  this.currentToken[1] += String.fromCharCode(c);
};

/** Appends character code `c` to the name of the last attribute. */
Tokeniser.prototype.appendToTagTokenAttributeName = function (c) {
  var attributes = this.currentToken[2];
  attributes[attributes.length - 1][0] += String.fromCharCode(c);
};

/**
 * Appends a character to the value of the last attribute, unless slot 3 of
 * the token is set (the attribute was dropped as a duplicate).
 *
 * @param {number} c - character code; values above 0xFFFF (as produced by
 *   numeric character references) are treated as Unicode code points and
 *   stored as a UTF-16 surrogate pair, because String.fromCharCode alone
 *   would truncate them to 16 bits.
 */
Tokeniser.prototype.appendToTagTokenAttributeValue = function (c) {
  if (this.currentToken[3]) return;
  var text;
  if (c > 0xFFFF) {
    var offset = c - 0x10000;
    text = String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
  } else {
    text = String.fromCharCode(c);
  }
  var attributes = this.currentToken[2];
  attributes[attributes.length - 1][1] += text;
};

/** Appends character code `c` to the current comment token's data. */
Tokeniser.prototype.appendToCommentToken = function (c) {
  this.currentToken[1] += String.fromCharCode(c);
};

/** Appends character code `c` to the current doctype token's name. */
Tokeniser.prototype.appendToDoctypeTokenName = function (c) {
  this.currentToken[1] += String.fromCharCode(c);
};

/**
 * Appends character code `c` to slot 2 of the doctype token.
 * Slot 2 starts out as null when the token is created, so
 * setDoctypeTokenPubIdEmpty() must have been called first; otherwise the
 * null is string-concatenated.
 */
Tokeniser.prototype.appendToDoctypeTokenPubId = function (c) {
  this.currentToken[2] += String.fromCharCode(c);
};

/**
 * Appends character code `c` to slot 3 of the doctype token.
 * Slot 3 starts out as null when the token is created, so
 * setDoctypeTokenSysIdEmpty() must have been called first; otherwise the
 * null is string-concatenated.
 */
Tokeniser.prototype.appendToDoctypeTokenSysId = function (c) {
  this.currentToken[3] += String.fromCharCode(c);
};

/** Clears slot 4 of the doctype token (created as true). */
Tokeniser.prototype.setDoctypeTokenIncorrect = function () {
  this.currentToken[4] = false;
};

/** Sets slot 2 of the doctype token to the empty string. */
Tokeniser.prototype.setDoctypeTokenPubIdEmpty = function () {
  this.currentToken[2] = "";
};

/** Sets slot 3 of the doctype token to the empty string. */
Tokeniser.prototype.setDoctypeTokenSysIdEmpty = function () {
  this.currentToken[3] = "";
};

/** Records a parse error by emitting a "ParseError" token. */
Tokeniser.prototype.parseError = function () {
  this.emitToken("ParseError");
};

/**
 * Records a parse error unless all of the following hold: the next
 * unconsumed character is '>', the current token is a start tag, and its
 * name is one of the listed element names.
 */
Tokeniser.prototype.parseErrorIfNonpermittedSlash = function () {
  var token = this.currentToken;
  var permitted =
    this.inputStream[this.inputStreamPos] === '>' &&
    token[0] === 'StartTag' &&
    token[1].match(/^(base|link|meta|hr|br|img|embed|param|area|col|input)$/);
  if (!permitted) this.parseError();
};

/** Records a parse error if the current token is an end tag with attributes. */
Tokeniser.prototype.parseErrorIfEndTagWithAttributes = function () {
  if (this.currentToken[0] === 'EndTag' && this.currentToken[2].length) {
    this.parseError();
  }
};

/**
 * Returns the character code `n` positions before the most recently consumed
 * character (n = 0 is that character itself), or 0 if that would fall before
 * the start of the input.
 *
 * @param {number} n - how far to look back
 * @returns {number}
 */
Tokeniser.prototype.getOldCharacter = function (n) {
  if (n >= this.inputStreamPos) return 0;
  return this.inputStream.charCodeAt(this.inputStreamPos - n - 1);
};

/**
 * Case-insensitively tests whether the input, starting at the most recently
 * consumed character, begins with `s`.
 *
 * @param {string} s - text to look for; must already be lower-case, since
 *   only the input side is lower-cased before comparing
 * @returns {boolean}
 */
Tokeniser.prototype.isFollowedBy = function (s) {
  var upcoming = this.inputStream.substr(this.inputStreamPos - 1, s.length);
  return upcoming.toLowerCase() === s;
};

// XXX: not implemented; always reports false.
Tokeniser.prototype.isEndOfCData = function () {
  return false;
};

/**
 * Tries to consume a character reference starting at the current input
 * position (the text right after an '&').
 *
 * On success the referenced character is appended to the current attribute
 * value (when `attr` is truthy) or buffered as character data (otherwise),
 * the input position is moved past the reference, and true is returned.
 * On failure nothing is consumed and false is returned. Parse errors are
 * recorded along the way (missing ';', bare '#', remapped or invalid code
 * points, unknown names).
 *
 * NOTE(review): `entityMap`, `entityNameMatch`, `entityNameMatchAttr` and
 * `entityNameValues` are not defined in this section of the file; from their
 * use here they look like a numeric remapping table, two regexes whose first
 * capture group is the entity name, and a name-to-character-code table -
 * confirm against their definitions.
 *
 * @param {*} attr - truthy when the reference occurs inside an attribute value
 * @returns {boolean} whether a character reference was consumed
 */
Tokeniser.prototype.hasConsumableEntity = function (attr) {
  var s = this.inputStream.substr(this.inputStreamPos);

  // Whitespace, '<', '&' or end of input right after '&' is not a character
  // reference and is not an error. U+000C FORM FEED belongs to this set; a
  // CR can no longer occur here because tokenise() normalises it to LF.
  if (s.match(/^([\t\u000A\u000B\u000C\r <&]|$)/)) return false;

  var r;
  var n;
  var len;
  if (r = s.match(/^#([0-9]+)(;?)/)) {
    // Decimal numeric reference.
    n = +r[1];
    len = r[0].length;
    if (!r[2]) this.parseError(); // missing terminating ';'
  } else if (r = s.match(/^#[xX]([0-9A-Fa-f]+)(;?)/)) {
    // Hexadecimal numeric reference.
    n = +("0x" + r[1]);
    len = r[0].length;
    if (!r[2]) this.parseError(); // missing terminating ';'
  } else if (s.match(/^#/)) {
    // '#' not followed by any digits.
    this.parseError();
    return false;
  }

  if (n !== undefined) {
    if (this.entityMap[n]) {
      // Code point with a replacement in entityMap.
      this.parseError();
      n = this.entityMap[n];
    } else if (n === 0 || n > 0x10FFFF || (n >= 0xD800 && n <= 0xDFFF)) {
      // NUL, beyond the Unicode range, or a surrogate: use U+FFFD.
      this.parseError();
      n = 0xFFFD;
    }
  } else {
    r = this.entityNameMatch.exec(s);
    if (!r) {
      this.parseError();
      return false;
    }
    if (r[1][r[1].length - 1] !== ';') {
      this.parseError();
      // In attributes an unterminated name is only accepted when the
      // attribute-specific matcher also accepts it.
      if (attr && !this.entityNameMatchAttr.exec(s)) return false;
    }
    n = this.entityNameValues[r[1]];
    len = r[0].length;
  }

  if (attr) {
    this.appendToTagTokenAttributeValue(n);
  } else {
    this.emitCharacterToken(n);
  }
  this.inputStreamPos += len;
  return true;
};