From 5ca5cdd3966bfa9dea94f196b060405f01335481 Mon Sep 17 00:00:00 2001
From: Erik Mogensen
Date: Wed, 26 Sep 2018 16:28:53 +0200
Subject: [PATCH] Improve performance of parsing

Looping through strings in JavaScript land is a lot slower than having
the JS engine do it natively. String's indexOf does this faster than
looping through each character.

This change deals with parsed data (i.e. the raw text between the tags),
attribute values (what's in the quotes) and inside XML comments.

These three types of data account for a very large portion of characters
in any XML document, leaving behind mainly names of tags and of
attributes.

It might be worth it to rewrite the switch statement, or move the
optimisations into the switch statement itself.
---
 lib/parsers/ltx.js | 27 ++++++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/lib/parsers/ltx.js b/lib/parsers/ltx.js
index b770524d..70da1071 100644
--- a/lib/parsers/ltx.js
+++ b/lib/parsers/ltx.js
@@ -25,6 +25,7 @@ var SaxLtx = module.exports = function SaxLtx () {
   var endTag
   var selfClosing
   var attrQuote
+  var attrQuoteChar
   var recordStart = 0
   var attrName
 
@@ -54,13 +55,36 @@ var SaxLtx = module.exports = function SaxLtx () {
 
     function endRecording () {
       if (typeof recordStart === 'number') {
-        var recorded = data.slice(recordStart, pos)
+        var recorded = data.substring(recordStart, pos)
         recordStart = undefined
         return recorded
       }
     }
 
     for (; pos < data.length; pos++) {
+      if (state === STATE_TEXT) {
+        // if we're looping through text, fast-forward using indexOf to
+        // the next '<' character
+        const lt = data.indexOf('<', pos)
+        if (lt !== -1 && pos !== lt) {
+          pos = lt
+        }
+      } else if (state === STATE_ATTR_VALUE) {
+        // if we're looping through an attribute, fast-forward using
+        // indexOf to the next end quote character
+        const quot = data.indexOf(attrQuoteChar, pos)
+        if (quot !== -1) {
+          pos = quot
+        }
+      } else if (state === STATE_IGNORE_COMMENT) {
+        // if we're looping through a comment, fast-forward using
+        // indexOf to the first end-comment character
+        const endcomment = data.indexOf('-->', pos)
+        if (endcomment !== -1) {
+          pos = endcomment + 2 // target the '>' character
+        }
+      }
+
       var c = data.charCodeAt(pos)
       switch (state) {
         case STATE_TEXT:
@@ -153,6 +177,7 @@ var SaxLtx = module.exports = function SaxLtx () {
         case STATE_ATTR_QUOT:
           if (c === 34 /* " */ || c === 39 /* ' */) {
             attrQuote = c
+            attrQuoteChar = c === 34 ? '"' : "'"
             state = STATE_ATTR_VALUE
             recordStart = pos + 1
           }