Skip to content

Commit

Permalink
Improve performance of parsing
Browse files Browse the repository at this point in the history
Looping through strings character by character in JavaScript code is a
lot slower than having the JS engine do it natively. String's indexOf
finds the next occurrence faster than looping through each character.

This change deals with parsed character data (i.e. the raw text between
the tags), attribute values (what's in the quotes) and the contents of
XML comments.  These three types of data account for a very large
portion of the characters in any XML document, leaving behind mainly
the names of tags and of attributes.

It might be worth it to rewrite the switch statement, or move the
optimisations into the switch statement itself.
  • Loading branch information
Erik Mogensen committed Sep 28, 2018
1 parent 0f710e1 commit b3c32c1
Showing 1 changed file with 26 additions and 1 deletion.
27 changes: 26 additions & 1 deletion lib/parsers/ltx.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ var SaxLtx = module.exports = function SaxLtx () {
var endTag
var selfClosing
var attrQuote
var attrQuoteChar
var recordStart = 0
var attrName

Expand Down Expand Up @@ -54,13 +55,36 @@ var SaxLtx = module.exports = function SaxLtx () {

/**
 * Finish the current recording and return the recorded text.
 *
 * Reads `data`, `recordStart` and `pos` from the enclosing scope. When a
 * recording is active (`recordStart` is a number), the characters of `data`
 * from `recordStart` up to, but not including, `pos` are returned and
 * `recordStart` is reset to `undefined`.
 *
 * @returns {string|undefined} the recorded substring, or `undefined` when no
 *   recording is active.
 */
function endRecording () {
  if (typeof recordStart === 'number') {
    // A single substring() call: the earlier slice() result was overwritten
    // before it was ever read, so it was dead work.
    const recorded = data.substring(recordStart, pos)
    // A non-number recordStart marks "not recording", so a second call
    // returns undefined until recording is started again.
    recordStart = undefined
    return recorded
  }
}

for (; pos < data.length; pos++) {
if (state === STATE_TEXT) {
// if we're looping through text, fast-forward using indexOf to
// the next '<' character
const lt = data.indexOf('<', pos)
if (lt !== -1 && pos !== lt) {
pos = lt
}
} else if (state === STATE_ATTR_VALUE) {
// if we're looping through an attribute, fast-forward using
// indexOf to the next end quote character
const quot = data.indexOf(attrQuoteChar, pos)
if (quot !== -1) {
pos = quot
}
} else if (state === STATE_IGNORE_COMMENT) {
// if we're looping through a comment, fast-forward using
// indexOf to the first end-comment character
const endcomment = data.indexOf('-->', pos)
if (endcomment !== -1) {
pos = endcomment + 2 // target the '>' character
}
}

var c = data.charCodeAt(pos)
switch (state) {
case STATE_TEXT:
Expand Down Expand Up @@ -153,6 +177,7 @@ var SaxLtx = module.exports = function SaxLtx () {
case STATE_ATTR_QUOT:
if (c === 34 /* " */ || c === 39 /* ' */) {
attrQuote = c
attrQuoteChar = c === 34 ? '"' : "'"
state = STATE_ATTR_VALUE
recordStart = pos + 1
}
Expand Down

0 comments on commit b3c32c1

Please sign in to comment.