diff --git a/components/prism-core.js b/components/prism-core.js index 10596f881c..cd5cee1e81 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -331,137 +331,8 @@ var _ = { return Token.stringify(_.util.encode(env.tokens), env.language); }, - matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { - for (var token in grammar) { - if (!grammar.hasOwnProperty(token) || !grammar[token]) { - continue; - } - - var patterns = grammar[token]; - patterns = Array.isArray(patterns) ? patterns : [patterns]; - - for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { - return; - } - - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, - lookbehindLength = 0, - alias = pattern.alias; - - if (greedy && !pattern.pattern.global) { - // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); - } - - pattern = pattern.pattern || pattern; - - // Don’t cache length as it changes during the loop - for (var i = index, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { - - var str = strarr[i]; - - if (strarr.length > text.length) { - // Something went terribly wrong, ABORT, ABORT! - return; - } - - if (str instanceof Token) { - continue; - } - - if (greedy && i != strarr.length - 1) { - pattern.lastIndex = pos; - var match = pattern.exec(text); - if (!match) { - break; - } - - var from = match.index + (lookbehind && match[1] ? match[1].length : 0), - to = match.index + match[0].length, - k = i, - p = pos; - - for (var len = strarr.length; k < len && (p < to || (!strarr[k].type && !strarr[k - 1].greedy)); ++k) { - p += strarr[k].length; - // Move the index i to the element in strarr that is closest to from - if (from >= p) { - ++i; - pos = p; - } - } - - // If strarr[i] is a Token, then the match starts inside another Token, which is invalid - if (strarr[i] instanceof Token) { - continue; - } - - // Number of tokens to delete and replace with the new match - delNum = k - i; - str = text.slice(pos, p); - match.index -= pos; - } else { - pattern.lastIndex = 0; - - var match = pattern.exec(str), - delNum = 1; - } - - if (!match) { - if (oneshot) { - break; - } - - continue; - } - - if(lookbehind) { - lookbehindLength = match[1] ? match[1].length : 0; - } - - var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, - before = str.slice(0, from), - after = str.slice(to); - - var args = [i, delNum]; - - if (before) { - ++i; - pos += before.length; - args.push(before); - } - - var wrapped = new Token(token, inside? _.tokenize(match, inside) : match, alias, match, greedy); - - args.push(wrapped); - - if (after) { - args.push(after); - } - - Array.prototype.splice.apply(strarr, args); - - if (delNum != 1) - _.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j); - - if (oneshot) - break; - } - } - } - }, - tokenize: function(text, grammar) { - var strarr = [text]; - var rest = grammar.rest; - if (rest) { for (var token in rest) { grammar[token] = rest[token]; @@ -470,9 +341,12 @@ var _ = { delete grammar.rest; } - _.matchGrammar(text, strarr, grammar, 0, 0, false); + var tokenList = new LinkedList(); + addAfter(tokenList, tokenList.head, text); - return strarr; + matchGrammar(text, tokenList, grammar, tokenList.head, 0); + + return toArray(tokenList); }, hooks: { @@ -553,6 +427,230 @@ Token.stringify = function stringify(o, language) { return '<' + env.tag + ' class="' + env.classes.join(' ') + '"' + attributes + '>' + env.content + ''; }; +/** + * @param {string} text + * @param {LinkedList} tokenList + * @param {any} grammar + * @param {LinkedListNode} startNode + * @param {number} startPos + * @param {boolean} [oneshot=false] + * @param {string} [target] + */ +function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { + for (var token in grammar) { + if (!grammar.hasOwnProperty(token) || !grammar[token]) { + continue; + } + + var patterns = grammar[token]; + patterns = Array.isArray(patterns) ? patterns : [patterns]; + + for (var j = 0; j < patterns.length; ++j) { + if (target && target == token + ',' + j) { + return; + } + + var pattern = patterns[j], + inside = pattern.inside, + lookbehind = !!pattern.lookbehind, + greedy = !!pattern.greedy, + lookbehindLength = 0, + alias = pattern.alias; + + if (greedy && !pattern.pattern.global) { + // Without the global flag, lastIndex won't work + var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; + pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + } + + pattern = pattern.pattern || pattern; + + for ( // iterate the token list and keep track of the current token/string position + var currentNode = startNode.next, pos = startPos; + currentNode !== tokenList.tail; + pos += currentNode.value.length, currentNode = currentNode.next + ) { + + var str = currentNode.value; + + if (tokenList.length > text.length) { + // Something went terribly wrong, ABORT, ABORT! + return; + } + + if (str instanceof Token) { + continue; + } + + var removeCount = 1; // this is the to parameter of removeBetween + + if (greedy && currentNode != tokenList.tail.prev) { + pattern.lastIndex = pos; + var match = pattern.exec(text); + if (!match) { + break; + } + + var from = match.index + (lookbehind && match[1] ? match[1].length : 0); + var to = match.index + match[0].length; + var p = pos; + + // find the node that contains the match + p += currentNode.value.length; + while (from >= p) { + currentNode = currentNode.next; + p += currentNode.value.length; + } + // adjust pos (and p) + p -= currentNode.value.length; + pos = p; + + // the current node is a Token, then the match starts inside another Token, which is invalid + if (currentNode.value instanceof Token) { + continue; + } + + // find the last node which is affected by this match + for ( + var k = currentNode; + k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k = k.next + ) { + removeCount++; + p += k.value.length; + } + removeCount--; + + // replace with the new match + str = text.slice(pos, p); + match.index -= pos; + } else { + pattern.lastIndex = 0; + + var match = pattern.exec(str); + } + + if (!match) { + if (oneshot) { + break; + } + + continue; + } + + if (lookbehind) { + lookbehindLength = match[1] ? match[1].length : 0; + } + + var from = match.index + lookbehindLength, + match = match[0].slice(lookbehindLength), + to = from + match.length, + before = str.slice(0, from), + after = str.slice(to); + + var removeFrom = currentNode.prev; + + if (before) { + removeFrom = addAfter(tokenList, removeFrom, before); + pos += before.length; + } + + removeRange(tokenList, removeFrom, removeCount); + + var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + currentNode = addAfter(tokenList, removeFrom, wrapped); + + if (after) { + addAfter(tokenList, currentNode, after); + } + + + if (removeCount > 1) + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); + + if (oneshot) + break; + } + } + } +} + +/** + * @typedef LinkedListNode + * @property {T} value + * @property {LinkedListNode | null} prev The previous node. + * @property {LinkedListNode | null} next The next node. + * @template T + */ + +/** + * @template T + */ +function LinkedList() { + /** @type {LinkedListNode} */ + var head = { value: null, prev: null, next: null }; + /** @type {LinkedListNode} */ + var tail = { value: null, prev: head, next: null }; + head.next = tail; + + /** @type {LinkedListNode} */ + this.head = head; + /** @type {LinkedListNode} */ + this.tail = tail; + this.length = 0; +} + +/** + * Adds a new node with the given value to the list. + * @param {LinkedList} list + * @param {LinkedListNode} node + * @param {T} value + * @returns {LinkedListNode} The added node. + * @template T + */ +function addAfter(list, node, value) { + // assumes that node != list.tail && values.length >= 0 + var next = node.next; + + var newNode = { value: value, prev: node, next: next }; + node.next = newNode; + next.prev = newNode; + list.length++; + + return newNode; +} +/** + * Removes `count` nodes after the given node. The given node will not be removed. + * @param {LinkedList} list + * @param {LinkedListNode} node + * @param {number} count + * @template T + */ +function removeRange(list, node, count) { + var next = node.next; + for (var i = 0; i < count && next !== list.tail; i++) { + next = next.next; + } + node.next = next; + next.prev = node; + list.length -= i; +} +/** + * @param {LinkedList} list + * @returns {T[]} + * @template T + */ +function toArray(list) { + var array = []; + var node = list.head.next; + while (node !== list.tail) { + array.push(node.value); + node = node.next; + } + return array; +} + + if (!_self.document) { if (!_self.addEventListener) { // in Node.js diff --git a/components/prism-core.min.js b/components/prism-core.min.js index 91338d07e1..698c572d7c 100644 --- a/components/prism-core.min.js +++ b/components/prism-core.min.js @@ -1 +1 @@ -var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,r=0,C={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(r){return r instanceof _?new _(r.type,e(r.content),r.alias):Array.isArray(r)?r.map(e):r.replace(/&/g,"&").replace(/e.length)return;if(!(k instanceof _)){if(h&&y!=r.length-1){if(c.lastIndex=v,!(S=c.exec(e)))break;for(var b=S.index+(f&&S[1]?S[1].length:0),w=S.index+S[0].length,A=y,P=v,x=r.length;A"+a.content+""},!u.document)return u.addEventListener&&(C.disableWorkerMessageHandler||u.addEventListener("message",function(e){var r=JSON.parse(e.data),n=r.language,t=r.code,a=r.immediateClose;u.postMessage(C.highlight(t,C.languages[n],n)),a&&u.close()},!1)),C;var e=C.util.currentScript();function n(){C.manual||C.highlightAll()}if(e&&(C.filename=e.src,e.hasAttribute("data-manual")&&(C.manual=!0)),!C.manual){var t=document.readyState;"loading"===t||"interactive"===t&&e&&e.defer?document.addEventListener("DOMContentLoaded",n):window.requestAnimationFrame?window.requestAnimationFrame(n):window.setTimeout(n,16)}return C}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file +var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(u){var c=/\blang(?:uage)?-([\w-]+)\b/i,n=0,C={manual:u.Prism&&u.Prism.manual,disableWorkerMessageHandler:u.Prism&&u.Prism.disableWorkerMessageHandler,util:{encode:function e(n){return n instanceof _?new _(n.type,e(n.content),n.alias):Array.isArray(n)?n.map(e):n.replace(/&/g,"&").replace(/n.length)return;if(!(b instanceof _)){var x=1;if(d&&y!=t.tail.prev){g.lastIndex=k;var w=g.exec(n);if(!w)break;var A=w.index+(h&&w[1]?w[1].length:0),P=w.index+w[0].length,S=k;for(S+=y.value.length;S<=A;)y=y.next,S+=y.value.length;if(S-=y.value.length,k=S,y.value instanceof _)continue;for(var O=y;O!==t.tail&&(S"+a.content+""},!u.document)return u.addEventListener&&(C.disableWorkerMessageHandler||u.addEventListener("message",function(e){var n=JSON.parse(e.data),t=n.language,r=n.code,a=n.immediateClose;u.postMessage(C.highlight(r,C.languages[t],t)),a&&u.close()},!1)),C;var e=C.util.currentScript();function t(){C.manual||C.highlightAll()}if(e&&(C.filename=e.src,e.hasAttribute("data-manual")&&(C.manual=!0)),!C.manual){var r=document.readyState;"loading"===r||"interactive"===r&&e&&e.defer?document.addEventListener("DOMContentLoaded",t):window.requestAnimationFrame?window.requestAnimationFrame(t):window.setTimeout(t,16)}return C}(_self);"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file diff --git a/prism.js b/prism.js index 6b09145205..810031392c 100644 --- a/prism.js +++ b/prism.js @@ -336,137 +336,8 @@ var _ = { return Token.stringify(_.util.encode(env.tokens), env.language); }, - matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { - for (var token in grammar) { - if (!grammar.hasOwnProperty(token) || !grammar[token]) { - continue; - } - - var patterns = grammar[token]; - patterns = Array.isArray(patterns) ? patterns : [patterns]; - - for (var j = 0; j < patterns.length; ++j) { - if (target && target == token + ',' + j) { - return; - } - - var pattern = patterns[j], - inside = pattern.inside, - lookbehind = !!pattern.lookbehind, - greedy = !!pattern.greedy, - lookbehindLength = 0, - alias = pattern.alias; - - if (greedy && !pattern.pattern.global) { - // Without the global flag, lastIndex won't work - var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; - pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); - } - - pattern = pattern.pattern || pattern; - - // Don’t cache length as it changes during the loop - for (var i = index, pos = startPos; i < strarr.length; pos += strarr[i].length, ++i) { - - var str = strarr[i]; - - if (strarr.length > text.length) { - // Something went terribly wrong, ABORT, ABORT! - return; - } - - if (str instanceof Token) { - continue; - } - - if (greedy && i != strarr.length - 1) { - pattern.lastIndex = pos; - var match = pattern.exec(text); - if (!match) { - break; - } - - var from = match.index + (lookbehind && match[1] ? match[1].length : 0), - to = match.index + match[0].length, - k = i, - p = pos; - - for (var len = strarr.length; k < len && (p < to || (!strarr[k].type && !strarr[k - 1].greedy)); ++k) { - p += strarr[k].length; - // Move the index i to the element in strarr that is closest to from - if (from >= p) { - ++i; - pos = p; - } - } - - // If strarr[i] is a Token, then the match starts inside another Token, which is invalid - if (strarr[i] instanceof Token) { - continue; - } - - // Number of tokens to delete and replace with the new match - delNum = k - i; - str = text.slice(pos, p); - match.index -= pos; - } else { - pattern.lastIndex = 0; - - var match = pattern.exec(str), - delNum = 1; - } - - if (!match) { - if (oneshot) { - break; - } - - continue; - } - - if(lookbehind) { - lookbehindLength = match[1] ? match[1].length : 0; - } - - var from = match.index + lookbehindLength, - match = match[0].slice(lookbehindLength), - to = from + match.length, - before = str.slice(0, from), - after = str.slice(to); - - var args = [i, delNum]; - - if (before) { - ++i; - pos += before.length; - args.push(before); - } - - var wrapped = new Token(token, inside? _.tokenize(match, inside) : match, alias, match, greedy); - - args.push(wrapped); - - if (after) { - args.push(after); - } - - Array.prototype.splice.apply(strarr, args); - - if (delNum != 1) - _.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j); - - if (oneshot) - break; - } - } - } - }, - tokenize: function(text, grammar) { - var strarr = [text]; - var rest = grammar.rest; - if (rest) { for (var token in rest) { grammar[token] = rest[token]; @@ -475,9 +346,12 @@ var _ = { delete grammar.rest; } - _.matchGrammar(text, strarr, grammar, 0, 0, false); + var tokenList = new LinkedList(); + addAfter(tokenList, tokenList.head, text); - return strarr; + matchGrammar(text, tokenList, grammar, tokenList.head, 0); + + return toArray(tokenList); }, hooks: { @@ -558,6 +432,230 @@ Token.stringify = function stringify(o, language) { return '<' + env.tag + ' class="' + env.classes.join(' ') + '"' + attributes + '>' + env.content + ''; }; +/** + * @param {string} text + * @param {LinkedList} tokenList + * @param {any} grammar + * @param {LinkedListNode} startNode + * @param {number} startPos + * @param {boolean} [oneshot=false] + * @param {string} [target] + */ +function matchGrammar(text, tokenList, grammar, startNode, startPos, oneshot, target) { + for (var token in grammar) { + if (!grammar.hasOwnProperty(token) || !grammar[token]) { + continue; + } + + var patterns = grammar[token]; + patterns = Array.isArray(patterns) ? patterns : [patterns]; + + for (var j = 0; j < patterns.length; ++j) { + if (target && target == token + ',' + j) { + return; + } + + var pattern = patterns[j], + inside = pattern.inside, + lookbehind = !!pattern.lookbehind, + greedy = !!pattern.greedy, + lookbehindLength = 0, + alias = pattern.alias; + + if (greedy && !pattern.pattern.global) { + // Without the global flag, lastIndex won't work + var flags = pattern.pattern.toString().match(/[imsuy]*$/)[0]; + pattern.pattern = RegExp(pattern.pattern.source, flags + 'g'); + } + + pattern = pattern.pattern || pattern; + + for ( // iterate the token list and keep track of the current token/string position + var currentNode = startNode.next, pos = startPos; + currentNode !== tokenList.tail; + pos += currentNode.value.length, currentNode = currentNode.next + ) { + + var str = currentNode.value; + + if (tokenList.length > text.length) { + // Something went terribly wrong, ABORT, ABORT! + return; + } + + if (str instanceof Token) { + continue; + } + + var removeCount = 1; // this is the to parameter of removeBetween + + if (greedy && currentNode != tokenList.tail.prev) { + pattern.lastIndex = pos; + var match = pattern.exec(text); + if (!match) { + break; + } + + var from = match.index + (lookbehind && match[1] ? match[1].length : 0); + var to = match.index + match[0].length; + var p = pos; + + // find the node that contains the match + p += currentNode.value.length; + while (from >= p) { + currentNode = currentNode.next; + p += currentNode.value.length; + } + // adjust pos (and p) + p -= currentNode.value.length; + pos = p; + + // the current node is a Token, then the match starts inside another Token, which is invalid + if (currentNode.value instanceof Token) { + continue; + } + + // find the last node which is affected by this match + for ( + var k = currentNode; + k !== tokenList.tail && (p < to || (typeof k.value === 'string' && !k.prev.value.greedy)); + k = k.next + ) { + removeCount++; + p += k.value.length; + } + removeCount--; + + // replace with the new match + str = text.slice(pos, p); + match.index -= pos; + } else { + pattern.lastIndex = 0; + + var match = pattern.exec(str); + } + + if (!match) { + if (oneshot) { + break; + } + + continue; + } + + if (lookbehind) { + lookbehindLength = match[1] ? match[1].length : 0; + } + + var from = match.index + lookbehindLength, + match = match[0].slice(lookbehindLength), + to = from + match.length, + before = str.slice(0, from), + after = str.slice(to); + + var removeFrom = currentNode.prev; + + if (before) { + removeFrom = addAfter(tokenList, removeFrom, before); + pos += before.length; + } + + removeRange(tokenList, removeFrom, removeCount); + + var wrapped = new Token(token, inside ? _.tokenize(match, inside) : match, alias, match, greedy); + currentNode = addAfter(tokenList, removeFrom, wrapped); + + if (after) { + addAfter(tokenList, currentNode, after); + } + + + if (removeCount > 1) + matchGrammar(text, tokenList, grammar, currentNode.prev, pos, true, token + ',' + j); + + if (oneshot) + break; + } + } + } +} + +/** + * @typedef LinkedListNode + * @property {T} value + * @property {LinkedListNode | null} prev The previous node. + * @property {LinkedListNode | null} next The next node. + * @template T + */ + +/** + * @template T + */ +function LinkedList() { + /** @type {LinkedListNode} */ + var head = { value: null, prev: null, next: null }; + /** @type {LinkedListNode} */ + var tail = { value: null, prev: head, next: null }; + head.next = tail; + + /** @type {LinkedListNode} */ + this.head = head; + /** @type {LinkedListNode} */ + this.tail = tail; + this.length = 0; +} + +/** + * Adds a new node with the given value to the list. + * @param {LinkedList} list + * @param {LinkedListNode} node + * @param {T} value + * @returns {LinkedListNode} The added node. + * @template T + */ +function addAfter(list, node, value) { + // assumes that node != list.tail && values.length >= 0 + var next = node.next; + + var newNode = { value: value, prev: node, next: next }; + node.next = newNode; + next.prev = newNode; + list.length++; + + return newNode; +} +/** + * Removes `count` nodes after the given node. The given node will not be removed. + * @param {LinkedList} list + * @param {LinkedListNode} node + * @param {number} count + * @template T + */ +function removeRange(list, node, count) { + var next = node.next; + for (var i = 0; i < count && next !== list.tail; i++) { + next = next.next; + } + node.next = next; + next.prev = node; + list.length -= i; +} +/** + * @param {LinkedList} list + * @returns {T[]} + * @template T + */ +function toArray(list) { + var array = []; + var node = list.head.next; + while (node !== list.tail) { + array.push(node.value); + node = node.next; + } + return array; +} + + if (!_self.document) { if (!_self.addEventListener) { // in Node.js