Skip to content

Commit

Permalink
Merge pull request #967 from zeitgeist87/ImprovedGreedyFlag
Browse files Browse the repository at this point in the history
Improvement to greedy-flag
  • Loading branch information
zeitgeist87 authored Jun 11, 2016
2 parents 4a44f01 + d80900c commit 500121b
Show file tree
Hide file tree
Showing 5 changed files with 69 additions and 57 deletions.
58 changes: 31 additions & 27 deletions components/prism-core.js
Original file line number Diff line number Diff line change
Expand Up @@ -280,9 +280,15 @@ var _ = _self.Prism = {
lookbehindLength = 0,
alias = pattern.alias;

if (greedy && !pattern.pattern.global) {
// Without the global flag, lastIndex won't work
pattern.pattern = RegExp(pattern.pattern.source, pattern.pattern.flags + "g");
}

pattern = pattern.pattern || pattern;

for (var i=0; i<strarr.length; i++) { // Don’t cache length as it changes during the loop
// Don’t cache length as it changes during the loop
for (var i=0, pos = 0; i<strarr.length; pos += (strarr[i].matchedStr || strarr[i]).length, ++i) {

var str = strarr[i];

Expand All @@ -302,40 +308,38 @@ var _ = _self.Prism = {

// Greedy patterns can override/remove any number of previously matched tokens
if (!match && greedy && i != strarr.length - 1) {
// Reconstruct the original text using the next two tokens
var nextToken = strarr[i + 1].matchedStr || strarr[i + 1],
combStr = str + nextToken;

if (i < strarr.length - 2) {
combStr += strarr[i + 2].matchedStr || strarr[i + 2];
pattern.lastIndex = pos;
match = pattern.exec(text);
if (!match) {
break;
}

// Try the pattern again on the reconstructed text
pattern.lastIndex = 0;
match = pattern.exec(combStr);
if (!match) {
continue;
var from = match.index + (lookbehind ? match[1].length : 0),
to = match.index + match[0].length,
k = i,
p = pos;

for (var len = strarr.length; k < len && p < to; ++k) {
p += (strarr[k].matchedStr || strarr[k]).length;
// Move the index i to the element in strarr that is closest to from
if (from >= p) {
++i;
pos = p;
}
}

var from = match.index + (lookbehind ? match[1].length : 0);
// To be a valid candidate, the new match has to start inside of str
if (from >= str.length) {
/*
* If strarr[i] is a Token, then the match starts inside another Token, which is invalid
* If strarr[k - 1] is greedy we are in conflict with another greedy pattern
*/
if (strarr[i] instanceof Token || strarr[k - 1].greedy) {
continue;
}
var to = match.index + match[0].length,
len = str.length + nextToken.length;

// Number of tokens to delete and replace with the new match
delNum = 3;

if (to <= len) {
if (strarr[i + 1].greedy) {
continue;
}
delNum = 2;
combStr = combStr.slice(0, len);
}
str = combStr;
delNum = k - i;
str = text.slice(pos, p);
match.index -= pos;
}

if (!match) {
Expand Down
2 changes: 1 addition & 1 deletion components/prism-core.min.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 31 additions & 27 deletions prism.js
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,15 @@ var _ = _self.Prism = {
lookbehindLength = 0,
alias = pattern.alias;

if (greedy && !pattern.pattern.global) {
// Without the global flag, lastIndex won't work
pattern.pattern = RegExp(pattern.pattern.source, pattern.pattern.flags + "g");
}

pattern = pattern.pattern || pattern;

for (var i=0; i<strarr.length; i++) { // Don’t cache length as it changes during the loop
// Don’t cache length as it changes during the loop
for (var i=0, pos = 0; i<strarr.length; pos += (strarr[i].matchedStr || strarr[i]).length, ++i) {

var str = strarr[i];

Expand All @@ -307,40 +313,38 @@ var _ = _self.Prism = {

// Greedy patterns can override/remove any number of previously matched tokens
if (!match && greedy && i != strarr.length - 1) {
// Reconstruct the original text using the next two tokens
var nextToken = strarr[i + 1].matchedStr || strarr[i + 1],
combStr = str + nextToken;

if (i < strarr.length - 2) {
combStr += strarr[i + 2].matchedStr || strarr[i + 2];
pattern.lastIndex = pos;
match = pattern.exec(text);
if (!match) {
break;
}

// Try the pattern again on the reconstructed text
pattern.lastIndex = 0;
match = pattern.exec(combStr);
if (!match) {
continue;
var from = match.index + (lookbehind ? match[1].length : 0),
to = match.index + match[0].length,
k = i,
p = pos;

for (var len = strarr.length; k < len && p < to; ++k) {
p += (strarr[k].matchedStr || strarr[k]).length;
// Move the index i to the element in strarr that is closest to from
if (from >= p) {
++i;
pos = p;
}
}

var from = match.index + (lookbehind ? match[1].length : 0);
// To be a valid candidate, the new match has to start inside of str
if (from >= str.length) {
/*
* If strarr[i] is a Token, then the match starts inside another Token, which is invalid
* If strarr[k - 1] is greedy we are in conflict with another greedy pattern
*/
if (strarr[i] instanceof Token || strarr[k - 1].greedy) {
continue;
}
var to = match.index + match[0].length,
len = str.length + nextToken.length;

// Number of tokens to delete and replace with the new match
delNum = 3;

if (to <= len) {
if (strarr[i + 1].greedy) {
continue;
}
delNum = 2;
combStr = combStr.slice(0, len);
}
str = combStr;
delNum = k - i;
str = text.slice(pos, p);
match.index -= pos;
}

if (!match) {
Expand Down
4 changes: 3 additions & 1 deletion tests/languages/javascript/regex_feature.test
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
/foo"test"bar/
/foo\//
1 / 4 + "/, not a regex";
/ '1' '2' '3' '4' '5' /

----------------------------------------------------

Expand All @@ -13,7 +14,8 @@
["regex", "/[\\[\\]]{2,4}(?:foo)*/"], ["punctuation", ";"],
["regex", "/foo\"test\"bar/"],
["regex", "/foo\\//"],
["number", "1"], ["operator", "/"], ["number", "4"], ["operator", "+"], ["string", "\"/, not a regex\""], ["punctuation", ";"]
["number", "1"], ["operator", "/"], ["number", "4"], ["operator", "+"], ["string", "\"/, not a regex\""], ["punctuation", ";"],
["regex", "/ '1' '2' '3' '4' '5' /"]
]

----------------------------------------------------
Expand Down
4 changes: 3 additions & 1 deletion tests/languages/javascript/template-string_feature.test
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
bar`
`40+2=${40+2}`
`${foo()}`
"foo `a` `b` `c` `d` bar"

----------------------------------------------------

Expand Down Expand Up @@ -34,7 +35,8 @@ bar`
["interpolation-punctuation", "}"]
]],
["string", "`"]
]]
]],
["string", "\"foo `a` `b` `c` `d` bar\""]
]

----------------------------------------------------
Expand Down

0 comments on commit 500121b

Please sign in to comment.