Core: Fixed greedy targeting bug (#1932)
The `target` parameter defines the token which causes rematching. Rematching is done by recursively calling `matchGrammar`, where `target` is the name of the token currently being matched. The rematching runs until either a match is found or the target token is reached, whichever comes first.

The bug is that the target token was identified by its name alone. A name does not uniquely identify a token in a grammar, because a grammar entry can be an array, in which case several patterns share the same name.

This commit fixes the bug by identifying the target by both the token name and the pattern's index within that name's array (`token + ',' + j`).
RunDevelopment committed Aug 29, 2019
1 parent 3509f3e commit e864d51
Showing 7 changed files with 114 additions and 30 deletions.
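
To make the ambiguity described in the commit message concrete, here is a minimal sketch in plain JavaScript. It is not Prism code; the grammar shape simply mirrors the test added in tests/core/greedy.js below. A bare token name cannot distinguish between two patterns that share that name, so the fix identifies a pattern by the name plus its index in the array:

// Minimal sketch, not Prism internals: a grammar entry may be an array of patterns.
var grammar = {
	'comment': [
		/\/\/.*/,                                           // index 0: line comment
		{ pattern: /\/\*[\s\S]*?(?:\*\/|$)/, greedy: true } // index 1: greedy block comment
	]
};

// Old rematch target: just the token name, so 'comment' could mean either pattern above.
// New rematch target: name + ',' + index, which is unique per pattern.
function patternId(token, j) {
	return token + ',' + j;
}

grammar['comment'].forEach(function (pattern, j) {
	console.log(patternId('comment', j)); // logs "comment,0", then "comment,1"
});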
14 changes: 7 additions & 7 deletions components/prism-core.js
@@ -279,18 +279,18 @@ var _ = {

 	matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) {
 		for (var token in grammar) {
-			if(!grammar.hasOwnProperty(token) || !grammar[token]) {
+			if (!grammar.hasOwnProperty(token) || !grammar[token]) {
 				continue;
 			}

-			if (token == target) {
-				return;
-			}
-
 			var patterns = grammar[token];
-			patterns = (_.util.type(patterns) === "Array") ? patterns : [patterns];
+			patterns = Array.isArray(patterns) ? patterns : [patterns];

 			for (var j = 0; j < patterns.length; ++j) {
+				if (target && target == token + ',' + j) {
+					return;
+				}
+
 				var pattern = patterns[j],
 					inside = pattern.inside,
 					lookbehind = !!pattern.lookbehind,
@@ -394,7 +394,7 @@ var _ = {
 					Array.prototype.splice.apply(strarr, args);

 					if (delNum != 1)
-						_.matchGrammar(text, strarr, grammar, i, pos, true, token);
+						_.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j);

 					if (oneshot)
 						break;
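
The effect of moving the target check into the pattern loop can be seen in a small, hedged model of the skipping logic above. The generator below is an illustration written for this page, not code from the commit; only the `token + ',' + j` check mirrors the diff:

// Simplified model of the loop-skipping logic in the hunk above.
// A rematch must retry every pattern that comes before the one that triggered it,
// then stop exactly at that pattern.
function* patternsToRetry(grammar, target) {
	for (var token in grammar) {
		var patterns = Array.isArray(grammar[token]) ? grammar[token] : [grammar[token]];
		for (var j = 0; j < patterns.length; ++j) {
			if (target && target == token + ',' + j) {
				return; // reached the pattern that caused the rematch -> stop here
			}
			yield token + ',' + j; // this pattern gets retried
		}
	}
}

var grammar = { 'comment': [/\/\/.*/, { pattern: /\/\*[\s\S]*?(?:\*\/|$)/, greedy: true }] };

// A rematch targeted at the greedy block-comment pattern ("comment,1")
// still retries the earlier line-comment regex:
console.log([...patternsToRetry(grammar, 'comment,1')]); // [ 'comment,0' ]

// Under the old name-only check, reaching the token named 'comment' ended the
// rematch immediately, so the line-comment regex was never retried.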
2 changes: 1 addition & 1 deletion components/prism-core.min.js

This is a generated (minified) file; its diff is not rendered.

3 changes: 2 additions & 1 deletion package.json
@@ -6,11 +6,12 @@
   "style": "themes/prism.css",
   "scripts": {
     "test:aliases": "mocha tests/aliases-test.js",
+    "test:core": "mocha tests/core/**/*.js",
     "test:languages": "mocha tests/run.js",
     "test:plugins": "mocha tests/plugins/**/*.js",
     "test:regex": "mocha tests/regex-tests.js",
     "test:runner": "mocha tests/testrunner-tests.js",
-    "test": "npm run test:runner && npm run test:languages && npm run test:plugins && npm run test:aliases && npm run test:regex"
+    "test": "npm run test:runner && npm run test:core && npm run test:languages && npm run test:plugins && npm run test:aliases && npm run test:regex"
   },
   "repository": {
     "type": "git",
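With the added script, the new core tests can be run on their own via `npm run test:core` (mocha over `tests/core/**/*.js`), and the aggregate `npm test` command now runs them as part of the full suite.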
14 changes: 7 additions & 7 deletions prism.js
@@ -284,18 +284,18 @@ var _ = {

 	matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) {
 		for (var token in grammar) {
-			if(!grammar.hasOwnProperty(token) || !grammar[token]) {
+			if (!grammar.hasOwnProperty(token) || !grammar[token]) {
 				continue;
 			}

-			if (token == target) {
-				return;
-			}
-
 			var patterns = grammar[token];
-			patterns = (_.util.type(patterns) === "Array") ? patterns : [patterns];
+			patterns = Array.isArray(patterns) ? patterns : [patterns];

 			for (var j = 0; j < patterns.length; ++j) {
+				if (target && target == token + ',' + j) {
+					return;
+				}
+
 				var pattern = patterns[j],
 					inside = pattern.inside,
 					lookbehind = !!pattern.lookbehind,
@@ -399,7 +399,7 @@ var _ = {
 					Array.prototype.splice.apply(strarr, args);

 					if (delNum != 1)
-						_.matchGrammar(text, strarr, grammar, i, pos, true, token);
+						_.matchGrammar(text, strarr, grammar, i, pos, true, token + ',' + j);

 					if (oneshot)
 						break;
69 changes: 69 additions & 0 deletions tests/core/greedy.js
@@ -0,0 +1,69 @@
"use strict";

const { assert } = require('chai');
const PrismLoader = require('../helper/prism-loader');
const TestCase = require('../helper/test-case');


function testTokens({ grammar, code, expected }) {
	const Prism = PrismLoader.createEmptyPrism();
	Prism.languages.test = grammar;

	const simpleTokens = TestCase.simpleTokenize(Prism, code, 'test');

	assert.deepStrictEqual(simpleTokens, expected);
}

describe('Greedy matching', function () {

	it('should correctly handle tokens with the same name', function () {
		testTokens({
			grammar: {
				'comment': [
					/\/\/.*/,
					{
						pattern: /\/\*[\s\S]*?(?:\*\/|$)/,
						greedy: true
					}
				]
			},
			code: '// /*\n/* comment */',
			expected: [
				["comment", "// /*"],
				["comment", "/* comment */"]
			]
		});
	});

	// https://github.com/PrismJS/prism/issues/1492
	/*
	it('should correctly rematch tokens', function () {
		testTokens({
			grammar: {
				'a': {
					pattern: /'[^'\r\n]*'/,
				},
				'b': {
					pattern: /"[^"\r\n]*"/,
					greedy: true,
				},
				'c': {
					pattern: /<[^>\r\n]*>/,
					greedy: true,
				}
			},
			code: `<'> '' ''\n<"> "" ""`,
			expected: [
				["c", "<'>"],
				["a", "''"],
				["a", "''"],
				["c", "<\">"],
				["b", "\"\""],
				["b", "\"\""],
			]
		});
	});
	*/
});

40 changes: 27 additions & 13 deletions tests/helper/test-case.js
@@ -60,19 +60,9 @@ module.exports = {
 		}

 		const Prism = PrismLoader.createInstance(usedLanguages.languages);
-		// the first language is the main language to highlight
-		const mainLanguageGrammar = Prism.languages[usedLanguages.mainLanguage];
-		const env = {
-			code: testCase.testSource,
-			grammar: mainLanguageGrammar,
-			language: usedLanguages.mainLanguage
-		};
-		Prism.hooks.run('before-tokenize', env);
-		env.tokens = Prism.tokenize(env.code, env.grammar);
-		Prism.hooks.run('after-tokenize', env);
-		const compiledTokenStream = env.tokens;
-
-		const simplifiedTokenStream = TokenStreamTransformer.simplify(compiledTokenStream);
+		// the first language is the main language to highlight
+		const simplifiedTokenStream = this.simpleTokenize(Prism, testCase.testSource, usedLanguages.mainLanguage);

 		const tzd = JSON.stringify(simplifiedTokenStream);
 		const exp = JSON.stringify(testCase.expectedTokenStream);
@@ -93,7 +83,31 @@
 				"Expected Token Stream: \n" + exp +
 				"\n-----------------------------------------\n" + diff;

-		const result = assert.deepEqual(simplifiedTokenStream, testCase.expectedTokenStream, testCase.comment + message);
+		assert.deepEqual(simplifiedTokenStream, testCase.expectedTokenStream, testCase.comment + message);
 	},

+	/**
+	 * Returns the simplified token stream of the given code highlighted with `language`.
+	 *
+	 * The `before-tokenize` and `after-tokenize` hooks will also be executed.
+	 *
+	 * @param {any} Prism The Prism instance which will tokenize `code`.
+	 * @param {string} code The code to tokenize.
+	 * @param {string} language The language id.
+	 * @returns {Array<string|Array<string|any[]>>}
+	 */
+	simpleTokenize(Prism, code, language) {
+		const env = {
+			code,
+			grammar: Prism.languages[language],
+			language
+		};
+
+		Prism.hooks.run('before-tokenize', env);
+		env.tokens = Prism.tokenize(env.code, env.grammar);
+		Prism.hooks.run('after-tokenize', env);
+
+		return TokenStreamTransformer.simplify(env.tokens);
+	},
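
For orientation, here is a hedged usage sketch of the new `simpleTokenize` helper outside the existing runner. The grammar, code, and expected output are illustrative assumptions made for this example; the real consumer added by this commit is tests/core/greedy.js above.

// Assumed to live under tests/core/ so the relative requires resolve like in greedy.js.
const PrismLoader = require('../helper/prism-loader');
const TestCase = require('../helper/test-case');

const Prism = PrismLoader.createEmptyPrism();
// Hypothetical grammar registered only for this example.
Prism.languages.example = { 'keyword': /\b(?:if|else)\b/ };

// Runs the before-tokenize/after-tokenize hooks, tokenizes, and simplifies the result.
const tokens = TestCase.simpleTokenize(Prism, 'if (x) {} else {}', 'example');
console.log(tokens);
// Expected shape (exact handling of plain strings depends on the simplifier):
// [ ["keyword", "if"], " (x) {} ", ["keyword", "else"], " {}" ]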
2 changes: 1 addition & 1 deletion tests/helper/token-stream-transformer.js
@@ -6,7 +6,7 @@ module.exports = {
 	 * @typedef TokenStreamItem
 	 * @property {string} type
 	 * @property {string | TokenStreamItem | Array<string|TokenStreamItem>} content
-	*/
+	 */

 	/**
 	 * Simplifies the token stream to ease the matching with the expected token stream.

(Whitespace-only change to the indentation of the typedef comment's closing `*/`.)
