From 2b8321d35f3dfa454344fd81553d456c8f31c3b4 Mon Sep 17 00:00:00 2001 From: Golmote Date: Tue, 20 Mar 2018 08:37:58 +0100 Subject: [PATCH] JSX: Add support for plain text inside tags (#1357) * JSX: Add support for plain text inside tags * JSX: Use a "plain-text" token to identify plain text * TSX: Add support for plain text * Fix test after merge --- components/prism-core.js | 10 ++- components/prism-core.min.js | 2 +- components/prism-jsx.js | 86 +++++++++++++++++++++ components/prism-jsx.min.js | 2 +- prism.js | 10 ++- tests/helper/test-case.js | 6 ++ tests/languages/jsx/issue1294.test | 69 +++++++++++++++++ tests/languages/jsx/issue1342.test | 6 +- tests/languages/jsx/issue1356.test | 2 +- tests/languages/jsx/plain-text_feature.test | 57 ++++++++++++++ tests/languages/tsx/tag_feature.test | 7 +- 11 files changed, 242 insertions(+), 15 deletions(-) create mode 100644 tests/languages/jsx/issue1294.test create mode 100644 tests/languages/jsx/plain-text_feature.test diff --git a/components/prism-core.js b/components/prism-core.js index ef6fb7874f..dc3819a008 100644 --- a/components/prism-core.js +++ b/components/prism-core.js @@ -272,8 +272,14 @@ var _ = _self.Prism = { }, highlight: function (text, grammar, language) { - var tokens = _.tokenize(text, grammar); - return Token.stringify(_.util.encode(tokens), language); + var env = { + text: text, + grammar: grammar, + language: language + }; + env.tokens = _.tokenize(text, grammar); + _.hooks.run('after-tokenize', env); + return Token.stringify(_.util.encode(env.tokens), language); }, matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { diff --git a/components/prism-core.min.js b/components/prism-core.min.js index 1ed2a99300..78a46e88f8 100644 --- a/components/prism-core.min.js +++ b/components/prism-core.min.js @@ -1 +1 @@ -var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(){var e=/\blang(?:uage)?-(\w+)\b/i,t=0,n=_self.Prism={manual:_self.Prism&&_self.Prism.manual,disableWorkerMessageHandler:_self.Prism&&_self.Prism.disableWorkerMessageHandler,util:{encode:function(e){return e instanceof r?new r(e.type,n.util.encode(e.content),e.alias):"Array"===n.util.type(e)?e.map(n.util.encode):e.replace(/&/g,"&").replace(/e.length)return;if(!(w instanceof s)){h.lastIndex=0;var _=h.exec(w),j=1;if(!_&&m&&b!=t.length-1){if(h.lastIndex=k,_=h.exec(e),!_)break;for(var P=_.index+(d?_[1].length:0),A=_.index+_[0].length,x=b,O=k,I=t.length;I>x&&(A>O||!t[x].type&&!t[x-1].greedy);++x)O+=t[x].length,P>=O&&(++b,k=O);if(t[b]instanceof s||t[x-1].greedy)continue;j=x-b,w=e.slice(k,O),_.index-=k}if(_){d&&(p=_[1]?_[1].length:0);var P=_.index+p,_=_[0].slice(p),A=P+_.length,N=w.slice(0,P),S=w.slice(A),C=[b,j];N&&(++b,k+=N.length,C.push(N));var E=new s(u,f?n.tokenize(_,f):_,y,_,m);if(C.push(E),S&&C.push(S),Array.prototype.splice.apply(t,C),1!=j&&n.matchGrammar(e,t,r,b,k,!0,u),l)break}else if(l)break}}}}},tokenize:function(e,t){var r=[e],a=t.rest;if(a){for(var i in a)t[i]=a[i];delete t.rest}return n.matchGrammar(e,r,t,0,0,!1),r},hooks:{all:{},add:function(e,t){var r=n.hooks.all;r[e]=r[e]||[],r[e].push(t)},run:function(e,t){var r=n.hooks.all[e];if(r&&r.length)for(var a,i=0;a=r[i++];)a(t)}}},r=n.Token=function(e,t,n,r,a){this.type=e,this.content=t,this.alias=n,this.length=0|(r||"").length,this.greedy=!!a};if(r.stringify=function(e,t,a){if("string"==typeof e)return e;if("Array"===n.util.type(e))return e.map(function(n){return r.stringify(n,t,e)}).join("");var i={type:e.type,content:r.stringify(e.content,t,a),tag:"span",classes:["token",e.type],attributes:{},language:t,parent:a};if(e.alias){var l="Array"===n.util.type(e.alias)?e.alias:[e.alias];Array.prototype.push.apply(i.classes,l)}n.hooks.run("wrap",i);var o=Object.keys(i.attributes).map(function(e){return e+'="'+(i.attributes[e]||"").replace(/"/g,""")+'"'}).join(" ");return"<"+i.tag+' class="'+i.classes.join(" ")+'"'+(o?" "+o:"")+">"+i.content+""},!_self.document)return _self.addEventListener?(n.disableWorkerMessageHandler||_self.addEventListener("message",function(e){var t=JSON.parse(e.data),r=t.language,a=t.code,i=t.immediateClose;_self.postMessage(n.highlight(a,n.languages[r],r)),i&&_self.close()},!1),_self.Prism):_self.Prism;var a=document.currentScript||[].slice.call(document.getElementsByTagName("script")).pop();return a&&(n.filename=a.src,n.manual||a.hasAttribute("data-manual")||("loading"!==document.readyState?window.requestAnimationFrame?window.requestAnimationFrame(n.highlightAll):window.setTimeout(n.highlightAll,16):document.addEventListener("DOMContentLoaded",n.highlightAll))),_self.Prism}();"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file +var _self="undefined"!=typeof window?window:"undefined"!=typeof WorkerGlobalScope&&self instanceof WorkerGlobalScope?self:{},Prism=function(){var e=/\blang(?:uage)?-(\w+)\b/i,t=0,n=_self.Prism={manual:_self.Prism&&_self.Prism.manual,disableWorkerMessageHandler:_self.Prism&&_self.Prism.disableWorkerMessageHandler,util:{encode:function(e){return e instanceof r?new r(e.type,n.util.encode(e.content),e.alias):"Array"===n.util.type(e)?e.map(n.util.encode):e.replace(/&/g,"&").replace(/e.length)return;if(!(w instanceof s)){h.lastIndex=0;var _=h.exec(w),j=1;if(!_&&m&&b!=t.length-1){if(h.lastIndex=k,_=h.exec(e),!_)break;for(var P=_.index+(d?_[1].length:0),A=_.index+_[0].length,x=b,O=k,I=t.length;I>x&&(A>O||!t[x].type&&!t[x-1].greedy);++x)O+=t[x].length,P>=O&&(++b,k=O);if(t[b]instanceof s||t[x-1].greedy)continue;j=x-b,w=e.slice(k,O),_.index-=k}if(_){d&&(p=_[1]?_[1].length:0);var P=_.index+p,_=_[0].slice(p),A=P+_.length,N=w.slice(0,P),S=w.slice(A),C=[b,j];N&&(++b,k+=N.length,C.push(N));var E=new s(u,f?n.tokenize(_,f):_,y,_,m);if(C.push(E),S&&C.push(S),Array.prototype.splice.apply(t,C),1!=j&&n.matchGrammar(e,t,r,b,k,!0,u),l)break}else if(l)break}}}}},tokenize:function(e,t){var r=[e],a=t.rest;if(a){for(var i in a)t[i]=a[i];delete t.rest}return n.matchGrammar(e,r,t,0,0,!1),r},hooks:{all:{},add:function(e,t){var r=n.hooks.all;r[e]=r[e]||[],r[e].push(t)},run:function(e,t){var r=n.hooks.all[e];if(r&&r.length)for(var a,i=0;a=r[i++];)a(t)}}},r=n.Token=function(e,t,n,r,a){this.type=e,this.content=t,this.alias=n,this.length=0|(r||"").length,this.greedy=!!a};if(r.stringify=function(e,t,a){if("string"==typeof e)return e;if("Array"===n.util.type(e))return e.map(function(n){return r.stringify(n,t,e)}).join("");var i={type:e.type,content:r.stringify(e.content,t,a),tag:"span",classes:["token",e.type],attributes:{},language:t,parent:a};if(e.alias){var l="Array"===n.util.type(e.alias)?e.alias:[e.alias];Array.prototype.push.apply(i.classes,l)}n.hooks.run("wrap",i);var o=Object.keys(i.attributes).map(function(e){return e+'="'+(i.attributes[e]||"").replace(/"/g,""")+'"'}).join(" ");return"<"+i.tag+' class="'+i.classes.join(" ")+'"'+(o?" "+o:"")+">"+i.content+""},!_self.document)return _self.addEventListener?(n.disableWorkerMessageHandler||_self.addEventListener("message",function(e){var t=JSON.parse(e.data),r=t.language,a=t.code,i=t.immediateClose;_self.postMessage(n.highlight(a,n.languages[r],r)),i&&_self.close()},!1),_self.Prism):_self.Prism;var a=document.currentScript||[].slice.call(document.getElementsByTagName("script")).pop();return a&&(n.filename=a.src,n.manual||a.hasAttribute("data-manual")||("loading"!==document.readyState?window.requestAnimationFrame?window.requestAnimationFrame(n.highlightAll):window.setTimeout(n.highlightAll,16):document.addEventListener("DOMContentLoaded",n.highlightAll))),_self.Prism}();"undefined"!=typeof module&&module.exports&&(module.exports=Prism),"undefined"!=typeof global&&(global.Prism=Prism); \ No newline at end of file diff --git a/components/prism-jsx.js b/components/prism-jsx.js index 1ad68fe658..346322189e 100644 --- a/components/prism-jsx.js +++ b/components/prism-jsx.js @@ -32,4 +32,90 @@ Prism.languages.insertBefore('inside', 'attr-value',{ } }, Prism.languages.jsx.tag); +// The following will handle plain text inside tags +var stringifyToken = function (token) { + if (typeof token === 'string') { + return token; + } + if (typeof token.content === 'string') { + return token.content; + } + return token.content.map(stringifyToken).join(''); +}; + +var walkTokens = function (tokens) { + var openedTags = []; + for (var i = 0; i < tokens.length; i++) { + var token = tokens[i]; + var notTagNorBrace = false; + + if (typeof token !== 'string') { + if (token.type === 'tag' && token.content[0] && token.content[0].type === 'tag') { + // We found a tag, now find its kind + + if (token.content[0].content[0].content === ' 0 && openedTags[openedTags.length - 1].tagName === stringifyToken(token.content[0].content[1])) { + // Pop matching opening tag + openedTags.pop(); + } + } else { + if (token.content[token.content.length - 1].content === '/>') { + // Autoclosed tag, ignore + } else { + // Opening tag + openedTags.push({ + tagName: stringifyToken(token.content[0].content[1]), + openedBraces: 0 + }); + } + } + } else if (openedTags.length > 0 && token.type === 'punctuation' && token.content === '{') { + + // Here we might have entered a JSX context inside a tag + openedTags[openedTags.length - 1].openedBraces++; + + } else if (openedTags.length > 0 && openedTags[openedTags.length - 1].openedBraces > 0 && token.type === 'punctuation' && token.content === '}') { + + // Here we might have left a JSX context inside a tag + openedTags[openedTags.length - 1].openedBraces--; + + } else { + notTagNorBrace = true + } + } + if (notTagNorBrace || typeof token === 'string') { + if (openedTags.length > 0 && openedTags[openedTags.length - 1].openedBraces === 0) { + // Here we are inside a tag, and not inside a JSX context. + // That's plain text: drop any tokens matched. + var plainText = stringifyToken(token); + + // And merge text with adjacent text + if (i < tokens.length - 1 && (typeof tokens[i + 1] === 'string' || tokens[i + 1].type === 'plain-text')) { + plainText += stringifyToken(tokens[i + 1]); + tokens.splice(i + 1, 1); + } + if (i > 0 && (typeof tokens[i - 1] === 'string' || tokens[i - 1].type === 'plain-text')) { + plainText = stringifyToken(tokens[i - 1]) + plainText; + tokens.splice(i - 1, 1); + i--; + } + + tokens[i] = new Prism.Token('plain-text', plainText, null, plainText); + } + } + + if (token.content && typeof token.content !== 'string') { + walkTokens(token.content); + } + } +}; + +Prism.hooks.add('after-tokenize', function (env) { + if (env.language !== 'jsx' && env.language !== 'tsx') { + return; + } + walkTokens(env.tokens); +}); + }(Prism)); diff --git a/components/prism-jsx.min.js b/components/prism-jsx.min.js index e53d5953ef..fd60b1e281 100644 --- a/components/prism-jsx.min.js +++ b/components/prism-jsx.min.js @@ -1 +1 @@ -!function(a){var s=a.util.clone(a.languages.javascript);a.languages.jsx=a.languages.extend("markup",s),a.languages.jsx.tag.pattern=/<\/?[\w.:-]+\s*(?:\s+(?:[\w.:-]+(?:=(?:("|')(?:\\[\s\S]|(?!\1)[^\\])*\1|[^\s{'">=]+|\{(?:\{[^}]+\}|[^{}])+\}))?|\{\.{3}[a-z_$][\w$]*(?:\.[a-z_$][\w$]*)*\}))*\s*\/?>/i,a.languages.jsx.tag.inside["attr-value"].pattern=/=(?!\{)(?:("|')(?:\\[\s\S]|(?!\1)[^\\])*\1|[^\s'">]+)/i,a.languages.insertBefore("inside","attr-name",{spread:{pattern:/\{\.{3}[a-z_$][\w$]*(?:\.[a-z_$][\w$]*)*\}/,inside:{punctuation:/\.{3}|[{}.]/,"attr-value":/\w+/}}},a.languages.jsx.tag),a.languages.insertBefore("inside","attr-value",{script:{pattern:/=(\{(?:\{[^}]*\}|[^}])+\})/i,inside:{"script-punctuation":{pattern:/^=(?={)/,alias:"punctuation"},rest:a.languages.jsx},alias:"language-javascript"}},a.languages.jsx.tag)}(Prism); \ No newline at end of file +!function(t){var n=t.util.clone(t.languages.javascript);t.languages.jsx=t.languages.extend("markup",n),t.languages.jsx.tag.pattern=/<\/?[\w.:-]+\s*(?:\s+(?:[\w.:-]+(?:=(?:("|')(?:\\[\s\S]|(?!\1)[^\\])*\1|[^\s{'">=]+|\{(?:\{[^}]+\}|[^{}])+\}))?|\{\.{3}[a-z_$][\w$]*(?:\.[a-z_$][\w$]*)*\}))*\s*\/?>/i,t.languages.jsx.tag.inside["attr-value"].pattern=/=(?!\{)(?:("|')(?:\\[\s\S]|(?!\1)[^\\])*\1|[^\s'">]+)/i,t.languages.insertBefore("inside","attr-name",{spread:{pattern:/\{\.{3}[a-z_$][\w$]*(?:\.[a-z_$][\w$]*)*\}/,inside:{punctuation:/\.{3}|[{}.]/,"attr-value":/\w+/}}},t.languages.jsx.tag),t.languages.insertBefore("inside","attr-value",{script:{pattern:/=(\{(?:\{[^}]*\}|[^}])+\})/i,inside:{"script-punctuation":{pattern:/^=(?={)/,alias:"punctuation"},rest:t.languages.jsx},alias:"language-javascript"}},t.languages.jsx.tag);var e=function(t){return"string"==typeof t?t:"string"==typeof t.content?t.content:t.content.map(e).join("")},a=function(n){for(var s=[],g=0;g0&&s[s.length-1].tagName===e(o.content[0].content[1])&&s.pop():"/>"===o.content[o.content.length-1].content||s.push({tagName:e(o.content[0].content[1]),openedBraces:0}):s.length>0&&"punctuation"===o.type&&"{"===o.content?s[s.length-1].openedBraces++:s.length>0&&s[s.length-1].openedBraces>0&&"punctuation"===o.type&&"}"===o.content?s[s.length-1].openedBraces--:i=!0),(i||"string"==typeof o)&&s.length>0&&0===s[s.length-1].openedBraces){var p=e(o);g0&&("string"==typeof n[g-1]||"plain-text"===n[g-1].type)&&(p=e(n[g-1])+p,n.splice(g-1,1),g--),n[g]=new t.Token("plain-text",p,null,p)}o.content&&"string"!=typeof o.content&&a(o.content)}};t.hooks.add("after-tokenize",function(t){("jsx"===t.language||"tsx"===t.language)&&a(t.tokens)})}(Prism); \ No newline at end of file diff --git a/prism.js b/prism.js index e199e4fd72..e8570edc92 100644 --- a/prism.js +++ b/prism.js @@ -277,8 +277,14 @@ var _ = _self.Prism = { }, highlight: function (text, grammar, language) { - var tokens = _.tokenize(text, grammar); - return Token.stringify(_.util.encode(tokens), language); + var env = { + text: text, + grammar: grammar, + language: language + }; + env.tokens = _.tokenize(text, grammar); + _.hooks.run('after-tokenize', env); + return Token.stringify(_.util.encode(env.tokens), language); }, matchGrammar: function (text, strarr, grammar, index, startPos, oneshot, target) { diff --git a/tests/helper/test-case.js b/tests/helper/test-case.js index c92ca116b9..baf67a14ab 100644 --- a/tests/helper/test-case.js +++ b/tests/helper/test-case.js @@ -63,6 +63,12 @@ module.exports = { // the first language is the main language to highlight var mainLanguageGrammar = Prism.languages[usedLanguages.mainLanguage]; var compiledTokenStream = Prism.tokenize(testCase.testSource, mainLanguageGrammar); + Prism.hooks.run('after-tokenize', { + text: testCase.testSource, + grammar: mainLanguageGrammar, + language: usedLanguages.mainLanguage, + tokens: compiledTokenStream + }); var simplifiedTokenStream = TokenStreamTransformer.simplify(compiledTokenStream); var tzd = JSON.stringify( simplifiedTokenStream ); var exp = JSON.stringify( testCase.expectedTokenStream ); diff --git a/tests/languages/jsx/issue1294.test b/tests/languages/jsx/issue1294.test new file mode 100644 index 0000000000..997f335124 --- /dev/null +++ b/tests/languages/jsx/issue1294.test @@ -0,0 +1,69 @@ +export default () => ( +
+

Hi! I'm building a fake Gatsby site as part of a tutorial!

+

+ What do I like to do? Lots of course but definitely enjoy building + websites. +

+
+); + +---------------------------------------------------- + +[ + ["keyword", "export"], + ["keyword", "default"], + ["punctuation", "("], ["punctuation", ")"], ["operator", "=>"], ["punctuation", "("], + ["tag", [ + ["tag", [ + ["punctuation", "<"], + "div" + ]], + ["punctuation", ">"] + ]], + ["plain-text", "\r\n\t\t"], + ["tag", [ + ["tag", [ + ["punctuation", "<"], + "h1" + ]], + ["punctuation", ">"] + ]], + ["plain-text", "Hi! I'm building a fake Gatsby site as part of a tutorial!"], + ["tag", [ + ["tag", [ + ["punctuation", ""] + ]], + ["plain-text", "\r\n\t\t"], + ["tag", [ + ["tag", [ + ["punctuation", "<"], + "p" + ]], + ["punctuation", ">"] + ]], + ["plain-text", "\r\n\t\t\tWhat do I like to do? Lots of course but definitely enjoy building\r\n\t\t\twebsites.\r\n\t\t"], + ["tag", [ + ["tag", [ + ["punctuation", ""] + ]], + ["plain-text", "\r\n\t"], + ["tag", [ + ["tag", [ + ["punctuation", ""] + ]], + ["punctuation", ")"], ["punctuation", ";"] +] + +---------------------------------------------------- + +See #1294. \ No newline at end of file diff --git a/tests/languages/jsx/issue1342.test b/tests/languages/jsx/issue1342.test index 524cbc56ba..1d8a9fa0f4 100644 --- a/tests/languages/jsx/issue1342.test +++ b/tests/languages/jsx/issue1342.test @@ -26,7 +26,7 @@ ]], ["punctuation", ">"] ]], - ["number", "2"], + ["plain-text", "2"], ["tag", [ ["tag", [ ["punctuation", ""] ]], - ["keyword", "for"], - ["keyword", "of"], - ["keyword", "as"], + ["plain-text", "for of as"], ["tag", [ ["tag", [ ["punctuation", ""] ]], - "link", + ["plain-text", "link"], ["tag", [ ["tag", [ ["punctuation", " + foobar for +
+ foobar for +
+ foobar for {i == 0 ? 42 : 0}} + + +---------------------------------------------------- + +[ + ["tag", [ + ["tag", [ + ["punctuation", "<"], + "div" + ]], + ["punctuation", ">"] + ]], + ["plain-text", "\r\n\tfoobar for\r\n\t"], + ["tag", [ + ["tag", [ + ["punctuation", "<"], + "div" + ]], + ["punctuation", ">"] + ]], + ["plain-text", "\r\n\t\tfoobar for\r\n\t"], + ["tag", [ + ["tag", [ + ["punctuation", ""] + ]], + ["plain-text", "\r\n\tfoobar for "], + ["punctuation", "{"], + "i ", + ["operator", "=="], + ["number", "0"], + ["operator", "?"], + ["number", "42"], + ["punctuation", ":"], + ["number", "0"], + ["punctuation", "}"], + ["plain-text", "}\r\n"], + ["tag", [ + ["tag", [ + ["punctuation", ""] + ]] +] + +---------------------------------------------------- + +Checks that plain text inside tags is unmatched. \ No newline at end of file diff --git a/tests/languages/tsx/tag_feature.test b/tests/languages/tsx/tag_feature.test index c029531008..fa6e87a313 100644 --- a/tests/languages/tsx/tag_feature.test +++ b/tests/languages/tsx/tag_feature.test @@ -1,6 +1,6 @@ var myDivElement =
; var myElement = ; -
+
class Test extends Component { render() { return

Hello world.

; @@ -58,7 +58,7 @@ class Test extends Component { ["attr-value", "foo"], ["punctuation", "}"] ]], - ["punctuation", ">"] + ["punctuation", "/>"] ]], ["keyword", "class"], @@ -87,8 +87,7 @@ class Test extends Component { ]], ["punctuation", ">"] ]], - "Hello world", - ["punctuation", "."], + ["plain-text", "Hello world."], ["tag", [ ["tag", [ ["punctuation", "