Skip to content

Commit

Permalink
Merge branch 'master' into ionatan_defaultparams_log
Browse files Browse the repository at this point in the history
  • Loading branch information
iwiznia committed Jul 29, 2021
2 parents 1861959 + 7ed3ca6 commit 144b817
Show file tree
Hide file tree
Showing 3 changed files with 91 additions and 11 deletions.
14 changes: 12 additions & 2 deletions __tests__/ExpensiMark-HTML-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,12 @@ test('Test markdown and url links with inconsistent starting and closing parens'
+ '(http://foo.com/(something)?after=parens) '
+ '(((http://foo.com/(something)?after=parens '
+ '(((http://foo.com/(something)?after=parens))) '
+ 'http://foo.com/(something)?after=parens))) ';
+ 'http://foo.com/(something)?after=parens))) '
+ '[Yo (click here to see a cool cat)](https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg) '
+ '[Yo click here to see a cool cat)](https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg) '
+ '[Yo (click here to see a cool cat](https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg) '
+ '[Yo click * $ & here to see a cool cat](https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg) ';


const resultString = '<a href="http://google.com/(something)?after=parens" target="_blank">google</a> '
+ '(<a href="http://google.com/(something)?after=parens" target="_blank">google</a>) '
Expand All @@ -344,7 +349,12 @@ test('Test markdown and url links with inconsistent starting and closing parens'
+ '(<a href="http://foo.com/(something)?after=parens" target="_blank">http://foo.com/(something)?after=parens</a>) '
+ '(((<a href="http://foo.com/(something)?after=parens" target="_blank">http://foo.com/(something)?after=parens</a> '
+ '(((<a href="http://foo.com/(something)?after=parens" target="_blank">http://foo.com/(something)?after=parens</a>))) '
+ '<a href="http://foo.com/(something)?after=parens" target="_blank">http://foo.com/(something)?after=parens</a>))) ';
+ '<a href="http://foo.com/(something)?after=parens" target="_blank">http://foo.com/(something)?after=parens</a>))) '
+ '<a href="https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg" target="_blank">Yo (click here to see a cool cat)</a> '
+ '<a href="https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg" target="_blank">Yo click here to see a cool cat)</a> '
+ '<a href="https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg" target="_blank">Yo (click here to see a cool cat</a> '
+ '<a href="https://c8.alamy.com/compes/ha11pc/cookie-cat-con-sombrero-de-cowboy-y-sun-glass-ha11pc.jpg" target="_blank">Yo click * $ &amp; here to see a cool cat</a> ';


expect(parser.replace(testString)).toBe(resultString);
});
Expand Down
60 changes: 55 additions & 5 deletions __tests__/ExpensiMark-Markdown-test.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ const parser = new ExpensiMark();

test('Test bold HTML replacement', () => {
const boldTestStartString = 'This is a <strong>sentence,</strong> and it has some <strong>punctuation, words, and spaces</strong>. '
+ '<strong>test</strong> * testing* test*test*test. * testing * *testing * '
+ 'This is a <b>sentence,</b> and it has some <b>punctuation, words, and spaces</b>. '
+ '<b>test</b> * testing* test*test*test. * testing * *testing *';
+ '<strong>test</strong> * testing* test*test*test. * testing * *testing * '
+ 'This is a <b>sentence,</b> and it has some <b>punctuation, words, and spaces</b>. '
+ '<b>test</b> * testing* test*test*test. * testing * *testing *';
const boldTestReplacedString = 'This is a *sentence,* and it has some *punctuation, words, and spaces*. '
+ '*test* * testing* test*test*test. * testing * *testing * '
+ 'This is a *sentence,* and it has some *punctuation, words, and spaces*. '
Expand All @@ -18,9 +18,9 @@ test('Test bold HTML replacement', () => {

test('Test italic HTML replacement', () => {
    // Exercises both <em> and <i>. The bare underscores in the input (e.g. `_ testing_`,
    // `test_test_test`) appear unchanged in the expected output.
    const italicTestStartString = 'This is a <em>sentence,</em> and it has some <em>punctuation, words, and spaces</em>. <em>test</em> _ testing_ test_test_test. _ test _ _test _ '
        + 'This is a <i>sentence,</i> and it has some <i>punctuation, words, and spaces</i>. <i>test</i> _ testing_ test_test_test. _ test _ _test _';

    // Each concatenated half mirrors the corresponding half of the input above.
    const italicTestReplacedString = 'This is a _sentence,_ and it has some _punctuation, words, and spaces_. _test_ _ testing_ test_test_test. _ test _ _test _ '
        + 'This is a _sentence,_ and it has some _punctuation, words, and spaces_. _test_ _ testing_ test_test_test. _ test _ _test _';

    expect(parser.htmlToMarkdown(italicTestStartString)).toBe(italicTestReplacedString);
});

Expand Down Expand Up @@ -64,3 +64,53 @@ test('Test HTML string with attributes', () => {

expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});

test('Test HTML string with special Tags', () => {
    // The input wraps a single styled <span> in <html>/<body> tags and
    // <!--StartFragment-->/<!--EndFragment--> comments, separated by newlines.
    // Only the span's text is expected to remain after conversion.
    const testString = '<html>\n<body>\n<!--StartFragment--><span style="color: rgb(0, 0, 0); font-family: &quot;Times New Roman&quot;; font-size: medium; font-style: normal; font-variant-ligatures: normal; font-variant-caps: normal; font-weight: 400; letter-spacing: normal; orphans: 2; text-align: start; text-indent: 0px; text-transform: none; white-space: pre-wrap; widows: 2; word-spacing: 0px; -webkit-text-stroke-width: 0px; text-decoration-thickness: initial; text-decoration-style: initial; text-decoration-color: initial; display: inline !important; float: none;">test message</span><!--EndFragment-->\n</body>\n</html>\n';
    const resultString = 'test message';

    expect(parser.htmlToMarkdown(testString)).toBe(resultString);
});


test('Test HTML string with Internal Tags', () => {
    // A <style> element and a <script> element precede the paragraph; the expectation is
    // that both are dropped along with their contents, leaving only the paragraph text.
    const expectedMarkdown = 'test message';
    const htmlWithInternalTags = `<style>
span {
color: rgb(0, 0, 0);
font-family: "Times New Roman";
font-size: medium;
font-style: normal;
font-variant-ligatures: normal;
font-variant-caps: normal;
font-weight: 400;
letter-spacing: normal;
orphans: 2;
text-align: start;
text-indent: 0px;
text-transform: none;
white-space: pre-wrap;
widows: 2;
word-spacing: 0px;
-webkit-text-stroke-width: 0px;
text-decoration-thickness: initial;
text-decoration-style: initial;
text-decoration-color: initial;
display: inline !important;
float: none;
}
</style>
<script type="text/javascript">
document.write('Hacked');
</script>
<p>test message</p>`;

    const actualMarkdown = parser.htmlToMarkdown(htmlWithInternalTags);
    expect(actualMarkdown).toBe(expectedMarkdown);
});

test('Test HTML string with encoded entities', () => {
    // &amp; and &quot; in the input are expected to come back as the literal characters & and ".
    const encodedInput = 'Text Entity &amp; &quot;';
    const decodedOutput = parser.htmlToMarkdown(encodedInput);

    expect(decodedOutput).toBe('Text Entity & "');
});
28 changes: 24 additions & 4 deletions lib/ExpensiMark.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import _ from 'underscore';
import Str from './str';
import TLD_REGEX from './tlds';

Expand Down Expand Up @@ -33,7 +34,7 @@ export default class ExpensiMark {
// with the new lines here since they need to be converted into <br>. And we don't
// want to do this anywhere else since that would break HTML.
// &nbsp; will create styling issues so use &#32;
replacement: (match, _, textWithinFences) => {
replacement: (match, __, textWithinFences) => {
const group = textWithinFences.replace(/(?:(?![\n\r])\s)/g, '&#32;');
return `<pre>${group}</pre>`;
},
Expand Down Expand Up @@ -73,7 +74,7 @@ export default class ExpensiMark {

process: (textToProcess, replacement) => {
const regex = new RegExp(
`\\[((?:[\\w\\s\\d!?&#;:\\/\\-\\.\\+=<>,@\\[\\]‘’“”]|(?:<code>.+<\\/code>))+)\\]\\(${URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`,
`\\[(.+?)\\]\\(${URL_REGEX}\\)(?![^<]*(<\\/pre>|<\\/code>))`,
'gi'
);
return this.modifyTextForUrlLinks(regex, textToProcess, replacement);
Expand Down Expand Up @@ -170,14 +171,32 @@ export default class ExpensiMark {
* @type {Object[]}
*/
this.htmlToMarkdownRules = [
{
// Deletes opening and closing <html> / <body> tags (with any attributes) while leaving the
// content between them in place; the replacement is the empty string.
name: 'Strip Special Tags',
// Pattern pieces, in order: an optional newline before the tag; the tag itself, whose attribute
// part may contain quoted strings or any characters other than quotes and angle brackets; a
// negative lookahead that rejects the match when the text after the tag reaches </pre> or
// </code> before any other '<'; and an optional newline after the tag.
regex: /(\n|\r\n)?<\/?(html|body)(?:"[^"]*"|'[^']*'|[^'"><])*>(?![^<]*(<\/pre>|<\/code>))(\n|\r\n)?/gim,
replacement: ''
},

// Removes <script> and <style> elements entirely: the opening tag, everything up to the
// matching closing tag, and one optional trailing newline are replaced with an empty string.
{
name: 'exclude',
// The pattern is built by joining two string pieces, so regex backslashes are doubled here.
regex: new RegExp(
[
// Opening tag (attributes may hold quoted strings), lazily-matched body, then a closing
// tag whose name must equal the opening one via the \1 backreference.
'<(script|style)(?:"[^"]*"|\'[^\']*\'|[^\'">])*>([\\s\\S]*?)<\\/\\1>',
// Rejects the match when the text after the element reaches </pre> or </code> before any
// other '<'. The single-backslash \n and \r\n are string escapes, so the pattern contains
// the literal newline characters.
'(?![^<]*(<\\/pre>|<\\/code>))(\n|\r\n)?'
].join(''),
'gim'
),
replacement: '',
},
{
name: 'newline',

// Replaces open and closing <br><br/> tags with a single <br/>
pre: inputString => inputString.replace('<br></br>', '<br/>').replace('<br><br/>', '<br/>'),

// Include the immediately followed newline as `<br>\n` should be equal to one \n.
regex: /<br(?:"[^"]*"|'[^']*'|[^'">])*>(?![^<]*(<\/pre>|<\/code>))\n?/gi,
regex: /<br(?:"[^"]*"|'[^']*'|[^'"><])*>(?![^<]*(<\/pre>|<\/code>))\n?/gi,
replacement: '\n'
},
{
Expand Down Expand Up @@ -298,7 +317,8 @@ export default class ExpensiMark {
* @returns {String}
*/
htmlToMarkdown(htmlString) {
let generatedMarkdown = htmlString;
let generatedMarkdown = _.unescape(htmlString);

this.htmlToMarkdownRules.forEach((rule) => {
// Pre-processes input HTML before applying regex
if (rule.pre) {
Expand Down

0 comments on commit 144b817

Please sign in to comment.