diff --git a/__tests__/ExpensiMark-Markdown-test.js b/__tests__/ExpensiMark-Markdown-test.js index e6b4a12c..cfd9e288 100644 --- a/__tests__/ExpensiMark-Markdown-test.js +++ b/__tests__/ExpensiMark-Markdown-test.js @@ -5,9 +5,9 @@ const parser = new ExpensiMark(); test('Test bold HTML replacement', () => { const boldTestStartString = 'This is a sentence, and it has some punctuation, words, and spaces. ' - + 'test * testing* test*test*test. * testing * *testing * ' - + 'This is a sentence, and it has some punctuation, words, and spaces. ' - + 'test * testing* test*test*test. * testing * *testing *'; + + 'test * testing* test*test*test. * testing * *testing * ' + + 'This is a sentence, and it has some punctuation, words, and spaces. ' + + 'test * testing* test*test*test. * testing * *testing *'; const boldTestReplacedString = 'This is a *sentence,* and it has some *punctuation, words, and spaces*. ' + '*test* * testing* test*test*test. * testing * *testing * ' + 'This is a *sentence,* and it has some *punctuation, words, and spaces*. ' @@ -18,9 +18,9 @@ test('Test bold HTML replacement', () => { test('Test italic HTML replacement', () => { const italicTestStartString = 'This is a sentence, and it has some punctuation, words, and spaces. test _ testing_ test_test_test. _ test _ _test _ ' - + 'This is a sentence, and it has some punctuation, words, and spaces. test _ testing_ test_test_test. _ test _ _test _'; + + 'This is a sentence, and it has some punctuation, words, and spaces. test _ testing_ test_test_test. _ test _ _test _'; const italicTestReplacedString = 'This is a _sentence,_ and it has some _punctuation, words, and spaces_. _test_ _ testing_ test_test_test. _ test _ _test _ ' - + 'This is a _sentence,_ and it has some _punctuation, words, and spaces_. _test_ _ testing_ test_test_test. _ test _ _test _'; + + 'This is a _sentence,_ and it has some _punctuation, words, and spaces_. _test_ _ testing_ test_test_test. 
_ test _ _test _'; expect(parser.htmlToMarkdown(italicTestStartString)).toBe(italicTestReplacedString); }); @@ -64,3 +64,53 @@ test('Test HTML string with attributes', () => { expect(parser.htmlToMarkdown(testString)).toBe(resultString); }); + +test('Test HTML string with spcial Tags', () => { + const testString = '\n\ntest message\n\n\n'; + const resultString = 'test message'; + + expect(parser.htmlToMarkdown(testString)).toBe(resultString); +}); + + +test('Test HTML string with Internal Tags', () => { + const testString = ` + +

test message

`; + const resultString = 'test message'; + + expect(parser.htmlToMarkdown(testString)).toBe(resultString); +}); + +test('Test HTML string with encoded entities', () => { + const testString = 'Text Entity & "'; + const resultString = 'Text Entity & "'; + + expect(parser.htmlToMarkdown(testString)).toBe(resultString); +}); diff --git a/lib/ExpensiMark.js b/lib/ExpensiMark.js index 7704e4b7..e5e683b0 100644 --- a/lib/ExpensiMark.js +++ b/lib/ExpensiMark.js @@ -1,3 +1,4 @@ +import _ from 'underscore'; import Str from './str'; import TLD_REGEX from './tlds'; @@ -33,7 +34,7 @@ export default class ExpensiMark { // with the new lines here since they need to be converted into
. And we don't // want to do this anywhere else since that would break HTML. //   will create styling issues so use - replacement: (match, _, textWithinFences) => { + replacement: (match, __, textWithinFences) => { const group = textWithinFences.replace(/(?:(?![\n\r])\s)/g, ' '); return `
${group}
`; }, @@ -170,6 +171,24 @@ export default class ExpensiMark { * @type {Object[]} */ this.htmlToMarkdownRules = [ + { + name: 'Strip Special Tags', + regex: /(\n|\r\n)?<\/?(html|body)(?:"[^"]*"|'[^']*'|[^'"><])*>(?![^<]*(<\/pre>|<\/code>))(\n|\r\n)?/gim, + replacement: '' + }, + + // Remove script and style elements, including their contents + { + name: 'exclude', + regex: new RegExp( + [ + '<(script|style)(?:"[^"]*"|\'[^\']*\'|[^\'">])*>([\\s\\S]*?)<\\/\\1>', + '(?![^<]*(<\\/pre>|<\\/code>))(\n|\r\n)?' + ].join(''), + 'gim' + ), + replacement: '', + }, { name: 'newline', @@ -177,7 +196,7 @@ export default class ExpensiMark { pre: inputString => inputString.replace('

', '
').replace('

', '
'), // Include the immediately followed newline as `
\n` should be equal to one \n. - regex: /])*>(?![^<]*(<\/pre>|<\/code>))\n?/gi, + regex: /<])*>(?![^<]*(<\/pre>|<\/code>))\n?/gi, replacement: '\n' }, { @@ -298,7 +317,8 @@ export default class ExpensiMark { * @returns {String} */ htmlToMarkdown(htmlString) { - let generatedMarkdown = htmlString; + let generatedMarkdown = _.unescape(htmlString); + this.htmlToMarkdownRules.forEach((rule) => { // Pre-processes input HTML before applying regex if (rule.pre) {