Skip to content

Commit

Permalink
Added support for ambiguous ampersands
Browse files Browse the repository at this point in the history
  • Loading branch information
anthonyjb committed Oct 29, 2016
1 parent 8ca1028 commit 1a41c2d
Show file tree
Hide file tree
Showing 7 changed files with 202 additions and 8 deletions.
40 changes: 39 additions & 1 deletion build/html-string.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions build/html-string.min.js

Large diffs are not rendered by default.

112 changes: 112 additions & 0 deletions npm-debug.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
0 info it worked if it ends with ok
1 verbose cli [ '/usr/local/bin/node', '/usr/local/bin/npm', 'publish' ]
2 info using npm@1.4.28
3 info using node@v0.10.33
4 verbose publish [ '.' ]
5 verbose cache add [ '.', null ]
6 verbose cache add name=undefined spec="." args=[".",null]
7 verbose parsed url { protocol: null,
7 verbose parsed url slashes: null,
7 verbose parsed url auth: null,
7 verbose parsed url host: null,
7 verbose parsed url port: null,
7 verbose parsed url hostname: null,
7 verbose parsed url hash: null,
7 verbose parsed url search: null,
7 verbose parsed url query: null,
7 verbose parsed url pathname: '.',
7 verbose parsed url path: '.',
7 verbose parsed url href: '.' }
8 silly lockFile 3a52ce78- .
9 verbose lock . /home/anthony/.npm/3a52ce78-.lock
10 verbose tar pack [ '/home/anthony/.npm/HTMLString/1.0.6/package.tgz', '.' ]
11 verbose tarball /home/anthony/.npm/HTMLString/1.0.6/package.tgz
12 verbose folder .
13 info prepublish HTMLString@1.0.6
14 silly lockFile 1f1177db-tar tar://.
15 verbose lock tar://. /home/anthony/.npm/1f1177db-tar.lock
16 silly lockFile 423837ce-npm-HTMLString-1-0-6-package-tgz tar:///home/anthony/.npm/HTMLString/1.0.6/package.tgz
17 verbose lock tar:///home/anthony/.npm/HTMLString/1.0.6/package.tgz /home/anthony/.npm/423837ce-npm-HTMLString-1-0-6-package-tgz.lock
18 silly lockFile 1f1177db-tar tar://.
19 silly lockFile 1f1177db-tar tar://.
20 silly lockFile 423837ce-npm-HTMLString-1-0-6-package-tgz tar:///home/anthony/.npm/HTMLString/1.0.6/package.tgz
21 silly lockFile 423837ce-npm-HTMLString-1-0-6-package-tgz tar:///home/anthony/.npm/HTMLString/1.0.6/package.tgz
22 silly lockFile b99c44c8-ony-npm-HTMLString-1-0-6-package /home/anthony/.npm/HTMLString/1.0.6/package
23 verbose lock /home/anthony/.npm/HTMLString/1.0.6/package /home/anthony/.npm/b99c44c8-ony-npm-HTMLString-1-0-6-package.lock
24 silly lockFile b99c44c8-ony-npm-HTMLString-1-0-6-package /home/anthony/.npm/HTMLString/1.0.6/package
25 silly lockFile b99c44c8-ony-npm-HTMLString-1-0-6-package /home/anthony/.npm/HTMLString/1.0.6/package
26 silly lockFile 3a52ce78- .
27 silly lockFile 3a52ce78- .
28 silly publish { name: 'HTMLString',
28 silly publish description: 'An HTML parser written in JavaScript that\'s probably not what you\'re looking for.',
28 silly publish version: '1.0.6',
28 silly publish keywords: [ 'html', 'parser' ],
28 silly publish author:
28 silly publish { name: 'Anthony Blackshaw',
28 silly publish email: 'ant@getme.co.uk',
28 silly publish url: 'https://github.com/anthonyjb' },
28 silly publish main: 'build/html-string.js',
28 silly publish devDependencies:
28 silly publish { grunt: '~0.4.5',
28 silly publish 'grunt-contrib-clean': '^0.6.0',
28 silly publish 'grunt-contrib-coffee': '^0.11.1',
28 silly publish 'grunt-contrib-concat': '^0.5.0',
28 silly publish 'grunt-contrib-jasmine': '^0.9.2',
28 silly publish 'grunt-contrib-uglify': '^0.5.1',
28 silly publish 'grunt-contrib-watch': '^0.6.1' },
28 silly publish scripts: { test: 'grunt jasmine --verbose' },
28 silly publish repository:
28 silly publish { type: 'git',
28 silly publish url: 'https://github.com/GetmeUK/HTMLString.git' },
28 silly publish license: 'MIT',
28 silly publish readme: '# HTMLString\n\n[![Build Status](https://travis-ci.org/GetmeUK/HTMLString.svg?branch=master)](https://travis-ci.org/GetmeUK/HTMLString)\n\n> An HTML parser written in JavaScript that\'s probably not what you\'re looking for.\n\n## Install\n\n**Using bower**\n\n```\nbower install --save HTMLString\n```\n\n**Using npm**\n\n```\nnpm install --save HTMLString\n```\n\n## Building\nTo build the library you\'ll need to use Grunt. First install the required node modules ([grunt-cli](http://gruntjs.com/getting-started) must be installed):\n```\ngit clone https://github.com/GetmeUK/HTMLString.git\ncd HTMLString\nnpm install\n```\n\nThen run `grunt build` to build the project.\n\n## Testing\nTo test the library you\'ll need to use Jasmine. First install Jasmine:\n```\ngit clone https://github.com/pivotal/jasmine.git\nmkdir HTMLString/jasmine\nmv jasmine/dist/jasmine-standalone-2.0.3.zip HTMLString/jasmine\ncd HTMLString/jasmine\nunzip jasmine-standalone-2.0.3.zip\n```\n\nThen open `HTMLString/SpecRunner.html` in a browser to run the tests.\n\nAlternatively you can use `grunt jasmine` to run the tests from the command line.\n\n## Documentation\nFull documentation is available at http://getcontenttools.com/api/html-string\n\n## Browser support\n- Chrome\n- Firefox\n- IE9+\n',
28 silly publish readmeFilename: 'README.md',
28 silly publish gitHead: '8ca1028b8f00c0ae20e25887e57999c7eb6fb03b',
28 silly publish bugs: { url: 'https://github.com/GetmeUK/HTMLString/issues' },
28 silly publish homepage: 'https://github.com/GetmeUK/HTMLString',
28 silly publish _id: 'HTMLString@1.0.6',
28 silly publish _shasum: '0dbd2b90ad9ff5634d21f366ecbf9c4954477425',
28 silly publish _from: '.' }
29 verbose request where is /HTMLString
30 verbose request registry https://registry.npmjs.org/
31 verbose request id 9915c72d0522b64e
32 verbose url raw /HTMLString
33 verbose url resolving [ 'https://registry.npmjs.org/', './HTMLString' ]
34 verbose url resolved https://registry.npmjs.org/HTMLString
35 verbose request where is https://registry.npmjs.org/HTMLString
36 info trying registry request attempt 1 at 21:57:55
37 http PUT https://registry.npmjs.org/HTMLString
38 http 401 https://registry.npmjs.org/HTMLString
39 verbose headers { 'content-type': 'application/json',
39 verbose headers 'cache-control': 'max-age=300',
39 verbose headers 'content-length': '42',
39 verbose headers 'accept-ranges': 'bytes',
39 verbose headers date: 'Sun, 23 Oct 2016 20:57:49 GMT',
39 verbose headers via: '1.1 varnish',
39 verbose headers connection: 'keep-alive',
39 verbose headers 'x-served-by': 'cache-lcy1130-LCY',
39 verbose headers 'x-cache': 'MISS',
39 verbose headers 'x-cache-hits': '0',
39 verbose headers 'x-timer': 'S1477256268.696056,VS0,VE1166',
39 verbose headers vary: 'Accept-Encoding' }
40 error publish Failed PUT 401
41 error Error: Could not authenticate getmeuk : HTMLString
41 error at RegClient.<anonymous> (/usr/local/lib/node_modules/npm/node_modules/npm-registry-client/lib/request.js:308:14)
41 error at Request._callback (/usr/local/lib/node_modules/npm/node_modules/npm-registry-client/lib/request.js:246:65)
41 error at Request.self.callback (/usr/local/lib/node_modules/npm/node_modules/request/request.js:236:22)
41 error at Request.emit (events.js:98:17)
41 error at Request.<anonymous> (/usr/local/lib/node_modules/npm/node_modules/request/request.js:1142:14)
41 error at Request.emit (events.js:117:20)
41 error at IncomingMessage.<anonymous> (/usr/local/lib/node_modules/npm/node_modules/request/request.js:1096:12)
41 error at IncomingMessage.emit (events.js:117:20)
41 error at _stream_readable.js:943:16
41 error at process._tickCallback (node.js:419:13)
42 error If you need help, you may report this *entire* log,
42 error including the npm and node versions, at:
42 error <http://github.com/npm/npm/issues>
43 error System Linux 3.13.0-100-generic
44 error command "/usr/local/bin/node" "/usr/local/bin/npm" "publish"
45 error cwd /home/anthony/Desktop/Work/Public/CoffeeScript/github/HTMLString
46 error node -v v0.10.33
47 error npm -v 1.4.28
48 verbose exit [ 1, true ]
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "HTMLString",
"description": "An HTML parser written in JavaScript that's probably not what you're looking for.",
"version": "1.0.6",
"version": "1.0.7",
"keywords": [
"html",
"parser"
Expand Down
12 changes: 9 additions & 3 deletions spec/html-string-spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@
Turing: '<q id="turings-quote">Machines take me by <br> <span class="suprised">surprise</span> with <i>great&nbsp;frequency.</i></q>',
Wozniak: 'all the best people in life seem to like LINUX.',
WozniakNamespaced: 'all the best people in life seem to like <ns:tag ns:attr="foo">LINUX</ns:tag>.',
WozniakWhitespace: 'all the best people in life seem to like LINUX.'
WozniakWhitespace: 'all the best people in life seem to like LINUX.',
AmbiguousAmpersand: '&amp; &<a href="/foo?bar=1&zee=2&amp;omm=3&end">amp</a> &foo && &&amp; &end'
};

describe('HTMLString.String()', function() {
Expand All @@ -22,12 +23,17 @@
string = new HTMLString.String(quotes.WozniakWhitespace, true);
return expect(string.text()).toBe(quotes.WozniakWhitespace);
});
return it('should parse and render a string (HTML)', function() {
it('should parse and render a string (HTML)', function() {
var string;
string = new HTMLString.String(quotes.Turing);
expect(string.html()).toBe(quotes.Turing);
string = new HTMLString.String(quotes.WozniakNamespaced);
expect(string.html()).toBe(quotes.WozniakNamespaced);
return expect(string.html()).toBe(quotes.WozniakNamespaced);
});
return it('should parse and render a string (HTML with ambiguous ampersands)', function() {
var string;
string = new HTMLString.String(quotes.AmbiguousAmpersand);
expect(string.html()).toBe(quotes.AmbiguousAmpersand);
return console.log(string.html());
});
});
Expand Down
7 changes: 7 additions & 0 deletions src/spec/html-string-spec.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ all the best people in life seem to like <ns:tag ns:attr="foo">LINUX</ns:tag>.
WozniakWhitespace: '''
all the best people in life seem to like LINUX.
'''
AmbiguousAmpersand: '''
&amp; &<a href="/foo?bar=1&zee=2&amp;omm=3&end">amp</a> &foo && &&amp; &end
'''
}

describe 'HTMLString.String()', () ->
Expand All @@ -41,6 +44,10 @@ describe 'HTMLString.String()', () ->
string = new HTMLString.String(quotes.WozniakNamespaced)
expect(string.html()).toBe quotes.WozniakNamespaced

# Round-trip check: HTML containing '&' characters that do not form a
# complete `&name;` entity (in text and inside an attribute value) must be
# rendered back exactly as it was given.
it 'should parse and render a string (HTML with ambiguous ampersands)', () ->
    string = new HTMLString.String(quotes.AmbiguousAmpersand)
    # The leftover debugging `console.log string.html()` was removed so the
    # spec no longer writes to the test runner's output.
    expect(string.html()).toBe quotes.AmbiguousAmpersand


Expand Down
33 changes: 32 additions & 1 deletion src/strings.coffee
Original file line number Diff line number Diff line change
Expand Up @@ -659,6 +659,7 @@ class _Parser

# Transitions out of CHAR_OR_ENTITY_OR_TAG:
#   '<'   -> OPENNING_OR_CLOSING_TAG
#   '&'   -> ENTITY
#   'END' -> registered with a null target state and no callback.
# NOTE(review): a null target presumably means "remain in the current
# state" - confirm against the FSM implementation.
@fsm.addTransition '<', CHAR_OR_ENTITY_OR_TAG, OPENNING_OR_CLOSING_TAG
@fsm.addTransition '&', CHAR_OR_ENTITY_OR_TAG, ENTITY
@fsm.addTransition 'END', CHAR_OR_ENTITY_OR_TAG, null

# Entity
@fsm.addTransitions ENTITY_CHARS, ENTITY, null, (c) ->
Expand All @@ -668,6 +669,19 @@ class _Parser
@_pushChar("&#{ @entity };")
@entity = ''

# Ambiguous ampersand in text content: leave ENTITY and go back to
# CHAR_OR_ENTITY_OR_TAG, pushing the '&' and every character buffered in
# @entity as individual characters instead of as a single `&...;` entity.
# NOTE(review): addTransitionAny presumably acts as the fallback for inputs
# that have no explicit ENTITY transition - confirm against the FSM.
@fsm.addTransitionAny ENTITY, CHAR_OR_ENTITY_OR_TAG, (c) ->
@_pushChar('&')
# The loop variable reuses the name `c` of the callback argument.
for c in @entity.split('')
@_pushChar(c)
# Reset the entity buffer for the next '&'.
@entity = ''
# NOTE(review): presumably steps the read position back so the character
# that triggered this transition is processed again in the new state -
# confirm against _back().
@_back()

# 'END' received while in ENTITY: push the '&' and every character buffered
# in @entity as individual characters, then clear the buffer. The
# transition is registered with a null target state and, unlike the
# any-character fallback for ENTITY, does not call @_back().
# NOTE(review): 'END' looks like a synthetic input sent once parsing of the
# HTML has finished (see the @fsm.process('END') call) - confirm.
@fsm.addTransition 'END', ENTITY, null, () ->
@_pushChar('&')
for c in @entity.split('')
@_pushChar(c)
@entity = ''

# Opening or closing Tag
@fsm.addTransitions [' ', '\n'], OPENNING_OR_CLOSING_TAG
@fsm.addTransitions ALPHA_CHARS, OPENNING_OR_CLOSING_TAG, OPENING_TAG, () ->
Expand Down Expand Up @@ -779,7 +793,7 @@ class _Parser
@fsm.addTransitions ENTITY_CHARS, ATTR_ENTITY_NO_DELIM, null, (c) ->
@entity += c

@fsm.addTransitions ENTITY_CHARS, ATTR_ENTITY_SINGLE_DELIM, (c) ->
@fsm.addTransitions ENTITY_CHARS, ATTR_ENTITY_SINGLE_DELIM, null, (c) ->
@entity += c

@fsm.addTransitions ENTITY_CHARS, ATTR_ENTITY_DOUBLE_DELIM, null, (c) ->
Expand All @@ -797,6 +811,21 @@ class _Parser
@attributeValue += "&#{ @entity };"
@entity = ''

# Ambiguous ampersands inside attribute values. Each of the three
# attribute-entity states (no delimiter, single delimiter, double
# delimiter) gets the same fallback: return to the matching
# attribute-value state, append the '&' plus the buffered @entity text to
# @attributeValue as-is (no ';' is added), clear the buffer and call
# @_back().
# NOTE(review): @_back() presumably makes the triggering character get
# processed again in the attribute-value state - confirm against _back().

# Attribute value with no delimiter.
@fsm.addTransitionAny ATTR_ENTITY_NO_DELIM, ATTR_VALUE_NO_DELIM, (c) ->
@attributeValue += '&' + @entity
@entity = ''
@_back()

# Attribute value in the single-delimiter state.
@fsm.addTransitionAny ATTR_ENTITY_SINGLE_DELIM, ATTR_VALUE_SINGLE_DELIM, (c) ->
@attributeValue += '&' + @entity
@entity = ''
@_back()

# Attribute value in the double-delimiter state.
@fsm.addTransitionAny ATTR_ENTITY_DOUBLE_DELIM, ATTR_VALUE_DOUBLE_DELIM, (c) ->
@attributeValue += '&' + @entity
@entity = ''
@_back()

# Parsing methods

_back: () ->
Expand Down Expand Up @@ -896,6 +925,8 @@ class _Parser

@head++

@fsm.process('END')

return @string

preprocess: (html) ->
Expand Down

0 comments on commit 1a41c2d

Please sign in to comment.