Skip to content

Commit

Permalink
Merge pull request #71 from rwjblue/doctype
Browse files Browse the repository at this point in the history
  • Loading branch information
rwjblue authored Feb 3, 2021
2 parents 220bf73 + c3223ab commit 074f3c1
Show file tree
Hide file tree
Showing 4 changed files with 423 additions and 1 deletion.
188 changes: 188 additions & 0 deletions src/evented-tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,194 @@ export default class EventedTokenizer {
this.consume();
this.transitionTo(TokenizerState.commentStart);
this.delegate.beginComment();
} else {
let maybeDoctype = char.toUpperCase() + this.input.substring(this.index, this.index + 6).toUpperCase();

if (maybeDoctype === 'DOCTYPE') {
this.consume();
this.consume();
this.consume();
this.consume();
this.consume();
this.consume();
this.transitionTo(TokenizerState.doctype);
if (this.delegate.beginDoctype) this.delegate.beginDoctype();
}
}
},

/**
 * Handles the character that immediately follows the `DOCTYPE` keyword.
 *
 * - whitespace: move on to `beforeDoctypeName`;
 * - `>`: the declaration is closed without a name (`<!DOCTYPE>`), so finish
 *   the doctype and go back to `beforeData`;
 * - anything else: the name starts without a separating space
 *   (`<!DOCTYPEhtml>`), so the character becomes the first (lower-cased)
 *   character of the name and tokenizing continues in `doctypeName`.
 *
 * Without the last two branches every non-space character, including the
 * closing `>`, was consumed while staying in this state, so the declaration
 * could never terminate until some later whitespace was seen.
 */
doctype() {
  let char = this.consume();

  if (isSpace(char)) {
    this.transitionTo(TokenizerState.beforeDoctypeName);
  } else if (char === '>') {
    if (this.delegate.endDoctype) this.delegate.endDoctype();
    this.transitionTo(TokenizerState.beforeData);
  } else {
    this.transitionTo(TokenizerState.doctypeName);
    if (this.delegate.appendToDoctypeName) this.delegate.appendToDoctypeName(char.toLowerCase());
  }
},

/**
 * Skips whitespace between the `DOCTYPE` keyword and the doctype name.
 *
 * - whitespace: keep skipping;
 * - `>`: the declaration ends before any name was seen (`<!DOCTYPE >`), so
 *   finish the doctype and return to `beforeData`. Previously the `>` was
 *   appended to the name and the declaration was never closed;
 * - anything else: first character of the name; it is reported lower-cased
 *   and tokenizing continues in `doctypeName`.
 */
beforeDoctypeName() {
  let char = this.consume();

  if (isSpace(char)) {
    return;
  } else if (char === '>') {
    if (this.delegate.endDoctype) this.delegate.endDoctype();
    this.transitionTo(TokenizerState.beforeData);
  } else {
    this.transitionTo(TokenizerState.doctypeName);
    if (this.delegate.appendToDoctypeName) this.delegate.appendToDoctypeName(char.toLowerCase());
  }
},

/**
 * Accumulates the doctype name one character at a time.
 *
 * Whitespace ends the name (-> `afterDoctypeName`), `>` ends the whole
 * declaration (-> `beforeData`), and every other character is forwarded
 * lower-cased to the optional `appendToDoctypeName` delegate hook.
 */
doctypeName() {
  const ch = this.consume();

  if (isSpace(ch)) {
    this.transitionTo(TokenizerState.afterDoctypeName);
    return;
  }

  if (ch === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  if (this.delegate.appendToDoctypeName) {
    this.delegate.appendToDoctypeName(ch.toLowerCase());
  }
},

/**
 * Runs after the doctype name: skips whitespace, closes the declaration on
 * `>`, or recognises a case-insensitive `PUBLIC` / `SYSTEM` keyword.
 *
 * Any other character is consumed and ignored, leaving the state unchanged.
 */
afterDoctypeName() {
  const first = this.consume();

  if (isSpace(first)) {
    return;
  }

  if (first === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  // `first` is already consumed, so look ahead at the next five characters to
  // form the six-letter candidate keyword.
  const keyword = first.toUpperCase() + this.input.substring(this.index, this.index + 5).toUpperCase();
  const isPublic = keyword === 'PUBLIC';
  const isSystem = keyword === 'SYSTEM';

  if (!isPublic && !isSystem) {
    return;
  }

  // NOTE(review): six more characters are consumed although only five keyword
  // letters remain, so the character right after the keyword (usually a
  // space) is swallowed here as well. Kept as-is because the follow-up states
  // appear to rely on it; confirm before changing.
  for (let i = 0; i < 6; i++) {
    this.consume();
  }

  this.transitionTo(
    isPublic ? TokenizerState.afterDoctypePublicKeyword : TokenizerState.afterDoctypeSystemKeyword
  );
},

/**
 * Decides what follows the `PUBLIC` keyword by inspecting (not yet
 * consuming) the next character:
 *
 * - whitespace: -> `beforeDoctypePublicIdentifier`;
 * - `"` / `'`: start of a quoted public identifier;
 * - `>`: the declaration ends here;
 * - anything else: the character is consumed and dropped.
 *
 * The final branch matters because this handler uses `peek()`: without it an
 * unexpected character would leave both the read position and the state
 * untouched, so the handler could never make progress when invoked again.
 * Dropping the character matches how the neighbouring doctype states treat
 * input they do not recognise.
 */
afterDoctypePublicKeyword() {
  let char = this.peek();

  if (isSpace(char)) {
    this.transitionTo(TokenizerState.beforeDoctypePublicIdentifier);
    this.consume();
  } else if (char === '"') {
    this.transitionTo(TokenizerState.doctypePublicIdentifierDoubleQuoted);
    this.consume();
  } else if (char === "'") {
    this.transitionTo(TokenizerState.doctypePublicIdentifierSingleQuoted);
    this.consume();
  } else if (char === '>') {
    this.consume();
    if (this.delegate.endDoctype) this.delegate.endDoctype();
    this.transitionTo(TokenizerState.beforeData);
  } else {
    // Unrecognised character: skip it so the read position always advances.
    this.consume();
  }
},

/**
 * Reads the body of a double-quoted public identifier.
 *
 * The closing `"` moves to `afterDoctypePublicIdentifier`, a `>` ends the
 * declaration early, and any other character is passed unchanged to the
 * optional `appendToDoctypePublicIdentifier` delegate hook.
 */
doctypePublicIdentifierDoubleQuoted() {
  const ch = this.consume();

  switch (ch) {
    case '"':
      this.transitionTo(TokenizerState.afterDoctypePublicIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypePublicIdentifier) {
        this.delegate.appendToDoctypePublicIdentifier(ch);
      }
  }
},

/**
 * Reads the body of a single-quoted public identifier.
 *
 * The closing `'` moves to `afterDoctypePublicIdentifier`, a `>` ends the
 * declaration early, and any other character is passed unchanged to the
 * optional `appendToDoctypePublicIdentifier` delegate hook.
 */
doctypePublicIdentifierSingleQuoted() {
  const ch = this.consume();

  if (ch === "'") {
    this.transitionTo(TokenizerState.afterDoctypePublicIdentifier);
    return;
  }

  if (ch === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  if (this.delegate.appendToDoctypePublicIdentifier) {
    this.delegate.appendToDoctypePublicIdentifier(ch);
  }
},

/**
 * Runs right after the closing quote of the public identifier.
 *
 * Whitespace leads to `betweenDoctypePublicAndSystemIdentifiers`, `>` ends
 * the declaration, and an opening quote starts the system identifier
 * directly. Any other character is consumed and ignored.
 */
afterDoctypePublicIdentifier() {
  const ch = this.consume();

  if (isSpace(ch)) {
    this.transitionTo(TokenizerState.betweenDoctypePublicAndSystemIdentifiers);
    return;
  }

  switch (ch) {
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    case '"':
      this.transitionTo(TokenizerState.doctypeSystemIdentifierDoubleQuoted);
      break;
    case "'":
      this.transitionTo(TokenizerState.doctypeSystemIdentifierSingleQuoted);
      break;
  }
},

/**
 * Skips whitespace between the public and the system identifier.
 *
 * `>` ends the declaration and an opening quote starts the system
 * identifier. Whitespace and any other character are consumed without a
 * state change.
 */
betweenDoctypePublicAndSystemIdentifiers() {
  const ch = this.consume();

  if (isSpace(ch)) {
    return;
  }

  switch (ch) {
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    case '"':
      this.transitionTo(TokenizerState.doctypeSystemIdentifierDoubleQuoted);
      break;
    case "'":
      this.transitionTo(TokenizerState.doctypeSystemIdentifierSingleQuoted);
      break;
  }
},

/**
 * Reads the body of a double-quoted system identifier.
 *
 * The closing `"` moves to `afterDoctypeSystemIdentifier`, a `>` ends the
 * declaration early, and any other character is passed unchanged to the
 * optional `appendToDoctypeSystemIdentifier` delegate hook.
 */
doctypeSystemIdentifierDoubleQuoted() {
  const ch = this.consume();

  switch (ch) {
    case '"':
      this.transitionTo(TokenizerState.afterDoctypeSystemIdentifier);
      break;
    case '>':
      if (this.delegate.endDoctype) {
        this.delegate.endDoctype();
      }
      this.transitionTo(TokenizerState.beforeData);
      break;
    default:
      if (this.delegate.appendToDoctypeSystemIdentifier) {
        this.delegate.appendToDoctypeSystemIdentifier(ch);
      }
  }
},

/**
 * Reads the body of a single-quoted system identifier.
 *
 * The closing `'` moves to `afterDoctypeSystemIdentifier`, a `>` ends the
 * declaration early, and any other character is passed unchanged to the
 * optional `appendToDoctypeSystemIdentifier` delegate hook.
 */
doctypeSystemIdentifierSingleQuoted() {
  const ch = this.consume();

  if (ch === "'") {
    this.transitionTo(TokenizerState.afterDoctypeSystemIdentifier);
    return;
  }

  if (ch === '>') {
    if (this.delegate.endDoctype) {
      this.delegate.endDoctype();
    }
    this.transitionTo(TokenizerState.beforeData);
    return;
  }

  if (this.delegate.appendToDoctypeSystemIdentifier) {
    this.delegate.appendToDoctypeSystemIdentifier(ch);
  }
},

/**
 * Runs after the closing quote of the system identifier.
 *
 * Only `>` has an effect: it finishes the doctype and returns to
 * `beforeData`. Whitespace and every other character are consumed and
 * ignored.
 */
afterDoctypeSystemIdentifier() {
  const ch = this.consume();

  if (isSpace(ch)) {
    return;
  }
  if (ch !== '>') {
    return;
  }

  if (this.delegate.endDoctype) {
    this.delegate.endDoctype();
  }
  this.transitionTo(TokenizerState.beforeData);
},

Expand Down
35 changes: 35 additions & 0 deletions src/tokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,41 @@ export default class Tokenizer implements TokenizerDelegate {

// Data

/**
 * Delegate hook: starts a new doctype token.
 *
 * Pushes a `Doctype` token whose name is initially empty; the public and
 * system identifiers are left unset at this point.
 */
beginDoctype() {
this.push({
type: TokenType.Doctype,
name: '',
});
}

/**
 * Delegate hook: appends one character to the name of the current doctype
 * token.
 *
 * @param char - text to add to the end of the name.
 */
appendToDoctypeName(char: string) {
  const doctype = this.current(TokenType.Doctype);
  doctype.name = doctype.name + char;
}

/**
 * Delegate hook: appends one character to the public identifier of the
 * current doctype token, creating the identifier on first use.
 *
 * @param char - text to add to the end of the public identifier.
 */
appendToDoctypePublicIdentifier(char: string) {
  const token = this.current(TokenType.Doctype);
  const soFar = token.publicIdentifier;

  // The identifier stays `undefined` until the first character arrives.
  token.publicIdentifier = soFar === undefined ? char : soFar + char;
}

/**
 * Delegate hook: appends one character to the system identifier of the
 * current doctype token, creating the identifier on first use.
 *
 * @param char - text to add to the end of the system identifier.
 */
appendToDoctypeSystemIdentifier(char: string) {
  const token = this.current(TokenType.Doctype);
  const soFar = token.systemIdentifier;

  // The identifier stays `undefined` until the first character arrives.
  token.systemIdentifier = soFar === undefined ? char : soFar + char;
}

/**
 * Delegate hook: called when the doctype declaration is complete.
 *
 * Delegates to `addLocInfo()`; presumably that records the source location
 * on the current token (see `TokenBase.loc`) — confirm against its
 * definition.
 */
endDoctype() {
this.addLocInfo();
}

beginData() {
this.push({
type: TokenType.Chars,
Expand Down
17 changes: 16 additions & 1 deletion src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ export interface TokenBase<T extends TokenType> {
loc?: Location;
}

/**
 * Token describing a `<!DOCTYPE ...>` declaration.
 */
export interface Doctype extends TokenBase<TokenType.Doctype> {
/** Doctype name; the evented tokenizer lower-cases each character before reporting it. */
name: string;
/**
 * Contents of the quoted public identifier. Left `undefined` until at least
 * one character has been appended, so an empty quoted identifier is not
 * distinguishable from a missing one.
 */
publicIdentifier?: string;
/**
 * Contents of the quoted system identifier. Left `undefined` until at least
 * one character has been appended, so an empty quoted identifier is not
 * distinguishable from a missing one.
 */
systemIdentifier?: string;
}

export interface StartTag extends TokenBase<TokenType.StartTag> {
tagName: string;
attributes: Attribute[];
Expand All @@ -48,9 +54,10 @@ export interface Comment extends TokenBase<TokenType.Comment> {
chars: string;
}

export type Token = StartTag | EndTag | Chars | Comment;
/** Union of every token shape the tokenizer can produce, including doctype declarations. */
export type Token = StartTag | EndTag | Chars | Comment | Doctype;

export const enum TokenType {
Doctype = 'Doctype',
StartTag = 'StartTag',
EndTag = 'EndTag',
Chars = 'Chars',
Expand All @@ -62,13 +69,21 @@ export interface TokenMap {
EndTag: EndTag;
Chars: Chars;
Comment: Comment;
Doctype: Doctype;
}

export interface TokenizerDelegate {
reset(): void;
finishData(): void;
tagOpen(): void;

// TODO: make these non-optional in preparation for the next major version release
beginDoctype?(): void;
appendToDoctypeName?(char: string): void;
appendToDoctypePublicIdentifier?(char: string): void;
appendToDoctypeSystemIdentifier?(char: string): void;
endDoctype?(): void;

beginData(): void;
appendToData(char: string): void;

Expand Down
Loading

0 comments on commit 074f3c1

Please sign in to comment.