This repository has been archived by the owner on Mar 21, 2021. It is now read-only.
-
-
Notifications
You must be signed in to change notification settings - Fork 116
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Cleaned up and single "happy path spec".
* Lexer, Parser and APIs in different files. * A single test which parses a simple valid input and outputs a CST.
- Loading branch information
Showing
5 changed files
with
224 additions
and
116 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,33 @@ | ||
const JDLLexer = require('./lexer').JDLLexer; | ||
const JDLParser = require('./parser').JDLParser; | ||
|
||
|
||
// Single parser instance shared by every call made through this module; new input is handed | ||
// to it through the ".input" setter instead of constructing a new parser for each parse. | ||
// NOTE(review): the JDLParser constructor in ./parser declares no parameters, so the "[]" | ||
// argument passed here appears to be ignored - confirm and drop it if so. | ||
const parserSingleton = new JDLParser([]); | ||
|
||
// Tokenizes "input" with JDLLexer, feeds the tokens to the shared parser instance and invokes | ||
// the parser rule named by "startRule" (defaults to 'prog'). | ||
// | ||
// Returns an object with: | ||
//   cst         - the value returned by the invoked rule (the CST, see the comments below) | ||
//   lexErrors   - lexResult.errors as reported by the lexer | ||
//   parseErrors - the "errors" property of the parser singleton after the rule has run | ||
//   comments    - lexResult.groups.comments as reported by the lexer | ||
// | ||
// "startRule" must name a rule method on the parser instance; a name that does not resolve | ||
// to a function makes the dynamic call below throw a TypeError. | ||
// NOTE(review): parseErrors is read from the shared singleton - confirm it is not changed | ||
// by a later parse() call while a previous result is still in use. | ||
function parse(input, startRule = 'prog') { | ||
const lexResult = JDLLexer.tokenize(input); | ||
|
||
// ".input" is a setter which will reset the parser's internal state. | ||
parserSingleton.input = lexResult.tokens; | ||
|
||
// 1. We can dynamically invoke any of the parser rules; they are just methods on the parser instance... | ||
// 2. The JDL Parser is configured to automatically output a ParseTree, a.k.a Concrete Syntax Tree (CST). | ||
// This allows us to keep the grammar as a "pure" grammar without any embedded semantic actions. | ||
const cst = parserSingleton[startRule](); | ||
|
||
return { | ||
cst, | ||
lexErrors: lexResult.errors, | ||
parseErrors: parserSingleton.errors, | ||
comments: lexResult.groups.comments | ||
}; | ||
} | ||
|
||
// Placeholder for syntactic auto-complete suggestions: the body is empty, so the function | ||
// currently ignores both arguments and returns undefined. | ||
// NOTE(review): "Syntatic" is a misspelling of "Syntactic"; the name is exported below, so | ||
// renaming it would change the module's public interface. | ||
function getSyntaticAutoCompleteSuggestions(input, startRule = 'prog') { | ||
|
||
} | ||
|
||
// Public API of this module: the parse entry point and the (not yet implemented) auto-complete stub. | ||
module.exports = { | ||
parse, | ||
getSyntaticAutoCompleteSuggestions | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,130 @@ | ||
const chevrotain = require('chevrotain'); | ||
|
||
const Parser = chevrotain.Parser; | ||
const tokensVocabulary = require('./lexer').tokens; | ||
|
||
// short name to reduce grammar's verbosity | ||
const t = tokensVocabulary; | ||
|
||
|
||
// Chevrotain-based parser for a subset of JDL. All grammar rules are registered in the | ||
// constructor through the $.RULE DSL; the parser is configured with error recovery enabled | ||
// and with automatic CST output, so the rules contain no embedded semantic actions. | ||
class JDLParser extends Parser { | ||
// Our Parser only gets initialized once, new inputs will be transferred via | ||
// the ".input" setter. | ||
constructor() { | ||
super([], tokensVocabulary, { | ||
recoveryEnabled: true, | ||
outputCst: true | ||
}); | ||
|
||
// short alias for the parser instance, used by all the rule definitions below. | ||
const $ = this; | ||
|
||
// HIGHLIGHTS1: Any rule may be used as a start rule, there is no artificial limit | ||
// like in pegjs. This capability is useful for partial parsing, e.g.: | ||
// 1. Code snippets | ||
// 2. Incremental parsing of only the changed parts of an active Editor. | ||
// 3. writing Unit tests for micro code samples. | ||
// prog: constantDecl | entityDecl | ||
$.RULE('prog', () => { | ||
$.OR([ | ||
{ ALT: () => { $.SUBRULE($.constantDecl); } }, | ||
{ ALT: () => { $.SUBRULE($.entityDecl); } } | ||
]); | ||
}); | ||
|
||
// constantDecl: NAME EQUALS INTEGER | ||
$.RULE('constantDecl', () => { | ||
$.CONSUME(t.NAME); | ||
$.CONSUME(t.EQUALS); | ||
$.CONSUME(t.INTEGER); | ||
}); | ||
|
||
// entityDecl: ENTITY NAME entityTableNameDecl? entityBody? | ||
$.RULE('entityDecl', () => { | ||
$.CONSUME(t.ENTITY); | ||
$.CONSUME(t.NAME); | ||
|
||
$.OPTION(() => { | ||
$.SUBRULE($.entityTableNameDecl); | ||
}); | ||
|
||
// the "2" suffix is a quirk of Chevrotain, more details: | ||
// https://github.com/SAP/chevrotain/blob/master/docs/faq.md#-why-are-the-unique-numerical-suffixes-consume1consume2-needed-for-the-dsl-rules | ||
$.OPTION2(() => { | ||
$.SUBRULE($.entityBody); | ||
}); | ||
}); | ||
|
||
// entityTableNameDecl: LPAREN NAME RPAREN | ||
$.RULE('entityTableNameDecl', () => { | ||
$.CONSUME(t.LPAREN); | ||
$.CONSUME(t.NAME); | ||
$.CONSUME(t.RPAREN); | ||
}); | ||
|
||
// entityBody: LCURLY fieldDec (COMMA fieldDec)* RCURLY | ||
$.RULE('entityBody', () => { | ||
$.CONSUME(t.LCURLY); | ||
$.AT_LEAST_ONE_SEP({ | ||
// TODO: I do not understand why the original grammar seems to have allowed | ||
// consecutive fields without a separating comma. | ||
SEP: t.COMMA, | ||
DEF: () => { | ||
$.SUBRULE($.fieldDec); | ||
} | ||
}); | ||
$.CONSUME(t.RCURLY); | ||
}); | ||
|
||
// fieldDec: NAME type (validation (COMMA validation)*)? | ||
// NOTE(review): COMMA separates validations here and also separates fields in entityBody; | ||
// confirm that a field with validations followed by another field parses as intended. | ||
$.RULE('fieldDec', () => { | ||
$.CONSUME(t.NAME); | ||
$.SUBRULE($.type); | ||
// Short form for: "(X(,X)*)?" | ||
$.MANY_SEP({ | ||
SEP: t.COMMA, | ||
DEF: () => { | ||
$.SUBRULE($.validation); | ||
} | ||
}); | ||
}); | ||
|
||
// type: NAME | ||
$.RULE('type', () => { | ||
$.CONSUME(t.NAME); | ||
}); | ||
|
||
// validation: REQUIRED | minMaxValidation | pattern | ||
$.RULE('validation', () => { | ||
$.OR([ | ||
{ ALT: () => { $.CONSUME(t.REQUIRED); } }, | ||
{ ALT: () => { $.SUBRULE($.minMaxValidation); } }, | ||
{ ALT: () => { $.SUBRULE($.pattern); } } | ||
]); | ||
}); | ||
|
||
// minMaxValidation: MIN_MAX_KEYWORD LPAREN (INTEGER | NAME) RPAREN | ||
$.RULE('minMaxValidation', () => { | ||
// HIGHLIGHT: | ||
// Note that "MIN_MAX_KEYWORD" is an abstract token and could match 6 different concrete token types | ||
$.CONSUME(t.MIN_MAX_KEYWORD); | ||
$.CONSUME(t.LPAREN); | ||
$.OR([ | ||
{ ALT: () => { $.CONSUME(t.INTEGER); } }, | ||
{ ALT: () => { $.CONSUME(t.NAME); } } | ||
]); | ||
$.CONSUME(t.RPAREN); | ||
}); | ||
|
||
// pattern: PATTERN LPAREN REGEX RPAREN | ||
$.RULE('pattern', () => { | ||
$.CONSUME(t.PATTERN); | ||
$.CONSUME(t.LPAREN); | ||
// HIGHLIGHT: | ||
// With Chevrotain the grammar can be debugged directly by using good old fashioned breakpoints. | ||
// No need to try and figure out a 10,000 lines generated file, or worse, not even have that | ||
// if we were to use some JS combinator. | ||
// debugger; | ||
$.CONSUME(t.REGEX); | ||
$.CONSUME(t.RPAREN); | ||
}); | ||
|
||
// very important to call this after all the rules have been defined. | ||
// otherwise the parser may not work correctly as it will lack information | ||
// derived during the self analysis phase. | ||
Parser.performSelfAnalysis(this); | ||
} | ||
} | ||
|
||
// Only the parser class is exported; callers create the instance themselves. | ||
module.exports = { | ||
JDLParser | ||
}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
/* eslint-disable no-new, no-unused-expressions */ | ||
const expect = require('chai').expect; | ||
const parse = require('../../../lib/dsl/poc/api').parse; | ||
|
||
|
||
// Spec for the Chevrotain parser POC. Only the first "happy path" case is implemented; | ||
// the remaining cases and the AutoComplete context are empty placeholders. | ||
describe('Chevrotain Parser POC', () => { | ||
context('parsing', () => { | ||
it('Can parse a simple valid JDL text', () => { | ||
const input = ` | ||
entity JobHistory { | ||
startDate ZonedDateTime, | ||
endDate ZonedDateTime, | ||
language Language | ||
}`; | ||
|
||
// debug and step into this to experience debugging the parser's code directly without | ||
// the abstraction of a 10,000 lines of generated source code in the way. | ||
// parse() is called without a startRule, so the default 'prog' rule is used. | ||
const result = parse(input); | ||
expect(result.parseErrors).to.be.empty; | ||
|
||
const cst = result.cst; | ||
// We can now explore the automatically created Concrete Syntax Tree. | ||
// See detailed CST docs here: https://github.com/SAP/chevrotain/blob/master/docs/concrete_syntax_tree.md | ||
// The input holds a single entity declaration, so no constantDecl child is expected. | ||
expect(cst.name).to.equal('prog'); | ||
expect(cst.children.constantDecl).to.be.empty; | ||
expect(cst.children.entityDecl).to.have.lengthOf(1); | ||
expect(cst.children.entityDecl[0].children.NAME[0].image).to.equal('JobHistory'); | ||
// ... | ||
}); | ||
|
||
it('Can parse a simple valid JDL text using a custom startRule', () => { | ||
// TODO: TBD | ||
}); | ||
|
||
it('Can parse a simple invalid JDL text with a single syntax error', () => { | ||
// TODO: TBD | ||
}); | ||
|
||
it('Can parse a simple invalid JDL text with multiple syntax errors', () => { | ||
// TODO: TBD | ||
}); | ||
|
||
it('Can recover from errors and continue parsing #1 (single token insertion)', () => { | ||
// TODO: TBD | ||
}); | ||
|
||
it('Can recover from errors and continue parsing #2 (single token deletion)', () => { | ||
// TODO: TBD | ||
}); | ||
|
||
it('Can recover from errors and continue parsing #3 (re-sync)', () => { | ||
// TODO: TBD | ||
}); | ||
}); | ||
|
||
context('AutoComplete', () => { | ||
// TODO: TBD | ||
}); | ||
}); |