Skip to content
This repository has been archived by the owner on Mar 21, 2021. It is now read-only.

Commit

Permalink
Cleaned up and single "happy path spec".
Browse files Browse the repository at this point in the history
* Lexer, Parser and APIs in different files.
* A single test which parses a simple valid input and outputs a CST.
  • Loading branch information
bd82 committed Aug 30, 2017
1 parent be0544d commit 2bf7903
Show file tree
Hide file tree
Showing 5 changed files with 224 additions and 116 deletions.
33 changes: 33 additions & 0 deletions lib/dsl/poc/api.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
const JDLLexer = require('./lexer').JDLLexer;
const JDLParser = require('./parser').JDLParser;


// A single parser instance created once at module load and shared by every
// parse() call; each call supplies its own token vector through the ".input" setter.
const parserSingleton = new JDLParser([]);

/**
 * Tokenizes the given JDL text and parses the resulting tokens with the shared
 * parser instance, starting from the requested grammar rule.
 *
 * @param {string} input - The JDL text to tokenize and parse.
 * @param {string} [startRule='prog'] - Name of the parser rule to start parsing from.
 * @returns {{cst: *, lexErrors: *, parseErrors: *, comments: *}} The value returned by
 *   the start rule (`cst`), the lexer's errors, the parser's errors and the
 *   tokens the lexer collected in its "comments" group.
 * @throws {TypeError} If `startRule` does not name a method of the parser instance.
 */
function parse(input, startRule = 'prog') {
  // Fail fast, before touching the shared parser's state, with a message that
  // names the offending rule instead of a generic "is not a function" error.
  if (typeof parserSingleton[startRule] !== 'function') {
    throw new TypeError(`Unknown start rule: '${String(startRule)}'`);
  }

  const lexResult = JDLLexer.tokenize(input);

  // ".input" is a setter which will reset the parser's internal state.
  parserSingleton.input = lexResult.tokens;

  // 1. We can dynamically invoke any of the parser rules, they are just methods on the parser instance...
  // 2. The JDL Parser is configured to automatically output a ParseTree, a.k.a Concrete Syntax Tree (CST).
  //    This allows us to keep the grammar as a "pure" grammar without any embedded semantic actions.
  const cst = parserSingleton[startRule]();

  return {
    cst,
    lexErrors: lexResult.errors,
    parseErrors: parserSingleton.errors,
    comments: lexResult.groups.comments
  };
}

/**
 * Placeholder for syntactic auto-complete suggestions.
 *
 * The body is intentionally empty for now, so both arguments are ignored and
 * the function always returns `undefined`.
 *
 * @param {string} input - The (partial) JDL text; currently unused.
 * @param {string} [startRule='prog'] - Name of the start rule; currently unused.
 * @returns {undefined}
 */
function getSyntaticAutoCompleteSuggestions(input, startRule = 'prog') {
  // TODO: not implemented yet.
}

module.exports = {
parse,
getSyntaticAutoCompleteSuggestions
};
116 changes: 1 addition & 115 deletions lib/dsl/chev_grammar.js → lib/dsl/poc/lexer.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ const _ = require('lodash');


const Lexer = chevrotain.Lexer;
const Parser = chevrotain.Parser;

// ----------------- lexer -----------------
const tokens = {};
Expand Down Expand Up @@ -146,120 +145,7 @@ createToken({ name: 'DOT', pattern: '.' });
// It is useful to help debug the token vector results.
const JDLLexer = new Lexer(_.values(tokens), { debug: true });

// short prefix to reduce verbosity.
const t = tokens;

/**
 * Chevrotain based parser for the JDL grammar.
 *
 * Every grammar rule is registered in the constructor through `$.RULE(...)`
 * and becomes a method on the parser instance.
 */
class JDLParser extends Parser {
  /**
   * @param {Array} input - The token vector to parse.
   */
  constructor(input) {
    super(input, tokens);

    const $ = this;

    // HIGHLIGHTS1: Any rule may be used as a start rule, there is no artificial limit
    // like in pegjs. This capability is useful for partial parsing, e.g.:
    // 1. Code snippets
    // 2. Incremental parsing of only the changed parts of an active Editor.
    // 3. writing Unit tests for micro code samples.

    // prog : constantDecl | entityDecl
    $.RULE('prog', () => {
      $.OR([
        { ALT: () => { $.SUBRULE($.constantDecl); } },
        { ALT: () => { $.SUBRULE($.entityDecl); } }
      ]);
    });

    // constantDecl : NAME EQUALS INTEGER
    $.RULE('constantDecl', () => {
      $.CONSUME(t.NAME);
      $.CONSUME(t.EQUALS);
      $.CONSUME(t.INTEGER);
    });

    // entityDecl : ENTITY NAME entityTableNameDecl? entityBody?
    $.RULE('entityDecl', () => {
      $.CONSUME(t.ENTITY);
      $.CONSUME(t.NAME);

      $.OPTION(() => {
        $.SUBRULE($.entityTableNameDecl);
      });

      // the "2" suffix is a quirk of Chevrotain, more details:
      // https://github.com/SAP/chevrotain/blob/master/docs/faq.md#-why-are-the-unique-numerical-suffixes-consume1consume2-needed-for-the-dsl-rules
      $.OPTION2(() => {
        $.SUBRULE($.entityBody);
      });
    });

    // entityTableNameDecl : LPAREN NAME RPAREN
    $.RULE('entityTableNameDecl', () => {
      $.CONSUME(t.LPAREN);
      $.CONSUME(t.NAME);
      $.CONSUME(t.RPAREN);
    });

    // entityBody : LCURLY fieldDec+ RCURLY
    $.RULE('entityBody', () => {
      $.CONSUME(t.LCURLY);
      $.AT_LEAST_ONE(() => {
        $.SUBRULE($.fieldDec);
      });
      $.CONSUME(t.RCURLY);
    });

    // fieldDec : NAME type (validation (COMMA validation)*)?
    // The closing curly bracket belongs to the enclosing entityBody rule and
    // must not be consumed here, otherwise a body could never be closed.
    $.RULE('fieldDec', () => {
      $.CONSUME(t.NAME);
      $.SUBRULE($.type);
      // Short form for: "(X(,X)*)?"
      $.MANY_SEP({
        SEP: t.COMMA,
        DEF: () => {
          $.SUBRULE($.validation);
        }
      });
    });

    // type : NAME
    // Token types live on the tokens dictionary ("t"), not on the parser instance.
    $.RULE('type', () => {
      $.CONSUME(t.NAME);
    });

    // validation : REQUIRED | minMaxValidation | pattern
    $.RULE('validation', () => {
      $.OR([
        { ALT: () => { $.CONSUME(t.REQUIRED); } },
        { ALT: () => { $.SUBRULE($.minMaxValidation); } },
        { ALT: () => { $.SUBRULE($.pattern); } }
      ]);
    });

    // minMaxValidation : MIN_MAX_KEYWORD LPAREN (INTEGER | NAME) RPAREN
    $.RULE('minMaxValidation', () => {
      // HIGHLIGHT:
      // Note that "MIN_MAX_KEYWORD" is an abstract token and could match 6 different concrete token types
      $.CONSUME(t.MIN_MAX_KEYWORD);
      $.CONSUME(t.LPAREN);
      $.OR([
        { ALT: () => { $.CONSUME(t.INTEGER); } },
        { ALT: () => { $.CONSUME(t.NAME); } }
      ]);
      $.CONSUME(t.RPAREN);
    });

    // pattern : PATTERN LPAREN REGEX RPAREN
    $.RULE('pattern', () => {
      $.CONSUME(t.PATTERN);
      $.CONSUME(t.LPAREN);
      // HIGHLIGHT:
      // With Chevrotain the grammar can be debugged directly by using good old fashioned breakpoints.
      // No need to try and figure out a 10,000 lines generated file, or worse not even have that
      // if we were to use some JS combinator.
      // debugger;
      $.CONSUME(t.REGEX);
      $.CONSUME(t.RPAREN);
    });

    // very important to call this after all the rules have been defined.
    // otherwise the parser may not work correctly as it will lack information
    // derived during the self analysis phase.
    Parser.performSelfAnalysis(this);
  }
}

module.exports = {
tokens,
JDLLexer,
JDLParser
JDLLexer
};
130 changes: 130 additions & 0 deletions lib/dsl/poc/parser.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
const chevrotain = require('chevrotain');

const Parser = chevrotain.Parser;
const tokensVocabulary = require('./lexer').tokens;

// short name to reduce grammar's verbosity
const t = tokensVocabulary;


/**
 * Chevrotain based parser for the JDL grammar.
 *
 * Every grammar rule is registered in the constructor through `$.RULE(...)`.
 * The base Parser is configured with error recovery enabled and with CST
 * (Concrete Syntax Tree) output enabled, so the grammar below contains no
 * embedded semantic actions.
 */
class JDLParser extends Parser {
// Our Parser only gets initialized once, new inputs will be transferred via
// the ".input" setter.
constructor() {
// Start with an empty token vector; the vocabulary is the tokens dictionary
// exported by the lexer module.
super([], tokensVocabulary, {
recoveryEnabled: true,
outputCst: true
});

const $ = this;

// HIGHLIGHTS1: Any rule may be used as a start rule, there is no artificial limit
// like in pegjs. This capability is useful for partial parsing, e.g.:
// 1. Code snippets
// 2. Incremental parsing of only the changed parts of an active Editor.
// 3. writing Unit tests for micro code samples.

// prog : constantDecl | entityDecl
$.RULE('prog', () => {
$.OR([
{ ALT: () => { $.SUBRULE($.constantDecl); } },
{ ALT: () => { $.SUBRULE($.entityDecl); } }
]);
});

// constantDecl : NAME EQUALS INTEGER
$.RULE('constantDecl', () => {
$.CONSUME(t.NAME);
$.CONSUME(t.EQUALS);
$.CONSUME(t.INTEGER);
});

// entityDecl : ENTITY NAME entityTableNameDecl? entityBody?
$.RULE('entityDecl', () => {
$.CONSUME(t.ENTITY);
$.CONSUME(t.NAME);

$.OPTION(() => {
$.SUBRULE($.entityTableNameDecl);
});

// the "2" suffix is a quirk of Chevrotain, more details:
// https://github.com/SAP/chevrotain/blob/master/docs/faq.md#-why-are-the-unique-numerical-suffixes-consume1consume2-needed-for-the-dsl-rules
$.OPTION2(() => {
$.SUBRULE($.entityBody);
});
});

// entityTableNameDecl : LPAREN NAME RPAREN
$.RULE('entityTableNameDecl', () => {
$.CONSUME(t.LPAREN);
$.CONSUME(t.NAME);
$.CONSUME(t.RPAREN);
});

// entityBody : LCURLY fieldDec (COMMA fieldDec)* RCURLY
// NOTE(review): COMMA separates fields here and also separates validations
// inside fieldDec; confirm that a field with validations followed by another
// field (e.g. "a String required, b String") parses as intended.
$.RULE('entityBody', () => {
$.CONSUME(t.LCURLY);
$.AT_LEAST_ONE_SEP({
// TODO: I do not understand why the original grammar seems to have allowed
// consecutive fields without a separating comma.
SEP: t.COMMA,
DEF: () => {
$.SUBRULE($.fieldDec);
}
});
$.CONSUME(t.RCURLY);
});

// fieldDec : NAME type (validation (COMMA validation)*)?
$.RULE('fieldDec', () => {
$.CONSUME(t.NAME);
$.SUBRULE($.type);
// Short form for: "(X(,X)*)?"
$.MANY_SEP({
SEP: t.COMMA,
DEF: () => {
$.SUBRULE($.validation);
}
});
});

// type : NAME
$.RULE('type', () => {
$.CONSUME(t.NAME);
});

// validation : REQUIRED | minMaxValidation | pattern
$.RULE('validation', () => {
$.OR([
{ ALT: () => { $.CONSUME(t.REQUIRED); } },
{ ALT: () => { $.SUBRULE($.minMaxValidation); } },
{ ALT: () => { $.SUBRULE($.pattern); } }
]);
});

// minMaxValidation : MIN_MAX_KEYWORD LPAREN (INTEGER | NAME) RPAREN
$.RULE('minMaxValidation', () => {
// HIGHLIGHT:
// Note that "MIN_MAX_KEYWORD" is an abstract token and could match 6 different concrete token types
$.CONSUME(t.MIN_MAX_KEYWORD);
$.CONSUME(t.LPAREN);
$.OR([
{ ALT: () => { $.CONSUME(t.INTEGER); } },
{ ALT: () => { $.CONSUME(t.NAME); } }
]);
$.CONSUME(t.RPAREN);
});

// pattern : PATTERN LPAREN REGEX RPAREN
$.RULE('pattern', () => {
$.CONSUME(t.PATTERN);
$.CONSUME(t.LPAREN);
// HIGHLIGHT:
// With Chevrotain the grammar can be debugged directly by using good old fashioned breakpoints.
// No need to try and figure out a 10,000 lines generated file, or worse not even have that
// if we were to use some JS combinator.
// debugger;
$.CONSUME(t.REGEX);
$.CONSUME(t.RPAREN);
});

// very important to call this after all the rules have been defined.
// otherwise the parser may not work correctly as it will lack information
// derived during the self analysis phase.
Parser.performSelfAnalysis(this);
}
}

module.exports = {
JDLParser
};
2 changes: 1 addition & 1 deletion test/spec/grammar/lexer_test.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/* eslint-disable no-new, no-unused-expressions */
const expect = require('chai').expect;
const lexerModule = require('../../../lib/dsl/chev_grammar');
const lexerModule = require('../../../lib/dsl/poc/lexer');

const JDLLexer = lexerModule.JDLLexer;

Expand Down
59 changes: 59 additions & 0 deletions test/spec/grammar/parser_test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
/* eslint-disable no-new, no-unused-expressions */
const expect = require('chai').expect;
const parse = require('../../../lib/dsl/poc/api').parse;


// Specs for the Chevrotain based JDL parser proof of concept, exercised through
// the parse() API.
describe('Chevrotain Parser POC', () => {
  context('parsing', () => {
    it('Can parse a simple valid JDL text', () => {
      // The template literal is kept flush-left so its content stays exactly as written.
      const input = `
entity JobHistory {
startDate ZonedDateTime,
endDate ZonedDateTime,
language Language
}`;

      // debug and step into this to experience debugging the parser's code directly without
      // the abstraction of a 10,000 lines of generated source code in the way.
      const result = parse(input);
      expect(result.parseErrors).to.be.empty;

      const cst = result.cst;
      // We can now explore the automatically created Concrete Syntax Tree.
      // See detailed CST docs here: https://github.com/SAP/chevrotain/blob/master/docs/concrete_syntax_tree.md
      expect(cst.name).to.equal('prog');
      expect(cst.children.constantDecl).to.be.empty;
      expect(cst.children.entityDecl).to.have.lengthOf(1);
      expect(cst.children.entityDecl[0].children.NAME[0].image).to.equal('JobHistory');
      // ...
    });

    // TODO: TBD
    // The specs below are placeholders. They are declared without a callback so
    // Mocha reports them as pending instead of as (vacuously) passing.
    it('Can parse a simple valid JDL text using a custom startRule');

    it('Can parse a simple invalid JDL text with a single syntax error');

    it('Can parse a simple invalid JDL text with multiple syntax errors');

    it('Can recover from errors and continue parsing #1 (single token insertion)');

    it('Can recover from errors and continue parsing #2 (single token deletion)');

    it('Can recover from errors and continue parsing #3 (re-sync)');
  });

  context('AutoComplete', () => {
    // TODO: TBD
  });
});

0 comments on commit 2bf7903

Please sign in to comment.