Cleaned up and added a single "happy path" spec.

* Lexer, Parser and APIs in different files. * A single test which parses a simple valid input and outputs a CST.
jhipster · Aug 30, 2017 · 2bf7903 · 2bf7903
1 parent be0544d
commit 2bf7903
Show file tree

Hide file tree

Showing 5 changed files with 224 additions and 116 deletions.
diff --git a/lib/dsl/poc/api.js b/lib/dsl/poc/api.js
@@ -0,0 +1,33 @@
+const JDLLexer = require('./lexer').JDLLexer;
+const JDLParser = require('./parser').JDLParser;
+
+
+const parserSingleton = new JDLParser([]);
+
+function parse(input, startRule = 'prog') {
+  const lexResult = JDLLexer.tokenize(input);
+
+  // ".input" is a setter which will reset the parser's internal state.
+  parserSingleton.input = lexResult.tokens;
+
+  // 1. We can dynamically invoke any of the parser rules; they are just methods on the parser instance.
+  // 2. The JDL Parser is configured to automatically output a ParseTree, a.k.a Concrete Syntax Tree (CST).
+  //    This allows us to keep the grammar as a "pure" grammar without any embedded semantic actions.
+  const cst = parserSingleton[startRule]();
+
+  return {
+    cst,
+    lexErrors: lexResult.errors,
+    parseErrors: parserSingleton.errors,
+    comments: lexResult.groups.comments
+  };
+}
+
+function getSyntaticAutoCompleteSuggestions(input, startRule = 'prog') {
+
+}
+
+module.exports = {
+  parse,
+  getSyntaticAutoCompleteSuggestions
+};
diff --git a/lib/dsl/chev_grammar.js → lib/dsl/poc/lexer.js b/lib/dsl/chev_grammar.js → lib/dsl/poc/lexer.js
@@ -3,7 +3,6 @@ const _ = require('lodash');
 
 
 const Lexer = chevrotain.Lexer;
-const Parser = chevrotain.Parser;
 
 // ----------------- lexer -----------------
 const tokens = {};
@@ -146,120 +145,7 @@ createToken({ name: 'DOT', pattern: '.' });
 // It is useful to help debug the token vector results.
 const JDLLexer = new Lexer(_.values(tokens), { debug: true });
 
-// short prefix to reduce verbosity.
-const t = tokens;
-
-class JDLParser extends Parser {
-  constructor(input) {
-    super(input, tokens);
-
-    const $ = this;
-
-    // HIGHLIGHTS1: Any rule may be used as a start rule, there is no artificial limit
-    // like in pegjs. This capability is useful for partial parsing, e.g.:
-    // 1. Code snippets
-    // 2. Incremental parsing of only the changed parts of an active Editor.
-    // 3. writing Unit tests for micro code samples.
-    $.RULE('prog', () => {
-      $.OR([
-        { ALT: () => { $.SUBRULE($.constantDecl); } },
-        { ALT: () => { $.SUBRULE($.entityDecl); } }
-      ]);
-    });
-
-    $.RULE('constantDecl', () => {
-      $.CONSUME(t.NAME);
-      $.CONSUME(t.EQUALS);
-      $.CONSUME(t.INTEGER);
-    });
-
-    $.RULE('entityDecl', () => {
-      $.CONSUME(t.ENTITY);
-      $.CONSUME(t.NAME);
-
-      $.OPTION(() => {
-        $.SUBRULE($.entityTableNameDecl);
-      });
-
-      // the "2" suffix is a quirk of Chevrotain, more details:
-      // https://github.com/SAP/chevrotain/blob/master/docs/faq.md#-why-are-the-unique-numerical-suffixes-consume1consume2-needed-for-the-dsl-rules
-      $.OPTION2(() => {
-        $.SUBRULE($.entityBody);
-      });
-    });
-
-    $.RULE('entityTableNameDecl', () => {
-      $.CONSUME(t.LPAREN);
-      $.CONSUME(t.NAME);
-      $.CONSUME(t.RPAREN);
-    });
-
-    $.RULE('entityBody', () => {
-      $.CONSUME(t.LCURLY);
-      $.AT_LEAST_ONE(() => {
-        $.SUBRULE($.fieldDec);
-      });
-      $.CONSUME(t.RCURLY);
-    });
-
-    $.RULE('fieldDec', () => {
-      $.CONSUME(t.NAME);
-      $.SUBRULE($.type);
-      // Short form for: "(X(,X)*)?"
-      $.MANY_SEP({
-        SEP: t.COMMA,
-        DEF: () => {
-          $.SUBRULE($.validation);
-        }
-      });
-      $.CONSUME(t.RCURLY);
-    });
-
-    $.RULE('type', () => {
-      $.CONSUME($.NAME);
-    });
-
-    $.RULE('validation', () => {
-      $.OR([
-        { ALT: () => { $.CONSUME(t.REQUIRED); } },
-        { ALT: () => { $.SUBRULE($.minMaxValidation); } },
-        { ALT: () => { $.SUBRULE($.pattern); } }
-      ]);
-    });
-
-    $.RULE('minMaxValidation', () => {
-      // HIGHLIGHT:
-      // Note that "MIN_MAX_KEYWORD" is an abstract token and could match 6 different concrete token types
-      $.CONSUME(t.MIN_MAX_KEYWORD);
-      $.CONSUME(t.LPAREN);
-      $.OR([
-        { ALT: () => { $.CONSUME(t.INTEGER); } },
-        { ALT: () => { $.CONSUME(t.NAME); } }
-      ]);
-      $.CONSUME(t.RPAREN);
-    });
-
-    $.RULE('pattern', () => {
-      $.CONSUME(t.PATTERN);
-      $.CONSUME(t.LPAREN);
-      // HIGHLIGHT:
-      // With Chevrotain the grammar can be debugged directly by using good old fashioned breakpoints.
-      // No need to to try and figure out a 10,000 lines generated file, or worse not even have that
-      // if we would be use some JS combinator.
-      // debugger; 
-      $.CONSUME(t.REGEX);
-      $.CONSUME(t.RPAREN);
-    });
-
-    // very important to call this after all the rules have been defined.
-    // otherwise the parser may not work correctly as it will lack information
-    // derived during the self analysis phase.
-    Parser.performSelfAnalysis(this);
-  }
-}
-
 module.exports = {
   tokens,
-  JDLLexer,
-  JDLParser
+  JDLLexer
 };
diff --git a/lib/dsl/poc/parser.js b/lib/dsl/poc/parser.js
@@ -0,0 +1,130 @@
+const chevrotain = require('chevrotain');
+
+const Parser = chevrotain.Parser;
+const tokensVocabulary = require('./lexer').tokens;
+
+// short name to reduce grammar's verbosity
+const t = tokensVocabulary;
+
+
+class JDLParser extends Parser {
+  // Our Parser only gets initialized once, new inputs will be transferred via
+  // the ".input" setter.
+  constructor() {
+    super([], tokensVocabulary, {
+      recoveryEnabled: true,
+      outputCst: true
+    });
+
+    const $ = this;
+
+    // HIGHLIGHTS1: Any rule may be used as a start rule, there is no artificial limit
+    // like in pegjs. This capability is useful for partial parsing, e.g.:
+    // 1. Code snippets
+    // 2. Incremental parsing of only the changed parts of an active Editor.
+    // 3. writing Unit tests for micro code samples.
+    $.RULE('prog', () => {
+      $.OR([
+        { ALT: () => { $.SUBRULE($.constantDecl); } },
+        { ALT: () => { $.SUBRULE($.entityDecl); } }
+      ]);
+    });
+
+    $.RULE('constantDecl', () => {
+      $.CONSUME(t.NAME);
+      $.CONSUME(t.EQUALS);
+      $.CONSUME(t.INTEGER);
+    });
+
+    $.RULE('entityDecl', () => {
+      $.CONSUME(t.ENTITY);
+      $.CONSUME(t.NAME);
+
+      $.OPTION(() => {
+        $.SUBRULE($.entityTableNameDecl);
+      });
+
+      // the "2" suffix is a quirk of Chevrotain, more details:
+      // https://github.com/SAP/chevrotain/blob/master/docs/faq.md#-why-are-the-unique-numerical-suffixes-consume1consume2-needed-for-the-dsl-rules
+      $.OPTION2(() => {
+        $.SUBRULE($.entityBody);
+      });
+    });
+
+    $.RULE('entityTableNameDecl', () => {
+      $.CONSUME(t.LPAREN);
+      $.CONSUME(t.NAME);
+      $.CONSUME(t.RPAREN);
+    });
+
+    $.RULE('entityBody', () => {
+      $.CONSUME(t.LCURLY);
+      $.AT_LEAST_ONE_SEP({
+        // TODO: I do not understand why the original grammar seems to have allowed
+        // consecutive fields without a separating comma.
+        SEP: t.COMMA,
+        DEF: () => {
+          $.SUBRULE($.fieldDec);
+        }
+      });
+      $.CONSUME(t.RCURLY);
+    });
+
+    $.RULE('fieldDec', () => {
+      $.CONSUME(t.NAME);
+      $.SUBRULE($.type);
+      // Short form for: "(X(,X)*)?"
+      $.MANY_SEP({
+        SEP: t.COMMA,
+        DEF: () => {
+          $.SUBRULE($.validation);
+        }
+      });
+    });
+
+    $.RULE('type', () => {
+      $.CONSUME(t.NAME);
+    });
+
+    $.RULE('validation', () => {
+      $.OR([
+        { ALT: () => { $.CONSUME(t.REQUIRED); } },
+        { ALT: () => { $.SUBRULE($.minMaxValidation); } },
+        { ALT: () => { $.SUBRULE($.pattern); } }
+      ]);
+    });
+
+    $.RULE('minMaxValidation', () => {
+      // HIGHLIGHT:
+      // Note that "MIN_MAX_KEYWORD" is an abstract token and could match 6 different concrete token types
+      $.CONSUME(t.MIN_MAX_KEYWORD);
+      $.CONSUME(t.LPAREN);
+      $.OR([
+        { ALT: () => { $.CONSUME(t.INTEGER); } },
+        { ALT: () => { $.CONSUME(t.NAME); } }
+      ]);
+      $.CONSUME(t.RPAREN);
+    });
+
+    $.RULE('pattern', () => {
+      $.CONSUME(t.PATTERN);
+      $.CONSUME(t.LPAREN);
+      // HIGHLIGHT:
+      // With Chevrotain the grammar can be debugged directly by using good old fashioned breakpoints.
+      // No need to try and figure out a 10,000-line generated file, or worse, not even have that
+      // if we were to use some JS combinator.
+      // debugger;
+      $.CONSUME(t.REGEX);
+      $.CONSUME(t.RPAREN);
+    });
+
+    // very important to call this after all the rules have been defined.
+    // otherwise the parser may not work correctly as it will lack information
+    // derived during the self analysis phase.
+    Parser.performSelfAnalysis(this);
+  }
+}
+
+module.exports = {
+  JDLParser
+};
diff --git a/test/spec/grammar/lexer_test.js b/test/spec/grammar/lexer_test.js
@@ -1,6 +1,6 @@
 /* eslint-disable no-new, no-unused-expressions */
 const expect = require('chai').expect;
-const lexerModule = require('../../../lib/dsl/chev_grammar');
+const lexerModule = require('../../../lib/dsl/poc/lexer');
 
 const JDLLexer = lexerModule.JDLLexer;
 

diff --git a/test/spec/grammar/parser_test.js b/test/spec/grammar/parser_test.js
@@ -0,0 +1,59 @@
+/* eslint-disable no-new, no-unused-expressions */
+const expect = require('chai').expect;
+const parse = require('../../../lib/dsl/poc/api').parse;
+
+
+describe('Chevrotain Parser POC', () => {
+  context('parsing', () => {
+    it('Can parse a simple valid JDL text', () => {
+      const input = `
+       entity JobHistory {
+         startDate ZonedDateTime,
+         endDate ZonedDateTime,
+         language Language
+       }`;
+
+      // Debug and step into this to experience debugging the parser's code directly, without
+      // the abstraction of 10,000 lines of generated source code in the way.
+      const result = parse(input);
+      expect(result.parseErrors).to.be.empty;
+
+      const cst = result.cst;
+      // We can now explore the automatically created Concrete Syntax Tree.
+      // See detailed CST docs here: https://github.com/SAP/chevrotain/blob/master/docs/concrete_syntax_tree.md
+      expect(cst.name).to.equal('prog');
+      expect(cst.children.constantDecl).to.be.empty;
+      expect(cst.children.entityDecl).to.have.lengthOf(1);
+      expect(cst.children.entityDecl[0].children.NAME[0].image).to.equal('JobHistory');
+      // ...
+    });
+
+    it('Can parse a simple valid JDL text using a custom startRule', () => {
+      // TODO: TBD
+    });
+
+    it('Can parse a simple invalid JDL text with a single syntax error', () => {
+      // TODO: TBD
+    });
+
+    it('Can parse a simple invalid JDL text with multiple syntax errors', () => {
+      // TODO: TBD
+    });
+
+    it('Can recover from errors and continue parsing #1 (single token insertion)', () => {
+      // TODO: TBD
+    });
+
+    it('Can recover from errors and continue parsing #2 (single token deletion)', () => {
+      // TODO: TBD
+    });
+
+    it('Can recover from errors and continue parsing #3 (re-sync)', () => {
+      // TODO: TBD
+    });
+  });
+
+  context('AutoComplete', () => {
+    // TODO: TBD
+  });
+});