Skip to content
This repository has been archived by the owner on Mar 21, 2021. It is now read-only.

WIP - POC with Chevrotain Parser. #142

Merged
merged 9 commits into from
Sep 23, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 51 additions & 0 deletions lib/dsl/poc/api.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
const JDLLexer = require('./lexer').JDLLexer;
const JDLParser = require('./parser').JDLParser;


// A single parser instance shared by every call in this module.
// It is created with an empty token vector; the functions below assign the real
// tokens through the ".input" setter before each use.
const parserSingleton = new JDLParser([]);

/**
 * Tokenizes and parses JDL text, starting from the given grammar rule.
 *
 * @param {string} input - the JDL text to parse.
 * @param {string} [startRule='prog'] - name of the parser rule (a method on the parser instance) to start from.
 * @returns {{cst: Object, lexErrors: Array, parseErrors: Array, comments: Array}}
 *   the resulting CST, the lexer and parser errors, and the "comments" token group collected by the lexer.
 */
function parse(input, startRule = 'prog') {
  const { tokens, errors: lexErrors, groups } = JDLLexer.tokenize(input);

  // Assigning to the ".input" setter also resets the parser's internal state.
  parserSingleton.input = tokens;

  // 1. Any parser rule can be invoked dynamically: rules are just methods on the parser instance.
  // 2. The JDL Parser is configured to automatically output a ParseTree, a.k.a. Concrete Syntax Tree (CST).
  //    This keeps the grammar "pure", without any embedded semantic actions.
  const cst = parserSingleton[startRule]();

  return {
    cst,
    lexErrors,
    parseErrors: parserSingleton.errors,
    comments: groups.comments
  };
}

// TODO: this is a very naive implementation, for example if the content assist was requested
// while typing an identifier or keyword we may need to drop the last element in the token vector
// and filter the results according to the existing prefix.
// A more complete example can be found here:
// https://github.com/SAP/chevrotain/blob/master/examples/parser/content_assist/official_feature_content_assist.js#L134
/**
 * Computes which token types may syntactically follow the given (partial) input.
 *
 * @param {string} input - the JDL text typed so far.
 * @param {string} [startRule='prog'] - name of the parser rule to start from.
 * @returns {Array} the `nextTokenType` of every content-assist suggestion.
 */
function getSyntaticAutoCompleteSuggestions(input, startRule = 'prog') {
  const { tokens } = JDLLexer.tokenize(input);

  // Assigning to the ".input" setter also resets the parser's internal state.
  parserSingleton.input = tokens;

  const suggestions = parserSingleton.computeContentAssist(startRule, tokens);

  // A suggestion carries more than the token type, e.g. the "Rule Stack" at the suggestion point.
  // That could drive smarter logic (say, treating a NAME token differently in an entity than in a field),
  // but only the token type matters within the scope of this POC.
  return suggestions.map(({ nextTokenType }) => nextTokenType);
}

module.exports = {
parse,
getSyntaticAutoCompleteSuggestions
};
108 changes: 108 additions & 0 deletions lib/dsl/poc/ast_builder.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
const JDLParser = require('./parser').JDLParser;
const _ = require('lodash');


/**
 * Converts a CST produced by the JDL parser into an AST.
 *
 * @param {Object} cst - the CST to convert.
 * @returns {*} whatever the visitor returns for the CST's root rule.
 */
function buildAst(cst) {
  // eslint-disable-next-line no-use-before-define
  return new JDLAstBuilderVisitor().visit(cst);
}

// Base CST visitor constructor obtained from a JDLParser instance; the AST builder class below extends it.
// NOTE(review): the parser is constructed without an input here, whereas api.js passes an empty array — confirm both are equivalent.
const BaseJDLCSTVisitor = new JDLParser().getBaseCstVisitorConstructor();

/**
* Note that the logic here assumes the input CST is valid.
* To make this work with partially formed CST created during automatic error recovery
* would require refactoring for greater robustness.
* Meaning we can never assume at least one element exists in a ctx child array
* e.g:
* 1. ctx.NAME[0].image --> ctx.NAME[0] ? ctx.NAME[0].image : "???"
*/
class JDLAstBuilderVisitor extends BaseJDLCSTVisitor {
  constructor() {
    super();
    // Let the base visitor check this class against the grammar's rules.
    this.validateVisitor();
  }

  /** Root rule: collects the AST of every entity and constant declaration. */
  prog(ctx) {
    const toAst = node => this.visit(node);
    const entities = _.map(ctx.entityDecl, toAst);
    const constants = _.map(ctx.constantDecl, toAst);
    return { entities, constants };
  }

  /** A constant is a name bound to a base-10 integer value. */
  constantDecl(ctx) {
    const nameToken = ctx.NAME[0];
    const valueToken = ctx.INTEGER[0];
    return {
      name: nameToken.image,
      value: parseInt(valueToken.image, 10)
    };
  }

  /** An entity has a name, an optional table name and a list of fields. */
  entityDecl(ctx) {
    const nameToken = ctx.NAME[0];
    return {
      name: nameToken.image,
      // The table name declaration is optional, so ctx.entityTableNameDecl is either
      // empty or holds exactly one element. "this.visit" copes with both and
      // yields undefined for the empty case.
      tableName: this.visit(ctx.entityTableNameDecl),
      fields: this.visit(ctx.entityBody)
    };
  }

  /** The table name is simply the text of its NAME token. */
  entityTableNameDecl(ctx) {
    const nameToken = ctx.NAME[0];
    return nameToken.image;
  }

  /** The body of an entity is the list of its field ASTs. */
  entityBody(ctx) {
    return _.map(ctx.fieldDec, field => this.visit(field));
  }

  /** A field has a name, a type and zero or more validations. */
  fieldDec(ctx) {
    const nameToken = ctx.NAME[0];
    return {
      name: nameToken.image,
      // ctx.type holds a single item, so visiting the array
      // is the same as visiting ctx.type[0].
      type: this.visit(ctx.type),
      validations: _.map(ctx.validation, validation => this.visit(validation))
    };
  }

  /** A type is represented by the text of its NAME token. */
  type(ctx) {
    const nameToken = ctx.NAME[0];
    return nameToken.image;
  }

  /** Dispatches to whichever validation alternative is present; only one can be at a time. */
  validation(ctx) {
    const isRequired = !_.isEmpty(ctx.REQUIRED);
    if (isRequired) {
      return { validationType: 'required' };
    }

    const isMinMax = !_.isEmpty(ctx.minMaxValidation);
    if (isMinMax) {
      return this.visit(ctx.minMaxValidation);
    }

    return this.visit(ctx.pattern);
  }

  /** The limit is either a literal integer or a NAME (the name's text is kept as-is). */
  minMaxValidation(ctx) {
    const keywordToken = ctx.MIN_MAX_KEYWORD[0];

    let limit;
    if (_.isEmpty(ctx.NAME)) {
      limit = parseInt(ctx.INTEGER[0].image, 10);
    } else {
      limit = ctx.NAME[0].image;
    }

    return {
      validationType: keywordToken.image,
      limit
    };
  }

  /** A pattern validation keeps the raw text of its REGEX token. */
  pattern(ctx) {
    const regexToken = ctx.REGEX[0];
    return {
      validationType: 'pattern',
      pattern: regexToken.image
    };
  }
}

module.exports = {
buildAst
};
24 changes: 24 additions & 0 deletions lib/dsl/poc/diagrams.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
<!DOCTYPE html>
<!-- Standalone page that draws diagrams of the grammar from its serialized form. -->
<meta charset="utf-8">
<style>
body {
background-color: hsl(30, 20%, 95%)
}
</style>

<!-- Diagram rendering assets, loaded from the chevrotain package through the unpkg CDN. -->
<!-- NOTE(review): the URLs carry no version, so they presumably follow the latest published release; consider pinning one. -->
<link rel='stylesheet' href='https://unpkg.com/chevrotain/diagrams/diagrams.css'>
<script src='https://unpkg.com/chevrotain/diagrams/vendor/railroad-diagrams.js'></script>
<script src='https://unpkg.com/chevrotain/diagrams/src/diagrams_builder.js'></script>
<script src='https://unpkg.com/chevrotain/diagrams/src/diagrams_behavior.js'></script>
<script src='https://unpkg.com/chevrotain/diagrams/src/main.js'></script>

<body>
<!-- Container element the diagrams are drawn into. -->
<div id="diagrams" align="center"></div>

<!-- Presumably defines the global "serializedGrammar" used below; verify against whatever generates this file. -->
<script src='gen/generated_serialized_grammar.js'></script>

<script>
// Draw the serialized grammar into the container element.
var diagramsDiv = document.getElementById("diagrams");
main.drawDiagramsFromSerializedGrammar(serializedGrammar, diagramsDiv)
</script>
</body>
118 changes: 118 additions & 0 deletions lib/dsl/poc/formatter.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
const JDLParser = require('./parser').JDLParser;
const _ = require('lodash');

/**
 * Formats JDL text by collecting replacement suggestions from the CST and applying them.
 *
 * Each suggestion has the form
 * {
 *   start: number,   // offset of the last character kept BEFORE the replaced range
 *   end: number,     // offset of the last character OF the replaced range
 *   newText: string  // text that takes the place of the characters in (start, end]
 * }
 *
 * examples:
 * 1. {start: 44, end: 46, newText: ' '} --> replace the two characters after offset 44 with a single space
 * 2. {start: 200, end: 201, newText: ''} --> remove the single character at offset 201
 *
 *
 * This is just a naive implementation, a productive one may have additional logic
 * 1. Supporting DELETE/INSERT operation too.
 * 2. Being able to deal with multiple changes on interwoven offset ranges.
 * 3. Configurable to user's preferences.
 *
 * And a-lot more rules... :)
 *
 * @param {Object} cst - the CST of the text to format.
 * @param {string} orgText - the original text the CST was built from.
 * @returns {string} the text with every suggested replacement applied.
 */
function formatJDL(cst, orgText) {
  // eslint-disable-next-line no-use-before-define
  const formatterVisitor = new JDLCstFormatterVisitor(orgText);
  formatterVisitor.visit(cst);

  // Apply the replacements from the end of the text towards its start.
  // A replacement only shifts the text AFTER it, so the offsets of the replacements
  // still to be applied stay valid and no running offset correction is needed.
  // Sorting a copy also removes the dependency on the order in which the visitor
  // happened to collect the (non overlapping) replacements.
  const rightToLeft = formatterVisitor.replaces
    .slice()
    .sort((first, second) => second.start - first.start);

  return rightToLeft.reduce(
    (text, change) => text.slice(0, change.start + 1) + change.newText + text.slice(change.end + 1),
    orgText
  );
}


// Base CST visitor constructor obtained from a JDLParser instance; the formatter class below extends it.
// NOTE(review): the "WithDefaults" variant presumably supplies a default traversal for the rules the
// formatter does not override (it only defines entityDecl and entityBody) — confirm against the Chevrotain docs.
const BaseJDLCSTVisitor = new JDLParser().getBaseCstVisitorConstructorWithDefaults();

class JDLCstFormatterVisitor extends BaseJDLCSTVisitor {
  /**
   * @param {string} orgText - the original text, used to inspect the characters between tokens.
   */
  constructor(orgText) {
    super();
    this.orgText = orgText;
    // Replacement suggestions ({start, end, newText}) collected while visiting.
    this.replaces = [];
    this.validateVisitor();
  }

  /** Suggests exactly one space between the entity keyword and the entity's name. */
  entityDecl(ctx) {
    const keyword = ctx.ENTITY[0];
    const name = ctx.NAME[0];

    // True only when a single character separates the two tokens AND that character is a space.
    const hasSingleSpace =
      keyword.endOffset === name.startOffset - 2 &&
      this.orgText[keyword.endOffset + 1] === ' ';

    if (!hasSingleSpace) {
      this.replaces.push({
        start: keyword.endOffset,
        end: name.startOffset - 1,
        newText: ' '
      });
    }

    this.visit(ctx.entityBody);
  }

  /** Suggests removing whatever sits between the end of a field and the comma following it. */
  entityBody(ctx) {
    // The last field is not followed by a comma, so it is left out of the pairing.
    const fieldsBeforeCommas = _.dropRight(ctx.fieldDec);

    _.forEach(ctx.COMMA, (comma, position) => {
      const fieldEnd = findEndOffset(fieldsBeforeCommas[position]);
      const lastOffsetBeforeComma = comma.startOffset - 1;

      if (fieldEnd === lastOffsetBeforeComma) {
        // The comma already directly follows the field.
        return;
      }

      this.replaces.push({
        start: fieldEnd,
        end: lastOffsetBeforeComma,
        newText: ''
      });
    });
  }
}

/**
 * Finds the largest end offset inside a CST node or inside an array of CST elements.
 *
 * @param {Object|Array} cstOrCstElemArr - either a CST node (an object with a `children` dictionary
 *   of arrays) or an array of CST elements (tokens or CST nodes).
 * @param {number} [oldMax=-1] - the largest end offset found so far.
 * @returns {number} the largest end offset found, or `oldMax` when there is nothing to inspect.
 * @throws {Error} when the argument is neither a CST node nor an array.
 */
function findEndOffset(cstOrCstElemArr, oldMax = -1) {
  if (cstOrCstElemArr != null && cstOrCstElemArr.children !== undefined) {
    const children = cstOrCstElemArr.children;
    return Object.keys(children).reduce((currMax, key) => {
      const childArr = children[key];
      // Optional parts of a rule leave empty arrays behind, they have no offsets to offer.
      if (childArr == null || childArr.length === 0) {
        return currMax;
      }
      // Each child array is inspected exactly once: only its last element matters,
      // so repeating the call per element would just multiply the work at every nesting level.
      return Math.max(currMax, findEndOffset(childArr, currMax));
    }, oldMax);
  }

  if (Array.isArray(cstOrCstElemArr)) {
    if (cstOrCstElemArr.length === 0) {
      return oldMax;
    }
    // relying on knowledge that Chevrotain built the CST children arrays in the order encountered.
    // but that is not an official API...
    const lastElem = cstOrCstElemArr[cstOrCstElemArr.length - 1];
    // concrete tokens.
    if (lastElem.tokenType !== undefined) {
      return Math.max(oldMax, lastElem.endOffset);
    }
    // a cst subnode
    return Math.max(oldMax, findEndOffset(lastElem));
  }

  throw new Error('non exhaustive match');
}


module.exports = {
formatJDL
};
Loading