From 8400f87ad8ed2b47e659bc8bb6c3cf2467802425 Mon Sep 17 00:00:00 2001 From: gideongoodwin <30703429+gideongoodwin@users.noreply.github.com> Date: Tue, 6 Feb 2018 04:39:58 -0800 Subject: [PATCH] New: Add IParseOptions#alternateCommentMode (#968) This PR adds a new option to IParseOptions, alternateCommentMode. When enabled, this activates an alternate comment parsing mode that preserves double-slash and slash-star comments as documentation. --- index.d.ts | 6 ++ src/parse.js | 2 +- src/tokenize.js | 114 +++++++++++++++++----- tests/data/comments-alternate-parse.proto | 73 ++++++++++++++ tests/docs_comments_alternate_parse.js | 35 +++++++ 5 files changed, 207 insertions(+), 23 deletions(-) create mode 100644 tests/data/comments-alternate-parse.proto create mode 100644 tests/docs_comments_alternate_parse.js diff --git a/index.d.ts b/index.d.ts index 91ad65ea8..4eb773d20 100644 --- a/index.d.ts +++ b/index.d.ts @@ -1015,6 +1015,12 @@ export interface IParseOptions { /** Keeps field casing instead of converting to camel case */ keepCase?: boolean; + + /** + * Turns on an alternate comment parsing mode that preserves double-slash + * and slash-star comments as documentation. 
+ */ + alternateCommentMode?: boolean; } /** diff --git a/src/parse.js b/src/parse.js index 0acf919df..e1e572de2 100644 --- a/src/parse.js +++ b/src/parse.js @@ -61,7 +61,7 @@ function parse(source, root, options) { if (!options) options = parse.defaults; - var tn = tokenize(source), + var tn = tokenize(source, options.alternateCommentMode || false), next = tn.next, push = tn.push, peek = tn.peek, diff --git a/src/tokenize.js b/src/tokenize.js index 825a7af4f..b939ef289 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -6,6 +6,7 @@ var delimRe = /[\s{}=;:[\],'"()<>]/g, stringSingleRe = /(?:'([^'\\]*(?:\\.[^'\\]*)*)')/g; var setCommentRe = /^ *[*/]+ */, + setCommentAltRe = /^\s*\*?\/*/, setCommentSplitRe = /\n/g, whitespaceRe = /\s/, unescapeRe = /\\(.?)/g; @@ -92,9 +93,10 @@ tokenize.unescape = unescape; /** * Tokenizes the given .proto source and returns an object with useful utility functions. * @param {string} source Source contents + * @param {boolean} alternateCommentMode Whether we should activate alternate comment parsing mode. 
* @returns {ITokenizerHandle} Tokenizer handle */ -function tokenize(source) { +function tokenize(source, alternateCommentMode) { /* eslint-disable callback-return */ source = source.toString(); @@ -159,10 +161,17 @@ function tokenize(source) { commentType = source.charAt(start++); commentLine = line; commentLineEmpty = false; - var offset = start - 3, // "///" or "/**" + var lookback; + if (alternateCommentMode) { + lookback = 2; // alternate comment parsing: "//" or "/*" + } else { + lookback = 3; // "///" or "/**" + } + var commentOffset = start - lookback, c; do { - if (--offset < 0 || (c = source.charAt(offset)) === "\n") { + if (--commentOffset < 0 || + (c = source.charAt(commentOffset)) === "\n") { commentLineEmpty = true; break; } @@ -171,12 +180,34 @@ function tokenize(source) { .substring(start, end) .split(setCommentSplitRe); for (var i = 0; i < lines.length; ++i) - lines[i] = lines[i].replace(setCommentRe, "").trim(); + lines[i] = lines[i] + .replace(alternateCommentMode ? setCommentAltRe : setCommentRe, "") + .trim(); commentText = lines .join("\n") .trim(); } + function isDoubleSlashCommentLine(startOffset) { + var endOffset = findEndOfLine(startOffset); + + // see if remaining line matches comment pattern + var lineText = source.substring(startOffset, endOffset); + // look for 1 or 2 slashes since startOffset would already point past + // the first slash that started the comment. + var isComment = /^\s*\/{1,2}/.test(lineText); + return isComment; + } + + function findEndOfLine(cursor) { + // find end of cursor's line + var endOffset = cursor; + while (endOffset < length && charAt(endOffset) !== "\n") { + endOffset++; + } + return endOffset; + } + /** * Obtains the next token. 
* @returns {string|null} Next token or `null` on eof @@ -202,35 +233,71 @@ function tokenize(source) { if (++offset === length) return null; } + if (charAt(offset) === "/") { - if (++offset === length) + if (++offset === length) { throw illegal("comment"); + } if (charAt(offset) === "/") { // Line - isDoc = charAt(start = offset + 1) === "/"; - while (charAt(++offset) !== "\n") - if (offset === length) - return null; - ++offset; - if (isDoc) /// Comment - setComment(start, offset - 1); - ++line; - repeat = true; + if (!alternateCommentMode) { + // check for triple-slash comment + isDoc = charAt(start = offset + 1) === "/"; + + while (charAt(++offset) !== "\n") { + if (offset === length) { + return null; + } + } + ++offset; + if (isDoc) { + setComment(start, offset - 1); + } + ++line; + repeat = true; + } else { + // check for double-slash comments, consolidating consecutive lines + start = offset; + isDoc = false; + if (isDoubleSlashCommentLine(offset)) { + isDoc = true; + do { + offset = findEndOfLine(offset); + if (offset === length) { + break; + } + offset++; + } while (isDoubleSlashCommentLine(offset)); + } else { + offset = Math.min(length, findEndOfLine(offset) + 1); + } + if (isDoc) { + setComment(start, offset); + } + line++; + repeat = true; + } } else if ((curr = charAt(offset)) === "*") { /* Block */ - isDoc = charAt(start = offset + 1) === "*"; + // check for /** (regular comment mode) or /* (alternate comment mode) + start = offset + 1; + isDoc = alternateCommentMode || charAt(start) === "*"; do { - if (curr === "\n") + if (curr === "\n") { ++line; - if (++offset === length) + } + if (++offset === length) { throw illegal("comment"); + } prev = curr; curr = charAt(offset); } while (prev !== "*" || curr !== "/"); ++offset; - if (isDoc) /** Comment */ + if (isDoc) { setComment(start, offset - 2); + } repeat = true; - } else + } else { return "/"; + } } } while (repeat); @@ -302,14 +369,17 @@ function tokenize(source) { function cmnt(trailingLine) { var 
ret = null; if (trailingLine === undefined) { - if (commentLine === line - 1 && (commentType === "*" || commentLineEmpty)) + if (commentLine === line - 1 && (alternateCommentMode || commentType === "*" || commentLineEmpty)) { ret = commentText; + } } else { /* istanbul ignore else */ - if (commentLine < trailingLine) + if (commentLine < trailingLine) { peek(); - if (commentLine === trailingLine && !commentLineEmpty && commentType === "/") + } + if (commentLine === trailingLine && !commentLineEmpty && (alternateCommentMode || commentType === "/")) { ret = commentText; + } } return ret; } diff --git a/tests/data/comments-alternate-parse.proto b/tests/data/comments-alternate-parse.proto new file mode 100644 index 000000000..4d01f672f --- /dev/null +++ b/tests/data/comments-alternate-parse.proto @@ -0,0 +1,73 @@ +/** + * File with alternate comment syntax. + * This file uses double slash and regular star-slash comment styles for doc + * strings. + */ + +syntax = "proto3"; + +// Message with +// a +// multi-line comment. +message Test1 { + + /** + * Field with a doc-block comment. + */ + string field1 = 1; + + // Field with a single-line comment starting with two slashes. + uint32 field2 = 2; + + /// Field with a single-line comment starting with three slashes. + bool field3 = 3; + + /* Field with a single-line slash-star comment. */ + bool field4 = 4; + + bool field5 = 5; // Field with a trailing single-line two-slash comment. + + bool field6 = 6; /// Field with a trailing single-line three-slash comment. + + bool field7 = 7; /* Field with a trailing single-line slash-star comment. */ + + bool field8 = 8; + + // Field with a + // multi-line comment. + bool field9 = 9; + + /** + * Field with a + * multi-line doc-block comment. + */ + string field10 = 10; +} + +/* Message + with + a multiline plain slash-star + comment. +*/ +message Test2 { +} + +/* + * Message + * with + * a + * comment and stars. + */ +enum Test3 { + + /** Value with a comment. 
*/ + ONE = 1; + + // Value with a single-line comment. + TWO = 2; + + /// Value with a triple-slash comment. + THREE = 3; // ignored + + FOUR = 4; /// Other value with a comment. +} diff --git a/tests/docs_comments_alternate_parse.js b/tests/docs_comments_alternate_parse.js new file mode 100644 index 000000000..5793ef091 --- /dev/null +++ b/tests/docs_comments_alternate_parse.js @@ -0,0 +1,35 @@ +var tape = require("tape"); + +var protobuf = require(".."); + +tape.test("proto comments in alternate-parse mode", function(test) { + test.plan(17); + var options = {alternateCommentMode: true}; + var root = new protobuf.Root(); + root.load("tests/data/comments-alternate-parse.proto", options, function(err, root) { + if (err) + throw test.fail(err.message); + + test.equal(root.lookup("Test1").comment, "Message with\na\nmulti-line comment.", "should parse double-slash multiline comment"); + test.equal(root.lookup("Test2").comment, "Message\nwith\na multiline plain slash-star\ncomment.", "should parse slash-star multiline comment"); + test.equal(root.lookup("Test3").comment, "Message\nwith\na\ncomment and stars.", "should parse doc-block multiline comment"); + + test.equal(root.lookup("Test1.field1").comment, "Field with a doc-block comment.", "should parse doc-block field comment"); + test.equal(root.lookup("Test1.field2").comment, "Field with a single-line comment starting with two slashes.", "should parse double-slash field comment"); + test.equal(root.lookup("Test1.field3").comment, "Field with a single-line comment starting with three slashes.", "should parse triple-slash field comment"); + test.equal(root.lookup("Test1.field4").comment, "Field with a single-line slash-star comment.", "should parse single-line slash-star field comment"); + test.equal(root.lookup("Test1.field5").comment, "Field with a trailing single-line two-slash comment.", "should parse trailing double-slash comment"); + test.equal(root.lookup("Test1.field6").comment, "Field with a trailing 
single-line three-slash comment.", "should parse trailing triple-slash comment"); + test.equal(root.lookup("Test1.field7").comment, "Field with a trailing single-line slash-star comment.", "should parse trailing slash-star comment"); + test.equal(root.lookup("Test1.field8").comment, null, "should parse no comment"); + test.equal(root.lookup("Test1.field9").comment, "Field with a\nmulti-line comment.", "should parse multiline double-slash field comment"); + test.equal(root.lookup("Test1.field10").comment, "Field with a\nmulti-line doc-block comment.", "should parse multiline doc-block field comment"); + + test.equal(root.lookup("Test3").comments.ONE, "Value with a comment.", "should parse blocks for enum values"); + test.equal(root.lookup("Test3").comments.TWO, "Value with a single-line comment.", "should parse double-slash comments for enum values"); + test.equal(root.lookup("Test3").comments.THREE, "Value with a triple-slash comment.", "should parse lines for enum values and prefer on top over trailing"); + test.equal(root.lookup("Test3").comments.FOUR, "Other value with a comment.", "should not confuse previous trailing comments with comments for the next field"); + + test.end(); + }); +});