Skip to content

Commit

Permalink
Fix JSON surrounding pairs (#370)
Browse files Browse the repository at this point in the history
* Initial working version

* Fixed java surrounding pair

* Fix typescript

* Improve angles

* Add documentation

* Typescript template string tweaks

* Fix ci

* Remove unused import
  • Loading branch information
pokey authored Dec 8, 2021
1 parent 83543d3 commit 4150c58
Show file tree
Hide file tree
Showing 16 changed files with 441 additions and 160 deletions.
15 changes: 15 additions & 0 deletions src/languages/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Identifiers of the languages that have language-specific support.
 *
 * Declared `as const` so that the element type stays a union of string
 * literals instead of widening to `string`; `SupportedLanguageId` below is
 * derived from it.
 */
export const supportedLanguageIds = [
"c",
"cpp",
"csharp",
"java",
"javascript",
"javascriptreact",
"json",
"jsonc",
"python",
"typescript",
"typescriptreact",
] as const;

/** Union of the string literals listed in `supportedLanguageIds`. */
export type SupportedLanguageId = typeof supportedLanguageIds[number];
99 changes: 99 additions & 0 deletions src/languages/getNodeMatcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { SyntaxNode } from "web-tree-sitter";
import { notSupported } from "../util/nodeMatchers";
import { selectionWithEditorFromRange } from "../util/selectionUtils";
import {
NodeMatcher,
NodeMatcherValue,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import cpp from "./cpp";
import csharp from "./csharp";
import { patternMatchers as json } from "./json";
import { patternMatchers as typescript } from "./typescript";
import { patternMatchers as java } from "./java";
import python from "./python";
import { UnsupportedLanguageError } from "../errors";
import { SupportedLanguageId } from "./constants";

/**
 * Looks up the node matcher registered for a language and scope type.
 *
 * @param languageId Identifier of the language to look up
 * @param scopeType The scope type for which a matcher is wanted
 * @param includeSiblings When true, the matcher is wrapped with
 * `matcherIncludeSiblings` before being returned
 * @returns The matcher for the scope type, or `notSupported` when the language
 * has no matcher registered for that scope type
 * @throws UnsupportedLanguageError if no matchers are registered for
 * `languageId`
 */
export function getNodeMatcher(
  languageId: string,
  scopeType: ScopeType,
  includeSiblings: boolean
): NodeMatcher {
  const scopeMatchers = languageMatchers[languageId as SupportedLanguageId];
  if (scopeMatchers == null) {
    throw new UnsupportedLanguageError(languageId);
  }

  const scopeMatcher = scopeMatchers[scopeType];
  if (scopeMatcher == null) {
    return notSupported;
  }

  return includeSiblings ? matcherIncludeSiblings(scopeMatcher) : scopeMatcher;
}

/**
 * Node matchers for every supported language, keyed first by language id and
 * then by scope type.
 *
 * Several ids share one set of matchers: `c` uses the `cpp` matchers, `jsonc`
 * uses the `json` matchers, and `javascript`, `javascriptreact` and
 * `typescriptreact` all use the `typescript` matchers.
 */
const languageMatchers: Record<
SupportedLanguageId,
Record<ScopeType, NodeMatcher>
> = {
c: cpp,
cpp: cpp,
csharp: csharp,
java,
javascript: typescript,
javascriptreact: typescript,
json,
jsonc: json,
python,
typescript,
typescriptreact: typescript,
};

/**
 * Wraps a matcher so that each of its matches is replaced by the matches that
 * `iterateNearestIterableAncestor` finds starting from the matched node.
 *
 * @param matcher The matcher to wrap
 * @returns A matcher that returns `null` when `matcher` returns `null` or when
 * the expansion yields no matches at all
 */
function matcherIncludeSiblings(matcher: NodeMatcher): NodeMatcher {
  return (
    selection: SelectionWithEditor,
    node: SyntaxNode
  ): NodeMatcherValue[] | null => {
    const directMatches = matcher(selection, node);
    if (directMatches == null) {
      return null;
    }

    const expandedMatches: NodeMatcherValue[] = [];
    for (const match of directMatches) {
      // Re-run the matcher around each match, using that match's own
      // selection as the selection to expand from
      expandedMatches.push(
        ...iterateNearestIterableAncestor(
          match.node,
          selectionWithEditorFromRange(selection, match.selection.selection),
          matcher
        )
      );
    }

    return expandedMatches.length > 0 ? expandedMatches : null;
  };
}

/**
 * Walks up from `node` and, at the first ancestor that has at least one named
 * child accepted by `nodeMatcher`, returns the matches for all of that
 * ancestor's named children.
 *
 * @param node The node whose ancestors are searched
 * @param selection The selection passed to `nodeMatcher` for every child
 * @param nodeMatcher The matcher applied to each named child
 * @returns The matches from the nearest ancestor that produced any, or an
 * empty array if no ancestor did
 */
function iterateNearestIterableAncestor(
  node: SyntaxNode,
  selection: SelectionWithEditor,
  nodeMatcher: NodeMatcher
) {
  for (
    let ancestor: SyntaxNode | null = node.parent;
    ancestor != null;
    ancestor = ancestor.parent
  ) {
    const childMatches: NodeMatcherValue[] = [];

    for (const child of ancestor.namedChildren) {
      const found = nodeMatcher(selection, child);
      if (found != null) {
        childMatches.push(...found.filter((match) => match != null));
      }
    }

    if (childMatches.length > 0) {
      return childMatches;
    }
  }

  return [];
}
128 changes: 128 additions & 0 deletions src/languages/getTextFragmentExtractor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { SyntaxNode } from "web-tree-sitter";
import { SelectionWithEditor } from "../typings/Types";
import { stringTextFragmentExtractor as jsonStringTextFragmentExtractor } from "./json";
import { stringTextFragmentExtractor as javaStringTextFragmentExtractor } from "./java";
import { stringTextFragmentExtractor as typescriptStringTextFragmentExtractor } from "./typescript";
import { UnsupportedLanguageError } from "../errors";
import { Range } from "vscode";
import { SupportedLanguageId } from "./constants";
import {
getNodeInternalRange,
getNodeRange,
makeRangeFromPositions,
} from "../util/nodeSelectors";
import { getNodeMatcher } from "./getNodeMatcher";
import { notSupported } from "../util/nodeMatchers";

/**
 * Signature of a function that, given a syntax node and a selection, returns
 * the range of the text fragment for that node, or `null` when the node does
 * not yield a text fragment.
 */
export type TextFragmentExtractor = (
node: SyntaxNode,
selection: SelectionWithEditor
) => Range | null;

/**
 * Builds the text fragment extractor for a language.
 *
 * The returned extractor tries, in order: the string text fragment extractor,
 * the language's comment matcher (when the language has one), and finally
 * whether the node is a parse error node.
 *
 * @param languageId The language for which to build the extractor
 * @param stringTextFragmentExtractor Optional language-specific extractor for
 * strings; when omitted, one is built with
 * `constructDefaultStringTextFragmentExtractor`
 * @returns An extractor returning the range of a string, comment or error
 * node, or `null` for any other node
 */
function constructDefaultTextFragmentExtractor(
  languageId: SupportedLanguageId,
  stringTextFragmentExtractor?: TextFragmentExtractor
): TextFragmentExtractor {
  const matchComment = getNodeMatcher(languageId, "comment", false);
  const commentsSupported = matchComment !== notSupported;
  const extractStringFragment: TextFragmentExtractor =
    stringTextFragmentExtractor ??
    constructDefaultStringTextFragmentExtractor(languageId);

  return (node: SyntaxNode, selection: SelectionWithEditor) => {
    const stringRange = extractStringFragment(node, selection);
    if (stringRange != null) {
      return stringRange;
    }

    const isComment =
      commentsSupported && matchComment(selection, node) != null;

    // Error nodes are treated as raw text so that the surrounding pair
    // matcher can still be useful when the parse tree is bad
    const isRawText = isComment || node.type === "ERROR";

    return isRawText ? getNodeRange(node) : null;
  };
}

/**
 * Builds a string text fragment extractor from the language's "string" node
 * matcher.
 *
 * @param languageId The language whose string matcher is used
 * @returns An extractor returning the internal range of a node matched as a
 * string, or `null` for any other node
 */
function constructDefaultStringTextFragmentExtractor(
  languageId: SupportedLanguageId
): TextFragmentExtractor {
  const matchString = getNodeMatcher(languageId, "string", false);

  // The internal range leaves out the starting and ending quotation marks
  return (node: SyntaxNode, selection: SelectionWithEditor) =>
    matchString(selection, node) != null ? getNodeInternalRange(node) : null;
}

/**
 * Returns the function used to extract the range of a text fragment from
 * within a parsed language.
 *
 * The extractor should only return a range for fragments of the document that
 * are to be treated like raw text, such as comments, strings, or error nodes.
 * For those fragments the surrounding pair algorithm falls back to a purely
 * text-based approach.
 *
 * @param languageId The language for which to get the text fragment extractor
 * @returns The text fragment extractor for the given language
 * @throws UnsupportedLanguageError if no extractor is registered for
 * `languageId`
 */
export default function getTextFragmentExtractor(
  languageId: string
): TextFragmentExtractor {
  const registered = textFragmentExtractors[languageId as SupportedLanguageId];

  if (registered != null) {
    return registered;
  }

  throw new UnsupportedLanguageError(languageId);
}

/**
 * Text fragment extractor for every supported language, keyed by language id.
 *
 * `c`, `cpp`, `csharp` and `python` use the default string handling. `java`,
 * the javascript / typescript family and `json` / `jsonc` pass their own
 * string text fragment extractor. Every entry is constructed when this
 * initializer runs.
 */
const textFragmentExtractors: Record<
SupportedLanguageId,
TextFragmentExtractor
> = {
c: constructDefaultTextFragmentExtractor("c"),
cpp: constructDefaultTextFragmentExtractor("cpp"),
csharp: constructDefaultTextFragmentExtractor("csharp"),
java: constructDefaultTextFragmentExtractor(
"java",
javaStringTextFragmentExtractor
),
javascript: constructDefaultTextFragmentExtractor(
"javascript",
typescriptStringTextFragmentExtractor
),
javascriptreact: constructDefaultTextFragmentExtractor(
"javascriptreact",
typescriptStringTextFragmentExtractor
),
jsonc: constructDefaultTextFragmentExtractor(
"jsonc",
jsonStringTextFragmentExtractor
),
json: constructDefaultTextFragmentExtractor(
"json",
jsonStringTextFragmentExtractor
),
python: constructDefaultTextFragmentExtractor("python"),
typescript: constructDefaultTextFragmentExtractor(
"typescript",
typescriptStringTextFragmentExtractor
),
typescriptreact: constructDefaultTextFragmentExtractor(
"typescriptreact",
typescriptStringTextFragmentExtractor
),
};
98 changes: 5 additions & 93 deletions src/languages/index.ts
Original file line number Diff line number Diff line change
@@ -1,95 +1,7 @@
import { SyntaxNode } from "web-tree-sitter";
import { notSupported } from "../util/nodeMatchers";
import { selectionWithEditorFromRange } from "../util/selectionUtils";
import {
NodeMatcher,
NodeMatcherValue,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import cpp from "./cpp";
import csharp from "./csharp";
import java from "./java";
import json from "./json";
import python from "./python";
import typescript from "./typescript";
import { UnsupportedLanguageError } from "../errors";
import { SupportedLanguageId, supportedLanguageIds } from "./constants";

/**
 * Node matchers keyed first by language id and then by scope type.
 *
 * `c` shares the `cpp` matchers, `jsonc` shares the `json` matchers, and
 * `javascript`, `javascriptreact` and `typescriptreact` share the `typescript`
 * matchers.
 */
const languageMatchers: Record<string, Record<ScopeType, NodeMatcher>> = {
c: cpp,
cpp: cpp,
csharp: csharp,
java,
javascript: typescript,
javascriptreact: typescript,
json,
jsonc: json,
python,
typescript,
typescriptreact: typescript,
};

/**
 * Looks up the node matcher registered for a language and scope type.
 *
 * @param languageId Identifier of the language to look up
 * @param scopeType The scope type for which a matcher is wanted
 * @param includeSiblings When true, the matcher is wrapped with
 * `matcherIncludeSiblings` before being returned
 * @returns The matcher for the scope type, or `notSupported` when the language
 * has no matcher registered for that scope type
 * @throws UnsupportedLanguageError if no matchers are registered for
 * `languageId`
 */
export function getNodeMatcher(
  languageId: string,
  scopeType: ScopeType,
  includeSiblings: boolean
): NodeMatcher {
  const scopeMatchers = languageMatchers[languageId];
  if (scopeMatchers == null) {
    throw new UnsupportedLanguageError(languageId);
  }

  const scopeMatcher = scopeMatchers[scopeType];
  if (scopeMatcher == null) {
    return notSupported;
  }

  return includeSiblings ? matcherIncludeSiblings(scopeMatcher) : scopeMatcher;
}

/**
 * Wraps a matcher so that each of its matches is replaced by the matches that
 * `iterateNearestIterableAncestor` finds starting from the matched node.
 *
 * @param matcher The matcher to wrap
 * @returns A matcher that returns `null` when `matcher` returns `null` or when
 * the expansion yields no matches at all
 */
function matcherIncludeSiblings(matcher: NodeMatcher): NodeMatcher {
  return (
    selection: SelectionWithEditor,
    node: SyntaxNode
  ): NodeMatcherValue[] | null => {
    const directMatches = matcher(selection, node);
    if (directMatches == null) {
      return null;
    }

    const expandedMatches: NodeMatcherValue[] = [];
    for (const match of directMatches) {
      // Re-run the matcher around each match, using that match's own
      // selection as the selection to expand from
      expandedMatches.push(
        ...iterateNearestIterableAncestor(
          match.node,
          selectionWithEditorFromRange(selection, match.selection.selection),
          matcher
        )
      );
    }

    return expandedMatches.length > 0 ? expandedMatches : null;
  };
}

function iterateNearestIterableAncestor(
node: SyntaxNode,
selection: SelectionWithEditor,
nodeMatcher: NodeMatcher
) {
let parent: SyntaxNode | null = node.parent;
while (parent != null) {
const matches = parent!.namedChildren
.flatMap((sibling) => nodeMatcher(selection, sibling))
.filter((match) => match != null) as NodeMatcherValue[];
if (matches.length > 0) {
return matches;
}
parent = parent.parent;
}
return [];
/**
 * Type guard reporting whether `languageId` is one of the ids listed in
 * `supportedLanguageIds`.
 *
 * Membership is tested against the array's values. The `in` operator would
 * instead test the array's keys (the indices "0", "1", … and "length"), which
 * rejects every real language id.
 *
 * @param languageId The language id to check
 * @returns `true` if `languageId` appears in `supportedLanguageIds`
 */
export function isLanguageSupported(
  languageId: string
): languageId is SupportedLanguageId {
  // Widen the readonly tuple of literals so `includes` accepts any string
  return (supportedLanguageIds as readonly string[]).includes(languageId);
}
34 changes: 32 additions & 2 deletions src/languages/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@ import {
conditionMatcher,
trailingMatcher,
} from "../util/nodeMatchers";
import { NodeMatcherAlternative, ScopeType } from "../typings/Types";
import {
NodeMatcherAlternative,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import { getNodeRange } from "../util/nodeSelectors";
import { SyntaxNode } from "web-tree-sitter";

// Generated by the following command:
// > curl https://github.com/raw/tree-sitter/tree-sitter-java/master/src/node-types.json | jq '[.[] | select(.type == "statement" or .type == "declaration") | .subtypes[].type]'
Expand Down Expand Up @@ -70,4 +76,28 @@ const nodeMatchers: Partial<Record<ScopeType, NodeMatcherAlternative>> = {
argumentOrParameter: argumentMatcher("formal_parameters", "argument_list"),
};

export default createPatternMatchers(nodeMatchers);
/** Pattern matchers built from the `nodeMatchers` table of this file. */
export const patternMatchers = createPatternMatchers(nodeMatchers);

/**
 * Text fragment extractor for Java string literals.
 *
 * Java's parse tree does not expose the quotation marks as child tokens of
 * the string node. As a workaround, the whole node range is returned,
 * quotation marks included, and the textual surrounding pair logic is left to
 * deal with them. In other languages we prefer to let the parser handle the
 * quotation marks, in case they are more than one character long.
 *
 * @param node The node which might be a string node
 * @param selection The selection from which to expand (not used here)
 * @returns The full range of the node if it is a `string_literal`, otherwise
 * `null`
 */
export function stringTextFragmentExtractor(
  node: SyntaxNode,
  selection: SelectionWithEditor
) {
  return node.type === "string_literal" ? getNodeRange(node) : null;
}
Loading

0 comments on commit 4150c58

Please sign in to comment.