Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix JSON surrounding pairs #370

Merged
merged 8 commits into from
Dec 8, 2021
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions src/languages/constants.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Ids of the languages that have an entry in the per-language tables of this
 * package. Declared `as const` so that the element type is the union of these
 * exact string literals rather than plain `string`.
 */
export const supportedLanguageIds = [
"c",
"cpp",
"csharp",
"java",
"javascript",
"javascriptreact",
"json",
"jsonc",
"python",
"typescript",
"typescriptreact",
] as const;

/** Union of the string literals listed in `supportedLanguageIds`. */
export type SupportedLanguageId = typeof supportedLanguageIds[number];
99 changes: 99 additions & 0 deletions src/languages/getNodeMatcher.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import { SyntaxNode } from "web-tree-sitter";
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Moved stuff out of index.ts

import { notSupported } from "../util/nodeMatchers";
import { selectionWithEditorFromRange } from "../util/selectionUtils";
import {
NodeMatcher,
NodeMatcherValue,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import cpp from "./cpp";
import csharp from "./csharp";
import { patternMatchers as json } from "./json";
import { patternMatchers as typescript } from "./typescript";
import { patternMatchers as java } from "./java";
import python from "./python";
import { UnsupportedLanguageError } from "../errors";
import { SupportedLanguageId } from "./constants";

/**
 * Looks up the node matcher registered for a scope type in a language.
 *
 * @param languageId Language id used as the key into `languageMatchers`
 * @param scopeType Scope type whose matcher is requested
 * @param includeSiblings When true, the matcher is wrapped by
 * `matcherIncludeSiblings` before being returned
 * @returns The matcher for the scope type, or `notSupported` when the language
 * has no matcher for that scope type
 * @throws UnsupportedLanguageError when `languageMatchers` has no entry for
 * `languageId`
 */
export function getNodeMatcher(
  languageId: string,
  scopeType: ScopeType,
  includeSiblings: boolean
): NodeMatcher {
  const scopeMatchers = languageMatchers[languageId as SupportedLanguageId];
  if (scopeMatchers == null) {
    throw new UnsupportedLanguageError(languageId);
  }

  const scopeMatcher = scopeMatchers[scopeType];
  if (scopeMatcher == null) {
    return notSupported;
  }

  return includeSiblings ? matcherIncludeSiblings(scopeMatcher) : scopeMatcher;
}

/**
 * Per-language table of scope-type matchers, keyed by supported language id.
 * Several ids share one table: `c` reuses the `cpp` matchers, the JavaScript
 * and React variants reuse the `typescript` matchers, and `jsonc` reuses the
 * `json` matchers.
 */
const languageMatchers: Record<
SupportedLanguageId,
Record<ScopeType, NodeMatcher>
> = {
c: cpp,
cpp: cpp,
csharp: csharp,
java,
javascript: typescript,
javascriptreact: typescript,
json,
jsonc: json,
python,
typescript,
typescriptreact: typescript,
};

/**
 * Wraps a matcher so that, for each node it matches, the result is replaced by
 * the matches found among the children of the nearest ancestor that yields any
 * match (see `iterateNearestIterableAncestor`).
 *
 * @param matcher The matcher to wrap
 * @returns A matcher that returns null when the wrapped matcher returns null or
 * when the expansion produces no matches
 */
function matcherIncludeSiblings(matcher: NodeMatcher): NodeMatcher {
  return (
    selection: SelectionWithEditor,
    node: SyntaxNode
  ): NodeMatcherValue[] | null => {
    const directMatches = matcher(selection, node);
    if (directMatches == null) {
      return null;
    }

    const expanded: NodeMatcherValue[] = [];
    for (const match of directMatches) {
      // Re-run the matcher over the siblings, using the matched range as the
      // selection.
      const siblingSelection = selectionWithEditorFromRange(
        selection,
        match.selection.selection
      );
      expanded.push(
        ...iterateNearestIterableAncestor(match.node, siblingSelection, matcher)
      );
    }

    return expanded.length > 0 ? expanded : null;
  };
}

/**
 * Walks up from `node` through its ancestors and, at the first ancestor whose
 * named children produce at least one match, returns all matches produced by
 * those children.
 *
 * @param node The node whose ancestors are searched
 * @param selection The selection passed to the matcher for every child
 * @param nodeMatcher The matcher applied to each named child
 * @returns The matches from the nearest matching ancestor, or an empty array
 * if no ancestor yields a match
 */
function iterateNearestIterableAncestor(
  node: SyntaxNode,
  selection: SelectionWithEditor,
  nodeMatcher: NodeMatcher
) {
  for (
    let ancestor: SyntaxNode | null = node.parent;
    ancestor != null;
    ancestor = ancestor.parent
  ) {
    const found: NodeMatcherValue[] = [];

    for (const child of ancestor.namedChildren) {
      const childMatches = nodeMatcher(selection, child);
      if (childMatches == null) {
        continue;
      }
      for (const childMatch of childMatches) {
        if (childMatch != null) {
          found.push(childMatch);
        }
      }
    }

    if (found.length > 0) {
      return found;
    }
  }

  return [];
}
128 changes: 128 additions & 0 deletions src/languages/getTextFragmentExtractor.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
import { SyntaxNode } from "web-tree-sitter";
import { SelectionWithEditor } from "../typings/Types";
import { stringTextFragmentExtractor as jsonStringTextFragmentExtractor } from "./json";
import { stringTextFragmentExtractor as javaStringTextFragmentExtractor } from "./java";
import { stringTextFragmentExtractor as typescriptStringTextFragmentExtractor } from "./typescript";
import { UnsupportedLanguageError } from "../errors";
import { Range } from "vscode";
import { SupportedLanguageId } from "./constants";
import {
getNodeInternalRange,
getNodeRange,
makeRangeFromPositions,
} from "../util/nodeSelectors";
import { getNodeMatcher } from "./getNodeMatcher";
import { notSupported } from "../util/nodeMatchers";

/**
 * Function that, given a syntax node and the current selection, returns the
 * range of a text fragment for that node, or null when the node does not
 * yield one.
 */
export type TextFragmentExtractor = (
node: SyntaxNode,
selection: SelectionWithEditor
) => Range | null;

/**
 * Builds the text fragment extractor for a language.
 *
 * The returned extractor tries, in order: the string extractor, the language's
 * "comment" matcher (when the language has one), and finally tree-sitter
 * `ERROR` nodes.
 *
 * @param languageId The language whose node matchers are used
 * @param stringTextFragmentExtractor Optional language-specific string
 * extractor; when omitted, one is built from the language's "string" matcher
 * @returns The text fragment extractor for the language
 */
function constructDefaultTextFragmentExtractor(
  languageId: SupportedLanguageId,
  stringTextFragmentExtractor?: TextFragmentExtractor
): TextFragmentExtractor {
  const commentMatcher = getNodeMatcher(languageId, "comment", false);
  const extractStringFragment: TextFragmentExtractor =
    stringTextFragmentExtractor ??
    constructDefaultStringTextFragmentExtractor(languageId);

  return (node: SyntaxNode, selection: SelectionWithEditor) => {
    const stringRange = extractStringFragment(node, selection);
    if (stringRange != null) {
      return stringRange;
    }

    const isComment =
      commentMatcher !== notSupported &&
      commentMatcher(selection, node) != null;

    // Error nodes are treated as raw text as well, so that the surrounding
    // pair matcher can still be useful when we have a bad parse tree
    const isRawText = isComment || node.type === "ERROR";

    return isRawText ? getNodeRange(node) : null;
  };
}

/**
 * Builds a string extractor from the language's "string" node matcher.
 *
 * @param languageId The language whose "string" matcher is used
 * @returns An extractor that returns the internal range of a matched string
 * node, or null for any node the matcher does not match
 */
function constructDefaultStringTextFragmentExtractor(
  languageId: SupportedLanguageId
): TextFragmentExtractor {
  const stringMatcher = getNodeMatcher(languageId, "string", false);

  return (node: SyntaxNode, selection: SelectionWithEditor) =>
    stringMatcher(selection, node) != null
      ? // Exclude starting and ending quotation marks
        getNodeInternalRange(node)
      : null;
}

/**
 * Returns the function used to extract the range of a text fragment from
 * within a parsed language. The extractor should only return a range for
 * fragments of the document that should be treated like raw text, such as
 * comments, strings, or error nodes. In these cases we want our surrounding
 * pair algorithm to fall back to a purely text-based approach.
 * @param languageId The language for which to get the text fragment extractor
 * @returns The text fragment extractor for the given language
 * @throws UnsupportedLanguageError if no extractor is registered for the
 * language
 */
export default function getTextFragmentExtractor(
  languageId: string
): TextFragmentExtractor {
  const registered = textFragmentExtractors[languageId as SupportedLanguageId];

  if (registered != null) {
    return registered;
  }

  throw new UnsupportedLanguageError(languageId);
}

/**
 * Text fragment extractor for every supported language, built once when this
 * module is loaded. Languages without a second argument use the default
 * string extractor derived from their "string" matcher; java, the
 * JavaScript/TypeScript family and json/jsonc pass a language-specific string
 * extractor instead.
 */
const textFragmentExtractors: Record<
SupportedLanguageId,
TextFragmentExtractor
> = {
c: constructDefaultTextFragmentExtractor("c"),
cpp: constructDefaultTextFragmentExtractor("cpp"),
csharp: constructDefaultTextFragmentExtractor("csharp"),
java: constructDefaultTextFragmentExtractor(
"java",
javaStringTextFragmentExtractor
),
javascript: constructDefaultTextFragmentExtractor(
"javascript",
typescriptStringTextFragmentExtractor
),
javascriptreact: constructDefaultTextFragmentExtractor(
"javascriptreact",
typescriptStringTextFragmentExtractor
),
jsonc: constructDefaultTextFragmentExtractor(
"jsonc",
jsonStringTextFragmentExtractor
),
json: constructDefaultTextFragmentExtractor(
"json",
jsonStringTextFragmentExtractor
),
python: constructDefaultTextFragmentExtractor("python"),
typescript: constructDefaultTextFragmentExtractor(
"typescript",
typescriptStringTextFragmentExtractor
),
typescriptreact: constructDefaultTextFragmentExtractor(
"typescriptreact",
typescriptStringTextFragmentExtractor
),
};
98 changes: 5 additions & 93 deletions src/languages/index.ts
Original file line number Diff line number Diff line change
@@ -1,95 +1,7 @@
import { SyntaxNode } from "web-tree-sitter";
import { notSupported } from "../util/nodeMatchers";
import { selectionWithEditorFromRange } from "../util/selectionUtils";
import {
NodeMatcher,
NodeMatcherValue,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import cpp from "./cpp";
import csharp from "./csharp";
import java from "./java";
import json from "./json";
import python from "./python";
import typescript from "./typescript";
import { UnsupportedLanguageError } from "../errors";
import { SupportedLanguageId, supportedLanguageIds } from "./constants";

/**
 * Per-language table of scope-type matchers, keyed by language id. Several ids
 * share one table: `c` reuses the `cpp` matchers, the JavaScript and React
 * variants reuse the `typescript` matchers, and `jsonc` reuses the `json`
 * matchers.
 */
const languageMatchers: Record<string, Record<ScopeType, NodeMatcher>> = {
c: cpp,
cpp: cpp,
csharp: csharp,
java,
javascript: typescript,
javascriptreact: typescript,
json,
jsonc: json,
python,
typescript,
typescriptreact: typescript,
};

/**
 * Looks up the node matcher registered for a scope type in a language.
 *
 * @param languageId Language id used as the key into `languageMatchers`
 * @param scopeType Scope type whose matcher is requested
 * @param includeSiblings When true, the matcher is wrapped by
 * `matcherIncludeSiblings` before being returned
 * @returns The matcher for the scope type, or `notSupported` when the language
 * has no matcher for that scope type
 * @throws UnsupportedLanguageError when `languageMatchers` has no entry for
 * `languageId`
 */
export function getNodeMatcher(
  languageId: string,
  scopeType: ScopeType,
  includeSiblings: boolean
): NodeMatcher {
  const scopeMatchers = languageMatchers[languageId];
  if (scopeMatchers == null) {
    throw new UnsupportedLanguageError(languageId);
  }

  const scopeMatcher = scopeMatchers[scopeType];
  if (scopeMatcher == null) {
    return notSupported;
  }

  return includeSiblings ? matcherIncludeSiblings(scopeMatcher) : scopeMatcher;
}

/**
 * Wraps a matcher so that, for each node it matches, the result is replaced by
 * the matches found among the children of the nearest ancestor that yields any
 * match (see `iterateNearestIterableAncestor`).
 *
 * @param matcher The matcher to wrap
 * @returns A matcher that returns null when the wrapped matcher returns null or
 * when the expansion produces no matches
 */
function matcherIncludeSiblings(matcher: NodeMatcher): NodeMatcher {
  return (
    selection: SelectionWithEditor,
    node: SyntaxNode
  ): NodeMatcherValue[] | null => {
    const directMatches = matcher(selection, node);
    if (directMatches == null) {
      return null;
    }

    const expanded: NodeMatcherValue[] = [];
    for (const match of directMatches) {
      // Re-run the matcher over the siblings, using the matched range as the
      // selection.
      const siblingSelection = selectionWithEditorFromRange(
        selection,
        match.selection.selection
      );
      expanded.push(
        ...iterateNearestIterableAncestor(match.node, siblingSelection, matcher)
      );
    }

    return expanded.length > 0 ? expanded : null;
  };
}

function iterateNearestIterableAncestor(
node: SyntaxNode,
selection: SelectionWithEditor,
nodeMatcher: NodeMatcher
) {
let parent: SyntaxNode | null = node.parent;
while (parent != null) {
const matches = parent!.namedChildren
.flatMap((sibling) => nodeMatcher(selection, sibling))
.filter((match) => match != null) as NodeMatcherValue[];
if (matches.length > 0) {
return matches;
}
parent = parent.parent;
}
return [];
/**
 * Type guard reporting whether `languageId` is one of the ids listed in
 * `supportedLanguageIds`.
 *
 * Membership is tested by value. The `in` operator must not be used on the
 * array: it tests property names (indices, `length`, ...) rather than
 * elements, so `"json" in supportedLanguageIds` is false while
 * `"0" in supportedLanguageIds` is true.
 *
 * @param languageId The language id to test
 * @returns True exactly when `languageId` is an element of
 * `supportedLanguageIds`
 */
export function isLanguageSupported(
  languageId: string
): languageId is SupportedLanguageId {
  // Widen the readonly literal tuple so `includes` accepts an arbitrary string.
  return (supportedLanguageIds as readonly string[]).includes(languageId);
}
34 changes: 32 additions & 2 deletions src/languages/java.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,13 @@ import {
conditionMatcher,
trailingMatcher,
} from "../util/nodeMatchers";
import { NodeMatcherAlternative, ScopeType } from "../typings/Types";
import {
NodeMatcherAlternative,
ScopeType,
SelectionWithEditor,
} from "../typings/Types";
import { getNodeRange } from "../util/nodeSelectors";
import { SyntaxNode } from "web-tree-sitter";

// Generated by the following command:
// > curl https://github.com/raw/tree-sitter/tree-sitter-java/master/src/node-types.json | jq '[.[] | select(.type == "statement" or .type == "declaration") | .subtypes[].type]'
Expand Down Expand Up @@ -70,4 +76,28 @@ const nodeMatchers: Partial<Record<ScopeType, NodeMatcherAlternative>> = {
argumentOrParameter: argumentMatcher("formal_parameters", "argument_list"),
};

// Default export of the Java matchers built from the `nodeMatchers` table.
export default createPatternMatchers(nodeMatchers);
// Named export of the same construction; getNodeMatcher.ts imports it as
// `{ patternMatchers as java }`.
export const patternMatchers = createPatternMatchers(nodeMatchers);

/**
 * Extracts string text fragments in Java.
 *
 * This is a workaround for the fact that Java string nodes do not have
 * quotation mark tokens as children. Instead of letting the parse tree handle
 * the quotation marks, the full node range (quotation marks included) is
 * returned so that the textual surrounding pair logic can see and handle
 * them. In other languages we prefer to let the parser handle the quotation
 * marks in case they are more than one character long.
 * @param node The node which might be a string node
 * @param selection The selection from which to expand (unused here; present
 * so the function has the text fragment extractor signature)
 * @returns The range of the string node, or null if the node is not a
 * `string_literal`
 */
export function stringTextFragmentExtractor(
  node: SyntaxNode,
  selection: SelectionWithEditor
) {
  const isStringLiteral = node.type === "string_literal";

  return isStringLiteral ? getNodeRange(node) : null;
}
Loading