Skip to content

Commit

Permalink
Improve tokenization of function names, closes #1930
Browse files Browse the repository at this point in the history
  • Loading branch information
josevalim committed Jul 6, 2024
1 parent 56a3efa commit 8bd43bb
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 11 deletions.
49 changes: 39 additions & 10 deletions assets/js/search-page.js
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,46 @@ function docTokenSplitter (builder) {
}

function docTokenFunction (token) {
// Split on : . / _ - to make it easier to partially match on function names.
// We split only when tokenizing, not when searching.
const tokens = token
.toString()
.split(/\:|\.|\/|_|-/)
.map(part => {
return token.clone().update(() => part)
})
// If we have something with an arity, we split on : . to make partial
// matches easier. We split only when tokenizing, not when searching.
// Below we use ExDoc.Markdown.to_ast/2 as an example.
const tokens = [token]
const arityRegex = /\/\d+$/
const namespaceRegex = /\:|\./
let toSplitWords = token.toString()

if(arityRegex.test(toSplitWords)) {

Check failure on line 164 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Expected space(s) after "if"
const withoutArity = token
.toString()
.replace(arityRegex, "")

Check failure on line 167 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Strings must use singlequote

// This token represents ExDoc.Markdown.to_ast
tokens.push(token.clone().update(() => withoutArity))

// And now we get each part as token: ExDoc, Markdown, and to_ast
let parts = withoutArity.split(namespaceRegex)

Check failure on line 173 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

'parts' is never reassigned. Use 'const' instead

if(parts.length > 1) {

Check failure on line 175 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Expected space(s) after "if"
for(let part of parts) {

Check failure on line 176 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Expected space(s) after "for"

Check failure on line 176 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

'part' is never reassigned. Use 'const' instead
tokens.push(token.clone().update(() => part))
}

// Let's also add to_ast/2
let lastWithArity = token.toString().split(namespaceRegex)

Check failure on line 181 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

'lastWithArity' is never reassigned. Use 'const' instead
tokens.push(token.clone().update(() => lastWithArity[lastWithArity.length - 1]))
}

if (tokens.length > 1) {
return [...tokens, token]
toSplitWords = parts[parts.length - 1]
}

// Now split the function name (or the whole token, if that's all we had)
// on _ or -, keeping the original token as well.
let words = toSplitWords.split(/\_|\-/)

Check failure on line 190 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

'words' is never reassigned. Use 'const' instead

if(words.length > 1) {

Check failure on line 192 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Expected space(s) after "if"
for(let word of words) {

Check failure on line 193 in assets/js/search-page.js

View workflow job for this annotation

GitHub Actions / Check JS

Expected space(s) after "for"
tokens.push(token.clone().update(() => word))
}
}

return tokens
Expand Down

Large diffs are not rendered by default.

0 comments on commit 8bd43bb

Please sign in to comment.