Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactoring of the string path syntax parser #115

Merged
merged 19 commits into from
Nov 29, 2017
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 56 additions & 0 deletions packages/immutadot/src/core/parser.utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/**
* @typedef {function(string): T | null} Parser<T>
*/

/**
 * Applies <code>fn</code> to <code>maybe</code> unless <code>maybe</code> is
 * strictly <code>null</code>, in which case <code>null</code> is returned and
 * <code>fn</code> is not called.
 * @function
 * @param {T | null} maybe the value to transform, or <code>null</code>
 * @param {function(T): R} fn function applied to a non-null value
 * @return {R | null} <code>null</code>, or the result of <code>fn</code>
 */
const maybeMap = (maybe, fn) => {
  if (maybe === null) return null
  return fn(maybe)
}

/**
 * Creates a parser from a regular expression. The parser matches its input
 * string against the regular expression and returns the captured groups (the
 * match result without its first element, the full match), or
 * <code>null</code> if the input does not match.
 * @function
 * @param {RegExp} regexp the regular expression
 * @return {Parser<string[]>} the resulting parser
 */
export const regexp = regexp => {
  // Drops the full match, keeping only the capturing groups
  const capturedGroups = match => match.slice(1)
  return str => maybeMap(str.match(regexp), capturedGroups)
}

/**
 * Builds a parser that rejects (returns <code>null</code> for) any result of
 * another parser that does not satisfy a predicate. Results satisfying the
 * predicate are returned unchanged. The predicate is not called when the
 * other parser returns <code>null</code>.
 * @function
 * @param {Parser<T>} parser parser to filter
 * @param {function(*): boolean} predicate predicate to use
 * @return {Parser<T>} resulting parser
 */
export const filter = (parser, predicate) => {
  const keepIfAccepted = parsed => {
    if (predicate(parsed)) return parsed
    return null
  }
  return str => maybeMap(parser(str), keepIfAccepted)
}

/**
 * Builds a parser that transforms the result of another parser. When the
 * other parser returns <code>null</code>, the mapper is not called and
 * <code>null</code> is returned.
 * @function
 * @param {Parser<T>} parser parser for which to process the result
 * @param {function(T): R} mapper function to transform the result of the parser
 * @return {Parser<R>} resulting parser
 */
export const map = (parser, mapper) => str => {
  const parsed = parser(str)
  return maybeMap(parsed, mapper)
}

/**
 * Combines two parsers: the result of the first one is returned unless it is
 * <code>null</code>, in which case the input is handed to the second parser
 * and its result is returned instead.
 * @function
 * @param {Parser<A>} parser the first parser
 * @param {Parser<B>} other the second parser
 * @return {Parser<A | B>} resulting parser
 */
export const fallback = (parser, other) => str => {
  const firstAttempt = parser(str)
  return firstAttempt === null ? other(str) : firstAttempt
}

/**
 * Combines a list of parsers into a single parser that tries them in order
 * and returns the first result that is not <code>null</code>, exactly as if
 * they had been chained pairwise with <code>fallback</code>.<br />
 * If every parser returns <code>null</code>, or if the list is empty, the
 * resulting parser returns <code>null</code>.
 * @function
 * @param {Array<Parser<*>>} parsers a list of parsers to try in order
 * @return {Parser<*>} resulting parser
 */
export const race = parsers => {
  // Snapshot the list so that later mutations of the caller's array cannot change the parser
  const candidates = [...parsers]

  // A flat loop (rather than a reduce without initial value) accepts an empty
  // list and avoids one nested closure call per parser at parse time
  return str => {
    for (const candidate of candidates) {
      const parsed = candidate(str)
      if (parsed !== null) return parsed
    }
    return null
  }
}
242 changes: 76 additions & 166 deletions packages/immutadot/src/core/toPath.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,10 @@
import {
filter,
map,
race,
regexp,
} from './parser.utils'

import {
isSymbol,
toString,
Expand All @@ -24,44 +31,18 @@ const toKey = arg => {
return toString(arg)
}

const quotes = ['"', '\'']

/**
* Tests whether <code>index</code>th char of <code>str</code> is a quote.<br />
* Quotes are <code>"</code> and <code>'</code>.
* Strip slashes preceding occurrences of <code>quote</code> from <code>str</code><br />
* Possible quotes are <code>"</code> and <code>'</code>.
* @function
* @param {string} str The string
* @param {number} index Index of the char to test
* @return {{ quoted: boolean, quote: string }} A boolean <code>quoted</code>, true if <code>str.charAt(index)</code> is a quote and the <code>quote</code>.
* @param {string} quote The quote to unescape
* @return {string} The unescaped string
* @memberof core
* @private
* @since 1.0.0
*/
const isQuoteChar = (str, index) => {
  const candidate = str.charAt(index)
  // quote stays undefined when the char at index is not one of the known quotes
  const quote = quotes.find(knownQuote => knownQuote === candidate)
  const quoted = Boolean(quote)
  return { quoted, quote }
}

// Global regexps matching a backslash followed by a quote, indexed by quote character
const escapedQuotesRegexps = {}
quotes.forEach(quote => {
  escapedQuotesRegexps[quote] = new RegExp(`\\\\${quote}`, 'g')
})

/**
 * Removes the backslash preceding each escaped occurrence of <code>quote</code> in <code>str</code>.<br />
 * The regexp used is looked up in <code>escapedQuotesRegexps</code> by <code>quote</code>.
 * @function
 * @param {string} str The string
 * @param {string} quote The quote to unescape
 * @return {string} The unescaped string
 * @memberof core
 * @private
 * @since 1.0.0
 */
const unescapeQuotes = (str, quote) => {
  const escapedQuoteRegexp = escapedQuotesRegexps[quote]
  return str.replace(escapedQuoteRegexp, quote)
}
const unescapeQuotes = (str, quote) => {
  // Matches every occurrence of quote that is preceded by a backslash
  const escapedQuote = new RegExp(`\\\\${quote}`, 'g')
  return str.replace(escapedQuote, quote)
}

/**
* Converts <code>str</code> to a slice index.
Expand All @@ -84,6 +65,15 @@ const toSliceIndex = str => str === '' ? undefined : Number(str)
*/
const isSliceIndex = arg => {
  // undefined stands for an omitted slice bound and is always accepted
  if (arg === undefined) return true
  return Number.isSafeInteger(arg)
}

/**
 * Tests whether <code>arg</code> would be a valid slice index after conversion to a number.<br />
 * A falsy <code>arg</code> (such as the empty string) is treated as an omitted index.
 * @function
 * @param {*} arg The value to test
 * @return {boolean} True if <code>arg</code> is a valid slice index once converted to a number, false otherwise.
 * @private
 */
const isSliceIndexString = arg => {
  if (!arg) return isSliceIndex(undefined)
  return isSliceIndex(Number(arg))
}

/**
* Wraps <code>fn</code> allowing to call it with an array instead of a string.<br />
* The returned function behaviour is :<br />
Expand All @@ -102,6 +92,50 @@ const allowingArrays = fn => arg => {
return fn(toString(arg))
}

// Parses the empty string into an empty path, rejects (null) anything else
const emptyStringParser = str => {
  if (str.length !== 0) return null
  return []
}

const quotedBracketNotationParser = map(
Copy link
Contributor

@frinyvonnick frinyvonnick Nov 29, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe we could test each parser so it will be easier to work on them individually later ?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea but could we do this in a later PR ? The current tests cover all of the code so I think it's OK to merge this as is.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with @nlepage. An issue could be filed to make sure it's not forgotten.

Copy link
Contributor

@frinyvonnick frinyvonnick Nov 29, 2017

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I opened an issue about this topic.

regexp(/^\[(['"])(.*?[^\\])\1\]?\.?(.*)$/),
([quote, property, rest]) => [unescapeQuotes(property, quote), ...stringToPath(rest)],
)

// Matches an opening bracket followed by a quote: everything after the quote
// becomes a single key, or no key at all if nothing follows
const incompleteQuotedBracketNotationParser = map(
  regexp(/^\[["'](.*)$/),
  captures => {
    const remainder = captures[0]
    return remainder ? [remainder] : []
  },
)

// Matches an unquoted bracket notation: the bracket content is added to the path
// as a number when isIndex accepts it, as a string otherwise, then the rest is parsed
const bareBracketNotationParser = map(
  regexp(/^\[([^\]]*)\]\.?(.*)$/),
  ([property, rest]) => {
    const index = Number(property)
    const key = isIndex(index) ? index : property
    return [key, ...stringToPath(rest)]
  },
)

// Matches an opening bracket: everything after it becomes a single key,
// or no key at all if nothing follows
const incompleteBareBracketNotationParser = map(
  regexp(/^\[(.*)$/),
  captures => {
    const remainder = captures[0]
    if (!remainder) return []
    return [remainder]
  },
)

// Matches a slice notation ([start:end]) whose bounds are both valid slice index strings:
// the slice is added to the path as a [start, end] pair, then the rest is parsed
const sliceNotationParser = map(
  filter(
    regexp(/^\[([^:\]]*):([^:\]]*)\]\.?(.*)$/),
    ([sliceStart, sliceEnd]) => [sliceStart, sliceEnd].every(bound => isSliceIndexString(bound)),
  ),
  ([sliceStart, sliceEnd, rest]) => {
    const slice = [toSliceIndex(sliceStart), toSliceIndex(sliceEnd)]
    return [slice, ...stringToPath(rest)]
  },
)

// Matches a segment free of dots and opening brackets, followed by a dot:
// the segment is added to the path, then what follows the dot is parsed
const pathSegmentEndedByDotParser = map(
  regexp(/^([^.[]*?)\.(.*)$/),
  ([segment, remainder]) => {
    const remainingPath = stringToPath(remainder)
    return [segment, ...remainingPath]
  },
)

// Matches a segment free of dots and opening brackets, followed by an opening bracket:
// the segment is added to the path, then the input is parsed again from the bracket
const pathSegmentEndedByBracketParser = map(
  regexp(/^([^.[]*?)(\[.*)$/),
  ([segment, bracketed]) => {
    const remainingPath = stringToPath(bracketed)
    return [segment, ...remainingPath]
  },
)

/**
* Converts <code>str</code> to a path represented as an array of keys.
* @function
Expand All @@ -111,141 +145,17 @@ const allowingArrays = fn => arg => {
* @private
* @since 1.0.0
*/
// Scans str from left to right, splitting it on dots and square brackets,
// and accumulates the resulting keys, indexes and slices in path.
const stringToPath = str => {
const path = []
// Position in str of the next character to process
let index = 0

while (true) { // eslint-disable-line no-constant-condition
// Look for new dot or opening square bracket
const nextPointIndex = str.indexOf('.', index)
const nextBracketIndex = str.indexOf('[', index)

// If neither one is found add the end of str to the path and stop
if (nextPointIndex === -1 && nextBracketIndex === -1) {
path.push(str.substring(index))
break
}

let isArrayNotation = false

// If a dot is found before an opening square bracket
if (nextPointIndex !== -1 && (nextBracketIndex === -1 || nextPointIndex < nextBracketIndex)) {
// Add the text preceding the dot to the path and move index after the dot
path.push(str.substring(index, nextPointIndex))
index = nextPointIndex + 1

// If an opening square bracket follows the dot,
// enable array notation and move index after the bracket
if (nextBracketIndex === nextPointIndex + 1) {
isArrayNotation = true
index = nextBracketIndex + 1
}

// If an opening square bracket is found before a dot
} else if (nextBracketIndex !== -1) {
// Enable array notation
isArrayNotation = true

// If any text precedes the bracket, add it to the path
if (nextBracketIndex !== index)
path.push(str.substring(index, nextBracketIndex))

// Move index after the bracket
index = nextBracketIndex + 1
}

// If array notation is enabled
if (isArrayNotation) {
// Check if next character is a string quote
const { quoted, quote } = isQuoteChar(str, index)

// If array index is a quoted string
if (quoted) {
// Move index after the string quote
index++

// Look for the next unescaped matching string quote
// (a quote is considered escaped when the character preceding it is a backslash)
let endQuoteIndex, quotedIndex = index
do {
endQuoteIndex = str.indexOf(quote, quotedIndex)
quotedIndex = endQuoteIndex + 1
} while (endQuoteIndex !== -1 && str.charAt(endQuoteIndex - 1) === '\\')

// If no end quote found, add the remainder of str (if any) to the path and stop
if (endQuoteIndex === -1) {
if (index !== str.length) path.push(str.substring(index))
break
}

// Add the content of quotes to the path, unescaping escaped quotes
path.push(unescapeQuotes(str.substring(index, endQuoteIndex), quote))

// Move index after end quote
index = endQuoteIndex + 1

// If next character is a closing square bracket, move index after it
if (str.charAt(index) === ']') index++

// Stop if end of str has been reached
if (index === str.length) break

// If next character is a dot, move index after it (skip it)
if (str.charAt(index) === '.') index++

} else { // If array index is not a quoted string

// Look for the closing square bracket
const closingBracketIndex = str.indexOf(']', index)

// If no closing bracket found, add the remainder of str (if any) to the path and stop
if (closingBracketIndex === -1) {
if (index !== str.length) path.push(str.substring(index))
break
}

// Fetch the content of brackets and move index after closing bracket
const arrayIndexValue = str.substring(index, closingBracketIndex)
index = closingBracketIndex + 1

// If next character is a dot, move index after it (skip it)
if (str.charAt(index) === '.') index++

// Shorthand: if array index is the whole slice add it to path
if (arrayIndexValue === ':') {
path.push([undefined, undefined])
} else {

// Look for a slice delimiter (colon)
const sliceDelimIndex = arrayIndexValue.indexOf(':')

// If no slice delimiter found
if (sliceDelimIndex === -1) {
// Parse array index as a number
const nArrayIndexValue = Number(arrayIndexValue)

// Add array index to path, either as a valid index (positive int), or as a string
path.push(isIndex(nArrayIndexValue) ? nArrayIndexValue : arrayIndexValue)

} else { // If a slice delimiter is found

// Fetch slice start and end, and parse them as slice indexes (empty or valid int)
const sliceStart = arrayIndexValue.substring(0, sliceDelimIndex), sliceEnd = arrayIndexValue.substring(sliceDelimIndex + 1)
const nSliceStart = toSliceIndex(sliceStart), nSliceEnd = toSliceIndex(sliceEnd)

// Add array index to path, as a slice if both slice indexes are valid (undefined or int), or as a string
path.push(isSliceIndex(nSliceStart) && isSliceIndex(nSliceEnd) ? [nSliceStart, nSliceEnd] : arrayIndexValue)
}
}

// Stop if end of string has been reached
if (index === str.length) break
}
}

}

return path
}
// Parsers are tried in the order listed and the first non-null result is used,
// so the order of this list is significant.
const stringToPath = race([
emptyStringParser,
quotedBracketNotationParser,
incompleteQuotedBracketNotationParser,
// Must come before the bare bracket parser, whose regexp would also match a slice notation
sliceNotationParser,
bareBracketNotationParser,
incompleteBareBracketNotationParser,
pathSegmentEndedByDotParser,
pathSegmentEndedByBracketParser,
// Last resort, never returns null: the whole string is a single key
str => [str],
])

// NOTE(review): presumably the maximum number of entries kept in cache; the code enforcing it is outside this view — confirm
const MAX_CACHE_SIZE = 1000
// NOTE(review): presumably memoizes already converted paths; usage is outside this view — confirm
const cache = new Map()
Expand Down
4 changes: 2 additions & 2 deletions packages/immutadot/src/core/toPath.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ describe('ToPath', () => {
it('should convert basic path', () => {
expect(toPath('a.22.ccc')).toEqual(['a', '22', 'ccc'])
// Empty properties should be kept
expect(toPath('.')).toEqual(['', ''])
expect(toPath('..')).toEqual(['', '', ''])
expect(toPath('.')).toEqual([''])
expect(toPath('..')).toEqual(['', ''])
// If no separators, path should be interpreted as one property
expect(toPath('\']"\\')).toEqual(['\']"\\'])
})
Expand Down