From 9ab44fb55e3ec755bdb8dd8bf0ffb7a62124ebe8 Mon Sep 17 00:00:00 2001 From: Serge Klochkov <3175289+slvrtrn@users.noreply.github.com> Date: Mon, 23 Sep 2024 20:04:49 +0200 Subject: [PATCH] Expose `parseColumnType` function (#316) --- .github/workflows/scorecard.yml | 22 +- CHANGELOG.md | 56 ++ .../integration/abort_request.test.ts | 1 - .../integration/error_parsing.test.ts | 5 +- .../__tests__/unit/parse_column_types.test.ts | 56 ++ .../unit/parse_column_types_array.test.ts | 308 ++++++++ .../unit/parse_column_types_datetime.test.ts | 113 +++ .../unit/parse_column_types_decimal.test.ts | 103 +++ .../unit/parse_column_types_enum.test.ts | 89 +++ .../unit/parse_column_types_map.test.ts | 41 + .../unit/parse_column_types_nullable.test.ts | 266 +++++++ .../unit/parse_column_types_tuple.test.ts | 164 ++++ .../__tests__/utils/native_columns.ts | 124 +++ packages/client-common/src/index.ts | 15 + .../client-common/src/parse/column_types.ts | 723 ++++++++++++++++++ packages/client-common/src/parse/index.ts | 1 + packages/client-common/src/version.ts | 2 +- packages/client-node/src/index.ts | 14 + packages/client-node/src/version.ts | 2 +- packages/client-web/src/index.ts | 14 + packages/client-web/src/version.ts | 2 +- 21 files changed, 2110 insertions(+), 11 deletions(-) create mode 100644 packages/client-common/__tests__/unit/parse_column_types.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_array.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_enum.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_map.test.ts create mode 100644 packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts create mode 100644 
packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts create mode 100644 packages/client-common/__tests__/utils/native_columns.ts create mode 100644 packages/client-common/src/parse/column_types.ts create mode 100644 packages/client-common/src/parse/index.ts diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 294bee6..4cc2a2c 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -12,7 +12,19 @@ on: schedule: - cron: '43 12 * * 6' push: - branches: [ "main" ] + branches: + - main + paths-ignore: + - '**/*.md' + - 'LICENSE' + - 'benchmarks/**' + - 'examples/**' + pull_request: + paths-ignore: + - '**/*.md' + - 'LICENSE' + - 'benchmarks/**' + - 'examples/**' workflow_dispatch: # Declare default permissions as read only. @@ -32,12 +44,12 @@ jobs: # actions: read steps: - - name: "Checkout code" + - name: 'Checkout code' uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 with: persist-credentials: false - - name: "Run analysis" + - name: 'Run analysis' uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1 with: results_file: results.sarif @@ -59,7 +71,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - - name: "Upload artifact" + - name: 'Upload artifact' uses: actions/upload-artifact@97a0fba1372883ab732affbe8f94b823f91727db # v3.pre.node20 with: name: SARIF file @@ -68,7 +80,7 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). 
# Commenting out will disable upload of results to your repo's Code Scanning dashboard - - name: "Upload to code-scanning" + - name: 'Upload to code-scanning' uses: github/codeql-action/upload-sarif@v3 with: sarif_file: results.sarif diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f2ee2b..6b9024f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,59 @@ +# 1.7.0 (Common, Node.js, Web) + +- (Experimental) Exposed the `parseColumnType` function that takes a string representation of a ClickHouse type (e.g., `FixedString(16)`, `Nullable(Int32)`, etc.) and returns an AST-like object that represents the type. For example: + + ```ts + for (const type of [ + 'Int32', + 'Array(Nullable(String))', + `Map(Int32, DateTime64(9, 'UTC'))`, + ]) { + console.log(`##### Source ClickHouse type: ${type}`) + console.log(parseColumnType(type)) + } + ``` + + The above code will output: + + ``` + ##### Source ClickHouse type: Int32 + { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' } + ##### Source ClickHouse type: Array(Nullable(String)) + { + type: 'Array', + value: { + type: 'Nullable', + sourceType: 'Nullable(String)', + value: { type: 'Simple', columnType: 'String', sourceType: 'String' } + }, + dimensions: 1, + sourceType: 'Array(Nullable(String))' + } + ##### Source ClickHouse type: Map(Int32, DateTime64(9, 'UTC')) + { + type: 'Map', + key: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + value: { + type: 'DateTime64', + timezone: 'UTC', + precision: 9, + sourceType: "DateTime64(9, 'UTC')" + }, + sourceType: "Map(Int32, DateTime64(9, 'UTC'))" + } + ``` + + While the original intention was to use this function internally for `Native`/`RowBinaryWithNamesAndTypes` data formats headers parsing, it can be useful for other purposes as well (e.g., interfaces generation, or custom JSON serializers). 
+ + NB: currently unsupported source types to parse: + + - Geo + - (Simple)AggregateFunction + - Nested + - Old/new experimental JSON + - Dynamic + - Variant + # 1.6.0 (Common, Node.js, Web) ## New features diff --git a/packages/client-common/__tests__/integration/abort_request.test.ts b/packages/client-common/__tests__/integration/abort_request.test.ts index 92b1543..efefaf5 100644 --- a/packages/client-common/__tests__/integration/abort_request.test.ts +++ b/packages/client-common/__tests__/integration/abort_request.test.ts @@ -7,7 +7,6 @@ describe('abort request', () => { beforeEach(() => { client = createTestClient() }) - afterEach(async () => { await client.close() }) diff --git a/packages/client-common/__tests__/integration/error_parsing.test.ts b/packages/client-common/__tests__/integration/error_parsing.test.ts index 7cb3dc1..542e77a 100644 --- a/packages/client-common/__tests__/integration/error_parsing.test.ts +++ b/packages/client-common/__tests__/integration/error_parsing.test.ts @@ -14,9 +14,10 @@ describe('ClickHouse server errors parsing', () => { // Possible error messages here: // (since 24.3+, Cloud SMT): Unknown expression identifier 'number' in scope SELECT number AS FR // (since 23.8+, Cloud RMT): Missing columns: 'number' while processing query: 'SELECT number AS FR', required columns: 'number' + // (since 24.9+): Unknown expression identifier `number` in scope SELECT number AS FR const errorMessagePattern = `((?:Missing columns: 'number' while processing query: 'SELECT number AS FR', required columns: 'number')|` + - `(?:Unknown expression identifier 'number' in scope SELECT number AS FR))` + `(?:Unknown expression identifier ('|\`)number('|\`) in scope SELECT number AS FR))` await expectAsync( client.query({ query: 'SELECT number FR', @@ -37,7 +38,7 @@ describe('ClickHouse server errors parsing', () => { const dbName = getTestDatabaseName() const errorMessagePattern = `((?:^Table ${dbName}.unknown_table does not exist.*)|` + - `(?:Unknown 
table expression identifier 'unknown_table' in scope))` + `(?:Unknown table expression identifier ('|\`)unknown_table('|\`) in scope))` await expectAsync( client.query({ query: 'SELECT * FROM unknown_table', diff --git a/packages/client-common/__tests__/unit/parse_column_types.test.ts b/packages/client-common/__tests__/unit/parse_column_types.test.ts new file mode 100644 index 0000000..2a68eb6 --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types.test.ts @@ -0,0 +1,56 @@ +import { parseFixedStringType } from '../../src/parse' + +describe('Columns types parser', () => { + describe('FixedString', () => { + it('should parse FixedString', async () => { + const args: [string, number][] = [ + ['FixedString(1)', 1], + ['FixedString(42)', 42], + ['FixedString(100)', 100], + ['FixedString(32768)', 32768], + ] + args.forEach(([columnType, sizeBytes]) => { + const result = parseFixedStringType({ + columnType, + sourceType: columnType, + }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a FixedString with size ${sizeBytes}`, + ) + .toEqual({ type: 'FixedString', sizeBytes, sourceType: columnType }) + }) + }) + + it('should throw on invalid FixedString type', async () => { + const args: [string][] = [ + ['FixedString'], + ['FixedString('], + ['FixedString()'], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => + parseFixedStringType({ columnType, sourceType: columnType }), + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid FixedString type') + }) + }) + + it('should throw on invalid FixedString size', async () => { + const args: [string][] = [ + ['FixedString(0)'], + ['FixedString(x)'], + [`FixedString(')`], + ] + args.forEach(([columnType]) => { + expect(() => + parseFixedStringType({ columnType, sourceType: columnType }), + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid FixedString size in bytes') + }) + }) + }) +}) diff --git 
a/packages/client-common/__tests__/unit/parse_column_types_array.test.ts b/packages/client-common/__tests__/unit/parse_column_types_array.test.ts new file mode 100644 index 0000000..acb7d76 --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_array.test.ts @@ -0,0 +1,308 @@ +import type { + ParsedColumnDateTime, + ParsedColumnDateTime64, + ParsedColumnEnum, + SimpleColumnType, +} from '../../src/parse' +import { parseArrayType } from '../../src/parse' + +describe('Columns types parser - Array', () => { + it('should parse Array with a simple value type', async () => { + type TestArgs = { + columnType: string + valueType: SimpleColumnType + dimensions: number + } + const args: TestArgs[] = [ + { + columnType: 'Array(String)', + valueType: 'String', + dimensions: 1, + }, + { + columnType: 'Array(UInt8)', + valueType: 'UInt8', + dimensions: 1, + }, + { + columnType: 'Array(Array(Int32))', + valueType: 'Int32', + dimensions: 2, + }, + { + columnType: 'Array(Array(Array(Date32)))', + valueType: 'Date32', + dimensions: 3, + }, + { + columnType: 'Array(Array(Array(Array(Float32))))', + valueType: 'Float32', + dimensions: 4, + }, + ] + args.forEach((args: TestArgs) => { + const { columnType, valueType, dimensions } = args + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`, + ) + .toEqual({ + type: 'Array', + value: { + type: 'Simple', + columnType: valueType, + sourceType: valueType, // T + }, + sourceType: columnType, // Array(T) + dimensions, + }) + }) + }) + + it('should parse Array with Nullable', async () => { + type TestArgs = { + columnType: string + valueType: SimpleColumnType + dimensions: number + } + const args: TestArgs[] = [ + { + columnType: 'Array(Nullable(String))', + valueType: 'String', + dimensions: 1, + }, + { + columnType: 'Array(Array(Nullable(Int32)))', + valueType: 
'Int32', + dimensions: 2, + }, + ] + args.forEach(({ columnType, valueType, dimensions }: TestArgs) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${valueType} and ${dimensions} dimensions`, + ) + .toEqual({ + type: 'Array', + value: { + type: 'Nullable', + value: { + type: 'Simple', + columnType: valueType, + sourceType: valueType, // T + }, + sourceType: `Nullable(${valueType})`, // Nullable(T) + }, + sourceType: columnType, // Array(Nullable(T)) + dimensions, + }) + }) + }) + + it('should parse Array with Enum value type', async () => { + type TestArgs = { + value: ParsedColumnEnum + dimensions: number + columnType: string + } + const sourceEnum8 = `Enum8('foo' = 42)` + const valuesEnum8 = { 42: 'foo' } + const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)` + const valuesEnum16 = { + 144: 'bar', + 500: 'qaz', + } + const args: TestArgs[] = [ + { + value: { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + dimensions: 1, + columnType: `Array(${sourceEnum8})`, + }, + { + value: { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + dimensions: 1, + columnType: `Array(${sourceEnum16})`, + }, + { + value: { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + dimensions: 2, + columnType: `Array(Array(${sourceEnum8}))`, + }, + { + value: { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + dimensions: 3, + columnType: `Array(Array(Array(${sourceEnum16})))`, + }, + ] + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, + ) + .toEqual({ + type: 'Array', + sourceType: 
columnType, + dimensions, + value, + }) + }) + }) + + it('should parse Array of DateTime', async () => { + type TestArgs = { + value: ParsedColumnDateTime + dimensions: number + columnType: string + } + const args: TestArgs[] = [ + { + value: { + type: 'DateTime', + timezone: null, + sourceType: 'DateTime', + }, + dimensions: 1, + columnType: 'Array(DateTime)', + }, + { + value: { + type: 'DateTime', + timezone: 'UTC', + sourceType: `DateTime('UTC')`, + }, + dimensions: 1, + columnType: `Array(DateTime('UTC'))`, + }, + { + value: { + type: 'DateTime', + timezone: 'Etc/GMT-5', + sourceType: `DateTime('Etc/GMT-5')`, + }, + dimensions: 2, + columnType: `Array(Array(DateTime('Etc/GMT-5')))`, + }, + ] + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, + ) + .toEqual({ + type: 'Array', + sourceType: columnType, + dimensions, + value, + }) + }) + }) + + it('should parse Array of DateTime64', async () => { + type TestArgs = { + value: ParsedColumnDateTime64 + dimensions: number + columnType: string + } + const args: TestArgs[] = [ + { + value: { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(0)', + precision: 0, + }, + dimensions: 1, + columnType: 'Array(DateTime64(0))', + }, + { + value: { + type: 'DateTime64', + timezone: 'UTC', + sourceType: `DateTime64(3, 'UTC')`, + precision: 3, + }, + dimensions: 1, + columnType: `Array(DateTime64(3, 'UTC'))`, + }, + { + value: { + type: 'DateTime64', + timezone: 'Etc/GMT-5', + sourceType: `DateTime64(6, 'Etc/GMT-5')`, + precision: 6, + }, + dimensions: 2, + columnType: `Array(Array(DateTime64(6, 'Etc/GMT-5')))`, + }, + { + value: { + type: 'DateTime64', + timezone: 'Europe/Sofia', + sourceType: `DateTime64(9, 'Europe/Sofia')`, + precision: 9, + }, + dimensions: 3, + columnType: 
`Array(Array(Array(DateTime64(9, 'Europe/Sofia'))))`, + }, + ] + + args.forEach(({ columnType, dimensions, value }) => { + const result = parseArrayType({ columnType, sourceType: columnType }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as an Array with value type ${value.sourceType} and ${dimensions} dimensions`, + ) + .toEqual({ + type: 'Array', + sourceType: columnType, + dimensions, + value, + }) + }) + }) + + // TODO: Map type test. + + it('should throw on invalid Array type', async () => { + // Array(Int8) is the shortest valid definition + const args = [ + ['Array'], + ['Array('], + ['Array()'], + ['Array(a'], + ['Array(ab'], + ['Array(ab)'], + ['Array(abc)'], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseArrayType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Array type') + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts b/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts new file mode 100644 index 0000000..6fc0c00 --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_datetime.test.ts @@ -0,0 +1,113 @@ +import { parseDateTime64Type, parseDateTimeType } from '../../src/parse' + +describe('Columns types parser - DateTime and DateTime64', () => { + describe('DateTime', () => { + it('should parse DateTime', async () => { + const args: [string, string | null][] = [ + ['DateTime', null], + [`DateTime('GB')`, 'GB'], + [`DateTime('UTC')`, 'UTC'], + [`DateTime('Europe/Amsterdam')`, 'Europe/Amsterdam'], + ] + args.forEach(([columnType, timezone]) => { + const result = parseDateTimeType({ columnType, sourceType: columnType }) + expect(result) + .withContext(`Expected ${columnType} to be parsed as a DateTime`) + .toEqual({ type: 'DateTime', sourceType: columnType, timezone }) + }) + }) + + it('should throw on invalid DateTime', async () => { + // 
DateTime('GB') has the least amount of chars allowed for a valid DateTime type. + const args: [string][] = [ + ['DateTime()'], + [`DateTime(')`], + [`DateTime('')`], + [`DateTime('A')`], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseDateTimeType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime type') + }) + }) + }) + + describe('DateTime64', () => { + const precisionRange = [...Array(10).keys()] // 0..9 + + it('should parse DateTime64 without timezone', async () => { + const args: [string, number][] = precisionRange.map((precision) => [ + `DateTime64(${precision})`, + precision, + ]) + args.forEach(([columnType, precision]) => { + const result = parseDateTime64Type({ + columnType, + sourceType: columnType, + }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision}`, + ) + .toEqual({ + type: 'DateTime64', + timezone: null, + sourceType: columnType, + precision, + }) + }) + }) + + it('should parse DateTime64 with timezone', async () => { + const allPrecisionArgs: [string, number, string][][] = precisionRange.map( + (precision) => [ + [`DateTime64(${precision}, 'GB')`, precision, 'GB'], + [`DateTime64(${precision}, 'UTC')`, precision, 'UTC'], + [`DateTime64(${precision}, 'Etc/GMT-5')`, precision, 'Etc/GMT-5'], + ], + ) + allPrecisionArgs.forEach((args) => + args.forEach(([columnType, precision, timezone]) => { + const result = parseDateTime64Type({ + columnType, + sourceType: columnType, + }) + expect(result) + .withContext( + `Expected ${columnType} to be parsed as a DateTime64 with precision ${precision} and timezone ${timezone}`, + ) + .toEqual({ + type: 'DateTime64', + sourceType: columnType, + timezone, + precision, + }) + }), + ) + }) + + it('should throw on invalid DateTime64 type', async () => { + const args = [['DateTime64('], ['DateTime64()'], ['String']] + args.forEach(([columnType]) => { + 
expect(() => + parseDateTime64Type({ columnType, sourceType: columnType }), + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime64 type') + }) + }) + + it('should throw on invalid DateTime64 precision', async () => { + const args = [[`DateTime64(')`], [`DateTime64(foo)`]] + args.forEach(([columnType]) => { + expect(() => + parseDateTime64Type({ columnType, sourceType: columnType }), + ) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid DateTime64 precision') + }) + }) + }) +}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts b/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts new file mode 100644 index 0000000..f0010db --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_decimal.test.ts @@ -0,0 +1,103 @@ +import { parseDecimalType } from '../../src/parse' + +describe('Columns types parser - Decimal', () => { + type TestArgs = { + sourceType: string + precision: number + scale: number + intSize: 32 | 64 | 128 | 256 + } + + it('should parse Decimal', async () => { + const args: TestArgs[] = [ + { + sourceType: 'Decimal(7, 2)', + precision: 7, + scale: 2, + intSize: 32, + }, + { + sourceType: 'Decimal(12, 4)', + precision: 12, + scale: 4, + intSize: 64, + }, + { + sourceType: 'Decimal(27, 6)', + precision: 27, + scale: 6, + intSize: 128, + }, + { + sourceType: 'Decimal(42, 8)', + precision: 42, + scale: 8, + intSize: 256, + }, + ] + args.forEach(({ sourceType, precision, scale, intSize }) => { + const result = parseDecimalType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to be parsed as a Decimal with precision ${precision}, scale ${scale} and intSize ${intSize}`, + ) + .toEqual({ + type: 'Decimal', + params: { precision, scale, intSize }, + sourceType, + }) + }) + }) + + it('should throw on invalid Decimal type', async () => { + const args: [string][] = [ + ['Decimal'], + 
['Decimal('], + ['Decimal()'], + ['Decimal(1)'], + ['Decimal(1,)'], + ['Decimal(1, )'], + ['String'], + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal type') + }) + }) + + it('should throw on invalid Decimal precision', async () => { + const args: [string][] = [ + ['Decimal(0, 0)'], + ['Decimal(x, 0)'], + [`Decimal(', ')`], + [`Decimal(77, 1)`], // max is 76 + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal precision') + }) + }) + + it('should throw on invalid Decimal scale', async () => { + const args: [string][] = [ + ['Decimal(1, 2)'], // scale should be less than precision + ['Decimal(1, x)'], + [`Decimal(42, ,)`], + [`Decimal(42, ')`], + ] + args.forEach(([columnType]) => { + expect(() => parseDecimalType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Decimal scale') + }) + }) + + it('should throw when precision or scale cannot be parsed', async () => { + const columnType = 'Decimal(foobar)' + expect(() => + parseDecimalType({ columnType, sourceType: columnType }), + ).toThrowError('Expected Decimal type to have both precision and scale') + }) +}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts b/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts new file mode 100644 index 0000000..a3ac56c --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_enum.test.ts @@ -0,0 +1,89 @@ +import { enumTypes, parsedEnumTestArgs } from '@test/utils/native_columns' +import { parseEnumType } from '../../src/parse' + +describe('Columns types parser - Enum', () => { + it('should parse correct values', async () => { + 
parsedEnumTestArgs.forEach((expected) => { + const result = parseEnumType({ + sourceType: expected.sourceType, + columnType: expected.sourceType, + }) + expect(result) + .withContext( + `Expected ${ + expected.sourceType + } to be parsed as an Enum with intSize ${ + expected.intSize + } and values ${JSON.stringify(expected.values)}`, + ) + .toEqual(expected) + }) + }) + + it('should throw when the type is not a valid enum', async () => { + const args: [string][] = [ + ['Enum'], // should be either 8 or 16 + ['Enum32'], + ['Enum64'], + ['String'], + ['Enum(String)'], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Expected Enum to be either Enum8 or Enum16') + }) + }) + + it('should throw when the values are not valid', async () => { + const args: [string][] = [["Enum8('a' = x)"], ["Enum16('foo' = 'bar')"]] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Expected Enum index to be a valid number') + }) + }) + + it('should throw on duplicate indices', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'b' = 0)"], + ["Enum8('a' = 0, 'b' = 1, 'c' = 1)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum index') + }) + }) + + it('should throw on duplicate names', async () => { + const args: [string][] = [ + ["Enum8('a' = 0, 'a' = 1)"], + ["Enum8('a' = 0, 'b' = 1, 'b' = 2)"], + ] + args.forEach(([columnType]) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Duplicate Enum name') + }) + }) + + it('should throw when Enum has no values to parse', async () => { + // The minimal allowed 
Enum definition is Enum8('' = 0), i.e. 6 chars inside. + const allEnumTypeArgs: string[][] = enumTypes.map(([enumType]) => [ + `${enumType}()`, + `${enumType}(')`, + `${enumType}('')`, + `${enumType}('' )`, + `${enumType}('' =)`, + `${enumType}('' = )`, + ]) + allEnumTypeArgs.forEach((args) => + args.forEach((columnType) => { + expect(() => parseEnumType({ columnType, sourceType: columnType })) + .withContext(`Expected ${columnType} to throw`) + .toThrowError('Invalid Enum type values') + }), + ) + }) +}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_map.test.ts b/packages/client-common/__tests__/unit/parse_column_types_map.test.ts new file mode 100644 index 0000000..57b4b30 --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_map.test.ts @@ -0,0 +1,41 @@ +import type { ParsedColumnMap } from '../../src/parse' +import { parseMapType } from '../../src/parse' + +describe('Columns types parser - Map', () => { + it('should parse Map with simple types', async () => { + const args: [ParsedColumnMap, string][] = [ + [ + { + type: 'Map', + key: { type: 'Simple', columnType: 'String', sourceType: 'String' }, + value: { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' }, + sourceType: 'Map(String, UInt8)', + }, + 'Map(String, UInt8)', + ], + [ + { + type: 'Map', + key: { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + value: { + type: 'Simple', + columnType: 'Float32', + sourceType: 'Float32', + }, + sourceType: 'Map(Int32, Float32)', + }, + 'Map(Int32, Float32)', + ], + ] + args.forEach(([expected, sourceType]) => { + const result = parseMapType({ columnType: sourceType, sourceType }) + expect(result) + .withContext( + `Expected ${sourceType} to be parsed as a Map with key type ${expected.key.sourceType} and value type ${expected.value.sourceType}`, + ) + .toEqual(expected) + }) + }) + + // TODO: rest of the allowed types. 
+}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts b/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts new file mode 100644 index 0000000..daa9ca1 --- /dev/null +++ b/packages/client-common/__tests__/unit/parse_column_types_nullable.test.ts @@ -0,0 +1,266 @@ +import type { + ParsedColumnDateTime, + ParsedColumnDateTime64, + ParsedColumnDecimal, + ParsedColumnEnum, + ParsedColumnSimple, +} from '../../src/parse' +import { asNullableType } from '../../src/parse' + +describe('Columns types parser - Nullable', () => { + it('should wrap a simple type', async () => { + const args: [ParsedColumnSimple, string][] = [ + [ + { type: 'Simple', columnType: 'String', sourceType: 'String' }, + 'Nullable(String)', + ], + [ + { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' }, + 'Nullable(UInt8)', + ], + [ + { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' }, + 'Nullable(Int32)', + ], + [ + { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' }, + 'Nullable(Float32)', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.columnType} to be wrapped as ${sourceType}`, + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap an Enum', async () => { + const sourceEnum8 = `Enum8('foo' = 42)` + const valuesEnum8 = { 42: 'foo' } + const sourceEnum16 = `Enum16('bar' = 144, 'qaz' = 500)` + const valuesEnum16 = { + 144: 'bar', + 500: 'qaz', + } + const args: [ParsedColumnEnum, string][] = [ + [ + { + type: 'Enum', + intSize: 8, + values: valuesEnum8, + sourceType: sourceEnum8, + }, + 'Nullable(Enum8)', + ], + [ + { + type: 'Enum', + intSize: 16, + values: valuesEnum16, + sourceType: sourceEnum16, + }, + 'Nullable(Enum16)', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + 
.withContext(`Expected ${value.type} to be wrapped as ${sourceType}`) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a Decimal', async () => { + const args: [ParsedColumnDecimal, string][] = [ + [ + { + type: 'Decimal', + params: { intSize: 32, precision: 4, scale: 3 }, + sourceType: 'Decimal(4, 3)', + }, + 'Nullable(Decimal(4, 3))', + ], + [ + { + type: 'Decimal', + params: { intSize: 64, precision: 12, scale: 6 }, + sourceType: 'Decimal(12, 6)', + }, + 'Nullable(Decimal(12, 6))', + ], + [ + { + type: 'Decimal', + params: { intSize: 128, precision: 24, scale: 12 }, + sourceType: 'Decimal(24, 12)', + }, + 'Nullable(Decimal(24, 12))', + ], + [ + { + type: 'Decimal', + params: { intSize: 256, precision: 42, scale: 20 }, + sourceType: 'Decimal(42, 20)', + }, + 'Nullable(Decimal(42, 20))', + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a DateTime', async () => { + const args: [ParsedColumnDateTime, string][] = [ + [ + { type: 'DateTime', timezone: null, sourceType: 'DateTime' }, + 'Nullable(DateTime)', + ], + [ + { type: 'DateTime', timezone: 'UTC', sourceType: "DateTime('UTC')" }, + `Nullable(DateTime('UTC'))`, + ], + [ + { type: 'DateTime', timezone: 'GB', sourceType: "DateTime('GB')" }, + `Nullable(DateTime('GB'))`, + ], + [ + { + type: 'DateTime', + timezone: 'Etc/GMT-5', + sourceType: `DateTime('Etc/GMT-5')`, + }, + `Nullable(DateTime('Etc/GMT-5'))`, + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should wrap a DateTime64', async () => { + const 
args: [ParsedColumnDateTime64, string][] = [ + [ + { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(0)', + precision: 3, + }, + 'Nullable(DateTime64(0))', + ], + [ + { + type: 'DateTime64', + timezone: null, + sourceType: 'DateTime64(3)', + precision: 3, + }, + 'Nullable(DateTime64(3))', + ], + [ + { + type: 'DateTime64', + timezone: 'UTC', + sourceType: `DateTime64(3, 'UTC')`, + precision: 3, + }, + `Nullable(DateTime64(3, 'UTC'))`, + ], + [ + { + type: 'DateTime64', + timezone: 'GB', + sourceType: `DateTime64(6, 'GB')`, + precision: 6, + }, + `Nullable(DateTime64(6, 'GB'))`, + ], + [ + { + type: 'DateTime64', + timezone: 'Etc/GMT-5', + sourceType: `DateTime64(9, 'Etc/GMT-5')`, + precision: 9, + }, + `Nullable(DateTime64(9, 'Etc/GMT-5'))`, + ], + ] + args.forEach(([value, sourceType]) => { + const result = asNullableType(value, sourceType) + expect(result) + .withContext( + `Expected ${value.sourceType} to be wrapped as ${sourceType}`, + ) + .toEqual({ + type: 'Nullable', + sourceType, + value, + }) + }) + }) + + it('should throw in case of Array or Map', async () => { + const columnUInt8: ParsedColumnSimple = { + type: 'Simple', + columnType: 'UInt8', + sourceType: 'UInt8', + } + const columnString: ParsedColumnSimple = { + type: 'Simple', + columnType: 'String', + sourceType: 'String', + } + expect(() => + asNullableType( + { + type: 'Map', + key: columnUInt8, + value: columnString, + sourceType: 'Map(UInt8, String)', + }, + '...', + ), + ).toThrowError('Map cannot be Nullable') + expect(() => + asNullableType( + { + type: 'Array', + value: columnUInt8, + dimensions: 1, + sourceType: 'Array(UInt8)', + }, + '...', + ), + ).toThrowError('Array cannot be Nullable') + }) +}) diff --git a/packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts b/packages/client-common/__tests__/unit/parse_column_types_tuple.test.ts new file mode 100644 index 0000000..2f100f8 --- /dev/null +++ 
// Tuples that consist only of simple (non-parametrized) element types.
it('should parse Tuple with simple types', async () => {
  const stringAndUInt8: TestArgs = {
    sourceType: 'Tuple(String, UInt8)',
    expected: {
      type: 'Tuple',
      elements: [
        { type: 'Simple', columnType: 'String', sourceType: 'String' },
        { type: 'Simple', columnType: 'UInt8', sourceType: 'UInt8' },
      ],
      sourceType: 'Tuple(String, UInt8)',
    },
  }
  const int32AndFloat32: TestArgs = {
    sourceType: 'Tuple(Int32, Float32)',
    expected: {
      type: 'Tuple',
      elements: [
        { type: 'Simple', columnType: 'Int32', sourceType: 'Int32' },
        { type: 'Simple', columnType: 'Float32', sourceType: 'Float32' },
      ],
      sourceType: 'Tuple(Int32, Float32)',
    },
  }
  for (const { sourceType, expected } of [stringAndUInt8, int32AndFloat32]) {
    const parsed = parseTupleType({ columnType: sourceType, sourceType })
    // The context message lists the expected element types to simplify debugging.
    const context = `Expected ${sourceType} to have ${joinElements(expected)} elements`
    expect(parsed).withContext(context).toEqual(expected)
  }
})
// Pairs a String element with every Enum fixture, e.g. Tuple(String, Enum8('a' = 1)).
it('should parse Tuple with Enums', async () => {
  for (const enumElement of parsedEnumTestArgs) {
    const sourceType = `Tuple(${stringElement.sourceType}, ${enumElement.sourceType})`
    const expected: ParsedColumnTuple = {
      type: 'Tuple',
      elements: [stringElement, enumElement],
      sourceType,
    }
    const parsed = parseTupleType({ columnType: sourceType, sourceType })
    const context = `Expected ${sourceType} to have ${joinElements(expected)} elements`
    expect(parsed).withContext(context).toEqual(expected)
  }
})
/** Renders the source types of the tuple elements as a comma-separated list (used in assertion messages). */
function joinElements(expected: ParsedColumnTuple): string {
  const sourceTypes: string[] = []
  for (const element of expected.elements) {
    sourceTypes.push(element.sourceType)
  }
  return sourceTypes.join(', ')
}
/**
 * Thrown by the column type parsers when a type definition cannot be parsed.
 * Additional details about the failure (the offending type, intermediate parsing state, etc.)
 * are available in {@link ColumnTypeParseError.args}.
 */
export class ColumnTypeParseError extends Error {
  /** Arbitrary debugging details supplied at the throw site; an empty object if none were provided. */
  readonly args: Record<string, unknown>

  /**
   * @param message human-readable description of the failure.
   * @param args optional debugging details; defaults to an empty object.
   */
  constructor(message: string, args?: Record<string, unknown>) {
    super(message)
    this.args = args ?? {}

    // Set the prototype explicitly, see:
    // https://github.com/Microsoft/TypeScript/wiki/Breaking-Changes#extending-built-ins-like-error-array-and-map-may-no-longer-work
    Object.setPrototypeOf(this, ColumnTypeParseError.prototype)
  }
}
/**
 * Parses a ClickHouse column type definition into its structured representation.
 *
 * @experimental - incomplete, unstable API;
 * originally intended to be used for RowBinary/Native header parsing internally.
 * Currently unsupported source types:
 * * Geo
 * * (Simple)AggregateFunction
 * * Nested
 * * Old/new JSON
 * * Dynamic
 * * Variant
 *
 * @param sourceType the full type definition, e.g. `LowCardinality(Nullable(String))`.
 * @returns the parsed type; wrapped as Nullable if the definition had the `Nullable(` wrapper.
 * @throws ColumnTypeParseError if the type is not supported, or if one of the specific parsers rejects it.
 */
export function parseColumnType(sourceType: string): ParsedColumnType {
  // The LowCardinality wrapper is checked first, and then the Nullable one.
  const withoutLowCardinality = sourceType.startsWith(LowCardinalityPrefix)
    ? sourceType.slice(LowCardinalityPrefix.length, -1)
    : sourceType
  const isNullable = withoutLowCardinality.startsWith(NullablePrefix)
  const columnType = isNullable
    ? withoutLowCardinality.slice(NullablePrefix.length, -1)
    : withoutLowCardinality

  const parsed = parseUnwrapped()
  return isNullable ? asNullableType(parsed, sourceType) : parsed

  /** Dispatches the type without its wrappers to a specific parser. The order of the checks matters. */
  function parseUnwrapped(): ParsedColumnType {
    const params = { sourceType, columnType }
    if ((SimpleColumnTypes as readonly string[]).includes(columnType)) {
      return {
        type: 'Simple',
        columnType: columnType as SimpleColumnType,
        sourceType,
      }
    }
    if (columnType.startsWith(DecimalPrefix)) {
      return parseDecimalType(params)
    }
    // DateTime64 has to be checked before DateTime, as they share the same prefix.
    if (columnType.startsWith(DateTime64Prefix)) {
      return parseDateTime64Type(params)
    }
    if (columnType.startsWith(DateTimePrefix)) {
      return parseDateTimeType(params)
    }
    if (columnType.startsWith(FixedStringPrefix)) {
      return parseFixedStringType(params)
    }
    const isEnum =
      columnType.startsWith(Enum8Prefix) || columnType.startsWith(Enum16Prefix)
    if (isEnum) {
      return parseEnumType(params)
    }
    if (columnType.startsWith(ArrayPrefix)) {
      return parseArrayType(params)
    }
    if (columnType.startsWith(MapPrefix)) {
      return parseMapType(params)
    }
    if (columnType.startsWith(TuplePrefix)) {
      return parseTupleType(params)
    }
    throw new ColumnTypeParseError('Unsupported column type', { columnType })
  }
}
/**
 * Parses an `Enum8(...)` or `Enum16(...)` definition into a mapping of numeric values to names.
 *
 * Names may contain escaped ticks, equals signs, commas, and parentheses; for example:
 * Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100).
 * Escape sequences are kept in the resulting names as they appear in the definition.
 * Negative values such as Enum8('a' = -1) are accepted.
 *
 * @param columnType the Enum definition to parse; must start with `Enum8(` or `Enum16(`.
 * @param sourceType the full original definition; used in the result and in the error details only.
 * @returns the parsed Enum with `intSize` set to 8 or 16 depending on the prefix.
 * @throws ColumnTypeParseError if it is not an Enum8/Enum16, if the definition is too short,
 * if a value is not a number, or if there are duplicate names or values.
 */
export function parseEnumType({
  columnType,
  sourceType,
}: ParseColumnTypeParams): ParsedColumnEnum {
  let intSize: 8 | 16
  if (columnType.startsWith(Enum8Prefix)) {
    columnType = columnType.slice(Enum8Prefix.length, -1)
    intSize = 8
  } else if (columnType.startsWith(Enum16Prefix)) {
    columnType = columnType.slice(Enum16Prefix.length, -1)
    intSize = 16
  } else {
    throw new ColumnTypeParseError(
      'Expected Enum to be either Enum8 or Enum16',
      {
        columnType,
        sourceType,
      },
    )
  }
  // The minimal allowed Enum definition is Enum8('' = 0), i.e. 6 chars inside.
  if (columnType.length < 6) {
    throw new ColumnTypeParseError('Invalid Enum type values', {
      columnType,
      sourceType,
    })
  }

  const names: string[] = []
  const indices: number[] = []
  let parsingName = true // false when parsing the index
  let charEscaped = false // we should ignore escaped ticks
  let startIndex = 1 // Skip the first '

  // Should support the most complicated enums, such as Enum8('f\'' = 1, 'x =' = 2, 'b\'\'\'' = 3, '\'c=4=' = 42, '4' = 100)
  for (let i = 1; i < columnType.length; i++) {
    const charCode = columnType.charCodeAt(i)
    if (parsingName) {
      if (charEscaped) {
        charEscaped = false
      } else if (charCode === BackslashASCII) {
        charEscaped = true
      } else if (charCode === SingleQuoteASCII) {
        // non-escaped closing tick - push the name
        const name = columnType.slice(startIndex, i)
        if (names.includes(name)) {
          throw new ColumnTypeParseError('Duplicate Enum name', {
            columnType,
            sourceType,
            name,
            names,
            indices,
          })
        }
        names.push(name)
        // skip ` = ` and the first char of the index: it is either a digit or a minus sign,
        // and there is always at least one more char to look at after it.
        i += 4
        startIndex = i
        parsingName = false
      }
    }
    // Parsing the index, skipping next iterations until the first non-digit one
    else if (charCode < ZeroASCII || charCode > NineASCII) {
      pushEnumIndex(startIndex, i)
      // the char at this index should be comma.
      i += 2 // skip ` '`, but not the first char - ClickHouse allows something like Enum8('foo' = 0, '' = 42)
      startIndex = i + 1
      parsingName = true
      charEscaped = false
    }
  }

  // Push the last index
  pushEnumIndex(startIndex, columnType.length)
  if (names.length !== indices.length) {
    throw new ColumnTypeParseError(
      'Expected Enum to have the same number of names and indices',
      { columnType, sourceType, names, indices },
    )
  }

  const values: ParsedColumnEnum['values'] = {}
  for (let i = 0; i < names.length; i++) {
    values[indices[i]] = names[i]
  }
  return {
    type: 'Enum',
    values,
    intSize,
    sourceType,
  }

  /** Parses the `[start, end)` part of the definition as a number and records it as the next index. */
  function pushEnumIndex(start: number, end: number) {
    const index = parseInt(columnType.slice(start, end), 10)
    // Negative values are valid: Enum8 and Enum16 are backed by signed integers in ClickHouse.
    if (Number.isNaN(index)) {
      throw new ColumnTypeParseError(
        'Expected Enum index to be a valid number',
        {
          columnType,
          sourceType,
          names,
          indices,
          index,
          start,
          end,
        },
      )
    }
    if (indices.includes(index)) {
      throw new ColumnTypeParseError('Duplicate Enum index', {
        columnType,
        sourceType,
        index,
        names,
        indices,
      })
    }
    indices.push(index)
  }
}
/**
 * Parses a `Tuple(T1, T2, ...)` definition; every element type is parsed with {@link parseColumnType}.
 *
 * @param columnType the Tuple definition to parse.
 * @param sourceType the full original definition; used in the result and in the error details.
 * @throws ColumnTypeParseError if the definition does not start with `Tuple(`, if it is too short,
 * if no elements are found, or if one of the element types cannot be parsed.
 */
export function parseTupleType({
  columnType,
  sourceType,
}: ParseColumnTypeParams): ParsedColumnTuple {
  // Tuple(Int8) is the shortest valid definition
  const minLength = TuplePrefix.length + 5
  const looksLikeTuple =
    columnType.startsWith(TuplePrefix) && columnType.length >= minLength
  if (!looksLikeTuple) {
    throw new ColumnTypeParseError('Invalid Tuple type', {
      columnType,
      sourceType,
    })
  }
  // Tuple(T1, T2) -> T1, T2
  const innerTypes = columnType.slice(TuplePrefix.length, -1)
  const elements: ParsedColumnType[] = []
  for (const elementType of getElementsTypes(
    { columnType: innerTypes, sourceType },
    1,
  )) {
    elements.push(parseColumnType(elementType))
  }
  return {
    type: 'Tuple',
    elements,
    sourceType,
  }
}
/**
 * Parses either a plain `DateTime` or a `DateTime('Timezone')` definition.
 *
 * @param columnType the DateTime definition to parse.
 * @param sourceType the full original definition; used in the result and in the error details.
 * @returns the parsed type; `timezone` is null when the definition has no arguments.
 * @throws ColumnTypeParseError if the definition matches neither of the two forms.
 */
export function parseDateTimeType({
  columnType,
  sourceType,
}: ParseColumnTypeParams): ParsedColumnDateTime {
  // Plain DateTime without arguments.
  if (columnType === DateTimePrefix) {
    return {
      type: 'DateTime',
      timezone: null,
      sourceType,
    }
  }
  // DateTime('GB') has the least amount of chars
  const minLengthExclusive = DateTimeWithTimezonePrefix.length + 4
  const hasTimezone =
    columnType.startsWith(DateTimeWithTimezonePrefix) &&
    columnType.length > minLengthExclusive
  if (hasTimezone) {
    // Drop the opening tick after the prefix, as well as the closing tick and the paren.
    const timezone = columnType.slice(
      DateTimeWithTimezonePrefix.length + 1,
      -2,
    )
    return {
      type: 'DateTime',
      timezone,
      sourceType,
    }
  }
  throw new ColumnTypeParseError('Invalid DateTime type', {
    columnType,
    sourceType,
  })
}
/**
 * Wraps an already parsed type as Nullable.
 *
 * If the source type of the wrapped value itself starts with `Nullable(`, the value stored
 * in the result is a copy with that wrapper removed from its source type;
 * the object passed by the caller is never modified.
 *
 * @param value the parsed type to wrap.
 * @param sourceType the source type of the resulting Nullable.
 * @throws ColumnTypeParseError if the value is an Array, a Map, a Tuple, or a Nullable.
 */
export function asNullableType(
  value: ParsedColumnType,
  sourceType: string,
): ParsedColumnNullable {
  if (
    value.type === 'Array' ||
    value.type === 'Map' ||
    value.type === 'Tuple' ||
    value.type === 'Nullable'
  ) {
    throw new ColumnTypeParseError(`${value.type} cannot be Nullable`, {
      sourceType,
    })
  }
  // Copy instead of reassigning `value.sourceType`, so that the argument stays intact.
  const unwrapped = value.sourceType.startsWith(NullablePrefix)
    ? {
        ...value,
        sourceType: value.sourceType.slice(NullablePrefix.length, -1),
      }
    : value
  return {
    type: 'Nullable',
    sourceType,
    value: unwrapped,
  }
}
+ * The most complicated cases are values names in the self-defined Enum types: + * * `Tuple(Enum8('f\'()' = 1))` -> `f\'()` + * * `Tuple(Enum8('(' = 1))` -> `(` + * See also: {@link parseEnumType }, which works similarly (but has to deal with the indices following the names). */ + let openParens = 0 + let quoteOpen = false + let charEscaped = false + let lastElementIndex = 0 + for (let i = 0; i < columnType.length; i++) { + // prettier-ignore + // console.log(i, 'Current char:', columnType[i], 'openParens:', openParens, 'quoteOpen:', quoteOpen, 'charEscaped:', charEscaped) + if (charEscaped) { + charEscaped = false + } else if (columnType.charCodeAt(i) === BackslashASCII) { + charEscaped = true + } else if (columnType.charCodeAt(i) === SingleQuoteASCII) { + quoteOpen = !quoteOpen // unescaped quote + } else { + if (!quoteOpen) { + if (columnType.charCodeAt(i) === LeftParenASCII) { + openParens++ + } else if (columnType.charCodeAt(i) === RightParenASCII) { + openParens-- + } else if (columnType.charCodeAt(i) === CommaASCII) { + if (openParens === 0) { + elements.push(columnType.slice(lastElementIndex, i)) + // console.log('Pushed element:', elements[elements.length - 1]) + i += 2 // skip ', ' + lastElementIndex = i + } + } + } + } + } + + // prettier-ignore + // console.log('Final elements:', elements, 'nextElementIndex:', lastElementIndex, 'minElements:', minElements, 'openParens:', openParens) + + // Push the remaining part of the type if it seems to be valid (at least all parentheses are closed) + if (!openParens && lastElementIndex < columnType.length - 1) { + elements.push(columnType.slice(lastElementIndex)) + } + if (elements.length < minElements) { + throw new ColumnTypeParseError('Expected more elements in the type', { + sourceType, + columnType, + elements, + minElements, + }) + } + return elements +} + +interface ParseColumnTypeParams { + /** A particular type to parse, such as DateTime. 
// Type name prefixes recognized by the parsers. All of them include the opening parenthesis,
// except for DateTimePrefix, which is also used to match the DateTime type without arguments.
const NullablePrefix = 'Nullable('
const LowCardinalityPrefix = 'LowCardinality('
const DecimalPrefix = 'Decimal('
const ArrayPrefix = 'Array('
const MapPrefix = 'Map('
const Enum8Prefix = 'Enum8('
const Enum16Prefix = 'Enum16('
const TuplePrefix = 'Tuple('
const DateTimePrefix = 'DateTime'
const DateTimeWithTimezonePrefix = 'DateTime('
const DateTime64Prefix = 'DateTime64('
const FixedStringPrefix = 'FixedString('

// ASCII codes of the characters that the Enum and the Map/Tuple elements parsers compare against.
const SingleQuoteASCII = 39 // '
const LeftParenASCII = 40 // (
const RightParenASCII = 41 // )
const CommaASCII = 44 // ,
const ZeroASCII = 48 // 0
const NineASCII = 57 // 9
const BackslashASCII = 92 // \ (backslash)
ParsedColumnMap, + type ParsedColumnType, + parseColumnType, + SimpleColumnTypes, } from '@clickhouse/client-common' diff --git a/packages/client-node/src/version.ts b/packages/client-node/src/version.ts index d657419..c801573 100644 --- a/packages/client-node/src/version.ts +++ b/packages/client-node/src/version.ts @@ -1 +1 @@ -export default '1.6.0' +export default '1.7.0' diff --git a/packages/client-web/src/index.ts b/packages/client-web/src/index.ts index 090df9e..791a74a 100644 --- a/packages/client-web/src/index.ts +++ b/packages/client-web/src/index.ts @@ -45,4 +45,18 @@ export { StreamableJSONFormats, SingleDocumentJSONFormats, RecordsJSONFormats, + type SimpleColumnType, + type ParsedColumnSimple, + type ParsedColumnEnum, + type ParsedColumnFixedString, + type ParsedColumnNullable, + type ParsedColumnDecimal, + type ParsedColumnDateTime, + type ParsedColumnDateTime64, + type ParsedColumnArray, + type ParsedColumnTuple, + type ParsedColumnMap, + type ParsedColumnType, + parseColumnType, + SimpleColumnTypes, } from '@clickhouse/client-common' diff --git a/packages/client-web/src/version.ts b/packages/client-web/src/version.ts index d657419..c801573 100644 --- a/packages/client-web/src/version.ts +++ b/packages/client-web/src/version.ts @@ -1 +1 @@ -export default '1.6.0' +export default '1.7.0'