From fd5f0d4c3e672d8616fc6bb6dafa477785d78612 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Mon, 15 Jul 2024 10:07:40 +0800 Subject: [PATCH] [bugfix](paimon)Fixed the reading of timestamp with time zone type data (#37716) ## Proposed changes 1. When using jni to read timestamps with time zones, the time needs to be converted to local time 2. In version 0.8 of paimon, the time zone (isAdjustToUTC) information of parquet files is added, and doris can parse data directly according to the time zone information --- be/src/vec/exec/scan/vfile_scanner.cpp | 15 +- .../iceberg/tools/save_docker.sh | 4 + .../doris/paimon/PaimonColumnValue.java | 30 ++- .../apache/doris/paimon/PaimonJniScanner.java | 14 +- .../paimon/PaimonExternalTable.java | 25 +- .../paimon/paimon_timestamp_types.out | 139 +++++++++++ .../paimon/paimon_timestamp_types.groovy | 224 ++++++++++++++++-- 7 files changed, 400 insertions(+), 51 deletions(-) create mode 100644 regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 1e0b42ebfc88ff..3ef07c50b64b3d 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -785,20 +785,9 @@ Status VFileScanner::_get_next_reader() { break; } case TFileFormatType::FORMAT_PARQUET: { - static const cctz::time_zone utc0 = cctz::utc_time_zone(); - cctz::time_zone* tz; - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "paimon") { - // The timestmap generated by paimon does not carry metadata information (e.g., isAdjustToUTC, etc.), - // and the stored data is UTC0 by default, so it is directly set to the UTC time zone. - // In version 0.7, paimon fixed this issue and can remove the judgment here - tz = const_cast(&utc0); - } else { - tz = const_cast(&_state->timezone_obj()); - } std::unique_ptr parquet_reader = ParquetReader::create_unique( - _profile, *_params, range, _state->query_options().batch_size, tz, - _io_ctx.get(), _state, + _profile, *_params, range, _state->query_options().batch_size, + const_cast(&_state->timezone_obj()), _io_ctx.get(), _state, _shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache() : nullptr, _state->query_options().enable_parquet_lazy_mat); diff --git a/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh index cc149d481180da..acc84535b96a5b 100644 --- a/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh +++ b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh @@ -21,3 +21,7 @@ docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/in # save iceberg from s3 docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio' + +# package zip +cp -r data iceberg_data +zip -rq iceberg_data.zip iceberg_data \ No newline at end of file diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java index d102ebd2fdf0ef..73aa6ce8550a4b 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java @@ -24,6 +24,12 @@ import org.apache.paimon.data.InternalArray; import org.apache.paimon.data.InternalMap; import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.types.ArrayType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.MapType; +import org.apache.paimon.types.RowType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +37,7 @@ import java.math.BigInteger; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.List; public class PaimonColumnValue implements ColumnValue { @@ -38,19 +45,22 @@ public class PaimonColumnValue implements ColumnValue { private int idx; private DataGetters record; private ColumnType dorisType; + private DataType dataType; public PaimonColumnValue() { } - public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType) { + public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType, DataType dataType) { this.idx = idx; this.record = record; this.dorisType = columnType; + this.dataType = dataType; } - public void setIdx(int idx, ColumnType dorisType) { + public void setIdx(int idx, ColumnType dorisType, DataType dataType) { this.idx = idx; this.dorisType = dorisType; + this.dataType = dataType; } public void setOffsetRow(InternalRow record) { @@ -124,7 +134,12 @@ public LocalDate getDate() { @Override public LocalDateTime getDateTime() { - return record.getTimestamp(idx, dorisType.getPrecision()).toLocalDateTime(); + Timestamp ts = record.getTimestamp(idx, dorisType.getPrecision()); + if (dataType instanceof LocalZonedTimestampType) { + return LocalDateTime.ofInstant(ts.toInstant(), ZoneId.systemDefault()); + } else { + return ts.toLocalDateTime(); + } } @Override @@ -142,7 +157,7 @@ public void unpackArray(List values) { InternalArray recordArray = record.getArray(idx); for (int i = 0; i < recordArray.size(); i++) { PaimonColumnValue arrayColumnValue = new PaimonColumnValue((DataGetters) recordArray, i, - dorisType.getChildTypes().get(0)); + dorisType.getChildTypes().get(0), ((ArrayType) dataType).getElementType()); values.add(arrayColumnValue); } } @@ -153,13 +168,13 @@ public void unpackMap(List keys, List values) { InternalArray key = map.keyArray(); for (int i = 0; i < key.size(); i++) { PaimonColumnValue keyColumnValue = new PaimonColumnValue((DataGetters) key, i, - dorisType.getChildTypes().get(0)); + dorisType.getChildTypes().get(0), ((MapType) dataType).getKeyType()); keys.add(keyColumnValue); } InternalArray value = map.valueArray(); for (int i = 0; i < value.size(); i++) { PaimonColumnValue valueColumnValue = new PaimonColumnValue((DataGetters) value, i, - dorisType.getChildTypes().get(1)); + dorisType.getChildTypes().get(1), ((MapType) dataType).getValueType()); values.add(valueColumnValue); } } @@ -169,7 +184,8 @@ public void unpackStruct(List structFieldIndex, List value // todo: support pruned struct fields InternalRow row = record.getRow(idx, structFieldIndex.size()); for (int i : structFieldIndex) { - values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i))); + values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i), + ((RowType) dataType).getFields().get(i).type())); } } } diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index ca01633946c809..719a7ea0b9d9e9 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -53,6 +53,7 @@ public class PaimonJniScanner extends JniScanner { private RecordReader reader; private final PaimonColumnValue columnValue = new PaimonColumnValue(); private List paimonAllFieldNames; + private List paimonDataTypeList; private long ctlId; private long dbId; @@ -99,7 +100,7 @@ public void open() throws IOException { initTable(); initReader(); resetDatetimeV2Precision(); - } catch (Exception e) { + } catch (Throwable e) { LOG.warn("Failed to open paimon_scanner: " + e.getMessage(), e); throw e; } @@ -114,9 +115,12 @@ private void initReader() throws IOException { + " Please refresh table and try again", fields.length, paimonAllFieldNames.size())); } - readBuilder.withProjection(getProjected()); + int[] projected = getProjected(); + readBuilder.withProjection(projected); readBuilder.withFilter(getPredicates()); reader = readBuilder.newRead().createReader(getSplit()); + paimonDataTypeList = + Arrays.stream(projected).mapToObj(i -> table.rowType().getTypeAt(i)).collect(Collectors.toList()); } private int[] getProjected() { @@ -175,7 +179,7 @@ protected int getNext() throws IOException { while ((record = recordIterator.next()) != null) { columnValue.setOffsetRow(record); for (int i = 0; i < fields.length; i++) { - columnValue.setIdx(i, types[i]); + columnValue.setIdx(i, types[i], paimonDataTypeList.get(i)); appendData(i, columnValue); } rows++; @@ -189,8 +193,8 @@ protected int getNext() throws IOException { } catch (Exception e) { close(); LOG.warn("Failed to get the next batch of paimon. " - + "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}", - getSplit(), params.get("required_fields"), paimonAllFieldNames, e); + + "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}, dataType: {}", + getSplit(), params.get("required_fields"), paimonAllFieldNames, paimonDataTypeList, e); throw new IOException(e); } return rows; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index 2bcd095e037d3c..c9579d03e94086 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -88,6 +88,7 @@ public Optional initSchema() { } private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dataType) { + int tsScale = 3; // default switch (dataType.getTypeRoot()) { case BOOLEAN: return Type.BOOLEAN; @@ -114,20 +115,26 @@ private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dat case DATE: return ScalarType.createDateV2Type(); case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - int scale = 3; // default if (dataType instanceof org.apache.paimon.types.TimestampType) { - scale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision(); - if (scale > 6) { - scale = 6; + tsScale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; } } else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) { - scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); - if (scale > 6) { - scale = 6; + tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; + } + } + return ScalarType.createDatetimeV2Type(tsScale); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) { + tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; } } - return ScalarType.createDatetimeV2Type(scale); + return ScalarType.createDatetimeV2Type(tsScale); case ARRAY: ArrayType arrayType = (ArrayType) dataType; Type innerType = paimonPrimitiveTypeToDorisType(arrayType.getElementType()); diff --git a/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out b/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out new file mode 100644 index 00000000000000..9df5553d1493c5 --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out @@ -0,0 +1,139 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !c1 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !c2 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple7 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple8 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T10:12:34.123456 + +-- !c1 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T02:04:05.100 2024-01-02T02:04:05.120 2024-01-02T02:04:05.123 2024-01-02T02:04:05.123400 2024-01-02T02:04:05.123450 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 + +-- !c2 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple7 -- +\N + +-- !ltz_ntz_simple8 -- +\N + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T02:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T02:12:34.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple7 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple8 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T10:12:34.123456 + diff --git a/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy b/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy index 81b0e48e99091f..09fcba3ec18397 100644 --- a/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy +++ b/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy @@ -17,14 +17,9 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_docker_doris") { - def ts_orc = """select * from ts_orc""" - def ts_parquet = """select * from ts_parquet""" - String enabled = context.config.otherConfigs.get("enablePaimonTest") - // The timestamp type of paimon has no logical or converted type, - // and is conflict with column type change from bigint to timestamp. - // Deprecated currently. - if (enabled == null || !enabled.equalsIgnoreCase("enable_deprecated_case")) { + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disabled paimon test") return } @@ -46,20 +41,85 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock logger.info("catalog " + catalog_name + " created") sql """switch ${catalog_name};""" logger.info("switched to catalog " + catalog_name) - sql """use test_paimon_db;""" + sql """use flink_paimon;""" logger.info("use test_paimon_db") + + def test_ltz_ntz = { table -> + qt_ltz_ntz2 """ select * from ${table} """ + qt_ltz_ntz3 """ select cmap1 from ${table} """ + qt_ltz_ntz4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz6 """ select cmap2 from ${table} """ + qt_ltz_ntz7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz9 """ select cmap3 from ${table} """ + qt_ltz_ntz10 """ select cmap4 from ${table} """ + qt_ltz_ntz11 """ select cmap4['1'] from ${table} """ + qt_ltz_ntz12 """ select cmap4['2'] from ${table} """ + qt_ltz_ntz13 """ select cmap4['3'] from ${table} """ + qt_ltz_ntz14 """ select cmap4['4'] from ${table} """ + qt_ltz_ntz15 """ select carray1 from ${table} """ + qt_ltz_ntz16 """ select carray1[2] from ${table} """ + qt_ltz_ntz17 """ select carray2 from ${table} """ + qt_ltz_ntz18 """ select carray2[2] from ${table} """ + qt_ltz_ntz19 """ select crow1 from ${table} """ + qt_ltz_ntz20 """ select crow2 from ${table} """ + qt_ltz_ntz21 """ select crow3 from ${table} """ + } + + def test_ltz_ntz_simple = { table -> + qt_ltz_ntz_simple2 """ select * from ${table} """ + qt_ltz_ntz_simple3 """ select cmap1 from ${table} """ + qt_ltz_ntz_simple4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple6 """ select cmap2 from ${table} """ + qt_ltz_ntz_simple7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple9 """ select carray1 from ${table} """ + qt_ltz_ntz_simple10 """ select carray1[2] from ${table} """ + qt_ltz_ntz_simple11 """ select carray2 from ${table} """ + qt_ltz_ntz_simple12 """ select carray2[2] from ${table} """ + qt_ltz_ntz_simple13 """ select crow from ${table} """ + qt_ltz_ntz_simple14 """ select STRUCT_ELEMENT(crow, 'crow1') from ${table} """ + qt_ltz_ntz_simple15 """ select STRUCT_ELEMENT(crow, 'crow2') from ${table} """ + } + + def test_scale = { + def ts_scale_orc = """select * from ts_scale_orc""" + def ts_scale_parquet = """select * from ts_scale_parquet""" + qt_c1 ts_scale_orc + qt_c2 ts_scale_parquet + + } + sql """set force_jni_scanner=true""" - qt_c1 ts_orc - qt_c2 ts_parquet + test_scale() + // test_ltz_ntz("test_timestamp_ntz_ltz_orc") + // test_ltz_ntz("test_timestamp_ntz_ltz_parquet") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc") + // test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet") sql """set force_jni_scanner=false""" - qt_c3 ts_orc - qt_c4 ts_parquet - + test_scale() + // test_ltz_ntz("test_timestamp_ntz_ltz_orc") + // test_ltz_ntz("test_timestamp_ntz_ltz_parquet") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet") } finally { sql """set force_jni_scanner=false""" } + + // TODO: + // 1. Fix: native read + parquet + timestamp(7/8/9) (ts7,ts8,ts9), it will be 8 hour more + // 2. paimon bugs: native read + orc + timestamp_ltz. + // In the Shanghai time zone, the read data will be 8 hours less, + // because the data written by Flink to the orc file is UTC, but the time zone saved in the orc file is Shanghai. + // Currently, Paimon will not fix this problem, but recommends using the parquet format. + // 3. paimon bugs: jni read + parquet + row types + timestamp. + // Data of the timestamp type should be converted to the timestamp type, but paimon converted it to the long type. + // Will be fixed in paimon0.9 + } @@ -69,7 +129,22 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock SET 'table.local-time-zone' = 'Asia/Shanghai'; -create table ts_orc ( +CREATE CATALOG paimon_minio WITH ( + 'type' = 'paimon', + 'warehouse' = 's3://warehouse/wh', + 's3.endpoint' = 'http://172.21.0.101:19001', + 's3.access-key' = 'admin', + 's3.secret-key' = 'password', + 's3.path.style.access' = 'true' +); + +use catalog paimon_minio; + +CREATE DATABASE IF NOT EXISTS flink_paimon; + +use flink_paimon; + +create table ts_scale_orc ( id int, ts1 timestamp(1), ts2 timestamp(2), @@ -91,7 +166,7 @@ ts18 timestamp_ltz(8), ts19 timestamp_ltz(9)) WITH ('file.format' = 'orc','write-only'='true'); -create table ts_parquet ( +create table ts_scale_parquet ( id int, ts1 timestamp(1), ts2 timestamp(2), @@ -113,7 +188,7 @@ ts18 timestamp_ltz(8), ts19 timestamp_ltz(9)) WITH ('file.format' = 'parquet','write-only'='true'); -insert into ts_orc values ( +insert into ts_scale_orc values ( 1, timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789', @@ -134,7 +209,7 @@ insert into ts_orc values ( timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789'); -insert into ts_parquet values ( +insert into ts_scale_parquet values ( 1, timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789', @@ -155,4 +230,119 @@ insert into ts_parquet values ( timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789'); +create table test_timestamp_ntz_ltz_orc ( + id int, + cmap1 MAP, + cmap2 MAP, + cmap3 MAP>, + cmap4 MAP>, + carray1 ARRAY, + carray2 ARRAY, + crow1 ROW, + crow2 ROW>, + crow3 ROW, crow_array2 ARRAY> +) with ( + 'write-only' = 'true', + 'file.format' = 'orc' +); + +create table test_timestamp_ntz_ltz_parquet ( + id int, + cmap1 MAP, + cmap2 MAP, + cmap3 MAP>, + cmap4 MAP>, + carray1 ARRAY, + carray2 ARRAY, + crow1 ROW, + crow2 ROW>, + crow3 ROW, crow_array2 ARRAY> +) with ( + 'write-only' = 'true', + 'file.format' = 'parquet' +); + +insert into test_timestamp_ntz_ltz_orc values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'), + 2, ROW(timestamp '2024-01-02 10:12:34.123456'), + 3, ROW(timestamp '2024-01-03 10:12:34.123456') + ], + MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + 2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'), + 3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'), + 4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456') + ], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456']) +); + +-- Currently paimon does not support nested parquet formats +insert into test_timestamp_ntz_ltz_parquet values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'), + 2, ROW(timestamp '2024-01-02 10:12:34.123456'), + 3, ROW(timestamp '2024-01-03 10:12:34.123456') + ], + MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + 2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'), + 3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'), + 4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456') + ], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456']) +); + +create table test_timestamp_ntz_ltz_simple_orc ( + id int, + cmap1 MAP, + cmap2 MAP, + carray1 ARRAY, + carray2 ARRAY, + crow ROW +) with ( + 'write-only' = 'true', + 'file.format' = 'orc' +); + +create table test_timestamp_ntz_ltz_simple_parquet ( + id int, + cmap1 MAP, + cmap2 MAP, + carray1 ARRAY, + carray2 ARRAY, + crow ROW +) with ( + 'write-only' = 'true', + 'file.format' = 'parquet' +); + +insert into test_timestamp_ntz_ltz_simple_orc values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456') +); + +insert into test_timestamp_ntz_ltz_simple_parquet values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456') +); + */