From c42c5d3183c83d44c333b48cb93e04dee372508d Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Tue, 9 Jul 2024 15:03:36 +0800 Subject: [PATCH 1/6] fix --- be/src/vec/exec/scan/vfile_scanner.cpp | 14 ++------- .../doris/paimon/PaimonColumnValue.java | 31 ++++++++++++++----- .../apache/doris/paimon/PaimonJniScanner.java | 14 ++++++--- .../paimon/PaimonExternalTable.java | 19 ++++++++---- 4 files changed, 48 insertions(+), 30 deletions(-) diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index 1e0b42ebfc88ff..f106aa9365cfea 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -785,19 +785,9 @@ Status VFileScanner::_get_next_reader() { break; } case TFileFormatType::FORMAT_PARQUET: { - static const cctz::time_zone utc0 = cctz::utc_time_zone(); - cctz::time_zone* tz; - if (range.__isset.table_format_params && - range.table_format_params.table_format_type == "paimon") { - // The timestmap generated by paimon does not carry metadata information (e.g., isAdjustToUTC, etc.), - // and the stored data is UTC0 by default, so it is directly set to the UTC time zone. - // In version 0.7, paimon fixed this issue and can remove the judgment here - tz = const_cast(&utc0); - } else { - tz = const_cast(&_state->timezone_obj()); - } std::unique_ptr parquet_reader = ParquetReader::create_unique( - _profile, *_params, range, _state->query_options().batch_size, tz, + _profile, *_params, range, _state->query_options().batch_size, + const_cast(&_state->timezone_obj()), _io_ctx.get(), _state, _shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache() : nullptr, diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java index d102ebd2fdf0ef..291ef33a6512e9 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java @@ -24,6 +24,12 @@ import org.apache.paimon.data.InternalArray; import org.apache.paimon.data.InternalMap; import org.apache.paimon.data.InternalRow; +import org.apache.paimon.data.Timestamp; +import org.apache.paimon.types.ArrayType; +import org.apache.paimon.types.DataType; +import org.apache.paimon.types.LocalZonedTimestampType; +import org.apache.paimon.types.MapType; +import org.apache.paimon.types.RowType; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -31,6 +37,7 @@ import java.math.BigInteger; import java.time.LocalDate; import java.time.LocalDateTime; +import java.time.ZoneId; import java.util.List; public class PaimonColumnValue implements ColumnValue { @@ -38,19 +45,22 @@ public class PaimonColumnValue implements ColumnValue { private int idx; private DataGetters record; private ColumnType dorisType; + private DataType dataType; public PaimonColumnValue() { } - public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType) { + public PaimonColumnValue(DataGetters record, int idx, ColumnType columnType, DataType dataType) { this.idx = idx; this.record = record; this.dorisType = columnType; + this.dataType = dataType; } - public void setIdx(int idx, ColumnType dorisType) { + public void setIdx(int idx, ColumnType dorisType, DataType dataType) { this.idx = idx; this.dorisType = dorisType; + this.dataType = dataType; } public void setOffsetRow(InternalRow record) { @@ -124,7 +134,12 @@ public LocalDate getDate() { @Override public LocalDateTime getDateTime() { - return record.getTimestamp(idx, dorisType.getPrecision()).toLocalDateTime(); + Timestamp ts = record.getTimestamp(idx, dorisType.getPrecision()); + if (dataType instanceof LocalZonedTimestampType) { + return LocalDateTime.ofInstant(ts.toInstant(), ZoneId.systemDefault()); + } else { + return ts.toLocalDateTime(); + } } @Override @@ -142,7 +157,7 @@ public void unpackArray(List values) { InternalArray recordArray = record.getArray(idx); for (int i = 0; i < recordArray.size(); i++) { PaimonColumnValue arrayColumnValue = new PaimonColumnValue((DataGetters) recordArray, i, - dorisType.getChildTypes().get(0)); + dorisType.getChildTypes().get(0), ((ArrayType) dataType).getElementType()); values.add(arrayColumnValue); } } @@ -153,13 +168,13 @@ public void unpackMap(List keys, List values) { InternalArray key = map.keyArray(); for (int i = 0; i < key.size(); i++) { PaimonColumnValue keyColumnValue = new PaimonColumnValue((DataGetters) key, i, - dorisType.getChildTypes().get(0)); + dorisType.getChildTypes().get(0), ((MapType) dataType).getKeyType()); keys.add(keyColumnValue); } InternalArray value = map.valueArray(); for (int i = 0; i < value.size(); i++) { PaimonColumnValue valueColumnValue = new PaimonColumnValue((DataGetters) value, i, - dorisType.getChildTypes().get(1)); + dorisType.getChildTypes().get(1), ((MapType) dataType).getValueType()); values.add(valueColumnValue); } } @@ -169,7 +184,9 @@ public void unpackStruct(List structFieldIndex, List value // todo: support pruned struct fields InternalRow row = record.getRow(idx, structFieldIndex.size()); for (int i : structFieldIndex) { - values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i))); + LOG.info("mmc struct i:{}, dataType:{}", i, dataType); + values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i), + ((RowType) dataType).getFields().get(i).type())); } } } diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index ca01633946c809..c4f98978c4289b 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -51,8 +51,9 @@ public class PaimonJniScanner extends JniScanner { private final String paimonPredicate; private Table table; private RecordReader reader; - private final PaimonColumnValue columnValue = new PaimonColumnValue(); + private PaimonColumnValue columnValue = new PaimonColumnValue(); private List paimonAllFieldNames; + private List paimonDataTypeList; private long ctlId; private long dbId; @@ -114,9 +115,12 @@ private void initReader() throws IOException { + " Please refresh table and try again", fields.length, paimonAllFieldNames.size())); } - readBuilder.withProjection(getProjected()); + int[] projected = getProjected(); + readBuilder.withProjection(projected); readBuilder.withFilter(getPredicates()); reader = readBuilder.newRead().createReader(getSplit()); + paimonDataTypeList = + Arrays.stream(projected).mapToObj(i -> table.rowType().getTypeAt(i)).collect(Collectors.toList()); } private int[] getProjected() { @@ -175,7 +179,7 @@ protected int getNext() throws IOException { while ((record = recordIterator.next()) != null) { columnValue.setOffsetRow(record); for (int i = 0; i < fields.length; i++) { - columnValue.setIdx(i, types[i]); + columnValue.setIdx(i, types[i], paimonDataTypeList.get(i)); appendData(i, columnValue); } rows++; @@ -189,8 +193,8 @@ protected int getNext() throws IOException { } catch (Exception e) { close(); LOG.warn("Failed to get the next batch of paimon. " - + "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}", - getSplit(), params.get("required_fields"), paimonAllFieldNames, e); + + "split: {}, requiredFieldNames: {}, paimonAllFieldNames: {}, dataType: {}", + getSplit(), params.get("required_fields"), paimonAllFieldNames, paimonDataTypeList, e); throw new IOException(e); } return rows; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index 2bcd095e037d3c..8a40710ad74ec4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -88,6 +88,7 @@ public Optional initSchema() { } private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dataType) { + int tsScale = 3; // default switch (dataType.getTypeRoot()) { case BOOLEAN: return Type.BOOLEAN; @@ -114,12 +115,10 @@ private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dat case DATE: return ScalarType.createDateV2Type(); case TIMESTAMP_WITHOUT_TIME_ZONE: - case TIMESTAMP_WITH_LOCAL_TIME_ZONE: - int scale = 3; // default if (dataType instanceof org.apache.paimon.types.TimestampType) { - scale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision(); - if (scale > 6) { - scale = 6; + tsScale = ((org.apache.paimon.types.TimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; } } else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) { scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); @@ -127,7 +126,15 @@ private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dat scale = 6; } } - return ScalarType.createDatetimeV2Type(scale); + return ScalarType.createDatetimeV2Type(tsScale); + case TIMESTAMP_WITH_LOCAL_TIME_ZONE: + if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) { + tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; + } + } + return ScalarType.createDatetimeV2Type(tsScale); case ARRAY: ArrayType arrayType = (ArrayType) dataType; Type innerType = paimonPrimitiveTypeToDorisType(arrayType.getElementType()); From f613f21aecb04b6c1a6ef41a335ac65a62e8781e Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Thu, 11 Jul 2024 15:06:07 +0800 Subject: [PATCH 2/6] fix --- .../main/java/org/apache/doris/paimon/PaimonColumnValue.java | 1 - .../src/main/java/org/apache/doris/paimon/PaimonJniScanner.java | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java index 291ef33a6512e9..73aa6ce8550a4b 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonColumnValue.java @@ -184,7 +184,6 @@ public void unpackStruct(List structFieldIndex, List value // todo: support pruned struct fields InternalRow row = record.getRow(idx, structFieldIndex.size()); for (int i : structFieldIndex) { - LOG.info("mmc struct i:{}, dataType:{}", i, dataType); values.add(new PaimonColumnValue(row, i, dorisType.getChildTypes().get(i), ((RowType) dataType).getFields().get(i).type())); } diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index c4f98978c4289b..abcd428b2d7ff4 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -51,7 +51,7 @@ public class PaimonJniScanner extends JniScanner { private final String paimonPredicate; private Table table; private RecordReader reader; - private PaimonColumnValue columnValue = new PaimonColumnValue(); + private final PaimonColumnValue columnValue = new PaimonColumnValue(); private List paimonAllFieldNames; private List paimonDataTypeList; From e52dba7228b24b466b4f80a656200f98c7741c52 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Fri, 12 Jul 2024 15:42:04 +0800 Subject: [PATCH 3/6] fix --- .../paimon/PaimonExternalTable.java | 6 +- .../paimon/paimon_timestamp_types.out | 139 +++++++++++ .../paimon/paimon_timestamp_types.groovy | 224 ++++++++++++++++-- 3 files changed, 349 insertions(+), 20 deletions(-) create mode 100644 regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java index 8a40710ad74ec4..c9579d03e94086 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/PaimonExternalTable.java @@ -121,9 +121,9 @@ private Type paimonPrimitiveTypeToDorisType(org.apache.paimon.types.DataType dat tsScale = 6; } } else if (dataType instanceof org.apache.paimon.types.LocalZonedTimestampType) { - scale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); - if (scale > 6) { - scale = 6; + tsScale = ((org.apache.paimon.types.LocalZonedTimestampType) dataType).getPrecision(); + if (tsScale > 6) { + tsScale = 6; } } return ScalarType.createDatetimeV2Type(tsScale); diff --git a/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out b/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out new file mode 100644 index 00000000000000..9df5553d1493c5 --- /dev/null +++ b/regression-test/data/external_table_p0/paimon/paimon_timestamp_types.out @@ -0,0 +1,139 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !c1 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !c2 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple7 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple8 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T10:12:34.123456 + +-- !c1 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T02:04:05.100 2024-01-02T02:04:05.120 2024-01-02T02:04:05.123 2024-01-02T02:04:05.123400 2024-01-02T02:04:05.123450 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 2024-01-02T02:04:05.123456 + +-- !c2 -- +1 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T18:04:05.123456 2024-01-02T10:04:05.100 2024-01-02T10:04:05.120 2024-01-02T10:04:05.123 2024-01-02T10:04:05.123400 2024-01-02T10:04:05.123450 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 2024-01-02T10:04:05.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 02:12:34.123456":"2024-01-04 02:12:34.123456", "2024-01-01 02:12:34.123456":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple7 -- +\N + +-- !ltz_ntz_simple8 -- +\N + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 02:12:34.123456", "2024-01-02 02:12:34.123456", "2024-01-03 02:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T02:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 02:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T02:12:34.123456 + +-- !ltz_ntz_simple2 -- +1 {"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} {"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] ["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] {"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple3 -- +{"2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456"} + +-- !ltz_ntz_simple4 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple5 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple6 -- +{"2024-01-03 10:12:34.123456":"2024-01-04 10:12:34.123456", "2024-01-01 10:12:34.123456":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple7 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple8 -- +2024-01-04T10:12:34.123456 + +-- !ltz_ntz_simple9 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple10 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple11 -- +["2024-01-01 10:12:34.123456", "2024-01-02 10:12:34.123456", "2024-01-03 10:12:34.123456"] + +-- !ltz_ntz_simple12 -- +2024-01-02T10:12:34.123456 + +-- !ltz_ntz_simple13 -- +{"crow1":"2024-01-01 10:12:34.123456", "crow2":"2024-01-02 10:12:34.123456"} + +-- !ltz_ntz_simple14 -- +2024-01-01T10:12:34.123456 + +-- !ltz_ntz_simple15 -- +2024-01-02T10:12:34.123456 + diff --git a/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy b/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy index 81b0e48e99091f..09fcba3ec18397 100644 --- a/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy +++ b/regression-test/suites/external_table_p0/paimon/paimon_timestamp_types.groovy @@ -17,14 +17,9 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_docker_doris") { - def ts_orc = """select * from ts_orc""" - def ts_parquet = """select * from ts_parquet""" - String enabled = context.config.otherConfigs.get("enablePaimonTest") - // The timestamp type of paimon has no logical or converted type, - // and is conflict with column type change from bigint to timestamp. - // Deprecated currently. - if (enabled == null || !enabled.equalsIgnoreCase("enable_deprecated_case")) { + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disabled paimon test") return } @@ -46,20 +41,85 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock logger.info("catalog " + catalog_name + " created") sql """switch ${catalog_name};""" logger.info("switched to catalog " + catalog_name) - sql """use test_paimon_db;""" + sql """use flink_paimon;""" logger.info("use test_paimon_db") + + def test_ltz_ntz = { table -> + qt_ltz_ntz2 """ select * from ${table} """ + qt_ltz_ntz3 """ select cmap1 from ${table} """ + qt_ltz_ntz4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz6 """ select cmap2 from ${table} """ + qt_ltz_ntz7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz9 """ select cmap3 from ${table} """ + qt_ltz_ntz10 """ select cmap4 from ${table} """ + qt_ltz_ntz11 """ select cmap4['1'] from ${table} """ + qt_ltz_ntz12 """ select cmap4['2'] from ${table} """ + qt_ltz_ntz13 """ select cmap4['3'] from ${table} """ + qt_ltz_ntz14 """ select cmap4['4'] from ${table} """ + qt_ltz_ntz15 """ select carray1 from ${table} """ + qt_ltz_ntz16 """ select carray1[2] from ${table} """ + qt_ltz_ntz17 """ select carray2 from ${table} """ + qt_ltz_ntz18 """ select carray2[2] from ${table} """ + qt_ltz_ntz19 """ select crow1 from ${table} """ + qt_ltz_ntz20 """ select crow2 from ${table} """ + qt_ltz_ntz21 """ select crow3 from ${table} """ + } + + def test_ltz_ntz_simple = { table -> + qt_ltz_ntz_simple2 """ select * from ${table} """ + qt_ltz_ntz_simple3 """ select cmap1 from ${table} """ + qt_ltz_ntz_simple4 """ select cmap1['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple5 """ select cmap1['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple6 """ select cmap2 from ${table} """ + qt_ltz_ntz_simple7 """ select cmap2['2024-01-01 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple8 """ select cmap2['2024-01-03 10:12:34.123456'] from ${table} """ + qt_ltz_ntz_simple9 """ select carray1 from ${table} """ + qt_ltz_ntz_simple10 """ select carray1[2] from ${table} """ + qt_ltz_ntz_simple11 """ select carray2 from ${table} """ + qt_ltz_ntz_simple12 """ select carray2[2] from ${table} """ + qt_ltz_ntz_simple13 """ select crow from ${table} """ + qt_ltz_ntz_simple14 """ select STRUCT_ELEMENT(crow, 'crow1') from ${table} """ + qt_ltz_ntz_simple15 """ select STRUCT_ELEMENT(crow, 'crow2') from ${table} """ + } + + def test_scale = { + def ts_scale_orc = """select * from ts_scale_orc""" + def ts_scale_parquet = """select * from ts_scale_parquet""" + qt_c1 ts_scale_orc + qt_c2 ts_scale_parquet + + } + sql """set force_jni_scanner=true""" - qt_c1 ts_orc - qt_c2 ts_parquet + test_scale() + // test_ltz_ntz("test_timestamp_ntz_ltz_orc") + // test_ltz_ntz("test_timestamp_ntz_ltz_parquet") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc") + // test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet") sql """set force_jni_scanner=false""" - qt_c3 ts_orc - qt_c4 ts_parquet - + test_scale() + // test_ltz_ntz("test_timestamp_ntz_ltz_orc") + // test_ltz_ntz("test_timestamp_ntz_ltz_parquet") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_orc") + test_ltz_ntz_simple("test_timestamp_ntz_ltz_simple_parquet") } finally { sql """set force_jni_scanner=false""" } + + // TODO: + // 1. Fix: native read + parquet + timestamp(7/8/9) (ts7,ts8,ts9), it will be 8 hour more + // 2. paimon bugs: native read + orc + timestamp_ltz. + // In the Shanghai time zone, the read data will be 8 hours less, + // because the data written by Flink to the orc file is UTC, but the time zone saved in the orc file is Shanghai. + // Currently, Paimon will not fix this problem, but recommends using the parquet format. + // 3. paimon bugs: jni read + parquet + row types + timestamp. + // Data of the timestamp type should be converted to the timestamp type, but paimon converted it to the long type. + // Will be fixed in paimon0.9 + } @@ -69,7 +129,22 @@ suite("paimon_timestamp_types", "p0,external,doris,external_docker,external_dock SET 'table.local-time-zone' = 'Asia/Shanghai'; -create table ts_orc ( +CREATE CATALOG paimon_minio WITH ( + 'type' = 'paimon', + 'warehouse' = 's3://warehouse/wh', + 's3.endpoint' = 'http://172.21.0.101:19001', + 's3.access-key' = 'admin', + 's3.secret-key' = 'password', + 's3.path.style.access' = 'true' +); + +use catalog paimon_minio; + +CREATE DATABASE IF NOT EXISTS flink_paimon; + +use flink_paimon; + +create table ts_scale_orc ( id int, ts1 timestamp(1), ts2 timestamp(2), @@ -91,7 +166,7 @@ ts18 timestamp_ltz(8), ts19 timestamp_ltz(9)) WITH ('file.format' = 'orc','write-only'='true'); -create table ts_parquet ( +create table ts_scale_parquet ( id int, ts1 timestamp(1), ts2 timestamp(2), @@ -113,7 +188,7 @@ ts18 timestamp_ltz(8), ts19 timestamp_ltz(9)) WITH ('file.format' = 'parquet','write-only'='true'); -insert into ts_orc values ( +insert into ts_scale_orc values ( 1, timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789', @@ -134,7 +209,7 @@ insert into ts_orc values ( timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789'); -insert into ts_parquet values ( +insert into ts_scale_parquet values ( 1, timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789', @@ -155,4 +230,119 @@ insert into ts_parquet values ( timestamp '2024-01-02 10:04:05.123456789', timestamp '2024-01-02 10:04:05.123456789'); +create table test_timestamp_ntz_ltz_orc ( + id int, + cmap1 MAP, + cmap2 MAP, + cmap3 MAP>, + cmap4 MAP>, + carray1 ARRAY, + carray2 ARRAY, + crow1 ROW, + crow2 ROW>, + crow3 ROW, crow_array2 ARRAY> +) with ( + 'write-only' = 'true', + 'file.format' = 'orc' +); + +create table test_timestamp_ntz_ltz_parquet ( + id int, + cmap1 MAP, + cmap2 MAP, + cmap3 MAP>, + cmap4 MAP>, + carray1 ARRAY, + carray2 ARRAY, + crow1 ROW, + crow2 ROW>, + crow3 ROW, crow_array2 ARRAY> +) with ( + 'write-only' = 'true', + 'file.format' = 'parquet' +); + +insert into test_timestamp_ntz_ltz_orc values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'), + 2, ROW(timestamp '2024-01-02 10:12:34.123456'), + 3, ROW(timestamp '2024-01-03 10:12:34.123456') + ], + MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + 2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'), + 3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'), + 4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456') + ], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456']) +); + +-- Currently paimon does not support nested parquet formats +insert into test_timestamp_ntz_ltz_parquet values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[1, ROW(timestamp '2024-01-01 10:12:34.123456'), + 2, ROW(timestamp '2024-01-02 10:12:34.123456'), + 3, ROW(timestamp '2024-01-03 10:12:34.123456') + ], + MAP[1, ROW(1, timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + 2, ROW(2, timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'), + 3, ROW(3, timestamp '2024-01-05 10:12:34.123456', timestamp '2024-01-06 10:12:34.123456'), + 4, ROW(4, timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456') + ], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456'), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ROW(timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456')), + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', ARRAY[timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456', timestamp '2024-01-05 10:12:34.123456'], ARRAY[timestamp '2024-01-06 10:12:34.123456', timestamp '2024-01-07 10:12:34.123456', timestamp '2024-01-08 10:12:34.123456']) +); + +create table test_timestamp_ntz_ltz_simple_orc ( + id int, + cmap1 MAP, + cmap2 MAP, + carray1 ARRAY, + carray2 ARRAY, + crow ROW +) with ( + 'write-only' = 'true', + 'file.format' = 'orc' +); + +create table test_timestamp_ntz_ltz_simple_parquet ( + id int, + cmap1 MAP, + cmap2 MAP, + carray1 ARRAY, + carray2 ARRAY, + crow ROW +) with ( + 'write-only' = 'true', + 'file.format' = 'parquet' +); + +insert into test_timestamp_ntz_ltz_simple_orc values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456') +); + +insert into test_timestamp_ntz_ltz_simple_parquet values ( + 1, + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + MAP[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456', timestamp '2024-01-04 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ARRAY[timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456', timestamp '2024-01-03 10:12:34.123456'], + ROW(timestamp '2024-01-01 10:12:34.123456', timestamp '2024-01-02 10:12:34.123456') +); + */ From df99a6de22b617de2e48da69977b24dc12d307cc Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Fri, 12 Jul 2024 15:55:34 +0800 Subject: [PATCH 4/6] fix --- .../thirdparties/docker-compose/iceberg/tools/save_docker.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh index cc149d481180da..acc84535b96a5b 100644 --- a/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh +++ b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh @@ -21,3 +21,7 @@ docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/in # save iceberg from s3 docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio' + +# package zip +cp -r data iceberg_data +zip -rq iceberg_data.zip iceberg_data \ No newline at end of file From 3ed9eefd57f4506786d406b6228fc313e126e7e9 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Fri, 12 Jul 2024 16:22:45 +0800 Subject: [PATCH 5/6] fix --- be/src/vec/exec/scan/vfile_scanner.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/be/src/vec/exec/scan/vfile_scanner.cpp b/be/src/vec/exec/scan/vfile_scanner.cpp index f106aa9365cfea..3ef07c50b64b3d 100644 --- a/be/src/vec/exec/scan/vfile_scanner.cpp +++ b/be/src/vec/exec/scan/vfile_scanner.cpp @@ -787,8 +787,7 @@ Status VFileScanner::_get_next_reader() { case TFileFormatType::FORMAT_PARQUET: { std::unique_ptr parquet_reader = ParquetReader::create_unique( _profile, *_params, range, _state->query_options().batch_size, - const_cast(&_state->timezone_obj()), - _io_ctx.get(), _state, + const_cast(&_state->timezone_obj()), _io_ctx.get(), _state, _shoudl_enable_file_meta_cache() ? ExecEnv::GetInstance()->file_meta_cache() : nullptr, _state->query_options().enable_parquet_lazy_mat); From 3f2ebe95138931a628cad5cf0d20ebb5754bf2cf Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Fri, 12 Jul 2024 17:12:55 +0800 Subject: [PATCH 6/6] fix --- .../src/main/java/org/apache/doris/paimon/PaimonJniScanner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java index abcd428b2d7ff4..719a7ea0b9d9e9 100644 --- a/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java +++ b/fe/be-java-extensions/paimon-scanner/src/main/java/org/apache/doris/paimon/PaimonJniScanner.java @@ -100,7 +100,7 @@ public void open() throws IOException { initTable(); initReader(); resetDatetimeV2Precision(); - } catch (Exception e) { + } catch (Throwable e) { LOG.warn("Failed to open paimon_scanner: " + e.getMessage(), e); throw e; }