From f53ea8dd8d80614bad22b9ba49957db70418cf5b Mon Sep 17 00:00:00 2001 From: ziyi chen Date: Thu, 2 Nov 2023 16:52:48 -0400 Subject: [PATCH] Add more types to parquet reader/writer --- .../timestamp/impala_timestamp.parquet | Bin 0 -> 1690 bytes .../parquet/timestamp/timestamp_ms_ns.parquet | Bin 0 -> 2495 bytes src/common/types/timestamp_t.cpp | 16 +++-- src/include/common/types/timestamp_t.h | 10 ++- .../function/timestamp/timestamp_function.h | 4 +- .../persistent/reader/parquet/column_reader.h | 6 ++ .../reader/parquet/parquet_timestamp.h | 27 ++++++++ .../persistent/reader/parquet/CMakeLists.txt | 3 +- .../reader/parquet/column_reader.cpp | 62 ++++++++++++++++-- .../reader/parquet/parquet_reader.cpp | 15 ++++- .../reader/parquet/parquet_timestamp.cpp | 35 ++++++++++ .../writer/parquet/column_writer.cpp | 2 + .../writer/parquet/parquet_writer.cpp | 9 +++ test/test_files/copy/copy_to_parquet.test | 26 ++++---- test/test_files/reader/parquet/timestamp.test | 18 +++++ tools/nodejs_api/src_cpp/node_util.cpp | 4 +- 16 files changed, 207 insertions(+), 30 deletions(-) create mode 100644 dataset/reader/parquet/timestamp/impala_timestamp.parquet create mode 100644 dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet create mode 100644 src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h create mode 100644 src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp create mode 100644 test/test_files/reader/parquet/timestamp.test diff --git a/dataset/reader/parquet/timestamp/impala_timestamp.parquet b/dataset/reader/parquet/timestamp/impala_timestamp.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6193d2674e194871fd82e9d9bf8441e1b75045ba GIT binary patch literal 1690 zcmbtVOK;mo5MC=O5d=LLMQBKX4gz7jC?KaIEjd+y99BvsQI(WLwn>UcYVc)>5=pUq zn34^I-c&VX<5kY^vV9k$nF z85Vtn(C#+9d1?I9MiPuP4J&TPZW z6cIv?cXRM%wsH@4(f-d{MV2Ylwh>dvZ^w>jMX}}u=f=!UJfF?&FgxFF?R>wrAM8K+ zqznWQnEDrZr!4j=1Ngvny^2I*_%tDQ3}e_Fvz zYv6vxSw6TszD>@ap0o}=<5usH*<``py)S~c!RcO)I}9PTMSy6LHYna$j;d#V^zIH- zQpk*1Xld~*r0iWs54CVX$Oj883}-1hb!ml>GxLd-7oJw0^4tovYZ~tviA~^EDE}cI z4qwa9v$o3r$$Uzls@=pGzlE1PpJQCb4I8&GwsF&@x<+Heo)YD&f9>=dRU z4IKH@lIj&pX;56zt2>$}P0S{>$Eq?M14di(xdT4P-Ai*pa}ZReyVT@*LVIHJYSEkEq<`G4Z*o>$!yg2? zP^B1sr<(RAqH~dTcAK=GvCa=f+KSNRRDu=ybc7oz?h_R>^UXXonq#8DGxDK_h_JTb zTX$%8n4b(Un;$X?FHpV|XyKQM6(1Nua6pH+v>wtD9p6f!NPZ)Ke*ixk_$&Pb`$_^E literal 0 HcmV?d00001 diff --git a/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet b/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2af7e86f131ca2933eab11a93b71b1fa504e6757 GIT binary patch literal 2495 zcmc&$&u`;Y5PogbPyvUk)R80gP^@jlAzkb=SxChmo|@dr3pnrj=Ggi>3J4GvCbnzVVEmN4PF9 zDTZf?J4~8EO9-)_JS`l(_55v8;2Tan}0VWfM(u@@ejT{rf8W;T<~JfF$Dn8|6m zg_qeVn+G+BnMH6~gax7N>d;qj%>DgK3ZZX)_*VFB9--gYe|>v%o&;P|cZpyZE@Ozg zo7U^s0R99O%25^p>rr>C1jlLToxTPi#+TQXqJ3c4}4EmUg~eT3G?Q zq$-o3F}by60?JcRCY@c$=3nX4%Dvj+WqzE?q;m_IXLHL4W%F!0(0xk}+3!>5oa0;e z>8K$?ZgchMNq&_Z_UAG6+0=T@`MPI=-;Z4v5G}`ct>a1CFza!Y1O&*%zGHSRn*+2T zdqFaGbd8Q}M#S#fk#6ac4)Al%_5tqrZ4!IJY%9u?urZsRt)i_!$>KcqxU*0c4f+oNT59iylB5>nM5 z(R;n_fUxS@z0m3U#3~f-^Y;Y~v~VLDSDeIG;4TpRCa%}_)y?7ju|F-mOO6D8h%r8Z zA2~k4xQN9zu43HAl{U4B;)H)dj9X*Bge${)IMXcDFJnBw5CaHy*XL6_8RyRV&@TyVsvHU=mN`fH` zLb+1&bX5tP)pFcW1()CqUkQy0*`*=DIGdH73=^uGPm89 zwe1dQ-b$_C@B+87aWM3&3g1xcCziBD_yKPvZWPL&>T1a+d&#bHSaR zQXBJ<{d!=i%9$lqBePnLfLF&ToC=0oq7z3UJLdP zEmeqR=N5gS*OC+(M=>7MNfS%jM!jT$=6we{O{vN^HNxevuvd*@Q{bm*jV<*wY{B!m zLj%OVqz32xwj7;leykcga{DN?%2MHV createTimestampReader(ParquetReader& reader, + std::unique_ptr type, + const kuzu_parquet::format::SchemaElement& schema, uint64_t fileIdx, uint64_t maxDefine, + uint64_t maxRepeat); + protected: const kuzu_parquet::format::SchemaElement& schema; diff --git a/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h b/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h new file mode 100644 index 0000000000..0da8d1600b --- /dev/null +++ b/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h @@ -0,0 +1,27 @@ +#pragma once + +#include "common/types/timestamp_t.h" + +namespace kuzu { +namespace processor { + +struct Int96 { + uint32_t value[3]; +}; + +struct ParquetTimeStampUtils { + static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; + static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; + static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL; + static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL; + + static common::timestamp_t impalaTimestampToTimestamp(const Int96& rawTS); + static common::timestamp_t parquetTimestampMicrosToTimestamp(const int64_t& rawTS); + static common::timestamp_t parquetTimestampMsToTimestamp(const int64_t& rawTS); + static common::timestamp_t parquetTimestampNsToTimestamp(const int64_t& rawTS); + static int64_t impalaTimestampToMicroseconds(const Int96& impalaTimestamp); + static common::date_t parquetIntToDate(const int32_t& raw_date); +}; + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/persistent/reader/parquet/CMakeLists.txt b/src/processor/operator/persistent/reader/parquet/CMakeLists.txt index 5756b577d9..e2d58f119c 100644 --- a/src/processor/operator/persistent/reader/parquet/CMakeLists.txt +++ b/src/processor/operator/persistent/reader/parquet/CMakeLists.txt @@ -6,7 +6,8 @@ add_library(kuzu_processor_operator_parquet_reader interval_column_reader.cpp struct_column_reader.cpp string_column_reader.cpp - list_column_reader.cpp) + list_column_reader.cpp + parquet_timestamp.cpp) set(ALL_OBJECT_FILES ${ALL_OBJECT_FILES} $ diff --git a/src/processor/operator/persistent/reader/parquet/column_reader.cpp b/src/processor/operator/persistent/reader/parquet/column_reader.cpp index b23f6fcb1c..13aafc099d 100644 --- a/src/processor/operator/persistent/reader/parquet/column_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/column_reader.cpp @@ -6,6 +6,7 @@ #include "processor/operator/persistent/reader/parquet/boolean_column_reader.h" #include "processor/operator/persistent/reader/parquet/callback_column_reader.h" #include "processor/operator/persistent/reader/parquet/interval_column_reader.h" +#include "processor/operator/persistent/reader/parquet/parquet_timestamp.h" #include "processor/operator/persistent/reader/parquet/string_column_reader.h" #include "processor/operator/persistent/reader/parquet/templated_column_reader.h" #include "snappy/snappy.h" @@ -20,10 +21,6 @@ using kuzu_parquet::format::Encoding; using kuzu_parquet::format::PageType; using kuzu_parquet::format::Type; -static common::date_t ParquetIntToDate(const int32_t& raw_date) { - return common::date_t(raw_date); -} - ColumnReader::ColumnReader(ParquetReader& reader, std::unique_ptr type, const kuzu_parquet::format::SchemaElement& schema, uint64_t fileIdx, uint64_t maxDefinition, uint64_t maxRepeat) @@ -239,16 +236,23 @@ std::unique_ptr ColumnReader::createReader(ParquetReader& reader, TemplatedColumnReader>>( reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); case common::LogicalTypeID::DATE: - return std::make_unique>( + return std::make_unique< + CallbackColumnReader>( reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + case common::LogicalTypeID::BLOB: case common::LogicalTypeID::STRING: return std::make_unique( reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); case common::LogicalTypeID::INTERVAL: return std::make_unique( reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + case common::LogicalTypeID::TIMESTAMP: + return createTimestampReader( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + // LCOV_EXCL_START default: throw common::NotImplementedException{"ColumnReader::createReader"}; + // LCOV_EXCL_STOP } } @@ -479,6 +483,54 @@ uint64_t ColumnReader::getTotalCompressedSize() { return chunk->meta_data.total_compressed_size; } +std::unique_ptr ColumnReader::createTimestampReader(ParquetReader& reader, + std::unique_ptr type, const kuzu_parquet::format::SchemaElement& schema, + uint64_t fileIdx, uint64_t maxDefine, uint64_t maxRepeat) { + switch (schema.type) { + case Type::INT96: { + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + } + case Type::INT64: { + if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) { + if (schema.logicalType.TIMESTAMP.unit.__isset.MILLIS) { + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + } else if (schema.logicalType.TIMESTAMP.unit.__isset.MICROS) { + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + } else if (schema.logicalType.TIMESTAMP.unit.__isset.NANOS) { + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + } + // LCOV_EXCL_START + } else if (schema.__isset.converted_type) { + // For legacy compatibility. + switch (schema.converted_type) { + case ConvertedType::TIMESTAMP_MICROS: + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + case ConvertedType::TIMESTAMP_MILLIS: + return std::make_unique>( + reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat); + default: + throw common::NotImplementedException{"ColumnReader::createReader"}; + } + // LCOV_EXCL_STOP + } + } + default: { // LCOV_EXCL_START + throw common::NotImplementedException{"ColumnReader::createReader"}; + } // LCOV_EXCL_STOP + } +} + const uint64_t ParquetDecodeUtils::BITPACK_MASKS[] = {0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303, 8388607, 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, 1073741823, 2147483647, diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index 66104d7bae..f49c80a579 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -461,6 +461,16 @@ std::unique_ptr ParquetReader::deriveLogicalType( "DATE converted type can only be set for value of Type::INT32"}; // LCOV_EXCL_STOP } + case ConvertedType::TIMESTAMP_MICROS: + case ConvertedType::TIMESTAMP_MILLIS: + if (s_ele.type == Type::INT64) { + return std::make_unique(common::LogicalTypeID::TIMESTAMP); + } else { + // LCOV_EXCL_START + throw common::CopyException( + "TIMESTAMP converted type can only be set for value of Type::INT64"); + // LCOV_EXCL_STOP + } case ConvertedType::INTERVAL: { return std::make_unique(common::LogicalTypeID::INTERVAL); } @@ -490,13 +500,16 @@ std::unique_ptr ParquetReader::deriveLogicalType( return std::make_unique(common::LogicalTypeID::INT32); case Type::INT64: return std::make_unique(common::LogicalTypeID::INT64); + case Type::INT96: + return std::make_unique(common::LogicalTypeID::TIMESTAMP); case Type::FLOAT: return std::make_unique(common::LogicalTypeID::FLOAT); case Type::DOUBLE: return std::make_unique(common::LogicalTypeID::DOUBLE); case Type::BYTE_ARRAY: case Type::FIXED_LEN_BYTE_ARRAY: - return std::make_unique(common::LogicalTypeID::STRING); + // TODO(Ziyi): Support parquet copy option(binary_as_string). + return std::make_unique(common::LogicalTypeID::BLOB); default: return std::make_unique(common::LogicalTypeID::ANY); } diff --git a/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp b/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp new file mode 100644 index 0000000000..64959cdc16 --- /dev/null +++ b/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp @@ -0,0 +1,35 @@ +#include "processor/operator/persistent/reader/parquet/parquet_timestamp.h" + +namespace kuzu { +namespace processor { + +common::timestamp_t ParquetTimeStampUtils::impalaTimestampToTimestamp(const Int96& rawTS) { + auto impalaUS = impalaTimestampToMicroseconds(rawTS); + return common::Timestamp::fromEpochMicroSeconds(impalaUS); +} + +common::timestamp_t ParquetTimeStampUtils::parquetTimestampMicrosToTimestamp(const int64_t& rawTS) { + return common::Timestamp::fromEpochMicroSeconds(rawTS); +} + +common::timestamp_t ParquetTimeStampUtils::parquetTimestampMsToTimestamp(const int64_t& rawTS) { + return common::Timestamp::fromEpochMilliSeconds(rawTS); +} + +common::timestamp_t ParquetTimeStampUtils::parquetTimestampNsToTimestamp(const int64_t& rawTS) { + return common::Timestamp::fromEpochNanoSeconds(rawTS); +} + +int64_t ParquetTimeStampUtils::impalaTimestampToMicroseconds(const Int96& impalaTimestamp) { + int64_t daysSinceEpoch = impalaTimestamp.value[2] - JULIAN_TO_UNIX_EPOCH_DAYS; + auto nanoSeconds = *reinterpret_cast(impalaTimestamp.value); + auto microseconds = nanoSeconds / NANOSECONDS_PER_MICRO; + return daysSinceEpoch * MICROSECONDS_PER_DAY + microseconds; +} + +common::date_t ParquetTimeStampUtils::parquetIntToDate(const int32_t& raw_date) { + return common::date_t(raw_date); +} + +} // namespace processor +} // namespace kuzu diff --git a/src/processor/operator/persistent/writer/parquet/column_writer.cpp b/src/processor/operator/persistent/writer/parquet/column_writer.cpp index 83a701fb73..ac7c2bdbdb 100644 --- a/src/processor/operator/persistent/writer/parquet/column_writer.cpp +++ b/src/processor/operator/persistent/writer/parquet/column_writer.cpp @@ -50,6 +50,7 @@ std::unique_ptr ColumnWriter::createWriterRecursive( } auto schemaIdx = schemas.size(); switch (type->getLogicalTypeID()) { + case LogicalTypeID::UNION: case LogicalTypeID::STRUCT: { auto fields = StructType::getFields(type); // set up the schema element for this struct @@ -187,6 +188,7 @@ std::unique_ptr ColumnWriter::createWriterRecursive( return std::make_unique>(writer, schemaIdx, std::move(schemaPathToCreate), maxRepeatToCreate, maxDefineToCreate, canHaveNullsToCreate); + case LogicalTypeID::TIMESTAMP: case LogicalTypeID::INT64: return std::make_unique>(writer, schemaIdx, std::move(schemaPathToCreate), maxRepeatToCreate, maxDefineToCreate, diff --git a/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp b/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp index 851538f653..433ad7aebf 100644 --- a/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp +++ b/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp @@ -61,6 +61,7 @@ Type::type ParquetWriter::convertToParquetType(LogicalType* type) { return Type::INT32; case LogicalTypeID::UINT64: case LogicalTypeID::INT64: + case LogicalTypeID::TIMESTAMP: return Type::INT64; case LogicalTypeID::FLOAT: return Type::FLOAT; @@ -125,6 +126,14 @@ void ParquetWriter::setSchemaProperties(LogicalType* type, SchemaElement& schema schemaElement.__isset.type_length = true; schemaElement.__isset.converted_type = true; } break; + case LogicalTypeID::TIMESTAMP: { + schemaElement.converted_type = ConvertedType::TIMESTAMP_MICROS; + schemaElement.__isset.converted_type = true; + schemaElement.__isset.logicalType = true; + schemaElement.logicalType.__isset.TIMESTAMP = true; + schemaElement.logicalType.TIMESTAMP.isAdjustedToUTC = false; + schemaElement.logicalType.TIMESTAMP.unit.__isset.MICROS = true; + } break; default: break; } diff --git a/test/test_files/copy/copy_to_parquet.test b/test/test_files/copy/copy_to_parquet.test index ee6b16313b..1764d22320 100644 --- a/test/test_files/copy/copy_to_parquet.test +++ b/test/test_files/copy/copy_to_parquet.test @@ -6,18 +6,18 @@ -CASE TinySnbCopyToParquet -CASE CopyPersonToParquet --STATEMENT COPY (MATCH (p:person) return p.ID, p.fName, p.gender, p.isStudent, p.isWorker, p.age, p.eyeSight, p.birthdate, p.lastJobDuration, p.workedHours, p.usedNames, p.courseScoresPerTerm, p.height) to '${DATABASE_PATH}/tinysnb.parquet'; +-STATEMENT COPY (MATCH (p:person) return p.ID, p.fName, p.gender, p.isStudent, p.isWorker, p.age, p.eyeSight, p.birthdate, p.registerTime, p.lastJobDuration, p.workedHours, p.usedNames, p.courseScoresPerTerm, p.height) to '${DATABASE_PATH}/tinysnb.parquet'; ---- ok -STATEMENT LOAD FROM '${DATABASE_PATH}/tinysnb.parquet' RETURN *; ---- 8 -0|Alice|1|True|False|35|5.000000|1900-01-01|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|1.731000 -2|Bob|2|True|False|30|5.100000|1900-01-01|10 years 5 months 13:00:00|[12,8]|[Bobby]|[[8,9],[9,10]]|0.990000 -3|Carol|1|False|True|45|5.000000|1940-06-22|48:24:11|[4,5]|[Carmen,Fred]|[[8,10]]|1.000000 -5|Dan|2|False|True|20|4.800000|1950-07-23|10 years 5 months 13:00:00|[1,9]|[Wolfeschlegelstein,Daniel]|[[7,4],[8,8],[9]]|1.300000 -7|Elizabeth|1|False|True|20|4.700000|1980-10-26|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000 -8|Farooq|2|True|False|25|4.500000|1980-10-26|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000 -9|Greg|2|False|False|40|4.900000|1980-10-26|10 years 5 months 13:00:00|[1]|[Grad]|[[10]]|1.600000 -10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|2|False|True|83|4.900000|1990-11-27|3 years 2 days 13:02:00|[10,11,12,3,4,5,6,7]|[Ad,De,Hi,Kye,Orlan]|[[7],[10],[6,7]]|1.323000 +0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|1.731000 +2|Bob|2|True|False|30|5.100000|1900-01-01|2008-11-03 15:25:30.000526|10 years 5 months 13:00:00|[12,8]|[Bobby]|[[8,9],[9,10]]|0.990000 +3|Carol|1|False|True|45|5.000000|1940-06-22|1911-08-20 02:32:21|48:24:11|[4,5]|[Carmen,Fred]|[[8,10]]|1.000000 +5|Dan|2|False|True|20|4.800000|1950-07-23|2031-11-30 12:25:30|10 years 5 months 13:00:00|[1,9]|[Wolfeschlegelstein,Daniel]|[[7,4],[8,8],[9]]|1.300000 +7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000 +8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000 +9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 04:41:42|10 years 5 months 13:00:00|[1]|[Grad]|[[10]]|1.600000 +10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|2|False|True|83|4.900000|1990-11-27|2023-02-21 13:25:30|3 years 2 days 13:02:00|[10,11,12,3,4,5,6,7]|[Ad,De,Hi,Kye,Orlan]|[[7],[10],[6,7]]|1.323000 -LOG CopyOrganisationToParquet -STATEMENT COPY (MATCH (o:organisation) RETURN o.ID, o.state) TO "${DATABASE_PATH}/organisation.parquet" @@ -29,13 +29,13 @@ 6|{revenue: 558, location: ['very long city name', 'new york'], stock: {price: [22], volume: 99}} -LOG CopyMoviesToParquet --STATEMENT COPY (MATCH (m:movies) RETURN m.length, m.audience) TO "${DATABASE_PATH}/movies.parquet" +-STATEMENT COPY (MATCH (m:movies) RETURN m.*) TO "${DATABASE_PATH}/movies.parquet" ---- ok -STATEMENT LOAD FROM "${DATABASE_PATH}/movies.parquet" RETURN *; ---- 3 -126|{audience1=52, audience53=42} -2544|{audience1=33} -298|{} +Sóló cón tu párejâ|126| this is a very very good movie|{rating: 5.300000, stars: 2, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11, u8: 220, u16: 20, u32: 1, u64: 180, hugedata: 1844674407370955161600000000.000000}|\xAA\xABinteresting\x0B|{audience1=52, audience53=42}|{tag: 0, credit: True, grade1: 0.000000, grade2: 0} +The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie|2544| the movie is very very good|{rating: 7.000000, stars: 10, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12, u8: 12, u16: 120, u32: 55, u64: 1, hugedata: -1844674407370955161600.000000}|\xAB\xCD|{audience1=33}|{tag: 1, credit: , grade1: 8.989000, grade2: 0} +Roma|298|the movie is very interesting and funny|{rating: 1223.000000, stars: 100, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22, u8: 1, u16: 15, u32: 200, u64: 4, hugedata: -15.000000}|pure ascii characters|{}|{tag: 1, credit: , grade1: 254.000000, grade2: 0} -LOG CopyStudyAtToParquet -STATEMENT COPY (match (:person)-[s:studyAt]->(:organisation) return s.*) to "${DATABASE_PATH}/studyAt.parquet" diff --git a/test/test_files/reader/parquet/timestamp.test b/test/test_files/reader/parquet/timestamp.test new file mode 100644 index 0000000000..4400eef64b --- /dev/null +++ b/test/test_files/reader/parquet/timestamp.test @@ -0,0 +1,18 @@ +-GROUP ParquetTimestampTest +-DATASET CSV empty + +-- + +-CASE impalaTimestamp +-STATEMENT LOAD FROM '${KUZU_ROOT_DIRECTORY}/dataset/reader/parquet/timestamp/impala_timestamp.parquet' RETURN * +---- 3 +2023-11-02 12:00:00 +1949-10-01 13:30:45 +1965-12-30 15:15:20 + +-CASE timestampMSNS +-STATEMENT LOAD FROM '${KUZU_ROOT_DIRECTORY}/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet' RETURN * +---- 3 +2021-11-02 20:33:20|2012-05-01 15:13:24 +2021-11-02 21:20:00|2021-09-30 07:46:40 +2021-11-02 22:20:00|2016-08-15 20:06:40 diff --git a/tools/nodejs_api/src_cpp/node_util.cpp b/tools/nodejs_api/src_cpp/node_util.cpp index 85a7320201..e90a901c5b 100644 --- a/tools/nodejs_api/src_cpp/node_util.cpp +++ b/tools/nodejs_api/src_cpp/node_util.cpp @@ -240,7 +240,7 @@ Value Util::TransformNapiValue( throw Exception("Expected a date for parameter " + key + "."); } auto napiDate = napiValue.As(); - timestamp_t timestamp = Timestamp::fromEpochMs(napiDate.ValueOf()); + timestamp_t timestamp = Timestamp::fromEpochMilliSeconds(napiDate.ValueOf()); auto dateVal = Timestamp::getDate(timestamp); return Value(dateVal); } @@ -249,7 +249,7 @@ Value Util::TransformNapiValue( throw Exception("Expected a date for parameter " + key + "."); } auto napiDate = napiValue.As(); - timestamp_t timestamp = Timestamp::fromEpochMs(napiDate.ValueOf()); + timestamp_t timestamp = Timestamp::fromEpochMilliSeconds(napiDate.ValueOf()); return Value(timestamp); } case LogicalTypeID::INTERVAL: {