From f53ea8dd8d80614bad22b9ba49957db70418cf5b Mon Sep 17 00:00:00 2001
From: ziyi chen <chenziyi990424@gmail.com>
Date: Thu, 2 Nov 2023 16:52:48 -0400
Subject: [PATCH] Add more types to parquet reader/writer

---
 .../timestamp/impala_timestamp.parquet        | Bin 0 -> 1690 bytes
 .../parquet/timestamp/timestamp_ms_ns.parquet | Bin 0 -> 2495 bytes
 src/common/types/timestamp_t.cpp              |  16 +++--
 src/include/common/types/timestamp_t.h        |  10 ++-
 .../function/timestamp/timestamp_function.h   |   4 +-
 .../persistent/reader/parquet/column_reader.h |   6 ++
 .../reader/parquet/parquet_timestamp.h        |  27 ++++++++
 .../persistent/reader/parquet/CMakeLists.txt  |   3 +-
 .../reader/parquet/column_reader.cpp          |  62 ++++++++++++++++--
 .../reader/parquet/parquet_reader.cpp         |  15 ++++-
 .../reader/parquet/parquet_timestamp.cpp      |  35 ++++++++++
 .../writer/parquet/column_writer.cpp          |   2 +
 .../writer/parquet/parquet_writer.cpp         |   9 +++
 test/test_files/copy/copy_to_parquet.test     |  26 ++++----
 test/test_files/reader/parquet/timestamp.test |  18 +++++
 tools/nodejs_api/src_cpp/node_util.cpp        |   4 +-
 16 files changed, 207 insertions(+), 30 deletions(-)
 create mode 100644 dataset/reader/parquet/timestamp/impala_timestamp.parquet
 create mode 100644 dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet
 create mode 100644 src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h
 create mode 100644 src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp
 create mode 100644 test/test_files/reader/parquet/timestamp.test
diff --git a/dataset/reader/parquet/timestamp/impala_timestamp.parquet b/dataset/reader/parquet/timestamp/impala_timestamp.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6193d2674e194871fd82e9d9bf8441e1b75045ba
GIT binary patch
literal 1690
zcmbtVOK;mo5MC=O5d=LLMQBKX4gz7jC?KaIEjd+y99BvsQI(WLwn>UcYVc)>5=pUq
zn34<uJs15AJ@nAWrN5)co_g!4*PePW+}Wk<My_uqK<@0!>^I-c&VX<5kY^vV9k$nF
z85Vtn(C#<rs~<Yuj}bcidJmz$zFYqE`}OsYC6M3zvijnW>+9d1?I9MiPuP4J&TPZW
z6cIv?cXRM%wsH@4(f-d{MV2Ylwh>dvZ^w>jMX}}u=f=!UJfF?&FgxFF?R>wrAM8K+
zqznWQn<JRpupq=fER<^~&qT)9@-$*$_HzG)K|4&LSSbF#ScC_&{|LA|L8ws9GlAxt
zT9p6w!4>EDrZr!4j=1Ngvny^2I*_<f^KFYOmpEVZEKvA~>%tDQ3}e_<s5}cmJfxyU
zM1J)SHdzhMDY&(U-9))?EZ02W)Md2J<3&Kiaf$C3Gc#k5c){Y<Fw-ZN5ff&f6>Fvz
zYv6vxSw6TszD>@ap0o}=<5usH*<``py)S~c!RcO)I}9PTMSy6LHYna$j;d#V^zIH-
zQpk*1Xld~*r0iWs54CVX$Oj883}-1hb!ml>GxLd-7oJw0^4tovYZ~tviA~^EDE}cI
z4qwa9v$o3r$$Uzls@=pGzlE1PpJQCb4I8&GwsF&@x<+Heo)YD&f9>=<?BSs()GlJ2
zVmJpY0eNW?8|*P2kfrfV^xQ;kOI1Y}T<RSu*8RTo+G*(4q8n;*Z7NP?w&-`qx@Wqg
zliH$J8|(5Z@kva19`%~oF%E}g!+#4poUZqmMpHZzU-l!h*<G4#DJ3|4g*S<PB-A5Q
z<`YH8Vynt+%w#0<F7efUDbkxHo*Jm~e5wfd#BH~Lduyp24r@l6aKer$*8;$ws>dRU
z4IKH@lIj&pX;56zt2>$}P0S{>$Eq?M14d<Z417D0qsq03E(`<RGbo-U4comF+qt;I
z-B^HsD~9^r@PgQF7w`rZ$RWM0r$gg-nk+?XtA3lG=tB%yg4(oVqg98Pb6%IL4z1rk
zc@d7l70#SeAE-}kbleNG<ED`75#%}@DMOcXI2l!Bp|(`A8f@m+uJL+%URRDuZ3hcg
zZdLR{k=!TbG7m;@*M>i(xdT4P-Ai*pa}ZReyVT@*LVIHJYSEkEq<`G4Z*o>$!yg2?
zP^B1sr<(RAqH~dTcAK=GvCa=f+KSNRRDu=ybc7oz?h_R>^UXXonq#8DGxDK_h_JTb
zTX$%8n4b(Un;$X?FHpV|XyKQM6(1Nua6pH+v>wtD9p6f!NPZ)Ke*ixk_$&Pb`$_^E

literal 0
HcmV?d00001

diff --git a/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet b/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2af7e86f131ca2933eab11a93b71b1fa504e6757
GIT binary patch
literal 2495
zcmc&$&u`;Y5PogbPyvUk)R80gP^@jlAzkb=SxChmo<rk0^v5<!Z6{?_`Bzd0+cB}7
z)Gd|v!VPX5_y<66=fDART8`W}?QxHYD<>|@dr3pnrj=Ggi>3J4GvCbnzVVEmN4PF9
zDTZf?J4~8EO9-)_JS`l(_<RN-^ye3Jefs=ox-y7aX4pL77U7p(VHYkva5aj_pm(NN
zA=m5oX5pNknVq|ba(~RMFsW>55v8;2Tan}0VWfM(u@@ejT{rf8W;T<~JfF$Dn8|6m
zg_qeVn+G+BnMH6~gax7N>d;qj%>DgK3ZZX)_*VFB9--gYe|>v%o&;P|cZpyZE@Ozg
zo7U^<zMG$_&Yz<0>s0R99O%25^p>rr>C1jlLToxTPi#+TQXqJ3c4}4EmUg~eT3G?Q
zq$-o3F}by60?JcRCY@c$=3nX4%Dvj+WqzE?q;m_IXLHL4W%F!0(0xk}+3!>5oa0;e
z>8K$?ZgchMNq&_Z_UAG6+0=T@`MPI=-;Z4v5G}`ct>a1CFza!Y1O&*%zGHSRn*+2T
zdqFaGbd8Q}M#S#fk#6ac4)Al%_5tqrZ4!I<adrKD?&9KKGLtO0DdR*EPSCmybd4tv
zut~a`L@zxLibsC<<Qid8h>JY%9u?urZsRt)i_!$>KcqxU*0c4f+oNT59iylB5>nM5
z(R;n_fUxS@z0m3U#3~f-^Y;Y~v~VLDSDeIG;4TpRCa%}_)y?7ju|F-mOO6D8h%r8Z
zA2~k4xQN9zu43HAl{U4B;)H)dj9X*Bge${)IMXcDFJnBw5C<zwQX~u9V@L{k@jcmd
z<EEq(wZdNC*j6IL-*xsK(a;_b+OjvYx#hs{>aHy*XL6_8RyRV&@TyVsvHU=mN`fH`
zLb+1&bX5tP)pFcW1()CqUkQy0<nhH5h(9rfGH+`l^~t^x>*`*=DIGdH73=^uGPm89
zwe1dQ-b$_C@B+87aWM3&3g1xcCziBD_yKPvZWPL&>T1a+d&#bHSa<lM><i@y?jt>R
zQXBJ<{d!=i%9$lqBePnLfLF&ToC=0oq<b|z*EOViYpk(S<8{zvZq!c<O>7z3UJLdP
zEmeqR=N5gS*OC+(M=>7MNfS%jM!jT$=6we{O{vN^HNxevuvd*@Q{bm*jV<*wY{B!m
zLj%OVqz3<g(4g?7PdcWjKyAtid2XO7)YV$j&}t?69DT6_*NL%V(pp%al9&azZ#d9v
z75Q}6DGqizTMntmmA)!8wc44kmSdWuBgeayVj}xI>2xwj7;leykc<VnhslBi<_F{C
z5zHU4A9U&D)QE9BiU1<M?TMbbT5lZnNAUv)5)a5AN*J4Tpvkb&O=!wt{vC3CcmAtA
zNx+b=O?1FY56JrPA!1hpz4uvcN9$$~tkW0L>ga{DN?%2MHV<EK@&W(rHH03)U$=h%
DzpCD7

literal 0
HcmV?d00001

diff --git a/src/common/types/timestamp_t.cpp b/src/common/types/timestamp_t.cpp
index 9b63b71a58..99036ed7f2 100644
--- a/src/common/types/timestamp_t.cpp
+++ b/src/common/types/timestamp_t.cpp
@@ -256,12 +256,20 @@ void Timestamp::convert(timestamp_t timestamp, date_t& out_date, dtime_t& out_ti
     out_time = getTime(timestamp);
 }
 
-timestamp_t Timestamp::fromEpochMs(int64_t epochMs) {
-    return timestamp_t(epochMs * Interval::MICROS_PER_MSEC);
+timestamp_t Timestamp::fromEpochMicroSeconds(int64_t micros) {
+    return timestamp_t(micros); // timestamp_t is built directly from the microsecond count
 }
 
-timestamp_t Timestamp::fromEpochSec(int64_t epochSec) {
-    return timestamp_t(epochSec * Interval::MICROS_PER_SEC);
+timestamp_t Timestamp::fromEpochMilliSeconds(int64_t ms) {
+    return fromEpochMicroSeconds(ms * Interval::MICROS_PER_MSEC); // scale ms up to micros
+}
+
+timestamp_t Timestamp::fromEpochSeconds(int64_t sec) {
+    return fromEpochMicroSeconds(sec * Interval::MICROS_PER_SEC); // scale sec up to micros
+}
+
+timestamp_t Timestamp::fromEpochNanoSeconds(int64_t ns) {
+    return fromEpochMicroSeconds(ns / 1000); // integer division: sub-microsecond part is dropped
 }
 
 int32_t Timestamp::getTimestampPart(DatePartSpecifier specifier, timestamp_t& timestamp) {
diff --git a/src/include/common/types/timestamp_t.h b/src/include/common/types/timestamp_t.h
index dd42f329c1..2a6dfe0803 100644
--- a/src/include/common/types/timestamp_t.h
+++ b/src/include/common/types/timestamp_t.h
@@ -69,10 +69,16 @@ class Timestamp {
     KUZU_API static void convert(timestamp_t timestamp, date_t& out_date, dtime_t& out_time);
 
     // Create a Timestamp object from the specified epochMs.
-    KUZU_API static timestamp_t fromEpochMs(int64_t epochMs);
+    KUZU_API static timestamp_t fromEpochMicroSeconds(int64_t epochMs);
+
+    // Create a Timestamp object from the specified epochMs.
+    KUZU_API static timestamp_t fromEpochMilliSeconds(int64_t ms);
 
     // Create a Timestamp object from the specified epochSec.
-    KUZU_API static timestamp_t fromEpochSec(int64_t epochSec);
+    KUZU_API static timestamp_t fromEpochSeconds(int64_t sec);
+
+    // Create a Timestamp object from the specified epochNs.
+    KUZU_API static timestamp_t fromEpochNanoSeconds(int64_t ns);
 
     KUZU_API static int32_t getTimestampPart(DatePartSpecifier specifier, timestamp_t& timestamp);
 
diff --git a/src/include/function/timestamp/timestamp_function.h b/src/include/function/timestamp/timestamp_function.h
index 6340043d6a..e82427435c 100644
--- a/src/include/function/timestamp/timestamp_function.h
+++ b/src/include/function/timestamp/timestamp_function.h
@@ -14,13 +14,13 @@ struct Century {
 
 struct EpochMs {
     static inline void operation(int64_t& ms, common::timestamp_t& result) {
-        result = common::Timestamp::fromEpochMs(ms);
+        result = common::Timestamp::fromEpochMilliSeconds(ms); // ms: epoch milliseconds
     }
 };
 
 struct ToTimestamp {
     static inline void operation(int64_t& sec, common::timestamp_t& result) {
-        result = common::Timestamp::fromEpochSec(sec);
+        result = common::Timestamp::fromEpochSeconds(sec); // sec: epoch seconds
     }
 };
 
diff --git a/src/include/processor/operator/persistent/reader/parquet/column_reader.h b/src/include/processor/operator/persistent/reader/parquet/column_reader.h
index 364ee3349f..65c865e97c 100644
--- a/src/include/processor/operator/persistent/reader/parquet/column_reader.h
+++ b/src/include/processor/operator/persistent/reader/parquet/column_reader.h
@@ -80,6 +80,12 @@ class ColumnReader {
         }
     }
 
+private:
+    static std::unique_ptr<ColumnReader> createTimestampReader(ParquetReader& reader,
+        std::unique_ptr<common::LogicalType> type,
+        const kuzu_parquet::format::SchemaElement& schema, uint64_t fileIdx, uint64_t maxDefine,
+        uint64_t maxRepeat);
+
 protected:
     const kuzu_parquet::format::SchemaElement& schema;
 
diff --git a/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h b/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h
new file mode 100644
index 0000000000..0da8d1600b
--- /dev/null
+++ b/src/include/processor/operator/persistent/reader/parquet/parquet_timestamp.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#include "common/types/timestamp_t.h"
+
+namespace kuzu {
+namespace processor {
+
+struct Int96 {
+    uint32_t value[3]; // three 32-bit words; decoded by impalaTimestampToMicroseconds
+};
+
+struct ParquetTimeStampUtils {
+    static constexpr int64_t JULIAN_TO_UNIX_EPOCH_DAYS = 2440588LL; // Julian day of Unix epoch
+    static constexpr int64_t MILLISECONDS_PER_DAY = 86400000LL; // 24 * 60 * 60 * 1000
+    static constexpr int64_t MICROSECONDS_PER_DAY = MILLISECONDS_PER_DAY * 1000LL;
+    static constexpr int64_t NANOSECONDS_PER_MICRO = 1000LL;
+
+    static common::timestamp_t impalaTimestampToTimestamp(const Int96& rawTS); // INT96 input
+    static common::timestamp_t parquetTimestampMicrosToTimestamp(const int64_t& rawTS);
+    static common::timestamp_t parquetTimestampMsToTimestamp(const int64_t& rawTS);
+    static common::timestamp_t parquetTimestampNsToTimestamp(const int64_t& rawTS);
+    static int64_t impalaTimestampToMicroseconds(const Int96& impalaTimestamp); // epoch micros
+    static common::date_t parquetIntToDate(const int32_t& raw_date); // wraps raw_date as-is
+};
+
+} // namespace processor
+} // namespace kuzu
diff --git a/src/processor/operator/persistent/reader/parquet/CMakeLists.txt b/src/processor/operator/persistent/reader/parquet/CMakeLists.txt
index 5756b577d9..e2d58f119c 100644
--- a/src/processor/operator/persistent/reader/parquet/CMakeLists.txt
+++ b/src/processor/operator/persistent/reader/parquet/CMakeLists.txt
@@ -6,7 +6,8 @@ add_library(kuzu_processor_operator_parquet_reader
         interval_column_reader.cpp
         struct_column_reader.cpp
         string_column_reader.cpp
-        list_column_reader.cpp)
+        list_column_reader.cpp
+        parquet_timestamp.cpp)
 
 set(ALL_OBJECT_FILES
         ${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_processor_operator_parquet_reader>
diff --git a/src/processor/operator/persistent/reader/parquet/column_reader.cpp b/src/processor/operator/persistent/reader/parquet/column_reader.cpp
index b23f6fcb1c..13aafc099d 100644
--- a/src/processor/operator/persistent/reader/parquet/column_reader.cpp
+++ b/src/processor/operator/persistent/reader/parquet/column_reader.cpp
@@ -6,6 +6,7 @@
 #include "processor/operator/persistent/reader/parquet/boolean_column_reader.h"
 #include "processor/operator/persistent/reader/parquet/callback_column_reader.h"
 #include "processor/operator/persistent/reader/parquet/interval_column_reader.h"
+#include "processor/operator/persistent/reader/parquet/parquet_timestamp.h"
 #include "processor/operator/persistent/reader/parquet/string_column_reader.h"
 #include "processor/operator/persistent/reader/parquet/templated_column_reader.h"
 #include "snappy/snappy.h"
@@ -20,10 +21,6 @@ using kuzu_parquet::format::Encoding;
 using kuzu_parquet::format::PageType;
 using kuzu_parquet::format::Type;
 
-static common::date_t ParquetIntToDate(const int32_t& raw_date) {
-    return common::date_t(raw_date);
-}
-
 ColumnReader::ColumnReader(ParquetReader& reader, std::unique_ptr<common::LogicalType> type,
     const kuzu_parquet::format::SchemaElement& schema, uint64_t fileIdx, uint64_t maxDefinition,
     uint64_t maxRepeat)
@@ -239,16 +236,23 @@ std::unique_ptr<ColumnReader> ColumnReader::createReader(ParquetReader& reader,
             TemplatedColumnReader<double, TemplatedParquetValueConversion<double>>>(
             reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
     case common::LogicalTypeID::DATE:
-        return std::make_unique<CallbackColumnReader<int32_t, common::date_t, ParquetIntToDate>>(
+        return std::make_unique<
+            CallbackColumnReader<int32_t, common::date_t, ParquetTimeStampUtils::parquetIntToDate>>(
             reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+    case common::LogicalTypeID::BLOB:
     case common::LogicalTypeID::STRING:
         return std::make_unique<StringColumnReader>(
             reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
     case common::LogicalTypeID::INTERVAL:
         return std::make_unique<IntervalColumnReader>(
             reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+    case common::LogicalTypeID::TIMESTAMP:
+        return createTimestampReader(
+            reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+        // LCOV_EXCL_START
     default:
         throw common::NotImplementedException{"ColumnReader::createReader"};
+        // LCOV_EXCL_STOP
     }
 }
 
@@ -479,6 +483,63 @@ uint64_t ColumnReader::getTotalCompressedSize() {
     return chunk->meta_data.total_compressed_size;
 }
 
+// Creates the reader for a column whose logical type is TIMESTAMP, picking the conversion
+// callback from the parquet physical type (schema.type):
+//  - INT96: values go through ParquetTimeStampUtils::impalaTimestampToTimestamp.
+//  - INT64: the unit is taken from schema.logicalType.TIMESTAMP (MILLIS, MICROS or NANOS) when
+//    that is set, otherwise from the legacy schema.converted_type (TIMESTAMP_MICROS/MILLIS).
+// Throws common::NotImplementedException for any other physical type or unit.
+std::unique_ptr<ColumnReader> ColumnReader::createTimestampReader(ParquetReader& reader,
+    std::unique_ptr<common::LogicalType> type, const kuzu_parquet::format::SchemaElement& schema,
+    uint64_t fileIdx, uint64_t maxDefine, uint64_t maxRepeat) {
+    switch (schema.type) {
+    case Type::INT96: {
+        return std::make_unique<CallbackColumnReader<Int96, common::timestamp_t,
+            ParquetTimeStampUtils::impalaTimestampToTimestamp>>(
+            reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+    }
+    case Type::INT64: {
+        if (schema.__isset.logicalType && schema.logicalType.__isset.TIMESTAMP) {
+            if (schema.logicalType.TIMESTAMP.unit.__isset.MILLIS) {
+                return std::make_unique<CallbackColumnReader<int64_t, common::timestamp_t,
+                    ParquetTimeStampUtils::parquetTimestampMsToTimestamp>>(
+                    reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+            } else if (schema.logicalType.TIMESTAMP.unit.__isset.MICROS) {
+                return std::make_unique<CallbackColumnReader<int64_t, common::timestamp_t,
+                    ParquetTimeStampUtils::parquetTimestampMicrosToTimestamp>>(
+                    reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+            } else if (schema.logicalType.TIMESTAMP.unit.__isset.NANOS) {
+                return std::make_unique<CallbackColumnReader<int64_t, common::timestamp_t,
+                    ParquetTimeStampUtils::parquetTimestampNsToTimestamp>>(
+                    reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+            }
+            // LCOV_EXCL_START
+        } else if (schema.__isset.converted_type) {
+            // For legacy compatibility.
+            switch (schema.converted_type) {
+            case ConvertedType::TIMESTAMP_MICROS:
+                return std::make_unique<CallbackColumnReader<int64_t, common::timestamp_t,
+                    ParquetTimeStampUtils::parquetTimestampMicrosToTimestamp>>(
+                    reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+            case ConvertedType::TIMESTAMP_MILLIS:
+                return std::make_unique<CallbackColumnReader<int64_t, common::timestamp_t,
+                    ParquetTimeStampUtils::parquetTimestampMsToTimestamp>>(
+                    reader, std::move(type), schema, fileIdx, maxDefine, maxRepeat);
+            default:
+                throw common::NotImplementedException{"ColumnReader::createReader"};
+            }
+            // LCOV_EXCL_STOP
+        }
+    }
+    // An INT64 column that matched no timestamp unit above is unsupported; it deliberately
+    // shares the default branch's exception rather than reaching it by accident.
+    [[fallthrough]];
+    default: { // LCOV_EXCL_START
+        throw common::NotImplementedException{"ColumnReader::createReader"};
+    } // LCOV_EXCL_STOP
+    }
+}
+
 const uint64_t ParquetDecodeUtils::BITPACK_MASKS[] = {0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023,
     2047, 4095, 8191, 16383, 32767, 65535, 131071, 262143, 524287, 1048575, 2097151, 4194303,
     8388607, 16777215, 33554431, 67108863, 134217727, 268435455, 536870911, 1073741823, 2147483647,
diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp
index 66104d7bae..f49c80a579 100644
--- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp
+++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp
@@ -461,6 +461,16 @@ std::unique_ptr<common::LogicalType> ParquetReader::deriveLogicalType(
                     "DATE converted type can only be set for value of Type::INT32"};
                 // LCOV_EXCL_STOP
             }
+        case ConvertedType::TIMESTAMP_MICROS:
+        case ConvertedType::TIMESTAMP_MILLIS:
+            if (s_ele.type == Type::INT64) {
+                return std::make_unique<common::LogicalType>(common::LogicalTypeID::TIMESTAMP);
+            } else {
+                // LCOV_EXCL_START
+                throw common::CopyException(
+                    "TIMESTAMP converted type can only be set for value of Type::INT64");
+                // LCOV_EXCL_STOP
+            }
         case ConvertedType::INTERVAL: {
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::INTERVAL);
         }
@@ -490,13 +500,16 @@ std::unique_ptr<common::LogicalType> ParquetReader::deriveLogicalType(
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::INT32);
         case Type::INT64:
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::INT64);
+        case Type::INT96:
+            return std::make_unique<common::LogicalType>(common::LogicalTypeID::TIMESTAMP);
         case Type::FLOAT:
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::FLOAT);
         case Type::DOUBLE:
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::DOUBLE);
         case Type::BYTE_ARRAY:
         case Type::FIXED_LEN_BYTE_ARRAY:
-            return std::make_unique<common::LogicalType>(common::LogicalTypeID::STRING);
+            // TODO(Ziyi): Support parquet copy option(binary_as_string).
+            return std::make_unique<common::LogicalType>(common::LogicalTypeID::BLOB);
         default:
             return std::make_unique<common::LogicalType>(common::LogicalTypeID::ANY);
         }
diff --git a/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp b/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp
new file mode 100644
index 0000000000..64959cdc16
--- /dev/null
+++ b/src/processor/operator/persistent/reader/parquet/parquet_timestamp.cpp
@@ -0,0 +1,48 @@
+#include "processor/operator/persistent/reader/parquet/parquet_timestamp.h"
+
+#include <cstring>
+
+namespace kuzu {
+namespace processor {
+
+// Converts a 96-bit (INT96) parquet timestamp to a timestamp_t via its microsecond value.
+common::timestamp_t ParquetTimeStampUtils::impalaTimestampToTimestamp(const Int96& rawTS) {
+    auto impalaUS = impalaTimestampToMicroseconds(rawTS);
+    return common::Timestamp::fromEpochMicroSeconds(impalaUS);
+}
+
+// Converts an INT64 parquet timestamp expressed in microseconds.
+common::timestamp_t ParquetTimeStampUtils::parquetTimestampMicrosToTimestamp(const int64_t& rawTS) {
+    return common::Timestamp::fromEpochMicroSeconds(rawTS);
+}
+
+// Converts an INT64 parquet timestamp expressed in milliseconds.
+common::timestamp_t ParquetTimeStampUtils::parquetTimestampMsToTimestamp(const int64_t& rawTS) {
+    return common::Timestamp::fromEpochMilliSeconds(rawTS);
+}
+
+// Converts an INT64 parquet timestamp expressed in nanoseconds.
+common::timestamp_t ParquetTimeStampUtils::parquetTimestampNsToTimestamp(const int64_t& rawTS) {
+    return common::Timestamp::fromEpochNanoSeconds(rawTS);
+}
+
+// Returns the microsecond count encoded by an INT96 timestamp: value[2] is the day number
+// (shifted by JULIAN_TO_UNIX_EPOCH_DAYS to count from the Unix epoch) and the first eight bytes
+// of value hold a 64-bit nanosecond count, which is truncated to microseconds.
+int64_t ParquetTimeStampUtils::impalaTimestampToMicroseconds(const Int96& impalaTimestamp) {
+    // Copy the eight bytes out instead of casting the uint32_t array to int64_t*: the cast
+    // breaks strict aliasing and may read a misaligned int64_t. Bytes are taken in host order.
+    int64_t nanoSeconds = 0;
+    std::memcpy(&nanoSeconds, impalaTimestamp.value, sizeof(nanoSeconds));
+    const int64_t daysSinceEpoch =
+        static_cast<int64_t>(impalaTimestamp.value[2]) - JULIAN_TO_UNIX_EPOCH_DAYS;
+    return daysSinceEpoch * MICROSECONDS_PER_DAY + nanoSeconds / NANOSECONDS_PER_MICRO;
+}
+
+// Wraps the raw parquet DATE value in a date_t without modifying it.
+common::date_t ParquetTimeStampUtils::parquetIntToDate(const int32_t& raw_date) {
+    return common::date_t(raw_date);
+}
+
+} // namespace processor
+} // namespace kuzu
diff --git a/src/processor/operator/persistent/writer/parquet/column_writer.cpp b/src/processor/operator/persistent/writer/parquet/column_writer.cpp
index 83a701fb73..ac7c2bdbdb 100644
--- a/src/processor/operator/persistent/writer/parquet/column_writer.cpp
+++ b/src/processor/operator/persistent/writer/parquet/column_writer.cpp
@@ -50,6 +50,7 @@ std::unique_ptr<ColumnWriter> ColumnWriter::createWriterRecursive(
     }
     auto schemaIdx = schemas.size();
     switch (type->getLogicalTypeID()) {
+    case LogicalTypeID::UNION:
     case LogicalTypeID::STRUCT: {
         auto fields = StructType::getFields(type);
         // set up the schema element for this struct
@@ -187,6 +188,7 @@ std::unique_ptr<ColumnWriter> ColumnWriter::createWriterRecursive(
             return std::make_unique<StandardColumnWriter<int32_t, int32_t>>(writer, schemaIdx,
                 std::move(schemaPathToCreate), maxRepeatToCreate, maxDefineToCreate,
                 canHaveNullsToCreate);
+        case LogicalTypeID::TIMESTAMP:
         case LogicalTypeID::INT64:
             return std::make_unique<StandardColumnWriter<int64_t, int64_t>>(writer, schemaIdx,
                 std::move(schemaPathToCreate), maxRepeatToCreate, maxDefineToCreate,
diff --git a/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp b/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp
index 851538f653..433ad7aebf 100644
--- a/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp
+++ b/src/processor/operator/persistent/writer/parquet/parquet_writer.cpp
@@ -61,6 +61,7 @@ Type::type ParquetWriter::convertToParquetType(LogicalType* type) {
         return Type::INT32;
     case LogicalTypeID::UINT64:
     case LogicalTypeID::INT64:
+    case LogicalTypeID::TIMESTAMP:
         return Type::INT64;
     case LogicalTypeID::FLOAT:
         return Type::FLOAT;
@@ -125,6 +126,14 @@ void ParquetWriter::setSchemaProperties(LogicalType* type, SchemaElement& schema
         schemaElement.__isset.type_length = true;
         schemaElement.__isset.converted_type = true;
     } break;
+    case LogicalTypeID::TIMESTAMP: {
+        schemaElement.converted_type = ConvertedType::TIMESTAMP_MICROS;
+        schemaElement.__isset.converted_type = true;
+        schemaElement.__isset.logicalType = true;
+        schemaElement.logicalType.__isset.TIMESTAMP = true;
+        schemaElement.logicalType.TIMESTAMP.isAdjustedToUTC = false;
+        schemaElement.logicalType.TIMESTAMP.unit.__isset.MICROS = true;
+    } break;
     default:
         break;
     }
diff --git a/test/test_files/copy/copy_to_parquet.test b/test/test_files/copy/copy_to_parquet.test
index ee6b16313b..1764d22320 100644
--- a/test/test_files/copy/copy_to_parquet.test
+++ b/test/test_files/copy/copy_to_parquet.test
@@ -6,18 +6,18 @@
 -CASE TinySnbCopyToParquet
 
 -CASE CopyPersonToParquet
--STATEMENT COPY (MATCH (p:person) return p.ID, p.fName, p.gender, p.isStudent, p.isWorker, p.age, p.eyeSight, p.birthdate, p.lastJobDuration, p.workedHours, p.usedNames, p.courseScoresPerTerm, p.height) to '${DATABASE_PATH}/tinysnb.parquet';
+-STATEMENT COPY (MATCH (p:person) return p.ID, p.fName, p.gender, p.isStudent, p.isWorker, p.age, p.eyeSight, p.birthdate, p.registerTime, p.lastJobDuration, p.workedHours, p.usedNames, p.courseScoresPerTerm, p.height) to '${DATABASE_PATH}/tinysnb.parquet';
 ---- ok
 -STATEMENT LOAD FROM '${DATABASE_PATH}/tinysnb.parquet' RETURN *;
 ---- 8
-0|Alice|1|True|False|35|5.000000|1900-01-01|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|1.731000
-2|Bob|2|True|False|30|5.100000|1900-01-01|10 years 5 months 13:00:00|[12,8]|[Bobby]|[[8,9],[9,10]]|0.990000
-3|Carol|1|False|True|45|5.000000|1940-06-22|48:24:11|[4,5]|[Carmen,Fred]|[[8,10]]|1.000000
-5|Dan|2|False|True|20|4.800000|1950-07-23|10 years 5 months 13:00:00|[1,9]|[Wolfeschlegelstein,Daniel]|[[7,4],[8,8],[9]]|1.300000
-7|Elizabeth|1|False|True|20|4.700000|1980-10-26|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000
-8|Farooq|2|True|False|25|4.500000|1980-10-26|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000
-9|Greg|2|False|False|40|4.900000|1980-10-26|10 years 5 months 13:00:00|[1]|[Grad]|[[10]]|1.600000
-10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|2|False|True|83|4.900000|1990-11-27|3 years 2 days 13:02:00|[10,11,12,3,4,5,6,7]|[Ad,De,Hi,Kye,Orlan]|[[7],[10],[6,7]]|1.323000
+0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]]|1.731000
+2|Bob|2|True|False|30|5.100000|1900-01-01|2008-11-03 15:25:30.000526|10 years 5 months 13:00:00|[12,8]|[Bobby]|[[8,9],[9,10]]|0.990000
+3|Carol|1|False|True|45|5.000000|1940-06-22|1911-08-20 02:32:21|48:24:11|[4,5]|[Carmen,Fred]|[[8,10]]|1.000000
+5|Dan|2|False|True|20|4.800000|1950-07-23|2031-11-30 12:25:30|10 years 5 months 13:00:00|[1,9]|[Wolfeschlegelstein,Daniel]|[[7,4],[8,8],[9]]|1.300000
+7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|1.463000
+8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|1.510000
+9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 04:41:42|10 years 5 months 13:00:00|[1]|[Grad]|[[10]]|1.600000
+10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|2|False|True|83|4.900000|1990-11-27|2023-02-21 13:25:30|3 years 2 days 13:02:00|[10,11,12,3,4,5,6,7]|[Ad,De,Hi,Kye,Orlan]|[[7],[10],[6,7]]|1.323000
 
 -LOG CopyOrganisationToParquet
 -STATEMENT COPY (MATCH (o:organisation) RETURN o.ID, o.state) TO "${DATABASE_PATH}/organisation.parquet"
@@ -29,13 +29,13 @@
 6|{revenue: 558, location: ['very long city name', 'new york'], stock: {price: [22], volume: 99}}
 
 -LOG CopyMoviesToParquet
--STATEMENT COPY (MATCH (m:movies) RETURN m.length, m.audience) TO "${DATABASE_PATH}/movies.parquet"
+-STATEMENT COPY (MATCH (m:movies) RETURN m.*) TO "${DATABASE_PATH}/movies.parquet"
 ---- ok
 -STATEMENT LOAD FROM "${DATABASE_PATH}/movies.parquet" RETURN *;
 ---- 3
-126|{audience1=52, audience53=42}
-2544|{audience1=33}
-298|{}
+Sóló cón tu párejâ|126| this is a very very good movie|{rating: 5.300000, stars: 2, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11, u8: 220, u16: 20, u32: 1, u64: 180, hugedata: 1844674407370955161600000000.000000}|\xAA\xABinteresting\x0B|{audience1=52, audience53=42}|{tag: 0, credit: True, grade1: 0.000000, grade2: 0}
+The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie|2544| the movie is very very good|{rating: 7.000000, stars: 10, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12, u8: 12, u16: 120, u32: 55, u64: 1, hugedata: -1844674407370955161600.000000}|\xAB\xCD|{audience1=33}|{tag: 1, credit: , grade1: 8.989000, grade2: 0}
+Roma|298|the movie is very interesting and funny|{rating: 1223.000000, stars: 100, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22, u8: 1, u16: 15, u32: 200, u64: 4, hugedata: -15.000000}|pure ascii characters|{}|{tag: 1, credit: , grade1: 254.000000, grade2: 0}
 
 -LOG CopyStudyAtToParquet
 -STATEMENT COPY (match (:person)-[s:studyAt]->(:organisation) return s.*) to "${DATABASE_PATH}/studyAt.parquet"
diff --git a/test/test_files/reader/parquet/timestamp.test b/test/test_files/reader/parquet/timestamp.test
new file mode 100644
index 0000000000..4400eef64b
--- /dev/null
+++ b/test/test_files/reader/parquet/timestamp.test
@@ -0,0 +1,18 @@
+-GROUP ParquetTimestampTest
+-DATASET CSV empty
+
+--
+
+-CASE impalaTimestamp
+-STATEMENT LOAD FROM '${KUZU_ROOT_DIRECTORY}/dataset/reader/parquet/timestamp/impala_timestamp.parquet' RETURN *
+---- 3
+2023-11-02 12:00:00
+1949-10-01 13:30:45
+1965-12-30 15:15:20
+
+-CASE timestampMSNS
+-STATEMENT LOAD FROM '${KUZU_ROOT_DIRECTORY}/dataset/reader/parquet/timestamp/timestamp_ms_ns.parquet' RETURN *
+---- 3
+2021-11-02 20:33:20|2012-05-01 15:13:24
+2021-11-02 21:20:00|2021-09-30 07:46:40
+2021-11-02 22:20:00|2016-08-15 20:06:40
diff --git a/tools/nodejs_api/src_cpp/node_util.cpp b/tools/nodejs_api/src_cpp/node_util.cpp
index 85a7320201..e90a901c5b 100644
--- a/tools/nodejs_api/src_cpp/node_util.cpp
+++ b/tools/nodejs_api/src_cpp/node_util.cpp
@@ -240,7 +240,7 @@ Value Util::TransformNapiValue(
             throw Exception("Expected a date for parameter " + key + ".");
         }
         auto napiDate = napiValue.As<Napi::Date>();
-        timestamp_t timestamp = Timestamp::fromEpochMs(napiDate.ValueOf());
+        timestamp_t timestamp = Timestamp::fromEpochMilliSeconds(napiDate.ValueOf());
         auto dateVal = Timestamp::getDate(timestamp);
         return Value(dateVal);
     }
@@ -249,7 +249,7 @@ Value Util::TransformNapiValue(
             throw Exception("Expected a date for parameter " + key + ".");
         }
         auto napiDate = napiValue.As<Napi::Date>();
-        timestamp_t timestamp = Timestamp::fromEpochMs(napiDate.ValueOf());
+        timestamp_t timestamp = Timestamp::fromEpochMilliSeconds(napiDate.ValueOf());
         return Value(timestamp);
     }
     case LogicalTypeID::INTERVAL: {