From a3214443a0124ce2a1783ab6dbd7012c9584d2cc Mon Sep 17 00:00:00 2001 From: Manh Dinh Date: Thu, 29 Feb 2024 13:51:29 -0500 Subject: [PATCH] Rework CSV_TO_PARQUET testing feature --- dataset/long-string-pk-tests/schema.cypher | 2 +- src/common/file_system/file_system.cpp | 4 + src/common/file_system/local_file_system.cpp | 12 ++ src/include/common/file_system/file_system.h | 2 + .../common/file_system/local_file_system.h | 2 + .../test_runner/csv_to_parquet_converter.h | 64 +++--- test/runner/e2e_test.cpp | 23 ++- .../copy/copy_pk_long_string_parquet.test | 1 - ...isabled => interactive-short-parquet.test} | 32 +-- ...uet.disabled => lsqb_queries_parquet.test} | 1 - .../test_files/order_by/order_by_parquet.test | 1 - .../read_list/large_adj_list_parquet.test | 1 - .../shortest_path/bfs_sssp_parquet.test | 59 +++--- .../tinysnb/parquet/tinysnb_parquet.test | 6 +- test/test_runner/CMakeLists.txt | 1 + test/test_runner/csv_to_parquet_converter.cpp | 191 ++++++++++++++++++ 16 files changed, 316 insertions(+), 86 deletions(-) rename test/test_files/ldbc/ldbc-interactive/{interactive-short-parquet.disabled => interactive-short-parquet.test} (69%) rename test/test_files/lsqb/{lsqb_queries_parquet.disabled => lsqb_queries_parquet.test} (99%) create mode 100644 test/test_runner/csv_to_parquet_converter.cpp diff --git a/dataset/long-string-pk-tests/schema.cypher b/dataset/long-string-pk-tests/schema.cypher index c2d3476cb8..014a774e00 100644 --- a/dataset/long-string-pk-tests/schema.cypher +++ b/dataset/long-string-pk-tests/schema.cypher @@ -1,2 +1,2 @@ -CREATE NODE TABLE Person(name STRING, spouse STRING, PRIMARY KEY (name)) +CREATE NODE TABLE Person (name STRING, spouse STRING, PRIMARY KEY (name)) create REL TABLE Knows (FROM Person TO Person); diff --git a/src/common/file_system/file_system.cpp b/src/common/file_system/file_system.cpp index 6bdca6dd63..bc595b94d8 100644 --- a/src/common/file_system/file_system.cpp +++ b/src/common/file_system/file_system.cpp @@ -7,6 +7,10 @@ void FileSystem::overwriteFile(const std::string& /*from*/, const std::string& / KU_UNREACHABLE; } +void FileSystem::copyFile(const std::string& /*from*/, const std::string& /*to*/) const { + KU_UNREACHABLE; +} + void FileSystem::createDir(const std::string& /*dir*/) const { KU_UNREACHABLE; } diff --git a/src/common/file_system/local_file_system.cpp b/src/common/file_system/local_file_system.cpp index 1f91d006b0..71ff32ff9a 100644 --- a/src/common/file_system/local_file_system.cpp +++ b/src/common/file_system/local_file_system.cpp @@ -147,6 +147,18 @@ void LocalFileSystem::overwriteFile(const std::string& from, const std::string& } } +void LocalFileSystem::copyFile(const std::string& from, const std::string& to) const { + if (!fileOrPathExists(from)) + return; + std::error_code errorCode; + if (!std::filesystem::copy_file(from, to, std::filesystem::copy_options::none, errorCode)) { + // LCOV_EXCL_START + throw Exception(stringFormat( + "Error copying file {} to {}. 
ErrorMessage: {}", from, to, errorCode.message())); + // LCOV_EXCL_STOP + } +} + void LocalFileSystem::createDir(const std::string& dir) const { try { if (std::filesystem::exists(dir)) { diff --git a/src/include/common/file_system/file_system.h b/src/include/common/file_system/file_system.h index dce4f2c0db..8e7860a134 100644 --- a/src/include/common/file_system/file_system.h +++ b/src/include/common/file_system/file_system.h @@ -29,6 +29,8 @@ class KUZU_API FileSystem { virtual void overwriteFile(const std::string& from, const std::string& to) const; + virtual void copyFile(const std::string& from, const std::string& to) const; + virtual void createDir(const std::string& dir) const; virtual void removeFileIfExists(const std::string& path) const; diff --git a/src/include/common/file_system/local_file_system.h b/src/include/common/file_system/local_file_system.h index caad85db2b..c29f7b577d 100644 --- a/src/include/common/file_system/local_file_system.h +++ b/src/include/common/file_system/local_file_system.h @@ -36,6 +36,8 @@ class LocalFileSystem final : public FileSystem { void overwriteFile(const std::string& from, const std::string& to) const override; + void copyFile(const std::string& from, const std::string& to) const override; + void createDir(const std::string& dir) const override; void removeFileIfExists(const std::string& path) const override; diff --git a/test/include/test_runner/csv_to_parquet_converter.h b/test/include/test_runner/csv_to_parquet_converter.h index 9a2fe67114..54b9d80551 100644 --- a/test/include/test_runner/csv_to_parquet_converter.h +++ b/test/include/test_runner/csv_to_parquet_converter.h @@ -2,6 +2,8 @@ #include #include +#include "main/kuzu.h" + namespace kuzu { namespace testing { @@ -9,41 +11,51 @@ namespace testing { // The dataset directory must contain schema and copy files. class CSVToParquetConverter { public: - static std::string convertCSVDatasetToParquet( - const std::string& csvDatasetPath, const std::string& parquetDatasetPath); + explicit CSVToParquetConverter( + std::string csvDatasetPath, std::string parquetDatasetPath, uint64_t bufferPoolSize) + : csvDatasetPath{csvDatasetPath}, parquetDatasetPath{parquetDatasetPath}, + bufferPoolSize{bufferPoolSize} {} + + void convertCSVDatasetToParquet(); - inline static std::string replaceSlashesWithUnderscores(std::string dataset) { - std::replace(dataset.begin(), dataset.end(), '/', '_'); - return dataset; - } +private: + void copySchemaFile(); + void createTableInfo(std::string schemaFile); + void readCopyCommandsFromCSVDataset(); + void createCopyFile(); + void convertCSVFilesToParquet(); private: - struct CopyCommandInfo { - std::string table; + struct TableInfo { + std::string name; std::string csvFilePath; std::string parquetFilePath; - bool csvHasHeader; - char delimiter; + // get cypher query to convert csv file to parquet file + virtual std::string getConverterQuery() const = 0; }; - static std::vector readCopyCommandsFromCopyCypherFile( - const std::string& csvDatasetPath, const std::string& parquetDatasetPath); - - static void convertCSVFilesToParquet( - const std::vector& copyCommands); - - static CopyCommandInfo createCopyCommandInfo( - const std::string& parquetDatasetPath, std::string copyStatement); - - // TODO: This has to be re-implemented to not rely on arrow, instead of on our own parquet lib. 
- // static arrow::Status runCSVToParquetConversion(const std::string& inputFile, - // const std::string& outputFile, char delimiter, bool hasHeader); + struct NodeTableInfo final : public TableInfo { + std::string primaryKey; + // get converter query for node table + std::string getConverterQuery() const override; + }; - static void copySchema( - const std::string& csvDatasetPath, const std::string& parquetDatasetPath); + struct RelTableInfo final : public TableInfo { + std::shared_ptr fromTable; + std::shared_ptr toTable; + // get converter query for rel table + std::string getConverterQuery() const override; + }; - static void createCopyFile(const std::string& parquetDatasetPath, - const std::vector& copyCommands); +private: + std::string csvDatasetPath; + std::string parquetDatasetPath; + std::vector> tables; + std::unordered_map> tableNameMap; + uint64_t bufferPoolSize; + std::unique_ptr systemConfig; + std::unique_ptr tempDb; + std::unique_ptr tempConn; }; } // namespace testing diff --git a/test/runner/e2e_test.cpp b/test/runner/e2e_test.cpp index 4b397a2287..3cf9c9d0ac 100644 --- a/test/runner/e2e_test.cpp +++ b/test/runner/e2e_test.cpp @@ -30,17 +30,23 @@ class EndToEndTest : public DBTest { } void setUpDataset() { - parquetTempDatasetPath = generateParquetTempDatasetPath(); - dataset = TestHelper::appendKuzuRootPath("dataset/" + dataset); if (datasetType == TestGroup::DatasetType::CSV_TO_PARQUET) { - throw NotImplementedException("CSV_TO_PARQUET dataset type is not implemented yet."); + auto csvDatasetPath = TestHelper::appendKuzuRootPath("dataset/" + dataset); + parquetTempDatasetPath = generateParquetTempDatasetPath(); + CSVToParquetConverter converter(csvDatasetPath, parquetTempDatasetPath, bufferPoolSize); + converter.convertCSVDatasetToParquet(); + dataset = parquetTempDatasetPath; + } else { + dataset = TestHelper::appendKuzuRootPath("dataset/" + dataset); } } void TearDown() override { std::filesystem::remove_all(databasePath); - std::filesystem::remove_all(parquetTempDatasetPath); BaseGraphTest::removeIEDBPath(); + if (datasetType == TestGroup::DatasetType::CSV_TO_PARQUET) { + std::filesystem::remove_all(parquetTempDatasetPath); + } } void TestBody() override { runTest(testStatements, checkpointWaitTimeout, connNames); } @@ -56,10 +62,11 @@ class EndToEndTest : public DBTest { std::set connNames; std::string generateParquetTempDatasetPath() { - return TestHelper::appendKuzuRootPath( - TestHelper::PARQUET_TEMP_DATASET_PATH + - CSVToParquetConverter::replaceSlashesWithUnderscores(dataset) + getTestGroupAndName() + - TestHelper::getMillisecondsSuffix()); + std::string datasetName = dataset; + std::replace(datasetName.begin(), datasetName.end(), '/', '_'); + return TestHelper::appendKuzuRootPath(TestHelper::PARQUET_TEMP_DATASET_PATH + datasetName + + "_" + getTestGroupAndName() + "_" + + TestHelper::getMillisecondsSuffix()); } }; diff --git a/test/test_files/copy/copy_pk_long_string_parquet.test b/test/test_files/copy/copy_pk_long_string_parquet.test index 0a34d037ff..c70f0e7058 100644 --- a/test/test_files/copy/copy_pk_long_string_parquet.test +++ b/test/test_files/copy/copy_pk_long_string_parquet.test @@ -1,6 +1,5 @@ -GROUP LongStringPKTest -DATASET PARQUET CSV_TO_PARQUET(long-string-pk-tests) --SKIP -- diff --git a/test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.disabled b/test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.test similarity index 69% rename from test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.disabled rename to 
test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.test index d89129ac1b..1a2cafdfa1 100644 --- a/test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.disabled +++ b/test/test_files/ldbc/ldbc-interactive/interactive-short-parquet.test @@ -1,9 +1,7 @@ -# FIXME: this test is failing on Parquet dataset -GROUP LDBCTest -DATASET PARQUET CSV_TO_PARQUET(ldbc-sf01) -BUFFER_POOL_SIZE 1073741824 -SKIP - -- -CASE LDBCInteractiveShortParquet @@ -22,6 +20,20 @@ ---- 1 Mahinda|Perera|19891203|119.235.7.103|Firefox|1353|male|20100214153210447 +# IS2 should be changed to use Kleene Star relationship once that is implemented. +# The 'Case When' statement should be supported as coalesce(). +-LOG IS2 +-STATEMENT MATCH (:Person {id: 21990232555803})<-[:Post_hasCreator|:Comment_hasCreator]-(message) + WITH message,message.id AS messageId, message.creationDate AS messageCreationDate + ORDER BY messageCreationDate DESC, messageId ASC LIMIT 10 + MATCH (message)-[:replyOf_Post|:replyOf_Comment*1..2]->(post:Post), (post)-[:Post_hasCreator]->(person) + RETURN messageId, CASE WHEN message.imageFile is NULL THEN message.content ELSE message.imageFile END AS messageContent, messageCreationDate, post.id AS postId, person.id AS personId, person.firstName AS personFirstName, person.lastName AS personLastName + ORDER BY messageCreationDate DESC, messageId ASC; +---- 3 +1030792343617|About H•A•M, d Kanye West, released as the first single from their collaborati|20120823142307823|1030792343610|21990232556463|Victor|Antonescu +1030792343876|thx|20120818110412997|1030792343869|21990232555803|Carlos|Lopez +1030792343986|fine|20120810030544084|1030792343978|21990232556837|Bing|Li + -LOG IS3 -STATEMENT MATCH (n:Person {id: 1129})-[r:knows]-(friend) RETURN friend.id AS personId, @@ -58,17 +70,11 @@ Mahinda|Perera|19891203|119.235.7.103|Firefox|1353|male|20100214153210447 2199023256077|Ibrahim Bare|Ousmane # IS6 should be changed to use Kleene Star relationship once that is implemented -# This query is currently commented out, but will work as soon as multilabelled recursive rels are merged to master. 
-# -LOG IS6 -# -STATEMENT MATCH (m:Comment {id: 962072675825 })-[:replyOf_Post|:replyOf_Comment*1..2]->(p:Post)<-[:containerOf]-(f:Forum)-[:hasModerator]->(mod:Person) -# RETURN -# f.id AS forumId, -# f.title AS forumTitle, -# mod.id AS moderatorId, -# mod.firstName AS moderatorFirstName, -# mod.lastName AS moderatorLastName; -# ---- 1 -# 687194767491|Wall of Faisal Malik|21990232556585|Faisal|Malik +-LOG IS6 +-STATEMENT MATCH (m:Comment {id: 962072675825 })-[:replyOf_Post|:replyOf_Comment*1..2]->(p:Post)<-[:containerOf]-(f:Forum)-[:hasModerator]->(mod:Person) + RETURN f.id AS forumId,f.title AS forumTitle,mod.id AS moderatorId,mod.firstName AS moderatorFirstName, mod.lastName AS moderatorLastName; +---- 1 +687194767491|Wall of Faisal Malik|21990232556585|Faisal|Malik -LOG IS7 -STATEMENT MATCH (m:Post:Comment {id: 962072971887})<-[:replyOf_Comment|:replyOf_Post]-(c:Comment)-[:Comment_hasCreator]->(p:Person) diff --git a/test/test_files/lsqb/lsqb_queries_parquet.disabled b/test/test_files/lsqb/lsqb_queries_parquet.test similarity index 99% rename from test/test_files/lsqb/lsqb_queries_parquet.disabled rename to test/test_files/lsqb/lsqb_queries_parquet.test index 52497b65ed..1694656414 100644 --- a/test/test_files/lsqb/lsqb_queries_parquet.disabled +++ b/test/test_files/lsqb/lsqb_queries_parquet.test @@ -2,7 +2,6 @@ -DATASET PARQUET CSV_TO_PARQUET(lsqb-sf01) -BUFFER_POOL_SIZE 1073741824 -SKIP - -- -CASE LSQBTestParquet diff --git a/test/test_files/order_by/order_by_parquet.test b/test/test_files/order_by/order_by_parquet.test index 7e59732e50..6b587d3c68 100644 --- a/test/test_files/order_by/order_by_parquet.test +++ b/test/test_files/order_by/order_by_parquet.test @@ -10,7 +10,6 @@ -GROUP OrderByTests -DATASET PARQUET CSV_TO_PARQUET(order-by-tests) --SKIP -- diff --git a/test/test_files/read_list/large_adj_list_parquet.test b/test/test_files/read_list/large_adj_list_parquet.test index 5012aad215..629908f427 100644 --- a/test/test_files/read_list/large_adj_list_parquet.test +++ b/test/test_files/read_list/large_adj_list_parquet.test @@ -1,6 +1,5 @@ -GROUP EndToEndReadLargeListsTest -DATASET PARQUET CSV_TO_PARQUET(read-list-tests/large-list) --SKIP -- diff --git a/test/test_files/shortest_path/bfs_sssp_parquet.test b/test/test_files/shortest_path/bfs_sssp_parquet.test index 6fdaa31dba..fde3cc4253 100644 --- a/test/test_files/shortest_path/bfs_sssp_parquet.test +++ b/test/test_files/shortest_path/bfs_sssp_parquet.test @@ -1,42 +1,41 @@ -GROUP ShortestPathTest -DATASET PARQUET CSV_TO_PARQUET(shortest-path-tests) --SKIP # Timestamp and Date columns are written as strings in parquet file. Parquet reader should support casting operations. 
-- -CASE BfsParquet -LOG SingleSourceAllDestinationsSSP --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN a.fName, b.fName, rels(r), properties(nodes(r), 'fName') +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN a.fName, b.fName, properties(nodes(r), 'fName') ---- 7 -Alice|Bob|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1)]|[] -Alice|Carol|[(0:0)-{_LABEL: knows, _ID: 1:1}->(0:2)]|[] -Alice|Dan|[(0:0)-{_LABEL: knows, _ID: 1:2}->(0:3)]|[] -Alice|Elizabeth|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4)]|[Bob] -Alice|Farooq|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:13}->(0:5)]|[Bob,Elizabeth] -Alice|Greg|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:14}->(0:6)]|[Bob,Elizabeth] -Alice|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[(0:0)-{_LABEL: knows, _ID: 1:0}->(0:1),(0:1)-{_LABEL: knows, _ID: 1:6}->(0:4),(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7)]|[Bob,Elizabeth] +Alice|Bob|[] +Alice|Carol|[] +Alice|Dan|[] +Alice|Elizabeth|[Bob] +Alice|Farooq|[Bob,Elizabeth] +Alice|Greg|[Bob,Elizabeth] +Alice|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[Bob,Elizabeth] -LOG AllSourcesSingleDestinationQuery --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE b.fName = 'Alice' RETURN a.fName, b.fName, rels(r), properties(nodes(r), 'usedNames') +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE b.fName = 'Alice' RETURN a.fName, b.fName, properties(nodes(r), 'usedNames') ---- 6 -Bob|Alice|[(0:0)-{_LABEL: knows, _ID: 1:3}->(0:1)]|[] -Carol|Alice|[(0:0)-{_LABEL: knows, _ID: 1:7}->(0:2)]|[] -Dan|Alice|[(0:0)-{_LABEL: knows, _ID: 1:10}->(0:3)]|[] -Elizabeth|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:15}->(0:4)]|[[Ad,De,Hi,Kye,Orlan]] -Farooq|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:17}->(0:5)]|[[Ad,De,Hi,Kye,Orlan]] -Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[(0:0)-{_LABEL: knows, _ID: 1:20}->(0:7)]|[] +Bob|Alice|[] +Carol|Alice|[] +Dan|Alice|[] +Elizabeth|Alice|[[Ad,De,Hi,Kye,Orlan]] +Farooq|Alice|[[Ad,De,Hi,Kye,Orlan]] +Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[] -LOG SingleSourceWithAllProperties --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN length(r), b, a +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' RETURN length(r), b.*, a.* ---- 7 -1|{_ID: 0:1, _LABEL: person, ID: 2, fName: Bob, gender: 2, isStudent: True, isWorker: False, age: 30, eyeSight: 5.100000, birthdate: 1900-01-01, registerTime: 2008-11-03 15:25:30.000526, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [12,8], usedNames: [Bobby], courseScoresPerTerm: [[8,9],[9,10]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -1|{_ID: 0:2, _LABEL: person, ID: 3, fName: Carol, gender: 1, isStudent: False, isWorker: True, age: 45, eyeSight: 5.000000, birthdate: 1940-06-22, registerTime: 1911-08-20 02:32:21, lastJobDuration: 48:24:11, workedHours: [4,5], usedNames: [Carmen,Fred], courseScoresPerTerm: [[8,10]]}|{_ID: 
0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -1|{_ID: 0:3, _LABEL: person, ID: 5, fName: Dan, gender: 2, isStudent: False, isWorker: True, age: 20, eyeSight: 4.800000, birthdate: 1950-07-23, registerTime: 2031-11-30 12:25:30, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1,9], usedNames: [Wolfeschlegelstein,Daniel], courseScoresPerTerm: [[7,4],[8,8],[9]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -2|{_ID: 0:4, _LABEL: person, ID: 7, fName: Elizabeth, gender: 1, isStudent: False, isWorker: True, age: 20, eyeSight: 4.700000, birthdate: 1980-10-26, registerTime: 1976-12-23 11:21:42, lastJobDuration: 48:24:11, workedHours: [2], usedNames: [Ein], courseScoresPerTerm: [[6],[7],[8]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -3|{_ID: 0:5, _LABEL: person, ID: 8, fName: Farooq, gender: 2, isStudent: True, isWorker: False, age: 25, eyeSight: 4.500000, birthdate: 1980-10-26, registerTime: 1972-07-31 13:22:30.678559, lastJobDuration: 00:18:00.024, workedHours: [3,4,5,6,7], usedNames: [Fesdwe], courseScoresPerTerm: [[8]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -3|{_ID: 0:6, _LABEL: person, ID: 9, fName: Greg, gender: 2, isStudent: False, isWorker: False, age: 40, eyeSight: 4.900000, birthdate: 1980-10-26, registerTime: 1976-12-23 04:41:42, lastJobDuration: 10 years 5 months 13:00:00.000024, workedHours: [1], usedNames: [Grad], courseScoresPerTerm: [[10]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} -3|{_ID: 0:7, _LABEL: person, ID: 10, fName: Hubert Blaine Wolfeschlegelsteinhausenbergerdorff, gender: 2, isStudent: False, isWorker: True, age: 83, eyeSight: 4.900000, birthdate: 1990-11-27, registerTime: 2023-02-21 13:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,11,12,3,4,5,6,7], usedNames: [Ad,De,Hi,Kye,Orlan], courseScoresPerTerm: [[7],[10],[6,7]]}|{_ID: 0:0, _LABEL: person, ID: 0, fName: Alice, gender: 1, isStudent: True, isWorker: False, age: 35, eyeSight: 5.000000, birthdate: 1900-01-01, registerTime: 2011-08-20 11:25:30, lastJobDuration: 3 years 2 days 13:02:00, workedHours: [10,5], usedNames: [Aida], courseScoresPerTerm: [[10,8],[6,7,8]]} +1|2|Bob|2|True|False|30|5.100000|1900-01-01|2008-11-03 15:25:30.000526|10 years 5 months 
13:00:00|[12,8]|[Bobby]|[[8,9],[9,10]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +1|3|Carol|1|False|True|45|5.000000|1940-06-22|1911-08-20 02:32:21|48:24:11|[4,5]|[Carmen,Fred]|[[8,10]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +1|5|Dan|2|False|True|20|4.800000|1950-07-23|2031-11-30 12:25:30|10 years 5 months 13:00:00|[1,9]|[Wolfeschlegelstein,Daniel]|[[7,4],[8,8],[9]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +2|7|Elizabeth|1|False|True|20|4.700000|1980-10-26|1976-12-23 11:21:42|48:24:11|[2]|[Ein]|[[6],[7],[8]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +3|10|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|2|False|True|83|4.900000|1990-11-27|2023-02-21 13:25:30|3 years 2 days 13:02:00|[10,11,12,3,4,5,6,7]|[Ad,De,Hi,Kye,Orlan]|[[7],[10],[6,7]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +3|8|Farooq|2|True|False|25|4.500000|1980-10-26|1972-07-31 13:22:30.678559|00:18:00.024|[3,4,5,6,7]|[Fesdwe]|[[8]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] +3|9|Greg|2|False|False|40|4.900000|1980-10-26|1976-12-23 04:41:42|10 years 5 months 13:00:00|[1]|[Grad]|[[10]]|0|Alice|1|True|False|35|5.000000|1900-01-01|2011-08-20 11:25:30|3 years 2 days 13:02:00|[10,5]|[Aida]|[[10,8],[6,7,8]] -LOG SingleSourceSingleDestination -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' AND b.fName = 'Bob' RETURN a.fName, b.fName, length(r) @@ -44,13 +43,13 @@ Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|Alice|[(0:0)-{_LABEL: knows, _ Alice|Bob|1 -LOG SingleSourceAllDestinations2 --STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..2]->(b:person) WHERE a.fName = 'Elizabeth' RETURN a.fName, b.fName, rels(r), properties(nodes(r), '_Label') +-STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..2]->(b:person) WHERE a.fName = 'Elizabeth' RETURN a.fName, b.fName, properties(nodes(r), '_Label') ---- 5 -Elizabeth|Alice|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:20}->(0:0)]|[person] -Elizabeth|Dan|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7),(0:7)-{_LABEL: knows, _ID: 1:21}->(0:3)]|[person] -Elizabeth|Farooq|[(0:4)-{_LABEL: knows, _ID: 1:13}->(0:5)]|[] -Elizabeth|Greg|[(0:4)-{_LABEL: knows, _ID: 1:14}->(0:6)]|[] -Elizabeth|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[(0:4)-{_LABEL: knows, _ID: 1:15}->(0:7)]|[] +Elizabeth|Alice|[person] +Elizabeth|Dan|[person] +Elizabeth|Farooq|[] +Elizabeth|Greg|[] +Elizabeth|Hubert Blaine Wolfeschlegelsteinhausenbergerdorff|[] -LOG SingleSourceUnreachableDestination -STATEMENT MATCH (a:person)-[r:knows* SHORTEST 1..30]->(b:person) WHERE a.fName = 'Alice' AND b.fName = 'Alice11' RETURN a.fName, b.fName, r diff --git a/test/test_files/tinysnb/parquet/tinysnb_parquet.test b/test/test_files/tinysnb/parquet/tinysnb_parquet.test index 18378e4dab..bff6331e0e 100644 --- a/test/test_files/tinysnb/parquet/tinysnb_parquet.test +++ b/test/test_files/tinysnb/parquet/tinysnb_parquet.test @@ -1,9 +1,7 @@ -# one sample from each tinysnb csv test CSV_TO_PARQUET -# https://github.com/kuzudb/kuzu/issues/1627 +# This test group is currently disabled since exporting fixed-list to parquet is not 
supported yet
 -GROUP TinySnbParquet
--SKIP
 -DATASET PARQUET CSV_TO_PARQUET(tinysnb)
-
+-SKIP
 --
 
 -CASE TinySnbParquet
diff --git a/test/test_runner/CMakeLists.txt b/test/test_runner/CMakeLists.txt
index 594f6028b6..f1703a4572 100644
--- a/test/test_runner/CMakeLists.txt
+++ b/test/test_runner/CMakeLists.txt
@@ -1,6 +1,7 @@
 add_library(
         test_runner
         OBJECT
+        csv_to_parquet_converter.cpp
         test_parser.cpp
         test_runner.cpp
 )
diff --git a/test/test_runner/csv_to_parquet_converter.cpp b/test/test_runner/csv_to_parquet_converter.cpp
new file mode 100644
index 0000000000..a76047b2c9
--- /dev/null
+++ b/test/test_runner/csv_to_parquet_converter.cpp
@@ -0,0 +1,191 @@
+#include "test_runner/csv_to_parquet_converter.h"
+
+#include
+
+#include "common/exception/test.h"
+#include "common/file_system/local_file_system.h"
+#include "common/string_utils.h"
+#include "spdlog/spdlog.h"
+#include "test_helper/test_helper.h"
+
+using namespace kuzu::common;
+
+namespace kuzu {
+namespace testing {
+
+void CSVToParquetConverter::copySchemaFile() {
+    LocalFileSystem localFileSystem;
+    auto csvSchemaFile =
+        localFileSystem.joinPath(csvDatasetPath, std::string(TestHelper::SCHEMA_FILE_NAME));
+    auto parquetSchemaFile =
+        localFileSystem.joinPath(parquetDatasetPath, std::string(TestHelper::SCHEMA_FILE_NAME));
+    if (!localFileSystem.fileOrPathExists(parquetSchemaFile)) {
+        localFileSystem.copyFile(csvSchemaFile, parquetSchemaFile);
+    } else {
+        localFileSystem.overwriteFile(csvSchemaFile, parquetSchemaFile);
+    }
+    createTableInfo(parquetSchemaFile);
+}
+
+void CSVToParquetConverter::createTableInfo(std::string schemaFile) {
+    std::ifstream file(schemaFile);
+    if (!file.is_open()) {
+        throw TestException(stringFormat("Error opening file: {}, errno: {}.", schemaFile, errno));
+    }
+    // This implementation stays as a temporary solution to create copy statements for rel tables
+    // We'll switch to use table_info once that function can provide everything needed
+    // table_info is mentioned in this issue https://github.com/kuzudb/kuzu/issues/2991
+    std::string line;
+    while (getline(file, line)) {
+        auto tokens = StringUtils::split(line, " ");
+
+        std::transform(tokens[0].begin(), tokens[0].end(), tokens[0].begin(),
+            [](unsigned char c) { return std::tolower(c); });
+        std::transform(tokens[2].begin(), tokens[2].end(), tokens[2].begin(),
+            [](unsigned char c) { return std::tolower(c); });
+        if (tokens[0] != "create" || tokens[2] != "table") {
+            throw TestException(stringFormat("Invalid CREATE statement: {}", line));
+        }
+
+        auto tableType = tokens[1];
+        std::transform(tableType.begin(), tableType.end(), tableType.begin(),
+            [](unsigned char c) { return std::tolower(c); });
+        auto tableName = tokens[3];
+
+        std::shared_ptr<TableInfo> table;
+        if (tableType == "node") {
+            auto nodeTable = std::make_shared<NodeTableInfo>();
+            size_t primaryKeyPos = line.find("PRIMARY KEY");
+            if (primaryKeyPos != std::string::npos) {
+                size_t openParenPos = line.find("(", primaryKeyPos);
+                size_t closeParenPos = line.find(")", primaryKeyPos);
+                if (openParenPos != std::string::npos && closeParenPos != std::string::npos &&
+                    openParenPos < closeParenPos) {
+                    nodeTable->primaryKey =
+                        line.substr(openParenPos + 1, closeParenPos - openParenPos - 1);
+                    table = nodeTable;
+                } else {
+                    throw TestException(
+                        stringFormat("PRIMARY KEY is not defined in node table: {}", line));
+                }
+            } else {
+                throw TestException(
+                    stringFormat("PRIMARY KEY is not defined in node table: {}", line));
+            }
+        } else {
+            auto relTable = std::make_shared<RelTableInfo>();
+            size_t startPos = line.find("FROM");
+            if (startPos != std::string::npos) {
+                size_t endPos = line.find_first_of(",)", startPos);
+                if (endPos != std::string::npos) {
+                    auto tmp = StringUtils::splitBySpace(line.substr(startPos, endPos - startPos));
+                    relTable->fromTable =
+                        std::dynamic_pointer_cast<NodeTableInfo>(tableNameMap[tmp[1]]);
+                    relTable->toTable =
+                        std::dynamic_pointer_cast<NodeTableInfo>(tableNameMap[tmp[3]]);
+                    table = relTable;
+                } else {
+                    throw TestException(stringFormat(
+                        "FROM node and TO node are not defined in rel table: {}", line));
+                }
+            } else {
+                throw TestException(
+                    stringFormat("FROM node and TO node are not defined in rel table: {}", line));
+            }
+        }
+        table->name = tableName;
+        tables.push_back(table);
+        tableNameMap[tableName] = table;
+    }
+}
+
+std::string extractPath(std::string& str, char delimiter) {
+    std::string::size_type posStart = str.find_first_of(delimiter);
+    std::string::size_type posEnd = str.find_last_of(delimiter);
+    return str.substr(posStart + 1, posEnd - posStart - 1);
+}
+
+void CSVToParquetConverter::readCopyCommandsFromCSVDataset() {
+    auto csvCopyFile =
+        LocalFileSystem::joinPath(csvDatasetPath, std::string(TestHelper::COPY_FILE_NAME));
+    std::ifstream file(csvCopyFile);
+    if (!file.is_open()) {
+        throw TestException(stringFormat("Error opening file: {}, errno: {}.", csvCopyFile, errno));
+    }
+    std::string line;
+    while (getline(file, line)) {
+        auto tokens = StringUtils::split(line, " ");
+        auto path = std::filesystem::path(extractPath(tokens[3], '"'));
+        auto table = tableNameMap[tokens[1]];
+        table->csvFilePath = TestHelper::appendKuzuRootPath(path.string());
+        auto parquetFileName = path.stem().string() + ".parquet";
+        table->parquetFilePath = parquetDatasetPath + "/" + parquetFileName;
+    }
+}
+
+void CSVToParquetConverter::createCopyFile() {
+    readCopyCommandsFromCSVDataset();
+    auto parquetCopyFile =
+        LocalFileSystem::joinPath(parquetDatasetPath, std::string(TestHelper::COPY_FILE_NAME));
+    std::ofstream outfile(parquetCopyFile);
+    if (!outfile.is_open()) {
+        throw TestException(
+            stringFormat("Error opening file: {}, errno: {}.", parquetCopyFile, errno));
+    }
+    std::string kuzuRootPath = KUZU_ROOT_DIRECTORY + std::string("/");
+    for (auto table : tables) {
+        auto cmd = stringFormat("COPY {} FROM \"{}\";", table->name,
+            table->parquetFilePath.substr(kuzuRootPath.length()));
+        outfile << cmd << '\n';
+    }
+}
+
+void CSVToParquetConverter::convertCSVFilesToParquet() {
+    // Load CSV Files to temp database
+    TestHelper::executeScript(
+        LocalFileSystem::joinPath(csvDatasetPath, std::string(TestHelper::SCHEMA_FILE_NAME)),
+        *tempConn);
+    TestHelper::executeScript(
+        LocalFileSystem::joinPath(csvDatasetPath, std::string(TestHelper::COPY_FILE_NAME)),
+        *tempConn);
+
+    spdlog::set_level(spdlog::level::info);
+    for (auto table : tables) {
+        spdlog::info("Converting: {} to {}", table->csvFilePath, table->parquetFilePath);
+        auto cmd = table->getConverterQuery();
+        tempConn->query(cmd);
+        spdlog::info("Executed query: {}", cmd);
+    }
+}
+
+void CSVToParquetConverter::convertCSVDatasetToParquet() {
+    LocalFileSystem localFileSystem;
+    if (!localFileSystem.fileOrPathExists(parquetDatasetPath)) {
+        localFileSystem.createDir(parquetDatasetPath);
+    }
+
+    copySchemaFile();
+    createCopyFile();
+
+    systemConfig = std::make_unique<main::SystemConfig>(bufferPoolSize);
+    std::string tempDatabasePath = TestHelper::appendKuzuRootPath(
+        std::string(TestHelper::TMP_TEST_DIR) + "csv_to_parquet_converter_" +
+        TestHelper::getMillisecondsSuffix());
+    tempDb = std::make_unique<main::Database>(tempDatabasePath, *systemConfig);
+    tempConn = std::make_unique<main::Connection>(tempDb.get());
+
+    convertCSVFilesToParquet();
+    std::filesystem::remove_all(tempDatabasePath);
+}
+
+std::string CSVToParquetConverter::NodeTableInfo::getConverterQuery() const {
+    return stringFormat("COPY (MATCH (a:{}) RETURN a.*) TO \"{}\";", name, parquetFilePath);
+}
+
+std::string CSVToParquetConverter::RelTableInfo::getConverterQuery() const {
+    return stringFormat("COPY (MATCH (a)-[e:{}]->(b) RETURN a.{}, b.{}, e.*) TO \"{}\";", name,
+        fromTable->primaryKey, toTable->primaryKey, parquetFilePath);
+}
+
+} // namespace testing
+} // namespace kuzu
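Note (illustration only, not part of the patch): with the tinysnb dataset used by the tests above, and assuming that person's primary key column is ID and that the CSV files are named vPerson.csv and eKnows.csv, the queries produced by NodeTableInfo::getConverterQuery and RelTableInfo::getConverterQuery would look roughly like:

COPY (MATCH (a:person) RETURN a.*) TO "<parquetTempDatasetPath>/vPerson.parquet";
COPY (MATCH (a)-[e:knows]->(b) RETURN a.ID, b.ID, e.*) TO "<parquetTempDatasetPath>/eKnows.parquet";

The generated copy file (TestHelper::COPY_FILE_NAME) in the temp dataset would then contain statements such as COPY person FROM "<parquetTempDatasetPath>/vPerson.parquet"; (with the kuzu root prefix stripped), so the end-to-end test loads the converted Parquet files through the ordinary dataset path.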