Skip to content

Commit

Permalink
Adjustments and refactoring
Browse files Browse the repository at this point in the history
- Use joinPath in DeleteNodeWithEdgesErrorTest to fix the path-related test
  failure on Windows
- Refactor CSV to Parquet converter
  • Loading branch information
rfdavid committed Jun 11, 2023
1 parent e0122be commit e10c5e1
Show file tree
Hide file tree
Showing 9 changed files with 45 additions and 49 deletions.
12 changes: 3 additions & 9 deletions test/include/graph_test/graph_test.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
#pragma once

#include <chrono>
#include <cstring>

#include "common/file_utils.h"
Expand Down Expand Up @@ -35,9 +34,7 @@ class BaseGraphTest : public Test {

virtual std::string getInputDir() = 0;

// Test fixture teardown: removes the directory at databasePath via FileUtils::removeDir.
void TearDown() override { common::FileUtils::removeDir(databasePath); }

void createDBAndConn();

Expand Down Expand Up @@ -144,13 +141,10 @@ class BaseGraphTest : public Test {

private:
// Sets databasePath to a location under the Kuzu root directory. The relative part is the
// concatenation of TestHelper::TMP_TEST_DIR, the result of getTestGroupAndName() and the
// string returned by TestHelper::getMillisecondsSuffix().
// NOTE(review): no separator is inserted between TMP_TEST_DIR and the group/name string here;
// presumably getTestGroupAndName() supplies one -- confirm.
void setDatabasePath() {
    databasePath = TestHelper::appendKuzuRootPath(
        TestHelper::TMP_TEST_DIR + getTestGroupAndName() + TestHelper::getMillisecondsSuffix());
}

void validateRelPropertyFiles(catalog::RelTableSchema* relTableSchema,
Expand Down
9 changes: 3 additions & 6 deletions test/include/test_helper/test_helper.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,21 @@ class TestHelper {
static constexpr char SCHEMA_FILE_NAME[] = "schema.cypher";
static constexpr char COPY_FILE_NAME[] = "copy.cypher";
static constexpr char PARQUET_TEMP_DATASET_PATH[] = "dataset/parquet_temp";
static constexpr char TMP_TEST_DIR[] = "test/unittest_temp";

static std::string getTmpTestDir() { return appendKuzuRootPath("test/unittest_temp"); }
// Returns the path of the "test_list" entry inside E2E_TEST_FILES_DIRECTORY, prefixed with the
// Kuzu root directory.
static std::string getTestListFile() {
    auto relativePath = std::string(E2E_TEST_FILES_DIRECTORY);
    relativePath += "/test_list";
    return appendKuzuRootPath(relativePath);
}

// FIXME: REMOVE ME
// static std::string appendParquetDatasetTempDir(const std::string& dataset) {
// return TestHelper::appendKuzuRootPath(TestHelper::PARQUET_TEMP_DATASET_PATH + dataset);
// }

// Prefixes `path` with KUZU_ROOT_DIRECTORY and a single "/" separator and returns the result.
static std::string appendKuzuRootPath(const std::string& path) {
    const std::string separator("/");
    std::string fullPath = KUZU_ROOT_DIRECTORY + separator;
    fullPath += path;
    return fullPath;
}

static std::unique_ptr<planner::LogicalPlan> getLogicalPlan(
const std::string& query, Connection& conn);

static std::string getMillisecondsSuffix();

private:
static void initializeConnection(TestQueryConfig* config, Connection& conn);
static bool testQuery(TestQueryConfig* config, Connection& conn);
Expand Down
21 changes: 11 additions & 10 deletions test/include/test_runner/csv_to_parquet_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@ namespace testing {
// The dataset directory must contain schema and copy files.
class CSVToParquetConverter {
public:
static void convertCSVDatasetToParquet(std::string& dataset);
static std::string convertCSVDatasetToParquet(
const std::string& csvDatasetPath, const std::string& parquetDatasetPath);

// Returns a copy of `dataset` in which every '/' character has been replaced by '_'.
// The argument is taken by value, so the caller's string is left untouched.
inline static std::string replaceSlashesWithUnderscores(std::string dataset) {
    for (char& ch : dataset) {
        if (ch == '/') {
            ch = '_';
        }
    }
    return dataset;
}

private:
struct CopyCommandInfo {
Expand All @@ -20,28 +26,23 @@ class CSVToParquetConverter {
};

static std::vector<CopyCommandInfo> readCopyCommandsFromCopyCypherFile(
const std::string& dataset);
const std::string& csvDatasetPath, const std::string& parquetDatasetPath);

static void convertCSVFilesToParquet(
const std::vector<CSVToParquetConverter::CopyCommandInfo>& copyCommands);

static CopyCommandInfo createCopyCommandInfo(
const std::string& dataset, std::string copyStatement);
static CopyCommandInfo createCopyCommandInfo(const std::string& csvDatasetPath,
const std::string& parquetDatasetPath, std::string copyStatement);

static arrow::Status runCSVToParquetConversion(const std::string& inputFile,
const std::string& outputFile, char delimiter, bool hasHeader);

static void copySchema(
const std::string& csvDatasetPath, const std::string& parquetDatasetPath);

static void createCopyFile(const std::string& dataset,
static void createCopyFile(const std::string& dataset, const std::string& parquetDatasetPath,
const std::vector<CSVToParquetConverter::CopyCommandInfo>& copyCommands);

// Produces a version of `dataset` with each '/' swapped for '_'. Works on the by-value copy of
// the argument and returns it.
inline static std::string replaceSlashesWithUnderscores(std::string dataset) {
    auto slashPos = dataset.find('/');
    while (slashPos != std::string::npos) {
        dataset[slashPos] = '_';
        slashPos = dataset.find('/', slashPos + 1);
    }
    return dataset;
}

static std::string extractPath(std::string& str, char delimiter);
};

Expand Down
2 changes: 1 addition & 1 deletion test/runner/cleanup_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

// Entry point of cleanup_test: prints a marker line to stdout and exits with status 0.
// The command-line arguments are accepted but not used.
int main(int argc, char** argv) {
    std::cout << "CLEANUP TEST!!!!!!!!!!!!!!!!!" << std::endl;
    return 0;
}
2 changes: 1 addition & 1 deletion test/runner/e2e_delete_create_transaction_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -305,7 +305,7 @@ TEST_F(DeleteNodeWithEdgesErrorTest, DeleteNodeWithEdgesError) {
ASSERT_EQ(result->getErrorMessage(),
"Runtime exception: Currently deleting a node with edges is not supported. node table 0 "
"nodeOffset 0 has 1 (one-to-many or many-to-many) edges for edge file: " +
databasePath + "/r-1-0.lists.");
kuzu::common::FileUtils::joinPath(databasePath, "r-1-0.lists."));
}

TEST_F(CreateDeleteInt64NodeTrxTest, MixedInsertDeleteCommitNormalExecution) {
Expand Down
18 changes: 6 additions & 12 deletions test/runner/e2e_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,17 @@ class EndToEndTest : public DBTest {
}

// Prepares the dataset location for the test.
//  1. Builds parquetTempDatasetPath under the Kuzu root from PARQUET_TEMP_DATASET_PATH, the
//     dataset name with '/' replaced by '_', the test group/name string and a millisecond
//     suffix.
//  2. Rewrites `dataset` to "dataset/<dataset>" under the Kuzu root.
//  3. When the dataset type is CSV_TO_PARQUET, creates the temporary directory, runs the CSV to
//     Parquet conversion into it and points `dataset` at the returned path.
void setUpDataset() {
    // Must be computed before `dataset` is rewritten below: it uses the original dataset name.
    parquetTempDatasetPath = TestHelper::appendKuzuRootPath(
        TestHelper::PARQUET_TEMP_DATASET_PATH +
        CSVToParquetConverter::replaceSlashesWithUnderscores(dataset) + getTestGroupAndName() +
        TestHelper::getMillisecondsSuffix());

    dataset = TestHelper::appendKuzuRootPath("dataset/" + dataset);
    if (datasetType == TestGroup::DatasetType::CSV_TO_PARQUET) {
        FileUtils::createDirIfNotExists(parquetTempDatasetPath);
        dataset =
            CSVToParquetConverter::convertCSVDatasetToParquet(dataset, parquetTempDatasetPath);
    }
}

void TearDown() override {
Expand Down
2 changes: 1 addition & 1 deletion test/runner/setup_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@

// Entry point of setup_test: prints a marker line to stdout and exits with status 0.
// The command-line arguments are accepted but not used.
int main(int argc, char** argv) {
    std::cout << "SETUP TEST!!!!!!!!!!!!!!!!!" << std::endl;
    return 0;
}
8 changes: 8 additions & 0 deletions test/test_helper/test_helper.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#include "test_helper/test_helper.h"

#include <chrono>
#include <fstream>

#include "json.hpp"
Expand Down Expand Up @@ -141,6 +142,13 @@ bool TestHelper::testQuery(TestQueryConfig* config, Connection& conn) {
return numPassedPlans == numPlans;
}

// Returns the current std::chrono::system_clock time, expressed as a whole number of
// milliseconds since the clock's epoch, formatted as a decimal string.
std::string TestHelper::getMillisecondsSuffix() {
    const auto sinceEpoch = std::chrono::system_clock::now().time_since_epoch();
    // duration_cast is spelled with its namespace: an unqualified call that passes explicit
    // template arguments is only resolved through ADL from C++20 onwards.
    const auto ms = static_cast<uint64_t>(
        std::chrono::duration_cast<std::chrono::milliseconds>(sinceEpoch).count());
    return std::to_string(ms);
}

std::unique_ptr<planner::LogicalPlan> TestHelper::getLogicalPlan(
const std::string& query, kuzu::main::Connection& conn) {
return std::move(conn.prepare(query)->logicalPlans[0]);
Expand Down
20 changes: 11 additions & 9 deletions test/test_runner/csv_to_parquet_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ CSVToParquetConverter::CopyCommandInfo CSVToParquetConverter::createCopyCommandI
CopyCommandInfo copyCommandInfo;
copyCommandInfo.table = tokens[1];
copyCommandInfo.csvFilePath = TestHelper::appendKuzuRootPath(path.string());
copyCommandInfo.parquetFilePath = parquetDatasetPath + "/" + path.stem().string() + ".parquet");
copyCommandInfo.parquetFilePath = parquetDatasetPath + "/" + path.stem().string() + ".parquet";
std::transform(copyStatement.begin(), copyStatement.end(), copyStatement.begin(),
[](unsigned char c) { return std::tolower(c); });
copyCommandInfo.csvHasHeader = (copyStatement.find("header=true") != std::string::npos);
Expand All @@ -63,9 +63,9 @@ CSVToParquetConverter::CopyCommandInfo CSVToParquetConverter::createCopyCommandI
}

std::vector<CSVToParquetConverter::CopyCommandInfo>
CSVToParquetConverter::readCopyCommandsFromCopyCypherFile(const std::string& dataset, const std::string& parquetDatasetPath) {
auto copyFile = TestHelper::appendKuzuRootPath(
FileUtils::joinPath("dataset/" + dataset, TestHelper::COPY_FILE_NAME));
CSVToParquetConverter::readCopyCommandsFromCopyCypherFile(
const std::string& csvDatasetPath, const std::string& parquetDatasetPath) {
auto copyFile = FileUtils::joinPath(csvDatasetPath, TestHelper::COPY_FILE_NAME);
std::ifstream file(copyFile);
if (!file.is_open()) {
throw TestException(
Expand All @@ -74,12 +74,13 @@ CSVToParquetConverter::readCopyCommandsFromCopyCypherFile(const std::string& dat
std::string line;
std::vector<CopyCommandInfo> copyCommands;
while (getline(file, line)) {
copyCommands.push_back(createCopyCommandInfo(dataset, parquetDatasetPath, line));
copyCommands.push_back(createCopyCommandInfo(csvDatasetPath, parquetDatasetPath, line));
}
return copyCommands;
}

void CSVToParquetConverter::createCopyFile(const std::string& dataset, const std::string& parquetDatasetPath,
void CSVToParquetConverter::createCopyFile(const std::string& dataset,
const std::string& parquetDatasetPath,
const std::vector<CSVToParquetConverter::CopyCommandInfo>& copyCommands) {
auto targetCopyCypherFile = FileUtils::joinPath(parquetDatasetPath, TestHelper::COPY_FILE_NAME);
std::ofstream outfile(targetCopyCypherFile);
Expand All @@ -105,12 +106,13 @@ void CSVToParquetConverter::convertCSVFilesToParquet(
}
}

std::string CSVToParquetConverter::convertCSVDatasetToParquet(const std::string& csvDatasetPath, const std::string& parquetDatasetPath) {
std::string CSVToParquetConverter::convertCSVDatasetToParquet(
const std::string& csvDatasetPath, const std::string& parquetDatasetPath) {
FileUtils::createDirIfNotExists(parquetDatasetPath);
std::vector<CSVToParquetConverter::CopyCommandInfo> copyCommands =
readCopyCommandsFromCopyCypherFile(csvDatasetPath);
readCopyCommandsFromCopyCypherFile(csvDatasetPath, parquetDatasetPath);
copySchema(csvDatasetPath, parquetDatasetPath);
createCopyFile(csvDatasetPath, copyCommands);
createCopyFile(csvDatasetPath, parquetDatasetPath, copyCommands);
convertCSVFilesToParquet(copyCommands);
return parquetDatasetPath;
}
Expand Down

0 comments on commit e10c5e1

Please sign in to comment.