Skip to content

Commit

Permalink
Refactor copy compilation
Browse files Browse the repository at this point in the history
  • Loading branch information
andyfengHKU committed Sep 17, 2023
1 parent 9a8b6a4 commit ccea071
Show file tree
Hide file tree
Showing 50 changed files with 815 additions and 468 deletions.
1 change: 0 additions & 1 deletion src/binder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_subdirectory(bind)
add_subdirectory(bind_expression)
add_subdirectory(copy)
add_subdirectory(ddl)
add_subdirectory(expression)
add_subdirectory(query)
Expand Down
1 change: 1 addition & 0 deletions src/binder/bind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_library(
bind_create_macro.cpp
bind_ddl.cpp
bind_explain.cpp
bind_file_scan.cpp
bind_graph_pattern.cpp
bind_projection_clause.cpp
bind_query.cpp
Expand Down
269 changes: 96 additions & 173 deletions src/binder/bind/bind_copy.cpp

Large diffs are not rendered by default.

140 changes: 140 additions & 0 deletions src/binder/bind/bind_file_scan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include "binder/binder.h"
#include "binder/copy/bound_file_scan_info.h"
#include "binder/expression/literal_expression.h"
#include "common/exception/binder.h"
#include "common/exception/copy.h"
#include "common/string_utils.h"

using namespace kuzu::parser;
using namespace kuzu::binder;
using namespace kuzu::common;

namespace kuzu {
namespace binder {

/*
* Bind file.
*/
// Resolves the file type of a single path from its extension.
// Throws CopyException when the extension maps to FileType::UNKNOWN.
CopyDescription::FileType Binder::bindFileType(const std::string& filePath) {
    std::filesystem::path path(filePath);
    auto fileExtension = FileUtils::getFileExtension(path);
    auto boundType = CopyDescription::getFileTypeFromExtension(fileExtension);
    if (boundType != CopyDescription::FileType::UNKNOWN) {
        return boundType;
    }
    throw CopyException("Unsupported file type: " + filePath);
}

// Resolves the single file type shared by every path in filePaths.
// The type of the first path becomes the expected type; any later path that binds to a different
// type raises CopyException. Returns FileType::UNKNOWN when filePaths is empty.
CopyDescription::FileType Binder::bindFileType(const std::vector<std::string>& filePaths) {
    auto commonType = CopyDescription::FileType::UNKNOWN;
    for (const auto& path : filePaths) {
        const auto currentType = bindFileType(path);
        if (commonType == CopyDescription::FileType::UNKNOWN) {
            // Nothing recorded yet: adopt this file's type as the expectation.
            commonType = currentType;
            continue;
        }
        if (currentType != commonType) {
            throw CopyException("Loading files with different types is not currently supported.");
        }
    }
    return commonType;
}

// Expands every entry of filePaths through FileUtils::globFilePath and returns all results
// concatenated in the order the entries were given.
// Throws BinderException when filePaths is empty, or when an entry expands to no file.
static std::vector<std::string> bindFilePaths(const std::vector<std::string>& filePaths) {
    // Reject an empty list up front. Inside the loop every entry either throws or contributes at
    // least one path, so an empty result is only possible for an empty input; the previous
    // post-loop check formatted filePaths[0] into its message in exactly that case, indexing an
    // empty vector.
    if (filePaths.empty()) {
        throw BinderException{"Invalid file path: no file path is specified."};
    }
    std::vector<std::string> boundFilePaths;
    for (const auto& filePath : filePaths) {
        auto globbedFilePaths = FileUtils::globFilePath(filePath);
        if (globbedFilePaths.empty()) {
            throw BinderException{StringUtils::string_format(
                "No file found that matches the pattern: {}.", filePath)};
        }
        boundFilePaths.insert(
            boundFilePaths.end(), globbedFilePaths.begin(), globbedFilePaths.end());
    }
    return boundFilePaths;
}

/*
* Bind parsing options.
*/
// Converts the textual value of a CSV parsing option into the single character it denotes.
//   - The two-character text backslash + 't' yields a tab character.
//   - A one-character value yields that character.
//   - A two-character value starting with a backslash yields its second character.
// Any other value, including the empty string, throws BinderException.
static char bindParsingOptionValue(const std::string& value) {
    if (value == "\\t") {
        return '\t';
    }
    const bool isSingleChar = value.length() == 1;
    const bool isEscapedChar = value.length() == 2 && value[0] == '\\';
    // An empty value used to slip past the length check and index value[length() - 1] with
    // length() == 0, reading out of range. It is now rejected like any other malformed value.
    if (!isSingleChar && !isEscapedChar) {
        throw BinderException("Copy csv option value can only be a single character with an "
                              "optional escape character.");
    }
    return value.back();
}

// Stores the character described by optionValue into the CSVReaderConfig field selected by
// optionName (ESCAPE, DELIM, QUOTE, LIST_BEGIN or LIST_END). Any other name leaves the config
// untouched. The value is converted before the name is inspected, so a malformed value throws
// regardless of the option name.
static void bindStringParsingOptions(
    CSVReaderConfig& csvReaderConfig, const std::string& optionName, std::string& optionValue) {
    const auto boundChar = bindParsingOptionValue(optionValue);
    if (optionName == "ESCAPE") {
        csvReaderConfig.escapeChar = boundChar;
        return;
    }
    if (optionName == "DELIM") {
        csvReaderConfig.delimiter = boundChar;
        return;
    }
    if (optionName == "QUOTE") {
        csvReaderConfig.quoteChar = boundChar;
        return;
    }
    if (optionName == "LIST_BEGIN") {
        csvReaderConfig.listBeginChar = boundChar;
        return;
    }
    if (optionName == "LIST_END") {
        csvReaderConfig.listEndChar = boundChar;
    }
}

// Returns true when parsingOptionName equals one of the entries of
// CopyConstants::STRING_CSV_PARSING_OPTIONS. The comparison is exact.
static bool validateStringParsingOptionName(const std::string& parsingOptionName) {
    // Range-for avoids comparing a signed int index against the unsigned result of std::size().
    for (const auto& supportedOptionName : CopyConstants::STRING_CSV_PARSING_OPTIONS) {
        if (parsingOptionName == supportedOptionName) {
            return true;
        }
    }
    return false;
}

std::unique_ptr<common::CopyDescription> Binder::bindCopyDesc(
const std::vector<std::string>& filePaths, const parsing_option_t& parsingOptions) {
auto csvReaderConfig = bindParsingOptions(parsingOptions);
auto boundFilePaths = bindFilePaths(filePaths);
auto fileType = bindFileType(boundFilePaths);
return std::make_unique<CopyDescription>(fileType, boundFilePaths, std::move(csvReaderConfig));
}

std::unique_ptr<CSVReaderConfig> Binder::bindParsingOptions(
const std::unordered_map<std::string, std::unique_ptr<ParsedExpression>>& parsingOptions) {
auto csvReaderConfig = std::make_unique<CSVReaderConfig>();
for (auto& parsingOption : parsingOptions) {
auto copyOptionName = parsingOption.first;
StringUtils::toUpper(copyOptionName);
bool isValidStringParsingOption = validateStringParsingOptionName(copyOptionName);
auto copyOptionExpression = parsingOption.second.get();
auto boundCopyOptionExpression = expressionBinder.bindExpression(*copyOptionExpression);
assert(boundCopyOptionExpression->expressionType == LITERAL);
if (copyOptionName == "HEADER") {
if (boundCopyOptionExpression->dataType.getLogicalTypeID() != LogicalTypeID::BOOL) {
throw BinderException(
"The value type of parsing csv option " + copyOptionName + " must be boolean.");

Check warning on line 118 in src/binder/bind/bind_file_scan.cpp

View check run for this annotation

Codecov / codecov/patch

src/binder/bind/bind_file_scan.cpp#L117-L118

Added lines #L117 - L118 were not covered by tests
}
csvReaderConfig->hasHeader =
((LiteralExpression&)(*boundCopyOptionExpression)).value->getValue<bool>();
} else if (boundCopyOptionExpression->dataType.getLogicalTypeID() ==
LogicalTypeID::STRING &&
isValidStringParsingOption) {
if (boundCopyOptionExpression->dataType.getLogicalTypeID() != LogicalTypeID::STRING) {
throw BinderException(
"The value type of parsing csv option " + copyOptionName + " must be string.");
}
auto copyOptionValue =
((LiteralExpression&)(*boundCopyOptionExpression)).value->getValue<std::string>();
bindStringParsingOptions(*csvReaderConfig, copyOptionName, copyOptionValue);
} else {
throw BinderException("Unrecognized parsing csv option: " + copyOptionName + ".");
}
}
return csvReaderConfig;
}

} // namespace binder
} // namespace kuzu
18 changes: 11 additions & 7 deletions src/binder/bind/bind_reading_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,29 @@
#include "binder/query/reading_clause/bound_in_query_call.h"
#include "binder/query/reading_clause/bound_match_clause.h"
#include "binder/query/reading_clause/bound_unwind_clause.h"
#include "common/exception/binder.h"
#include "parser/query/reading_clause/in_query_call_clause.h"
#include "parser/query/reading_clause/unwind_clause.h"
#include "processor/operator/persistent/reader/csv_reader.h"

using namespace kuzu::common;
using namespace kuzu::parser;
using namespace kuzu::processor;
using namespace kuzu::catalog;

namespace kuzu {
namespace binder {

std::unique_ptr<BoundReadingClause> Binder::bindReadingClause(const ReadingClause& readingClause) {
switch (readingClause.getClauseType()) {
case ClauseType::MATCH: {
return bindMatchClause((MatchClause&)readingClause);
return bindMatchClause(readingClause);
}
case ClauseType::UNWIND: {
return bindUnwindClause((UnwindClause&)readingClause);
return bindUnwindClause(readingClause);
}
case ClauseType::InQueryCall: {
return bindInQueryCall((InQueryCallClause&)readingClause);
case ClauseType::IN_QUERY_CALL: {
return bindInQueryCall(readingClause);
}
default:
throw NotImplementedException("bindReadingClause().");
Expand Down Expand Up @@ -84,11 +88,11 @@ std::unique_ptr<BoundReadingClause> Binder::bindUnwindClause(const ReadingClause
}

std::unique_ptr<BoundReadingClause> Binder::bindInQueryCall(const ReadingClause& readingClause) {
auto& callStatement = reinterpret_cast<const parser::InQueryCallClause&>(readingClause);
auto& call = reinterpret_cast<const InQueryCallClause&>(readingClause);
auto tableFunctionDefinition =
catalog.getBuiltInTableFunction()->mathTableFunction(callStatement.getFuncName());
catalog.getBuiltInTableFunction()->mathTableFunction(call.getFuncName());
auto inputValues = std::vector<Value>{};
for (auto& parameter : callStatement.getParameters()) {
for (auto& parameter : call.getParameters()) {
auto boundExpr = expressionBinder.bindLiteralExpression(*parameter);
inputValues.push_back(*reinterpret_cast<LiteralExpression*>(boundExpr.get())->getValue());
}
Expand Down
5 changes: 5 additions & 0 deletions src/binder/binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ table_id_t Binder::bindNodeTableID(const std::string& tableName) const {
return catalogContent->getTableID(tableName);
}

std::shared_ptr<Expression> Binder::createVariable(
const std::string& name, LogicalTypeID logicalTypeID) {
return createVariable(name, LogicalType{logicalTypeID});
}

std::shared_ptr<Expression> Binder::createVariable(
const std::string& name, const LogicalType& dataType) {
if (scope->contains(name)) {
Expand Down
2 changes: 1 addition & 1 deletion src/binder/bound_statement_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void BoundStatementVisitor::visitReadingClause(const BoundReadingClause& reading
case ClauseType::UNWIND: {
visitUnwind(readingClause);
} break;
case ClauseType::InQueryCall: {
case ClauseType::IN_QUERY_CALL: {
visitInQueryCall(readingClause);
} break;
default:
Expand Down
8 changes: 0 additions & 8 deletions src/binder/copy/CMakeLists.txt

This file was deleted.

22 changes: 0 additions & 22 deletions src/binder/copy/bound_copy_from.cpp

This file was deleted.

28 changes: 20 additions & 8 deletions src/include/binder/binder.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class BoundSetPropertyInfo;
class BoundDeleteInfo;
class BoundWithClause;
class BoundReturnClause;
class BoundFileScanInfo;

// BinderScope keeps track of expressions in scope and their aliases. We maintain the order of
// expressions in
Expand Down Expand Up @@ -79,6 +80,8 @@ class Binder {

common::table_id_t bindNodeTableID(const std::string& tableName) const;

std::shared_ptr<Expression> createVariable(
const std::string& name, common::LogicalTypeID logicalTypeID);
std::shared_ptr<Expression> createVariable(
const std::string& name, const common::LogicalType& dataType);

Expand Down Expand Up @@ -106,19 +109,28 @@ class Binder {
/*** bind copy ***/
std::unique_ptr<BoundStatement> bindCopyFromClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCopyNodeFrom(
std::unique_ptr<common::CopyDescription> copyDescription,
catalog::TableSchema* tableSchema);
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
std::unique_ptr<BoundStatement> bindCopyRelFrom(
std::unique_ptr<common::CopyDescription> copyDescription,
catalog::TableSchema* tableSchema);
expression_vector bindCopyNodeColumns(
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
std::unique_ptr<BoundStatement> bindCopyRdfRelFrom(
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
expression_vector bindExpectedNodeFileColumns(
catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);
expression_vector bindCopyRelColumns(
// expression_vector bindCopyRelColumns(
// catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);

expression_vector bindExpectedRelFileColumns(
catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);
std::unique_ptr<BoundStatement> bindCopyToClause(const parser::Statement& statement);

/*** bind file scan ***/
std::unique_ptr<common::CopyDescription> bindCopyDesc(
const std::vector<std::string>& filePaths, const parser::parsing_option_t& parsingOptions);
std::unique_ptr<common::CSVReaderConfig> bindParsingOptions(
const std::unordered_map<std::string, std::unique_ptr<parser::ParsedExpression>>&
parsingOptions);
const parser::parsing_option_t& parsingOptions);
static common::CopyDescription::FileType bindFileType(
const std::vector<std::string>& filePaths);
static common::CopyDescription::FileType bindFileType(const std::string& filePath);

/*** bind query ***/
std::unique_ptr<BoundRegularQuery> bindQuery(const parser::RegularQuery& regularQuery);
Expand Down
Loading

0 comments on commit ccea071

Please sign in to comment.