Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor copy compilation #2043

Merged
merged 1 commit into from
Sep 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion src/binder/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
add_subdirectory(bind)
add_subdirectory(bind_expression)
add_subdirectory(copy)
add_subdirectory(ddl)
add_subdirectory(expression)
add_subdirectory(query)
Expand Down
1 change: 1 addition & 0 deletions src/binder/bind/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ add_library(
bind_create_macro.cpp
bind_ddl.cpp
bind_explain.cpp
bind_file_scan.cpp
bind_graph_pattern.cpp
bind_projection_clause.cpp
bind_query.cpp
Expand Down
269 changes: 96 additions & 173 deletions src/binder/bind/bind_copy.cpp

Large diffs are not rendered by default.

140 changes: 140 additions & 0 deletions src/binder/bind/bind_file_scan.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
#include "binder/binder.h"
#include "binder/copy/bound_file_scan_info.h"
#include "binder/expression/literal_expression.h"
#include "common/exception/binder.h"
#include "common/exception/copy.h"
#include "common/string_utils.h"

using namespace kuzu::parser;
using namespace kuzu::binder;
using namespace kuzu::common;

namespace kuzu {
namespace binder {

/*
* Bind file.
*/
/**
 * Determines the file type of a single path from its extension.
 *
 * @param filePath path whose extension is inspected.
 * @return the file type reported by CopyDescription::getFileTypeFromExtension.
 * @throws CopyException if the extension maps to FileType::UNKNOWN.
 */
CopyDescription::FileType Binder::bindFileType(const std::string& filePath) {
    std::filesystem::path path(filePath);
    auto fileExtension = FileUtils::getFileExtension(path);
    auto resolvedType = CopyDescription::getFileTypeFromExtension(fileExtension);
    if (resolvedType != CopyDescription::FileType::UNKNOWN) {
        return resolvedType;
    }
    // NOTE(review): Codecov reported this error path as not covered by tests.
    throw CopyException("Unsupported file type: " + filePath);
}

/**
 * Determines the single file type shared by all given paths.
 *
 * The type of the first path becomes the expected type; every later path must resolve to
 * the same type.
 *
 * @param filePaths paths to inspect.
 * @return the common file type, or FileType::UNKNOWN when `filePaths` is empty.
 * @throws CopyException if two paths resolve to different file types (or if any single
 *         path is rejected by the single-path overload).
 */
CopyDescription::FileType Binder::bindFileType(const std::vector<std::string>& filePaths) {
    auto commonType = CopyDescription::FileType::UNKNOWN;
    for (const auto& path : filePaths) {
        const auto currentType = bindFileType(path);
        if (commonType == CopyDescription::FileType::UNKNOWN) {
            // No expectation recorded yet: this file fixes it.
            commonType = currentType;
        }
        if (currentType != commonType) {
            // NOTE(review): Codecov reported this error path as not covered by tests.
            throw CopyException("Loading files with different types is not currently supported.");
        }
    }
    return commonType;
}

/**
 * Expands every entry of `filePaths` with FileUtils::globFilePath and returns all matches,
 * concatenated in input order.
 *
 * @param filePaths file paths or glob patterns to expand.
 * @return the expanded list of file paths; never empty.
 * @throws BinderException if `filePaths` is empty, or if any pattern matches no file.
 */
static std::vector<std::string> bindFilePaths(const std::vector<std::string>& filePaths) {
    // Checked up front: with no input there is no path to name in the message, and reading
    // filePaths[0] on an empty vector would be out of bounds.
    if (filePaths.empty()) {
        throw BinderException("Invalid file path: no file path is given.");
    }
    std::vector<std::string> boundFilePaths;
    for (const auto& filePath : filePaths) {
        auto globbedFilePaths = FileUtils::globFilePath(filePath);
        if (globbedFilePaths.empty()) {
            throw BinderException{StringUtils::string_format(
                "No file found that matches the pattern: {}.", filePath)};
        }
        boundFilePaths.insert(
            boundFilePaths.end(), globbedFilePaths.begin(), globbedFilePaths.end());
    }
    // Every pattern contributed at least one match, so the result is non-empty here.
    return boundFilePaths;
}

/*
* Bind parsing options.
*/
/**
 * Converts the textual value of a CSV parsing option into the character it denotes.
 *
 * Accepted forms:
 *  - the two-character sequence backslash + 't', which yields a tab character;
 *  - a single character, which is returned as is;
 *  - a backslash followed by one character, which yields that second character.
 *
 * @param value option value as written in the query.
 * @return the character to store in the CSV reader configuration.
 * @throws BinderException if `value` is empty or does not match one of the forms above.
 */
static char bindParsingOptionValue(const std::string& value) {
    if (value == "\\t") {
        return '\t';
    }
    const auto length = value.length();
    const bool isSingleChar = length == 1;
    const bool isEscapedChar = length == 2 && value[0] == '\\';
    // An empty value is rejected too: there is no last character to return.
    if (!isSingleChar && !isEscapedChar) {
        throw BinderException("Copy csv option value can only be a single character with an "
                              "optional escape character.");
    }
    return value.back();
}

/**
 * Stores a character-valued CSV option into `csvReaderConfig`.
 *
 * The value is validated and converted first, so an invalid value throws even when
 * `optionName` is not one of the names handled below. Unknown names leave the
 * configuration untouched.
 *
 * @param csvReaderConfig configuration to update.
 * @param optionName upper-case option name: ESCAPE, DELIM, QUOTE, LIST_BEGIN or LIST_END.
 * @param optionValue textual option value.
 *
 * NOTE(review): Codecov reported the ESCAPE, QUOTE, LIST_BEGIN and LIST_END assignments
 * as not covered by tests.
 */
static void bindStringParsingOptions(
    CSVReaderConfig& csvReaderConfig, const std::string& optionName, std::string& optionValue) {
    const auto optionChar = bindParsingOptionValue(optionValue);
    if (optionName == "ESCAPE") {
        csvReaderConfig.escapeChar = optionChar;
        return;
    }
    if (optionName == "DELIM") {
        csvReaderConfig.delimiter = optionChar;
        return;
    }
    if (optionName == "QUOTE") {
        csvReaderConfig.quoteChar = optionChar;
        return;
    }
    if (optionName == "LIST_BEGIN") {
        csvReaderConfig.listBeginChar = optionChar;
        return;
    }
    if (optionName == "LIST_END") {
        csvReaderConfig.listEndChar = optionChar;
    }
}

/**
 * Reports whether `parsingOptionName` is one of the names listed in
 * CopyConstants::STRING_CSV_PARSING_OPTIONS.
 *
 * @param parsingOptionName option name to look up; compared exactly as given.
 * @return true if the name is listed, false otherwise.
 */
static bool validateStringParsingOptionName(std::string& parsingOptionName) {
    // Range-for avoids comparing a signed index with the unsigned result of std::size.
    for (const auto& knownOptionName : CopyConstants::STRING_CSV_PARSING_OPTIONS) {
        if (parsingOptionName == knownOptionName) {
            return true;
        }
    }
    return false;
}

std::unique_ptr<common::CopyDescription> Binder::bindCopyDesc(
const std::vector<std::string>& filePaths, const parsing_option_t& parsingOptions) {
auto csvReaderConfig = bindParsingOptions(parsingOptions);
auto boundFilePaths = bindFilePaths(filePaths);
auto fileType = bindFileType(boundFilePaths);
return std::make_unique<CopyDescription>(fileType, boundFilePaths, std::move(csvReaderConfig));
}

std::unique_ptr<CSVReaderConfig> Binder::bindParsingOptions(
const std::unordered_map<std::string, std::unique_ptr<ParsedExpression>>& parsingOptions) {
auto csvReaderConfig = std::make_unique<CSVReaderConfig>();
for (auto& parsingOption : parsingOptions) {
auto copyOptionName = parsingOption.first;
StringUtils::toUpper(copyOptionName);
bool isValidStringParsingOption = validateStringParsingOptionName(copyOptionName);
auto copyOptionExpression = parsingOption.second.get();
auto boundCopyOptionExpression = expressionBinder.bindExpression(*copyOptionExpression);
assert(boundCopyOptionExpression->expressionType == LITERAL);
if (copyOptionName == "HEADER") {
if (boundCopyOptionExpression->dataType.getLogicalTypeID() != LogicalTypeID::BOOL) {
throw BinderException(
"The value type of parsing csv option " + copyOptionName + " must be boolean.");

Check warning on line 118 in src/binder/bind/bind_file_scan.cpp

View check run for this annotation

Codecov / codecov/patch

src/binder/bind/bind_file_scan.cpp#L117-L118

Added lines #L117 - L118 were not covered by tests
}
csvReaderConfig->hasHeader =
((LiteralExpression&)(*boundCopyOptionExpression)).value->getValue<bool>();
} else if (boundCopyOptionExpression->dataType.getLogicalTypeID() ==
LogicalTypeID::STRING &&
isValidStringParsingOption) {
if (boundCopyOptionExpression->dataType.getLogicalTypeID() != LogicalTypeID::STRING) {
throw BinderException(
"The value type of parsing csv option " + copyOptionName + " must be string.");
}
auto copyOptionValue =
((LiteralExpression&)(*boundCopyOptionExpression)).value->getValue<std::string>();
bindStringParsingOptions(*csvReaderConfig, copyOptionName, copyOptionValue);
} else {
throw BinderException("Unrecognized parsing csv option: " + copyOptionName + ".");
}
}
return csvReaderConfig;
}

} // namespace binder
} // namespace kuzu
18 changes: 11 additions & 7 deletions src/binder/bind/bind_reading_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,25 +3,29 @@
#include "binder/query/reading_clause/bound_in_query_call.h"
#include "binder/query/reading_clause/bound_match_clause.h"
#include "binder/query/reading_clause/bound_unwind_clause.h"
#include "common/exception/binder.h"
#include "parser/query/reading_clause/in_query_call_clause.h"
#include "parser/query/reading_clause/unwind_clause.h"
#include "processor/operator/persistent/reader/csv_reader.h"

using namespace kuzu::common;
using namespace kuzu::parser;
using namespace kuzu::processor;
using namespace kuzu::catalog;

namespace kuzu {
namespace binder {

std::unique_ptr<BoundReadingClause> Binder::bindReadingClause(const ReadingClause& readingClause) {
switch (readingClause.getClauseType()) {
case ClauseType::MATCH: {
return bindMatchClause((MatchClause&)readingClause);
return bindMatchClause(readingClause);
}
case ClauseType::UNWIND: {
return bindUnwindClause((UnwindClause&)readingClause);
return bindUnwindClause(readingClause);
}
case ClauseType::InQueryCall: {
return bindInQueryCall((InQueryCallClause&)readingClause);
case ClauseType::IN_QUERY_CALL: {
return bindInQueryCall(readingClause);
}
default:
throw NotImplementedException("bindReadingClause().");
Expand Down Expand Up @@ -84,11 +88,11 @@ std::unique_ptr<BoundReadingClause> Binder::bindUnwindClause(const ReadingClause
}

std::unique_ptr<BoundReadingClause> Binder::bindInQueryCall(const ReadingClause& readingClause) {
auto& callStatement = reinterpret_cast<const parser::InQueryCallClause&>(readingClause);
auto& call = reinterpret_cast<const InQueryCallClause&>(readingClause);
auto tableFunctionDefinition =
catalog.getBuiltInTableFunction()->mathTableFunction(callStatement.getFuncName());
catalog.getBuiltInTableFunction()->mathTableFunction(call.getFuncName());
auto inputValues = std::vector<Value>{};
for (auto& parameter : callStatement.getParameters()) {
for (auto& parameter : call.getParameters()) {
auto boundExpr = expressionBinder.bindLiteralExpression(*parameter);
inputValues.push_back(*reinterpret_cast<LiteralExpression*>(boundExpr.get())->getValue());
}
Expand Down
5 changes: 5 additions & 0 deletions src/binder/binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,11 @@ table_id_t Binder::bindNodeTableID(const std::string& tableName) const {
return catalogContent->getTableID(tableName);
}

std::shared_ptr<Expression> Binder::createVariable(
const std::string& name, LogicalTypeID logicalTypeID) {
return createVariable(name, LogicalType{logicalTypeID});
}

std::shared_ptr<Expression> Binder::createVariable(
const std::string& name, const LogicalType& dataType) {
if (scope->contains(name)) {
Expand Down
2 changes: 1 addition & 1 deletion src/binder/bound_statement_visitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ void BoundStatementVisitor::visitReadingClause(const BoundReadingClause& reading
case ClauseType::UNWIND: {
visitUnwind(readingClause);
} break;
case ClauseType::InQueryCall: {
case ClauseType::IN_QUERY_CALL: {
visitInQueryCall(readingClause);
} break;
default:
Expand Down
8 changes: 0 additions & 8 deletions src/binder/copy/CMakeLists.txt

This file was deleted.

22 changes: 0 additions & 22 deletions src/binder/copy/bound_copy_from.cpp

This file was deleted.

28 changes: 20 additions & 8 deletions src/include/binder/binder.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ class BoundSetPropertyInfo;
class BoundDeleteInfo;
class BoundWithClause;
class BoundReturnClause;
class BoundFileScanInfo;

// BinderScope keeps track of expressions in scope and their aliases. We maintain the order of
// expressions in
Expand Down Expand Up @@ -79,6 +80,8 @@ class Binder {

common::table_id_t bindNodeTableID(const std::string& tableName) const;

std::shared_ptr<Expression> createVariable(
const std::string& name, common::LogicalTypeID logicalTypeID);
std::shared_ptr<Expression> createVariable(
const std::string& name, const common::LogicalType& dataType);

Expand Down Expand Up @@ -106,19 +109,28 @@ class Binder {
/*** bind copy ***/
std::unique_ptr<BoundStatement> bindCopyFromClause(const parser::Statement& statement);
std::unique_ptr<BoundStatement> bindCopyNodeFrom(
std::unique_ptr<common::CopyDescription> copyDescription,
catalog::TableSchema* tableSchema);
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
std::unique_ptr<BoundStatement> bindCopyRelFrom(
std::unique_ptr<common::CopyDescription> copyDescription,
catalog::TableSchema* tableSchema);
expression_vector bindCopyNodeColumns(
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
std::unique_ptr<BoundStatement> bindCopyRdfRelFrom(
std::unique_ptr<common::CopyDescription> copyDesc, catalog::TableSchema* tableSchema);
expression_vector bindExpectedNodeFileColumns(
catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);
expression_vector bindCopyRelColumns(
// expression_vector bindCopyRelColumns(
// catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);

expression_vector bindExpectedRelFileColumns(
catalog::TableSchema* tableSchema, common::CopyDescription::FileType fileType);
std::unique_ptr<BoundStatement> bindCopyToClause(const parser::Statement& statement);

/*** bind file scan ***/
std::unique_ptr<common::CopyDescription> bindCopyDesc(
const std::vector<std::string>& filePaths, const parser::parsing_option_t& parsingOptions);
std::unique_ptr<common::CSVReaderConfig> bindParsingOptions(
const std::unordered_map<std::string, std::unique_ptr<parser::ParsedExpression>>&
parsingOptions);
const parser::parsing_option_t& parsingOptions);
static common::CopyDescription::FileType bindFileType(
const std::vector<std::string>& filePaths);
static common::CopyDescription::FileType bindFileType(const std::string& filePath);

/*** bind query ***/
std::unique_ptr<BoundRegularQuery> bindQuery(const parser::RegularQuery& regularQuery);
Expand Down
Loading