Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

cast string to list #2145

Merged
merged 1 commit into from
Oct 10, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions dataset/load-from-test/bracket_fail.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"(()"
4 changes: 4 additions & 0 deletions dataset/load-from-test/change_config.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
list | str
'(this | is a word | normal not escape | ~' ( ) | , ~~ ~' )'|'try escape ~~'
'(escape | is escape success? ~~)'|' ~' ( ) do not need to escape sepeical | ()'
'(~~ ~' not work also this "~'" )'|'nu'
1 change: 1 addition & 0 deletions dataset/load-from-test/conversion_fail.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"[432412, 34234, 23424]"
1 change: 1 addition & 0 deletions dataset/load-from-test/delim_fail.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"((hello),(bdfadf),)"
1 change: 1 addition & 0 deletions dataset/load-from-test/quote_fail.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
'[23, 432, 234]'
Empty file.
7 changes: 7 additions & 0 deletions dataset/load-from-test/should_pass.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
"[[1,3,423,124,43242],[432]]"
" [ [ 1 ,3, 423 , 124,43242 ] , [ 432 ]]"
"[ [ 1 ,3, 423 , 124,43242 ] , [432]] "
"[,[], [1, 2, 3]]"
"[null, NULL, Null, nUll, nuLl, nulL, nuLL, NUll, NuLl , NulL, [1, 2, 3]]"
"[[], [], [] ]"
"[, ,]"
1 change: 1 addition & 0 deletions dataset/load-from-test/single_quote.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"['fdsfdsfe werw]"
1 change: 1 addition & 0 deletions dataset/load-from-test/single_quote2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"[[' this is a signle quote]]"
1 change: 1 addition & 0 deletions dataset/load-from-test/single_struct.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"[{32, 32]"
8 changes: 6 additions & 2 deletions src/binder/bind/bind_graph_pattern.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
#include "catalog/rel_table_schema.h"
#include "common/exception/binder.h"
#include "common/string_utils.h"
#include "function/cast/cast_utils.h"
#include "main/client_context.h"

using namespace kuzu::common;
Expand Down Expand Up @@ -424,10 +425,13 @@ std::shared_ptr<RelExpression> Binder::createRecursiveQueryRel(const parser::Rel
std::pair<uint64_t, uint64_t> Binder::bindVariableLengthRelBound(
const kuzu::parser::RelPattern& relPattern) {
auto recursiveInfo = relPattern.getRecursiveInfo();
auto lowerBound = TypeUtils::convertToUint32(recursiveInfo->lowerBound.c_str());
uint32_t lowerBound;
function::simpleIntegerCast(
recursiveInfo->lowerBound.c_str(), recursiveInfo->lowerBound.length(), lowerBound);
auto upperBound = clientContext->varLengthExtendMaxDepth;
if (!recursiveInfo->upperBound.empty()) {
upperBound = TypeUtils::convertToUint32(recursiveInfo->upperBound.c_str());
function::simpleIntegerCast(
recursiveInfo->upperBound.c_str(), recursiveInfo->upperBound.length(), upperBound);
}
if (lowerBound > upperBound) {
throw BinderException(
Expand Down
10 changes: 0 additions & 10 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,16 +8,6 @@
namespace kuzu {
namespace common {

// Parses the NUL-terminated string `data` as an unsigned 32-bit integer.
//
// Leading and trailing whitespace is tolerated. Anything else around the number makes the
// conversion fail, as does a value that does not fit in uint32_t.
//
// @param data NUL-terminated text to parse.
// @return the parsed value.
// @throws ConversionException if `data` is not a plain non-negative integer in range.
uint32_t TypeUtils::convertToUint32(const char* data) {
    std::istringstream iss(data);
    uint32_t val;
    // operator>> on an unsigned type accepts a leading '-' and wraps the value around instead of
    // failing, so a negative literal has to be rejected before extraction.
    const bool hasMinusSign = (iss >> std::ws).peek() == '-';
    // After the number only whitespace may remain; otherwise input such as "12abc" would be
    // silently accepted as 12.
    if (hasMinusSign || !(iss >> val) || !(iss >> std::ws).eof()) {
        throw ConversionException(
            StringUtils::string_format("Failed to convert {} to uint32_t", data));
    }
    return val;
}

std::string TypeUtils::castValueToString(
const LogicalType& dataType, uint8_t* value, void* vector) {
auto valueVector = reinterpret_cast<ValueVector*>(vector);
Expand Down
1 change: 1 addition & 0 deletions src/function/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ add_library(kuzu_function
built_in_aggregate_functions.cpp
built_in_vector_functions.cpp
built_in_table_functions.cpp
cast_utils.cpp
comparison_functions.cpp
find_function.cpp
scalar_macro_function.cpp
Expand Down
57 changes: 57 additions & 0 deletions src/function/cast_utils.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#include "function/cast/cast_utils.h"

namespace kuzu {
namespace function {

// Tries to interpret the `len` characters starting at `input` as a boolean.
//
// Accepted spellings (case-insensitive): "t", "1", "true" -> true; "f", "0", "false" -> false.
// The input is first passed through StringUtils::removeCStringWhiteSpaces, which presumably
// trims surrounding whitespace by adjusting the local `input`/`len` copies (confirm against
// StringUtils).
//
// @param input  start of the character range; does not need to be NUL-terminated.
// @param len    number of characters in the range.
// @param result receives the parsed value; left untouched when parsing fails.
// @return true if the text was recognised as a boolean, false otherwise.
bool tryCastToBool(const char* input, uint64_t len, bool& result) {
    common::StringUtils::removeCStringWhiteSpaces(input, len);

    // std::tolower is undefined for negative arguments other than EOF, and a plain char holding
    // a non-ASCII byte is negative where char is signed, so convert through unsigned char.
    const auto lower = [](char ch) { return std::tolower(static_cast<unsigned char>(ch)); };

    switch (len) {
    case 1: {
        const auto c = lower(input[0]);
        if (c == 't' || c == '1') {
            result = true;
            return true;
        } else if (c == 'f' || c == '0') {
            result = false;
            return true;
        }
        return false;
    }
    case 4: {
        if (lower(input[0]) == 't' && lower(input[1]) == 'r' && lower(input[2]) == 'u' &&
            lower(input[3]) == 'e') {
            result = true;
            return true;
        }
        return false;
    }
    case 5: {
        if (lower(input[0]) == 'f' && lower(input[1]) == 'a' && lower(input[2]) == 'l' &&
            lower(input[3]) == 's' && lower(input[4]) == 'e') {
            result = false;
            return true;
        }
        return false;
    }
    default:
        // No accepted spelling has any other length (this also covers the empty string).
        return false;
    }
}

// Throwing wrapper around tryCastToBool: parses the `len` characters at `input` into `result`
// and throws common::ConversionException (with the offending text in the message) when
// tryCastToBool reports failure.
// NOTE(review): the "Check warning ... not covered by tests" lines inside this body are a
// Codecov annotation captured from the review page, not C++ source.
void castStringToBool(const char* input, uint64_t len, bool& result) {
if (!tryCastToBool(input, len, result)) {
throw common::ConversionException(
"Cast failed. " + std::string{input, len} + " is not in BOOL range.");
}

Check warning on line 53 in src/function/cast_utils.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/cast_utils.cpp#L51-L53

Added lines #L51 - L53 were not covered by tests
}

} // namespace function
} // namespace kuzu
1 change: 0 additions & 1 deletion src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ namespace common {
class TypeUtils {

public:
static uint32_t convertToUint32(const char* data);
template<typename T>
static inline std::string toString(const T& val, void* valueVector = nullptr) {
static_assert(std::is_same<T, int64_t>::value || std::is_same<T, int32_t>::value ||
Expand Down
2 changes: 1 addition & 1 deletion src/include/function/cast/cast_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

#include <cassert>

#include "cast_utils.h"
#include "common/exception/runtime.h"
#include "common/string_utils.h"
#include "common/type_utils.h"
#include "common/types/blob.h"
#include "common/vector/value_vector.h"
#include "numeric_cast.h"

namespace kuzu {
namespace function {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,55 +6,14 @@
#include "common/string_utils.h"
#include "common/type_utils.h"
#include "common/types/ku_string.h"
#include "common/vector/value_vector.h"
#include "fast_float.h"
#include "numeric_limits.h"

namespace kuzu {
namespace function {

// Tries to interpret the `len` characters starting at `input` as a boolean.
//
// Recognised (ignoring case): "t", "1", "true" yield true; "f", "0", "false" yield false.
// StringUtils::removeCStringWhiteSpaces runs first and presumably narrows `input`/`len` to the
// non-whitespace part (confirm against StringUtils).
//
// @param input  start of the character range; not required to be NUL-terminated.
// @param len    number of characters in the range.
// @param result set only when the text is recognised.
// @return whether the text was recognised as a boolean.
static bool tryCastToBool(const char* input, uint64_t len, bool& result) {
    common::StringUtils::removeCStringWhiteSpaces(input, len);

    // Lower-cases the character at `idx`. The unsigned char conversion is required because
    // std::tolower has undefined behaviour for negative values, which a signed char produces for
    // any non-ASCII byte.
    const auto lowerAt = [input](uint64_t idx) {
        return std::tolower(static_cast<unsigned char>(input[idx]));
    };

    switch (len) {
    case 1: {
        const auto c = lowerAt(0);
        if (c == 't' || c == '1') {
            result = true;
            return true;
        } else if (c == 'f' || c == '0') {
            result = false;
            return true;
        }
        return false;
    }
    case 4: {
        const bool isTrue =
            lowerAt(0) == 't' && lowerAt(1) == 'r' && lowerAt(2) == 'u' && lowerAt(3) == 'e';
        if (isTrue) {
            result = true;
            return true;
        }
        return false;
    }
    case 5: {
        const bool isFalse = lowerAt(0) == 'f' && lowerAt(1) == 'a' && lowerAt(2) == 'l' &&
                             lowerAt(3) == 's' && lowerAt(4) == 'e';
        if (isFalse) {
            result = false;
            return true;
        }
        return false;
    }
    default:
        // Every accepted spelling has length 1, 4 or 5.
        return false;
    }
}

// cast string to numerical
template<typename T>
struct IntegerCastData {
using Result = T;
Expand Down Expand Up @@ -86,6 +45,11 @@ struct IntegerCastOperation {
}
};

// cast string to bool
// tryCastToBool: parses the `len` characters at `input`; on success stores the value in `result`
// and returns true, otherwise returns false and leaves `result` untouched. Accepts "t", "1",
// "true", "f", "0" and "false", case-insensitively.
bool tryCastToBool(const char* input, uint64_t len, bool& result);
// castStringToBool: same parsing as tryCastToBool, but throws common::ConversionException
// instead of returning false when the text is not a recognised boolean.
void castStringToBool(const char* input, uint64_t len, bool& result);

// cast to numerical values
// TODO: support exponent + decimal
template<class T, bool NEGATIVE, bool ALLOW_EXPONENT = false, class OP = IntegerCastOperation>
static bool integerCastLoop(const char* input, uint64_t len, T& result) {
Expand Down Expand Up @@ -190,13 +154,6 @@ static void doubleCast(const char* input, uint64_t len, T& result,
}
}

// Parses the `len` characters at `input` as a boolean into `result`.
// Delegates to tryCastToBool and turns a failed parse into a common::ConversionException whose
// message quotes the rejected text.
static void castStringToBool(const char* input, uint64_t len, bool& result) {
    const bool parsed = tryCastToBool(input, len, result);
    if (parsed) {
        return;
    }
    // Assemble the message piece by piece; the final text is "Cast failed. <input> is not in
    // BOOL range.".
    std::string message = "Cast failed. ";
    message.append(input, len);
    message += " is not in BOOL range.";
    throw common::ConversionException(message);
}

template<typename T>
static inline T castStringToNum(const char* input, uint64_t len,
const common::LogicalType& type = common::LogicalType{common::LogicalTypeID::ANY}) {
Expand All @@ -213,21 +170,21 @@ inline uint64_t castStringToNum(const char* input, uint64_t len, const common::L
}

template<>
inline uint32_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint32_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint32_t result;
simpleIntegerCast<uint32_t, false>(input, len, result, type);
return result;
}

template<>
inline uint16_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint16_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint16_t result;
simpleIntegerCast<uint16_t, false>(input, len, result, type);
return result;
}

template<>
inline uint8_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint8_t castStringToNum(const char* input, uint64_t len, const common::LogicalType& type) {
uint8_t result;
simpleIntegerCast<uint8_t, false>(input, len, result, type);
return result;
Expand Down
8 changes: 4 additions & 4 deletions src/include/function/cast/vector_cast_functions.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,10 @@ struct CastToBlobVectorFunction : public VectorCastFunction {
static vector_function_definitions getDefinitions();
};

// Vector cast function targeting BOOL (per its name). It only declares the static
// getDefinitions() hook; everything else is inherited from VectorCastFunction.
// NOTE(review): presumably getDefinitions() returns the overloads to register for this cast -
// confirm against the implementation file.
struct CastToBoolVectorFunction : public VectorCastFunction {
static vector_function_definitions getDefinitions();
};

// Vector cast function targeting DOUBLE (per its name). It only declares the static
// getDefinitions() hook; everything else is inherited from VectorCastFunction.
// NOTE(review): presumably getDefinitions() returns the overloads to register for this cast -
// confirm against the implementation file.
struct CastToDoubleVectorFunction : public VectorCastFunction {
static vector_function_definitions getDefinitions();
};
Expand Down Expand Up @@ -169,9 +173,5 @@ struct CastToUInt8VectorFunction : public VectorCastFunction {
static vector_function_definitions getDefinitions();
};

// Vector cast function targeting BOOL (per its name); declares only the static
// getDefinitions() hook on top of VectorCastFunction.
// NOTE(review): the same struct also appears earlier in this header's diff, next to
// CastToBlobVectorFunction - confirm only one declaration is kept.
struct CastToBoolVectorFunction : public VectorCastFunction {
static vector_function_definitions getDefinitions();
};

} // namespace function
} // namespace kuzu
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#pragma once

#include "common/types/types.h"
#include "function/cast/numeric_cast.h"
#include "function/cast/cast_utils.h"
#include "storage/storage_structure/in_mem_file.h"
#include "storage/store/table_copy_utils.h"
#include <arrow/array/array_base.h>
Expand Down
2 changes: 1 addition & 1 deletion src/include/storage/store/column_chunk.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#include "common/types/types.h"
#include "common/vector/value_vector.h"
#include "compression.h"
#include "function/cast/numeric_cast.h"
#include "function/cast/cast_utils.h"
#include "storage/buffer_manager/bm_file_handle.h"
#include "storage/wal/wal.h"
#include "transaction/transaction.h"
Expand Down
2 changes: 1 addition & 1 deletion src/parser/transform/transform_expression.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#include "common/string_utils.h"
#include "function/cast/numeric_cast.h"
#include "function/cast/cast_utils.h"
#include "parser/expression/parsed_case_expression.h"
#include "parser/expression/parsed_function_expression.h"
#include "parser/expression/parsed_literal_expression.h"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,8 @@ add_row : {
// escaped quote, return to quoted state and store escape position
escapePositions.push_back(position - start);
goto in_quotes;
} else if (buffer[position] == csvReaderConfig.delimiter) {
} else if (buffer[position] == csvReaderConfig.delimiter ||
buffer[position] == csvReaderConfig.listEndChar) {
// delimiter, add value
goto add_value;
} else if (isNewLine(buffer[position])) {
Expand Down
Loading
Loading