Skip to content

Commit

Permalink
Merge pull request #2227 from kuzudb/cast
Browse files (browse the repository at this point in the history)
replace cast string to union function in driver.cpp
  • Loading branch information
AEsir777 committed Oct 18, 2023
2 parents d81c628 + dba1a3b commit 76074a5
Show file tree
Hide file tree
Showing 34 changed files with 174 additions and 119 deletions.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
9 changes: 9 additions & 0 deletions dataset/load-from-test/union/union_correct.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
"false","","255","18446744073709551615",fsdfa
" true ","432","0","-1.43241543","543fasf"
" 34234 ","4294967295","65535",-128,432
" -42342345 ","-1","-1","-129",fasf
" T ","2022-06-06","4324.123","-32768",
"null","2019-03-19","-12.3432","32768",""
"","-2147483648","1970-01-01 00:00:00.004666-10","-32769",fsdxcv
"0","0","2014-05-12 12:11:59",4324254534123134324321.4343252435,"fsaf"
" F","-4325"," Null ",18446744073709551616," dfsa"
1 change: 1 addition & 0 deletions dataset/load-from-test/union/union_error.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
fdsaf
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
2 changes: 0 additions & 2 deletions src/include/storage/store/table_copy_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,6 @@ class TableCopyUtils {

// Returns the Arrow data type that corresponds to the logical type `dataType`.
// NOTE(review): the definition is not visible here — confirm which logical types are supported
// and what happens for unsupported ones.
static std::shared_ptr<arrow::DataType> toArrowDataType(const common::LogicalType& dataType);

// Reports whether the text `value[0, length)` can be parsed as `targetType`; the parsed value
// itself is discarded. Covers BOOL, INT8/16/32/64, UINT8/16/32/64, DOUBLE, FLOAT, DATE and
// TIMESTAMP. STRING always yields true; every other type yields false.
static bool tryCast(const common::LogicalType& targetType, const char* value, uint64_t length);

// Parses `structString` for the struct type `type` and returns a list of StructFieldIdxAndValue
// entries.
// NOTE(review): presumably one entry per field found in the text, with `csvReaderConfig`
// supplying the delimiter/quoting rules — the definition is cut off in this view, so confirm.
static std::vector<StructFieldIdxAndValue> parseStructFieldNameAndValues(
common::LogicalType& type, std::string_view structString,
const common::CSVReaderConfig& csvReaderConfig);
Expand Down
143 changes: 119 additions & 24 deletions src/processor/operator/persistent/reader/csv/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -465,6 +465,124 @@ static void castStringToStruct(const char* input, uint64_t len, ValueVector* vec
}
}

// Writes `result` into `vector` at position `rowToAdd`, but only if `success` is true.
// When `success` is false the vector is left untouched.
template<typename T>
static inline void testAndSetValue(ValueVector* vector, uint64_t rowToAdd, T result, bool success) {
    if (!success) {
        return;
    }
    vector->setValue(rowToAdd, result);
}

// Attempts to parse the text `input[0, len)` as the data type of `vector` (one member field of
// a union) and, on success, stores the parsed value at position `rowToAdd`.
//
// Returns true when the text was converted and written, and false when the text does not parse
// as the field's type or the field's type is not one of the scalar types handled below.
// The null mask of `vector` is not modified here; the caller is responsible for it.
// Throws common::CopyException if the field type is STRING and the input is not valid UTF-8.
//
// Every `result` local is value-initialized: it is passed *by value* to testAndSetValue even
// when the cast fails, and copying an indeterminate value would be undefined behaviour.
static bool tryCastUnionField(
    ValueVector* vector, uint64_t rowToAdd, const char* input, uint64_t len) {
    auto& targetType = vector->dataType;
    bool success = false;
    switch (targetType.getLogicalTypeID()) {
    case LogicalTypeID::BOOL: {
        bool result{};
        success = function::tryCastToBool(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::INT64: {
        int64_t result{};
        success = function::trySimpleIntegerCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::INT32: {
        int32_t result{};
        success = function::trySimpleIntegerCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::INT16: {
        int16_t result{};
        success = function::trySimpleIntegerCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::INT8: {
        int8_t result{};
        success = function::trySimpleIntegerCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::UINT64: {
        uint64_t result{};
        success = function::trySimpleIntegerCast<uint64_t, false>(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::UINT32: {
        uint32_t result{};
        success = function::trySimpleIntegerCast<uint32_t, false>(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::UINT16: {
        uint16_t result{};
        success = function::trySimpleIntegerCast<uint16_t, false>(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::UINT8: {
        uint8_t result{};
        success = function::trySimpleIntegerCast<uint8_t, false>(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::DOUBLE: {
        double_t result{};
        success = function::tryDoubleCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::FLOAT: {
        float_t result{};
        success = function::tryDoubleCast(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::DATE: {
        date_t result{};
        // Out-parameter required by tryConvertDate; its value is not used afterwards.
        // NOTE(review): confirm tryConvertDate itself rejects trailing characters, since `pos`
        // is never compared against `len` here.
        uint64_t pos = 0;
        success = Date::tryConvertDate(input, len, pos, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::TIMESTAMP: {
        timestamp_t result{};
        success = Timestamp::tryConvertTimestamp(input, len, result);
        testAndSetValue(vector, rowToAdd, result, success);
    } break;
    case LogicalTypeID::STRING: {
        // Any text is accepted as a string as long as it is valid UTF-8.
        if (!utf8proc::Utf8Proc::isValid(input, len)) {
            throw common::CopyException{"Invalid UTF8-encoded string."};
        }
        StringVector::addString(vector, rowToAdd, input, len);
        return true;
    }
    default: {
        // Types not listed above are never selected as the union member.
        return false;
    }
    }
    return success;
}

// Converts `strVal` into the union-typed `vector` at position `rowToAdd`.
//
// The union's member fields are tried in declaration order; the first field whose type accepts
// the text receives the value and is marked non-null, and its index is written to the tag
// field. Fields tried before it (which rejected the text) are marked null. Fields after the
// selected one are not visited.
// Throws ConversionException when no member field accepts the text.
static void castStringToUnion(ValueVector* vector, std::string_view strVal, uint64_t rowToAdd) {
    auto& unionType = vector->dataType;
    union_field_idx_t matchedFieldIdx = INVALID_STRUCT_FIELD_IDX;

    for (auto candidate = 0u; candidate < UnionType::getNumFields(&unionType); candidate++) {
        auto candidateVector =
            StructVector::getFieldVector(vector, UnionType::getInternalFieldIdx(candidate)).get();
        const bool converted =
            tryCastUnionField(candidateVector, rowToAdd, strVal.data(), strVal.length());
        // A field is null exactly when it failed to accept the text.
        candidateVector->setNull(rowToAdd, !converted /* isNull */);
        if (converted) {
            matchedFieldIdx = candidate;
            break;
        }
    }

    if (matchedFieldIdx == INVALID_STRUCT_FIELD_IDX) {
        throw ConversionException{stringFormat("Could not convert to union type {}: {}.",
            LogicalTypeUtils::dataTypeToString(unionType), strVal)};
    }

    // Record which member field holds the value for this row.
    auto tagVector = StructVector::getFieldVector(vector, UnionType::TAG_FIELD_IDX).get();
    tagVector->setValue(rowToAdd, matchedFieldIdx);
    tagVector->setNull(rowToAdd, false /* isNull */);
}

void copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std::string_view strVal,
const CSVReaderConfig& csvReaderConfig) {
auto& type = vector->dataType;
Expand Down Expand Up @@ -569,30 +687,7 @@ void copyStringToVector(ValueVector* vector, uint64_t rowToAdd, std::string_view
castStringToStruct(strVal.data(), strVal.length(), vector, rowToAdd, csvReaderConfig);
} break;
case LogicalTypeID::UNION: {
union_field_idx_t selectedFieldIdx = INVALID_STRUCT_FIELD_IDX;
for (auto i = 0u; i < UnionType::getNumFields(&type); i++) {
auto internalFieldIdx = UnionType::getInternalFieldIdx(i);
if (storage::TableCopyUtils::tryCast(
*UnionType::getFieldType(&type, i), strVal.data(), strVal.length())) {
StructVector::getFieldVector(vector, internalFieldIdx)
->setNull(rowToAdd, false /* isNull */);
copyStringToVector(StructVector::getFieldVector(vector, internalFieldIdx).get(),
rowToAdd, strVal, csvReaderConfig);
selectedFieldIdx = i;
break;
} else {
StructVector::getFieldVector(vector, internalFieldIdx)
->setNull(rowToAdd, true /* isNull */);
}
}
if (selectedFieldIdx == INVALID_STRUCT_FIELD_IDX) {
throw ConversionException{stringFormat("Could not convert to union type {}: {}.",
LogicalTypeUtils::dataTypeToString(type), strVal)};
}
StructVector::getFieldVector(vector, UnionType::TAG_FIELD_IDX)
->setValue(rowToAdd, selectedFieldIdx);
StructVector::getFieldVector(vector, UnionType::TAG_FIELD_IDX)
->setNull(rowToAdd, false /* isNull */);
castStringToUnion(vector, strVal, rowToAdd);
} break;
default: { // LCOV_EXCL_START
throw NotImplementedException("BaseCSVReader::copyStringToVector");
Expand Down
65 changes: 0 additions & 65 deletions src/storage/store/table_copy_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -306,71 +306,6 @@ std::shared_ptr<arrow::DataType> TableCopyUtils::toArrowDataType(const LogicalTy
}
}

// Checks whether the text `value[0, length)` can be parsed as `targetType`. The parsed value is
// written to a throw-away local and discarded; only the success flag is returned.
// STRING always succeeds; types not listed in the switch always fail.
bool TableCopyUtils::tryCast(
    const common::LogicalType& targetType, const char* value, uint64_t length) {
    bool castable = false;
    switch (targetType.getLogicalTypeID()) {
    case LogicalTypeID::BOOL: {
        bool parsed;
        castable = function::tryCastToBool(value, length, parsed);
    } break;
    case LogicalTypeID::INT64: {
        int64_t parsed;
        castable = function::trySimpleIntegerCast(value, length, parsed);
    } break;
    case LogicalTypeID::INT32: {
        int32_t parsed;
        castable = function::trySimpleIntegerCast(value, length, parsed);
    } break;
    case LogicalTypeID::INT16: {
        int16_t parsed;
        castable = function::trySimpleIntegerCast(value, length, parsed);
    } break;
    case LogicalTypeID::INT8: {
        int8_t parsed;
        castable = function::trySimpleIntegerCast(value, length, parsed);
    } break;
    case LogicalTypeID::UINT64: {
        uint64_t parsed;
        castable = function::trySimpleIntegerCast<uint64_t, false>(value, length, parsed);
    } break;
    case LogicalTypeID::UINT32: {
        uint32_t parsed;
        castable = function::trySimpleIntegerCast<uint32_t, false>(value, length, parsed);
    } break;
    case LogicalTypeID::UINT16: {
        uint16_t parsed;
        castable = function::trySimpleIntegerCast<uint16_t, false>(value, length, parsed);
    } break;
    case LogicalTypeID::UINT8: {
        uint8_t parsed;
        castable = function::trySimpleIntegerCast<uint8_t, false>(value, length, parsed);
    } break;
    case LogicalTypeID::DOUBLE: {
        double_t parsed;
        castable = function::tryDoubleCast(value, length, parsed);
    } break;
    case LogicalTypeID::FLOAT: {
        float_t parsed;
        castable = function::tryDoubleCast(value, length, parsed);
    } break;
    case LogicalTypeID::DATE: {
        date_t parsed;
        uint64_t endPos; // out-parameter of tryConvertDate; not used here
        castable = Date::tryConvertDate(value, length, endPos, parsed);
    } break;
    case LogicalTypeID::TIMESTAMP: {
        timestamp_t parsed;
        castable = Timestamp::tryConvertTimestamp(value, length, parsed);
    } break;
    case LogicalTypeID::STRING: {
        // Any text is accepted as a string.
        castable = true;
    } break;
    default: {
        // Unsupported target type: leave `castable` false.
    } break;
    }
    return castable;
}

std::vector<StructFieldIdxAndValue> TableCopyUtils::parseStructFieldNameAndValues(
LogicalType& type, std::string_view structString, const CSVReaderConfig& csvReaderConfig) {
std::vector<StructFieldIdxAndValue> structFieldIdxAndValueParis;
Expand Down
Loading

0 comments on commit 76074a5

Please sign in to comment.