diff --git a/src/include/storage/store/column_chunk.h b/src/include/storage/store/column_chunk.h index 47d62ce93a..4ca3bf31f9 100644 --- a/src/include/storage/store/column_chunk.h +++ b/src/include/storage/store/column_chunk.h @@ -101,11 +101,6 @@ class ColumnChunk { // Returns the size of the data type in bytes static uint32_t getDataTypeSizeInChunk(common::LogicalType& dataType); - template - void setValueFromString(const char* value, uint64_t length, common::offset_t pos) { - setValue(function::castStringToNum(value, length), pos); - } - static inline common::page_idx_t getNumPagesForBytes(uint64_t numBytes) { return (numBytes + common::BufferPoolConstants::PAGE_4KB_SIZE - 1) / common::BufferPoolConstants::PAGE_4KB_SIZE; @@ -270,24 +265,5 @@ struct ColumnChunkFactory { bool enableCompression, common::CSVReaderConfig* csvReaderConfig = nullptr); }; -// BOOL -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos); -// FIXED_LIST -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos); -// INTERVAL -template<> -void ColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos); -// DATE -template<> -void ColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos); -// TIMESTAMP -template<> -void ColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos); - } // namespace storage } // namespace kuzu diff --git a/src/include/storage/store/string_column_chunk.h b/src/include/storage/store/string_column_chunk.h index a29e348d66..e65f66ffb4 100644 --- a/src/include/storage/store/string_column_chunk.h +++ b/src/include/storage/store/string_column_chunk.h @@ -19,10 +19,6 @@ class StringColumnChunk : public ColumnChunk { void update(common::ValueVector* vector, common::vector_idx_t vectorIdx) override; - template - void setValueFromString(const char* value, uint64_t length, uint64_t pos) { - throw common::NotImplementedException("VarSizedColumnChunk::setValueFromString"); - } template T getValue(common::offset_t pos) const { throw common::NotImplementedException("VarSizedColumnChunk::getValue"); @@ -31,7 +27,7 @@ class StringColumnChunk : public ColumnChunk { common::page_idx_t flushOverflowBuffer(BMFileHandle* dataFH, common::page_idx_t startPageIdx); inline InMemOverflowFile* getOverflowFile() { return overflowFile.get(); } - inline common::offset_t getLastOffsetInPage() { return overflowCursor.offsetInPage; } + inline common::offset_t getLastOffsetInPage() const { return overflowCursor.offsetInPage; } private: void appendStringColumnChunk(StringColumnChunk* other, common::offset_t startPosInOtherChunk, @@ -39,20 +35,13 @@ class StringColumnChunk : public ColumnChunk { void write(const common::Value& val, uint64_t posToWrite) override; + void setValueFromString(const char* value, uint64_t length, uint64_t pos); + private: std::unique_ptr overflowFile; PageByteCursor overflowCursor; }; -// BLOB -template<> -void StringColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos); -// STRING -template<> -void StringColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos); - // STRING template<> std::string StringColumnChunk::getValue(common::offset_t pos) const; diff --git a/src/include/storage/store/struct_column_chunk.h b/src/include/storage/store/struct_column_chunk.h index 278d7b10c6..8ac4508e1b 100644 --- a/src/include/storage/store/struct_column_chunk.h +++ b/src/include/storage/store/struct_column_chunk.h @@ -16,11 +16,6 @@ class StructColumnChunk : public ColumnChunk { void append(common::ValueVector* vector, common::offset_t startPosInChunk) final; private: - // TODO(Guodong): These methods are duplicated from `InMemStructColumnChunk`, which will be - // merged later. - void setStructFields(const char* value, uint64_t length, uint64_t pos); - void setValueToStructField(common::offset_t pos, const std::string& structFieldValue, - common::struct_field_idx_t structFiledIdx); void write(const common::Value& val, uint64_t posToWrite) final; }; diff --git a/src/include/storage/store/var_list_column_chunk.h b/src/include/storage/store/var_list_column_chunk.h index 9bc286a0f2..5abd9fdb0e 100644 --- a/src/include/storage/store/var_list_column_chunk.h +++ b/src/include/storage/store/var_list_column_chunk.h @@ -38,8 +38,6 @@ class VarListColumnChunk : public ColumnChunk { return varListDataColumnChunk.dataColumnChunk.get(); } - void setValueFromString(const char* value, uint64_t length, uint64_t pos); - void resetToEmpty() final; void append(common::ValueVector* vector, common::offset_t startPosInChunk) final; diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 34034eceab..edb61a5af7 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -492,45 +492,6 @@ std::unique_ptr ColumnChunkFactory::createColumnChunk( return chunk; } -// Bool -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { - std::istringstream boolStream{std::string(value)}; - bool booleanVal; - boolStream >> std::boolalpha >> booleanVal; - setValue(booleanVal, pos); -} - -// Fixed list -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { - auto fixedListVal = - TableCopyUtils::getArrowFixedList(value, 1, length - 2, dataType, *csvReaderConfig); - memcpy(buffer.get() + pos * numBytesPerValue, fixedListVal.get(), numBytesPerValue); -} - -// Interval -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { - auto val = Interval::fromCString(value, length); - setValue(val, pos); -} - -// Date -template<> -void ColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { - auto val = Date::fromCString(value, length); - setValue(val, pos); -} - -// Timestamp -template<> -void ColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos) { - auto val = Timestamp::fromCString(value, length); - setValue(val, pos); -} - offset_t ColumnChunk::getOffsetInBuffer(offset_t pos) const { auto numElementsInAPage = PageUtils::getNumElementsInAPage(numBytesPerValue, false /* hasNull */); diff --git a/src/storage/store/string_column_chunk.cpp b/src/storage/store/string_column_chunk.cpp index b40f88dbab..63c18e2f79 100644 --- a/src/storage/store/string_column_chunk.cpp +++ b/src/storage/store/string_column_chunk.cpp @@ -64,7 +64,7 @@ void StringColumnChunk::update(ValueVector* vector, vector_idx_t vectorIdx) { nullChunk->setNull(offsetInChunk, vector->isNull(pos)); if (!vector->isNull(pos)) { auto kuStr = vector->getValue(pos); - setValueFromString(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk); + setValueFromString(kuStr.getAsString().c_str(), kuStr.len, offsetInChunk); } } } @@ -106,27 +106,10 @@ void StringColumnChunk::write(const Value& val, uint64_t posToWrite) { return; } auto strVal = val.getValue(); - setValueFromString(strVal.c_str(), strVal.length(), posToWrite); + setValueFromString(strVal.c_str(), strVal.length(), posToWrite); } -// BLOB -template<> -void StringColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos) { - if (length > BufferPoolConstants::PAGE_4KB_SIZE) { - throw CopyException( - ExceptionMessage::overLargeStringValueException(std::to_string(length))); - } - auto blobBuffer = std::make_unique(length); - auto blobLen = Blob::fromString(value, length, blobBuffer.get()); - auto val = overflowFile->copyString((char*)blobBuffer.get(), blobLen, overflowCursor); - setValue(val, pos); -} - -// STRING -template<> -void StringColumnChunk::setValueFromString( - const char* value, uint64_t length, uint64_t pos) { +void StringColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { if (length > BufferPoolConstants::PAGE_4KB_SIZE) { throw CopyException( ExceptionMessage::overLargeStringValueException(std::to_string(length))); diff --git a/src/storage/store/struct_column_chunk.cpp b/src/storage/store/struct_column_chunk.cpp index e51e2e6702..06f4f44046 100644 --- a/src/storage/store/struct_column_chunk.cpp +++ b/src/storage/store/struct_column_chunk.cpp @@ -49,123 +49,6 @@ void StructColumnChunk::append(common::ValueVector* vector, common::offset_t sta numValues += vector->state->selVector->selectedSize; } -void StructColumnChunk::setStructFields(const char* value, uint64_t length, uint64_t pos) { - // Removes the leading and the trailing brackets. - switch (dataType.getLogicalTypeID()) { - case LogicalTypeID::STRUCT: { - auto structString = std::string(value, length).substr(1, length - 2); - auto structFieldIdxAndValuePairs = - TableCopyUtils::parseStructFieldNameAndValues(dataType, structString, *csvReaderConfig); - for (auto& fieldIdxAndValue : structFieldIdxAndValuePairs) { - setValueToStructField(pos, fieldIdxAndValue.fieldValue, fieldIdxAndValue.fieldIdx); - } - } break; - case LogicalTypeID::UNION: { - union_field_idx_t selectedFieldIdx = INVALID_STRUCT_FIELD_IDX; - for (auto i = 0u; i < UnionType::getNumFields(&dataType); i++) { - auto internalFieldIdx = UnionType::getInternalFieldIdx(i); - if (TableCopyUtils::tryCast(*UnionType::getFieldType(&dataType, i), value, length)) { - childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, false /* isNull */); - setValueToStructField(pos, std::string(value, length), internalFieldIdx); - selectedFieldIdx = i; - break; - } else { - childrenChunks[internalFieldIdx]->getNullChunk()->setNull(pos, true /* isNull */); - } - } - if (selectedFieldIdx == INVALID_STRUCT_FIELD_IDX) { - throw ParserException{StringUtils::string_format( - "No parsing rule matches value: {}.", std::string(value, length))}; - } - childrenChunks[UnionType::TAG_FIELD_IDX]->setValue(selectedFieldIdx, pos); - childrenChunks[UnionType::TAG_FIELD_IDX]->getNullChunk()->setNull(pos, false /* isNull */); - } break; - default: { - throw NotImplementedException("StructColumnChunk::setStructFields"); - } - } -} - -void StructColumnChunk::setValueToStructField( - offset_t pos, const std::string& structFieldValue, struct_field_idx_t structFiledIdx) { - auto fieldChunk = childrenChunks[structFiledIdx].get(); - switch (fieldChunk->getDataType().getLogicalTypeID()) { - case LogicalTypeID::INT64: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::INT32: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::INT16: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::INT8: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::UINT64: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::UINT32: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::UINT16: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::UINT8: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::DOUBLE: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::FLOAT: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::BOOL: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::DATE: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::TIMESTAMP: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::INTERVAL: { - fieldChunk->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::STRING: { - reinterpret_cast(fieldChunk) - ->setValueFromString( - structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::VAR_LIST: { - reinterpret_cast(fieldChunk) - ->setValueFromString(structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - case LogicalTypeID::STRUCT: { - reinterpret_cast(fieldChunk) - ->setStructFields(structFieldValue.c_str(), structFieldValue.length(), pos); - } break; - default: { - throw NotImplementedException{StringUtils::string_format( - "Unsupported data type: {}.", LogicalTypeUtils::dataTypeToString(dataType))}; - } - } -} - void StructColumnChunk::write(const Value& val, uint64_t posToWrite) { assert(val.getDataType()->getPhysicalType() == PhysicalTypeID::STRUCT); auto numElements = NestedVal::getChildrenSize(&val); diff --git a/src/storage/store/var_list_column_chunk.cpp b/src/storage/store/var_list_column_chunk.cpp index 040eacbd32..9f2e468146 100644 --- a/src/storage/store/var_list_column_chunk.cpp +++ b/src/storage/store/var_list_column_chunk.cpp @@ -65,12 +65,6 @@ void VarListColumnChunk::write(const Value& listVal, uint64_t posToWrite) { setValue(varListDataColumnChunk.getNumValues(), posToWrite); } -void VarListColumnChunk::setValueFromString(const char* value, uint64_t length, uint64_t pos) { - auto listVal = - TableCopyUtils::getVarListValue(value, 1, length - 2, dataType, *csvReaderConfig); - write(*listVal, pos); -} - void VarListColumnChunk::resetToEmpty() { ColumnChunk::resetToEmpty(); varListDataColumnChunk.reset();