Skip to content

Commit

Permalink
Merge pull request #1846 from kuzudb/parsed-expr-serialize
Browse files Browse the repository at this point in the history
Add ser/deser for macro function
  • Loading branch information
acquamarin committed Jul 23, 2023
2 parents 1988fc7 + eab2e25 commit d6abf42
Show file tree
Hide file tree
Showing 34 changed files with 782 additions and 307 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.6 LANGUAGES CXX)
project(Kuzu VERSION 0.0.6.1 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
218 changes: 91 additions & 127 deletions src/catalog/catalog.cpp

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion src/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ add_library(kuzu_common
profiler.cpp
type_utils.cpp
utils.cpp
string_utils.cpp)
string_utils.cpp
ser_deser.cpp)

target_link_libraries(kuzu_common Glob)

Expand Down
2 changes: 1 addition & 1 deletion src/common/arrow/arrow_row_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ static void setBitToOne(std::uint8_t* data, std::int64_t pos) {

void ArrowRowBatch::appendValue(
ArrowVector* vector, const main::DataTypeInfo& typeInfo, Value* value) {
if (value->isNull_) {
if (value->isNull()) {
copyNullValue(vector, value, vector->numValues);
} else {
copyNonNullValue(vector, typeInfo, value, vector->numValues);
Expand Down
27 changes: 27 additions & 0 deletions src/common/ser_deser.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#include "common/ser_deser.h"

namespace kuzu {
namespace common {

// Writes a std::string as a uint64_t byte count followed by the raw character bytes.
// `offset` is the file position to start at; it is only updated after both writes have been issued
// and then points just past the last character written.
template<>
void SerDeser::serializeValue<std::string>(
    const std::string& value, FileInfo* fileInfo, uint64_t& offset) {
    uint64_t numBytes = value.length();
    const uint64_t payloadPos = offset + sizeof(uint64_t);
    // Length prefix goes first, at the caller-supplied position.
    FileUtils::writeToFile(
        fileInfo, reinterpret_cast<uint8_t*>(&numBytes), sizeof(uint64_t), offset);
    // Character data follows immediately after the prefix.
    FileUtils::writeToFile(fileInfo,
        reinterpret_cast<uint8_t*>(const_cast<char*>(value.data())), numBytes, payloadPos);
    offset = payloadPos + numBytes;
}

// Reads a std::string stored as a uint64_t byte count followed by that many raw bytes.
// `value` is resized to the stored length and filled in place; `offset` ends up just past the
// string's bytes.
template<>
void SerDeser::deserializeValue<std::string>(
    std::string& value, FileInfo* fileInfo, uint64_t& offset) {
    uint64_t numBytes = 0;
    // The generic uint64_t reader consumes the length prefix and moves `offset` past it.
    deserializeValue<uint64_t>(numBytes, fileInfo, offset);
    value.resize(numBytes);
    auto* destination = reinterpret_cast<uint8_t*>(value.data());
    FileUtils::readFromFile(fileInfo, destination, numBytes, offset);
    offset = offset + numBytes;
}

} // namespace common
} // namespace kuzu
72 changes: 33 additions & 39 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -599,106 +599,100 @@ std::vector<std::string> LogicalTypeUtils::parseStructFields(const std::string&

// Specialized Ser/Deser functions for logical dataTypes.
// Serializes a VarListTypeInfo: the only thing written is its child LogicalType.
// Diff view: the `uint64_t ...(…, uint64_t offset)` lines are the removed form, which returned the new
// offset; the `void ...(…, uint64_t& offset)` lines are the added form, which takes `offset` by reference.
template<>
uint64_t SerDeser::serializeValue(
const VarListTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
return SerDeser::serializeValue(*value.getChildType(), fileInfo, offset);
void SerDeser::serializeValue(const VarListTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(*value.getChildType(), fileInfo, offset);
}

// Deserializes a VarListTypeInfo: allocates a fresh child LogicalType and reads into it.
// Diff view: the `uint64_t` signature with `offset = ...` / `return offset;` is the removed form; the
// `void` signature taking `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::deserializeValue(VarListTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
void SerDeser::deserializeValue(VarListTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
// The child type must exist before it can be filled in.
value.childType = std::make_unique<LogicalType>();
offset = SerDeser::deserializeValue(*value.getChildType(), fileInfo, offset);
return offset;
deserializeValue(*value.getChildType(), fileInfo, offset);
}

// Serializes a FixedListTypeInfo: the child LogicalType first, then the number of elements in the list.
// Diff view: the `uint64_t`-returning lines are the removed form; the `void` lines taking
// `uint64_t& offset` are the added form.
template<>
uint64_t SerDeser::serializeValue(
const FixedListTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::serializeValue(*value.getChildType(), fileInfo, offset);
return SerDeser::serializeValue(value.getNumElementsInList(), fileInfo, offset);
void SerDeser::serializeValue(
const FixedListTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(*value.getChildType(), fileInfo, offset);
SerDeser::serializeValue(value.getNumElementsInList(), fileInfo, offset);
}

// Deserializes a FixedListTypeInfo in the order it is serialized: child LogicalType, then
// fixedNumElementsInList.
// Diff view: the `uint64_t` signature with `offset = ...` / `return offset;` is the removed form; the
// `void` signature taking `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::deserializeValue(FixedListTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
void SerDeser::deserializeValue(FixedListTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
// The child type must exist before it can be filled in.
value.childType = std::make_unique<LogicalType>();
offset = SerDeser::deserializeValue(*value.getChildType(), fileInfo, offset);
offset = SerDeser::deserializeValue(value.fixedNumElementsInList, fileInfo, offset);
return offset;
deserializeValue(*value.getChildType(), fileInfo, offset);
deserializeValue(value.fixedNumElementsInList, fileInfo, offset);
}

// Serializes a StructTypeInfo by writing its `fields` vector through serializeVector.
// Diff view: the `uint64_t`-returning lines are the removed form; the `void` line taking
// `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::serializeValue(
const StructTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
return serializeVector(value.fields, fileInfo, offset);
void SerDeser::serializeValue(const StructTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
serializeVector(value.fields, fileInfo, offset);
}

// Deserializes a StructTypeInfo by reading its `fields` vector through deserializeVector.
// Diff view: the `uint64_t`-returning lines are the removed form; the `void` line taking
// `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::deserializeValue(StructTypeInfo& value, FileInfo* fileInfo, uint64_t offset) {
return deserializeVector(value.fields, fileInfo, offset);
void SerDeser::deserializeValue(StructTypeInfo& value, FileInfo* fileInfo, uint64_t& offset) {
deserializeVector(value.fields, fileInfo, offset);
}

// Serializes one struct field: its name, then its LogicalType. `value` is dereferenced without a
// null check.
// Diff view: the `uint64_t`-returning lines are the removed form; the `void` lines taking
// `uint64_t& offset` are the added form.
template<>
uint64_t SerDeser::serializeValue(
const std::unique_ptr<StructField>& value, FileInfo* fileInfo, uint64_t offset) {
offset = serializeValue<std::string>(value->name, fileInfo, offset);
return serializeValue(*value->getType(), fileInfo, offset);
void SerDeser::serializeValue(
const std::unique_ptr<StructField>& value, FileInfo* fileInfo, uint64_t& offset) {
serializeValue(value->name, fileInfo, offset);
serializeValue(*value->getType(), fileInfo, offset);
}

// Deserializes one struct field: replaces `value` with a new StructField, then reads the name and
// the LogicalType in that order.
// Diff view: the `uint64_t`-returning lines are the removed form; the `void` lines taking
// `uint64_t& offset` are the added form.
template<>
uint64_t SerDeser::deserializeValue(
std::unique_ptr<StructField>& value, FileInfo* fileInfo, uint64_t offset) {
void SerDeser::deserializeValue(
std::unique_ptr<StructField>& value, FileInfo* fileInfo, uint64_t& offset) {
value = std::make_unique<StructField>();
offset = deserializeValue<std::string>(value->name, fileInfo, offset);
return deserializeValue(*value->type, fileInfo, offset);
// NOTE(review): `value->type` is dereferenced right after default construction — presumably the
// StructField default constructor allocates it; confirm.
deserializeValue<std::string>(value->name, fileInfo, offset);
deserializeValue(*value->type, fileInfo, offset);
}

// Serializes a LogicalType: the logical type ID first, then — only for VAR_LIST, FIXED_LIST and STRUCT
// physical types — the matching extra type info. Every other physical type writes nothing further.
// Diff view: lines using `offset = ...` / `return offset;` with the `uint64_t` signature are the removed
// form; the `void` signature taking `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::serializeValue(const LogicalType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::serializeValue(value.getLogicalTypeID(), fileInfo, offset);
void SerDeser::serializeValue(const LogicalType& value, FileInfo* fileInfo, uint64_t& offset) {
SerDeser::serializeValue(value.getLogicalTypeID(), fileInfo, offset);
// The physical type decides which concrete type-info class extraTypeInfo is cast to.
switch (value.getPhysicalType()) {
case PhysicalTypeID::VAR_LIST: {
auto varListTypeInfo = reinterpret_cast<VarListTypeInfo*>(value.extraTypeInfo.get());
offset = serializeValue(*varListTypeInfo, fileInfo, offset);
serializeValue(*varListTypeInfo, fileInfo, offset);
} break;
case PhysicalTypeID::FIXED_LIST: {
auto fixedListTypeInfo = reinterpret_cast<FixedListTypeInfo*>(value.extraTypeInfo.get());
offset = serializeValue(*fixedListTypeInfo, fileInfo, offset);
serializeValue(*fixedListTypeInfo, fileInfo, offset);
} break;
case PhysicalTypeID::STRUCT: {
auto structTypeInfo = reinterpret_cast<StructTypeInfo*>(value.extraTypeInfo.get());
offset = serializeValue(*structTypeInfo, fileInfo, offset);
serializeValue(*structTypeInfo, fileInfo, offset);
} break;
default:
break;
}
return offset;
}

// Deserializes a LogicalType: reads the type ID, calls setPhysicalType(), and then — only for VAR_LIST,
// FIXED_LIST and STRUCT physical types — allocates the matching extra type info and reads into it.
// Diff view: lines using `offset = ...` / `return offset;` with the `uint64_t` signature are the removed
// form; the `void` signature taking `uint64_t& offset` is the added form.
template<>
uint64_t SerDeser::deserializeValue(LogicalType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::deserializeValue(value.typeID, fileInfo, offset);
void SerDeser::deserializeValue(LogicalType& value, FileInfo* fileInfo, uint64_t& offset) {
SerDeser::deserializeValue(value.typeID, fileInfo, offset);
// Must run before the switch: getPhysicalType() is consulted right below.
value.setPhysicalType();
switch (value.getPhysicalType()) {
case PhysicalTypeID::VAR_LIST: {
value.extraTypeInfo = std::make_unique<VarListTypeInfo>();
offset = deserializeValue(
deserializeValue(
*reinterpret_cast<VarListTypeInfo*>(value.extraTypeInfo.get()), fileInfo, offset);

} break;
case PhysicalTypeID::FIXED_LIST: {
value.extraTypeInfo = std::make_unique<FixedListTypeInfo>();
offset = deserializeValue(
deserializeValue(
*reinterpret_cast<FixedListTypeInfo*>(value.extraTypeInfo.get()), fileInfo, offset);
} break;
case PhysicalTypeID::STRUCT: {
value.extraTypeInfo = std::make_unique<StructTypeInfo>();
offset = deserializeValue(
deserializeValue(
*reinterpret_cast<StructTypeInfo*>(value.extraTypeInfo.get()), fileInfo, offset);
} break;
default:
break;
}
return offset;
}

} // namespace common
Expand Down
110 changes: 91 additions & 19 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "common/types/value.h"

#include "common/null_buffer.h"
#include "common/ser_deser.h"
#include "common/string_utils.h"
#include "storage/storage_utils.h"

Expand Down Expand Up @@ -136,14 +137,6 @@ Value::Value(LogicalType dataType, std::vector<std::unique_ptr<Value>> vals)
nestedTypeVal = std::move(vals);
}

// Builds a non-null Value of logical type NODE that takes ownership of `val_`.
// Diff view: this constructor appears in a hunk that shrinks from 14 to 6 lines, i.e. it is among the
// lines this commit removes.
Value::Value(std::unique_ptr<NodeVal> val_) : dataType{LogicalTypeID::NODE}, isNull_{false} {
nodeVal = std::move(val_);
}

// Builds a non-null Value of logical type REL that takes ownership of `val_`.
// Diff view: this constructor appears in a hunk that shrinks from 14 to 6 lines, i.e. it is among the
// lines this commit removes.
Value::Value(std::unique_ptr<RelVal> val_) : dataType{LogicalTypeID::REL}, isNull_{false} {
relVal = std::move(val_);
}

Value::Value(LogicalType dataType, const uint8_t* val_)
: dataType{std::move(dataType)}, isNull_{false} {
copyValueFrom(val_);
Expand Down Expand Up @@ -339,7 +332,7 @@ std::string Value::toString() const {
std::string result = "{";
auto fieldNames = StructType::getFieldNames(&dataType);
for (auto i = 0u; i < nestedTypeVal.size(); ++i) {
if (nestedTypeVal[i]->isNull_) {
if (nestedTypeVal[i]->isNull()) {
// Avoid printing null key value pair.
continue;
}
Expand All @@ -355,7 +348,7 @@ std::string Value::toString() const {
std::string result = "(" + nestedTypeVal[0]->toString() + ")-{";
auto fieldNames = StructType::getFieldNames(&dataType);
for (auto i = 2u; i < nestedTypeVal.size(); ++i) {
if (nestedTypeVal[i]->isNull_) {
if (nestedTypeVal[i]->isNull()) {
// Avoid printing null key value pair.
continue;
}
Expand Down Expand Up @@ -470,16 +463,95 @@ std::vector<std::unique_ptr<Value>> Value::convertKUUnionToVector(const uint8_t*
return unionVal;
}

static std::string propertiesToString(
const std::vector<std::pair<std::string, std::unique_ptr<Value>>>& properties) {
std::string result = "{";
for (auto i = 0u; i < properties.size(); ++i) {
auto& [name, value] = properties[i];
result += name + ":" + value->toString();
result += (i == properties.size() - 1 ? "" : ", ");
// Writes this value to `fileInfo`. `offset` is handed by reference to every SerDeser call so that
// each write continues where the previous one stopped.
// Layout: dataType, isNull_, then a payload chosen by the physical type:
//   - fixed-width types and STRING: the corresponding member;
//   - VAR_LIST / FIXED_LIST / STRUCT: a uint64_t child count followed by each child, recursively.
// Throws NotImplementedException for any physical type not listed below.
void Value::serialize(FileInfo* fileInfo, uint64_t& offset) const {
    SerDeser::serializeValue(dataType, fileInfo, offset);
    SerDeser::serializeValue(isNull_, fileInfo, offset);
    switch (dataType.getPhysicalType()) {
    case PhysicalTypeID::BOOL: {
        SerDeser::serializeValue(val.booleanVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::INT64: {
        SerDeser::serializeValue(val.int64Val, fileInfo, offset);
    } break;
    case PhysicalTypeID::INT32: {
        SerDeser::serializeValue(val.int32Val, fileInfo, offset);
    } break;
    case PhysicalTypeID::INT16: {
        SerDeser::serializeValue(val.int16Val, fileInfo, offset);
    } break;
    case PhysicalTypeID::DOUBLE: {
        SerDeser::serializeValue(val.doubleVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::FLOAT: {
        SerDeser::serializeValue(val.floatVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::INTERVAL: {
        SerDeser::serializeValue(val.intervalVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::INTERNAL_ID: {
        SerDeser::serializeValue(val.internalIDVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::STRING: {
        SerDeser::serializeValue(strVal, fileInfo, offset);
    } break;
    case PhysicalTypeID::VAR_LIST:
    case PhysicalTypeID::FIXED_LIST:
    case PhysicalTypeID::STRUCT: {
        // Value::deserialize reads the children back with SerDeser::deserializeVectorOfPtrs, which
        // has no other way to learn how many elements follow, so the count is written first.
        // NOTE(review): assumes deserializeVectorOfPtrs expects a uint64_t element count ahead of
        // the elements — confirm against ser_deser.h.
        uint64_t numChildren = nestedTypeVal.size();
        SerDeser::serializeValue<uint64_t>(numChildren, fileInfo, offset);
        for (const auto& child : nestedTypeVal) {
            child->serialize(fileInfo, offset);
        }
    } break;
    default: {
        throw NotImplementedException{"Value::serialize"};
    }
    }
}

// Reads one Value from `fileInfo`, with `offset` passed by reference to every SerDeser call.
// Order: the LogicalType, the null flag, then a payload chosen by the physical type. The result starts
// out as createDefaultValue(dataType) and has the payload read into it; the null flag is applied last.
// Throws NotImplementedException for any physical type without a case below.
std::unique_ptr<Value> Value::deserialize(kuzu::common::FileInfo* fileInfo, uint64_t& offset) {
LogicalType dataType;
SerDeser::deserializeValue(dataType, fileInfo, offset);
bool isNull;
SerDeser::deserializeValue(isNull, fileInfo, offset);
std::unique_ptr<Value> val = std::make_unique<Value>(createDefaultValue(dataType));
switch (dataType.getPhysicalType()) {
case PhysicalTypeID::BOOL: {
SerDeser::deserializeValue(val->val.booleanVal, fileInfo, offset);
} break;
case PhysicalTypeID::INT64: {
SerDeser::deserializeValue(val->val.int64Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT32: {
SerDeser::deserializeValue(val->val.int32Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT16: {
SerDeser::deserializeValue(val->val.int16Val, fileInfo, offset);
} break;
case PhysicalTypeID::DOUBLE: {
SerDeser::deserializeValue(val->val.doubleVal, fileInfo, offset);
} break;
case PhysicalTypeID::FLOAT: {
SerDeser::deserializeValue(val->val.floatVal, fileInfo, offset);
} break;
case PhysicalTypeID::INTERVAL: {
SerDeser::deserializeValue(val->val.intervalVal, fileInfo, offset);
} break;
case PhysicalTypeID::INTERNAL_ID: {
SerDeser::deserializeValue(val->val.internalIDVal, fileInfo, offset);
} break;
case PhysicalTypeID::STRING: {
SerDeser::deserializeValue(val->strVal, fileInfo, offset);
} break;
case PhysicalTypeID::VAR_LIST:
case PhysicalTypeID::FIXED_LIST:
case PhysicalTypeID::STRUCT: {
// Nested children are read as a vector of owned Values.
SerDeser::deserializeVectorOfPtrs(val->nestedTypeVal, fileInfo, offset);
} break;
default: {
throw NotImplementedException{"Value::deserializeValue"};
}
}
// Diff view: the next two lines look like the removed tail of propertiesToString (they use `result`,
// which this function never declares), not part of deserialize.
result += "}";
return result;
val->setNull(isNull);
return val;
}

std::vector<std::pair<std::string, std::unique_ptr<Value>>> NodeVal::getProperties(
Expand Down
37 changes: 36 additions & 1 deletion src/function/scalar_macro_function.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
#include "function/scalar_macro_function.h"

#include "common/ser_deser.h"

using namespace kuzu::common;
using namespace kuzu::parser;

namespace kuzu {
namespace function {

Expand All @@ -12,13 +17,43 @@ macro_parameter_value_map ScalarMacroFunction::getDefaultParameterVals() const {
}

// Returns a new ScalarMacroFunction built from copies of this one's state: every default-argument
// expression and the main expression are duplicated through their own copy(), and positionalArgs is
// passed to the constructor as is.
// Diff view: the two `defaultArgsCopy` declarations are the removed (`parser::`-qualified) and added
// (unqualified) versions of the same line.
std::unique_ptr<ScalarMacroFunction> ScalarMacroFunction::copy() const {
parser::default_macro_args defaultArgsCopy;
default_macro_args defaultArgsCopy;
for (auto& defaultArg : defaultArgs) {
defaultArgsCopy.emplace_back(defaultArg.first, defaultArg.second->copy());
}
return std::make_unique<ScalarMacroFunction>(
expression->copy(), positionalArgs, std::move(defaultArgsCopy));
}

// Writes this macro to `fileInfo`, with `offset` passed by reference to every call.
// Layout: the macro's expression, the positionalArgs vector, a uint64_t count of default arguments,
// and then for each default argument its name followed by its expression.
void ScalarMacroFunction::serialize(FileInfo* fileInfo, uint64_t& offset) const {
    expression->serialize(fileInfo, offset);
    SerDeser::serializeVector(positionalArgs, fileInfo, offset);
    // ScalarMacroFunction::deserialize reads this count as an explicit uint64_t, so the written width
    // is pinned to uint64_t instead of whatever size() returns on the build platform.
    uint64_t numDefaultArgs = defaultArgs.size();
    SerDeser::serializeValue<uint64_t>(numDefaultArgs, fileInfo, offset);
    for (const auto& [paramName, defaultExpr] : defaultArgs) {
        SerDeser::serializeValue(paramName, fileInfo, offset);
        defaultExpr->serialize(fileInfo, offset);
    }
}

// Reads a macro from `fileInfo`, with `offset` passed by reference to every call.
// Mirrors ScalarMacroFunction::serialize: the expression, the positionalArgs vector, a uint64_t count
// of default arguments, and then that many (name, expression) pairs.
std::unique_ptr<ScalarMacroFunction> ScalarMacroFunction::deserialize(
    FileInfo* fileInfo, uint64_t& offset) {
    auto expression = ParsedExpression::deserialize(fileInfo, offset);
    std::vector<std::string> positionalArgs;
    SerDeser::deserializeVector(positionalArgs, fileInfo, offset);
    // Zero-initialized so the loop bound is never an indeterminate value.
    uint64_t numDefaultArgs = 0;
    SerDeser::deserializeValue<uint64_t>(numDefaultArgs, fileInfo, offset);
    default_macro_args defaultArgs;
    defaultArgs.reserve(numDefaultArgs);
    // The counter has the same width as the stored count; a 32-bit counter would wrap before
    // reaching a bound of 2^32 or more.
    for (uint64_t i = 0; i < numDefaultArgs; ++i) {
        std::string paramName;
        SerDeser::deserializeValue(paramName, fileInfo, offset);
        auto defaultExpr = ParsedExpression::deserialize(fileInfo, offset);
        defaultArgs.emplace_back(std::move(paramName), std::move(defaultExpr));
    }
    return std::make_unique<ScalarMacroFunction>(
        std::move(expression), std::move(positionalArgs), std::move(defaultArgs));
}

} // namespace function
} // namespace kuzu
Loading

0 comments on commit d6abf42

Please sign in to comment.