Skip to content

Commit

Permalink
Implement blob dataType
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Jun 18, 2023
1 parent c6ca558 commit 8b3b793
Show file tree
Hide file tree
Showing 36 changed files with 2,658 additions and 2,352 deletions.
12 changes: 8 additions & 4 deletions src/binder/bind_expression/bind_function_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,12 @@ static std::vector<std::unique_ptr<Value>> populateLabelValues(
labels.resize(maxTableID + 1);
for (auto i = 0; i < labels.size(); ++i) {
if (tableIDsSet.contains(i)) {
labels[i] = std::make_unique<Value>(catalogContent.getTableName(i));
labels[i] = std::make_unique<Value>(
LogicalType{LogicalTypeID::STRING}, catalogContent.getTableName(i));
} else {
// TODO(Xiyang/Guodong): change to null literal once we support null in LIST type.
labels[i] = std::make_unique<Value>(std::string(""));
labels[i] =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, std::string(""));
}
}
return labels;
Expand All @@ -201,7 +203,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindLabelFunction(const Expression
auto& node = (NodeExpression&)expression;
if (!node.isMultiLabeled()) {
auto labelName = catalogContent->getTableName(node.getSingleTableID());
return createLiteralExpression(std::make_unique<Value>(labelName));
return createLiteralExpression(
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, labelName));
}
auto nodeTableIDs = catalogContent->getNodeTableIDs();
children.push_back(node.getInternalIDProperty());
Expand All @@ -213,7 +216,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindLabelFunction(const Expression
auto& rel = (RelExpression&)expression;
if (!rel.isMultiLabeled()) {
auto labelName = catalogContent->getTableName(rel.getSingleTableID());
return createLiteralExpression(std::make_unique<Value>(labelName));
return createLiteralExpression(
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, labelName));
}
auto relTableIDs = catalogContent->getRelTableIDs();
children.push_back(rel.getInternalIDProperty());
Expand Down
3 changes: 2 additions & 1 deletion src/binder/bind_expression/bind_property_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindPropertyExpression(
return bindRelPropertyExpression(*child, propertyName);
} else {
assert(LogicalTypeID::STRUCT == childTypeID);
auto stringValue = std::make_unique<Value>(propertyName);
auto stringValue =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, propertyName);
return bindScalarFunctionExpression(
expression_vector{child, createLiteralExpression(std::move(stringValue))},
STRUCT_EXTRACT_FUNC_NAME);
Expand Down
3 changes: 2 additions & 1 deletion src/binder/bound_statement_result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ namespace binder {
std::unique_ptr<BoundStatementResult> BoundStatementResult::createSingleStringColumnResult() {
auto result = std::make_unique<BoundStatementResult>();
auto columnName = std::string("result");
auto value = std::make_unique<common::Value>(columnName);
auto value = std::make_unique<common::Value>(
common::LogicalType{common::LogicalTypeID::STRING}, columnName);
auto stringColumn = std::make_shared<LiteralExpression>(std::move(value), columnName);
result->addColumn(stringColumn, expression_vector{stringColumn});
return result;
Expand Down
4 changes: 2 additions & 2 deletions src/c_api/prepared_statement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ void kuzu_prepared_statement_bind_interval(

void kuzu_prepared_statement_bind_string(
kuzu_prepared_statement* prepared_statement, const char* param_name, const char* value) {
auto string_value = std::string(value);
auto value_ptr = std::make_shared<Value>(string_value);
auto value_ptr =
std::make_shared<Value>(LogicalType{LogicalTypeID::STRING}, std::string(value));
kuzu_prepared_statement_bind_cpp_value(prepared_statement, param_name, value_ptr);
}

Expand Down
3 changes: 2 additions & 1 deletion src/c_api/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ kuzu_rel_val* kuzu_rel_val_create(
kuzu_internal_id_t src_id, kuzu_internal_id_t dst_id, const char* label) {
auto src_id_val = std::make_unique<Value>(internalID_t(src_id.offset, src_id.table_id));
auto dst_id_val = std::make_unique<Value>(internalID_t(dst_id.offset, dst_id.table_id));
auto label_val = std::make_unique<Value>(std::string(label));
auto label_val =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, std::string(label));
auto* c_rel_val = (kuzu_rel_val*)calloc(1, sizeof(kuzu_rel_val));
c_rel_val->_rel_val =
new RelVal(std::move(src_id_val), std::move(dst_id_val), std::move(label_val));
Expand Down
3 changes: 2 additions & 1 deletion src/common/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ add_library(kuzu_common_types
ku_string.cpp
value.cpp
timestamp_t.cpp
types.cpp)
types.cpp
blob.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_common_types>
Expand Down
106 changes: 106 additions & 0 deletions src/common/types/blob.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "common/types/blob.h"

#include "common/exception.h"
#include "common/string_utils.h"

namespace kuzu {
namespace common {

// Lookup table indexed by a byte value that yields the numeric value of that byte when it is read
// as a hexadecimal digit: '0'-'9' map to 0-9, 'A'-'F' and 'a'-'f' both map to 10-15. Every other
// byte maps to -1, which Blob::validateHexCode treats as "not a hex digit".
const int HexFormatConstants::HEX_MAP[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1,
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1};

// Tells whether `c` can be emitted verbatim when a blob is rendered as text: it must be a
// printable ASCII character (codes 32 through 126) and must not be a backslash, a single quote
// or a double quote.
static bool isRegularChar(char c) {
    if (c < 32 || c > 126) {
        return false;
    }
    switch (c) {
    case '\\':
    case '\'':
    case '"':
        return false;
    default:
        return true;
    }
}

// Computes how many bytes the escaped textual form held in `blob` decodes to.
//
// Each hex escape sequence (validated through validateHexCode, which throws on a malformed or
// truncated sequence) counts as one byte, and every other byte with a value of at most 127 counts
// as one byte.
//
// Throws ConversionException when a byte above 127 appears outside an escape sequence.
uint64_t Blob::getBlobSize(const ku_string_t& blob) {
    uint64_t blobSize = 0;
    auto length = blob.len;
    auto blobStr = blob.getData();
    // The index is unsigned (as in Blob::fromString) so that it is neither compared against the
    // unsigned length nor passed as validateHexCode's uint64_t position through a signed int.
    for (auto i = 0u; i < length; i++) {
        if (blobStr[i] == '\\') {
            validateHexCode(blobStr, length, i);
            blobSize++;
            // Skip the remainder of the escape sequence; the loop increment consumes its last
            // character.
            i += HexFormatConstants::LENGTH - 1;
        } else if (blobStr[i] <= 127) {
            blobSize++;
        } else {
            // The message is constant, so it is thrown directly (as Blob::fromString does)
            // instead of going through a formatting call that has nothing to substitute.
            throw ConversionException(
                "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
                "must be escaped with hex codes (e.g. \\xAA)");
        }
    }
    return blobSize;
}

// Decodes the escaped textual form in `str` into raw bytes stored consecutively in
// `resultBuffer`.
//
// A hex escape sequence is validated and collapsed into the single byte its two hex digits
// encode; any other byte with a value of at most 127 is copied unchanged.
// NOTE(review): `resultBuffer` is presumably sized by the caller via Blob::getBlobSize - confirm.
//
// Throws ConversionException on a malformed escape sequence (raised by validateHexCode) or on a
// byte above 127 that is not part of an escape sequence.
void Blob::fromString(ku_string_t& str, uint8_t* resultBuffer) {
    auto input = str.getData();
    auto numBytesWritten = 0u;
    auto pos = 0u;
    while (pos < str.len) {
        if (input[pos] == '\\') {
            validateHexCode(input, str.len, pos);
            auto highDigit =
                HexFormatConstants::HEX_MAP[input[pos + HexFormatConstants::FIRST_BYTE_POS]];
            auto lowDigit =
                HexFormatConstants::HEX_MAP[input[pos + HexFormatConstants::SECOND_BYTES_POS]];
            // Combine the two hex digits into one output byte.
            resultBuffer[numBytesWritten++] =
                (highDigit << HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE) + lowDigit;
            // Move past the whole escape sequence.
            pos += HexFormatConstants::LENGTH;
            continue;
        }
        if (input[pos] > 127) {
            throw ConversionException(
                "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
                "must be escaped with hex codes (e.g. \\xAA)");
        }
        resultBuffer[numBytesWritten++] = input[pos];
        pos++;
    }
}

// Renders the bytes of `blob` as text. Bytes accepted by isRegularChar are appended as-is; every
// other byte is written as a backslash, an 'x', and two characters looked up in
// HexFormatConstants::HEX_TABLE (upper four bits first, then the bits selected by
// SECOND_BYTE_MASK), e.g. \x00.
std::string Blob::toString(blob_t& blob) {
    auto bytes = (uint8_t*)blob.value.getData();
    auto numBytes = blob.value.len;
    std::string rendered;
    for (auto idx = 0u; idx < numBytes; idx++) {
        auto curByte = bytes[idx];
        if (!isRegularChar(curByte)) {
            // Anything that is not a regular character is shown as a hex escape.
            auto highDigit = curByte >> HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE;
            auto lowDigit = curByte & HexFormatConstants::SECOND_BYTE_MASK;
            rendered += '\\';
            rendered += 'x';
            rendered += HexFormatConstants::HEX_TABLE[highDigit];
            rendered += HexFormatConstants::HEX_TABLE[lowDigit];
            continue;
        }
        rendered += curByte;
    }
    return rendered;
}

// Checks that a well-formed hex escape sequence starts at `curPos` inside the `length`-byte
// buffer `blobStr`.
//
// Throws ConversionException when fewer than HexFormatConstants::LENGTH bytes remain from
// `curPos`, when the sequence does not begin with HexFormatConstants::PREFIX, or when either of
// the two digit positions holds a byte that HEX_MAP marks as a non-hex character (negative
// entry).
void Blob::validateHexCode(const uint8_t* blobStr, uint64_t length, uint64_t curPos) {
    auto escapeEnd = curPos + HexFormatConstants::LENGTH;
    if (escapeEnd > length) {
        throw ConversionException(
            "Invalid hex escape code encountered in string -> blob conversion: "
            "unterminated escape code at end of blob");
    }
    auto escapeStart = blobStr + curPos;
    // Short-circuits in the same order as the individual checks: prefix, first digit, second
    // digit.
    bool isValidEscape =
        memcmp(escapeStart, HexFormatConstants::PREFIX, HexFormatConstants::PREFIX_LENGTH) == 0 &&
        HexFormatConstants::HEX_MAP[escapeStart[HexFormatConstants::FIRST_BYTE_POS]] >= 0 &&
        HexFormatConstants::HEX_MAP[escapeStart[HexFormatConstants::SECOND_BYTES_POS]] >= 0;
    if (!isValidEscape) {
        // Include the offending sequence in the error message.
        throw ConversionException(StringUtils::string_format(
            "Invalid hex escape code encountered in string -> blob conversion: {}",
            std::string((char*)escapeStart, HexFormatConstants::LENGTH)));
    }
}

} // namespace common
} // namespace kuzu
4 changes: 4 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ void LogicalType::setPhysicalType() {
case LogicalTypeID::INTERNAL_ID: {
physicalType = PhysicalTypeID::INTERNAL_ID;
} break;
case LogicalTypeID::BLOB:
case LogicalTypeID::STRING: {
physicalType = PhysicalTypeID::STRING;
} break;
Expand Down Expand Up @@ -405,6 +406,7 @@ std::string LogicalTypeUtils::dataTypeToString(const LogicalType& dataType) {
case LogicalTypeID::INTERVAL:
case LogicalTypeID::STRING:
case LogicalTypeID::SERIAL:
case LogicalTypeID::BLOB:
return dataTypeToString(dataType.typeID);
default:
throw NotImplementedException("LogicalTypeUtils::dataTypeToString.");
Expand Down Expand Up @@ -455,6 +457,8 @@ std::string LogicalTypeUtils::dataTypeToString(LogicalTypeID dataTypeID) {
return "MAP";
case LogicalTypeID::UNION:
return "UNION";
case LogicalTypeID::BLOB:
return "BLOB";
default:
throw NotImplementedException("LogicalTypeUtils::dataTypeToString.");
}
Expand Down
12 changes: 10 additions & 2 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "common/types/value.h"

#include "common/null_buffer.h"
#include "common/types/blob.h"
#include "storage/storage_utils.h"

namespace kuzu {
Expand Down Expand Up @@ -60,8 +61,10 @@ Value Value::createDefaultValue(const LogicalType& dataType) {
return Value(interval_t());
case LogicalTypeID::INTERNAL_ID:
return Value(nodeID_t());
case LogicalTypeID::BLOB:
return Value(LogicalType{LogicalTypeID::BLOB}, std::string(""));
case LogicalTypeID::STRING:
return Value(std::string(""));
return Value(LogicalType{LogicalTypeID::STRING}, std::string(""));
case LogicalTypeID::FLOAT:
return Value((float_t)0);
case LogicalTypeID::RECURSIVE_REL:
Expand Down Expand Up @@ -121,7 +124,8 @@ Value::Value(const char* val_) : dataType{LogicalTypeID::STRING}, isNull_{false}
strVal = std::string(val_);
}

Value::Value(const std::string& val_) : dataType{LogicalTypeID::STRING}, isNull_{false} {
Value::Value(LogicalType type, const std::string& val_)
: dataType{std::move(type)}, isNull_{false} {
strVal = val_;
}

Expand Down Expand Up @@ -179,6 +183,9 @@ void Value::copyValueFrom(const uint8_t* value) {
case LogicalTypeID::STRING: {
strVal = ((ku_string_t*)value)->getAsString();
} break;
case LogicalTypeID::BLOB: {
strVal = Blob::toString(*(blob_t*)value);
} break;
case LogicalTypeID::MAP:
case LogicalTypeID::VAR_LIST: {
nestedTypeVal =
Expand Down Expand Up @@ -291,6 +298,7 @@ std::string Value::toString() const {
return TypeUtils::toString(val.intervalVal);
case LogicalTypeID::INTERNAL_ID:
return TypeUtils::toString(val.internalIDVal);
case LogicalTypeID::BLOB:
case LogicalTypeID::STRING:
return strVal;
case LogicalTypeID::MAP: {
Expand Down
3 changes: 2 additions & 1 deletion src/function/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ add_library(kuzu_function
vector_timestamp_operations.cpp
vector_struct_operations.cpp
vector_map_operation.cpp
vector_union_operations.cpp)
vector_union_operations.cpp
vector_blob_operations.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_function>
Expand Down
10 changes: 10 additions & 0 deletions src/function/built_in_vector_operations.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "function/built_in_vector_operations.h"

#include "function/arithmetic/vector_arithmetic_operations.h"
#include "function/blob/vector_blob_operations.h"
#include "function/cast/vector_cast_operations.h"
#include "function/comparison/vector_comparison_operations.h"
#include "function/date/vector_date_operations.h"
Expand Down Expand Up @@ -31,6 +32,7 @@ void BuiltInVectorOperations::registerVectorOperations() {
registerMapOperations();
registerUnionOperations();
registerNodeRelOperations();
registerBlobOperations();
}

bool BuiltInVectorOperations::canApplyStaticEvaluation(
Expand Down Expand Up @@ -423,6 +425,7 @@ void BuiltInVectorOperations::registerCastOperations() {
{CAST_TO_INTERVAL_FUNC_NAME, CastToIntervalVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_STRING_FUNC_NAME, CastToStringVectorOperation::getDefinitions()});
vectorOperations.insert({CAST_TO_BLOB_FUNC_NAME, CastToBlobVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_DOUBLE_FUNC_NAME, CastToDoubleVectorOperation::getDefinitions()});
vectorOperations.insert(
Expand Down Expand Up @@ -510,5 +513,12 @@ void BuiltInVectorOperations::registerNodeRelOperations() {
vectorOperations.insert({RELS_FUNC_NAME, RelsVectorOperation::getDefinitions()});
}

// Adds the blob functions to `vectorOperations`, each keyed by its function name constant:
// OCTET_LENGTH, ENCODE and DECODE, in that order.
void BuiltInVectorOperations::registerBlobOperations() {
    auto octetLengthDefinitions = OctetLengthVectorOperations::getDefinitions();
    vectorOperations.insert({OCTET_LENGTH_FUNC_NAME, std::move(octetLengthDefinitions)});

    auto encodeDefinitions = EncodeVectorOperations::getDefinitions();
    vectorOperations.insert({ENCODE_FUNC_NAME, std::move(encodeDefinitions)});

    auto decodeDefinitions = DecodeVectorOperations::getDefinitions();
    vectorOperations.insert({DECODE_FUNC_NAME, std::move(decodeDefinitions)});
}

} // namespace function
} // namespace kuzu
44 changes: 44 additions & 0 deletions src/function/vector_blob_operations.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "function/blob/vector_blob_operations.h"

#include "function/blob/operations/decode_operation.h"
#include "function/blob/operations/encode_operation.h"
#include "function/string/operations/length_operation.h"
#include "function/string/vector_string_operations.h"

namespace kuzu {
namespace function {

// Returns the definitions registered under OCTET_LENGTH: a single overload that takes one BLOB
// argument, yields INT64, and executes operation::Length over the value read as a ku_string_t.
vector_operation_definitions OctetLengthVectorOperations::getDefinitions() {
    auto octetLength = make_unique<VectorOperationDefinition>(common::OCTET_LENGTH_FUNC_NAME,
        std::vector<common::LogicalTypeID>{common::LogicalTypeID::BLOB},
        common::LogicalTypeID::INT64,
        UnaryExecFunction<common::ku_string_t, int64_t, operation::Length>, nullptr, nullptr,
        nullptr, false /* isVarLength */);
    vector_operation_definitions result;
    result.push_back(std::move(octetLength));
    return result;
}

// Returns the definitions registered under ENCODE: a single overload that takes one STRING
// argument and yields a BLOB, executed by operation::Encode through the unary string exec
// function (ku_string_t input, blob_t result).
vector_operation_definitions EncodeVectorOperations::getDefinitions() {
    auto encode = make_unique<VectorOperationDefinition>(common::ENCODE_FUNC_NAME,
        std::vector<common::LogicalTypeID>{common::LogicalTypeID::STRING},
        common::LogicalTypeID::BLOB,
        VectorStringOperations::UnaryStringExecFunction<common::ku_string_t, common::blob_t,
            operation::Encode>,
        nullptr, false /* isVarLength */);
    vector_operation_definitions result;
    result.push_back(std::move(encode));
    return result;
}

// Returns the definitions registered under DECODE: a single overload that takes one BLOB
// argument and yields a STRING, executed by operation::Decode through the unary string exec
// function (blob_t input, ku_string_t result).
vector_operation_definitions DecodeVectorOperations::getDefinitions() {
    auto decode = make_unique<VectorOperationDefinition>(common::DECODE_FUNC_NAME,
        std::vector<common::LogicalTypeID>{common::LogicalTypeID::BLOB},
        common::LogicalTypeID::STRING,
        VectorStringOperations::UnaryStringExecFunction<common::blob_t, common::ku_string_t,
            operation::Decode>,
        nullptr, false /* isVarLength */);
    vector_operation_definitions result;
    result.push_back(std::move(decode));
    return result;
}

} // namespace function
} // namespace kuzu
8 changes: 8 additions & 0 deletions src/function/vector_cast_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@ vector_operation_definitions CastToStringVectorOperation::getDefinitions() {
return result;
}

// Returns the definitions registered under CAST_TO_BLOB_FUNC_NAME: a single overload that takes
// one STRING argument and yields a BLOB, executed by operation::CastToBlob through the unary
// cast exec function (ku_string_t input, blob_t result).
vector_operation_definitions CastToBlobVectorOperation::getDefinitions() {
    auto castToBlob = make_unique<VectorOperationDefinition>(CAST_TO_BLOB_FUNC_NAME,
        std::vector<LogicalTypeID>{LogicalTypeID::STRING}, LogicalTypeID::BLOB,
        UnaryCastExecFunction<ku_string_t, blob_t, operation::CastToBlob>);
    vector_operation_definitions definitions;
    definitions.push_back(std::move(castToBlob));
    return definitions;
}

vector_operation_definitions CastToDoubleVectorOperation::getDefinitions() {
vector_operation_definitions result;
result.push_back(bindVectorOperation<int16_t, double_t, operation::CastToDouble>(
Expand Down
6 changes: 6 additions & 0 deletions src/include/common/expression_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const std::string CAST_TO_FLOAT_FUNC_NAME = "TO_FLOAT";
const std::string CAST_TO_INT64_FUNC_NAME = "TO_INT64";
const std::string CAST_TO_INT32_FUNC_NAME = "TO_INT32";
const std::string CAST_TO_INT16_FUNC_NAME = "TO_INT16";
const std::string CAST_TO_BLOB_FUNC_NAME = "BLOB";

// list
const std::string LIST_CREATION_FUNC_NAME = "LIST_CREATION";
Expand Down Expand Up @@ -195,6 +196,11 @@ const std::string OFFSET_FUNC_NAME = "OFFSET";
const std::string NODES_FUNC_NAME = "NODES";
const std::string RELS_FUNC_NAME = "RELS";

// Blob functions
const std::string OCTET_LENGTH_FUNC_NAME = "OCTET_LENGTH";
const std::string ENCODE_FUNC_NAME = "ENCODE";
const std::string DECODE_FUNC_NAME = "DECODE";

enum ExpressionType : uint8_t {

// Boolean Connection Expressions
Expand Down
Loading

0 comments on commit 8b3b793

Please sign in to comment.