Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement blob dataType #1690

Merged
merged 1 commit into from
Jun 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions src/binder/bind_expression/bind_function_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,10 +180,12 @@ static std::vector<std::unique_ptr<Value>> populateLabelValues(
labels.resize(maxTableID + 1);
for (auto i = 0; i < labels.size(); ++i) {
if (tableIDsSet.contains(i)) {
labels[i] = std::make_unique<Value>(catalogContent.getTableName(i));
labels[i] = std::make_unique<Value>(
LogicalType{LogicalTypeID::STRING}, catalogContent.getTableName(i));
} else {
// TODO(Xiyang/Guodong): change to null literal once we support null in LIST type.
labels[i] = std::make_unique<Value>(std::string(""));
labels[i] =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, std::string(""));
}
}
return labels;
Expand All @@ -201,7 +203,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindLabelFunction(const Expression
auto& node = (NodeExpression&)expression;
if (!node.isMultiLabeled()) {
auto labelName = catalogContent->getTableName(node.getSingleTableID());
return createLiteralExpression(std::make_unique<Value>(labelName));
return createLiteralExpression(
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, labelName));
}
auto nodeTableIDs = catalogContent->getNodeTableIDs();
children.push_back(node.getInternalIDProperty());
Expand All @@ -213,7 +216,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindLabelFunction(const Expression
auto& rel = (RelExpression&)expression;
if (!rel.isMultiLabeled()) {
auto labelName = catalogContent->getTableName(rel.getSingleTableID());
return createLiteralExpression(std::make_unique<Value>(labelName));
return createLiteralExpression(
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, labelName));
}
auto relTableIDs = catalogContent->getRelTableIDs();
children.push_back(rel.getInternalIDProperty());
Expand Down
3 changes: 2 additions & 1 deletion src/binder/bind_expression/bind_property_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ std::shared_ptr<Expression> ExpressionBinder::bindPropertyExpression(
return bindRelPropertyExpression(*child, propertyName);
} else {
assert(LogicalTypeID::STRUCT == childTypeID);
auto stringValue = std::make_unique<Value>(propertyName);
auto stringValue =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, propertyName);
return bindScalarFunctionExpression(
expression_vector{child, createLiteralExpression(std::move(stringValue))},
STRUCT_EXTRACT_FUNC_NAME);
Expand Down
3 changes: 2 additions & 1 deletion src/binder/bound_statement_result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ namespace binder {
std::unique_ptr<BoundStatementResult> BoundStatementResult::createSingleStringColumnResult() {
auto result = std::make_unique<BoundStatementResult>();
auto columnName = std::string("result");
auto value = std::make_unique<common::Value>(columnName);
auto value = std::make_unique<common::Value>(
common::LogicalType{common::LogicalTypeID::STRING}, columnName);
auto stringColumn = std::make_shared<LiteralExpression>(std::move(value), columnName);
result->addColumn(stringColumn, expression_vector{stringColumn});
return result;
Expand Down
4 changes: 2 additions & 2 deletions src/c_api/prepared_statement.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ void kuzu_prepared_statement_bind_interval(

void kuzu_prepared_statement_bind_string(
kuzu_prepared_statement* prepared_statement, const char* param_name, const char* value) {
auto string_value = std::string(value);
auto value_ptr = std::make_shared<Value>(string_value);
auto value_ptr =
std::make_shared<Value>(LogicalType{LogicalTypeID::STRING}, std::string(value));
kuzu_prepared_statement_bind_cpp_value(prepared_statement, param_name, value_ptr);
}

Expand Down
3 changes: 2 additions & 1 deletion src/c_api/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,8 @@ kuzu_rel_val* kuzu_rel_val_create(
kuzu_internal_id_t src_id, kuzu_internal_id_t dst_id, const char* label) {
auto src_id_val = std::make_unique<Value>(internalID_t(src_id.offset, src_id.table_id));
auto dst_id_val = std::make_unique<Value>(internalID_t(dst_id.offset, dst_id.table_id));
auto label_val = std::make_unique<Value>(std::string(label));
auto label_val =
std::make_unique<Value>(LogicalType{LogicalTypeID::STRING}, std::string(label));
auto* c_rel_val = (kuzu_rel_val*)calloc(1, sizeof(kuzu_rel_val));
c_rel_val->_rel_val =
new RelVal(std::move(src_id_val), std::move(dst_id_val), std::move(label_val));
Expand Down
3 changes: 2 additions & 1 deletion src/common/types/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ add_library(kuzu_common_types
ku_string.cpp
value.cpp
timestamp_t.cpp
types.cpp)
types.cpp
blob.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_common_types>
Expand Down
106 changes: 106 additions & 0 deletions src/common/types/blob.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#include "common/types/blob.h"

#include "common/exception.h"
#include "common/string_utils.h"

namespace kuzu {
namespace common {

// Lookup table from a byte value to the numeric value of the hexadecimal digit it encodes.
// Entries for '0'-'9' (indices 48-57) hold 0-9; entries for 'A'-'F' (65-70) and 'a'-'f' (97-102)
// hold 10-15. Every other entry is -1, which callers in this file test with `< 0` to reject a
// character that is not a hex digit.
const int HexFormatConstants::HEX_MAP[256] = {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1,
-1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 10, 11, 12, 13, 14, 15, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1};

// Returns true when `c` can be emitted verbatim in the textual form of a blob: it must lie in
// the printable ASCII range [32, 126] and must not be a backslash, single quote or double quote.
static bool isRegularChar(char c) {
    if (c < 32 || c > 126) {
        return false;
    }
    switch (c) {
    case '\\':
    case '\'':
    case '"':
        return false;
    default:
        return true;
    }
}

// Returns the number of bytes that the textual blob literal `blob` decodes to.
//
// Each escape sequence introduced by a backslash is checked with validateHexCode (which throws
// ConversionException when the sequence is truncated or malformed) and counts as one byte. Every
// other byte must be <= 127 and counts as one byte.
//
// Throws ConversionException if a byte > 127 appears outside of an escape sequence.
uint64_t Blob::getBlobSize(const ku_string_t& blob) {
    // Use unsigned 64-bit positions throughout: they are compared against the string length and
    // passed to validateHexCode, which takes uint64_t, so a signed index would mix signedness.
    const uint64_t length = blob.len;
    auto blobStr = blob.getData();
    uint64_t blobSize = 0;
    for (uint64_t i = 0; i < length; i++) {
        if (blobStr[i] == '\\') {
            validateHexCode(blobStr, length, i);
            blobSize++;
            // Skip the rest of the escape sequence; the loop increment consumes its last byte.
            i += HexFormatConstants::LENGTH - 1;
        } else if (blobStr[i] <= 127) {
            blobSize++;
        } else {
            // NOTE: Codecov flagged this error path (lines 38-40 of blob.cpp in the originating
            // change) as not covered by tests.
            throw ConversionException(
                "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
                "must be escaped with hex codes (e.g. \\xAA)");
        }
    }
    return blobSize;
}

// Decodes the textual blob literal `str` into raw bytes written to `resultBuffer`.
//
// An escape sequence introduced by a backslash is validated with validateHexCode and then
// collapsed into the single byte described by its two hex digits. Any other byte <= 127 is copied
// unchanged. Throws ConversionException for a byte > 127 outside of an escape sequence.
//
// NOTE(review): nothing here bounds writes to `resultBuffer`; presumably the caller sizes it with
// getBlobSize(str) - confirm against callers.
void Blob::fromString(ku_string_t& str, uint8_t* resultBuffer) {
    auto input = str.getData();
    auto outPos = 0u;
    auto inPos = 0u;
    while (inPos < str.len) {
        if (input[inPos] == '\\') {
            validateHexCode(input, str.len, inPos);
            auto highDigit =
                HexFormatConstants::HEX_MAP[input[inPos + HexFormatConstants::FIRST_BYTE_POS]];
            auto lowDigit =
                HexFormatConstants::HEX_MAP[input[inPos + HexFormatConstants::SECOND_BYTES_POS]];
            resultBuffer[outPos++] =
                (highDigit << HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE) + lowDigit;
            // The whole escape sequence has been consumed.
            inPos += HexFormatConstants::LENGTH;
            continue;
        }
        if (input[inPos] > 127) {
            // NOTE: Codecov flagged this error path (lines 62-64 of blob.cpp in the originating
            // change) as not covered by tests.
            throw ConversionException(
                "Invalid byte encountered in STRING -> BLOB conversion. All non-ascii characters "
                "must be escaped with hex codes (e.g. \\xAA)");
        }
        resultBuffer[outPos++] = input[inPos];
        inPos++;
    }
}

// Renders the bytes of `blob` as text. Bytes accepted by isRegularChar are appended verbatim;
// every other byte is written as a backslash, an 'x', and two characters looked up in
// HexFormatConstants::HEX_TABLE for its upper and lower parts (e.g. \x00).
std::string Blob::toString(blob_t& blob) {
    auto bytes = (uint8_t*)blob.value.getData();
    std::string rendered;
    for (auto pos = 0u; pos < blob.value.len; pos++) {
        auto byte = bytes[pos];
        if (!isRegularChar(byte)) {
            // Escape the byte: split it into its two hex digits.
            auto highDigit = byte >> HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE;
            auto lowDigit = byte & HexFormatConstants::SECOND_BYTE_MASK;
            rendered += '\\';
            rendered += 'x';
            rendered += HexFormatConstants::HEX_TABLE[highDigit];
            rendered += HexFormatConstants::HEX_TABLE[lowDigit];
            continue;
        }
        // Regular characters are rendered as-is.
        rendered += byte;
    }
    return rendered;
}

// Checks that a well-formed hex escape sequence starts at `curPos` in `blobStr`.
//
// blobStr: the bytes of the textual blob literal.
// length:  total number of bytes available in `blobStr`.
// curPos:  position at which the escape sequence is expected to start.
//
// Throws ConversionException when fewer than HexFormatConstants::LENGTH bytes remain, when the
// bytes at `curPos` do not match HexFormatConstants::PREFIX, or when either digit position maps
// to a negative entry in HexFormatConstants::HEX_MAP.
void Blob::validateHexCode(const uint8_t* blobStr, uint64_t length, uint64_t curPos) {
    if (curPos + HexFormatConstants::LENGTH > length) {
        // NOTE: Codecov flagged this error path (lines 91-93 of blob.cpp in the originating
        // change) as not covered by tests.
        throw ConversionException(
            "Invalid hex escape code encountered in string -> blob conversion: "
            "unterminated escape code at end of blob");
    }
    const bool wellFormed =
        memcmp(blobStr + curPos, HexFormatConstants::PREFIX, HexFormatConstants::PREFIX_LENGTH) ==
            0 &&
        HexFormatConstants::HEX_MAP[blobStr[curPos + HexFormatConstants::FIRST_BYTE_POS]] >= 0 &&
        HexFormatConstants::HEX_MAP[blobStr[curPos + HexFormatConstants::SECOND_BYTES_POS]] >= 0;
    if (wellFormed) {
        return;
    }
    // NOTE: Codecov flagged this error path (lines 99-101 of blob.cpp in the originating change)
    // as not covered by tests.
    // NOTE(review): confirm that StringUtils::string_format substitutes "{}" placeholders and
    // accepts a std::string argument; if it is printf-style this message will not include the
    // offending escape code.
    throw ConversionException(StringUtils::string_format(
        "Invalid hex escape code encountered in string -> blob conversion: {}",
        std::string((char*)blobStr + curPos, HexFormatConstants::LENGTH)));
}

} // namespace common
} // namespace kuzu
4 changes: 4 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@
case LogicalTypeID::INTERNAL_ID: {
physicalType = PhysicalTypeID::INTERNAL_ID;
} break;
case LogicalTypeID::BLOB:
case LogicalTypeID::STRING: {
physicalType = PhysicalTypeID::STRING;
} break;
Expand Down Expand Up @@ -403,6 +404,7 @@
case LogicalTypeID::DATE:
case LogicalTypeID::TIMESTAMP:
case LogicalTypeID::INTERVAL:
case LogicalTypeID::BLOB:
case LogicalTypeID::STRING:
case LogicalTypeID::SERIAL:
return dataTypeToString(dataType.typeID);
Expand Down Expand Up @@ -441,6 +443,8 @@
return "TIMESTAMP";
case LogicalTypeID::INTERVAL:
return "INTERVAL";
case LogicalTypeID::BLOB:
return "BLOB";

Check warning on line 447 in src/common/types/types.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/types.cpp#L447

Added line #L447 was not covered by tests
case LogicalTypeID::STRING:
return "STRING";
case LogicalTypeID::VAR_LIST:
Expand Down
11 changes: 9 additions & 2 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,10 @@ Value Value::createDefaultValue(const LogicalType& dataType) {
return Value(interval_t());
case LogicalTypeID::INTERNAL_ID:
return Value(nodeID_t());
case LogicalTypeID::BLOB:
return Value(LogicalType{LogicalTypeID::BLOB}, std::string(""));
case LogicalTypeID::STRING:
return Value(std::string(""));
return Value(LogicalType{LogicalTypeID::STRING}, std::string(""));
case LogicalTypeID::FLOAT:
return Value((float_t)0);
case LogicalTypeID::RECURSIVE_REL:
Expand Down Expand Up @@ -121,7 +123,8 @@ Value::Value(const char* val_) : dataType{LogicalTypeID::STRING}, isNull_{false}
strVal = std::string(val_);
}

Value::Value(const std::string& val_) : dataType{LogicalTypeID::STRING}, isNull_{false} {
Value::Value(LogicalType type, const std::string& val_)
: dataType{std::move(type)}, isNull_{false} {
strVal = val_;
}

Expand Down Expand Up @@ -176,6 +179,9 @@ void Value::copyValueFrom(const uint8_t* value) {
case LogicalTypeID::INTERNAL_ID: {
val.internalIDVal = *((nodeID_t*)value);
} break;
case LogicalTypeID::BLOB: {
strVal = Blob::toString(*(blob_t*)value);
} break;
case LogicalTypeID::STRING: {
strVal = ((ku_string_t*)value)->getAsString();
} break;
Expand Down Expand Up @@ -291,6 +297,7 @@ std::string Value::toString() const {
return TypeUtils::toString(val.intervalVal);
case LogicalTypeID::INTERNAL_ID:
return TypeUtils::toString(val.internalIDVal);
case LogicalTypeID::BLOB:
case LogicalTypeID::STRING:
return strVal;
case LogicalTypeID::MAP: {
Expand Down
3 changes: 2 additions & 1 deletion src/function/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ add_library(kuzu_function
vector_timestamp_operations.cpp
vector_struct_operations.cpp
vector_map_operation.cpp
vector_union_operations.cpp)
vector_union_operations.cpp
vector_blob_operations.cpp)

set(ALL_OBJECT_FILES
${ALL_OBJECT_FILES} $<TARGET_OBJECTS:kuzu_function>
Expand Down
10 changes: 10 additions & 0 deletions src/function/built_in_vector_operations.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "function/built_in_vector_operations.h"

#include "function/arithmetic/vector_arithmetic_operations.h"
#include "function/blob/vector_blob_operations.h"
#include "function/cast/vector_cast_operations.h"
#include "function/comparison/vector_comparison_operations.h"
#include "function/date/vector_date_operations.h"
Expand Down Expand Up @@ -31,6 +32,7 @@ void BuiltInVectorOperations::registerVectorOperations() {
registerMapOperations();
registerUnionOperations();
registerNodeRelOperations();
registerBlobOperations();
}

bool BuiltInVectorOperations::canApplyStaticEvaluation(
Expand Down Expand Up @@ -381,6 +383,13 @@ void BuiltInVectorOperations::registerIntervalOperations() {
{TO_MICROSECONDS_FUNC_NAME, ToMicrosecondsVectorOperation::getDefinitions()});
}

// Registers the blob functions (OCTET_LENGTH, ENCODE and DECODE) in `vectorOperations`, keyed by
// function name.
void BuiltInVectorOperations::registerBlobOperations() {
    auto& registry = vectorOperations;
    registry.insert({OCTET_LENGTH_FUNC_NAME, OctetLengthVectorOperations::getDefinitions()});
    registry.insert({ENCODE_FUNC_NAME, EncodeVectorOperations::getDefinitions()});
    registry.insert({DECODE_FUNC_NAME, DecodeVectorOperations::getDefinitions()});
}

void BuiltInVectorOperations::registerStringOperations() {
vectorOperations.insert(
{ARRAY_EXTRACT_FUNC_NAME, ArrayExtractVectorOperation::getDefinitions()});
Expand Down Expand Up @@ -423,6 +432,7 @@ void BuiltInVectorOperations::registerCastOperations() {
{CAST_TO_INTERVAL_FUNC_NAME, CastToIntervalVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_STRING_FUNC_NAME, CastToStringVectorOperation::getDefinitions()});
vectorOperations.insert({CAST_TO_BLOB_FUNC_NAME, CastToBlobVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_DOUBLE_FUNC_NAME, CastToDoubleVectorOperation::getDefinitions()});
vectorOperations.insert(
Expand Down
44 changes: 44 additions & 0 deletions src/function/vector_blob_operations.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#include "function/blob/vector_blob_operations.h"

#include "function/blob/operations/decode_operation.h"
#include "function/blob/operations/encode_operation.h"
#include "function/blob/operations/octet_length_operation.h"
#include "function/string/vector_string_operations.h"

namespace kuzu {
namespace function {

// Builds the definition list for OCTET_LENGTH: a single overload that takes one BLOB argument
// and returns INT64, executed through UnaryExecFunction with operation::OctetLength.
vector_operation_definitions OctetLengthVectorOperations::getDefinitions() {
    std::vector<common::LogicalTypeID> parameterTypes{common::LogicalTypeID::BLOB};
    vector_operation_definitions result;
    result.emplace_back(make_unique<VectorOperationDefinition>(common::OCTET_LENGTH_FUNC_NAME,
        parameterTypes, common::LogicalTypeID::INT64,
        UnaryExecFunction<common::blob_t, int64_t, operation::OctetLength>, nullptr, nullptr,
        nullptr, false /* isVarLength */));
    return result;
}

Check warning on line 19 in src/function/vector_blob_operations.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_blob_operations.cpp#L19

Added line #L19 was not covered by tests

// Builds the definition list for ENCODE: a single overload that takes one STRING argument and
// returns BLOB, executed through UnaryStringExecFunction with operation::Encode.
vector_operation_definitions EncodeVectorOperations::getDefinitions() {
    std::vector<common::LogicalTypeID> parameterTypes{common::LogicalTypeID::STRING};
    vector_operation_definitions result;
    result.emplace_back(make_unique<VectorOperationDefinition>(common::ENCODE_FUNC_NAME,
        parameterTypes, common::LogicalTypeID::BLOB,
        VectorStringOperations::UnaryStringExecFunction<common::ku_string_t, common::blob_t,
            operation::Encode>,
        nullptr, false /* isVarLength */));
    return result;
}

Check warning on line 30 in src/function/vector_blob_operations.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_blob_operations.cpp#L30

Added line #L30 was not covered by tests

// Builds the definition list for DECODE: a single overload that takes one BLOB argument and
// returns STRING, executed through UnaryStringExecFunction with operation::Decode.
vector_operation_definitions DecodeVectorOperations::getDefinitions() {
    std::vector<common::LogicalTypeID> parameterTypes{common::LogicalTypeID::BLOB};
    vector_operation_definitions result;
    result.emplace_back(make_unique<VectorOperationDefinition>(common::DECODE_FUNC_NAME,
        parameterTypes, common::LogicalTypeID::STRING,
        VectorStringOperations::UnaryStringExecFunction<common::blob_t, common::ku_string_t,
            operation::Decode>,
        nullptr, false /* isVarLength */));
    return result;
}

Check warning on line 41 in src/function/vector_blob_operations.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_blob_operations.cpp#L41

Added line #L41 was not covered by tests

} // namespace function
} // namespace kuzu
8 changes: 8 additions & 0 deletions src/function/vector_cast_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,6 +180,14 @@
return result;
}

// Builds the definition list for the STRING -> BLOB cast function: a single overload that takes
// one STRING argument and returns BLOB, executed through UnaryCastExecFunction with
// operation::CastToBlob.
vector_operation_definitions CastToBlobVectorOperation::getDefinitions() {
    std::vector<LogicalTypeID> parameterTypes{LogicalTypeID::STRING};
    vector_operation_definitions definitions;
    definitions.emplace_back(make_unique<VectorOperationDefinition>(CAST_TO_BLOB_FUNC_NAME,
        parameterTypes, LogicalTypeID::BLOB,
        UnaryCastExecFunction<ku_string_t, blob_t, operation::CastToBlob>));
    return definitions;
}

Check warning on line 189 in src/function/vector_cast_operations.cpp

View check run for this annotation

Codecov / codecov/patch

src/function/vector_cast_operations.cpp#L189

Added line #L189 was not covered by tests

vector_operation_definitions CastToDoubleVectorOperation::getDefinitions() {
vector_operation_definitions result;
result.push_back(bindVectorOperation<int16_t, double_t, operation::CastToDouble>(
Expand Down
6 changes: 6 additions & 0 deletions src/include/common/expression_type.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ const std::string CAST_TO_FLOAT_FUNC_NAME = "TO_FLOAT";
const std::string CAST_TO_INT64_FUNC_NAME = "TO_INT64";
const std::string CAST_TO_INT32_FUNC_NAME = "TO_INT32";
const std::string CAST_TO_INT16_FUNC_NAME = "TO_INT16";
const std::string CAST_TO_BLOB_FUNC_NAME = "BLOB";

// list
const std::string LIST_CREATION_FUNC_NAME = "LIST_CREATION";
Expand Down Expand Up @@ -195,6 +196,11 @@ const std::string OFFSET_FUNC_NAME = "OFFSET";
const std::string NODES_FUNC_NAME = "NODES";
const std::string RELS_FUNC_NAME = "RELS";

// Blob functions
const std::string OCTET_LENGTH_FUNC_NAME = "OCTET_LENGTH";
const std::string ENCODE_FUNC_NAME = "ENCODE";
const std::string DECODE_FUNC_NAME = "DECODE";

enum ExpressionType : uint8_t {

// Boolean Connection Expressions
Expand Down
Loading
Loading