Skip to content

Commit

Permalink
Store raw blob data in value (#1774)
Browse files Browse the repository at this point in the history
* Store raw blob data in value

* Blob API for Python

* Node.js Blob

* BLOB API for C and Java

---------

Co-authored-by: Chang Liu <liuc223@gmail.com>
  • Loading branch information
acquamarin and mewim committed Jul 7, 2023
1 parent 52bc493 commit 761585c
Show file tree
Hide file tree
Showing 13 changed files with 121 additions and 17 deletions.
7 changes: 7 additions & 0 deletions src/c_api/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,13 @@ char* kuzu_value_get_string(kuzu_value* value) {
return c_string;
}

/**
 * Returns a freshly malloc'ed copy of the raw bytes stored in a BLOB value, followed by a single
 * terminating zero byte. The caller releases the buffer with free().
 *
 * @param value The value to read; it must hold a BLOB.
 * @return The allocated buffer, or nullptr if the allocation failed.
 */
uint8_t* kuzu_value_get_blob(kuzu_value* value) {
    auto blobBytes = static_cast<Value*>(value->_value)->getValue<std::string>();
    const auto numBytes = blobBytes.size();
    auto* c_blob = static_cast<uint8_t*>(malloc(numBytes + 1));
    if (c_blob == nullptr) {
        return nullptr;
    }
    // A blob is arbitrary binary data and may legitimately contain 0x00 bytes. strcpy would stop
    // at the first one and leave the rest of the buffer uninitialised, so copy by length instead.
    memcpy(c_blob, blobBytes.data(), numBytes);
    // Keep the trailing terminator that the header documentation promises.
    c_blob[numBytes] = 0;
    return c_blob;
}

char* kuzu_value_to_string(kuzu_value* value) {
auto string_val = static_cast<Value*>(value->_value)->toString();
auto* c_string = (char*)malloc(string_val.size() + 1);
Expand Down
13 changes: 6 additions & 7 deletions src/common/types/blob.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,16 +66,15 @@ uint64_t Blob::fromString(const char* str, uint64_t length, uint8_t* resultBuffe
return resultPos;
}

std::string Blob::toString(blob_t& blob) {
std::string Blob::toString(const uint8_t* value, uint64_t len) {
std::string result;
auto blobData = (uint8_t*)blob.value.getData();
for (auto i = 0u; i < blob.value.len; i++) {
if (isRegularChar(blobData[i])) {
for (auto i = 0u; i < len; i++) {
if (isRegularChar(value[i])) {
// ascii characters are rendered as-is.
result += blobData[i];
result += value[i];
} else {
auto firstByte = blobData[i] >> HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE;
auto secondByte = blobData[i] & HexFormatConstants::SECOND_BYTE_MASK;
auto firstByte = value[i] >> HexFormatConstants::NUM_BYTES_TO_SHIFT_FOR_FIRST_BYTE;
auto secondByte = value[i] & HexFormatConstants::SECOND_BYTE_MASK;
// non-ascii characters are rendered as hexadecimal (e.g. \x00).
result += '\\';
result += 'x';
Expand Down
3 changes: 2 additions & 1 deletion src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ void Value::copyValueFrom(const uint8_t* value) {
val.internalIDVal = *((nodeID_t*)value);
} break;
case LogicalTypeID::BLOB: {
strVal = Blob::toString(*(blob_t*)value);
strVal = ((blob_t*)value)->value.getAsString();
} break;
case LogicalTypeID::STRING: {
strVal = ((ku_string_t*)value)->getAsString();
Expand Down Expand Up @@ -289,6 +289,7 @@ std::string Value::toString() const {
case LogicalTypeID::INTERNAL_ID:
return TypeUtils::toString(val.internalIDVal);
case LogicalTypeID::BLOB:
return Blob::toString(reinterpret_cast<const uint8_t*>(strVal.c_str()), strVal.length());
case LogicalTypeID::STRING:
return strVal;
case LogicalTypeID::MAP: {
Expand Down
8 changes: 7 additions & 1 deletion src/include/c_api/kuzu.h
Original file line number Diff line number Diff line change
Expand Up @@ -649,7 +649,7 @@ KUZU_C_API int16_t kuzu_value_get_int16(kuzu_value* value);
*/
KUZU_C_API int32_t kuzu_value_get_int32(kuzu_value* value);
/**
* @brief Returns the int64 value of the given value. The value must be of type INT64.
* @brief Returns the int64 value of the given value. The value must be of type INT64 or SERIAL.
* @param value The value to return.
*/
KUZU_C_API int64_t kuzu_value_get_int64(kuzu_value* value);
Expand Down Expand Up @@ -688,6 +688,12 @@ KUZU_C_API kuzu_interval_t kuzu_value_get_interval(kuzu_value* value);
* @param value The value to return.
*/
KUZU_C_API char* kuzu_value_get_string(kuzu_value* value);
/**
 * @brief Returns the blob value of the given value. The returned buffer is null-terminated similar
 * to a string. The value must be of type BLOB. The buffer is allocated with malloc, so the caller
 * is responsible for releasing it with free() once it is no longer needed.
 * @param value The value to return.
 * @return Pointer to the newly allocated buffer holding the blob bytes.
 */
KUZU_C_API uint8_t* kuzu_value_get_blob(kuzu_value* value);
/**
* @brief Converts the given value to string.
* @param value The value to convert.
Expand Down
6 changes: 5 additions & 1 deletion src/include/common/types/blob.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,11 @@ struct HexFormatConstants {
};

struct Blob {
static std::string toString(blob_t& blob);
static std::string toString(const uint8_t* value, uint64_t len);

// Convenience overload: renders the bytes held by `blob` by forwarding its data pointer and
// length to the pointer/length overload of toString.
static inline std::string toString(blob_t& blob) {
    const auto numBytes = blob.value.len;
    return toString(blob.value.getData(), numBytes);
}

static uint64_t getBlobSize(const ku_string_t& blob);

Expand Down
3 changes: 2 additions & 1 deletion src/include/common/types/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -434,7 +434,8 @@ inline internalID_t Value::getValue() const {
*/
KUZU_API template<>
inline std::string Value::getValue() const {
assert(dataType.getLogicalTypeID() == LogicalTypeID::STRING);
assert(dataType.getLogicalTypeID() == LogicalTypeID::STRING ||
dataType.getLogicalTypeID() == LogicalTypeID::BLOB);
return strVal;
}

Expand Down
22 changes: 22 additions & 0 deletions test/c_api/value_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,28 @@ TEST_F(CApiValueTest, GetString) {
kuzu_query_result_destroy(result);
}

// Checks that kuzu_value_get_blob hands back the four raw bytes of the BLOB literal followed by a
// terminating zero byte, and that the returned buffer can be released with free().
TEST_F(CApiValueTest, GetBlob) {
    auto conn = getConnection();
    auto queryResult =
        kuzu_connection_query(conn, (char*)R"(RETURN BLOB('\\xAA\\xBB\\xCD\\x1A');)");
    ASSERT_TRUE(kuzu_query_result_is_success(queryResult));
    ASSERT_TRUE(kuzu_query_result_has_next(queryResult));
    auto tuple = kuzu_query_result_get_next(queryResult);
    auto blobValue = kuzu_flat_tuple_get_value(tuple, 0);
    ASSERT_TRUE(blobValue->_is_owned_by_cpp);
    ASSERT_FALSE(kuzu_value_is_null(blobValue));
    auto bytes = kuzu_value_get_blob(blobValue);
    // Payload bytes in order, then the trailing terminator.
    const int expectedBytes[] = {0xAA, 0xBB, 0xCD, 0x1A, 0x00};
    for (auto i = 0u; i < 5u; i++) {
        ASSERT_EQ(bytes[i], expectedBytes[i]);
    }
    free(bytes);
    kuzu_value_destroy(blobValue);
    kuzu_flat_tuple_destroy(tuple);
    kuzu_query_result_destroy(queryResult);
}

TEST_F(CApiValueTest, ToSting) {
auto connection = getConnection();
auto result = kuzu_connection_query(connection,
Expand Down
10 changes: 8 additions & 2 deletions tools/java_api/src/jni/kuzu_java.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -927,12 +927,18 @@ JNIEXPORT jobject JNICALL Java_com_kuzudb_KuzuNative_kuzu_1value_1get_1value(
jobject ret = env->NewObject(retClass, ctor, iid.tableID, iid.offset);
return ret;
}
case LogicalTypeID::STRING:
case LogicalTypeID::BLOB: {
case LogicalTypeID::STRING: {
std::string str = v->getValue<std::string>();
jstring ret = env->NewStringUTF(str.c_str());
return ret;
}
case LogicalTypeID::BLOB: {
auto str = v->getValue<std::string>();
auto byteBuffer = str.c_str();
auto ret = env->NewByteArray(str.size());
env->SetByteArrayRegion(ret, 0, str.size(), (jbyte*)byteBuffer);
return ret;
}
default: {
// Throw exception here?
return nullptr;
Expand Down
18 changes: 18 additions & 0 deletions tools/java_api/src/test/java/com/kuzudb/test/ValueTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,24 @@ void ValueGetString() throws KuzuObjectRefDestroyedException {
result.destroy();
}

/**
 * Checks that a BLOB value is surfaced to Java as a byte[] holding the raw bytes of the literal.
 */
@Test
void ValueGetBlob() throws KuzuObjectRefDestroyedException {
    KuzuQueryResult result = conn.query("RETURN BLOB('\\\\xAA\\\\xBB\\\\xCD\\\\x1A');");
    assertTrue(result.isSuccess());
    assertTrue(result.hasNext());
    KuzuFlatTuple flatTuple = result.getNext();
    KuzuValue value = flatTuple.getValue(0);
    assertTrue(value.isOwnedByCPP());
    assertFalse(value.isNull());

    byte[] bytes = value.getValue();
    assertTrue(bytes.length == 4);
    // Java bytes are signed, so each expected literal is narrowed to byte before comparing.
    assertTrue(bytes[0] == (byte) 0xAA);
    assertTrue(bytes[1] == (byte) 0xBB);
    assertTrue(bytes[2] == (byte) 0xCD);
    assertTrue(bytes[3] == (byte) 0x1A);

    // Release the native query result, matching the cleanup at the end of the preceding test.
    result.destroy();
}

@Test
void ValueToString() throws KuzuObjectRefDestroyedException {
KuzuQueryResult result = conn.query("MATCH (a:person) RETURN a.fName, a.isStudent, a.workedHours");
Expand Down
7 changes: 5 additions & 2 deletions tools/nodejs_api/src_cpp/node_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,13 @@ Napi::Value Util::ConvertToNapiObject(const Value& value, Napi::Env env) {
case LogicalTypeID::DOUBLE: {
return Napi::Number::New(env, value.getValue<double>());
}
case LogicalTypeID::STRING:
case LogicalTypeID::BLOB: {
case LogicalTypeID::STRING: {
return Napi::String::New(env, value.getValue<std::string>());
}
case LogicalTypeID::BLOB: {
auto blobVal = value.getValue<std::string>();
return Napi::Buffer<char>::Copy(env, blobVal.c_str(), blobVal.size());
}
case LogicalTypeID::DATE: {
auto dateVal = value.getValue<date_t>();
// Javascript Date type contains both date and time information. This returns the Date at
Expand Down
18 changes: 18 additions & 0 deletions tools/nodejs_api/test/test_data_type.js
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,24 @@ describe("STRING", function () {
});
});

// A BLOB column is expected to come back as a Uint8Array holding the raw bytes of the literal.
describe("BLOB", function () {
  it("should convert BLOB type", async function () {
    const expectedBytes = [0xaa, 0xbb, 0xcd, 0x1a];
    const queryResult = await conn.query(
      "RETURN BLOB('\\\\xAA\\\\xBB\\\\xCD\\\\x1A');"
    );
    const rows = await queryResult.getAll();
    assert.equal(rows.length, 1);
    const row = rows[0];
    assert.equal(Object.keys(row).length, 1);
    const blob = Object.values(row)[0];
    assert.isTrue(blob instanceof Uint8Array);
    assert.equal(blob.length, 4);
    // Compare byte by byte, in order.
    expectedBytes.forEach((expectedByte, index) => {
      assert.equal(blob[index], expectedByte);
    });
  });
});

describe("DATE", function () {
it("should convert DATE type", async function () {
const queryResult = await conn.query(
Expand Down
8 changes: 6 additions & 2 deletions tools/python_api/src_cpp/py_query_result.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -90,10 +90,14 @@ py::object PyQueryResult::convertValueToPyObject(const Value& value) {
case LogicalTypeID::DOUBLE: {
return py::cast(value.getValue<double>());
}
case LogicalTypeID::STRING:
case LogicalTypeID::BLOB:{
case LogicalTypeID::STRING: {
return py::cast(value.getValue<std::string>());
}
case LogicalTypeID::BLOB: {
auto blobStr = value.getValue<std::string>();
auto blobBytesArray = blobStr.c_str();
return py::bytes(blobBytesArray, blobStr.size());
}
case LogicalTypeID::DATE: {
auto dateVal = value.getValue<date_t>();
int32_t year, month, day;
Expand Down
15 changes: 15 additions & 0 deletions tools/python_api/test/test_datatype.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ def test_string(establish_connection):
assert not result.has_next()
result.close()


def test_blob(establish_connection):
    """A BLOB result is returned as a 4-element byte sequence matching the literal."""
    conn, db = establish_connection
    expected_bytes = (0xAA, 0xBB, 0xCD, 0x1A)
    result = conn.execute("RETURN BLOB('\\\\xAA\\\\xBB\\\\xCD\\\\x1A')")
    assert result.has_next()
    blob = result.get_next()[0]
    assert len(blob) == 4
    # Compare byte by byte, in order.
    for index, expected_byte in enumerate(expected_bytes):
        assert blob[index] == expected_byte
    assert not result.has_next()
    result.close()


def test_date(establish_connection):
conn, db = establish_connection
result = conn.execute(
Expand Down

0 comments on commit 761585c

Please sign in to comment.