Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add INT8 type #1994

Merged
merged 1 commit into from
Sep 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.8.5 LANGUAGES CXX)
project(Kuzu VERSION 0.0.8.6 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
8 changes: 4 additions & 4 deletions dataset/tinysnb/eStudyAt.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from,to,YEAR,Places,length
0,1,2021,"[wwAewsdndweusd,wek]",5
2,1,2020,"[anew,jsdnwusklklklwewsd]",55
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]",22
from,to,YEAR,Places,length,level
0,1,2021,"[wwAewsdndweusd,wek]",5,5
2,1,2020,"[anew,jsdnwusklklklwewsd]",55,120
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]",22,2
4 changes: 2 additions & 2 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, state STRUCT(revenue INT16, location STRING[], stock STRUCT(price INT64[], volume INT64)), info UNION(price FLOAT, movein DATE, note STRING),PRIMARY KEY (ID));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), grade union(credit boolean, grade1 double, grade2 int64), PRIMARY KEY (name));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, stars INT8, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), grade union(credit boolean, grade1 double, grade2 int64), PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16,MANY_ONE);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16, level INT8, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], times INT, data BYTEA, MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], address INT16[2], note STRING, ONE_ONE);
6 changes: 3 additions & 3 deletions dataset/tinysnb/vMovies.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}",true
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33},8.989
Roma,298,the movie is very interesting and funny,"{rating: 1223, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}",254
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, stars:2, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}",true
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, stars:10, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33},8.989
Roma,298,the movie is very interesting and funny,"{rating: 1223, stars:100, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}",254
12 changes: 12 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
return "INT32";
case PhysicalTypeID::INT16:
return "INT16";
case PhysicalTypeID::INT8:
return "INT8";

Check warning on line 26 in src/common/types/types.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/types.cpp#L26

Added line #L26 was not covered by tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is for exception handling, keep this.

case PhysicalTypeID::DOUBLE:
return "DOUBLE";
case PhysicalTypeID::FLOAT:
Expand Down Expand Up @@ -53,6 +55,8 @@
return sizeof(int32_t);
case PhysicalTypeID::INT16:
return sizeof(int16_t);
case PhysicalTypeID::INT8:
return sizeof(int8_t);
case PhysicalTypeID::DOUBLE:
return sizeof(double_t);
case PhysicalTypeID::FLOAT:
Expand Down Expand Up @@ -334,6 +338,9 @@
case LogicalTypeID::INT16: {
physicalType = PhysicalTypeID::INT16;
} break;
case LogicalTypeID::INT8: {
physicalType = PhysicalTypeID::INT8;
} break;
case LogicalTypeID::DOUBLE: {
physicalType = PhysicalTypeID::DOUBLE;
} break;
Expand Down Expand Up @@ -405,6 +412,8 @@
return LogicalTypeID::INT16;
} else if ("INT" == upperDataTypeIDString) {
return LogicalTypeID::INT32;
} else if ("INT8" == upperDataTypeIDString) {
return LogicalTypeID::INT8;
} else if ("DOUBLE" == upperDataTypeIDString) {
return LogicalTypeID::DOUBLE;
} else if ("FLOAT" == upperDataTypeIDString) {
Expand Down Expand Up @@ -480,6 +489,7 @@
case LogicalTypeID::INT64:
case LogicalTypeID::INT32:
case LogicalTypeID::INT16:
case LogicalTypeID::INT8:
case LogicalTypeID::DOUBLE:
case LogicalTypeID::FLOAT:
case LogicalTypeID::DATE:
Expand Down Expand Up @@ -514,6 +524,8 @@
return "INT32";
case LogicalTypeID::INT16:
return "INT16";
case LogicalTypeID::INT8:
return "INT8";
case LogicalTypeID::DOUBLE:
return "DOUBLE";
case LogicalTypeID::FLOAT:
Expand Down
21 changes: 21 additions & 0 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
return Value((int32_t)0);
case LogicalTypeID::INT16:
return Value((int16_t)0);
case LogicalTypeID::INT8:
return Value((int8_t)0);
case LogicalTypeID::BOOL:
return Value(true);
case LogicalTypeID::DOUBLE:
Expand Down Expand Up @@ -106,6 +108,11 @@
val.booleanVal = val_;
}

// Builds a non-null Value that holds val_ and is tagged with the INT8 logical type.
Value::Value(int8_t val_) : isNull_{false} {
    // Store the payload in the union slot reserved for 8-bit integers.
    val.int8Val = val_;
    // Record the logical type so readers know which union member is active.
    dataType = std::make_unique<LogicalType>(LogicalTypeID::INT8);
}

Value::Value(int16_t val_) : isNull_{false} {
dataType = std::make_unique<LogicalType>(LogicalTypeID::INT16);
val.int16Val = val_;
Expand Down Expand Up @@ -192,6 +199,9 @@
case LogicalTypeID::INT16: {
val.int16Val = *((int16_t*)value);
} break;
case LogicalTypeID::INT8: {
val.int8Val = *((int8_t*)value);
} break;
case LogicalTypeID::BOOL: {
val.booleanVal = *((bool*)value);
} break;
Expand Down Expand Up @@ -255,6 +265,9 @@
case PhysicalTypeID::INT16: {
val.int16Val = other.val.int16Val;
} break;
case PhysicalTypeID::INT8: {
val.int8Val = other.val.int8Val;
} break;
case PhysicalTypeID::DOUBLE: {
val.doubleVal = other.val.doubleVal;
} break;
Expand Down Expand Up @@ -297,6 +310,8 @@
return TypeUtils::toString(val.int32Val);
case LogicalTypeID::INT16:
return TypeUtils::toString(val.int16Val);
case LogicalTypeID::INT8:
return TypeUtils::toString(val.int8Val);
case LogicalTypeID::DOUBLE:
return TypeUtils::toString(val.doubleVal);
case LogicalTypeID::FLOAT:
Expand Down Expand Up @@ -502,6 +517,9 @@
case PhysicalTypeID::INT16: {
SerDeser::serializeValue(val.int16Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT8: {
SerDeser::serializeValue(val.int8Val, fileInfo, offset);

Check warning on line 521 in src/common/types/value.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/value.cpp#L520-L521

Added lines #L520 - L521 were not covered by tests
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Keep switch cases in this file, they are used for literalExpression in UDF. You don't need to add a test for this.

} break;
case PhysicalTypeID::DOUBLE: {
SerDeser::serializeValue(val.doubleVal, fileInfo, offset);
} break;
Expand Down Expand Up @@ -549,6 +567,9 @@
case PhysicalTypeID::INT16: {
SerDeser::deserializeValue(val->val.int16Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT8: {
SerDeser::deserializeValue(val->val.int8Val, fileInfo, offset);

Check warning on line 571 in src/common/types/value.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/value.cpp#L571

Added line #L571 was not covered by tests
} break;
case PhysicalTypeID::DOUBLE: {
SerDeser::deserializeValue(val->val.doubleVal, fileInfo, offset);
} break;
Expand Down
7 changes: 7 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@
case PhysicalTypeID::INT16: {
memcpy(dstValue, &value.val.int16Val, numBytesPerValue);
} break;
case PhysicalTypeID::INT8: {
memcpy(dstValue, &value.val.int8Val, numBytesPerValue);

Check warning on line 144 in src/common/vector/value_vector.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/vector/value_vector.cpp#L143-L144

Added lines #L143 - L144 were not covered by tests
Ashleyhx marked this conversation as resolved.
Show resolved Hide resolved
} break;
case PhysicalTypeID::DOUBLE: {
memcpy(dstValue, &value.val.doubleVal, numBytesPerValue);
} break;
Expand Down Expand Up @@ -226,6 +229,9 @@
case PhysicalTypeID::INT16: {
value->val.int16Val = getValue<int16_t>(pos);
} break;
case PhysicalTypeID::INT8: {
value->val.int8Val = getValue<int8_t>(pos);
} break;

Check warning on line 234 in src/common/vector/value_vector.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/vector/value_vector.cpp#L232-L234

Added lines #L232 - L234 were not covered by tests
case PhysicalTypeID::DOUBLE: {
value->val.doubleVal = getValue<double_t>(pos);
} break;
Expand Down Expand Up @@ -345,6 +351,7 @@
template void ValueVector::setValue<int64_t>(uint32_t pos, int64_t val);
template void ValueVector::setValue<int32_t>(uint32_t pos, int32_t val);
template void ValueVector::setValue<int16_t>(uint32_t pos, int16_t val);
template void ValueVector::setValue<int8_t>(uint32_t pos, int8_t val);
template void ValueVector::setValue<double_t>(uint32_t pos, double_t val);
template void ValueVector::setValue<float_t>(uint32_t pos, float_t val);
template void ValueVector::setValue<hash_t>(uint32_t pos, hash_t val);
Expand Down
12 changes: 6 additions & 6 deletions src/include/c_api/kuzu.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,12 @@ KUZU_C_API typedef enum {
KUZU_INT64 = 23,
KUZU_INT32 = 24,
KUZU_INT16 = 25,
KUZU_DOUBLE = 26,
KUZU_FLOAT = 27,
KUZU_DATE = 28,
KUZU_TIMESTAMP = 29,
KUZU_INTERVAL = 30,
KUZU_FIXED_LIST = 31,
KUZU_DOUBLE = 27,
KUZU_FLOAT = 28,
KUZU_DATE = 29,
KUZU_TIMESTAMP = 30,
KUZU_INTERVAL = 31,
KUZU_FIXED_LIST = 32,
KUZU_INTERNAL_ID = 40,
KUZU_ARROW_COLUMN = 41,
// variable size types
Expand Down
10 changes: 9 additions & 1 deletion src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ class StringCastUtils {
if (iss.str().empty()) {
throw ConversionException{"Empty string."};
}
iss >> result;

if constexpr (std::is_same_v<int8_t, T>) {
Ashleyhx marked this conversation as resolved.
Show resolved Hide resolved
int val;
iss >> val; // C++ will recognize int8 as char if we don't separate this case.
result = val;
} else
iss >> result;

if (iss.fail() || !iss.eof()) {
return false;
}
Expand Down Expand Up @@ -51,6 +58,7 @@ class TypeUtils {
static inline std::string toString(int64_t val) { return std::to_string(val); }
static inline std::string toString(int32_t val) { return std::to_string(val); }
static inline std::string toString(int16_t val) { return std::to_string(val); }
// int8_t is promoted to int when passed to std::to_string, so the decimal number is produced
// rather than the character with that code.
static inline std::string toString(int8_t val) { return std::to_string(val); }
static inline std::string toString(double_t val) { return std::to_string(val); }
static inline std::string toString(float_t val) { return std::to_string(val); }
static inline std::string toString(const internalID_t& val) {
Expand Down
20 changes: 11 additions & 9 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,13 @@ KUZU_API enum class LogicalTypeID : uint8_t {
INT64 = 23,
INT32 = 24,
INT16 = 25,
DOUBLE = 26,
FLOAT = 27,
DATE = 28,
TIMESTAMP = 29,
INTERVAL = 30,
FIXED_LIST = 31,
INT8 = 26,
DOUBLE = 27,
FLOAT = 28,
DATE = 29,
TIMESTAMP = 30,
INTERVAL = 31,
FIXED_LIST = 32,

INTERNAL_ID = 40,

Expand All @@ -105,9 +106,10 @@ enum class PhysicalTypeID : uint8_t {
INT64 = 2,
INT32 = 3,
INT16 = 4,
DOUBLE = 5,
FLOAT = 6,
INTERVAL = 7,
INT8 = 5,
DOUBLE = 6,
FLOAT = 7,
INTERVAL = 8,
INTERNAL_ID = 9,
ARROW_COLUMN = 10,

Expand Down
26 changes: 26 additions & 0 deletions src/include/common/types/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class Value {
* @param val_ the int16_t value to set.
* @return a Value with INT16 type and val_ value.
*/
KUZU_API explicit Value(int16_t val_);
/**
 * @param val_ the int8_t value to set.
 * @return a Value with INT8 type and val_ value.
 */
KUZU_API explicit Value(int8_t val_);
/**
* @param val_ the int32_t value to set.
Expand Down Expand Up @@ -200,6 +202,7 @@ class Value {
int64_t int64Val;
int32_t int32Val;
int16_t int16Val;
int8_t int8Val;
double doubleVal;
float floatVal;
interval_t intervalVal;
Expand Down Expand Up @@ -363,6 +366,15 @@ inline bool Value::getValue() const {
return val.booleanVal;
}

/**
 * Reads the 8-bit integer payload of this value.
 * Asserts that the logical type of this value is INT8 before reading the union member.
 * @return int8 value.
 */
KUZU_API template<>
inline int8_t Value::getValue() const {
    assert(dataType->getLogicalTypeID() == LogicalTypeID::INT8);
    return val.int8Val;
}

/**
* @return int16 value.
*/
Expand Down Expand Up @@ -463,6 +475,15 @@ inline bool& Value::getValueReference() {
return val.booleanVal;
}

/**
* @return the reference to the int8 value.
*/
KUZU_API template<>
inline int8_t& Value::getValueReference() {
assert(dataType->getLogicalTypeID() == LogicalTypeID::INT8);
return val.int8Val;
Ashleyhx marked this conversation as resolved.
Show resolved Hide resolved
}

/**
* @return the reference to the int16 value.
*/
Expand Down Expand Up @@ -562,6 +583,11 @@ inline Value Value::createValue(bool val) {
return Value(val);
}

/**
 * @param val the int8 value
 * @return a Value with INT8 type and val value.
 */
KUZU_API template<>
inline Value Value::createValue(int8_t val) {
    return Value(val);
}

/**
* @param val the int16 value
* @return a Value with INT16 type and val value.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ namespace processor {

// Exchanges the high and low bytes of a 16-bit value.
#define BSWAP16(x) ((uint16_t)((((uint16_t)(x)&0xff00) >> 8) | (((uint16_t)(x)&0x00ff) << 8)))

// Single-byte counterpart of BSWAP16: one byte has nothing to swap, so this only casts x to
// uint8_t.
#define BSWAP8(x) ((uint8_t)(x))

// The OrderByKeyEncoder encodes all columns in the ORDER BY clause into a single binary sequence
// that, when compared using memcmp will yield the correct overall sorting order. On little-endian
// hardware, the least-significant byte is stored at the smallest address. To encode the sorting
Expand Down
10 changes: 5 additions & 5 deletions src/include/storage/storage_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@ using storage_version_t = uint64_t;

struct StorageVersionInfo {
static std::unordered_map<std::string, storage_version_t> getStorageVersionInfo() {
return {{"0.0.8.5", 19}, {"0.0.8.4", 19}, {"0.0.8.3", 19}, {"0.0.8.2", 19}, {"0.0.8.1", 18},
{"0.0.8", 17}, {"0.0.7.1", 16}, {"0.0.7", 15}, {"0.0.6.5", 14}, {"0.0.6.4", 13},
{"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9}, {"0.0.5", 8},
{"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4}, {"0.0.3.2", 3},
{"0.0.3.1", 2}, {"0.0.3", 1}};
return {{"0.0.8.6", 20}, {"0.0.8.5", 19}, {"0.0.8.4", 19}, {"0.0.8.3", 19}, {"0.0.8.2", 19},
{"0.0.8.1", 18}, {"0.0.8", 17}, {"0.0.7.1", 16}, {"0.0.7", 15}, {"0.0.6.5", 14},
{"0.0.6.4", 13}, {"0.0.6.3", 12}, {"0.0.6.2", 11}, {"0.0.6.1", 10}, {"0.0.6", 9},
{"0.0.5", 8}, {"0.0.4", 7}, {"0.0.3.5", 6}, {"0.0.3.4", 5}, {"0.0.3.3", 4},
{"0.0.3.2", 3}, {"0.0.3.1", 2}, {"0.0.3", 1}};
}

static storage_version_t getStorageVersion();
Expand Down
1 change: 1 addition & 0 deletions src/include/storage/storage_structure/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ class ColumnFactory {
case common::LogicalTypeID::INT64:
case common::LogicalTypeID::INT32:
case common::LogicalTypeID::INT16:
case common::LogicalTypeID::INT8:
case common::LogicalTypeID::DOUBLE:
case common::LogicalTypeID::FLOAT:
case common::LogicalTypeID::BOOL:
Expand Down
1 change: 1 addition & 0 deletions src/include/storage/storage_structure/lists/lists.h
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ class ListsFactory {
case common::LogicalTypeID::INT64:
case common::LogicalTypeID::INT32:
case common::LogicalTypeID::INT16:
case common::LogicalTypeID::INT8:
case common::LogicalTypeID::DOUBLE:
case common::LogicalTypeID::FLOAT:
case common::LogicalTypeID::BOOL:
Expand Down
13 changes: 13 additions & 0 deletions src/processor/operator/order_by/order_by_key_encoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,10 @@ void OrderByKeyEncoder::getEncodingFunction(PhysicalTypeID physicalType, encode_
func = encodeTemplate<int16_t>;
return;
}
case PhysicalTypeID::INT8: {
func = encodeTemplate<int8_t>;
return;
Ashleyhx marked this conversation as resolved.
Show resolved Hide resolved
}
case PhysicalTypeID::DOUBLE: {
func = encodeTemplate<double_t>;
return;
Expand All @@ -237,6 +241,15 @@ void OrderByKeyEncoder::getEncodingFunction(PhysicalTypeID physicalType, encode_
}
}

// Encodes one INT8 key into the single byte at resultPtr[0].
// The byte is written as-is (BSWAP8 leaves a lone byte unchanged, so swapBytes has no visible
// effect here) and is then passed through flipSign.
// NOTE(review): flipSign presumably toggles the sign bit so memcmp ordering matches signed
// ordering - confirm against its definition.
template<>
void OrderByKeyEncoder::encodeData(int8_t data, uint8_t* resultPtr, bool swapBytes) {
    resultPtr[0] = swapBytes ? BSWAP8(data) : static_cast<uint8_t>(data);
    resultPtr[0] = flipSign(resultPtr[0]);
}

template<>
void OrderByKeyEncoder::encodeData(int16_t data, uint8_t* resultPtr, bool swapBytes) {
if (swapBytes) {
Expand Down
Loading