Skip to content

Commit

Permalink
add int8 data type
Browse files Browse the repository at this point in the history
  • Loading branch information
Ashleyhx committed Sep 11, 2023
1 parent 134ced4 commit 6ed50fb
Show file tree
Hide file tree
Showing 45 changed files with 317 additions and 86 deletions.
8 changes: 4 additions & 4 deletions dataset/tinysnb/eStudyAt.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from,to,YEAR,Places,length
0,1,2021,"[wwAewsdndweusd,wek]",5
2,1,2020,"[anew,jsdnwusklklklwewsd]",55
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]",22
from,to,YEAR,Places,length,level
0,1,2021,"[wwAewsdndweusd,wek]",5,5
2,1,2020,"[anew,jsdnwusklklklwewsd]",55,120
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]",22,2
4 changes: 2 additions & 2 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, state STRUCT(revenue INT16, location STRING[], stock STRUCT(price INT64[], volume INT64)), info UNION(price FLOAT, movein DATE, note STRING),PRIMARY KEY (ID));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), grade union(credit boolean, grade1 double, grade2 int64), PRIMARY KEY (name));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, stars INT8, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), grade union(credit boolean, grade1 double, grade2 int64), PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16,MANY_ONE);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16, level INT8, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], times INT, data BYTEA, MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], address INT16[2], note STRING, ONE_ONE);
6 changes: 3 additions & 3 deletions dataset/tinysnb/vMovies.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}",true
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33},8.989
Roma,298,the movie is very interesting and funny,"{rating: 1223, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}",254
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, stars:2, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}",true
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, stars:10, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33},8.989
Roma,298,the movie is very interesting and funny,"{rating: 1223, stars:100, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}",254
47 changes: 47 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

5 changes: 5 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
{
"dependencies": {
"node": "^20.6.0"
}
}
6 changes: 6 additions & 0 deletions src/common/arrow/arrow_row_batch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ std::unique_ptr<ArrowVector> ArrowRowBatch::createVector(
case LogicalTypeID::INT16: {
templateInitializeVector<LogicalTypeID::INT16>(result.get(), typeInfo, capacity);
} break;
case LogicalTypeID::INT8: {
templateInitializeVector<LogicalTypeID::INT8>(result.get(), typeInfo, capacity);
} break;
case LogicalTypeID::DOUBLE: {
templateInitializeVector<LogicalTypeID::DOUBLE>(result.get(), typeInfo, capacity);
} break;
Expand Down Expand Up @@ -274,6 +277,9 @@ void ArrowRowBatch::copyNonNullValue(
case LogicalTypeID::INT16: {
templateCopyNonNullValue<LogicalTypeID::INT16>(vector, typeInfo, value, pos);
} break;
case LogicalTypeID::INT8: {
templateCopyNonNullValue<LogicalTypeID::INT8>(vector, typeInfo, value, pos);
} break;
case LogicalTypeID::DOUBLE: {
templateCopyNonNullValue<LogicalTypeID::DOUBLE>(vector, typeInfo, value, pos);
} break;
Expand Down
12 changes: 12 additions & 0 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ std::string PhysicalTypeUtils::physicalTypeToString(PhysicalTypeID physicalType)
return "INT32";
case PhysicalTypeID::INT16:
return "INT16";
case PhysicalTypeID::INT8:
return "INT8";
case PhysicalTypeID::DOUBLE:
return "DOUBLE";
case PhysicalTypeID::FLOAT:
Expand Down Expand Up @@ -53,6 +55,8 @@ uint32_t PhysicalTypeUtils::getFixedTypeSize(PhysicalTypeID physicalType) {
return sizeof(int32_t);
case PhysicalTypeID::INT16:
return sizeof(int16_t);
case PhysicalTypeID::INT8:
return sizeof(int8_t);
case PhysicalTypeID::DOUBLE:
return sizeof(double_t);
case PhysicalTypeID::FLOAT:
Expand Down Expand Up @@ -334,6 +338,9 @@ void LogicalType::setPhysicalType() {
case LogicalTypeID::INT16: {
physicalType = PhysicalTypeID::INT16;
} break;
case LogicalTypeID::INT8: {
physicalType = PhysicalTypeID::INT8;
} break;
case LogicalTypeID::DOUBLE: {
physicalType = PhysicalTypeID::DOUBLE;
} break;
Expand Down Expand Up @@ -405,6 +412,8 @@ LogicalTypeID LogicalTypeUtils::dataTypeIDFromString(const std::string& dataType
return LogicalTypeID::INT16;
} else if ("INT" == upperDataTypeIDString) {
return LogicalTypeID::INT32;
} else if ("INT8" == upperDataTypeIDString) {
return LogicalTypeID::INT8;
} else if ("DOUBLE" == upperDataTypeIDString) {
return LogicalTypeID::DOUBLE;
} else if ("FLOAT" == upperDataTypeIDString) {
Expand Down Expand Up @@ -480,6 +489,7 @@ std::string LogicalTypeUtils::dataTypeToString(const LogicalType& dataType) {
case LogicalTypeID::INT64:
case LogicalTypeID::INT32:
case LogicalTypeID::INT16:
case LogicalTypeID::INT8:
case LogicalTypeID::DOUBLE:
case LogicalTypeID::FLOAT:
case LogicalTypeID::DATE:
Expand Down Expand Up @@ -514,6 +524,8 @@ std::string LogicalTypeUtils::dataTypeToString(LogicalTypeID dataTypeID) {
return "INT32";
case LogicalTypeID::INT16:
return "INT16";
case LogicalTypeID::INT8:
return "INT8";
case LogicalTypeID::DOUBLE:
return "DOUBLE";
case LogicalTypeID::FLOAT:
Expand Down
21 changes: 21 additions & 0 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ Value Value::createDefaultValue(const LogicalType& dataType) {
return Value((int32_t)0);
case LogicalTypeID::INT16:
return Value((int16_t)0);
case LogicalTypeID::INT8:
return Value((int8_t)0);
case LogicalTypeID::BOOL:
return Value(true);
case LogicalTypeID::DOUBLE:
Expand Down Expand Up @@ -106,6 +108,11 @@ Value::Value(bool val_) : isNull_{false} {
val.booleanVal = val_;
}

// Constructs a non-null Value that stores val_ in the int8 slot of the value union
// and is tagged with the INT8 logical type.
Value::Value(int8_t val_) : isNull_{false} {
    val.int8Val = val_;
    dataType = std::make_unique<LogicalType>(LogicalTypeID::INT8);
}

Value::Value(int16_t val_) : isNull_{false} {
dataType = std::make_unique<LogicalType>(LogicalTypeID::INT16);
val.int16Val = val_;
Expand Down Expand Up @@ -192,6 +199,9 @@ void Value::copyValueFrom(const uint8_t* value) {
case LogicalTypeID::INT16: {
val.int16Val = *((int16_t*)value);
} break;
case LogicalTypeID::INT8: {
val.int8Val = *((int8_t*)value);
} break;
case LogicalTypeID::BOOL: {
val.booleanVal = *((bool*)value);
} break;
Expand Down Expand Up @@ -255,6 +265,9 @@ void Value::copyValueFrom(const Value& other) {
case PhysicalTypeID::INT16: {
val.int16Val = other.val.int16Val;
} break;
case PhysicalTypeID::INT8: {
val.int8Val = other.val.int8Val;
} break;
case PhysicalTypeID::DOUBLE: {
val.doubleVal = other.val.doubleVal;
} break;
Expand Down Expand Up @@ -297,6 +310,8 @@ std::string Value::toString() const {
return TypeUtils::toString(val.int32Val);
case LogicalTypeID::INT16:
return TypeUtils::toString(val.int16Val);
case LogicalTypeID::INT8:
return TypeUtils::toString(val.int8Val);
case LogicalTypeID::DOUBLE:
return TypeUtils::toString(val.doubleVal);
case LogicalTypeID::FLOAT:
Expand Down Expand Up @@ -502,6 +517,9 @@ void Value::serialize(FileInfo* fileInfo, uint64_t& offset) const {
case PhysicalTypeID::INT16: {
SerDeser::serializeValue(val.int16Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT8: {
SerDeser::serializeValue(val.int8Val, fileInfo, offset);
} break;
case PhysicalTypeID::DOUBLE: {
SerDeser::serializeValue(val.doubleVal, fileInfo, offset);
} break;
Expand Down Expand Up @@ -549,6 +567,9 @@ std::unique_ptr<Value> Value::deserialize(FileInfo* fileInfo, uint64_t& offset)
case PhysicalTypeID::INT16: {
SerDeser::deserializeValue(val->val.int16Val, fileInfo, offset);
} break;
case PhysicalTypeID::INT8: {
SerDeser::deserializeValue(val->val.int8Val, fileInfo, offset);
} break;
case PhysicalTypeID::DOUBLE: {
SerDeser::deserializeValue(val->val.doubleVal, fileInfo, offset);
} break;
Expand Down
7 changes: 7 additions & 0 deletions src/common/vector/value_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,9 @@ void ValueVector::copyFromValue(uint64_t pos, const Value& value) {
case PhysicalTypeID::INT16: {
memcpy(dstValue, &value.val.int16Val, numBytesPerValue);
} break;
case PhysicalTypeID::INT8: {
memcpy(dstValue, &value.val.int8Val, numBytesPerValue);
} break;
case PhysicalTypeID::DOUBLE: {
memcpy(dstValue, &value.val.doubleVal, numBytesPerValue);
} break;
Expand Down Expand Up @@ -226,6 +229,9 @@ std::unique_ptr<Value> ValueVector::getAsValue(uint64_t pos) {
case PhysicalTypeID::INT16: {
value->val.int16Val = getValue<int16_t>(pos);
} break;
case PhysicalTypeID::INT8: {
value->val.int8Val = getValue<int8_t>(pos);
} break;
case PhysicalTypeID::DOUBLE: {
value->val.doubleVal = getValue<double_t>(pos);
} break;
Expand Down Expand Up @@ -345,6 +351,7 @@ template void ValueVector::setValue<bool>(uint32_t pos, bool val);
template void ValueVector::setValue<int64_t>(uint32_t pos, int64_t val);
template void ValueVector::setValue<int32_t>(uint32_t pos, int32_t val);
template void ValueVector::setValue<int16_t>(uint32_t pos, int16_t val);
template void ValueVector::setValue<int8_t>(uint32_t pos, int8_t val);
template void ValueVector::setValue<double_t>(uint32_t pos, double_t val);
template void ValueVector::setValue<float_t>(uint32_t pos, float_t val);
template void ValueVector::setValue<hash_t>(uint32_t pos, hash_t val);
Expand Down
12 changes: 6 additions & 6 deletions src/include/c_api/kuzu.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,12 +165,12 @@ KUZU_C_API typedef enum {
KUZU_INT64 = 23,
KUZU_INT32 = 24,
KUZU_INT16 = 25,
KUZU_DOUBLE = 26,
KUZU_FLOAT = 27,
KUZU_DATE = 28,
KUZU_TIMESTAMP = 29,
KUZU_INTERVAL = 30,
KUZU_FIXED_LIST = 31,
KUZU_DOUBLE = 27,
KUZU_FLOAT = 28,
KUZU_DATE = 29,
KUZU_TIMESTAMP = 30,
KUZU_INTERVAL = 31,
KUZU_FIXED_LIST = 32,
KUZU_INTERNAL_ID = 40,
KUZU_ARROW_COLUMN = 41,
// variable size types
Expand Down
10 changes: 9 additions & 1 deletion src/include/common/type_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,14 @@ class StringCastUtils {
if (iss.str().empty()) {
throw ConversionException{"Empty string."};
}
iss >> result;

if constexpr (std::is_same_v<int8_t, T>) {
    // int8_t is typically a character type to iostreams, so `iss >> result` would read
    // one raw character instead of parsing digits. Parse through a wider int instead.
    int parsed = 0;
    iss >> parsed;
    // Reject values outside the int8 range instead of silently wrapping them (e.g. 300):
    // setting failbit makes the stream-state check that follows report the failure.
    if (parsed < -128 || parsed > 127) {
        iss.setstate(std::ios_base::failbit);
    }
    result = static_cast<int8_t>(parsed);
} else {
    iss >> result;
}

if (iss.fail() || !iss.eof()) {
return false;
}
Expand Down Expand Up @@ -51,6 +58,7 @@ class TypeUtils {
// Numeric overloads: each renders its argument as decimal text via std::to_string.
static inline std::string toString(int64_t val) {
    return std::to_string(val);
}
static inline std::string toString(int32_t val) {
    return std::to_string(val);
}
static inline std::string toString(int16_t val) {
    return std::to_string(val);
}
static inline std::string toString(int8_t val) {
    // std::to_string has no 8-bit overload; the value is widened to int, which is
    // spelled out here so it is clear the number (not a character) is printed.
    return std::to_string(static_cast<int>(val));
}
static inline std::string toString(double_t val) {
    return std::to_string(val);
}
static inline std::string toString(float_t val) {
    return std::to_string(val);
}
static inline std::string toString(const internalID_t& val) {
Expand Down
21 changes: 12 additions & 9 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,12 +78,14 @@ KUZU_API enum class LogicalTypeID : uint8_t {
INT64 = 23,
INT32 = 24,
INT16 = 25,
DOUBLE = 26,
FLOAT = 27,
DATE = 28,
TIMESTAMP = 29,
INTERVAL = 30,
FIXED_LIST = 31,
INT8 = 26,

DOUBLE = 27,
FLOAT = 28,
DATE = 29,
TIMESTAMP = 30,
INTERVAL = 31,
FIXED_LIST = 32,

INTERNAL_ID = 40,

Expand All @@ -105,9 +107,10 @@ enum class PhysicalTypeID : uint8_t {
INT64 = 2,
INT32 = 3,
INT16 = 4,
DOUBLE = 5,
FLOAT = 6,
INTERVAL = 7,
INT8 = 5,
DOUBLE = 6,
FLOAT = 7,
INTERVAL = 8,
INTERNAL_ID = 9,
ARROW_COLUMN = 10,

Expand Down
26 changes: 26 additions & 0 deletions src/include/common/types/value.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ class Value {
* @param val_ the int16_t value to set.
* @return a Value with INT16 type and val_ value.
*/
KUZU_API explicit Value(int16_t val_);
/**
 * @param val_ the int8_t value to set.
 * @return a Value with INT8 type and val_ value.
 */
KUZU_API explicit Value(int8_t val_);
/**
* @param val_ the int32_t value to set.
Expand Down Expand Up @@ -200,6 +202,7 @@ class Value {
int64_t int64Val;
int32_t int32Val;
int16_t int16Val;
int8_t int8Val;
double doubleVal;
float floatVal;
interval_t intervalVal;
Expand Down Expand Up @@ -363,6 +366,15 @@ inline bool Value::getValue() const {
return val.booleanVal;
}

/**
 * @return the int8 value stored in this Value. Asserts that the logical type is INT8.
 */
KUZU_API template<>
inline int8_t Value::getValue() const {
    assert(LogicalTypeID::INT8 == dataType->getLogicalTypeID());
    return val.int8Val;
}

/**
* @return int16 value.
*/
Expand Down Expand Up @@ -463,6 +475,15 @@ inline bool& Value::getValueReference() {
return val.booleanVal;
}

/**
 * @return a mutable reference to the int8 value stored in this Value. Asserts that the
 * logical type is INT8.
 */
KUZU_API template<>
inline int8_t& Value::getValueReference() {
    assert(LogicalTypeID::INT8 == dataType->getLogicalTypeID());
    return val.int8Val;
}

/**
* @return the reference to the int16 value.
*/
Expand Down Expand Up @@ -562,6 +583,11 @@ inline Value Value::createValue(bool val) {
return Value(val);
}

/**
 * @param val the int8 value
 * @return a Value with INT8 type and val value.
 */
KUZU_API template<>
inline Value Value::createValue(int8_t val) {
return Value(val);
}

/**
* @param val the int16 value
* @return a Value with INT16 type and val value.
Expand Down
Loading

0 comments on commit 6ed50fb

Please sign in to comment.