Skip to content

Commit

Permalink
Merge pull request #1328 from kuzudb/int-types
Browse files Browse the repository at this point in the history
Add INT16 and INT32 data types
  • Loading branch information
acquamarin committed Mar 1, 2023
2 parents f33e003 + 67f1c81 commit d151554
Show file tree
Hide file tree
Showing 53 changed files with 815 additions and 246 deletions.
2 changes: 2 additions & 0 deletions dataset/copy-fault-tests/invalid-number/schema.cypher
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
create node table person (ID INT64, gender INT32, PRIMARY KEY (ID));
create node table movie (ID INT64, length INT32, PRIMARY KEY (ID));
1 change: 1 addition & 0 deletions dataset/copy-fault-tests/invalid-number/vMovie.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
0,312abc
2 changes: 2 additions & 0 deletions dataset/copy-fault-tests/invalid-number/vPerson.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
0,4
2,2147483650
6 changes: 3 additions & 3 deletions dataset/tinysnb/eMarries.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
0,2,"[toronto]",
3,5,,"long long long string"
7,8,"[vancouver]","short str"
0,2,"[toronto]","[4,5]",
3,5,,"[2,5]","long long long string"
7,8,"[vancouver]","[3,9]","short str"
14 changes: 7 additions & 7 deletions dataset/tinysnb/eMeets.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
0,2,"[7.82,3.54]"
2,5,"[2.87,4.23]"
3,7,"[3.65,8.44]"
7,3,"[2.11,3.1]"
8,3,"[2.2,9.0]"
9,3,"[3,5.2]"
10,2,"[3.5,1.1]"
0,2,"[7.82,3.54]",5
2,5,"[2.87,4.23]",2
3,7,"[3.65,8.44]",3
7,3,"[2.11,3.1]",7
8,3,"[2.2,9.0]",9
9,3,"[3,5.2]",11
10,2,"[3.5,1.1]",13
8 changes: 4 additions & 4 deletions dataset/tinysnb/eStudyAt.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from,to,YEAR,Places
0,1,2021,"[wwAewsdndweusd,wek]"
2,1,2020,"[anew,jsdnwusklklklwewsd]"
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]"
from,to,YEAR,Places,length
0,1,2021,"[wwAewsdndweusd,wek]",5
2,1,2020,"[anew,jsdnwusklklklwewsd]",55
8,1,2020,"[awndsnjwejwen,isuhuwennjnuhuhuwewe]",22
8 changes: 4 additions & 4 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, PRIMARY KEY (ID));
create node table movies (name STRING, PRIMARY KEY (name));
create node table movies (name STRING, length INT32, PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], MANY_ONE);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16,MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], note STRING, ONE_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], times INT, MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], address INT16[2], note STRING, ONE_ONE);
6 changes: 3 additions & 3 deletions dataset/tinysnb/vMovies.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sóló cón tu párejâ
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie
Roma
Sóló cón tu párejâ,126
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544
Roma,298
3 changes: 2 additions & 1 deletion src/binder/expression_binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ std::shared_ptr<Expression> ExpressionBinder::implicitCast(
auto uniqueName = ScalarFunctionExpression::getUniqueName(functionName, children);
return std::make_shared<ScalarFunctionExpression>(functionName, FUNCTION,
DataType{targetType.typeID}, std::move(children),
VectorCastOperations::bindExecFunc(expression->dataType.typeID, targetType.typeID),
VectorCastOperations::bindImplicitCastFunc(
expression->dataType.typeID, targetType.typeID),
nullptr /* selectFunc */, std::move(uniqueName));
} else {
throw common::BinderException("Expression " + expression->toString() + " has data type " +
Expand Down
4 changes: 2 additions & 2 deletions src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -218,12 +218,12 @@ bool CSVReader::hasNextTokenOrError() {

// Calls setNextTokenIsProcessed() and then converts the text starting at
// `line + linePtrStart` into an int64_t via TypeUtils.
// NOTE(review): this span comes from a rendered diff (the file header reports
// "2 additions & 2 deletions"), so two return statements appear back to back and only the
// first one can execute as written. Presumably the convertToInt64 call is the removed line
// and convertStringToNumber<int64_t> is its replacement — confirm against the repository.
int64_t CSVReader::getInt64() {
setNextTokenIsProcessed();
return TypeUtils::convertToInt64(line + linePtrStart);
return TypeUtils::convertStringToNumber<int64_t>(line + linePtrStart);
}

// Calls setNextTokenIsProcessed() and then converts the text starting at
// `line + linePtrStart` into a double_t via TypeUtils.
// NOTE(review): this span comes from a rendered diff (the file header reports
// "2 additions & 2 deletions"), so two return statements appear back to back and only the
// first one can execute as written. Presumably the convertFloatingPointNumber call is the
// removed line and convertStringToNumber<double_t> is its replacement — confirm against the
// repository.
double_t CSVReader::getDouble() {
setNextTokenIsProcessed();
return TypeUtils::convertFloatingPointNumber<double_t>(line + linePtrStart, DOUBLE);
return TypeUtils::convertStringToNumber<double_t>(line + linePtrStart);
}

uint8_t CSVReader::getBoolean() {
Expand Down
33 changes: 0 additions & 33 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
@@ -1,26 +1,11 @@
#include "common/type_utils.h"

#include <cerrno>
#include <climits>

#include "common/exception.h"
#include "common/utils.h"

namespace kuzu {
namespace common {

// Parses `data` as a base-10 signed 64-bit integer using strtoll.
//
// Throws ConversionException (through
// throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed) when the parse end pointer shows
// that the input was not fully consumed, and throws ConversionException with
// " Input out of range." when strtoll returned LLONG_MAX/LLONG_MIN with errno == ERANGE.
int64_t TypeUtils::convertToInt64(const char* data) {
    char* parseEnd;
    // Reset errno first so that an ERANGE observed below can only stem from this parse.
    errno = 0;
    auto parsed = strtoll(data, &parseEnd, 10);
    // The consumption check runs before the range check, so malformed input is reported first.
    throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed(data, parseEnd, INT64);
    const bool hitLimit = (LLONG_MAX == parsed || LLONG_MIN == parsed);
    if (hitLimit && errno == ERANGE) {
        throw ConversionException(
            prefixConversionExceptionMessage(data, INT64) + " Input out of range.");
    }
    return parsed;
}

uint32_t TypeUtils::convertToUint32(const char* data) {
std::istringstream iss(data);
uint32_t val;
Expand Down Expand Up @@ -88,23 +73,5 @@ std::string TypeUtils::prefixConversionExceptionMessage(const char* data, DataTy
Types::dataTypeToString(dataTypeID) + ".";
}

// Validates the outcome of a strtol-style parse and throws when the input was not a clean,
// fully consumed number.
//
// data:       start of the NUL-terminated text that was parsed.
// eptr:       end pointer reported by the parse (first character that was not consumed).
// dataTypeID: target type, used only to build the error message.
//
// Throws ConversionException when nothing was consumed (data == eptr) or when the character
// at eptr is not the terminating '\0'. Returns normally otherwise.
void TypeUtils::throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed(
    const char* data, const char* eptr, DataTypeID dataTypeID) {
    if (data == eptr) {
        // The prefix built by prefixConversionExceptionMessage already ends with a period, so
        // the suffix starts with a space (not another ".") to avoid a doubled period and to
        // match the other conversion messages.
        throw ConversionException(prefixConversionExceptionMessage(data, dataTypeID) +
                                  " Invalid input. No characters consumed.");
    }
    if (*eptr != '\0') {
        // *data and *eptr are single chars appended to the std::string message.
        throw ConversionException(prefixConversionExceptionMessage(data, dataTypeID) +
                                  " Not all characters were read. read from character " + *data +
                                  " up to character: " + *eptr + ".");
    }
}

// Unconditionally throws a ConversionException whose message is the standard conversion
// prefix for (`data`, `dataTypeID`) followed by " Input out of range.".
void TypeUtils::throwConversionExceptionOutOfRange(const char* data, DataTypeID dataTypeID) {
    // Build the message up front so the throw site stays a single simple expression.
    const std::string message =
        prefixConversionExceptionMessage(data, dataTypeID) + " Input out of range.";
    throw ConversionException(message);
}

} // namespace common
} // namespace kuzu
56 changes: 41 additions & 15 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,11 +40,13 @@ DataType::DataType(const DataType& other) {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand All @@ -58,13 +60,13 @@ DataType::DataType(DataType&& other) noexcept
fixedNumElementsInList{other.fixedNumElementsInList} {}

// Returns the DataTypeIDs that this class reports as numerical types.
// NOTE(review): this span comes from a rendered diff, so two return statements appear back to
// back and only the first one can execute as written. Presumably the three-element list is the
// removed line and the list that also contains INT32 and INT16 is its replacement — confirm
// against the repository.
std::vector<DataTypeID> DataType::getNumericalTypeIDs() {
return std::vector<DataTypeID>{INT64, DOUBLE, FLOAT};
return std::vector<DataTypeID>{INT64, INT32, INT16, DOUBLE, FLOAT};
}

// Returns the DataTypeIDs that this class reports as valid; FIXED_LIST is not in either list
// (see the TODO below).
// NOTE(review): this span comes from a rendered diff, so two return statements appear back to
// back and only the first one can execute as written. Presumably the list without INT32/INT16
// is the removed version and the list that includes them is its replacement — confirm against
// the repository.
std::vector<DataTypeID> DataType::getAllValidTypeIDs() {
// TODO(Ziyi): Add FIX_LIST type to allValidTypeID when we support functions on VAR_LIST.
return std::vector<DataTypeID>{
INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, VAR_LIST, FLOAT};
return std::vector<DataTypeID>{INTERNAL_ID, BOOL, INT64, INT32, INT16, DOUBLE, STRING, DATE,
TIMESTAMP, INTERVAL, VAR_LIST, FLOAT};
}

DataType& DataType::operator=(const DataType& other) {
Expand All @@ -81,11 +83,13 @@ DataType& DataType::operator=(const DataType& other) {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand All @@ -110,11 +114,13 @@ bool DataType::operator==(const DataType& other) const {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING:
return typeID == other.typeID;
default:
Expand Down Expand Up @@ -147,12 +153,14 @@ std::unique_ptr<DataType> DataType::copy() {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return std::make_unique<DataType>(typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(typeID) + ".");
Expand Down Expand Up @@ -193,8 +201,16 @@ DataTypeID Types::dataTypeIDFromString(const std::string& dataTypeIDString) {
return INTERNAL_ID;
} else if ("INT64" == dataTypeIDString) {
return INT64;
} else if ("INT32" == dataTypeIDString) {
return INT32;
} else if ("INT16" == dataTypeIDString) {
return INT16;
} else if ("INT" == dataTypeIDString) {
return INT32;
} else if ("DOUBLE" == dataTypeIDString) {
return DOUBLE;
} else if ("FLOAT" == dataTypeIDString) {
return FLOAT;
} else if ("BOOLEAN" == dataTypeIDString) {
return BOOL;
} else if ("STRING" == dataTypeIDString) {
Expand All @@ -205,8 +221,6 @@ DataTypeID Types::dataTypeIDFromString(const std::string& dataTypeIDString) {
return TIMESTAMP;
} else if ("INTERVAL" == dataTypeIDString) {
return INTERVAL;
} else if ("FLOAT" == dataTypeIDString) {
return FLOAT;
} else {
throw InternalException("Cannot parse dataTypeID: " + dataTypeIDString);
}
Expand All @@ -225,12 +239,14 @@ std::string Types::dataTypeToString(const DataType& dataType) {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return dataTypeToString(dataType.typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(dataType) + ".");
Expand All @@ -251,8 +267,14 @@ std::string Types::dataTypeToString(DataTypeID dataTypeID) {
return "BOOL";
case INT64:
return "INT64";
case INT32:
return "INT32";
case INT16:
return "INT16";
case DOUBLE:
return "DOUBLE";
case FLOAT:
return "FLOAT";
case DATE:
return "DATE";
case TIMESTAMP:
Expand All @@ -265,8 +287,6 @@ std::string Types::dataTypeToString(DataTypeID dataTypeID) {
return "VAR_LIST";
case FIXED_LIST:
return "FIXED_LIST";
case FLOAT:
return "FLOAT";
default:
throw InternalException(
"Unsupported DataType: " + Types::dataTypeToString(dataTypeID) + ".");
Expand Down Expand Up @@ -301,8 +321,14 @@ uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) {
return sizeof(uint8_t);
case INT64:
return sizeof(int64_t);
case INT32:
return sizeof(int32_t);
case INT16:
return sizeof(int16_t);
case DOUBLE:
return sizeof(double_t);
case FLOAT:
return sizeof(float_t);
case DATE:
return sizeof(date_t);
case TIMESTAMP:
Expand All @@ -313,8 +339,6 @@ uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) {
return sizeof(ku_string_t);
case VAR_LIST:
return sizeof(ku_list_t);
case FLOAT:
return sizeof(float_t);
default:
throw InternalException(
"Cannot infer the size of dataTypeID: " + dataTypeToString(dataTypeID) + ".");
Expand All @@ -328,13 +352,15 @@ uint32_t Types::getDataTypeSize(const DataType& dataType) {
case INTERNAL_ID:
case BOOL:
case INT64:
case INT32:
case INT16:
case DOUBLE:
case FLOAT:
case DATE:
case TIMESTAMP:
case INTERVAL:
case STRING:
case VAR_LIST:
case FLOAT:
return getDataTypeSize(dataType.typeID);
default:
throw InternalException(
Expand Down
36 changes: 33 additions & 3 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ Value Value::createNullValue(DataType dataType) {
Value Value::createDefaultValue(const DataType& dataType) {
switch (dataType.typeID) {
case INT64:
return Value(0);
return Value((int64_t)0);
case INT32:
return Value((int32_t)0);
case INT16:
return Value((int16_t)0);
case BOOL:
return Value(true);
case DOUBLE:
Expand Down Expand Up @@ -69,8 +73,12 @@ Value::Value(bool val_) : dataType{BOOL}, isNull_{false} {
val.booleanVal = val_;
}

Value::Value(int32_t val_) : dataType{INT64}, isNull_{false} {
val.int64Val = (int64_t)val_;
Value::Value(int16_t val_) : dataType{INT16}, isNull_{false} {
val.int16Val = val_;
}

Value::Value(int32_t val_) : dataType{INT32}, isNull_{false} {
val.int32Val = val_;
}

Value::Value(int64_t val_) : dataType{INT64}, isNull_{false} {
Expand Down Expand Up @@ -136,6 +144,12 @@ void Value::copyValueFrom(const uint8_t* value) {
case INT64: {
val.int64Val = *((int64_t*)value);
} break;
case INT32: {
val.int32Val = *((int32_t*)value);
} break;
case INT16: {
val.int16Val = *((int16_t*)value);
} break;
case BOOL: {
val.booleanVal = *((bool*)value);
} break;
Expand Down Expand Up @@ -186,6 +200,12 @@ void Value::copyValueFrom(const Value& other) {
case INT64: {
val.int64Val = other.val.int64Val;
} break;
case INT32: {
val.int32Val = other.val.int32Val;
} break;
case INT16: {
val.int16Val = other.val.int16Val;
} break;
case DOUBLE: {
val.doubleVal = other.val.doubleVal;
} break;
Expand Down Expand Up @@ -238,6 +258,10 @@ std::string Value::toString() const {
return TypeUtils::toString(val.booleanVal);
case INT64:
return TypeUtils::toString(val.int64Val);
case INT32:
return TypeUtils::toString(val.int32Val);
case INT16:
return TypeUtils::toString(val.int16Val);
case DOUBLE:
return TypeUtils::toString(val.doubleVal);
case DATE:
Expand Down Expand Up @@ -307,6 +331,12 @@ std::vector<std::unique_ptr<Value>> Value::convertKUFixedListToVector(
case common::DataTypeID::INT64: {
putValuesIntoVector<int64_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::INT32: {
putValuesIntoVector<int32_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::INT16: {
putValuesIntoVector<int16_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::DOUBLE: {
putValuesIntoVector<double_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
Expand Down
Loading

0 comments on commit d151554

Please sign in to comment.