Skip to content

Commit

Permalink
Implement float dataType
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Feb 27, 2023
1 parent 7c9ccf9 commit ec1c8e4
Show file tree
Hide file tree
Showing 44 changed files with 576 additions and 380 deletions.
14 changes: 7 additions & 7 deletions dataset/tinysnb/eMeets.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
0,2
2,5
3,7
7,3
8,3
9,3
10,2
0,2,"[7.82,3.54]"
2,5,"[2.87,4.23]"
3,7,"[3.65,8.44]"
7,3,"[2.11,3.1]"
8,3,"[2.2,9.0]"
9,3,"[3,5.2]"
10,2,"[3.5,1.1]"
6 changes: 3 additions & 3 deletions dataset/tinysnb/eWorkAt.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
3,4,2015,"[3.8,2.5]"
5,6,2010,"[2.1,4.4]"
7,6,2015,"[9.2,3.1]"
3,4,2015,"[3.8,2.5]",8.2
5,6,2010,"[2.1,4.4]",7.6
7,6,2015,"[9.2,3.1]",9.2
6 changes: 3 additions & 3 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4],PRIMARY KEY (ID));
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, PRIMARY KEY (ID));
create node table movies (name STRING, PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], MANY_ONE);
create rel table meets (FROM person TO person, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], note STRING, ONE_ONE);
18 changes: 9 additions & 9 deletions dataset/tinysnb/vPerson.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]"
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades,height
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]",1.731
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]",0.99
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]",1.00
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]",1.30
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]",1.463
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]",1.51
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]",1.6
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]",1.323
2 changes: 1 addition & 1 deletion src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ int64_t CSVReader::getInt64() {

double_t CSVReader::getDouble() {
setNextTokenIsProcessed();
return TypeUtils::convertToDouble(line + linePtrStart);
return TypeUtils::convertFloatingPointNumber<double_t>(line + linePtrStart, DOUBLE);
}

uint8_t CSVReader::getBoolean() {
Expand Down
11 changes: 0 additions & 11 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,6 @@ uint32_t TypeUtils::convertToUint32(const char* data) {
return val;
}

// Parses the NUL-terminated string `data` as a double using strtod.
//
// Error reporting is delegated to the conversion helpers:
//  - throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed is handed the
//    input and strtod's end pointer, so it can tell whether nothing was parsed
//    or trailing characters were left over;
//  - throwConversionExceptionOutOfRange is called when strtod returns
//    +/-HUGE_VAL with errno set to ERANGE (overflow).
// Otherwise the parsed value is returned.
double_t TypeUtils::convertToDouble(const char* data) {
    char* eptr = nullptr;
    // Clear any stale error code so the ERANGE check below reflects this call only.
    errno = 0;
    auto retVal = strtod(data, &eptr);
    throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed(data, eptr, DOUBLE);
    if ((HUGE_VAL == retVal || -HUGE_VAL == retVal) && errno == ERANGE) {
        throwConversionExceptionOutOfRange(data, DOUBLE);
    }
    return retVal;
}

bool TypeUtils::convertToBoolean(const char* data) {
auto len = strlen(data);
if (len == 4 && 't' == tolower(data[0]) && 'r' == tolower(data[1]) && 'u' == tolower(data[2]) &&
Expand Down
16 changes: 14 additions & 2 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DataType::DataType(const DataType& other) {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand All @@ -57,13 +58,13 @@ DataType::DataType(DataType&& other) noexcept
fixedNumElementsInList{other.fixedNumElementsInList} {}

std::vector<DataTypeID> DataType::getNumericalTypeIDs() {
return std::vector<DataTypeID>{INT64, DOUBLE};
return std::vector<DataTypeID>{INT64, DOUBLE, FLOAT};
}

std::vector<DataTypeID> DataType::getAllValidTypeIDs() {
// TODO(Ziyi): Add the FIXED_LIST type to allValidTypeIDs once we support functions on VAR_LIST.
return std::vector<DataTypeID>{
INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, VAR_LIST};
INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, VAR_LIST, FLOAT};
}

DataType& DataType::operator=(const DataType& other) {
Expand All @@ -84,6 +85,7 @@ DataType& DataType::operator=(const DataType& other) {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand Down Expand Up @@ -112,6 +114,7 @@ bool DataType::operator==(const DataType& other) const {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING:
return typeID == other.typeID;
default:
Expand Down Expand Up @@ -149,6 +152,7 @@ std::unique_ptr<DataType> DataType::copy() {
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return std::make_unique<DataType>(typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(typeID) + ".");
Expand Down Expand Up @@ -202,6 +206,8 @@ DataTypeID Types::dataTypeIDFromString(const std::string& dataTypeIDString) {
return TIMESTAMP;
} else if ("INTERVAL" == dataTypeIDString) {
return INTERVAL;
} else if ("FLOAT" == dataTypeIDString) {
return FLOAT;
} else {
throw InternalException("Cannot parse dataTypeID: " + dataTypeIDString);
}
Expand All @@ -225,6 +231,7 @@ std::string Types::dataTypeToString(const DataType& dataType) {
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return dataTypeToString(dataType.typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(dataType) + ".");
Expand Down Expand Up @@ -259,6 +266,8 @@ std::string Types::dataTypeToString(DataTypeID dataTypeID) {
return "VAR_LIST";
case FIXED_LIST:
return "FIXED_LIST";
case FLOAT:
return "FLOAT";
default:
throw InternalException(
"Unsupported DataType: " + Types::dataTypeToString(dataTypeID) + ".");
Expand Down Expand Up @@ -305,6 +314,8 @@ uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) {
return sizeof(ku_string_t);
case VAR_LIST:
return sizeof(ku_list_t);
case FLOAT:
return sizeof(float_t);
default:
throw InternalException(
"Cannot infer the size of dataTypeID: " + dataTypeToString(dataTypeID) + ".");
Expand All @@ -324,6 +335,7 @@ uint32_t Types::getDataTypeSize(const DataType& dataType) {
case INTERVAL:
case STRING:
case VAR_LIST:
case FLOAT:
return getDataTypeSize(dataType.typeID);
default:
throw InternalException(
Expand Down
31 changes: 21 additions & 10 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Value Value::createDefaultValue(const DataType& dataType) {
case BOOL:
return Value(true);
case DOUBLE:
return Value(0.0);
return Value((double_t)0);
case DATE:
return Value(date_t());
case TIMESTAMP:
Expand All @@ -54,6 +54,8 @@ Value Value::createDefaultValue(const DataType& dataType) {
return Value(nodeID_t());
case STRING:
return Value(std::string(""));
case FLOAT:
return Value((float_t)0);
case VAR_LIST:
case FIXED_LIST:
return Value(dataType, std::vector<std::unique_ptr<Value>>{});
Expand Down Expand Up @@ -108,6 +110,10 @@ Value::Value(DataType dataType, std::vector<std::unique_ptr<Value>> vals)
listVal = std::move(vals);
}

// Builds a non-null Value of type FLOAT that stores `val_` in the floatVal
// member of the value union.
Value::Value(float_t val_)
    : dataType{FLOAT}, isNull_{false} {
    this->val.floatVal = val_;
}

Value::Value(std::unique_ptr<NodeVal> val_) : dataType{NODE}, isNull_{false} {
nodeVal = std::move(val_);
}
Expand Down Expand Up @@ -157,6 +163,9 @@ void Value::copyValueFrom(const uint8_t* value) {
case FIXED_LIST: {
listVal = convertKUFixedListToVector(value);
} break;
case FLOAT: {
val.floatVal = *((float_t*)value);
} break;
default:
throw RuntimeException(
"Data type " + Types::dataTypeToString(dataType) + " is not supported for Value::set");
Expand Down Expand Up @@ -207,6 +216,9 @@ void Value::copyValueFrom(const Value& other) {
case REL: {
relVal = other.relVal->copy();
} break;
case FLOAT: {
val.floatVal = other.val.floatVal;
} break;
default:
throw NotImplementedException("Value::Value(const Value&) for type " +
Types::dataTypeToString(dataType) + " is not implemented.");
Expand Down Expand Up @@ -251,6 +263,8 @@ std::string Value::toString() const {
return nodeVal->toString();
case REL:
return relVal->toString();
case FLOAT:
return TypeUtils::toString(val.floatVal);
default:
throw NotImplementedException("Value::toString for type " +
Types::dataTypeToString(dataType) + " is not implemented.");
Expand Down Expand Up @@ -287,20 +301,17 @@ std::vector<std::unique_ptr<Value>> Value::convertKUVarListToVector(ku_list_t& l

std::vector<std::unique_ptr<Value>> Value::convertKUFixedListToVector(
const uint8_t* fixedList) const {
std::vector<std::unique_ptr<Value>> fixedListResultVal;
std::vector<std::unique_ptr<Value>> fixedListResultVal{dataType.fixedNumElementsInList};
auto numBytesPerElement = Types::getDataTypeSize(*dataType.childType);
switch (dataType.childType->typeID) {
case common::DataTypeID::INT64: {
for (auto i = 0; i < dataType.fixedNumElementsInList; ++i) {
fixedListResultVal.emplace_back(
std::make_unique<Value>(*(int64_t*)(fixedList + i * numBytesPerElement)));
}
putValuesIntoVector<int64_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::DOUBLE: {
for (auto i = 0; i < dataType.fixedNumElementsInList; ++i) {
fixedListResultVal.emplace_back(
std::make_unique<Value>(*(double_t*)(fixedList + i * numBytesPerElement)));
}
putValuesIntoVector<double_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::FLOAT: {
putValuesIntoVector<float_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
default:
assert(false);
Expand Down
8 changes: 8 additions & 0 deletions src/function/aggregate_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ std::unique_ptr<AggregateFunction> AggregateFunctionUtil::getAvgFunction(
return std::make_unique<AggregateFunction>(AvgFunction<double_t>::initialize,
AvgFunction<double_t>::updateAll, AvgFunction<double_t>::updatePos,
AvgFunction<double_t>::combine, AvgFunction<double_t>::finalize, inputType, isDistinct);
case FLOAT:
return std::make_unique<AggregateFunction>(AvgFunction<float_t>::initialize,
AvgFunction<float_t>::updateAll, AvgFunction<float_t>::updatePos,
AvgFunction<float_t>::combine, AvgFunction<float_t>::finalize, inputType, isDistinct);
default:
throw RuntimeException("Unsupported input data type " + Types::dataTypeToString(inputType) +
" for AggregateFunctionUtil::getAvgFunction.");
Expand All @@ -54,6 +58,10 @@ std::unique_ptr<AggregateFunction> AggregateFunctionUtil::getSumFunction(
return std::make_unique<AggregateFunction>(SumFunction<double_t>::initialize,
SumFunction<double_t>::updateAll, SumFunction<double_t>::updatePos,
SumFunction<double_t>::combine, SumFunction<double_t>::finalize, inputType, isDistinct);
case FLOAT:
return std::make_unique<AggregateFunction>(SumFunction<float_t>::initialize,
SumFunction<float_t>::updateAll, SumFunction<float_t>::updatePos,
SumFunction<float_t>::combine, SumFunction<float_t>::finalize, inputType, isDistinct);
default:
throw RuntimeException("Unsupported input data type " + Types::dataTypeToString(inputType) +
" for AggregateFunctionUtil::getSumFunction.");
Expand Down
35 changes: 30 additions & 5 deletions src/function/built_in_vector_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ uint32_t BuiltInVectorOperations::getCastCost(DataTypeID inputTypeID, DataTypeID
// ANY type can be any type
return 0;
case common::INT64:
return implicitCastInt64(targetTypeID);
return castInt64(targetTypeID);
case common::DOUBLE:
return implicitCastDouble(targetTypeID);
return castDouble(targetTypeID);
case common::FLOAT:
return castFloat(targetTypeID);
default:
return UINT32_MAX;
}
Expand All @@ -110,22 +112,43 @@ uint32_t BuiltInVectorOperations::getCastCost(
}
}

uint32_t BuiltInVectorOperations::implicitCastInt64(common::DataTypeID targetTypeID) {
switch (targetTypeID) {
// Returns the fixed cost assigned to casting *into* `typeID`:
// 102 for DOUBLE and 110 for FLOAT.
// Throws InternalException for any other target type.
// NOTE(review): presumably a lower cost makes a candidate more attractive
// during function matching - confirm against getBestMatch.
uint32_t BuiltInVectorOperations::getTargetTypeCost(common::DataTypeID typeID) {
    if (typeID == common::DOUBLE) {
        return 102;
    }
    if (typeID == common::FLOAT) {
        return 110;
    }
    throw InternalException("Unsupported casting operation.");
}

// Cost of casting an INT64 input to `targetTypeID`.
// FLOAT and DOUBLE targets are accepted and priced by getTargetTypeCost;
// every other target yields UINT32_MAX.
uint32_t BuiltInVectorOperations::castInt64(common::DataTypeID targetTypeID) {
    const bool isFloatingPointTarget =
        targetTypeID == common::FLOAT || targetTypeID == common::DOUBLE;
    if (!isFloatingPointTarget) {
        return UINT32_MAX;
    }
    return getTargetTypeCost(targetTypeID);
}

uint32_t BuiltInVectorOperations::implicitCastDouble(common::DataTypeID targetTypeID) {
// Cost of casting a DOUBLE input to `targetTypeID`.
// No target type is currently accepted, so every request yields UINT32_MAX.
uint32_t BuiltInVectorOperations::castDouble(common::DataTypeID targetTypeID) {
    // The argument is deliberately ignored: no cast out of DOUBLE is listed.
    static_cast<void>(targetTypeID);
    return UINT32_MAX;
}

// Cost of casting a FLOAT input to `targetTypeID`.
// Only DOUBLE is accepted as a target, priced by getTargetTypeCost;
// every other target yields UINT32_MAX.
uint32_t BuiltInVectorOperations::castFloat(common::DataTypeID targetTypeID) {
    if (targetTypeID != common::DOUBLE) {
        return UINT32_MAX;
    }
    return getTargetTypeCost(targetTypeID);
}

// When there are multiple candidate functions, e.g. double + int and double + double for the
// input "1.5 + parameter", we prefer the one without any implicit casting, i.e. double + double.
VectorOperationDefinition* BuiltInVectorOperations::getBestMatch(
Expand Down Expand Up @@ -329,6 +352,8 @@ void BuiltInVectorOperations::registerCastOperations() {
{CAST_TO_STRING_FUNC_NAME, CastToStringVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_DOUBLE_FUNC_NAME, CastToDoubleVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_FLOAT_FUNC_NAME, CastToFloatVectorOperation::getDefinitions()});
}

void BuiltInVectorOperations::registerListOperations() {
Expand Down
Loading

0 comments on commit ec1c8e4

Please sign in to comment.