Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement float(alias: REAL) dataType #1321

Merged
merged 1 commit into from
Feb 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions dataset/tinysnb/eMeets.csv
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
0,2
2,5
3,7
7,3
8,3
9,3
10,2
0,2,"[7.82,3.54]"
2,5,"[2.87,4.23]"
3,7,"[3.65,8.44]"
7,3,"[2.11,3.1]"
8,3,"[2.2,9.0]"
9,3,"[3,5.2]"
10,2,"[3.5,1.1]"
6 changes: 3 additions & 3 deletions dataset/tinysnb/eWorkAt.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
3,4,2015,"[3.8,2.5]"
5,6,2010,"[2.1,4.4]"
7,6,2015,"[9.2,3.1]"
3,4,2015,"[3.8,2.5]",8.2
5,6,2010,"[2.1,4.4]",7.6
7,6,2015,"[9.2,3.1]",9.2
6 changes: 3 additions & 3 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4],PRIMARY KEY (ID));
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, PRIMARY KEY (ID));
create node table movies (name STRING, PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], MANY_ONE);
create rel table meets (FROM person TO person, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
create rel table meets (FROM person TO person, location FLOAT[2], MANY_ONE);
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
create rel table marries (FROM person TO person, usedAddress STRING[], note STRING, ONE_ONE);
18 changes: 9 additions & 9 deletions dataset/tinysnb/vPerson.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]"
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades,height
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]",1.731
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]",0.99
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]",1.00
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]",1.30
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]",1.463
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]",1.51
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]",1.6
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]",1.323
2 changes: 1 addition & 1 deletion src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,7 @@ int64_t CSVReader::getInt64() {

double_t CSVReader::getDouble() {
setNextTokenIsProcessed();
return TypeUtils::convertToDouble(line + linePtrStart);
return TypeUtils::convertFloatingPointNumber<double_t>(line + linePtrStart, DOUBLE);
}

uint8_t CSVReader::getBoolean() {
Expand Down
11 changes: 0 additions & 11 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,6 @@ uint32_t TypeUtils::convertToUint32(const char* data) {
return val;
}

// Parses the NUL-terminated text in `data` as a double via strtod.
//
// The end pointer reported by strtod is handed to
// throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed (presumably rejecting
// empty or partially-consumed input -- confirm against that helper). Afterwards, a
// result of +/-HUGE_VAL combined with errno == ERANGE is reported through
// throwConversionExceptionOutOfRange.
double_t TypeUtils::convertToDouble(const char* data) {
    // Clear errno first so that an ERANGE observed below can only come from this strtod call
    // (or the validation helper that runs before the check).
    errno = 0;
    char* parseEnd;
    const auto parsed = strtod(data, &parseEnd);
    throwConversionExceptionIfNoOrNotEveryCharacterIsConsumed(data, parseEnd, DOUBLE);
    const bool hitRangeLimit =
        errno == ERANGE && (parsed == HUGE_VAL || parsed == -HUGE_VAL);
    if (hitRangeLimit) {
        throwConversionExceptionOutOfRange(data, DOUBLE);
    }
    return parsed;
}

bool TypeUtils::convertToBoolean(const char* data) {
auto len = strlen(data);
if (len == 4 && 't' == tolower(data[0]) && 'r' == tolower(data[1]) && 'u' == tolower(data[2]) &&
Expand Down
16 changes: 14 additions & 2 deletions src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ DataType::DataType(const DataType& other) {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand All @@ -57,13 +58,13 @@ DataType::DataType(DataType&& other) noexcept
fixedNumElementsInList{other.fixedNumElementsInList} {}

std::vector<DataTypeID> DataType::getNumericalTypeIDs() {
return std::vector<DataTypeID>{INT64, DOUBLE};
return std::vector<DataTypeID>{INT64, DOUBLE, FLOAT};
}

std::vector<DataTypeID> DataType::getAllValidTypeIDs() {
// TODO(Ziyi): Add FIX_LIST type to allValidTypeID when we support functions on VAR_LIST.
return std::vector<DataTypeID>{
INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, VAR_LIST};
INTERNAL_ID, BOOL, INT64, DOUBLE, STRING, DATE, TIMESTAMP, INTERVAL, VAR_LIST, FLOAT};
}

DataType& DataType::operator=(const DataType& other) {
Expand All @@ -84,6 +85,7 @@ DataType& DataType::operator=(const DataType& other) {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING: {
typeID = other.typeID;
} break;
Expand Down Expand Up @@ -112,6 +114,7 @@ bool DataType::operator==(const DataType& other) const {
case DATE:
case TIMESTAMP:
case INTERVAL:
case FLOAT:
case STRING:
return typeID == other.typeID;
default:
Expand Down Expand Up @@ -149,6 +152,7 @@ std::unique_ptr<DataType> DataType::copy() {
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return std::make_unique<DataType>(typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(typeID) + ".");
Expand Down Expand Up @@ -202,6 +206,8 @@ DataTypeID Types::dataTypeIDFromString(const std::string& dataTypeIDString) {
return TIMESTAMP;
} else if ("INTERVAL" == dataTypeIDString) {
return INTERVAL;
} else if ("FLOAT" == dataTypeIDString) {
return FLOAT;
} else {
throw InternalException("Cannot parse dataTypeID: " + dataTypeIDString);
}
Expand All @@ -225,6 +231,7 @@ std::string Types::dataTypeToString(const DataType& dataType) {
case TIMESTAMP:
case INTERVAL:
case STRING:
case FLOAT:
return dataTypeToString(dataType.typeID);
default:
throw InternalException("Unsupported DataType: " + Types::dataTypeToString(dataType) + ".");
Expand Down Expand Up @@ -259,6 +266,8 @@ std::string Types::dataTypeToString(DataTypeID dataTypeID) {
return "VAR_LIST";
case FIXED_LIST:
return "FIXED_LIST";
case FLOAT:
return "FLOAT";
default:
throw InternalException(
"Unsupported DataType: " + Types::dataTypeToString(dataTypeID) + ".");
Expand Down Expand Up @@ -305,6 +314,8 @@ uint32_t Types::getDataTypeSize(DataTypeID dataTypeID) {
return sizeof(ku_string_t);
case VAR_LIST:
return sizeof(ku_list_t);
case FLOAT:
return sizeof(float_t);
default:
throw InternalException(
"Cannot infer the size of dataTypeID: " + dataTypeToString(dataTypeID) + ".");
Expand All @@ -324,6 +335,7 @@ uint32_t Types::getDataTypeSize(const DataType& dataType) {
case INTERVAL:
case STRING:
case VAR_LIST:
case FLOAT:
return getDataTypeSize(dataType.typeID);
default:
throw InternalException(
Expand Down
31 changes: 21 additions & 10 deletions src/common/types/value.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ Value Value::createDefaultValue(const DataType& dataType) {
case BOOL:
return Value(true);
case DOUBLE:
return Value(0.0);
return Value((double_t)0);
case DATE:
return Value(date_t());
case TIMESTAMP:
Expand All @@ -54,6 +54,8 @@ Value Value::createDefaultValue(const DataType& dataType) {
return Value(nodeID_t());
case STRING:
return Value(std::string(""));
case FLOAT:
return Value((float_t)0);
case VAR_LIST:
case FIXED_LIST:
return Value(dataType, std::vector<std::unique_ptr<Value>>{});
Expand Down Expand Up @@ -108,6 +110,10 @@ Value::Value(DataType dataType, std::vector<std::unique_ptr<Value>> vals)
listVal = std::move(vals);
}

// Constructs a non-null Value whose data type is FLOAT and whose payload is val_.
Value::Value(float_t val_) : dataType{FLOAT}, isNull_{false} {
// The number itself is stored in the floatVal field of val.
val.floatVal = val_;
}

Value::Value(std::unique_ptr<NodeVal> val_) : dataType{NODE}, isNull_{false} {
nodeVal = std::move(val_);
}
Expand Down Expand Up @@ -157,6 +163,9 @@ void Value::copyValueFrom(const uint8_t* value) {
case FIXED_LIST: {
listVal = convertKUFixedListToVector(value);
} break;
case FLOAT: {
val.floatVal = *((float_t*)value);
} break;
default:
throw RuntimeException(
"Data type " + Types::dataTypeToString(dataType) + " is not supported for Value::set");
Expand Down Expand Up @@ -207,6 +216,9 @@ void Value::copyValueFrom(const Value& other) {
case REL: {
relVal = other.relVal->copy();
} break;
case FLOAT: {
val.floatVal = other.val.floatVal;
} break;
default:
throw NotImplementedException("Value::Value(const Value&) for type " +
Types::dataTypeToString(dataType) + " is not implemented.");
Expand Down Expand Up @@ -251,6 +263,8 @@ std::string Value::toString() const {
return nodeVal->toString();
case REL:
return relVal->toString();
case FLOAT:
return TypeUtils::toString(val.floatVal);
default:
throw NotImplementedException("Value::toString for type " +
Types::dataTypeToString(dataType) + " is not implemented.");
Expand Down Expand Up @@ -287,20 +301,17 @@ std::vector<std::unique_ptr<Value>> Value::convertKUVarListToVector(ku_list_t& l

std::vector<std::unique_ptr<Value>> Value::convertKUFixedListToVector(
const uint8_t* fixedList) const {
std::vector<std::unique_ptr<Value>> fixedListResultVal;
std::vector<std::unique_ptr<Value>> fixedListResultVal{dataType.fixedNumElementsInList};
auto numBytesPerElement = Types::getDataTypeSize(*dataType.childType);
switch (dataType.childType->typeID) {
case common::DataTypeID::INT64: {
for (auto i = 0; i < dataType.fixedNumElementsInList; ++i) {
fixedListResultVal.emplace_back(
std::make_unique<Value>(*(int64_t*)(fixedList + i * numBytesPerElement)));
}
putValuesIntoVector<int64_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::DOUBLE: {
for (auto i = 0; i < dataType.fixedNumElementsInList; ++i) {
fixedListResultVal.emplace_back(
std::make_unique<Value>(*(double_t*)(fixedList + i * numBytesPerElement)));
}
putValuesIntoVector<double_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
case common::DataTypeID::FLOAT: {
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
putValuesIntoVector<float_t>(fixedListResultVal, fixedList, numBytesPerElement);
} break;
default:
assert(false);
Expand Down
8 changes: 8 additions & 0 deletions src/function/aggregate_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ std::unique_ptr<AggregateFunction> AggregateFunctionUtil::getAvgFunction(
return std::make_unique<AggregateFunction>(AvgFunction<double_t>::initialize,
AvgFunction<double_t>::updateAll, AvgFunction<double_t>::updatePos,
AvgFunction<double_t>::combine, AvgFunction<double_t>::finalize, inputType, isDistinct);
case FLOAT:
return std::make_unique<AggregateFunction>(AvgFunction<float_t>::initialize,
AvgFunction<float_t>::updateAll, AvgFunction<float_t>::updatePos,
AvgFunction<float_t>::combine, AvgFunction<float_t>::finalize, inputType, isDistinct);
default:
throw RuntimeException("Unsupported input data type " + Types::dataTypeToString(inputType) +
" for AggregateFunctionUtil::getAvgFunction.");
Expand All @@ -54,6 +58,10 @@ std::unique_ptr<AggregateFunction> AggregateFunctionUtil::getSumFunction(
return std::make_unique<AggregateFunction>(SumFunction<double_t>::initialize,
SumFunction<double_t>::updateAll, SumFunction<double_t>::updatePos,
SumFunction<double_t>::combine, SumFunction<double_t>::finalize, inputType, isDistinct);
case FLOAT:
return std::make_unique<AggregateFunction>(SumFunction<float_t>::initialize,
SumFunction<float_t>::updateAll, SumFunction<float_t>::updatePos,
SumFunction<float_t>::combine, SumFunction<float_t>::finalize, inputType, isDistinct);
default:
throw RuntimeException("Unsupported input data type " + Types::dataTypeToString(inputType) +
" for AggregateFunctionUtil::getSumFunction.");
Expand Down
35 changes: 30 additions & 5 deletions src/function/built_in_vector_operations.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,11 @@ uint32_t BuiltInVectorOperations::getCastCost(DataTypeID inputTypeID, DataTypeID
// ANY type can be any type
return 0;
case common::INT64:
return implicitCastInt64(targetTypeID);
return castInt64(targetTypeID);
case common::DOUBLE:
return implicitCastDouble(targetTypeID);
return castDouble(targetTypeID);
case common::FLOAT:
return castFloat(targetTypeID);
default:
return UINT32_MAX;
}
Expand All @@ -110,22 +112,43 @@ uint32_t BuiltInVectorOperations::getCastCost(
}
}

uint32_t BuiltInVectorOperations::implicitCastInt64(common::DataTypeID targetTypeID) {
switch (targetTypeID) {
// Returns the fixed cost attached to casting *to* the given type: 110 for FLOAT and
// 102 for DOUBLE. Any other type ID is rejected with an InternalException.
// NOTE(review): DOUBLE is cheaper than FLOAT here, which presumably makes DOUBLE the
// preferred implicit-cast target when both are candidates -- confirm against getBestMatch.
uint32_t BuiltInVectorOperations::getTargetTypeCost(common::DataTypeID typeID) {
    if (typeID == common::FLOAT) {
        return 110;
    }
    if (typeID == common::DOUBLE) {
        return 102;
    }
    throw InternalException("Unsupported casting operation.");
}

// Cost of casting an INT64 input to targetTypeID.
// FLOAT and DOUBLE targets are priced by getTargetTypeCost; every other target yields
// UINT32_MAX (presumably the "no cast available" sentinel -- confirm with callers).
uint32_t BuiltInVectorOperations::castInt64(common::DataTypeID targetTypeID) {
    const bool isFloatingPointTarget =
        targetTypeID == common::FLOAT || targetTypeID == common::DOUBLE;
    if (!isFloatingPointTarget) {
        return UINT32_MAX;
    }
    return getTargetTypeCost(targetTypeID);
}

uint32_t BuiltInVectorOperations::implicitCastDouble(common::DataTypeID targetTypeID) {
// Cost of casting a DOUBLE input to targetTypeID.
// No target type has a case here, so every request returns UINT32_MAX.
uint32_t BuiltInVectorOperations::castDouble(common::DataTypeID targetTypeID) {
// NOTE(review): the default-only switch looks like a placeholder for future target
// types -- confirm before simplifying it away.
switch (targetTypeID) {
default:
return UINT32_MAX;
}
}

// Cost of casting a FLOAT input to targetTypeID.
// Only DOUBLE is accepted as a target and is priced by getTargetTypeCost; every other
// target yields UINT32_MAX (presumably the "no cast available" sentinel -- confirm
// with callers).
uint32_t BuiltInVectorOperations::castFloat(common::DataTypeID targetTypeID) {
    if (targetTypeID != common::DOUBLE) {
        return UINT32_MAX;
    }
    return getTargetTypeCost(targetTypeID);
}

// When there are multiple candidate functions, e.g. double + int and double + double for input
// "1.5 + parameter", we prefer the one without any implicit casting, i.e. double + double.
VectorOperationDefinition* BuiltInVectorOperations::getBestMatch(
Expand Down Expand Up @@ -329,6 +352,8 @@ void BuiltInVectorOperations::registerCastOperations() {
{CAST_TO_STRING_FUNC_NAME, CastToStringVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_DOUBLE_FUNC_NAME, CastToDoubleVectorOperation::getDefinitions()});
vectorOperations.insert(
{CAST_TO_FLOAT_FUNC_NAME, CastToFloatVectorOperation::getDefinitions()});
}

void BuiltInVectorOperations::registerListOperations() {
Expand Down
Loading