Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Fixed-list DataType to System #1298

Merged
merged 1 commit into from
Feb 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.2 LANGUAGES CXX)
project(Kuzu VERSION 0.0.3 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
6 changes: 3 additions & 3 deletions dataset/tinysnb/eWorkAt.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
3,4,2015
5,6,2010
7,6,2015
3,4,2015,"[3.8,2.5]"
5,6,2010,"[2.1,4.4]"
7,6,2015,"[9.2,3.1]"
4 changes: 2 additions & 2 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], PRIMARY KEY (ID));
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4],PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, PRIMARY KEY (ID));
create node table movies (name STRING, PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], MANY_ONE);
create rel table meets (FROM person TO person, MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], note STRING, ONE_ONE);
18 changes: 9 additions & 9 deletions dataset/tinysnb/vPerson.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]"
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]"
2 changes: 1 addition & 1 deletion src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ kU_DataType

kU_ListIdentifiers : kU_ListIdentifier ( kU_ListIdentifier )* ;

kU_ListIdentifier : '[' ']' ;
kU_ListIdentifier : '[' oC_IntegerLiteral? ']' ;

oC_AnyCypherOption
: oC_Explain
Expand Down
31 changes: 28 additions & 3 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ std::unique_ptr<BoundStatement> Binder::bindAddProperty(const Statement& stateme
validateTableExist(catalog, tableName);
auto catalogContent = catalog.getReadOnlyVersion();
auto tableID = catalogContent->getTableID(tableName);
auto dataType = Types::dataTypeFromString(addProperty.getDataType());
auto dataType = bindDataType(addProperty.getDataType());
if (catalogContent->getTableSchema(tableID)->containProperty(addProperty.getPropertyName())) {
throw BinderException("Property: " + addProperty.getPropertyName() + " already exists.");
}
Expand Down Expand Up @@ -137,8 +137,8 @@ std::vector<PropertyNameDataType> Binder::bindPropertyNameDataTypes(
propertyNameDataType.first.c_str()));
}
StringUtils::toUpper(propertyNameDataType.second);
boundPropertyNameDataTypes.emplace_back(
propertyNameDataType.first, Types::dataTypeFromString(propertyNameDataType.second));
auto dataType = bindDataType(propertyNameDataType.second);
boundPropertyNameDataTypes.emplace_back(propertyNameDataType.first, dataType);
boundPropertyNames.emplace(propertyNameDataType.first);
}
return boundPropertyNameDataTypes;
Expand Down Expand Up @@ -176,5 +176,30 @@ property_id_t Binder::bindPropertyName(TableSchema* tableSchema, const std::stri
tableSchema->tableName + " table doesn't have property: " + propertyName + ".");
}

// Parses `dataType` into a DataType via Types::dataTypeFromString and, when the result is a
// FIXED_LIST, validates it before handing it back. Checks run in this order and each failure
// raises a BinderException:
//   1. the child type must be one of DataType::getNumericalTypeIDs();
//   2. the declared number of elements must not be 0;
//   3. the total size reported by Types::getDataTypeSize must not exceed
//      BufferPoolConstants::DEFAULT_PAGE_SIZE.
// Any type other than FIXED_LIST is returned exactly as parsed.
DataType Binder::bindDataType(const std::string& dataType) {
    auto resolvedType = Types::dataTypeFromString(dataType);
    if (resolvedType.typeID != common::FIXED_LIST) {
        // Only fixed lists carry additional binding constraints.
        return resolvedType;
    }

    auto numericTypeIDs = common::DataType::getNumericalTypeIDs();
    const bool childIsNumeric =
        std::find(numericTypeIDs.begin(), numericTypeIDs.end(),
            resolvedType.childType->typeID) != numericTypeIDs.end();
    if (!childIsNumeric) {
        throw common::BinderException(
            "The child type of a fixed list must be a numeric type. Given: " +
            common::Types::dataTypeToString(*resolvedType.childType) + ".");
    }

    // The parser already rejects negative element counts, so zero is the only remaining
    // invalid count that has to be caught here.
    if (resolvedType.fixedNumElementsInList == 0) {
        throw common::BinderException(
            "The number of elements in a fixed list must be greater than 0. Given: " +
            std::to_string(resolvedType.fixedNumElementsInList) + ".");
    }

    const bool exceedsPageSize = Types::getDataTypeSize(resolvedType) >
                                 common::BufferPoolConstants::DEFAULT_PAGE_SIZE;
    if (exceedsPageSize) {
        throw common::BinderException("The size of fixed list is larger than a "
                                      "DEFAULT_PAGE_SIZE, which is not supported yet.");
    }
    return resolvedType;
}

} // namespace binder
} // namespace kuzu
2 changes: 1 addition & 1 deletion src/binder/bind/bind_reading_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ std::unique_ptr<BoundReadingClause> Binder::bindMatchClause(const ReadingClause&
std::unique_ptr<BoundReadingClause> Binder::bindUnwindClause(const ReadingClause& readingClause) {
auto& unwindClause = (UnwindClause&)readingClause;
auto boundExpression = expressionBinder.bindExpression(*unwindClause.getExpression());
boundExpression = ExpressionBinder::implicitCastIfNecessary(boundExpression, LIST);
boundExpression = ExpressionBinder::implicitCastIfNecessary(boundExpression, VAR_LIST);
auto aliasExpression =
createVariable(unwindClause.getAlias(), *boundExpression->dataType.childType);
return make_unique<BoundUnwindClause>(std::move(boundExpression), std::move(aliasExpression));
Expand Down
10 changes: 6 additions & 4 deletions src/binder/bind_expression/bind_function_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,9 @@ std::shared_ptr<Expression> ExpressionBinder::bindNodeLabelFunction(const Expres
auto nodeTableIDs = catalogContent->getNodeTableIDs();
expression_vector children;
children.push_back(node.getInternalIDProperty());
auto labelsValue = std::make_unique<Value>(DataType(LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(nodeTableIDs, *catalogContent));
auto labelsValue =
std::make_unique<Value>(DataType(VAR_LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(nodeTableIDs, *catalogContent));
children.push_back(createLiteralExpression(std::move(labelsValue)));
auto execFunc = function::LabelVectorOperation::execFunction;
auto uniqueExpressionName = ScalarFunctionExpression::getUniqueName(LABEL_FUNC_NAME, children);
Expand All @@ -199,8 +200,9 @@ std::shared_ptr<Expression> ExpressionBinder::bindRelLabelFunction(const Express
auto relTableIDs = catalogContent->getRelTableIDs();
expression_vector children;
children.push_back(rel.getInternalIDProperty());
auto labelsValue = std::make_unique<Value>(DataType(LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(relTableIDs, *catalogContent));
auto labelsValue =
std::make_unique<Value>(DataType(VAR_LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(relTableIDs, *catalogContent));
children.push_back(createLiteralExpression(std::move(labelsValue)));
auto execFunc = function::LabelVectorOperation::execFunction;
auto uniqueExpressionName = ScalarFunctionExpression::getUniqueName(LABEL_FUNC_NAME, children);
Expand Down
4 changes: 2 additions & 2 deletions src/binder/expression_binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ std::shared_ptr<Expression> ExpressionBinder::implicitCastIfNecessary(
return expression;
}
if (expression->dataType.typeID == ANY) {
if (targetTypeID == LIST) {
if (targetTypeID == VAR_LIST) {
// e.g. len($1) we cannot infer the child type for $1.
throw BinderException("Cannot resolve recursive data type for expression " +
expression->getRawName() + ".");
}
resolveAnyDataType(*expression, DataType(targetTypeID));
return expression;
}
assert(targetTypeID != LIST);
assert(targetTypeID != VAR_LIST);
return implicitCast(expression, DataType(targetTypeID));
}

Expand Down
10 changes: 5 additions & 5 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ template<>
uint64_t SerDeser::serializeValue<DataType>(
const DataType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::serializeValue<DataTypeID>(value.typeID, fileInfo, offset);
if (value.childType) {
assert(value.typeID == LIST);
return SerDeser::serializeValue<DataType>(*value.childType, fileInfo, offset);
offset = SerDeser::serializeValue<uint64_t>(value.fixedNumElementsInList, fileInfo, offset);
if (value.typeID == VAR_LIST || value.typeID == FIXED_LIST) {
offset = SerDeser::serializeValue<DataType>(*value.childType, fileInfo, offset);
}
return offset;
}
Expand All @@ -49,11 +49,11 @@ template<>
uint64_t SerDeser::deserializeValue<DataType>(
DataType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::deserializeValue<DataTypeID>(value.typeID, fileInfo, offset);
if (value.typeID == LIST) {
offset = SerDeser::deserializeValue<uint64_t>(value.fixedNumElementsInList, fileInfo, offset);
if (value.typeID == VAR_LIST || value.typeID == FIXED_LIST) {
auto childDataType = std::make_unique<DataType>();
offset = SerDeser::deserializeValue<DataType>(*childDataType, fileInfo, offset);
value.childType = std::move(childDataType);
return offset;
}
return offset;
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void ArrowConverter::setArrowFormat(
case DataTypeID::STRING: {
child.format = "u";
} break;
case LIST: {
case VAR_LIST: {
acquamarin marked this conversation as resolved.
Show resolved Hide resolved
child.format = "+l";
child.n_children = 1;
rootHolder.nestedChildren.emplace_back();
Expand Down
27 changes: 13 additions & 14 deletions src/common/arrow/arrow_row_batch.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "common/arrow/arrow_row_batch.h"

#include "common/types/value.h"
#include "processor/result/flat_tuple.h"

namespace kuzu {
namespace common {
Expand Down Expand Up @@ -41,7 +40,7 @@ void ArrowRowBatch::templateInitializeVector<STRING>(
}

template<>
void ArrowRowBatch::templateInitializeVector<LIST>(
void ArrowRowBatch::templateInitializeVector<VAR_LIST>(
ArrowVector* vector, const main::DataTypeInfo& typeInfo, std::int64_t capacity) {
initializeNullBits(vector->validity, capacity);
assert(typeInfo.childrenTypesInfo.size() == 1);
Expand Down Expand Up @@ -105,8 +104,8 @@ std::unique_ptr<ArrowVector> ArrowRowBatch::createVector(
case STRING: {
templateInitializeVector<STRING>(result.get(), typeInfo, capacity);
} break;
case LIST: {
templateInitializeVector<LIST>(result.get(), typeInfo, capacity);
case VAR_LIST: {
templateInitializeVector<VAR_LIST>(result.get(), typeInfo, capacity);
} break;
case INTERNAL_ID: {
templateInitializeVector<INTERNAL_ID>(result.get(), typeInfo, capacity);
Expand Down Expand Up @@ -180,7 +179,7 @@ void ArrowRowBatch::templateCopyNonNullValue<STRING>(
}

template<>
void ArrowRowBatch::templateCopyNonNullValue<LIST>(
void ArrowRowBatch::templateCopyNonNullValue<VAR_LIST>(
ArrowVector* vector, const main::DataTypeInfo& typeInfo, Value* value, std::int64_t pos) {
vector->data.resize((pos + 2) * sizeof(std::uint32_t));
auto offsets = (std::uint32_t*)vector->data.data();
Expand All @@ -194,7 +193,7 @@ void ArrowRowBatch::templateCopyNonNullValue<LIST>(
for (auto i = currentNumBytesForChildValidity; i < numBytesForChildValidity; i++) {
vector->childData[0]->validity.data()[i] = 0xFF; // Init each value to be valid (as 1).
}
if (typeInfo.childrenTypesInfo[0]->typeID != LIST) {
if (typeInfo.childrenTypesInfo[0]->typeID != VAR_LIST) {
vector->childData[0]->data.resize(
numChildElements * Types::getDataTypeSize(typeInfo.childrenTypesInfo[0]->typeID));
}
Expand Down Expand Up @@ -268,8 +267,8 @@ void ArrowRowBatch::copyNonNullValue(
case STRING: {
templateCopyNonNullValue<STRING>(vector, typeInfo, value, pos);
} break;
case LIST: {
templateCopyNonNullValue<LIST>(vector, typeInfo, value, pos);
case VAR_LIST: {
templateCopyNonNullValue<VAR_LIST>(vector, typeInfo, value, pos);
} break;
case INTERNAL_ID: {
templateCopyNonNullValue<INTERNAL_ID>(vector, typeInfo, value, pos);
Expand Down Expand Up @@ -303,7 +302,7 @@ void ArrowRowBatch::templateCopyNullValue<STRING>(ArrowVector* vector, std::int6
}

template<>
void ArrowRowBatch::templateCopyNullValue<LIST>(ArrowVector* vector, std::int64_t pos) {
void ArrowRowBatch::templateCopyNullValue<VAR_LIST>(ArrowVector* vector, std::int64_t pos) {
auto offsets = (std::uint32_t*)vector->data.data();
offsets[pos + 1] = offsets[pos];
setBitToZero(vector->validity.data(), pos);
Expand Down Expand Up @@ -333,8 +332,8 @@ void ArrowRowBatch::copyNullValue(ArrowVector* vector, Value* value, std::int64_
case STRING: {
templateCopyNullValue<STRING>(vector, pos);
} break;
case LIST: {
templateCopyNullValue<LIST>(vector, pos);
case VAR_LIST: {
templateCopyNullValue<VAR_LIST>(vector, pos);
} break;
case INTERNAL_ID: {
templateCopyNullValue<INTERNAL_ID>(vector, pos);
Expand Down Expand Up @@ -396,7 +395,7 @@ ArrowArray* ArrowRowBatch::templateCreateArray<STRING>(
}

template<>
ArrowArray* ArrowRowBatch::templateCreateArray<LIST>(
ArrowArray* ArrowRowBatch::templateCreateArray<VAR_LIST>(
ArrowVector& vector, const main::DataTypeInfo& typeInfo) {
auto result = createArrayFromVector(vector);
vector.childPointers.resize(1);
Expand Down Expand Up @@ -465,8 +464,8 @@ ArrowArray* ArrowRowBatch::convertVectorToArray(
case STRING: {
return templateCreateArray<STRING>(vector, typeInfo);
}
case LIST: {
return templateCreateArray<LIST>(vector, typeInfo);
case VAR_LIST: {
return templateCreateArray<VAR_LIST>(vector, typeInfo);
}
case INTERNAL_ID: {
return templateCreateArray<INTERNAL_ID>(vector, typeInfo);
Expand Down
4 changes: 2 additions & 2 deletions src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ std::unique_ptr<Value> CSVReader::getList(const DataType& dataType) {
case INTERVAL: {
val = std::make_unique<Value>(listCSVReader.getInterval());
} break;
case LIST: {
case VAR_LIST: {
val = listCSVReader.getList(*dataType.childType);
} break;
default:
Expand All @@ -315,7 +315,7 @@ std::unique_ptr<Value> CSVReader::getList(const DataType& dataType) {
BufferPoolConstants::DEFAULT_PAGE_SIZE, numBytesOfOverflow));
}
return std::make_unique<Value>(
DataType(LIST, std::make_unique<DataType>(dataType)), std::move(listVal));
DataType(VAR_LIST, std::make_unique<DataType>(dataType)), std::move(listVal));
}

void CSVReader::setNextTokenIsProcessed() {
Expand Down
2 changes: 1 addition & 1 deletion src/common/in_mem_overflow_buffer_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void InMemOverflowBufferUtils::copyListRecursiveIfNested(const ku_list_t& src, k
((ku_string_t*)dest.overflowPtr)[i], inMemOverflowBuffer);
}
}
if (dataType.childType->typeID == LIST) {
if (dataType.childType->typeID == VAR_LIST) {
for (auto i = 0u; i < dest.size; i++) {
InMemOverflowBufferUtils::copyListRecursiveIfNested(
((ku_list_t*)src.overflowPtr)[i + srcStartIdx], ((ku_list_t*)dest.overflowPtr)[i],
Expand Down
2 changes: 1 addition & 1 deletion src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::string TypeUtils::elementToString(
return TypeUtils::toString(((interval_t*)overflowPtr)[pos]);
case STRING:
return TypeUtils::toString(((ku_string_t*)overflowPtr)[pos]);
case LIST:
case VAR_LIST:
return TypeUtils::toString(((ku_list_t*)overflowPtr)[pos], dataType);
default:
throw RuntimeException("Invalid data type " + Types::dataTypeToString(dataType) +
Expand Down
Loading