Skip to content

Commit

Permalink
Add fixed_list dataType
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Feb 22, 2023
1 parent 25224b8 commit 18879de
Show file tree
Hide file tree
Showing 59 changed files with 2,398 additions and 2,067 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmake_minimum_required(VERSION 3.11)

project(Kuzu VERSION 0.0.2 LANGUAGES CXX)
project(Kuzu VERSION 0.0.3 LANGUAGES CXX)

find_package(Threads REQUIRED)

Expand Down
6 changes: 3 additions & 3 deletions dataset/tinysnb/eWorkAt.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
3,4,2015
5,6,2010
7,6,2015
3,4,2015,"[3.8,2.5]"
5,6,2010,"[2.1,4.4]"
7,6,2015,"[9.2,3.1]"
4 changes: 2 additions & 2 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], PRIMARY KEY (ID));
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4],PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, PRIMARY KEY (ID));
create node table movies (name STRING, PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], MANY_ONE);
create rel table meets (FROM person TO person, MANY_ONE);
create rel table marries (FROM person TO person, usedAddress STRING[], note STRING, ONE_ONE);
18 changes: 9 additions & 9 deletions dataset/tinysnb/vPerson.csv
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]"
id,fname,Gender,ISStudent,isWorker,age,eyeSight,birthdate,registerTime,lastJobDuration,workedHours,usedNames,courseScoresPerTerm,grades
0,Alice,1,true,false,35,5.0,1900-01-01,2011-08-20 11:25:30Z+00:00,3 years 2 days 13 hours 2 minutes,"[10,5]","[Aida]","[[10,8],[6,7,8]]","[96,54,86,92]"
2,Bob,2,true,false,30,5.1,1900-01-01,2008-11-03 13:25:30.000526-02:00,10 years 5 months 13 hours 24 us,"[12,8]","[Bobby]","[[8,9],[9,10]]","[98,42,93,88]"
3,Carol,1,false,true,45,5.0,1940-06-22,1911-08-20 02:32:21,48 hours 24 minutes 11 seconds,"[4,5]","[Carmen,Fred]","[[8,10]]","[91,75,21,95]"
5,Dan,2,false,true,20,4.8,1950-7-23,2031-11-30 12:25:30Z,10 years 5 months 13 hours 24 us,"[1,9]","[Wolfeschlegelstein,Daniel]","[[7,4],[8,8],[9]]","[76,88,99,89]"
7,Elizabeth,1,false,true,20,4.7,1980-10-26,1976-12-23 11:21:42,48 hours 24 minutes 11 seconds,"[2]","[Ein]","[[6],[7],[8]]","[96,59,65,88]"
8,Farooq,2,true,false,25,4.5,1980-10-26,1972-07-31 13:22:30.678559,18 minutes 24 milliseconds,"[3,4,5,6,7]","[Fesdwe]","[[8]]","[80,78,34,83]"
9,Greg,2,false,false,40,4.9,1980-10-26,1976-12-23 11:21:42Z+06:40,10 years 5 months 13 hours 24 us,"[1]","[Grad]","[[10]]","[43,83,67,43]"
10,Hubert Blaine Wolfeschlegelsteinhausenbergerdorff,2,false,true,83,4.9,1990-11-27,2023-02-21 13:25:30,3 years 2 days 13 hours 2 minutes,"[10,11,12,3,4,5,6,7]","[Ad,De,Hi,Kye,Orlan]","[[7],[10],[6,7]]","[77,64,100,54]"
2 changes: 1 addition & 1 deletion src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ kU_DataType

kU_ListIdentifiers : kU_ListIdentifier ( kU_ListIdentifier )* ;

kU_ListIdentifier : '[' ']' ;
kU_ListIdentifier : '[' oC_IntegerLiteral? ']' ;

oC_AnyCypherOption
: oC_Explain
Expand Down
31 changes: 28 additions & 3 deletions src/binder/bind/bind_ddl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ std::unique_ptr<BoundStatement> Binder::bindAddProperty(const Statement& stateme
validateTableExist(catalog, tableName);
auto catalogContent = catalog.getReadOnlyVersion();
auto tableID = catalogContent->getTableID(tableName);
auto dataType = Types::dataTypeFromString(addProperty.getDataType());
auto dataType = bindDataType(addProperty.getDataType());
if (catalogContent->getTableSchema(tableID)->containProperty(addProperty.getPropertyName())) {
throw BinderException("Property: " + addProperty.getPropertyName() + " already exists.");
}
Expand Down Expand Up @@ -137,8 +137,8 @@ std::vector<PropertyNameDataType> Binder::bindPropertyNameDataTypes(
propertyNameDataType.first.c_str()));
}
StringUtils::toUpper(propertyNameDataType.second);
boundPropertyNameDataTypes.emplace_back(
propertyNameDataType.first, Types::dataTypeFromString(propertyNameDataType.second));
auto dataType = bindDataType(propertyNameDataType.second);
boundPropertyNameDataTypes.emplace_back(propertyNameDataType.first, dataType);
boundPropertyNames.emplace(propertyNameDataType.first);
}
return boundPropertyNameDataTypes;
Expand Down Expand Up @@ -176,5 +176,30 @@ property_id_t Binder::bindPropertyName(TableSchema* tableSchema, const std::stri
tableSchema->tableName + " table doesn't have property: " + propertyName + ".");
}

/**
 * Converts the textual representation of a data type into a DataType and, when the result
 * is a FIXED_LIST, validates the constraints imposed on fixed lists.
 *
 * Types other than FIXED_LIST are returned exactly as produced by
 * Types::dataTypeFromString without any additional checks.
 *
 * @param dataType the data type string to bind.
 * @return the bound DataType.
 * @throws common::BinderException if the type is a FIXED_LIST and
 *         - its child type is not one of DataType::getNumericalTypeIDs(), or
 *         - its number of elements is 0, or
 *         - Types::getDataTypeSize() of the type exceeds
 *           BufferPoolConstants::DEFAULT_PAGE_SIZE.
 */
DataType Binder::bindDataType(const std::string& dataType) {
    auto boundType = Types::dataTypeFromString(dataType);
    if (boundType.typeID != common::FIXED_LIST) {
        // Only fixed lists carry extra constraints; everything else is accepted as parsed.
        return boundType;
    }

    // Rule 1: the element type has to be numeric.
    // std::find is qualified explicitly so the lookup does not depend on ADL through the
    // iterator type of whatever container getNumericalTypeIDs() returns.
    auto validNumericTypes = common::DataType::getNumericalTypeIDs();
    auto childTypePos = std::find(
        validNumericTypes.begin(), validNumericTypes.end(), boundType.childType->typeID);
    if (childTypePos == validNumericTypes.end()) {
        throw common::BinderException(
            "The child type of a fixed list must be a numeric type. Given: " +
            common::Types::dataTypeToString(*boundType.childType) + ".");
    }

    // Rule 2: the list must hold at least one element.
    // Note: the parser already guarantees that the number of elements is a non-negative
    // number. However, we still need to check whether the number of elements is 0.
    if (boundType.fixedNumElementsInList == 0) {
        throw common::BinderException(
            "The number of elements in a fixed list must be greater than 0. Given: " +
            std::to_string(boundType.fixedNumElementsInList) + ".");
    }

    // Rule 3: a fixed list larger than DEFAULT_PAGE_SIZE is rejected as unsupported.
    if (Types::getDataTypeSize(boundType) > common::BufferPoolConstants::DEFAULT_PAGE_SIZE) {
        throw common::BinderException("The size of fixed list is larger than a "
                                      "DEFAULT_PAGE_SIZE, which is not supported yet.");
    }
    return boundType;
}

} // namespace binder
} // namespace kuzu
2 changes: 1 addition & 1 deletion src/binder/bind/bind_reading_clause.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ std::unique_ptr<BoundReadingClause> Binder::bindMatchClause(const ReadingClause&
std::unique_ptr<BoundReadingClause> Binder::bindUnwindClause(const ReadingClause& readingClause) {
auto& unwindClause = (UnwindClause&)readingClause;
auto boundExpression = expressionBinder.bindExpression(*unwindClause.getExpression());
boundExpression = ExpressionBinder::implicitCastIfNecessary(boundExpression, LIST);
boundExpression = ExpressionBinder::implicitCastIfNecessary(boundExpression, VAR_LIST);
auto aliasExpression =
createVariable(unwindClause.getAlias(), *boundExpression->dataType.childType);
return make_unique<BoundUnwindClause>(std::move(boundExpression), std::move(aliasExpression));
Expand Down
10 changes: 6 additions & 4 deletions src/binder/bind_expression/bind_function_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -180,8 +180,9 @@ std::shared_ptr<Expression> ExpressionBinder::bindNodeLabelFunction(const Expres
auto nodeTableIDs = catalogContent->getNodeTableIDs();
expression_vector children;
children.push_back(node.getInternalIDProperty());
auto labelsValue = std::make_unique<Value>(DataType(LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(nodeTableIDs, *catalogContent));
auto labelsValue =
std::make_unique<Value>(DataType(VAR_LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(nodeTableIDs, *catalogContent));
children.push_back(createLiteralExpression(std::move(labelsValue)));
auto execFunc = function::LabelVectorOperation::execFunction;
auto uniqueExpressionName = ScalarFunctionExpression::getUniqueName(LABEL_FUNC_NAME, children);
Expand All @@ -199,8 +200,9 @@ std::shared_ptr<Expression> ExpressionBinder::bindRelLabelFunction(const Express
auto relTableIDs = catalogContent->getRelTableIDs();
expression_vector children;
children.push_back(rel.getInternalIDProperty());
auto labelsValue = std::make_unique<Value>(DataType(LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(relTableIDs, *catalogContent));
auto labelsValue =
std::make_unique<Value>(DataType(VAR_LIST, std::make_unique<DataType>(STRING)),
populateLabelValues(relTableIDs, *catalogContent));
children.push_back(createLiteralExpression(std::move(labelsValue)));
auto execFunc = function::LabelVectorOperation::execFunction;
auto uniqueExpressionName = ScalarFunctionExpression::getUniqueName(LABEL_FUNC_NAME, children);
Expand Down
4 changes: 2 additions & 2 deletions src/binder/expression_binder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,15 @@ std::shared_ptr<Expression> ExpressionBinder::implicitCastIfNecessary(
return expression;
}
if (expression->dataType.typeID == ANY) {
if (targetTypeID == LIST) {
if (targetTypeID == VAR_LIST) {
// e.g. len($1) we cannot infer the child type for $1.
throw BinderException("Cannot resolve recursive data type for expression " +
expression->getRawName() + ".");
}
resolveAnyDataType(*expression, DataType(targetTypeID));
return expression;
}
assert(targetTypeID != LIST);
assert(targetTypeID != VAR_LIST);
return implicitCast(expression, DataType(targetTypeID));
}

Expand Down
10 changes: 5 additions & 5 deletions src/catalog/catalog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,9 @@ template<>
uint64_t SerDeser::serializeValue<DataType>(
const DataType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::serializeValue<DataTypeID>(value.typeID, fileInfo, offset);
if (value.childType) {
assert(value.typeID == LIST);
return SerDeser::serializeValue<DataType>(*value.childType, fileInfo, offset);
offset = SerDeser::serializeValue<uint64_t>(value.fixedNumElementsInList, fileInfo, offset);
if (value.typeID == VAR_LIST || value.typeID == FIXED_LIST) {
offset = SerDeser::serializeValue<DataType>(*value.childType, fileInfo, offset);
}
return offset;
}
Expand All @@ -49,11 +49,11 @@ template<>
uint64_t SerDeser::deserializeValue<DataType>(
DataType& value, FileInfo* fileInfo, uint64_t offset) {
offset = SerDeser::deserializeValue<DataTypeID>(value.typeID, fileInfo, offset);
if (value.typeID == LIST) {
offset = SerDeser::deserializeValue<uint64_t>(value.fixedNumElementsInList, fileInfo, offset);
if (value.typeID == VAR_LIST || value.typeID == FIXED_LIST) {
auto childDataType = std::make_unique<DataType>();
offset = SerDeser::deserializeValue<DataType>(*childDataType, fileInfo, offset);
value.childType = std::move(childDataType);
return offset;
}
return offset;
}
Expand Down
2 changes: 1 addition & 1 deletion src/common/arrow/arrow_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ void ArrowConverter::setArrowFormat(
case DataTypeID::STRING: {
child.format = "u";
} break;
case LIST: {
case VAR_LIST: {
child.format = "+l";
child.n_children = 1;
rootHolder.nestedChildren.emplace_back();
Expand Down
27 changes: 13 additions & 14 deletions src/common/arrow/arrow_row_batch.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "common/arrow/arrow_row_batch.h"

#include "common/types/value.h"
#include "processor/result/flat_tuple.h"

namespace kuzu {
namespace common {
Expand Down Expand Up @@ -41,7 +40,7 @@ void ArrowRowBatch::templateInitializeVector<STRING>(
}

template<>
void ArrowRowBatch::templateInitializeVector<LIST>(
void ArrowRowBatch::templateInitializeVector<VAR_LIST>(
ArrowVector* vector, const main::DataTypeInfo& typeInfo, std::int64_t capacity) {
initializeNullBits(vector->validity, capacity);
assert(typeInfo.childrenTypesInfo.size() == 1);
Expand Down Expand Up @@ -105,8 +104,8 @@ std::unique_ptr<ArrowVector> ArrowRowBatch::createVector(
case STRING: {
templateInitializeVector<STRING>(result.get(), typeInfo, capacity);
} break;
case LIST: {
templateInitializeVector<LIST>(result.get(), typeInfo, capacity);
case VAR_LIST: {
templateInitializeVector<VAR_LIST>(result.get(), typeInfo, capacity);
} break;
case INTERNAL_ID: {
templateInitializeVector<INTERNAL_ID>(result.get(), typeInfo, capacity);
Expand Down Expand Up @@ -180,7 +179,7 @@ void ArrowRowBatch::templateCopyNonNullValue<STRING>(
}

template<>
void ArrowRowBatch::templateCopyNonNullValue<LIST>(
void ArrowRowBatch::templateCopyNonNullValue<VAR_LIST>(
ArrowVector* vector, const main::DataTypeInfo& typeInfo, Value* value, std::int64_t pos) {
vector->data.resize((pos + 2) * sizeof(std::uint32_t));
auto offsets = (std::uint32_t*)vector->data.data();
Expand All @@ -194,7 +193,7 @@ void ArrowRowBatch::templateCopyNonNullValue<LIST>(
for (auto i = currentNumBytesForChildValidity; i < numBytesForChildValidity; i++) {
vector->childData[0]->validity.data()[i] = 0xFF; // Init each value to be valid (as 1).
}
if (typeInfo.childrenTypesInfo[0]->typeID != LIST) {
if (typeInfo.childrenTypesInfo[0]->typeID != VAR_LIST) {
vector->childData[0]->data.resize(
numChildElements * Types::getDataTypeSize(typeInfo.childrenTypesInfo[0]->typeID));
}
Expand Down Expand Up @@ -268,8 +267,8 @@ void ArrowRowBatch::copyNonNullValue(
case STRING: {
templateCopyNonNullValue<STRING>(vector, typeInfo, value, pos);
} break;
case LIST: {
templateCopyNonNullValue<LIST>(vector, typeInfo, value, pos);
case VAR_LIST: {
templateCopyNonNullValue<VAR_LIST>(vector, typeInfo, value, pos);
} break;
case INTERNAL_ID: {
templateCopyNonNullValue<INTERNAL_ID>(vector, typeInfo, value, pos);
Expand Down Expand Up @@ -303,7 +302,7 @@ void ArrowRowBatch::templateCopyNullValue<STRING>(ArrowVector* vector, std::int6
}

template<>
void ArrowRowBatch::templateCopyNullValue<LIST>(ArrowVector* vector, std::int64_t pos) {
void ArrowRowBatch::templateCopyNullValue<VAR_LIST>(ArrowVector* vector, std::int64_t pos) {
auto offsets = (std::uint32_t*)vector->data.data();
offsets[pos + 1] = offsets[pos];
setBitToZero(vector->validity.data(), pos);
Expand Down Expand Up @@ -333,8 +332,8 @@ void ArrowRowBatch::copyNullValue(ArrowVector* vector, Value* value, std::int64_
case STRING: {
templateCopyNullValue<STRING>(vector, pos);
} break;
case LIST: {
templateCopyNullValue<LIST>(vector, pos);
case VAR_LIST: {
templateCopyNullValue<VAR_LIST>(vector, pos);
} break;
case INTERNAL_ID: {
templateCopyNullValue<INTERNAL_ID>(vector, pos);
Expand Down Expand Up @@ -396,7 +395,7 @@ ArrowArray* ArrowRowBatch::templateCreateArray<STRING>(
}

template<>
ArrowArray* ArrowRowBatch::templateCreateArray<LIST>(
ArrowArray* ArrowRowBatch::templateCreateArray<VAR_LIST>(
ArrowVector& vector, const main::DataTypeInfo& typeInfo) {
auto result = createArrayFromVector(vector);
vector.childPointers.resize(1);
Expand Down Expand Up @@ -465,8 +464,8 @@ ArrowArray* ArrowRowBatch::convertVectorToArray(
case STRING: {
return templateCreateArray<STRING>(vector, typeInfo);
}
case LIST: {
return templateCreateArray<LIST>(vector, typeInfo);
case VAR_LIST: {
return templateCreateArray<VAR_LIST>(vector, typeInfo);
}
case INTERNAL_ID: {
return templateCreateArray<INTERNAL_ID>(vector, typeInfo);
Expand Down
4 changes: 2 additions & 2 deletions src/common/csv_reader/csv_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -297,7 +297,7 @@ std::unique_ptr<Value> CSVReader::getList(const DataType& dataType) {
case INTERVAL: {
val = std::make_unique<Value>(listCSVReader.getInterval());
} break;
case LIST: {
case VAR_LIST: {
val = listCSVReader.getList(*dataType.childType);
} break;
default:
Expand All @@ -315,7 +315,7 @@ std::unique_ptr<Value> CSVReader::getList(const DataType& dataType) {
BufferPoolConstants::DEFAULT_PAGE_SIZE, numBytesOfOverflow));
}
return std::make_unique<Value>(
DataType(LIST, std::make_unique<DataType>(dataType)), std::move(listVal));
DataType(VAR_LIST, std::make_unique<DataType>(dataType)), std::move(listVal));
}

void CSVReader::setNextTokenIsProcessed() {
Expand Down
2 changes: 1 addition & 1 deletion src/common/in_mem_overflow_buffer_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ void InMemOverflowBufferUtils::copyListRecursiveIfNested(const ku_list_t& src, k
((ku_string_t*)dest.overflowPtr)[i], inMemOverflowBuffer);
}
}
if (dataType.childType->typeID == LIST) {
if (dataType.childType->typeID == VAR_LIST) {
for (auto i = 0u; i < dest.size; i++) {
InMemOverflowBufferUtils::copyListRecursiveIfNested(
((ku_list_t*)src.overflowPtr)[i + srcStartIdx], ((ku_list_t*)dest.overflowPtr)[i],
Expand Down
2 changes: 1 addition & 1 deletion src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ std::string TypeUtils::elementToString(
return TypeUtils::toString(((interval_t*)overflowPtr)[pos]);
case STRING:
return TypeUtils::toString(((ku_string_t*)overflowPtr)[pos]);
case LIST:
case VAR_LIST:
return TypeUtils::toString(((ku_list_t*)overflowPtr)[pos], dataType);
default:
throw RuntimeException("Invalid data type " + Types::dataTypeToString(dataType) +
Expand Down
Loading

0 comments on commit 18879de

Please sign in to comment.