Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement union-loader #1907

Merged
merged 1 commit into from
Aug 8, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dataset/tinysnb/schema.cypher
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
create node table person (ID INt64, fName StRING, gender INT64, isStudent BoOLEAN, isWorker BOOLEAN, age INT64, eyeSight DOUBLE, birthdate DATE, registerTime TIMESTAMP, lastJobDuration interval, workedHours INT64[], usedNames STRING[], courseScoresPerTerm INT64[][], grades INT64[4], height float, PRIMARY KEY (ID));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, state STRUCT(revenue INT16, location STRING[], stock STRUCT(price INT64[], volume INT64)), PRIMARY KEY (ID));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), PRIMARY KEY (name));
create node table organisation (ID INT64, name STRING, orgCode INT64, mark DOUBLE, score INT64, history STRING, licenseValidInterval INTERVAL, rating DOUBLE, state STRUCT(revenue INT16, location STRING[], stock STRUCT(price INT64[], volume INT64)), info UNION(price FLOAT, movein DATE, note STRING),PRIMARY KEY (ID));
create node table movies (name STRING, length INT32, note STRING, description STRUCT(rating DOUBLE, views INT64, release TIMESTAMP, film DATE), content BYTEA, audience MAP(STRING, INT64), grade union(credit boolean, grade1 double, grade2 int64), PRIMARY KEY (name));
create rel table knows (FROM person TO person, date DATE, meetTime TIMESTAMP, validInterval INTERVAL, comments STRING[], MANY_MANY);
create rel table studyAt (FROM person TO organisation, year INT64, places STRING[], length INT16,MANY_ONE);
create rel table workAt (FROM person TO organisation, year INT64, grading DOUBLE[2], rating float, MANY_ONE);
Expand Down
6 changes: 3 additions & 3 deletions dataset/tinysnb/vMovies.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}"
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33}
Roma,298,the movie is very interesting and funny,"{rating: 1223, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}"
Sóló cón tu párejâ,126, this is a very very good movie,"{rating: 5.3, views: 152, release: 2011-08-20 11:25:30, film: 2012-05-11}","\\xAA\\xABinteresting\\x0B","{audience1= 52,audience53= 42}",true
The 😂😃🧘🏻‍♂️🌍🌦️🍞🚗 movie,2544, the movie is very very good,"{rating: 7, views: 982, release: 2018-11-13 13:33:11, film: 2014-09-12}","\\xAB\\xCD",{audience1= 33},8.989
Roma,298,the movie is very interesting and funny,"{rating: 1223, views: 10003, release: 2011-02-11 16:44:22, film: 2013-02-22}","pure ascii characters","{}",254
6 changes: 3 additions & 3 deletions dataset/tinysnb/vOrganisation.csv
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
1,ABFsUni,325,3.7,-2,10 years 5 months 13 hours 24 us,3 years 5 days,1,"{revenue: 138, location: ['toronto', 'montr,eal'], stock: {price: [96, 56], volume: 1000}}"
4,CsWork,934,4.1,-100,2 years 4 days 10 hours,26 years 52 days 48 hours,0.78,"{revenue: 152, location: [\"vanco,uver north area\"], stock: {price: [15, 78, 671], volume: 432}}"
6,DEsWork,824,4.1,7,2 years 4 hours 22 us 34 minutes,82 hours 100 milliseconds,0.52,"{revenue: 558, location: ['very long city name', 'new york'], stock: {price: [22], volume: 99}}"
1,ABFsUni,325,3.7,-2,10 years 5 months 13 hours 24 us,3 years 5 days,1,"{revenue: 138, location: ['toronto', 'montr,eal'], stock: {price: [96, 56], volume: 1000}}",3.12
4,CsWork,934,4.1,-100,2 years 4 days 10 hours,26 years 52 days 48 hours,0.78,"{revenue: 152, location: [\"vanco,uver north area\"], stock: {price: [15, 78, 671], volume: 432}}",abcd
6,DEsWork,824,4.1,7,2 years 4 hours 22 us 34 minutes,82 hours 100 milliseconds,0.52,"{revenue: 558, location: ['very long city name', 'new york'], stock: {price: [22], volume: 99}}",2023-12-15
1 change: 1 addition & 0 deletions src/antlr4/Cypher.g4
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ TO: ( 'T' | 't' ) ( 'O' | 'o' ) ;
// kU_DataType: a data type as written in a DDL property definition.
// Alternatives, in the order they are listed below:
//   1. a bare symbolic name                          -- e.g. INT64, STRING
//   2. a symbolic name followed by list identifiers  -- e.g. INT64[], INT64[4]
//   3. the UNION token with a parenthesised list of property definitions
//      -- e.g. UNION(price FLOAT, movein DATE, note STRING)
//   4. a symbolic name with a parenthesised list of property definitions
//      -- e.g. STRUCT(revenue INT16, location STRING[])
//   5. a symbolic name with exactly two comma-separated data types
//      -- e.g. MAP(STRING, INT64)
// UNION is matched through its own token here rather than through
// oC_SymbolicName (alternative 4).
// NOTE(review): the examples are taken from the tinysnb schema; confirm the
// type names against the binder before relying on them.
kU_DataType
: oC_SymbolicName
| ( oC_SymbolicName kU_ListIdentifiers )
| UNION SP? '(' SP? kU_PropertyDefinitions SP? ')'
| oC_SymbolicName SP? '(' SP? kU_PropertyDefinitions SP? ')'
| oC_SymbolicName SP? '(' SP? kU_DataType SP? ',' SP? kU_DataType SP? ')' ;

Expand Down
6 changes: 3 additions & 3 deletions src/binder/bind_expression/bind_function_expression.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,12 +140,12 @@ std::shared_ptr<Expression> ExpressionBinder::staticEvaluate(
auto strVal = ((LiteralExpression*)children[0].get())->getValue()->getValue<std::string>();
std::unique_ptr<Value> value;
if (functionName == CAST_TO_DATE_FUNC_NAME) {
value = std::make_unique<Value>(Date::FromCString(strVal.c_str(), strVal.length()));
value = std::make_unique<Value>(Date::fromCString(strVal.c_str(), strVal.length()));
} else if (functionName == CAST_TO_TIMESTAMP_FUNC_NAME) {
value = std::make_unique<Value>(Timestamp::FromCString(strVal.c_str(), strVal.length()));
value = std::make_unique<Value>(Timestamp::fromCString(strVal.c_str(), strVal.length()));
} else {
assert(functionName == CAST_TO_INTERVAL_FUNC_NAME);
value = std::make_unique<Value>(Interval::FromCString(strVal.c_str(), strVal.length()));
value = std::make_unique<Value>(Interval::fromCString(strVal.c_str(), strVal.length()));
}
return createLiteralExpression(std::move(value));
}
Expand Down
39 changes: 25 additions & 14 deletions src/common/type_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,31 @@
namespace kuzu {
namespace common {

// Attempts to parse the first `length` bytes of `data` as a boolean literal.
//
// The input is trimmed with StringUtils::ltrim / StringUtils::rtrim and must
// then be exactly "true" or "false", compared ASCII case-insensitively. This
// matches the wording of the error raised by castToBool ("in a
// case-insensitive manner").
//
// The previous stream-based implementation (`iss >> std::boolalpha`) had two
// problems that this version fixes:
//   * it was case-sensitive, so "True" / "FALSE" were rejected;
//   * it did not verify that the whole token was consumed, so an input such
//     as "trueabc" was accepted as `true`.
//
// @param data    pointer to the characters to parse (need not be
//                NUL-terminated; only `length` bytes are read).
// @param length  number of bytes to read from `data`.
// @param result  receives the parsed value; written only on success.
// @return true if the input is a boolean literal, false otherwise.
bool StringCastUtils::tryCastToBoolean(const char* data, uint64_t length, bool& result) {
    auto booleanStr = std::string{data, length};
    booleanStr = StringUtils::rtrim(StringUtils::ltrim(booleanStr));

    // Exact-length, ASCII case-insensitive comparison against a lower-case
    // keyword. Folding is done by hand so the result is locale-independent.
    const auto matchesKeyword = [&booleanStr](const std::string& keyword) {
        if (booleanStr.size() != keyword.size()) {
            return false;
        }
        for (std::string::size_type i = 0; i < keyword.size(); ++i) {
            char c = booleanStr[i];
            if (c >= 'A' && c <= 'Z') {
                c = static_cast<char>(c - 'A' + 'a');
            }
            if (c != keyword[i]) {
                return false;
            }
        }
        return true;
    };

    if (matchesKeyword("true")) {
        result = true;
        return true;
    }
    if (matchesKeyword("false")) {
        result = false;
        return true;
    }
    return false;
}

bool StringCastUtils::castToBool(const char* data, uint64_t length) {
bool result;
if (!tryCastToBoolean(data, length, result)) {
throw ConversionException(
TypeUtils::prefixConversionExceptionMessage(data, LogicalTypeID::BOOL) +
". Input is not equal to True or False (in a case-insensitive manner)");

Check warning on line 26 in src/common/type_utils.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/type_utils.cpp#L24-L26

Added lines #L24 - L26 were not covered by tests
}
return result;
}

// Trims `str` in place: first the leading side via StringUtils::ltrim, then
// the trailing side via StringUtils::rtrim. Which characters count as
// trimmable is decided entirely by those two helpers.
void StringCastUtils::removeSpace(std::string& str) {
    auto leftTrimmed = StringUtils::ltrim(str);
    str = StringUtils::rtrim(leftTrimmed);
}

uint32_t TypeUtils::convertToUint32(const char* data) {
std::istringstream iss(data);
uint32_t val;
Expand All @@ -17,20 +42,6 @@
return val;
}

// Converts a NUL-terminated string to a boolean.
//
// Accepts exactly "true" or "false", compared case-insensitively, with no
// surrounding whitespace allowed.
//
// Each byte is cast to unsigned char before being handed to tolower():
// passing a negative plain `char` (any non-ASCII byte on platforms where char
// is signed) to tolower is undefined behaviour.
//
// @param data  NUL-terminated input string.
// @return the parsed boolean value.
// @throws ConversionException if the input is neither "true" nor "false".
bool TypeUtils::convertToBoolean(const char* data) {
    const auto len = strlen(data);
    // Compares `data` against a lower-case keyword of known length.
    const auto equalsIgnoreCase = [data, len](const char* keyword, size_t keywordLen) {
        if (len != keywordLen) {
            return false;
        }
        for (size_t i = 0; i < keywordLen; ++i) {
            if (tolower(static_cast<unsigned char>(data[i])) != keyword[i]) {
                return false;
            }
        }
        return true;
    };

    if (equalsIgnoreCase("true", 4)) {
        return true;
    }
    if (equalsIgnoreCase("false", 5)) {
        return false;
    }
    throw ConversionException(
        prefixConversionExceptionMessage(data, LogicalTypeID::BOOL) +
        ". Input is not equal to True or False (in a case-insensitive manner)");
}

std::string TypeUtils::castValueToString(
const LogicalType& dataType, uint8_t* value, void* vector) {
auto valueVector = reinterpret_cast<ValueVector*>(vector);
Expand Down
95 changes: 48 additions & 47 deletions src/common/types/date_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@
date_t result{};
if (interval.months != 0) {
int32_t year, month, day, maxDayInMonth;
Date::Convert(*this, year, month, day);
Date::convert(*this, year, month, day);
int32_t year_diff = interval.months / Interval::MONTHS_PER_YEAR;
year += year_diff;
month += interval.months - year_diff * Interval::MONTHS_PER_YEAR;
Expand All @@ -55,9 +55,9 @@
}
// handle date overflow
// example: 2020-01-31 + "1 months"
maxDayInMonth = Date::MonthDays(year, month);
maxDayInMonth = Date::monthDays(year, month);
day = day > maxDayInMonth ? maxDayInMonth : day;
result = Date::FromDate(year, month, day);
result = Date::fromDate(year, month, day);
} else {
result = *this;
}
Expand All @@ -83,15 +83,15 @@
}

bool date_t::operator==(const timestamp_t& rhs) const {
return Timestamp::FromDatetime(*this, dtime_t(0)).value == rhs.value;
return Timestamp::fromDateTime(*this, dtime_t(0)).value == rhs.value;

Check warning on line 86 in src/common/types/date_t.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/date_t.cpp#L86

Added line #L86 was not covered by tests
}

bool date_t::operator!=(const timestamp_t& rhs) const {
return !(*this == rhs);
}

bool date_t::operator<(const timestamp_t& rhs) const {
return Timestamp::FromDatetime(*this, dtime_t(0)).value < rhs.value;
return Timestamp::fromDateTime(*this, dtime_t(0)).value < rhs.value;

Check warning on line 94 in src/common/types/date_t.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/date_t.cpp#L94

Added line #L94 was not covered by tests
}

bool date_t::operator<=(const timestamp_t& rhs) const {
Expand Down Expand Up @@ -178,7 +178,7 @@
139522, 139888, 140253, 140618, 140983, 141349, 141714, 142079, 142444, 142810, 143175, 143540,
143905, 144271, 144636, 145001, 145366, 145732, 146097};

void Date::ExtractYearOffset(int32_t& n, int32_t& year, int32_t& year_offset) {
void Date::extractYearOffset(int32_t& n, int32_t& year, int32_t& year_offset) {
year = Date::EPOCH_YEAR;
// first we normalize n to be in the year range [1970, 2370]
// since leap years repeat every 400 years, we can safely normalize just by "shifting" the
Expand All @@ -204,31 +204,32 @@
KU_ASSERT(n >= Date::CUMULATIVE_YEAR_DAYS[year_offset]);
}

void Date::Convert(date_t d, int32_t& year, int32_t& month, int32_t& day) {
auto n = d.days;
void Date::convert(date_t date, int32_t& out_year, int32_t& out_month, int32_t& out_day) {
auto n = date.days;
int32_t year_offset;
Date::ExtractYearOffset(n, year, year_offset);
Date::extractYearOffset(n, out_year, year_offset);

day = n - Date::CUMULATIVE_YEAR_DAYS[year_offset];
KU_ASSERT(day >= 0 && day <= 365);
out_day = n - Date::CUMULATIVE_YEAR_DAYS[year_offset];
KU_ASSERT(out_day >= 0 && out_day <= 365);

bool is_leap_year = (Date::CUMULATIVE_YEAR_DAYS[year_offset + 1] -
Date::CUMULATIVE_YEAR_DAYS[year_offset]) == 366;
if (is_leap_year) {
month = Date::LEAP_MONTH_PER_DAY_OF_YEAR[day];
day -= Date::CUMULATIVE_LEAP_DAYS[month - 1];
out_month = Date::LEAP_MONTH_PER_DAY_OF_YEAR[out_day];
out_day -= Date::CUMULATIVE_LEAP_DAYS[out_month - 1];
} else {
month = Date::MONTH_PER_DAY_OF_YEAR[day];
day -= Date::CUMULATIVE_DAYS[month - 1];
out_month = Date::MONTH_PER_DAY_OF_YEAR[out_day];
out_day -= Date::CUMULATIVE_DAYS[out_month - 1];
}
day++;
KU_ASSERT(day > 0 && day <= (is_leap_year ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month]));
KU_ASSERT(month > 0 && month <= 12);
out_day++;
KU_ASSERT(out_day > 0 && out_day <= (is_leap_year ? Date::LEAP_DAYS[out_month] :
Date::NORMAL_DAYS[out_month]));
KU_ASSERT(out_month > 0 && out_month <= 12);
}

date_t Date::FromDate(int32_t year, int32_t month, int32_t day) {
date_t Date::fromDate(int32_t year, int32_t month, int32_t day) {
int32_t n = 0;
if (!Date::IsValid(year, month, day)) {
if (!Date::isValid(year, month, day)) {
throw ConversionException(
StringUtils::string_format("Date out of range: {}-{}-{}.", year, month, day));
}
Expand All @@ -241,13 +242,13 @@
n += Date::DAYS_PER_YEAR_INTERVAL;
}
n += Date::CUMULATIVE_YEAR_DAYS[year - 1970];
n += Date::IsLeapYear(year) ? Date::CUMULATIVE_LEAP_DAYS[month - 1] :
n += Date::isLeapYear(year) ? Date::CUMULATIVE_LEAP_DAYS[month - 1] :
Date::CUMULATIVE_DAYS[month - 1];
n += day - 1;
return date_t(n);
}

bool Date::ParseDoubleDigit(const char* buf, uint64_t len, uint64_t& pos, int32_t& result) {
bool Date::parseDoubleDigit(const char* buf, uint64_t len, uint64_t& pos, int32_t& result) {
if (pos < len && StringUtils::CharacterIsDigit(buf[pos])) {
result = buf[pos++] - '0';
if (pos < len && StringUtils::CharacterIsDigit(buf[pos])) {
Expand All @@ -264,7 +265,7 @@
// trailing "BC". 3) we do not allow the "strict/non-strict" parsing, which lets the caller
// configure this function to either strictly return false if the date std::string has trailing
// characters that won't be parsed or just ignore those characters. We always run in strict mode.
bool Date::TryConvertDate(const char* buf, uint64_t len, uint64_t& pos, date_t& result) {
bool Date::tryConvertDate(const char* buf, uint64_t len, uint64_t& pos, date_t& result) {
pos = 0;
if (len == 0) {
return false;
Expand Down Expand Up @@ -307,7 +308,7 @@
}

// parse the month
if (!Date::ParseDoubleDigit(buf, len, pos, month)) {
if (!Date::parseDoubleDigit(buf, len, pos, month)) {
return false;
}

Expand All @@ -324,7 +325,7 @@
}

// now parse the day
if (!Date::ParseDoubleDigit(buf, len, pos, day)) {
if (!Date::parseDoubleDigit(buf, len, pos, day)) {
return false;
}

Expand All @@ -337,16 +338,16 @@
return false;
}

result = Date::FromDate(year, month, day);
result = Date::fromDate(year, month, day);
return true;
}

date_t Date::FromCString(const char* buf, uint64_t len) {
date_t Date::fromCString(const char* str, uint64_t len) {
date_t result;
uint64_t pos;
if (!TryConvertDate(buf, len, pos, result)) {
if (!tryConvertDate(str, len, pos, result)) {
throw ConversionException("Error occurred during parsing date. Given: \"" +
std::string(buf, len) + "\". Expected format: (YYYY-MM-DD)");
std::string(str, len) + "\". Expected format: (YYYY-MM-DD)");

Check warning on line 350 in src/common/types/date_t.cpp

View check run for this annotation

Codecov / codecov/patch

src/common/types/date_t.cpp#L350

Added line #L350 was not covered by tests
}
return result;
}
Expand All @@ -355,19 +356,19 @@
int32_t date_units[3];
uint64_t year_length;
bool add_bc;
Date::Convert(date, date_units[0], date_units[1], date_units[2]);
Date::convert(date, date_units[0], date_units[1], date_units[2]);

auto length = DateToStringCast::Length(date_units, year_length, add_bc);
auto buffer = std::unique_ptr<char[]>(new char[length]);
DateToStringCast::Format(buffer.get(), date_units, year_length, add_bc);
return std::string(buffer.get(), length);
}

bool Date::IsLeapYear(int32_t year) {
bool Date::isLeapYear(int32_t year) {
return year % 4 == 0 && (year % 100 != 0 || year % 400 == 0);
}

bool Date::IsValid(int32_t year, int32_t month, int32_t day) {
bool Date::isValid(int32_t year, int32_t month, int32_t day) {
if (month < 1 || month > 12) {
return false;
}
Expand All @@ -377,12 +378,12 @@
if (day < 1) {
return false;
}
return Date::IsLeapYear(year) ? day <= Date::LEAP_DAYS[month] : day <= Date::NORMAL_DAYS[month];
return Date::isLeapYear(year) ? day <= Date::LEAP_DAYS[month] : day <= Date::NORMAL_DAYS[month];
}

int32_t Date::MonthDays(int32_t year, int32_t month) {
int32_t Date::monthDays(int32_t year, int32_t month) {
KU_ASSERT(month >= 1 && month <= 12);
return Date::IsLeapYear(year) ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month];
return Date::isLeapYear(year) ? Date::LEAP_DAYS[month] : Date::NORMAL_DAYS[month];
}

std::string Date::getDayName(date_t& date) {
Expand All @@ -395,26 +396,26 @@
std::string monthNames[] = {"January", "February", "March", "April", "May", "June", "July",
"August", "September", "October", "November", "December"};
int32_t year, month, day;
Date::Convert(date, year, month, day);
Date::convert(date, year, month, day);
return monthNames[month - 1];
}

date_t Date::getLastDay(date_t& date) {
int32_t year, month, day;
Date::Convert(date, year, month, day);
Date::convert(date, year, month, day);
year += (month / 12);
month %= 12;
++month;
return Date::FromDate(year, month, 1) - 1;
return Date::fromDate(year, month, 1) - 1;
}

int32_t Date::getDatePart(DatePartSpecifier specifier, date_t& date) {
int32_t year, month, day;
Date::Convert(date, year, month, day);
Date::convert(date, year, month, day);
switch (specifier) {
case DatePartSpecifier::YEAR:
int32_t yearOffset;
ExtractYearOffset(date.days, year, yearOffset);
extractYearOffset(date.days, year, yearOffset);
return year;
case DatePartSpecifier::MONTH:
return month;
Expand Down Expand Up @@ -443,27 +444,27 @@
date_t Date::trunc(DatePartSpecifier specifier, date_t& date) {
switch (specifier) {
case DatePartSpecifier::YEAR:
return Date::FromDate(
return Date::fromDate(
Date::getDatePart(DatePartSpecifier::YEAR, date), 1 /* month */, 1 /* day */);
case DatePartSpecifier::MONTH:
return Date::FromDate(Date::getDatePart(DatePartSpecifier::YEAR, date),
return Date::fromDate(Date::getDatePart(DatePartSpecifier::YEAR, date),
Date::getDatePart(DatePartSpecifier::MONTH, date), 1 /* day */);
case DatePartSpecifier::DAY:
return date;
case DatePartSpecifier::DECADE:
return Date::FromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 10) * 10,
return Date::fromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 10) * 10,
1 /* month */, 1 /* day */);
case DatePartSpecifier::CENTURY:
return Date::FromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 100) * 100,
return Date::fromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 100) * 100,
1 /* month */, 1 /* day */);
case DatePartSpecifier::MILLENNIUM:
return Date::FromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 1000) * 1000,
return Date::fromDate((Date::getDatePart(DatePartSpecifier::YEAR, date) / 1000) * 1000,
1 /* month */, 1 /* day */);
case DatePartSpecifier::QUARTER:
int32_t year, month, day;
Date::Convert(date, year, month, day);
Date::convert(date, year, month, day);
month = 1 + (((month - 1) / 3) * 3);
return Date::FromDate(year, month, 1);
return Date::fromDate(year, month, 1);
default:
return date;
}
Expand Down
6 changes: 3 additions & 3 deletions src/common/types/dtime_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ bool Time::TryConvertTime(const char* buf, uint64_t len, uint64_t& pos, dtime_t&
return false;
}

if (!Date::ParseDoubleDigit(buf, len, pos, hour)) {
if (!Date::parseDoubleDigit(buf, len, pos, hour)) {
return false;
}

Expand All @@ -92,7 +92,7 @@ bool Time::TryConvertTime(const char* buf, uint64_t len, uint64_t& pos, dtime_t&
return false;
}

if (!Date::ParseDoubleDigit(buf, len, pos, min)) {
if (!Date::parseDoubleDigit(buf, len, pos, min)) {
return false;
}

Expand All @@ -104,7 +104,7 @@ bool Time::TryConvertTime(const char* buf, uint64_t len, uint64_t& pos, dtime_t&
return false;
}

if (!Date::ParseDoubleDigit(buf, len, pos, sec)) {
if (!Date::parseDoubleDigit(buf, len, pos, sec)) {
return false;
}

Expand Down
Loading
Loading