Skip to content

Commit

Permalink
Merge pull request #2476 from kuzudb/cast_test
Browse files Browse the repository at this point in the history
fix #2474: parse dataType map() with 0 or no arg aborts
  • Loading branch information
AEsir777 committed Nov 21, 2023
2 parents 68329a8 + 7e129a8 commit d10bd0c
Show file tree
Hide file tree
Showing 8 changed files with 138 additions and 6 deletions.
2 changes: 1 addition & 1 deletion src/common/string_utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ std::vector<std::string> StringUtils::splitComma(const std::string& input) {
currentPos++;
}
result.push_back(input.substr(0, currentPos));
result.push_back(input.substr(currentPos + 1));
result.push_back(input.substr(currentPos == input.length() ? input.length() : currentPos + 1));
return result;
}

Expand Down
12 changes: 12 additions & 0 deletions src/common/types/timestamp_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -323,5 +323,17 @@ int64_t Timestamp::getEpochNanoSeconds(const timestamp_t& timestamp) {
return timestamp.value * Interval::NANOS_PER_MICRO;
}

// Returns the timestamp's stored value as-is. The sibling getters in this file
// scale the same field with Interval::NANOS_PER_MICRO / MICROS_PER_MSEC /
// MICROS_PER_SEC, i.e. the field is treated as a microsecond count.
int64_t Timestamp::getEpochMicroSeconds(const timestamp_t& timestamp) {
    const auto micros = timestamp.value;
    return micros;
}

// Converts the timestamp's stored microsecond value to milliseconds by dividing
// by Interval::MICROS_PER_MSEC.
// NOTE(review): assuming both operands are integral, the sub-millisecond
// remainder is dropped by the division - confirm that is the intended rounding.
int64_t Timestamp::getEpochMilliSeconds(const timestamp_t& timestamp) {
    const auto micros = timestamp.value;
    return micros / Interval::MICROS_PER_MSEC;
}

// Converts the timestamp's stored microsecond value to seconds by dividing by
// Interval::MICROS_PER_SEC.
// NOTE(review): assuming both operands are integral, the sub-second remainder
// is dropped by the division - confirm that is the intended rounding.
int64_t Timestamp::getEpochSeconds(const timestamp_t& timestamp) {
    const auto micros = timestamp.value;
    return micros / Interval::MICROS_PER_SEC;
}

} // namespace common
} // namespace kuzu
21 changes: 20 additions & 1 deletion src/common/types/types.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,10 @@ void LogicalType::setPhysicalType() {
case LogicalTypeID::BOOL: {
physicalType = PhysicalTypeID::BOOL;
} break;
case LogicalTypeID::TIMESTAMP:
case LogicalTypeID::TIMESTAMP_MS:
case LogicalTypeID::TIMESTAMP_NS:
case LogicalTypeID::TIMESTAMP_TZ:
case LogicalTypeID::TIMESTAMP_SEC:
case LogicalTypeID::SERIAL:
case LogicalTypeID::INT64: {
physicalType = PhysicalTypeID::INT64;
Expand Down Expand Up @@ -580,6 +583,14 @@ LogicalTypeID LogicalTypeUtils::dataTypeIDFromString(const std::string& dataType
return LogicalTypeID::DATE;
} else if ("TIMESTAMP" == upperDataTypeIDString) {
return LogicalTypeID::TIMESTAMP;
} else if ("TIMESTAMP_NS" == upperDataTypeIDString) {
return LogicalTypeID::TIMESTAMP_NS;
} else if ("TIMESTAMP_MS" == upperDataTypeIDString) {
return LogicalTypeID::TIMESTAMP_MS;
} else if ("TIMESTAMP_SEC" == upperDataTypeIDString || "TIMESTAMP_S" == upperDataTypeIDString) {
return LogicalTypeID::TIMESTAMP_SEC;
} else if ("TIMESTAMP_TZ" == upperDataTypeIDString) {
return LogicalTypeID::TIMESTAMP_TZ;
} else if ("INTERVAL" == upperDataTypeIDString) {
return LogicalTypeID::INTERVAL;
} else if ("SERIAL" == upperDataTypeIDString) {
Expand Down Expand Up @@ -628,6 +639,14 @@ std::string LogicalTypeUtils::toString(LogicalTypeID dataTypeID) {
return "FLOAT";
case LogicalTypeID::DATE:
return "DATE";
case LogicalTypeID::TIMESTAMP_NS:
return "TIMESTAMP_NS";
case LogicalTypeID::TIMESTAMP_MS:
return "TIMESTAMP_MS";
case LogicalTypeID::TIMESTAMP_SEC:
return "TIMESTAMP_SEC";
case LogicalTypeID::TIMESTAMP_TZ:
return "TIMESTAMP_TZ";
case LogicalTypeID::TIMESTAMP:
return "TIMESTAMP";
case LogicalTypeID::INTERVAL:
Expand Down
38 changes: 38 additions & 0 deletions src/function/cast_from_string_functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,38 @@ using namespace kuzu::common;
namespace kuzu {
namespace function {

// ---------------------- try cast String to Timestamp -------------------- //
// Parses a timestamp string and rescales the parsed value to nanoseconds.
struct TryCastToTimestampNS {
    // input/len: character buffer and its length, forwarded to
    //            Timestamp::tryConvertTimestamp.
    // result:    receives the parsed timestamp; on success it is overwritten
    //            with Timestamp::getEpochNanoSeconds of that parsed value.
    // Returns the success flag of Timestamp::tryConvertTimestamp; when it is
    // false no rescaling is applied to result.
    static inline bool tryCast(const char* input, uint64_t len, timestamp_t& result) {
        const bool parsed = Timestamp::tryConvertTimestamp(input, len, result);
        if (parsed) {
            result = Timestamp::getEpochNanoSeconds(result);
        }
        return parsed;
    }
};

// Parses a timestamp string and rescales the parsed value to milliseconds.
struct TryCastToTimestampMS {
    // input/len: character buffer and its length, forwarded to
    //            Timestamp::tryConvertTimestamp.
    // result:    receives the parsed timestamp; on success it is overwritten
    //            with Timestamp::getEpochMilliSeconds of that parsed value.
    // Returns false when parsing fails (result is then not rescaled), true
    // otherwise.
    static inline bool tryCast(const char* input, uint64_t len, timestamp_t& result) {
        if (!Timestamp::tryConvertTimestamp(input, len, result)) {
            return false;
        }
        // The millisecond variant has to divide the parsed microsecond value
        // down to milliseconds; Timestamp::getEpochMicroSeconds would hand the
        // value back unchanged and leave the result in the wrong unit.
        result = Timestamp::getEpochMilliSeconds(result);
        return true;
    }
};

// Parses a timestamp string and rescales the parsed value to seconds.
struct TryCastToTimestampSec {
    // input/len: character buffer and its length, forwarded to
    //            Timestamp::tryConvertTimestamp.
    // result:    receives the parsed timestamp; on success it is overwritten
    //            with Timestamp::getEpochSeconds of that parsed value.
    // Returns the success flag of Timestamp::tryConvertTimestamp; when it is
    // false no rescaling is applied to result.
    static inline bool tryCast(const char* input, uint64_t len, timestamp_t& result) {
        const bool parsed = Timestamp::tryConvertTimestamp(input, len, result);
        if (parsed) {
            result = Timestamp::getEpochSeconds(result);
        }
        return parsed;
    }
};


// ---------------------- cast String Helper ------------------------------ //
struct CastStringHelper {
template<typename T>
Expand Down Expand Up @@ -97,6 +129,12 @@ inline void CastStringHelper::cast(const char* input, uint64_t len, date_t& resu
result = Date::fromCString(input, len);
}

// Specialisation for timestamp_ms_t: forwards the character buffer to
// TryCastToTimestampMS::tryCast, which writes into result. The vector, row and
// CSV-config arguments are not used by this specialisation.
// NOTE(review): tryCast returns a bool success flag that is discarded here, so
// a string that fails to parse is not reported by this function - confirm that
// silent failure is intended.
template<>
inline void CastStringHelper::cast(const char* input, uint64_t len, timestamp_ms_t& result,
    ValueVector* /*vector*/, uint64_t /*rowToAdd*/, const CSVReaderConfig* /*csvReaderConfig*/) {
    static_cast<void>(TryCastToTimestampMS::tryCast(input, len, result));
}

template<>
inline void CastStringHelper::cast(const char* input, uint64_t len, timestamp_t& result,
ValueVector* /*vector*/, uint64_t /*rowToAdd*/, const CSVReaderConfig* /*csvReaderConfig*/) {
Expand Down
16 changes: 16 additions & 0 deletions src/include/common/types/timestamp_t.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,16 @@ struct KUZU_API timestamp_t {
interval_t operator-(const timestamp_t& rhs) const;
};


// Distinct type derived from timestamp_t that adds no members of its own.
// NOTE(review): presumably the value type behind LogicalTypeID::TIMESTAMP_TZ - confirm.
struct timestamp_tz_t : timestamp_t {}; // NOLINT
// Distinct type derived from timestamp_t that adds no members of its own.
// NOTE(review): presumably the value type behind LogicalTypeID::TIMESTAMP_NS - confirm.
struct timestamp_ns_t : timestamp_t {}; // NOLINT
// Distinct type derived from timestamp_t that adds no members of its own.
// NOTE(review): presumably the value type behind LogicalTypeID::TIMESTAMP_MS - confirm.
struct timestamp_ms_t : timestamp_t {}; // NOLINT
// Distinct type derived from timestamp_t that adds no members of its own.
// NOTE(review): presumably the value type behind LogicalTypeID::TIMESTAMP_SEC - confirm.
struct timestamp_sec_t : timestamp_t {}; // NOLINT

// Note: Aside from some minor changes, this implementation is copied from DuckDB's source code:
// https://github.com/duckdb/duckdb/blob/master/src/include/duckdb/common/types/timestamp.hpp.
// https://github.com/duckdb/duckdb/blob/master/src/common/types/timestamp.cpp.
Expand Down Expand Up @@ -86,6 +96,12 @@ class Timestamp {

KUZU_API static int64_t getEpochNanoSeconds(const timestamp_t& timestamp);

KUZU_API static int64_t getEpochMicroSeconds(const timestamp_t& timestamp);

KUZU_API static int64_t getEpochMilliSeconds(const timestamp_t& timestamp);

KUZU_API static int64_t getEpochSeconds(const timestamp_t& timestamp);

KUZU_API static bool tryParseUTCOffset(
const char* str, uint64_t& pos, uint64_t len, int& hour_offset, int& minute_offset);

Expand Down
12 changes: 8 additions & 4 deletions src/include/common/types/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -94,10 +94,14 @@ enum class KUZU_API LogicalTypeID : uint8_t {
FLOAT = 33,
DATE = 34,
TIMESTAMP = 35,
INTERVAL = 36,
FIXED_LIST = 37,

INTERNAL_ID = 40,
TIMESTAMP_SEC = 36,
TIMESTAMP_MS = 37,
TIMESTAMP_NS = 38,
TIMESTAMP_TZ = 39,
INTERVAL = 40,
FIXED_LIST = 41,

INTERNAL_ID = 45,

STRING = 50,
BLOB = 51,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,12 @@ inline void CastString::operation(const ku_string_t& input, timestamp_t& result,
result = Timestamp::fromCString((const char*)input.getData(), input.len);
}

// Specialisation for timestamp_ns_t: parses the string as a timestamp and
// stores the nanosecond value in result. The result vector, row and CSV-config
// arguments are not used by this specialisation.
//
// The body was previously empty, which left result unwritten and input unread.
// Parsing goes through Timestamp::fromCString, the same entry point the
// timestamp_t specialisation uses.
// NOTE(review): presumably fromCString reports malformed input itself - verify
// that its failure behaviour is what callers of this cast expect.
template<>
inline void CastString::operation(const ku_string_t& input, timestamp_ns_t& result,
    ValueVector* /*resultVector*/, uint64_t /*rowToAdd*/,
    const CSVReaderConfig* /*csvReaderConfig*/) {
    const auto parsed = Timestamp::fromCString((const char*)input.getData(), input.len);
    // Write the field directly: Timestamp::getEpochNanoSeconds yields a plain
    // int64_t, not a timestamp_ns_t.
    result.value = Timestamp::getEpochNanoSeconds(parsed);
}

template<>
inline void CastString::operation(const ku_string_t& input, interval_t& result,
ValueVector* /*resultVector*/, uint64_t /*rowToAdd*/,
Expand Down
37 changes: 37 additions & 0 deletions test/test_files/tinysnb/cast/cast_error.test
Original file line number Diff line number Diff line change
Expand Up @@ -798,3 +798,40 @@ Conversion exception: Unsupported casting function from INT16[2] to FLOAT[1].
---- error
Runtime exception: Unsupported FIXED_LIST type: Function::getFixedListChildCastFunc

-LOG InvalidNestedCast
-STATEMENT RETURN cast({a: {b: {c: [[1, 3, 4]], d: "str"}}, e: [1, 9, NULL]}, "STRUCT(a STRUCT(b STRUCT(c INT64[2][], d STRING)), e INT128[3])");
---- error
Conversion exception: Unsupported casting VAR_LIST with incorrect list entry to FIXED_LIST. Expected: 2, Actual: 3.
-STATEMENT RETURN cast({a: {b: {c: [[1, 3, 4]], d: "str"}}, e: [1, 9, NULL]}, "STRUCT(a STRUCT(b STRUCT(c INT64[3][], d STRING)), e INT128[3])");
---- error
Conversion exception: Cast failed. NULL is not allowed for FIXED_LIST.
-STATEMENT RETURN cast(cast("{a: {b: {c: [[1, 3, 4]], d: {18=[3, 2]}}}, e: [1, 9, 3]}", "STRUCT(a STRUCT(b STRUCT(c INT64[3][], d MAP(INT32,INT32[]))), e FLOAT[3])"), "STRUCT(a STRUCT(b STRUCT(c INT64[3][], d MAP(INT32,INT32[5]))), e INT32[3])");
---- error
Conversion exception: Unsupported casting VAR_LIST with incorrect list entry to FIXED_LIST. Expected: 5, Actual: 2.

-LOG InvalidNameCast
-STATEMENT RETURN cast("nop", "STRUCT()");
---- error
Cannot parse dataTypeID:
-STATEMENT RETURN cast("nop", "STRUCT(a=fds)");
---- error
Cannot parse dataTypeID: A=FDS
-STATEMENT RETURN cast("nop", "STRUCT(a: )");
---- error
Cannot parse dataTypeID:
-STATEMENT RETURN cast("nop", "MAP()");
---- error
Cannot parse dataTypeID:
-STATEMENT RETURN cast("nop", "MAP(");
---- error
Cannot parse map type: MAP(
-STATEMENT RETURN cast("nop", "UNION(a:STRING)");
---- error
Cannot parse dataTypeID: A:STRING
-STATEMENT RETURN cast("nop", "MAP(int)");
---- error
Cannot parse dataTypeID:
-STATEMENT RETURN cast("nop", "STRUCT(a: INT, b MAP(INT, STRING, INT))");
---- error
Cannot parse dataTypeID: STRING, INT

0 comments on commit d10bd0c

Please sign in to comment.