From 159f6125b6ef506acdda24cebfafd566bed83df2 Mon Sep 17 00:00:00 2001 From: Keenan Gugeler Date: Thu, 5 Oct 2023 16:03:55 -0400 Subject: [PATCH] tidy: add clang-tidy clang-tidy is a static analyzer, and can find many different issues. So far I have integrated a few simple checks, and disabled the remaining ones. They will be enabled in future PRs. Closes #2152. Ref #2148. --- .clang-tidy | 16 +++++++++++ .github/workflows/ci-workflow.yml | 27 ++++++++++++++----- Makefile | 10 +++++++ dataset/invalid-utf8/invalid-utf8.csv | 1 + src/common/arrow/arrow_converter.cpp | 2 +- src/common/types/date_t.cpp | 2 +- src/common/types/dtime_t.cpp | 2 +- src/common/types/types.cpp | 4 +-- src/function/base_lower_upper_operation.cpp | 14 ++++++++-- src/include/common/system_message.h | 22 +++++++++++++++ src/include/common/types/cast_helpers.h | 4 +-- .../arithmetic/arithmetic_functions.h | 3 ++- .../function/boolean/boolean_functions.h | 8 ++---- src/include/processor/operator/scan_node_id.h | 8 +----- src/main/plan_printer.cpp | 11 +++++--- src/main/query_result.cpp | 8 +++--- .../agg_key_dependency_optimizer.cpp | 9 ++++--- .../operator/persistent/copy_rel_columns.cpp | 1 + .../persistent/reader/csv/base_csv_reader.cpp | 20 +++++++------- .../reader/csv/parallel_csv_reader.cpp | 3 ++- .../reader/parquet/parquet_reader.cpp | 10 ------- .../operator/persistent/reader_functions.cpp | 9 +++++-- src/processor/operator/scan_node_id.cpp | 9 +++++++ src/processor/result/factorized_table.cpp | 3 +-- src/storage/buffer_manager/vm_region.cpp | 12 ++++++--- src/storage/stats/node_table_statistics.cpp | 2 +- .../lists/lists_update_store.cpp | 4 +-- src/storage/store/column_chunk.cpp | 4 +-- test/c_api/query_result_test.cpp | 16 +++++++++++ .../exceptions/runtime/invalid_utf8.test | 9 +++++++ tools/benchmark/main.cpp | 6 ++--- tools/java_api/src/jni/kuzu_java.cpp | 7 +++-- tools/nodejs_api/CMakeLists.txt | 14 +++++----- tools/shell/embedded_shell.cpp | 2 -- 34 files changed, 189 insertions(+), 93 deletions(-) create mode 100644 .clang-tidy create mode 100644 dataset/invalid-utf8/invalid-utf8.csv create mode 100644 src/include/common/system_message.h create mode 100644 test/test_files/exceptions/runtime/invalid_utf8.test diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 0000000000..f2a3fc43bd --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,16 @@ +# Bugprone exception escape is super unhelpful. +Checks: + -*, + + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-exception-escape, + -bugprone-forward-declaration-namespace, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-narrowing-conversions, + -bugprone-signed-char-misuse, + + clang-analyzer-deadcode.DeadStores, + concurrency-*, +HeaderFilterRegex: src/include|tools/benchmark/include|tools/nodejs_api/src_cpp/include|tools/python_api/src_cpp/include|tools/rust_api/include|tools/shell/include +WarningsAsErrors: "*" diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 310dc73437..79e62f2479 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -20,7 +20,7 @@ jobs: run: | pip install torch~=2.0.0 --extra-index-url https://download.pytorch.org/whl/cpu &&\ pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.0.0+cpu.html - + - name: Ensure Node.js dependencies run: npm install --include=dev working-directory: tools/nodejs_api @@ -30,7 +30,7 @@ jobs: - name: Python test run: CC=gcc CXX=g++ make pytest NUM_THREADS=32 - + - name: Node.js test run: CC=gcc CXX=g++ make nodejstest NUM_THREADS=32 @@ -117,7 +117,7 @@ jobs: shell: bash clang-build-test: - name: clang build & test + name: clang build, test & tidy needs: [clang-formatting-check, header-include-guard-check] runs-on: kuzu-self-hosted-testing steps: @@ -140,13 +140,26 @@ jobs: - name: Python test run: CC=clang-14 CXX=clang++-14 make pytest NUM_THREADS=32 - + - name: Node.js test run: CC=clang-14 CXX=clang++-14 make nodejstest NUM_THREADS=32 - + - name: Java test run: CC=clang-14 CXX=clang++-14 make javatest NUM_THREADS=32 + - name: Create compilation commands database + run: make clang-tidy-ci + + - name: Run clang-tidy on src + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath src)" + + - name: Run clang-tidy on tools + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath tools)/(?!shell/linenoise.cpp)" + + - name: Run clang-tidy on examples + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath examples)" + + msvc-build-test: name: msvc build & test needs: [clang-formatting-check, header-include-guard-check] @@ -226,12 +239,12 @@ jobs: runs-on: kuzu-self-hosted-benchmarking steps: - uses: actions/checkout@v3 - + - name: Ensure Python dependencies run: | pip install torch~=1.13 --extra-index-url https://download.pytorch.org/whl/cpu &&\ pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-1.13.0+cpu.html - + - name: Ensure Node.js dependencies run: npm install working-directory: tools/nodejs_api diff --git a/Makefile b/Makefile index 0ed0f0cfc4..7190461a1a 100644 --- a/Makefile +++ b/Makefile @@ -103,6 +103,16 @@ lcov: cd $(ROOT_DIR)/build/release/test && \ ctest --output-on-failure -j ${TEST_JOBS} +clangd: + $(call mkdirp,build/clangd) && cd build/clangd && \ + cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DCMAKE_BUILD_TYPE=Release -DBUILD_EXAMPLES=TRUE \ + -DBUILD_BENCHMARK=TRUE -DBUILD_JAVA=TRUE -DBUILD_NODEJS=TRUE -DBUILD_TESTS=TRUE ../.. + +clang-tidy-ci: + # For CI: export compile commands for the built items. + $(call mkdirp,build/release) && cd build/release && \ + cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_EXPORT_COMPILE_COMMANDS=1 ../.. + pytest: $(MAKE) release cd $(ROOT_DIR)/tools/python_api/test && \ diff --git a/dataset/invalid-utf8/invalid-utf8.csv b/dataset/invalid-utf8/invalid-utf8.csv new file mode 100644 index 0000000000..ce542efaa5 --- /dev/null +++ b/dataset/invalid-utf8/invalid-utf8.csv @@ -0,0 +1 @@ +ÿ \ No newline at end of file diff --git a/src/common/arrow/arrow_converter.cpp b/src/common/arrow/arrow_converter.cpp index 1cd7156214..51e827dbfc 100644 --- a/src/common/arrow/arrow_converter.cpp +++ b/src/common/arrow/arrow_converter.cpp @@ -44,7 +44,7 @@ void ArrowConverter::setArrowFormatForStruct( ArrowSchemaHolder& rootHolder, ArrowSchema& child, const main::DataTypeInfo& typeInfo) { auto& childrenTypesInfo = typeInfo.childrenTypesInfo; child.format = "+s"; - child.name = typeInfo.name.c_str(); + // name is set by parent. child.n_children = (std::int64_t)childrenTypesInfo.size(); rootHolder.nestedChildren.emplace_back(); rootHolder.nestedChildren.back().resize(child.n_children); diff --git a/src/common/types/date_t.cpp b/src/common/types/date_t.cpp index 41acd57674..20368fd367 100644 --- a/src/common/types/date_t.cpp +++ b/src/common/types/date_t.cpp @@ -274,7 +274,7 @@ bool Date::tryConvertDate(const char* buf, uint64_t len, uint64_t& pos, date_t& int32_t day = 0; int32_t month = -1; int32_t year = 0; - int sep; + char sep; // skip leading spaces while (pos < len && StringUtils::CharacterIsSpace(buf[pos])) { diff --git a/src/common/types/dtime_t.cpp b/src/common/types/dtime_t.cpp index 1eb7f1422d..41b512f117 100644 --- a/src/common/types/dtime_t.cpp +++ b/src/common/types/dtime_t.cpp @@ -62,7 +62,7 @@ bool Time::TryConvertTime(const char* buf, uint64_t len, uint64_t& pos, dtime_t& return false; } - int sep; + char sep; // skip leading spaces while (pos < len && isspace(buf[pos])) { diff --git a/src/common/types/types.cpp b/src/common/types/types.cpp index 5f96367dd4..6bab4e47a9 100644 --- a/src/common/types/types.cpp +++ b/src/common/types/types.cpp @@ -448,12 +448,10 @@ LogicalTypeID LogicalTypeUtils::dataTypeIDFromString(const std::string& dataType return LogicalTypeID::INTERNAL_ID; } else if ("INT64" == upperDataTypeIDString) { return LogicalTypeID::INT64; - } else if ("INT32" == upperDataTypeIDString) { + } else if ("INT32" == upperDataTypeIDString || "INT" == upperDataTypeIDString) { return LogicalTypeID::INT32; } else if ("INT16" == upperDataTypeIDString) { return LogicalTypeID::INT16; - } else if ("INT" == upperDataTypeIDString) { - return LogicalTypeID::INT32; } else if ("INT8" == upperDataTypeIDString) { return LogicalTypeID::INT8; } else if ("UINT64" == upperDataTypeIDString) { diff --git a/src/function/base_lower_upper_operation.cpp b/src/function/base_lower_upper_operation.cpp index ade1725904..5f1f8c48a4 100644 --- a/src/function/base_lower_upper_operation.cpp +++ b/src/function/base_lower_upper_operation.cpp @@ -1,5 +1,9 @@ +#include "common/assert.h" +#include "common/exception/runtime.h" +#include "common/string_utils.h" #include "function/string/functions/base_lower_upper_function.h" +using namespace kuzu::common; using namespace kuzu::utf8proc; namespace kuzu { @@ -13,6 +17,12 @@ uint32_t BaseLowerUpperFunction::getResultLen(char* inputStr, uint32_t inputLen, if (inputStr[i] & 0x80) { int size = 0; int codepoint = utf8proc_codepoint(inputStr + i, size); + if (codepoint < 0) { + // TODO(Xiyang): We shouldn't allow invalid UTF-8 to enter a string column. + std::string funcName = isUpper ? "UPPER" : "LOWER"; + throw RuntimeException(common::StringUtils::string_format( + "Failed calling {}: Invalid UTF-8.", funcName)); + } int convertedCodepoint = isUpper ? utf8proc_toupper(codepoint) : utf8proc_tolower(codepoint); int newSize = utf8proc_codepoint_length(convertedCodepoint); @@ -32,10 +42,10 @@ void BaseLowerUpperFunction::convertCase(char* result, uint32_t len, char* input if (input[i] & 0x80) { int size = 0, newSize = 0; int codepoint = utf8proc_codepoint(input + i, size); + assert(codepoint >= 0); // Validity ensured by getResultLen. int convertedCodepoint = toUpper ? utf8proc_toupper(codepoint) : utf8proc_tolower(codepoint); - auto success = utf8proc_codepoint_to_utf8(convertedCodepoint, newSize, result); - assert(success); + utf8proc_codepoint_to_utf8(convertedCodepoint, newSize, result); result += newSize; i += size; } else { diff --git a/src/include/common/system_message.h b/src/include/common/system_message.h new file mode 100644 index 0000000000..0ed6e2e30d --- /dev/null +++ b/src/include/common/system_message.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include + +namespace kuzu { +namespace common { +inline std::string systemErrMessage(int code) { + // System errors are unexpected. For anything expected, we should catch it explicitly and + // provide a better error message to the user. + // LCOV_EXCL_START + return std::system_category().message(code); + // LCOV_EXCL_END +} +inline std::string posixErrMessage() { + // LCOV_EXCL_START + return systemErrMessage(errno); + // LCOV_EXCL_END +} +} // namespace common +} // namespace kuzu diff --git a/src/include/common/types/cast_helpers.h b/src/include/common/types/cast_helpers.h index 2a313348ab..442a9126c2 100644 --- a/src/include/common/types/cast_helpers.h +++ b/src/include/common/types/cast_helpers.h @@ -122,7 +122,7 @@ struct DateToStringCast { } // optionally add BC to the end of the date if (add_bc) { - memcpy(ptr, " (BC)", 5); + strcpy(ptr, " (BC)"); } } }; @@ -282,7 +282,7 @@ struct IntervalToStringCast { } } else if (length == 0) { // empty interval: default to 00:00:00 - memcpy(buffer, "00:00:00", 8); + strcpy(buffer, "00:00:00"); return 8; } return length; diff --git a/src/include/function/arithmetic/arithmetic_functions.h b/src/include/function/arithmetic/arithmetic_functions.h index 8456536c6a..375628b40a 100644 --- a/src/include/function/arithmetic/arithmetic_functions.h +++ b/src/include/function/arithmetic/arithmetic_functions.h @@ -206,7 +206,8 @@ struct Gamma { struct Lgamma { template static inline void operation(T& input, double_t& result) { - result = lgamma(input); + result = + lgamma(input); // NOLINT(concurrency-mt-unsafe): We don't use the thread-unsafe signgam. } }; diff --git a/src/include/function/boolean/boolean_functions.h b/src/include/function/boolean/boolean_functions.h index 8147bf535d..f90a8b5743 100644 --- a/src/include/function/boolean/boolean_functions.h +++ b/src/include/function/boolean/boolean_functions.h @@ -38,9 +38,7 @@ const uint8_t NULL_BOOL = 2; struct And { static inline void operation( bool left, bool right, uint8_t& result, bool isLeftNull, bool isRightNull) { - if (!left && !isLeftNull) { - result = false; - } else if (!right && !isRightNull) { + if ((!left && !isLeftNull) || (!right && !isRightNull)) { result = false; } else if (isLeftNull || isRightNull) { result = NULL_BOOL; @@ -68,9 +66,7 @@ struct And { struct Or { static inline void operation( bool left, bool right, uint8_t& result, bool isLeftNull, bool isRightNull) { - if (left && !isLeftNull) { - result = true; - } else if (right && !isRightNull) { + if ((left && !isLeftNull) || (right && !isRightNull)) { result = true; } else if (isLeftNull || isRightNull) { result = NULL_BOOL; diff --git a/src/include/processor/operator/scan_node_id.h b/src/include/processor/operator/scan_node_id.h index 95a43afb05..2b586e24a9 100644 --- a/src/include/processor/operator/scan_node_id.h +++ b/src/include/processor/operator/scan_node_id.h @@ -43,13 +43,7 @@ class ScanNodeIDSharedState { inline uint32_t getNumTableStates() const { return tableStates.size(); } inline NodeTableScanState* getTableState(uint32_t idx) const { return tableStates[idx].get(); } - inline void initialize(transaction::Transaction* transaction) { - auto numMask = tableStates[0]->getSemiMask()->getNumMasks(); - for (auto& tableState : tableStates) { - assert(tableState->getSemiMask()->getNumMasks() == numMask); - tableState->initializeMaxOffset(transaction); - } - } + void initialize(transaction::Transaction* transaction); std::tuple getNextRangeToRead(); diff --git a/src/main/plan_printer.cpp b/src/main/plan_printer.cpp index 8290ffe9fc..3710de1470 100644 --- a/src/main/plan_printer.cpp +++ b/src/main/plan_printer.cpp @@ -150,6 +150,10 @@ void OpProfileTree::printOpProfileBoxUpperFrame(uint32_t rowIdx, std::ostringstr oss << std::endl; } +static std::string dashedLineAccountingForIndex(uint32_t width, uint32_t indent) { + return std::string(width - (1 + indent) * 2, '-'); +} + void OpProfileTree::printOpProfileBoxes(uint32_t rowIdx, std::ostringstream& oss) const { auto height = calculateRowHeight(rowIdx); auto halfWayPoint = height / 2; @@ -164,12 +168,13 @@ void OpProfileTree::printOpProfileBoxes(uint32_t rowIdx, std::ostringstream& oss unsigned int numParams = opProfileBox->getNumParams(); if (i == 0) { textToPrint = opProfileBox->getOpName(); - } else if (i == 1) { - textToPrint = std::string(opProfileBoxWidth - (1 + INDENT_WIDTH) * 2, '-'); + } else if (i == 1) { // NOLINT(bugprone-branch-clone): Merging these branches is a + // logical error, and this conditional chain is pleasant. + textToPrint = dashedLineAccountingForIndex(opProfileBoxWidth, INDENT_WIDTH); } else if (i <= numParams + 1) { textToPrint = opProfileBox->getParamsName(i - 2); } else if ((i - numParams - 1) % 2) { - textToPrint = std::string(opProfileBoxWidth - (1 + INDENT_WIDTH) * 2, '-'); + textToPrint = dashedLineAccountingForIndex(opProfileBoxWidth, INDENT_WIDTH); } else { textToPrint = opProfileBox->getAttribute((i - numParams - 1) / 2 - 1); } diff --git a/src/main/query_result.cpp b/src/main/query_result.cpp index 4d96ef3607..8a3f08ab4e 100644 --- a/src/main/query_result.cpp +++ b/src/main/query_result.cpp @@ -91,7 +91,7 @@ std::vector> QueryResult::getColumnTypesInfo() con columnTypeInfo->childrenTypesInfo.push_back( DataTypeInfo::getInfoForDataType(LogicalType(LogicalTypeID::STRING), "_label")); auto numProperties = NodeVal::getNumProperties(value); - for (auto j = 0u; i < numProperties; j++) { + for (auto j = 0u; j < numProperties; j++) { auto name = NodeVal::getPropertyName(value, j); auto val = NodeVal::getPropertyVal(value, j); columnTypeInfo->childrenTypesInfo.push_back( @@ -104,9 +104,9 @@ std::vector> QueryResult::getColumnTypesInfo() con columnTypeInfo->childrenTypesInfo.push_back( DataTypeInfo::getInfoForDataType(LogicalType(LogicalTypeID::INTERNAL_ID), "_dst")); auto numProperties = RelVal::getNumProperties(value); - for (auto j = 0u; i < numProperties; j++) { - auto name = NodeVal::getPropertyName(value, j); - auto val = NodeVal::getPropertyVal(value, j); + for (auto j = 0u; j < numProperties; j++) { + auto name = RelVal::getPropertyName(value, j); + auto val = RelVal::getPropertyVal(value, j); columnTypeInfo->childrenTypesInfo.push_back( DataTypeInfo::getInfoForDataType(*val->getDataType(), name)); } diff --git a/src/optimizer/agg_key_dependency_optimizer.cpp b/src/optimizer/agg_key_dependency_optimizer.cpp index eae657dffd..64d1394868 100644 --- a/src/optimizer/agg_key_dependency_optimizer.cpp +++ b/src/optimizer/agg_key_dependency_optimizer.cpp @@ -67,16 +67,17 @@ AggKeyDependencyOptimizer::resolveKeysAndDependentKeys(const binder::expression_ for (auto& expression : keys) { if (expression->expressionType == PROPERTY) { auto propertyExpression = (binder::PropertyExpression*)expression.get(); - if (propertyExpression->isPrimaryKey() || propertyExpression->isInternalID()) { + if (propertyExpression->isPrimaryKey() || + propertyExpression->isInternalID()) { // NOLINT(bugprone-branch-clone): Collapsing + // is a logical error. groupExpressions.push_back(expression); } else if (primaryVarNames.contains(propertyExpression->getVariableName())) { dependentExpressions.push_back(expression); } else { groupExpressions.push_back(expression); } - } else if (ExpressionUtil::isNodeVariable(*expression)) { - dependentExpressions.push_back(expression); - } else if (ExpressionUtil::isRelVariable(*expression)) { + } else if (ExpressionUtil::isNodeVariable(*expression) || + ExpressionUtil::isRelVariable(*expression)) { dependentExpressions.push_back(expression); } else { groupExpressions.push_back(expression); diff --git a/src/processor/operator/persistent/copy_rel_columns.cpp b/src/processor/operator/persistent/copy_rel_columns.cpp index daa7c8b42b..d5ab32fd8e 100644 --- a/src/processor/operator/persistent/copy_rel_columns.cpp +++ b/src/processor/operator/persistent/copy_rel_columns.cpp @@ -26,6 +26,7 @@ void CopyRelColumns::executeInternal(ExecutionContext* context) { numBwdRows = countRelLists(sharedState->bwdRelData.get(), info.nbrOffsetPos); } assert(numFwdRows == numBwdRows); + (void)numBwdRows; // For clang-tidy, value calculated for debug assert. numRows += numFwdRows; } sharedState->incrementNumRows(numRows); diff --git a/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp index f1953d0576..4273731d20 100644 --- a/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp @@ -5,6 +5,7 @@ #include "common/data_chunk/data_chunk.h" #include "common/exception/copy.h" #include "common/string_utils.h" +#include "common/system_message.h" #include "processor/operator/persistent/reader/csv/driver.h" using namespace kuzu::common; @@ -23,8 +24,10 @@ BaseCSVReader::BaseCSVReader(const std::string& filePath, const common::ReaderCo #endif ); if (fd == -1) { + // LCOV_EXCL_START throw CopyException( - StringUtils::string_format("Could not open file {}: {}", filePath, strerror(errno))); + StringUtils::string_format("Could not open file {}: {}", filePath, posixErrMessage())); + // LCOV_EXCL_END } } @@ -195,8 +198,10 @@ bool BaseCSVReader::readBuffer(uint64_t* start) { uint64_t readCount = read(fd, buffer.get() + remaining, bufferReadSize); if (readCount == -1) { + // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not read from file {}: {}", filePath, strerror(errno))); + "Could not read from file {}: {}", filePath, posixErrMessage())); + // LCOV_EXCL_END } bufferSize = remaining + readCount; @@ -414,7 +419,7 @@ uint64_t BaseCSVReader::getFileOffset() const { if (offset == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not get current file position for file {}: {}", filePath, strerror(errno))); + "Could not get current file position for file {}: {}", filePath, posixErrMessage())); // LCOV_EXCL_END } assert(offset >= bufferSize); @@ -429,7 +434,7 @@ uint64_t BaseCSVReader::getLineNumber() { if (lseek(fd, 0, SEEK_SET) == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not seek to beginning of file {}: {}", filePath, strerror(errno))); + "Could not seek to beginning of file {}: {}", filePath, posixErrMessage())); // LCOV_EXCL_END } @@ -440,16 +445,13 @@ uint64_t BaseCSVReader::getLineNumber() { if (bytesRead == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not read from file {}: {}", filePath, strerror(errno))); + "Could not read from file {}: {}", filePath, posixErrMessage())); // LCOV_EXCL_END } totalBytes += bytesRead; for (uint64_t i = 0; i < bytesRead; i++) { - if (buf[i] == '\n') { - lineNumber++; - carriageReturn = false; - } else if (carriageReturn) { + if (buf[i] == '\n' || carriageReturn) { lineNumber++; carriageReturn = false; } diff --git a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp index baf64fbb3a..7cd1434648 100644 --- a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp @@ -2,6 +2,7 @@ #include "common/exception/copy.h" #include "common/string_utils.h" +#include "common/system_message.h" #include "processor/operator/persistent/reader/csv/driver.h" using namespace kuzu::common; @@ -46,7 +47,7 @@ void ParallelCSVReader::seekToBlockStart() { if (lseek(fd, currentBlockIdx * CopyConstants::PARALLEL_BLOCK_SIZE, SEEK_SET) == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format("Failed to seek to block {} in file {}: {}", - currentBlockIdx, filePath, strerror(errno))); + currentBlockIdx, filePath, posixErrMessage())); // LCOV_EXCL_END } diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index 6655bf934e..604f93ba86 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -243,16 +243,6 @@ std::unique_ptr ParquetReader::createReaderRecursive(uint64_t dept bool isRepeated = repetition_type == FieldRepetitionType::REPEATED; bool isList = sEle.__isset.converted_type && sEle.converted_type == ConvertedType::LIST; bool isMap = sEle.__isset.converted_type && sEle.converted_type == ConvertedType::MAP; - bool isMapKv = - sEle.__isset.converted_type && sEle.converted_type == ConvertedType::MAP_KEY_VALUE; - if (!isMapKv && thisIdx > 0) { - // check if the parent node of this is a map - auto& pEle = metadata->schema[thisIdx - 1]; - bool parentIsMap = - pEle.__isset.converted_type && pEle.converted_type == ConvertedType::MAP; - bool parentHasChildren = pEle.__isset.num_children && pEle.num_children == 1; - isMapKv = parentIsMap && parentHasChildren; - } if (structFields.size() > 1 || (!isList && !isMap && !isRepeated)) { resultType = std::make_unique(common::LogicalTypeID::STRUCT, std::make_unique(std::move(structFields))); diff --git a/src/processor/operator/persistent/reader_functions.cpp b/src/processor/operator/persistent/reader_functions.cpp index c95aca4063..9c0a7797b2 100644 --- a/src/processor/operator/persistent/reader_functions.cpp +++ b/src/processor/operator/persistent/reader_functions.cpp @@ -1,6 +1,7 @@ #include "processor/operator/persistent/reader_functions.h" #include "common/exception/copy.h" +#include "common/system_message.h" #include using namespace kuzu::common; @@ -202,14 +203,18 @@ std::vector ReaderFunctions::countRowsInParallelCSVFile( for (const auto& path : config.filePaths) { int fd = open(path.c_str(), O_RDONLY); if (fd == -1) { + // LCOV_EXCL_START throw CopyException( - StringUtils::string_format("Failed to open file {}: {}", path, strerror(errno))); + StringUtils::string_format("Failed to open file {}: {}", path, posixErrMessage())); + // LCOV_EXCL_END } uint64_t length = lseek(fd, 0, SEEK_END); close(fd); if (length == -1) { + // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Failed to seek to end of file {}: {}", path, strerror(errno))); + "Failed to seek to end of file {}: {}", path, posixErrMessage())); + // LCOV_EXCL_END } auto reader = make_unique(path, config); row_idx_t numRowsInFile = reader->countRows(); diff --git a/src/processor/operator/scan_node_id.cpp b/src/processor/operator/scan_node_id.cpp index 2888319c34..569c3e65a7 100644 --- a/src/processor/operator/scan_node_id.cpp +++ b/src/processor/operator/scan_node_id.cpp @@ -24,6 +24,15 @@ std::pair NodeTableScanState::getNextRangeToRead() { return std::make_pair(startOffset, startOffset + range); } +void ScanNodeIDSharedState::initialize(transaction::Transaction* transaction) { + auto numMask = tableStates[0]->getSemiMask()->getNumMasks(); + for (auto& tableState : tableStates) { + assert(tableState->getSemiMask()->getNumMasks() == numMask); + tableState->initializeMaxOffset(transaction); + } + (void)numMask; // For clang-tidy: used for assert. +} + std::tuple ScanNodeIDSharedState::getNextRangeToRead() { std::unique_lock lck{mtx}; if (currentStateIdx == tableStates.size()) { diff --git a/src/processor/result/factorized_table.cpp b/src/processor/result/factorized_table.cpp index 5f17924790..c7ba029925 100644 --- a/src/processor/result/factorized_table.cpp +++ b/src/processor/result/factorized_table.cpp @@ -287,8 +287,7 @@ void FactorizedTable::copyToInMemList(ft_col_idx_t colIdx, std::vector& tupleIdxesToRead, uint8_t* data, NullMask* nullMask, uint64_t startElemPosInList, DiskOverflowFile* overflowFileOfInMemList, const LogicalType& type) const { - auto column = tableSchema->getColumn(colIdx); - assert(column->isFlat() == true); + assert(tableSchema->getColumn(colIdx)->isFlat() == true); auto numBytesPerValue = type.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID ? sizeof(offset_t) : LogicalTypeUtils::getRowLayoutSize(type); diff --git a/src/storage/buffer_manager/vm_region.cpp b/src/storage/buffer_manager/vm_region.cpp index 5bda4c80f2..84e2d43e26 100644 --- a/src/storage/buffer_manager/vm_region.cpp +++ b/src/storage/buffer_manager/vm_region.cpp @@ -1,6 +1,7 @@ #include "storage/buffer_manager/vm_region.h" #include "common/string_utils.h" +#include "common/system_message.h" #ifdef _WIN32 #include @@ -71,17 +72,20 @@ void VMRegion::releaseFrame(frame_idx_t frameIdx) { // See https://arvid.io/2018/04/02/memory-mapping-on-windows/#1 // Not sure what the differences are if (!VirtualFree(getFrame(frameIdx), frameSize, MEM_DECOMMIT)) { + auto code = GetLastError(); throw BufferManagerException(StringUtils::string_format( "Releasing physical memory associated with a frame failed with error code {}: {}.", - GetLastError(), std::system_category().message(GetLastError()))); + code, systemErrMessage(code))); } #else int error = madvise(getFrame(frameIdx), frameSize, MADV_DONTNEED); if (error != 0) { - throw BufferManagerException( - "Releasing physical memory associated with a frame failed with error code " + - std::to_string(error) + ": " + std::string(std::strerror(errno))); + // LCOV_EXCL_START + throw BufferManagerException(StringUtils::string_format( + "Releasing physical memory associated with a frame failed with error code {}: {}.", + error, posixErrMessage())); + // LCOV_EXCL_END } #endif } diff --git a/src/storage/stats/node_table_statistics.cpp b/src/storage/stats/node_table_statistics.cpp index ab48dc2532..a285c6588f 100644 --- a/src/storage/stats/node_table_statistics.cpp +++ b/src/storage/stats/node_table_statistics.cpp @@ -113,7 +113,7 @@ void NodeTableStatsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( auto itr = deletedNodeOffsets.begin(); sel_t nextDeletedNodeOffset = *itr - morselBeginOffset; uint64_t nextSelectedPosition = 0; - for (sel_t pos = 0; pos < nodeOffsetVector->state->getOriginalSize(); ++pos) { + for (auto pos = 0; pos < nodeOffsetVector->state->getOriginalSize(); ++pos) { if (pos == nextDeletedNodeOffset) { itr++; if (itr == deletedNodeOffsets.end()) { diff --git a/src/storage/storage_structure/lists/lists_update_store.cpp b/src/storage/storage_structure/lists/lists_update_store.cpp index 82f04fdaad..217659e4f5 100644 --- a/src/storage/storage_structure/lists/lists_update_store.cpp +++ b/src/storage/storage_structure/lists/lists_update_store.cpp @@ -322,9 +322,7 @@ ft_col_idx_t ListsUpdatesStore::getColIdxInFT(ListFileID& listFileID) const { void ListsUpdatesStore::initListsUpdatesPerTablePerDirection() { listsUpdatesPerDirection.clear(); - for (auto direction : RelDataDirectionUtils::getRelDataDirections()) { - listsUpdatesPerDirection.emplace_back(); - } + listsUpdatesPerDirection.resize(RelDataDirectionUtils::getRelDataDirections().size()); } ListsUpdatesForNodeOffset* ListsUpdatesStore::getOrCreateListsUpdatesForNodeOffset( diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 5931535f1e..22f5ba9f5a 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -445,12 +445,10 @@ uint32_t ColumnChunk::getDataTypeSizeInChunk(LogicalType& dataType) { case LogicalTypeID::STRING: { return sizeof(ku_string_t); } + case LogicalTypeID::INTERNAL_ID: case LogicalTypeID::VAR_LIST: { return sizeof(offset_t); } - case LogicalTypeID::INTERNAL_ID: { - return sizeof(offset_t); - } case LogicalTypeID::SERIAL: { return sizeof(int64_t); } diff --git a/test/c_api/query_result_test.cpp b/test/c_api/query_result_test.cpp index e172571b42..f45d760ba0 100644 --- a/test/c_api/query_result_test.cpp +++ b/test/c_api/query_result_test.cpp @@ -80,6 +80,22 @@ TEST_F(CApiQueryResultTest, GetColumnDataType) { kuzu_query_result_destroy(result); } +TEST_F(CApiQueryResultTest, GetArrowSchema) { + auto connection = getConnection(); + auto result = kuzu_connection_query(connection, "MATCH (p:person)-[k:knows]-(q:person) RETURN " + "p.fName, k, q.fName"); + ASSERT_TRUE(kuzu_query_result_is_success(result)); + auto schema = kuzu_query_result_get_arrow_schema(result); + ASSERT_STREQ(schema.name, "kuzu_query_result"); + ASSERT_EQ(schema.n_children, 3); + ASSERT_STREQ(schema.children[0]->name, "p.fName"); + ASSERT_STREQ(schema.children[1]->name, "k"); + ASSERT_STREQ(schema.children[2]->name, "q.fName"); + + schema.release(&schema); + kuzu_query_result_destroy(result); +} + TEST_F(CApiQueryResultTest, GetQuerySummary) { auto connection = getConnection(); auto result = diff --git a/test/test_files/exceptions/runtime/invalid_utf8.test b/test/test_files/exceptions/runtime/invalid_utf8.test new file mode 100644 index 0000000000..30f9f579da --- /dev/null +++ b/test/test_files/exceptions/runtime/invalid_utf8.test @@ -0,0 +1,9 @@ +-GROUP RuntimeErrorTest +-DATASET CSV empty + +-- + +-CASE InvalidUTF8 +-STATEMENT LOAD FROM "${KUZU_ROOT_DIRECTORY}/dataset/invalid-utf8/invalid-utf8.csv" RETURN UPPER(column0); +---- error +Runtime exception: Failed calling UPPER: Invalid UTF-8. diff --git a/tools/benchmark/main.cpp b/tools/benchmark/main.cpp index db940e105e..48264377f4 100644 --- a/tools/benchmark/main.cpp +++ b/tools/benchmark/main.cpp @@ -38,16 +38,16 @@ int main(int argc, char** argv) { config->bufferPoolSize = (uint64_t)stoull(getArgumentValue(arg)) << 20; } else { printf("Unrecognized option %s", arg.c_str()); - exit(1); + return 1; } } if (datasetPath.empty()) { printf("Missing --dataset input."); - exit(1); + return 1; } if (benchmarkPath.empty()) { printf("Missing --benchmark input"); - exit(1); + return 1; } auto runner = BenchmarkRunner(datasetPath, std::move(config)); try { diff --git a/tools/java_api/src/jni/kuzu_java.cpp b/tools/java_api/src/jni/kuzu_java.cpp index d234cf906a..4bd6687fce 100644 --- a/tools/java_api/src/jni/kuzu_java.cpp +++ b/tools/java_api/src/jni/kuzu_java.cpp @@ -6,10 +6,10 @@ #include "com_kuzudb_KuzuNative.h" #include "common/exception/exception.h" #include "common/types/types.h" -#include "common/types/value/value.h" #include "common/types/value/nested.h" #include "common/types/value/node.h" #include "common/types/value/rel.h" +#include "common/types/value/value.h" #include "json.hpp" #include "main/kuzu.h" #include "main/query_summary.h" @@ -152,8 +152,8 @@ void javaMapToCPPMap( * All Database native functions */ -JNIEXPORT jlong JNICALL Java_com_kuzudb_KuzuNative_kuzu_1database_1init( - JNIEnv* env, jclass, jstring database_path, jlong buffer_pool_size, jboolean enable_compression) { +JNIEXPORT jlong JNICALL Java_com_kuzudb_KuzuNative_kuzu_1database_1init(JNIEnv* env, jclass, + jstring database_path, jlong buffer_pool_size, jboolean enable_compression) { const char* path = env->GetStringUTFChars(database_path, JNI_FALSE); uint64_t buffer = static_cast(buffer_pool_size); @@ -887,7 +887,6 @@ JNIEXPORT jobject JNICALL Java_com_kuzudb_KuzuNative_kuzu_1value_1get_1value( return ret; } case LogicalTypeID::DATE: { - jclass retClass = env->FindClass("java/time/LocalDate"); date_t date = v->getValue(); jclass ldClass = env->FindClass("java/time/LocalDate"); jmethodID ofEpochDay = diff --git a/tools/nodejs_api/CMakeLists.txt b/tools/nodejs_api/CMakeLists.txt index 55421c56a7..04fbd82692 100644 --- a/tools/nodejs_api/CMakeLists.txt +++ b/tools/nodejs_api/CMakeLists.txt @@ -15,18 +15,18 @@ else() endif() execute_process( - COMMAND ${NPX_CMD} cmake-js print-cmakejs-include - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${NPX_CMD} cmake-js print-cmakejs-include + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_INC ) execute_process( - COMMAND ${NPX_CMD} cmake-js print-cmakejs-lib - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${NPX_CMD} cmake-js print-cmakejs-lib + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_LIB ) execute_process( COMMAND ${NPX_CMD} cmake-js print-cmakejs-src - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_SRC ) @@ -48,10 +48,10 @@ file(COPY ${JS_SOURCE_FILES} DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/build") add_library(${PROJECT_NAME} SHARED ${CPP_SOURCE_FILES} ${CMAKE_JS_SRC}) set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") -set_target_properties(${PROJECT_NAME} +set_target_properties(${PROJECT_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build") if(APPLE) target_link_options(${PROJECT_NAME} PRIVATE -undefined dynamic_lookup) diff --git a/tools/shell/embedded_shell.cpp b/tools/shell/embedded_shell.cpp index 008000c78c..b6babf8064 100644 --- a/tools/shell/embedded_shell.cpp +++ b/tools/shell/embedded_shell.cpp @@ -162,7 +162,6 @@ void highlight(char* buffer, char* resultBuf, uint32_t maxLen, uint32_t cursorPo } lineLen -= thisChar; buf = buf.substr(thisChar, lineLen); - bufLen = buf.length(); } else if (buf.length() > maxLen) { uint32_t counter = 0; uint32_t lineLen = 0; @@ -171,7 +170,6 @@ void highlight(char* buffer, char* resultBuf, uint32_t maxLen, uint32_t cursorPo lineLen = utf8proc_next_grapheme(buffer, bufLen, lineLen); } buf = buf.substr(0, lineLen); - bufLen = buf.length(); } for (auto i = 0u; i < buf.length(); i++) { if ((buf[i] != ' ' && !word.empty() && word[0] == ' ') ||