From fa8296787b15fa4aa7a74254c4c5c6f4285e97b0 Mon Sep 17 00:00:00 2001 From: Keenan Gugeler Date: Thu, 5 Oct 2023 16:03:55 -0400 Subject: [PATCH] tidy: add clang-tidy clang-tidy is a static analyzer, and can find many different issues. So far I have integrated a few simple checks, and disabled the remaining ones. They will be enabled in future PRs. Closes #2152. Ref #2148. --- .clang-tidy | 16 +++++++++++ .github/workflows/ci-workflow.yml | 27 ++++++++++++++----- Makefile | 5 ++++ src/common/types/date_t.cpp | 2 +- src/common/types/dtime_t.cpp | 2 +- src/common/types/types.cpp | 4 +-- src/function/base_lower_upper_operation.cpp | 13 +++++++-- src/include/common/system_message.h | 16 +++++++++++ src/include/common/types/cast_helpers.h | 4 +-- .../arithmetic/arithmetic_functions.h | 3 ++- .../function/boolean/boolean_functions.h | 8 ++---- src/include/processor/operator/scan_node_id.h | 8 +----- src/main/plan_printer.cpp | 11 +++++--- src/main/query_result.cpp | 4 +-- .../agg_key_dependency_optimizer.cpp | 9 ++++--- .../operator/persistent/copy_rel_columns.cpp | 1 + .../persistent/reader/csv/base_csv_reader.cpp | 18 ++++++------- .../reader/csv/parallel_csv_reader.cpp | 3 ++- .../reader/parquet/parquet_reader.cpp | 10 ------- .../operator/persistent/reader_functions.cpp | 7 ++--- src/processor/operator/scan_node_id.cpp | 9 +++++++ src/processor/result/factorized_table.cpp | 3 +-- src/storage/buffer_manager/vm_region.cpp | 10 ++++--- src/storage/stats/node_table_statistics.cpp | 2 +- .../lists/lists_update_store.cpp | 4 +-- src/storage/store/column_chunk.cpp | 4 +-- tools/benchmark/main.cpp | 6 ++--- tools/java_api/src/jni/kuzu_java.cpp | 7 +++-- tools/nodejs_api/CMakeLists.txt | 14 +++++----- tools/shell/embedded_shell.cpp | 2 -- 30 files changed, 140 insertions(+), 92 deletions(-) create mode 100644 .clang-tidy create mode 100644 src/include/common/system_message.h diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000000..f2a3fc43bde --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,16 @@ +# Bugprone exception escape is super unhelpful. +Checks: + -*, + + bugprone-*, + -bugprone-easily-swappable-parameters, + -bugprone-exception-escape, + -bugprone-forward-declaration-namespace, + -bugprone-implicit-widening-of-multiplication-result, + -bugprone-narrowing-conversions, + -bugprone-signed-char-misuse, + + clang-analyzer-deadcode.DeadStores, + concurrency-*, +HeaderFilterRegex: src/include|tools/benchmark/include|tools/nodejs_api/src_cpp/include|tools/python_api/src_cpp/include|tools/rust_api/include|tools/shell/include +WarningsAsErrors: "*" diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 310dc734376..e8f71fca2ab 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -20,7 +20,7 @@ jobs: run: | pip install torch~=2.0.0 --extra-index-url https://download.pytorch.org/whl/cpu &&\ pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-2.0.0+cpu.html - + - name: Ensure Node.js dependencies run: npm install --include=dev working-directory: tools/nodejs_api @@ -30,7 +30,7 @@ jobs: - name: Python test run: CC=gcc CXX=g++ make pytest NUM_THREADS=32 - + - name: Node.js test run: CC=gcc CXX=g++ make nodejstest NUM_THREADS=32 @@ -117,7 +117,7 @@ jobs: shell: bash clang-build-test: - name: clang build & test + name: clang build, test & tidy needs: [clang-formatting-check, header-include-guard-check] runs-on: kuzu-self-hosted-testing steps: @@ -140,13 +140,26 @@ jobs: - name: Python test run: CC=clang-14 CXX=clang++-14 make pytest NUM_THREADS=32 - + - name: Node.js test run: CC=clang-14 CXX=clang++-14 make nodejstest NUM_THREADS=32 - + - name: Java test run: CC=clang-14 CXX=clang++-14 make javatest NUM_THREADS=32 + - name: Create compilation commands database + run: make clangd NUM_THREADS=32 + + - name: Run clang-tidy on src + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath src)" + + - name: Run clang-tidy on tools + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath tools)/(?!shell/linenoise.cpp)" + + - name: Run clang-tidy on examples + run: run-clang-tidy -p build/release -quiet -j 32 "^$(realpath examples)" + + msvc-build-test: name: msvc build & test needs: [clang-formatting-check, header-include-guard-check] @@ -226,12 +239,12 @@ jobs: runs-on: kuzu-self-hosted-benchmarking steps: - uses: actions/checkout@v3 - + - name: Ensure Python dependencies run: | pip install torch~=1.13 --extra-index-url https://download.pytorch.org/whl/cpu &&\ pip install --user -r tools/python_api/requirements_dev.txt -f https://data.pyg.org/whl/torch-1.13.0+cpu.html - + - name: Ensure Node.js dependencies run: npm install working-directory: tools/nodejs_api diff --git a/Makefile b/Makefile index 0ed0f0cfc41..78f5fe32a0b 100644 --- a/Makefile +++ b/Makefile @@ -103,6 +103,11 @@ lcov: cd $(ROOT_DIR)/build/release/test && \ ctest --output-on-failure -j ${TEST_JOBS} +clangd: + $(call mkdirp,build/release) && cd build/release && \ + cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_EXPORT_COMPILE_COMMANDS=1 -DCMAKE_BUILD_TYPE=Release -DBUILD_EXAMPLES=TRUE \ + -DBUILD_BENCHMARK=TRUE -DBUILD_JAVA=TRUE -DBUILD_NODEJS=TRUE -DBUILD_TESTS=TRUE ../.. + pytest: $(MAKE) release cd $(ROOT_DIR)/tools/python_api/test && \ diff --git a/src/common/types/date_t.cpp b/src/common/types/date_t.cpp index 41acd57674b..20368fd367a 100644 --- a/src/common/types/date_t.cpp +++ b/src/common/types/date_t.cpp @@ -274,7 +274,7 @@ bool Date::tryConvertDate(const char* buf, uint64_t len, uint64_t& pos, date_t& int32_t day = 0; int32_t month = -1; int32_t year = 0; - int sep; + char sep; // skip leading spaces while (pos < len && StringUtils::CharacterIsSpace(buf[pos])) { diff --git a/src/common/types/dtime_t.cpp b/src/common/types/dtime_t.cpp index 1eb7f1422da..41b512f1179 100644 --- a/src/common/types/dtime_t.cpp +++ b/src/common/types/dtime_t.cpp @@ -62,7 +62,7 @@ bool Time::TryConvertTime(const char* buf, uint64_t len, uint64_t& pos, dtime_t& return false; } - int sep; + char sep; // skip leading spaces while (pos < len && isspace(buf[pos])) { diff --git a/src/common/types/types.cpp b/src/common/types/types.cpp index 5f96367dd4a..6bab4e47a9c 100644 --- a/src/common/types/types.cpp +++ b/src/common/types/types.cpp @@ -448,12 +448,10 @@ LogicalTypeID LogicalTypeUtils::dataTypeIDFromString(const std::string& dataType return LogicalTypeID::INTERNAL_ID; } else if ("INT64" == upperDataTypeIDString) { return LogicalTypeID::INT64; - } else if ("INT32" == upperDataTypeIDString) { + } else if ("INT32" == upperDataTypeIDString || "INT" == upperDataTypeIDString) { return LogicalTypeID::INT32; } else if ("INT16" == upperDataTypeIDString) { return LogicalTypeID::INT16; - } else if ("INT" == upperDataTypeIDString) { - return LogicalTypeID::INT32; } else if ("INT8" == upperDataTypeIDString) { return LogicalTypeID::INT8; } else if ("UINT64" == upperDataTypeIDString) { diff --git a/src/function/base_lower_upper_operation.cpp b/src/function/base_lower_upper_operation.cpp index ade1725904e..45286b3de81 100644 --- a/src/function/base_lower_upper_operation.cpp +++ b/src/function/base_lower_upper_operation.cpp @@ -1,5 +1,9 @@ +#include "common/assert.h" +#include "common/exception/runtime.h" +#include "common/string_utils.h" #include "function/string/functions/base_lower_upper_function.h" +using namespace kuzu::common; using namespace kuzu::utf8proc; namespace kuzu { @@ -32,10 +36,15 @@ void BaseLowerUpperFunction::convertCase(char* result, uint32_t len, char* input if (input[i] & 0x80) { int size = 0, newSize = 0; int codepoint = utf8proc_codepoint(input + i, size); + if (codepoint < 0) { + // TODO(Xiyang): We shouldn't allow invalid UTF-8 to enter a string column. + std::string funcName = toUpper ? "UPPER" : "LOWER"; + throw RuntimeException(common::StringUtils::string_format( + "Failed calling {}: Invalid UTF-8", funcName)); + } int convertedCodepoint = toUpper ? utf8proc_toupper(codepoint) : utf8proc_tolower(codepoint); - auto success = utf8proc_codepoint_to_utf8(convertedCodepoint, newSize, result); - assert(success); + utf8proc_codepoint_to_utf8(convertedCodepoint, newSize, result); result += newSize; i += size; } else { diff --git a/src/include/common/system_message.h b/src/include/common/system_message.h new file mode 100644 index 00000000000..0abe6528e17 --- /dev/null +++ b/src/include/common/system_message.h @@ -0,0 +1,16 @@ +#pragma once + +#include +#include +#include + +namespace kuzu { +namespace common { +inline std::string system_err_message(int code) { + return std::system_category().message(code); +} +inline std::string posix_err_message() { + return system_err_message(errno); +} +} // namespace common +} // namespace kuzu diff --git a/src/include/common/types/cast_helpers.h b/src/include/common/types/cast_helpers.h index 2a313348abe..442a9126c27 100644 --- a/src/include/common/types/cast_helpers.h +++ b/src/include/common/types/cast_helpers.h @@ -122,7 +122,7 @@ struct DateToStringCast { } // optionally add BC to the end of the date if (add_bc) { - memcpy(ptr, " (BC)", 5); + strcpy(ptr, " (BC)"); } } }; @@ -282,7 +282,7 @@ struct IntervalToStringCast { } } else if (length == 0) { // empty interval: default to 00:00:00 - memcpy(buffer, "00:00:00", 8); + strcpy(buffer, "00:00:00"); return 8; } return length; diff --git a/src/include/function/arithmetic/arithmetic_functions.h b/src/include/function/arithmetic/arithmetic_functions.h index 8456536c6a5..375628b40a7 100644 --- a/src/include/function/arithmetic/arithmetic_functions.h +++ b/src/include/function/arithmetic/arithmetic_functions.h @@ -206,7 +206,8 @@ struct Gamma { struct Lgamma { template static inline void operation(T& input, double_t& result) { - result = lgamma(input); + result = + lgamma(input); // NOLINT(concurrency-mt-unsafe): We don't use the thread-unsafe signgam. } }; diff --git a/src/include/function/boolean/boolean_functions.h b/src/include/function/boolean/boolean_functions.h index 8147bf535d5..f90a8b57433 100644 --- a/src/include/function/boolean/boolean_functions.h +++ b/src/include/function/boolean/boolean_functions.h @@ -38,9 +38,7 @@ const uint8_t NULL_BOOL = 2; struct And { static inline void operation( bool left, bool right, uint8_t& result, bool isLeftNull, bool isRightNull) { - if (!left && !isLeftNull) { - result = false; - } else if (!right && !isRightNull) { + if ((!left && !isLeftNull) || (!right && !isRightNull)) { result = false; } else if (isLeftNull || isRightNull) { result = NULL_BOOL; @@ -68,9 +66,7 @@ struct And { struct Or { static inline void operation( bool left, bool right, uint8_t& result, bool isLeftNull, bool isRightNull) { - if (left && !isLeftNull) { - result = true; - } else if (right && !isRightNull) { + if ((left && !isLeftNull) || (right && !isRightNull)) { result = true; } else if (isLeftNull || isRightNull) { result = NULL_BOOL; diff --git a/src/include/processor/operator/scan_node_id.h b/src/include/processor/operator/scan_node_id.h index 95a43afb05c..2b586e24a90 100644 --- a/src/include/processor/operator/scan_node_id.h +++ b/src/include/processor/operator/scan_node_id.h @@ -43,13 +43,7 @@ class ScanNodeIDSharedState { inline uint32_t getNumTableStates() const { return tableStates.size(); } inline NodeTableScanState* getTableState(uint32_t idx) const { return tableStates[idx].get(); } - inline void initialize(transaction::Transaction* transaction) { - auto numMask = tableStates[0]->getSemiMask()->getNumMasks(); - for (auto& tableState : tableStates) { - assert(tableState->getSemiMask()->getNumMasks() == numMask); - tableState->initializeMaxOffset(transaction); - } - } + void initialize(transaction::Transaction* transaction); std::tuple getNextRangeToRead(); diff --git a/src/main/plan_printer.cpp b/src/main/plan_printer.cpp index 8290ffe9fc1..3710de14708 100644 --- a/src/main/plan_printer.cpp +++ b/src/main/plan_printer.cpp @@ -150,6 +150,10 @@ void OpProfileTree::printOpProfileBoxUpperFrame(uint32_t rowIdx, std::ostringstr oss << std::endl; } +static std::string dashedLineAccountingForIndex(uint32_t width, uint32_t indent) { + return std::string(width - (1 + indent) * 2, '-'); +} + void OpProfileTree::printOpProfileBoxes(uint32_t rowIdx, std::ostringstream& oss) const { auto height = calculateRowHeight(rowIdx); auto halfWayPoint = height / 2; @@ -164,12 +168,13 @@ void OpProfileTree::printOpProfileBoxes(uint32_t rowIdx, std::ostringstream& oss unsigned int numParams = opProfileBox->getNumParams(); if (i == 0) { textToPrint = opProfileBox->getOpName(); - } else if (i == 1) { - textToPrint = std::string(opProfileBoxWidth - (1 + INDENT_WIDTH) * 2, '-'); + } else if (i == 1) { // NOLINT(bugprone-branch-clone): Merging these branches is a + // logical error, and this conditional chain is pleasant. + textToPrint = dashedLineAccountingForIndex(opProfileBoxWidth, INDENT_WIDTH); } else if (i <= numParams + 1) { textToPrint = opProfileBox->getParamsName(i - 2); } else if ((i - numParams - 1) % 2) { - textToPrint = std::string(opProfileBoxWidth - (1 + INDENT_WIDTH) * 2, '-'); + textToPrint = dashedLineAccountingForIndex(opProfileBoxWidth, INDENT_WIDTH); } else { textToPrint = opProfileBox->getAttribute((i - numParams - 1) / 2 - 1); } diff --git a/src/main/query_result.cpp b/src/main/query_result.cpp index 4d96ef36079..488fcfb7f4a 100644 --- a/src/main/query_result.cpp +++ b/src/main/query_result.cpp @@ -91,7 +91,7 @@ std::vector> QueryResult::getColumnTypesInfo() con columnTypeInfo->childrenTypesInfo.push_back( DataTypeInfo::getInfoForDataType(LogicalType(LogicalTypeID::STRING), "_label")); auto numProperties = NodeVal::getNumProperties(value); - for (auto j = 0u; i < numProperties; j++) { + for (auto j = 0u; j < numProperties; j++) { auto name = NodeVal::getPropertyName(value, j); auto val = NodeVal::getPropertyVal(value, j); columnTypeInfo->childrenTypesInfo.push_back( @@ -104,7 +104,7 @@ std::vector> QueryResult::getColumnTypesInfo() con columnTypeInfo->childrenTypesInfo.push_back( DataTypeInfo::getInfoForDataType(LogicalType(LogicalTypeID::INTERNAL_ID), "_dst")); auto numProperties = RelVal::getNumProperties(value); - for (auto j = 0u; i < numProperties; j++) { + for (auto j = 0u; j < numProperties; j++) { auto name = NodeVal::getPropertyName(value, j); auto val = NodeVal::getPropertyVal(value, j); columnTypeInfo->childrenTypesInfo.push_back( diff --git a/src/optimizer/agg_key_dependency_optimizer.cpp b/src/optimizer/agg_key_dependency_optimizer.cpp index eae657dffd8..64d1394868e 100644 --- a/src/optimizer/agg_key_dependency_optimizer.cpp +++ b/src/optimizer/agg_key_dependency_optimizer.cpp @@ -67,16 +67,17 @@ AggKeyDependencyOptimizer::resolveKeysAndDependentKeys(const binder::expression_ for (auto& expression : keys) { if (expression->expressionType == PROPERTY) { auto propertyExpression = (binder::PropertyExpression*)expression.get(); - if (propertyExpression->isPrimaryKey() || propertyExpression->isInternalID()) { + if (propertyExpression->isPrimaryKey() || + propertyExpression->isInternalID()) { // NOLINT(bugprone-branch-clone): Collapsing + // is a logical error. groupExpressions.push_back(expression); } else if (primaryVarNames.contains(propertyExpression->getVariableName())) { dependentExpressions.push_back(expression); } else { groupExpressions.push_back(expression); } - } else if (ExpressionUtil::isNodeVariable(*expression)) { - dependentExpressions.push_back(expression); - } else if (ExpressionUtil::isRelVariable(*expression)) { + } else if (ExpressionUtil::isNodeVariable(*expression) || + ExpressionUtil::isRelVariable(*expression)) { dependentExpressions.push_back(expression); } else { groupExpressions.push_back(expression); diff --git a/src/processor/operator/persistent/copy_rel_columns.cpp b/src/processor/operator/persistent/copy_rel_columns.cpp index daa7c8b42b5..d5ab32fd8e8 100644 --- a/src/processor/operator/persistent/copy_rel_columns.cpp +++ b/src/processor/operator/persistent/copy_rel_columns.cpp @@ -26,6 +26,7 @@ void CopyRelColumns::executeInternal(ExecutionContext* context) { numBwdRows = countRelLists(sharedState->bwdRelData.get(), info.nbrOffsetPos); } assert(numFwdRows == numBwdRows); + (void)numBwdRows; // For clang-tidy, value calculated for debug assert. numRows += numFwdRows; } sharedState->incrementNumRows(numRows); diff --git a/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp index f1953d05769..a98ac0fbc79 100644 --- a/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/base_csv_reader.cpp @@ -5,6 +5,7 @@ #include "common/data_chunk/data_chunk.h" #include "common/exception/copy.h" #include "common/string_utils.h" +#include "common/system_message.h" #include "processor/operator/persistent/reader/csv/driver.h" using namespace kuzu::common; @@ -23,8 +24,8 @@ BaseCSVReader::BaseCSVReader(const std::string& filePath, const common::ReaderCo #endif ); if (fd == -1) { - throw CopyException( - StringUtils::string_format("Could not open file {}: {}", filePath, strerror(errno))); + throw CopyException(StringUtils::string_format( + "Could not open file {}: {}", filePath, posix_err_message())); } } @@ -196,7 +197,7 @@ bool BaseCSVReader::readBuffer(uint64_t* start) { uint64_t readCount = read(fd, buffer.get() + remaining, bufferReadSize); if (readCount == -1) { throw CopyException(StringUtils::string_format( - "Could not read from file {}: {}", filePath, strerror(errno))); + "Could not read from file {}: {}", filePath, posix_err_message())); } bufferSize = remaining + readCount; @@ -414,7 +415,7 @@ uint64_t BaseCSVReader::getFileOffset() const { if (offset == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not get current file position for file {}: {}", filePath, strerror(errno))); + "Could not get current file position for file {}: {}", filePath, posix_err_message())); // LCOV_EXCL_END } assert(offset >= bufferSize); @@ -429,7 +430,7 @@ uint64_t BaseCSVReader::getLineNumber() { if (lseek(fd, 0, SEEK_SET) == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not seek to beginning of file {}: {}", filePath, strerror(errno))); + "Could not seek to beginning of file {}: {}", filePath, posix_err_message())); // LCOV_EXCL_END } @@ -440,16 +441,13 @@ uint64_t BaseCSVReader::getLineNumber() { if (bytesRead == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format( - "Could not read from file {}: {}", filePath, strerror(errno))); + "Could not read from file {}: {}", filePath, posix_err_message())); // LCOV_EXCL_END } totalBytes += bytesRead; for (uint64_t i = 0; i < bytesRead; i++) { - if (buf[i] == '\n') { - lineNumber++; - carriageReturn = false; - } else if (carriageReturn) { + if (buf[i] == '\n' || carriageReturn) { lineNumber++; carriageReturn = false; } diff --git a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp index baf64fbb3a8..0e194c48feb 100644 --- a/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp +++ b/src/processor/operator/persistent/reader/csv/parallel_csv_reader.cpp @@ -2,6 +2,7 @@ #include "common/exception/copy.h" #include "common/string_utils.h" +#include "common/system_message.h" #include "processor/operator/persistent/reader/csv/driver.h" using namespace kuzu::common; @@ -46,7 +47,7 @@ void ParallelCSVReader::seekToBlockStart() { if (lseek(fd, currentBlockIdx * CopyConstants::PARALLEL_BLOCK_SIZE, SEEK_SET) == -1) { // LCOV_EXCL_START throw CopyException(StringUtils::string_format("Failed to seek to block {} in file {}: {}", - currentBlockIdx, filePath, strerror(errno))); + currentBlockIdx, filePath, posix_err_message())); // LCOV_EXCL_END } diff --git a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp index 6655bf934e3..604f93ba86f 100644 --- a/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp +++ b/src/processor/operator/persistent/reader/parquet/parquet_reader.cpp @@ -243,16 +243,6 @@ std::unique_ptr ParquetReader::createReaderRecursive(uint64_t dept bool isRepeated = repetition_type == FieldRepetitionType::REPEATED; bool isList = sEle.__isset.converted_type && sEle.converted_type == ConvertedType::LIST; bool isMap = sEle.__isset.converted_type && sEle.converted_type == ConvertedType::MAP; - bool isMapKv = - sEle.__isset.converted_type && sEle.converted_type == ConvertedType::MAP_KEY_VALUE; - if (!isMapKv && thisIdx > 0) { - // check if the parent node of this is a map - auto& pEle = metadata->schema[thisIdx - 1]; - bool parentIsMap = - pEle.__isset.converted_type && pEle.converted_type == ConvertedType::MAP; - bool parentHasChildren = pEle.__isset.num_children && pEle.num_children == 1; - isMapKv = parentIsMap && parentHasChildren; - } if (structFields.size() > 1 || (!isList && !isMap && !isRepeated)) { resultType = std::make_unique(common::LogicalTypeID::STRUCT, std::make_unique(std::move(structFields))); diff --git a/src/processor/operator/persistent/reader_functions.cpp b/src/processor/operator/persistent/reader_functions.cpp index c95aca40636..2b5b21bb957 100644 --- a/src/processor/operator/persistent/reader_functions.cpp +++ b/src/processor/operator/persistent/reader_functions.cpp @@ -1,6 +1,7 @@ #include "processor/operator/persistent/reader_functions.h" #include "common/exception/copy.h" +#include "common/system_message.h" #include using namespace kuzu::common; @@ -202,14 +203,14 @@ std::vector ReaderFunctions::countRowsInParallelCSVFile( for (const auto& path : config.filePaths) { int fd = open(path.c_str(), O_RDONLY); if (fd == -1) { - throw CopyException( - StringUtils::string_format("Failed to open file {}: {}", path, strerror(errno))); + throw CopyException(StringUtils::string_format( + "Failed to open file {}: {}", path, posix_err_message())); } uint64_t length = lseek(fd, 0, SEEK_END); close(fd); if (length == -1) { throw CopyException(StringUtils::string_format( - "Failed to seek to end of file {}: {}", path, strerror(errno))); + "Failed to seek to end of file {}: {}", path, posix_err_message())); } auto reader = make_unique(path, config); row_idx_t numRowsInFile = reader->countRows(); diff --git a/src/processor/operator/scan_node_id.cpp b/src/processor/operator/scan_node_id.cpp index 2888319c34b..569c3e65a71 100644 --- a/src/processor/operator/scan_node_id.cpp +++ b/src/processor/operator/scan_node_id.cpp @@ -24,6 +24,15 @@ std::pair NodeTableScanState::getNextRangeToRead() { return std::make_pair(startOffset, startOffset + range); } +void ScanNodeIDSharedState::initialize(transaction::Transaction* transaction) { + auto numMask = tableStates[0]->getSemiMask()->getNumMasks(); + for (auto& tableState : tableStates) { + assert(tableState->getSemiMask()->getNumMasks() == numMask); + tableState->initializeMaxOffset(transaction); + } + (void)numMask; // For clang-tidy: used for assert. +} + std::tuple ScanNodeIDSharedState::getNextRangeToRead() { std::unique_lock lck{mtx}; if (currentStateIdx == tableStates.size()) { diff --git a/src/processor/result/factorized_table.cpp b/src/processor/result/factorized_table.cpp index 5f17924790e..c7ba0299250 100644 --- a/src/processor/result/factorized_table.cpp +++ b/src/processor/result/factorized_table.cpp @@ -287,8 +287,7 @@ void FactorizedTable::copyToInMemList(ft_col_idx_t colIdx, std::vector& tupleIdxesToRead, uint8_t* data, NullMask* nullMask, uint64_t startElemPosInList, DiskOverflowFile* overflowFileOfInMemList, const LogicalType& type) const { - auto column = tableSchema->getColumn(colIdx); - assert(column->isFlat() == true); + assert(tableSchema->getColumn(colIdx)->isFlat() == true); auto numBytesPerValue = type.getLogicalTypeID() == LogicalTypeID::INTERNAL_ID ? sizeof(offset_t) : LogicalTypeUtils::getRowLayoutSize(type); diff --git a/src/storage/buffer_manager/vm_region.cpp b/src/storage/buffer_manager/vm_region.cpp index 5bda4c80f27..ec5658bcd0a 100644 --- a/src/storage/buffer_manager/vm_region.cpp +++ b/src/storage/buffer_manager/vm_region.cpp @@ -1,6 +1,7 @@ #include "storage/buffer_manager/vm_region.h" #include "common/string_utils.h" +#include "common/system_message.h" #ifdef _WIN32 #include @@ -71,17 +72,18 @@ void VMRegion::releaseFrame(frame_idx_t frameIdx) { // See https://arvid.io/2018/04/02/memory-mapping-on-windows/#1 // Not sure what the differences are if (!VirtualFree(getFrame(frameIdx), frameSize, MEM_DECOMMIT)) { + auto code = GetLastError(); throw BufferManagerException(StringUtils::string_format( "Releasing physical memory associated with a frame failed with error code {}: {}.", - GetLastError(), std::system_category().message(GetLastError()))); + code, system_err_message(code))); } #else int error = madvise(getFrame(frameIdx), frameSize, MADV_DONTNEED); if (error != 0) { - throw BufferManagerException( - "Releasing physical memory associated with a frame failed with error code " + - std::to_string(error) + ": " + std::string(std::strerror(errno))); + throw BufferManagerException(StringUtils::string_format( + "Releasing physical memory associated with a frame failed with error code {}: {}.", + error, posix_err_message())); } #endif } diff --git a/src/storage/stats/node_table_statistics.cpp b/src/storage/stats/node_table_statistics.cpp index ab48dc25321..a285c6588ff 100644 --- a/src/storage/stats/node_table_statistics.cpp +++ b/src/storage/stats/node_table_statistics.cpp @@ -113,7 +113,7 @@ void NodeTableStatsAndDeletedIDs::setDeletedNodeOffsetsForMorsel( auto itr = deletedNodeOffsets.begin(); sel_t nextDeletedNodeOffset = *itr - morselBeginOffset; uint64_t nextSelectedPosition = 0; - for (sel_t pos = 0; pos < nodeOffsetVector->state->getOriginalSize(); ++pos) { + for (auto pos = 0; pos < nodeOffsetVector->state->getOriginalSize(); ++pos) { if (pos == nextDeletedNodeOffset) { itr++; if (itr == deletedNodeOffsets.end()) { diff --git a/src/storage/storage_structure/lists/lists_update_store.cpp b/src/storage/storage_structure/lists/lists_update_store.cpp index 82f04fdaad5..217659e4f53 100644 --- a/src/storage/storage_structure/lists/lists_update_store.cpp +++ b/src/storage/storage_structure/lists/lists_update_store.cpp @@ -322,9 +322,7 @@ ft_col_idx_t ListsUpdatesStore::getColIdxInFT(ListFileID& listFileID) const { void ListsUpdatesStore::initListsUpdatesPerTablePerDirection() { listsUpdatesPerDirection.clear(); - for (auto direction : RelDataDirectionUtils::getRelDataDirections()) { - listsUpdatesPerDirection.emplace_back(); - } + listsUpdatesPerDirection.resize(RelDataDirectionUtils::getRelDataDirections().size()); } ListsUpdatesForNodeOffset* ListsUpdatesStore::getOrCreateListsUpdatesForNodeOffset( diff --git a/src/storage/store/column_chunk.cpp b/src/storage/store/column_chunk.cpp index 34034eceab1..346a6f1a612 100644 --- a/src/storage/store/column_chunk.cpp +++ b/src/storage/store/column_chunk.cpp @@ -322,12 +322,10 @@ uint32_t ColumnChunk::getDataTypeSizeInChunk(LogicalType& dataType) { case LogicalTypeID::STRING: { return sizeof(ku_string_t); } + case LogicalTypeID::INTERNAL_ID: case LogicalTypeID::VAR_LIST: { return sizeof(offset_t); } - case LogicalTypeID::INTERNAL_ID: { - return sizeof(offset_t); - } case LogicalTypeID::SERIAL: { return sizeof(int64_t); } diff --git a/tools/benchmark/main.cpp b/tools/benchmark/main.cpp index db940e105ed..48264377f46 100644 --- a/tools/benchmark/main.cpp +++ b/tools/benchmark/main.cpp @@ -38,16 +38,16 @@ int main(int argc, char** argv) { config->bufferPoolSize = (uint64_t)stoull(getArgumentValue(arg)) << 20; } else { printf("Unrecognized option %s", arg.c_str()); - exit(1); + return 1; } } if (datasetPath.empty()) { printf("Missing --dataset input."); - exit(1); + return 1; } if (benchmarkPath.empty()) { printf("Missing --benchmark input"); - exit(1); + return 1; } auto runner = BenchmarkRunner(datasetPath, std::move(config)); try { diff --git a/tools/java_api/src/jni/kuzu_java.cpp b/tools/java_api/src/jni/kuzu_java.cpp index d234cf906a7..4bd6687fcec 100644 --- a/tools/java_api/src/jni/kuzu_java.cpp +++ b/tools/java_api/src/jni/kuzu_java.cpp @@ -6,10 +6,10 @@ #include "com_kuzudb_KuzuNative.h" #include "common/exception/exception.h" #include "common/types/types.h" -#include "common/types/value/value.h" #include "common/types/value/nested.h" #include "common/types/value/node.h" #include "common/types/value/rel.h" +#include "common/types/value/value.h" #include "json.hpp" #include "main/kuzu.h" #include "main/query_summary.h" @@ -152,8 +152,8 @@ void javaMapToCPPMap( * All Database native functions */ -JNIEXPORT jlong JNICALL Java_com_kuzudb_KuzuNative_kuzu_1database_1init( - JNIEnv* env, jclass, jstring database_path, jlong buffer_pool_size, jboolean enable_compression) { +JNIEXPORT jlong JNICALL Java_com_kuzudb_KuzuNative_kuzu_1database_1init(JNIEnv* env, jclass, + jstring database_path, jlong buffer_pool_size, jboolean enable_compression) { const char* path = env->GetStringUTFChars(database_path, JNI_FALSE); uint64_t buffer = static_cast(buffer_pool_size); @@ -887,7 +887,6 @@ JNIEXPORT jobject JNICALL Java_com_kuzudb_KuzuNative_kuzu_1value_1get_1value( return ret; } case LogicalTypeID::DATE: { - jclass retClass = env->FindClass("java/time/LocalDate"); date_t date = v->getValue(); jclass ldClass = env->FindClass("java/time/LocalDate"); jmethodID ofEpochDay = diff --git a/tools/nodejs_api/CMakeLists.txt b/tools/nodejs_api/CMakeLists.txt index 55421c56a74..04fbd826920 100644 --- a/tools/nodejs_api/CMakeLists.txt +++ b/tools/nodejs_api/CMakeLists.txt @@ -15,18 +15,18 @@ else() endif() execute_process( - COMMAND ${NPX_CMD} cmake-js print-cmakejs-include - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${NPX_CMD} cmake-js print-cmakejs-include + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_INC ) execute_process( - COMMAND ${NPX_CMD} cmake-js print-cmakejs-lib - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + COMMAND ${NPX_CMD} cmake-js print-cmakejs-lib + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_LIB ) execute_process( COMMAND ${NPX_CMD} cmake-js print-cmakejs-src - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} OUTPUT_VARIABLE CMAKE_JS_SRC ) @@ -48,10 +48,10 @@ file(COPY ${JS_SOURCE_FILES} DESTINATION "${CMAKE_CURRENT_SOURCE_DIR}/build") add_library(${PROJECT_NAME} SHARED ${CPP_SOURCE_FILES} ${CMAKE_JS_SRC}) set_target_properties(${PROJECT_NAME} PROPERTIES PREFIX "" SUFFIX ".node") -set_target_properties(${PROJECT_NAME} +set_target_properties(${PROJECT_NAME} PROPERTIES RUNTIME_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" - LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" + LIBRARY_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build" ARCHIVE_OUTPUT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/build") if(APPLE) target_link_options(${PROJECT_NAME} PRIVATE -undefined dynamic_lookup) diff --git a/tools/shell/embedded_shell.cpp b/tools/shell/embedded_shell.cpp index 008000c78cc..b6babf80646 100644 --- a/tools/shell/embedded_shell.cpp +++ b/tools/shell/embedded_shell.cpp @@ -162,7 +162,6 @@ void highlight(char* buffer, char* resultBuf, uint32_t maxLen, uint32_t cursorPo } lineLen -= thisChar; buf = buf.substr(thisChar, lineLen); - bufLen = buf.length(); } else if (buf.length() > maxLen) { uint32_t counter = 0; uint32_t lineLen = 0; @@ -171,7 +170,6 @@ void highlight(char* buffer, char* resultBuf, uint32_t maxLen, uint32_t cursorPo lineLen = utf8proc_next_grapheme(buffer, bufLen, lineLen); } buf = buf.substr(0, lineLen); - bufLen = buf.length(); } for (auto i = 0u; i < buf.length(); i++) { if ((buf[i] != ' ' && !word.empty() && word[0] == ' ') ||