From 1df08af6c293f0d64c6da5661b38924067597170 Mon Sep 17 00:00:00 2001 From: Chang Liu Date: Sat, 7 Jan 2023 00:56:12 -0500 Subject: [PATCH 1/2] Build Arrow from source --- .gitignore | 1 + CMakeLists.txt | 14 +++++---- Makefile | 20 ++++++++++--- README.md | 2 +- benchmark/Dockerfile | 6 +--- external/CMakeLists.txt | 21 ++++++++++++++ external/arrow/apache_arrow.cmake | 29 +++++++++++++++++++ scripts/dockerized-ci-tests-runner/Dockerfile | 5 ---- src/CMakeLists.txt | 3 +- 9 files changed, 79 insertions(+), 22 deletions(-) create mode 100644 external/CMakeLists.txt create mode 100644 external/arrow/apache_arrow.cmake diff --git a/.gitignore b/.gitignore index f15ca36039..4b7196f0ca 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,4 @@ __pycache__/ *.py[cod] *$py.class cmake-build-debug/ +test/unittest_temp/ diff --git a/CMakeLists.txt b/CMakeLists.txt index 69b1f5bb4f..33f5b9339f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,9 +5,6 @@ project(Kuzu VERSION 0.0.1.1 LANGUAGES CXX) find_package(Threads REQUIRED) set(CMAKE_FIND_PACKAGE_RESOLVE_SYMLINKS TRUE) -find_package(Arrow REQUIRED) -find_package(Parquet REQUIRED) - set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED True) set(CMAKE_POSITION_INDEPENDENT_CODE ON) @@ -66,8 +63,15 @@ endfunction() add_definitions(-DKUZU_ROOT_DIRECTORY="${PROJECT_SOURCE_DIR}") -set(ARROW_INSTALL ${PROJECT_SOURCE_DIR}/external/build/arrow/install) +set(ARROW_INSTALL ${CMAKE_CURRENT_SOURCE_DIR}/external/build/arrow/install) +add_library(arrow_deps STATIC IMPORTED) +set_target_properties(arrow_deps PROPERTIES IMPORTED_LOCATION ${ARROW_INSTALL}/lib/libarrow_bundled_dependencies.a) +add_library(parquet_lib STATIC IMPORTED) +set_target_properties(parquet_lib PROPERTIES IMPORTED_LOCATION ${ARROW_INSTALL}/lib/libparquet.a) +add_library(arrow_lib STATIC IMPORTED) +set_target_properties(arrow_lib PROPERTIES IMPORTED_LOCATION ${ARROW_INSTALL}/lib/libarrow.a) include_directories(${ARROW_INSTALL}/include) + 
include_directories(src/include) include_directories(third_party/antlr4_cypher/include) include_directories(third_party/antlr4_runtime/src) @@ -75,8 +79,6 @@ include_directories(third_party/spdlog) include_directories(third_party/nlohmann_json) include_directories(third_party/utf8proc/include) include_directories(third_party/pybind11/include) -include_directories(${ARROW_INCLUDE_DIR}) -include_directories(${PARQUET_INCLUDE_DIR}) add_subdirectory(third_party) add_subdirectory(src) diff --git a/Makefile b/Makefile index e43e1512c9..656caec1c4 100644 --- a/Makefile +++ b/Makefile @@ -1,4 +1,4 @@ -.PHONY: all release debug test clean +.PHONY: all release debug test clean arrow clean-external clean-all all: release @@ -27,22 +27,34 @@ ifeq ($(UBSAN), 1) SANITIZER_FLAG=-DENABLE_ADDRESS_SANITIZER=FALSE -DENABLE_THREAD_SANITIZER=TRUE -DENABLE_UBSAN=TRUE endif -release: +arrow: + cd external && \ + mkdir -p build && \ + cd build && \ + cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_BUILD_TYPE=Release ../ && \ + cmake --build . --config Release -- -j $(NUM_THREADS) + +release: arrow mkdir -p build/release && \ cd build/release && \ cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_BUILD_TYPE=Release ../.. && \ cmake --build . --config Release -- -j $(NUM_THREADS) -debug: +debug: arrow mkdir -p build/debug && \ cd build/debug && \ cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_BUILD_TYPE=Debug ../.. && \ cmake --build . --config Debug -- -j $(NUM_THREADS) -test: +test: release cd $(ROOT_DIR)/build/release/test && \ ctest +clean-external: + rm -rf external/build + clean: rm -rf build + +clean-all: clean-external clean diff --git a/README.md b/README.md index ad8f746a0a..cb8a39e026 100644 --- a/README.md +++ b/README.md @@ -17,7 +17,7 @@ Kùzu is an in-process property graph database management system (GDBMS) built f Kùzu is being actively developed at University of Waterloo as a feature-rich and usable GDBMS. 
Kùzu is available under a permissible license. So try it out and help us make it better! We welcome your feedback and feature requests. ## Build -To build from source code, Kùzu requires Cmake(>=3.11), Python 3, [Apache Arrow](https://arrow.apache.org/), [xsimd](https://xsimd.readthedocs.io/en/latest/), and a compiler that supports `C++20`. +To build from source code, Kùzu requires Cmake(>=3.11), Python 3, and a compiler that supports `C++20`. - Perform a full clean build - `make clean && make` - Run tests (optional) diff --git a/benchmark/Dockerfile b/benchmark/Dockerfile index 6561bb0629..993841019a 100644 --- a/benchmark/Dockerfile +++ b/benchmark/Dockerfile @@ -4,11 +4,7 @@ ENV CSV_DIR /csv ENV SERIALIZED_DIR /serialized ENV DEBIAN_FRONTEND=noninteractive RUN apt-get update && apt-get install -y --no-install-recommends apt-utils -RUN apt-get update && apt-get install -y lsb-release wget -RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb -RUN apt-get install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb -RUN rm ./apache-arrow* -RUN apt-get update && apt-get -y install python3-dev python3-pip python-is-python3 cmake nodejs jq curl apt-transport-https gnupg sudo git libarrow-dev libparquet-dev +RUN apt-get update && apt-get -y install python3-dev python3-pip python-is-python3 cmake nodejs jq curl apt-transport-https gnupg sudo git RUN pip3 install requests psutil RUN mkdir -p $CSV_DIR $SERIALIZED_DIR diff --git a/external/CMakeLists.txt b/external/CMakeLists.txt new file mode 100644 index 0000000000..1aa5aba9b6 --- /dev/null +++ b/external/CMakeLists.txt @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.11) + +project(Kuzu_external VERSION 0.0.1 LANGUAGES CXX) + +find_package(Threads REQUIRED) + +set(CMAKE_FIND_PACKAGE_RESOLVE_SYMLINKS TRUE) +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED True) 
+set(CMAKE_POSITION_INDEPENDENT_CODE ON) +find_program(CCACHE_PROGRAM ccache) +if (CCACHE_PROGRAM) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") +else () + find_program(CCACHE_PROGRAM sccache) + if (CCACHE_PROGRAM) + set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}") + endif () +endif () + +include(arrow/apache_arrow.cmake) diff --git a/external/arrow/apache_arrow.cmake b/external/arrow/apache_arrow.cmake new file mode 100644 index 0000000000..0765df14e1 --- /dev/null +++ b/external/arrow/apache_arrow.cmake @@ -0,0 +1,29 @@ +include(ExternalProject) + +set(CMAKE_POSITION_INDEPENDENT_CODE ON) + +ExternalProject_Add(apache_arrow + GIT_REPOSITORY "https://github.com/apache/arrow" + GIT_TAG 89f9a0948961f6e94f1ef5e4f310b707d22a3c11 + PREFIX "${CMAKE_BINARY_DIR}/arrow/" + INSTALL_DIR "${CMAKE_BINARY_DIR}/arrow/install" + CONFIGURE_COMMAND + ${CMAKE_COMMAND} -G${CMAKE_GENERATOR} -DCMAKE_BUILD_TYPE=Release + -DBUILD_WARNING_LEVEL=PRODUCTION -DARROW_DEPENDENCY_SOURCE=BUNDLED + -DCMAKE_CXX_COMPILER_LAUNCHER=${CMAKE_CXX_COMPILER_LAUNCHER} + -DCMAKE_CXX_FLAGS=${CMAKE_CXX_FLAGS} + -DCMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD} + -DCMAKE_C_COMPILER_LAUNCHER=${CMAKE_C_COMPILER_LAUNCHER} + -DCMAKE_INSTALL_PREFIX=${CMAKE_BINARY_DIR}/arrow/install + -DCMAKE_MODULE_PATH=${CMAKE_MODULE_PATH} + -DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE} -DARROW_ALTIVEC=OFF + -DARROW_USE_CCACHE=ON -DARROW_BOOST_USE_SHARED=OFF -DARROW_BUILD_SHARED=OFF + -DARROW_BUILD_STATIC=ON -DARROW_COMPUTE=OFF -DARROW_CSV=ON -DARROW_IPC=ON -DARROW_JEMALLOC=OFF -DARROW_JSON=OFF + -DARROW_PARQUET=ON -DARROW_SIMD_LEVEL=NONE -DARROW_RUNTIME_SIMD_LEVEL=NONE -DARROW_WITH_BROTLI=OFF + -DARROW_WITH_LZ4=ON -Dlz4_SOURCE=BUNDLED -DARROW_WITH_PROTOBUF=OFF -DARROW_WITH_RAPIDJSON=OFF + -DARROW_WITH_SNAPPY=ON -DSnappy_SOURCE=BUNDLED -DARROW_WITH_ZLIB=ON -DZLIB_SOURCE=BUNDLED + -DARROW_WITH_ZSTD=ON -Dzstd_SOURCE=BUNDLED -DThrift_SOURCE=BUNDLED -DBOOST_SOURCE=BUNDLED -DARROW_WITH_RE2=OFF + 
-DARROW_WITH_UTF8PROC=OFF -DARROW_BUILD_BENCHMARKS=OFF -DARROW_BUILD_EXAMPLES=OFF -DARROW_BUILD_INTEGRATION=OFF + -DARROW_BUILD_UTILITIES=OFF -DARROW_BUILD_TESTS=OFF -DARROW_ENABLE_TIMING_TESTS=OFF -DARROW_FUZZING=OFF + <SOURCE_DIR>/cpp + UPDATE_COMMAND "") diff --git a/scripts/dockerized-ci-tests-runner/Dockerfile b/scripts/dockerized-ci-tests-runner/Dockerfile index fc09149bf3..bf46eb525a 100644 --- a/scripts/dockerized-ci-tests-runner/Dockerfile +++ b/scripts/dockerized-ci-tests-runner/Dockerfile @@ -12,11 +12,6 @@ RUN apt-get install -y clang-13 RUN apt-get install -y nodejs RUN apt-get install -y jq RUN apt-get install -y ca-certificates lsb-release wget -RUN wget https://apache.jfrog.io/artifactory/arrow/$(lsb_release --id --short | tr 'A-Z' 'a-z')/apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb -RUN apt-get install -y ./apache-arrow-apt-source-latest-$(lsb_release --codename --short).deb -RUN rm ./apache-arrow* -RUN apt-get update -RUN apt-get install -y libarrow-dev libparquet-dev # Install GitHub action runner RUN mkdir /actions-runner diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7e7f1a0f47..6752630149 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -11,7 +11,8 @@ add_subdirectory(storage) add_subdirectory(transaction) add_library(kuzu STATIC ${ALL_OBJECT_FILES}) -target_link_libraries(kuzu PUBLIC antlr4_cypher antlr4_runtime utf8proc Threads::Threads Arrow::arrow_shared parquet_shared) +target_link_libraries(kuzu PUBLIC antlr4_cypher antlr4_runtime utf8proc + Threads::Threads parquet_lib arrow_lib arrow_deps) target_include_directories( kuzu PUBLIC $ $) From e47c8c7afc5c9dd164d1090965757b9800bf91f0 Mon Sep 17 00:00:00 2001 From: Chang Liu Date: Sat, 7 Jan 2023 01:37:31 -0500 Subject: [PATCH 2/2] Modify CI test Docker --- .github/workflows/ci-workflow.yml | 10 +++---- scripts/dockerized-ci-tests-runner/Dockerfile | 29 ++++++++----------- scripts/dockerized-ci-tests-runner/start.sh | 2 +- 3 files changed, 18 insertions(+),
23 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 866424befc..e54e7279f5 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -27,13 +27,13 @@ jobs: runs-on: kuzu-self-hosted-testing steps: - uses: actions/checkout@v2 - - run: pip install -r tools/python_api/requirements_dev.txt + - run: pip3 install --user -r tools/python_api/requirements_dev.txt - name: build - run: CC=clang-13 CXX=clang++-13 make release NUM_THREADS=32 + run: CC=clang-14 CXX=clang++-14 make release NUM_THREADS=32 - name: test - run: CC=clang-13 CXX=clang++-13 make test + run: CC=clang-14 CXX=clang++-14 make test clang-formatting-check: name: clang-formatting-check @@ -44,8 +44,8 @@ jobs: with: repository: Sarcasm/run-clang-format path: run-clang-format - - run: python3 run-clang-format/run-clang-format.py --clang-format-executable /usr/bin/clang-format -r src/ - - run: python3 run-clang-format/run-clang-format.py --clang-format-executable /usr/bin/clang-format -r test/ + - run: python3 run-clang-format/run-clang-format.py --clang-format-executable /usr/bin/clang-format-11 -r src/ + - run: python3 run-clang-format/run-clang-format.py --clang-format-executable /usr/bin/clang-format-11 -r test/ benchmark: name: benchmark diff --git a/scripts/dockerized-ci-tests-runner/Dockerfile b/scripts/dockerized-ci-tests-runner/Dockerfile index bf46eb525a..baf3bf0139 100644 --- a/scripts/dockerized-ci-tests-runner/Dockerfile +++ b/scripts/dockerized-ci-tests-runner/Dockerfile @@ -1,26 +1,21 @@ -FROM python:3.9 - -ENV RUNNER_ALLOW_RUNASROOT=1 +FROM ubuntu:22.04 +ENV DEBIAN_FRONTEND=noninteractive # Install dependencies -RUN apt-get update -RUN apt-get install -y apt-transport-https curl gnupg -RUN apt-get install -y cmake -RUN apt-get install -y clang -RUN apt-get install -y clang-format -RUN apt-get install -y clang-13 -RUN apt-get install -y nodejs -RUN apt-get install -y jq -RUN apt-get install -y ca-certificates 
lsb-release wget +RUN apt-get update && apt-get install -y --no-install-recommends apt-utils +RUN apt-get update && apt-get install -y g++ gcc clang-14 python3-dev python3-pip python-is-python3 cmake nodejs jq curl apt-transport-https gnupg sudo git clang-format-11 ca-certificates lsb-release wget + +RUN useradd --create-home runner +USER runner -# Install GitHub action runner -RUN mkdir /actions-runner -WORKDIR /actions-runner -RUN curl -o actions-runner-linux-x64-2.298.2.tar.gz -L https://github.com/actions/runner/releases/download/v2.298.2/actions-runner-linux-x64-2.298.2.tar.gz +RUN mkdir /home/runner/actions-runner +WORKDIR /home/runner/actions-runner +RUN curl -o actions-runner-linux-x64-2.298.2.tar.gz -L\ + https://github.com/actions/runner/releases/download/v2.298.2/actions-runner-linux-x64-2.298.2.tar.gz RUN echo "0bfd792196ce0ec6f1c65d2a9ad00215b2926ef2c416b8d97615265194477117 actions-runner-linux-x64-2.298.2.tar.gz" | shasum -a 256 RUN tar xzf ./actions-runner-linux-x64-2.298.2.tar.gz -COPY start.sh start.sh +COPY --chown=runner:runner start.sh start.sh RUN chmod +x start.sh ENTRYPOINT ["./start.sh"] diff --git a/scripts/dockerized-ci-tests-runner/start.sh b/scripts/dockerized-ci-tests-runner/start.sh index a0d2d4fcf2..0cf792e16f 100644 --- a/scripts/dockerized-ci-tests-runner/start.sh +++ b/scripts/dockerized-ci-tests-runner/start.sh @@ -1,6 +1,6 @@ #!/bin/bash -cd /actions-runner +cd /home/runner/actions-runner # Get registration token REG_TOKEN=$(curl \