Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CMake Python wheel pipeline for macOS and Linux #1240

Merged
merged 1 commit into from
Feb 6, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/linux-wheel-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,9 @@ jobs:
build-linux-wheels:
runs-on: self-hosted-linux-building
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3

- name: build-wheels
- name: Build Wheels
working-directory: ./scripts/pip-package/
run: bash build_all_packages.sh

Expand Down
16 changes: 8 additions & 8 deletions .github/workflows/mac-wheel-workflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,18 @@ env:

jobs:
build-wheels:
runs-on: self-hosted-mac
runs-on: self-hosted-mac-arm
steps:
- uses: actions/checkout@v2
- name: create-source-distribution
- uses: actions/checkout@v3
- name: Create source distribution
working-directory: ./scripts/pip-package/
run: |
rm -rf wheelhouse kuzu.tar.gz
mkdir wheelhouse
bash package_tar.sh

- name: build-arm-wheels
uses: pypa/cibuildwheel@v2.11.1
- name: Build wheels for Apple Silicon
uses: pypa/cibuildwheel@v2.12.0
env:
CIBW_SKIP: pp*
CIBW_ARCHS_MACOS: arm64
Expand All @@ -34,10 +34,10 @@ jobs:
package-dir: ./scripts/pip-package/kuzu.tar.gz
output-dir: ./scripts/pip-package/wheelhouse

- name: build-x86-wheel
uses: pypa/cibuildwheel@v2.11.1
- name: Build wheels for Intel
uses: pypa/cibuildwheel@v2.12.0
env:
CIBW_SKIP: pp*
CIBW_SKIP: pp* cp36*
CIBW_ARCHS_MACOS: x86_64
CIBW_BUILD_VERBOSITY: 3
MACOSX_DEPLOYMENT_TARGET: 10.15
Expand Down
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,7 @@ scripts/antlr4/antlr4.jar

# macOS
.DS_Store

# Archive files
*.zip
*.tar.gz
8 changes: 8 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,14 @@ set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

if(DEFINED ENV{PYBIND11_PYTHON_VERSION})
set(PYBIND11_PYTHON_VERSION $ENV{PYBIND11_PYTHON_VERSION})
endif()

if(DEFINED ENV{PYTHON_EXECUTABLE})
set(PYTHON_EXECUTABLE $ENV{PYTHON_EXECUTABLE})
endif()

find_program(CCACHE_PROGRAM ccache)
if (CCACHE_PROGRAM)
set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE "${CCACHE_PROGRAM}")
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<div align="center">
<img src="/logo/kuzu-logo.png" height="100">
<img src="https://kuzudb.com/kuzu-logo.png" height="100">
</div>

# Kùzu
Expand Down
10 changes: 1 addition & 9 deletions scripts/pip-package/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,11 @@ FROM quay.io/pypa/manylinux2014_x86_64

ENV RUNNER_ALLOW_RUNASROOT=1

# Configure bazel
RUN curl https://copr.fedorainfracloud.org/coprs/vbatts/bazel/repo/epel-7/vbatts-bazel-epel-7.repo > /etc/yum.repos.d/vbatts-bazel-epel-7.repo

# Install dependencies
RUN yum --disablerepo=epel -y upgrade curl ca-certificates
RUN yum update -y
RUN yum install -y java-11-openjdk-devel
RUN yum install -y bazel4
RUN yum install -y nodejs
RUN yum install -y jq
RUN yum install -y git
RUN yum install -y perl-Digest-SHA
RUN yum -y install libicu libicu-devel
RUN yum install -y cmake nodejs jq git perl-Digest-SHA libicu libicu-devel

# Install GitHub action runner
RUN mkdir /actions-runner
Expand Down
13 changes: 6 additions & 7 deletions scripts/pip-package/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,14 @@ The self-hosted runner needs to be properly configured for the pipeline to run.

- OS requirement: to support C++20 and proper cross-compilation for ARM64, Xcode 13+ and macOS 11+ are required. If the hardware does not officially support macOS 11, consider using [OpenCore Legacy Patcher](https://dortania.github.io/OpenCore-Legacy-Patcher/).
- Machine configurations:
- Username: The username must be set to `runner` to be consistent with the GitHub-hosted runners. Otherwise, cibuildwheel configuration step will fail due to writing to directories that does not exist.
- `sudo` without password: The `runner` user needs to have the permission to `sudo` without password. Otherwise, cibuildwheel will not be able to install Python automatically due to not able to take user input for the password. To enable `sudo` without password, create a file (with arbitrary name) under `/private/etc/sudoers.d/` and add `runner ALL = (ALL) NOPASSWD: ALL` to it.
- Keep the machine from going to sleep automatically: under System Preferences > Energy Saver, check "Prevent your Mac from sleeping automatically when the display is off" and uncheck "Put hard disks to sleep when possible".
- Automatic login: the GitHub self-hosted runner service on macOS is configured for the user space only. For the listener to be back online automatically after each reboot without manually logging in, automatic login should be turned on for `runner` user. Please follow [this instruction](https://support.apple.com/en-us/HT201476) to configure it.
- For the ease of remote management, consider enabling `sshd` and configure a DDNS service to keep the hostname updated with the correct IP address.
- [REQUIRED] Username: The username must be set to `runner` to be consistent with the GitHub-hosted runners. Otherwise, the cibuildwheel configuration step will fail because it writes to directories that do not exist.
- [REQUIRED] `sudo` without password: The `runner` user needs to have permission to run `sudo` without a password. Otherwise, cibuildwheel will not be able to install Python automatically, because it cannot take user input for the password. To enable `sudo` without a password, create a file (with an arbitrary name) under `/private/etc/sudoers.d/` and add `runner ALL = (ALL) NOPASSWD: ALL` to it.
- [REQUIRED] Keep the machine from going to sleep automatically: under System Preferences > Energy Saver, check "Prevent your Mac from sleeping automatically when the display is off" and uncheck "Put hard disks to sleep when possible". Alternatively, sleep can be disabled with `sudo pmset disablesleep 1`.
- [OPTIONAL] Automatic login: the GitHub self-hosted runner service on macOS is configured for the user space only. For the listener to be back online automatically after each reboot without manually logging in, automatic login should be turned on for `runner` user. Please follow [this instruction](https://support.apple.com/en-us/HT201476) to configure it.
- [OPTIONAL] For ease of remote management, consider enabling `sshd` and configuring a DDNS service to keep the hostname updated with the correct IP address.
- Dependencies installation:
- Xcode toolchain: after installing Xcode, run `xcode-select --install` to install Xcode Command Line Tools.
- Homebrew: follow the instructions on [brew.sh](https://brew.sh) to install it.
- Bazel: `brew install bazel`.
- OpenJDK-11: Installing bazel via Homebrew should also install it as a dependency automatically. However, For it to be detected by macOS, we need to run `sudo ln -sfn /usr/local/opt/openjdk@11/libexec/openjdk.jdk /Library/Java/JavaVirtualMachines/openjdk-11.jdk`.
- CMake: `brew install cmake`.
- Pipx: `brew install pipx` and `pipx ensurepath`.
- GitHub self-hosted runner configuration: follow [this documentation](https://docs.github.com/en/actions/hosting-your-own-runners/adding-self-hosted-runners) to add the self-hosted runner and [this documentation](https://docs.github.com/en/actions/hosting-your-own-runners/configuring-the-self-hosted-runner-application-as-a-service) to configure the self-hosted runner as a service.
5 changes: 2 additions & 3 deletions scripts/pip-package/build_all_packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,12 @@ chmod +x ./package_tar.sh
rm -rf wheelhouse kuzu.tar.gz && ./package_tar.sh
mkdir wheelhouse

# Build wheels, excluding pypy platforms
# Build wheels, excluding pypy platforms and python 3.6
for PYBIN in /opt/python/*/bin; do
if [[ $PYBIN == *"pypy"* ]]; then
if [[ $PYBIN == *"pypy"* ]] || [[ $PYBIN == *"cp36"* ]]; then
continue
fi
echo "Building wheel for $PYBIN..."
"${PYBIN}/pip" install -r ../../tools/python_api/requirements_dev.txt
"${PYBIN}/pip" wheel kuzu.tar.gz --no-deps -w wheelhouse/
done

Expand Down
1 change: 0 additions & 1 deletion scripts/pip-package/kuzu/__init__.py

This file was deleted.

20 changes: 18 additions & 2 deletions scripts/pip-package/package_tar.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
#!/bin/bash
# Remove existing tar.gz
rm -rf kuzu.tar.gz

# Collect source files
# Add necessary files and directories
mkdir -p kuzu
cp ../../LICENSE ./LICENSE.txt
cp ./README.md ./README_PYTHON_BUILD.md
cp ../../README.md ./README.md

# Collect source files
tar --exclude="$(pwd)" \
--exclude="./build" \
--exclude="./scripts" \
Expand All @@ -15,13 +22,22 @@ rm -rf kuzu-source.tar

# Add all files under current directory
touch sdist.tar
tar -cf sdist.tar --exclude=./sdist.tar .
tar -cf sdist.tar \
--exclude=./sdist.tar \
--exclude="./.?*" \
--exclude="./Dockerfile" \
--exclude="./README_PYTHON_BUILD.md" \
--exclude="./*.sh" .
rm -rf sdist && mkdir sdist
tar -xf sdist.tar -C ./sdist
rm -rf sdist.tar

# Create tar.gz for PyPI
rm -rf kuzu.tar.gz
tar -czf kuzu.tar.gz sdist

# Clean up
rm -rf sdist kuzu-source
rm -rf LICENSE.txt
rm -rf kuzu
mv README_PYTHON_BUILD.md README.md
78 changes: 47 additions & 31 deletions scripts/pip-package/setup.py
Original file line number Diff line number Diff line change
@@ -1,60 +1,76 @@
import platform
import shutil
import subprocess
import multiprocessing
import os
import sys

from setuptools import setup, Extension
from setuptools.command.build_ext import build_ext
from setuptools.command.build_py import build_py as _build_py

num_cores = multiprocessing.cpu_count()
base_dir = os.path.dirname(__file__)

with open(os.path.join(base_dir, 'kuzu-source', 'tools', 'python_api', 'requirements_dev.txt')) as f:
requirements = f.read().splitlines()


class BazelExtension(Extension):
class CMakeExtension(Extension):
def __init__(self, name: str, sourcedir: str = "") -> None:
super().__init__(name, sources=[])
self.sourcedir = os.path.abspath(sourcedir)


class BazelBuild(build_ext):
def build_extension(self, ext: BazelExtension) -> None:
self.announce("Building native extension...", level=3)
args = ['--cxxopt=-std=c++2a', '--cxxopt=-O3',
'--cxxopt=-fPIC', '--cxxopt=-DNDEBUG']
# Pass the platform architecture for arm64 to bazel for
class CMakeBuild(build_ext):
def build_extension(self, ext: CMakeExtension) -> None:
self.announce("Building native extension...")
# Pass the platform architecture for arm64 to cmake for
# cross-compilation.
env_vars = os.environ.copy()
python_version = '.'.join(
(str(sys.version_info.major), str(sys.version_info.minor)))
self.announce("Python version is %s" % python_version)
env_vars['PYBIND11_PYTHON_VERSION'] = python_version
env_vars['PYTHON_EXECUTABLE'] = sys.executable

if sys.platform == 'darwin':
archflags = os.getenv("ARCHFLAGS", "")

if len(archflags) > 0:
self.announce("The ARCHFLAGS is set to '%s'." %
archflags, level=3)
archflags)
if archflags == "-arch arm64":
env_vars['CMAKE_OSX_ARCHITECTURES'] = "arm64"
elif archflags == "-arch x86_64":
env_vars['CMAKE_OSX_ARCHITECTURES'] = "x86_64"
else:
self.announce(
"The ARCHFLAGS is not valid and will be ignored.")
else:
self.announce("The ARCHFLAGS is not set.", level=3)
if "arm64" in archflags and platform.machine() == "x86_64":
args.append("--macos_cpus=arm64")
args.append("--cpu=darwin_arm64")

# It seems bazel does not automatically pick up
# MACOSX_DEPLOYMENT_TARGET from the environment, so we need to pass
# it explicitly.
if "MACOSX_DEPLOYMENT_TARGET" in os.environ:
args.append("--macos_minimum_os=" +
os.environ["MACOSX_DEPLOYMENT_TARGET"])
full_cmd = ['bazel', 'build', *args, '//tools/python_api:all']
env_vars = os.environ.copy()
env_vars['PYTHON_BIN_PATH'] = sys.executable
self.announce("The ARCHFLAGS is not set.")

deploy_target = os.getenv("MACOSX_DEPLOYMENT_TARGET", "")
if len(deploy_target) > 0:
self.announce("The deployment target is set to '%s'." %
deploy_target)
env_vars['CMAKE_OSX_DEPLOYMENT_TARGET'] = deploy_target


build_dir = os.path.join(ext.sourcedir, 'kuzu-source')

# Clean the build directory.
subprocess.run(['make', 'clean'], check=True, cwd=build_dir)

# Build the native extension.
full_cmd = ['make', 'release', 'NUM_THREADS=%d' % num_cores]
subprocess.run(full_cmd, cwd=build_dir, check=True, env=env_vars)
self.announce("Done building native extension.", level=3)
self.announce("Copying native extension...", level=3)
shutil.copyfile(os.path.join(build_dir, 'bazel-bin', 'tools', 'python_api',
'_kuzu.so'), os.path.join(ext.sourcedir, ext.name, '_kuzu.so'))
self.announce("Done copying native extension.", level=3)
self.announce("Done building native extension.")
self.announce("Copying native extension...")
dst = os.path.join(ext.sourcedir, ext.name)
shutil.rmtree(dst, ignore_errors=True)
shutil.copytree(os.path.join(build_dir, 'tools', 'python_api', 'build',
ext.name), dst)
self.announce("Done copying native extension.")


class BuildExtFirst(_build_py):
Expand All @@ -66,8 +82,8 @@ def run(self):

setup(name='kuzu',
version=os.environ['PYTHON_PACKAGE_VERSION'] if 'PYTHON_PACKAGE_VERSION' in os.environ else '0.0.1',
install_requires=requirements,
ext_modules=[BazelExtension(
install_requires=[],
ext_modules=[CMakeExtension(
name="kuzu", sourcedir=base_dir)],
description='KuzuDB Python API',
license='MIT',
Expand All @@ -78,6 +94,6 @@ def run(self):
include_package_data=True,
cmdclass={
'build_py': BuildExtFirst,
'build_ext': BazelBuild,
'build_ext': CMakeBuild,
}
)