diff --git a/Makefile b/Makefile index e43e1512c9..59ad1ec34a 100644 --- a/Makefile +++ b/Makefile @@ -39,7 +39,6 @@ debug: cmake $(GENERATOR) $(FORCE_COLOR) $(SANITIZER_FLAG) -DCMAKE_BUILD_TYPE=Debug ../.. && \ cmake --build . --config Debug -- -j $(NUM_THREADS) - test: cd $(ROOT_DIR)/build/release/test && \ ctest diff --git a/README.md b/README.md index 644911648a..972551f1d8 100644 --- a/README.md +++ b/README.md @@ -26,9 +26,18 @@ To build from source code, Kùzu requires Cmake(>=3.11), Python 3, and a compile After build, our CLI binary `kuzu_shell` is available under the directory `build/release/tools/shell/`. ## Installation -### Precompiled binary +### Install from source +```bash +cd scripts/pip-package +chmod +x package_tar.sh +./package_tar.sh +pip install kuzu.tar.gz +``` + +### Install from latest release +#### Precompiled binary Precompiled binary of our latest release can be downloaded [here](https://github.com/kuzudb/kuzu/releases/tag/0.0.1). -### Python package +#### Python package Our Python package can be directly install through pip. ``` pip install kuzu @@ -75,6 +84,7 @@ conn.execute("COPY knows FROM 'dataset/tinysnb/eKnows.csv';") result = conn.execute("MATCH (b:person)<-[e1:knows]-(a:person)-[e2:knows]->(c:person) RETURN COUNT(*)") while result.hasNext(): print(result.getNext()) +result.close() ``` Refer to our [Data Import](https://kuzudb.com/docs/data-import) and [Cypher](https://kuzudb.com/docs/cypher) section for more information. 
diff --git a/scripts/pip-package/setup.py b/scripts/pip-package/setup.py index c7fd09e539..b53c07e05f 100644 --- a/scripts/pip-package/setup.py +++ b/scripts/pip-package/setup.py @@ -1,4 +1,3 @@ -import platform import shutil import subprocess import os @@ -9,42 +8,23 @@ from setuptools.command.build_py import build_py as _build_py base_dir = os.path.dirname(__file__) +print("base_dir: " + base_dir) with open(os.path.join(base_dir, 'kuzu-source', 'tools', 'python_api', 'requirements_dev.txt')) as f: requirements = f.read().splitlines() -class BazelExtension(Extension): +class BuildExtension(Extension): def __init__(self, name: str, sourcedir: str = "") -> None: super().__init__(name, sources=[]) self.sourcedir = os.path.abspath(sourcedir) + print("sourcedir: " + self.sourcedir) -class BazelBuild(build_ext): - def build_extension(self, ext: BazelExtension) -> None: +class Build(build_ext): + def build_extension(self, ext: BuildExtension) -> None: self.announce("Building native extension...", level=3) - args = ['--cxxopt=-std=c++2a', '--cxxopt=-O3', - '--cxxopt=-fPIC', '--cxxopt=-DNDEBUG'] - # Pass the platform architecture for arm64 to bazel for - # cross-compilation. - if sys.platform == 'darwin': - archflags = os.getenv("ARCHFLAGS", "") - if len(archflags) > 0: - self.announce("The ARCHFLAGS is set to '%s'." % - archflags, level=3) - else: - self.announce("The ARCHFLAGS is not set.", level=3) - if "arm64" in archflags and platform.machine() == "x86_64": - args.append("--macos_cpus=arm64") - args.append("--cpu=darwin_arm64") - - # It seems bazel does not automatically pick up - # MACOSX_DEPLOYMENT_TARGETfrom the environment, so we need to pass - # it explicitly. 
- if "MACOSX_DEPLOYMENT_TARGET" in os.environ: - args.append("--macos_minimum_os=" + - os.environ["MACOSX_DEPLOYMENT_TARGET"]) - full_cmd = ['bazel', 'build', *args, '//tools/python_api:all'] + full_cmd = ['make', 'release', 'NUM_THREADS=10'] env_vars = os.environ.copy() env_vars['PYTHON_BIN_PATH'] = sys.executable build_dir = os.path.join(ext.sourcedir, 'kuzu-source') @@ -52,8 +32,8 @@ def build_extension(self, ext: BazelExtension) -> None: subprocess.run(full_cmd, cwd=build_dir, check=True, env=env_vars) self.announce("Done building native extension.", level=3) self.announce("Copying native extension...", level=3) - shutil.copyfile(os.path.join(build_dir, 'bazel-bin', 'tools', 'python_api', - '_kuzu.so'), os.path.join(ext.sourcedir, ext.name, '_kuzu.so')) + shutil.copyfile(os.path.join(build_dir, 'tools', 'python_api', 'build', '_kuzu.so'), + os.path.join(ext.sourcedir, ext.name, '_kuzu.so')) self.announce("Done copying native extension.", level=3) @@ -65,9 +45,9 @@ def run(self): setup(name='kuzu', - version=os.environ['PYTHON_PACKAGE_VERSION'] if 'PYTHON_PACKAGE_VERSION' in os.environ else '0.0.1', + version=os.environ['PYTHON_PACKAGE_VERSION'] if 'PYTHON_PACKAGE_VERSION' in os.environ else '0.0.2', install_requires=requirements, - ext_modules=[BazelExtension( + ext_modules=[BuildExtension( name="kuzu", sourcedir=base_dir)], description='KuzuDB Python API', license='MIT', @@ -78,6 +58,6 @@ def run(self): include_package_data=True, cmdclass={ 'build_py': BuildExtFirst, - 'build_ext': BazelBuild, + 'build_ext': Build, } ) diff --git a/third_party/pybind11/tools/pybind11Tools.cmake b/third_party/pybind11/tools/pybind11Tools.cmake index 66ad00a478..0032d2f24e 100644 --- a/third_party/pybind11/tools/pybind11Tools.cmake +++ b/third_party/pybind11/tools/pybind11Tools.cmake @@ -149,7 +149,7 @@ endif() function(pybind11_extension name) # The prefix and extension are provided by FindPythonLibsNew.cmake set_target_properties(${name} PROPERTIES PREFIX 
"${PYTHON_MODULE_PREFIX}" - SUFFIX "${PYTHON_MODULE_EXTENSION}") + SUFFIX ".so") endfunction() # Build a Python extension module: diff --git a/tools/python_api/CMakeLists.txt b/tools/python_api/CMakeLists.txt index 572a40709b..46a3520b81 100644 --- a/tools/python_api/CMakeLists.txt +++ b/tools/python_api/CMakeLists.txt @@ -1,4 +1,3 @@ -include(FetchContent) project(_kuzu) set(CMAKE_CXX_STANDARD 20) diff --git a/tools/python_api/README.md b/tools/python_api/README.md deleted file mode 100644 index 6e27284707..0000000000 --- a/tools/python_api/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Python APIs - -## Build \ No newline at end of file diff --git a/tools/python_api/include/py_connection.h b/tools/python_api/include/py_connection.h index e074c215e8..8be32e5d16 100644 --- a/tools/python_api/include/py_connection.h +++ b/tools/python_api/include/py_connection.h @@ -6,12 +6,11 @@ class PyConnection { public: - static void initialize(py::handle& m); - explicit PyConnection(PyDatabase* pyDatabase, uint64_t numThreads); - ~PyConnection() = default; + static void initialize(py::handle& m); + unique_ptr execute(const string& query, py::list params); void setMaxNumThreadForExec(uint64_t numThreads); diff --git a/tools/python_api/include/py_database.h b/tools/python_api/include/py_database.h index b08b4565e9..60114795cc 100644 --- a/tools/python_api/include/py_database.h +++ b/tools/python_api/include/py_database.h @@ -9,18 +9,17 @@ class PyDatabase { friend class PyConnection; public: + explicit PyDatabase(const string& databasePath, uint64_t bufferPoolSize); + ~PyDatabase() = default; + inline void setLoggingLevel(spdlog::level::level_enum logging_level) { database->setLoggingLevel(logging_level); } static void initialize(py::handle& m); - explicit PyDatabase(const string& databasePath, uint64_t bufferPoolSize); - void resizeBufferManager(uint64_t newSize); - ~PyDatabase() = default; - private: unique_ptr database; }; diff --git a/tools/python_api/include/py_query_result.h 
b/tools/python_api/include/py_query_result.h index 1411f27c76..eefc28f64c 100644 --- a/tools/python_api/include/py_query_result.h +++ b/tools/python_api/include/py_query_result.h @@ -9,16 +9,17 @@ class PyQueryResult { friend class PyConnection; public: - static void initialize(py::handle& m); - - PyQueryResult() = default; + PyQueryResult(); ~PyQueryResult() = default; + static void initialize(py::handle& m); + bool hasNext(); py::list getNext(); - void writeToCSV(py::str filename, py::str delimiter, py::str escapeCharacter, py::str newline); + void writeToCSV(const py::str& filename, const py::str& delimiter, + const py::str& escapeCharacter, const py::str& newline); void close(); diff --git a/tools/python_api/py_connection.cpp b/tools/python_api/py_connection.cpp index 446b78f711..68254a7b2f 100644 --- a/tools/python_api/py_connection.cpp +++ b/tools/python_api/py_connection.cpp @@ -2,6 +2,15 @@ #include "datetime.h" // from Python +PyConnection::PyConnection(PyDatabase* pyDatabase, uint64_t numThreads) { + conn = make_unique(pyDatabase->database.get()); + if (numThreads > 0) { + conn->setMaxNumThreadForExec(numThreads); + } + auto atexit = py::module_::import("atexit"); + atexit.attr("register")(py::cpp_function([&]() { conn.reset(); })); +} + void PyConnection::initialize(py::handle& m) { py::class_(m, "connection") .def(py::init(), py::arg("database"), py::arg("num_threads") = 0) @@ -12,16 +21,9 @@ void PyConnection::initialize(py::handle& m) { PyDateTime_IMPORT; } -PyConnection::PyConnection(PyDatabase* pyDatabase, uint64_t numThreads) { - conn = make_unique(pyDatabase->database.get()); - if (numThreads > 0) { - conn->setMaxNumThreadForExec(numThreads); - } -} - unique_ptr PyConnection::execute(const string& query, py::list params) { auto preparedStatement = conn->prepare(query); - auto parameters = transformPythonParameters(params); + auto parameters = transformPythonParameters(std::move(params)); py::gil_scoped_release release; auto queryResult = 
conn->executeWithParams(preparedStatement.get(), parameters); py::gil_scoped_acquire acquire; @@ -29,7 +31,7 @@ unique_ptr PyConnection::execute(const string& query, py::list pa throw runtime_error(queryResult->getErrorMessage()); } auto pyQueryResult = make_unique(); - pyQueryResult->queryResult = move(queryResult); + pyQueryResult->queryResult = std::move(queryResult); return pyQueryResult; } diff --git a/tools/python_api/py_database.cpp b/tools/python_api/py_database.cpp index b854b1a936..9bf6e6375c 100644 --- a/tools/python_api/py_database.cpp +++ b/tools/python_api/py_database.cpp @@ -1,13 +1,5 @@ #include "include/py_database.h" -void PyDatabase::initialize(py::handle& m) { - py::class_(m, "database") - .def(py::init(), py::arg("database_path"), - py::arg("buffer_pool_size") = 0) - .def("resize_buffer_manager", &PyDatabase::resizeBufferManager, py::arg("new_size")) - .def("set_logging_level", &PyDatabase::setLoggingLevel, py::arg("logging_level")); -} - PyDatabase::PyDatabase(const string& databasePath, uint64_t bufferPoolSize) { auto systemConfig = SystemConfig(); if (bufferPoolSize > 0) { @@ -17,6 +9,16 @@ PyDatabase::PyDatabase(const string& databasePath, uint64_t bufferPoolSize) { bufferPoolSize * StorageConfig::LARGE_PAGES_BUFFER_RATIO; } database = make_unique(DatabaseConfig(databasePath), systemConfig); + auto atexit = py::module_::import("atexit"); + atexit.attr("register")(py::cpp_function([&]() { database.reset(); })); +} + +void PyDatabase::initialize(py::handle& m) { + py::class_(m, "database") + .def(py::init(), py::arg("database_path"), + py::arg("buffer_pool_size") = 0) + .def("resize_buffer_manager", &PyDatabase::resizeBufferManager, py::arg("new_size")) + .def("set_logging_level", &PyDatabase::setLoggingLevel, py::arg("logging_level")); } void PyDatabase::resizeBufferManager(uint64_t newSize) { diff --git a/tools/python_api/py_query_result.cpp b/tools/python_api/py_query_result.cpp index c0ddfb8216..9ef53c04b9 100644 --- 
a/tools/python_api/py_query_result.cpp +++ b/tools/python_api/py_query_result.cpp @@ -1,6 +1,5 @@ #include "include/py_query_result.h" -#include #include #include "datetime.h" // python lib @@ -8,6 +7,11 @@ using namespace kuzu::common; +PyQueryResult::PyQueryResult() { + auto atexit = py::module_::import("atexit"); + atexit.attr("register")(py::cpp_function([&]() { queryResult.reset(); })); +} + void PyQueryResult::initialize(py::handle& m) { py::class_(m, "result") .def("hasNext", &PyQueryResult::hasNext) @@ -35,11 +39,11 @@ py::list PyQueryResult::getNext() { for (auto i = 0u; i < tuple->len(); ++i) { result[i] = convertValueToPyObject(*tuple->getResultValue(i)); } - return move(result); + return std::move(result); } -void PyQueryResult::writeToCSV( - py::str filename, py::str delimiter, py::str escapeCharacter, py::str newline) { +void PyQueryResult::writeToCSV(const py::str& filename, const py::str& delimiter, + const py::str& escapeCharacter, const py::str& newline) { std::string delimiterStr = delimiter; std::string escapeCharacterStr = escapeCharacter; std::string newlineStr = newline; @@ -104,7 +108,7 @@ py::object PyQueryResult::convertValueToPyObject(const ResultValue& value) { for (auto i = 0u; i < listVal.size(); ++i) { list.append(convertValueToPyObject(listVal[i])); } - return move(list); + return std::move(list); } default: throw NotImplementedException("Unsupported type2: " + Types::dataTypeToString(dataType)); @@ -121,7 +125,7 @@ py::list PyQueryResult::getColumnDataTypes() { for (auto i = 0u; i < columnDataTypes.size(); ++i) { result[i] = py::cast(Types::dataTypeToString(columnDataTypes[i])); } - return move(result); + return std::move(result); } py::list PyQueryResult::getColumnNames() { @@ -130,5 +134,5 @@ py::list PyQueryResult::getColumnNames() { for (auto i = 0u; i < columnNames.size(); ++i) { result[i] = py::cast(columnNames[i]); } - return move(result); + return std::move(result); }