Skip to content

Commit

Permalink
Revert "Implement Python Import Caching"
Browse files Browse the repository at this point in the history
  • Loading branch information
acquamarin committed Feb 26, 2024
1 parent c8146fa commit 1653b40
Show file tree
Hide file tree
Showing 14 changed files with 37 additions and 279 deletions.
2 changes: 0 additions & 2 deletions tools/python_api/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,6 @@ file(GLOB SOURCE_PY
pybind11_add_module(_kuzu
SHARED
src_cpp/kuzu_binding.cpp
src_cpp/cached_import/py_cached_item.cpp
src_cpp/cached_import/py_cached_import.cpp
src_cpp/py_connection.cpp
src_cpp/py_database.cpp
src_cpp/py_prepared_statement.cpp
Expand Down
18 changes: 0 additions & 18 deletions tools/python_api/src_cpp/cached_import/py_cached_import.cpp

This file was deleted.

24 changes: 0 additions & 24 deletions tools/python_api/src_cpp/cached_import/py_cached_item.cpp

This file was deleted.

33 changes: 0 additions & 33 deletions tools/python_api/src_cpp/include/cached_import/py_cached_import.h

This file was deleted.

26 changes: 0 additions & 26 deletions tools/python_api/src_cpp/include/cached_import/py_cached_item.h

This file was deleted.

128 changes: 0 additions & 128 deletions tools/python_api/src_cpp/include/cached_import/py_cached_modules.h

This file was deleted.

2 changes: 1 addition & 1 deletion tools/python_api/src_cpp/include/py_database.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ class PyDatabase {
explicit PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize,
uint64_t maxNumThreads, bool compression, bool readOnly);

~PyDatabase();
~PyDatabase() = default;

template<class T>
void scanNodeTable(const std::string& tableName, const std::string& propName,
Expand Down
7 changes: 3 additions & 4 deletions tools/python_api/src_cpp/pandas/pandas_analyzer.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "pandas/pandas_analyzer.h"

#include "function/built_in_function_utils.h"
#include "cached_import/py_cached_import.h"
#include "py_conversion.h"

namespace kuzu {
Expand Down Expand Up @@ -38,7 +37,7 @@ common::LogicalType PandasAnalyzer::getListType(py::object& ele, bool& canConver
for (auto pyVal : ele) {
auto object = py::reinterpret_borrow<py::object>(pyVal);
auto itemType = getItemType(object, canConvert);
if (i == 0) {
if (i == 0) {
listType = itemType;
} else {
if (!upgradeType(listType, itemType)) {
Expand Down Expand Up @@ -89,8 +88,8 @@ static py::object findFirstNonNull(const py::handle& row, uint64_t numRows) {

common::LogicalType PandasAnalyzer::innerAnalyze(py::object column, bool& canConvert) {
auto numRows = py::len(column);
auto pandasModule = importCache->pandas;
auto pandasSeries = pandasModule.core.series.Series();
auto pandasModule = py::module::import("pandas");
auto pandasSeries = pandasModule.attr("core").attr("series").attr("Series");

if (py::isinstance(column, pandasSeries)) {
column = column.attr("__array__")();
Expand Down
4 changes: 2 additions & 2 deletions tools/python_api/src_cpp/pandas/pandas_scan.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#include "pandas/pandas_scan.h"

#include "function/table/bind_input.h"
#include "cached_import/py_cached_import.h"
#include "numpy/numpy_scan.h"
#include "py_connection.h"
#include "pybind11/pytypes.h"
Expand Down Expand Up @@ -128,9 +127,10 @@ std::unique_ptr<Value> tryReplacePD(py::dict& dict, py::str& tableName) {
}

std::unique_ptr<common::Value> replacePD(common::Value* value) {
py::gil_scoped_acquire acquire;
auto pyTableName = py::str(value->getValue<std::string>());
// Here we do an exhaustive search on the frame lineage.
auto currentFrame = importCache->inspect.currentframe()();
auto currentFrame = py::module::import("inspect").attr("currentframe")();
while (hasattr(currentFrame, "f_locals")) {
auto localDict = py::reinterpret_borrow<py::dict>(currentFrame.attr("f_locals"));
if (localDict) {
Expand Down
15 changes: 8 additions & 7 deletions tools/python_api/src_cpp/py_connection.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@

#include "common/string_format.h"
#include "datetime.h" // from Python
#include "cached_import/py_cached_import.h"
#include "main/connection.h"
#include "pandas/pandas_scan.h"
#include "processor/result/factorized_table.h"
#include "common/types/uuid.h"

using namespace kuzu::common;
using namespace kuzu;

void PyConnection::initialize(py::handle& m) {
py::class_<PyConnection>(m, "Connection")
Expand Down Expand Up @@ -153,7 +151,9 @@ void PyConnection::getAllEdgesForTorchGeometric(py::array_t<int64_t>& npArray,
}

bool PyConnection::isPandasDataframe(const py::object& object) {
return py::isinstance(object, importCache->pandas.DataFrame());
// TODO(Ziyi): introduce PythonCachedImport to avoid unnecessary import.
py::module pandas = py::module::import("pandas");
return py::isinstance(object, pandas.attr("DataFrame"));
}

static Value transformPythonValue(py::handle val);
Expand All @@ -176,10 +176,11 @@ std::unordered_map<std::string, std::unique_ptr<Value>> transformPythonParameter
}

Value transformPythonValue(py::handle val) {
auto datetime_datetime = importCache->datetime.datetime();
auto time_delta = importCache->datetime.timedelta();
auto datetime_date = importCache->datetime.date();
auto uuid = importCache->uuid.UUID();
auto datetime_mod = py::module::import("datetime");
auto datetime_datetime = datetime_mod.attr("datetime");
auto time_delta = datetime_mod.attr("timedelta");
auto datetime_date = datetime_mod.attr("date");
auto uuid = py::module::import("uuid").attr("UUID");
if (py::isinstance<py::bool_>(val)) {
return Value::createValue<bool>(val.cast<bool>());
} else if (py::isinstance<py::int_>(val)) {
Expand Down
12 changes: 6 additions & 6 deletions tools/python_api/src_cpp/py_conversion.cpp
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
#include "py_conversion.h"

#include "common/type_utils.h"
#include "cached_import/py_cached_import.h"

namespace kuzu {

using namespace kuzu::common;
using kuzu::importCache;

PythonObjectType getPythonObjectType(py::handle& ele) {
auto pandasNa = importCache->pandas.NA();
auto pyDateTime = importCache->datetime.datetime();
auto pandasNat = importCache->pandas.NaT();
auto pyDate = importCache->datetime.date();
py::object pandas = py::module::import("pandas");
auto pandasNa = pandas.attr("NA");
auto pandasNat = pandas.attr("NaT");
py::object datetime = py::module::import("datetime");
auto pyDateTime = datetime.attr("datetime");
auto pyDate = datetime.attr("date");
if (ele.is_none() || ele.is(pandasNa) || ele.is(pandasNat)) {
return PythonObjectType::None;
} else if (py::isinstance<py::bool_>(ele)) {
Expand Down
7 changes: 0 additions & 7 deletions tools/python_api/src_cpp/py_database.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
#include "include/py_database.h"

#include "include/cached_import/py_cached_import.h"
#include "pandas/pandas_scan.h"

#include <memory>
Expand Down Expand Up @@ -39,11 +37,6 @@ PyDatabase::PyDatabase(const std::string& databasePath, uint64_t bufferPoolSize,
database = std::make_unique<Database>(databasePath, systemConfig);
database->addBuiltInFunction(READ_PANDAS_FUNC_NAME, kuzu::PandasScanFunction::getFunctionSet());
storageDriver = std::make_unique<kuzu::main::StorageDriver>(database.get());
kuzu::importCache = std::make_shared<kuzu::PythonCachedImport>();
}

PyDatabase::~PyDatabase() {
kuzu::importCache.reset();
}

template<class T>
Expand Down
Loading

0 comments on commit 1653b40

Please sign in to comment.