pybind · rwgk · May 23, 2023 · May 17, 2023 · May 17, 2023 · May 17, 2023
diff --git a/include/pybind11/detail/common.h b/include/pybind11/detail/common.h
@@ -1025,6 +1025,20 @@ PYBIND11_RUNTIME_EXCEPTION(reference_cast_error, PyExc_RuntimeError) /// Used in
 template <typename T, typename SFINAE = void>
 struct format_descriptor {};
 
+template <typename T>
+struct format_descriptor<
+    T,
+    detail::enable_if_t<detail::is_same_ignoring_cvref<T, PyObject *>::value>> {
+    static constexpr const char c = 'O'; // format code reported for `PyObject *`
+    static constexpr const char value[2] = {c, '\0'}; // `c` as a NUL-terminated C string
+    static std::string format() { return std::string(1, c); } // `c` as a one-char std::string
+};
+
+// Common message for `static_assert()`s that preempt much less obvious errors in code that does
+// not support `format_descriptor<PyObject *>`; see the `std::is_pointer` checks in eigen/*.h.
+#define PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED                                          \
+    "Pointer types (in particular `PyObject *`) are not supported."
+
 PYBIND11_NAMESPACE_BEGIN(detail)
 // Returns the index of the given type in the type char array below, and in the list in numpy.h
 // The order here is: bool; 8 ints ((signed,unsigned)x(8,16,32,64)bits); float,double,long double;

diff --git a/include/pybind11/eigen/matrix.h b/include/pybind11/eigen/matrix.h
@@ -287,6 +287,8 @@ handle eigen_encapsulate(Type *src) {
 template <typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_dense_plain<Type>::value>> {
     using Scalar = typename Type::Scalar;
+    static_assert(!std::is_pointer<Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
     using props = EigenProps<Type>;
 
     bool load(handle src, bool convert) {
@@ -405,6 +407,9 @@ struct type_caster<Type, enable_if_t<is_eigen_dense_plain<Type>::value>> {
 // Base class for casting reference/map/block/etc. objects back to python.
 template <typename MapType>
 struct eigen_map_caster {
+    static_assert(!std::is_pointer<typename MapType::Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
+
 private:
     using props = EigenProps<MapType>;
 
@@ -457,6 +462,8 @@ struct type_caster<
     using Type = Eigen::Ref<PlainObjectType, 0, StrideType>;
     using props = EigenProps<Type>;
     using Scalar = typename props::Scalar;
+    static_assert(!std::is_pointer<Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
     using MapType = Eigen::Map<PlainObjectType, 0, StrideType>;
     using Array
         = array_t<Scalar,
@@ -604,6 +611,9 @@ struct type_caster<
 // regular Eigen::Matrix, then casting that.
 template <typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_other<Type>::value>> {
+    static_assert(!std::is_pointer<typename Type::Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
+
 protected:
     using Matrix
         = Eigen::Matrix<typename Type::Scalar, Type::RowsAtCompileTime, Type::ColsAtCompileTime>;
@@ -632,6 +642,8 @@ struct type_caster<Type, enable_if_t<is_eigen_other<Type>::value>> {
 template <typename Type>
 struct type_caster<Type, enable_if_t<is_eigen_sparse<Type>::value>> {
     using Scalar = typename Type::Scalar;
+    static_assert(!std::is_pointer<Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
     using StorageIndex = remove_reference_t<decltype(*std::declval<Type>().outerIndexPtr())>;
     using Index = typename Type::Index;
     static constexpr bool rowMajor = Type::IsRowMajor;

diff --git a/include/pybind11/eigen/tensor.h b/include/pybind11/eigen/tensor.h
@@ -164,6 +164,8 @@ PYBIND11_WARNING_POP
 
 template <typename Type>
 struct type_caster<Type, typename eigen_tensor_helper<Type>::ValidType> {
+    static_assert(!std::is_pointer<typename Type::Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
     using Helper = eigen_tensor_helper<Type>;
     static constexpr auto temp_name = get_tensor_descriptor<Type, false>::value;
     PYBIND11_TYPE_CASTER(Type, temp_name);
@@ -359,6 +361,8 @@ struct get_storage_pointer_type<MapType, void_t<typename MapType::PointerArgType
 template <typename Type, int Options>
 struct type_caster<Eigen::TensorMap<Type, Options>,
                    typename eigen_tensor_helper<remove_cv_t<Type>>::ValidType> {
+    static_assert(!std::is_pointer<typename Type::Scalar>::value,
+                  PYBIND11_MESSAGE_POINTER_TYPES_ARE_NOT_SUPPORTED);
     using MapType = Eigen::TensorMap<Type, Options>;
     using Helper = eigen_tensor_helper<remove_cv_t<Type>>;
 

diff --git a/include/pybind11/numpy.h b/include/pybind11/numpy.h
@@ -564,6 +564,8 @@ class dtype : public object {
         m_ptr = from_args(args).release().ptr();
     }
 
+    /// Return dtype for the given typenum (one of the NPY_TYPES).
+    /// https://numpy.org/devdocs/reference/c-api/array.html#c.PyArray_DescrFromType
     explicit dtype(int typenum)
         : object(detail::npy_api::get().PyArray_DescrFromType_(typenum), stolen_t{}) {
         if (m_ptr == nullptr) {
@@ -1283,12 +1285,16 @@ struct npy_format_descriptor<
 public:
     static constexpr int value = values[detail::is_fmt_numeric<T>::index];
 
-    static pybind11::dtype dtype() {
-        if (auto *ptr = npy_api::get().PyArray_DescrFromType_(value)) {
-            return reinterpret_steal<pybind11::dtype>(ptr);
-        }
-        pybind11_fail("Unsupported buffer format!");
-    }
+    static pybind11::dtype dtype() { return pybind11::dtype(/*typenum*/ value); }
+};
+
+template <typename T>
+struct npy_format_descriptor<T, enable_if_t<is_same_ignoring_cvref<T, PyObject *>::value>> {
+    static constexpr auto name = const_name("object");
+
+    static constexpr int value = npy_api::NPY_OBJECT_; // typenum handed to dtype(int) below
+
+    static pybind11::dtype dtype() { return pybind11::dtype(/*typenum*/ value); }
 };
 
 #define PYBIND11_DECL_CHAR_FMT                                                                    \

diff --git a/tests/test_buffers.cpp b/tests/test_buffers.cpp
@@ -7,12 +7,40 @@
     BSD-style license that can be found in the LICENSE file.
 */
 
+#include <pybind11/complex.h>
 #include <pybind11/stl.h>
 
 #include "constructor_stats.h"
 #include "pybind11_tests.h"
 
 TEST_SUBMODULE(buffers, m) {
+
+#define PYBIND11_LOCAL_DEF(...)                                                                   \
+    if (cpp_name == #__VA_ARGS__)                                                                 \
+        return py::format_descriptor<__VA_ARGS__>::format();
+
+    m.def("format_descriptor_format", [](const std::string &cpp_name) {
+        PYBIND11_LOCAL_DEF(PyObject *) // matches cpp_name == "PyObject *"
+        PYBIND11_LOCAL_DEF(bool)
+        PYBIND11_LOCAL_DEF(std::int8_t)
+        PYBIND11_LOCAL_DEF(std::uint8_t)
+        PYBIND11_LOCAL_DEF(std::int16_t)
+        PYBIND11_LOCAL_DEF(std::uint16_t)
+        PYBIND11_LOCAL_DEF(std::int32_t)
+        PYBIND11_LOCAL_DEF(std::uint32_t)
+        PYBIND11_LOCAL_DEF(std::int64_t)
+        PYBIND11_LOCAL_DEF(std::uint64_t)
+        PYBIND11_LOCAL_DEF(float)
+        PYBIND11_LOCAL_DEF(double)
+        PYBIND11_LOCAL_DEF(long double)
+        PYBIND11_LOCAL_DEF(std::complex<float>)
+        PYBIND11_LOCAL_DEF(std::complex<double>)
+        PYBIND11_LOCAL_DEF(std::complex<long double>)
+        return std::string("UNKNOWN"); // cpp_name matched none of the above
+    });
+
+#undef PYBIND11_LOCAL_DEF // helper macro is only meant for the lambda above
+
     // test_from_python / test_to_python:
     class Matrix {
     public:

diff --git a/tests/test_buffers.py b/tests/test_buffers.py
@@ -11,6 +11,32 @@
 np = pytest.importorskip("numpy")
 
 
+@pytest.mark.parametrize(
+    ("cpp_name", "expected_codes"),
+    [
+        ("PyObject *", ["O"]),
+        ("bool", ["?"]),
+        ("std::int8_t", ["b"]),
+        ("std::uint8_t", ["B"]),
+        ("std::int16_t", ["h"]),
+        ("std::uint16_t", ["H"]),
+        ("std::int32_t", ["i"]),
+        ("std::uint32_t", ["I"]),
+        ("std::int64_t", ["q"]),
+        ("std::uint64_t", ["Q"]),
+        ("float", ["f"]),
+        ("double", ["d"]),
+        ("long double", ["g", "d"]),
+        ("std::complex<float>", ["Zf"]),
+        ("std::complex<double>", ["Zd"]),
+        ("std::complex<long double>", ["Zg", "Zd"]),
+        ("", ["UNKNOWN"]),
+    ],
+)
+def test_format_descriptor_format(cpp_name, expected_codes):
+    assert m.format_descriptor_format(cpp_name) in expected_codes
 template <typename T, typename SFINAE = void> 
 struct compare_buffer_info { 
     static bool compare(const buffer_info &b) { 
         return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T); 
     } 
 }; 
 template <typename T> 
 struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> { 
     static bool compare(const buffer_info &b) { 
         return (size_t) b.itemsize == sizeof(T) 
                && (b.format == format_descriptor<T>::value 
                    || ((sizeof(T) == sizeof(long)) 
                        && b.format == (std::is_unsigned<T>::value ? "L" : "l")) 
                    || ((sizeof(T) == sizeof(size_t)) 
                        && b.format == (std::is_unsigned<T>::value ? "N" : "n"))); 
     } 
 }; 
 template <typename T, typename SFINAE = void> 
 struct compare_buffer_info { 
     static bool compare(const buffer_info &b) { 
         return b.format == format_descriptor<T>::format() && b.itemsize == (ssize_t) sizeof(T); 
     } 
 }; 
  
 template <typename T> 
 struct compare_buffer_info<T, detail::enable_if_t<std::is_integral<T>::value>> { 
     static bool compare(const buffer_info &b) { 
         return (size_t) b.itemsize == sizeof(T) 
                && (b.format == format_descriptor<T>::value 
                    || ((sizeof(T) == sizeof(long)) 
                        && b.format == (std::is_unsigned<T>::value ? "L" : "l")) 
                    || ((sizeof(T) == sizeof(size_t)) 
                        && b.format == (std::is_unsigned<T>::value ? "N" : "n"))); 
     } 
 }; 
+
+
 def test_from_python():
     with pytest.raises(RuntimeError) as excinfo:
         m.Matrix(np.array([1, 2, 3]))  # trying to assign a 1D array

diff --git a/tests/test_numpy_array.cpp b/tests/test_numpy_array.cpp
@@ -523,4 +523,30 @@ TEST_SUBMODULE(numpy_array, sm) {
     sm.def("test_fmt_desc_const_double", [](const py::array_t<const double> &) {});
 
     sm.def("round_trip_float", [](double d) { return d; });
+
+    sm.def("pass_array_pyobject_ptr_return_sum_str_values",
+           [](const py::array_t<PyObject *> &objs) {
+               std::string joined; // str(element.value) of every element, in iteration order
+               for (const auto &element : objs) {
+                   joined += py::str(element.attr("value"));
+               }
+               return joined;
+           });
+
+    sm.def("pass_array_pyobject_ptr_return_as_list", // array -> list via the return type
+           [](const py::array_t<PyObject *> &objs) -> py::list { return objs; });
+
+    sm.def("return_array_pyobject_ptr_cpp_loop", [](const py::list &objs) {
+        py::size_t arr_size = py::len(objs);
+        py::array_t<PyObject *> arr_from_list(static_cast<py::ssize_t>(arr_size));
+        PyObject **data = arr_from_list.mutable_data();
+        for (py::size_t i = 0; i < arr_size; i++) {
+            assert(data[i] == nullptr);
+            data[i] = py::cast<PyObject *>(objs[i].attr("value")); // cast adds a reference
+        }
+        return arr_from_list;
+    });
+
+    sm.def("return_array_pyobject_ptr_from_list", // list -> array via the return type
+           [](const py::list &objs) -> py::array_t<PyObject *> { return objs; });
 }
diff --git a/tests/test_numpy_array.py b/tests/test_numpy_array.py
@@ -595,3 +595,74 @@ def test_round_trip_float():
     arr = np.zeros((), np.float64)
     arr[()] = 37.2
     assert m.round_trip_float(arr) == 37.2
+
+
+# HINT: An easy and robust way (although only manual unfortunately) to check for
+#       ref-count leaks in the test_.*pyobject_ptr.* functions below is to
+#           * temporarily insert `while True:` (one-by-one),
+#           * run this test, and
+#           * run the Linux `top` command in another shell to visually monitor
+#             `RES` for a minute or two.
+#       If there is a leak, it is usually evident in seconds because the `RES`
+#       value increases without bounds. (Don't forget to Ctrl-C the test!)
+
+
+# For use as a temporary user-defined object, to maximize sensitivity of the tests below:
+#     * Ref-count leaks will be immediately evident.
+#     * Sanitizers are much more likely to detect heap-use-after-free due to
+#       other ref-count bugs.
+class PyValueHolder:
+    def __init__(self, value):
+        self.value = value  # read back through the `value` attribute by the tests
+
+
+def WrapWithPyValueHolder(*values):
+    return [PyValueHolder(v) for v in values]
+
+
+def UnwrapPyValueHolder(vhs):
+    return [vh.value for vh in vhs]
+
+
+def test_pass_array_pyobject_ptr_return_sum_str_values_ndarray():
+    # Intentionally all temporaries (for ref-count sensitivity), do not change.
+    assert (
+        m.pass_array_pyobject_ptr_return_sum_str_values(
+            np.array(WrapWithPyValueHolder(-3, "four", 5.0), dtype=object)
+        )
+        == "-3four5.0"
+    )
+
+
+def test_pass_array_pyobject_ptr_return_sum_str_values_list():
+    # Intentionally all temporaries (plain list input), do not change.
+    assert (
+        m.pass_array_pyobject_ptr_return_sum_str_values(
+            WrapWithPyValueHolder(2, "three", -4.0)
+        )
+        == "2three-4.0"
+    )
+
+
+def test_pass_array_pyobject_ptr_return_as_list():
+    # Intentionally all temporaries (for ref-count sensitivity), do not change.
+    assert UnwrapPyValueHolder(
+        m.pass_array_pyobject_ptr_return_as_list(
+            np.array(WrapWithPyValueHolder(-1, "two", 3.0), dtype=object)
+        )
+    ) == [-1, "two", 3.0]
+
+
+@pytest.mark.parametrize(
+    ("return_array_pyobject_ptr", "unwrap"),
+    [
+        (m.return_array_pyobject_ptr_cpp_loop, list),  # array holds the .value objects
+        (m.return_array_pyobject_ptr_from_list, UnwrapPyValueHolder),
+    ],
+)
+def test_return_array_pyobject_ptr_cpp_loop(return_array_pyobject_ptr, unwrap):
+    # Intentionally all temporaries (for ref-count sensitivity), do not change.
+    arr_from_list = return_array_pyobject_ptr(WrapWithPyValueHolder(6, "seven", -8.0))
+    assert isinstance(arr_from_list, np.ndarray)
+    assert arr_from_list.dtype == np.dtype("O")
+    assert unwrap(arr_from_list) == [6, "seven", -8.0]