From 64373a797614da2f460c9c8db051fcbf472b5144 Mon Sep 17 00:00:00 2001 From: Enrico Minack Date: Thu, 26 Sep 2024 10:51:20 +0200 Subject: [PATCH 1/3] Align buffers in RecordBatch.from_pyarrow_bound --- arrow/src/pyarrow.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index b05c967d7d9b..2532bafbe3c6 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -363,13 +363,14 @@ impl FromPyArrow for RecordBatch { let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() }; let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) }; - let array_data = unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?; + let mut array_data = unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?; if !matches!(array_data.data_type(), DataType::Struct(_)) { return Err(PyTypeError::new_err( "Expected Struct type from __arrow_c_array.", )); } let options = RecordBatchOptions::default().with_row_count(Some(array_data.len())); + array_data.align_buffers(); let array = StructArray::from(array_data); // StructArray does not embed metadata from schema. We need to override // the output schema with the schema from the capsule. From ac58cdcebdd94a31f0226e22eac4c2ec460de31f Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Oct 2024 06:44:59 -0400 Subject: [PATCH 2/3] Update arrow/src/pyarrow.rs --- arrow/src/pyarrow.rs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index 2532bafbe3c6..b8278b475f7c 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -370,6 +370,10 @@ impl FromPyArrow for RecordBatch { )); } let options = RecordBatchOptions::default().with_row_count(Some(array_data.len())); + // Ensure data is aligned (by potentially copying the buffers). 
+ // This is needed because some python code (for example the + // python flight client) produces unaligned buffers + // See https://github.com/apache/arrow/issues/43552 for details array_data.align_buffers(); let array = StructArray::from(array_data); // StructArray does not embed metadata from schema. We need to override From 1875d6dc66f481e6b98c17e3c7cc1c263803fffc Mon Sep 17 00:00:00 2001 From: Andrew Lamb Date: Wed, 2 Oct 2024 06:45:22 -0400 Subject: [PATCH 3/3] cargo fmt --- arrow/src/pyarrow.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arrow/src/pyarrow.rs b/arrow/src/pyarrow.rs index b8278b475f7c..6effe1c03e01 100644 --- a/arrow/src/pyarrow.rs +++ b/arrow/src/pyarrow.rs @@ -363,7 +363,8 @@ impl FromPyArrow for RecordBatch { let schema_ptr = unsafe { schema_capsule.reference::<FFI_ArrowSchema>() }; let ffi_array = unsafe { FFI_ArrowArray::from_raw(array_capsule.pointer().cast()) }; - let mut array_data = unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?; + let mut array_data = + unsafe { ffi::from_ffi(ffi_array, schema_ptr) }.map_err(to_py_err)?; if !matches!(array_data.data_type(), DataType::Struct(_)) { return Err(PyTypeError::new_err( "Expected Struct type from __arrow_c_array.", @@ -371,7 +372,7 @@ impl FromPyArrow for RecordBatch { } let options = RecordBatchOptions::default().with_row_count(Some(array_data.len())); // Ensure data is aligned (by potentially copying the buffers). - // This is needed because some python code (for example the + // This is needed because some python code (for example the // python flight client) produces unaligned buffers // See https://github.com/apache/arrow/issues/43552 for details array_data.align_buffers();