Skip to content

Commit

Permalink
apacheGH-41699: [Python][Parquet] Implement to_dict method on Sorting…
Browse files Browse the repository at this point in the history
…Column (apache#41704)

### Rationale for this change
Resolves apache#41699.

### What changes are included in this PR?
Add a `to_dict` method to `SortingColumn` and update the tests to check its output.

### Are these changes tested?
Yes

### Are there any user-facing changes?
No

* GitHub Issue: apache#41699

Authored-by: Tai Le Manh <manhtai.lmt@gmail.com>
Signed-off-by: AlenkaF <frim.alenka@gmail.com>
  • Loading branch information
tlm365 authored and vibhatha committed May 25, 2024
1 parent 1e981a5 commit 7a0e2dd
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 7 deletions.
16 changes: 16 additions & 0 deletions python/pyarrow/_parquet.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -705,6 +705,22 @@ cdef class SortingColumn:
"""Whether null values appear before valid values (bool)."""
return self.nulls_first

def to_dict(self):
    """
    Get dictionary representation of the SortingColumn.

    Returns
    -------
    dict
        Dictionary with a key for each attribute of this class:
        ``column_index``, ``descending`` and ``nulls_first``.
    """
    # A new dict is built on every call, so callers may mutate the result
    # without affecting this object.
    return {
        'column_index': self.column_index,
        'descending': self.descending,
        'nulls_first': self.nulls_first,
    }


cdef class RowGroupMetaData(_Weakrefable):
"""Metadata for a single row group."""
Expand Down
22 changes: 15 additions & 7 deletions python/pyarrow/tests/parquet/test_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,14 +303,18 @@ def test_parquet_write_disable_statistics(tempdir):

def test_parquet_sorting_column():
sorting_col = pq.SortingColumn(10)
assert sorting_col.column_index == 10
assert sorting_col.descending is False
assert sorting_col.nulls_first is False
assert sorting_col.to_dict() == {
'column_index': 10,
'descending': False,
'nulls_first': False
}

sorting_col = pq.SortingColumn(0, descending=True, nulls_first=True)
assert sorting_col.column_index == 0
assert sorting_col.descending is True
assert sorting_col.nulls_first is True
assert sorting_col.to_dict() == {
'column_index': 0,
'descending': True,
'nulls_first': True
}

schema = pa.schema([('a', pa.int64()), ('b', pa.int64())])
sorting_cols = (
Expand Down Expand Up @@ -381,9 +385,13 @@ def test_parquet_file_sorting_columns():

# Can retrieve sorting columns from metadata
metadata = pq.read_metadata(reader)
assert metadata.num_row_groups == 1
assert sorting_columns == metadata.row_group(0).sorting_columns

metadata_dict = metadata.to_dict()
assert metadata_dict.get('num_columns') == 2
assert metadata_dict.get('num_rows') == 3
assert metadata_dict.get('num_row_groups') == 1


def test_field_id_metadata():
# ARROW-7080
Expand Down

0 comments on commit 7a0e2dd

Please sign in to comment.