Skip to content

Commit

Permalink
REFACTOR-modin-project#7313: Add similar methods as in 7294 for opera…
Browse files Browse the repository at this point in the history
…ting on columns (#7314)

Signed-off-by: arunjose696 <arunjose696@gmail.com>
  • Loading branch information
arunjose696 committed Jun 14, 2024
1 parent 767e28e commit eac3c77
Show file tree
Hide file tree
Showing 6 changed files with 63 additions and 23 deletions.
17 changes: 7 additions & 10 deletions modin/core/dataframe/algebra/binary.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,10 @@ def maybe_build_dtypes_series(
Finds a union of columns and finds dtypes for all these columns.
"""
if not trigger_computations:
if not first._modin_frame.has_columns_cache:
if not first.frame_has_columns_cache:
return None

if (
isinstance(second, type(first))
and not second._modin_frame.has_columns_cache
):
if isinstance(second, type(first)) and not second.frame_has_columns_cache:
return None

columns_first = set(first.columns)
Expand Down Expand Up @@ -384,8 +381,8 @@ def caller(
if isinstance(other, type(query_compiler)):
if broadcast:
if (
query_compiler._modin_frame.has_materialized_columns
and other._modin_frame.has_materialized_columns
query_compiler.frame_has_materialized_columns
and other.frame_has_materialized_columns
):
if (
len(query_compiler.columns) == 1
Expand All @@ -408,8 +405,8 @@ def caller(
)
else:
if (
query_compiler._modin_frame.has_materialized_columns
and other._modin_frame.has_materialized_columns
query_compiler.frame_has_materialized_columns
and other.frame_has_materialized_columns
):
if (
len(query_compiler.columns) == 1
Expand Down Expand Up @@ -440,7 +437,7 @@ def caller(
)
else:
if (
query_compiler._modin_frame.has_materialized_columns
query_compiler.frame_has_materialized_columns
and len(query_compiler._modin_frame.columns) == 1
and is_scalar(other)
):
Expand Down
43 changes: 43 additions & 0 deletions modin/core/storage_formats/base/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4532,6 +4532,28 @@ def frame_has_materialized_dtypes(self) -> bool:
"""
return self._modin_frame.has_materialized_dtypes

@property
def frame_has_materialized_columns(self) -> bool:
    """
    Check if the underlying dataframe has materialized columns.

    Returns
    -------
    bool
        The value of ``has_materialized_columns`` reported by the
        underlying ``_modin_frame``.
    """
    # Thin pass-through so callers can query the query compiler
    # instead of reaching into ``_modin_frame`` directly.
    return self._modin_frame.has_materialized_columns

@property
def frame_has_materialized_index(self) -> bool:
    """
    Check if the underlying dataframe has materialized index.

    Returns
    -------
    bool
        The value of ``has_materialized_index`` reported by the
        underlying ``_modin_frame``.
    """
    # Thin pass-through so callers can query the query compiler
    # instead of reaching into ``_modin_frame`` directly.
    return self._modin_frame.has_materialized_index

def set_frame_dtypes_cache(self, dtypes):
"""
Set dtypes cache for the underlying dataframe frame.
Expand All @@ -4552,6 +4574,16 @@ def set_frame_index_cache(self, index):
"""
self._modin_frame.set_index_cache(index)

def set_frame_columns_cache(self, index):
    """
    Set columns cache for underlying dataframe.

    Parameters
    ----------
    index : sequence, callable or None
        Value forwarded unchanged to ``set_columns_cache`` of the
        underlying ``_modin_frame``.
    """
    # NOTE(review): the parameter is named ``index`` although it carries the
    # columns cache value; presumably kept to mirror ``set_frame_index_cache``
    # -- confirm before renaming, since keyword callers would break.
    self._modin_frame.set_columns_cache(index)

@property
def frame_has_index_cache(self):
"""
Expand All @@ -4563,6 +4595,17 @@ def frame_has_index_cache(self):
"""
return self._modin_frame.has_index_cache

@property
def frame_has_columns_cache(self):
    """
    Check if the columns cache exists for underlying dataframe.

    Returns
    -------
    bool
        The value of ``has_columns_cache`` reported by the underlying
        ``_modin_frame``.
    """
    # Thin pass-through so callers can query the query compiler
    # instead of reaching into ``_modin_frame`` directly.
    return self._modin_frame.has_columns_cache

@property
def frame_has_dtypes_cache(self) -> bool:
"""
Expand Down
2 changes: 1 addition & 1 deletion modin/core/storage_formats/pandas/aggregations.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def corr_method(
method=method, min_periods=min_periods, numeric_only=numeric_only
)

if not numeric_only and qc._modin_frame.has_materialized_columns:
if not numeric_only and qc.frame_has_materialized_columns:
new_index, new_columns = (
qc._modin_frame.copy_columns_cache(),
qc._modin_frame.copy_columns_cache(),
Expand Down
4 changes: 2 additions & 2 deletions modin/core/storage_formats/pandas/merge.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def map_func(
# it's fine too, we can also decide that by columns, which tend to be already
# materialized quite often compared to the indexes.
keep_index = False
if left._modin_frame.has_materialized_index:
if left.frame_has_materialized_index:
keep_index = should_keep_index(left, right)
else:
# Have to trigger columns materialization. Hope they're already available at this point.
Expand Down Expand Up @@ -286,7 +286,7 @@ def _compute_result_metadata(
new_columns = None
new_dtypes = None

if not left._modin_frame.has_materialized_columns:
if not left.frame_has_materialized_columns:
return new_columns, new_dtypes

if left_on is None and right_on is None:
Expand Down
4 changes: 2 additions & 2 deletions modin/core/storage_formats/pandas/query_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,7 +623,7 @@ def _reset(df, *axis_lengths, partition_idx): # pragma: no cover
new_columns = None
if kwargs["drop"]:
dtypes = self._modin_frame.copy_dtypes_cache()
if self._modin_frame.has_columns_cache:
if self.frame_has_columns_cache:
new_columns = self._modin_frame.copy_columns_cache(
copy_lengths=True
)
Expand All @@ -642,7 +642,7 @@ def _reset(df, *axis_lengths, partition_idx): # pragma: no cover
dtypes = None
if (
# can precompute new columns if we know columns and index names
self._modin_frame.has_materialized_columns
self.frame_has_materialized_columns
and index_dtypes is not None
):
empty_index = (
Expand Down
16 changes: 8 additions & 8 deletions modin/tests/core/storage_formats/pandas/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -1171,13 +1171,13 @@ def test_concat_dont_materialize_opposite_axis(axis):

def assert_no_cache(df, axis):
if axis:
assert not df._query_compiler._modin_frame.has_materialized_columns
assert not df._query_compiler.frame_has_materialized_columns
else:
assert not df._query_compiler._modin_frame.has_materialized_index
assert not df._query_compiler.frame_has_materialized_index

def remove_cache(df, axis):
if axis:
df._query_compiler._modin_frame.set_columns_cache(None)
df._query_compiler.set_frame_columns_cache(None)
else:
df._query_compiler.set_frame_index_cache(None)
assert_no_cache(df, axis)
Expand Down Expand Up @@ -2038,7 +2038,7 @@ def test_concat_axis_1(
or remaining_dtype is not None
)
# setting columns cache to 'None', in order to prevent completing 'dtypes' with the materialized columns
md_df._query_compiler._modin_frame.set_columns_cache(None)
md_df._query_compiler.set_frame_columns_cache(None)
md_df._query_compiler.set_frame_dtypes_cache(
ModinDtypes(
DtypesDescriptor(
Expand Down Expand Up @@ -2401,10 +2401,10 @@ def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):
# case 1: 'df' has complete dtype by default
df = pd.DataFrame({"a": [1, 2, 3]})
if has_materialized_index:
assert df._query_compiler._modin_frame.has_materialized_index
assert df._query_compiler.frame_has_materialized_index
else:
df._query_compiler.set_frame_index_cache(None)
assert not df._query_compiler._modin_frame.has_materialized_index
assert not df._query_compiler.frame_has_materialized_index
assert df._query_compiler.frame_has_materialized_dtypes

res = df.reset_index(drop=drop)
Expand Down Expand Up @@ -2444,10 +2444,10 @@ def test_preserve_dtypes_reset_index(self, drop, has_materialized_index):
)
)
if has_materialized_index:
assert df._query_compiler._modin_frame.has_materialized_index
assert df._query_compiler.frame_has_materialized_index
else:
df._query_compiler.set_frame_index_cache(None)
assert not df._query_compiler._modin_frame.has_materialized_index
assert not df._query_compiler.frame_has_materialized_index

res = df.reset_index(drop=drop)
if drop:
Expand Down

0 comments on commit eac3c77

Please sign in to comment.