Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Metaclass approach #3

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion pandas/core/arrays/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from ._reshaping import implement_2d # noqa:F401
from .base import ( # noqa: F401
ExtensionArray,
ExtensionOpsMixin,
Expand Down
88 changes: 71 additions & 17 deletions pandas/core/arrays/_reshaping.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,43 +3,97 @@
"""
from functools import wraps
from typing import Tuple
import warnings

import numpy as np

from pandas._libs.lib import is_integer

msg = (
"ExtensionArray subclass {name} defines {method}. "
"In the future, this will not be supported. Please "
"inherit {method} from the ExtensionArray base class."
)


def implement_2d(cls):
"""
A decorator to take a 1-dimension-only ExtensionArray subclass and make
Patch a 1-dimension-only ExtensionArray subclass and make
it support limited 2-dimensional operations.
"""
from pandas.core.arrays import ExtensionArray

# For backwards-compatibility, if an EA author implemented __len__
# but not size, we use that __len__ method to get an array's size.
We achieve this by rewriting dimension-dependent methods to
pre-process the inputs to make them look 1d, call the underlying
method, and post-process the output.
"""
if cls._allows_2d:
return

if cls.__name__ == "ExtensionArray" and cls.__module__ == "pandas.core.arrays.base":
# No need to patch for ExtensionArray base class.
return
else:
from pandas.core.arrays import ExtensionArray

# For backwards-compatibility, we use the length, size, or shape
# defined by the subclass. We can always define the other two in
# terms of the one.
has_size = cls.size is not ExtensionArray.size
has_shape = cls.shape is not ExtensionArray.shape
has_len = cls.__len__ is not ExtensionArray.__len__

if not has_size and has_len:
cls.size = property(cls.__len__)
cls.__len__ = ExtensionArray.__len__
orig_len = cls.__len__
# TODO: Find a better way to do this. I suspect we could check whether
# our cls.bases contains ExtensionArray...
if hasattr(orig_len, "_original_len"):
# When a user defines class Foo(Bar), where Bar is an ExtensionArray
# subclass whose __len__ has already been patched, we want to use the
# unpatched version.
orig_len = orig_len._original_len

orig_shape = cls.shape

@wraps(orig_len)
def __len__(self):
length = orig_len(self)
if self._ExtensionArray__expanded_dim is None:
result = length
elif self._ExtensionArray__expanded_dim == 0:
result = length
else:
result = 1
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure I follow. Is expanded_dim an indicator, a patched ndim, or something else? Is 1 hard-coded here because we support only (N, 1) and (1, N)?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

expanded_dim is an indicator.

If array._expanded_dim == 1, that means array.shape is (1, N) and len(array) is 1.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(I see this is clarified below.)


return result

cls.__len__ = __len__
cls.__len__._original_len = orig_len

if has_shape:
warnings.warn(msg.format(name=cls.__name__, method="shape"), DeprecationWarning)

def get_shape(self):
return ExtensionArray.shape.fget(self)

def set_shape(self, value):
if orig_shape.fset:
orig_shape.fset(self, value)

ExtensionArray.shape.fset(self, value)

cls.shape = property(fget=get_shape, fset=set_shape)

elif not has_size and has_shape:
if has_size:
warnings.warn(msg.format(name=cls.__name__, method="size"), DeprecationWarning)

@property
def size(self) -> int:
return np.prod(self.shape)
def get_size(self):
return ExtensionArray.size.fget(self)

cls.size = size
cls.size = property(fget=get_size)

orig_copy = cls.copy

@wraps(orig_copy)
def copy(self):
result = orig_copy(self)
result._shape = self._shape
# TODO: Can this setattr be done in the metaclass? Less likely to forget.
result._ExtensionArray__expanded_dim = self._ExtensionArray__expanded_dim
return result

cls.copy = copy
Expand All @@ -59,7 +113,7 @@ def __getitem__(self, key):
if isinstance(key[0], slice):
if slice_contains_zero(key[0]):
result = orig_getitem(self, key[1])
result._shape = (1, result.size)
result._ExtensionArray__expanded_dim = 1
return result

raise NotImplementedError(key)
Expand Down Expand Up @@ -92,7 +146,7 @@ def take(self, indices, allow_fill=None, fill_value=None, axis=0, **kwargs):
result = orig_take(
self, indices, allow_fill=allow_fill, fill_value=fill_value
)
result._shape = (1, result.size)
result._ExtensionArray__expanded_dim = 1
return result

# For axis == 0, because we only support shape (1, N)
Expand Down
62 changes: 50 additions & 12 deletions pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from pandas._typing import ArrayLike
from pandas.core import ops
from pandas.core.algorithms import _factorize_array, unique
from pandas.core.arrays._reshaping import can_safe_ravel, tuplify_shape
from pandas.core.arrays._reshaping import can_safe_ravel, implement_2d, tuplify_shape
from pandas.core.missing import backfill_1d, pad_1d
from pandas.core.sorting import nargsort

Expand All @@ -33,7 +33,13 @@
_extension_array_shared_docs = dict() # type: Dict[str, str]


class ExtensionArray:
class Reshapable(type):
def __init__(cls, name: str, bases: tuple, clsdict: dict):
super().__init__(name, bases, clsdict)
implement_2d(cls)


class ExtensionArray(metaclass=Reshapable):
"""
Abstract base class for custom 1-D array types.

Expand Down Expand Up @@ -326,7 +332,7 @@ def __len__(self) -> int:
-------
length : int
"""
return self.shape[0]
raise AbstractMethodError()

def __iter__(self):
"""
Expand All @@ -341,7 +347,13 @@ def __iter__(self):
# ------------------------------------------------------------------------
# Required attributes
# ------------------------------------------------------------------------
_shape = None
# The currently expanded dimension.
# * None : (N,) array
# * 0 : (N, 1) array
# * 1 : (1, N) array
# We use a double-underscore to mangle the name to _ExtensionArray__expanded_dim
# to avoid clashes with subclasses.
__expanded_dim = None

@property
def dtype(self) -> ExtensionDtype:
Expand All @@ -355,19 +367,41 @@ def shape(self) -> Tuple[int, ...]:
"""
Return a tuple of the array dimensions.
"""
if self._shape is not None:
return self._shape
if not self._allows_2d:
length = self.__len__.__wrapped__(self)
else:
length = len(self)

# Default to 1D
length = self.size
return (length,)
if self._ExtensionArray__expanded_dim == 0:
result = length, 1
elif self._ExtensionArray__expanded_dim == 1:
result = 1, length
else:
result = (length,)

assert np.prod(result) == length
return result

@shape.setter
def shape(self, value):
# TODO: support negative dimensions in value.
size = np.prod(value)
if size != self.size:
raise ValueError("Implied size must match actual size.")
self._shape = value

list_like = is_list_like(value)
if list_like and len(value) > 2:
raise ValueError("Only 1 or 2-dimensions allowed.")
elif list_like and len(value) == 2:
if value[1] == 1:
self._ExtensionArray__expanded_dim = 0
elif value[0] == 1:
self._ExtensionArray__expanded_dim = 1
else:
raise ValueError

else:
self._ExtensionArray__expanded_dim = None

@property
def ndim(self) -> int:
Expand All @@ -381,7 +415,7 @@ def size(self) -> int:
"""
The number of elements in this array.
"""
raise AbstractMethodError(self)
return np.prod(self.shape)

@property
def nbytes(self) -> int:
Expand Down Expand Up @@ -967,7 +1001,11 @@ def reshape(self, *shape):
# numpy accepts either a single tuple or an expanded tuple
shape = tuplify_shape(self.size, shape)
result = self.view()
result._shape = shape
result.shape = shape
# if len(shape) > 1:
# expand_dim = int(shape[1] > 1)
# result._ExtensionArray__expanded_dim = expand_dim
# # TODO: is this missing cases?
return result

@property
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@

from pandas.io.formats import console

from ._reshaping import implement_2d
from .base import ExtensionArray, _extension_array_shared_docs

_take_msg = textwrap.dedent(
Expand Down Expand Up @@ -226,7 +225,6 @@ def contains(cat, key, container):
"""


@implement_2d
class Categorical(ExtensionArray, PandasObject):
"""
Represent a categorical variable in classic R / S-plus fashion.
Expand Down Expand Up @@ -518,13 +516,6 @@ def astype(self, dtype: Dtype, copy: bool = True) -> ArrayLike:
return self._set_dtype(dtype)
return np.array(self, dtype=dtype, copy=copy)

@cache_readonly
def size(self) -> int:
"""
return the len of myself
"""
return self._codes.size

@cache_readonly
def itemsize(self) -> int:
"""
Expand Down
9 changes: 3 additions & 6 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@
from pandas.tseries import frequencies
from pandas.tseries.offsets import DateOffset, Tick

from ._reshaping import implement_2d
from .base import ExtensionArray, ExtensionOpsMixin


Expand Down Expand Up @@ -318,7 +317,6 @@ def ceil(self, freq, ambiguous="raise", nonexistent="raise"):
return self._round(freq, RoundTo.PLUS_INFTY, ambiguous, nonexistent)


@implement_2d
class DatetimeLikeArrayMixin(ExtensionOpsMixin, AttributesMixin, ExtensionArray):
"""
Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray
Expand Down Expand Up @@ -384,6 +382,9 @@ def _formatter(self, boxed=False):
# ----------------------------------------------------------------
# Array-Like / EA-Interface Methods

def __len__(self):
return len(self._data)

@property
def nbytes(self):
return self._data.nbytes
Expand All @@ -394,10 +395,6 @@ def __array__(self, dtype=None):
return np.array(list(self), dtype=object)
return self._data

@property
def size(self) -> int:
return self._data.size

def __getitem__(self, key):
"""
This getitem defers to the underlying array, which by-definition can
Expand Down
8 changes: 3 additions & 5 deletions pandas/core/arrays/integer.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@

from pandas.core import nanops, ops
from pandas.core.algorithms import take
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin, implement_2d
from pandas.core.arrays import ExtensionArray, ExtensionOpsMixin
from pandas.core.tools.numeric import to_numeric


Expand Down Expand Up @@ -232,7 +232,6 @@ def coerce_to_array(values, dtype, mask=None, copy=False):
return values, mask


@implement_2d
class IntegerArray(ExtensionArray, ExtensionOpsMixin):
"""
Array of integer (optional missing) values.
Expand Down Expand Up @@ -462,9 +461,8 @@ def __setitem__(self, key, value):
self._data[key] = value
self._mask[key] = mask

@property
def size(self) -> int:
return self._data.size
def __len__(self):
return len(self._data)

@property
def nbytes(self):
Expand Down
8 changes: 2 additions & 6 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core.algorithms import take, value_counts
from pandas.core.arrays._reshaping import implement_2d
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
from pandas.core.arrays.categorical import Categorical
import pandas.core.common as com
Expand Down Expand Up @@ -141,7 +140,6 @@
),
)
)
@implement_2d
class IntervalArray(IntervalMixin, ExtensionArray):
ndim = 1
can_hold_na = True
Expand Down Expand Up @@ -697,10 +695,8 @@ def isna(self):
def nbytes(self) -> int:
return self.left.nbytes + self.right.nbytes

@property
def size(self) -> int:
# Avoid materializing self.values
return self.left.size
def __len__(self):
return len(self.left)

def take(self, indices, allow_fill=False, fill_value=None, axis=None, **kwargs):
"""
Expand Down
Loading