Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix test failures with Pandas 1.2.0 #157

Merged
merged 8 commits into from
Jan 6, 2021
Merged
10 changes: 7 additions & 3 deletions .github/workflows/run_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,18 @@ jobs:
strategy:
matrix:
python-version: [3.6, 3.7, 3.8]
# Test against Pandas 1.0 and latest version
pandas-version: ["1.0.*", ""]
# Test against Pandas 1.0, 1.1, and latest version
pandas-version: ["1.0.*", "1.1.*", ""]
exclude:
# Only run one test with Pandas 1.0.x and Python 3.7, exclude others
# Only run one test with Pandas 1.x.x and Python 3.7, exclude others
- python-version: 3.6
pandas-version: "1.0.*"
- python-version: 3.6
pandas-version: "1.1.*"
- python-version: 3.8
pandas-version: "1.0.*"
- python-version: 3.8
pandas-version: "1.1.*"

steps:
- uses: actions/checkout@v2
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
numpy>=1.17
pandas>=1.0.3
pandas>=1.0.3,<1.2.0
pyarrow>=1.0.0
regex
# TODO: The following dependency should go away when we switch to Python 3.8.
Expand Down
3 changes: 3 additions & 0 deletions text_extensions_for_pandas/array/span.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,9 @@ def __eq__(self, other):
"'{}' and '{}'".format(type(self), type(other)))

def __ne__(self, other):
    """
    Element-wise inequality, computed as the inverse of ``self == other``.

    :param other: object to compare against.
    :return: ``NotImplemented`` when ``other`` is a pandas DataFrame, Series
     or Index; otherwise the result of applying ``~`` to ``self == other``.
    """
    pandas_containers = (ABCDataFrame, ABCSeries, ABCIndexClass)
    if isinstance(other, pandas_containers):
        # Rely on pandas to unbox and dispatch to us.
        return NotImplemented
    is_equal = self == other
    return ~is_equal

def __hash__(self):
Expand Down
41 changes: 38 additions & 3 deletions text_extensions_for_pandas/array/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@
import numpy as np
import pandas as pd
from pandas.compat import set_function_name
from pandas.core import ops
from pandas.core.dtypes.generic import ABCDataFrame, ABCIndexClass, ABCSeries
from pandas.core.indexers import check_array_indexer, validate_indices

Expand Down Expand Up @@ -117,7 +116,7 @@ def _binop(self, other):

return result_wrapped

op_name = ops._get_op_name(op, True)
op_name = f"__{op.__name__}__"
return set_function_name(_binop, op_name, cls)


Expand Down Expand Up @@ -336,7 +335,7 @@ def astype(self, dtype, copy=True):
dtype = pd.api.types.pandas_dtype(dtype)

if isinstance(dtype, TensorDtype):
values = TensorArray(self._tensor.copy() if copy else self._tensor)
values = TensorArray(self._tensor.copy()) if copy else self
elif not pd.api.types.is_object_dtype(dtype) and \
pd.api.types.is_string_dtype(dtype):
values = np.array([str(t) for t in self._tensor])
Expand All @@ -348,6 +347,35 @@ def astype(self, dtype, copy=True):
values = self._tensor.astype(dtype, copy=copy)
return values

def any(self, axis=None, out=None, keepdims=False):
    """
    Test whether any array element along a given axis evaluates to True.

    The reduction is delegated to the ``any`` method of the underlying
    tensor. See numpy.any() documentation for more information
    https://numpy.org/doc/stable/reference/generated/numpy.any.html#numpy.any

    :param axis: Axis or axes along which a logical OR reduction is performed.
    :param out: Alternate output array in which to place the result.
    :param keepdims: If this is set to True, the axes which are reduced are left in the
     result as dimensions with size one.
    :return: the reduction result as-is when `axis` is None; otherwise the
     result wrapped in a new TensorArray.
    """
    reduced = self._tensor.any(axis=axis, out=out, keepdims=keepdims)
    if axis is None:
        return reduced
    return TensorArray(reduced)

def all(self, axis=None, out=None, keepdims=False):
    """
    Test whether all array elements along a given axis evaluate to True.

    The reduction is delegated to the ``all`` method of the underlying tensor.

    :param axis: Axis or axes along which a logical AND reduction is performed.
    :param out: Alternate output array in which to place the result.
    :param keepdims: If this is set to True, the axes which are reduced are left in the
     result as dimensions with size one.
    :return: the reduction result as-is when `axis` is None; otherwise the
     result wrapped in a new TensorArray.
    """
    reduced = self._tensor.all(axis=axis, out=out, keepdims=keepdims)
    if axis is None:
        return reduced
    return TensorArray(reduced)

def __len__(self) -> int:
return len(self._tensor)

Expand Down Expand Up @@ -389,6 +417,13 @@ def __setitem__(self, key: Union[int, np.ndarray], value: Any) -> None:
raise NotImplementedError(f"__setitem__ with key type '{type(key)}' "
f"not implemented")

def __contains__(self, item) -> bool:
    """
    Membership test with special handling for a single-NaN TensorElement.

    When `item` is a TensorElement whose array form holds exactly one value
    and that value is NaN, the answer is whether any entry of this array is
    NA. Every other case is deferred to the parent class implementation.
    """
    if not isinstance(item, TensorElement):
        return super().__contains__(item)

    as_array = np.asarray(item)
    # Size is checked first so the NaN test only runs on one-element items.
    if as_array.size == 1 and np.isnan(as_array).all():
        return self.isna().any()
    return super().__contains__(item)

def __repr__(self):
"""
See docstring in `ExtensionArray` class in `pandas/core/arrays/base.py`
Expand Down
15 changes: 9 additions & 6 deletions text_extensions_for_pandas/array/test_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -542,12 +542,15 @@ def _compare_other(self, s, data, op_name, other):
# Compare with scalar
other = data[0]

# TODO check result
op(data, other)

@pytest.mark.skip("assert result is NotImplemented")
def test_direct_arith_with_series_returns_not_implemented(self, data):
pass
result = op(data, other)

if op_name in ["__gt__", "__ne__"]:
assert not result[0]
assert result[1:].all()
elif op_name in ["__lt__", "__eq__"]:
assert not result.all()
else:
raise NotImplementedError("Unknown Operation Comparison")


class TestPandasReshaping(base.BaseReshapingTests):
Expand Down
29 changes: 10 additions & 19 deletions text_extensions_for_pandas/array/test_tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import pandas as pd
import pandas.testing as pdt
from pandas.tests.extension import base
from pandas.core.dtypes.generic import ABCSeries
import pyarrow as pa
import pytest

Expand Down Expand Up @@ -811,24 +812,7 @@ def test_reindex(self, data, na_value):


class TestPandasSetitem(base.BaseSetitemTests):
# NOTE(review): this view shows both a test method and a trailing bare `pass`
# in the class body -- confirm which of the two is meant to remain.

def test_setitem_mask_boolean_array_with_na(self, data, box_in_series):
# Nullable-boolean mask shaped like `data`: first three entries True,
# the next two NA, everything else False.
mask = pd.array(np.zeros(data.shape, dtype="bool"), dtype="boolean")
mask[:3] = True
mask[3:5] = pd.NA

# Optionally run the same assignment through a Series wrapper.
if box_in_series:
data = pd.Series(data)

# Assign the first element to every position selected by the mask.
data[mask] = data[0]

result = data[:3]
if box_in_series:
# Must unwrap Series
result = result.values

# Must compare all values of result
assert np.all(result == data[0])
pass


class TestPandasMissing(base.BaseMissingTests):
Expand All @@ -853,11 +837,18 @@ class TestPandasArithmeticOps(base.BaseArithmeticOpsTests):
base.BaseArithmeticOpsTests.frame_scalar_exc = None
base.BaseArithmeticOpsTests.divmod_exc = NotImplementedError

def test_arith_series_with_scalar(self, data, all_arithmetic_operators):
    """ Series-with-scalar arithmetic, overridden to prevent div by zero warning."""
    # The first element of `data` is dropped to avoid zero values for div.
    series = pd.Series(data[1:])
    scalar = series.iloc[0]
    self.check_opname(
        series, all_arithmetic_operators, scalar, exc=self.series_scalar_exc
    )

def test_arith_series_with_array(self, data, all_arithmetic_operators):
""" Override: a Series created from a list of TensorElements would get dtype=object,
so the right-hand Series is built with an explicit dtype=TensorDtype(). """
# ndarray & other series
op_name = all_arithmetic_operators
# NOTE(review): `s` is assigned on two consecutive lines; only the second
# assignment (which drops the first element) is in effect afterwards.
s = pd.Series(data)
s = pd.Series(data[1:]) # Avoid zero values for div
# Right-hand operand: the first element of `s` repeated len(s) times.
self.check_opname(
s, op_name, pd.Series([s.iloc[0]] * len(s), dtype=TensorDtype()), exc=self.series_array_exc
)
Expand Down
13 changes: 8 additions & 5 deletions text_extensions_for_pandas/array/test_token_span.py
Original file line number Diff line number Diff line change
Expand Up @@ -560,12 +560,15 @@ def _compare_other(self, s, data, op_name, other):
# Compare with scalar
other = data[0]

# TODO check result
op(data, other)
result = op(data, other)

@pytest.mark.skip("assert result is NotImplemented")
def test_direct_arith_with_series_returns_not_implemented(self, data):
pass
if op_name in ["__gt__", "__ne__"]:
assert not result[0]
assert result[1:].all()
elif op_name in ["__lt__", "__eq__"]:
assert not result.all()
else:
raise NotImplementedError("Unknown Operation Comparison")


class TestPandasReshaping(base.BaseReshapingTests):
Expand Down
7 changes: 5 additions & 2 deletions text_extensions_for_pandas/io/watson/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,11 @@
# limitations under the License.
#

from distutils.version import LooseVersion
import json
import os
import textwrap
import unittest
import pandas as pd
import pytest

from text_extensions_for_pandas.io.watson.tables import *

Expand Down Expand Up @@ -445,6 +446,8 @@ def test_make_exploded_df(self):
15 Total tax rate \
""")

@pytest.mark.skipif(LooseVersion(pd.__version__) >= LooseVersion("1.2.0"),
reason="TODO: Rank col gets converted to float")
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure why this is different now, but I think it's safe to skip and I can make an issue to follow up with later

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Works for me.

def test_make_table(self):
double_header_table = make_table(parse_response(self.responses_dict["double_header_table"]))
self.assertEqual(repr(double_header_table), """\
Expand Down