Merge pull request #298 from openscm/future-warning-pd

Fix deprecation warnings
openscm · Jan 29, 2024 · de09428 · de09428
2 parents 7813392 + ea5bcb5
commit de09428
Show file tree

Hide file tree

Showing 10 changed files with 39 additions and 45 deletions.
diff --git a/changelog/298.improvement.md b/changelog/298.improvement.md
@@ -0,0 +1,3 @@
+Update to avoid triggering deprecation warnings in pandas and seaborn
+
+This should reduce the number of warnings that appear during common operations.
diff --git a/changelog/298.trivial.md b/changelog/298.trivial.md
@@ -0,0 +1 @@
+Lazy load pyam
diff --git a/docs/source/notebooks/plotting-with-seaborn.py b/docs/source/notebooks/plotting-with-seaborn.py
@@ -6,7 +6,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.14.5
+#       jupytext_version: 1.15.2
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
@@ -23,7 +23,6 @@
 # more than the most basic plots.
 
 # %%
-
 import matplotlib.pyplot as plt
 import seaborn as sns
 
@@ -36,7 +35,6 @@
 # For this notebook we use the RCMIP radiative forcings, available at rcmip.org.
 
 # %%
-
 rcmip_db = ScmRun("rcmip-radiative-forcing-annual-means-v4-0-0.csv")
 rcmip_db.head()
 
@@ -46,7 +44,6 @@
 # For the most common plotting patterns, we provide a very simple `lineplot` method in `ScmRun`.
 
 # %%
-
 out = rcmip_db.filter(variable="Effective Radiative Forcing").lineplot()
 out
 
@@ -60,7 +57,6 @@
 # specify the order to display the scenarios in.
 
 # %%
-
 ax = plt.figure(figsize=(16, 9)).add_subplot(111)
 rcmip_db.filter(variable="Effective Radiative Forcing").lineplot(
     ax=ax,
@@ -82,7 +78,6 @@
 print(rcmip_db.lineplot.__doc__)
 
 # %%
-
 fig, axes = plt.subplots(figsize=(16, 9), nrows=2, ncols=2)
 
 pdb = rcmip_db.filter(variable="Effective Radiative Forcing")
@@ -113,11 +108,9 @@
 # These same options can also be passed to the `timeseries` and `long_data` methods.
 
 # %%
-
 rcmip_db.timeseries(time_axis="year-month")
 
 # %%
-
 rcmip_db.long_data(time_axis="days since 1970-01-01")
 
 # %% [markdown]
@@ -146,7 +139,6 @@
 vars_to_plot
 
 # %%
-
 seaborn_df = rcmip_db.filter(variable=vars_to_plot).long_data()
 seaborn_df.head()
 
@@ -155,7 +147,6 @@
 # [seaborn.relplot](https://seaborn.pydata.org/generated/seaborn.relplot.html).
 
 # %%
-
 sns.relplot(
     data=seaborn_df,
     x="time",
@@ -178,17 +169,14 @@
 # different scenarios. In such a case we can reshape the data using pandas before using seaborn.
 
 # %%
-
 ts = rcmip_db.filter(variable=vars_to_plot[:4]).timeseries()
 ts.head()
 
 # %%
-
 ts_reshaped = ts.unstack("variable").stack("time").reset_index()
 ts_reshaped.head()
 
 # %%
-
 sns.pairplot(
     ts_reshaped,
     hue="scenario",

diff --git a/pyproject.toml b/pyproject.toml
@@ -42,7 +42,7 @@ xarray = "*"
 nc-time-axis = { version = ">=1.2.0", optional = true }
 typing-extensions = "*"
 matplotlib = { version = "^3.7.1", optional = true }
-seaborn = { version = "*", optional = true }
+seaborn = { version = ">=0.12.0", optional = true }
 netCDF4 = { version = "*", optional = true }
 openpyxl = { version = "*", optional = true }
 xlrd = { version = "*", optional = true }

diff --git a/src/scmdata/plotting.py b/src/scmdata/plotting.py
@@ -75,7 +75,7 @@ def lineplot(self, time_axis=None, **kwargs):  # pragma: no cover
     if "scenario" in self.meta_attributes:
         kwargs.setdefault("hue", "scenario")
 
-    kwargs.setdefault("ci", "sd")
+    kwargs.setdefault("errorbar", "sd")
     kwargs.setdefault("estimator", np.median)
 
     ax = sns.lineplot(data=plt_df, **kwargs)

diff --git a/src/scmdata/run.py b/src/scmdata/run.py
@@ -59,7 +59,6 @@
 from .offsets import generate_range, to_offset
 from .ops import inject_ops_methods
 from .plotting import inject_plotting_methods
-from .pyam_compat import IamDataFrame, LongDatetimeIamDataFrame
 from .time import _TARGET_DTYPE, TimePoints, TimeseriesConverter
 from .units import UnitConverter
 
@@ -75,6 +74,8 @@
 
     from scmdata.groupby import RunGroupBy
 
+    from .pyam_compat import LongDatetimeIamDataFrame
+
     P = ParamSpec("P")
 
 
@@ -513,6 +514,9 @@ def _init_timeseries(
         copy_data: bool = False,
         **kwargs: Any,
     ) -> None:
+        # Lazy load
+        from .pyam_compat import IamDataFrame
+
         if isinstance(data, np.ndarray):
             if columns is None:
                 raise ValueError("`columns` argument is required")
@@ -871,7 +875,7 @@ def timeseries(
             raise NonUniqueMetadataError(_meta)
 
         if time_axis is None:
-            columns = self._time_points.to_index()
+            columns = self._time_points.to_index().infer_objects()
         elif time_axis == "year":
             columns = self._time_points.years()
         elif time_axis == "year-month":
@@ -902,8 +906,11 @@ def calc_seconds(x):
         if len(np.unique(columns)) != len(columns):
             raise ValueError(f"Ambiguous time values with time_axis = '{time_axis}'")
 
-        df.columns = pd.Index(columns, name="time")
         df.index = pd.MultiIndex.from_frame(_meta)
+        if isinstance(columns, pd.Index):
+            df.columns = columns
+        else:
+            df.columns = pd.Index(columns, name="time")
 
         if drop_all_nan_times:
             df = df.dropna(how="all", axis="columns")
@@ -2366,6 +2373,9 @@ def to_iamdataframe(self) -> LongDatetimeIamDataFrame:  # pragma: no cover
         ImportError
             If `pyam <https://github.com/IAMconsortium/pyam>`_ is not installed
         """
+        # Lazy load
+        from .pyam_compat import LongDatetimeIamDataFrame
+
         if LongDatetimeIamDataFrame is None:
             raise ImportError(
                 "pyam is not installed. Features involving IamDataFrame are unavailable"
@@ -2617,9 +2627,10 @@ def run_append(  # noqa: PLR0912, PLR0915
     ret._df = pd.concat([ret._df, *to_join_dfs], axis="columns").sort_index()
     ret._time_points = TimePoints(ret._df.index.values)
     ret._df.index = ret._time_points.to_index()
-    ret._meta = pd.MultiIndex.from_frame(
-        pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category")
-    )
+    if not all(m.empty for m in to_join_metas):
+        ret._meta = pd.MultiIndex.from_frame(
+            pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category")
+        )
 
     if ret._duplicated_meta():
         if overlapping_times and duplicate_msg:

diff --git a/tests/integration/test_plotting_integration.py b/tests/integration/test_plotting_integration.py
@@ -162,7 +162,7 @@ def test_plumeplot_pre_calculated_no_plume_for_one_no_median_for_other_different
             style_var="climate_model",
         )
 
-    assert len(record) == 3
+    assert len(record) == 3, record
     assert (
         record[0].message.args[0]
         == "Quantile 0.05 not available for a_scenario a_model"
@@ -202,19 +202,12 @@ def test_plumeplot_non_unique_lines(plumeplot_scmrun):
 
     error_msg = re.escape(
         "More than one timeseries for "
-        "quantile: {}, "
-        "scenario: {}, "
-        "variable: {}.\n"
+        f"quantile: {quantile}, "
+        f"scenario: {scenario}, "
+        f"variable: {variable}.\n"
         "Please process your data to create unique quantile timeseries "
         "before calling :meth:`plumeplot`.\n"
-        "Found: {}".format(
-            quantile,
-            scenario,
-            variable,
-            summary_stats.filter(
-                quantile=quantile, scenario=scenario, variable=variable
-            ),
-        )
+        f"Found: {summary_stats.filter(quantile=quantile, scenario=scenario, variable=variable)}"
     )
     with pytest.raises(ValueError, match=error_msg):
         summary_stats.plumeplot(pre_calculated=True)

diff --git a/tests/unit/test_plotting.py b/tests/unit/test_plotting.py
@@ -74,7 +74,12 @@ def test_lineplot(mock_long_data, mock_seaborn_lineplot, scm_run):
     mock_long_data.assert_called_with(time_axis="year")
 
     mock_seaborn_lineplot.assert_called_with(
-        ci="sd", data=trv, estimator=np.median, hue="scenario", x="time", y="value"
+        errorbar="sd",
+        data=trv,
+        estimator=np.median,
+        hue="scenario",
+        x="time",
+        y="value",
     )
 
 
@@ -85,7 +90,7 @@ def test_lineplot_kwargs(mock_long_data, mock_seaborn_lineplot, scm_run):
         "x": "x",
         "y": "y",
         "hue": "hue",
-        "ci": "ci",
+        "errorbar": "errorbar",
         "estimator": "estimator",
     }
     trv = "test long_data return value"

diff --git a/tests/unit/test_pyam_compat.py b/tests/unit/test_pyam_compat.py
@@ -1,5 +1,4 @@
 import re
-from unittest import mock
 
 import pandas as pd
 import pytest
@@ -28,9 +27,3 @@ def test_to_int_value_error(test_iam_df):
 
     with pytest.raises(ValueError, match=error_msg):
         LongDatetimeIamDataFrame(idf)
-
-
-@mock.patch("scmdata.run.LongDatetimeIamDataFrame", None)
-def test_pyam_missing(scm_run):
-    with pytest.raises(ImportError):
-        scm_run.to_iamdataframe()
diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py
@@ -2031,7 +2031,7 @@ def test_append_duplicate_times(test_append_scm_runs, duplicate_msg):
             "Duplicate time points detected, the output will be the average of "
             "the duplicates.  Set `duplicate_msg=False` to silence this message."
         )
-        assert len(mock_warn_taking_average) == 1
+        assert len(mock_warn_taking_average) == 1, mock_warn_taking_average
         assert str(mock_warn_taking_average[0].message) == warn_msg
     else:
         assert not mock_warn_taking_average
@@ -2050,7 +2050,7 @@ def test_append_doesnt_warn_if_continuous_times(test_append_scm_runs):
     with warnings.catch_warnings(record=True) as mock_warn_taking_average:
         base.append(other)
 
-    assert len(mock_warn_taking_average) == 0
+    assert len(mock_warn_taking_average) == 0, mock_warn_taking_average
 
 
 @pytest.mark.filterwarnings("ignore::DeprecationWarning")
@@ -3571,7 +3571,7 @@ def test_lineplot_time_axis(scm_run, time_axis, mod_func):
         x="time",
         y="value",
         estimator=np.median,
-        ci="sd",
+        errorbar="sd",
         hue="scenario",
         other_kwarg="value",
         data=mock_return,