From dfbc9fb35c42b340521a40134666aefef84c4f44 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Thu, 28 Dec 2023 18:02:10 +0100 Subject: [PATCH 1/8] Workaround new pandas behaviour --- src/scmdata/run.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index d3a98341..84c5aa5c 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -2617,9 +2617,10 @@ def run_append( # noqa: PLR0912, PLR0915 ret._df = pd.concat([ret._df, *to_join_dfs], axis="columns").sort_index() ret._time_points = TimePoints(ret._df.index.values) ret._df.index = ret._time_points.to_index() - ret._meta = pd.MultiIndex.from_frame( - pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category") - ) + if not all(m.empty for m in to_join_metas): + ret._meta = pd.MultiIndex.from_frame( + pd.concat([ret._meta.to_frame(), *to_join_metas]).astype("category") + ) if ret._duplicated_meta(): if overlapping_times and duplicate_msg: From b22baa5f053bc50e049f069de59b99288460c022 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 22 Jan 2024 17:01:35 +0100 Subject: [PATCH 2/8] Remove seaborn misuse too --- docs/source/notebooks/plotting-with-seaborn.py | 14 +------------- src/scmdata/plotting.py | 2 +- src/scmdata/run.py | 8 +++++++- 3 files changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/source/notebooks/plotting-with-seaborn.py b/docs/source/notebooks/plotting-with-seaborn.py index 7fe49f09..ecf4e9c2 100644 --- a/docs/source/notebooks/plotting-with-seaborn.py +++ b/docs/source/notebooks/plotting-with-seaborn.py @@ -6,7 +6,7 @@ # extension: .py # format_name: percent # format_version: '1.3' -# jupytext_version: 1.14.5 +# jupytext_version: 1.15.2 # kernelspec: # display_name: Python 3 (ipykernel) # language: python @@ -23,7 +23,6 @@ # more than the most basic plots. 
# %% - import matplotlib.pyplot as plt import seaborn as sns @@ -36,7 +35,6 @@ # For this notebook we use the RCMIP radiative forcings, available at rcmip.org. # %% - rcmip_db = ScmRun("rcmip-radiative-forcing-annual-means-v4-0-0.csv") rcmip_db.head() @@ -46,7 +44,6 @@ # For the most common plotting patterns, we provide a very simple `lineplot` method in `ScmRun`. # %% - out = rcmip_db.filter(variable="Effective Radiative Forcing").lineplot() out @@ -60,7 +57,6 @@ # specify the order to display the scenarios in. # %% - ax = plt.figure(figsize=(16, 9)).add_subplot(111) rcmip_db.filter(variable="Effective Radiative Forcing").lineplot( ax=ax, @@ -82,7 +78,6 @@ print(rcmip_db.lineplot.__doc__) # %% - fig, axes = plt.subplots(figsize=(16, 9), nrows=2, ncols=2) pdb = rcmip_db.filter(variable="Effective Radiative Forcing") @@ -113,11 +108,9 @@ # These same options can also be passed to the `timeseries` and `long_data` methods. # %% - rcmip_db.timeseries(time_axis="year-month") # %% - rcmip_db.long_data(time_axis="days since 1970-01-01") # %% [markdown] @@ -146,7 +139,6 @@ vars_to_plot # %% - seaborn_df = rcmip_db.filter(variable=vars_to_plot).long_data() seaborn_df.head() @@ -155,7 +147,6 @@ # [seaborn.relplot](https://seaborn.pydata.org/generated/seaborn.relplot.html). # %% - sns.relplot( data=seaborn_df, x="time", @@ -178,17 +169,14 @@ # different scenarios. In such a case we can reshape the data using pandas before using seaborn. 
# %% - ts = rcmip_db.filter(variable=vars_to_plot[:4]).timeseries() ts.head() # %% - ts_reshaped = ts.unstack("variable").stack("time").reset_index() ts_reshaped.head() # %% - sns.pairplot( ts_reshaped, hue="scenario", diff --git a/src/scmdata/plotting.py b/src/scmdata/plotting.py index 2b1cfe3a..a567541f 100644 --- a/src/scmdata/plotting.py +++ b/src/scmdata/plotting.py @@ -75,7 +75,7 @@ def lineplot(self, time_axis=None, **kwargs): # pragma: no cover if "scenario" in self.meta_attributes: kwargs.setdefault("hue", "scenario") - kwargs.setdefault("ci", "sd") + kwargs.setdefault("errorbar", "sd") kwargs.setdefault("estimator", np.median) ax = sns.lineplot(data=plt_df, **kwargs) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index 84c5aa5c..11e76fc5 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -59,7 +59,6 @@ from .offsets import generate_range, to_offset from .ops import inject_ops_methods from .plotting import inject_plotting_methods -from .pyam_compat import IamDataFrame, LongDatetimeIamDataFrame from .time import _TARGET_DTYPE, TimePoints, TimeseriesConverter from .units import UnitConverter @@ -75,6 +74,8 @@ from scmdata.groupby import RunGroupBy + from .pyam_compat import LongDatetimeIamDataFrame + P = ParamSpec("P") @@ -513,6 +514,8 @@ def _init_timeseries( copy_data: bool = False, **kwargs: Any, ) -> None: + from .pyam_compat import IamDataFrame + if isinstance(data, np.ndarray): if columns is None: raise ValueError("`columns` argument is required") @@ -2366,6 +2369,9 @@ def to_iamdataframe(self) -> LongDatetimeIamDataFrame: # pragma: no cover ImportError If `pyam `_ is not installed """ + # Lazy load + from .pyam_compat import LongDatetimeIamDataFrame + if LongDatetimeIamDataFrame is None: raise ImportError( "pyam is not installed. 
Features involving IamDataFrame are unavailable" From 55836b5fdbcbd08ae844099208acd4bbf4516174 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 22 Jan 2024 17:07:55 +0100 Subject: [PATCH 3/8] CHANGELOG --- changelog/298.improvement.md | 3 +++ changelog/298.trivial.md | 1 + 2 files changed, 4 insertions(+) create mode 100644 changelog/298.improvement.md create mode 100644 changelog/298.trivial.md diff --git a/changelog/298.improvement.md b/changelog/298.improvement.md new file mode 100644 index 00000000..9b9e33cd --- /dev/null +++ b/changelog/298.improvement.md @@ -0,0 +1,3 @@ +Update to avoid hitting DeprecationWarning in pandas and seaborn + +This should help reduce the number of warnings that appear when doing common operations. diff --git a/changelog/298.trivial.md b/changelog/298.trivial.md new file mode 100644 index 00000000..357be858 --- /dev/null +++ b/changelog/298.trivial.md @@ -0,0 +1 @@ +Lazy load pyam From 779804508215aee07645baeab2d7e65c761fd35f Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 22 Jan 2024 17:16:10 +0100 Subject: [PATCH 4/8] Fix up tests --- tests/unit/test_plotting.py | 9 +++++++-- tests/unit/test_pyam_compat.py | 7 ------- tests/unit/test_run.py | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/unit/test_plotting.py b/tests/unit/test_plotting.py index 01fd9056..a7aa2113 100644 --- a/tests/unit/test_plotting.py +++ b/tests/unit/test_plotting.py @@ -74,7 +74,12 @@ def test_lineplot(mock_long_data, mock_seaborn_lineplot, scm_run): mock_long_data.assert_called_with(time_axis="year") mock_seaborn_lineplot.assert_called_with( - ci="sd", data=trv, estimator=np.median, hue="scenario", x="time", y="value" + errorbar="sd", + data=trv, + estimator=np.median, + hue="scenario", + x="time", + y="value", ) @@ -85,7 +90,7 @@ def test_lineplot_kwargs(mock_long_data, mock_seaborn_lineplot, scm_run): "x": "x", "y": "y", "hue": "hue", - "ci": "ci", + "errorbar": "errorbar", "estimator": "estimator", } trv = "test 
long_data return value" diff --git a/tests/unit/test_pyam_compat.py b/tests/unit/test_pyam_compat.py index 65f12a3a..77fd6d67 100644 --- a/tests/unit/test_pyam_compat.py +++ b/tests/unit/test_pyam_compat.py @@ -1,5 +1,4 @@ import re -from unittest import mock import pandas as pd import pytest @@ -28,9 +27,3 @@ def test_to_int_value_error(test_iam_df): with pytest.raises(ValueError, match=error_msg): LongDatetimeIamDataFrame(idf) - - -@mock.patch("scmdata.run.LongDatetimeIamDataFrame", None) -def test_pyam_missing(scm_run): - with pytest.raises(ImportError): - scm_run.to_iamdataframe() diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py index 98dd6bf6..5373c283 100644 --- a/tests/unit/test_run.py +++ b/tests/unit/test_run.py @@ -3571,7 +3571,7 @@ def test_lineplot_time_axis(scm_run, time_axis, mod_func): x="time", y="value", estimator=np.median, - ci="sd", + errorbar="sd", hue="scenario", other_kwarg="value", data=mock_return, From b546fb8517db4956c64dc0ba440938c00aad0463 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 22 Jan 2024 17:17:10 +0100 Subject: [PATCH 5/8] Add missing comment --- src/scmdata/run.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index 11e76fc5..d6ad26ec 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -514,6 +514,7 @@ def _init_timeseries( copy_data: bool = False, **kwargs: Any, ) -> None: + # Lazy load from .pyam_compat import IamDataFrame if isinstance(data, np.ndarray): From fd48c827ed7a731e0be700e3bde44b224a6c72de Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 22 Jan 2024 17:28:12 +0100 Subject: [PATCH 6/8] More info on failure --- tests/integration/test_plotting_integration.py | 17 +++++------------ tests/unit/test_run.py | 4 ++-- 2 files changed, 7 insertions(+), 14 deletions(-) diff --git a/tests/integration/test_plotting_integration.py b/tests/integration/test_plotting_integration.py index 8098db6c..50bed478 100644 --- 
a/tests/integration/test_plotting_integration.py +++ b/tests/integration/test_plotting_integration.py @@ -162,7 +162,7 @@ def test_plumeplot_pre_calculated_no_plume_for_one_no_median_for_other_different style_var="climate_model", ) - assert len(record) == 3 + assert len(record) == 3, record assert ( record[0].message.args[0] == "Quantile 0.05 not available for a_scenario a_model" @@ -202,19 +202,12 @@ def test_plumeplot_non_unique_lines(plumeplot_scmrun): error_msg = re.escape( "More than one timeseries for " - "quantile: {}, " - "scenario: {}, " - "variable: {}.\n" + f"quantile: {quantile}, " + f"scenario: {scenario}, " + f"variable: {variable}.\n" "Please process your data to create unique quantile timeseries " "before calling :meth:`plumeplot`.\n" - "Found: {}".format( - quantile, - scenario, - variable, - summary_stats.filter( - quantile=quantile, scenario=scenario, variable=variable - ), - ) + f"Found: {summary_stats.filter(quantile=quantile, scenario=scenario, variable=variable)}" ) with pytest.raises(ValueError, match=error_msg): summary_stats.plumeplot(pre_calculated=True) diff --git a/tests/unit/test_run.py b/tests/unit/test_run.py index 5373c283..083c99fd 100644 --- a/tests/unit/test_run.py +++ b/tests/unit/test_run.py @@ -2031,7 +2031,7 @@ def test_append_duplicate_times(test_append_scm_runs, duplicate_msg): "Duplicate time points detected, the output will be the average of " "the duplicates. Set `duplicate_msg=False` to silence this message." 
) - assert len(mock_warn_taking_average) == 1 + assert len(mock_warn_taking_average) == 1, mock_warn_taking_average assert str(mock_warn_taking_average[0].message) == warn_msg else: assert not mock_warn_taking_average @@ -2050,7 +2050,7 @@ def test_append_doesnt_warn_if_continuous_times(test_append_scm_runs): with warnings.catch_warnings(record=True) as mock_warn_taking_average: base.append(other) - assert len(mock_warn_taking_average) == 0 + assert len(mock_warn_taking_average) == 0, mock_warn_taking_average @pytest.mark.filterwarnings("ignore::DeprecationWarning") From 8643ec32da8f86f78bf242dd69f1e3c7bfa14541 Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Tue, 23 Jan 2024 07:57:38 +0100 Subject: [PATCH 7/8] Fix yet another deprecation warning --- src/scmdata/run.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/scmdata/run.py b/src/scmdata/run.py index d6ad26ec..ef6affce 100644 --- a/src/scmdata/run.py +++ b/src/scmdata/run.py @@ -875,7 +875,7 @@ def timeseries( raise NonUniqueMetadataError(_meta) if time_axis is None: - columns = self._time_points.to_index() + columns = self._time_points.to_index().infer_objects() elif time_axis == "year": columns = self._time_points.years() elif time_axis == "year-month": @@ -906,8 +906,11 @@ def calc_seconds(x): if len(np.unique(columns)) != len(columns): raise ValueError(f"Ambiguous time values with time_axis = '{time_axis}'") - df.columns = pd.Index(columns, name="time") df.index = pd.MultiIndex.from_frame(_meta) + if isinstance(columns, pd.Index): + df.columns = columns + else: + df.columns = pd.Index(columns, name="time") if drop_all_nan_times: df = df.dropna(how="all", axis="columns") From ea5bcb57ab10c50797924cca48dd0c475fb3978c Mon Sep 17 00:00:00 2001 From: Zebedee Nicholls Date: Mon, 29 Jan 2024 07:51:50 +0100 Subject: [PATCH 8/8] Apply seaborn pin --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 
aacd5af2..b0418d47 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ xarray = "*" nc-time-axis = { version = ">=1.2.0", optional = true } typing-extensions = "*" matplotlib = { version = "^3.7.1", optional = true } -seaborn = { version = "*", optional = true } +seaborn = { version = ">=0.12.0", optional = true } netCDF4 = { version = "*", optional = true } openpyxl = { version = "*", optional = true } xlrd = { version = "*", optional = true }