Skip to content

Commit

Permalink
chore(deps): bump pandas >=2.0 (apache#24705)
Browse files (browse the repository at this point in the history)
Co-authored-by: EugeneTorap <evgenykrutpro@gmail.com>
  • Loading branch information
sebastianliebscher and EugeneTorap authored Jul 20, 2023
1 parent 05e724f commit 91e6f5c
Show file tree
Hide file tree
Showing 10 changed files with 33 additions and 34 deletions.
4 changes: 3 additions & 1 deletion requirements/base.txt
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ packaging==23.1
# deprecation
# limits
# marshmallow
pandas==1.5.3
pandas==2.0.3
# via apache-superset
paramiko==2.11.0
# via sshtunnel
Expand Down Expand Up @@ -288,6 +288,8 @@ typing-extensions==4.4.0
# apache-superset
# flask-limiter
# limits
tzdata==2023.3
# via pandas
urllib3==1.26.6
# via selenium
vine==5.0.0
Expand Down
2 changes: 0 additions & 2 deletions requirements/testing.txt
Original file line number Diff line number Diff line change
Expand Up @@ -152,8 +152,6 @@ tqdm==4.65.0
# prophet
trino==0.324.0
# via apache-superset
tzdata==2023.3
# via pytz-deprecation-shim
tzlocal==4.3
# via trino
websocket-client==1.5.1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def get_git_sha() -> str:
"nh3>=0.2.11, <0.3",
"numpy==1.23.5",
"packaging",
"pandas>=1.5.3, <1.6",
"pandas>=2.0.3, <2.1",
"parsedatetime",
"pgsanity",
"polyline>=2.0.0, <3.0",
Expand Down
6 changes: 2 additions & 4 deletions superset/common/query_context_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,17 +138,15 @@ def get_df_payload(

if query_obj and cache_key and not cache.is_loaded:
try:
invalid_columns = [
if invalid_columns := [
col
for col in get_column_names_from_columns(query_obj.columns)
+ get_column_names_from_metrics(query_obj.metrics or [])
if (
col not in self._qc_datasource.column_names
and col != DTTM_ALIAS
)
]

if invalid_columns:
]:
raise QueryObjectValidationError(
_(
"Columns missing in dataset: %(invalid_columns)s",
Expand Down
2 changes: 1 addition & 1 deletion superset/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,7 +761,7 @@ class D3Format(TypedDict, total=False):
# Excel Options: key/value pairs that will be passed as keyword arguments to the
# DataFrame.to_excel method.
# Note: the index option should not be overridden.
EXCEL_EXPORT = {"encoding": "utf-8"}
EXCEL_EXPORT: dict[str, Any] = {}

# ---------------------------------------------------
# Time grain configurations
Expand Down
11 changes: 7 additions & 4 deletions superset/reports/notifications/slack.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from typing import Union

import backoff
import pandas as pd
from flask_babel import gettext as __
from slack_sdk import WebClient
from slack_sdk.errors import (
Expand Down Expand Up @@ -121,17 +122,19 @@ def _get_body(self) -> str:
# need to truncate the data
for i in range(len(df) - 1):
truncated_df = df[: i + 1].fillna("")
truncated_df = truncated_df.append(
{k: "..." for k in df.columns}, ignore_index=True
truncated_row = pd.Series({k: "..." for k in df.columns})
truncated_df = pd.concat(
[truncated_df, truncated_row.to_frame().T], ignore_index=True
)
tabulated = df.to_markdown()
table = f"```\n{tabulated}\n```\n\n(table was truncated)"
message = self._message_template(table)
if len(message) > MAXIMUM_MESSAGE_SIZE:
# Decrement i and build a message that is under the limit
truncated_df = df[:i].fillna("")
truncated_df = truncated_df.append(
{k: "..." for k in df.columns}, ignore_index=True
truncated_row = pd.Series({k: "..." for k in df.columns})
truncated_df = pd.concat(
[truncated_df, truncated_row.to_frame().T], ignore_index=True
)
tabulated = df.to_markdown()
table = (
Expand Down
2 changes: 0 additions & 2 deletions superset/views/database/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ def form_post(self, form: CsvToDatabaseForm) -> Response:
infer_datetime_format=form.infer_datetime_format.data,
iterator=True,
keep_default_na=not form.null_values.data,
mangle_dupe_cols=form.overwrite_duplicate.data,
usecols=form.use_cols.data if form.use_cols.data else None,
na_values=form.null_values.data if form.null_values.data else None,
nrows=form.nrows.data,
Expand Down Expand Up @@ -344,7 +343,6 @@ def form_post(self, form: ExcelToDatabaseForm) -> Response:
index_col=form.index_col.data,
io=form.excel_file.data,
keep_default_na=not form.null_values.data,
mangle_dupe_cols=form.mangle_dupe_cols.data,
na_values=form.null_values.data if form.null_values.data else None,
parse_dates=form.parse_dates.data,
skiprows=form.skiprows.data,
Expand Down
8 changes: 4 additions & 4 deletions superset/viz.py
Original file line number Diff line number Diff line change
Expand Up @@ -2636,7 +2636,7 @@ def levels_for(
for i in range(0, len(groups) + 1):
agg_df = df.groupby(groups[:i]) if i else df
levels[i] = (
agg_df.mean()
agg_df.mean(numeric_only=True)
if time_op == "agg_mean"
else agg_df.sum(numeric_only=True)
)
Expand All @@ -2661,7 +2661,7 @@ def levels_for_diff(
lambda a, b, fill_value: a / float(b) - 1,
],
}[time_op]
agg_df = df.groupby(DTTM_ALIAS).sum()
agg_df = df.groupby(DTTM_ALIAS).sum(numeric_only=True)
levels = {
0: pd.Series(
{
Expand All @@ -2671,7 +2671,7 @@ def levels_for_diff(
)
}
for i in range(1, len(groups) + 1):
agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum()
agg_df = df.groupby([DTTM_ALIAS] + groups[:i]).sum(numeric_only=True)
levels[i] = pd.DataFrame(
{
m: func[0](agg_df[m][until], agg_df[m][since], fill_value=0)
Expand All @@ -2687,7 +2687,7 @@ def levels_for_time(
procs = {}
for i in range(0, len(groups) + 1):
self.form_data["groupby"] = groups[:i]
df_drop = df.drop(groups[i:], 1)
df_drop = df.drop(groups[i:], axis=1)
procs[i] = self.process_data(df_drop, aggregate=True)
self.form_data["groupby"] = groups
return procs
Expand Down
2 changes: 1 addition & 1 deletion tests/integration_tests/viz_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -627,7 +627,7 @@ def test_nest_procs_returns_hierarchy(self):
metrics = ["metric1", "metric2", "metric3"]
procs = {}
for i in range(0, 4):
df_drop = df.drop(groups[i:], 1)
df_drop = df.drop(groups[i:], axis=1)
pivot = df_drop.pivot_table(
index=DTTM_ALIAS, columns=groups[:i], values=metrics
)
Expand Down
28 changes: 14 additions & 14 deletions tests/unit_tests/pandas_postprocessing/test_rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,21 +149,21 @@ def test_rolling_after_pivot_with_single_metric():
sum_metric
country UK US
dttm
2019-01-01 5.0 6.0
2019-01-02 12.0 14.0
2019-01-01 5 6
2019-01-02 12 14
"""
flat_df = pp.flatten(rolling_df)
"""
dttm sum_metric, UK sum_metric, US
0 2019-01-01 5.0 6.0
1 2019-01-02 12.0 14.0
0 2019-01-01 5 6
1 2019-01-02 12 14
"""
assert flat_df.equals(
pd.DataFrame(
data={
"dttm": pd.to_datetime(["2019-01-01", "2019-01-02"]),
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5.0, 12.0],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6.0, 14.0],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5, 12],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6, 14],
}
)
)
Expand Down Expand Up @@ -200,23 +200,23 @@ def test_rolling_after_pivot_with_multiple_metrics():
count_metric sum_metric
country UK US UK US
dttm
2019-01-01 1.0 2.0 5.0 6.0
2019-01-02 4.0 6.0 12.0 14.0
2019-01-01 1 2 5 6
2019-01-02 4 6 12 14
"""
flat_df = pp.flatten(rolling_df)
"""
dttm count_metric, UK count_metric, US sum_metric, UK sum_metric, US
0 2019-01-01 1.0 2.0 5.0 6.0
1 2019-01-02 4.0 6.0 12.0 14.0
0 2019-01-01 1 2 5 6
1 2019-01-02 4 6 12 14
"""
assert flat_df.equals(
pd.DataFrame(
data={
"dttm": pd.to_datetime(["2019-01-01", "2019-01-02"]),
FLAT_COLUMN_SEPARATOR.join(["count_metric", "UK"]): [1.0, 4.0],
FLAT_COLUMN_SEPARATOR.join(["count_metric", "US"]): [2.0, 6.0],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5.0, 12.0],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6.0, 14.0],
FLAT_COLUMN_SEPARATOR.join(["count_metric", "UK"]): [1, 4],
FLAT_COLUMN_SEPARATOR.join(["count_metric", "US"]): [2, 6],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "UK"]): [5, 12],
FLAT_COLUMN_SEPARATOR.join(["sum_metric", "US"]): [6, 14],
}
)
)

0 comments on commit 91e6f5c

Please sign in to comment.