Skip to content

Commit

Permalink
Added performance changes for pandas for the finding and replacement …
Browse files Browse the repository at this point in the history
…of tokens.
  • Loading branch information
Veonms authored Nov 7, 2023
1 parent d8bca19 commit 7236ff6
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions superset/aric_detokeniser.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,13 +40,18 @@


async def detokenise_post_process(df: DataFrame) -> DataFrame:
    """Replace tokenised cell values in *df* with their detokenised form.

    Every string cell starting with ``'t:'`` is treated as a token. The unique
    tokens are sent in one POST to ``config['DETOKENISE_POST_URL']`` and each
    token is then replaced by the value paired with it in the response.
    Cells whose token has no entry in the lookup are left unchanged.

    Args:
        df: Frame to process. Its columns are reassigned in place.

    Returns:
        The same ``df`` object that was passed in.
    """
    # Remember which columns actually hold tokens so only those are rewritten.
    token_columns = []
    filtered_tokens = set()

    for col_name in df.columns:
        column = df[col_name]
        try:
            is_token = column.str.startswith('t:', na=False)
        except AttributeError:
            # The ``.str`` accessor only exists for string-like columns;
            # numeric, datetime, etc. columns cannot contain tokens.
            continue
        if is_token.any():
            token_columns.append(col_name)
            filtered_tokens.update(column.loc[is_token])

    if not filtered_tokens:
        # Nothing to detokenise: skip the round trip to the service.
        return df

    # Freeze the iteration order once so the ids sent and the values
    # received are paired from the same sequence.
    token_list = list(filtered_tokens)

    # NOTE(review): assumes the service returns one value per id, in request
    # order - confirm against the detokenise API.
    # NOTE(review): an earlier revision called ``.json()`` on the result;
    # confirm that ``.result()`` alone yields the iterable of values here.
    # NOTE(review): ``.result()`` presumably blocks; inside ``async def`` that
    # would stall the event loop - verify how ``session`` is configured.
    detokenised_values = session.post(
        config['DETOKENISE_POST_URL'],
        data=json.dumps({"id": token_list})).result()

    result_dict = dict(zip(token_list, detokenised_values))

    for col_name in token_columns:
        # ``get(x, x)`` keeps non-token cells and unresolved tokens untouched.
        df[col_name] = df[col_name].map(lambda x: result_dict.get(x, x))

    return df

0 comments on commit 7236ff6

Please sign in to comment.