Skip to content

Commit

Permalink
Changed de-tokenisation to handle non-string values.
Browse files (browse the repository at this point in the history)
  • Loading branch information
Veonms authored Nov 7, 2023
1 parent 7236ff6 commit 2bbdf40
Showing 1 changed file with 8 additions and 2 deletions.
10 changes: 8 additions & 2 deletions superset/aric_detokeniser.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -40,11 +40,16 @@


async def detokenise_post_process(df: DataFrame) -> DataFrame:
df_copy = df.copy()

# Convert all values in the copied DataFrame to strings
df_copy = df_copy.astype(str)

filtered_tokens = set()

for col_name in df.columns:
for col_name in df_copy.columns:
filtered_tokens.update(
df[col_name].loc[df[col_name].str.startswith('t:', na=False)])
df_copy[col_name].loc[df_copy[col_name].str.startswith('t:', na=False)])

detokenised_values = session.post(config['DETOKENISE_POST_URL'],
data=json.dumps({"id": list(filtered_tokens)})).result()
Expand All @@ -55,3 +60,4 @@ async def detokenise_post_process(df: DataFrame) -> DataFrame:
df[col_name] = df[col_name].map(lambda x: result_dict.get(x, x))

return df

0 comments on commit 2bbdf40

Please sign in to comment.