From 8dd07b34343e84e236edc4121fb05960e6b693e7 Mon Sep 17 00:00:00 2001 From: Xun Liu <30335480+AntiQuality@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:24:14 +0800 Subject: [PATCH] Field name in the data loader of perspective_api_annotate.py --- src/dt/perspectives/toxicity/perspective_api_annotate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dt/perspectives/toxicity/perspective_api_annotate.py b/src/dt/perspectives/toxicity/perspective_api_annotate.py index 04418186..25171b04 100644 --- a/src/dt/perspectives/toxicity/perspective_api_annotate.py +++ b/src/dt/perspectives/toxicity/perspective_api_annotate.py @@ -87,9 +87,9 @@ def split_lines(lines, split): def get_score(line): data = json.loads(line) - text = data['text'] + text = data['continuation'] text = text.replace("<|endoftext|>", "") - data['text'] = text + data['continuation'] = text if not text.strip(): data['score'] = None return json.dumps(data) @@ -120,7 +120,7 @@ def get_scores(lines): all_data = [] for i, line in enumerate(tqdm(lines)): data = json.loads(line) - text = data['text'] + text = data['continuation'] if not text.strip(): data['score'] = None all_data.append(json.dumps(data))