Add benchmarking of Attack model for offensive language detection
ophelielacroix committed Aug 23, 2021
1 parent 96ed6b9 commit 02745e5
Showing 2 changed files with 104 additions and 9 deletions.
23 changes: 15 additions & 8 deletions docs/docs/tasks/hatespeech.md
@@ -10,9 +10,10 @@ Here are definitions of the previous concepts:
* hateful : targets a group or an individual with the intent to be harmful or to cause social chaos.


| Model              | Train Data                      | License         | Trained by          | Tags      | DaNLP |
|--------------------|---------------------------------|-----------------|---------------------|-----------|-------|
| [BERT](#bert)      | [DKHate](../datasets.md#dkhate) | CC BY 4.0       | Alexandra Institute | OFF / NOT | ✔️    |
| [A&ttack](#attack) | Facebook comments               | CC BY-NC-SA 4.0 | Analyse & Tal       | OFF / NOT |       |


### Use cases
@@ -46,18 +47,24 @@ pred = offensive_model.predict(sentence)
proba = offensive_model.predict_proba(sentence)
```

### A&ttack (Analyse & Tal) {#attack}

The A&ttack model detects whether a text is offensive or not. It was developed by [Analyse & Tal](https://ogtal.dk/), is based on the pretrained [Ælectra model](https://huggingface.co/Maltehb/-l-ctra-danish-electra-small-uncased) and was trained on social media data (67,188 tokens of Facebook comments).
See the [github repo](https://github.com/ogtal/A-ttack) for more details and the project [report](https://strapi.ogtal.dk/uploads/966f1ebcfa9942d3aef338e9920611f4.pdf).
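
The model is not shipped with DaNLP (see the empty DaNLP column above), but it can be run directly with `torch` and `transformers`. Below is a minimal sketch, assuming the `ElectraClassifier` definition from the A-ttack repo (`ogtal_model.py`) and its released `pytorch_model.bin`; the checkpoint name follows the benchmark script in this commit:

```python
import torch
from transformers import AutoTokenizer
from ogtal_model import ElectraClassifier  # class definition from the A-ttack repo

model_checkpoint = 'Maltehb/-l-ctra-danish-electra-small-cased'  # as in the benchmark script
tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
model = ElectraClassifier(model_checkpoint, num_labels=2)
model.load_state_dict(torch.load('pytorch_model.bin', map_location='cpu'))
model.eval()

# tokenize a sentence and predict OFF / NOT
enc = tokenizer("En eksempelsætning", truncation=True, max_length=512,
                padding='max_length', return_tensors='pt')
logits = model(enc['input_ids'], enc['attention_mask'])
label = ['NOT', 'OFF'][int(torch.argmax(logits, dim=1))]
```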


## 📈 Benchmarks

See detailed scoring of the benchmarks in the [examples](https://github.com/alexandrainst/danlp/tree/master/examples) folder.

The benchmarking has been performed on the test part of the [DKHate](../datasets.md#dkhate) dataset.

The scores presented here describe the performance (F1) of the models for the task of offensive language identification.

| Model         | OFF  | NOT  | AVG F1 |
|---------------|------|------|--------|
| BERT          | 61.9 | 95.4 | 78.7   |
| A&ttack (A&T) | 34.2 | 91.4 | 62.8   |
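
The AVG F1 is the unweighted (macro) mean of the two per-class F1 scores, e.g. (61.9 + 95.4) / 2 ≈ 78.7 for BERT. A minimal sketch of the same computation with scikit-learn, assuming `f1_report` aggregates the per-class scores this way:

```python
from sklearn.metrics import f1_score

# toy gold and predicted labels; the real benchmark uses the DKHate test split
gold = ['OFF', 'NOT', 'NOT', 'OFF', 'NOT']
pred = ['OFF', 'NOT', 'OFF', 'NOT', 'NOT']

off_f1, not_f1 = f1_score(gold, pred, labels=['OFF', 'NOT'], average=None)
avg_f1 = f1_score(gold, pred, average='macro')  # == (off_f1 + not_f1) / 2
print(off_f1, not_f1, avg_f1)
```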


The evaluation script `hatespeech_benchmarks.py` can be found [here](https://github.com/alexandrainst/danlp/blob/master/examples/benchmarks/hatespeech_benchmarks.py).
90 changes: 89 additions & 1 deletion examples/benchmarks/hatespeech_benchmarks.py
@@ -1,6 +1,6 @@
from danlp.datasets import DKHate
from danlp.models import load_bert_offensive_model
import time, os
from .utils import *

## Load the DKHate data
@@ -29,7 +29,95 @@ def benchmark_bert_mdl():
print(f1_report(labels_true, preds, "BERT", "DKHate"))


def benchmark_attack_mdl():

    import torch
    import torch.nn as nn
    import torch.nn.functional as F
    from transformers import AutoTokenizer, ElectraModel

    # The classifier below is copied from the A&ttack repo (ogtal_model.py),
    # instead of: from ogtal_model import ElectraClassifier
    class ElectraClassifier(nn.Module):

        def __init__(self, pretrained_model_name, num_labels=2):
            super(ElectraClassifier, self).__init__()
            self.num_labels = num_labels
            self.electra = ElectraModel.from_pretrained(pretrained_model_name)
            self.dense = nn.Linear(self.electra.config.hidden_size, self.electra.config.hidden_size)
            self.dropout = nn.Dropout(self.electra.config.hidden_dropout_prob)
            self.out_proj = nn.Linear(self.electra.config.hidden_size, self.num_labels)

        def classifier(self, sequence_output):
            # classify from the representation of the [CLS] token,
            # passed three times through the same dense layer with dropout in between
            x = sequence_output[:, 0, :]
            x = self.dropout(x)
            x = F.gelu(self.dense(x))
            x = self.dropout(x)
            x = F.gelu(self.dense(x))
            x = self.dropout(x)
            x = F.gelu(self.dense(x))
            x = self.dropout(x)
            logits = self.out_proj(x)
            return logits

        def forward(self, input_ids=None, attention_mask=None):
            discriminator_hidden_states = self.electra(input_ids=input_ids, attention_mask=attention_mask)
            sequence_output = discriminator_hidden_states[0]
            logits = self.classifier(sequence_output)
            return logits

    def make_prediction(text, tokzer, mdl):
        tokenized_text = tokzer(
            text,
            truncation=True,
            max_length=512,
            padding='max_length',
            return_attention_mask=True,
            return_token_type_ids=False,
            return_tensors='pt',
        )
        input_ids = tokenized_text['input_ids']
        attention_masks = tokenized_text['attention_mask']
        logits = mdl(input_ids, attention_masks)

        _, preds = torch.max(logits, dim=1)
        return int(preds)

    # load the tokenizer and the model (the weights have to be downloaded beforehand)
    model_checkpoint = 'Maltehb/-l-ctra-danish-electra-small-cased'
    tokenizer = AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)
    model = ElectraClassifier(model_checkpoint, 2)
    mdir = 'examples/benchmarks'
    model_path = os.path.join(mdir, 'pytorch_model.bin')
    if not os.path.exists(model_path):
        url = 'https://github.com/ogtal/A-ttack/blob/main/pytorch_model.bin'
        print("Cannot find the model", model_path, "\nDownload the model at", url)
        exit()
    model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
    model.eval()

    labels = {0: 'NOT', 1: 'OFF'}

    start = time.time()

    preds = []
    for sentence in sentences:
        pred = make_prediction(sentence, tokenizer, model)
        preds.append(labels[pred])

    print('A&ttack (Analyse & Tal):')
    print_speed_performance(start, num_sentences)

    assert len(preds) == len(sentences)

    print(f1_report(labels_true, preds, "A&ttack", "DKHate"))


if __name__ == '__main__':
    benchmark_bert_mdl()
    benchmark_attack_mdl()
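
Note: since the script imports its helpers with a relative import (`from .utils import *`), it is meant to be run as a module from the repository root, e.g. `python -m examples.benchmarks.hatespeech_benchmarks` (invocation assumed from the repo layout), with the A&ttack weights placed in `examples/benchmarks/`.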
