Merge pull request #118 from thunlp/dev

Dev

zhougr18 authored Sep 18, 2020
2 parents 637e81a + 8f8cde3 commit a280a72
Showing 35 changed files with 879 additions and 35 deletions.
25 changes: 25 additions & 0 deletions .github/workflows/deploy.yaml
@@ -0,0 +1,25 @@
name: publish to pypi
on:
  push:
    branches:
      - master
jobs:
  build-n-publish:
    name: Build and publish
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@master
      - uses: actions/setup-python@v2
        with:
          python-version: '3.7'
          architecture: 'x64'
      - name: Run build script
        run: |
          pip install twine --user
          python setup.py sdist bdist_wheel
      - name: Publish distribution 📦 to PyPI
        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags')
        uses: pypa/gh-action-pypi-publish@master
        with:
          user: __token__
          password: ${{ secrets.pypi_password }}
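A note on this workflow as written: the `on.push.branches: [master]` trigger fires only for branch pushes, where `github.ref` is `refs/heads/master`, so the `startsWith(github.ref, 'refs/tags')` guard on the publish step can never be true under this trigger; tag pushes do not start the workflow at all unless an `on.push.tags` filter is also declared. If publishing on tags is the intent, the trigger presumably needs a `tags` pattern alongside (or instead of) the branch filter.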
41 changes: 12 additions & 29 deletions OpenAttack/attackers/deepwordbug.py
@@ -25,8 +25,6 @@ def __init__(self, **kwargs):
         :param string scoring: Scoring function used to compute word importance, ``["replaceone", "temporal", "tail", "combined"]``. **Default:** replaceone
         :param string transformer: Transform function to modify a word, ``["homoglyph", "swap"]``. **Default:** homoglyph
-        :Package Requirements:
-            * torch
         :Classifier Capacity: Probability
 
         Black-box Generation of Adversarial Text Sequences to Evade Deep Learning Classifiers. Ji Gao, Jack Lanchantin, Mary Lou Soffa, Yanjun Qi. IEEE SPW 2018.
@@ -40,15 +38,14 @@ def __init__(self, **kwargs):
         self.power = self.config["power"]
 
     def __call__(self, clsf, x_orig, target=None):
-        import torch
         """
         * **clsf** : **Classifier** .
         * **x_orig** : Input sentence.
         """
         y_orig = clsf.get_pred([x_orig])[0]
         inputs = x_orig.strip().lower().split(" ")
         losses = self.scorefunc(self.scoring, clsf, inputs, y_orig)  # loss vector with each word removed
-        sorted, indices = torch.sort(losses, descending=True)
+        indices = np.argsort(losses)
 
         advinputs = inputs[:]
         t = 0
@@ -90,46 +87,32 @@ def transform(self, type, word):
 
     # scoring functions
     def replaceone(self, clsf, inputs, y_orig):
-        import torch
-
-        losses = torch.zeros(len(inputs))
+        losses = np.zeros(len(inputs))
         for i in range(len(inputs)):
             tempinputs = inputs[:]  # ##
             tempinputs[i] = self.config['unk']
-            with torch.no_grad():
-                tempoutput = torch.from_numpy(clsf.get_prob([" ".join(tempinputs)]))  # ##
-            softmax = torch.nn.Softmax(dim=1)
-            nll_lossed = -1 * torch.log(softmax(tempoutput))[0][y_orig].item()
-            losses[i] = nll_lossed  # ##
+            tempoutput = clsf.get_prob([" ".join(tempinputs)])
+            losses[i] = 1 - tempoutput[0][y_orig]
         return losses
 
     def temporal(self, clsf, inputs, y_orig):
-        import torch
-        softmax = torch.nn.Softmax(dim=1)
-
-        losses1 = torch.zeros(len(inputs))
-        dloss = torch.zeros(len(inputs))
+        losses1 = np.zeros(len(inputs))
+        dloss = np.zeros(len(inputs))
         for i in range(len(inputs)):
             tempinputs = inputs[: i + 1]
-            with torch.no_grad():
-                tempoutput = torch.from_numpy(clsf.get_prob([self.config["processor"].detokenizer(tempinputs)]))
-            losses1[i] = -1 * torch.log(softmax(tempoutput))[0][y_orig].item()
-            print(self.config["processor"].detokenizer(tempinputs), losses1[i])
+            tempoutput = clsf.get_prob([self.config["processor"].detokenizer(tempinputs)])
+            losses1[i] = 1 - tempoutput[0][y_orig]
         for i in range(1, len(inputs)):
             dloss[i] = abs(losses1[i] - losses1[i - 1])
         return dloss
 
     def temporaltail(self, clsf, inputs, y_orig):
-        import torch
-        softmax = torch.nn.Softmax(dim=1)
-
-        losses1 = torch.zeros(len(inputs))
-        dloss = torch.zeros(len(inputs))
+        losses1 = np.zeros(len(inputs))
+        dloss = np.zeros(len(inputs))
         for i in range(len(inputs)):
             tempinputs = inputs[i:]
-            with torch.no_grad():
-                tempoutput = torch.from_numpy(clsf.get_prob([self.config["processor"].detokenizer(tempinputs)]))
-            losses1[i] = -1 * torch.log(softmax(tempoutput))[0][y_orig].item()
+            tempoutput = clsf.get_prob([self.config["processor"].detokenizer(tempinputs)])
+            losses1[i] = 1 - tempoutput[0][y_orig]
         for i in range(1, len(inputs)):
             dloss[i] = abs(losses1[i] - losses1[i - 1])
         return dloss
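The numpy rewrite above replaces the negative log-probability of the original class with `1 - p(y_orig)`; both decrease monotonically as the classifier grows more confident, so the word-importance ranking is unchanged and the attacker no longer needs torch at call time. One subtlety: `np.argsort` sorts ascending, whereas the removed `torch.sort(..., descending=True)` sorted descending, so downstream code is presumably expected to walk `indices` from the end. A minimal usage sketch, assuming this class is exported as `OpenAttack.attackers.DeepWordBugAttacker` and that a black-box victim only needs the `get_pred`/`get_prob` methods exercised above:

import numpy as np
import OpenAttack

class ToyClassifier(OpenAttack.Classifier):
    # Scores a sentence by the share of words found in a tiny positive lexicon.
    def get_prob(self, input_):
        probs = []
        for sent in input_:
            words = sent.lower().split()
            pos = sum(w in {"good", "great", "fine"} for w in words) / max(len(words), 1)
            probs.append([1.0 - pos, pos])
        return np.array(probs)

    def get_pred(self, input_):
        return self.get_prob(input_).argmax(axis=1)

clsf = ToyClassifier()
attacker = OpenAttack.attackers.DeepWordBugAttacker(power=5)  # `power` is read in __init__ above
adversarial = attacker(clsf, "this movie is good and the acting is great")
print(adversarial)  # perturbed sentence (exact return format depends on the attacker's __call__)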
25 changes: 25 additions & 0 deletions OpenAttack/data/data_ag.py
@@ -0,0 +1,25 @@
"""
:type: a tuple of three :py:class:`.Dataset` s, `(train, valid, test)`.
:Size: 31.0MB

AG News dataset which is used to train victim models.
"""
import pickle

NAME = "Dataset.AG"
# NOTE: this URL points at "sst.pkl"; for the AG News dataset that looks like a
# copy-paste slip from the SST module (the correct archive name is not shown here).
DOWNLOAD = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/dataset/sst.pkl"


def LOAD(path):
    from OpenAttack.utils import Dataset, DataInstance

    def mapping(data):
        return Dataset([
            DataInstance(
                x=it[0],
                y=it[1]
            ) for it in data
        ], copy=False)

    train, valid, test = pickle.load(open(path, "rb"))
    return mapping(train), mapping(valid), mapping(test)
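Each of these data modules follows the same contract: `NAME` registers the resource, `DOWNLOAD` tells the data manager where to fetch it, and `LOAD` turns the local pickle into `Dataset` objects. A minimal consumption sketch, assuming OpenAttack's `DataManager.load` performs the fetch-then-`LOAD` flow and that `Dataset` supports `len()` and indexing:

import OpenAttack

train, valid, test = OpenAttack.DataManager.load("Dataset.AG")
print(len(train), len(valid), len(test))  # split sizes
inst = train[0]
print(inst.x, inst.y)  # raw text and its integer label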
25 changes: 25 additions & 0 deletions OpenAttack/data/data_imdb.py
@@ -0,0 +1,25 @@
"""
:type: a tuple of three :py:class:`.Dataset` s, `(train, valid, test)`.
:Size: 56.2MB

IMDB dataset which is used to train victim models.
"""
import pickle

NAME = "Dataset.IMDB"
DOWNLOAD = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/dataset/imdb.pkl"


def LOAD(path):
    from OpenAttack.utils import Dataset, DataInstance

    def mapping(data):
        return Dataset([
            DataInstance(
                x=it[0],
                y=it[1]
            ) for it in data
        ], copy=False)

    train, valid, test = pickle.load(open(path, "rb"))
    return mapping(train), mapping(valid), mapping(test)
26 changes: 26 additions & 0 deletions OpenAttack/data/data_mnli.py
@@ -0,0 +1,26 @@
"""
:type: a tuple of three :py:class:`.Dataset` s, `(train, valid, test)`.
:Size: 77.373MB

MNLI dataset which is used to train victim models.
"""
import pickle

NAME = "Dataset.MNLI"
DOWNLOAD = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/dataset/mnli.pkl"


def LOAD(path):
    from OpenAttack.utils import Dataset, DataInstance

    def mapping(data):
        return Dataset([
            DataInstance(
                x=it[0],
                y=it[2],
                meta={"reference": it[1]}
            ) for it in data
        ], copy=False)

    train, valid, test = pickle.load(open(path, "rb"))
    return mapping(train), mapping(valid), mapping(test)
26 changes: 26 additions & 0 deletions OpenAttack/data/data_snli.py
@@ -0,0 +1,26 @@
"""
:type: a tuple of three :py:class:`.Dataset` s, `(train, valid, test)`.
:Size: 72.596MB

SNLI dataset which is used to train victim models.
"""
import pickle

NAME = "Dataset.SNLI"
DOWNLOAD = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/dataset/snli.pkl"


def LOAD(path):
    from OpenAttack.utils import Dataset, DataInstance

    def mapping(data):
        return Dataset([
            DataInstance(
                x=it[0],
                y=it[2],
                meta={"reference": it[1]}
            ) for it in data
        ], copy=False)

    train, valid, test = pickle.load(open(path, "rb"))
    return mapping(train), mapping(valid), mapping(test)
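The two NLI loaders differ from the single-sentence ones only in the mapping: the first element of each triple becomes `x`, the second is kept in `meta["reference"]`, and the label moves to `it[2]`. A short access sketch under the same `DataManager` assumption as above (which element is premise versus hypothesis is not shown here):

import OpenAttack

train, valid, test = OpenAttack.DataManager.load("Dataset.SNLI")
inst = train[0]
print(inst.x)                  # first sentence of the pair
print(inst.meta["reference"])  # second sentence, stored as reference text
print(inst.y)                  # label index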
2 changes: 1 addition & 1 deletion OpenAttack/data/nltk_perceptron_pos_tagger.py
@@ -16,5 +16,5 @@
 
 def LOAD(path):
     ret = __import__("nltk").tag.PerceptronTagger(load=False)
-    ret.load(os.path.join(path, "averaged_perceptron_tagger.pickle"))
+    ret.load("file:" + os.path.join(path, "averaged_perceptron_tagger.pickle"))
     return ret.tag
2 changes: 1 addition & 1 deletion OpenAttack/data/nltk_senttokenizer.py
@@ -14,5 +14,5 @@
 
 
 def LOAD(path):
-    return __import__("nltk").data.load(os.path.join(path, "english.pickle")).tokenize
+    return __import__("nltk").data.load("file:" + os.path.join(path, "english.pickle")).tokenize
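The `"file:"` prefix in both fixes matters because `nltk.data.load` treats a bare string as a resource name to be resolved against NLTK's own data search path; with an explicit `file:` URL it loads the pickle from exactly the given location, so the bundled files work even when they live outside any `nltk_data` directory. A small illustration (the path is hypothetical):

import nltk

# Resolved against NLTK's search path; fails for files outside nltk_data:
#   nltk.data.load("english.pickle")
# Loaded directly from the filesystem, bypassing the search path:
tokenizer = nltk.data.load("file:/opt/openattack/data/english.pickle").tokenize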

20 changes: 20 additions & 0 deletions OpenAttack/data/victim_albert_ag.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.AlbertClassifier
:Size: 788.697MB
:Package Requirements:
    * transformers
    * pytorch

Pretrained ALBERT model on AG-4 dataset. See :py:data:`Dataset.AG` for detail.
"""

from OpenAttack.utils import make_zip_downloader, AlbertClassifier

NAME = "Victim.ALBERT.AG"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/albert_ag.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    # AG News is a 4-way task; 5 output labels here presumably leaves index 0
    # unused (AG News classes are conventionally numbered 1-4).
    return AlbertClassifier(path, 5)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_albert_imdb.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.AlbertClassifier
:Size: 788.662MB
:Package Requirements:
    * transformers
    * pytorch

Pretrained ALBERT model on IMDB dataset. See :py:data:`Dataset.IMDB` for detail.
"""

from OpenAttack.utils import make_zip_downloader, AlbertClassifier

NAME = "Victim.ALBERT.IMDB"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/albert_imdb.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    return AlbertClassifier(path, 2)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_albert_mnli.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.AlbertClassifier
:Size: 788.668MB
:Package Requirements:
    * transformers
    * pytorch

Pretrained ALBERT model on MNLI dataset. See :py:data:`Dataset.MNLI` for detail.
"""

from OpenAttack.utils import make_zip_downloader, AlbertClassifier

NAME = "Victim.ALBERT.MNLI"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/albert_mnli.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    # MNLI is conventionally a three-way task; 2 here either reflects a
    # binarized entailment/non-entailment setup or is a slip (the SNLI
    # loaders use 3).
    return AlbertClassifier(path, 2)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_albert_snli.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.AlbertClassifier
:Size: 788.672MB
:Package Requirements:
    * transformers
    * pytorch

Pretrained ALBERT model on SNLI dataset. See :py:data:`Dataset.SNLI` for detail.
"""

from OpenAttack.utils import make_zip_downloader, AlbertClassifier

NAME = "Victim.ALBERT.SNLI"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/albert_snli.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    return AlbertClassifier(path, 3)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_albert_sst.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.AlbertClassifier
:Size: 788.66MB
:Package Requirements:
    * transformers
    * pytorch

Pretrained ALBERT model on SST-2 dataset. See :py:data:`Dataset.SST` for detail.
"""

from OpenAttack.utils import make_zip_downloader, AlbertClassifier

NAME = "Victim.ALBERT.SST"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/albert_sst.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    return AlbertClassifier(path, 2)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_bert_mnli.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.BertClassifier
:Size: 1.23GB
:Package Requirements:
    * transformers
    * pytorch

Pretrained BERT model on MNLI dataset. See :py:data:`Dataset.MNLI` for detail.
"""

from OpenAttack.utils import make_zip_downloader, BertClassifier

NAME = "Victim.BERT.MNLI"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/bert_mnli.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    # As with Victim.ALBERT.MNLI above, 2 labels for a conventionally
    # three-way task may be a binarized setup or a slip.
    return BertClassifier(path, 2)
20 changes: 20 additions & 0 deletions OpenAttack/data/victim_bert_snli.py
@@ -0,0 +1,20 @@
"""
:type: OpenAttack.utils.BertClassifier
:Size: 1.23GB
:Package Requirements:
    * transformers
    * pytorch

Pretrained BERT model on SNLI dataset. See :py:data:`Dataset.SNLI` for detail.
"""

from OpenAttack.utils import make_zip_downloader, BertClassifier

NAME = "Victim.BERT.SNLI"

URL = "https://thunlp.oss-cn-qingdao.aliyuncs.com/TAADToolbox/victim/bert_snli.zip"
DOWNLOAD = make_zip_downloader(URL)

def LOAD(path):
    from OpenAttack import Classifier
    return BertClassifier(path, 3)
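All of these victim modules share one shape: a docstring with size and requirements, a `NAME` for the registry, a zip downloader, and a `LOAD` that builds the classifier with the task's label count. A minimal sketch of loading and querying one, assuming the `DataManager.load` flow and the `get_prob`/`get_pred` interface the attackers rely on:

import OpenAttack

victim = OpenAttack.DataManager.load("Victim.ALBERT.SST")
sentences = ["a gripping and beautifully made film"]
print(victim.get_prob(sentences))  # expected shape (1, 2): one row, two SST-2 labels
print(victim.get_pred(sentences))  # predicted label index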