Add QAT support to more models (#29)
* first version of QDQ monkey patching

* add Albert, Electra and Distilbert QAT support

* add QDQDeberta V1

* fix distilbert

* add ast patch
add quant onnx export

* simplify quantization process

* fix qdq deberta

* quantization refactoring

* add documentation
add quantization tests
add deberta v2

* add quant of layernorm
refactor ast modif
add tests

* add operator name in quantizer name
update notebook

* update notebook

* update notebook
pommedeterresautee authored Dec 28, 2021
1 parent 2b369c1 commit 404c5ee
Showing 19 changed files with 1,572 additions and 2,883 deletions.
2 changes: 1 addition & 1 deletion README.md
@@ -14,7 +14,7 @@
* [🐍 TensorRT usage in Python script](#tensorrt-usage-in-python-script)
* [⏱ benchmarks](#benchmarks)
* [🤗 end to end reproduction of Infinity Hugging Face demo](./demo/README.md) (to replay [Medium article](https://towardsdatascience.com/hugging-face-transformer-inference-under-1-millisecond-latency-e1be0057a51c?source=friends_link&sk=cd880e05c501c7880f2b9454830b8915))
- * [🏎️ end to end GPU quantization tutorial and many benchmarks (ONNX Runtime, TensorRT, vanilla Pytorch, etc.)](./demo/quantization_end_to_end.ipynb)
+ * [🏎️ end to end GPU quantization tutorial and many benchmarks (ONNX Runtime, TensorRT, vanilla Pytorch, etc.)](demo/quantization/quantization_end_to_end.ipynb)

#### Why this tool?

2 changes: 1 addition & 1 deletion VERSION
@@ -1 +1 @@
-0.2.1
+0.3.0

Large diffs are not rendered by default.

20 changes: 20 additions & 0 deletions src/transformer_deploy/QDQModels/QDQAlbert.py
@@ -0,0 +1,20 @@
# Copyright 2021, Lefebvre Sarrut Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformer_deploy.QDQModels.ast_module_patch import PatchModule


qdq_albert_mapping: PatchModule = PatchModule(
    module="transformers.models.albert.modeling_albert",
)
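
PatchModule itself is defined in transformer_deploy.QDQModels.ast_module_patch, which is not part of this diff. As a hedged sketch of the idea only (the PatchModuleSketch class and its apply method below are illustrative assumptions, not the repository's actual API): a patch names the transformers modeling module to rewrite, and optional monkey_patch entries replace specific class attributes inside it.

import importlib
from dataclasses import dataclass, field
from typing import Callable, Dict, Tuple


# Hypothetical sketch of the PatchModule interface used above; the real class
# (in ast_module_patch, not shown here) additionally rewrites the module's AST
# to insert QDQ quantizer nodes around quantizable operations.
@dataclass
class PatchModuleSketch:
    module: str  # dotted path of the transformers modeling module to patch
    monkey_patch: Dict[str, Tuple[Callable, str]] = field(default_factory=dict)

    def apply(self) -> None:
        mod = importlib.import_module(self.module)
        for target, (replacement, _source_name) in self.monkey_patch.items():
            class_name, attr_name = target.split(".")
            # a plain function set on a class binds as a method when accessed
            # through an instance (get_attention_mask) and stays a plain
            # function when accessed through the class (XSoftmax.symbolic)
            setattr(getattr(mod, class_name), attr_name, replacement)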
20 changes: 20 additions & 0 deletions src/transformer_deploy/QDQModels/QDQBert.py
@@ -0,0 +1,20 @@
# Copyright 2021, Lefebvre Sarrut Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformer_deploy.QDQModels.ast_module_patch import PatchModule


qdq_bert_mapping: PatchModule = PatchModule(
    module="transformers.models.bert.modeling_bert",
)
71 changes: 71 additions & 0 deletions src/transformer_deploy/QDQModels/QDQDeberta.py
@@ -0,0 +1,71 @@
# Copyright 2021, Lefebvre Sarrut Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import torch

from transformer_deploy.QDQModels.ast_module_patch import PatchModule


# in class DebertaEncoder(nn.Module):
def get_attention_mask(self, attention_mask):
    if attention_mask.dim() <= 2:
        extended_attention_mask = attention_mask.unsqueeze(1).unsqueeze(2)
        attention_mask = extended_attention_mask * extended_attention_mask.squeeze(-2).unsqueeze(-1)
        # unnecessary conversion: byte (unsigned integer) is not supported by TensorRT
        # attention_mask = attention_mask.byte()
    elif attention_mask.dim() == 3:
        attention_mask = attention_mask.unsqueeze(1)

    return attention_mask
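
The only change versus the upstream transformers implementation is dropping the final .byte() call: byte is an unsigned 8-bit integer type, which TensorRT does not support. A small self-contained example of the shape expansion this method performs:

import torch

# batch of 2 sequences of length 4; the second sequence has 2 padding tokens
attention_mask = torch.tensor([[1, 1, 1, 1],
                               [1, 1, 0, 0]])
extended = attention_mask.unsqueeze(1).unsqueeze(2)    # shape (2, 1, 1, 4)
mask = extended * extended.squeeze(-2).unsqueeze(-1)   # shape (2, 1, 4, 4)
# mask[i, 0] is the outer product of the sequence mask with itself:
# entry (q, k) is 1 only when both query q and key k are real tokens
print(mask[1, 0])
# tensor([[1, 1, 0, 0],
#         [1, 1, 0, 0],
#         [0, 0, 0, 0],
#         [0, 0, 0, 0]])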


# in class XSoftmax(torch.autograd.Function):
# @staticmethod
def symbolic(g, self, mask, dim):
    import torch.onnx.symbolic_helper as sym_help
    from torch.onnx.symbolic_opset9 import masked_fill, softmax

    mask_cast_value = g.op("Cast", mask, to_i=sym_help.cast_pytorch_to_onnx["Long"])
    # r_mask = g.op("Sub", g.op("Constant", value_t=torch.tensor(1, dtype=torch.int64)), mask_cast_value)
    # replace Byte by Char to get signed numbers
    r_mask = g.op(
        "Cast",
        g.op("Sub", g.op("Constant", value_t=torch.tensor(1, dtype=torch.int64)), mask_cast_value),
        to_i=sym_help.cast_pytorch_to_onnx["Char"],
    )
    output = masked_fill(g, self, r_mask, g.op("Constant", value_t=torch.tensor(float("-inf"))))
    output = softmax(g, output, dim)
    return masked_fill(g, output, r_mask, g.op("Constant", value_t=torch.tensor(0, dtype=torch.int8)))
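
For context: during torch.onnx.export, the exporter looks for a symbolic static method on torch.autograd.Function subclasses and calls it to emit ONNX ops instead of tracing forward. The patched version above uses that hook to cast the inverted mask to Char (signed int8, which TensorRT supports) instead of Byte. A minimal toy illustration of the mechanism (ScaleByTwo is invented for this example, not from the repository):

import torch


class ScaleByTwo(torch.autograd.Function):
    # eager path, used during normal training/inference
    @staticmethod
    def forward(ctx, x):
        return x * 2

    # called by the ONNX exporter instead of tracing forward(), so the
    # exported graph can use different (e.g. TensorRT-friendly) operators
    @staticmethod
    def symbolic(g, x):
        two = g.op("Constant", value_t=torch.tensor(2.0))
        return g.op("Mul", x, two)


class Net(torch.nn.Module):
    def forward(self, x):
        return ScaleByTwo.apply(x)


torch.onnx.export(Net(), torch.randn(1, 3), "scale_by_two.onnx")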


qdq_deberta_mapping: PatchModule = PatchModule(
    module="transformers.models.deberta.modeling_deberta",
    monkey_patch={
        "XSoftmax.symbolic": (symbolic, "symbolic"),
        "DebertaEncoder.get_attention_mask": (get_attention_mask, "get_attention_mask"),
    },
)


qdq_deberta_v2_mapping: PatchModule = PatchModule(
    module="transformers.models.deberta_v2.modeling_deberta_v2",
    monkey_patch={
        "XSoftmax.symbolic": (symbolic, "symbolic"),
        "DebertaV2Encoder.get_attention_mask": (get_attention_mask, "get_attention_mask"),
    },
)
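
Once a mapping's monkey patches have been applied (see the hypothetical apply sketch under QDQAlbert.py above), a standard ONNX export picks up the patched symbolic automatically. A hedged usage sketch; the model name, file name and export arguments are illustrative, not taken from the repository:

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# assumes qdq_deberta_mapping has already been applied, so XSoftmax.symbolic
# and DebertaEncoder.get_attention_mask are the TensorRT-friendly overrides
tokenizer = AutoTokenizer.from_pretrained("microsoft/deberta-base")
model = AutoModelForSequenceClassification.from_pretrained("microsoft/deberta-base").eval()
inputs = tokenizer("some example text", return_tensors="pt")
torch.onnx.export(
    model,
    (inputs["input_ids"], inputs["attention_mask"]),
    "deberta-qdq.onnx",
    input_names=["input_ids", "attention_mask"],
    output_names=["logits"],
    dynamic_axes={"input_ids": {0: "batch", 1: "seq"},
                  "attention_mask": {0: "batch", 1: "seq"}},
    opset_version=13,
)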
20 changes: 20 additions & 0 deletions src/transformer_deploy/QDQModels/QDQDistilbert.py
@@ -0,0 +1,20 @@
# Copyright 2021, Lefebvre Sarrut Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from transformer_deploy.QDQModels.ast_module_patch import PatchModule


qdq_distilbert_mapping: PatchModule = PatchModule(
    module="transformers.models.distilbert.modeling_distilbert",
)
21 changes: 21 additions & 0 deletions src/transformer_deploy/QDQModels/QDQElectra.py
@@ -0,0 +1,21 @@
# Copyright 2021, Lefebvre Sarrut Services
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from transformer_deploy.QDQModels.ast_module_patch import PatchModule


qdq_electra_mapping: PatchModule = PatchModule(
    module="transformers.models.electra.modeling_electra",
)
