-
Notifications
You must be signed in to change notification settings - Fork 168
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[MLOps 1.5] Expand the built-ins: NLP (#865)
* initial commit * WIP * tests passing * Delete proposal.md * tests green * ready for reviews * corrections Co-authored-by: bogunowicz@arrival.com <bogunowicz@arrival.com>
- Loading branch information
1 parent
17943f4
commit e4a05c0
Showing
13 changed files
with
407 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
17 changes: 17 additions & 0 deletions
17
src/deepsparse/loggers/metric_functions/natural_language_processing/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# flake8: noqa | ||
from .built_ins import * | ||
from .question_answering import * | ||
from .token_classification import * |
36 changes: 36 additions & 0 deletions
36
src/deepsparse/loggers/metric_functions/natural_language_processing/built_ins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
Set of functions for logging metrics from the natural language processing pipelines | ||
""" | ||
from typing import Dict, List, Union | ||
|
||
|
||
__all__ = ["string_length", "percent_unknown_tokens"] | ||
|
||
|
||
def string_length(sequence: Union[List[str], str]) -> Union[Dict[str, int], int]:
    """
    Measures the number of characters in a string, or in every
    string of a list of strings

    :param sequence: Either a single string or a list of strings
    :return: For a single string, its length as an integer. Otherwise,
        a dictionary that maps the stringified position of each element
        to the length of that element
    """
    if not isinstance(sequence, str):
        lengths = {}
        for position, text in enumerate(sequence):
            lengths[str(position)] = len(text)
        return lengths
    return len(sequence)
|
||
|
||
def percent_unknown_tokens():
    """
    Placeholder for a metric function that is exported through
    ``__all__`` but has no implementation yet

    :raises NotImplementedError: always
    """
    # A descriptive message makes the failure self-explanatory when
    # the function is picked up by a logging configuration
    raise NotImplementedError(
        "The metric function percent_unknown_tokens is not implemented yet"
    )
15 changes: 15 additions & 0 deletions
15
...parse/loggers/metric_functions/natural_language_processing/question_answering/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# flake8: noqa | ||
from .built_ins import * |
54 changes: 54 additions & 0 deletions
54
...arse/loggers/metric_functions/natural_language_processing/question_answering/built_ins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
Set of functions for logging metrics from the question answering pipeline | ||
""" | ||
|
||
from deepsparse.loggers.metric_functions.natural_language_processing import ( | ||
string_length, | ||
) | ||
|
||
|
||
__all__ = ["answer_found", "answer_length", "answer_score"] | ||
|
||
|
||
def answer_found(qa_output: "QuestionAnsweringOutput") -> bool:  # noqa: F821
    """
    Tells whether the question answering pipeline produced an answer

    :param qa_output: The output schema of the question answering pipeline
    :return: False if the answer equals the "empty" placeholder string,
        True otherwise
    """
    no_answer = qa_output.answer == "empty"
    return not no_answer
|
||
|
||
def answer_length(qa_output: "QuestionAnsweringOutput") -> int:  # noqa: F821
    """
    Computes the length of the answer produced by the question
    answering pipeline

    :param qa_output: The output schema of the question answering pipeline
    :return: 0 if the answer equals the "empty" placeholder string,
        otherwise the result of `string_length` applied to the answer
    """
    answer = qa_output.answer
    return 0 if answer == "empty" else string_length(answer)
|
||
|
||
def answer_score(qa_output: "QuestionAnsweringOutput") -> float:  # noqa: F821
    """
    Extracts the score attached to the answer of the question
    answering pipeline

    :param qa_output: The output schema of the question answering pipeline
    :return: The value of the `score` attribute of the output
    """
    score = qa_output.score
    return score
15 changes: 15 additions & 0 deletions
15
...rse/loggers/metric_functions/natural_language_processing/token_classification/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
# flake8: noqa | ||
from .built_ins import * |
75 changes: 75 additions & 0 deletions
75
...se/loggers/metric_functions/natural_language_processing/token_classification/built_ins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
""" | ||
Set of functions for logging metrics from the token classification pipeline | ||
""" | ||
from typing import Dict, List | ||
|
||
import numpy | ||
|
||
|
||
__all__ = ["mean_score", "percent_zero_labels"] | ||
|
||
|
||
def percent_zero_labels(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Computes, for every token sequence in the token classification
    output, the share of tokens that were assigned the zero label

    :param token_classification_output: the TokenClassificationOutput object
    :return: A dictionary where the key is the (stringified) index of the
        token sequence and the value is the result of
        `_percent_zero_labels` for that sequence
    """
    predictions = token_classification_output.predictions
    return {
        str(sequence_idx): _percent_zero_labels(sequence)
        for sequence_idx, sequence in enumerate(predictions)
    }
|
||
|
||
def mean_score(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Computes, for every token sequence in the token classification
    output, the mean score of its tokens

    :param token_classification_output: the TokenClassificationOutput object
    :return: A dictionary where the key is the (stringified) index of the
        token sequence and the value is the result of `_mean_score`
        for that sequence
    """
    predictions = token_classification_output.predictions
    return {
        str(sequence_idx): _mean_score(sequence)
        for sequence_idx, sequence in enumerate(predictions)
    }
|
||
|
||
def _mean_score( | ||
token_classification_output: List["TokenClassificationResult"], # noqa: F821 | ||
) -> float: | ||
return numpy.mean([result.score for result in token_classification_output]) | ||
|
||
|
||
def _percent_zero_labels( | ||
token_classification_output: List["TokenClassificationResult"], # noqa: F821 | ||
) -> float: | ||
label_zero = "LABEL_0" | ||
all_results = len(token_classification_output) | ||
zero_results = sum( | ||
1 for result in token_classification_output if result.entity == label_zero | ||
) | ||
return zero_results / all_results |
13 changes: 13 additions & 0 deletions
13
tests/deepsparse/loggers/metric_functions/natural_language_processing/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
29 changes: 29 additions & 0 deletions
29
tests/deepsparse/loggers/metric_functions/natural_language_processing/built_ins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pytest | ||
from deepsparse.loggers.metric_functions.natural_language_processing import ( | ||
string_length, | ||
) | ||
|
||
|
||
@pytest.mark.parametrize(
    "sequence, expected",
    [
        # a single string maps to its integer length
        ("His palms are sweaty", 20),
        # a list of strings maps to a dictionary of per-element lengths
        (["knees weak", "arms are heavy"], {"0": 10, "1": 14}),
    ],
)
def test_string_length(sequence, expected):
    """
    Checks `string_length` against a plain string and a list of strings
    """
    measured = string_length(sequence)
    assert measured == expected
13 changes: 13 additions & 0 deletions
13
...parse/loggers/metric_functions/natural_language_processing/question_answering/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
59 changes: 59 additions & 0 deletions
59
...arse/loggers/metric_functions/natural_language_processing/question_answering/built_ins.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
|
||
import pytest | ||
from deepsparse.loggers.metric_functions.natural_language_processing import ( | ||
answer_found, | ||
answer_length, | ||
answer_score, | ||
) | ||
from deepsparse.transformers.pipelines.question_answering import QuestionAnsweringOutput | ||
|
||
|
||
# Output carrying a regular answer, shared by the tests below
output_schema = QuestionAnsweringOutput(
    answer="His palms are sweaty",
    score=0.69,
    start=0,
    end=0,
)
# Output whose answer is the "empty" placeholder string
empty_schema = QuestionAnsweringOutput(
    answer="empty",
    score=0.69,
    start=0,
    end=0,
)
|
||
|
||
@pytest.mark.parametrize(
    "qa_output, expected",
    [
        (output_schema, 20),
        # the "empty" placeholder answer counts as length zero
        (empty_schema, 0),
    ],
)
def test_answer_length(qa_output, expected):
    """
    Checks `answer_length` for a regular answer and for the
    "empty" placeholder answer
    """
    measured = answer_length(qa_output)
    assert measured == expected
|
||
|
||
@pytest.mark.parametrize(
    "qa_output, expected",
    [
        (output_schema, 0.69),
    ],
)
def test_answer_score(qa_output, expected):
    """
    Checks that `answer_score` returns the score stored in the output
    """
    reported = answer_score(qa_output)
    assert reported == expected
|
||
|
||
@pytest.mark.parametrize(
    "qa_output, expected",
    [
        (output_schema, True),
        # the "empty" placeholder answer means that nothing was found
        (empty_schema, False),
    ],
)
def test_answer_found(qa_output, expected):
    """
    Checks `answer_found` for a regular answer and for the
    "empty" placeholder answer
    """
    found = answer_found(qa_output)
    assert found == expected
13 changes: 13 additions & 0 deletions
13
...rse/loggers/metric_functions/natural_language_processing/token_classification/__init__.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, | ||
# software distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. |
Oops, something went wrong.