Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MLOps 1.5] Expand the built-ins: NLP #865

Merged
merged 12 commits into from
Jan 20, 2023
1 change: 1 addition & 0 deletions src/deepsparse/loggers/metric_functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
# flake8: noqa
from .built_ins import *
from .computer_vision import *
from .natural_language_processing import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
from .question_answering import *
from .token_classification import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the natural language processing pipelines
"""
from typing import Dict, List, Union


# Names exported when this module is star-imported.
# NOTE: percent_unknown_tokens is exported but currently only raises
# NotImplementedError.
__all__ = ["string_length", "percent_unknown_tokens"]


def string_length(sequence: Union[List[str], str]) -> Union[Dict[str, int], int]:
    """
    Measures the length of a string, or of every string in a list

    :param sequence: A single string or a list of strings
    :return: The character count when a single string is passed; otherwise
        a dictionary that maps the position of each string in the list
        (converted to a string key) to the character count of that string
    """
    if not isinstance(sequence, str):
        lengths = {}
        for position, text in enumerate(sequence):
            lengths[str(position)] = len(text)
        return lengths
    return len(sequence)


def percent_unknown_tokens():
    """
    Placeholder for a metric function that has no implementation yet

    :raises NotImplementedError: on every call
    """
    raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the question answering pipeline
"""

from deepsparse.loggers.metric_functions.natural_language_processing import (
string_length,
)


# Names exported when this module is star-imported.
__all__ = ["answer_found", "answer_length", "answer_score"]


def answer_found(qa_output: "QuestionAnsweringOutput") -> bool:  # noqa: F821
    """
    Tells whether the given QuestionAnsweringOutput carries an answer

    :param qa_output: The output schema of the question answering pipeline
    :return: False when the answer equals the string "empty",
        True otherwise
    """
    # the string "empty" is the marker checked for a missing answer
    if qa_output.answer == "empty":
        return False
    return True


def answer_length(qa_output: "QuestionAnsweringOutput") -> int:  # noqa: F821
    """
    Computes the length of the answer held by the QuestionAnsweringOutput

    :param qa_output: The output schema of the question answering pipeline
    :return: 0 when the answer equals the string "empty"; otherwise the
        result of string_length applied to the answer
    """
    answer = qa_output.answer
    return 0 if answer == "empty" else string_length(answer)


def answer_score(qa_output: "QuestionAnsweringOutput") -> float:  # noqa: F821
    """
    Extracts the score stored on the QuestionAnsweringOutput

    :param qa_output: The output schema of the question answering pipeline
    :return: The value of the `score` attribute of qa_output, unchanged
    """
    score = qa_output.score
    return score
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the token classification pipeline
"""
from typing import Dict, List

import numpy


# Names exported when this module is star-imported; the underscore-prefixed
# helpers below are intentionally left out.
__all__ = ["mean_score", "percent_zero_labels"]


def percent_zero_labels(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Returns the share of zero labels for every token sequence in the
    token classification output

    :param token_classification_output: the TokenClassificationOutput object;
        its `predictions` attribute is iterated, one entry per token sequence
    :return: A dictionary where the key is the token sequence index
        (as a string) and the value is what _percent_zero_labels computes
        for that sequence of tokens
    """
    return {
        str(prediction_idx): _percent_zero_labels(prediction)
        for prediction_idx, prediction in enumerate(
            token_classification_output.predictions
        )
    }


def mean_score(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Computes the mean score for every token sequence in the
    token classification output

    :param token_classification_output: the TokenClassificationOutput object;
        its `predictions` attribute is iterated, one entry per token sequence
    :return: A dictionary where the key is the token sequence index
        (as a string) and the value is the mean score of that
        sequence of tokens
    """
    sequences = token_classification_output.predictions
    return {
        str(sequence_idx): _mean_score(sequence)
        for sequence_idx, sequence in enumerate(sequences)
    }


def _mean_score(
token_classification_output: List["TokenClassificationResult"], # noqa: F821
) -> float:
return numpy.mean([result.score for result in token_classification_output])


def _percent_zero_labels(
token_classification_output: List["TokenClassificationResult"], # noqa: F821
) -> float:
label_zero = "LABEL_0"
all_results = len(token_classification_output)
zero_results = sum(
1 for result in token_classification_output if result.entity == label_zero
)
return zero_results / all_results
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from deepsparse.loggers.metric_functions.natural_language_processing import (
string_length,
)


@pytest.mark.parametrize(
    "string, expected_len",
    [
        # a single string yields its character count
        ("His palms are sweaty", 20),
        ("", 0),
        # a list of strings yields a dict keyed by the stringified index
        (["knees weak", "arms are heavy"], {"0": 10, "1": 14}),
        ([], {}),
    ],
)
def test_string_length(string, expected_len):
    """
    Checks string_length for a single string and for a list of strings,
    including the empty string and the empty list
    """
    assert string_length(string) == expected_len
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from deepsparse.loggers.metric_functions.natural_language_processing import (
answer_found,
answer_length,
answer_score,
)
from deepsparse.transformers.pipelines.question_answering import QuestionAnsweringOutput


# Shared fixture: a pipeline output holding a regular, 20-character answer.
output_schema = QuestionAnsweringOutput(
answer="His palms are sweaty", score=0.69, start=0, end=0
)
# Shared fixture: a pipeline output whose answer is the string "empty";
# the tests below expect it to count as "no answer found" with length 0.
empty_schema = QuestionAnsweringOutput(answer="empty", score=0.69, start=0, end=0)


@pytest.mark.parametrize(
    "schema, expected_len",
    [(output_schema, 20), (empty_schema, 0)],
)
def test_answer_length(schema, expected_len):
    """
    Checks that answer_length reports the answer's character count and
    0 for the "empty" answer
    """
    measured_len = answer_length(schema)
    assert measured_len == expected_len


@pytest.mark.parametrize(
    "schema, expected_score",
    [(output_schema, 0.69)],
)
def test_answer_score(schema, expected_score):
    """
    Checks that answer_score returns the score stored on the schema
    """
    reported_score = answer_score(schema)
    assert reported_score == expected_score


@pytest.mark.parametrize(
    "schema, expected_bool",
    [(output_schema, True), (empty_schema, False)],
)
def test_answer_found(schema, expected_bool):
    """
    Checks that answer_found is True for a regular answer and False for
    the "empty" answer
    """
    found = answer_found(schema)
    assert found == expected_bool
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading