Skip to content

Commit

Permalink
[MLOps 1.5] Expand the built-ins: NLP (#865)
Browse files Browse the repository at this point in the history
* initial commit

* WIP

* tests passing

* Delete proposal.md

* tests green

* ready for reviews

* corrections

Co-authored-by: bogunowicz@arrival.com <bogunowicz@arrival.com>
  • Loading branch information
dbogunowicz and bogunowicz@arrival.com committed Jan 20, 2023
1 parent 17943f4 commit e4a05c0
Show file tree
Hide file tree
Showing 13 changed files with 407 additions and 0 deletions.
1 change: 1 addition & 0 deletions src/deepsparse/loggers/metric_functions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@
# flake8: noqa
from .built_ins import *
from .computer_vision import *
from .natural_language_processing import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
from .question_answering import *
from .token_classification import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the natural language processing pipelines
"""
from typing import Dict, List, Union


# Names exported by a star-import of this module. Note that
# percent_unknown_tokens is listed here even though it currently only raises
# NotImplementedError.
__all__ = ["string_length", "percent_unknown_tokens"]


def string_length(sequence: Union[List[str], str]) -> Union[Dict[str, int], int]:
    """
    Measure the number of characters in a string, or in every string of a list

    :param sequence: Either a single string or a list of strings
    :return: For a single string, its length as an int. Otherwise a dictionary
        that maps the position of each string in the sequence (as a str key)
        to the length of that string
    """
    if not isinstance(sequence, str):
        lengths = {}
        for position, text in enumerate(sequence):
            # keys are stringified indices, e.g. "0", "1", ...
            lengths[str(position)] = len(text)
        return lengths
    return len(sequence)


def percent_unknown_tokens():
    """
    Placeholder for a metric that is not implemented yet

    :raises NotImplementedError: always
    """
    raise NotImplementedError
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the question answering pipeline
"""

from deepsparse.loggers.metric_functions.natural_language_processing import (
string_length,
)


# Names exported by a star-import of this module
__all__ = ["answer_found", "answer_length", "answer_score"]


def answer_found(qa_output: "QuestionAnsweringOutput") -> bool:  # noqa: F821
    """
    Tell whether the question answering pipeline produced an answer

    :param qa_output: The output schema of the question answering pipeline
    :return: False when the `answer` field equals the literal string "empty",
        True for any other answer
    """
    return qa_output.answer != "empty"


def answer_length(qa_output: "QuestionAnsweringOutput") -> int:  # noqa: F821
    """
    Compute the length of the answer held by the QuestionAnsweringOutput

    :param qa_output: The output schema of the question answering pipeline
    :return: 0 when the `answer` field equals the literal string "empty",
        otherwise the result of `string_length` applied to the answer
    """
    answer = qa_output.answer
    return 0 if answer == "empty" else string_length(answer)


def answer_score(qa_output: "QuestionAnsweringOutput") -> float:  # noqa: F821
    """
    Extract the score attached to the answer of the QuestionAnsweringOutput

    :param qa_output: The output schema of the question answering pipeline
    :return: The value of the `score` field, unchanged
    """
    score = qa_output.score
    return score
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# flake8: noqa
from .built_ins import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Set of functions for logging metrics from the token classification pipeline
"""
from typing import Dict, List

import numpy


# Names exported by a star-import of this module; the underscore-prefixed
# helpers defined below are intentionally left out.
__all__ = ["mean_score", "percent_zero_labels"]


def percent_zero_labels(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Compute, per token sequence, the share of tokens carrying the zero label

    :param token_classification_output: the TokenClassificationOutput object;
        its `predictions` attribute is iterated, one entry per token sequence
    :return: A dictionary where the key is the token sequence index (as a str)
        and the value is whatever `_percent_zero_labels` returns for that
        sequence of tokens
    """
    return {
        str(sequence_idx): _percent_zero_labels(sequence)
        for sequence_idx, sequence in enumerate(
            token_classification_output.predictions
        )
    }


def mean_score(
    token_classification_output: "TokenClassificationOutput",  # noqa: F821
) -> Dict[str, float]:
    """
    Compute, per token sequence, the mean score of its tokens

    :param token_classification_output: the TokenClassificationOutput object;
        its `predictions` attribute is iterated, one entry per token sequence
    :return: A dictionary where the key is the token sequence index (as a str)
        and the value is the mean score of the sequence of tokens
    """
    return {
        str(sequence_idx): _mean_score(sequence)
        for sequence_idx, sequence in enumerate(
            token_classification_output.predictions
        )
    }


def _mean_score(
    token_classification_output: List["TokenClassificationResult"],  # noqa: F821
) -> float:
    # Arithmetic mean of the `score` attribute over a single token sequence
    scores = [item.score for item in token_classification_output]
    return numpy.mean(scores)


def _percent_zero_labels(
    token_classification_output: List["TokenClassificationResult"],  # noqa: F821
) -> float:
    """
    Compute the share of tokens in one sequence that are labelled "LABEL_0"

    :param token_classification_output: token-level results of a single
        sequence; the `entity` attribute of every result is compared
        against "LABEL_0"
    :return: the fraction (between 0.0 and 1.0) of results whose entity equals
        "LABEL_0"; 0.0 when the sequence contains no results
    """
    all_results = len(token_classification_output)
    if all_results == 0:
        # No tokens means no zero labels; avoids a ZeroDivisionError below
        return 0.0
    label_zero = "LABEL_0"
    zero_results = sum(
        1 for result in token_classification_output if result.entity == label_zero
    )
    return zero_results / all_results
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from deepsparse.loggers.metric_functions.natural_language_processing import (
string_length,
)


@pytest.mark.parametrize(
    ("string", "expected_len"),
    [
        # a single string yields a plain integer length
        ("His palms are sweaty", 20),
        # a list of strings yields a dict keyed by the stringified index
        (["knees weak", "arms are heavy"], {"0": 10, "1": 14}),
    ],
)
def test_string_length(string, expected_len):
    """Check string_length for a single string and for a list of strings"""
    actual_len = string_length(string)
    assert actual_len == expected_len
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from deepsparse.loggers.metric_functions.natural_language_processing import (
answer_found,
answer_length,
answer_score,
)
from deepsparse.transformers.pipelines.question_answering import QuestionAnsweringOutput


# Shared fixture: an output holding a real, 20-character answer
output_schema = QuestionAnsweringOutput(
    answer="His palms are sweaty",
    score=0.69,
    start=0,
    end=0,
)
# Shared fixture: an output whose answer is the literal string "empty"
empty_schema = QuestionAnsweringOutput(
    answer="empty",
    score=0.69,
    start=0,
    end=0,
)


@pytest.mark.parametrize(
    ("schema", "expected_len"),
    [
        # a real answer is measured character by character
        (output_schema, 20),
        # the "empty" answer counts as length zero
        (empty_schema, 0),
    ],
)
def test_answer_length(schema, expected_len):
    """Check answer_length for a real answer and for the "empty" answer"""
    actual_len = answer_length(schema)
    assert actual_len == expected_len


@pytest.mark.parametrize(
    ("schema", "expected_score"),
    [(output_schema, 0.69)],
)
def test_answer_score(schema, expected_score):
    """Check that answer_score returns the score stored in the schema"""
    actual_score = answer_score(schema)
    assert actual_score == expected_score


@pytest.mark.parametrize(
    ("schema", "expected_bool"),
    [
        # a real answer is reported as found
        (output_schema, True),
        # the "empty" answer is reported as not found
        (empty_schema, False),
    ],
)
def test_answer_found(schema, expected_bool):
    """Check answer_found for a real answer and for the "empty" answer"""
    was_found = answer_found(schema)
    assert was_found == expected_bool
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit e4a05c0

Please sign in to comment.