Inference UX, accept input data #1285

Merged: 24 commits into main from infer-ux-fixes, Oct 2, 2023
Changes from 10 commits

Commits (24)
6c8dfb3
[deepsparse.infer] UX improvements, data only mode
bfineran Sep 25, 2023
b4b7ec6
fix bug on main
bfineran Sep 25, 2023
17168f6
draft, load files line by line, return iter, save up memory
horheynm Sep 26, 2023
84b03f8
add inference
horheynm Sep 26, 2023
8a69c5f
pass passing in files
horheynm Sep 26, 2023
4091503
latest changes'
horheynm Sep 26, 2023
b0f65af
revert
horheynm Sep 26, 2023
8ee765b
make new folder for inderence
horheynm Sep 26, 2023
8a47e01
allow input to pass thru cli
horheynm Sep 26, 2023
6957067
Merge branch 'main' into infer-ux-fixes
horheynm Sep 26, 2023
b429917
Update src/deepsparse/transformers/inference/infer.py
horheynm Sep 27, 2023
1dc2ee3
remove hardcoded
horheynm Sep 27, 2023
86a2daf
better error message
horheynm Sep 27, 2023
939c6bc
clean up
horheynm Sep 27, 2023
274570b
Merge branch 'main' into infer-ux-fixes
horheynm Sep 27, 2023
7b1edfa
clean up, check kwargs
horheynm Sep 27, 2023
064013a
Merge branch 'infer-ux-fixes' of github.com:neuralmagic/deepsparse in…
horheynm Sep 27, 2023
8cab34f
Merge branch 'main' into infer-ux-fixes
horheynm Sep 27, 2023
4699837
get rid of breakpoint()
horheynm Sep 27, 2023
218b584
Merge branch 'infer-ux-fixes' of github.com:neuralmagic/deepsparse in…
horheynm Sep 27, 2023
ff4b48f
return type
horheynm Sep 27, 2023
8243740
Merge branch 'main' into infer-ux-fixes
horheynm Oct 2, 2023
2a4b972
typo
horheynm Oct 2, 2023
ad9e96d
Merge branch 'infer-ux-fixes' of github.com:neuralmagic/deepsparse in…
horheynm Oct 2, 2023
2 changes: 1 addition & 1 deletion setup.py
@@ -298,7 +298,7 @@ def _setup_entry_points() -> Dict:
"console_scripts": [
f"deepsparse.transformers.run_inference={data_api_entrypoint}",
f"deepsparse.transformers.eval_downstream={eval_downstream}",
"deepsparse.infer=deepsparse.transformers.infer:main",
"deepsparse.infer=deepsparse.transformers.inference.infer:main",
"deepsparse.debug_analysis=deepsparse.debug_analysis:main",
"deepsparse.analyze=deepsparse.analyze:main",
"deepsparse.check_hardware=deepsparse.cpu:print_hardware_capability",
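With this one-line change, the deepsparse.infer console script resolves to the relocated module. Since infer.py keeps its __main__ guard (see the end of that file below), invoking the module directly should be equivalent; this is an assumption based on the guard, not something shown in this PR:

python -m deepsparse.transformers.inference.infer models/llama/deployment --task text-generation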
13 changes: 13 additions & 0 deletions src/deepsparse/transformers/inference/__init__.py
@@ -0,0 +1,13 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
src/deepsparse/transformers/inference/infer.py
@@ -63,10 +63,14 @@
    deepsparse.infer models/llama/deployment \
        --task text-generation
"""

from typing import Iterator, Optional

import click

from deepsparse import Pipeline
from deepsparse.tasks import SupportedTasks
from deepsparse.transformers.inference.prompt_parser import PromptParser


@click.command(
@@ -75,6 +79,14 @@
    )
)
@click.argument("model_path", type=str)
@click.option(
    "--data",
    type=str,
    default=None,
    help="Path to a .txt, .csv, .json, or .jsonl file to load data from. "
    "If provided, runs inference over the entire dataset; if not provided, "
    "runs an interactive inference session in the console. Default None.",
)
@click.option(
    "--sequence_length",
    type=int,
@@ -112,6 +124,7 @@
)
def main(
    model_path: str,
    data: Optional[str],
    sequence_length: int,
    sampling_temperature: float,
    prompt_sequence_length: int,
@@ -131,31 +144,75 @@ def main(
        task=task,  # let pipeline determine if task is supported
        model_path=model_path,
        sequence_length=sequence_length,
        sampling_temperature=sampling_temperature,
        prompt_sequence_length=prompt_sequence_length,
    )

    # continue prompts until a keyboard interrupt
    while True:
        input_text = input("User: ")
        pipeline_inputs = {"prompt": [input_text]}

        if SupportedTasks.is_chat(task):
            pipeline_inputs["session_ids"] = session_ids

        response = pipeline(**pipeline_inputs)
        print("Bot: ", response.generations[0].text)
        if show_tokens_per_sec:
            times = pipeline.timer_manager.times
            prefill_speed = (
                1.0 * prompt_sequence_length / times["engine_prompt_prefill_single"]
            )
            generation_speed = 1.0 / times["engine_token_generation_single"]
            print(
                f"[prefill: {prefill_speed:.2f} tokens/sec]",
                f"[decode: {generation_speed:.2f} tokens/sec]",
                sep="\n",
    if data:
        for prompt, prompt_kwargs in _iter_prompt_from_file(data):
            # pass the per-prompt kwargs parsed from the file row alongside the prompt
            _run_inference(
                pipeline,
                sampling_temperature,
                task,
                session_ids,
                show_tokens_per_sec,
                prompt_sequence_length,
                prompt,
                **prompt_kwargs,
            )
        return

    # continue prompts until a keyboard interrupt
    while True:  # data is None here, so run an interactive session
        prompt_input = input(">>> ")
        _run_inference(
            pipeline,
            sampling_temperature,
            task,
            session_ids,
            show_tokens_per_sec,
            prompt_sequence_length,
            prompt_input,
        )


def _iter_prompt_from_file(data: str) -> Iterator:
    parser = PromptParser(data)
    return parser.parse_as_iterable()


def _run_inference(
    pipeline,
    sampling_temperature,
    task,
    session_ids,
    show_tokens_per_sec,
    prompt_sequence_length,
    prompt,
    **kwargs,
):
    pipeline_inputs = dict(
        prompt=[prompt],
        sampling_temperature=sampling_temperature,
        # **kwargs,  # extra per-prompt kwargs are accepted but not yet forwarded
    )
    if SupportedTasks.is_chat(task):
        pipeline_inputs["session_ids"] = session_ids

    response = pipeline(**pipeline_inputs)
    print("\n", response.generations[0].text)

    if show_tokens_per_sec:
        times = pipeline.timer_manager.times
        prefill_speed = (
            1.0 * prompt_sequence_length / times["engine_prompt_prefill_single"]
        )
        generation_speed = 1.0 / times["engine_token_generation_single"]
        print(
            f"[prefill: {prefill_speed:.2f} tokens/sec]",
            f"[decode: {generation_speed:.2f} tokens/sec]",
            sep="\n",
        )


if __name__ == "__main__":
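Taken together, the new --data flow lets batch inference run straight from a file. A hypothetical invocation based on this diff (prompts.txt is an assumed local file with one prompt per line, not part of the PR):

deepsparse.infer models/llama/deployment \
    --task text-generation \
    --data prompts.txt

Omitting --data preserves the interactive console session, as before.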
81 changes: 81 additions & 0 deletions src/deepsparse/transformers/inference/prompt_parser.py
@@ -0,0 +1,81 @@
# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


import csv
import json
import os
from enum import Enum


class InvalidPromptSourceDirectoryException(Exception):
    pass


class PromptParser:
    class Extensions(Enum):
        TEXT = ".txt"
        CSV = ".csv"
        JSON = ".json"
        JSONL = ".jsonl"

    def __init__(self, filename: str):
        self.extension = self._validate_and_return_extension(filename)
        self.filename: str = filename

    def parse_as_iterable(self):
        if self.extension == self.Extensions.TEXT:
            return self._parse_text()
        if self.extension == self.Extensions.CSV:
            return self._parse_csv()
        if self.extension == self.Extensions.JSON:
            return self._parse_json_list()
        if self.extension == self.Extensions.JSONL:
            return self._parse_jsonl()

    def _parse_text(self):
        with open(self.filename, "r") as file:
            for line in file:
                yield line.strip(), {}

    def _parse_csv(self):
        with open(self.filename, "r", newline="", encoding="utf-8-sig") as file:
            reader = csv.DictReader(file)
            for row in reader:
                yield row.get("prompt"), row

    def _parse_json_list(self):
        with open(self.filename, "r") as file:
            json_list = json.load(file)
            for json_object in json_list:
                yield json_object.get("prompt"), json_object

    def _parse_jsonl(self):
        with open(self.filename, "r") as file:
            for jsonl in file:
                jsonl_object = json.loads(jsonl)
                yield jsonl_object.get("prompt"), jsonl_object

    def _validate_and_return_extension(self, filename: str):
        if os.path.exists(filename):
            for extension in self.Extensions:
                if filename.endswith(extension.value):
                    return extension

            raise InvalidPromptSourceDirectoryException(
                f"{filename} is not a valid source to extract batched prompts from"
            )
        raise FileNotFoundError(filename)
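To illustrate the (prompt, kwargs) contract the parser yields, a minimal sketch; the prompts.jsonl file and its contents are hypothetical, not part of this PR:

from deepsparse.transformers.inference.prompt_parser import PromptParser

# each JSONL line is an object such as {"prompt": "What is sparsity?"}
parser = PromptParser("prompts.jsonl")
for prompt, prompt_kwargs in parser.parse_as_iterable():
    # prompt comes from the "prompt" key; prompt_kwargs is the full row/object
    print(prompt, prompt_kwargs)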