Skip to content

Commit

Permalink
Triton_GPT2 example: extend the example notebook to contain a load te…
Browse files Browse the repository at this point in the history
…st example using vegeta tool
  • Loading branch information
nadinet committed May 21, 2021
1 parent 2705d87 commit acd1db4
Showing 1 changed file with 145 additions and 31 deletions.
176 changes: 145 additions & 31 deletions examples/triton_gpt2/README.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,20 @@
"\n",
"In this notebook, we will run an example of text generation using a GPT2 model exported from HuggingFace and deployed with Seldon's Triton pre-packaged server. The example also covers converting the model to ONNX format.\n",
"The implemented example below is of the Greedy approach for the next token prediction.\n",
"\n",
"more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
"\n",
"After we have the model deployed to Kubernetes, we will run a simple load test to evaluate the model's inference performance.\n",
"\n",
"\n",
"## Steps:\n",
"1. Download pretrained GPT2 model from hugging face\n",
"2. Convert the model to ONNX\n",
"3. Store it in a MinIO bucket\n",
"4. Setup Seldon-Core in your kubernetes cluster\n",
"5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
"6. Interact with the model, run a greedy alg example (generate sentence completion)\n",
"7. Clean-up\n",
"7. Run load test using vegeta\n",
"8. Clean-up\n",
"\n",
"## Basic requirements\n",
"* Helm v3.0.0+\n",
Expand Down Expand Up @@ -108,10 +111,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "lasting-performance",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[m\u001b[32;1mBucket created successfully `minio-seldon/onnx-gpt2`.\u001b[0m\n",
"./model.onnx: 622.37 MiB / 622.37 MiB ┃▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓┃ 127.01 MiB/s 4s\u001b[0m\u001b[0m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m"
]
}
],
"source": [
"!mc mb minio-seldon/onnx-gpt2 -p\n",
"!mc cp ./model.onnx minio-seldon/onnx-gpt2/gpt2/1/"
Expand All @@ -137,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "declared-crown",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -166,7 +178,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "beneficial-anime",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -201,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "subjective-involvement",
"metadata": {},
"outputs": [
Expand All @@ -210,18 +222,18 @@
"output_type": "stream",
"text": [
"secret/seldon-init-container-secret configured\n",
"seldondeployment.machinelearning.seldon.io/gpt2 configured\n"
"seldondeployment.machinelearning.seldon.io/gpt2 created\n"
]
}
],
"source": [
"!kubectl apply -f secret.yaml\n",
"!kubectl apply -f gpt2-deploy.yaml"
"!kubectl apply -f secret.yaml -n default\n",
"!kubectl apply -f gpt2-deploy.yaml -n default"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "demanding-thesaurus",
"metadata": {},
"outputs": [
Expand All @@ -247,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 6,
"id": "married-roller",
"metadata": {},
"outputs": [
Expand All @@ -258,7 +270,7 @@
"* Trying 127.0.0.1:80...\r\n",
"* TCP_NODELAY set\r\n",
"* Connected to localhost (127.0.0.1) port 80 (#0)\r\n",
"> GET /seldon/seldon/gpt2/v2/models/gpt2 HTTP/1.1\r",
"> GET /seldon/default/gpt2/v2/models/gpt2 HTTP/1.1\r",
"\r\n",
"> Host: localhost\r",
"\r\n",
Expand All @@ -279,15 +291,15 @@
"\r\n",
"< content-type: application/json\r",
"\r\n",
"< seldon-puid: 7e24a20b-3130-4f50-a86b-bda5a9c4c917\r",
"< seldon-puid: 150b15aa-fc9d-416b-a934-b683965d551f\r",
"\r\n",
"< x-content-type-options: nosniff\r",
"\r\n",
"< date: Fri, 16 Apr 2021 15:19:28 GMT\r",
"< date: Mon, 17 May 2021 16:21:31 GMT\r",
"\r\n",
"< content-length: 336\r",
"\r\n",
"< x-envoy-upstream-service-time: 1\r",
"< x-envoy-upstream-service-time: 4\r",
"\r\n",
"< server: istio-envoy\r",
"\r\n",
Expand All @@ -299,7 +311,7 @@
}
],
"source": [
"!curl -v http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2"
"!curl -v http://localhost:80/seldon/default/gpt2/v2/models/gpt2"
]
},
{
Expand All @@ -315,16 +327,7 @@
"execution_count": 7,
"id": "modified-termination",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input: I enjoy working in Seldon\n",
"Output: I enjoy working in Seldon 's office , and I 'm glad to see that\n"
]
}
],
"outputs": [],
"source": [
"import requests\n",
"import json\n",
Expand Down Expand Up @@ -354,7 +357,7 @@
" ]\n",
" }\n",
"\n",
" ret = requests.post('http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2/infer', json=payload)\n",
" ret = requests.post('http://localhost:80/seldon/default/gpt2/v2/models/gpt2/infer', json=payload)\n",
"\n",
" try:\n",
" res = ret.json()\n",
Expand All @@ -375,6 +378,109 @@
"print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
]
},
{
"cell_type": "markdown",
"id": "colored-status",
"metadata": {},
"source": [
"### Run Load Test / Performance Test using vegeta"
]
},
{
"cell_type": "markdown",
"id": "exempt-discovery",
"metadata": {},
"source": [
"#### Install vegeta; for more details take a look at the official [vegeta](https://github.com/tsenart/vegeta#install) documentation"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "interesting-laptop",
"metadata": {},
"outputs": [],
"source": [
"!wget https://github.com/tsenart/vegeta/releases/download/v12.8.3/vegeta-12.8.3-linux-amd64.tar.gz\n",
"!tar -zxvf vegeta-12.8.3-linux-amd64.tar.gz\n",
"!chmod +x vegeta"
]
},
{
"cell_type": "markdown",
"id": "friendly-lying",
"metadata": {},
"source": [
"#### Generate a vegeta [target file](https://github.com/tsenart/vegeta#-targets) that contains a \"POST\" cmd with the payload in the required structure"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "reliable-croatia",
"metadata": {},
"outputs": [],
"source": [
"from subprocess import run, Popen, PIPE\n",
"import json\n",
"import numpy as np\n",
"from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
"import base64\n",
"\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"input_text = 'I enjoy working in Seldon'\n",
"input_ids = tokenizer.encode(input_text, return_tensors='tf')\n",
"shape = input_ids.shape.as_list()\n",
"payload = {\n",
"\t\t\"inputs\": [\n",
"\t\t\t{\"name\": \"input_ids:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": input_ids.numpy().tolist()\n",
"\t\t\t },\n",
"\t\t\t{\"name\": \"attention_mask:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": np.ones(shape, dtype=np.int32).tolist()\n",
"\t\t\t }\n",
"\t\t\t]\n",
"\t\t}\n",
"\n",
"cmd= {\"method\": \"POST\",\n",
"\t\t\"header\": {\"Content-Type\": [\"application/json\"] },\n",
"\t\t\"url\": \"http://localhost:80/seldon/default/gpt2/v2/models/gpt2/infer\",\n",
"\t\t\"body\": base64.b64encode(bytes(json.dumps(payload), \"utf-8\")).decode(\"utf-8\")}\n",
"\n",
"with open(\"vegeta_target.json\", mode=\"w\") as file:\n",
"\tjson.dump(cmd, file)\n",
"\tfile.write('\\n\\n')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "tribal-statistics",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requests [total, rate, throughput] 60, 1.02, 1.01\r\n",
"Duration [total, attack, wait] 59.198s, 59s, 197.751ms\r\n",
"Latencies [min, mean, 50, 90, 95, 99, max] 179.123ms, 280.177ms, 214.79ms, 325.753ms, 457.825ms, 1.936s, 2.009s\r\n",
"Bytes In [total, mean] 475783920, 7929732.00\r\n",
"Bytes Out [total, mean] 13140, 219.00\r\n",
"Success [ratio] 100.00%\r\n",
"Status Codes [code:count] 200:60 \r\n",
"Error Set:\r\n"
]
}
],
"source": [
"!vegeta attack -targets=vegeta_target.json -rate=1 -duration=60s -format=json | vegeta report -type=text"
]
},
{
"cell_type": "markdown",
"id": "patient-suite",
Expand All @@ -385,12 +491,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "pacific-collectible",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seldondeployment.machinelearning.seldon.io \"gpt2\" deleted\r\n"
]
}
],
"source": [
"!kubectl delete -f gpt2-deploy.yaml"
"!kubectl delete -f gpt2-deploy.yaml -n default"
]
}
],
Expand Down

0 comments on commit acd1db4

Please sign in to comment.