Skip to content

Commit

Permalink
Triton_GPT2 example: extend the example notebook to contain a load te…
Browse files Browse the repository at this point in the history
…st example using vegeta tool
  • Loading branch information
nadinet committed May 21, 2021
1 parent 2705d87 commit acd1db4
Showing 1 changed file with 145 additions and 31 deletions.
176 changes: 145 additions & 31 deletions examples/triton_gpt2/README.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -9,17 +9,20 @@
"\n",
"In this notebook, we will run an example of text generation using a GPT2 model exported from HuggingFace and deployed with Seldon's Triton pre-packaged server. The example also covers converting the model to ONNX format.\n",
"The implemented example below is of the Greedy approach for the next token prediction.\n",
"\n",
"more info: https://huggingface.co/transformers/model_doc/gpt2.html?highlight=gpt2\n",
"\n",
"After we have the model deployed to Kubernetes, we will run a simple load test to evaluate the model's inference performance.\n",
"\n",
"\n",
"## Steps:\n",
"1. Download pretrained GPT2 model from hugging face\n",
"2. Convert the model to ONNX\n",
"3. Store it in a MinIO bucket\n",
"4. Setup Seldon-Core in your kubernetes cluster\n",
"5. Deploy the ONNX model with Seldon’s prepackaged Triton server.\n",
"6. Interact with the model, run a greedy alg example (generate sentence completion)\n",
"7. Clean-up\n",
"7. Run load test using vegeta\n",
"8. Clean-up\n",
"\n",
"## Basic requirements\n",
"* Helm v3.0.0+\n",
Expand Down Expand Up @@ -108,10 +111,19 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 1,
"id": "lasting-performance",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[m\u001b[32;1mBucket created successfully `minio-seldon/onnx-gpt2`.\u001b[0m\n",
"./model.onnx: 622.37 MiB / 622.37 MiB ┃▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓┃ 127.01 MiB/s 4s\u001b[0m\u001b[0m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m\u001b[m\u001b[32;1m"
]
}
],
"source": [
"!mc mb minio-seldon/onnx-gpt2 -p\n",
"!mc cp ./model.onnx minio-seldon/onnx-gpt2/gpt2/1/"
Expand All @@ -137,7 +149,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"id": "declared-crown",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -166,7 +178,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"id": "beneficial-anime",
"metadata": {},
"outputs": [
Expand Down Expand Up @@ -201,7 +213,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"id": "subjective-involvement",
"metadata": {},
"outputs": [
Expand All @@ -210,18 +222,18 @@
"output_type": "stream",
"text": [
"secret/seldon-init-container-secret configured\n",
"seldondeployment.machinelearning.seldon.io/gpt2 configured\n"
"seldondeployment.machinelearning.seldon.io/gpt2 created\n"
]
}
],
"source": [
"!kubectl apply -f secret.yaml\n",
"!kubectl apply -f gpt2-deploy.yaml"
"!kubectl apply -f secret.yaml -n default\n",
"!kubectl apply -f gpt2-deploy.yaml -n default"
]
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 5,
"id": "demanding-thesaurus",
"metadata": {},
"outputs": [
Expand All @@ -247,7 +259,7 @@
},
{
"cell_type": "code",
"execution_count": 43,
"execution_count": 6,
"id": "married-roller",
"metadata": {},
"outputs": [
Expand All @@ -258,7 +270,7 @@
"* Trying 127.0.0.1:80...\r\n",
"* TCP_NODELAY set\r\n",
"* Connected to localhost (127.0.0.1) port 80 (#0)\r\n",
"> GET /seldon/seldon/gpt2/v2/models/gpt2 HTTP/1.1\r",
"> GET /seldon/default/gpt2/v2/models/gpt2 HTTP/1.1\r",
"\r\n",
"> Host: localhost\r",
"\r\n",
Expand All @@ -279,15 +291,15 @@
"\r\n",
"< content-type: application/json\r",
"\r\n",
"< seldon-puid: 7e24a20b-3130-4f50-a86b-bda5a9c4c917\r",
"< seldon-puid: 150b15aa-fc9d-416b-a934-b683965d551f\r",
"\r\n",
"< x-content-type-options: nosniff\r",
"\r\n",
"< date: Fri, 16 Apr 2021 15:19:28 GMT\r",
"< date: Mon, 17 May 2021 16:21:31 GMT\r",
"\r\n",
"< content-length: 336\r",
"\r\n",
"< x-envoy-upstream-service-time: 1\r",
"< x-envoy-upstream-service-time: 4\r",
"\r\n",
"< server: istio-envoy\r",
"\r\n",
Expand All @@ -299,7 +311,7 @@
}
],
"source": [
"!curl -v http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2"
"!curl -v http://localhost:80/seldon/default/gpt2/v2/models/gpt2"
]
},
{
Expand All @@ -315,16 +327,7 @@
"execution_count": 7,
"id": "modified-termination",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Input: I enjoy working in Seldon\n",
"Output: I enjoy working in Seldon 's office , and I 'm glad to see that\n"
]
}
],
"outputs": [],
"source": [
"import requests\n",
"import json\n",
Expand Down Expand Up @@ -354,7 +357,7 @@
" ]\n",
" }\n",
"\n",
" ret = requests.post('http://localhost:80/seldon/seldon/gpt2/v2/models/gpt2/infer', json=payload)\n",
" ret = requests.post('http://localhost:80/seldon/default/gpt2/v2/models/gpt2/infer', json=payload)\n",
"\n",
" try:\n",
" res = ret.json()\n",
Expand All @@ -375,6 +378,109 @@
"print(f'Input: {input_text}\\nOutput: {gen_sentence}')"
]
},
{
"cell_type": "markdown",
"id": "colored-status",
"metadata": {},
"source": [
"### Run Load Test / Performance Test using vegeta"
]
},
{
"cell_type": "markdown",
"id": "exempt-discovery",
"metadata": {},
"source": [
"#### Install vegeta; for more details take a look at the official [vegeta](https://github.com/tsenart/vegeta#install) documentation"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "interesting-laptop",
"metadata": {},
"outputs": [],
"source": [
"!wget https://github.com/tsenart/vegeta/releases/download/v12.8.3/vegeta-12.8.3-linux-amd64.tar.gz\n",
"!tar -zxvf vegeta-12.8.3-linux-amd64.tar.gz\n",
"!chmod +x vegeta"
]
},
{
"cell_type": "markdown",
"id": "friendly-lying",
"metadata": {},
"source": [
"#### Generate a vegeta [target file](https://github.com/tsenart/vegeta#-targets) that contains a \"POST\" cmd with the payload in the required structure"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "reliable-croatia",
"metadata": {},
"outputs": [],
"source": [
"from subprocess import run, Popen, PIPE\n",
"import json\n",
"import numpy as np\n",
"from transformers import TFGPT2LMHeadModel, GPT2Tokenizer\n",
"import base64\n",
"\n",
"tokenizer = GPT2Tokenizer.from_pretrained(\"gpt2\")\n",
"input_text = 'I enjoy working in Seldon'\n",
"input_ids = tokenizer.encode(input_text, return_tensors='tf')\n",
"shape = input_ids.shape.as_list()\n",
"payload = {\n",
"\t\t\"inputs\": [\n",
"\t\t\t{\"name\": \"input_ids:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": input_ids.numpy().tolist()\n",
"\t\t\t },\n",
"\t\t\t{\"name\": \"attention_mask:0\",\n",
"\t\t\t \"datatype\": \"INT32\",\n",
"\t\t\t \"shape\": shape,\n",
"\t\t\t \"data\": np.ones(shape, dtype=np.int32).tolist()\n",
"\t\t\t }\n",
"\t\t\t]\n",
"\t\t}\n",
"\n",
"cmd= {\"method\": \"POST\",\n",
"\t\t\"header\": {\"Content-Type\": [\"application/json\"] },\n",
"\t\t\"url\": \"http://localhost:80/seldon/default/gpt2/v2/models/gpt2/infer\",\n",
"\t\t\"body\": base64.b64encode(bytes(json.dumps(payload), \"utf-8\")).decode(\"utf-8\")}\n",
"\n",
"with open(\"vegeta_target.json\", mode=\"w\") as file:\n",
"\tjson.dump(cmd, file)\n",
"\tfile.write('\\n\\n')"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "tribal-statistics",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requests [total, rate, throughput] 60, 1.02, 1.01\r\n",
"Duration [total, attack, wait] 59.198s, 59s, 197.751ms\r\n",
"Latencies [min, mean, 50, 90, 95, 99, max] 179.123ms, 280.177ms, 214.79ms, 325.753ms, 457.825ms, 1.936s, 2.009s\r\n",
"Bytes In [total, mean] 475783920, 7929732.00\r\n",
"Bytes Out [total, mean] 13140, 219.00\r\n",
"Success [ratio] 100.00%\r\n",
"Status Codes [code:count] 200:60 \r\n",
"Error Set:\r\n"
]
}
],
"source": [
"!vegeta attack -targets=vegeta_target.json -rate=1 -duration=60s -format=json | vegeta report -type=text"
]
},
{
"cell_type": "markdown",
"id": "patient-suite",
Expand All @@ -385,12 +491,20 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 11,
"id": "pacific-collectible",
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"seldondeployment.machinelearning.seldon.io \"gpt2\" deleted\r\n"
]
}
],
"source": [
"!kubectl delete -f gpt2-deploy.yaml"
"!kubectl delete -f gpt2-deploy.yaml -n default"
]
}
],
Expand Down

0 comments on commit acd1db4

Please sign in to comment.