diff --git a/examples/aws-serverless/README.md b/examples/aws-serverless/README.md
index d49dea2475..6a2bb449b5 100644
--- a/examples/aws-serverless/README.md
+++ b/examples/aws-serverless/README.md
@@ -90,7 +90,7 @@ python endpoint.py create-realtime
 
 #### Call Realtime Endpoint
 
-After the endpoint has been staged (~3 minute), AWS SAM will provide your API Gateway endpoint URL in CLI. You can start making requests by passing this URL into the LambdaClient object. Afterwards, you can run inference by passing in your text input:
+After the endpoint has been staged (~3 minutes), AWS SAM will provide your API Gateway endpoint URL in the terminal. You can start making requests by passing this URL into the LambdaClient object. Afterwards, you can run inference by passing in your text input:
 
 ```python
 from client import LambdaClient
diff --git a/examples/aws-serverless/batch/app_inf/requirements.txt b/examples/aws-serverless/batch/app_inf/requirements.txt
index d1295b1f9f..61243ce704 100644
--- a/examples/aws-serverless/batch/app_inf/requirements.txt
+++ b/examples/aws-serverless/batch/app_inf/requirements.txt
@@ -1,3 +1,2 @@
 boto3>=1.18.34
-https://github.com/neuralmagic/transformers/releases/download/v1.4/transformers-4.23.1-py3-none-any.whl
-deepsparse>=1.4.0
+deepsparse[transformers]>=1.5.2
\ No newline at end of file
diff --git a/examples/aws-serverless/realtime/app/Dockerfile b/examples/aws-serverless/realtime/app/Dockerfile
index 209bd4e5f1..0c450322b2 100644
--- a/examples/aws-serverless/realtime/app/Dockerfile
+++ b/examples/aws-serverless/realtime/app/Dockerfile
@@ -1,27 +1,19 @@
-# Pull the base image with python 3.8 as a runtime for your Lambda
-FROM public.ecr.aws/lambda/python:3.8
+# Pull the base image with python 3.10 as a runtime for your Lambda
+FROM public.ecr.aws/lambda/python:3.10.2023.07.11.09
 
-ENV VIRTUAL_ENV=/venv
-ENV PATH="$VIRTUAL_ENV/bin:$PATH"
-
-COPY topo-four-core.json ./
-ENV NM_ARCH_FILE=./topo-four-core.json
-
-# Copy the earlier created requirements.txt file to the container
+# Copy the requirements.txt file to the container
 COPY requirements.txt ./
 
 # Install the python requirements from requirements.txt
-RUN python3 -m venv $VIRTUAL_ENV && \
-    pip install --no-cache-dir --upgrade pip && \
-    python3.8 -m pip install -r requirements.txt
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install -r requirements.txt
 
 # Copy the earlier created app.py file to the container
 COPY app.py ./
 
-# Load the BERT model from SparseZoo and store it in the model directory
+# Load model from SparseZoo and store it in the model directory
 RUN mkdir model
 
-# RUN sparsezoo.download zoo:nlp/question_answering/distilbert-none/pytorch/huggingface/squad/pruned80_quant-none-vnni --save-dir ./model
 RUN sparsezoo.download zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none --save-dir ./model
 
 # Set the CMD to your handler
diff --git a/examples/aws-serverless/realtime/app/requirements.txt b/examples/aws-serverless/realtime/app/requirements.txt
index ed273eb656..c77bb5d539 100644
--- a/examples/aws-serverless/realtime/app/requirements.txt
+++ b/examples/aws-serverless/realtime/app/requirements.txt
@@ -1,2 +1 @@
-https://github.com/neuralmagic/transformers/releases/download/v1.4/transformers-4.23.1-py3-none-any.whl
-deepsparse>=1.2.0
\ No newline at end of file
+deepsparse[transformers]>=1.5.2
\ No newline at end of file
diff --git a/examples/aws-serverless/realtime/app/topo-four-core.json b/examples/aws-serverless/realtime/app/topo-four-core.json
deleted file mode 100644
index 24640b6c1d..0000000000
--- a/examples/aws-serverless/realtime/app/topo-four-core.json
+++ /dev/null
@@ -1,182 +0,0 @@
-{
-  "architecture": "x86_64",
-  "hardware_threads": [
-    {
-      "available": true,
-      "caches": [
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 0,
-          "level": 1,
-          "size": 32768,
-          "type": "data"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 1,
-          "level": 1,
-          "size": 32768,
-          "type": "instruction"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 2,
-          "level": 2,
-          "size": 262144,
-          "type": "data"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 3,
-          "level": 3,
-          "size": 6291456,
-          "type": "data"
-        }
-      ],
-      "core_id": 0,
-      "index": 0,
-      "l3_index": 0,
-      "numa_node": 0,
-      "socket_id": 0,
-      "thread_id": 0
-    },
-    {
-      "available": true,
-      "caches": [
-        {
-          "cache_id": 1,
-          "coherency_line_size": 64,
-          "index": 0,
-          "level": 1,
-          "size": 32768,
-          "type": "data"
-        },
-        {
-          "cache_id": 1,
-          "coherency_line_size": 64,
-          "index": 1,
-          "level": 1,
-          "size": 32768,
-          "type": "instruction"
-        },
-        {
-          "cache_id": 1,
-          "coherency_line_size": 64,
-          "index": 2,
-          "level": 2,
-          "size": 262144,
-          "type": "data"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 3,
-          "level": 3,
-          "size": 6291456,
-          "type": "data"
-        }
-      ],
-      "core_id": 1,
-      "index": 1,
-      "l3_index": 0,
-      "numa_node": 0,
-      "socket_id": 0,
-      "thread_id": 1
-    },
-    {
-      "available": true,
-      "caches": [
-        {
-          "cache_id": 2,
-          "coherency_line_size": 64,
-          "index": 0,
-          "level": 1,
-          "size": 32768,
-          "type": "data"
-        },
-        {
-          "cache_id": 2,
-          "coherency_line_size": 64,
-          "index": 1,
-          "level": 1,
-          "size": 32768,
-          "type": "instruction"
-        },
-        {
-          "cache_id": 2,
-          "coherency_line_size": 64,
-          "index": 2,
-          "level": 2,
-          "size": 262144,
-          "type": "data"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 3,
-          "level": 3,
-          "size": 6291456,
-          "type": "data"
-        }
-      ],
-      "core_id": 2,
-      "index": 2,
-      "l3_index": 0,
-      "numa_node": 0,
-      "socket_id": 0,
-      "thread_id": 2
-    },
-    {
-      "available": true,
-      "caches": [
-        {
-          "cache_id": 3,
-          "coherency_line_size": 64,
-          "index": 0,
-          "level": 1,
-          "size": 32768,
-          "type": "data"
-        },
-        {
-          "cache_id": 3,
-          "coherency_line_size": 64,
-          "index": 1,
-          "level": 1,
-          "size": 32768,
-          "type": "instruction"
-        },
-        {
-          "cache_id": 3,
-          "coherency_line_size": 64,
-          "index": 2,
-          "level": 2,
-          "size": 262144,
-          "type": "data"
-        },
-        {
-          "cache_id": 0,
-          "coherency_line_size": 64,
-          "index": 3,
-          "level": 3,
-          "size": 6291456,
-          "type": "data"
-        }
-      ],
-      "core_id": 3,
-      "index": 3,
-      "l3_index": 0,
-      "numa_node": 0,
-      "socket_id": 0,
-      "thread_id": 3
-    }
-  ],
-  "isa": "avx2",
-  "vendor": "GenuineIntel",
-  "vendor_id": "Intel",
-  "vendor_model": "Intel(R) Core(TM) i9-7980XE CPU @ 2.60GHz",
-  "vnni": false
-}
diff --git a/examples/aws-serverless/realtime/template.yaml b/examples/aws-serverless/realtime/template.yaml
index ac0b5fb359..4f2fba9ee4 100755
--- a/examples/aws-serverless/realtime/template.yaml
+++ b/examples/aws-serverless/realtime/template.yaml
@@ -7,7 +7,7 @@ Resources:
     Type: AWS::Serverless::Function
     Properties:
       PackageType: Image
-      ImageUri: deepsparseendpoint:python3.8-v1
+      ImageUri: deepsparseendpoint:python3.10-v1
       MemorySize: 2000
       Timeout: 300
       Role: !GetAtt LambdaExecutionRole.Arn
@@ -21,7 +21,7 @@
     Metadata:
       Dockerfile: Dockerfile
       DockerContext: ./app
-      DockerTag: python3.8-v1
+      DockerTag: python3.10-v1
 
   LambdaExecutionRole:
     Type: AWS::IAM::Role
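The bumped `deepsparse[transformers]>=1.5.2` pin and the SparseZoo stub used in the realtime Dockerfile can be smoke-tested locally before building the Lambda image. Below is a minimal sketch, assuming the package is installed locally; the `sentiment_analysis` task alias and the sample sentence are illustrative choices, not something mandated by this diff:

```python
# Local smoke test for the updated requirement (illustrative, not part of the patch).
# Assumes: pip install "deepsparse[transformers]>=1.5.2"
from deepsparse import Pipeline

# Same SparseZoo stub the realtime Dockerfile downloads into ./model.
STUB = "zoo:nlp/sentiment_analysis/obert-base/pytorch/huggingface/sst2/pruned90_quant-none"

# Build a sentiment-analysis pipeline directly from the zoo stub and run one input.
pipeline = Pipeline.create(task="sentiment_analysis", model_path=STUB)
print(pipeline(sequences=["The realtime endpoint works end to end."]))
```

If this runs cleanly, the same requirement should resolve inside the Lambda container build as well.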