Skip to content

Commit

Permalink
Add Evaluation GH Action (#209)
Browse files Browse the repository at this point in the history
Signed-off-by: Jason Montleon <jmontleo@redhat.com>
  • Loading branch information
jmontleon committed Jun 25, 2024
1 parent 9a42c42 commit ebef75b
Showing 1 changed file with 89 additions and 0 deletions.
89 changes: 89 additions & 0 deletions .github/workflows/evaluation.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Evaluation workflow: triggered by pushes to the main branch.
name: Evaluation Matrix
on:
  push:
    branches:
      - main

# Write scopes requested for the workflow token. The final step of the
# evaluation job publishes results with `auto-push: true`.
permissions:
  deployments: write
  contents: write

jobs:
  # Runs kai/evaluation.py once per (model, example) matrix cell and hands the
  # resulting score to github-action-benchmark for storage.
  evaluation:
    name: Performance Evaluation
    runs-on: ubuntu-latest
    strategy:
      # Matrix cells are independent evaluations; do not cancel the remaining
      # models when one of them fails.
      fail-fast: false
      matrix:
        evaluation:
          - provider: ChatIBMGenAI
            model_prefix: codellama
            model: codellama-34b-instruct
          - provider: ChatIBMGenAI
            model_prefix: deepseek-ai
            model: deepseek-coder-33b-instruct
          - provider: ChatIBMGenAI
            model_prefix: meta-llama
            model: llama-3-70b-instruct
            # Optional per-model override; appended to config.toml when set.
            max_new_tokens: 2048
          - provider: ChatIBMGenAI
            model_prefix: mistralai
            model: mistral-7b-v0-1
          - provider: ChatIBMGenAI
            model_prefix: mistralai
            model: mixtral-8x7b-instruct-v01
        test:
          - example: example_a
    steps:
      - uses: actions/checkout@v4
      # Pinned to a release tag rather than the moving `main` branch so runs
      # are reproducible.
      - uses: actions/setup-python@v5
        with:
          python-version: "3.12.3"
      # Exports MAX_NEW_TOKENS to later steps, only for matrix entries that
      # define max_new_tokens.
      - name: Update environment
        if: matrix.evaluation.max_new_tokens != null
        run: |
          echo "MAX_NEW_TOKENS=${{ matrix.evaluation.max_new_tokens }}" >> "$GITHUB_ENV"
      - name: Run benchmark
        # Explicit bash enables `-eo pipefail`, so a failing evaluation.py is
        # not masked by the `tail | awk` pipeline that follows it.
        shell: bash
        env:
          GENAI_KEY: ${{ secrets.GENAI_KEY }}
          # Series name under which this matrix cell is recorded.
          BENCHMARK_NAME: "${{ matrix.evaluation.provider }}_${{ matrix.evaluation.model_prefix }}_${{ matrix.evaluation.model }}_${{ matrix.test.example }}"
        run: |
          pip install -r requirements.txt
          pip install -e .
          cd kai
          # Generate the evaluation config for this matrix cell.
          cat << EOF > config.toml
          log_level = "info"
          demo_mode = false
          [incident_store]
          provider = "postgresql"
          [incident_store.args]
          host = "127.0.0.1"
          database = "kai"
          user = "kai"
          password = "dog8code"
          [embeddings]
          todo = true
          [models]
          provider = "${{ matrix.evaluation.provider }}"
          [models.args]
          model_id = "${{ matrix.evaluation.model_prefix }}/${{ matrix.evaluation.model }}"
          EOF
          # Appended directly after [models.args], so the key lands in that table.
          if [[ -n "${MAX_NEW_TOKENS:-}" ]]; then
            echo "parameters.max_new_tokens = ${MAX_NEW_TOKENS}" >> config.toml
          fi
          # The score is the third field of the last line printed by
          # evaluation.py. Capture it first so a failure aborts the step
          # instead of publishing an empty value.
          match="$(python evaluation.py --configs ./config.toml | tail -n 1 | awk '{ print $3 }')"
          if [[ -z "${match}" ]]; then
            echo "evaluation.py did not report a score" >&2
            exit 1
          fi
          # NOTE(review): value is emitted as a JSON string, as before; the
          # benchmark action documents a number here - confirm before changing.
          printf '[{"name": "%s", "unit": "Match", "value": "%s"}]\n' \
            "${BENCHMARK_NAME}" "${match}" > ../output.txt
          # Restore the tracked config.toml overwritten above; presumably so
          # the auto-push in the next step sees a clean tree - confirm.
          git checkout config.toml
      - name: Store benchmark result
        uses: benchmark-action/github-action-benchmark@v1
        with:
          tool: customBiggerIsBetter
          benchmark-data-dir-path: evaluations
          output-file-path: output.txt
          # fail-on-alert: true
          github-token: ${{ secrets.GITHUB_TOKEN }}
          auto-push: true

0 comments on commit ebef75b

Please sign in to comment.