Llamacpp with cpp backend #116

Workflow file for this run

# scale-config.yml:
# Powers what instance types are available for GHA auto-scaled
# runners. Runners listed here will be available as self-hosted
# runners; configuration is pulled directly from the main branch.
#
# NOTE (Apr 5, 2021): Linux runners are currently all running amazonlinux2
#
# NOTE (Jan 5, 2021): Linux runners are all non-ephemeral to reduce the number of CreateInstances calls
# and avoid RequestLimitExceeded issues
#
# TODO: Add some documentation on how the auto-scaling works
#
# NOTE: Default values:
#
# runner_types:
#   runner_label:
#     instance_type: m4.large
#     os: linux
#     max_available: 20
#     disk_size: 50
#     is_ephemeral: true
runner_types:
  # mainly used for ciflow-should-run, not made to run any serious tests
  linux.large:
    instance_type: c5.large
    os: linux
    disk_size: 10
    is_ephemeral: false
  linux.2xlarge:
    instance_type: c5.2xlarge
    os: linux
    max_available: 750
    disk_size: 150
    is_ephemeral: false
  linux.4xlarge: # for binary-builds
    instance_type: c5.4xlarge
    os: linux
    max_available: 250
    disk_size: 150
    is_ephemeral: false
  linux.8xlarge.nvidia.gpu:
    instance_type: g3.8xlarge
    os: linux
    max_available: 125
    disk_size: 150
    is_ephemeral: false
  linux.4xlarge.nvidia.gpu:
    instance_type: g3.4xlarge
    os: linux
    max_available: 125
    disk_size: 150
    is_ephemeral: false
  linux.16xlarge.nvidia.gpu:
    instance_type: g3.16xlarge
    os: linux
    max_available: 10
    disk_size: 150
    is_ephemeral: false
  windows.4xlarge:
    instance_type: c5d.4xlarge
    os: windows
    max_available: 200
    disk_size: 256
  windows.8xlarge.nvidia.gpu:
    instance_type: p3.2xlarge
    os: windows
    max_available: 50
    disk_size: 256
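
For context, a workflow consumes one of these pools by naming its runner label in the job's runs-on field. A minimal sketch, assuming a hypothetical example-workflow.yml (the job and step names here are illustrative, not part of this repository):

# example-workflow.yml (illustrative only)
jobs:
  build:
    # "linux.2xlarge" is one of the labels defined in scale-config.yml above
    runs-on: linux.2xlarge
    steps:
      - uses: actions/checkout@v4
      - name: Build
        run: make build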