Commit

Merge branch 'master' into leaderboarddataapi
Suryansh5545 authored Aug 9, 2023
2 parents 66c462e + 8f4e763 commit 44bac68
Showing 2 changed files with 13 additions and 19 deletions.
31 changes: 12 additions & 19 deletions scripts/monitoring/auto_scale_eks_nodes.py
@@ -73,14 +73,14 @@ def get_scaling_config(eks_client, cluster_name, nodegroup_name):
     return scaling_config
 
 
-def start_eks_worker(challenge, pending_submissions, evalai_interface, aws_keys, desired_size=None):
+def start_eks_worker(challenge, pending_submissions, evalai_interface, aws_keys, new_desired_size):
     eks_client, cluster_name, nodegroup_name = get_eks_meta(
         challenge, evalai_interface, aws_keys
     )
     scaling_config = {
         "minSize": 1,
-        "maxSize": pending_submissions if not desired_size else max(desired_size, pending_submissions),
-        "desiredSize": 1 if not desired_size else desired_size,
+        "maxSize": max(new_desired_size, pending_submissions),
+        "desiredSize": new_desired_size,
     }
     response = eks_client.update_nodegroup_config(
         clusterName=cluster_name,
@@ -132,13 +132,8 @@ def scale_down_workers(challenge, desired_size, evalai_interface, aws_keys):
         )
 
 
-def scale_up_workers(challenge, desired_size, pending_submissions, evalai_interface, aws_keys, scale_up_desired_size=None):
-    if desired_size == 0:
-        new_desired_size = (
-            min(1, pending_submissions)
-            if not scale_up_desired_size
-            else min(scale_up_desired_size, pending_submissions)
-        )
+def scale_up_workers(challenge, original_desired_size, pending_submissions, evalai_interface, aws_keys, new_desired_size):
+    if original_desired_size < new_desired_size:
         response = start_eks_worker(
             challenge, pending_submissions, evalai_interface, aws_keys, new_desired_size
         )
@@ -156,7 +151,7 @@ def scale_up_workers(challenge, desired_size, pending_submissions, evalai_interf
         )
 
 
-def scale_up_or_down_workers(challenge, metrics, evalai_interface, aws_keys, scale_up_desired_size=None):
+def scale_up_or_down_workers(challenge, metrics, evalai_interface, aws_keys, scale_up_desired_size):
     try:
         pending_submissions = get_pending_submission_count_by_pk(metrics, challenge["id"])
     except Exception: # noqa: F841
@@ -174,31 +169,29 @@ def scale_up_or_down_workers(challenge, metrics, evalai_interface, aws_keys, sca
         eks_client, cluster_name, nodegroup_name
     )
     min_size = scaling_config["minSize"]
-    desired_size = scaling_config["desiredSize"]
+    original_desired_size = scaling_config["desiredSize"]
     print(
         "Challenge ID : {}, Title: {}".format(
             challenge["id"], challenge["title"]
         )
     )
     print(
         "Min Size: {}, Desired Size: {}, Pending Submissions: {}".format(
-            min_size, desired_size, pending_submissions
+            min_size, original_desired_size, pending_submissions
         )
     )
 
     if pending_submissions == 0 or parse(challenge["end_date"]) < pytz.UTC.localize(
         datetime.utcnow()
     ):
-        scale_down_workers(challenge, desired_size, evalai_interface, aws_keys)
+        scale_down_workers(challenge, original_desired_size, evalai_interface, aws_keys)
     else:
-        if (pending_submissions > desired_size and not scale_up_desired_size) or pending_submissions <= scale_up_desired_size:
+        if pending_submissions > original_desired_size:
             # Scale up again if needed, up to the maximum allowed scale_up_desired_size (if provided)
-            new_desired_size = min(
-                pending_submissions, scale_up_desired_size or pending_submissions
-            )
+            new_desired_size = min(pending_submissions, scale_up_desired_size)
             scale_up_workers(
                 challenge,
-                desired_size,
+                original_desired_size,
                 pending_submissions,
                 evalai_interface,
                 aws_keys,
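
Net effect of the change to auto_scale_eks_nodes.py: the scale-up cap (scale_up_desired_size) is now a required argument, the target node count becomes min(pending_submissions, scale_up_desired_size), and the nodegroup is only resized when that target exceeds the current desiredSize. The sketch below restates that decision rule for illustration only; compute_scaling is a hypothetical helper, not a function in this repository.

def compute_scaling(original_desired_size, pending_submissions, scale_up_desired_size):
    # Return the scaling config this commit would request, or None when no resize is needed.
    if pending_submissions == 0:
        return None  # pending == 0 is handled by the scale-down path instead
    new_desired_size = min(pending_submissions, scale_up_desired_size)
    if original_desired_size >= new_desired_size:
        return None  # already at or above the capped target
    return {
        "minSize": 1,
        "maxSize": max(new_desired_size, pending_submissions),
        "desiredSize": new_desired_size,
    }

# Example: 1 node running, 5 pending submissions, cap of 3
# -> {'minSize': 1, 'maxSize': 5, 'desiredSize': 3}
print(compute_scaling(1, 5, 3))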
1 change: 1 addition & 0 deletions scripts/workers/code_upload_submission_worker.py
@@ -792,6 +792,7 @@ def main():
                 QUEUE_NAME, is_remote
             )
         elif submission.get("status") == "queued":
+            job_name = submission.get("job_name")[-1]
             pods_list = get_pods_from_job(api_instance, core_v1_api_instance, job_name)
             if pods_list and pods_list.items[0].status.container_statuses:
                 # Update submission to running
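
The one-line addition to code_upload_submission_worker.py makes the queued branch read the job name from the submission message itself before polling its pods. The [-1] index suggests submission.get("job_name") is a list with the most recently created job last; that shape is inferred from this diff, and the snippet below is purely illustrative.

submission = {"status": "queued", "job_name": ["job-42-0", "job-42-1"]}  # made-up message
job_name = submission.get("job_name")[-1]  # latest job created for this submission
print(job_name)  # prints: job-42-1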
