Remove assign_cpu_and_gpu_sets #4412

Open
wants to merge 4 commits into master
41 changes: 2 additions & 39 deletions codalab/worker/worker.py
@@ -147,7 +147,8 @@ def __init__(
             docker_network_external=self.docker_network_external,
             docker_runtime=docker_runtime,
             upload_bundle_callback=self.upload_bundle_contents,
-            assign_cpu_and_gpu_sets_fn=self.assign_cpu_and_gpu_sets,
+            cpuset=self.cpuset,
+            gpuset=self.gpuset,
             shared_file_system=self.shared_file_system,
             shared_memory_size_gb=shared_memory_size_gb,
             bundle_runtime=bundle_runtime,
@@ -590,44 +591,6 @@ def process_runs(self):
            if run_state.stage != RunStage.FINISHED
        }

-    def assign_cpu_and_gpu_sets(self, request_cpus, request_gpus):
-        """
-        Propose a cpuset and gpuset to a bundle based on given requested resources.
-        Note: no side effects (this is important: we don't want to maintain more state than necessary)
-
-        Arguments:
-            request_cpus: integer
-            request_gpus: integer
-
-        Returns a 2-tuple:
-            cpuset: assigned cpuset (str indices).
-            gpuset: assigned gpuset (str indices).
-
-        Throws an exception if unsuccessful.
-        """
-        cpuset, gpuset = set(map(str, self.cpuset)), set(map(str, self.gpuset))
-
-        for run_state in self.runs.values():
-            if run_state.stage == RunStage.RUNNING:
-                cpuset -= run_state.cpuset
-                gpuset -= run_state.gpuset
-
-        if len(cpuset) < request_cpus:
-            raise Exception(
-                "Requested more CPUs (%d) than available (%d currently out of %d on the machine)"
-                % (request_cpus, len(cpuset), len(self.cpuset))
-            )
-        if len(gpuset) < request_gpus:
-            raise Exception(
-                "Requested more GPUs (%d) than available (%d currently out of %d on the machine)"
-                % (request_gpus, len(gpuset), len(self.gpuset))
-            )
-
-        def propose_set(resource_set, request_count):
Member Author commented:
Is there a chance that the user will actually get less CPUs / GPUs than they request? If so, then we might need to keep this function.

-            return set(str(el) for el in list(resource_set)[:request_count])
-
-        return propose_set(cpuset, request_cpus), propose_set(gpuset, request_gpus)

    @property
    def all_runs(self):
        """
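Note on the question above about whether a run could get fewer CPUs / GPUs than requested: once the length checks in the removed helper passed, propose_set always returned exactly request_count indices, whereas after this change every run simply receives the worker's full cpuset / gpuset. A minimal standalone sketch contrasting the two behaviors (worker_cpuset and request_cpus are illustrative values, not part of the diff):

# Sketch only, not code from this PR: contrast the removed per-run assignment
# with the new behavior of handing each run the worker's entire cpuset.

worker_cpuset = {0, 1, 2, 3}   # hypothetical worker CPU indices
request_cpus = 2               # hypothetical bundle request

def propose_set(resource_set, request_count):
    # Old behavior (removed assign_cpu_and_gpu_sets): return exactly
    # `request_count` free indices; the caller raised beforehand if fewer
    # than `request_count` were available.
    return set(str(el) for el in list(resource_set)[:request_count])

old_assignment = propose_set(worker_cpuset, request_cpus)   # e.g. {'0', '1'}

# New behavior (this PR): the run gets the whole worker cpuset as-is.
new_assignment = set(map(str, worker_cpuset))                # {'0', '1', '2', '3'}

print(old_assignment, new_assignment)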
18 changes: 3 additions & 15 deletions codalab/worker/worker_run_state.py
@@ -167,7 +167,8 @@ def __init__(
        docker_network_external, # Docker network to add internet connected bundles to
        docker_runtime, # Docker runtime to use for containers (nvidia or runc)
        upload_bundle_callback, # Function to call to upload bundle results to the server
-        assign_cpu_and_gpu_sets_fn, # Function to call to assign CPU and GPU resources to each run
+        cpuset,
+        gpuset,
        shared_file_system, # If True, bundle mount is shared with server
        shared_memory_size_gb, # Shared memory size for the run container (in GB)
        bundle_runtime, # Runtime used to run bundles (docker or kubernetes)
@@ -195,7 +196,6 @@ def __init__(
            fields={'disk_utilization': 0, 'running': True, 'lock': None}
        )
        self.upload_bundle_callback = upload_bundle_callback
-        self.assign_cpu_and_gpu_sets_fn = assign_cpu_and_gpu_sets_fn
        self.shared_file_system = shared_file_system
        self.shared_memory_size_gb = shared_memory_size_gb

@@ -237,19 +237,7 @@ def mount_dependency(dependency, shared_file_system):
            )
            return run_state._replace(stage=RunStage.CLEANING_UP)

-        # Check CPU and GPU availability
-        try:
-            cpuset, gpuset = self.assign_cpu_and_gpu_sets_fn(
Member Author commented:
Add an if statement -- consider just not doing this with kubernetes.

-                run_state.resources.cpus, run_state.resources.gpus
-            )
-        except Exception as e:
-            message = "Unexpectedly unable to assign enough resources to bundle {}: {}".format(
-                run_state.bundle.uuid, str(e)
-            )
-            logger.error(message)
-            logger.error(traceback.format_exc())
-            return run_state._replace(run_status=message)
-
+        cpuset, gpuset = self.cpuset, self.gpuset
        dependencies_ready = True
        status_messages = []
        dependency_keys_to_paths: Dict[DependencyKey, str] = dict()
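If the suggestion above is adopted (keep the assignment step but skip it on Kubernetes), the run state machine could branch on the runtime here instead of dropping the helper entirely. A rough sketch of that branch, assuming the runtime name is available as self.bundle_runtime and compares equal to "kubernetes" (both are assumptions; neither the attribute nor the value is shown in this diff):

# Sketch of the reviewer's suggestion, not code from this PR. Assumes
# self.bundle_runtime holds the runtime name and that the removed
# assign_cpu_and_gpu_sets_fn is kept for the non-Kubernetes path.
if self.bundle_runtime == "kubernetes":
    # Kubernetes schedules CPU/GPU itself, so hand over the worker's full sets.
    cpuset, gpuset = self.cpuset, self.gpuset
else:
    try:
        cpuset, gpuset = self.assign_cpu_and_gpu_sets_fn(
            run_state.resources.cpus, run_state.resources.gpus
        )
    except Exception as e:
        message = "Unexpectedly unable to assign enough resources to bundle {}: {}".format(
            run_state.bundle.uuid, str(e)
        )
        logger.error(message)
        logger.error(traceback.format_exc())
        return run_state._replace(run_status=message)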