Skip to content

Commit

Permalink
Backport: Make diagnostic message with job type and index tony-framew…
Browse files Browse the repository at this point in the history
  • Loading branch information
zuston committed Nov 17, 2021
1 parent e5282f7 commit b721b2e
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 9 deletions.
19 changes: 12 additions & 7 deletions tony-core/src/main/java/com/linkedin/tony/ApplicationMaster.java
Original file line number Diff line number Diff line change
Expand Up @@ -1218,22 +1218,27 @@ public void onStopContainerError(ContainerId containerId, Throwable t) {
  private class RMCallbackHandler implements AMRMClientAsync.CallbackHandler {
  @Override
  public void onContainersCompleted(List<ContainerStatus> completedContainers) {
- LOG.info("Completed containers: " + completedContainers.size());
+ LOG.info("onContainersCompleted called in RMCallbackHandler, completed containers size: " + completedContainers.size());
  sleepForTesting();

  for (ContainerStatus containerStatus : completedContainers) {
  int exitStatus = containerStatus.getExitStatus();
- LOG.info("ContainerID = " + containerStatus.getContainerId()
- + ", state = " + containerStatus.getState()
- + ", exitStatus = " + exitStatus);
  String diagnostics = containerStatus.getDiagnostics();
+
+ String outputLog = "ContainerID = " + containerStatus.getContainerId()
+ + ", state = " + containerStatus.getState()
+ + ", exitStatus = " + exitStatus
+ + ", diagnostics = " + diagnostics;
+
+ String errorInformation = null;
  if (ContainerExitStatus.SUCCESS != exitStatus) {
- LOG.error(diagnostics);
+ errorInformation = diagnostics;
+ LOG.error(outputLog);
  } else {
- LOG.info(diagnostics);
+ LOG.info(outputLog);
  }

- processFinishedContainer(containerStatus.getContainerId(), exitStatus, diagnostics);
+ processFinishedContainer(containerStatus.getContainerId(), exitStatus, errorInformation);
  }
  }

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -259,7 +259,9 @@ public Map<String, List<String>> getClusterSpec() {
  * Refresh task status when a TaskExecutor registers its exit code with AM.
  */
  public void onTaskCompleted(String jobName, String jobIndex, int exitCode, String taskDiagnosticMsg) {
- LOG.info(String.format("Job %s:%s exited with %d", jobName, jobIndex, exitCode));
+ String outputLog = String.format("Job %s:%s exited with %d", jobName, jobIndex, exitCode);
+ LOG.info(outputLog);
+
  TonyTask task = getTask(jobName, jobIndex);
  Preconditions.checkNotNull(task);
  task.setExitStatus(exitCode);
Expand All @@ -273,7 +275,7 @@ public void onTaskCompleted(String jobName, String jobIndex, int exitCode, Strin
  if (isChief(jobName, jobIndex) || shouldStopOnFailure(jobName)) {
  trainingFinished = true;
  }
- String diagnostic = "Exit status: " + exitCode;
+ String diagnostic = outputLog + ". Exit status: " + exitCode;
  if (taskDiagnosticMsg != null) {
  diagnostic += ". Error msg: " + taskDiagnosticMsg;
  }
Expand Down

0 comments on commit b721b2e

Please sign in to comment.