Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reset job if existing reset fails #106020

Merged
merged 5 commits into from
Mar 7, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions docs/changelog/106020.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 106020
summary: Fix resetting a job if an existing reset task has failed.
jan-elastic marked this conversation as resolved.
Show resolved Hide resolved
area: Machine Learning
type: bug
issues:
- 105928
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,7 @@ void getFinishedTaskFromIndex(Task thisTask, GetTaskRequest request, ActionListe

client.get(get, ActionListener.wrap(r -> onGetFinishedTaskFromIndex(r, listener), e -> {
if (ExceptionsHelper.unwrap(e, IndexNotFoundException.class) != null) {
// We haven't yet created the index for the task results so it can't be found.
// We haven't yet created the index for the task results, so it can't be found.
listener.onFailure(
new ResourceNotFoundException("task [{}] isn't running and hasn't stored its results", e, request.getTaskId())
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
package org.elasticsearch.xpack.ml.integration;

import org.elasticsearch.core.TimeValue;
import org.elasticsearch.tasks.TaskId;
import org.elasticsearch.xpack.core.ml.job.config.AnalysisConfig;
import org.elasticsearch.xpack.core.ml.job.config.Blocked;
import org.elasticsearch.xpack.core.ml.job.config.DataDescription;
import org.elasticsearch.xpack.core.ml.job.config.Detector;
import org.elasticsearch.xpack.core.ml.job.config.Job;
import org.elasticsearch.xpack.core.ml.job.config.JobUpdate;
import org.elasticsearch.xpack.core.ml.job.process.autodetect.state.DataCounts;
import org.elasticsearch.xpack.core.ml.job.results.Bucket;
import org.junit.After;
Expand All @@ -34,10 +36,18 @@ public void tearDownData() {
}

/**
 * Runs the shared reset scenario with {@code previousResetFailed} set to {@code false},
 * so the job is not marked as blocked before {@code resetJob} is called.
 */
public void testReset() throws Exception {
testReset(false);
}

/**
 * Runs the shared reset scenario with {@code previousResetFailed} set to {@code true}:
 * before calling {@code resetJob}, the helper updates the job so that it is blocked with
 * reason {@code RESET} on a randomly generated task id.
 * NOTE(review): presumably this stands in for a stale block left behind by an earlier
 * reset task that failed (the case described in the changelog entry) — confirm.
 */
public void testReset_previousResetFailed() throws Exception {
testReset(true);
}

private void testReset(boolean previousResetFailed) throws Exception {
TimeValue bucketSpan = TimeValue.timeValueMinutes(30);
long startTime = 1514764800000L;
final int bucketCount = 100;
Job.Builder job = createJob("test-reset", bucketSpan);
Job.Builder job = createJob(bucketSpan);
jan-elastic marked this conversation as resolved.
Show resolved Hide resolved

openJob(job.getId());
postData(
Expand All @@ -53,6 +63,13 @@ public void testReset() throws Exception {
DataCounts dataCounts = getJobStats(job.getId()).get(0).getDataCounts();
assertThat(dataCounts.getProcessedRecordCount(), greaterThan(0L));

if (previousResetFailed) {
JobUpdate jobUpdate = new JobUpdate.Builder(job.getId()).setBlocked(
new Blocked(Blocked.Reason.RESET, new TaskId(randomIdentifier(), randomInt()))
).build();
updateJob(job.getId(), jobUpdate);
}

resetJob(job.getId());

buckets = getBuckets(job.getId());
Expand All @@ -71,11 +88,11 @@ public void testReset() throws Exception {
assertThat("Audit messages: " + auditMessages, auditMessages.get(auditMessages.size() - 1), equalTo("Job has been reset"));
}

private Job.Builder createJob(String jobId, TimeValue bucketSpan) {
private Job.Builder createJob(TimeValue bucketSpan) {
Detector.Builder detector = new Detector.Builder("count", null);
AnalysisConfig.Builder analysisConfig = new AnalysisConfig.Builder(Collections.singletonList(detector.build()));
analysisConfig.setBucketSpan(bucketSpan);
Job.Builder job = new Job.Builder(jobId);
Job.Builder job = new Job.Builder(randomIdentifier());
job.setAnalysisConfig(analysisConfig);
DataDescription.Builder dataDescription = new DataDescription.Builder();
job.setDataDescription(dataDescription);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,10 @@ protected void masterOperation(
waitExistingResetTaskToComplete(
job.getBlocked().getTaskId(),
request,
ActionListener.wrap(r -> resetIfJobIsStillBlockedOnReset(task, request, listener), listener::onFailure)
ActionListener.wrap(
r -> resetIfJobIsStillBlockedOnReset(task, request, listener),
e -> resetIfJobIsStillBlockedOnReset(task, request, listener)
jan-elastic marked this conversation as resolved.
Show resolved Hide resolved
)
);
} else {
ParentTaskAssigningClient taskClient = new ParentTaskAssigningClient(client, taskId);
Expand Down
Loading