Skip to content

Commit

Permalink
Optimize used segment fetching in Kill tasks (apache#15107)
Browse files: browse the repository at this point in the history
* Optimize used segment fetching in Kill tasks
  • Loading branch information
AmatyaAvadhanula authored and ektravel committed Oct 16, 2023
1 parent 0501ea9 commit 61ae0d3
Showing 1 changed file with 20 additions and 8 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -222,18 +222,30 @@ public TaskStatus runTask(TaskToolbox toolbox) throws Exception

toolbox.getTaskActionClient().submit(new SegmentNukeAction(new HashSet<>(unusedSegments)));

// Fetch the load specs of all segments overlapping with the given interval
final Set<Map<String, Object>> usedSegmentLoadSpecs = toolbox
.getTaskActionClient()
.submit(new RetrieveUsedSegmentsAction(getDataSource(), getInterval(), null, Segments.INCLUDING_OVERSHADOWED))
.stream()
.map(DataSegment::getLoadSpec)
.collect(Collectors.toSet());
final Set<Interval> unusedSegmentIntervals = unusedSegments.stream()
.map(DataSegment::getInterval)
.collect(Collectors.toSet());
final Set<Map<String, Object>> usedSegmentLoadSpecs = new HashSet<>();
if (!unusedSegmentIntervals.isEmpty()) {
RetrieveUsedSegmentsAction retrieveUsedSegmentsAction = new RetrieveUsedSegmentsAction(
getDataSource(),
null,
unusedSegmentIntervals,
Segments.INCLUDING_OVERSHADOWED
);
// Fetch the load specs of all segments overlapping with the unused segment intervals
usedSegmentLoadSpecs.addAll(toolbox.getTaskActionClient().submit(retrieveUsedSegmentsAction)
.stream()
.map(DataSegment::getLoadSpec)
.collect(Collectors.toSet())
);
}

// Kill segments from the deep storage only if their load specs are not being used by any used segments
final List<DataSegment> segmentsToBeKilled = unusedSegments
.stream()
.filter(unusedSegment -> !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec()))
.filter(unusedSegment -> unusedSegment.getLoadSpec() == null
|| !usedSegmentLoadSpecs.contains(unusedSegment.getLoadSpec()))
.collect(Collectors.toList());

toolbox.getDataSegmentKiller().kill(segmentsToBeKilled);
Expand Down

0 comments on commit 61ae0d3

Please sign in to comment.