Change InternalSignificantTerms to only sum shard level counts in final reduce

Signed-off-by: Jay Deng <jayd0104@gmail.com>
opensearch-project · Jul 17, 2023 · 4625f98 · 4625f98
1 parent 8bd6b7c
commit 4625f98
Show file tree

Hide file tree

Showing 2 changed files with 13 additions and 2 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -45,6 +45,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Migrate client transports to Apache HttpClient / Core 5.x ([#4459](https://github.com/opensearch-project/OpenSearch/pull/4459))
 - Change http code on create index API with bad input raising NotXContentException from 500 to 400 ([#4773](https://github.com/opensearch-project/OpenSearch/pull/4773))
 - Improve summary error message for invalid setting updates ([#4792](https://github.com/opensearch-project/OpenSearch/pull/4792))
+- Change InternalSignificantTerms to sum shard-level superset counts only in final reduce ([#8735](https://github.com/opensearch-project/OpenSearch/pull/8735))
 
 ### Deprecated
 

diff --git a/...c/main/java/org/opensearch/search/aggregations/bucket/terms/InternalSignificantTerms.java b/...c/main/java/org/opensearch/search/aggregations/bucket/terms/InternalSignificantTerms.java
@@ -232,7 +232,12 @@ public InternalAggregation reduce(List<InternalAggregation> aggregations, Reduce
             @SuppressWarnings("unchecked")
             InternalSignificantTerms<A, B> terms = (InternalSignificantTerms<A, B>) aggregation;
             globalSubsetSize += terms.getSubsetSize();
-            globalSupersetSize += terms.getSupersetSize();
+            // supersetSize is a shard level count, so we only sum on the final reduce
+            if (reduceContext.isFinalReduce()) {
+                globalSupersetSize += terms.getSupersetSize();
+            } else {
+                globalSupersetSize = terms.getSupersetSize();
+            }
         }
         Map<String, List<B>> buckets = new HashMap<>();
         for (InternalAggregation aggregation : aggregations) {
@@ -291,7 +296,12 @@ protected B reduceBucket(List<B> buckets, ReduceContext context) {
         List<InternalAggregations> aggregationsList = new ArrayList<>(buckets.size());
         for (B bucket : buckets) {
             subsetDf += bucket.subsetDf;
-            supersetDf += bucket.supersetDf;
+            // supersetDf is a shard level count, so we only sum on the final reduce
+            if (context.isFinalReduce()) {
+                supersetDf += bucket.supersetDf;
+            } else {
+                supersetDf = bucket.supersetDf;
+            }
             aggregationsList.add(bucket.aggregations);
         }
         InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context);