diff --git a/docs/reference/aggregations/bucket.asciidoc b/docs/reference/aggregations/bucket.asciidoc index b9fbddc65c125..91f8062a96a3b 100644 --- a/docs/reference/aggregations/bucket.asciidoc +++ b/docs/reference/aggregations/bucket.asciidoc @@ -64,3 +64,4 @@ include::bucket/significantterms-aggregation.asciidoc[] include::bucket/significanttext-aggregation.asciidoc[] include::bucket/terms-aggregation.asciidoc[] + diff --git a/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc b/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc new file mode 100644 index 0000000000000..e2537b61aefda --- /dev/null +++ b/docs/reference/aggregations/bucket/rare-terms-aggregation.asciidoc @@ -0,0 +1,357 @@ +[[search-aggregations-bucket-rare-terms-aggregation]] +=== Rare Terms Aggregation + +A multi-bucket value source based aggregation which finds "rare" terms -- terms that are at the long-tail +of the distribution and are not frequent. Conceptually, this is like a `terms` aggregation that is +sorted by `_count` ascending. As noted in the <>, +actually ordering a `terms` agg by count ascending has unbounded error. Instead, you should use the `rare_terms` +aggregation.
+ +////////////////////////// + +[source,js] +-------------------------------------------------- +PUT /products +{ + "mappings": { + "properties": { + "genre": { + "type": "keyword" + }, + "product": { + "type": "keyword" + } + } + } +} + +POST /products/_doc/_bulk?refresh +{"index":{"_id":0}} +{"genre": "rock", "product": "Product A"} +{"index":{"_id":1}} +{"genre": "rock"} +{"index":{"_id":2}} +{"genre": "rock"} +{"index":{"_id":3}} +{"genre": "jazz", "product": "Product Z"} +{"index":{"_id":4}} +{"genre": "jazz"} +{"index":{"_id":5}} +{"genre": "electronic"} +{"index":{"_id":6}} +{"genre": "electronic"} +{"index":{"_id":7}} +{"genre": "electronic"} +{"index":{"_id":8}} +{"genre": "electronic"} +{"index":{"_id":9}} +{"genre": "electronic"} +{"index":{"_id":10}} +{"genre": "swing"} + +------------------------------------------------- +// NOTCONSOLE +// TESTSETUP + +////////////////////////// 
+ +==== Syntax + +A `rare_terms` aggregation looks like this in isolation: + +[source,js] +-------------------------------------------------- +{ + "rare_terms": { + "field": "the_field", + "max_doc_count": 1 + } +} +-------------------------------------------------- +// NOTCONSOLE + +.`rare_terms` Parameters +|=== +|Parameter Name |Description |Required |Default Value +|`field` |The field we wish to find rare terms in |Required | +|`max_doc_count` |The maximum number of documents a term should appear in. |Optional |`1` +|`precision` |The precision of the internal CuckooFilters. Smaller precision leads to +better approximation, but higher memory usage. Cannot be smaller than `0.00001` |Optional |`0.001` +|`include` |Terms that should be included in the aggregation|Optional | +|`exclude` |Terms that should be excluded from the aggregation|Optional | +|`missing` |The value that should be used if a document does not have the field being aggregated|Optional | +|=== + + +Example: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "genres" : { + "rare_terms" : { + "field" : "genre" + } + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[s/_search/_search\?filter_path=aggregations/] + +Response: + +[source,js] +-------------------------------------------------- +{ + ... 
+ "aggregations" : { + "genres" : { + "buckets" : [ + { + "key" : "swing", + "doc_count" : 1 + } + ] + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/\.\.\.//] + +In this example, the only bucket that we see is the "swing" bucket, because it is the only term that appears in +one document. If we increase the `max_doc_count` to `2`, we'll see some more buckets: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "genres" : { + "rare_terms" : { + "field" : "genre", + "max_doc_count": 2 + } + } + } +} +-------------------------------------------------- +// CONSOLE +// TEST[s/_search/_search\?filter_path=aggregations/] + +This now shows the "jazz" term which has a `doc_count` of 2": + +[source,js] +-------------------------------------------------- +{ + ... + "aggregations" : { + "genres" : { + "buckets" : [ + { + "key" : "swing", + "doc_count" : 1 + }, + { + "key" : "jazz", + "doc_count" : 2 + } + ] + } + } +} +-------------------------------------------------- +// TESTRESPONSE[s/\.\.\.//] + +[[search-aggregations-bucket-rare-terms-aggregation-max-doc-count]] +==== Maximum document count + +The `max_doc_count` parameter is used to control the upper bound of document counts that a term can have. There +is not a size limitation on the `rare_terms` agg like `terms` agg has. This means that terms +which match the `max_doc_count` criteria will be returned. The aggregation functions in this manner to avoid +the order-by-ascending issues that afflict the `terms` aggregation. + +This does, however, mean that a large number of results can be returned if chosen incorrectly. +To limit the danger of this setting, the maximum `max_doc_count` is 100. + +[[search-aggregations-bucket-rare-terms-aggregation-max-buckets]] +==== Max Bucket Limit + +The Rare Terms aggregation is more liable to trip the `search.max_buckets` soft limit than other aggregations due +to how it works. The `max_bucket` soft-limit is evaluated on a per-shard basis while the aggregation is collecting +results. It is possible for a term to be "rare" on a shard but become "not rare" once all the shard results are +merged together. This means that individual shards tend to collect more buckets than are truly rare, because +they only have their own local view. This list is ultimately pruned to the correct, smaller list of rare +terms on the coordinating node... but a shard may have already tripped the `max_buckets` soft limit and aborted +the request. + +When aggregating on fields that have potentially many "rare" terms, you may need to increase the `max_buckets` soft +limit. Alternatively, you might need to find a way to filter the results to return fewer rare values (smaller time +span, filter by category, etc), or re-evaluate your definition of "rare" (e.g. if something +appears 100,000 times, is it truly "rare"?) + +[[search-aggregations-bucket-rare-terms-aggregation-approximate-counts]] +==== Document counts are approximate + +The naive way to determine the "rare" terms in a dataset is to place all the values in a map, incrementing counts +as each document is visited, then return the bottom `n` rows. This does not scale beyond even modestly sized data +sets. A sharded approach where only the "top n" values are retained from each shard (ala the `terms` aggregation) +fails because the long-tail nature of the problem means it is impossible to find the "top n" bottom values without +simply collecting all the values from all shards. 
+ +Instead, the Rare Terms aggregation uses a different approximate algorithm: + +1. Values are placed in a map the first time they are seen. +2. Each additional occurrence of the term increments a counter in the map. +3. If the counter > the `max_doc_count` threshold, the term is removed from the map and placed in a +https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf[CuckooFilter] +4. The CuckooFilter is consulted on each term. If the value is inside the filter, it is known to be above the +threshold already and skipped. + +After execution, the map of values is the map of "rare" terms under the `max_doc_count` threshold. This map and CuckooFilter +are then merged with all other shards. If there are terms that are greater than the threshold (or appear in +a different shard's CuckooFilter), the term is removed from the merged list. The final map of values is returned +to the user as the "rare" terms. + +CuckooFilters have the possibility of returning false positives (they can say a value exists in their collection when +it actually does not). Since the CuckooFilter is being used to see if a term is over the threshold, this means a false positive +from the CuckooFilter will mistakenly say a value is common when it is not (and thus exclude it from the final list of buckets). +Practically, this means the aggregation exhibits false-negative behavior since the filter is being used "in reverse" +of how people generally think of approximate set membership sketches. + +CuckooFilters are described in more detail in the paper: + +https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf[Fan, Bin, et al. "Cuckoo filter: Practically better than bloom."] +Proceedings of the 10th ACM International on Conference on emerging Networking Experiments and Technologies. ACM, 2014. 
+ +==== Precision + +Although the internal CuckooFilter is approximate in nature, the false-negative rate can be controlled with a +`precision` parameter. This allows the user to trade more runtime memory for more accurate results. + +The default precision is `0.001`, and the smallest (i.e. most accurate, with the largest memory overhead) is `0.00001`. +Below are some charts which demonstrate how the accuracy of the aggregation is affected by precision and number +of distinct terms. + +The X-axis shows the number of distinct values the aggregation has seen, and the Y-axis shows the percent error. +Each line series represents one "rarity" condition (ranging from one rare item to 100,000 rare items). For example, +the orange "10" line means ten of the values were "rare" (`doc_count == 1`), out of 1-20m distinct values (where the +rest of the values had `doc_count > 1`). + +This first chart shows precision `0.01`: + +image:images/rare_terms/accuracy_01.png[] + +And precision `0.001` (the default): + +image:images/rare_terms/accuracy_001.png[] + +And finally precision `0.0001`: + +image:images/rare_terms/accuracy_0001.png[] + +The default precision of `0.001` maintains an accuracy of < 2.5% for the tested conditions, and accuracy slowly +degrades in a controlled, linear fashion as the number of distinct values increases. + +The default precision of `0.001` has a memory profile of approximately `1.748 * n` bytes, where `n` is the number +of distinct values the aggregation has seen (it can also be roughly eyeballed, e.g. 20 million unique values is about +30mb of memory). 
The memory usage is linear to the number of distinct values regardless of which precision is chosen; the precision +only affects the slope of the memory profile, as seen in this chart: + +image:images/rare_terms/memory.png[] + +For comparison, an equivalent terms aggregation at 20 million buckets would be roughly +`20m * 69b == ~1.38gb` (with 69 bytes being a very optimistic estimate of an empty bucket cost, far lower than what +the circuit breaker accounts for). So although the `rare_terms` agg is relatively heavy, it is still orders of +magnitude smaller than the equivalent terms aggregation. + +==== Filtering Values + +It is possible to filter the values for which buckets will be created. This can be done using the `include` and +`exclude` parameters which are based on regular expression strings or arrays of exact values. Additionally, +`include` clauses can filter using `partition` expressions. + +===== Filtering Values with regular expressions + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "genres" : { + "rare_terms" : { + "field" : "genre", + "include" : "swi*", + "exclude" : "electro*" + } + } + } +} +-------------------------------------------------- +// CONSOLE + +In the above example, buckets will be created for all the genres that start with `swi`, except those starting +with `electro` (so the genre `swing` will be aggregated but not `electro_swing`). The `include` regular expression will determine what +values are "allowed" to be aggregated, while the `exclude` determines the values that should not be aggregated. When +both are defined, the `exclude` has precedence, meaning the `include` is evaluated first and only then the `exclude`. + +The syntax is the same as <>. + +===== Filtering Values with exact values + +For matching based on exact values, the `include` and `exclude` parameters can simply take an array of +strings that represent the terms as they are found in the index: + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "genres" : { + "rare_terms" : { + "field" : "genre", + "include" : ["swing", "rock"], + "exclude" : ["jazz"] + } + } + } +} +-------------------------------------------------- +// CONSOLE + + +==== Missing value + +The `missing` parameter defines how documents that are missing a value should be treated. +By default, they will be ignored, but it is also possible to treat them as if they +had a value. + +[source,js] +-------------------------------------------------- +GET /_search +{ + "aggs" : { + "genres" : { + "rare_terms" : { + "field" : "genre", + "missing": "N/A" <1> + } + } + } +} +-------------------------------------------------- +// CONSOLE + +<1> Documents without a value in the `genre` field will fall into the same bucket as documents that have the value `N/A`. + +==== Nested, RareTerms, and scoring sub-aggregations + +The RareTerms aggregation has to operate in `breadth_first` mode, since it needs to prune terms as doc count thresholds +are breached. This requirement means the RareTerms aggregation is incompatible with certain combinations of aggregations +that require `depth_first`. In particular, scoring sub-aggregations that are inside a `nested` aggregation force the entire aggregation tree to run +in `depth_first` mode. This will throw an exception since RareTerms is unable to process `depth_first`. 
+ +As a concrete example, if `rare_terms` aggregation is the child of a `nested` aggregation, and one of the child aggregations of `rare_terms` +needs document scores (like a `top_hits` aggregation), this will throw an exception. \ No newline at end of file diff --git a/docs/reference/images/rare_terms/accuracy_0001.png b/docs/reference/images/rare_terms/accuracy_0001.png new file mode 100644 index 0000000000000..0c13a3938cde2 Binary files /dev/null and b/docs/reference/images/rare_terms/accuracy_0001.png differ diff --git a/docs/reference/images/rare_terms/accuracy_001.png b/docs/reference/images/rare_terms/accuracy_001.png new file mode 100644 index 0000000000000..2aa1be316c382 Binary files /dev/null and b/docs/reference/images/rare_terms/accuracy_001.png differ diff --git a/docs/reference/images/rare_terms/accuracy_01.png b/docs/reference/images/rare_terms/accuracy_01.png new file mode 100644 index 0000000000000..7182b7d3c537e Binary files /dev/null and b/docs/reference/images/rare_terms/accuracy_01.png differ diff --git a/docs/reference/images/rare_terms/memory.png b/docs/reference/images/rare_terms/memory.png new file mode 100644 index 0000000000000..e0de5c2163913 Binary files /dev/null and b/docs/reference/images/rare_terms/memory.png differ diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml new file mode 100644 index 0000000000000..73c46bc963e12 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/280_rare_terms.yml @@ -0,0 +1,316 @@ +setup: + - skip: + version: " - 8.0.0" # TODO change this after backport + reason: RareTerms added in 7.3.0 + - do: + indices.create: + index: test_1 + body: + settings: + number_of_replicas: 0 + mappings: + properties: + str: + type: keyword + ip: + type: ip + boolean: + type: boolean + integer: + type: long + number: + type: long + date: + type: date + + + - do: + cluster.health: + wait_for_status: green + +--- +"Basic test": + - do: + index: + index: test_1 + id: 1 + body: { "str" : "abc" } + + - do: + index: + index: test_1 + id: 2 + body: { "str": "abc" } + + - do: + index: + index: test_1 + id: 3 + body: { "str": "bcd" } + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "str_terms" : { "rare_terms" : { "field" : "str", "max_doc_count" : 1 } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.str_terms.buckets: 1 } + - match: { aggregations.str_terms.buckets.0.key: "bcd" } + - is_false: aggregations.str_terms.buckets.0.key_as_string + - match: { aggregations.str_terms.buckets.0.doc_count: 1 } + +--- +"IP test": + - do: + index: + index: test_1 + id: 1 + body: { "ip": "::1" } + + - do: + index: + index: test_1 + id: 2 + body: { "ip": "127.0.0.1" } + + - do: + index: + index: test_1 + id: 3 + body: { "ip": "::1" } + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "rare_terms" : { "field" : "ip" } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.ip_terms.buckets: 1 } + - match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" } + - is_false: aggregations.ip_terms.buckets.0.key_as_string + - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } + + - do: + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "rare_terms" : { "field" : "ip", "include" : [ "127.0.0.1" ] } } } } + + - match: { hits.total.value: 3 } + - length: { 
aggregations.ip_terms.buckets: 1 } + - match: { aggregations.ip_terms.buckets.0.key: "127.0.0.1" } + - is_false: aggregations.ip_terms.buckets.0.key_as_string + - match: { aggregations.ip_terms.buckets.0.doc_count: 1 } + + - do: + search: + body: { "size" : 0, "aggs" : { "ip_terms" : { "rare_terms" : { "field" : "ip", "exclude" : [ "127.0.0.1" ] } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.ip_terms.buckets: 0 } + + - do: + catch: request + search: + index: test_1 + body: { "size" : 0, "aggs" : { "ip_terms" : { "rare_terms" : { "field" : "ip", "exclude" : "127.*" } } } } + + + +--- +"Boolean test": + - do: + index: + index: test_1 + id: 1 + body: { "boolean": true } + + - do: + index: + index: test_1 + id: 2 + body: { "boolean": false } + + - do: + index: + index: test_1 + id: 3 + body: { "boolean": true } + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "boolean_terms" : { "rare_terms" : { "field" : "boolean" } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.boolean_terms.buckets: 1 } + - match: { aggregations.boolean_terms.buckets.0.key: 0 } + - match: { aggregations.boolean_terms.buckets.0.key_as_string: "false" } + - match: { aggregations.boolean_terms.buckets.0.doc_count: 1 } + +--- +"Integer test": + - do: + index: + index: test_1 + id: 1 + body: { "integer": 1234 } + + - do: + index: + index: test_1 + id: 2 + body: { "integer": 5678 } + + - do: + index: + index: test_1 + id: 3 + body: { "integer": 1234 } + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "integer_terms" : { "rare_terms" : { "field" : "integer" } } } } + + - match: { hits.total.value: 3 } + + - length: { aggregations.integer_terms.buckets: 1 } + + - match: { aggregations.integer_terms.buckets.0.key: 5678 } + - is_false: aggregations.integer_terms.buckets.0.key_as_string + - match: { aggregations.integer_terms.buckets.0.doc_count: 1 } + +--- +"Date test": + - do: + index: + index: test_1 + id: 1 + body: { "date": "2016-05-03" } + + - do: + index: + index: test_1 + id: 2 + body: { "date": "2014-09-01" } + + - do: + index: + index: test_1 + id: 3 + body: { "date": "2016-05-03" } + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "rare_terms" : { "field" : "date" } } } } + + - match: { hits.total.value: 3 } + + - length: { aggregations.date_terms.buckets: 1 } + - match: { aggregations.date_terms.buckets.0.key: 1409529600000 } + - match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" } + - match: { aggregations.date_terms.buckets.0.doc_count: 1 } + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "rare_terms" : { "field" : "date", "include" : [ "2014-09-01" ] } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.date_terms.buckets: 1 } + - match: { aggregations.date_terms.buckets.0.key_as_string: "2014-09-01T00:00:00.000Z" } + - match: { aggregations.date_terms.buckets.0.doc_count: 1 } + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "rare_terms" : { "field" : "date", "exclude" : [ "2014-09-01" ] } } } } + + - match: { hits.total.value: 3 } + - length: { aggregations.date_terms.buckets: 0 } + +--- +"Unmapped strings": + + - do: + index: + index: test_1 + id: 1 + body: {} + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "string_terms" : { "rare_terms" : { "field" : "unmapped_string"} } } } + + - match: { hits.total.value: 1 } + - 
length: { aggregations.string_terms.buckets: 0 } + +--- +"Unmapped booleans": + + - do: + index: + index: test_1 + id: 1 + body: {} + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "boolean_terms" : { "rare_terms" : { "field" : "unmapped_boolean" } } } } + + - match: { hits.total.value: 1 } + - length: { aggregations.boolean_terms.buckets: 0 } + +--- +"Unmapped dates": + + - do: + index: + index: test_1 + id: 1 + body: {} + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "date_terms" : { "rare_terms" : { "field" : "unmapped_date"} } } } + + - match: { hits.total.value: 1 } + - length: { aggregations.date_terms.buckets: 0 } + +--- +"Unmapped longs": + + - do: + index: + index: test_1 + id: 1 + body: {} + + - do: + indices.refresh: {} + + - do: + search: + body: { "size" : 0, "aggs" : { "long_terms" : { "rare_terms" : { "field" : "unmapped_long", "value_type" : "long" } } } } + + - match: { hits.total.value: 1 } + - length: { aggregations.long_terms.buckets: 0 } + + diff --git a/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java b/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java index a52f0e8acc4ae..a9232e06657ad 100644 --- a/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java +++ b/server/src/main/java/org/elasticsearch/common/hash/MurmurHash3.java @@ -21,6 +21,8 @@ import org.elasticsearch.common.util.ByteUtils; +import java.util.Objects; + /** * MurmurHash3 hashing functions. @@ -36,6 +38,24 @@ public static class Hash128 { public long h1; /** higher 64 bits part **/ public long h2; + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + Hash128 that = (Hash128) other; + return Objects.equals(this.h1, that.h1) + && Objects.equals(this.h2, that.h2); + } + + @Override + public int hashCode() { + return Objects.hash(h1, h2); + } } private static long C1 = 0x87c37b91114253d5L; @@ -160,4 +180,22 @@ public static Hash128 hash128(byte[] key, int offset, int length, long seed, Has return hash; } + /** + * A 64-bit variant which accepts a long to hash, and returns the 64bit long hash. + * This is useful if the input is already in long (or smaller) format and you don't + * need the full 128b width and flexibility of + * {@link MurmurHash3#hash128(byte[], int, int, long, Hash128)} + * + * Given the limited nature of this variant, it should be faster than the 128b version + * when you only need 128b (many fewer instructions) + */ + public static long murmur64(long h) { + h ^= h >>> 33; + h *= 0xff51afd7ed558ccdL; + h ^= h >>> 33; + h *= 0xc4ceb9fe1a85ec53L; + h ^= h >>> 33; + return h; + } + } diff --git a/server/src/main/java/org/elasticsearch/common/util/CuckooFilter.java b/server/src/main/java/org/elasticsearch/common/util/CuckooFilter.java new file mode 100644 index 0000000000000..54099735fba47 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/util/CuckooFilter.java @@ -0,0 +1,521 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common.util; + +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.packed.PackedInts; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.Iterator; +import java.util.Objects; +import java.util.Random; + +/** + * An approximate set membership datastructure + * + * CuckooFilters are similar to Bloom Filters in usage; values are inserted, and the Cuckoo + * can be asked if it has seen a particular value before. Because the structure is approximate, + * it can return false positives (says it has seen an item when it has not). False negatives + * are not possible though; if the structure says it _has not_ seen an item, that can be + * trusted. + * + * The filter can "saturate" at which point the map has hit it's configured load factor (or near enough + * that a large number of evictions are not able to find a free slot) and will refuse to accept + * any new insertions. + * + * NOTE: this version does not support deletions, and as such does not save duplicate + * fingerprints (e.g. when inserting, if the fingerprint is already present in the + * candidate buckets, it is not inserted). By not saving duplicates, the CuckooFilter + * loses the ability to delete values. But not by allowing deletions, we can save space + * (do not need to waste slots on duplicate fingerprints), and we do not need to worry + * about inserts "overflowing" a bucket because the same item has been repeated repeatedly + * + * NOTE: this CuckooFilter exposes a number of Expert APIs which assume the caller has + * intimate knowledge about how the algorithm works. It is recommended to use + * {@link SetBackedScalingCuckooFilter} instead. + * + * Based on the paper: + * + * Fan, Bin, et al. "Cuckoo filter: Practically better than bloom." + * Proceedings of the 10th ACM International on Conference on emerging Networking Experiments and Technologies. ACM, 2014. + * + * https://www.cs.cmu.edu/~dga/papers/cuckoo-conext2014.pdf + */ +public class CuckooFilter implements Writeable { + + private static final double LN_2 = Math.log(2); + private static final int MAX_EVICTIONS = 500; + static final int EMPTY = 0; + + private final PackedInts.Mutable data; + private final int numBuckets; + private final int bitsPerEntry; + private final int fingerprintMask; + private final int entriesPerBucket; + private final Random rng; + private int count; + private int evictedFingerprint = EMPTY; + + /** + * @param capacity The number of expected inserts. The filter can hold more than this value, it is just an estimate + * @param fpp The desired false positive rate. 
Smaller values will reduce the + * false positives at expense of larger size + * @param rng A random number generator, used with the cuckoo hashing process + */ + CuckooFilter(long capacity, double fpp, Random rng) { + this.rng = rng; + this.entriesPerBucket = entriesPerBucket(fpp); + double loadFactor = getLoadFactor(entriesPerBucket); + this.bitsPerEntry = bitsPerEntry(fpp, entriesPerBucket); + this.numBuckets = getNumBuckets(capacity, loadFactor, entriesPerBucket); + + if ((long) numBuckets * (long) entriesPerBucket > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Attempted to create [" + numBuckets * entriesPerBucket + + "] entries which is > Integer.MAX_VALUE"); + } + this.data = PackedInts.getMutable(numBuckets * entriesPerBucket, bitsPerEntry, PackedInts.COMPACT); + + // puts the bits at the right side of the mask, e.g. `0000000000001111` for bitsPerEntry = 4 + this.fingerprintMask = (0x80000000 >> (bitsPerEntry - 1)) >>> (Integer.SIZE - bitsPerEntry); + } + + /** + * This ctor is likely slow and should only be used for testing + */ + CuckooFilter(CuckooFilter other) { + this.numBuckets = other.numBuckets; + this.bitsPerEntry = other.bitsPerEntry; + this.entriesPerBucket = other.entriesPerBucket; + this.count = other.count; + this.evictedFingerprint = other.evictedFingerprint; + this.rng = other.rng; + this.fingerprintMask = other.fingerprintMask; + + // This shouldn't happen, but as a sanity check + if ((long) numBuckets * (long) entriesPerBucket > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Attempted to create [" + numBuckets * entriesPerBucket + + "] entries which is > Integer.MAX_VALUE"); + } + // TODO this is probably super slow, but just used for testing atm + this.data = PackedInts.getMutable(numBuckets * entriesPerBucket, bitsPerEntry, PackedInts.COMPACT); + for (int i = 0; i < other.data.size(); i++) { + data.set(i, other.data.get(i)); + } + } + + CuckooFilter(StreamInput in, Random rng) throws IOException { + this.numBuckets = in.readVInt(); + this.bitsPerEntry = in.readVInt(); + this.entriesPerBucket = in.readVInt(); + this.count = in.readVInt(); + this.evictedFingerprint = in.readVInt(); + this.rng = rng; + + this.fingerprintMask = (0x80000000 >> (bitsPerEntry - 1)) >>> (Integer.SIZE - bitsPerEntry); + + data = (PackedInts.Mutable) PackedInts.getReader(new DataInput() { + @Override + public byte readByte() throws IOException { + return in.readByte(); + } + + @Override + public void readBytes(byte[] b, int offset, int len) throws IOException { + in.readBytes(b, offset, len); + } + }); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(numBuckets); + out.writeVInt(bitsPerEntry); + out.writeVInt(entriesPerBucket); + out.writeVInt(count); + out.writeVInt(evictedFingerprint); + + data.save(new DataOutput() { + @Override + public void writeByte(byte b) throws IOException { + out.writeByte(b); + } + + @Override + public void writeBytes(byte[] b, int offset, int length) throws IOException { + out.writeBytes(b, offset, length); + } + }); + } + + /** + * Get the number of unique items that are being tracked + */ + public int getCount() { + return count; + } + + /** + * Returns the number of buckets that has been chosen based + * on the initial configuration + * + * Expert-level API + */ + int getNumBuckets() { + return numBuckets; + } + + /** + * Returns the number of bits used per entry + * + * Expert-level API + */ + int getBitsPerEntry() { + return bitsPerEntry; + } + + /** + * Returns the cached fingerprint 
mask. This is simply a mask for the + * first bitsPerEntry bits, used by {@link CuckooFilter#fingerprint(int, int, int)} + * to generate the fingerprint of a hash + * + * Expert-level API + */ + int getFingerprintMask() { + return fingerprintMask; + } + + /** + * Returns an iterator that returns the long[] representation of each bucket. The value + * inside each long will be a fingerprint (or 0L, representing empty). + * + * Expert-level API + */ + Iterator getBuckets() { + return new Iterator<>() { + int current = 0; + + @Override + public boolean hasNext() { + return current < numBuckets; + } + + @Override + public long[] next() { + long[] values = new long[entriesPerBucket]; + int offset = getOffset(current, 0); + data.get(offset, values, 0, entriesPerBucket); + current += 1; + return values; + } + }; + } + + /** + * Returns true if the set might contain the provided value, false otherwise. False values are + * 100% accurate, while true values may be a false-positive. + */ + boolean mightContain(long hash) { + int bucket = hashToIndex((int) hash, numBuckets); + int fingerprint = fingerprint((int) (hash >>> 32), bitsPerEntry, fingerprintMask); + int alternateIndex = alternateIndex(bucket, fingerprint, numBuckets); + + return mightContainFingerprint(bucket, fingerprint, alternateIndex); + } + + /** + * Returns true if the bucket or it's alternate bucket contains the fingerprint. + * + * Expert-level API, use {@link CuckooFilter#mightContain(long)} to check if + * a value is in the filter. + */ + boolean mightContainFingerprint(int bucket, int fingerprint, int alternateBucket) { + + // check all entries for both buckets and the evicted slot + return hasFingerprint(bucket, fingerprint) || hasFingerprint(alternateBucket, fingerprint) || evictedFingerprint == fingerprint; + } + + /** + * Return's true if any of the entries in the bucket contain the fingerprint + */ + private boolean hasFingerprint(int bucket, long fingerprint) { + long[] values = new long[entriesPerBucket]; + int offset = getOffset(bucket, 0); + data.get(offset, values, 0, entriesPerBucket); + + for (int i = 0; i < entriesPerBucket; i++) { + if (values[i] == fingerprint) { + return true; + } + } + return false; + } + + /** + * Add's the hash to the bucket or alternate bucket. Returns true if the insertion was + * successful, false if the filter is saturated. + */ + boolean add(long hash) { + // Each bucket needs 32 bits, so we truncate for the first bucket and shift/truncate for second + int bucket = hashToIndex((int) hash, numBuckets); + int fingerprint = fingerprint((int) (hash >>> 32), bitsPerEntry, fingerprintMask); + return mergeFingerprint(bucket, fingerprint); + } + + /** + * Attempts to merge the fingerprint into the specified bucket or it's alternate bucket. + * Returns true if the insertion was successful, false if the filter is saturated. 
+ * + * Expert-level API, use {@link CuckooFilter#add(long)} to insert + * values into the filter + */ + boolean mergeFingerprint(int bucket, int fingerprint) { + // If we already have an evicted fingerprint we are full, no need to try + if (evictedFingerprint != EMPTY) { + return false; + } + + int alternateBucket = alternateIndex(bucket, fingerprint, numBuckets); + if (tryInsert(bucket, fingerprint) || tryInsert(alternateBucket, fingerprint)) { + count += 1; + return true; + } + + for (int i = 0; i < MAX_EVICTIONS; i++) { + // overwrite our alternate bucket, and a random entry + int offset = getOffset(alternateBucket, rng.nextInt(entriesPerBucket - 1)); + int oldFingerprint = (int) data.get(offset); + data.set(offset, fingerprint); + + // replace details and start again + fingerprint = oldFingerprint; + bucket = alternateBucket; + alternateBucket = alternateIndex(bucket, fingerprint, numBuckets); + + // Only try to insert into alternate bucket + if (tryInsert(alternateBucket, fingerprint)) { + count += 1; + return true; + } + } + + // If we get this far, we failed to insert the value after MAX_EVICTION rounds, + // so cache the last evicted value (so we don't lose it) and signal we failed + evictedFingerprint = fingerprint; + return false; + } + + /** + * Low-level insert method. Attempts to write the fingerprint into an empty entry + * at this bucket's position. Returns true if that was sucessful, false if all entries + * were occupied. + * + * If the fingerprint already exists in one of the entries, it will not duplicate the + * fingerprint like the original paper. This means the filter _cannot_ support deletes, + * but is not sensitive to "overflowing" buckets with repeated inserts + */ + private boolean tryInsert(int bucket, int fingerprint) { + long[] values = new long[entriesPerBucket]; + int offset = getOffset(bucket, 0); + data.get(offset, values, 0, entriesPerBucket); + + // TODO implement semi-sorting + for (int i = 0; i < values.length; i++) { + if (values[i] == EMPTY) { + data.set(offset + i, fingerprint); + return true; + } else if (values[i] == fingerprint) { + // Already have the fingerprint, no need to save + return true; + } + } + return false; + } + + /** + * Converts a hash into a bucket index (primary or alternate). + * + * If the hash is negative, this flips the bits. The hash is then modulo numBuckets + * to get the final index. + * + * Expert-level API + */ + static int hashToIndex(int hash, int numBuckets) { + return hash & (numBuckets - 1); + } + + /** + * Calculates the alternate bucket for a given bucket:fingerprint tuple + * + * The alternate bucket is the fingerprint multiplied by a mixing constant, + * then xor'd against the bucket. This new value is modulo'd against + * the buckets via {@link CuckooFilter#hashToIndex(int, int)} to get the final + * index. + * + * Note that the xor makes this operation reversible as long as we have the + * fingerprint and current bucket (regardless of if that bucket was the primary + * or alternate). 
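+ * + * As an illustrative identity (not something the code relies on directly): for any bucket index b that is + * already less than numBuckets, alternateIndex(alternateIndex(b, fingerprint, numBuckets), fingerprint, numBuckets) == b, + * so the same method maps a primary bucket to its alternate and back again. 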
+ * + * Expert-level API + */ + static int alternateIndex(int bucket, int fingerprint, int numBuckets) { + /* + Reference impl uses murmur2 mixing constant: + https://github.com/efficient/cuckoofilter/blob/master/src/cuckoofilter.h#L78 + // NOTE(binfan): originally we use: + // index ^ HashUtil::BobHash((const void*) (&tag), 4)) & table_->INDEXMASK; + // now doing a quick-n-dirty way: + // 0x5bd1e995 is the hash constant from MurmurHash2 + return IndexHash((uint32_t)(index ^ (tag * 0x5bd1e995))); + */ + int index = bucket ^ (fingerprint * 0x5bd1e995); + return hashToIndex(index, numBuckets); + } + + /** + * Given the bucket and entry position, returns the absolute offset + * inside the PackedInts datastructure + */ + private int getOffset(int bucket, int position) { + return (bucket * entriesPerBucket) + position; + } + + /** + * Calculates the fingerprint for a given hash. + * + * The fingerprint is simply the first `bitsPerEntry` number of bits that are non-zero. + * If the entire hash is zero, `(int) 1` is used + * + * Expert-level API + */ + static int fingerprint(int hash, int bitsPerEntry, int fingerprintMask) { + if (hash == 0) { + // we use 0 as "empty" so if the hash actually hashes to zero... return 1 + // Some other impls will re-hash with a salt but this seems simpler + return 1; + } + + for (int i = 0; i + bitsPerEntry <= Long.SIZE; i += bitsPerEntry) { + int v = (hash >> i) & fingerprintMask; + if (v != 0) { + return v; + } + } + return 1; + } + + /** + * Calculate the optimal number of bits per entry + */ + private int bitsPerEntry(double fpp, int numEntriesPerBucket) { + return (int) Math.round(log2((2 * numEntriesPerBucket) / fpp)); + } + + /** + * Calculate the optimal number of entries per bucket. Will return 2, 4 or 8 + * depending on the false positive rate + */ + private int entriesPerBucket(double fpp) { + /* + Empirical constants from paper: + "the space-optimal bucket size depends on the target false positive rate ε: + when ε > 0.002, having two entries per bucket yields slightly better results + than using four entries per bucket; when ε decreases to 0.00001 < ε <= 0.002, + four entries per bucket minimzes space" + */ + + if (fpp > 0.002) { + return 2; + } else if (fpp > 0.00001 && fpp <= 0.002) { + return 4; + } + return 8; + } + + /** + * Calculates the optimal load factor for the filter, given the number of entries + * per bucket. Will return 0.84, 0.955 or 0.98 depending on b + */ + private double getLoadFactor(int b) { + if ((b == 2 || b == 4 || b == 8) == false) { + throw new IllegalArgumentException("b must be one of [2,4,8]"); + } + /* + Empirical constants from the paper: + "With k = 2 hash functions, the load factor α is 50% when bucket size b = 1 (i.e + the hash table is directly mapped), but increases to 84%, 95%, 98% respectively + using bucket size b = 2, 4, 8" + */ + if (b == 2) { + return 0.84D; + } else if (b == 4) { + return 0.955D; + } else { + return 0.98D; + } + } + + /** + * Calculates the optimal number of buckets for this filter. The xor used in the bucketing + * algorithm requires this to be a power of two, so the optimal number of buckets will + * be rounded to the next largest power of two where applicable. 
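+ * + * As a worked example (using the 1,000,000-entry capacity that SetBackedScalingCuckooFilter passes in, with + * 4 entries per bucket and a 0.955 load factor): round(1,000,000 / 0.955 / 4) = 261,780 buckets, which is then + * rounded up to the next power of two, 2^18 = 262,144. 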
+ * + * TODO: there are schemes to avoid powers of two, might want to investigate those + */ + private int getNumBuckets(long capacity, double loadFactor, int b) { + long buckets = Math.round((((double) capacity / loadFactor)) / (double) b); + + // Rounds up to nearest power of 2 + return 1 << -Integer.numberOfLeadingZeros((int)buckets - 1); + } + + private double log2(double x) { + return Math.log(x) / LN_2; + } + + public long getSizeInBytes() { + // (numBuckets, bitsPerEntry, fingerprintMask, entriesPerBucket, count, evictedFingerprint) * 4b == 24b + return data.ramBytesUsed() + 24; + } + + @Override + public int hashCode() { + return Objects.hash(numBuckets, bitsPerEntry, entriesPerBucket, count, evictedFingerprint); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + final CuckooFilter that = (CuckooFilter) other; + return Objects.equals(this.numBuckets, that.numBuckets) + && Objects.equals(this.bitsPerEntry, that.bitsPerEntry) + && Objects.equals(this.entriesPerBucket, that.entriesPerBucket) + && Objects.equals(this.count, that.count) + && Objects.equals(this.evictedFingerprint, that.evictedFingerprint); + } +} diff --git a/server/src/main/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilter.java b/server/src/main/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilter.java new file mode 100644 index 0000000000000..095416e5d9aa5 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilter.java @@ -0,0 +1,408 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.util; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Random; +import java.util.Set; +import java.util.function.Consumer; + +/** + * An approximate set membership datastructure that scales as more unique values are inserted. + * Can definitively say if a member does not exist (no false negatives), but may say an item exists + * when it does not (has false positives). Similar in usage to a Bloom Filter. + * + * Internally, the datastructure maintains a Set of hashes up to a specified threshold. This provides + * 100% accurate membership queries. + * + * When the threshold is breached, a list of CuckooFilters is created and used to track membership. 
+ * These filters are approximate similar to Bloom Filters. + * + * This datastructure scales as more values are inserted by growing the list of CuckooFilters. + * Final size is dependent on the cardinality of data inserted, and the precision specified. + */ +public class SetBackedScalingCuckooFilter implements Writeable { + + /** + * This is the estimated insertion capacity for each individual internal CuckooFilter. + */ + private static final int FILTER_CAPACITY = 1000000; + + /** + * This set is used to track the insertions before we convert over to an approximate + * filter. This gives us 100% accuracy for small cardinalities. This will be null + * if isSetMode = false; + * + * package-private for testing + */ + Set hashes; + + /** + * This list holds our approximate filters, after we have migrated out of a set. + * This will be null if isSetMode = true; + */ + List filters; + + private final int threshold; + private final Random rng; + private final int capacity; + private final double fpp; + private Consumer breaker = aLong -> { + //noop + }; + + // cached here for performance reasons + private int numBuckets = 0; + private int bitsPerEntry = 0; + private int fingerprintMask = 0; + private MurmurHash3.Hash128 scratchHash = new MurmurHash3.Hash128(); + + // True if we are tracking inserts with a set, false otherwise + private boolean isSetMode = true; + + /** + * @param threshold The number of distinct values that should be tracked + * before converting to an approximate representation + * @param rng A random number generator needed for the cuckoo hashing process + * @param fpp the false-positive rate that should be used for the cuckoo filters. + */ + public SetBackedScalingCuckooFilter(int threshold, Random rng, double fpp) { + if (threshold <= 0) { + throw new IllegalArgumentException("[threshold] must be a positive integer"); + } + + // We have to ensure that, in the worst case, two full sets can be converted into + // one cuckoo filter without overflowing. 
This keeps merging logic simpler + if (threshold * 2 > FILTER_CAPACITY) { + throw new IllegalArgumentException("[threshold] must be smaller than [" + (FILTER_CAPACITY / 2) + "]"); + } + if (fpp < 0) { + throw new IllegalArgumentException("[fpp] must be a positive double"); + } + this.hashes = new HashSet<>(threshold); + this.threshold = threshold; + this.rng = rng; + this.capacity = FILTER_CAPACITY; + this.fpp = fpp; + } + + public SetBackedScalingCuckooFilter(SetBackedScalingCuckooFilter other) { + this.threshold = other.threshold; + this.isSetMode = other.isSetMode; + this.rng = other.rng; + this.breaker = other.breaker; + this.capacity = other.capacity; + this.fpp = other.fpp; + if (isSetMode) { + this.hashes = new HashSet<>(other.hashes); + } else { + this.filters = new ArrayList<>(other.filters); + this.numBuckets = filters.get(0).getNumBuckets(); + this.fingerprintMask = filters.get(0).getFingerprintMask(); + this.bitsPerEntry = filters.get(0).getBitsPerEntry(); + } + } + + public SetBackedScalingCuckooFilter(StreamInput in, Random rng) throws IOException { + this.threshold = in.readVInt(); + this.isSetMode = in.readBoolean(); + this.rng = rng; + this.capacity = in.readVInt(); + this.fpp = in.readDouble(); + + if (isSetMode) { + this.hashes = in.readSet(StreamInput::readZLong); + } else { + this.filters = in.readList(in12 -> new CuckooFilter(in12, rng)); + this.numBuckets = filters.get(0).getNumBuckets(); + this.fingerprintMask = filters.get(0).getFingerprintMask(); + this.bitsPerEntry = filters.get(0).getBitsPerEntry(); + } + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeVInt(threshold); + out.writeBoolean(isSetMode); + out.writeVInt(capacity); + out.writeDouble(fpp); + if (isSetMode) { + out.writeCollection(hashes, StreamOutput::writeZLong); + } else { + out.writeList(filters); + } + } + + /** + * Registers a circuit breaker with the datastructure. + * + * CuckooFilter's can "saturate" and refuse to accept any new values. When this happens, + * the datastructure scales by adding a new filter. This new filter's bytes will be tracked + * in the registered breaker when configured. + */ + public void registerBreaker(Consumer breaker) { + this.breaker = Objects.requireNonNull(breaker, "Circuit Breaker Consumer cannot be null"); + breaker.accept(getSizeInBytes()); + } + + /** + * Returns true if the set might contain the provided value, false otherwise. False values are + * 100% accurate, while true values may be a false-positive. + */ + public boolean mightContain(BytesRef value) { + MurmurHash3.Hash128 hash = MurmurHash3.hash128(value.bytes, value.offset, value.length, 0, scratchHash); + return mightContainHash(hash.h1); + } + + /** + * Returns true if the set might contain the provided value, false otherwise. False values are + * 100% accurate, while true values may be a false-positive. + */ + public boolean mightContain(long value) { + long hash = MurmurHash3.murmur64(value); + return mightContainHash(hash); + } + + /** + * Returns true if the set might contain the provided value, false otherwise. False values are + * 100% accurate, while true values may be a false-positive. 
+ */ + private boolean mightContainHash(long hash) { + if (isSetMode) { + return hashes.contains(hash); + } + + // We calculate these once up front for all the filters and use the expert API + int bucket = CuckooFilter.hashToIndex((int) hash, numBuckets); + int fingerprint = CuckooFilter.fingerprint((int) (hash >> 32), bitsPerEntry, fingerprintMask); + int alternateIndex = CuckooFilter.alternateIndex(bucket, fingerprint, numBuckets); + + for (CuckooFilter filter : filters) { + if (filter.mightContainFingerprint(bucket, fingerprint, alternateIndex)) { + return true; + } + } + return false; + } + + /** + * Returns true if any of the filters contain this fingerprint at the specified bucket. + * This is an expert-level API since it is dealing with buckets and fingerprints, not raw values + * being hashed. + */ + private boolean mightContainFingerprint(int bucket, int fingerprint) { + int alternateIndex = CuckooFilter.alternateIndex(bucket, fingerprint, numBuckets); + for (CuckooFilter filter : filters) { + if (filter.mightContainFingerprint(bucket, fingerprint, alternateIndex)) { + return true; + } + } + return false; + } + + /** + * Add's the provided value to the set for tracking + */ + public void add(BytesRef value) { + MurmurHash3.Hash128 hash = MurmurHash3.hash128(value.bytes, value.offset, value.length, 0, scratchHash); + addHash(hash.h1); + } + + /** + * Add's the provided value to the set for tracking + */ + public void add(long value) { + addHash(MurmurHash3.murmur64(value)); + } + + private void addHash(long hash) { + if (isSetMode) { + hashes.add(hash); + maybeConvert(); + return; + } + + boolean success = filters.get(filters.size() - 1).add(hash); + if (success == false) { + // filter is full, create a new one and insert there + CuckooFilter t = new CuckooFilter(capacity, fpp, rng); + t.add(hash); + filters.add(t); + breaker.accept(t.getSizeInBytes()); // make sure we account for the new filter + } + } + + private void maybeConvert() { + if (isSetMode && hashes.size() > threshold) { + convert(); + } + } + + /** + * If we still holding values in a set, convert this filter into an approximate, cuckoo-backed filter. + * This will create a list of CuckooFilters, and null out the set of hashes + */ + void convert() { + if (isSetMode == false) { + throw new IllegalStateException("Cannot convert SetBackedScalingCuckooFilter to approximate " + + "when it has already been converted."); + } + long oldSize = getSizeInBytes(); + + filters = new ArrayList<>(); + CuckooFilter t = new CuckooFilter(capacity, fpp, rng); + // Cache the chosen numBuckets for later use + numBuckets = t.getNumBuckets(); + fingerprintMask = t.getFingerprintMask(); + bitsPerEntry = t.getBitsPerEntry(); + + hashes.forEach(t::add); + filters.add(t); + + hashes = null; + isSetMode = false; + + breaker.accept(-oldSize); // this zeros out the overhead of the set + breaker.accept(getSizeInBytes()); // this adds back in the new overhead of the cuckoo filters + + } + + /** + * Get the approximate size of this datastructure. Approximate because only the Set occupants + * are tracked, not the overhead of the Set itself. + */ + public long getSizeInBytes() { + long bytes = 13; // fpp (double), threshold (int), isSetMode (boolean) + if (hashes != null) { + bytes = (hashes.size() * 16); + } + if (filters != null) { + bytes += filters.stream().mapToLong(CuckooFilter::getSizeInBytes).sum(); + } + return bytes; + } + + + /** + * Merge `other` cuckoo filter into this cuckoo. 
After merging, this filter's state will + * be the union of the two. During the merging process, the internal Set may be upgraded + * to a cuckoo if it goes over threshold + */ + public void merge(SetBackedScalingCuckooFilter other) { + // Some basic sanity checks to make sure we can merge + if (this.threshold != other.threshold) { + throw new IllegalStateException("Cannot merge other CuckooFilter because thresholds do not match: [" + + this.threshold + "] vs [" + other.threshold + "]"); + } + if (this.capacity != other.capacity) { + throw new IllegalStateException("Cannot merge other CuckooFilter because capacities do not match: [" + + this.capacity + "] vs [" + other.capacity + "]"); + } + if (this.fpp != other.fpp) { + throw new IllegalStateException("Cannot merge other CuckooFilter because precisions do not match: [" + + this.fpp + "] vs [" + other.fpp + "]"); + } + + if (isSetMode && other.isSetMode) { + // Both in sets, merge collections then see if we need to convert to cuckoo + hashes.addAll(other.hashes); + maybeConvert(); + } else if (isSetMode && other.isSetMode == false) { + // Other is in cuckoo mode, so we convert our set to a cuckoo, then + // call the merge function again. Since both are now in set-mode + // this will fall through to the last conditional and do a cuckoo-cuckoo merge + convert(); + merge(other); + } else if (isSetMode == false && other.isSetMode) { + // Rather than converting the other to a cuckoo first, we can just + // replay the values directly into our filter. + other.hashes.forEach(this::add); + } else { + // Both are in cuckoo mode, merge raw fingerprints + + CuckooFilter currentFilter = filters.get(filters.size() - 1); + + for (CuckooFilter otherFilter : other.filters) { + + // The iterator returns an array of longs corresponding to the + // fingerprints for buckets at the current position + Iterator iter = otherFilter.getBuckets(); + int bucket = 0; + while (iter.hasNext()) { + long[] fingerprints = iter.next(); + + // We check to see if the fingerprint is present in any of the existing filters + // (in the same bucket/alternate bucket), or if the fingerprint is empty. 
In these cases + // we can skip the fingerprint + for (long fingerprint : fingerprints) { + if (fingerprint == CuckooFilter.EMPTY || mightContainFingerprint(bucket, (int) fingerprint)) { + continue; + } + // Try to insert into the last filter in our list + if (currentFilter.mergeFingerprint(bucket, (int) fingerprint) == false) { + // if we failed, the filter is now saturated and we need to create a new one + CuckooFilter t = new CuckooFilter(capacity, fpp, rng); + filters.add(t); + breaker.accept(t.getSizeInBytes()); // make sure we account for the new filter + + currentFilter = filters.get(filters.size() - 1); + } + } + bucket += 1; + } + } + } + } + + + @Override + public int hashCode() { + return Objects.hash(hashes, filters, threshold, isSetMode, capacity, fpp); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (other == null || getClass() != other.getClass()) { + return false; + } + + final SetBackedScalingCuckooFilter that = (SetBackedScalingCuckooFilter) other; + return Objects.equals(this.hashes, that.hashes) + && Objects.equals(this.filters, that.filters) + && Objects.equals(this.threshold, that.threshold) + && Objects.equals(this.isSetMode, that.isSetMode) + && Objects.equals(this.capacity, that.capacity) + && Objects.equals(this.fpp, that.fpp); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java index fc3bdcfda8ecf..01a4aa66810c8 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchModule.java +++ b/server/src/main/java/org/elasticsearch/search/SearchModule.java @@ -153,9 +153,13 @@ import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic; import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicParser; import org.elasticsearch.search.aggregations.bucket.terms.DoubleTerms; +import org.elasticsearch.search.aggregations.bucket.terms.LongRareTerms; import org.elasticsearch.search.aggregations.bucket.terms.LongTerms; +import org.elasticsearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.StringRareTerms; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.terms.UnmappedRareTerms; import org.elasticsearch.search.aggregations.bucket.terms.UnmappedTerms; import org.elasticsearch.search.aggregations.metrics.AvgAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder; @@ -390,6 +394,11 @@ private void registerAggregations(List plugins) { .addResultReader(UnmappedTerms.NAME, UnmappedTerms::new) .addResultReader(LongTerms.NAME, LongTerms::new) .addResultReader(DoubleTerms.NAME, DoubleTerms::new)); + registerAggregation(new AggregationSpec(RareTermsAggregationBuilder.NAME, RareTermsAggregationBuilder::new, + RareTermsAggregationBuilder::parse) + .addResultReader(StringRareTerms.NAME, StringRareTerms::new) + .addResultReader(UnmappedRareTerms.NAME, UnmappedRareTerms::new) + .addResultReader(LongRareTerms.NAME, LongRareTerms::new)); registerAggregation(new AggregationSpec(SignificantTermsAggregationBuilder.NAME, SignificantTermsAggregationBuilder::new, SignificantTermsAggregationBuilder.getParser(significanceHeuristicParserRegistry)) .addResultReader(SignificantStringTerms.NAME, 
SignificantStringTerms::new) diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java index 71dacc698bee6..a4ef4286447c1 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java @@ -90,7 +90,9 @@ public final void mergeBuckets(long[] mergeMap, long newNumBuckets) { docCounts.fill(0, newNumBuckets, 0); for (int i = 0; i < oldDocCounts.size(); i++) { int docCount = oldDocCounts.get(i); - if (docCount != 0) { + + // Skip any in the map which have been "removed", signified with -1 + if (docCount != 0 && mergeMap[i] != -1) { docCounts.increment(mergeMap[i], docCount); } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/MergingBucketsDeferringCollector.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/MergingBucketsDeferringCollector.java index b293cc53a3629..bff5015846951 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/MergingBucketsDeferringCollector.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/MergingBucketsDeferringCollector.java @@ -37,27 +37,87 @@ public MergingBucketsDeferringCollector(SearchContext context, boolean isGlobal) super(context, isGlobal); } +/** + * Merges/prunes the existing bucket ordinals and docDeltas according to the provided mergeMap. + * + * The mergeMap is an array where the index position represents the current bucket ordinal, and + * the value at that position represents the ordinal the bucket should be merged with. If + * the value is set to -1 it is removed entirely. + * + * For example, if the mergeMap [1,1,3,-1,3] is provided: + * - Buckets `0` and `1` will be merged to bucket ordinal `1` + * - Buckets `2` and `4` will be merged to ordinal `3` + * - Bucket `3` will be removed entirely + * + * This process rebuilds the ordinals and docDeltas according to the mergeMap, so it should + * not be called unless there are actually changes to be made, to avoid unnecessary work.
+ */ public void mergeBuckets(long[] mergeMap) { List newEntries = new ArrayList<>(entries.size()); for (Entry sourceEntry : entries) { PackedLongValues.Builder newBuckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); + PackedLongValues.Builder newDocDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT); + PackedLongValues.Iterator docDeltasItr = sourceEntry.docDeltas.iterator(); + + long lastGoodDelta = 0; for (PackedLongValues.Iterator itr = sourceEntry.buckets.iterator(); itr.hasNext();) { long bucket = itr.next(); - newBuckets.add(mergeMap[Math.toIntExact(bucket)]); + assert docDeltasItr.hasNext(); + long delta = docDeltasItr.next(); + + // Only merge in the ordinal if it hasn't been "removed", signified with -1 + long ordinal = mergeMap[Math.toIntExact(bucket)]; + + if (ordinal != -1) { + newBuckets.add(ordinal); + newDocDeltas.add(delta + lastGoodDelta); + lastGoodDelta = 0; + } else { + // we are skipping this ordinal, which means we need to accumulate the + // doc delta's since the last "good" delta + lastGoodDelta += delta; + } + } + // Only create an entry if this segment has buckets after merging + if (newBuckets.size() > 0) { + assert newDocDeltas.size() > 0 : "docDeltas was empty but we had buckets"; + newEntries.add(new Entry(sourceEntry.context, newDocDeltas.build(), newBuckets.build())); } - newEntries.add(new Entry(sourceEntry.context, sourceEntry.docDeltas, newBuckets.build())); } entries = newEntries; // if there are buckets that have been collected in the current segment // we need to update the bucket ordinals there too - if (bucketsBuilder.size() > 0) { + if (bucketsBuilder != null && bucketsBuilder.size() > 0) { PackedLongValues currentBuckets = bucketsBuilder.build(); PackedLongValues.Builder newBuckets = PackedLongValues.packedBuilder(PackedInts.DEFAULT); + PackedLongValues.Builder newDocDeltas = PackedLongValues.packedBuilder(PackedInts.DEFAULT); + + // The current segment's deltas aren't built yet, so build to a temp object + PackedLongValues currentDeltas = docDeltasBuilder.build(); + PackedLongValues.Iterator docDeltasItr = currentDeltas.iterator(); + + long lastGoodDelta = 0; for (PackedLongValues.Iterator itr = currentBuckets.iterator(); itr.hasNext();) { long bucket = itr.next(); - newBuckets.add(mergeMap[Math.toIntExact(bucket)]); + assert docDeltasItr.hasNext(); + long delta = docDeltasItr.next(); + long ordinal = mergeMap[Math.toIntExact(bucket)]; + + // Only merge in the ordinal if it hasn't been "removed", signified with -1 + if (ordinal != -1) { + newBuckets.add(ordinal); + newDocDeltas.add(delta + lastGoodDelta); + lastGoodDelta = 0; + } else { + // we are skipping this ordinal, which means we need to accumulate the + // doc delta's since the last "good" delta. + // The first is skipped because the original deltas are stored as offsets from first doc, + // not offsets from 0 + lastGoodDelta += delta; + } } + docDeltasBuilder = newDocDeltas; bucketsBuilder = newBuckets; } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java new file mode 100644 index 0000000000000..2bbe3c01988df --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/AbstractRareTermsAggregator.java @@ -0,0 +1,134 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. 
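// Illustrative sketch (not part of this change) of the mergeMap semantics documented
// above: the index is the old bucket ordinal, the value is the ordinal it merges into,
// and -1 means the bucket is pruned. Plain arrays stand in for the packed structures.
class MergeMapSketch {
    public static void main(String[] args) {
        long[] mergeMap  = {1, 1, 3, -1, 3};
        long[] docCounts = {2, 5, 1, 7, 4};      // doc counts for old ordinals 0..4
        long[] merged    = new long[4];          // new ordinal space
        for (int oldOrd = 0; oldOrd < docCounts.length; oldOrd++) {
            long newOrd = mergeMap[oldOrd];
            if (newOrd != -1) {                  // old ordinal 3 is dropped entirely
                merged[(int) newOrd] += docCounts[oldOrd];
            }
        }
        // merged == [0, 7, 0, 5]: ordinals 0+1 fold into 1, ordinals 2+4 fold into 3
        System.out.println(java.util.Arrays.toString(merged));
    }
}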
See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.bucket.DeferableBucketAggregator; +import org.elasticsearch.search.aggregations.bucket.DeferringBucketCollector; +import org.elasticsearch.search.aggregations.bucket.MergingBucketsDeferringCollector; +import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Random; + +public abstract class AbstractRareTermsAggregator extends DeferableBucketAggregator { + + static final BucketOrder ORDER = BucketOrder.compound(BucketOrder.count(true), BucketOrder.key(true)); // sort by count ascending + + protected final long maxDocCount; + protected final double precision; + protected final DocValueFormat format; + protected final T valuesSource; + protected final U includeExclude; + + MergingBucketsDeferringCollector deferringCollector; + LeafBucketCollector subCollectors; + final SetBackedScalingCuckooFilter filter; + + AbstractRareTermsAggregator(String name, AggregatorFactories factories, SearchContext context, + Aggregator parent, List pipelineAggregators, + Map metaData, long maxDocCount, double precision, + DocValueFormat format, T valuesSource, U includeExclude) throws IOException { + super(name, factories, context, parent, pipelineAggregators, metaData); + + // We seed the rng with the ShardID so results are deterministic and don't change randomly + this.filter = new SetBackedScalingCuckooFilter(10000, new Random(context.indexShard().shardId().hashCode()), precision); + this.filter.registerBreaker(this::addRequestCircuitBreakerBytes); + + this.maxDocCount = maxDocCount; + this.precision = precision; + this.format = format; + this.valuesSource = valuesSource; + this.includeExclude = includeExclude; + String scoringAgg = subAggsNeedScore(); + String nestedAgg = descendsFromNestedAggregator(parent); + if (scoringAgg != null && nestedAgg != null) { + /* + * Terms agg would force the collect mode to depth_first here, because + * we need to access the score of nested documents in a sub-aggregation + * and we are not able to generate this score while replaying deferred documents. 
+ * + * But the RareTerms agg _must_ execute in breadth first since it relies on + * deferring execution, so we just have to throw up our hands and refuse + */ + throw new IllegalStateException("RareTerms agg [" + name() + "] is the child of the nested agg [" + nestedAgg + + "], and also has a scoring child agg [" + scoringAgg + "]. This combination is not supported because " + + "it requires executing in [depth_first] mode, which the RareTerms agg cannot do."); + } + } + + @Override + protected boolean shouldDefer(Aggregator aggregator) { + return true; + } + + @Override + public DeferringBucketCollector getDeferringCollector() { + deferringCollector = new MergingBucketsDeferringCollector(context, descendsFromGlobalAggregator(parent())); + return deferringCollector; + } + + private String subAggsNeedScore() { + for (Aggregator subAgg : subAggregators) { + if (subAgg.scoreMode().needsScores()) { + return subAgg.name(); + } + } + return null; + } + + private String descendsFromNestedAggregator(Aggregator parent) { + while (parent != null) { + if (parent.getClass() == NestedAggregator.class) { + return parent.name(); + } + parent = parent.parent(); + } + return null; + } + + protected void doCollect(V val, int docId) throws IOException { + long bucketOrdinal = addValueToOrds(val); + + if (bucketOrdinal < 0) { // already seen + bucketOrdinal = -1 - bucketOrdinal; + collectExistingBucket(subCollectors, docId, bucketOrdinal); + } else { + collectBucket(subCollectors, docId, bucketOrdinal); + } + } + + /** + * Add's the value to the ordinal map. Return the newly allocated id if it wasn't in the ordinal map yet, + * or -1-id if it was already present + */ + abstract long addValueToOrds(V value); +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java index 8154108f9f0bc..30653f04a355a 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/IncludeExclude.java @@ -137,10 +137,12 @@ public static IncludeExclude parseExclude(XContentParser parser) throws IOExcept } } + public abstract static class Filter {} + // The includeValue and excludeValue ByteRefs which are the result of the parsing // process are converted into a LongFilter when used on numeric fields // in the index. - public abstract static class LongFilter { + public abstract static class LongFilter extends Filter { public abstract boolean accept(long value); } @@ -183,7 +185,7 @@ private void addReject(long val) { } // Only used for the 'map' execution mode (ie. 
scripts) - public abstract static class StringFilter { + public abstract static class StringFilter extends Filter { public abstract boolean accept(BytesRef value); } @@ -231,7 +233,7 @@ public boolean accept(BytesRef value) { } } - public abstract static class OrdinalsFilter { + public abstract static class OrdinalsFilter extends Filter { public abstract LongBitSet acceptedGlobalOrdinals(SortedSetDocValues globalOrdinals) throws IOException; } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedRareTerms.java new file mode 100644 index 0000000000000..d774d09fa1862 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalMappedRareTerms.java @@ -0,0 +1,182 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.AggregationExecutionException; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + +public abstract class InternalMappedRareTerms, B extends InternalRareTerms.Bucket> + extends InternalRareTerms { + + protected DocValueFormat format; + protected List buckets; + protected Map bucketMap; + + final SetBackedScalingCuckooFilter filter; + + protected final Logger logger = LogManager.getLogger(getClass()); + + InternalMappedRareTerms(String name, BucketOrder order, List pipelineAggregators, + Map metaData, DocValueFormat format, + List buckets, long maxDocCount, SetBackedScalingCuckooFilter filter) { + super(name, order, maxDocCount, pipelineAggregators, metaData); + this.format = format; + this.buckets = buckets; + this.filter = filter; + } + + public long getMaxDocCount() { + return maxDocCount; + } + + SetBackedScalingCuckooFilter getFilter() { + return filter; + } + + /** + * Read from a stream. 
+ */ + InternalMappedRareTerms(StreamInput in, Bucket.Reader bucketReader) throws IOException { + super(in); + format = in.readNamedWriteable(DocValueFormat.class); + buckets = in.readList(stream -> bucketReader.read(stream, format)); + filter = new SetBackedScalingCuckooFilter(in, Randomness.get()); + } + + @Override + protected void writeTermTypeInfoTo(StreamOutput out) throws IOException { + out.writeNamedWriteable(format); + out.writeList(buckets); + filter.writeTo(out); + } + + @Override + public InternalAggregation doReduce(List aggregations, ReduceContext reduceContext) { + Map> buckets = new HashMap<>(); + InternalRareTerms referenceTerms = null; + SetBackedScalingCuckooFilter filter = null; + + for (InternalAggregation aggregation : aggregations) { + // Unmapped rare terms don't have a cuckoo filter so we'll skip all this work + // and save some type casting headaches later. + if (aggregation.isMapped() == false) { + continue; + } + + @SuppressWarnings("unchecked") + InternalRareTerms terms = (InternalRareTerms) aggregation; + if (referenceTerms == null && aggregation.getClass().equals(UnmappedRareTerms.class) == false) { + referenceTerms = terms; + } + if (referenceTerms != null && + referenceTerms.getClass().equals(terms.getClass()) == false && + terms.getClass().equals(UnmappedRareTerms.class) == false) { + // control gets into this loop when the same field name against which the query is executed + // is of different types in different indices. + throw new AggregationExecutionException("Merging/Reducing the aggregations failed when computing the aggregation [" + + referenceTerms.getName() + "] because the field you gave in the aggregation query existed as two different " + + "types in two different indices"); + } + for (B bucket : terms.getBuckets()) { + List bucketList = buckets.computeIfAbsent(bucket.getKey(), k -> new ArrayList<>()); + bucketList.add(bucket); + } + + SetBackedScalingCuckooFilter otherFilter = ((InternalMappedRareTerms)aggregation).getFilter(); + if (filter == null) { + filter = new SetBackedScalingCuckooFilter(otherFilter); + } else { + filter.merge(otherFilter); + } + } + + final List rare = new ArrayList<>(); + for (List sameTermBuckets : buckets.values()) { + final B b = sameTermBuckets.get(0).reduce(sameTermBuckets, reduceContext); + if ((b.getDocCount() <= maxDocCount && containsTerm(filter, b) == false)) { + rare.add(b); + reduceContext.consumeBucketsAndMaybeBreak(1); + } else if (b.getDocCount() > maxDocCount) { + // this term has gone over threshold while merging, so add it to the filter. 
+ // Note this may happen during incremental reductions too + addToFilter(filter, b); + } + } + CollectionUtil.introSort(rare, order.comparator(null)); + return createWithFilter(name, rare, filter); + } + + public abstract boolean containsTerm(SetBackedScalingCuckooFilter filter, B bucket); + + public abstract void addToFilter(SetBackedScalingCuckooFilter filter, B bucket); + + @Override + public List getBuckets() { + return buckets; + } + + @Override + public B getBucketByKey(String term) { + if (bucketMap == null) { + bucketMap = buckets.stream().collect(Collectors.toMap(InternalRareTerms.Bucket::getKeyAsString, Function.identity())); + } + return bucketMap.get(term); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + if (super.equals(obj) == false) return false; + InternalMappedRareTerms that = (InternalMappedRareTerms) obj; + return Objects.equals(buckets, that.buckets) + && Objects.equals(format, that.format) + && Objects.equals(filter, that.filter); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), buckets, format, filter); + } + + @Override + public final XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + return doXContentCommon(builder, params, buckets); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java new file mode 100644 index 0000000000000..dd1a0c19200cf --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/InternalRareTerms.java @@ -0,0 +1,205 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
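// Illustrative sketch (not part of this change), approximating the per-term decision made
// in doReduce() above, with a HashSet standing in for the cuckoo filter. maxDocCount = 1
// and the term names are example values only.
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;

class RareTermsReduceSketch {
    public static void main(String[] args) {
        long maxDocCount = 1;
        Set<String> filter = new HashSet<>();            // terms known to be over threshold
        Map<String, Long> mergedCounts = new LinkedHashMap<>();
        mergedCounts.put("swing", 1L);                   // still rare after merging shard results
        mergedCounts.put("jazz", 2L);                    // went over threshold while merging

        for (Map.Entry<String, Long> e : mergedCounts.entrySet()) {
            if (e.getValue() <= maxDocCount && filter.contains(e.getKey()) == false) {
                System.out.println("rare bucket: " + e.getKey());   // only "swing" is returned
            } else if (e.getValue() > maxDocCount) {
                filter.add(e.getKey());                  // remembered so later partial reductions drop it
            }
        }
    }
}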
+ */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregations; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; +import org.elasticsearch.search.aggregations.InternalOrder; +import org.elasticsearch.search.aggregations.KeyComparable; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public abstract class InternalRareTerms, B extends InternalRareTerms.Bucket> + extends InternalMultiBucketAggregation implements RareTerms { + + public abstract static class Bucket> extends InternalMultiBucketAggregation.InternalBucket + implements RareTerms.Bucket, KeyComparable { + /** + * Reads a bucket. Should be a constructor reference. + */ + @FunctionalInterface + public interface Reader> { + B read(StreamInput in, DocValueFormat format) throws IOException; + } + + long bucketOrd; + + protected long docCount; + protected InternalAggregations aggregations; + protected final DocValueFormat format; + + protected Bucket(long docCount, InternalAggregations aggregations, DocValueFormat formatter) { + this.format = formatter; + this.docCount = docCount; + this.aggregations = aggregations; + } + + /** + * Read from a stream. 
+ */ + protected Bucket(StreamInput in, DocValueFormat formatter) throws IOException { + this.format = formatter; + docCount = in.readVLong(); + aggregations = new InternalAggregations(in); + } + + @Override + public final void writeTo(StreamOutput out) throws IOException { + out.writeVLong(getDocCount()); + aggregations.writeTo(out); + writeTermTo(out); + } + + protected abstract void writeTermTo(StreamOutput out) throws IOException; + + @Override + public long getDocCount() { + return docCount; + } + + @Override + public Aggregations getAggregations() { + return aggregations; + } + + abstract B newBucket(long docCount, InternalAggregations aggs); + + public B reduce(List buckets, ReduceContext context) { + long docCount = 0; + List aggregationsList = new ArrayList<>(buckets.size()); + for (B bucket : buckets) { + docCount += bucket.docCount; + aggregationsList.add(bucket.aggregations); + } + InternalAggregations aggs = InternalAggregations.reduce(aggregationsList, context); + return newBucket(docCount, aggs); + } + + @Override + public final XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + keyToXContent(builder); + builder.field(CommonFields.DOC_COUNT.getPreferredName(), getDocCount()); + aggregations.toXContentInternal(builder, params); + builder.endObject(); + return builder; + } + + protected abstract XContentBuilder keyToXContent(XContentBuilder builder) throws IOException; + + @Override + public boolean equals(Object obj) { + if (obj == null || getClass() != obj.getClass()) { + return false; + } + Bucket that = (Bucket) obj; + return Objects.equals(docCount, that.docCount) + && Objects.equals(aggregations, that.aggregations); + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), docCount, aggregations); + } + } + + protected final BucketOrder order; + protected final long maxDocCount; + + protected InternalRareTerms(String name, BucketOrder order, long maxDocCount, + List pipelineAggregators, Map metaData) { + super(name, pipelineAggregators, metaData); + this.order = order; + this.maxDocCount = maxDocCount; + } + + /** + * Read from a stream. + */ + protected InternalRareTerms(StreamInput in) throws IOException { + super(in); + order = InternalOrder.Streams.readOrder(in); + maxDocCount = in.readVLong(); + } + + @Override + protected final void doWriteTo(StreamOutput out) throws IOException { + order.writeTo(out); + out.writeVLong(maxDocCount); + writeTermTypeInfoTo(out); + } + + protected abstract void writeTermTypeInfoTo(StreamOutput out) throws IOException; + + @Override + public abstract List getBuckets(); + + @Override + public abstract B getBucketByKey(String term); + + @Override + public InternalAggregation doReduce(List aggregations, ReduceContext reduceContext) { + throw new UnsupportedOperationException(); + } + + protected abstract A createWithFilter(String name, List buckets, SetBackedScalingCuckooFilter filter); + + /** + * Create an array to hold some buckets. Used in collecting the results. 
+ */ + protected abstract B[] createBucketsArray(int size); + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + if (super.equals(obj) == false) return false; + InternalRareTerms that = (InternalRareTerms) obj; + return Objects.equals(maxDocCount, that.maxDocCount) + && Objects.equals(order, that.order); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), maxDocCount, order); + } + + protected static XContentBuilder doXContentCommon(XContentBuilder builder, Params params, + List buckets) throws IOException { + builder.startArray(CommonFields.BUCKETS.getPreferredName()); + for (Bucket bucket : buckets) { + bucket.toXContent(builder, params); + } + builder.endArray(); + return builder; + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTerms.java new file mode 100644 index 0000000000000..29f84fb6030e1 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTerms.java @@ -0,0 +1,156 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.terms; + + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +/** + * Result of the RareTerms aggregation when the field is some kind of whole number like a integer, long, or a date. + */ +public class LongRareTerms extends InternalMappedRareTerms { + public static final String NAME = "lrareterms"; + + public static class Bucket extends InternalRareTerms.Bucket { + long term; + + public Bucket(long term, long docCount, InternalAggregations aggregations, DocValueFormat format) { + super(docCount, aggregations, format); + this.term = term; + } + + /** + * Read from a stream. 
+ */ + public Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); + term = in.readLong(); + } + + @Override + protected void writeTermTo(StreamOutput out) throws IOException { + out.writeLong(term); + } + + @Override + public String getKeyAsString() { + return format.format(term).toString(); + } + + @Override + public Object getKey() { + return term; + } + + @Override + public Number getKeyAsNumber() { + return term; + } + + @Override + public int compareKey(Bucket other) { + return Long.compare(term, other.term); + } + + @Override + Bucket newBucket(long docCount, InternalAggregations aggs) { + return new Bucket(term, docCount, aggs, format); + } + + @Override + protected final XContentBuilder keyToXContent(XContentBuilder builder) throws IOException { + builder.field(CommonFields.KEY.getPreferredName(), term); + if (format != DocValueFormat.RAW) { + builder.field(CommonFields.KEY_AS_STRING.getPreferredName(), format.format(term).toString()); + } + return builder; + } + + @Override + public boolean equals(Object obj) { + return super.equals(obj) && Objects.equals(term, ((Bucket) obj).term); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), term); + } + } + + LongRareTerms(String name, BucketOrder order, List pipelineAggregators, + Map metaData, DocValueFormat format, + List buckets, long maxDocCount, SetBackedScalingCuckooFilter filter) { + super(name, order, pipelineAggregators, metaData, format, buckets, maxDocCount, filter); + } + + /** + * Read from a stream. + */ + public LongRareTerms(StreamInput in) throws IOException { + super(in, LongRareTerms.Bucket::new); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public LongRareTerms create(List buckets) { + return new LongRareTerms(name, order, pipelineAggregators(), metaData, format, buckets, maxDocCount, filter); + } + + @Override + public LongRareTerms.Bucket createBucket(InternalAggregations aggregations, LongRareTerms.Bucket prototype) { + return new LongRareTerms.Bucket(prototype.term, prototype.getDocCount(), aggregations, prototype.format); + } + + @Override + protected LongRareTerms createWithFilter(String name, List buckets, SetBackedScalingCuckooFilter filter) { + return new LongRareTerms(name, order, pipelineAggregators(), getMetaData(), format, + buckets, maxDocCount, filter); + } + + @Override + protected LongRareTerms.Bucket[] createBucketsArray(int size) { + return new LongRareTerms.Bucket[size]; + } + + @Override + public boolean containsTerm(SetBackedScalingCuckooFilter filter, LongRareTerms.Bucket bucket) { + return filter.mightContain((long) bucket.getKey()); + } + + @Override + public void addToFilter(SetBackedScalingCuckooFilter filter, LongRareTerms.Bucket bucket) { + filter.add((long) bucket.getKey()); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java new file mode 100644 index 0000000000000..b1d294fefdcf6 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/LongRareTermsAggregator.java @@ -0,0 +1,169 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.LongHash; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.emptyList; + +/** + * An aggregator that finds "rare" string values (e.g. terms agg that orders ascending) + */ +public class LongRareTermsAggregator extends AbstractRareTermsAggregator { + + protected LongHash bucketOrds; + + LongRareTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Numeric valuesSource, DocValueFormat format, + SearchContext aggregationContext, Aggregator parent, IncludeExclude.LongFilter longFilter, + int maxDocCount, double precision, List pipelineAggregators, + Map metaData) throws IOException { + super(name, factories, aggregationContext, parent, pipelineAggregators, metaData, maxDocCount, precision, + format, valuesSource, longFilter); + this.bucketOrds = new LongHash(1, aggregationContext.bigArrays()); + } + + protected SortedNumericDocValues getValues(ValuesSource.Numeric valuesSource, LeafReaderContext ctx) throws IOException { + return valuesSource.longValues(ctx); + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, + final LeafBucketCollector sub) throws IOException { + final SortedNumericDocValues values = getValues(valuesSource, ctx); + if (subCollectors == null) { + subCollectors = sub; + } + return new LeafBucketCollectorBase(sub, values) { + + @Override + public void collect(int docId, long owningBucketOrdinal) throws IOException { + if (values.advanceExact(docId)) { + final int valuesCount = values.docValueCount(); + long previous = Long.MAX_VALUE; + for (int i = 0; i < valuesCount; ++i) { + final long val = values.nextValue(); + if (previous != val || i == 0) { + if ((includeExclude == null) || (includeExclude.accept(val))) { + doCollect(val, docId); + } + previous = val; + } + } + } + } + }; + } + + @Override + long addValueToOrds(Long value) { + return bucketOrds.add(value); + } + + /** + * Merges the ordinals to a minimal set, populates the CuckooFilter and + * generates a final set 
of buckets. + * + * If a term is below the maxDocCount, it is turned into a Bucket. Otherwise, + * the term is added to the filter, and pruned from the ordinal map. If + * necessary the ordinal map is merged down to a minimal set to remove deletions + */ + private List buildSketch() { + long deletionCount = 0; + LongHash newBucketOrds = new LongHash(1, context.bigArrays()); + List buckets = new ArrayList<>(); + try (LongHash oldBucketOrds = bucketOrds) { + + long[] mergeMap = new long[(int) oldBucketOrds.size()]; + for (int i = 0; i < oldBucketOrds.size(); i++) { + long oldKey = oldBucketOrds.get(i); + long newBucketOrd = -1; + + long docCount = bucketDocCount(i); + // if the key is below threshold, reinsert into the new ords + if (docCount <= maxDocCount) { + newBucketOrd = newBucketOrds.add(oldKey); + LongRareTerms.Bucket bucket = new LongRareTerms.Bucket(oldKey, docCount, null, format); + bucket.bucketOrd = newBucketOrd; + buckets.add(bucket); + + consumeBucketsAndMaybeBreak(1); + } else { + // Make a note when one of the ords has been deleted + deletionCount += 1; + filter.add(oldKey); + } + mergeMap[i] = newBucketOrd; + } + + // Only merge/delete the ordinals if we have actually deleted one, + // to save on some redundant work + if (deletionCount > 0) { + mergeBuckets(mergeMap, newBucketOrds.size()); + if (deferringCollector != null) { + deferringCollector.mergeBuckets(mergeMap); + } + } + } + bucketOrds = newBucketOrds; + return buckets; + } + + @Override + public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { + assert owningBucketOrdinal == 0; + List buckets = buildSketch(); + runDeferredCollections(buckets.stream().mapToLong(b -> b.bucketOrd).toArray()); + + // Finalize the buckets + for (LongRareTerms.Bucket bucket : buckets) { + bucket.aggregations = bucketAggregations(bucket.bucketOrd); + } + + CollectionUtil.introSort(buckets, ORDER.comparator(this)); + return new LongRareTerms(name, ORDER, pipelineAggregators(), metaData(), format, buckets, maxDocCount, filter); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return new LongRareTerms(name, ORDER, pipelineAggregators(), metaData(), format, emptyList(), 0, filter); + } + + @Override + public void doClose() { + Releasables.close(bucketOrds); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTerms.java new file mode 100644 index 0000000000000..2248514783264 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTerms.java @@ -0,0 +1,48 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
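// Illustrative sketch (not part of this change) of the shard-local pruning done by
// buildSketch() above: terms at or below maxDocCount stay as candidate buckets, terms
// above it move into the filter, and the resulting mergeMap (-1 = pruned) is what the
// deferring collector consumes. Plain collections stand in for LongHash and the filter.
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

class BuildSketchPruningSketch {
    public static void main(String[] args) {
        long maxDocCount = 1;
        long[] terms     = {10, 20, 30};                 // keys at old ordinals 0..2
        long[] docCounts = { 1,  5,  1};
        long[] mergeMap  = new long[terms.length];
        List<Long> candidates = new ArrayList<>();
        Set<Long> filter = new HashSet<>();

        long newOrd = 0;
        for (int i = 0; i < terms.length; i++) {
            if (docCounts[i] <= maxDocCount) {
                mergeMap[i] = newOrd++;                  // terms 10 and 30 stay rare candidates
                candidates.add(terms[i]);
            } else {
                mergeMap[i] = -1;                        // term 20 is pruned into the filter
                filter.add(terms[i]);
            }
        }
        // mergeMap == [0, -1, 1]; candidates == [10, 30]; filter == {20}
        System.out.println(java.util.Arrays.toString(mergeMap) + " " + candidates + " " + filter);
    }
}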
+ */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; + +import java.util.List; + + +public interface RareTerms extends MultiBucketsAggregation { + + /** + * A bucket that is associated with a single term + */ + interface Bucket extends MultiBucketsAggregation.Bucket { + + Number getKeyAsNumber(); + } + + /** + * Return the sorted list of the buckets in this terms aggregation. + */ + @Override + List getBuckets(); + + /** + * Get the bucket for the given term, or null if there is no such bucket. + */ + Bucket getBucketByKey(String term); + +} + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java new file mode 100644 index 0000000000000..5772cfa9708d5 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregationBuilder.java @@ -0,0 +1,203 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ObjectParser; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.common.xcontent.XContentParser; +import org.elasticsearch.search.aggregations.AggregationBuilder; +import org.elasticsearch.search.aggregations.AggregatorFactories.Builder; +import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregationBuilder; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.aggregations.support.ValuesSourceParserHelper; +import org.elasticsearch.search.aggregations.support.ValuesSourceType; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.Map; +import java.util.Objects; + +public class RareTermsAggregationBuilder extends ValuesSourceAggregationBuilder { + public static final String NAME = "rare_terms"; + + private static final ParseField MAX_DOC_COUNT_FIELD_NAME = new ParseField("max_doc_count"); + private static final ParseField PRECISION = new ParseField("precision"); + + private static final int MAX_MAX_DOC_COUNT = 100; + private static final ObjectParser PARSER; + static { + PARSER = new ObjectParser<>(RareTermsAggregationBuilder.NAME); + ValuesSourceParserHelper.declareAnyFields(PARSER, true, true); + PARSER.declareLong(RareTermsAggregationBuilder::maxDocCount, MAX_DOC_COUNT_FIELD_NAME); + + PARSER.declareField((b, v) -> b.includeExclude(IncludeExclude.merge(v, b.includeExclude())), + IncludeExclude::parseInclude, IncludeExclude.INCLUDE_FIELD, ObjectParser.ValueType.OBJECT_ARRAY_OR_STRING); + + PARSER.declareField((b, v) -> b.includeExclude(IncludeExclude.merge(b.includeExclude(), v)), + IncludeExclude::parseExclude, IncludeExclude.EXCLUDE_FIELD, ObjectParser.ValueType.STRING_ARRAY); + + PARSER.declareDouble(RareTermsAggregationBuilder::setPrecision, PRECISION); + } + + public static AggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException { + return PARSER.parse(parser, new RareTermsAggregationBuilder(aggregationName, null), null); + } + + private IncludeExclude includeExclude = null; + private int maxDocCount = 1; + private double precision = 0.001; + + public RareTermsAggregationBuilder(String name, ValueType valueType) { + super(name, ValuesSourceType.ANY, valueType); + } + + private RareTermsAggregationBuilder(RareTermsAggregationBuilder clone, Builder factoriesBuilder, Map metaData) { + super(clone, factoriesBuilder, metaData); + this.includeExclude = clone.includeExclude; + } + + @Override + protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map metaData) { + return new RareTermsAggregationBuilder(this, factoriesBuilder, metaData); + } + + /** + * Read from a stream. 
+ */ + public RareTermsAggregationBuilder(StreamInput in) throws IOException { + super(in, ValuesSourceType.ANY); + includeExclude = in.readOptionalWriteable(IncludeExclude::new); + maxDocCount = in.readVInt(); + } + + @Override + protected boolean serializeTargetValueType() { + return true; + } + + @Override + protected void innerWriteTo(StreamOutput out) throws IOException { + out.writeOptionalWriteable(includeExclude); + out.writeVInt(maxDocCount); + } + + /** + * Set the maximum document count terms should have in order to appear in + * the response. + */ + public RareTermsAggregationBuilder maxDocCount(long maxDocCount) { + if (maxDocCount <= 0) { + throw new IllegalArgumentException( + "[" + MAX_DOC_COUNT_FIELD_NAME.getPreferredName() + "] must be greater than 0. Found [" + + maxDocCount + "] in [" + name + "]"); + } + //TODO review: what size cap should we put on this? + if (maxDocCount > MAX_MAX_DOC_COUNT) { + throw new IllegalArgumentException("[" + MAX_DOC_COUNT_FIELD_NAME.getPreferredName() + "] must be smaller" + + "than " + MAX_MAX_DOC_COUNT + "in [" + name + "]"); + } + this.maxDocCount = (int) maxDocCount; + return this; + } + + /** + * Set terms to include and exclude from the aggregation results + */ + public RareTermsAggregationBuilder includeExclude(IncludeExclude includeExclude) { + this.includeExclude = includeExclude; + return this; + } + + /** + * Get terms to include and exclude from the aggregation results + */ + public IncludeExclude includeExclude() { + return includeExclude; + } + + /** + * Get the current false positive rate for individual cuckoo filters. + */ + public double getPrecision() { + return precision; + } + + /** + * Set's the false-positive rate for individual cuckoo filters. Does not dictate the overall fpp rate + * since we use a "scaling" cuckoo filter which adds more filters as required, and the overall + * error rate grows differently than individual filters + * + * This value does, however, affect the overall space usage of the filter. Coarser precisions provide + * more compact filters. 
The default is 0.01 + */ + public void setPrecision(double precision) { + if (precision < 0.00001) { + throw new IllegalArgumentException("[precision] must be greater than 0.00001"); + } + this.precision = precision; + } + + @Override + protected ValuesSourceAggregatorFactory innerBuild(SearchContext context, + ValuesSourceConfig config, + AggregatorFactory parent, + Builder subFactoriesBuilder) throws IOException { + return new RareTermsAggregatorFactory(name, config, includeExclude, + context, parent, subFactoriesBuilder, metaData, maxDocCount, precision); + } + + @Override + protected XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + if (includeExclude != null) { + includeExclude.toXContent(builder, params); + } + builder.field(MAX_DOC_COUNT_FIELD_NAME.getPreferredName(), maxDocCount); + builder.field(PRECISION.getPreferredName(), precision); + return builder; + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), includeExclude, maxDocCount, precision); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; + if (obj == null || getClass() != obj.getClass()) return false; + if (super.equals(obj) == false) return false; + RareTermsAggregationBuilder other = (RareTermsAggregationBuilder) obj; + return Objects.equals(includeExclude, other.includeExclude) + && Objects.equals(maxDocCount, other.maxDocCount) + && Objects.equals(precision, other.precision); + } + + @Override + public String getType() { + return NAME; + } + +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java new file mode 100644 index 0000000000000..ddb563e03039d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorFactory.java @@ -0,0 +1,164 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
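// Illustrative sketch (not part of this change) of how the builder above might be used
// from Java against the classes added in this patch. The field name "genre" and the
// parameter values are examples only; field(), size() and aggregation() are existing APIs.
import org.elasticsearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValueType;
import org.elasticsearch.search.builder.SearchSourceBuilder;

class RareTermsRequestSketch {
    static SearchSourceBuilder rareGenres() {
        RareTermsAggregationBuilder rare = new RareTermsAggregationBuilder("genres", ValueType.STRING);
        rare.field("genre");         // the keyword field to find rare terms in
        rare.maxDocCount(2);         // values <= 0 or > 100 are rejected by the builder above
        rare.setPrecision(0.001);    // values below 0.00001 are rejected by the builder above
        return new SearchSourceBuilder().size(0).aggregation(rare);
    }
}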
+ */ + +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.ParseField; +import org.elasticsearch.common.logging.DeprecationLogger; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.AggregationExecutionException; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.AggregatorFactory; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.NonCollectingAggregator; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.aggregations.support.ValuesSourceAggregatorFactory; +import org.elasticsearch.search.aggregations.support.ValuesSourceConfig; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public class RareTermsAggregatorFactory extends ValuesSourceAggregatorFactory { + private final IncludeExclude includeExclude; + private final int maxDocCount; + private final double precision; + + RareTermsAggregatorFactory(String name, ValuesSourceConfig config, + IncludeExclude includeExclude, + SearchContext context, + AggregatorFactory parent, AggregatorFactories.Builder subFactoriesBuilder, + Map metaData, int maxDocCount, double precision) throws IOException { + super(name, config, context, parent, subFactoriesBuilder, metaData); + this.includeExclude = includeExclude; + this.maxDocCount = maxDocCount; + this.precision = precision; + } + + @Override + protected Aggregator createUnmapped(Aggregator parent, List pipelineAggregators, Map metaData) + throws IOException { + final InternalAggregation aggregation = new UnmappedRareTerms(name, pipelineAggregators, metaData); + return new NonCollectingAggregator(name, context, parent, factories, pipelineAggregators, metaData) { + @Override + public InternalAggregation buildEmptyAggregation() { + return aggregation; + } + }; + } + + @Override + protected Aggregator doCreateInternal(ValuesSource valuesSource, Aggregator parent, boolean collectsFromSingleBucket, + List pipelineAggregators, Map metaData) throws IOException { + if (collectsFromSingleBucket == false) { + return asMultiBucketAggregator(this, context, parent); + } + if (valuesSource instanceof ValuesSource.Bytes) { + ExecutionMode execution = ExecutionMode.MAP; //TODO global ords not implemented yet, only supports "map" + + DocValueFormat format = config.format(); + if ((includeExclude != null) && (includeExclude.isRegexBased()) && format != DocValueFormat.RAW) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support " + + "regular expression style include/exclude settings as they can only be applied to string fields. " + + "Use an array of values for include/exclude clauses"); + } + + return execution.create(name, factories, valuesSource, format, + includeExclude, context, parent, pipelineAggregators, metaData, maxDocCount, precision); + } + + if ((includeExclude != null) && (includeExclude.isRegexBased())) { + throw new AggregationExecutionException("Aggregation [" + name + "] cannot support regular expression style include/exclude " + + "settings as they can only be applied to string fields. 
Use an array of numeric values for include/exclude clauses " + + "used to filter numeric fields"); + } + + if (valuesSource instanceof ValuesSource.Numeric) { + IncludeExclude.LongFilter longFilter = null; + if (((ValuesSource.Numeric) valuesSource).isFloatingPoint()) { + throw new AggregationExecutionException("RareTerms aggregation does not support floating point fields."); + } + if (includeExclude != null) { + longFilter = includeExclude.convertToLongFilter(config.format()); + } + return new LongRareTermsAggregator(name, factories, (ValuesSource.Numeric) valuesSource, config.format(), + context, parent, longFilter, maxDocCount, precision, pipelineAggregators, metaData); + } + + throw new AggregationExecutionException("RareTerms aggregation cannot be applied to field [" + config.fieldContext().field() + + "]. It can only be applied to numeric or string fields."); + } + + public enum ExecutionMode { + + MAP(new ParseField("map")) { + + @Override + Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, + DocValueFormat format, IncludeExclude includeExclude, + SearchContext context, Aggregator parent, + List pipelineAggregators, + Map metaData, long maxDocCount, double precision) + throws IOException { + final IncludeExclude.StringFilter filter = includeExclude == null ? null : includeExclude.convertToStringFilter(format); + return new StringRareTermsAggregator(name, factories, (ValuesSource.Bytes) valuesSource, format, filter, + context, parent, pipelineAggregators, metaData, maxDocCount, precision); + } + + @Override + boolean needsGlobalOrdinals() { + return false; + } + + }; + + public static ExecutionMode fromString(String value, final DeprecationLogger deprecationLogger) { + switch (value) { + case "map": + return MAP; + default: + throw new IllegalArgumentException("Unknown `execution_hint`: [" + value + "], expected any of [map]"); + } + } + + private final ParseField parseField; + + ExecutionMode(ParseField parseField) { + this.parseField = parseField; + } + + abstract Aggregator create(String name, AggregatorFactories factories, ValuesSource valuesSource, + DocValueFormat format, IncludeExclude includeExclude, + SearchContext context, Aggregator parent, + List pipelineAggregators, Map metaData, + long maxDocCount, double precision) + throws IOException; + + abstract boolean needsGlobalOrdinals(); + + @Override + public String toString() { + return parseField.getPreferredName(); + } + } + +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTerms.java new file mode 100644 index 0000000000000..3c3e19664a631 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTerms.java @@ -0,0 +1,159 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
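// Illustrative sketch (not part of this change): exact-value include/exclude, which the
// factory above accepts for any field type, whereas regex-based filters are rejected for
// numeric fields. The IncludeExclude(String[], String[]) constructor is assumed from the
// existing terms aggregation API; the field and values are examples only.
import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude;
import org.elasticsearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder;
import org.elasticsearch.search.aggregations.support.ValueType;

class RareTermsIncludeExcludeSketch {
    static RareTermsAggregationBuilder rareGenres() {
        RareTermsAggregationBuilder rare = new RareTermsAggregationBuilder("rare_genres", ValueType.STRING);
        rare.field("genre");
        rare.includeExclude(new IncludeExclude(new String[]{"swing", "jazz"}, null));  // exact terms only
        return rare;
    }
}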
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.BucketOrder; +import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.Objects; + +public class StringRareTerms extends InternalMappedRareTerms { + public static final String NAME = "srareterms"; + + public static class Bucket extends InternalRareTerms.Bucket { + BytesRef termBytes; + + public Bucket(BytesRef term, long docCount, InternalAggregations aggregations, DocValueFormat format) { + super(docCount, aggregations, format); + this.termBytes = term; + } + + /** + * Read from a stream. + */ + public Bucket(StreamInput in, DocValueFormat format) throws IOException { + super(in, format); + termBytes = in.readBytesRef(); + } + + @Override + protected void writeTermTo(StreamOutput out) throws IOException { + out.writeBytesRef(termBytes); + } + + @Override + public Object getKey() { + return getKeyAsString(); + } + + // this method is needed for scripted numeric aggs + @Override + public Number getKeyAsNumber() { + /* + * If the term is a long greater than 2^52 then parsing as a double would lose accuracy. Therefore, we first parse as a long and + * if this fails then we attempt to parse the term as a double. + */ + try { + return Long.parseLong(termBytes.utf8ToString()); + } catch (final NumberFormatException ignored) { + return Double.parseDouble(termBytes.utf8ToString()); + } + } + + @Override + public String getKeyAsString() { + return format.format(termBytes).toString(); + } + + @Override + public int compareKey(Bucket other) { + return termBytes.compareTo(other.termBytes); + } + + @Override + Bucket newBucket(long docCount, InternalAggregations aggs) { + return new Bucket(termBytes, docCount, aggs, format); + } + + @Override + protected final XContentBuilder keyToXContent(XContentBuilder builder) throws IOException { + return builder.field(CommonFields.KEY.getPreferredName(), getKeyAsString()); + } + + @Override + public boolean equals(Object obj) { + return super.equals(obj) && Objects.equals(termBytes, ((Bucket) obj).termBytes); + } + + @Override + public int hashCode() { + return Objects.hash(super.hashCode(), termBytes); + } + } + + StringRareTerms(String name, BucketOrder order, List pipelineAggregators, + Map metaData, DocValueFormat format, + List buckets, long maxDocCount, SetBackedScalingCuckooFilter filter) { + super(name, order, pipelineAggregators, metaData, format, buckets, maxDocCount, filter); + } + + /** + * Read from a stream. 
+ */ + public StringRareTerms(StreamInput in) throws IOException { + super(in, StringRareTerms.Bucket::new); + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public StringRareTerms create(List buckets) { + return new StringRareTerms(name, order, pipelineAggregators(), metaData, format, buckets, maxDocCount, filter); + } + + @Override + public StringRareTerms.Bucket createBucket(InternalAggregations aggregations, StringRareTerms.Bucket prototype) { + return new StringRareTerms.Bucket(prototype.termBytes, prototype.getDocCount(), aggregations, prototype.format); + } + + @Override + protected StringRareTerms createWithFilter(String name, List buckets, + SetBackedScalingCuckooFilter filterFilter) { + return new StringRareTerms(name, order, pipelineAggregators(), metaData, format, + buckets, maxDocCount, filterFilter); + } + + @Override + protected StringRareTerms.Bucket[] createBucketsArray(int size) { + return new StringRareTerms.Bucket[size]; + } + + @Override + public boolean containsTerm(SetBackedScalingCuckooFilter filter, StringRareTerms.Bucket bucket) { + return filter.mightContain(bucket.termBytes); + } + + @Override + public void addToFilter(SetBackedScalingCuckooFilter filter, StringRareTerms.Bucket bucket) { + filter.add(bucket.termBytes); + } +} diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java new file mode 100644 index 0000000000000..0c200e96b242c --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringRareTermsAggregator.java @@ -0,0 +1,175 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CollectionUtil; +import org.elasticsearch.common.lease.Releasables; +import org.elasticsearch.common.util.BytesRefHash; +import org.elasticsearch.index.fielddata.SortedBinaryDocValues; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorFactories; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.LeafBucketCollector; +import org.elasticsearch.search.aggregations.LeafBucketCollectorBase; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; +import org.elasticsearch.search.aggregations.support.ValuesSource; +import org.elasticsearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.emptyList; + +/** + * An aggregator that finds "rare" string values (e.g. terms agg that orders ascending) + */ +public class StringRareTermsAggregator extends AbstractRareTermsAggregator { + protected BytesRefHash bucketOrds; + + StringRareTermsAggregator(String name, AggregatorFactories factories, ValuesSource.Bytes valuesSource, + DocValueFormat format, IncludeExclude.StringFilter stringFilter, + SearchContext context, Aggregator parent, List pipelineAggregators, + Map metaData, long maxDocCount, double precision) throws IOException { + super(name, factories, context, parent, pipelineAggregators, metaData, maxDocCount, precision, format, valuesSource, stringFilter); + this.bucketOrds = new BytesRefHash(1, context.bigArrays()); + } + + @Override + public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, + final LeafBucketCollector sub) throws IOException { + final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); + if (subCollectors == null) { + subCollectors = sub; + } + return new LeafBucketCollectorBase(sub, values) { + final BytesRefBuilder previous = new BytesRefBuilder(); + + @Override + public void collect(int docId, long bucket) throws IOException { + assert bucket == 0; + if (values.advanceExact(docId)) { + final int valuesCount = values.docValueCount(); + previous.clear(); + + // SortedBinaryDocValues don't guarantee uniqueness so we + // need to take care of dups + for (int i = 0; i < valuesCount; ++i) { + final BytesRef bytes = values.nextValue(); + if (includeExclude != null && !includeExclude.accept(bytes)) { + continue; + } + if (i > 0 && previous.get().equals(bytes)) { + continue; + } + + doCollect(bytes, docId); + previous.copyBytes(bytes); + } + } + } + }; + } + + @Override + long addValueToOrds(BytesRef value) { + return bucketOrds.add(value); + } + + /** + * Merges the ordinals to a minimal set, populates the CuckooFilter and + * generates a final set of buckets. + * + * If a term is below the maxDocCount, it is turned into a Bucket. Otherwise, + * the term is added to the filter, and pruned from the ordinal map. 
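+     * (For example, with maxDocCount == 1 a term that appears in exactly one document becomes a
+     * bucket, while a term that appears in three documents is added to the filter and pruned.)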
If + * necessary the ordinal map is merged down to a minimal set to remove deletions + */ + private List buildSketch() { + long deletionCount = 0; + BytesRefHash newBucketOrds = new BytesRefHash(1, context.bigArrays()); + List buckets = new ArrayList<>(); + try (BytesRefHash oldBucketOrds = bucketOrds) { + + long[] mergeMap = new long[(int) oldBucketOrds.size()]; + BytesRef scratch = new BytesRef(); + for (int i = 0; i < oldBucketOrds.size(); i++) { + BytesRef oldKey = oldBucketOrds.get(i, scratch); + long newBucketOrd = -1; + long docCount = bucketDocCount(i); + // if the key is below threshold, reinsert into the new ords + if (docCount <= maxDocCount) { + newBucketOrd = newBucketOrds.add(oldKey); + StringRareTerms.Bucket bucket = new StringRareTerms.Bucket(BytesRef.deepCopyOf(oldKey), docCount, null, format); + bucket.bucketOrd = newBucketOrd; + buckets.add(bucket); + + consumeBucketsAndMaybeBreak(1); + } else { + // Make a note when one of the ords has been deleted + deletionCount += 1; + filter.add(oldKey); + } + mergeMap[i] = newBucketOrd; + } + + // Only merge/delete the ordinals if we have actually deleted one, + // to save on some redundant work + if (deletionCount > 0) { + mergeBuckets(mergeMap, newBucketOrds.size()); + if (deferringCollector != null) { + deferringCollector.mergeBuckets(mergeMap); + } + } + } + bucketOrds = newBucketOrds; + return buckets; + } + + @Override + public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { + assert owningBucketOrdinal == 0; + + List buckets = buildSketch(); + runDeferredCollections(buckets.stream().mapToLong(b -> b.bucketOrd).toArray()); + + // Finalize the buckets + for (StringRareTerms.Bucket bucket : buckets) { + bucket.aggregations = bucketAggregations(bucket.bucketOrd); + } + + CollectionUtil.introSort(buckets, ORDER.comparator(this)); + return new StringRareTerms(name, ORDER, pipelineAggregators(), metaData(), format, buckets, maxDocCount, filter); + } + + @Override + public InternalAggregation buildEmptyAggregation() { + return new StringRareTerms(name, LongRareTermsAggregator.ORDER, pipelineAggregators(), metaData(), format, emptyList(), 0, filter); + } + + @Override + public void doClose() { + Releasables.close(bucketOrds); + } +} + diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java index 20162fd1bc78a..446aafa22d36b 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/StringTermsAggregator.java @@ -114,8 +114,9 @@ public void collect(int doc, long bucket) throws IOException { public InternalAggregation buildAggregation(long owningBucketOrdinal) throws IOException { assert owningBucketOrdinal == 0; - if (bucketCountThresholds.getMinDocCount() == 0 && (InternalOrder.isCountDesc(order) == false || - bucketOrds.size() < bucketCountThresholds.getRequiredSize())) { + if (bucketCountThresholds.getMinDocCount() == 0 + && (InternalOrder.isCountDesc(order) == false + || bucketOrds.size() < bucketCountThresholds.getRequiredSize())) { // we need to fill-in the blanks for (LeafReaderContext ctx : context.searcher().getTopReaderContext().leaves()) { final SortedBinaryDocValues values = valuesSource.bytesValues(ctx); @@ -168,11 +169,10 @@ public InternalAggregation buildAggregation(long owningBucketOrdinal) throws 
IOE runDeferredCollections(survivingBucketOrds); // Now build the aggs - for (int i = 0; i < list.length; i++) { - final StringTerms.Bucket bucket = list[i]; - bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes); - bucket.aggregations = bucketAggregations(bucket.bucketOrd); - bucket.docCountError = 0; + for (final StringTerms.Bucket bucket : list) { + bucket.termBytes = BytesRef.deepCopyOf(bucket.termBytes); + bucket.aggregations = bucketAggregations(bucket.bucketOrd); + bucket.docCountError = 0; } return new StringTerms(name, order, bucketCountThresholds.getRequiredSize(), bucketCountThresholds.getMinDocCount(), diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedRareTerms.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedRareTerms.java new file mode 100644 index 0000000000000..eff5441a1d7e7 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/terms/UnmappedRareTerms.java @@ -0,0 +1,119 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.util.SetBackedScalingCuckooFilter; +import org.elasticsearch.common.xcontent.XContentBuilder; +import org.elasticsearch.search.DocValueFormat; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.InternalAggregations; +import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator; + +import java.io.IOException; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +import static java.util.Collections.emptyList; + +/** + * Result of the RareTerms aggregation when the field is unmapped. + */ +public class UnmappedRareTerms extends InternalRareTerms { + public static final String NAME = "umrareterms"; + + protected abstract static class Bucket extends InternalRareTerms.Bucket { + private Bucket(long docCount, InternalAggregations aggregations, DocValueFormat formatter) { + super(docCount, aggregations, formatter); + } + } + + UnmappedRareTerms(String name, List pipelineAggregators, Map metaData) { + super(name, LongRareTermsAggregator.ORDER, 0, pipelineAggregators, metaData); + } + + /** + * Read from a stream. 
+ */ + public UnmappedRareTerms(StreamInput in) throws IOException { + super(in); + } + + @Override + protected void writeTermTypeInfoTo(StreamOutput out) throws IOException { + // Nothing to write + } + + @Override + public String getWriteableName() { + return NAME; + } + + @Override + public String getType() { + return StringTerms.NAME; + } + + @Override + public UnmappedRareTerms create(List buckets) { + return new UnmappedRareTerms(name, pipelineAggregators(), metaData); + } + + @Override + public UnmappedRareTerms.Bucket createBucket(InternalAggregations aggregations, UnmappedRareTerms.Bucket prototype) { + throw new UnsupportedOperationException("not supported for UnmappedRareTerms"); + } + + @Override + protected UnmappedRareTerms createWithFilter(String name, List buckets, SetBackedScalingCuckooFilter filter) { + throw new UnsupportedOperationException("not supported for UnmappedRareTerms"); + } + + @Override + public InternalAggregation doReduce(List aggregations, ReduceContext reduceContext) { + return new UnmappedRareTerms(name, pipelineAggregators(), metaData); + } + + @Override + public boolean isMapped() { + return false; + } + + @Override + public final XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException { + return doXContentCommon(builder, params, Collections.emptyList()); + } + + @Override + public List getBuckets() { + return emptyList(); + } + + @Override + public UnmappedRareTerms.Bucket getBucketByKey(String term) { + return null; + } + + @Override + protected UnmappedRareTerms.Bucket[] createBucketsArray(int size) { + return new UnmappedRareTerms.Bucket[size]; + } +} diff --git a/server/src/test/java/org/elasticsearch/common/util/CuckooFilterTests.java b/server/src/test/java/org/elasticsearch/common/util/CuckooFilterTests.java new file mode 100644 index 0000000000000..47e9081d815b6 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/util/CuckooFilterTests.java @@ -0,0 +1,135 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.common.util; + +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class CuckooFilterTests extends AbstractWireSerializingTestCase { + + @Override + protected CuckooFilter createTestInstance() { + CuckooFilter filter = new CuckooFilter(randomIntBetween(1, 100000), + ((float)randomIntBetween(1, 50)) / 100.0, Randomness.get()); + + int num = randomIntBetween(0, 10); + for (int i = 0; i < num; i++) { + filter.add(hash(randomLong())); + } + + return filter; + } + + @Override + protected Writeable.Reader instanceReader() { + return in -> new CuckooFilter(in, Randomness.get()); + } + + @Override + protected CuckooFilter mutateInstance(CuckooFilter instance) { + CuckooFilter newInstance = new CuckooFilter(instance); + int num = randomIntBetween(1, 10); + for (int i = 0; i < num; i++) { + newInstance.add(hash(randomLong())); + } + return newInstance; + } + + public void testExact() { + CuckooFilter filter = new CuckooFilter(10000, 0.03, Randomness.get()); + + for (int i = 0; i < 100; i++) { + filter.add(hash(i)); + } + + // Was sized sufficiently large that all of these values should be retained + for (int i = 0; i < 100; i++) { + assertThat(filter.mightContain(hash(i)), equalTo(true)); + } + } + + public void testSaturate() { + CuckooFilter filter = new CuckooFilter(10, 0.03, Randomness.get()); + int counter = 0; + boolean saturated = false; + for (int i = 0; i < 100; i++) { + logger.info("Value: " + i); + if (filter.add(hash(i)) == false) { + saturated = true; + } + counter += 1; + if (saturated) { + break; + } + } + // Unclear when it will saturate exactly, but should be before 100 given the configuration + assertTrue(saturated); + logger.info("Saturated at: " + counter); + + for (int i = 0; i < counter; i++) { + logger.info("Value: " + i); + assertThat(filter.mightContain(hash(i)), equalTo(true)); + } + } + + public void testHash() { + CuckooFilter.hashToIndex(-10, 32); + } + + public void testBig() { + CuckooFilter filter = new CuckooFilter(1000000, 0.001, Randomness.get()); + + for (int i = 0; i < 10000; i++) { + filter.add(hash(i)); + } + + int correct = 0; + int incorrect = 0; + for (int i = 0; i < 10000; i++) { + if (filter.mightContain(hash(i))) { + correct += 1; + } else { + incorrect += 1; + } + } + + assertThat(correct, equalTo(10000)); + assertThat(incorrect, equalTo(0)); + + for (int i = 10000; i < 100000; i++) { + if (filter.mightContain(hash(i))) { + incorrect += 1; + } else { + correct += 1; + } + } + + double fppRate = (double) incorrect / 100000; + assertThat(fppRate, lessThanOrEqualTo(0.001)); + } + + private long hash(long i) { + return MurmurHash3.murmur64(i); + } +} diff --git a/server/src/test/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilterTests.java b/server/src/test/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilterTests.java new file mode 100644 index 0000000000000..20ffaa00998a1 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/common/util/SetBackedScalingCuckooFilterTests.java @@ -0,0 +1,231 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. 
Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.elasticsearch.common.util; + +import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.hash.MurmurHash3; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractWireSerializingTestCase; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.lessThanOrEqualTo; + +public class SetBackedScalingCuckooFilterTests extends AbstractWireSerializingTestCase { + + @Override + protected SetBackedScalingCuckooFilter createTestInstance() { + SetBackedScalingCuckooFilter bloom = new SetBackedScalingCuckooFilter(1000, Randomness.get(), 0.01); + + int num = randomIntBetween(0, 10); + for (int i = 0; i < num; i++) { + bloom.add(randomLong()); + } + + return bloom; + } + + @Override + protected Writeable.Reader instanceReader() { + return in -> new SetBackedScalingCuckooFilter(in, Randomness.get()); + } + + @Override + protected SetBackedScalingCuckooFilter mutateInstance(SetBackedScalingCuckooFilter instance) throws IOException { + SetBackedScalingCuckooFilter newInstance = new SetBackedScalingCuckooFilter(instance); + int num = randomIntBetween(1, 10); + for (int i = 0; i < num; i++) { + newInstance.add(randomLong()); + } + return newInstance; + } + + public void testExact() { + int threshold = randomIntBetween(1000, 10000); + SetBackedScalingCuckooFilter filter = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01); + + int size = 0; + Set values = new HashSet<>(); + Set hashed = new HashSet<>(values.size()); + while (size < threshold - 100) { + long value = randomLong(); + filter.add(value); + boolean newValue = values.add(value); + if (newValue) { + Long hash = MurmurHash3.murmur64(value); + hashed.add(hash); + + size += 16; + } + } + assertThat(filter.hashes.size(), equalTo(hashed.size())); + assertThat(filter.hashes, equalTo(hashed)); + assertNull(filter.filters); + + for (Long value : values) { + assertThat(filter.mightContain(value), equalTo(true)); + } + } + + public void testConvert() { + int threshold = randomIntBetween(1000, 10000); + SetBackedScalingCuckooFilter filter = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01); + + int counter = 0; + Set values = new HashSet<>(); + while (counter < threshold + 100) { + long value = randomLong(); + filter.add(value); + boolean newValue = values.add(value); + if (newValue) { + counter += 1; + } + } + assertNull(filter.hashes); + assertThat(filter.filters.size(), greaterThan(0)); + + int incorrect = 0; + for (Long v : values) { + if (filter.mightContain(v) == false) { + incorrect += 1; + } + } + double fppRate = (double) incorrect / values.size(); + assertThat(fppRate, lessThanOrEqualTo(0.001)); + } + + public void testConvertTwice() { + int threshold = randomIntBetween(1000, 10000); + 
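+        // Add enough distinct values to cross the threshold so the set-backed tracker converts
+        // itself to approximate CuckooFilters, then verify a second explicit convert() is rejected.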
SetBackedScalingCuckooFilter filter = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01); + + int counter = 0; + Set values = new HashSet<>(); + while (counter < threshold + 100) { + long value = randomLong(); + filter.add(value); + boolean newValue = values.add(value); + if (newValue) { + counter += 1; + } + } + assertNull(filter.hashes); + assertThat(filter.filters.size(), greaterThan(0)); + IllegalStateException e = expectThrows(IllegalStateException.class, filter::convert); + assertThat(e.getMessage(), equalTo("Cannot convert SetBackedScalingCuckooFilter to approximate " + + "when it has already been converted.")); + } + + public void testMergeSmall() { + int threshold = 1000; + + // Setup the first filter + SetBackedScalingCuckooFilter filter = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01); + + int counter = 0; + Set values = new HashSet<>(); + while (counter < threshold + 1) { + long value = randomLong(); + filter.add(value); + boolean newValue = values.add(value); + if (newValue) { + counter += 1; + } + } + assertNull(filter.hashes); + assertThat(filter.filters.size(), greaterThan(0)); + + int incorrect = 0; + for (Long v : values) { + if (filter.mightContain(v) == false) { + incorrect += 1; + } + } + double fppRate = (double) incorrect / values.size(); + assertThat(fppRate, lessThanOrEqualTo(0.001)); + + // Setup the second filter + SetBackedScalingCuckooFilter filter2 = new SetBackedScalingCuckooFilter(threshold, Randomness.get(), 0.01); + counter = 0; + Set values2 = new HashSet<>(); + while (counter < threshold + 1) { + long value = randomLong(); + filter2.add(value); + boolean newValue = values2.add(value); + if (newValue) { + counter += 1; + } + } + assertNull(filter2.hashes); + assertThat(filter2.filters.size(), greaterThan(0)); + + incorrect = 0; + for (Long v : values2) { + if (filter2.mightContain(v) == false) { + incorrect += 1; + } + } + fppRate = (double) incorrect / values2.size(); + assertThat(fppRate, lessThanOrEqualTo(0.001)); + + // now merge and verify the combined set + filter.merge(filter2); + incorrect = 0; + for (Long v : values) { + if (filter.mightContain(v) == false) { + incorrect += 1; + } + } + for (Long v : values2) { + if (filter.mightContain(v) == false) { + incorrect += 1; + } + } + fppRate = (double) incorrect / (values.size() + values2.size()); + assertThat(fppRate, lessThanOrEqualTo(0.001)); + } + + public void testMergeIncompatible() { + SetBackedScalingCuckooFilter filter1 = new SetBackedScalingCuckooFilter(100, Randomness.get(), 0.01); + SetBackedScalingCuckooFilter filter2 = new SetBackedScalingCuckooFilter(1000, Randomness.get(), 0.01); + IllegalStateException e = expectThrows(IllegalStateException.class, () -> filter1.merge(filter2)); + assertThat(e.getMessage(), equalTo("Cannot merge other CuckooFilter because thresholds do not match: [100] vs [1000]")); + + SetBackedScalingCuckooFilter filter3 = new SetBackedScalingCuckooFilter(100, Randomness.get(), 0.001); + e = expectThrows(IllegalStateException.class, () -> filter1.merge(filter3)); + assertThat(e.getMessage(), equalTo("Cannot merge other CuckooFilter because precisions do not match: [0.01] vs [0.001]")); + } + + public void testBadParameters() { + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, + () -> new SetBackedScalingCuckooFilter(-1, Randomness.get(), 0.11)); + assertThat(e.getMessage(), equalTo("[threshold] must be a positive integer")); + + e = expectThrows(IllegalArgumentException.class, + () -> new 
SetBackedScalingCuckooFilter(1000000, Randomness.get(), 0.11)); + assertThat(e.getMessage(), equalTo("[threshold] must be smaller than [500000]")); + + e = expectThrows(IllegalArgumentException.class, + () -> new SetBackedScalingCuckooFilter(100, Randomness.get(), -1.0)); + assertThat(e.getMessage(), equalTo("[fpp] must be a positive double")); + } +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/RareTermsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/RareTermsTests.java new file mode 100644 index 0000000000000..31382f5df8e5c --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/RareTermsTests.java @@ -0,0 +1,101 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.search.aggregations.bucket; + +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.automaton.RegExp; +import org.elasticsearch.search.aggregations.BaseAggregationTestCase; +import org.elasticsearch.search.aggregations.bucket.terms.IncludeExclude; +import org.elasticsearch.search.aggregations.bucket.terms.RareTermsAggregationBuilder; + +import java.util.SortedSet; +import java.util.TreeSet; + +public class RareTermsTests extends BaseAggregationTestCase { + + @Override + protected RareTermsAggregationBuilder createTestAggregatorBuilder() { + String name = randomAlphaOfLengthBetween(3, 20); + RareTermsAggregationBuilder factory = new RareTermsAggregationBuilder(name, null); + String field = randomAlphaOfLengthBetween(3, 20); + randomFieldOrScript(factory, field); + if (randomBoolean()) { + factory.missing("MISSING"); + } + if (randomBoolean()) { + factory.format("###.##"); + } + if (randomBoolean()) { + IncludeExclude incExc = null; + switch (randomInt(6)) { + case 0: + incExc = new IncludeExclude(new RegExp("foobar"), null); + break; + case 1: + incExc = new IncludeExclude(null, new RegExp("foobaz")); + break; + case 2: + incExc = new IncludeExclude(new RegExp("foobar"), new RegExp("foobaz")); + break; + case 3: + SortedSet includeValues = new TreeSet<>(); + int numIncs = randomIntBetween(1, 20); + for (int i = 0; i < numIncs; i++) { + includeValues.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } + SortedSet excludeValues = null; + incExc = new IncludeExclude(includeValues, excludeValues); + break; + case 4: + SortedSet includeValues2 = null; + SortedSet excludeValues2 = new TreeSet<>(); + int numExcs2 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs2; i++) { + excludeValues2.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues2, excludeValues2); + break; + case 5: + SortedSet includeValues3 = new TreeSet<>(); + int numIncs3 = randomIntBetween(1, 20); + for (int i = 0; i < numIncs3; 
i++) { + includeValues3.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } + SortedSet excludeValues3 = new TreeSet<>(); + int numExcs3 = randomIntBetween(1, 20); + for (int i = 0; i < numExcs3; i++) { + excludeValues3.add(new BytesRef(randomAlphaOfLengthBetween(1, 30))); + } + incExc = new IncludeExclude(includeValues3, excludeValues3); + break; + case 6: + final int numPartitions = randomIntBetween(1, 100); + final int partition = randomIntBetween(0, numPartitions - 1); + incExc = new IncludeExclude(partition, numPartitions); + break; + default: + fail(); + } + factory.includeExclude(incExc); + } + return factory; + } + +} diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java new file mode 100644 index 0000000000000..a0d48b7ab778f --- /dev/null +++ b/server/src/test/java/org/elasticsearch/search/aggregations/bucket/terms/RareTermsAggregatorTests.java @@ -0,0 +1,600 @@ +/* + * Licensed to Elasticsearch under one or more contributor + * license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.elasticsearch.search.aggregations.bucket.terms; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongPoint; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.document.SortedDocValuesField; +import org.apache.lucene.document.SortedNumericDocValuesField; +import org.apache.lucene.document.SortedSetDocValuesField; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.RandomIndexWriter; +import org.apache.lucene.search.DocValuesFieldExistsQuery; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.MatchNoDocsQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TotalHits; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.Version; +import org.elasticsearch.cluster.metadata.IndexMetaData; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.KeywordFieldMapper; +import org.elasticsearch.index.mapper.MappedFieldType; +import org.elasticsearch.index.mapper.NumberFieldMapper; +import org.elasticsearch.index.mapper.SeqNoFieldMapper; +import org.elasticsearch.index.mapper.TypeFieldMapper; +import org.elasticsearch.index.mapper.Uid; +import org.elasticsearch.search.SearchHit; +import org.elasticsearch.search.aggregations.Aggregation; +import org.elasticsearch.search.aggregations.Aggregations; +import org.elasticsearch.search.aggregations.Aggregator; +import org.elasticsearch.search.aggregations.AggregatorTestCase; +import org.elasticsearch.search.aggregations.InternalAggregation; +import org.elasticsearch.search.aggregations.InternalMultiBucketAggregation; +import org.elasticsearch.search.aggregations.MultiBucketConsumerService; +import org.elasticsearch.search.aggregations.bucket.MultiBucketsAggregation; +import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder; +import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal; +import org.elasticsearch.search.aggregations.bucket.nested.InternalNested; +import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.InternalTopHits; +import org.elasticsearch.search.aggregations.metrics.Max; +import org.elasticsearch.search.aggregations.metrics.MaxAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.TopHitsAggregationBuilder; +import org.elasticsearch.search.aggregations.support.ValueType; +import org.elasticsearch.search.sort.FieldSortBuilder; +import org.elasticsearch.search.sort.ScoreSortBuilder; +import org.junit.Assert; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.function.Consumer; + +import static org.elasticsearch.index.mapper.SeqNoFieldMapper.PRIMARY_TERM_NAME; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; + +public class RareTermsAggregatorTests extends AggregatorTestCase { + + private static final String LONG_FIELD = "numeric"; + private static final String KEYWORD_FIELD = "keyword"; + + private static final List dataset; + static { + List d = new ArrayList<>(45); + for (int i = 0; 
i < 10; i++) { + for (int j = 0; j < i; j++) { + d.add((long) i); + } + } + dataset = d; + } + + public void testMatchNoDocs() throws IOException { + testBothCases(new MatchNoDocsQuery(), dataset, + aggregation -> aggregation.field(KEYWORD_FIELD).maxDocCount(1), + agg -> assertEquals(0, agg.getBuckets().size()), ValueType.STRING + ); + testBothCases(new MatchNoDocsQuery(), dataset, + aggregation -> aggregation.field(LONG_FIELD).maxDocCount(1), + agg -> assertEquals(0, agg.getBuckets().size()), ValueType.NUMERIC + ); + } + + public void testMatchAllDocs() throws IOException { + Query query = new MatchAllDocsQuery(); + + testBothCases(query, dataset, + aggregation -> aggregation.field(LONG_FIELD).maxDocCount(1), + agg -> { + assertEquals(1, agg.getBuckets().size()); + LongRareTerms.Bucket bucket = (LongRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo(1L)); + assertThat(bucket.getDocCount(), equalTo(1L)); + }, ValueType.NUMERIC + ); + testBothCases(query, dataset, + aggregation -> aggregation.field(KEYWORD_FIELD).maxDocCount(1), + agg -> { + assertEquals(1, agg.getBuckets().size()); + StringRareTerms.Bucket bucket = (StringRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKeyAsString(), equalTo("1")); + assertThat(bucket.getDocCount(), equalTo(1L)); + }, ValueType.STRING + ); + } + + public void testManyDocsOneRare() throws IOException { + Query query = new MatchAllDocsQuery(); + + List d = new ArrayList<>(500); + for (int i = 1; i < 500; i++) { + d.add((long) i); + d.add((long) i); + } + + // The one rare term + d.add(0L); + + testSearchAndReduceCase(query, d, + aggregation -> aggregation.field(LONG_FIELD).maxDocCount(1), + agg -> { + assertEquals(1, agg.getBuckets().size()); + LongRareTerms.Bucket bucket = (LongRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo(0L)); + assertThat(bucket.getDocCount(), equalTo(1L)); + }, ValueType.NUMERIC + ); + testSearchAndReduceCase(query, d, + aggregation -> aggregation.field(KEYWORD_FIELD).maxDocCount(1), + agg -> { + assertEquals(1, agg.getBuckets().size()); + StringRareTerms.Bucket bucket = (StringRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKeyAsString(), equalTo("0")); + assertThat(bucket.getDocCount(), equalTo(1L)); + }, ValueType.STRING + ); + } + + public void testIncludeExclude() throws IOException { + Query query = new MatchAllDocsQuery(); + + testBothCases(query, dataset, + aggregation -> aggregation.field(LONG_FIELD) + .maxDocCount(2) // bump to 2 since we're only including "2" + .includeExclude(new IncludeExclude(new long[]{2}, new long[]{})), + agg -> { + assertEquals(1, agg.getBuckets().size()); + LongRareTerms.Bucket bucket = (LongRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo(2L)); + assertThat(bucket.getDocCount(), equalTo(2L)); + }, ValueType.NUMERIC + ); + testBothCases(query, dataset, + aggregation -> aggregation.field(KEYWORD_FIELD) + .maxDocCount(2) // bump to 2 since we're only including "2" + .includeExclude(new IncludeExclude(new String[]{"2"}, new String[]{})), + agg -> { + assertEquals(1, agg.getBuckets().size()); + StringRareTerms.Bucket bucket = (StringRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKeyAsString(), equalTo("2")); + assertThat(bucket.getDocCount(), equalTo(2L)); + }, ValueType.STRING + ); + } + + public void testEmbeddedMaxAgg() throws IOException { + Query query = new MatchAllDocsQuery(); + + testBothCases(query, dataset, aggregation -> { + 
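+            // Nest a max sub-aggregation under the rare_terms agg so we can verify that
+            // sub-aggregations are collected for the surviving rare bucket.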
MaxAggregationBuilder max = new MaxAggregationBuilder("the_max").field(LONG_FIELD); + aggregation.field(LONG_FIELD).maxDocCount(1).subAggregation(max); + }, + agg -> { + assertEquals(1, agg.getBuckets().size()); + LongRareTerms.Bucket bucket = (LongRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo(1L)); + assertThat(bucket.getDocCount(), equalTo(1L)); + + Aggregations children = bucket.getAggregations(); + assertThat(children.asList().size(), equalTo(1)); + assertThat(children.asList().get(0).getName(), equalTo("the_max")); + assertThat(((Max)(children.asList().get(0))).getValue(), equalTo(1.0)); + }, ValueType.NUMERIC + ); + testBothCases(query, dataset, aggregation -> { + MaxAggregationBuilder max = new MaxAggregationBuilder("the_max").field(LONG_FIELD); + aggregation.field(KEYWORD_FIELD).maxDocCount(1).subAggregation(max); + }, + agg -> { + assertEquals(1, agg.getBuckets().size()); + StringRareTerms.Bucket bucket = (StringRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo("1")); + assertThat(bucket.getDocCount(), equalTo(1L)); + + Aggregations children = bucket.getAggregations(); + assertThat(children.asList().size(), equalTo(1)); + assertThat(children.asList().get(0).getName(), equalTo("the_max")); + assertThat(((Max)(children.asList().get(0))).getValue(), equalTo(1.0)); + }, ValueType.STRING + ); + } + + public void testEmpty() throws IOException { + Query query = new MatchAllDocsQuery(); + + testSearchCase(query, Collections.emptyList(), + aggregation -> aggregation.field(LONG_FIELD).maxDocCount(1), + agg -> assertEquals(0, agg.getBuckets().size()), ValueType.NUMERIC + ); + testSearchCase(query, Collections.emptyList(), + aggregation -> aggregation.field(KEYWORD_FIELD).maxDocCount(1), + agg -> assertEquals(0, agg.getBuckets().size()), ValueType.STRING + ); + + // Note: the search and reduce test will generate no segments (due to no docs) + // and so will return a null agg because the aggs aren't run/reduced + testSearchAndReduceCase(query, Collections.emptyList(), + aggregation -> aggregation.field(LONG_FIELD).maxDocCount(1), + Assert::assertNull, ValueType.NUMERIC + ); + testSearchAndReduceCase(query, Collections.emptyList(), + aggregation -> aggregation.field(KEYWORD_FIELD).maxDocCount(1), + Assert::assertNull, ValueType.STRING + ); + } + + public void testUnmapped() throws Exception { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + Document document = new Document(); + document.add(new SortedDocValuesField("string", new BytesRef("a"))); + document.add(new NumericDocValuesField("long", 0L)); + indexWriter.addDocument(document); + MappedFieldType fieldType1 = new KeywordFieldMapper.KeywordFieldType(); + fieldType1.setName("another_string"); + fieldType1.setHasDocValues(true); + + MappedFieldType fieldType2 = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + fieldType2.setName("another_long"); + fieldType2.setHasDocValues(true); + + + try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { + IndexSearcher indexSearcher = newIndexSearcher(indexReader); + ValueType[] valueTypes = new ValueType[]{ValueType.STRING, ValueType.LONG}; + String[] fieldNames = new String[]{"string", "long"}; + for (int i = 0; i < fieldNames.length; i++) { + RareTermsAggregationBuilder aggregationBuilder = new RareTermsAggregationBuilder("_name", valueTypes[i]) + .field(fieldNames[i]); + Aggregator aggregator = 
createAggregator(aggregationBuilder, indexSearcher, fieldType1, fieldType2); + aggregator.preCollection(); + indexSearcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + RareTerms result = (RareTerms) aggregator.buildAggregation(0L); + assertEquals("_name", result.getName()); + assertEquals(0, result.getBuckets().size()); + } + } + } + } + } + + public void testNestedTerms() throws IOException { + Query query = new MatchAllDocsQuery(); + + testBothCases(query, dataset, aggregation -> { + TermsAggregationBuilder terms = new TermsAggregationBuilder("the_terms", ValueType.STRING).field(KEYWORD_FIELD); + aggregation.field(LONG_FIELD).maxDocCount(1).subAggregation(terms); + }, + agg -> { + assertEquals(1, agg.getBuckets().size()); + LongRareTerms.Bucket bucket = (LongRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo(1L)); + assertThat(bucket.getDocCount(), equalTo(1L)); + + Aggregations children = bucket.getAggregations(); + assertThat(children.asList().size(), equalTo(1)); + assertThat(children.asList().get(0).getName(), equalTo("the_terms")); + assertThat(((Terms)(children.asList().get(0))).getBuckets().size(), equalTo(1)); + assertThat(((Terms)(children.asList().get(0))).getBuckets().get(0).getKeyAsString(), equalTo("1")); + }, ValueType.NUMERIC + ); + + testBothCases(query, dataset, aggregation -> { + TermsAggregationBuilder terms = new TermsAggregationBuilder("the_terms", ValueType.STRING).field(KEYWORD_FIELD); + aggregation.field(KEYWORD_FIELD).maxDocCount(1).subAggregation(terms); + }, + agg -> { + assertEquals(1, agg.getBuckets().size()); + StringRareTerms.Bucket bucket = (StringRareTerms.Bucket) agg.getBuckets().get(0); + assertThat(bucket.getKey(), equalTo("1")); + assertThat(bucket.getDocCount(), equalTo(1L)); + + Aggregations children = bucket.getAggregations(); + assertThat(children.asList().size(), equalTo(1)); + assertThat(children.asList().get(0).getName(), equalTo("the_terms")); + assertThat(((Terms)(children.asList().get(0))).getBuckets().size(), equalTo(1)); + assertThat(((Terms)(children.asList().get(0))).getBuckets().get(0).getKeyAsString(), equalTo("1")); + }, ValueType.STRING + ); + } + + public void testGlobalAggregationWithScore() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + Document document = new Document(); + document.add(new SortedDocValuesField("keyword", new BytesRef("a"))); + indexWriter.addDocument(document); + document = new Document(); + document.add(new SortedDocValuesField("keyword", new BytesRef("c"))); + indexWriter.addDocument(document); + document = new Document(); + document.add(new SortedDocValuesField("keyword", new BytesRef("e"))); + indexWriter.addDocument(document); + try (IndexReader indexReader = maybeWrapReaderEs(indexWriter.getReader())) { + IndexSearcher indexSearcher = newIndexSearcher(indexReader); + Aggregator.SubAggCollectionMode collectionMode = randomFrom(Aggregator.SubAggCollectionMode.values()); + GlobalAggregationBuilder globalBuilder = new GlobalAggregationBuilder("global") + .subAggregation( + new RareTermsAggregationBuilder("terms", ValueType.STRING) + .field("keyword") + .subAggregation( + new RareTermsAggregationBuilder("sub_terms", ValueType.STRING) + .field("keyword") + .subAggregation( + new TopHitsAggregationBuilder("top_hits") + .storedField("_none_") + ) + ) + ); + + MappedFieldType fieldType = new KeywordFieldMapper.KeywordFieldType(); + 
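+                    // The indexed test documents use a doc-values "keyword" field, so the mapped
+                    // field type passed to searchAndReduce must use the same name and enable doc values.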
fieldType.setName("keyword"); + fieldType.setHasDocValues(true); + + InternalGlobal result = searchAndReduce(indexSearcher, new MatchAllDocsQuery(), globalBuilder, fieldType); + InternalMultiBucketAggregation terms = result.getAggregations().get("terms"); + assertThat(terms.getBuckets().size(), equalTo(3)); + for (MultiBucketsAggregation.Bucket bucket : terms.getBuckets()) { + InternalMultiBucketAggregation subTerms = bucket.getAggregations().get("sub_terms"); + assertThat(subTerms.getBuckets().size(), equalTo(1)); + MultiBucketsAggregation.Bucket subBucket = subTerms.getBuckets().get(0); + InternalTopHits topHits = subBucket.getAggregations().get("top_hits"); + assertThat(topHits.getHits().getHits().length, equalTo(1)); + for (SearchHit hit : topHits.getHits()) { + assertThat(hit.getScore(), greaterThan(0f)); + } + } + } + } + } + } + + public void testWithNestedAggregations() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + for (int i = 0; i < 10; i++) { + int[] nestedValues = new int[i]; + for (int j = 0; j < i; j++) { + nestedValues[j] = j; + } + indexWriter.addDocuments(generateDocsWithNested(Integer.toString(i), i, nestedValues)); + } + indexWriter.commit(); + + NestedAggregationBuilder nested = new NestedAggregationBuilder("nested", "nested_object") + .subAggregation(new RareTermsAggregationBuilder("terms", ValueType.LONG) + .field("nested_value") + .maxDocCount(1) + ); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + fieldType.setHasDocValues(true); + fieldType.setName("nested_value"); + try (IndexReader indexReader = wrap(DirectoryReader.open(directory))) { + InternalNested result = searchAndReduce(newIndexSearcher(indexReader), + // match root document only + new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), nested, fieldType); + InternalMultiBucketAggregation terms = result.getAggregations().get("terms"); + assertThat(terms.getBuckets().size(), equalTo(1)); + assertThat(terms.getBuckets().get(0).getKeyAsString(), equalTo("8")); + } + + } + } + } + + public void testWithNestedScoringAggregations() throws IOException { + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + for (int i = 0; i < 10; i++) { + int[] nestedValues = new int[i]; + for (int j = 0; j < i; j++) { + nestedValues[j] = j; + } + indexWriter.addDocuments(generateDocsWithNested(Integer.toString(i), i, nestedValues)); + } + indexWriter.commit(); + for (boolean withScore : new boolean[]{true, false}) { + NestedAggregationBuilder nested = new NestedAggregationBuilder("nested", "nested_object") + .subAggregation(new RareTermsAggregationBuilder("terms", ValueType.LONG) + .field("nested_value") + .maxDocCount(2) + .subAggregation( + new TopHitsAggregationBuilder("top_hits") + .sort(withScore ? 
new ScoreSortBuilder() : new FieldSortBuilder("_doc")) + .storedField("_none_") + ) + ); + MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + fieldType.setHasDocValues(true); + fieldType.setName("nested_value"); + try (IndexReader indexReader = wrap(DirectoryReader.open(directory))) { + + if (withScore) { + + IllegalStateException e = expectThrows(IllegalStateException.class, + () -> searchAndReduce(newIndexSearcher(indexReader), + // match root document only + new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), nested, fieldType)); + assertThat(e.getMessage(), equalTo("RareTerms agg [terms] is the child of the nested agg [nested], " + + "and also has a scoring child agg [top_hits]. This combination is not supported because it requires " + + "executing in [depth_first] mode, which the RareTerms agg cannot do.")); + } else { + InternalNested result = searchAndReduce(newIndexSearcher(indexReader), + // match root document only + new DocValuesFieldExistsQuery(PRIMARY_TERM_NAME), nested, fieldType); + InternalMultiBucketAggregation terms = result.getAggregations().get("terms"); + assertThat(terms.getBuckets().size(), equalTo(2)); + long counter = 1; + for (MultiBucketsAggregation.Bucket bucket : terms.getBuckets()) { + InternalTopHits topHits = bucket.getAggregations().get("top_hits"); + TotalHits hits = topHits.getHits().getTotalHits(); + assertNotNull(hits); + assertThat(hits.value, equalTo(counter)); + assertThat(topHits.getHits().getMaxScore(), equalTo(Float.NaN)); + counter += 1; + } + } + } + } + } + } + } + + private final SeqNoFieldMapper.SequenceIDFields sequenceIDFields = SeqNoFieldMapper.SequenceIDFields.emptySeqID(); + private List generateDocsWithNested(String id, int value, int[] nestedValues) { + List documents = new ArrayList<>(); + + for (int nestedValue : nestedValues) { + Document document = new Document(); + document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.NESTED_FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "__nested_object", TypeFieldMapper.Defaults.FIELD_TYPE)); + document.add(new SortedNumericDocValuesField("nested_value", nestedValue)); + documents.add(document); + } + + Document document = new Document(); + document.add(new Field(IdFieldMapper.NAME, Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE)); + document.add(new Field(TypeFieldMapper.NAME, "docs", TypeFieldMapper.Defaults.FIELD_TYPE)); + document.add(new SortedNumericDocValuesField("value", value)); + document.add(sequenceIDFields.primaryTerm); + documents.add(document); + + return documents; + } + + + private InternalAggregation buildInternalAggregation(RareTermsAggregationBuilder builder, MappedFieldType fieldType, + IndexSearcher searcher) throws IOException { + AbstractRareTermsAggregator aggregator = createAggregator(builder, searcher, fieldType); + aggregator.preCollection(); + searcher.search(new MatchAllDocsQuery(), aggregator); + aggregator.postCollection(); + return aggregator.buildAggregation(0L); + } + + private void testSearchCase(Query query, List dataset, + Consumer configure, + Consumer verify, ValueType valueType) throws IOException { + executeTestCase(false, query, dataset, configure, verify, valueType); + } + + private void testSearchAndReduceCase(Query query, List dataset, + Consumer configure, + Consumer verify, ValueType valueType) throws IOException { + executeTestCase(true, query, dataset, configure, verify, valueType); + } + + private void testBothCases(Query query, List dataset, + 
Consumer configure, + Consumer verify, ValueType valueType) throws IOException { + testSearchCase(query, dataset, configure, verify, valueType); + testSearchAndReduceCase(query, dataset, configure, verify, valueType); + } + + @Override + protected IndexSettings createIndexSettings() { + Settings nodeSettings = Settings.builder() + .put("search.max_buckets", 100000).build(); + return new IndexSettings( + IndexMetaData.builder("_index").settings(Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)) + .numberOfShards(1) + .numberOfReplicas(0) + .creationDate(System.currentTimeMillis()) + .build(), + nodeSettings + ); + } + + private void executeTestCase(boolean reduced, Query query, List dataset, + Consumer configure, + Consumer verify, ValueType valueType) throws IOException { + + try (Directory directory = newDirectory()) { + try (RandomIndexWriter indexWriter = new RandomIndexWriter(random(), directory)) { + Document document = new Document(); + for (Long value : dataset) { + if (frequently()) { + indexWriter.commit(); + } + + document.add(new SortedNumericDocValuesField(LONG_FIELD, value)); + document.add(new LongPoint(LONG_FIELD, value)); + document.add(new SortedSetDocValuesField(KEYWORD_FIELD, new BytesRef(Long.toString(value)))); + indexWriter.addDocument(document); + document.clear(); + } + } + + try (IndexReader indexReader = DirectoryReader.open(directory)) { + IndexSearcher indexSearcher = newIndexSearcher(indexReader); + + RareTermsAggregationBuilder aggregationBuilder = new RareTermsAggregationBuilder("_name", valueType); + if (configure != null) { + configure.accept(aggregationBuilder); + } + + MappedFieldType keywordFieldType = new KeywordFieldMapper.KeywordFieldType(); + keywordFieldType.setName(KEYWORD_FIELD); + keywordFieldType.setHasDocValues(true); + + MappedFieldType longFieldType = new NumberFieldMapper.NumberFieldType(NumberFieldMapper.NumberType.LONG); + longFieldType.setName(LONG_FIELD); + longFieldType.setHasDocValues(true); + + InternalMappedRareTerms rareTerms; + if (reduced) { + rareTerms = searchAndReduce(indexSearcher, query, aggregationBuilder, keywordFieldType, longFieldType); + } else { + rareTerms = search(indexSearcher, query, aggregationBuilder, keywordFieldType, longFieldType); + } + verify.accept(rareTerms); + } + } + } + + @Override + public void doAssertReducedMultiBucketConsumer(Aggregation agg, MultiBucketConsumerService.MultiBucketConsumer bucketConsumer) { + /* + * No-op. + * + * This is used in the aggregator tests to check that after a reduction, we have the correct number of buckets. + * This can be done during incremental reduces, and the final reduce. Unfortunately, the number of buckets + * can _decrease_ during runtime as values are reduced together (e.g. 1 count on each shard, but when + * reduced it becomes 2 and is greater than the threshold). + * + * Because the incremental reduction test picks random subsets to reduce together, it's impossible + * to predict how the buckets will end up, and so this assertion will fail. + * + * If we want to put this assertion back in, we'll need this test to override the incremental reduce + * portion so that we can deterministically know which shards are being reduced together and which + * buckets we should have left after each reduction. 
+ */ + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java index 3bd3b6838a897..b939f8a9110db 100644 --- a/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/search/aggregations/AggregatorTestCase.java @@ -411,7 +411,7 @@ protected A searchAndReduc new InternalAggregation.ReduceContext(root.context().bigArrays(), null, reduceBucketConsumer, false); A reduced = (A) aggs.get(0).doReduce(toReduce, context); - InternalAggregationTestCase.assertMultiBucketConsumer(reduced, reduceBucketConsumer); + doAssertReducedMultiBucketConsumer(reduced, reduceBucketConsumer); aggs = new ArrayList<>(aggs.subList(r, toReduceSize)); aggs.add(reduced); } @@ -427,12 +427,16 @@ protected A searchAndReduc internalAgg = (A) pipelineAggregator.reduce(internalAgg, context); } } - InternalAggregationTestCase.assertMultiBucketConsumer(internalAgg, reduceBucketConsumer); + doAssertReducedMultiBucketConsumer(internalAgg, reduceBucketConsumer); return internalAgg; } } + protected void doAssertReducedMultiBucketConsumer(Aggregation agg, MultiBucketConsumerService.MultiBucketConsumer bucketConsumer) { + InternalAggregationTestCase.assertMultiBucketConsumer(agg, bucketConsumer); + } + private static class ShardSearcher extends IndexSearcher { private final List ctx;