Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(search): Supporting Aggregations for hasX fields #8241

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -143,9 +143,18 @@ private void extractSearchableAnnotation(final Object annotationObj, final DataS
throw new ModelValidationException(
String.format("Entity has multiple searchable fields with the same field name %s, path: %s", annotation.getFieldName(), fullPath.orElse(path)));
} else {
annotation = new SearchableAnnotation(pathName, annotation.getFieldType(), annotation.isQueryByDefault(),
annotation.isEnableAutocomplete(), annotation.isAddToFilters(), annotation.getFilterNameOverride(),
annotation.getBoostScore(), annotation.getHasValuesFieldName(), annotation.getNumValuesFieldName(),
annotation = new SearchableAnnotation(
pathName,
annotation.getFieldType(),
annotation.isQueryByDefault(),
annotation.isEnableAutocomplete(),
annotation.isAddToFilters(),
annotation.isAddHasValuesToFilters(),
annotation.getFilterNameOverride(),
annotation.getHasValuesFilterNameOverride(),
annotation.getBoostScore(),
annotation.getHasValuesFieldName(),
annotation.getNumValuesFieldName(),
annotation.getWeightsPerFieldValue());
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ public class SearchableAnnotation {
boolean enableAutocomplete;
// Whether or not to add field to filters.
boolean addToFilters;
// Name of the filter
// Whether or not to add the "has values" to filters.
boolean addHasValuesToFilters;
// Display name of the filter
Optional<String> filterNameOverride;
// Display name of the has values filter
Optional<String> hasValuesFilterNameOverride;
// Boost multiplier to the match score. Matches on fields with higher boost score ranks higher
double boostScore;
// If set, add a index field of the given name that checks whether the field exists
Expand Down Expand Up @@ -80,18 +84,29 @@ public static SearchableAnnotation fromPegasusAnnotationObject(@Nonnull final Ob
final Optional<Boolean> queryByDefault = AnnotationUtils.getField(map, "queryByDefault", Boolean.class);
final Optional<Boolean> enableAutocomplete = AnnotationUtils.getField(map, "enableAutocomplete", Boolean.class);
final Optional<Boolean> addToFilters = AnnotationUtils.getField(map, "addToFilters", Boolean.class);
final Optional<Boolean> addHasValuesToFilters = AnnotationUtils.getField(map, "addHasValuesToFilters", Boolean.class);
final Optional<String> filterNameOverride = AnnotationUtils.getField(map, "filterNameOverride", String.class);
final Optional<String> hasValuesFilterNameOverride =
AnnotationUtils.getField(map, "hasValuesFilterNameOverride", String.class);
final Optional<Double> boostScore = AnnotationUtils.getField(map, "boostScore", Double.class);
final Optional<String> hasValuesFieldName = AnnotationUtils.getField(map, "hasValuesFieldName", String.class);
final Optional<String> numValuesFieldName = AnnotationUtils.getField(map, "numValuesFieldName", String.class);

final Optional<Map> weightsPerFieldValueMap =
AnnotationUtils.getField(map, "weightsPerFieldValue", Map.class).map(m -> (Map<Object, Double>) m);

final FieldType resolvedFieldType = getFieldType(fieldType, schemaDataType);
return new SearchableAnnotation(fieldName.orElse(schemaFieldName), resolvedFieldType,
getQueryByDefault(queryByDefault, resolvedFieldType), enableAutocomplete.orElse(false),
addToFilters.orElse(false), filterNameOverride, boostScore.orElse(1.0), hasValuesFieldName, numValuesFieldName,
return new SearchableAnnotation(
fieldName.orElse(schemaFieldName),
resolvedFieldType,
getQueryByDefault(queryByDefault, resolvedFieldType),
enableAutocomplete.orElse(false),
addToFilters.orElse(false),
addHasValuesToFilters.orElse(false),
filterNameOverride,
hasValuesFilterNameOverride,
boostScore.orElse(1.0),
hasValuesFieldName,
numValuesFieldName,
weightsPerFieldValueMap.orElse(ImmutableMap.of()));
}

Expand Down Expand Up @@ -127,4 +142,17 @@ private static Boolean getQueryByDefault(Optional<Boolean> maybeQueryByDefault,
public String getFilterName() {
return filterNameOverride.orElse(fieldName);
}

public String getHasValuesFilterName() {
return hasValuesFilterNameOverride.orElse(
hasValuesFieldName.orElse(String.format("has%s", capitalizeFirstLetter(fieldName))));
}

private static String capitalizeFirstLetter(String str) {
if (str == null || str.length() == 0) {
return str;
} else {
return str.substring(0, 1).toUpperCase() + str.substring(1);
}
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.models.annotation.SearchableAnnotation;
import com.linkedin.metadata.search.utils.ESUtils;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import lombok.extern.slf4j.Slf4j;
import org.elasticsearch.search.aggregations.AggregationBuilder;
Expand All @@ -16,34 +19,55 @@

@Slf4j
public class AggregationQueryBuilder {

private final SearchConfiguration _configs;
private final List<SearchableAnnotation> _annotations;
private final Set<String> _facetFields;
public AggregationQueryBuilder(SearchConfiguration configs, List<SearchableAnnotation> annotations) {
this._configs = configs;
this._annotations = annotations;

public AggregationQueryBuilder(
@Nonnull final SearchConfiguration configs,
@Nonnull final List<SearchableAnnotation> annotations) {
this._configs = Objects.requireNonNull(configs, "configs must not be null");
this._facetFields = getFacetFields(annotations);
}

private Set<String> getFacetFields(List<SearchableAnnotation> annotations) {
/**
* Get the set of default aggregations, across all facets.
*/
public List<AggregationBuilder> getAggregations() {
return getAggregations(null);
}

/**
* Get aggregations for a search request for the given facets provided, and if none are provided, then get aggregations for all.
*/
public List<AggregationBuilder> getAggregations(@Nullable List<String> facets) {
final Set<String> facetsToAggregate;
if (facets != null) {
facets.stream().filter(f -> !isValidAggregate(f)).forEach(facet -> {
log.warn(String.format("Provided facet for search filter aggregations that doesn't exist. Provided: %s; Available: %s", facet, _facetFields));
});
facetsToAggregate = facets.stream().filter(this::isValidAggregate).collect(Collectors.toSet());
} else {
facetsToAggregate = _facetFields;
}
return facetsToAggregate.stream().map(this::facetToAggregationBuilder).collect(Collectors.toList());
}


private Set<String> getFacetFields(final List<SearchableAnnotation> annotations) {
Set<String> facets = annotations.stream()
.filter(SearchableAnnotation::isAddToFilters)
.map(SearchableAnnotation::getFieldName)
.flatMap(annotation -> getFacetFieldsFromAnnotation(annotation).stream())
.collect(Collectors.toSet());
facets.add(INDEX_VIRTUAL_FIELD);
return facets;
}

public List<AggregationBuilder> getAggregations() {
return getAggregations(null);
}

private boolean isValidAggregate(String inputFacet) {
private boolean isValidAggregate(final String inputFacet) {
Set<String> facets = Set.of(inputFacet.split(AGGREGATION_SEPARATOR_CHAR));
return facets.size() > 0 && _facetFields.containsAll(facets);
}

private AggregationBuilder facetToAggregationBuilder(String inputFacet) {
private AggregationBuilder facetToAggregationBuilder(final String inputFacet) {
List<String> facets = List.of(inputFacet.split(AGGREGATION_SEPARATOR_CHAR));
AggregationBuilder lastAggBuilder = null;
for (int i = facets.size() - 1; i >= 0; i--) {
Expand All @@ -52,7 +76,9 @@ private AggregationBuilder facetToAggregationBuilder(String inputFacet) {
facet = "_index";
}
AggregationBuilder aggBuilder =
AggregationBuilders.terms(inputFacet).field(ESUtils.toKeywordField(facet, false)).size(_configs.getMaxTermBucketSize());
AggregationBuilders.terms(inputFacet)
.field(getAggregationField(facet))
.size(_configs.getMaxTermBucketSize());
if (lastAggBuilder != null) {
aggBuilder = aggBuilder.subAggregation(lastAggBuilder);
}
Expand All @@ -62,19 +88,23 @@ private AggregationBuilder facetToAggregationBuilder(String inputFacet) {
return lastAggBuilder;
}

/**
* Get aggregations for a search request for the given facets provided, and if none are provided, then get aggregations for all.
*/
public List<AggregationBuilder> getAggregations(@Nullable List<String> facets) {
final Set<String> facetsToAggregate;
if (facets != null) {
facets.stream().filter(f -> !isValidAggregate(f)).forEach(facet -> {
log.warn(String.format("Provided facet for search filter aggregations that doesn't exist. Provided: %s; Available: %s", facet, _facetFields));
});
facetsToAggregate = facets.stream().filter(this::isValidAggregate).collect(Collectors.toSet());
} else {
facetsToAggregate = _facetFields;
private String getAggregationField(final String facet) {
if (facet.startsWith("has")) {
// Boolean hasX field, not a keyword field. Return the name of the original facet.
return facet;
}
return facetsToAggregate.stream().map(this::facetToAggregationBuilder).collect(Collectors.toList());
// Otherwise assume that this field is of keyword type.
return ESUtils.toKeywordField(facet, false);
}

List<String> getFacetFieldsFromAnnotation(final SearchableAnnotation annotation) {
final List<String> facetsFromAnnotation = new ArrayList<>();
if (annotation.isAddToFilters()) {
facetsFromAnnotation.add(annotation.getFieldName());
}
if (annotation.isAddHasValuesToFilters() && annotation.getHasValuesFieldName().isPresent()) {
facetsFromAnnotation.add(annotation.getHasValuesFieldName().get());
}
return facetsFromAnnotation;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import com.linkedin.metadata.search.features.Features;
import com.linkedin.metadata.search.utils.ESUtils;
import com.linkedin.metadata.utils.SearchUtil;
import com.linkedin.util.Pair;
import io.opentelemetry.extension.annotations.WithSpan;
import java.net.URISyntaxException;
import java.util.ArrayList;
Expand Down Expand Up @@ -81,11 +82,11 @@ public class SearchRequestHandler {
.setSkipCache(false)
.setSkipAggregates(false)
.setSkipHighlighting(false);

private static final Map<List<EntitySpec>, SearchRequestHandler> REQUEST_HANDLER_BY_ENTITY_NAME = new ConcurrentHashMap<>();
private static final String REMOVED = "removed";

private static final String URN_FILTER = "urn";
private static final String[] FIELDS_TO_FETCH = new String[]{"urn", "usageCountLast30Days"};
jjoyce0510 marked this conversation as resolved.
Show resolved Hide resolved
private static final String[] URN_FIELD = new String[]{"urn"};

private final List<EntitySpec> _entitySpecs;
private final Set<String> _defaultQueryFieldNames;
Expand All @@ -106,8 +107,8 @@ private SearchRequestHandler(@Nonnull List<EntitySpec> entitySpecs, @Nonnull Sea
List<SearchableAnnotation> annotations = getSearchableAnnotations();
_defaultQueryFieldNames = getDefaultQueryFieldNames(annotations);
_filtersToDisplayName = annotations.stream()
.filter(SearchableAnnotation::isAddToFilters)
.collect(Collectors.toMap(SearchableAnnotation::getFieldName, SearchableAnnotation::getFilterName, mapMerger()));
.flatMap(annotation -> getFacetFieldDisplayNameFromAnnotation(annotation).stream())
.collect(Collectors.toMap(Pair::getFirst, Pair::getSecond, mapMerger()));
_filtersToDisplayName.put(INDEX_VIRTUAL_FIELD, "Type");
_highlights = getHighlights();
_searchQueryBuilder = new SearchQueryBuilder(configs, customSearchConfiguration);
Expand Down Expand Up @@ -135,13 +136,6 @@ private List<SearchableAnnotation> getSearchableAnnotations() {
.collect(Collectors.toList());
}

private Set<String> getFacetFields(List<SearchableAnnotation> annotations) {
return annotations.stream()
.filter(SearchableAnnotation::isAddToFilters)
.map(SearchableAnnotation::getFieldName)
.collect(Collectors.toSet());
}

@VisibleForTesting
private Set<String> getDefaultQueryFieldNames(List<SearchableAnnotation> annotations) {
return Stream.concat(annotations.stream()
Expand Down Expand Up @@ -695,4 +689,20 @@ private AggregationMetadata buildAggregationMetadata(
.setFilterValues(filterValues);
}

private List<Pair<String, String>> getFacetFieldDisplayNameFromAnnotation(
@Nonnull final SearchableAnnotation annotation
) {
final List<Pair<String, String>> facetsFromAnnotation = new ArrayList<>();
// Case 1: Default Keyword field
if (annotation.isAddToFilters()) {
facetsFromAnnotation.add(Pair.of(annotation.getFieldName(), annotation.getFilterName()));
}
// Case 2: HasX boolean field
if (annotation.isAddHasValuesToFilters() && annotation.getHasValuesFieldName().isPresent()) {
facetsFromAnnotation.add(Pair.of(
annotation.getHasValuesFieldName().get(), annotation.getHasValuesFilterName()
));
}
return facetsFromAnnotation;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package com.linkedin.metadata.search.elasticsearch.query.request;

import com.google.common.collect.ImmutableList;
import com.linkedin.metadata.config.search.SearchConfiguration;
import com.linkedin.metadata.models.annotation.SearchableAnnotation;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import org.elasticsearch.search.aggregations.AggregationBuilder;
import org.testng.Assert;
import org.testng.annotations.Test;


public class AggregationQueryBuilderTest {

@Test
public void testGetAggregationsHasFields() {
jjoyce0510 marked this conversation as resolved.
Show resolved Hide resolved

SearchableAnnotation annotation = new SearchableAnnotation(
"test",
SearchableAnnotation.FieldType.KEYWORD,
true,
true,
false,
true,
Optional.empty(),
Optional.of("Has Test"),
1.0,
Optional.of("hasTest"),
Optional.empty(),
Collections.emptyMap()
);

SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder = new AggregationQueryBuilder(
config, ImmutableList.of(annotation));

List<AggregationBuilder> aggs = builder.getAggregations();

Assert.assertTrue(aggs.stream().anyMatch(agg -> agg.getName().equals("hasTest")));
jjoyce0510 marked this conversation as resolved.
Show resolved Hide resolved
}

@Test
public void testGetAggregationsFields() {

SearchableAnnotation annotation = new SearchableAnnotation(
"test",
SearchableAnnotation.FieldType.KEYWORD,
true,
true,
true,
false,
Optional.of("Test Filter"),
Optional.empty(),
1.0,
Optional.empty(),
Optional.empty(),
Collections.emptyMap()
);

SearchConfiguration config = new SearchConfiguration();
config.setMaxTermBucketSize(25);

AggregationQueryBuilder builder = new AggregationQueryBuilder(
config, ImmutableList.of(annotation));

List<AggregationBuilder> aggs = builder.getAggregations();

Assert.assertTrue(aggs.stream().anyMatch(agg -> agg.getName().equals("test")));
}
}
Loading