Skip to content

Commit

Permalink
MV_FILTER_ONLY may run into Exceptions in case duplicate values were …
Browse files Browse the repository at this point in the history
…processed (#15012)
  • Loading branch information
kgyrtkirk authored Sep 27, 2023
1 parent fc929e6 commit 022950a
Show file tree
Hide file tree
Showing 3 changed files with 57 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -79,9 +79,9 @@ public DimensionSelector decorate(final DimensionSelector selector)
}

if (isWhitelist) {
return filterAllowList(values, selector);
return filterAllowList(values, selector, delegate.getExtractionFn() != null);
} else {
return filterDenyList(values, selector);
return filterDenyList(values, selector, delegate.getExtractionFn() != null);
}
}

Expand Down Expand Up @@ -125,9 +125,9 @@ public static IdMapping buildDenyListIdMapping(
return builder.build();
}

public static DimensionSelector filterAllowList(Set<String> values, DimensionSelector selector)
public static DimensionSelector filterAllowList(Set<String> values, DimensionSelector selector, boolean forcePredicateFilter)
{
if (selector.getValueCardinality() < 0 || !selector.nameLookupPossibleInAdvance()) {
if (forcePredicateFilter || selector.getValueCardinality() < 0 || !selector.nameLookupPossibleInAdvance()) {
return new PredicateFilteredDimensionSelector(selector, Predicates.in(values));
}
final IdMapping idMapping = buildAllowListIdMapping(
Expand All @@ -139,9 +139,9 @@ public static DimensionSelector filterAllowList(Set<String> values, DimensionSel
return new ForwardingFilteredDimensionSelector(selector, idMapping);
}

public static DimensionSelector filterDenyList(Set<String> values, DimensionSelector selector)
public static DimensionSelector filterDenyList(Set<String> values, DimensionSelector selector, boolean forcePredicateFilter)
{
if (selector.getValueCardinality() < 0 || !selector.nameLookupPossibleInAdvance()) {
if (forcePredicateFilter || selector.getValueCardinality() < 0 || !selector.nameLookupPossibleInAdvance()) {
return new PredicateFilteredDimensionSelector(
selector,
input -> !values.contains(input)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import org.apache.druid.segment.index.semantic.LexicographicalRangeIndexes;
import org.apache.druid.segment.index.semantic.NullValueIndex;
import org.apache.druid.segment.index.semantic.StringValueSetIndexes;
import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier;

import javax.annotation.Nullable;
import java.util.Collections;
Expand Down Expand Up @@ -136,9 +137,9 @@ public DimensionSelector makeDimensionSelector(
)
{
if (allowList) {
return ListFilteredDimensionSpec.filterAllowList(values, factory.makeDimensionSelector(delegate));
return ListFilteredDimensionSpec.filterAllowList(values, factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null);
} else {
return ListFilteredDimensionSpec.filterDenyList(values, factory.makeDimensionSelector(delegate));
return ListFilteredDimensionSpec.filterDenyList(values, factory.makeDimensionSelector(delegate), delegate.getExtractionFn() != null);
}
}

Expand Down Expand Up @@ -182,6 +183,9 @@ public boolean usesDotNotation()
@Override
public ColumnIndexSupplier getIndexSupplier(String columnName, ColumnSelector columnSelector)
{
if (delegate.getExtractionFn() != null) {
return NoIndexesColumnIndexSupplier.getInstance();
}
return new ColumnIndexSupplier()
{
@Nullable
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.error.DruidException;
import org.apache.druid.java.util.common.StringUtils;
Expand All @@ -32,6 +33,7 @@
import org.apache.druid.query.aggregation.FilteredAggregatorFactory;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.dimension.ExtractionDimensionSpec;
import org.apache.druid.query.expression.TestExprMacroTable;
import org.apache.druid.query.filter.ExpressionDimFilter;
import org.apache.druid.query.filter.InDimFilter;
Expand All @@ -41,6 +43,7 @@
import org.apache.druid.query.groupby.GroupByQueryConfig;
import org.apache.druid.query.groupby.orderby.DefaultLimitSpec;
import org.apache.druid.query.groupby.orderby.OrderByColumnSpec;
import org.apache.druid.query.lookup.RegisteredLookupExtractionFn;
import org.apache.druid.query.ordering.StringComparators;
import org.apache.druid.query.scan.ScanQuery;
import org.apache.druid.segment.column.ColumnType;
Expand Down Expand Up @@ -2189,4 +2192,46 @@ public void testMultiValueStringOverlapFilterInconsistentUsage()

);
}

@Test
public void testMultiValuedFilterOnlyWhenLookupPullsInDuplicates()
{
cannotVectorize();
testBuilder()
.sql("SELECT \n"
+ " MV_FILTER_ONLY(LOOKUP(dim3,'lookyloo'),ARRAY[null]),count(1) \n"
+ "FROM druid.foo AS t group by 1\n")
.expectedQuery(
GroupByQuery.builder()
.setDataSource(CalciteTests.DATASOURCE1)
.setInterval(querySegmentSpec(Filtration.eternity()))
.setGranularity(Granularities.ALL)
.setVirtualColumns(
new ListFilteredVirtualColumn(
"v0",
new ExtractionDimensionSpec(
"dim3",
"dim3",
ColumnType.STRING,
new RegisteredLookupExtractionFn(
null,
"lookyloo",
false,
null,
null,
true)),
Sets.newHashSet((String) null),
true))
.setDimensions(
dimensions(
new DefaultDimensionSpec("v0", "d0", ColumnType.STRING)))
.setAggregatorSpecs(aggregators(new CountAggregatorFactory("a0")))
.setContext(QUERY_CONTEXT_DEFAULT)
.build())
.expectedResults(
ImmutableList.of(new Object[] {NullHandling.defaultStringValue(), 7L}))
.run();

}

}

0 comments on commit 022950a

Please sign in to comment.