Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

make dimension column extensible with COMPLEX type #10277

Merged
merged 11 commits into from
Dec 3, 2020
Merged
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.druid.segment;

public interface DimensionHandlerProvider
<EncodedType extends Comparable<EncodedType>, EncodedKeyComponentType, ActualType extends Comparable<ActualType>>
{
DimensionHandler<EncodedType, EncodedKeyComponentType, ActualType> get(String dimensionName);
}
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Function;

public final class DimensionHandlerUtils
Expand All @@ -62,6 +63,28 @@ public final class DimensionHandlerUtils
.setDictionaryValuesSorted(false)
.setHasBitmapIndexes(false);

public static final ConcurrentHashMap<String, DimensionHandlerProvider> DIMENSION_HANDLER_PROVIDERS = new ConcurrentHashMap<>();

public static void registerDimensionHandlerProvider(String type, DimensionHandlerProvider provider)
{
DIMENSION_HANDLER_PROVIDERS.compute(type, (key, value) -> {
if (value == null) {
return provider;
} else {
if (!value.getClass().getName().equals(provider.getClass().getName())) {
throw new ISE(
"Incompatible dimensionHandlerProvider for type[%s] already exists. Expected [%s], found [%s].",
key,
value.getClass().getName(),
provider.getClass().getName()
);
} else {
return value;
}
}
});
}

private DimensionHandlerUtils()
{
}
Expand Down Expand Up @@ -97,6 +120,14 @@ private DimensionHandlerUtils()
return new DoubleDimensionHandler(dimensionName);
}

if (capabilities.getType() == ValueType.COMPLEX && capabilities.getComplexTypeName() != null) {
DimensionHandlerProvider provider = DIMENSION_HANDLER_PROVIDERS.get(capabilities.getComplexTypeName());
if (provider == null) {
throw new ISE("Can't find DimensionHandlerProvider for typeName [%s]", capabilities.getComplexTypeName());
}
return provider.get(dimensionName);
}

// Return a StringDimensionHandler by default (null columns will be treated as String typed)
return new StringDimensionHandler(dimensionName, multiValueHandling, true, false);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,12 @@ public ColumnBuilder setType(ValueType type)
return this;
}

public ColumnBuilder setComplexTypeName(String typeName)
{
this.capabilitiesBuilder.setComplexTypeName(typeName);
return this;
}

public ColumnBuilder setHasMultipleValues(boolean hasMultipleValues)
{
this.capabilitiesBuilder.setHasMultipleValues(hasMultipleValues);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,12 @@ public interface ColumnCapabilities
*/
ValueType getType();

/**
*
* If ValueType is COMPLEX, then the typeName associated with it.
*/
String getComplexTypeName();

/**
* Is the column dictionary encoded? If so, a DimensionDictionarySelector may be used instead of using a value
* selector, allowing algorithms to operate on primitive integer dictionary ids rather than the looked up dictionary
Expand All @@ -48,6 +54,7 @@ public interface ColumnCapabilities
* If the column is dictionary encoded, are those values sorted? Useful to know for optimizations that can defer
* looking up values and allowing sorting with the dictionary ids directly
*/

Capable areDictionaryValuesSorted();

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ public static ColumnCapabilitiesImpl copyOf(@Nullable final ColumnCapabilities o
final ColumnCapabilitiesImpl capabilities = new ColumnCapabilitiesImpl();
if (other != null) {
capabilities.type = other.getType();
capabilities.complexTypeName = other.getComplexTypeName();
capabilities.dictionaryEncoded = other.isDictionaryEncoded();
capabilities.hasInvertedIndexes = other.hasBitmapIndexes();
capabilities.hasSpatialIndexes = other.hasSpatialIndexes();
Expand Down Expand Up @@ -129,6 +130,14 @@ public static ColumnCapabilitiesImpl merge(
throw new ISE("Cannot merge columns of type[%s] and [%s]", merged.type, otherSnapshot.getType());
}

if (merged.type == ValueType.COMPLEX && merged.complexTypeName == null) {
merged.complexTypeName = other.getComplexTypeName();
}

if (merged.type == ValueType.COMPLEX && merged.complexTypeName != null && !merged.complexTypeName.equals(other.getComplexTypeName())) {
throw new ISE("Cannot merge columns of typeName[%s] and [%s]", merged.complexTypeName, other.getComplexTypeName());
}

merged.dictionaryEncoded = merged.dictionaryEncoded.or(otherSnapshot.isDictionaryEncoded());
merged.hasMultipleValues = merged.hasMultipleValues.or(otherSnapshot.hasMultipleValues());
merged.dictionaryValuesSorted = merged.dictionaryValuesSorted.and(otherSnapshot.areDictionaryValuesSorted());
Expand Down Expand Up @@ -187,6 +196,9 @@ public static ColumnCapabilitiesImpl createSimpleArrayColumnCapabilities(ValueTy
@Nullable
private ValueType type = null;

@Nullable
private String complexTypeName = null;

private boolean hasInvertedIndexes = false;
private boolean hasSpatialIndexes = false;
private Capable dictionaryEncoded = Capable.UNKNOWN;
Expand All @@ -209,12 +221,25 @@ public ValueType getType()
return type;
}

@Override
@JsonProperty
public String getComplexTypeName()
{
return complexTypeName;
}

public ColumnCapabilitiesImpl setType(ValueType type)
{
this.type = Preconditions.checkNotNull(type, "'type' must be nonnull");
return this;
}

public ColumnCapabilitiesImpl setComplexTypeName(String typeName)
{
this.complexTypeName = typeName;
return this;
}

@Override
@JsonProperty("dictionaryEncoded")
public Capable isDictionaryEncoded()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -289,7 +289,12 @@ protected IncrementalIndex(
for (DimensionSchema dimSchema : dimensionsSpec.getDimensions()) {
ValueType type = dimSchema.getValueType();
String dimName = dimSchema.getName();
ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type);

// Note: Things might be simpler if DimensionSchema had a method "getColumnCapabilities()" which could return
// type specific capabilities by itself. However, for various reasons, DimensionSchema currently lives in druid-core
// while ColumnCapabilities lives in druid-processing which makes that approach difficult.
ColumnCapabilitiesImpl capabilities = makeDefaultCapabilitiesFromValueType(type, dimSchema.getTypeName());
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apologies for the conflicts here, I made some semi disruptive changes to push ColumnCapabilities into the DimensionIndexer implementations so they can be more accurate. I think the changes should still be workable with your addition of DimensionHandlerProvider, just the dimension indexer it provides will need to provide the complex column capabilities.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it should be alright, I will update this PR later when I have my MapStringString extension ready/deployed and tested with changes in this patch. thanks for the heads up.


capabilities.setHasBitmapIndexes(dimSchema.hasBitmapIndex());

if (dimSchema.getTypeName().equals(DimensionSchema.SPATIAL_TYPE_NAME)) {
Expand Down Expand Up @@ -588,7 +593,7 @@ IncrementalIndexRowResult toIncrementalIndexRow(InputRow row)
dimension,
// for schemaless type discovery, everything is a String. this should probably try to autodetect
// based on the value to use a better handler
makeDefaultCapabilitiesFromValueType(ValueType.STRING),
makeDefaultCapabilitiesFromValueType(ValueType.STRING, null),
null
)
);
Expand Down Expand Up @@ -839,7 +844,7 @@ public List<String> getDimensionOrder()
}
}

private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type)
private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType type, String typeName)
{
switch (type) {
case STRING:
Expand All @@ -850,7 +855,7 @@ private ColumnCapabilitiesImpl makeDefaultCapabilitiesFromValueType(ValueType ty
.setDictionaryValuesUnique(true)
.setDictionaryValuesSorted(false);
case COMPLEX:
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type).setHasNulls(true);
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type).setHasNulls(true).setComplexTypeName(typeName);
default:
return ColumnCapabilitiesImpl.createSimpleNumericColumnCapabilities(type);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ public Deserializer getDeserializer()
// longer term this needs to be captured by making the serde provide this information, and then this should
// no longer be set to true but rather the actual values
builder.setHasNulls(ColumnCapabilities.Capable.TRUE);
serde.deserializeColumn(buffer, builder);
serde.deserializeColumn(buffer, builder, columnConfig);
}
};
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.apache.druid.guice.annotations.ExtensionPoint;
import org.apache.druid.segment.GenericColumnSerializer;
import org.apache.druid.segment.column.ColumnBuilder;
import org.apache.druid.segment.column.ColumnConfig;
import org.apache.druid.segment.data.ObjectStrategy;
import org.apache.druid.segment.writeout.SegmentWriteOutMedium;

Expand All @@ -45,7 +46,22 @@ public abstract class ComplexMetricSerde
*
* @param buffer the buffer to deserialize
* @param builder ColumnBuilder to add the column to
* @param columnConfig ColumnConfiguration used during deserialization
*/
public void deserializeColumn(
ByteBuffer buffer,
ColumnBuilder builder,
@SuppressWarnings("unused") ColumnConfig columnConfig
)
{
deserializeColumn(buffer, builder);
}

/**
* {@link ComplexMetricSerde#deserializeColumn(ByteBuffer, ColumnBuilder, ColumnConfig)} should be used instead of this.
* This method is left for backward compatibility.
*/
@Deprecated
public abstract void deserializeColumn(ByteBuffer buffer, ColumnBuilder builder);

/**
Expand Down
Loading