Skip to content

Commit

Permalink
revert ColumnAnalysis type, add typeSignature and use it for DruidSchema
Browse files Browse the repository at this point in the history
  • Loading branch information
clintropolis committed Nov 9, 2021
1 parent a5bd0b8 commit bff926e
Show file tree
Hide file tree
Showing 9 changed files with 277 additions and 47 deletions.
8 changes: 5 additions & 3 deletions docs/querying/segmentmetadataquery.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,11 @@ The format of the result is:
} ]
```

Dimension columns will have type `STRING`, `FLOAT`, `DOUBLE`, or `LONG`.
Metric columns will have type `FLOAT`, `DOUBLE`, or `LONG`, or the name of the underlying complex type such as `hyperUnique` in case of COMPLEX metric.
Timestamp column will have type `LONG`.
All columns will contain a `typeSignature`, which is the Druid internal representation of the type information for this column. It is what is shown in the [`INFORMATION_SCHEMA.COLUMNS`](../querying/sql.md#columns-table) table in SQL, and is typically the value used to supply Druid with JSON type information at query or ingest time. This value will be `STRING`, `FLOAT`, `DOUBLE`, `LONG`, or `COMPLEX<typeName>` (e.g. `COMPLEX<hyperUnique>`).

Additionally, columns will have a legacy-friendly `type` name. This might match `typeSignature` for some column types (`STRING`, `FLOAT`, `DOUBLE`, or `LONG`), but for COMPLEX columns it will only contain the name of the underlying complex type, such as `hyperUnique`.

The timestamp column will always have `typeSignature` and `type` as `LONG`.

If the `errorMessage` field is non-null, you should not trust the other fields in the response. Their contents are
undefined.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,8 @@ private ColumnAnalysis analyzeNumericColumn(
}

return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
Expand Down Expand Up @@ -248,7 +249,8 @@ private ColumnAnalysis analyzeStringColumn(
}

return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
Expand Down Expand Up @@ -326,7 +328,8 @@ public Long accumulate(Long accumulated, Cursor cursor)
}

return new ColumnAnalysis(
capabilities.asTypeString(),
capabilities.toColumnType(),
capabilities.getType().name(),
capabilities.hasMultipleValues().isTrue(),
capabilities.hasNulls().isMaybeTrue(), // if we don't know for sure, then we should plan to check for nulls
size,
Expand All @@ -343,8 +346,6 @@ private ColumnAnalysis analyzeComplexColumn(
final String typeName
)
{
// serialize using asTypeString (which is also used for JSON so can easily round-trip complex type info back into ColumnType)
final String serdeTypeName = ColumnType.ofComplex(typeName).asTypeString();
try (final ComplexColumn complexColumn = columnHolder != null ? (ComplexColumn) columnHolder.getColumn() : null) {
final boolean hasMultipleValues = capabilities != null && capabilities.hasMultipleValues().isTrue();
final boolean hasNulls = capabilities != null && capabilities.hasNulls().isMaybeTrue();
Expand All @@ -359,7 +360,8 @@ private ColumnAnalysis analyzeComplexColumn(
final Function<Object, Long> inputSizeFn = serde.inputSizeFn();
if (inputSizeFn == null) {
return new ColumnAnalysis(
serdeTypeName,
capabilities.toColumnType(),
typeName,
hasMultipleValues,
hasNulls,
0,
Expand All @@ -377,7 +379,8 @@ private ColumnAnalysis analyzeComplexColumn(
}

return new ColumnAnalysis(
serdeTypeName,
capabilities.toColumnType(),
typeName,
hasMultipleValues,
hasNulls,
size,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonTypeInfo;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.segment.column.ColumnType;

import java.util.Objects;

Expand All @@ -33,10 +35,11 @@ public class ColumnAnalysis

public static ColumnAnalysis error(String reason)
{
return new ColumnAnalysis("STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason);
return new ColumnAnalysis(ColumnType.STRING, "STRING", false, false, -1, null, null, null, ERROR_PREFIX + reason);
}

private final String type;
private final ColumnType typeSignature;
private final boolean hasMultipleValues;
private final boolean hasNulls;
private final long size;
Expand All @@ -47,6 +50,7 @@ public static ColumnAnalysis error(String reason)

@JsonCreator
public ColumnAnalysis(
@JsonProperty("typeSignature") ColumnType typeSignature,
@JsonProperty("type") String type,
@JsonProperty("hasMultipleValues") boolean hasMultipleValues,
@JsonProperty("hasNulls") boolean hasNulls,
Expand All @@ -57,6 +61,7 @@ public ColumnAnalysis(
@JsonProperty("errorMessage") String errorMessage
)
{
this.typeSignature = typeSignature;
this.type = type;
this.hasMultipleValues = hasMultipleValues;
this.hasNulls = hasNulls;
Expand All @@ -73,6 +78,12 @@ public String getType()
return type;
}

@JsonProperty
public ColumnType getTypeSignature()
{
return typeSignature;
}

@JsonProperty
public boolean isHasMultipleValues()
{
Expand Down Expand Up @@ -136,7 +147,19 @@ public ColumnAnalysis fold(ColumnAnalysis rhs)
}

if (!type.equals(rhs.getType())) {
return ColumnAnalysis.error("cannot_merge_diff_types");
return ColumnAnalysis.error(
StringUtils.format("cannot_merge_diff_types: [%s] and [%s]", type, rhs.getType())
);
}

if (!typeSignature.equals(rhs.getTypeSignature())) {
return ColumnAnalysis.error(
StringUtils.format(
"cannot_merge_diff_types: [%s] and [%s]",
typeSignature.asTypeString(),
rhs.getTypeSignature().asTypeString()
)
);
}

Integer cardinality = getCardinality();
Expand All @@ -153,6 +176,7 @@ public ColumnAnalysis fold(ColumnAnalysis rhs)
Comparable newMax = choose(maxValue, rhs.maxValue, true);

return new ColumnAnalysis(
typeSignature,
type,
multipleValues,
hasNulls || rhs.hasNulls,
Expand Down Expand Up @@ -181,6 +205,7 @@ public String toString()
{
return "ColumnAnalysis{" +
"type='" + type + '\'' +
", columnType=" + typeSignature +
", hasMultipleValues=" + hasMultipleValues +
", hasNulls=" + hasNulls +
", size=" + size +
Expand All @@ -205,6 +230,7 @@ public boolean equals(Object o)
hasNulls == that.hasNulls &&
size == that.size &&
Objects.equals(type, that.type) &&
Objects.equals(typeSignature, that.typeSignature) &&
Objects.equals(cardinality, that.cardinality) &&
Objects.equals(minValue, that.minValue) &&
Objects.equals(maxValue, that.maxValue) &&
Expand All @@ -214,6 +240,7 @@ public boolean equals(Object o)
@Override
public int hashCode()
{
return Objects.hash(type, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage);
return Objects.hash(type,
typeSignature, hasMultipleValues, hasNulls, size, cardinality, minValue, maxValue, errorMessage);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.druid.segment.QueryableIndexSegment;
import org.apache.druid.segment.TestHelper;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.incremental.IncrementalIndex;
import org.apache.druid.segment.incremental.IncrementalIndexSchema;
Expand Down Expand Up @@ -155,7 +156,8 @@ public static Collection<?> dataFeeder()
ImmutableMap.of(
TIME_COLUMN,
new ColumnAnalysis(
ValueType.LONG.toString(),
ColumnType.LONG,
ValueType.LONG.name(),
false,
false,
100,
Expand All @@ -166,7 +168,8 @@ public static Collection<?> dataFeeder()
),
DIM_NAME,
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
false,
false,
120,
Expand All @@ -177,7 +180,8 @@ public static Collection<?> dataFeeder()
),
DIM_FLOAT_NAME,
new ColumnAnalysis(
ValueType.DOUBLE.toString(),
ColumnType.DOUBLE,
ValueType.DOUBLE.name(),
false,
false,
80,
Expand All @@ -200,7 +204,8 @@ public static Collection<?> dataFeeder()
ImmutableMap.of(
TIME_COLUMN,
new ColumnAnalysis(
ValueType.LONG.toString(),
ColumnType.LONG,
ValueType.LONG.name(),
false,
false,
100,
Expand All @@ -211,7 +216,8 @@ public static Collection<?> dataFeeder()
),
DIM_NAME,
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
false,
false,
120,
Expand All @@ -222,7 +228,8 @@ public static Collection<?> dataFeeder()
),
DIM_FLOAT_NAME,
new ColumnAnalysis(
ValueType.FLOAT.toString(),
ColumnType.FLOAT,
ValueType.FLOAT.name(),
false,
false,
80,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
import org.apache.druid.query.metadata.metadata.SegmentAnalysis;
import org.apache.druid.query.metadata.metadata.SegmentMetadataQuery;
import org.apache.druid.query.spec.LegacySegmentSpec;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.timeline.LogicalSegment;
import org.joda.time.Interval;
Expand Down Expand Up @@ -79,7 +80,8 @@ public void testCacheStrategy() throws Exception
ImmutableMap.of(
"placement",
new ColumnAnalysis(
ValueType.STRING.toString(),
ColumnType.STRING,
ValueType.STRING.name(),
true,
false,
10881,
Expand Down
Loading

0 comments on commit bff926e

Please sign in to comment.