Skip to content

Commit

Permalink
preserve explicitly specified dimension schema in "logical" schema of…
Browse files Browse the repository at this point in the history
… sampler response (#14144)
  • Loading branch information
clintropolis authored Apr 23, 2023
1 parent b95708f commit 887f8db
Show file tree
Hide file tree
Showing 2 changed files with 128 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -247,10 +247,19 @@ public SamplerResponse sample(
if (!SamplerInputRow.SAMPLER_ORDERING_COLUMN.equals(dimensionDesc.getName())) {
final ColumnType columnType = dimensionDesc.getCapabilities().toColumnType();
signatureBuilder.add(dimensionDesc.getName(), columnType);
// for now, use legacy types instead of standard type
logicalDimensionSchemas.add(
DimensionSchema.getDefaultSchemaForBuiltInType(dimensionDesc.getName(), dimensionDesc.getCapabilities())
);
// use explicitly specified dimension schema if it exists
if (dataSchema != null &&
dataSchema.getDimensionsSpec() != null &&
dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()) != null) {
logicalDimensionSchemas.add(dataSchema.getDimensionsSpec().getSchema(dimensionDesc.getName()));
} else {
logicalDimensionSchemas.add(
DimensionSchema.getDefaultSchemaForBuiltInType(
dimensionDesc.getName(),
dimensionDesc.getCapabilities()
)
);
}
physicalDimensionSchemas.add(
dimensionDesc.getIndexer().getFormat().getColumnSchema(dimensionDesc.getName())
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.segment.AutoTypeColumnSchema;
import org.apache.druid.segment.NestedDataDimensionSchema;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.indexing.DataSchema;
Expand Down Expand Up @@ -183,4 +184,118 @@ public void testDiscoveredTypesStrictBooleans()
ExpressionProcessing.initializeForTests();
}
}

@Test
public void testTypesClassicDiscovery()
{
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
final DataSchema dataSchema = new DataSchema(
"test",
new TimestampSpec("t", null, null),
DimensionsSpec.builder().build(),
null,
null,
null
);
final SamplerResponse response = inputSourceSampler.sample(
inputSource,
new JsonInputFormat(null, null, null, null, null),
dataSchema,
null
);

Assert.assertEquals(6, response.getNumRowsRead());
Assert.assertEquals(5, response.getNumRowsIndexed());
Assert.assertEquals(6, response.getData().size());
Assert.assertEquals(
ImmutableList.of(
new StringDimensionSchema("string"),
new StringDimensionSchema("long"),
new StringDimensionSchema("double"),
new StringDimensionSchema("bool"),
new StringDimensionSchema("variant"),
new StringDimensionSchema("array")
),
response.getLogicalDimensions()
);

Assert.assertEquals(
ImmutableList.of(
new StringDimensionSchema("string"),
new StringDimensionSchema("long"),
new StringDimensionSchema("double"),
new StringDimensionSchema("bool"),
new StringDimensionSchema("variant"),
new StringDimensionSchema("array")
),
response.getPhysicalDimensions()
);
Assert.assertEquals(
RowSignature.builder()
.addTimeColumn()
.add("string", ColumnType.STRING)
.add("long", ColumnType.STRING)
.add("double", ColumnType.STRING)
.add("bool", ColumnType.STRING)
.add("variant", ColumnType.STRING)
.add("array", ColumnType.STRING)
.build(),
response.getLogicalSegmentSchema()
);
}

@Test
public void testTypesNoDiscoveryExplicitSchema()
{
final InputSource inputSource = new InlineInputSource(Strings.join(STR_JSON_ROWS, '\n'));
final DataSchema dataSchema = new DataSchema(
"test",
new TimestampSpec("t", null, null),
DimensionsSpec.builder().setDimensions(
ImmutableList.of(new StringDimensionSchema("string"),
new LongDimensionSchema("long"),
new DoubleDimensionSchema("double"),
new StringDimensionSchema("bool"),
new NestedDataDimensionSchema("variant"),
new NestedDataDimensionSchema("array"),
new NestedDataDimensionSchema("nested")
)
).build(),
null,
null,
null
);
final SamplerResponse response = inputSourceSampler.sample(
inputSource,
new JsonInputFormat(null, null, null, null, null),
dataSchema,
null
);

Assert.assertEquals(6, response.getNumRowsRead());
Assert.assertEquals(5, response.getNumRowsIndexed());
Assert.assertEquals(6, response.getData().size());
Assert.assertEquals(
dataSchema.getDimensionsSpec().getDimensions(),
response.getLogicalDimensions()
);

Assert.assertEquals(
dataSchema.getDimensionsSpec().getDimensions(),
response.getPhysicalDimensions()
);
Assert.assertEquals(
RowSignature.builder()
.addTimeColumn()
.add("string", ColumnType.STRING)
.add("long", ColumnType.LONG)
.add("double", ColumnType.DOUBLE)
.add("bool", ColumnType.STRING)
.add("variant", ColumnType.NESTED_DATA)
.add("array", ColumnType.NESTED_DATA)
.add("nested", ColumnType.NESTED_DATA)
.build(),
response.getLogicalSegmentSchema()
);
}
}

0 comments on commit 887f8db

Please sign in to comment.