Skip to content

Commit

Permalink
SQL: Fix issue with GROUP BY YEAR()
Browse files Browse the repository at this point in the history
Grouping By YEAR() is translated to a histogram aggregation, but
previously if there was a scalar function invloved (e.g.:
`YEAR(date + INTERVAL 2 YEARS)`), there was no proper script created
and the histogram was applied on a field with name: `date + INTERVAL 2 YEARS`
which doesn't make sense, and resulted in null result.

Check the underlying field of YEAR() and if it's a function call
`asScript()` to properly get the painless script on which the histogram
is applied.

Fixes: elastic#49386
  • Loading branch information
matriv committed Nov 25, 2019
1 parent fc33ee4 commit 41e2798
Show file tree
Hide file tree
Showing 3 changed files with 53 additions and 4 deletions.
23 changes: 23 additions & 0 deletions x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec
Original file line number Diff line number Diff line change
Expand Up @@ -534,6 +534,29 @@ SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY
null |10
;

histogramYearOnDateTimeWithScalars
schema::year:i|c:l
SELECT YEAR(CAST(birth_date + INTERVAL 5 YEARS AS DATE) + INTERVAL 20 MONTHS) AS year, COUNT(*) as c FROM test_emp GROUP BY 1;

year | c
---------------+---------------
null |10
1958 |2
1959 |12
1960 |7
1961 |7
1962 |4
1963 |5
1964 |5
1965 |7
1966 |9
1967 |7
1968 |7
1969 |8
1970 |6
1971 |4
;

histogramNumericWithExpression
schema::h:i|c:l
SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,10 +283,20 @@ static GroupingContext groupBy(List<? extends Expression> groupings) {
// dates are handled differently because of date histograms
if (exp instanceof DateTimeHistogramFunction) {
DateTimeHistogramFunction dthf = (DateTimeHistogramFunction) exp;
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.fixedInterval(), dthf.zoneId());
Expression field = dthf.field();
if (field instanceof FieldAttribute) {
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, nameOf(field), dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, nameOf(field), dthf.fixedInterval(), dthf.zoneId());
}
} else if (field instanceof Function) {
ScriptTemplate script = ((Function) field).asScript();
if (dthf.calendarInterval() != null) {
key = new GroupByDateHistogram(aggId, script, dthf.calendarInterval(), dthf.zoneId());
} else {
key = new GroupByDateHistogram(aggId, script, dthf.fixedInterval(), dthf.zoneId());
}
}
}
// all other scalar functions become a script
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -999,6 +999,22 @@ public void testGroupByYearQueryTranslator() {
+ "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByYearAndScalarsQueryTranslator() {
PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1");
assertEquals(EsQueryExec.class, p.getClass());
EsQueryExec eqe = (EsQueryExec) p;
assertEquals(1, eqe.output().size());
assertEquals("YEAR(CAST(date + INTERVAL 5 months AS DATE))", eqe.output().get(0).qualifiedName());
assertEquals(DataType.INTEGER, eqe.output().get(0).dataType());
assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""),
endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" +
"InternalSqlScriptUtils.add(InternalSqlScriptUtils.docValue(doc,params.v0)," +
"InternalSqlScriptUtils.intervalYearMonth(params.v1,params.v2)),params.v3)\"," +
"\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"P5M\",\"v2\":\"INTERVAL_MONTH\"," +
"\"v3\":\"DATE\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," +
"\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}"));
}

public void testGroupByHistogramWithDate() {
LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)");
assertTrue(p instanceof Aggregate);
Expand Down

0 comments on commit 41e2798

Please sign in to comment.