From 41e2798b1f31c08c09745132e09dc9219678433f Mon Sep 17 00:00:00 2001 From: Marios Trivyzas Date: Mon, 25 Nov 2019 18:45:47 +0100 Subject: [PATCH] SQL: Fix issue with GROUP BY YEAR() Grouping By YEAR() is translated to a histogram aggregation, but previously if there was a scalar function invloved (e.g.: `YEAR(date + INTERVAL 2 YEARS)`), there was no proper script created and the histogram was applied on a field with name: `date + INTERVAL 2 YEARS` which doesn't make sense, and resulted in null result. Check the underlying field of YEAR() and if it's a function call `asScript()` to properly get the painless script on which the histogram is applied. Fixes: #49386 --- .../sql/qa/src/main/resources/agg.csv-spec | 23 +++++++++++++++++++ .../xpack/sql/planner/QueryTranslator.java | 18 +++++++++++---- .../sql/planner/QueryTranslatorTests.java | 16 +++++++++++++ 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec index 19ee3d260b83c..182b6c2c76f39 100644 --- a/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec +++ b/x-pack/plugin/sql/qa/src/main/resources/agg.csv-spec @@ -534,6 +534,29 @@ SELECT HISTOGRAM(YEAR(birth_date), 2) AS h, COUNT(*) as c FROM test_emp GROUP BY null |10 ; +histogramYearOnDateTimeWithScalars +schema::year:i|c:l +SELECT YEAR(CAST(birth_date + INTERVAL 5 YEARS AS DATE) + INTERVAL 20 MONTHS) AS year, COUNT(*) as c FROM test_emp GROUP BY 1; + + year | c +---------------+--------------- +null |10 +1958 |2 +1959 |12 +1960 |7 +1961 |7 +1962 |4 +1963 |5 +1964 |5 +1965 |7 +1966 |9 +1967 |7 +1968 |7 +1969 |8 +1970 |6 +1971 |4 +; + histogramNumericWithExpression schema::h:i|c:l SELECT HISTOGRAM(emp_no % 100, 10) AS h, COUNT(*) as c FROM test_emp GROUP BY h ORDER BY h DESC; diff --git a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java index 6614d98cffbba..4fbcc76ff8220 100644 --- a/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java +++ b/x-pack/plugin/sql/src/main/java/org/elasticsearch/xpack/sql/planner/QueryTranslator.java @@ -283,10 +283,20 @@ static GroupingContext groupBy(List groupings) { // dates are handled differently because of date histograms if (exp instanceof DateTimeHistogramFunction) { DateTimeHistogramFunction dthf = (DateTimeHistogramFunction) exp; - if (dthf.calendarInterval() != null) { - key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.calendarInterval(), dthf.zoneId()); - } else { - key = new GroupByDateHistogram(aggId, nameOf(exp), dthf.fixedInterval(), dthf.zoneId()); + Expression field = dthf.field(); + if (field instanceof FieldAttribute) { + if (dthf.calendarInterval() != null) { + key = new GroupByDateHistogram(aggId, nameOf(field), dthf.calendarInterval(), dthf.zoneId()); + } else { + key = new GroupByDateHistogram(aggId, nameOf(field), dthf.fixedInterval(), dthf.zoneId()); + } + } else if (field instanceof Function) { + ScriptTemplate script = ((Function) field).asScript(); + if (dthf.calendarInterval() != null) { + key = new GroupByDateHistogram(aggId, script, dthf.calendarInterval(), dthf.zoneId()); + } else { + key = new GroupByDateHistogram(aggId, script, dthf.fixedInterval(), dthf.zoneId()); + } } } // all other scalar functions become a script diff --git a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java index 2b03810b57982..36722e6e1d0f5 100644 --- a/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java +++ b/x-pack/plugin/sql/src/test/java/org/elasticsearch/xpack/sql/planner/QueryTranslatorTests.java @@ -999,6 +999,22 @@ public void testGroupByYearQueryTranslator() { + "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}")); } + public void testGroupByYearAndScalarsQueryTranslator() { + PhysicalPlan p = optimizeAndPlan("SELECT YEAR(CAST(date + INTERVAL 5 months AS DATE)) FROM test GROUP BY 1"); + assertEquals(EsQueryExec.class, p.getClass()); + EsQueryExec eqe = (EsQueryExec) p; + assertEquals(1, eqe.output().size()); + assertEquals("YEAR(CAST(date + INTERVAL 5 months AS DATE))", eqe.output().get(0).qualifiedName()); + assertEquals(DataType.INTEGER, eqe.output().get(0).dataType()); + assertThat(eqe.queryContainer().aggs().asAggBuilder().toString().replaceAll("\\s+", ""), + endsWith("\"date_histogram\":{\"script\":{\"source\":\"InternalSqlScriptUtils.cast(" + + "InternalSqlScriptUtils.add(InternalSqlScriptUtils.docValue(doc,params.v0)," + + "InternalSqlScriptUtils.intervalYearMonth(params.v1,params.v2)),params.v3)\"," + + "\"lang\":\"painless\",\"params\":{\"v0\":\"date\",\"v1\":\"P5M\",\"v2\":\"INTERVAL_MONTH\"," + + "\"v3\":\"DATE\"}},\"missing_bucket\":true,\"value_type\":\"long\",\"order\":\"asc\"," + + "\"calendar_interval\":\"1y\",\"time_zone\":\"Z\"}}}]}}}")); + } + public void testGroupByHistogramWithDate() { LogicalPlan p = plan("SELECT MAX(int) FROM test GROUP BY HISTOGRAM(CAST(date AS DATE), INTERVAL 2 MONTHS)"); assertTrue(p instanceof Aggregate);