Skip to content

Commit

Permalink
Support generated column for yyyy-MM-dd date_format
Browse files Browse the repository at this point in the history
The generated column machinery already supports partition pruning for `date_format` with the `yyyy-MM` and `yyyy-MM-dd-HH` patterns, but it missed `yyyy-MM-dd`. Add the latter to close the gap.

GitOrigin-RevId: aee7e7ecda7761327d9b35e777d3059b4ce6b613
  • Loading branch information
ryan-johnson-databricks authored and allisonport-db committed Feb 10, 2023
1 parent b38f404 commit 7e446fd
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,9 @@ object GeneratedColumn extends DeltaLogging with AnalysisHelper {
case DATE_FORMAT_YEAR_MONTH =>
createExpr(name)(
DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH))
case DATE_FORMAT_YEAR_MONTH_DAY =>
createExpr(name)(
DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH_DAY))
case DATE_FORMAT_YEAR_MONTH_DAY_HOUR =>
createExpr(name)(
DateFormatPartitionExpr(partColName, DATE_FORMAT_YEAR_MONTH_DAY_HOUR))
Expand Down Expand Up @@ -539,6 +542,7 @@ object GeneratedColumn extends DeltaLogging with AnalysisHelper {
}

// `date_format` patterns recognized when a generated partition column is turned into a
// DateFormatPartitionExpr. Each pattern is matched as an exact string.
// Year and month, e.g. "2021-06".
private val DATE_FORMAT_YEAR_MONTH = "yyyy-MM"
// Year, month and day of month, e.g. "2021-06-28".
private val DATE_FORMAT_YEAR_MONTH_DAY = "yyyy-MM-dd"
// Year, month, day of month and hour of day, e.g. "2021-06-28-18".
private val DATE_FORMAT_YEAR_MONTH_DAY_HOUR = "yyyy-MM-dd-HH"
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1079,6 +1079,42 @@ class OptimizeGeneratedColumnSuite extends GeneratedColumnTest {
)
)

// Partition pruning for a STRING `day` column generated as
// DATE_FORMAT(eventTime, 'yyyy-MM-dd') from a TIMESTAMP column.
testOptimizablePartitionExpression(
  "eventTime TIMESTAMP",
  "day STRING",
  Map("day" -> "DATE_FORMAT(eventTime, 'yyyy-MM-dd')"),
  expectedPartitionExpr = DateFormatPartitionExpr("day", "yyyy-MM-dd"),
  auxiliaryTestName = Option(" from timestamp"),
  filterTestCases = {
    val dayFormat = "yyyy-MM-dd"
    val literal = "2021-06-28 18:00:00"

    // Expected partition filter for a comparison `op` between the `day` partition value and
    // the literal formatted with the same pattern. The filter also accepts the case where
    // the comparison itself evaluates to NULL.
    def onDay(op: String): Seq[String] = {
      val comparison = s"(unix_timestamp(day, '$dayFormat') $op unix_timestamp(" +
        s"date_format(TIMESTAMP '$literal', '$dayFormat'), '$dayFormat'))"
      Seq(s"($comparison OR ($comparison IS NULL))")
    }

    Seq(
      // Strict bounds on eventTime are expected to become inclusive bounds on `day`
      // (presumably because the partition value drops the time of day -- confirm against
      // DateFormatPartitionExpr).
      s"eventTime < '$literal'" -> onDay("<="),
      s"eventTime <= '$literal'" -> onDay("<="),
      s"eventTime = '$literal'" -> onDay("="),
      s"eventTime > '$literal'" -> onDay(">="),
      s"eventTime >= '$literal'" -> onDay(">="),
      "eventTime is null" -> Seq("(day IS NULL)")
    )
  }
)

testOptimizablePartitionExpression(
"eventTime TIMESTAMP",
"hour STRING",
Expand Down

0 comments on commit 7e446fd

Please sign in to comment.