From 1b14d085abad4236b0148f60c8939101ac4d7c98 Mon Sep 17 00:00:00 2001 From: Eduardo Ruiz Date: Mon, 8 Nov 2021 13:29:52 +0100 Subject: [PATCH] feat: Date time functions #65 --- .../main/scala/doric/syntax/DateColumns.scala | 83 +++++++++++----- .../scala/doric/syntax/NumericColumns.scala | 47 ++++++++- .../scala/doric/syntax/StringColumns.scala | 26 +++++ .../scala/doric/syntax/TimestampColumns.scala | 69 ++++++++++--- .../main/scala/doric/syntax/WhenBuilder.scala | 2 +- .../scala/doric/syntax/DateColumnsSpec.scala | 24 ++++- .../doric/syntax/NumericOperationsSpec.scala | 98 +++++++++++++++++-- .../doric/syntax/StringColumnsSpec.scala | 32 ++++++ .../doric/syntax/TimestampColumnsSpec.scala | 60 ++++++++++-- 9 files changed, 381 insertions(+), 60 deletions(-) diff --git a/core/src/main/scala/doric/syntax/DateColumns.scala b/core/src/main/scala/doric/syntax/DateColumns.scala index cd626aff1..a9159057f 100644 --- a/core/src/main/scala/doric/syntax/DateColumns.scala +++ b/core/src/main/scala/doric/syntax/DateColumns.scala @@ -3,7 +3,7 @@ package syntax import cats.implicits._ import doric.types.{DateType, SparkType} -import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, MonthsBetween, NextDay, TruncDate} +import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, MonthsBetween, NextDay, TruncDate, TruncTimestamp, UnixTimestamp} import org.apache.spark.sql.{Column, functions => f} import java.sql.Date @@ -30,7 +30,8 @@ private[syntax] trait DateColumns { * the number of months to add, can be negative to subtract. * @return * Date column after adding months - * @note Timestamp columns will be truncated to Date column + * @note + * Timestamp columns will be truncated to Date column */ def addMonths(nMonths: IntegerColumn): DateColumn = (column.elem, nMonths.elem).mapN(f.add_months).toDC @@ -38,9 +39,11 @@ private[syntax] trait DateColumns { /** * Returns the date that is `days` days after date column * - * @param days A column of the number of days to add to date column, can be negative to subtract days + * @param days + * A column of the number of days to add to date column, can be negative to subtract days + * @note + * Timestamp columns will be truncated to Date column * @group Date & Timestamp Type - * @note Timestamp columns will be truncated to Date column */ def addDays(days: IntegerColumn): DateColumn = (column.elem, days.elem).mapN(f.date_add).toDC @@ -49,9 +52,11 @@ private[syntax] trait DateColumns { * Converts a date to a value of string in the format specified by the date * format given by the second argument. * - * @param format A pattern `dd.MM.yyyy` would return a string like `18.03.1993` - * @note Use specialized functions like 'year' whenever possible as they benefit from a - * specialized implementation. + * @param format + * A pattern `dd.MM.yyyy` would return a string like `18.03.1993` + * @note + * Use specialized functions like 'year' whenever possible as they benefit from a + * specialized implementation. 
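+   * @example
+   *   A usage sketch (assumes a DataFrame `df` with a date column named "dateCol"; the
+   *   DataFrame and column names are illustrative):
+   *   {{{
+   *   df.withColumn("formatted", colDate("dateCol").format("dd.MM.yyyy".lit))
+   *   // Date("1993-03-18") is rendered as "18.03.1993"
+   *   }}}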
* @group Date & Timestamp Type */ def format(format: StringColumn): StringColumn = @@ -64,10 +69,11 @@ private[syntax] trait DateColumns { /** * Returns the date that is `days` days before date column * - * @param days A column of the number of days to subtract from date column, can be negative to add - * days + * @param days + * A column of the number of days to subtract from date column, can be negative to add days + * @note + * Timestamp columns will be truncated to Date column * @group Date & Timestamp Type - * @note Timestamp columns will be truncated to Date column */ def subDays(days: IntegerColumn): DateColumn = (column.elem, days.elem).mapN(f.date_sub).toDC @@ -75,7 +81,8 @@ private[syntax] trait DateColumns { /** * Returns the number of days from date column to `dateCol`. * - * @param dateCol A Date or Timestamp column + * @param dateCol + * A Date or Timestamp column * @group Date & Timestamp Type */ def diff(dateCol: DoricColumn[T]): IntegerColumn = @@ -141,7 +148,8 @@ private[syntax] trait DateColumns { * Timestamp("2017-06-01 00:00:00").monthsBetween(Timestamp("2017-06-16 12:00:00")) // returns -0.5 * }}} * - * @param dateCol Date or Timestamp column + * @param dateCol + * Date or Timestamp column * @group Date & Timestamp Type */ def monthsBetween(dateCol: DoricColumn[T]): DoubleColumn = @@ -150,9 +158,11 @@ private[syntax] trait DateColumns { /** * Returns number of months between dates `dateCol` and date column. * - * @param dateCol Date or Timestamp column - * @param roundOff If `roundOff` is set to true, the result is rounded off to 8 digits; - * it is not rounded otherwise. + * @param dateCol + * Date or Timestamp column + * @param roundOff + * If `roundOff` is set to true, the result is rounded off to 8 digits; + * it is not rounded otherwise. * @group Date & Timestamp Type */ def monthsBetween( @@ -172,9 +182,11 @@ private[syntax] trait DateColumns { * For example, `Date("2015-07-27").nextDay("Sunday")` returns Date("2015-08-02") because * that is the first Sunday after 2015-07-27. 
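+   *
+   * A usage sketch of the column-based variant (assumes a date column named "dateCol";
+   * the name is illustrative):
+   * {{{
+   * df.withColumn("nextSunday", colDate("dateCol").nextDay("Sunday".lit))
+   * }}}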
   *
-   * @param dayOfWeek Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
+   * @param dayOfWeek
+   *   Case insensitive, and accepts: "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"
+   * @note
+   *   Timestamp columns will be truncated to Date column
    * @group Date & Timestamp Type
-   * @note Timestamp columns will be truncated to Date column
    */
   def nextDay(dayOfWeek: StringColumn): DateColumn =
     (column.elem, dayOfWeek.elem)
@@ -195,19 +207,44 @@ private[syntax] trait DateColumns {
    *
-   * For example, `Timestamp("2018-11-19 12:01:19").trunc("year")` returns Date("2018-01-01")
+   * For example, `Timestamp("2018-11-19 12:01:19").truncate("year")` returns
+   * Timestamp("2018-01-01 00:00:00")
    *
-   * @param format 'year', 'yyyy', 'yy' to truncate by year,
-   *               or 'month', 'mon', 'mm' to truncate by month
-   *               Other options are: 'week', 'quarter'
+   * @param format
+   *   if the column is a date:
+   *   * 'year', 'yyyy', 'yy' to truncate by year,
+   *   * 'month', 'mon', 'mm' to truncate by month.
+   *   Other options are: 'week', 'quarter'
+   *   if the column is a timestamp:
+   *   * 'year', 'yyyy', 'yy' to truncate by year,
+   *   * 'month', 'mon', 'mm' to truncate by month,
+   *   * 'day', 'dd' to truncate by day.
+   *   Other options are:
+   *   * 'microsecond', 'millisecond', 'second', 'minute', 'hour', 'week', 'quarter'
+   * @note
+   *   The column keeps its type: Date columns are truncated with `TruncDate`,
+   *   Timestamp columns with `TruncTimestamp`
    * @group Date & Timestamp Type
-   * @note Timestamp columns will be truncated to Date column
    */
-  def trunc(format: StringColumn): DateColumn =
+  def truncate(format: StringColumn): DoricColumn[T] =
     (column.elem, format.elem)
       .mapN((c, fmt) => {
-        new Column(TruncDate(c.expr, fmt.expr))
+        new Column(SparkType[T].dataType match {
+          case org.apache.spark.sql.types.DateType =>
+            TruncDate(c.expr, fmt.expr)
+          case org.apache.spark.sql.types.TimestampType =>
+            TruncTimestamp(fmt.expr, c.expr)
+        })
       })
       .toDC
 
+  /**
+   * Converts date/timestamp to Unix timestamp (in seconds),
+   * using the default timezone and the default locale.
+   *
+   * @return
+   *   A long
+   *
+   * @group Date & Timestamp Type
+   */
+  def unixTimestamp: LongColumn = column.elem.map(f.unix_timestamp).toDC
+
   /**
    * Extracts the week number as an integer from a given date.
    *
diff --git a/core/src/main/scala/doric/syntax/NumericColumns.scala b/core/src/main/scala/doric/syntax/NumericColumns.scala
index 500bada33..36f11f65a 100644
--- a/core/src/main/scala/doric/syntax/NumericColumns.scala
+++ b/core/src/main/scala/doric/syntax/NumericColumns.scala
@@ -5,10 +5,15 @@ import cats.implicits.catsSyntaxTuple2Semigroupal
 import doric.DoricColumn.sparkFunction
 import doric.types.NumericType
 import org.apache.spark.sql.Column
-import org.apache.spark.sql.catalyst.expressions.FormatNumber
+import org.apache.spark.sql.{functions => f}
+import org.apache.spark.sql.catalyst.expressions.{FormatNumber, FromUnixTime}
 
 private[syntax] trait NumericColumns {
 
+  /**
+   * Returns the current Unix timestamp (in seconds) as a long column. All calls of
+   * `unixTimestamp` within the same query return the same value.
+   *
+   * @group Numeric Type
+   */
+  def unixTimestamp(): LongColumn = {
+    DoricColumn(f.unix_timestamp())
+  }
+
   implicit class NumericOperationsSyntax[T: NumericType](
       column: DoricColumn[T]
   ) {
@@ -83,6 +88,46 @@
       })
       .toDC
 
+    /**
+     * Creates timestamp from the number of seconds since UTC epoch.
+     *
+     * @group Numeric Type
+     */
+    def timestampSeconds: TimestampColumn =
+      column.elem.map(f.timestamp_seconds).toDC
+
+  }
+
+  implicit class LongOperationsSyntax(
+      column: LongColumn
+  ) {
+
+    /**
+     * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
+     * representing the timestamp of that moment in the current system time zone in the
+     * yyyy-MM-dd HH:mm:ss format.
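+     *
+     * A usage sketch (assumes a DataFrame `df` with a long column "secondsCol" holding
+     * epoch seconds; both names are illustrative):
+     * {{{
+     * df.withColumn("asString", colLong("secondsCol").fromUnixTime)
+     * // 123L => "1970-01-01 00:02:03" when the session time zone is UTC
+     * }}}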
+ * + * @group Numeric Type + */ + def fromUnixTime: StringColumn = column.elem.map(f.from_unixtime).toDC + + /** + * Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string + * representing the timestamp of that moment in the current system time zone in the given + * format. + * + * @note + * An IllegalArgumentException will be thrown if invalid pattern + * + * @group Numeric Type + */ + def fromUnixTime(format: StringColumn): StringColumn = + (column.elem, format.elem) + .mapN((c, f) => { + new Column(FromUnixTime(c.expr, f.expr)) + }) + .toDC + } } diff --git a/core/src/main/scala/doric/syntax/StringColumns.scala b/core/src/main/scala/doric/syntax/StringColumns.scala index 5ed4b307c..69c133772 100644 --- a/core/src/main/scala/doric/syntax/StringColumns.scala +++ b/core/src/main/scala/doric/syntax/StringColumns.scala @@ -457,6 +457,32 @@ private[syntax] trait StringColumns { */ def unbase64: BinaryColumn = s.elem.map(f.unbase64).toDC + /** + * Converts date/timestamp to Unix timestamp (in seconds), + * using the default timezone and the default locale. + * + * @return + * A long + * + * @group String Type + */ + def unixTimestamp: LongColumn = s.elem.map(f.unix_timestamp).toDC + + /** + * Converts date/timestamp with given pattern to Unix timestamp (in seconds). + * + * @return + * A long, or null if the input was a string not of the correct format + * + * @group String Type + */ + def unixTimestamp(pattern: StringColumn): LongColumn = + (s.elem, pattern.elem) + .mapN((c, p) => { + new Column(UnixTimestamp(c.expr, p.expr)) + }) + .toDC + /** * ******************************************************** * DORIC FUNCTIONS diff --git a/core/src/main/scala/doric/syntax/TimestampColumns.scala b/core/src/main/scala/doric/syntax/TimestampColumns.scala index 9f2b76a1b..dd03f4e5a 100644 --- a/core/src/main/scala/doric/syntax/TimestampColumns.scala +++ b/core/src/main/scala/doric/syntax/TimestampColumns.scala @@ -1,8 +1,10 @@ package doric package syntax +import cats.implicits.catsSyntaxTuple2Semigroupal import doric.types.TimestampType -import org.apache.spark.sql.{functions => f} +import org.apache.spark.sql.catalyst.expressions.{FromUTCTimestamp, ToUTCTimestamp} +import org.apache.spark.sql.{Column, functions => f} import java.sql.Timestamp @@ -21,6 +23,34 @@ private[syntax] trait TimestampColumns { column: DoricColumn[T] ) { + /** + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in UTC, and renders + * that time as a timestamp in the given time zone. For example, 'GMT+1' would yield + * '2017-07-14 03:40:00.0'. + * + * @group Timestamp Type + */ + def fromUtc(timeZone: StringColumn): TimestampColumn = + (column.elem, timeZone.elem) + .mapN((c, tz) => { + new Column(FromUTCTimestamp(c.expr, tz.expr)) + }) + .toDC + + /** + * Given a timestamp like '2017-07-14 02:40:00.0', interprets it as a time in the given time + * zone, and renders that time as a timestamp in UTC. For example, 'GMT+1' would yield + * '2017-07-14 01:40:00.0'. + * + * @group Timestamp Type + */ + def toUtc(timeZone: StringColumn): TimestampColumn = + (column.elem, timeZone.elem) + .mapN((c, tz) => { + new Column(ToUTCTimestamp(c.expr, tz.expr)) + }) + .toDC + /** * Extracts the seconds as an integer from a given timestamp. * @@ -44,21 +74,24 @@ private[syntax] trait TimestampColumns { * Generates tumbling time windows given a timestamp specifying column. Window * starts are inclusive but the window ends are exclusive. 
   *
-   * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`,
-   * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
-   * valid duration identifiers. Note that the duration is a fixed length of
-   * time, and does not vary over time according to a calendar. For example,
-   * `1 day` always means 86,400,000 milliseconds, not a calendar day.
-   * @param slideDuration A string specifying the sliding interval of the window, e.g. `1 minute`.
-   * A new window will be generated every `slideDuration`. Must be less than
-   * or equal to the `windowDuration`. Check
-   * `org.apache.spark.unsafe.types.CalendarInterval` for valid duration
-   * identifiers. This duration is likewise absolute, and does not vary
-   * according to a calendar.
-   * @param startTime The offset with respect to 1970-01-01 00:00:00 UTC with which to start
-   * window intervals. For example, in order to have hourly tumbling windows that
-   * start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide
-   * `startTime` as `15 minutes`.
+   * @param windowDuration
+   *   A string specifying the width of the window, e.g. `10 minutes`,
+   *   `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for
+   *   valid duration identifiers. Note that the duration is a fixed length of
+   *   time, and does not vary over time according to a calendar. For example,
+   *   `1 day` always means 86,400,000 milliseconds, not a calendar day.
+   * @param slideDuration
+   *   A string specifying the sliding interval of the window, e.g. `1 minute`.
+   *   A new window will be generated every `slideDuration`. Must be less than
+   *   or equal to the `windowDuration`. Check
+   *   `org.apache.spark.unsafe.types.CalendarInterval` for valid duration
+   *   identifiers. This duration is likewise absolute, and does not vary
+   *   according to a calendar.
+   * @param startTime
+   *   The offset with respect to 1970-01-01 00:00:00 UTC with which to start
+   *   window intervals. For example, in order to have hourly tumbling windows that
+   *   start 15 minutes past the hour, e.g. 12:15-13:15, 13:15-14:15... provide
+   *   `startTime` as `15 minutes`.
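+   *
+   * A usage sketch of a tumbling window, where the slide equals the window length
+   * ("timestampCol" and "w" are illustrative names):
+   * {{{
+   * df.withColumn("w", colTimestamp("timestampCol").window("10 minutes", "10 minutes", "0 seconds"))
+   * }}}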
   * @group Timestamp Type
   */
  def window(
@@ -71,6 +104,8 @@
      .toDC
 
  /**
+   * Safe casting to Date column
+   *
   * @group Timestamp Type
   * @return
   *   a Date Column without the hour
   */
  def toDate: DateColumn = column.elem.map(f.to_date).toDC
 
  /**
+   * Safe casting to LocalDate column
+   *
   * @group Timestamp Type
   * @return
   *   a LocalDate Column without the hour
diff --git a/core/src/main/scala/doric/syntax/WhenBuilder.scala b/core/src/main/scala/doric/syntax/WhenBuilder.scala
index 32f7675b9..3a49dad69 100644
--- a/core/src/main/scala/doric/syntax/WhenBuilder.scala
+++ b/core/src/main/scala/doric/syntax/WhenBuilder.scala
@@ -30,7 +30,7 @@ final private[doric] case class WhenBuilder[T](
  /**
-   * ads a case that if the condition is matched, the value is returned
+   * adds a case that if the condition is matched, the value is returned
   * @param cond
-   *   BooleanColumn with the condition to satify
+   *   BooleanColumn with the condition to satisfy
   * @param elem
   *   the returned element if the condition is true
   * @return
diff --git a/core/src/test/scala/doric/syntax/DateColumnsSpec.scala b/core/src/test/scala/doric/syntax/DateColumnsSpec.scala
index d81ebf59e..82c14f4c5 100644
--- a/core/src/test/scala/doric/syntax/DateColumnsSpec.scala
+++ b/core/src/test/scala/doric/syntax/DateColumnsSpec.scala
@@ -356,28 +356,42 @@ class DateColumnsSpec
    }
  }
 
-  describe("trunc doric function with literal") {
+  describe("truncate doric function with literal") {
    import spark.implicits._
 
    val df = List(Date.valueOf("2021-10-05"), null).toDF("dateCol")
 
-    it("should work as spark date_format function with literal") {
+    it("should work as spark trunc function with dates") {
      df.testColumns2("dateCol", "yyyy")(
-        (d, m) => colDate(d).trunc(m.lit),
+        (d, m) => colDate(d).truncate(m.lit),
        (d, m) => f.trunc(f.col(d), m),
        List(Date.valueOf("2021-01-01"), null).map(Option(_))
      )
    }
 
-    it("should return null if malformed format") {
+    it("should return null if the format is not valid for dates") {
-      df.testColumns2("dateCol", "yabcd")(
+      df.testColumns2("dateCol", "second")(
-        (d, m) => colDate(d).trunc(m.lit),
+        (d, m) => colDate(d).truncate(m.lit),
        (d, m) => f.trunc(f.col(d), m),
        List(null, null).map(Option(_))
      )
    }
  }
 
+  describe("date unixTimestamp doric function") {
+    import spark.implicits._
+
+    val df = List(Date.valueOf("2021-10-05"), null).toDF("dateCol")
+
+    it("should work as spark unix_timestamp function") {
+      df.testColumns("dateCol")(
+        d => colDate(d).unixTimestamp,
+        d => f.unix_timestamp(f.col(d)),
+        List(Some(1633392000L), None)
+      )
+    }
+  }
+
  describe("weekOfYear doric function") {
    import spark.implicits._
 
diff --git a/core/src/test/scala/doric/syntax/NumericOperationsSpec.scala b/core/src/test/scala/doric/syntax/NumericOperationsSpec.scala
index 33e665b2d..ba8df8de1 100644
--- a/core/src/test/scala/doric/syntax/NumericOperationsSpec.scala
+++ b/core/src/test/scala/doric/syntax/NumericOperationsSpec.scala
@@ -1,17 +1,17 @@
 package doric
 package syntax
 
-import scala.reflect.{classTag, ClassTag}
-
+import scala.reflect.{ClassTag, classTag}
 import doric.types.{NumericType, SparkType}
 import org.scalatest.funspec.AnyFunSpecLike
-
 import org.apache.spark.sql.{functions => f}
 import org.apache.spark.sql.DataFrame
 
-trait NumericOperationsSpec extends AnyFunSpecLike with TypedColumnTest {
+import java.sql.Timestamp
 
-  import doric.implicitConversions.stringCname
+import doric.implicitConversions.stringCname
+
+trait NumericOperationsSpec extends AnyFunSpecLike with TypedColumnTest {
 
  def df: DataFrame
 
@@ -88,10 +88,96 @@ class NumericSpec extends NumericOperationsSpec with SparkSessionTestWrapper {
      .toDF("col1")
 
    df.testColumns2("col1", 1)(
-      (c, d) => colDouble(c.cname).formatNumber(d.lit),
+      (c, d) => colDouble(c).formatNumber(d.lit),
      (c, d) => f.format_number(f.col(c), d),
      List(Some("123.6"), Some("1.0"), None)
    )
  }
  }
+
+  describe("timestampSeconds doric function") {
+    import spark.implicits._
+
+    it("should work as spark timestamp_seconds function with integers") {
+      val df = List(Some(123), Some(1), None)
+        .toDF("col1")
+
+      df.testColumns("col1")(
+        c => colInt(c).timestampSeconds,
+        c => f.timestamp_seconds(f.col(c)),
+        List(
+          Some(Timestamp.valueOf("1970-01-01 00:02:03")),
+          Some(Timestamp.valueOf("1970-01-01 00:00:01")),
+          None
+        )
+      )
+    }
+
+    it("should work as spark timestamp_seconds function with longs") {
+      val df = List(Some(123L), Some(1L), None)
+        .toDF("col1")
+
+      df.testColumns("col1")(
+        c => colLong(c).timestampSeconds,
+        c => f.timestamp_seconds(f.col(c)),
+        List(
+          Some(Timestamp.valueOf("1970-01-01 00:02:03")),
+          Some(Timestamp.valueOf("1970-01-01 00:00:01")),
+          None
+        )
+      )
+    }
+
+    it("should work as spark timestamp_seconds function with doubles") {
+      val df = List(Some(123.2), Some(1.9), None)
+        .toDF("col1")
+
+      df.testColumns("col1")(
+        c => colDouble(c).timestampSeconds,
+        c => f.timestamp_seconds(f.col(c)),
+        List(
+          Some(Timestamp.valueOf("1970-01-01 00:02:03.2")),
+          Some(Timestamp.valueOf("1970-01-01 00:00:01.9")),
+          None
+        )
+      )
+    }
+  }
+
+  describe("fromUnixTime doric function") {
+    import spark.implicits._
+
+    it("should work as spark from_unixtime function") {
+      val df = List(Some(123L), Some(1L), None)
+        .toDF("col1")
+
+      df.testColumns("col1")(
+        c => colLong(c).fromUnixTime,
+        c => f.from_unixtime(f.col(c)),
+        List(Some("1970-01-01 00:02:03"), Some("1970-01-01 00:00:01"), None)
+      )
+    }
+
+    it("should work as spark from_unixtime(pattern) function") {
+      val df = List(Some(123L), Some(1L), None)
+        .toDF("col1")
+
+      df.testColumns2("col1", "yyyy-MM-dd h:m:s")(
+        (c, p) => colLong(c).fromUnixTime(p.lit),
+        (c, p) => f.from_unixtime(f.col(c), p),
+        List(Some("1970-01-01 12:2:3"), Some("1970-01-01 12:0:1"), None)
+      )
+    }
+
+    it("should fail if wrong pattern is given") {
+      val df = List(Some(123L), Some(1L), None)
+        .toDF("col1")
+
+      intercept[IllegalArgumentException](
+        df.select(colLong("col1").fromUnixTime("wrong pattern".lit))
+          .collect()
+      )
+    }
+  }
+
 }
diff --git a/core/src/test/scala/doric/syntax/StringColumnsSpec.scala b/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
index 707d1d621..9599e5d21 100644
--- a/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
+++ b/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
@@ -874,4 +874,36 @@ class StringColumnsSpec
    }
  }
 
+  describe("string unixTimestamp doric function") {
+    import spark.implicits._
+
+    val df = List("2021-10-05", "20211005", null).toDF("dateCol")
+
+    it("should work as spark unix_timestamp function") {
+      val dfString = List("2021-10-05 01:02:03", "20211005", null)
+        .toDF("dateCol")
+
+      dfString.testColumns("dateCol")(
+        d => colString(d).unixTimestamp,
+        d => f.unix_timestamp(f.col(d)),
+        List(Some(1633395723L), None, None)
+      )
+    }
+
+    it("should work as spark unix_timestamp(pattern) function") {
+      df.testColumns2("dateCol", "yyyy-mm-dd")(
+        (d, m) => colString(d).unixTimestamp(m.lit),
+        (d, m) => f.unix_timestamp(f.col(d), m),
+        List(Some(1609805400L), None, None)
+      )
+    }
+
+    it("should fail if malformed format") {
+      intercept[IllegalArgumentException](
+        df.select(colString("dateCol").unixTimestamp("yabcd".lit))
+          .collect()
+      )
+    }
+  }
+
 }
diff --git a/core/src/test/scala/doric/syntax/TimestampColumnsSpec.scala b/core/src/test/scala/doric/syntax/TimestampColumnsSpec.scala
index 964eb5139..5e97da75b 100644
--- a/core/src/test/scala/doric/syntax/TimestampColumnsSpec.scala
+++ b/core/src/test/scala/doric/syntax/TimestampColumnsSpec.scala
@@ -53,6 +53,50 @@ class TimestampColumnsSpec
    }
  }
 
+  describe("fromUtc doric function") {
+    import spark.implicits._
+
+    val df = List(Timestamp.valueOf("2017-07-14 02:40:00"), null)
+      .toDF("timestampCol")
+
+    it("should work as spark from_utc_timestamp function") {
+      df.testColumns2("timestampCol", "GMT+1")(
+        (d, m) => colTimestamp(d).fromUtc(m.lit),
+        (d, m) => f.from_utc_timestamp(f.col(d), m),
+        List(Timestamp.valueOf("2017-07-14 03:40:00"), null).map(Option(_))
+      )
+    }
+
+    it("should fail if invalid timeZone") {
+      intercept[java.time.DateTimeException](
+        df.select(colTimestamp("timestampCol").fromUtc("wrong timeZone".lit))
+          .collect()
+      )
+    }
+  }
+
+  describe("toUtc doric function") {
+    import spark.implicits._
+
+    val df = List(Timestamp.valueOf("2017-07-14 02:40:00"), null)
+      .toDF("timestampCol")
+
+    it("should work as spark to_utc_timestamp function") {
+      df.testColumns2("timestampCol", "GMT+1")(
+        (d, m) => colTimestamp(d).toUtc(m.lit),
+        (d, m) => f.to_utc_timestamp(f.col(d), m),
+        List(Timestamp.valueOf("2017-07-14 01:40:00"), null).map(Option(_))
+      )
+    }
+
+    it("should fail if invalid timeZone") {
+      intercept[java.time.DateTimeException](
+        df.select(colTimestamp("timestampCol").toUtc("wrong timeZone".lit))
+          .collect()
+      )
+    }
+  }
+
  describe("addMonths doric function with column") {
    import spark.implicits._
 
@@ -384,21 +428,21 @@ class TimestampColumnsSpec
-  describe("trunc doric function with literal") {
+  describe("truncate doric function with literal") {
    import spark.implicits._
 
-    val df =
-      List(Timestamp.valueOf("2021-10-05 00:00:00"), null).toDF("timestampCol")
+    val df = List(Timestamp.valueOf("2021-10-05 00:00:00"), null)
+      .toDF("timestampCol")
 
-    it("should work as spark trunc function with literal") {
+    it("should work as spark date_trunc function with literal") {
      df.testColumns2("timestampCol", "yyyy")(
-        (d, m) => colTimestamp(d).trunc(m.lit),
-        (d, m) => f.trunc(f.col(d), m),
-        List(Date.valueOf("2021-01-01"), null).map(Option(_))
+        (d, m) => colTimestamp(d).truncate(m.lit),
+        (d, m) => f.date_trunc(m, f.col(d)),
+        List(Timestamp.valueOf("2021-01-01 00:00:00"), null).map(Option(_))
      )
    }
 
    it("should return null if malformed format") {
      df.testColumns2("timestampCol", "yabcd")(
-        (d, m) => colTimestamp(d).trunc(m.lit),
-        (d, m) => f.trunc(f.col(d), m),
+        (d, m) => colTimestamp(d).truncate(m.lit),
+        (d, m) => f.date_trunc(m, f.col(d)),
        List(null, null).map(Option(_))
      )
    }