From 273f5d62ca5a74463b4c37e357706b5962e5aa2e Mon Sep 17 00:00:00 2001
From: Eduardo Ruiz Alonso
Date: Fri, 19 Nov 2021 12:31:02 +0100
Subject: [PATCH] #71 feat: add concat_ws (#110), sbt-api-mappings & publish
 temp site

* feat: add concatWs function (resolves #71)

* feat: add sbt-api-mappings

* feat: add publish temp site

Co-authored-by: Eduardo Ruiz
---
 .github/workflows/publishTempSite.yml         | 39 +++++++++++++++++++
 .github/workflows/release.yml                 |  4 +-
 .../main/scala/doric/syntax/DateColumns.scala |  2 +-
 .../scala/doric/syntax/StringColumns.scala    | 30 ++++++++++++++
 .../doric/syntax/StringColumnsSpec.scala      | 15 +++++++
 project/plugins.sbt                           |  1 +
 6 files changed, 88 insertions(+), 3 deletions(-)
 create mode 100644 .github/workflows/publishTempSite.yml

diff --git a/.github/workflows/publishTempSite.yml b/.github/workflows/publishTempSite.yml
new file mode 100644
index 000000000..5bdd79443
--- /dev/null
+++ b/.github/workflows/publishTempSite.yml
@@ -0,0 +1,39 @@
+name: Publish temp site
+on:
+  push:
+    branches-ignore:
+      - main
+      - gh-pages
+
+jobs:
+  publish:
+    runs-on: ubuntu-20.04
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - uses: olafurpg/setup-scala@v13
+      - name: Coursier cache
+        uses: coursier/cache-action@v6
+      - name: Build the microsite
+        run: |
+          sbt docs/mdoc
+      - name: Build the scaladocs
+        run: |
+          sbt core/doc
+      - name: Copy site and docs to gh-pages branch
+        run: |
+          mkdir -p docs/target/mdoc/docs/api/latest
+          cp -ar core/target/scala-2.12/api/* docs/target/mdoc/docs/api/latest/
+      - name: Deploy 🚀
+        uses: JamesIves/github-pages-deploy-action@4.1.6
+        with:
+          branch: gh-pages # The branch the action should deploy to.
+          folder: docs/target/mdoc # The folder the action should deploy.
+      - name: Cleaning for cache
+        run: |
+          rm -rf "$HOME/.ivy2/local" || true
+          find $HOME/Library/Caches/Coursier/v1 -name "ivydata-*.properties" -delete || true
+          find $HOME/.ivy2/cache -name "ivydata-*.properties" -delete || true
+          find $HOME/.cache/coursier/v1 -name "ivydata-*.properties" -delete || true
+          find $HOME/.sbt -name "*.lock" -delete || true
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b19000fde..13e6b92a0 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -33,12 +33,12 @@ jobs:
       - name: Build the scaladocs
         run: |
           sbt core/doc
-      - name: Copy sit and docs to gh-pages branch
+      - name: Copy site and docs to gh-pages branch
         run: |
           mkdir -p docs/target/mdoc/docs/api/latest
           cp -ar core/target/scala-2.12/api/* docs/target/mdoc/docs/api/latest/
       - name: Deploy 🚀
-        uses: JamesIves/github-pages-deploy-action@4.1.5
+        uses: JamesIves/github-pages-deploy-action@4.1.6
         with:
           branch: gh-pages # The branch the action should deploy to.
           folder: docs/target/mdoc # The folder the action should deploy.
diff --git a/core/src/main/scala/doric/syntax/DateColumns.scala b/core/src/main/scala/doric/syntax/DateColumns.scala
index a9159057f..d880ea311 100644
--- a/core/src/main/scala/doric/syntax/DateColumns.scala
+++ b/core/src/main/scala/doric/syntax/DateColumns.scala
@@ -3,7 +3,7 @@ package syntax
 
 import cats.implicits._
 import doric.types.{DateType, SparkType}
-import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, MonthsBetween, NextDay, TruncDate, TruncTimestamp, UnixTimestamp}
+import org.apache.spark.sql.catalyst.expressions.{DateFormatClass, MonthsBetween, NextDay, TruncDate, TruncTimestamp}
 import org.apache.spark.sql.{Column, functions => f}
 
 import java.sql.Date
diff --git a/core/src/main/scala/doric/syntax/StringColumns.scala b/core/src/main/scala/doric/syntax/StringColumns.scala
index 69c133772..04e380cbf 100644
--- a/core/src/main/scala/doric/syntax/StringColumns.scala
+++ b/core/src/main/scala/doric/syntax/StringColumns.scala
@@ -21,6 +21,36 @@ private[syntax] trait StringColumns {
   def concat(cols: StringColumn*): StringColumn =
     cols.map(_.elem).toList.sequence.map(f.concat(_: _*)).toDC
 
+  /**
+    * Concatenates multiple input string columns into a single string column,
+    * using the given separator.
+    *
+    * @note null columns are skipped: the remaining non-null values are concatenated, or an empty string if all of them are null.
+    * @example {{{
+    * df.withColumn("res", concatWs("-".lit, col("col1"), col("col2")))
+    *   .show(false)
+    * +----+----+----+
+    * |col1|col2| res|
+    * +----+----+----+
+    * |   1|   1| 1-1|
+    * |null|   2|   2|
+    * |   3|null|   3|
+    * |null|null|    |
+    * +----+----+----+
+    * }}}
+    * @group String Type
+    * @see [[org.apache.spark.sql.functions.concat_ws]]
+    */
+  def concatWs(sep: StringColumn, cols: StringColumn*): StringColumn =
+    (sep +: cols)
+      .map(_.elem)
+      .toList
+      .sequence
+      .map { l =>
+        new Column(ConcatWs(l.map(_.expr)))
+      }
+      .toDC
+
   /**
     * Formats the arguments in printf-style and returns the result as a string
     * column.
diff --git a/core/src/test/scala/doric/syntax/StringColumnsSpec.scala b/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
index 9599e5d21..275b449b9 100644
--- a/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
+++ b/core/src/test/scala/doric/syntax/StringColumnsSpec.scala
@@ -41,6 +41,21 @@ class StringColumnsSpec
     }
   }
 
+  describe("concatWs doric function") {
+    import spark.implicits._
+
+    val df = List(("1", "1"), (null, "2"), ("3", null), (null, null))
+      .toDF("col1", "col2")
+
+    it("should work as spark concat_ws function") {
+      df.testColumns3("-", "col1", "col2")(
+        (sep, col1, col2) => concatWs(sep.lit, col(col1), col(col2)),
+        (sep, col1, col2) => f.concat_ws(sep, f.col(col1), f.col(col2)),
+        List("1-1", "2", "3", "").map(Option(_))
+      )
+    }
+  }
+
   describe("formatString doric function") {
     import spark.implicits._
 
diff --git a/project/plugins.sbt b/project/plugins.sbt
index 640d78ad0..feb9ac04f 100644
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -4,3 +4,4 @@ addSbtPlugin("com.github.sbt" % "sbt-ci-release" % "1.5.10")
 addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.4")
 addSbtPlugin("org.scalameta" % "sbt-mdoc" % "2.2.24")
 addSbtPlugin("org.scoverage" % "sbt-scoverage" % "1.9.2")
+addSbtPlugin("com.thoughtworks.sbt-api-mappings" % "sbt-api-mappings" % "3.0.0")
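
A minimal usage sketch of the new function, not part of the patch itself: the SparkSession setup is an assumption for a standalone run, and the sample data simply mirrors the test above.

    import doric._
    import org.apache.spark.sql.SparkSession

    val spark = SparkSession.builder().master("local[*]").getOrCreate()
    import spark.implicits._

    // Same sample data as in StringColumnsSpec.
    val df = List(("1", "1"), (null, "2"), ("3", null), (null, null))
      .toDF("col1", "col2")

    // null values are skipped, matching Spark's concat_ws: (null, "2")
    // yields "2", and an all-null row yields the empty string.
    df.withColumn("res", concatWs("-".lit, col("col1"), col("col2")))
      .show()

Note that, unlike Spark's concat_ws, which takes the separator as a plain String, concatWs takes it as a StringColumn ("-".lit above); the implementation prepends it to cols so the separator participates in the same error-accumulating sequence as the other arguments.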