Update scalafmt-core to 3.0.0 (#77)
* Update scalafmt-core to 3.0.0

* Reformat with scalafmt 3.0.0
scala-steward authored Aug 19, 2021
1 parent 87874ee commit 1c4cb58
Showing 22 changed files with 460 additions and 243 deletions.
2 changes: 1 addition & 1 deletion .scalafmt.conf
@@ -1,4 +1,4 @@
version = "2.7.5"
version = "3.0.0"
align.preset = more
maxColumn = 80
assumeStandardLibraryStripMargin = true
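The Scaladoc re-wrapping visible throughout this diff comes from this version bump: scalafmt 3.x reflows docstrings by default, which is why so many comment blocks below change without any code change. As an aside (not part of this commit), a minimal sketch of how that behaviour could be pinned explicitly in .scalafmt.conf, using the docstrings keys from the scalafmt 3.x documentation; the values shown are assumptions, since this repository relies on the defaults:

  version = "3.0.0"
  maxColumn = 80
  docstrings.style = Asterisk   // assumed; matches the "/** ... * ... */" layout seen in this diff
  docstrings.wrap = yes         // assumed; reflow Scaladoc bodies to maxColumn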
2 changes: 1 addition & 1 deletion build.sbt
@@ -1,5 +1,5 @@
ThisBuild / organization := "org.hablapps"
ThisBuild / homepage := Some(url("https://github.com/hablapps/doric"))
ThisBuild / homepage := Some(url("https://github.com/hablapps/doric"))
ThisBuild / licenses := List(
"Apache-2.0" -> url("https://www.apache.org/licenses/LICENSE-2.0")
)
4 changes: 2 additions & 2 deletions project/plugins.sbt
@@ -1,4 +1,4 @@
logLevel := Level.Warn

addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.7")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
addSbtPlugin("com.geirsson" % "sbt-ci-release" % "1.5.7")
addSbtPlugin("org.scalameta" % "sbt-scalafmt" % "2.4.3")
34 changes: 20 additions & 14 deletions src/main/scala/doric/sem/AggregationOps.scala
@@ -9,23 +9,24 @@ trait AggregationOps extends RelationalGroupedDatasetDoricInterface {
implicit class DataframeAggSyntax(df: Dataset[_]) {

/**
* Groups the Dataset using the specified columns, so we can run aggregation on them. See
* Groups the Dataset using the specified columns, so we can run
* aggregation on them. See
*/
def groupBy(cols: DoricColumn[_]*): RelationalGroupedDataset = {
sparkGroupBy(df.toDF(), cols: _*).returnOrThrow("groupBy")
}

/**
* Create a multi-dimensional cube for the current Dataset using the specified columns,
* so we can run aggregation on them.
* Create a multi-dimensional cube for the current Dataset using the
* specified columns, so we can run aggregation on them.
*/
def cube(cols: DoricColumn[_]*): RelationalGroupedDataset = {
sparkCube(df.toDF(), cols: _*).returnOrThrow("cube")
}

/**
* Create a multi-dimensional rollup for the current Dataset using the specified columns,
* so we can run aggregation on them.
* Create a multi-dimensional rollup for the current Dataset using the
* specified columns, so we can run aggregation on them.
*/
def rollup(cols: DoricColumn[_]*): RelationalGroupedDataset = {
sparkRollup(df.toDF(), cols: _*).returnOrThrow("rollup")
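As a usage illustration that is not part of this diff, a sketch of the three grouping entry points above; colString follows the selector used in the JoinOps Scaladoc later in this commit, and the column names are made up:

  // Each call validates the doric columns and returns a plain Spark
  // RelationalGroupedDataset (or throws doric's aggregated error on failure).
  val grouped  = df.groupBy(colString("department"))
  val cubed    = df.cube(colString("department"), colString("city"))
  val rolledUp = df.rollup(colString("department"), colString("city"))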
@@ -35,21 +36,26 @@ trait AggregationOps extends RelationalGroupedDatasetDoricInterface {
implicit class RelationalGroupedDatasetSem(rel: RelationalGroupedDataset) {

/**
* Compute aggregates by specifying a series of aggregate columns. Note that this function by
* default retains the grouping columns in its output. To not retain grouping columns, set
* Compute aggregates by specifying a series of aggregate columns. Note
* that this function by default retains the grouping columns in its
* output. To not retain grouping columns, set
* `spark.sql.retainGroupColumns` to false.
*/
def agg(col: DoricColumn[_], cols: DoricColumn[_]*): DataFrame =
sparkAgg(rel, col, cols: _*).returnOrThrow("agg")

/**
* Pivots a column of the current `DataFrame` and performs the specified aggregation.
* There are two versions of the pivot function: one that requires the caller to specify the list
* of distinct values to pivot on, and one that does not. The latter is more concise but less
* efficient, because Spark needs to first compute the list of distinct values internally.
* @param expr doric column to pivot
* @param values the values of the column to extract
* @tparam T The type of the column and parameters
* Pivots a column of the current `DataFrame` and performs the specified
* aggregation. There are two versions of the pivot function: one that requires
* the caller to specify the list of distinct values to pivot on, and one
* that does not. The latter is more concise but less efficient, because
* Spark needs to first compute the list of distinct values internally.
* @param expr
* doric column to pivot
* @param values
* the values of the column to extract
* @tparam T
* The type of the column and parameters
*/
def pivot[T](expr: DoricColumn[T])(
values: Seq[T]
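A hedged sketch of the pivot call whose signature is truncated just above; the aggregation applied after the pivot lies outside this excerpt, so it is left out here too. colString and the column names are illustrative:

  // Pivots on the explicitly listed distinct values "A" and "B" of the product
  // column, the more efficient of the two variants described in the Scaladoc.
  val pivoted = df
    .groupBy(colString("year"))
    .pivot(colString("product"))(Seq("A", "B"))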
72 changes: 48 additions & 24 deletions src/main/scala/doric/sem/CollectOps.scala
@@ -9,21 +9,29 @@ trait CollectOps {

/**
* Collects the provided columns of the dataframe
* @param col1 the Doric column to collect from the dataframe
* @tparam T1 the type of the column to collect, must have a Spark `Encoder`
* @return The array of the selected column
* @param col1
* the Doric column to collect from the dataframe
* @tparam T1
* the type of the column to collect, must have a Spark `Encoder`
* @return
* The array of the selected column
*/
def collectCols[T1: Encoder](col1: DoricColumn[T1]): Array[T1] = {
df.select(col1).as[T1].collect()
}

/**
* Collects the provided columns of the dataframe
* @param col1 the Doric column to collect from the dataframe
* @param col2 other Doric column to collect from the dataframe
* @tparam T1 the type of the column to collect, must have a Spark `Encoder`
* @tparam T2 the type of the second column to collect, must have a Spark `Encoder`
* @return The array of the selected columns
* @param col1
* the Doric column to collect from the dataframe
* @param col2
* other Doric column to collect from the dataframe
* @tparam T1
* the type of the column to collect, must have a Spark `Encoder`
* @tparam T2
* the type of the second column to collect, must have a Spark `Encoder`
* @return
* The array of the selected columns
*/
def collectCols[T1, T2](
col1: DoricColumn[T1],
@@ -34,13 +42,20 @@ trait CollectOps {

/**
* Collects the provided columns of the dataframe
* @param col1 the Doric column to collect from the dataframe
* @param col2 second Doric column to collect from the dataframe
* @param col3 third Doric column to collect from the dataframe
* @tparam T1 the type of the column to collect, must have a Spark `Encoder`
* @tparam T2 the type of the second column to collect, must have a Spark `Encoder`
* @tparam T3 the type of the third column to collect, must have a Spark `Encoder`
* @return The array of the selected columns
* @param col1
* the Doric column to collect from the dataframe
* @param col2
* second Doric column to collect from the dataframe
* @param col3
* third Doric column to collect from the dataframe
* @tparam T1
* the type of the column to collect, must have a Spark `Encoder`
* @tparam T2
* the type of the second column to collect, must have a Spark `Encoder`
* @tparam T3
* the type of the third column to collect, must have a Spark `Encoder`
* @return
* The array of the selected columns
*/
def collectCols[T1, T2, T3](
col1: DoricColumn[T1],
@@ -52,15 +67,24 @@ trait CollectOps {

/**
* Collects the provided columns of the dataframe
* @param col1 the Doric column to collect from the dataframe
* @param col2 second Doric column to collect from the dataframe
* @param col3 third Doric column to collect from the dataframe
* @param col4 fourth Doric column to collect from the dataframe
* @tparam T1 the type of the column to collect, must have a Spark `Encoder`
* @tparam T2 the type of the second column to collect, must have a Spark `Encoder`
* @tparam T3 the type of the third column to collect, must have a Spark `Encoder`
* @tparam T4 the type of the fourth column to collect, must have a Spark `Encoder`
* @return The array of the selected columns
* @param col1
* the Doric column to collect from the dataframe
* @param col2
* second Doric column to collect from the dataframe
* @param col3
* third Doric column to collect from the dataframe
* @param col4
* fourth Doric column to collect from the dataframe
* @tparam T1
* the type of the column to collect, must have a Spark `Encoder`
* @tparam T2
* the type of the second column to collect, must have a Spark `Encoder`
* @tparam T3
* the type of the third column to collect, must have a Spark `Encoder`
* @tparam T4
* the type of the fourth column to collect, must have a Spark `Encoder`
* @return
* The array of the selected columns
*/
def collectCols[T1, T2, T3, T4](
col1: DoricColumn[T1],
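A brief usage sketch of the single-column collectCols overload above (the multi-column overloads follow the same pattern). It assumes a SparkSession named spark is in scope so that spark.implicits._ can supply the Encoder, and colInt matches the selector used in the TransformOps Scaladoc below:

  import spark.implicits._                               // provides Encoder[Int]; assumes a SparkSession named `spark`
  val ages: Array[Int] = df.collectCols(colInt("age"))   // collects the "age" column into an Array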
2 changes: 1 addition & 1 deletion src/main/scala/doric/sem/Errors.scala
@@ -14,7 +14,7 @@ case class DoricMultiError(

implicit class StringOps(s: String) {
private val indentation = " "
def withTabs: String = indentation + s.replaceAll("\n", s"\n$indentation")
def withTabs: String = indentation + s.replaceAll("\n", s"\n$indentation")
}

implicit class JoinCases(errors: NonEmptyChain[DoricSingleError]) {
59 changes: 36 additions & 23 deletions src/main/scala/doric/sem/JoinOps.scala
@@ -9,20 +9,25 @@ trait JoinOps {
implicit class DataframeJoinSyntax[A](df: Dataset[A]) {

/**
* Join with another `DataFrame`, using the given doric columns. The following performs
* a full outer join between `df1` and `df2` by the key `dfKey` that must be string type.
* Join with another `DataFrame`, using the given doric columns. The
* following performs a full outer join between `df1` and `df2` by the key
* `dfKey` that must be string type.
*
* {{{
* df1.join(df2,"outer", colString("dfKey"))
* }}}
*
* @param df2 Right side of the join.
* @param joinType Type of join to perform. Default `inner`. Must be one of:
* `inner`, `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`,
* `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`,
* `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`.
* @param col Doric column that must be in both dataframes.
* @param cols rest of doric columns that must be in both dataframes.
* @param df2
* Right side of the join.
* @param joinType
* Type of join to perform. Default `inner`. Must be one of: `inner`,
* `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`,
* `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`,
* `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`.
* @param col
* Doric column that must be in both dataframes.
* @param cols
* rest of doric columns that must be in both dataframes.
*/
def join(
df2: Dataset[_],
@@ -48,20 +53,24 @@ trait JoinOps {
}

/**
* Join with another `DataFrame`, using the given doric columns. The following performs
* a full outer join between `df1` with key `dfKey1` and `df2` with key `dfKey2` that must be string type.
* Join with another `DataFrame`, using the given doric columns. The
* following performs a full outer join between `df1` with key `dfKey1` and
* `df2` with key `dfKey2` that must be string type.
*
* {{{
* val joinColumn = Left.colString("dfKey1") === Right.colString("dfKey2")
* df1.join(df2, joinColumn, "outer")
* }}}
*
* @param df2 Right side of the join.
* @param colum Doric join column that must be in both dataframes.
* @param joinType Type of join to perform. Default `inner`. Must be one of:
* `inner`, `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`,
* `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`,
* `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`.
* @param df2
* Right side of the join.
* @param colum
* Doric join column that must be in both dataframes.
* @param joinType
* Type of join to perform. Default `inner`. Must be one of: `inner`,
* `cross`, `outer`, `full`, `fullouter`, `full_outer`, `left`,
* `leftouter`, `left_outer`, `right`, `rightouter`, `right_outer`,
* `semi`, `leftsemi`, `left_semi`, `anti`, `leftanti`, `left_anti`.
*/
def join(
df2: Dataset[_],
Expand All @@ -75,17 +84,21 @@ trait JoinOps {
}

/**
* Join with another `DataFrame`, using the given doric columns. The following performs
* an inner join between `df1` and `df2` by the key `dfKey` that must be string type.
* It drops the `dfKey` column of the right dataframe from the returned dataframe.
* Join with another `DataFrame`, using the given doric columns. The
* following performs an inner join between `df1` and `df2` by the key
* `dfKey` that must be string type. It drops the `dfKey` column of the
* right dataframe from the returned dataframe.
*
* {{{
* df1.innerJoinKeepLeftKeys(df2, colString("dfKey"))
* }}}
*
* @param df2 Right side of the join.
* @param column Doric column that must be in both dataframes.
* @param columns rest of doric columns that must be in both dataframes.
* @param df2
* Right side of the join.
* @param column
* Doric column that must be in both dataframes.
* @param columns
* rest of doric columns that must be in both dataframes.
*/
def innerJoinKeepLeftKeys(
df2: Dataset[_],
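A composite sketch of the two join flavours documented in this file, reusing only the calls already shown in its Scaladoc; the key names are illustrative:

  // Same key name in both dataframes, one doric column per key:
  val outerJoined = df1.join(df2, "outer", colString("dfKey"))

  // Different key names, disambiguated with the Left/Right qualifiers from the Scaladoc example:
  val joinColumn  = Left.colString("dfKey1") === Right.colString("dfKey2")
  val innerJoined = df1.join(df2, joinColumn, "inner")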
23 changes: 14 additions & 9 deletions src/main/scala/doric/sem/TransformOps.scala
@@ -10,15 +10,18 @@ trait TransformOps {
implicit class DataframeTransformationSyntax[A](df: Dataset[A]) {

/**
* Returns a new Dataset by adding a column or replacing the existing column that has
* the same name.
* Returns a new Dataset by adding a column or replacing the existing
* column that has the same name.
*
* `column`'s expression must only refer to attributes supplied by this Dataset. It is an
* error to add a column that refers to some other Dataset.
* `column`'s expression must only refer to attributes supplied by this
* Dataset. It is an error to add a column that refers to some other
* Dataset.
*
* @note this method introduces a projection internally. Therefore, calling it multiple times,
* for instance, via loops in order to add multiple columns can generate big plans which
* can cause performance issues and even `StackOverflowException`.
* @note
* this method introduces a projection internally. Therefore, calling it
* multiple times, for instance, via loops in order to add multiple
* columns can generate big plans which can cause performance issues and
* even `StackOverflowException`.
*/
def withColumn[T](colName: String, col: DoricColumn[T]): DataFrame = {
col.elem
Expand All @@ -35,7 +38,8 @@ trait TransformOps {
* peopleDs.where(colInt("age") > 15)
* }}}
*
* @param condition BooleanColumn that lets pass the elements that are true
* @param condition
* BooleanColumn that lets pass the elements that are true
*/
def filter(condition: BooleanColumn): Dataset[A] = {
condition.elem
Expand All @@ -52,7 +56,8 @@ trait TransformOps {
* peopleDs.where(colInt("age") > 15)
* }}}
*
* @param condition BooleanColumn that lets pass the elements that are true
* @param condition
* BooleanColumn that lets pass the elements that are true
*/
def where(condition: BooleanColumn): Dataset[A] = {
condition.elem
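A small end-to-end sketch combining the three operations above; colInt and the age predicate mirror the Scaladoc examples in this file, while the column names are illustrative:

  val adults = peopleDs
    .withColumn("isAdult", colInt("age") > 18) // adds or replaces the "isAdult" column
    .filter(colInt("age") > 15)                // keeps only rows where the predicate holds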
