Skip to content

Commit

Permalink
[SPARK-48989][SQL][FOLLOWUP] Fix SubstringIndex codegen
Browse files Browse the repository at this point in the history
### What changes were proposed in this pull request?
Fix genCode for `SubstringIndex`.

### Why are the changes needed?
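In the original PR, the ICU branch of `genCode` used the format string `ICU(%s, %s, %d, %s)`, which swaps the specifiers for `count` and `collationId`; the correct format string is `ICU(%s, %s, %s, %d)`.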

### Does this PR introduce _any_ user-facing change?
Yes, genCode for `SubstringIndex` now works properly for all collations, including those that take the ICU code path.

### How was this patch tested?
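Extended the existing `SUBSTRING_INDEX` test in `StringFunctionsSuite` to run under each of the `UTF8_BINARY`, `UTF8_LCASE`, `UNICODE`, and `UNICODE_CI` default collations.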

### Was this patch authored or co-authored using generative AI tooling?
No.

Closes apache#47610 from uros-db/followup-48989.

Authored-by: Uros Bojanic <157381213+uros-db@users.noreply.github.com>
Signed-off-by: Wenchen Fan <wenchen@databricks.com>
  • Loading branch information
uros-db authored and cloud-fan committed Aug 6, 2024
1 parent 2db05db commit c3985ac
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -477,7 +477,7 @@ public static String genCode(final String string, final String delimiter,
} else if (collation.supportsLowercaseEquality) {
return String.format(expr + "Lowercase(%s, %s, %s)", string, delimiter, count);
} else {
return String.format(expr + "ICU(%s, %s, %d, %s)", string, delimiter, count, collationId);
return String.format(expr + "ICU(%s, %s, %s, %d)", string, delimiter, count, collationId);
}
}
public static UTF8String execBinary(final UTF8String string, final UTF8String delimiter,
Expand All @@ -490,8 +490,7 @@ public static UTF8String execLowercase(final UTF8String string, final UTF8String
}
public static UTF8String execICU(final UTF8String string, final UTF8String delimiter,
final int count, final int collationId) {
return CollationAwareUTF8String.subStringIndex(string, delimiter, count,
collationId);
return CollationAwareUTF8String.subStringIndex(string, delimiter, count, collationId);
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -425,12 +425,17 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession {
Row("www.apache")
)

// TODO SPARK-48779 Move E2E SQL tests with column input to collations.sql golden file.
val testTable = "test_substring_index"
withTable(testTable) {
sql(s"CREATE TABLE $testTable (num int) USING parquet")
sql(s"INSERT INTO $testTable VALUES (1), (2), (3), (NULL)")
val query = s"SELECT num, SUBSTRING_INDEX('a_a_a', '_', num) as sub_str FROM $testTable"
checkAnswer(sql(query), Seq(Row(1, "a"), Row(2, "a_a"), Row(3, "a_a_a"), Row(null, null)))
Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collation =>
withSQLConf(SQLConf.DEFAULT_COLLATION.key -> collation) {
val query = s"SELECT num, SUBSTRING_INDEX('a_a_a', '_', num) as sub_str FROM $testTable"
checkAnswer(sql(query), Seq(Row(1, "a"), Row(2, "a_a"), Row(3, "a_a_a"), Row(null, null)))
}
)
}
}

Expand Down

0 comments on commit c3985ac

Please sign in to comment.