diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java index b6ff7abe5c1a4..f160661af3894 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java @@ -477,7 +477,7 @@ public static String genCode(final String string, final String delimiter, } else if (collation.supportsLowercaseEquality) { return String.format(expr + "Lowercase(%s, %s, %s)", string, delimiter, count); } else { - return String.format(expr + "ICU(%s, %s, %d, %s)", string, delimiter, count, collationId); + return String.format(expr + "ICU(%s, %s, %s, %d)", string, delimiter, count, collationId); } } public static UTF8String execBinary(final UTF8String string, final UTF8String delimiter, @@ -490,8 +490,7 @@ public static UTF8String execLowercase(final UTF8String string, final UTF8String } public static UTF8String execICU(final UTF8String string, final UTF8String delimiter, final int count, final int collationId) { - return CollationAwareUTF8String.subStringIndex(string, delimiter, count, - collationId); + return CollationAwareUTF8String.subStringIndex(string, delimiter, count, collationId); } } diff --git a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala index 37becda0286eb..1cfde5f0f81dd 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/StringFunctionsSuite.scala @@ -425,12 +425,17 @@ class StringFunctionsSuite extends QueryTest with SharedSparkSession { Row("www.apache") ) + // TODO SPARK-48779 Move E2E SQL tests with column input to collations.sql golden file. val testTable = "test_substring_index" withTable(testTable) { sql(s"CREATE TABLE $testTable (num int) USING parquet") sql(s"INSERT INTO $testTable VALUES (1), (2), (3), (NULL)") - val query = s"SELECT num, SUBSTRING_INDEX('a_a_a', '_', num) as sub_str FROM $testTable" - checkAnswer(sql(query), Seq(Row(1, "a"), Row(2, "a_a"), Row(3, "a_a_a"), Row(null, null))) + Seq("UTF8_BINARY", "UTF8_LCASE", "UNICODE", "UNICODE_CI").foreach(collation => + withSQLConf(SQLConf.DEFAULT_COLLATION.key -> collation) { + val query = s"SELECT num, SUBSTRING_INDEX('a_a_a', '_', num) as sub_str FROM $testTable" + checkAnswer(sql(query), Seq(Row(1, "a"), Row(2, "a_a"), Row(3, "a_a_a"), Row(null, null))) + } + ) } }