Skip to content

Commit

Permalink
[fix](sparkload): fix min_value will be negative number when `maxGlobalDictValue` exceeds integer range
Browse files Browse the repository at this point in the history
  • Loading branch information
weixiang committed May 7, 2022
1 parent 98bfeaf commit cd7da8c
Showing 1 changed file with 6 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -309,8 +309,10 @@ private String getSplitBuildGlobalDictSql(Map<String, Long> distinctKeyMap, Stri
sql.append("insert overwrite table ").append(globalDictTableName).append(" partition(dict_column='").append(distinctColumnName).append("') ")
.append(" select dict_key,dict_value from ").append(globalDictTableName).append(" where dict_column='").append(distinctColumnName).append("' ");
for (Map.Entry<String, Long> entry : distinctKeyMap.entrySet()) {
sql.append(" union all select dict_key, (row_number() over(order by dict_key)) ")
.append(String.format(" +(%s) as dict_value from %s", entry.getValue(), entry.getKey()));
sql.append(" union all select dict_key, CAST((row_number() over(order by dict_key)) as BIGINT) ")
.append(String.format("+ CAST(%s as BIGINT) as dict_value from %s",
entry.getValue(),
entry.getKey()));
}
return sql.toString();
}
Expand All @@ -334,7 +336,8 @@ private double[] getRandomSplitWeights() {
private String getBuildGlobalDictSql(long maxGlobalDictValue, String distinctColumnName) {
return "insert overwrite table " + globalDictTableName + " partition(dict_column='" + distinctColumnName + "') "
+ " select dict_key,dict_value from " + globalDictTableName + " where dict_column='" + distinctColumnName + "' "
+ " union all select t1.dict_key as dict_key,(row_number() over(order by t1.dict_key)) + (" + maxGlobalDictValue + ") as dict_value from "
+ " union all select t1.dict_key as dict_key,CAST((row_number() over(order by t1.dict_key)) as BIGINT) + "
+ "CAST(" + maxGlobalDictValue + " as BIGINT) as dict_value from "
+ "(select dict_key from " + distinctKeyTableName + " where dict_column='" + distinctColumnName + "' and dict_key is not null)t1 left join "
+ " (select dict_key,dict_value from " + globalDictTableName + " where dict_column='" + distinctColumnName + "' )t2 " +
"on t1.dict_key = t2.dict_key where t2.dict_value is null";
Expand Down

0 comments on commit cd7da8c

Please sign in to comment.