Skip to content

Commit

Permalink
Add Cast expression with extended upcasting support
Browse files Browse the repository at this point in the history
  • Loading branch information
johanl-db committed Aug 16, 2024
1 parent 5e227f4 commit fc25e16
Show file tree
Hide file tree
Showing 10 changed files with 494 additions and 83 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
/*
* Copyright (2024) The Delta Lake Project Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.delta.kernel.expressions;

import io.delta.kernel.annotation.Evolving;
import io.delta.kernel.types.DataType;
import java.util.Collections;
import java.util.List;
import java.util.Objects;

/**
 * A cast expression to convert the input type to another given type.
 *
 * <p>The input expression and the target type are fixed at construction time.
 *
 * @since 3.3.0
 */
@Evolving
public final class Cast implements Expression {
  private final Expression child;
  private final DataType outputType;

  /**
   * Create a cast around the given input expression to specified output data type.
   *
   * @param child the expression whose result is being cast; must not be null
   * @param outputType the data type to cast to; must not be null
   * @throws NullPointerException if {@code child} or {@code outputType} is null
   */
  public Cast(Expression child, DataType outputType) {
    // Fail fast here rather than handing a null child or type to whoever evaluates the cast.
    this.child = Objects.requireNonNull(child, "child is null");
    this.outputType = Objects.requireNonNull(outputType, "outputType is null");
  }

  /** @return the target data type of this cast expression. */
  public DataType getOutputType() {
    return outputType;
  }

  /** @return an immutable single-element list holding the expression being cast. */
  @Override
  public List<Expression> getChildren() {
    return Collections.singletonList(child);
  }

  /** @return a string of the form {@code CAST(<child> AS <outputType>)}. */
  @Override
  public String toString() {
    return String.format("CAST(%s AS %s)", child, outputType);
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
import io.delta.kernel.data.ColumnVector;
import io.delta.kernel.data.MapValue;
import io.delta.kernel.data.Row;
import io.delta.kernel.internal.data.StructRow;
import io.delta.kernel.types.*;
import java.util.ArrayList;
import java.util.HashMap;
Expand All @@ -32,6 +31,24 @@ public final class VectorUtils {

private VectorUtils() {}

/**
 * Materializes the struct stored at {@code rowId} of a struct-typed {@link ColumnVector} as a Java
 * list. Java has no tuple type, so the result is an untyped list with one entry per struct field,
 * in field order. Nested complex values are converted to their Java representation as well.
 */
public static List<Object> toJavaList(ColumnVector vector, int rowId) {
  checkArgument(
      vector.getDataType() instanceof StructType, "Expected a struct type column vector");

  StructType schema = (StructType) vector.getDataType();
  List<Object> fieldValues = new ArrayList<>();

  int ordinal = 0;
  while (ordinal < schema.length()) {
    // Look the field vector up once and reuse it for both the value and its data type.
    ColumnVector fieldVector = vector.getChild(ordinal);
    fieldValues.add(getValueAsObject(fieldVector, fieldVector.getDataType(), rowId));
    ordinal++;
  }
  return fieldValues;
}

/**
* Converts an {@link ArrayValue} to a Java list. Any nested complex types are also converted to
* their Java type.
Expand Down Expand Up @@ -193,11 +210,10 @@ private static Object getValueAsObject(ColumnVector columnVector, DataType dataT
return columnVector.getString(rowId);
} else if (dataType instanceof BinaryType) {
return columnVector.getBinary(rowId);
} else if (dataType instanceof StructType) {
// TODO are we okay with this usage of StructRow?
return StructRow.fromStructVector(columnVector, rowId);
} else if (dataType instanceof DecimalType) {
return columnVector.getDecimal(rowId);
} else if (dataType instanceof StructType) {
return toJavaList(columnVector, rowId);
} else if (dataType instanceof ArrayType) {
return toJavaList(columnVector.getArray(rowId));
} else if (dataType instanceof MapType) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@
public final class DecimalType extends DataType {
public static final DecimalType USER_DEFAULT = new DecimalType(10, 0);

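// Decimal types wide enough to hold all values of the given primitive types. These are the
// smallest such types for byte, short and int; a long needs at most 19 digits, so the
// precision of 20 used by LONG_DECIMAL leaves one digit of headroom.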
public static final DecimalType BYTE_DECIMAL = new DecimalType(3, 0);
public static final DecimalType SHORT_DECIMAL = new DecimalType(5, 0);
public static final DecimalType INT_DECIMAL = new DecimalType(10, 0);
public static final DecimalType LONG_DECIMAL = new DecimalType(20, 0);

private final int precision;
private final int scale;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
import io.delta.kernel.types.DataType;
import io.delta.kernel.types.StructType;
import java.time.Instant;
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
Expand Down Expand Up @@ -71,6 +72,15 @@ public static long millisToMicros(long millis) {
return Math.multiplyExact(millis, DateTimeConstants.MICROS_PER_MILLIS);
}

/**
 * Converts a number of days since epoch (1970-01-01 00:00:00 UTC) to microseconds between epoch
 * and start of the day in the given timezone.
 *
 * @param days number of days since 1970-01-01; may be negative for dates before the epoch
 * @param timezone offset whose local midnight marks the start of the day
 * @return microseconds between the epoch and the start of that day in {@code timezone}
 * @throws ArithmeticException if the result does not fit in a {@code long}
 */
public static long daysToMicros(int days, ZoneOffset timezone) {
  long seconds = LocalDate.ofEpochDay(days).atStartOfDay(timezone).toEpochSecond();
  // Overflow-checked multiply, as in millisToMicros: extreme day counts would otherwise wrap
  // around silently and produce a bogus timestamp.
  return Math.multiplyExact(seconds, DateTimeConstants.MICROS_PER_SECOND);
}

/**
* Parses a TimestampNTZ string in UTC format, supporting milliseconds and microseconds, to
* microseconds since the Unix epoch.
Expand Down
Loading

0 comments on commit fc25e16

Please sign in to comment.