Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: use a macro for logical vs physical type matching #8479

Merged
merged 2 commits into from
Mar 12, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
63 changes: 42 additions & 21 deletions src/common/src/types/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1151,29 +1151,50 @@ impl ScalarImpl {
}
}

/// `for_all_type_pairs` is a macro that records all logical type (`DataType`) variants and their
/// corresponding physical type (`ScalarImpl`, `ArrayImpl`, or `ArrayBuilderImpl`) variants.
///
/// This is useful for checking whether a physical type is compatible with a logical type.
#[macro_export]
macro_rules! for_all_type_pairs {
($macro:ident) => {
$macro! {
{ Boolean, Bool },
{ Int16, Int16 },
{ Int32, Int32 },
{ Int64, Int64 },
{ Float32, Float32 },
{ Float64, Float64 },
{ Varchar, Utf8 },
{ Bytea, Bytea },
{ Date, NaiveDate },
{ Time, NaiveTime },
{ Timestamp, NaiveDateTime },
{ Timestamptz, Int64 },
{ Interval, Interval },
{ Decimal, Decimal },
{ Jsonb, Jsonb },
{ List, List },
{ Struct, Struct }
}
};
}

/// Returns whether the `literal` matches the `data_type`.
pub fn literal_type_match(data_type: &DataType, literal: Option<&ScalarImpl>) -> bool {
match literal {
Some(datum) => {
matches!(
(data_type, datum),
(DataType::Boolean, ScalarImpl::Bool(_))
| (DataType::Int16, ScalarImpl::Int16(_))
| (DataType::Int32, ScalarImpl::Int32(_))
| (DataType::Int64, ScalarImpl::Int64(_))
| (DataType::Float32, ScalarImpl::Float32(_))
| (DataType::Float64, ScalarImpl::Float64(_))
| (DataType::Varchar, ScalarImpl::Utf8(_))
| (DataType::Bytea, ScalarImpl::Bytea(_))
| (DataType::Date, ScalarImpl::NaiveDate(_))
| (DataType::Time, ScalarImpl::NaiveTime(_))
| (DataType::Timestamp, ScalarImpl::NaiveDateTime(_))
| (DataType::Timestamptz, ScalarImpl::Int64(_))
| (DataType::Decimal, ScalarImpl::Decimal(_))
| (DataType::Interval, ScalarImpl::Interval(_))
| (DataType::Jsonb, ScalarImpl::Jsonb(_))
| (DataType::Struct { .. }, ScalarImpl::Struct(_))
| (DataType::List { .. }, ScalarImpl::List(_))
)
Some(scalar) => {
macro_rules! matches {
($( { $DataType:ident, $PhysicalType:ident }),*) => {
match (data_type, scalar) {
$(
(DataType::$DataType { .. }, ScalarImpl::$PhysicalType(_)) => true,
(DataType::$DataType { .. }, _) => false, // so that we won't forget to match a new logical type
)*
}
}
}
for_all_type_pairs! { matches }
}
None => true,
}
Expand Down
49 changes: 21 additions & 28 deletions src/common/src/util/schema_check.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,43 +15,36 @@
use itertools::Itertools;

use crate::array::column::Column;
use crate::array::ArrayImpl;
use crate::for_all_type_pairs;
use crate::types::DataType;

/// Check if the schema of `columns` matches the expected `data_types`. Used for debugging.
pub fn schema_check<'a, T, C>(data_types: T, columns: C) -> Result<(), String>
where
T: IntoIterator<Item = &'a DataType> + Clone,
C: IntoIterator<Item = &'a Column> + Clone,
T: IntoIterator<Item = &'a DataType>,
C: IntoIterator<Item = &'a Column>,
{
tracing::event!(
tracing::Level::TRACE,
"input schema = \n{:#?}\nexpected schema = \n{:#?}",
columns
.clone()
.into_iter()
.map(|col| col.array_ref().get_ident())
.collect_vec(),
data_types.clone().into_iter().collect_vec(),
);

for (i, pair) in columns.into_iter().zip_longest(data_types).enumerate() {
let array = pair.as_ref().left().map(|c| c.array_ref());
let builder = pair.as_ref().right().map(|d| d.create_array_builder(0)); // TODO: check `data_type` directly

macro_rules! check_schema {
($( { $variant_name:ident, $suffix_name:ident, $array:ty, $builder:ty } ),*) => {
use crate::array::ArrayBuilderImpl;
use crate::array::ArrayImpl;

match (array, &builder) {
$( (Some(ArrayImpl::$variant_name(_)), Some(ArrayBuilderImpl::$variant_name(_))) => {} ),*
_ => return Err(format!("column {} should be {:?}, while stream chunk gives {:?}",
i, builder.map(|b| b.get_ident()), array.map(|a| a.get_ident()))),
for (i, pair) in data_types
.into_iter()
.zip_longest(columns.into_iter().map(|c| c.array_ref()))
.enumerate()
{
macro_rules! matches {
($( { $DataType:ident, $PhysicalType:ident }),*) => {
match (pair.as_ref().left(), pair.as_ref().right()) {
$( (Some(DataType::$DataType { .. }), Some(ArrayImpl::$PhysicalType(_))) => continue, )*
(data_type, array) => {
let array_ident = array.map(|a| a.get_ident());
return Err(format!(
"column type mismatched at position {i}: expected {data_type:?}, found {array_ident:?}"
));
}
}
};
}
}

for_all_variants! { check_schema };
for_all_type_pairs! { matches }
}

Ok(())
Expand Down