From 8f9cd3d1e820eb114c32627cc34f3897cda7903e Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 15:17:52 +0200 Subject: [PATCH 1/8] Rework range splitting api --- .../src/thir/pattern/deconstruct_pat.rs | 202 ++++++++---------- 1 file changed, 90 insertions(+), 112 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 3ee4befa12189..d63190985a313 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -39,7 +39,7 @@ //! //! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for //! or-patterns; instead we just try the alternatives one-by-one. For details on splitting -//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`SplitIntRange`]; for slices, see +//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`IntRange::split`]; for slices, see //! [`SplitVarLenSlice`]. use std::cell::Cell; @@ -186,6 +186,93 @@ impl IntRange { (lo == other_hi || hi == other_lo) && !self.is_singleton() && !other.is_singleton() } + /// See `Constructor::is_covered_by` + fn is_covered_by(&self, other: &Self) -> bool { + if self.intersection(other).is_some() { + // Constructor splitting should ensure that all intersections we encounter are actually + // inclusions. + assert!(self.is_subrange(other)); + true + } else { + false + } + } + + /// Partition a range of integers into disjoint subranges. This does constructor splitting for + /// integer ranges as explained at the top of the file. + /// + /// This returns an output that covers `self`. The output is split so that the only + /// intersections between an output range and a column range are inclusions. No output range + /// straddles the boundary of one of the inputs. + /// + /// The following input: + /// ```text + /// |-------------------------| // `self` + /// |------| |----------| |----| + /// |-------| |-------| + /// ``` + /// would be iterated over as follows: + /// ```text + /// ||---|--||-|---|---|---|--| + /// ``` + fn split( + &self, + column_ranges: impl Iterator, + ) -> impl Iterator { + /// Represents a boundary between 2 integers. Because the intervals spanning boundaries must be + /// able to cover every integer, we need to be able to represent 2^128 + 1 such boundaries. + #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] + enum IntBoundary { + JustBefore(u128), + AfterMax, + } + + fn unpack_intrange(range: IntRange) -> [IntBoundary; 2] { + use IntBoundary::*; + let (lo, hi) = range.boundaries(); + let lo = JustBefore(lo); + let hi = match hi.checked_add(1) { + Some(m) => JustBefore(m), + None => AfterMax, + }; + [lo, hi] + } + + // The boundaries of ranges in `column_ranges` intersected with `self`. + let mut boundaries: Vec = column_ranges + .filter_map(|r| self.intersection(&r)) + .map(unpack_intrange) + .flat_map(|[lo, hi]| [lo, hi]) + .collect(); + boundaries.sort_unstable(); + + let [self_start, self_end] = unpack_intrange(self.clone()); + // Gather pairs of adjacent boundaries. + let mut prev_bdy = self_start; + boundaries + .into_iter() + // End with the end of the range. + .chain(once(self_end)) + // List pairs of adjacent boundaries. + .map(move |bdy| { + let ret = (prev_bdy, bdy); + prev_bdy = bdy; + ret + }) + // Skip duplicates. + .filter(|&(prev_bdy, bdy)| prev_bdy != bdy) + // Convert back to ranges. + .map(move |(prev_bdy, bdy)| { + use IntBoundary::*; + let range = match (prev_bdy, bdy) { + (JustBefore(n), JustBefore(m)) if n < m => n..=(m - 1), + (JustBefore(n), AfterMax) => n..=u128::MAX, + _ => unreachable!(), // Ruled out by the sorting and filtering we did + }; + IntRange { range } + }) + } + /// Only used for displaying the range properly. fn to_pat<'tcx>(&self, tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Pat<'tcx> { let (lo, hi) = self.boundaries(); @@ -254,18 +341,6 @@ impl IntRange { ); } } - - /// See `Constructor::is_covered_by` - fn is_covered_by(&self, other: &Self) -> bool { - if self.intersection(other).is_some() { - // Constructor splitting should ensure that all intersections we encounter are actually - // inclusions. - assert!(self.is_subrange(other)); - true - } else { - false - } - } } /// Note: this is often not what we want: e.g. `false` is converted into the range `0..=0` and @@ -279,101 +354,6 @@ impl fmt::Debug for IntRange { } } -/// Represents a border between 2 integers. Because the intervals spanning borders must be able to -/// cover every integer, we need to be able to represent 2^128 + 1 such borders. -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -enum IntBorder { - JustBefore(u128), - AfterMax, -} - -/// A range of integers that is partitioned into disjoint subranges. This does constructor -/// splitting for integer ranges as explained at the top of the file. -/// -/// This is fed multiple ranges, and returns an output that covers the input, but is split so that -/// the only intersections between an output range and a seen range are inclusions. No output range -/// straddles the boundary of one of the inputs. -/// -/// The following input: -/// ```text -/// |-------------------------| // `self` -/// |------| |----------| |----| -/// |-------| |-------| -/// ``` -/// would be iterated over as follows: -/// ```text -/// ||---|--||-|---|---|---|--| -/// ``` -#[derive(Debug, Clone)] -struct SplitIntRange { - /// The range we are splitting - range: IntRange, - /// The borders of ranges we have seen. They are all contained within `range`. This is kept - /// sorted. - borders: Vec, -} - -impl SplitIntRange { - fn new(range: IntRange) -> Self { - SplitIntRange { range, borders: Vec::new() } - } - - /// Internal use - fn to_borders(r: IntRange) -> [IntBorder; 2] { - use IntBorder::*; - let (lo, hi) = r.boundaries(); - let lo = JustBefore(lo); - let hi = match hi.checked_add(1) { - Some(m) => JustBefore(m), - None => AfterMax, - }; - [lo, hi] - } - - /// Add ranges relative to which we split. - fn split(&mut self, ranges: impl Iterator) { - let this_range = &self.range; - let included_ranges = ranges.filter_map(|r| this_range.intersection(&r)); - let included_borders = included_ranges.flat_map(|r| { - let borders = Self::to_borders(r); - once(borders[0]).chain(once(borders[1])) - }); - self.borders.extend(included_borders); - self.borders.sort_unstable(); - } - - /// Iterate over the contained ranges. - fn iter(&self) -> impl Iterator + Captures<'_> { - use IntBorder::*; - - let self_range = Self::to_borders(self.range.clone()); - // Start with the start of the range. - let mut prev_border = self_range[0]; - self.borders - .iter() - .copied() - // End with the end of the range. - .chain(once(self_range[1])) - // List pairs of adjacent borders. - .map(move |border| { - let ret = (prev_border, border); - prev_border = border; - ret - }) - // Skip duplicates. - .filter(|(prev_border, border)| prev_border != border) - // Finally, convert to ranges. - .map(move |(prev_border, border)| { - let range = match (prev_border, border) { - (JustBefore(n), JustBefore(m)) if n < m => n..=(m - 1), - (JustBefore(n), AfterMax) => n..=u128::MAX, - _ => unreachable!(), // Ruled out by the sorting and filtering we did - }; - IntRange { range } - }) - } -} - #[derive(Copy, Clone, Debug, PartialEq, Eq)] enum SliceKind { /// Patterns of length `n` (`[x, y]`). @@ -733,10 +713,8 @@ impl<'tcx> Constructor<'tcx> { // Fast-track if the range is trivial. In particular, we don't do the overlapping // ranges check. IntRange(ctor_range) if !ctor_range.is_singleton() => { - let mut split_range = SplitIntRange::new(ctor_range.clone()); - let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()); - split_range.split(int_ranges.cloned()); - split_range.iter().map(IntRange).collect() + let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned(); + ctor_range.split(int_ranges).map(IntRange).collect() } &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); From 590edee3209906b3906f09bad029586b0a15e29f Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 14:38:40 +0200 Subject: [PATCH 2/8] Rework slice splitting api --- .../src/thir/pattern/deconstruct_pat.rs | 253 +++++++++--------- 1 file changed, 120 insertions(+), 133 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index d63190985a313..6755e665bba1d 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -40,7 +40,7 @@ //! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for //! or-patterns; instead we just try the alternatives one-by-one. For details on splitting //! wildcards, see [`SplitWildcard`]; for integer ranges, see [`IntRange::split`]; for slices, see -//! [`SplitVarLenSlice`]. +//! [`Slice::split`]. use std::cell::Cell; use std::cmp::{self, max, min, Ordering}; @@ -410,141 +410,130 @@ impl Slice { fn is_covered_by(self, other: Self) -> bool { other.kind.covers_length(self.arity()) } -} -/// This computes constructor splitting for variable-length slices, as explained at the top of the -/// file. -/// -/// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, _, -/// _, y] | ...`. The corresponding value constructors are fixed-length array constructors above a -/// given minimum length. We obviously can't list this infinitude of constructors. Thankfully, -/// it turns out that for each finite set of slice patterns, all sufficiently large array lengths -/// are equivalent. -/// -/// Let's look at an example, where we are trying to split the last pattern: -/// ``` -/// # fn foo(x: &[bool]) { -/// match x { -/// [true, true, ..] => {} -/// [.., false, false] => {} -/// [..] => {} -/// } -/// # } -/// ``` -/// Here are the results of specialization for the first few lengths: -/// ``` -/// # fn foo(x: &[bool]) { match x { -/// // length 0 -/// [] => {} -/// // length 1 -/// [_] => {} -/// // length 2 -/// [true, true] => {} -/// [false, false] => {} -/// [_, _] => {} -/// // length 3 -/// [true, true, _ ] => {} -/// [_, false, false] => {} -/// [_, _, _ ] => {} -/// // length 4 -/// [true, true, _, _ ] => {} -/// [_, _, false, false] => {} -/// [_, _, _, _ ] => {} -/// // length 5 -/// [true, true, _, _, _ ] => {} -/// [_, _, _, false, false] => {} -/// [_, _, _, _, _ ] => {} -/// # _ => {} -/// # }} -/// ``` -/// -/// If we went above length 5, we would simply be inserting more columns full of wildcards in the -/// middle. This means that the set of witnesses for length `l >= 5` if equivalent to the set for -/// any other `l' >= 5`: simply add or remove wildcards in the middle to convert between them. -/// -/// This applies to any set of slice patterns: there will be a length `L` above which all lengths -/// behave the same. This is exactly what we need for constructor splitting. Therefore a -/// variable-length slice can be split into a variable-length slice of minimal length `L`, and many -/// fixed-length slices of lengths `< L`. -/// -/// For each variable-length pattern `p` with a prefix of length `plₚ` and suffix of length `slₚ`, -/// only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as long as `L` is -/// positive (to avoid concerns about empty types), all elements after the maximum prefix length -/// and before the maximum suffix length are not examined by any variable-length pattern, and -/// therefore can be added/removed without affecting them - creating equivalent patterns from any -/// sufficiently-large length. -/// -/// Of course, if fixed-length patterns exist, we must be sure that our length is large enough to -/// miss them all, so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))` -/// -/// `max_slice` below will be made to have arity `L`. -#[derive(Debug)] -struct SplitVarLenSlice { - /// If the type is an array, this is its size. - array_len: Option, - /// The arity of the input slice. - arity: usize, - /// The smallest slice bigger than any slice seen. `max_slice.arity()` is the length `L` - /// described above. - max_slice: SliceKind, -} - -impl SplitVarLenSlice { - fn new(prefix: usize, suffix: usize, array_len: Option) -> Self { - SplitVarLenSlice { array_len, arity: prefix + suffix, max_slice: VarLen(prefix, suffix) } - } - - /// Pass a set of slices relative to which to split this one. - fn split(&mut self, slices: impl Iterator) { - let VarLen(max_prefix_len, max_suffix_len) = &mut self.max_slice else { - // No need to split - return; - }; - // We grow `self.max_slice` to be larger than all slices encountered, as described above. - // For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that - // `L = max_prefix_len + max_suffix_len`. - let mut max_fixed_len = 0; - for slice in slices { - match slice { - FixedLen(len) => { - max_fixed_len = cmp::max(max_fixed_len, len); + /// This computes constructor splitting for variable-length slices, as explained at the top of + /// the file. + /// + /// A slice pattern `[x, .., y]` behaves like the infinite or-pattern `[x, y] | [x, _, y] | [x, + /// _, _, y] | etc`. The corresponding value constructors are fixed-length array constructors of + /// corresponding lengths. We obviously can't list this infinitude of constructors. + /// Thankfully, it turns out that for each finite set of slice patterns, all sufficiently large + /// array lengths are equivalent. + /// + /// Let's look at an example, where we are trying to split the last pattern: + /// ``` + /// # fn foo(x: &[bool]) { + /// match x { + /// [true, true, ..] => {} + /// [.., false, false] => {} + /// [..] => {} // `self` + /// } + /// # } + /// ``` + /// Here are the results of specialization for the first few lengths: + /// ``` + /// # fn foo(x: &[bool]) { match x { + /// // length 0 + /// [] => {} + /// // length 1 + /// [_] => {} + /// // length 2 + /// [true, true] => {} + /// [false, false] => {} + /// [_, _] => {} + /// // length 3 + /// [true, true, _ ] => {} + /// [_, false, false] => {} + /// [_, _, _ ] => {} + /// // length 4 + /// [true, true, _, _ ] => {} + /// [_, _, false, false] => {} + /// [_, _, _, _ ] => {} + /// // length 5 + /// [true, true, _, _, _ ] => {} + /// [_, _, _, false, false] => {} + /// [_, _, _, _, _ ] => {} + /// # _ => {} + /// # }} + /// ``` + /// + /// We see that above length 4, we are simply inserting columns full of wildcards in the middle. + /// This means that specialization and witness computation with slices of length `l >= 4` will + /// give equivalent results independently of `l`. This applies to any set of slice patterns: + /// there will be a length `L` above which all lengths behave the same. This is exactly what we + /// need for constructor splitting. + /// + /// A variable-length slice pattern covers all lengths from its arity up to infinity. As we just + /// saw, we can split this in two: lengths below `L` are treated individually with a + /// fixed-length slice each; lengths above `L` are grouped into a single variable-length slice + /// constructor. + /// + /// For each variable-length slice pattern `p` with a prefix of length `plₚ` and suffix of + /// length `slₚ`, only the first `plₚ` and the last `slₚ` elements are examined. Therefore, as + /// long as `L` is positive (to avoid concerns about empty types), all elements after the + /// maximum prefix length and before the maximum suffix length are not examined by any + /// variable-length pattern, and therefore can be ignored. This gives us a way to compute `L`. + /// + /// Additionally, if fixed-length patterns exist, we must pick an `L` large enough to miss them, + /// so we can pick `L = max(max(FIXED_LEN)+1, max(PREFIX_LEN) + max(SUFFIX_LEN))`. + /// `max_slice` below will be made to have this arity `L`. + /// + /// If `self` is fixed-length, it is returned as-is. + fn split(self, column_slices: impl Iterator) -> impl Iterator { + // Range of lengths below `L`. + let smaller_lengths; + let mut max_slice = self.kind; + match &mut max_slice { + VarLen(max_prefix_len, max_suffix_len) => { + // We grow `max_slice` to be larger than all slices encountered, as described above. + // For diagnostics, we keep the prefix and suffix lengths separate, but grow them so that + // `L = max_prefix_len + max_suffix_len`. + let mut max_fixed_len = 0; + for slice in column_slices { + match slice.kind { + FixedLen(len) => { + max_fixed_len = cmp::max(max_fixed_len, len); + } + VarLen(prefix, suffix) => { + *max_prefix_len = cmp::max(*max_prefix_len, prefix); + *max_suffix_len = cmp::max(*max_suffix_len, suffix); + } + } } - VarLen(prefix, suffix) => { - *max_prefix_len = cmp::max(*max_prefix_len, prefix); - *max_suffix_len = cmp::max(*max_suffix_len, suffix); + // We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and + // suffix separate. + if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len { + // The subtraction can't overflow thanks to the above check. + // The new `max_prefix_len` is larger than its previous value. + *max_prefix_len = max_fixed_len + 1 - *max_suffix_len; } - } - } - // We want `L = max(L, max_fixed_len + 1)`, modulo the fact that we keep prefix and - // suffix separate. - if max_fixed_len + 1 >= *max_prefix_len + *max_suffix_len { - // The subtraction can't overflow thanks to the above check. - // The new `max_prefix_len` is larger than its previous value. - *max_prefix_len = max_fixed_len + 1 - *max_suffix_len; - } - // We cap the arity of `max_slice` at the array size. - match self.array_len { - Some(len) if self.max_slice.arity() >= len => self.max_slice = FixedLen(len), - _ => {} - } - } + // We cap the arity of `max_slice` at the array size. + match self.array_len { + Some(len) if max_slice.arity() >= len => max_slice = FixedLen(len), + _ => {} + } - /// Iterate over the partition of this slice. - fn iter(&self) -> impl Iterator + Captures<'_> { - let smaller_lengths = match self.array_len { - // The only admissible fixed-length slice is one of the array size. Whether `max_slice` - // is fixed-length or variable-length, it will be the only relevant slice to output - // here. - Some(_) => 0..0, // empty range - // We cover all arities in the range `(self.arity..infinity)`. We split that range into - // two: lengths smaller than `max_slice.arity()` are treated independently as - // fixed-lengths slices, and lengths above are captured by `max_slice`. - None => self.arity..self.max_slice.arity(), + smaller_lengths = match self.array_len { + // The only admissible fixed-length slice is one of the array size. Whether `max_slice` + // is fixed-length or variable-length, it will be the only relevant slice to output + // here. + Some(_) => 0..0, // empty range + // We need to cover all arities in the range `(arity..infinity)`. We split that + // range into two: lengths smaller than `max_slice.arity()` are treated + // independently as fixed-lengths slices, and lengths above are captured by + // `max_slice`. + None => self.arity()..max_slice.arity(), + }; + } + FixedLen(_) => { + // No need to split. + smaller_lengths = 0..0; + } }; smaller_lengths .map(FixedLen) - .chain(once(self.max_slice)) + .chain(once(max_slice)) .map(move |kind| Slice::new(self.array_len, kind)) } } @@ -716,11 +705,9 @@ impl<'tcx> Constructor<'tcx> { let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned(); ctor_range.split(int_ranges).map(IntRange).collect() } - &Slice(Slice { kind: VarLen(self_prefix, self_suffix), array_len }) => { - let mut split_self = SplitVarLenSlice::new(self_prefix, self_suffix, array_len); - let slices = ctors.filter_map(|c| c.as_slice()).map(|s| s.kind); - split_self.split(slices); - split_self.iter().map(Slice).collect() + &Slice(slice @ Slice { kind: VarLen(..), .. }) => { + let slices = ctors.filter_map(|c| c.as_slice()); + slice.split(slices).map(Slice).collect() } // Any other constructor can be used unchanged. _ => smallvec![self.clone()], From 429770a48eb7dcb11831d4670a25a01c698f704c Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 16:21:40 +0200 Subject: [PATCH 3/8] Splitting ensures subrange comparison is all we need --- .../src/thir/pattern/deconstruct_pat.rs | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 6755e665bba1d..b4bc456987880 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -186,18 +186,6 @@ impl IntRange { (lo == other_hi || hi == other_lo) && !self.is_singleton() && !other.is_singleton() } - /// See `Constructor::is_covered_by` - fn is_covered_by(&self, other: &Self) -> bool { - if self.intersection(other).is_some() { - // Constructor splitting should ensure that all intersections we encounter are actually - // inclusions. - assert!(self.is_subrange(other)); - true - } else { - false - } - } - /// Partition a range of integers into disjoint subranges. This does constructor splitting for /// integer ranges as explained at the top of the file. /// @@ -730,7 +718,7 @@ impl<'tcx> Constructor<'tcx> { (Single, Single) => true, (Variant(self_id), Variant(other_id)) => self_id == other_id, - (IntRange(self_range), IntRange(other_range)) => self_range.is_covered_by(other_range), + (IntRange(self_range), IntRange(other_range)) => self_range.is_subrange(other_range), (F32Range(self_from, self_to, self_end), F32Range(other_from, other_to, other_end)) => { self_from.ge(other_from) && match self_to.partial_cmp(other_to) { From c1800ef93fa44ac583c213632a315956b5b2f31b Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 15:30:05 +0200 Subject: [PATCH 4/8] Replace SplitWildcard with a cleaner ConstructorSet abstraction --- .../src/thir/pattern/deconstruct_pat.rs | 622 +++++++++++------- .../src/thir/pattern/usefulness.rs | 130 ++-- 2 files changed, 434 insertions(+), 318 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index b4bc456987880..ab30f5109c77d 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -39,8 +39,8 @@ //! //! Splitting is implemented in the [`Constructor::split`] function. We don't do splitting for //! or-patterns; instead we just try the alternatives one-by-one. For details on splitting -//! wildcards, see [`SplitWildcard`]; for integer ranges, see [`IntRange::split`]; for slices, see -//! [`Slice::split`]. +//! wildcards, see [`Constructor::split`]; for integer ranges, see +//! [`IntRange::split`]; for slices, see [`Slice::split`]. use std::cell::Cell; use std::cmp::{self, max, min, Ordering}; @@ -52,6 +52,7 @@ use smallvec::{smallvec, SmallVec}; use rustc_apfloat::ieee::{DoubleS, IeeeFloat, SingleS}; use rustc_data_structures::captures::Captures; +use rustc_data_structures::fx::FxHashSet; use rustc_hir::{HirId, RangeEnd}; use rustc_index::Idx; use rustc_middle::middle::stability::EvalResult; @@ -86,6 +87,13 @@ fn expand_or_pat<'p, 'tcx>(pat: &'p Pat<'tcx>) -> Vec<&'p Pat<'tcx>> { pats } +/// Whether we have seen a constructor in the column or not. +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +enum Presence { + Unseen, + Seen, +} + /// An inclusive interval, used for precise integer exhaustiveness checking. /// `IntRange`s always store a contiguous range. This means that values are /// encoded such that `0` encodes the minimum value for the integer, @@ -203,10 +211,12 @@ impl IntRange { /// ```text /// ||---|--||-|---|---|---|--| /// ``` + /// + /// Additionally, we track for each output range whether it is covered by one of the column ranges or not. fn split( &self, column_ranges: impl Iterator, - ) -> impl Iterator { + ) -> impl Iterator { /// Represents a boundary between 2 integers. Because the intervals spanning boundaries must be /// able to cover every integer, we need to be able to represent 2^128 + 1 such boundaries. #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] @@ -227,41 +237,57 @@ impl IntRange { } // The boundaries of ranges in `column_ranges` intersected with `self`. - let mut boundaries: Vec = column_ranges + // We do parenthesis matching for input ranges. A boundary counts as +1 if it starts + // a range and -1 if it ends it. When the count is > 0 between two boundaries, we + // are within an input range. + let mut boundaries: Vec<(IntBoundary, isize)> = column_ranges .filter_map(|r| self.intersection(&r)) .map(unpack_intrange) - .flat_map(|[lo, hi]| [lo, hi]) + .flat_map(|[lo, hi]| [(lo, 1), (hi, -1)]) .collect(); boundaries.sort_unstable(); + // Counter for parenthesis matching. + let mut paren_counter = 0isize; + let boundaries_with_paren_counts = boundaries + .into_iter() + // Accumulate parenthesis counts. + .map(move |(bdy, delta)| { + paren_counter += delta; + (bdy, paren_counter) + }); + let [self_start, self_end] = unpack_intrange(self.clone()); // Gather pairs of adjacent boundaries. let mut prev_bdy = self_start; - boundaries - .into_iter() - // End with the end of the range. - .chain(once(self_end)) + let mut prev_paren_count = 0; + boundaries_with_paren_counts + // End with the end of the range. The count is irrelevant. + .chain(once((self_end, 0))) // List pairs of adjacent boundaries. - .map(move |bdy| { - let ret = (prev_bdy, bdy); + .map(move |(bdy, paren_count)| { + let ret = (prev_bdy, prev_paren_count, bdy); prev_bdy = bdy; + prev_paren_count = paren_count; ret }) // Skip duplicates. - .filter(|&(prev_bdy, bdy)| prev_bdy != bdy) + .filter(|&(prev_bdy, _, bdy)| prev_bdy != bdy) // Convert back to ranges. - .map(move |(prev_bdy, bdy)| { + .map(move |(prev_bdy, paren_count, bdy)| { use IntBoundary::*; + use Presence::*; + let presence = if paren_count > 0 { Seen } else { Unseen }; let range = match (prev_bdy, bdy) { (JustBefore(n), JustBefore(m)) if n < m => n..=(m - 1), (JustBefore(n), AfterMax) => n..=u128::MAX, _ => unreachable!(), // Ruled out by the sorting and filtering we did }; - IntRange { range } + (presence, IntRange { range }) }) } - /// Only used for displaying the range properly. + /// Only used for displaying the range. fn to_pat<'tcx>(&self, tcx: TyCtxt<'tcx>, ty: Ty<'tcx>) -> Pat<'tcx> { let (lo, hi) = self.boundaries(); @@ -414,7 +440,7 @@ impl Slice { /// match x { /// [true, true, ..] => {} /// [.., false, false] => {} - /// [..] => {} // `self` + /// [..] => {} /// } /// # } /// ``` @@ -447,9 +473,9 @@ impl Slice { /// /// We see that above length 4, we are simply inserting columns full of wildcards in the middle. /// This means that specialization and witness computation with slices of length `l >= 4` will - /// give equivalent results independently of `l`. This applies to any set of slice patterns: - /// there will be a length `L` above which all lengths behave the same. This is exactly what we - /// need for constructor splitting. + /// give equivalent results regardless of `l`. This applies to any set of slice patterns: there + /// will be a length `L` above which all lengths behave the same. This is exactly what we need + /// for constructor splitting. /// /// A variable-length slice pattern covers all lengths from its arity up to infinity. As we just /// saw, we can split this in two: lengths below `L` are treated individually with a @@ -467,10 +493,18 @@ impl Slice { /// `max_slice` below will be made to have this arity `L`. /// /// If `self` is fixed-length, it is returned as-is. - fn split(self, column_slices: impl Iterator) -> impl Iterator { + /// + /// Additionally, we track for each output slice whether it is covered by one of the column slices or not. + fn split( + self, + column_slices: impl Iterator, + ) -> impl Iterator { // Range of lengths below `L`. let smaller_lengths; + let arity = self.arity(); let mut max_slice = self.kind; + let mut min_var_len = usize::MAX; + let mut seen_fixed_lens = FxHashSet::default(); match &mut max_slice { VarLen(max_prefix_len, max_suffix_len) => { // We grow `max_slice` to be larger than all slices encountered, as described above. @@ -481,10 +515,14 @@ impl Slice { match slice.kind { FixedLen(len) => { max_fixed_len = cmp::max(max_fixed_len, len); + if arity <= len { + seen_fixed_lens.insert(len); + } } VarLen(prefix, suffix) => { *max_prefix_len = cmp::max(*max_prefix_len, prefix); *max_suffix_len = cmp::max(*max_suffix_len, suffix); + min_var_len = cmp::min(min_var_len, prefix + suffix); } } } @@ -515,14 +553,32 @@ impl Slice { }; } FixedLen(_) => { - // No need to split. + // No need to split here. We only track presence. + for slice in column_slices { + match slice.kind { + FixedLen(len) => { + if len == arity { + seen_fixed_lens.insert(len); + } + } + VarLen(prefix, suffix) => { + min_var_len = cmp::min(min_var_len, prefix + suffix); + } + } + } smaller_lengths = 0..0; } }; - smaller_lengths - .map(FixedLen) - .chain(once(max_slice)) - .map(move |kind| Slice::new(self.array_len, kind)) + + smaller_lengths.map(FixedLen).chain(once(max_slice)).map(move |kind| { + let arity = kind.arity(); + let seen = if min_var_len <= arity || seen_fixed_lens.contains(&arity) { + Presence::Seen + } else { + Presence::Unseen + }; + (seen, Slice::new(self.array_len, kind)) + }) } } @@ -556,8 +612,8 @@ pub(super) enum Constructor<'tcx> { /// Fake extra constructor for enums that aren't allowed to be matched exhaustively. Also used /// for those types for which we cannot list constructors explicitly, like `f64` and `str`. NonExhaustive, - /// Stands for constructors that are not seen in the matrix, as explained in the documentation - /// for [`SplitWildcard`]. The carried `bool` is used for the `non_exhaustive_omitted_patterns` + /// Stands for constructors that are not seen in the matrix, as explained in the code for + /// [`Constructor::split`]. The carried `bool` is used for the `non_exhaustive_omitted_patterns` /// lint. Missing { nonexhaustive_enum_missing_real_variants: bool, @@ -577,13 +633,18 @@ impl<'tcx> Constructor<'tcx> { matches!(self, NonExhaustive) } + pub(super) fn as_variant(&self) -> Option { + match self { + Variant(i) => Some(*i), + _ => None, + } + } fn as_int_range(&self) -> Option<&IntRange> { match self { IntRange(range) => Some(range), _ => None, } } - fn as_slice(&self) -> Option { match self { Slice(slice) => Some(*slice), @@ -660,19 +721,19 @@ impl<'tcx> Constructor<'tcx> { } } - /// Some constructors (namely `Wildcard`, `IntRange` and `Slice`) actually stand for a set of actual - /// constructors (like variants, integers or fixed-sized slices). When specializing for these - /// constructors, we want to be specialising for the actual underlying constructors. + /// Some constructors (namely `Wildcard`, `IntRange` and `Slice`) actually stand for a set of + /// actual constructors (like variants, integers or fixed-sized slices). When specializing for + /// these constructors, we want to be specialising for the actual underlying constructors. /// Naively, we would simply return the list of constructors they correspond to. We instead are - /// more clever: if there are constructors that we know will behave the same wrt the current - /// matrix, we keep them grouped. For example, all slices of a sufficiently large length - /// will either be all useful or all non-useful with a given matrix. + /// more clever: if there are constructors that we know will behave the same w.r.t. the current + /// matrix, we keep them grouped. For example, all slices of a sufficiently large length will + /// either be all useful or all non-useful with a given matrix. /// /// See the branches for details on how the splitting is done. /// - /// This function may discard some irrelevant constructors if this preserves behavior and - /// diagnostics. Eg. for the `_` case, we ignore the constructors already present in the - /// matrix, unless all of them are. + /// This function may discard some irrelevant constructors if this preserves behavior. Eg. for + /// the `_` case, we ignore the constructors already present in the column, unless all of them + /// are. pub(super) fn split<'a>( &self, pcx: &PatCtxt<'_, '_, 'tcx>, @@ -683,19 +744,74 @@ impl<'tcx> Constructor<'tcx> { { match self { Wildcard => { - let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx, ctors); - split_wildcard.into_ctors(pcx) + let split_set = ConstructorSet::for_ty(pcx.cx, pcx.ty).split(pcx, ctors); + if !split_set.missing.is_empty() { + // We are splitting a wildcard in order to compute its usefulness. Some constructors are + // not present in the column. The first thing we note is that specializing with any of + // the missing constructors would select exactly the rows with wildcards. Moreover, they + // would all return equivalent results. We can therefore group them all into a + // fictitious `Missing` constructor. + // + // As an important optimization, this function will skip all the present constructors. + // This is correct because specializing with any of the present constructors would + // select a strict superset of the wildcard rows, and thus would only find witnesses + // already found with the `Missing` constructor. + // This does mean that diagnostics are incomplete: in + // ``` + // match x { + // Some(true) => {} + // } + // ``` + // we report `None` as missing but not `Some(false)`. + // + // When all the constructors are missing we can equivalently return the `Wildcard` + // constructor on its own. The difference between `Wildcard` and `Missing` will then + // only be in diagnostics. + + // If some constructors are missing, we typically want to report those constructors, + // e.g.: + // ``` + // enum Direction { N, S, E, W } + // let Direction::N = ...; + // ``` + // we can report 3 witnesses: `S`, `E`, and `W`. + // + // However, if the user didn't actually specify a constructor + // in this arm, e.g., in + // ``` + // let x: (Direction, Direction, bool) = ...; + // let (_, _, false) = x; + // ``` + // we don't want to show all 16 possible witnesses `(, , + // true)` - we are satisfied with `(_, _, true)`. So if all constructors are missing we + // prefer to report just a wildcard `_`. + // + // The exception is: if we are at the top-level, for example in an empty match, we + // usually prefer to report the full list of constructors. + let all_missing = split_set.present.is_empty(); + let report_when_all_missing = + pcx.is_top_level && !IntRange::is_integral(pcx.ty); + let ctor = if all_missing && !report_when_all_missing { + Wildcard + } else { + Missing { + nonexhaustive_enum_missing_real_variants: split_set + .nonexhaustive_enum_missing_real_variants, + } + }; + smallvec![ctor] + } else { + split_set.present + } } - // Fast-track if the range is trivial. In particular, we don't do the overlapping - // ranges check. - IntRange(ctor_range) if !ctor_range.is_singleton() => { - let int_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned(); - ctor_range.split(int_ranges).map(IntRange).collect() + // Fast-track if the range is trivial. + IntRange(this_range) if !this_range.is_singleton() => { + let column_ranges = ctors.filter_map(|ctor| ctor.as_int_range()).cloned(); + this_range.split(column_ranges).map(|(_, range)| IntRange(range)).collect() } - &Slice(slice @ Slice { kind: VarLen(..), .. }) => { - let slices = ctors.filter_map(|c| c.as_slice()); - slice.split(slices).map(Slice).collect() + Slice(this_slice @ Slice { kind: VarLen(..), .. }) => { + let column_slices = ctors.filter_map(|c| c.as_slice()); + this_slice.split(column_slices).map(|(_, slice)| Slice(slice)).collect() } // Any other constructor can be used unchanged. _ => smallvec![self.clone()], @@ -755,96 +871,112 @@ impl<'tcx> Constructor<'tcx> { ), } } +} - /// Faster version of `is_covered_by` when applied to many constructors. `used_ctors` is - /// assumed to be built from `matrix.head_ctors()` with wildcards and opaques filtered out, - /// and `self` is assumed to have been split from a wildcard. - fn is_covered_by_any<'p>( - &self, - pcx: &PatCtxt<'_, 'p, 'tcx>, - used_ctors: &[Constructor<'tcx>], - ) -> bool { - if used_ctors.is_empty() { - return false; - } - - // This must be kept in sync with `is_covered_by`. - match self { - // If `self` is `Single`, `used_ctors` cannot contain anything else than `Single`s. - Single => !used_ctors.is_empty(), - Variant(vid) => used_ctors.iter().any(|c| matches!(c, Variant(i) if i == vid)), - IntRange(range) => used_ctors - .iter() - .filter_map(|c| c.as_int_range()) - .any(|other| range.is_covered_by(other)), - Slice(slice) => used_ctors - .iter() - .filter_map(|c| c.as_slice()) - .any(|other| slice.is_covered_by(other)), - // This constructor is never covered by anything else - NonExhaustive => false, - Str(..) | F32Range(..) | F64Range(..) | Opaque | Missing { .. } | Wildcard | Or => { - span_bug!(pcx.span, "found unexpected ctor in all_ctors: {:?}", self) - } - } - } +/// Describes the set of all constructors for a type. +pub(super) enum ConstructorSet { + /// The type has a single constructor, e.g. `&T` or a struct. + Single, + /// This type has the following list of constructors. + Variants { variants: Vec, non_exhaustive: bool }, + /// The type is spanned by integer values. The range or ranges give the set of allowed values. + /// The second range is only useful for `char`. + /// This is reused for bool. FIXME: don't. + /// `non_exhaustive` is used when the range is not allowed to be matched exhaustively (that's + /// for usize/isize). + Integers { range_1: IntRange, range_2: Option, non_exhaustive: bool }, + /// The type is matched by slices. The usize is the compile-time length of the array, if known. + Slice(Option), + /// The type is matched by slices whose elements are uninhabited. + SliceOfEmpty, + /// The constructors cannot be listed, and the type cannot be matched exhaustively. E.g. `str`, + /// floats. + Unlistable, + /// The type has no inhabitants. + Uninhabited, } -/// A wildcard constructor that we split relative to the constructors in the matrix, as explained -/// at the top of the file. +/// Describes the result of analyzing the constructors in a column of a match. /// -/// A constructor that is not present in the matrix rows will only be covered by the rows that have -/// wildcards. Thus we can group all of those constructors together; we call them "missing -/// constructors". Splitting a wildcard would therefore list all present constructors individually -/// (or grouped if they are integers or slices), and then all missing constructors together as a -/// group. +/// `present` is morally the set of constructors present in the column, and `missing` is the set of +/// constructors that exist in the type but are not present in the column. /// -/// However we can go further: since any constructor will match the wildcard rows, and having more -/// rows can only reduce the amount of usefulness witnesses, we can skip the present constructors -/// and only try the missing ones. -/// This will not preserve the whole list of witnesses, but will preserve whether the list is empty -/// or not. In fact this is quite natural from the point of view of diagnostics too. This is done -/// in `to_ctors`: in some cases we only return `Missing`. -#[derive(Debug)] -pub(super) struct SplitWildcard<'tcx> { - /// Constructors (other than wildcards and opaques) seen in the matrix. - matrix_ctors: Vec>, - /// All the constructors for this type - all_ctors: SmallVec<[Constructor<'tcx>; 1]>, +/// More formally, they respect the following constraints: +/// - the union of `present` and `missing` covers the whole type +/// - `present` and `missing` are disjoint +/// - neither contains wildcards +/// - each constructor in `present` is covered by some non-wildcard constructor in the column +/// - together, the constructors in `present` cover all the non-wildcard constructor in the column +/// - non-wildcards in the column do no cover anything in `missing` +/// - constructors in `present` and `missing` are split for the column; in other words, they are +/// either fully included in or disjoint from each constructor in the column. This avoids +/// non-trivial intersections like between `0..10` and `5..15`. +struct SplitConstructorSet<'tcx> { + present: SmallVec<[Constructor<'tcx>; 1]>, + missing: Vec>, + /// For the `non_exhaustive_omitted_patterns` lint. + nonexhaustive_enum_missing_real_variants: bool, } -impl<'tcx> SplitWildcard<'tcx> { - pub(super) fn new<'p>(pcx: &PatCtxt<'_, 'p, 'tcx>) -> Self { - debug!("SplitWildcard::new({:?})", pcx.ty); - let cx = pcx.cx; - let make_range = |start, end| { - IntRange( - // `unwrap()` is ok because we know the type is an integer. - IntRange::from_range(cx.tcx, start, end, pcx.ty, RangeEnd::Included), - ) - }; - // This determines the set of all possible constructors for the type `pcx.ty`. For numbers, +impl ConstructorSet { + pub(super) fn for_ty<'p, 'tcx>(cx: &MatchCheckCtxt<'p, 'tcx>, ty: Ty<'tcx>) -> Self { + debug!("ConstructorSet::for_ty({:?})", ty); + let make_range = + |start, end| IntRange::from_range(cx.tcx, start, end, ty, RangeEnd::Included); + // This determines the set of all possible constructors for the type `ty`. For numbers, // arrays and slices we use ranges and variable-length slices when appropriate. // // If the `exhaustive_patterns` feature is enabled, we make sure to omit constructors that // are statically impossible. E.g., for `Option`, we do not include `Some(_)` in the // returned list of constructors. - // Invariant: this is empty if and only if the type is uninhabited (as determined by + // Invariant: this is `Uninhabited` if and only if the type is uninhabited (as determined by // `cx.is_uninhabited()`). - let all_ctors = match pcx.ty.kind() { - ty::Bool => smallvec![make_range(0, 1)], + match ty.kind() { + ty::Bool => { + Self::Integers { range_1: make_range(0, 1), range_2: None, non_exhaustive: false } + } + ty::Char => { + // The valid Unicode Scalar Value ranges. + Self::Integers { + range_1: make_range('\u{0000}' as u128, '\u{D7FF}' as u128), + range_2: Some(make_range('\u{E000}' as u128, '\u{10FFFF}' as u128)), + non_exhaustive: false, + } + } + &ty::Int(ity) => { + // `usize`/`isize` are not allowed to be matched exhaustively unless the + // `precise_pointer_size_matching` feature is enabled. + let non_exhaustive = + ty.is_ptr_sized_integral() && !cx.tcx.features().precise_pointer_size_matching; + let bits = Integer::from_int_ty(&cx.tcx, ity).size().bits() as u128; + let min = 1u128 << (bits - 1); + let max = min - 1; + Self::Integers { range_1: make_range(min, max), non_exhaustive, range_2: None } + } + &ty::Uint(uty) => { + // `usize`/`isize` are not allowed to be matched exhaustively unless the + // `precise_pointer_size_matching` feature is enabled. + let non_exhaustive = + ty.is_ptr_sized_integral() && !cx.tcx.features().precise_pointer_size_matching; + let size = Integer::from_uint_ty(&cx.tcx, uty).size(); + let max = size.truncate(u128::MAX); + Self::Integers { range_1: make_range(0, max), non_exhaustive, range_2: None } + } ty::Array(sub_ty, len) if len.try_eval_target_usize(cx.tcx, cx.param_env).is_some() => { let len = len.eval_target_usize(cx.tcx, cx.param_env) as usize; if len != 0 && cx.is_uninhabited(*sub_ty) { - smallvec![] + Self::Uninhabited } else { - smallvec![Slice(Slice::new(Some(len), VarLen(0, 0)))] + Self::Slice(Some(len)) } } // Treat arrays of a constant but unknown length like slices. ty::Array(sub_ty, _) | ty::Slice(sub_ty) => { - let kind = if cx.is_uninhabited(*sub_ty) { FixedLen(0) } else { VarLen(0, 0) }; - smallvec![Slice(Slice::new(None, kind))] + if cx.is_uninhabited(*sub_ty) { + Self::SliceOfEmpty + } else { + Self::Slice(None) + } } ty::Adt(def, args) if def.is_enum() => { // If the enum is declared as `#[non_exhaustive]`, we treat it as if it had an @@ -863,19 +995,14 @@ impl<'tcx> SplitWildcard<'tcx> { // // we don't want to show every possible IO error, but instead have only `_` as the // witness. - let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(pcx.ty); - - let is_exhaustive_pat_feature = cx.tcx.features().exhaustive_patterns; - - // If `exhaustive_patterns` is disabled and our scrutinee is an empty enum, we treat it - // as though it had an "unknown" constructor to avoid exposing its emptiness. The - // exception is if the pattern is at the top level, because we want empty matches to be - // considered exhaustive. - let is_secretly_empty = - def.variants().is_empty() && !is_exhaustive_pat_feature && !pcx.is_top_level; + let is_declared_nonexhaustive = cx.is_foreign_non_exhaustive_enum(ty); - let mut ctors: SmallVec<[_; 1]> = - def.variants() + if def.variants().is_empty() && !is_declared_nonexhaustive { + Self::Uninhabited + } else { + let is_exhaustive_pat_feature = cx.tcx.features().exhaustive_patterns; + let variants: Vec<_> = def + .variants() .iter_enumerated() .filter(|(_, v)| { // If `exhaustive_patterns` is enabled, we exclude variants known to be @@ -885,135 +1012,150 @@ impl<'tcx> SplitWildcard<'tcx> { .instantiate(cx.tcx, args) .apply(cx.tcx, cx.param_env, cx.module) }) - .map(|(idx, _)| Variant(idx)) + .map(|(idx, _)| idx) .collect(); - if is_secretly_empty || is_declared_nonexhaustive { - ctors.push(NonExhaustive); + Self::Variants { variants, non_exhaustive: is_declared_nonexhaustive } } - ctors - } - ty::Char => { - smallvec![ - // The valid Unicode Scalar Value ranges. - make_range('\u{0000}' as u128, '\u{D7FF}' as u128), - make_range('\u{E000}' as u128, '\u{10FFFF}' as u128), - ] - } - ty::Int(_) | ty::Uint(_) - if pcx.ty.is_ptr_sized_integral() - && !cx.tcx.features().precise_pointer_size_matching => - { - // `usize`/`isize` are not allowed to be matched exhaustively unless the - // `precise_pointer_size_matching` feature is enabled. So we treat those types like - // `#[non_exhaustive]` enums by returning a special unmatchable constructor. - smallvec![NonExhaustive] } - &ty::Int(ity) => { - let bits = Integer::from_int_ty(&cx.tcx, ity).size().bits() as u128; - let min = 1u128 << (bits - 1); - let max = min - 1; - smallvec![make_range(min, max)] - } - &ty::Uint(uty) => { - let size = Integer::from_uint_ty(&cx.tcx, uty).size(); - let max = size.truncate(u128::MAX); - smallvec![make_range(0, max)] - } - // If `exhaustive_patterns` is disabled and our scrutinee is the never type, we cannot - // expose its emptiness. The exception is if the pattern is at the top level, because we - // want empty matches to be considered exhaustive. - ty::Never if !cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => { - smallvec![NonExhaustive] - } - ty::Never => smallvec![], - _ if cx.is_uninhabited(pcx.ty) => smallvec![], - ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => smallvec![Single], + ty::Never => Self::Uninhabited, + _ if cx.is_uninhabited(ty) => Self::Uninhabited, + ty::Adt(..) | ty::Tuple(..) | ty::Ref(..) => Self::Single, // This type is one for which we cannot list constructors, like `str` or `f64`. - _ => smallvec![NonExhaustive], - }; - - SplitWildcard { matrix_ctors: Vec::new(), all_ctors } + _ => Self::Unlistable, + } } - /// Pass a set of constructors relative to which to split this one. Don't call twice, it won't - /// do what you want. - pub(super) fn split<'a>( - &mut self, + /// This is the core logical operation of exhaustiveness checking. This analyzes a column a + /// constructors to 1/ determine which constructors of the type (if any) are missing; 2/ split + /// constructors to handle non-trivial intersections e.g. on ranges or slices. + fn split<'a, 'tcx>( + &self, pcx: &PatCtxt<'_, '_, 'tcx>, ctors: impl Iterator> + Clone, - ) where + ) -> SplitConstructorSet<'tcx> + where 'tcx: 'a, { - // Since `all_ctors` never contains wildcards, this won't recurse further. - self.all_ctors = - self.all_ctors.iter().flat_map(|ctor| ctor.split(pcx, ctors.clone())).collect(); - self.matrix_ctors = ctors.filter(|c| !matches!(c, Wildcard | Opaque)).cloned().collect(); - } - - /// Whether there are any value constructors for this type that are not present in the matrix. - fn any_missing(&self, pcx: &PatCtxt<'_, '_, 'tcx>) -> bool { - self.iter_missing(pcx).next().is_some() - } + let mut missing = Vec::new(); + let mut present: SmallVec<[_; 1]> = SmallVec::new(); + // Constructors in `ctors`, except wildcards. + let mut seen = Vec::new(); + for ctor in ctors.cloned() { + match ctor { + // Wildcards in `ctors` are irrelevant to splitting + Opaque | Wildcard => {} + _ => { + seen.push(ctor); + } + } + } + let mut nonexhaustive_enum_missing_real_variants = false; + match self { + ConstructorSet::Single => { + if seen.is_empty() { + missing.push(Single); + } else { + present.push(Single); + } + } + ConstructorSet::Variants { variants, non_exhaustive } => { + let seen_set: FxHashSet<_> = seen.iter().map(|c| c.as_variant().unwrap()).collect(); + let mut skipped_a_hidden_variant = false; + for variant in variants { + let ctor = Variant(*variant); + if seen_set.contains(&variant) { + present.push(ctor); + } else if ctor.is_doc_hidden_variant(pcx) || ctor.is_unstable_variant(pcx) { + // We don't want to mention any variants that are `doc(hidden)` or behind an + // unstable feature gate if they aren't present in the match. + skipped_a_hidden_variant = true; + } else { + missing.push(ctor); + } + } - /// Iterate over the constructors for this type that are not present in the matrix. - pub(super) fn iter_missing<'a, 'p>( - &'a self, - pcx: &'a PatCtxt<'a, 'p, 'tcx>, - ) -> impl Iterator> + Captures<'p> { - self.all_ctors.iter().filter(move |ctor| !ctor.is_covered_by_any(pcx, &self.matrix_ctors)) - } + if *non_exhaustive { + nonexhaustive_enum_missing_real_variants = !missing.is_empty(); + missing.push(NonExhaustive); + } else if skipped_a_hidden_variant { + // FIXME(Nadrieril): This represents the skipped variants, but isn't super + // clean. Using `NonExhaustive` breaks things elsewhere. + missing.push(Wildcard); + } + } + ConstructorSet::Integers { range_1, range_2, non_exhaustive } => { + let seen_ranges = seen.iter().map(|ctor| ctor.as_int_range().unwrap()).cloned(); + for (seen, splitted_range) in range_1.split(seen_ranges.clone()) { + match seen { + Presence::Unseen => missing.push(IntRange(splitted_range)), + Presence::Seen => present.push(IntRange(splitted_range)), + } + } + if let Some(range_2) = range_2 { + for (seen, splitted_range) in range_2.split(seen_ranges) { + match seen { + Presence::Unseen => missing.push(IntRange(splitted_range)), + Presence::Seen => present.push(IntRange(splitted_range)), + } + } + } - /// Return the set of constructors resulting from splitting the wildcard. As explained at the - /// top of the file, if any constructors are missing we can ignore the present ones. - fn into_ctors(self, pcx: &PatCtxt<'_, '_, 'tcx>) -> SmallVec<[Constructor<'tcx>; 1]> { - if self.any_missing(pcx) { - // Some constructors are missing, thus we can specialize with the special `Missing` - // constructor, which stands for those constructors that are not seen in the matrix, - // and matches the same rows as any of them (namely the wildcard rows). See the top of - // the file for details. - // However, when all constructors are missing we can also specialize with the full - // `Wildcard` constructor. The difference will depend on what we want in diagnostics. - - // If some constructors are missing, we typically want to report those constructors, - // e.g.: - // ``` - // enum Direction { N, S, E, W } - // let Direction::N = ...; - // ``` - // we can report 3 witnesses: `S`, `E`, and `W`. - // - // However, if the user didn't actually specify a constructor - // in this arm, e.g., in - // ``` - // let x: (Direction, Direction, bool) = ...; - // let (_, _, false) = x; - // ``` - // we don't want to show all 16 possible witnesses `(, , - // true)` - we are satisfied with `(_, _, true)`. So if all constructors are missing we - // prefer to report just a wildcard `_`. - // - // The exception is: if we are at the top-level, for example in an empty match, we - // sometimes prefer reporting the list of constructors instead of just `_`. - let report_when_all_missing = pcx.is_top_level && !IntRange::is_integral(pcx.ty); - let ctor = if !self.matrix_ctors.is_empty() || report_when_all_missing { - if pcx.is_non_exhaustive { - Missing { - nonexhaustive_enum_missing_real_variants: self - .iter_missing(pcx) - .any(|c| !(c.is_non_exhaustive() || c.is_unstable_variant(pcx))), + if *non_exhaustive { + missing.push(NonExhaustive); + } + } + &ConstructorSet::Slice(array_len) => { + let seen_slices = seen.iter().map(|c| c.as_slice().unwrap()); + let base_slice = Slice { kind: VarLen(0, 0), array_len }; + for (seen, splitted_slice) in base_slice.split(seen_slices) { + let ctor = Slice(splitted_slice); + match seen { + Presence::Unseen => missing.push(ctor), + Presence::Seen => present.push(ctor), } + } + } + ConstructorSet::SliceOfEmpty => { + // Behaves essentially like `Single`. + let slice = Slice(Slice::new(None, FixedLen(0))); + if seen.is_empty() { + missing.push(slice); } else { - Missing { nonexhaustive_enum_missing_real_variants: false } + present.push(slice); } - } else { - Wildcard - }; - return smallvec![ctor]; + } + ConstructorSet::Unlistable => { + // Since we can't list constructors, we take the ones in the column. This might list + // some constructors several times but there's not much we can do. + present.extend(seen.iter().cloned()); + missing.push(NonExhaustive); + } + // If `exhaustive_patterns` is disabled and our scrutinee is an empty type, we cannot + // expose its emptiness. The exception is if the pattern is at the top level, because we + // want empty matches to be considered exhaustive. + ConstructorSet::Uninhabited + if !pcx.cx.tcx.features().exhaustive_patterns && !pcx.is_top_level => + { + missing.push(NonExhaustive); + } + ConstructorSet::Uninhabited => {} } - // All the constructors are present in the matrix, so we just go through them all. - self.all_ctors + SplitConstructorSet { present, missing, nonexhaustive_enum_missing_real_variants } + } + + /// Compute the set of constructors missing from this column. + /// This is only used for reporting to the user. + pub(super) fn compute_missing<'a, 'tcx>( + &self, + pcx: &PatCtxt<'_, '_, 'tcx>, + ctors: impl Iterator> + Clone, + ) -> Vec> + where + 'tcx: 'a, + { + self.split(pcx, ctors).missing } } diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 21031e8ba9d7e..68ca0e2ac0466 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -307,7 +307,7 @@ use self::ArmType::*; use self::Usefulness::*; -use super::deconstruct_pat::{Constructor, DeconstructedPat, Fields, SplitWildcard}; +use super::deconstruct_pat::{Constructor, ConstructorSet, DeconstructedPat, Fields}; use crate::errors::{NonExhaustiveOmittedPattern, Uncovered}; use rustc_data_structures::captures::Captures; @@ -368,8 +368,6 @@ pub(super) struct PatCtxt<'a, 'p, 'tcx> { /// Whether the current pattern is the whole pattern as found in a match arm, or if it's a /// subpattern. pub(super) is_top_level: bool, - /// Whether the current pattern is from a `non_exhaustive` enum. - pub(super) is_non_exhaustive: bool, } impl<'a, 'p, 'tcx> fmt::Debug for PatCtxt<'a, 'p, 'tcx> { @@ -616,62 +614,41 @@ impl<'p, 'tcx> Usefulness<'p, 'tcx> { WithWitnesses(ref witnesses) if witnesses.is_empty() => self, WithWitnesses(witnesses) => { let new_witnesses = if let Constructor::Missing { .. } = ctor { + let mut missing = ConstructorSet::for_ty(pcx.cx, pcx.ty) + .compute_missing(pcx, matrix.heads().map(DeconstructedPat::ctor)); + if missing.iter().any(|c| c.is_non_exhaustive()) { + // We only report `_` here; listing other constructors would be redundant. + missing = vec![Constructor::NonExhaustive]; + } + // We got the special `Missing` constructor, so each of the missing constructors - // gives a new pattern that is not caught by the match. We list those patterns. - if pcx.is_non_exhaustive { - witnesses - .into_iter() - // Here we don't want the user to try to list all variants, we want them to add - // a wildcard, so we only suggest that. - .map(|witness| { - witness.apply_constructor(pcx, &Constructor::NonExhaustive) - }) - .collect() - } else { - let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx, matrix.heads().map(DeconstructedPat::ctor)); - - // This lets us know if we skipped any variants because they are marked - // `doc(hidden)` or they are unstable feature gate (only stdlib types). - let mut hide_variant_show_wild = false; - // Construct for each missing constructor a "wild" version of this - // constructor, that matches everything that can be built with - // it. For example, if `ctor` is a `Constructor::Variant` for - // `Option::Some`, we get the pattern `Some(_)`. - let mut new_patterns: Vec> = split_wildcard - .iter_missing(pcx) - .filter_map(|missing_ctor| { - // Check if this variant is marked `doc(hidden)` - if missing_ctor.is_doc_hidden_variant(pcx) - || missing_ctor.is_unstable_variant(pcx) - { - hide_variant_show_wild = true; - return None; - } - Some(DeconstructedPat::wild_from_ctor(pcx, missing_ctor.clone())) - }) - .collect(); - - if hide_variant_show_wild { - new_patterns.push(DeconstructedPat::wildcard(pcx.ty, pcx.span)); - } - - witnesses - .into_iter() - .flat_map(|witness| { - new_patterns.iter().map(move |pat| { - Witness( - witness - .0 - .iter() - .chain(once(pat)) - .map(DeconstructedPat::clone_and_forget_reachability) - .collect(), - ) - }) + // gives a new pattern that is not caught by the match. + // We construct for each missing constructor a version of this constructor with + // wildcards for fields, i.e. that matches everything that can be built with it. + // For example, if `ctor` is a `Constructor::Variant` for `Option::Some`, we get + // the pattern `Some(_)`. + let new_patterns: Vec> = missing + .into_iter() + .map(|missing_ctor| { + DeconstructedPat::wild_from_ctor(pcx, missing_ctor.clone()) + }) + .collect(); + + witnesses + .into_iter() + .flat_map(|witness| { + new_patterns.iter().map(move |pat| { + Witness( + witness + .0 + .iter() + .chain(once(pat)) + .map(DeconstructedPat::clone_and_forget_reachability) + .collect(), + ) }) - .collect() - } + }) + .collect() } else { witnesses .into_iter() @@ -844,9 +821,8 @@ fn is_useful<'p, 'tcx>( ty = row.head().ty(); } } - let is_non_exhaustive = cx.is_foreign_non_exhaustive_enum(ty); debug!("v.head: {:?}, v.span: {:?}", v.head(), v.head().span()); - let pcx = &PatCtxt { cx, ty, span: v.head().span(), is_top_level, is_non_exhaustive }; + let pcx = &PatCtxt { cx, ty, span: v.head().span(), is_top_level }; let v_ctor = v.head().ctor(); debug!(?v_ctor); @@ -861,7 +837,8 @@ fn is_useful<'p, 'tcx>( } // We split the head constructor of `v`. let split_ctors = v_ctor.split(pcx, matrix.heads().map(DeconstructedPat::ctor)); - let is_non_exhaustive_and_wild = is_non_exhaustive && v_ctor.is_wildcard(); + let is_non_exhaustive_and_wild = + cx.is_foreign_non_exhaustive_enum(ty) && v_ctor.is_wildcard(); // For each constructor, we compute whether there's a value that starts with it that would // witness the usefulness of `v`. let start_matrix = &matrix; @@ -898,24 +875,21 @@ fn is_useful<'p, 'tcx>( Constructor::Missing { nonexhaustive_enum_missing_real_variants: true } ) { - let patterns = { - let mut split_wildcard = SplitWildcard::new(pcx); - split_wildcard.split(pcx, matrix.heads().map(DeconstructedPat::ctor)); - // Construct for each missing constructor a "wild" version of this - // constructor, that matches everything that can be built with - // it. For example, if `ctor` is a `Constructor::Variant` for - // `Option::Some`, we get the pattern `Some(_)`. - split_wildcard - .iter_missing(pcx) - // Filter out the `NonExhaustive` because we want to list only real - // variants. Also remove any unstable feature gated variants. - // Because of how we computed `nonexhaustive_enum_missing_real_variants`, - // this will not return an empty `Vec`. - .filter(|c| !(c.is_non_exhaustive() || c.is_unstable_variant(pcx))) - .cloned() - .map(|missing_ctor| DeconstructedPat::wild_from_ctor(pcx, missing_ctor)) - .collect::>() - }; + let missing = ConstructorSet::for_ty(pcx.cx, pcx.ty) + .compute_missing(pcx, matrix.heads().map(DeconstructedPat::ctor)); + // Construct for each missing constructor a "wild" version of this + // constructor, that matches everything that can be built with + // it. For example, if `ctor` is a `Constructor::Variant` for + // `Option::Some`, we get the pattern `Some(_)`. + let patterns = missing + .into_iter() + // Filter out the `NonExhaustive` because we want to list only real + // variants. Also remove any unstable feature gated variants. + // Because of how we computed `nonexhaustive_enum_missing_real_variants`, + // this will not return an empty `Vec`. + .filter(|c| !(c.is_non_exhaustive() || c.is_unstable_variant(pcx))) + .map(|missing_ctor| DeconstructedPat::wild_from_ctor(pcx, missing_ctor)) + .collect::>(); // Report that a match of a `non_exhaustive` enum marked with `non_exhaustive_omitted_patterns` // is not exhaustive enough. From 2f4cab4d214dd8a5e0b3e90d6e517aa97bd94fd9 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 17:09:20 +0200 Subject: [PATCH 5/8] Clarify handling of hidden variants --- .../src/thir/pattern/deconstruct_pat.rs | 135 +++++++++--------- .../src/thir/pattern/usefulness.rs | 15 +- 2 files changed, 76 insertions(+), 74 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index ab30f5109c77d..2a3fd416a5281 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -609,19 +609,23 @@ pub(super) enum Constructor<'tcx> { /// boxes for the purposes of exhaustiveness: we must not inspect them, and they /// don't count towards making a match exhaustive. Opaque, + /// Or-pattern. + Or, + /// Wildcard pattern. + Wildcard, /// Fake extra constructor for enums that aren't allowed to be matched exhaustively. Also used /// for those types for which we cannot list constructors explicitly, like `f64` and `str`. NonExhaustive, - /// Stands for constructors that are not seen in the matrix, as explained in the code for - /// [`Constructor::split`]. The carried `bool` is used for the `non_exhaustive_omitted_patterns` - /// lint. + /// Fake extra constructor for variants that should not be mentioned in diagnostics. + /// We use this for variants behind an unstable gate as well as + /// `#[doc(hidden)]` ones. + Hidden, + /// Fake extra constructor for constructors that are not seen in the matrix, as explained in the + /// code for [`Constructor::split`]. The carried `bool` is used for the + /// `non_exhaustive_omitted_patterns` lint. Missing { - nonexhaustive_enum_missing_real_variants: bool, + nonexhaustive_enum_missing_visible_variants: bool, }, - /// Wildcard pattern. - Wildcard, - /// Or-pattern. - Or, } impl<'tcx> Constructor<'tcx> { @@ -652,32 +656,6 @@ impl<'tcx> Constructor<'tcx> { } } - /// Checks if the `Constructor` is a variant and `TyCtxt::eval_stability` returns - /// `EvalResult::Deny { .. }`. - /// - /// This means that the variant has a stdlib unstable feature marking it. - pub(super) fn is_unstable_variant(&self, pcx: &PatCtxt<'_, '_, 'tcx>) -> bool { - if let Constructor::Variant(idx) = self && let ty::Adt(adt, _) = pcx.ty.kind() { - let variant_def_id = adt.variant(*idx).def_id; - // Filter variants that depend on a disabled unstable feature. - return matches!( - pcx.cx.tcx.eval_stability(variant_def_id, None, DUMMY_SP, None), - EvalResult::Deny { .. } - ); - } - false - } - - /// Checks if the `Constructor` is a `Constructor::Variant` with a `#[doc(hidden)]` - /// attribute from a type not local to the current crate. - pub(super) fn is_doc_hidden_variant(&self, pcx: &PatCtxt<'_, '_, 'tcx>) -> bool { - if let Constructor::Variant(idx) = self && let ty::Adt(adt, _) = pcx.ty.kind() { - let variant_def_id = adt.variants()[*idx].def_id; - return pcx.cx.tcx.is_doc_hidden(variant_def_id) && !variant_def_id.is_local(); - } - false - } - fn variant_index_for_adt(&self, adt: ty::AdtDef<'tcx>) -> VariantIdx { match *self { Variant(idx) => idx, @@ -713,8 +691,9 @@ impl<'tcx> Constructor<'tcx> { | F32Range(..) | F64Range(..) | IntRange(..) - | NonExhaustive | Opaque + | NonExhaustive + | Hidden | Missing { .. } | Wildcard => 0, Or => bug!("The `Or` constructor doesn't have a fixed arity"), @@ -795,8 +774,8 @@ impl<'tcx> Constructor<'tcx> { Wildcard } else { Missing { - nonexhaustive_enum_missing_real_variants: split_set - .nonexhaustive_enum_missing_real_variants, + nonexhaustive_enum_missing_visible_variants: split_set + .nonexhaustive_enum_missing_visible_variants, } }; smallvec![ctor] @@ -828,8 +807,8 @@ impl<'tcx> Constructor<'tcx> { match (self, other) { // Wildcards cover anything (_, Wildcard) => true, - // The missing ctors are not covered by anything in the matrix except wildcards. - (Missing { .. } | Wildcard, _) => false, + // Only a wildcard pattern can match these special constructors. + (Wildcard | Missing { .. } | NonExhaustive | Hidden, _) => false, (Single, Single) => true, (Variant(self_id), Variant(other_id)) => self_id == other_id, @@ -860,8 +839,6 @@ impl<'tcx> Constructor<'tcx> { // We are trying to inspect an opaque constant. Thus we skip the row. (Opaque, _) | (_, Opaque) => false, - // Only a wildcard pattern can match the special extra constructor. - (NonExhaustive, _) => false, _ => span_bug!( pcx.span, @@ -878,7 +855,14 @@ pub(super) enum ConstructorSet { /// The type has a single constructor, e.g. `&T` or a struct. Single, /// This type has the following list of constructors. - Variants { variants: Vec, non_exhaustive: bool }, + /// Some variants are hidden, which means they won't be mentioned in diagnostics unless the user + /// mentioned them first. We use this for variants behind an unstable gate as well as + /// `#[doc(hidden)]` ones. + Variants { + visible_variants: Vec, + hidden_variants: Vec, + non_exhaustive: bool, + }, /// The type is spanned by integer values. The range or ranges give the set of allowed values. /// The second range is only useful for `char`. /// This is reused for bool. FIXME: don't. @@ -915,7 +899,7 @@ struct SplitConstructorSet<'tcx> { present: SmallVec<[Constructor<'tcx>; 1]>, missing: Vec>, /// For the `non_exhaustive_omitted_patterns` lint. - nonexhaustive_enum_missing_real_variants: bool, + nonexhaustive_enum_missing_visible_variants: bool, } impl ConstructorSet { @@ -1001,7 +985,7 @@ impl ConstructorSet { Self::Uninhabited } else { let is_exhaustive_pat_feature = cx.tcx.features().exhaustive_patterns; - let variants: Vec<_> = def + let (hidden_variants, visible_variants) = def .variants() .iter_enumerated() .filter(|(_, v)| { @@ -1013,9 +997,24 @@ impl ConstructorSet { .apply(cx.tcx, cx.param_env, cx.module) }) .map(|(idx, _)| idx) - .collect(); - - Self::Variants { variants, non_exhaustive: is_declared_nonexhaustive } + .partition(|idx| { + let variant_def_id = def.variant(*idx).def_id; + // Filter variants that depend on a disabled unstable feature. + let is_unstable = matches!( + cx.tcx.eval_stability(variant_def_id, None, DUMMY_SP, None), + EvalResult::Deny { .. } + ); + // Filter foreign `#[doc(hidden)]` variants. + let is_doc_hidden = + cx.tcx.is_doc_hidden(variant_def_id) && !variant_def_id.is_local(); + is_unstable || is_doc_hidden + }); + + Self::Variants { + visible_variants, + hidden_variants, + non_exhaustive: is_declared_nonexhaustive, + } } } ty::Never => Self::Uninhabited, @@ -1050,7 +1049,7 @@ impl ConstructorSet { } } } - let mut nonexhaustive_enum_missing_real_variants = false; + let mut nonexhaustive_enum_missing_visible_variants = false; match self { ConstructorSet::Single => { if seen.is_empty() { @@ -1059,29 +1058,34 @@ impl ConstructorSet { present.push(Single); } } - ConstructorSet::Variants { variants, non_exhaustive } => { + ConstructorSet::Variants { visible_variants, hidden_variants, non_exhaustive } => { let seen_set: FxHashSet<_> = seen.iter().map(|c| c.as_variant().unwrap()).collect(); let mut skipped_a_hidden_variant = false; - for variant in variants { + for variant in visible_variants { let ctor = Variant(*variant); if seen_set.contains(&variant) { present.push(ctor); - } else if ctor.is_doc_hidden_variant(pcx) || ctor.is_unstable_variant(pcx) { - // We don't want to mention any variants that are `doc(hidden)` or behind an - // unstable feature gate if they aren't present in the match. - skipped_a_hidden_variant = true; } else { missing.push(ctor); } } + nonexhaustive_enum_missing_visible_variants = + *non_exhaustive && !missing.is_empty(); + + for variant in hidden_variants { + let ctor = Variant(*variant); + if seen_set.contains(&variant) { + present.push(ctor); + } else { + skipped_a_hidden_variant = true; + } + } + if skipped_a_hidden_variant { + missing.push(Hidden); + } if *non_exhaustive { - nonexhaustive_enum_missing_real_variants = !missing.is_empty(); missing.push(NonExhaustive); - } else if skipped_a_hidden_variant { - // FIXME(Nadrieril): This represents the skipped variants, but isn't super - // clean. Using `NonExhaustive` breaks things elsewhere. - missing.push(Wildcard); } } ConstructorSet::Integers { range_1, range_2, non_exhaustive } => { @@ -1142,7 +1146,7 @@ impl ConstructorSet { ConstructorSet::Uninhabited => {} } - SplitConstructorSet { present, missing, nonexhaustive_enum_missing_real_variants } + SplitConstructorSet { present, missing, nonexhaustive_enum_missing_visible_variants } } /// Compute the set of constructors missing from this column. @@ -1272,8 +1276,9 @@ impl<'p, 'tcx> Fields<'p, 'tcx> { | F32Range(..) | F64Range(..) | IntRange(..) - | NonExhaustive | Opaque + | NonExhaustive + | Hidden | Missing { .. } | Wildcard => Fields::empty(), Or => { @@ -1587,7 +1592,7 @@ impl<'p, 'tcx> DeconstructedPat<'p, 'tcx> { } &Str(value) => PatKind::Constant { value }, IntRange(range) => return range.to_pat(cx.tcx, self.ty), - Wildcard | NonExhaustive => PatKind::Wild, + Wildcard | NonExhaustive | Hidden => PatKind::Wild, Missing { .. } => bug!( "trying to convert a `Missing` constructor into a `Pat`; this is probably a bug, `Missing` should have been processed in `apply_constructors`" @@ -1770,15 +1775,15 @@ impl<'p, 'tcx> fmt::Debug for DeconstructedPat<'p, 'tcx> { F32Range(lo, hi, end) => write!(f, "{lo}{end}{hi}"), F64Range(lo, hi, end) => write!(f, "{lo}{end}{hi}"), IntRange(range) => write!(f, "{range:?}"), // Best-effort, will render e.g. `false` as `0..=0` - Wildcard | Missing { .. } | NonExhaustive => write!(f, "_ : {:?}", self.ty), + Str(value) => write!(f, "{value}"), + Opaque => write!(f, ""), Or => { for pat in self.iter_fields() { write!(f, "{}{:?}", start_or_continue(" | "), pat)?; } Ok(()) } - Str(value) => write!(f, "{value}"), - Opaque => write!(f, ""), + Wildcard | Missing { .. } | NonExhaustive | Hidden => write!(f, "_ : {:?}", self.ty), } } } diff --git a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs index 68ca0e2ac0466..6cd73c7eaa958 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/usefulness.rs @@ -872,22 +872,19 @@ fn is_useful<'p, 'tcx>( && usefulness.is_useful() && matches!(witness_preference, RealArm) && matches!( &ctor, - Constructor::Missing { nonexhaustive_enum_missing_real_variants: true } + Constructor::Missing { nonexhaustive_enum_missing_visible_variants: true } ) { let missing = ConstructorSet::for_ty(pcx.cx, pcx.ty) .compute_missing(pcx, matrix.heads().map(DeconstructedPat::ctor)); - // Construct for each missing constructor a "wild" version of this - // constructor, that matches everything that can be built with - // it. For example, if `ctor` is a `Constructor::Variant` for - // `Option::Some`, we get the pattern `Some(_)`. + // Construct for each missing constructor a "wild" version of this constructor, that + // matches everything that can be built with it. For example, if `ctor` is a + // `Constructor::Variant` for `Option::Some`, we get the pattern `Some(_)`. let patterns = missing .into_iter() - // Filter out the `NonExhaustive` because we want to list only real - // variants. Also remove any unstable feature gated variants. - // Because of how we computed `nonexhaustive_enum_missing_real_variants`, + // Because of how we computed `nonexhaustive_enum_missing_visible_variants`, // this will not return an empty `Vec`. - .filter(|c| !(c.is_non_exhaustive() || c.is_unstable_variant(pcx))) + .filter(|c| !(matches!(c, Constructor::NonExhaustive | Constructor::Hidden))) .map(|missing_ctor| DeconstructedPat::wild_from_ctor(pcx, missing_ctor)) .collect::>(); From fda0301b3395ebc6d33d9da566027010b37e1adc Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Oct 2023 19:51:18 +0200 Subject: [PATCH 6/8] Don't collect `seen` if not needed --- .../src/thir/pattern/deconstruct_pat.rs | 30 +++++++------------ 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index 2a3fd416a5281..a2bbde250c3d1 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -1036,30 +1036,21 @@ impl ConstructorSet { where 'tcx: 'a, { - let mut missing = Vec::new(); let mut present: SmallVec<[_; 1]> = SmallVec::new(); + let mut missing = Vec::new(); // Constructors in `ctors`, except wildcards. - let mut seen = Vec::new(); - for ctor in ctors.cloned() { - match ctor { - // Wildcards in `ctors` are irrelevant to splitting - Opaque | Wildcard => {} - _ => { - seen.push(ctor); - } - } - } + let mut seen = ctors.filter(|c| !(matches!(c, Opaque | Wildcard))); let mut nonexhaustive_enum_missing_visible_variants = false; match self { ConstructorSet::Single => { - if seen.is_empty() { + if seen.next().is_none() { missing.push(Single); } else { present.push(Single); } } ConstructorSet::Variants { visible_variants, hidden_variants, non_exhaustive } => { - let seen_set: FxHashSet<_> = seen.iter().map(|c| c.as_variant().unwrap()).collect(); + let seen_set: FxHashSet<_> = seen.map(|c| c.as_variant().unwrap()).collect(); let mut skipped_a_hidden_variant = false; for variant in visible_variants { let ctor = Variant(*variant); @@ -1089,15 +1080,16 @@ impl ConstructorSet { } } ConstructorSet::Integers { range_1, range_2, non_exhaustive } => { - let seen_ranges = seen.iter().map(|ctor| ctor.as_int_range().unwrap()).cloned(); - for (seen, splitted_range) in range_1.split(seen_ranges.clone()) { + let seen_ranges: Vec<_> = + seen.map(|ctor| ctor.as_int_range().unwrap().clone()).collect(); + for (seen, splitted_range) in range_1.split(seen_ranges.iter().cloned()) { match seen { Presence::Unseen => missing.push(IntRange(splitted_range)), Presence::Seen => present.push(IntRange(splitted_range)), } } if let Some(range_2) = range_2 { - for (seen, splitted_range) in range_2.split(seen_ranges) { + for (seen, splitted_range) in range_2.split(seen_ranges.into_iter()) { match seen { Presence::Unseen => missing.push(IntRange(splitted_range)), Presence::Seen => present.push(IntRange(splitted_range)), @@ -1110,7 +1102,7 @@ impl ConstructorSet { } } &ConstructorSet::Slice(array_len) => { - let seen_slices = seen.iter().map(|c| c.as_slice().unwrap()); + let seen_slices = seen.map(|c| c.as_slice().unwrap()); let base_slice = Slice { kind: VarLen(0, 0), array_len }; for (seen, splitted_slice) in base_slice.split(seen_slices) { let ctor = Slice(splitted_slice); @@ -1123,7 +1115,7 @@ impl ConstructorSet { ConstructorSet::SliceOfEmpty => { // Behaves essentially like `Single`. let slice = Slice(Slice::new(None, FixedLen(0))); - if seen.is_empty() { + if seen.next().is_none() { missing.push(slice); } else { present.push(slice); @@ -1132,7 +1124,7 @@ impl ConstructorSet { ConstructorSet::Unlistable => { // Since we can't list constructors, we take the ones in the column. This might list // some constructors several times but there's not much we can do. - present.extend(seen.iter().cloned()); + present.extend(seen.cloned()); missing.push(NonExhaustive); } // If `exhaustive_patterns` is disabled and our scrutinee is an empty type, we cannot From edf6a2d3371218043e4858b81fe48c99535cabe7 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Wed, 4 Oct 2023 15:59:16 +0200 Subject: [PATCH 7/8] Clarify for review --- .../src/thir/pattern/deconstruct_pat.rs | 58 +++++++++++-------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index a2bbde250c3d1..dd00982f7f627 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -201,18 +201,26 @@ impl IntRange { /// intersections between an output range and a column range are inclusions. No output range /// straddles the boundary of one of the inputs. /// + /// Additionally, we track for each output range whether it is covered by one of the column ranges or not. + /// /// The following input: /// ```text - /// |-------------------------| // `self` - /// |------| |----------| |----| - /// |-------| |-------| + /// (--------------------------) // `self` + /// (------) (----------) (-) + /// (------) (--------) /// ``` - /// would be iterated over as follows: + /// is first intersected with `self`: /// ```text - /// ||---|--||-|---|---|---|--| + /// (--------------------------) // `self` + /// (----) (----------) (-) + /// (------) (--------) /// ``` - /// - /// Additionally, we track for each output range whether it is covered by one of the column ranges or not. + /// and then iterated over as follows: + /// ```text + /// (-(--)-(-)-(------)-)--(-)- + /// ``` + /// where each sequence of dashes is an output range, and dashes outside parentheses are marked + /// as `Presence::Missing`. fn split( &self, column_ranges: impl Iterator, @@ -245,33 +253,30 @@ impl IntRange { .map(unpack_intrange) .flat_map(|[lo, hi]| [(lo, 1), (hi, -1)]) .collect(); + // We sort by boundary, and for each boundary we sort the "closing parentheses" first. The + // order of +1/-1 for a same boundary value is actually irrelevant, because we only look at + // the accumulated count between distinct boundary values. boundaries.sort_unstable(); - // Counter for parenthesis matching. - let mut paren_counter = 0isize; - let boundaries_with_paren_counts = boundaries - .into_iter() - // Accumulate parenthesis counts. - .map(move |(bdy, delta)| { - paren_counter += delta; - (bdy, paren_counter) - }); - let [self_start, self_end] = unpack_intrange(self.clone()); + // Accumulate parenthesis counts. + let mut paren_counter = 0isize; // Gather pairs of adjacent boundaries. let mut prev_bdy = self_start; - let mut prev_paren_count = 0; - boundaries_with_paren_counts - // End with the end of the range. The count is irrelevant. + boundaries + .into_iter() + // End with the end of the range. The count is ignored. .chain(once((self_end, 0))) - // List pairs of adjacent boundaries. - .map(move |(bdy, paren_count)| { - let ret = (prev_bdy, prev_paren_count, bdy); + // List pairs of adjacent boundaries and the count between them. + .map(move |(bdy, delta)| { + // `delta` affects the count as we cross `bdy`, so the relevant count between + // `prev_bdy` and `bdy` is untouched by `delta`. + let ret = (prev_bdy, paren_counter, bdy); prev_bdy = bdy; - prev_paren_count = paren_count; + paren_counter += delta; ret }) - // Skip duplicates. + // Skip empty ranges. .filter(|&(prev_bdy, _, bdy)| prev_bdy != bdy) // Convert back to ranges. .map(move |(prev_bdy, paren_count, bdy)| { @@ -503,7 +508,10 @@ impl Slice { let smaller_lengths; let arity = self.arity(); let mut max_slice = self.kind; + // Tracks the smallest variable-length slice we've seen. Any slice arity above it is + // therefore `Presence::Seen` in the column. let mut min_var_len = usize::MAX; + // Tracks the fixed-length slices we've seen, to mark them as `Presence::Seen`. let mut seen_fixed_lens = FxHashSet::default(); match &mut max_slice { VarLen(max_prefix_len, max_suffix_len) => { From c1b29b338dc37f96f0695e393743ce35508d3704 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Thu, 5 Oct 2023 00:58:14 +0200 Subject: [PATCH 8/8] Fix handling slices of empty types --- .../src/thir/pattern/deconstruct_pat.rs | 29 ++++++++++---- tests/ui/pattern/usefulness/slice_of_empty.rs | 22 +++++++++++ .../pattern/usefulness/slice_of_empty.stderr | 39 +++++++++++++++++++ 3 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 tests/ui/pattern/usefulness/slice_of_empty.rs create mode 100644 tests/ui/pattern/usefulness/slice_of_empty.stderr diff --git a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs index dd00982f7f627..a7a000ba31c6f 100644 --- a/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs +++ b/compiler/rustc_mir_build/src/thir/pattern/deconstruct_pat.rs @@ -859,6 +859,7 @@ impl<'tcx> Constructor<'tcx> { } /// Describes the set of all constructors for a type. +#[derive(Debug)] pub(super) enum ConstructorSet { /// The type has a single constructor, e.g. `&T` or a struct. Single, @@ -903,6 +904,7 @@ pub(super) enum ConstructorSet { /// - constructors in `present` and `missing` are split for the column; in other words, they are /// either fully included in or disjoint from each constructor in the column. This avoids /// non-trivial intersections like between `0..10` and `5..15`. +#[derive(Debug)] struct SplitConstructorSet<'tcx> { present: SmallVec<[Constructor<'tcx>; 1]>, missing: Vec>, @@ -911,8 +913,8 @@ struct SplitConstructorSet<'tcx> { } impl ConstructorSet { + #[instrument(level = "debug", skip(cx), ret)] pub(super) fn for_ty<'p, 'tcx>(cx: &MatchCheckCtxt<'p, 'tcx>, ty: Ty<'tcx>) -> Self { - debug!("ConstructorSet::for_ty({:?})", ty); let make_range = |start, end| IntRange::from_range(cx.tcx, start, end, ty, RangeEnd::Included); // This determines the set of all possible constructors for the type `ty`. For numbers, @@ -1036,6 +1038,7 @@ impl ConstructorSet { /// This is the core logical operation of exhaustiveness checking. This analyzes a column a /// constructors to 1/ determine which constructors of the type (if any) are missing; 2/ split /// constructors to handle non-trivial intersections e.g. on ranges or slices. + #[instrument(level = "debug", skip(self, pcx, ctors), ret)] fn split<'a, 'tcx>( &self, pcx: &PatCtxt<'_, '_, 'tcx>, @@ -1111,7 +1114,7 @@ impl ConstructorSet { } &ConstructorSet::Slice(array_len) => { let seen_slices = seen.map(|c| c.as_slice().unwrap()); - let base_slice = Slice { kind: VarLen(0, 0), array_len }; + let base_slice = Slice::new(array_len, VarLen(0, 0)); for (seen, splitted_slice) in base_slice.split(seen_slices) { let ctor = Slice(splitted_slice); match seen { @@ -1121,12 +1124,22 @@ impl ConstructorSet { } } ConstructorSet::SliceOfEmpty => { - // Behaves essentially like `Single`. - let slice = Slice(Slice::new(None, FixedLen(0))); - if seen.next().is_none() { - missing.push(slice); - } else { - present.push(slice); + // This one is tricky because even though there's only one possible value of this + // type (namely `[]`), slice patterns of all lengths are allowed, they're just + // unreachable if length != 0. + // We still gather the seen constructors in `present`, but the only slice that can + // go in `missing` is `[]`. + let seen_slices = seen.map(|c| c.as_slice().unwrap()); + let base_slice = Slice::new(None, VarLen(0, 0)); + for (seen, splitted_slice) in base_slice.split(seen_slices) { + let ctor = Slice(splitted_slice); + match seen { + Presence::Seen => present.push(ctor), + Presence::Unseen if splitted_slice.arity() == 0 => { + missing.push(Slice(Slice::new(None, FixedLen(0)))) + } + Presence::Unseen => {} + } } } ConstructorSet::Unlistable => { diff --git a/tests/ui/pattern/usefulness/slice_of_empty.rs b/tests/ui/pattern/usefulness/slice_of_empty.rs new file mode 100644 index 0000000000000..fe068871195a2 --- /dev/null +++ b/tests/ui/pattern/usefulness/slice_of_empty.rs @@ -0,0 +1,22 @@ +#![feature(never_type)] +#![feature(exhaustive_patterns)] +#![deny(unreachable_patterns)] + +fn main() {} + +fn foo(nevers: &[!]) { + match nevers { + &[] => (), + }; + + match nevers { + &[] => (), + &[_] => (), //~ ERROR unreachable pattern + &[_, _, ..] => (), //~ ERROR unreachable pattern + }; + + match nevers { + //~^ ERROR non-exhaustive patterns: `&[]` not covered + &[_] => (), //~ ERROR unreachable pattern + }; +} diff --git a/tests/ui/pattern/usefulness/slice_of_empty.stderr b/tests/ui/pattern/usefulness/slice_of_empty.stderr new file mode 100644 index 0000000000000..07bb6b3a67d8e --- /dev/null +++ b/tests/ui/pattern/usefulness/slice_of_empty.stderr @@ -0,0 +1,39 @@ +error: unreachable pattern + --> $DIR/slice_of_empty.rs:14:9 + | +LL | &[_] => (), + | ^^^^ + | +note: the lint level is defined here + --> $DIR/slice_of_empty.rs:3:9 + | +LL | #![deny(unreachable_patterns)] + | ^^^^^^^^^^^^^^^^^^^^ + +error: unreachable pattern + --> $DIR/slice_of_empty.rs:15:9 + | +LL | &[_, _, ..] => (), + | ^^^^^^^^^^^ + +error: unreachable pattern + --> $DIR/slice_of_empty.rs:20:9 + | +LL | &[_] => (), + | ^^^^ + +error[E0004]: non-exhaustive patterns: `&[]` not covered + --> $DIR/slice_of_empty.rs:18:11 + | +LL | match nevers { + | ^^^^^^ pattern `&[]` not covered + | + = note: the matched value is of type `&[!]` +help: ensure that all possible cases are being handled by adding a match arm with a wildcard pattern or an explicit pattern as shown + | +LL | &[_] => (), &[] => todo!(), + | ++++++++++++++++ + +error: aborting due to 4 previous errors + +For more information about this error, try `rustc --explain E0004`.