feat(streaming): only output required columns for MV-on-MV (#8555)

Signed-off-by: Bugen Zhao <i@bugenzhao.com>
risingwavelabs · Mar 16, 2023 · 25a4809 · 25a4809
1 parent 64a5f88
commit 25a4809
Show file tree

Hide file tree

Showing 16 changed files with 267 additions and 341 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/dashboard/proto/gen/catalog.ts b/dashboard/proto/gen/catalog.ts
diff --git a/dashboard/proto/gen/expr.ts b/dashboard/proto/gen/expr.ts
diff --git a/dashboard/proto/gen/stream_plan.ts b/dashboard/proto/gen/stream_plan.ts
diff --git a/e2e_test/ddl/alter_table_column.slt b/e2e_test/ddl/alter_table_column.slt
@@ -127,7 +127,6 @@ select * from mv3;
 3 3.3 3-3
 
 # Drop column
-# TODO(#4529): create mview on partial columns and test whether dropping the unrefereced column works.
 statement error being referenced
 alter table t drop column s;
 
@@ -137,6 +136,10 @@ drop materialized view mv2;
 statement ok
 drop materialized view mv3;
 
+statement ok
+create materialized view mv5 as select v, s from t;
+
+# This should succeed because no materialized view references column `r`, not even `mv5`. (#4529)
 statement ok
 alter table t drop column r;
 
@@ -152,6 +155,13 @@ select v, s from t;
 2 NULL
 3 3-3
 
+query IR rowsort
+select v, s from mv5;
+----
+1 1-1
+2 NULL
+3 3-3
+
 # Add column after dropping column, to test that the column ID is not reused.
 statement ok
 alter table t add column r real;
@@ -191,6 +201,9 @@ select v, s, r from t;
 4 4-4 4.4
 
 # Clean up
+statement ok
+drop materialized view mv5;
+
 statement ok
 drop materialized view mv;
 

diff --git a/proto/stream_plan.proto b/proto/stream_plan.proto
@@ -384,14 +384,16 @@ enum ChainType {
 //   2. BatchPlanNode for snapshot read.
 message ChainNode {
   uint32 table_id = 1;
-  // The schema of input stream, which will be used to build a MergeNode
-  repeated plan_common.Field upstream_fields = 2;
-  // The columns from the upstream table to output.
-  // TODO: rename this field.
-  repeated uint32 upstream_column_indices = 3;
+
   // The columns from the upstream table that'll be internally required by this chain node.
-  // TODO: This is currently only used by backfill table scan. We should also apply it to the upstream dispatcher (#4529).
-  repeated int32 upstream_column_ids = 8;
+  // - For non-backfill chain node, it's the same as the output columns.
+  // - For backfill chain node, the primary key columns are additionally included.
+  repeated int32 upstream_column_ids = 2;
+  // The columns to be output by this chain node. Each index is a position in the internally required columns (`upstream_column_ids`).
+  // - For non-backfill chain node, it's simply all the columns.
+  // - For backfill chain node, this strips the primary key columns if they're unnecessary.
+  repeated uint32 output_indices = 3;
+
   // Generally, the barrier needs to be rearranged during the MV creation process, so that data can
   // be flushed to shared buffer periodically, instead of making the first epoch from batch query extra
   // large. However, in some cases, e.g., shared state, the barrier cannot be rearranged in ChainNode.