diff --git a/Cargo.lock b/Cargo.lock index b9887892cc47..ff6d8255a901 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6507,6 +6507,7 @@ dependencies = [ "anyhow", "chrono", "clap 4.2.7", + "expect-test", "itertools", "libtest-mimic", "madsim-tokio", diff --git a/src/sqlparser/src/ast/mod.rs b/src/sqlparser/src/ast/mod.rs index 11a10e784bdd..8649d2ab0da0 100644 --- a/src/sqlparser/src/ast/mod.rs +++ b/src/sqlparser/src/ast/mod.rs @@ -200,6 +200,10 @@ impl ObjectName { .collect::>() .join(".") } + + pub fn from_test_str(s: &str) -> Self { + ObjectName::from(vec![s.into()]) + } } impl fmt::Display for ObjectName { diff --git a/src/tests/sqlsmith/Cargo.toml b/src/tests/sqlsmith/Cargo.toml index bbe1133a797b..afd3da79de85 100644 --- a/src/tests/sqlsmith/Cargo.toml +++ b/src/tests/sqlsmith/Cargo.toml @@ -34,6 +34,7 @@ tracing-subscriber = "0.3.16" workspace-hack = { path = "../../workspace-hack" } [dev-dependencies] +expect-test = "1" libtest-mimic = "0.6" [[bin]] diff --git a/src/tests/sqlsmith/README.md b/src/tests/sqlsmith/README.md index 83a24c19c416..9267627c6a1d 100644 --- a/src/tests/sqlsmith/README.md +++ b/src/tests/sqlsmith/README.md @@ -19,8 +19,29 @@ This test will be run as a unit test: Take a look at [`gen_queries.sh`](scripts/gen_queries.sh). -Caveat: Even with a given snapshot, certain parts of the system are non-determninistic. -For instance with scheduler errors, the same query may not trigger errors when executed. +Sometimes during the generation process some failed queries might be encountered. + +For instance if the logs produces: +```sh +[WARN] Cluster crashed while generating queries. see .risingwave/log/generate-22.log for more information. +``` + +You can re-run the failed query: +```sh +RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \ +./target/sim/ci-sim/risingwave_simulation \ + --run-sqlsmith-queries $SNAPSHOT_DIR/failed/22 +``` + +The `failed query` is a summary of the full query set. +In case it does not actually fail, it might be wrong. + +You can re-run the full query set as well in that case: +```sh +RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \ +./target/sim/ci-sim/risingwave_simulation \ + --run-sqlsmith-queries $SNAPSHOT_DIR/22 +``` ## Running with Madsim diff --git a/src/tests/sqlsmith/scripts/gen_queries.sh b/src/tests/sqlsmith/scripts/gen_queries.sh index d9c5c657afc8..73d2764c3460 100755 --- a/src/tests/sqlsmith/scripts/gen_queries.sh +++ b/src/tests/sqlsmith/scripts/gen_queries.sh @@ -41,14 +41,19 @@ echo_err() { # Get reason for generation crash. get_failure_reason() { - tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2 + cat $1 | tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2 +} + +check_if_failed() { + grep -B 2 "$CRASH_MESSAGE" || true } # Extract queries from file $1, write to file $2 extract_queries() { QUERIES=$(grep "\[EXECUTING .*\]: " < "$1" | sed -E 's/^.*\[EXECUTING .*\]: (.*)$/\1;/') - FAIL_REASON=$(get_failure_reason < "$1") - if [[ -n "$FAIL_REASON" ]]; then + FAILED=$(check_if_failed < "$1") + if [[ -n "$FAILED" ]]; then + FAIL_REASON=$(get_failure_reason < "$1") echo_err "[WARN] Cluster crashed while generating queries. see $1 for more information." QUERIES=$(echo -e "$QUERIES" | sed -E '$ s/(.*)/-- \1/') fi @@ -59,10 +64,14 @@ extract_ddl() { grep "\[EXECUTING CREATE .*\]: " | sed -E 's/^.*\[EXECUTING CREATE .*\]: (.*)$/\1;/' | pg_format || true } -extract_dml() { +extract_inserts() { grep "\[EXECUTING INSERT\]: " | sed -E 's/^.*\[EXECUTING INSERT\]: (.*)$/\1;/' || true } +extract_updates() { + grep "\[EXECUTING UPDATES\]: " | sed -E 's/^.*\[EXECUTING UPDATES\]: (.*)$/\1;/' || true +} + extract_last_session() { grep "\[EXECUTING TEST SESSION_VAR\]: " | sed -E 's/^.*\[EXECUTING TEST SESSION_VAR\]: (.*)$/\1;/' | tail -n 1 || true } @@ -82,20 +91,32 @@ extract_fail_info_from_logs() { for LOGFILENAME in $(ls "$LOGDIR" | grep "$LOGFILE_PREFIX") do LOGFILE="$LOGDIR/$LOGFILENAME" - REASON=$(get_failure_reason < "$LOGFILE") - if [[ -n "$REASON" ]]; then - echo_err "[INFO] $LOGFILE Encountered bug." + echo_err "[INFO] Checking $LOGFILE for bugs" + FAILED=$(check_if_failed < "$LOGFILE") + echo_err "[INFO] Checked $LOGFILE for bugs" + if [[ -n "$FAILED" ]]; then + echo_err "[WARN] $LOGFILE Encountered bug." - # TODO(Noel): Perhaps add verbose logs here, if any part is missing. + REASON=$(get_failure_reason "$LOGFILE") SEED=$(echo "$LOGFILENAME" | sed -E "s/${LOGFILE_PREFIX}\-(.*)\.log/\1/") + DDL=$(extract_ddl < "$LOGFILE") GLOBAL_SESSION=$(extract_global_session < "$LOGFILE") - DML=$(extract_dml < "$LOGFILE") + # FIXME(kwannoel): Extract dml for updates too. + INSERTS=$(extract_inserts < "$LOGFILE") + UPDATES=$(extract_updates < "$LOGFILE") TEST_SESSION=$(extract_last_session < "$LOGFILE") QUERY=$(extract_failing_query < "$LOGFILE") + FAIL_DIR="$OUTDIR/failed/$SEED" mkdir -p "$FAIL_DIR" - echo -e "$DDL" "\n\n$GLOBAL_SESSION" "\n\n$DML" "\n\n$TEST_SESSION" "\n\n$QUERY" > "$FAIL_DIR/queries.sql" + + echo -e "$DDL" \ + "\n\n$GLOBAL_SESSION" \ + "\n\n$INSERTS" \ + "\n\n$UPDATES" \ + "\n\n$TEST_SESSION" \ + "\n\n$QUERY" > "$FAIL_DIR/queries.sql" echo_err "[INFO] WROTE FAIL QUERY to $FAIL_DIR/queries.sql" echo -e "$REASON" > "$FAIL_DIR/fail.log" echo_err "[INFO] WROTE FAIL REASON to $FAIL_DIR/fail.log" @@ -116,14 +137,18 @@ generate_deterministic() { set +e echo "" > $LOGDIR/generate_deterministic.stdout.log seq "$TEST_NUM" | env_parallel " - mkdir -p $OUTDIR/{}; \ + mkdir -p $OUTDIR/{} + echo '[INFO] Generating For Seed {}' MADSIM_TEST_SEED={} ./$MADSIM_BIN \ --sqlsmith 100 \ --generate-sqlsmith-queries $OUTDIR/{} \ $TESTDATA \ 1>>$LOGDIR/generate_deterministic.stdout.log \ - 2>$LOGDIR/generate-{}.log; \ - extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql; \ + 2>$LOGDIR/generate-{}.log + echo '[INFO] Finished Generating For Seed {}' + echo '[INFO] Extracting Queries For Seed {}' + extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql + echo '[INFO] Extracted Queries For Seed {}' " set -e } @@ -193,6 +218,7 @@ build() { } generate() { + echo_err "[INFO] Generating" generate_deterministic echo_err "[INFO] Finished generation" } @@ -214,8 +240,8 @@ validate() { # sync step # Some queries maybe be added sync_queries() { - set +x pushd $OUTDIR + git stash git checkout main git pull set +e @@ -223,10 +249,10 @@ sync_queries() { set -e git checkout -b stage popd - set -x } sync() { + echo_err "[INFO] Syncing" sync_queries echo_err "[INFO] Synced" } @@ -245,6 +271,7 @@ upload_queries() { } upload() { + echo_err "[INFO] Uploading Queries" upload_queries echo_err "[INFO] Uploaded" } @@ -256,7 +283,7 @@ cleanup() { ################### ENTRY POINTS -generate() { +run_generate() { setup build @@ -268,7 +295,7 @@ generate() { cleanup } -extract() { +run_extract() { LOGDIR="$PWD" OUTDIR="$PWD" extract_fail_info_from_logs "fuzzing" for QUERY_FOLDER in failed/* do @@ -287,9 +314,9 @@ extract() { main() { if [[ $1 == "extract" ]]; then echo "[INFO] Extracting queries" - extract + run_extract elif [[ $1 == "generate" ]]; then - generate + run_generate else echo " ================================================================ diff --git a/src/tests/sqlsmith/src/lib.rs b/src/tests/sqlsmith/src/lib.rs index 68008ad535fc..98866fe0365e 100644 --- a/src/tests/sqlsmith/src/lib.rs +++ b/src/tests/sqlsmith/src/lib.rs @@ -17,10 +17,15 @@ #![feature(lazy_cell)] #![feature(box_patterns)] +use std::collections::{HashMap, HashSet}; + +use anyhow::Result; +use itertools::Itertools; use rand::prelude::SliceRandom; use rand::Rng; use risingwave_sqlparser::ast::{ - BinaryOperator, Expr, Join, JoinConstraint, JoinOperator, Statement, + BinaryOperator, ColumnOption, Expr, Join, JoinConstraint, JoinOperator, Statement, + TableConstraint, }; use risingwave_sqlparser::parser::Parser; @@ -46,7 +51,7 @@ pub fn insert_sql_gen(rng: &mut impl Rng, tables: Vec, count: usize) -> V let mut gen = SqlGenerator::new(rng, vec![]); tables .into_iter() - .map(|table| format!("{}", gen.gen_insert_stmt(table, count))) + .map(|table| format!("{}", gen.generate_insert_statement(&table, count))) .collect() } @@ -76,6 +81,15 @@ pub fn session_sql_gen(rng: &mut R) -> String { .to_string() } +pub fn generate_update_statements( + rng: &mut R, + tables: &[Table], + inserts: &[Statement], +) -> Result> { + let mut gen = SqlGenerator::new(rng, vec![]); + gen.generate_update_statements(tables, inserts) +} + /// Parse SQL /// FIXME(Noel): Introduce error type for sqlsmith for this. pub fn parse_sql>(sql: S) -> Vec { @@ -86,13 +100,572 @@ pub fn parse_sql>(sql: S) -> Vec { /// Extract relevant info from CREATE TABLE statement, to construct a Table pub fn create_table_statement_to_table(statement: &Statement) -> Table { match statement { - Statement::CreateTable { name, columns, .. } => Table { - name: name.0[0].real_value(), - columns: columns.iter().map(|c| c.clone().into()).collect(), - }, + Statement::CreateTable { + name, + columns, + constraints, + .. + } => { + let column_name_to_index_mapping: HashMap<_, _> = columns + .iter() + .enumerate() + .map(|(i, c)| (&c.name, i)) + .collect(); + let mut pk_indices = HashSet::new(); + for (i, column) in columns.iter().enumerate() { + let is_primary_key = column + .options + .iter() + .any(|option| option.option == ColumnOption::Unique { is_primary: true }); + if is_primary_key { + pk_indices.insert(i); + } + } + for constraint in constraints { + if let TableConstraint::Unique { + columns, + is_primary: true, + .. + } = constraint + { + for column in columns { + let pk_index = column_name_to_index_mapping.get(column).unwrap(); + pk_indices.insert(*pk_index); + } + } + } + let mut pk_indices = pk_indices.into_iter().collect_vec(); + pk_indices.sort_unstable(); + Table::new_with_pk( + name.0[0].real_value(), + columns.iter().map(|c| c.clone().into()).collect(), + pk_indices, + ) + } _ => panic!( "Only CREATE TABLE statements permitted, received: {}", statement ), } } + +pub fn parse_create_table_statements(sql: impl AsRef) -> (Vec
, Vec) { + let statements = parse_sql(&sql); + let tables = statements + .iter() + .map(create_table_statement_to_table) + .collect(); + (tables, statements) +} + +#[cfg(test)] +mod tests { + use std::fmt::Debug; + + use expect_test::{expect, Expect}; + + use super::*; + + fn check(actual: impl Debug, expect: Expect) { + let actual = format!("{:#?}", actual); + expect.assert_eq(&actual); + } + + #[test] + fn test_parse_create_table_statements_no_pk() { + let test_string = " +CREATE TABLE t(v1 int); +CREATE TABLE t2(v1 int, v2 bool); +CREATE TABLE t3(v1 int, v2 bool, v3 smallint); + "; + check( + parse_create_table_statements(test_string), + expect![[r#" + ( + [ + Table { + name: "t", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + ], + pk_indices: [], + }, + Table { + name: "t2", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + Column { + name: "v2", + data_type: Boolean, + }, + ], + pk_indices: [], + }, + Table { + name: "t3", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + Column { + name: "v2", + data_type: Boolean, + }, + Column { + name: "v3", + data_type: Int16, + }, + ], + pk_indices: [], + }, + ], + [ + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t2", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [], + }, + ColumnDef { + name: Ident { + value: "v2", + quote_style: None, + }, + data_type: Some( + Boolean, + ), + collation: None, + options: [], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t3", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [], + }, + ColumnDef { + name: Ident { + value: "v2", + quote_style: None, + }, + data_type: Some( + Boolean, + ), + collation: None, + options: [], + }, + ColumnDef { + name: Ident { + value: "v3", + quote_style: None, + }, + data_type: Some( + SmallInt, + ), + collation: None, + options: [], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + ], + )"#]], + ); + } + + #[test] + fn test_parse_create_table_statements_with_pk() { + let test_string = " +CREATE TABLE t(v1 int PRIMARY KEY); +CREATE TABLE t2(v1 int, v2 smallint PRIMARY KEY); +CREATE TABLE t3(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY); +CREATE TABLE t4(v1 int PRIMARY KEY, v2 smallint PRIMARY KEY, v3 bool PRIMARY KEY); +"; + check( + parse_create_table_statements(test_string), + expect![[r#" + ( + [ + Table { + name: "t", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + ], + pk_indices: [ + 0, + ], + }, + Table { + name: "t2", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + Column { + name: "v2", + data_type: Int16, + }, + ], + pk_indices: [ + 1, + ], + }, + Table { + name: "t3", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + Column { + name: "v2", + data_type: Int16, + }, + ], + pk_indices: [ + 0, + 1, + ], + }, + Table { + name: "t4", + columns: [ + Column { + name: "v1", + data_type: Int32, + }, + Column { + name: "v2", + data_type: Int16, + }, + Column { + name: "v3", + data_type: Boolean, + }, + ], + pk_indices: [ + 0, + 1, + 2, + ], + }, + ], + [ + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t2", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [], + }, + ColumnDef { + name: Ident { + value: "v2", + quote_style: None, + }, + data_type: Some( + SmallInt, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t3", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ColumnDef { + name: Ident { + value: "v2", + quote_style: None, + }, + data_type: Some( + SmallInt, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + CreateTable { + or_replace: false, + temporary: false, + if_not_exists: false, + name: ObjectName( + [ + Ident { + value: "t4", + quote_style: None, + }, + ], + ), + columns: [ + ColumnDef { + name: Ident { + value: "v1", + quote_style: None, + }, + data_type: Some( + Int, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ColumnDef { + name: Ident { + value: "v2", + quote_style: None, + }, + data_type: Some( + SmallInt, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ColumnDef { + name: Ident { + value: "v3", + quote_style: None, + }, + data_type: Some( + Boolean, + ), + collation: None, + options: [ + ColumnOptionDef { + name: None, + option: Unique { + is_primary: true, + }, + }, + ], + }, + ], + constraints: [], + with_options: [], + source_schema: None, + source_watermarks: [], + append_only: false, + query: None, + }, + ], + )"#]], + ); + } +} diff --git a/src/tests/sqlsmith/src/runner.rs b/src/tests/sqlsmith/src/runner.rs index 9c13032649ca..f30aa667a9e2 100644 --- a/src/tests/sqlsmith/src/runner.rs +++ b/src/tests/sqlsmith/src/runner.rs @@ -20,6 +20,7 @@ use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; #[cfg(madsim)] use rand_chacha::ChaChaRng; +use risingwave_sqlparser::ast::Statement; use tokio::time::{sleep, Duration}; use tokio_postgres::error::Error as PgError; use tokio_postgres::Client; @@ -27,8 +28,8 @@ use tokio_postgres::Client; use crate::utils::read_file_contents; use crate::validation::{is_permissible_error, is_recovery_in_progress_error}; use crate::{ - create_table_statement_to_table, insert_sql_gen, mview_sql_gen, parse_sql, session_sql_gen, - sql_gen, Table, + generate_update_statements, insert_sql_gen, mview_sql_gen, parse_create_table_statements, + parse_sql, session_sql_gen, sql_gen, Table, }; type PgResult = std::result::Result; @@ -37,7 +38,7 @@ type Result = anyhow::Result; /// e2e test runner for pre-generated queries from sqlsmith pub async fn run_pre_generated(client: &Client, outdir: &str) { let queries_path = format!("{}/queries.sql", outdir); - let queries = std::fs::read_to_string(queries_path).unwrap(); + let queries = read_file_contents(queries_path).unwrap(); let ddl = queries .lines() .filter(|s| s.starts_with("CREATE")) @@ -77,23 +78,28 @@ pub async fn generate( let rows_per_table = 50; let max_rows_inserted = rows_per_table * base_tables.len(); - populate_tables(client, &mut rng, base_tables.clone(), rows_per_table).await; + let inserts = populate_tables(client, &mut rng, base_tables.clone(), rows_per_table).await; tracing::info!("Populated base tables"); let (tables, mviews) = create_mviews(&mut rng, base_tables.clone(), client) .await .unwrap(); + // Generate an update for some inserts, on the corresponding table. + update_base_tables(client, &mut rng, &base_tables, &inserts).await; + test_sqlsmith( client, &mut rng, tables.clone(), - base_tables, + base_tables.clone(), max_rows_inserted, ) .await; tracing::info!("Passed sqlsmith tests"); + tracing::info!("Ran updates"); + let mut queries = String::with_capacity(10000); let mut generated_queries = 0; for _ in 0..count { @@ -159,7 +165,7 @@ pub async fn run(client: &Client, testdata: &str, count: usize, seed: Option) -> impl Rng { } } +async fn update_base_tables( + client: &Client, + rng: &mut R, + base_tables: &[Table], + inserts: &[Statement], +) { + let update_statements = generate_update_statements(rng, base_tables, inserts).unwrap(); + for update_statement in update_statements { + if rng.gen_bool(0.5) { + let sql = update_statement.to_string(); + tracing::info!("[EXECUTING UPDATES]: {}", &sql); + client.simple_query(&sql).await.unwrap(); + } + } +} + async fn populate_tables( client: &Client, rng: &mut R, base_tables: Vec
, row_count: usize, -) -> String { +) -> Vec { let inserts = insert_sql_gen(rng, base_tables, row_count); for insert in &inserts { tracing::info!("[EXECUTING INSERT]: {}", insert); client.simple_query(insert).await.unwrap(); } - inserts.into_iter().map(|i| format!("{};\n", i)).collect() + inserts + .iter() + .map(|s| parse_sql(s).into_iter().next().unwrap()) + .collect_vec() } /// Sanity checks for sqlsmith @@ -319,6 +349,7 @@ async fn test_stream_queries( sample_size: usize, ) -> Result { let mut skipped = 0; + for _ in 0..sample_size { test_session_variable(client, rng).await; let (sql, table) = mview_sql_gen(rng, tables.clone(), "stream_query"); @@ -344,12 +375,8 @@ async fn create_base_tables(testdata: &str, client: &Client) -> Result SqlGenerator<'a, R> { + pub(crate) fn generate_insert_statement( + &mut self, + table: &Table, + row_count: usize, + ) -> Statement { + let table_name = ObjectName(vec![table.name.as_str().into()]); + let data_types = table + .columns + .iter() + .cloned() + .map(|c| c.data_type) + .collect_vec(); + let values = self.gen_values(&data_types, row_count); + let source = Query { + with: None, + body: SetExpr::Values(Values(values)), + order_by: vec![], + limit: None, + offset: None, + fetch: None, + }; + Statement::Insert { + table_name, + columns: vec![], + source: Box::new(source), + returning: vec![], + } + } + + pub(crate) fn generate_update_statements( + &mut self, + tables: &[Table], + inserts: &[Statement], + ) -> Result> { + let mut updates = vec![]; + for insert in inserts { + if self.rng.gen_bool(0.1) { + match insert { + Statement::Insert { + table_name, source, .. + } => { + let values = Self::extract_insert_values(source)?; + let table = tables + .iter() + .find(|table| table.name == table_name.real_value()) + .expect("Inserted values should always have an existing table"); + let pk_indices = &table.pk_indices; + let mut updates_for_insert = + self.generate_update_statements_inner(table, values, pk_indices); + updates.append(&mut updates_for_insert); + } + _ => bail!("Should only have insert statements"), + } + } + } + Ok(updates) + } + + pub(crate) fn generate_update_statements_inner( + &mut self, + table: &Table, + values: &[Vec], + pk_indices: &[usize], + ) -> Vec { + let data_types = table + .columns + .iter() + .cloned() + .map(|c| c.data_type) + .collect_vec(); + if pk_indices.is_empty() { + // do delete for a random subset of rows. + let delete_statements = self.generate_delete_statements(table, values); + // then insert back some number of rows. + let insert_statements = if delete_statements.is_empty() { + vec![] + } else { + let insert_statement = + self.generate_insert_statement(table, delete_statements.len()); + vec![insert_statement] + }; + delete_statements + .into_iter() + .chain(insert_statements.into_iter()) + .collect() + } else { + let value_indices = (0..table.columns.len()) + .filter(|i| !pk_indices.contains(i)) + .collect_vec(); + let update_values = values + .iter() + .filter_map(|row| { + if self.rng.gen_bool(0.1) { + let mut updated_row = row.clone(); + for value_index in &value_indices { + let data_type = &data_types[*value_index]; + updated_row[*value_index] = self.gen_simple_scalar(data_type) + } + Some(updated_row) + } else { + None + } + }) + .collect_vec(); + let update_statements = update_values + .iter() + .map(|row| Self::row_to_update_statement(table, pk_indices, &value_indices, row)) + .collect_vec(); + update_statements + } + } + + fn row_to_update_statement( + table: &Table, + pk_indices: &[usize], + value_indices: &[usize], + row: &[Expr], + ) -> Statement { + let assignments = value_indices + .iter() + .copied() + .map(|i| { + let name = table.columns[i].name.as_str(); + let id = vec![name.into()]; + let value = AssignmentValue::Expr(row[i].clone()); + Assignment { id, value } + }) + .collect_vec(); + assert!(!assignments.is_empty()); + Statement::Update { + table_name: ObjectName::from_test_str(&table.name), + assignments, + selection: Some(Self::create_selection_expr(table, pk_indices, row)), + returning: vec![], + } + } + + fn create_selection_expr(table: &Table, selected_indices: &[usize], row: &[Expr]) -> Expr { + assert!(!selected_indices.is_empty()); + let match_exprs = selected_indices + .iter() + .copied() + .map(|i| { + let match_val = row[i].clone(); + let match_col = Expr::Identifier(table.columns[i].name.as_str().into()); + + Expr::BinaryOp { + left: Box::new(match_col), + op: BinaryOperator::Eq, + right: Box::new(match_val), + } + }) + .collect_vec(); + match_exprs + .into_iter() + .reduce(|l, r| BinaryOp { + left: Box::new(l), + op: BinaryOperator::And, + right: Box::new(r), + }) + .expect("pk should be non empty") + } + + fn generate_delete_statements( + &mut self, + table: &Table, + values: &[Vec], + ) -> Vec { + let selected = (0..table.columns.len()).collect_vec(); + values + .iter() + .filter_map(|row| { + if self.rng.gen_bool(0.1) { + let selection = Some(Self::create_selection_expr(table, &selected, row)); + Some(Statement::Delete { + table_name: ObjectName::from_test_str(&table.name), + selection, + returning: vec![], + }) + } else { + None + } + }) + .collect() + } + + fn extract_insert_values(source: &Query) -> Result<&[Vec]> { + let body = &source.body; + match body { + SetExpr::Values(values) => Ok(&values.0), + _ => bail!("Should not have insert values"), + } + } + + fn gen_values(&mut self, data_types: &[DataType], row_count: usize) -> Vec> { + (0..row_count).map(|_| self.gen_row(data_types)).collect() + } + + fn gen_row(&mut self, data_types: &[DataType]) -> Vec { + data_types + .iter() + .map(|typ| self.gen_simple_scalar(typ)) + .collect() + } +} diff --git a/src/tests/sqlsmith/src/sql_gen/insert.rs b/src/tests/sqlsmith/src/sql_gen/insert.rs deleted file mode 100644 index b5c2aa575fed..000000000000 --- a/src/tests/sqlsmith/src/sql_gen/insert.rs +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2023 RisingWave Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -use itertools::Itertools; -use rand::Rng; -use risingwave_common::types::DataType; -use risingwave_sqlparser::ast::{Expr, ObjectName, Query, SetExpr, Statement, Values}; - -use crate::sql_gen::SqlGenerator; -use crate::Table; - -impl<'a, R: Rng> SqlGenerator<'a, R> { - pub(crate) fn gen_insert_stmt(&mut self, table: Table, row_count: usize) -> Statement { - let table_name = ObjectName(vec![table.name.as_str().into()]); - let data_types = table - .columns - .iter() - .cloned() - .map(|c| c.data_type) - .collect_vec(); - let values = self.gen_values(&data_types, row_count); - let source = Query { - with: None, - body: SetExpr::Values(Values(values)), - order_by: vec![], - limit: None, - offset: None, - fetch: None, - }; - Statement::Insert { - table_name, - columns: vec![], - source: Box::new(source), - returning: vec![], - } - } - - fn gen_values(&mut self, data_types: &[DataType], row_count: usize) -> Vec> { - (0..row_count).map(|_| self.gen_row(data_types)).collect() - } - - fn gen_row(&mut self, data_types: &[DataType]) -> Vec { - data_types - .iter() - .map(|typ| self.gen_simple_scalar(typ)) - .collect() - } -} diff --git a/src/tests/sqlsmith/src/sql_gen/mod.rs b/src/tests/sqlsmith/src/sql_gen/mod.rs index 2a73d791fe76..f4e07f19477e 100644 --- a/src/tests/sqlsmith/src/sql_gen/mod.rs +++ b/src/tests/sqlsmith/src/sql_gen/mod.rs @@ -26,7 +26,7 @@ use risingwave_sqlparser::ast::{ColumnDef, Expr, Ident, ObjectName, Statement}; mod expr; pub use expr::print_function_table; -mod insert; +mod dml; mod query; mod relation; mod scalar; @@ -38,11 +38,24 @@ mod utils; pub struct Table { pub name: String, pub columns: Vec, + pub pk_indices: Vec, } impl Table { pub fn new(name: String, columns: Vec) -> Self { - Self { name, columns } + Self { + name, + columns, + pk_indices: vec![], + } + } + + pub fn new_with_pk(name: String, columns: Vec, pk_indices: Vec) -> Self { + Self { + name, + columns, + pk_indices, + } } pub fn get_qualified_columns(&self) -> Vec { @@ -59,8 +72,8 @@ impl Table { /// Sqlsmith Column definition #[derive(Clone, Debug)] pub struct Column { - name: String, - data_type: DataType, + pub(crate) name: String, + pub(crate) data_type: DataType, } impl From for Column { @@ -177,10 +190,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { pub(crate) fn gen_mview_stmt(&mut self, name: &str) -> (Statement, Table) { let (query, schema) = self.gen_query(); let query = Box::new(query); - let table = Table { - name: name.to_string(), - columns: schema, - }; + let table = Table::new(name.to_string(), schema); let name = ObjectName(vec![Ident::new_unchecked(name)]); let mview = Statement::CreateView { or_replace: false, diff --git a/src/tests/sqlsmith/src/sql_gen/query.rs b/src/tests/sqlsmith/src/sql_gen/query.rs index a42540b1df48..d34e24a3b078 100644 --- a/src/tests/sqlsmith/src/sql_gen/query.rs +++ b/src/tests/sqlsmith/src/sql_gen/query.rs @@ -135,10 +135,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { from, }; - let with_tables = vec![Table { - name: alias.name.real_value(), - columns: query_schema, - }]; + let with_tables = vec![Table::new(alias.name.real_value(), query_schema)]; ( With { recursive: false, @@ -231,7 +228,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { }; // We short-circuit here for mview to avoid streaming nested loop join, - // since CROSS JOIN below maybe correlated. + // since CROSS JOIN below could be correlated. if self.is_mview { assert!(!self.tables.is_empty()); return from; diff --git a/src/tests/sqlsmith/src/sql_gen/relation.rs b/src/tests/sqlsmith/src/sql_gen/relation.rs index 7f9004f0de8c..e1b9cabe98e2 100644 --- a/src/tests/sqlsmith/src/sql_gen/relation.rs +++ b/src/tests/sqlsmith/src/sql_gen/relation.rs @@ -355,10 +355,7 @@ impl<'a, R: Rng> SqlGenerator<'a, R> { fn gen_table_subquery(&mut self) -> (TableWithJoins, Vec
) { let (subquery, columns) = self.gen_local_query(); let alias = self.gen_table_name_with_prefix("sq"); - let table = Table { - name: alias.clone(), - columns, - }; + let table = Table::new(alias.clone(), columns); let relation = TableWithJoins { relation: TableFactor::Derived { lateral: false, diff --git a/src/tests/sqlsmith/tests/frontend/mod.rs b/src/tests/sqlsmith/tests/frontend/mod.rs index 618b739d2c2a..4d13b0099134 100644 --- a/src/tests/sqlsmith/tests/frontend/mod.rs +++ b/src/tests/sqlsmith/tests/frontend/mod.rs @@ -15,7 +15,6 @@ use std::env; use std::sync::Arc; -use itertools::Itertools; use libtest_mimic::{Arguments, Failed, Trial}; use rand::rngs::SmallRng; use rand::{Rng, SeedableRng}; @@ -27,7 +26,7 @@ use risingwave_frontend::{ }; use risingwave_sqlparser::ast::Statement; use risingwave_sqlsmith::{ - create_table_statement_to_table, is_permissible_error, mview_sql_gen, parse_sql, sql_gen, Table, + is_permissible_error, mview_sql_gen, parse_create_table_statements, parse_sql, sql_gen, Table, }; use tokio::runtime::Runtime; @@ -95,11 +94,7 @@ async fn create_tables( let sql = get_seed_table_sql(); setup_sql.push_str(&sql); - let statements = parse_sql(&sql); - let mut tables = statements - .iter() - .map(create_table_statement_to_table) - .collect_vec(); + let (mut tables, statements) = parse_create_table_statements(sql); for s in statements { let create_sql = s.to_string();