Skip to content

Commit

Permalink
feat(sqlsmith): updates and deletes (#9985)
Browse files Browse the repository at this point in the history
  • Loading branch information
kwannoel committed May 29, 2023
1 parent 121e5a1 commit 9151a82
Show file tree
Hide file tree
Showing 13 changed files with 951 additions and 124 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ impl ObjectName {
.collect::<Vec<_>>()
.join(".")
}

/// Builds an `ObjectName` from a single string slice.
///
/// `s` is converted with `.into()` and placed, as the only element, in a
/// vector that is handed to `ObjectName::from`. The string is passed through
/// whole; no splitting on `.` happens here.
///
/// NOTE(review): the name suggests this is meant for test code only — confirm
/// before using it in non-test paths.
pub fn from_test_str(s: &str) -> Self {
ObjectName::from(vec![s.into()])
}
}

impl fmt::Display for ObjectName {
Expand Down
1 change: 1 addition & 0 deletions src/tests/sqlsmith/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ tracing-subscriber = "0.3.16"
workspace-hack = { path = "../../workspace-hack" }

[dev-dependencies]
expect-test = "1"
libtest-mimic = "0.6"

[[bin]]
Expand Down
25 changes: 23 additions & 2 deletions src/tests/sqlsmith/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,29 @@ This test will be run as a unit test:

Take a look at [`gen_queries.sh`](scripts/gen_queries.sh).

Caveat: Even with a given snapshot, certain parts of the system are non-deterministic.
For instance with scheduler errors, the same query may not trigger errors when executed.
Sometimes during the generation process some failed queries might be encountered.

For instance, if the logs contain:
```sh
[WARN] Cluster crashed while generating queries. see .risingwave/log/generate-22.log for more information.
```
You can re-run the failed query:
```sh
RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \
./target/sim/ci-sim/risingwave_simulation \
--run-sqlsmith-queries $SNAPSHOT_DIR/failed/22
```
The `failed` query file is a summary of the full query set.
If re-running it does not actually reproduce the failure, the summary might be wrong.
In that case, you can re-run the full query set as well:
```sh
RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \
./target/sim/ci-sim/risingwave_simulation \
--run-sqlsmith-queries $SNAPSHOT_DIR/22
```
## Running with Madsim
Expand Down
65 changes: 46 additions & 19 deletions src/tests/sqlsmith/scripts/gen_queries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,19 @@ echo_err() {

# Get reason for generation crash.
get_failure_reason() {
tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2
cat $1 | tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2
}

# Reads a log from stdin and prints each line matching $CRASH_MESSAGE together
# with the 2 lines preceding it (grep -B 2).
# $CRASH_MESSAGE is not defined in this function; it must be set by the
# surrounding script.
# `|| true` forces a zero exit status when grep finds no match, so callers can
# simply test whether the captured output is empty.
check_if_failed() {
grep -B 2 "$CRASH_MESSAGE" || true
}

# Extract queries from file $1, write to file $2
extract_queries() {
QUERIES=$(grep "\[EXECUTING .*\]: " < "$1" | sed -E 's/^.*\[EXECUTING .*\]: (.*)$/\1;/')
FAIL_REASON=$(get_failure_reason < "$1")
if [[ -n "$FAIL_REASON" ]]; then
FAILED=$(check_if_failed < "$1")
if [[ -n "$FAILED" ]]; then
FAIL_REASON=$(get_failure_reason < "$1")
echo_err "[WARN] Cluster crashed while generating queries. see $1 for more information."
QUERIES=$(echo -e "$QUERIES" | sed -E '$ s/(.*)/-- \1/')
fi
Expand All @@ -59,10 +64,14 @@ extract_ddl() {
grep "\[EXECUTING CREATE .*\]: " | sed -E 's/^.*\[EXECUTING CREATE .*\]: (.*)$/\1;/' | pg_format || true
}

extract_dml() {
extract_inserts() {
grep "\[EXECUTING INSERT\]: " | sed -E 's/^.*\[EXECUTING INSERT\]: (.*)$/\1;/' || true
}

# Reads a log from stdin, keeps only the lines containing the marker
# "[EXECUTING UPDATES]: ", and prints the text that follows the marker on each
# such line with a ";" appended.
# `|| true` forces a zero exit status regardless of the pipeline's result
# (e.g. when no line matches).
extract_updates() {
grep "\[EXECUTING UPDATES\]: " | sed -E 's/^.*\[EXECUTING UPDATES\]: (.*)$/\1;/' || true
}

extract_last_session() {
grep "\[EXECUTING TEST SESSION_VAR\]: " | sed -E 's/^.*\[EXECUTING TEST SESSION_VAR\]: (.*)$/\1;/' | tail -n 1 || true
}
Expand All @@ -82,20 +91,32 @@ extract_fail_info_from_logs() {
for LOGFILENAME in $(ls "$LOGDIR" | grep "$LOGFILE_PREFIX")
do
LOGFILE="$LOGDIR/$LOGFILENAME"
REASON=$(get_failure_reason < "$LOGFILE")
if [[ -n "$REASON" ]]; then
echo_err "[INFO] $LOGFILE Encountered bug."
echo_err "[INFO] Checking $LOGFILE for bugs"
FAILED=$(check_if_failed < "$LOGFILE")
echo_err "[INFO] Checked $LOGFILE for bugs"
if [[ -n "$FAILED" ]]; then
echo_err "[WARN] $LOGFILE Encountered bug."

# TODO(Noel): Perhaps add verbose logs here, if any part is missing.
REASON=$(get_failure_reason "$LOGFILE")
SEED=$(echo "$LOGFILENAME" | sed -E "s/${LOGFILE_PREFIX}\-(.*)\.log/\1/")

DDL=$(extract_ddl < "$LOGFILE")
GLOBAL_SESSION=$(extract_global_session < "$LOGFILE")
DML=$(extract_dml < "$LOGFILE")
# FIXME(kwannoel): Extract dml for updates too.
INSERTS=$(extract_inserts < "$LOGFILE")
UPDATES=$(extract_updates < "$LOGFILE")
TEST_SESSION=$(extract_last_session < "$LOGFILE")
QUERY=$(extract_failing_query < "$LOGFILE")

FAIL_DIR="$OUTDIR/failed/$SEED"
mkdir -p "$FAIL_DIR"
echo -e "$DDL" "\n\n$GLOBAL_SESSION" "\n\n$DML" "\n\n$TEST_SESSION" "\n\n$QUERY" > "$FAIL_DIR/queries.sql"

echo -e "$DDL" \
"\n\n$GLOBAL_SESSION" \
"\n\n$INSERTS" \
"\n\n$UPDATES" \
"\n\n$TEST_SESSION" \
"\n\n$QUERY" > "$FAIL_DIR/queries.sql"
echo_err "[INFO] WROTE FAIL QUERY to $FAIL_DIR/queries.sql"
echo -e "$REASON" > "$FAIL_DIR/fail.log"
echo_err "[INFO] WROTE FAIL REASON to $FAIL_DIR/fail.log"
Expand All @@ -116,14 +137,18 @@ generate_deterministic() {
set +e
echo "" > $LOGDIR/generate_deterministic.stdout.log
seq "$TEST_NUM" | env_parallel "
mkdir -p $OUTDIR/{}; \
mkdir -p $OUTDIR/{}
echo '[INFO] Generating For Seed {}'
MADSIM_TEST_SEED={} ./$MADSIM_BIN \
--sqlsmith 100 \
--generate-sqlsmith-queries $OUTDIR/{} \
$TESTDATA \
1>>$LOGDIR/generate_deterministic.stdout.log \
2>$LOGDIR/generate-{}.log; \
extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql; \
2>$LOGDIR/generate-{}.log
echo '[INFO] Finished Generating For Seed {}'
echo '[INFO] Extracting Queries For Seed {}'
extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql
echo '[INFO] Extracted Queries For Seed {}'
"
set -e
}
Expand Down Expand Up @@ -193,6 +218,7 @@ build() {
}

# Generation step: logs a start message via echo_err, runs
# generate_deterministic, then logs a completion message.
# echo_err and generate_deterministic are defined elsewhere in this script.
generate() {
echo_err "[INFO] Generating"
generate_deterministic
echo_err "[INFO] Finished generation"
}
Expand All @@ -214,19 +240,19 @@ validate() {
# sync step
# Some queries may be added
sync_queries() {
set +x
pushd $OUTDIR
git stash
git checkout main
git pull
set +e
git branch -D stage
set -e
git checkout -b stage
popd
set -x
}

# Sync step: logs a start message via echo_err, runs sync_queries, then logs a
# completion message.
# NOTE(review): within this script a function named `sync` takes precedence
# over the external `sync` command — confirm nothing here relies on the latter.
sync() {
echo_err "[INFO] Syncing"
sync_queries
echo_err "[INFO] Synced"
}
Expand All @@ -245,6 +271,7 @@ upload_queries() {
}

# Upload step: logs a start message via echo_err, runs upload_queries, then
# logs a completion message.
# echo_err and upload_queries are defined elsewhere in this script.
upload() {
echo_err "[INFO] Uploading Queries"
upload_queries
echo_err "[INFO] Uploaded"
}
Expand All @@ -256,7 +283,7 @@ cleanup() {

################### ENTRY POINTS

generate() {
run_generate() {
setup

build
Expand All @@ -268,7 +295,7 @@ generate() {
cleanup
}

extract() {
run_extract() {
LOGDIR="$PWD" OUTDIR="$PWD" extract_fail_info_from_logs "fuzzing"
for QUERY_FOLDER in failed/*
do
Expand All @@ -287,9 +314,9 @@ extract() {
main() {
if [[ $1 == "extract" ]]; then
echo "[INFO] Extracting queries"
extract
run_extract
elif [[ $1 == "generate" ]]; then
generate
run_generate
else
echo "
================================================================
Expand Down
Loading

0 comments on commit 9151a82

Please sign in to comment.