Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sqlsmith): updates and deletes #9985

Merged
merged 23 commits into from
May 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions src/sqlparser/src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ impl ObjectName {
.collect::<Vec<_>>()
.join(".")
}

pub fn from_test_str(s: &str) -> Self {
ObjectName::from(vec![s.into()])
}
}

impl fmt::Display for ObjectName {
Expand Down
1 change: 1 addition & 0 deletions src/tests/sqlsmith/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ tracing-subscriber = "0.3.16"
workspace-hack = { path = "../../workspace-hack" }

[dev-dependencies]
expect-test = "1"
libtest-mimic = "0.6"

[[bin]]
Expand Down
25 changes: 23 additions & 2 deletions src/tests/sqlsmith/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,29 @@ This test will be run as a unit test:

Take a look at [`gen_queries.sh`](scripts/gen_queries.sh).

Caveat: Even with a given snapshot, certain parts of the system are non-deterministic.
For instance, with scheduler errors, the same query may not trigger errors when executed.
Sometimes during the generation process some failed queries might be encountered.

For instance, if the logs contain:
```sh
[WARN] Cluster crashed while generating queries. see .risingwave/log/generate-22.log for more information.
```
You can re-run the failed query:
```sh
RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \
./target/sim/ci-sim/risingwave_simulation \
--run-sqlsmith-queries $SNAPSHOT_DIR/failed/22
```
The `failed query` is a summary of the full query set.
If re-running it does not actually fail, the summary might be wrong.
In that case, you can re-run the full query set as well:
```sh
RUST_BACKTRACE=1 MADSIM_TEST_SEED=22 RUST_LOG=info \
./target/sim/ci-sim/risingwave_simulation \
--run-sqlsmith-queries $SNAPSHOT_DIR/22
```
## Running with Madsim
Expand Down
65 changes: 46 additions & 19 deletions src/tests/sqlsmith/scripts/gen_queries.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,14 +41,19 @@ echo_err() {

# Get reason for generation crash.
get_failure_reason() {
tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2
cat $1 | tac | grep -B 10000 -m1 "\[EXECUTING" | tac | tail -n+2
}

# Check whether the log text on stdin contains the crash message.
# Prints each line matching $CRASH_MESSAGE together with up to 2 lines of
# leading context; prints nothing when there is no match, so callers can
# test the captured output with `-n`.
# NOTE(review): $CRASH_MESSAGE is defined outside this function — confirm it is set.
check_if_failed() {
# `|| true`: grep exits non-zero when nothing matches; do not propagate that.
grep -B 2 "$CRASH_MESSAGE" || true
}

# Extract queries from file $1, write to file $2
extract_queries() {
QUERIES=$(grep "\[EXECUTING .*\]: " < "$1" | sed -E 's/^.*\[EXECUTING .*\]: (.*)$/\1;/')
FAIL_REASON=$(get_failure_reason < "$1")
if [[ -n "$FAIL_REASON" ]]; then
FAILED=$(check_if_failed < "$1")
if [[ -n "$FAILED" ]]; then
FAIL_REASON=$(get_failure_reason < "$1")
echo_err "[WARN] Cluster crashed while generating queries. see $1 for more information."
QUERIES=$(echo -e "$QUERIES" | sed -E '$ s/(.*)/-- \1/')
fi
Expand All @@ -59,10 +64,14 @@ extract_ddl() {
grep "\[EXECUTING CREATE .*\]: " | sed -E 's/^.*\[EXECUTING CREATE .*\]: (.*)$/\1;/' | pg_format || true
}

extract_dml() {
extract_inserts() {
grep "\[EXECUTING INSERT\]: " | sed -E 's/^.*\[EXECUTING INSERT\]: (.*)$/\1;/' || true
}

# Extract update statements from the log text on stdin.
# Keeps only lines containing the "[EXECUTING UPDATES]: " marker, drops
# everything up to and including the marker, and appends ";" to each statement.
extract_updates() {
# `|| true`: a log with no matching lines is not treated as a failure.
grep "\[EXECUTING UPDATES\]: " | sed -E 's/^.*\[EXECUTING UPDATES\]: (.*)$/\1;/' || true
}

# Extract the last test-session statement from the log text on stdin.
# Keeps lines containing the "[EXECUTING TEST SESSION_VAR]: " marker, strips
# the prefix up to and including the marker, appends ";", and prints only the
# final such line.
extract_last_session() {
# `|| true`: a log with no matching lines is not treated as a failure.
grep "\[EXECUTING TEST SESSION_VAR\]: " | sed -E 's/^.*\[EXECUTING TEST SESSION_VAR\]: (.*)$/\1;/' | tail -n 1 || true
}
Expand All @@ -82,20 +91,32 @@ extract_fail_info_from_logs() {
for LOGFILENAME in $(ls "$LOGDIR" | grep "$LOGFILE_PREFIX")
do
LOGFILE="$LOGDIR/$LOGFILENAME"
REASON=$(get_failure_reason < "$LOGFILE")
if [[ -n "$REASON" ]]; then
echo_err "[INFO] $LOGFILE Encountered bug."
echo_err "[INFO] Checking $LOGFILE for bugs"
FAILED=$(check_if_failed < "$LOGFILE")
echo_err "[INFO] Checked $LOGFILE for bugs"
if [[ -n "$FAILED" ]]; then
echo_err "[WARN] $LOGFILE Encountered bug."

# TODO(Noel): Perhaps add verbose logs here, if any part is missing.
REASON=$(get_failure_reason "$LOGFILE")
SEED=$(echo "$LOGFILENAME" | sed -E "s/${LOGFILE_PREFIX}\-(.*)\.log/\1/")

DDL=$(extract_ddl < "$LOGFILE")
GLOBAL_SESSION=$(extract_global_session < "$LOGFILE")
DML=$(extract_dml < "$LOGFILE")
# FIXME(kwannoel): Extract dml for updates too.
INSERTS=$(extract_inserts < "$LOGFILE")
UPDATES=$(extract_updates < "$LOGFILE")
TEST_SESSION=$(extract_last_session < "$LOGFILE")
QUERY=$(extract_failing_query < "$LOGFILE")

FAIL_DIR="$OUTDIR/failed/$SEED"
mkdir -p "$FAIL_DIR"
echo -e "$DDL" "\n\n$GLOBAL_SESSION" "\n\n$DML" "\n\n$TEST_SESSION" "\n\n$QUERY" > "$FAIL_DIR/queries.sql"

echo -e "$DDL" \
"\n\n$GLOBAL_SESSION" \
"\n\n$INSERTS" \
"\n\n$UPDATES" \
"\n\n$TEST_SESSION" \
"\n\n$QUERY" > "$FAIL_DIR/queries.sql"
echo_err "[INFO] WROTE FAIL QUERY to $FAIL_DIR/queries.sql"
echo -e "$REASON" > "$FAIL_DIR/fail.log"
echo_err "[INFO] WROTE FAIL REASON to $FAIL_DIR/fail.log"
Expand All @@ -116,14 +137,18 @@ generate_deterministic() {
set +e
echo "" > $LOGDIR/generate_deterministic.stdout.log
seq "$TEST_NUM" | env_parallel "
mkdir -p $OUTDIR/{}; \
mkdir -p $OUTDIR/{}
echo '[INFO] Generating For Seed {}'
MADSIM_TEST_SEED={} ./$MADSIM_BIN \
--sqlsmith 100 \
--generate-sqlsmith-queries $OUTDIR/{} \
$TESTDATA \
1>>$LOGDIR/generate_deterministic.stdout.log \
2>$LOGDIR/generate-{}.log; \
extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql; \
2>$LOGDIR/generate-{}.log
echo '[INFO] Finished Generating For Seed {}'
echo '[INFO] Extracting Queries For Seed {}'
extract_queries $LOGDIR/generate-{}.log $OUTDIR/{}/queries.sql
echo '[INFO] Extracted Queries For Seed {}'
"
set -e
}
Expand Down Expand Up @@ -193,6 +218,7 @@ build() {
}

# Generation step: runs generate_deterministic, reporting the start and the
# end of the step through echo_err.
generate() {
echo_err "[INFO] Generating"
generate_deterministic
echo_err "[INFO] Finished generation"
}
Expand All @@ -214,19 +240,19 @@ validate() {
# sync step
# Some queries may be added
sync_queries() {
set +x
pushd $OUTDIR
git stash
git checkout main
git pull
set +e
git branch -D stage
set -e
git checkout -b stage
popd
set -x
}

sync() {
echo_err "[INFO] Syncing"
sync_queries
echo_err "[INFO] Synced"
}
Expand All @@ -245,6 +271,7 @@ upload_queries() {
}

# Upload step: runs upload_queries, reporting the start and the end of the
# step through echo_err.
upload() {
echo_err "[INFO] Uploading Queries"
upload_queries
echo_err "[INFO] Uploaded"
}
Expand All @@ -256,7 +283,7 @@ cleanup() {

################### ENTRY POINTS

generate() {
run_generate() {
setup

build
Expand All @@ -268,7 +295,7 @@ generate() {
cleanup
}

extract() {
run_extract() {
LOGDIR="$PWD" OUTDIR="$PWD" extract_fail_info_from_logs "fuzzing"
for QUERY_FOLDER in failed/*
do
Expand All @@ -287,9 +314,9 @@ extract() {
main() {
if [[ $1 == "extract" ]]; then
echo "[INFO] Extracting queries"
extract
run_extract
elif [[ $1 == "generate" ]]; then
generate
run_generate
else
echo "
================================================================
Expand Down
Loading