diff --git a/.github/workflows/failpoint_test.yaml b/.github/workflows/failpoint_test.yaml index 46cafab6c..0e5a8de35 100644 --- a/.github/workflows/failpoint_test.yaml +++ b/.github/workflows/failpoint_test.yaml @@ -15,5 +15,6 @@ jobs: with: go-version: ${{ steps.goversion.outputs.goversion }} - run: | + go install ./cmd/bbolt make gofail-enable - make test-failpoint + sudo -E PATH=$PATH make test-failpoint diff --git a/go.mod b/go.mod index 26f2d2f52..bf563af24 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module go.etcd.io/bbolt go 1.21 require ( + github.com/fuweid/go-dmflakey v0.0.0-20231120170452-3fb3c5f05920 github.com/spf13/cobra v1.8.0 github.com/spf13/pflag v1.0.5 github.com/stretchr/testify v1.8.4 diff --git a/go.sum b/go.sum index 204bc990c..8778047d4 100644 --- a/go.sum +++ b/go.sum @@ -1,6 +1,8 @@ github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/fuweid/go-dmflakey v0.0.0-20231120170452-3fb3c5f05920 h1:htgd4chS+39MRDlQIY3/sjiQiYk/30Zr2j5yPSSAga4= +github.com/fuweid/go-dmflakey v0.0.0-20231120170452-3fb3c5f05920/go.mod h1:LyUyUu5fLT6AhjTmoztxGdgvFOTyA+3hU2kM92drr9M= github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= diff --git a/tests/failpoint/db_failpoint_test.go b/tests/failpoint/db_failpoint_test.go index c1da5b583..19c8c6b00 100644 --- a/tests/failpoint/db_failpoint_test.go +++ b/tests/failpoint/db_failpoint_test.go @@ -1,12 +1,19 @@ package failpoint import ( + "bytes" "fmt" + "os" + "os/exec" "path/filepath" + "strings" "testing" "time" + "github.com/fuweid/go-dmflakey" +
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "golang.org/x/sys/unix" bolt "go.etcd.io/bbolt" "go.etcd.io/bbolt/errors" @@ -155,3 +162,120 @@ func TestFailpoint_LackOfDiskSpace(t *testing.T) { require.Error(t, err) require.ErrorIs(t, err, errors.ErrTxClosed) } + +// TestRestartFromPowerFailure runs "bbolt bench" against a database on a flakey device, kills it, simulates a power failure and then requires "bbolt check" to succeed on that database. +func TestRestartFromPowerFailure(t *testing.T) { + flakey := initFlakeyDevice(t, "") + root := flakey.rootfs() + + dbPath := filepath.Join(root, "boltdb") + + args := []string{"bbolt", "bench", + "-work", // keep the database + "-path", dbPath, + "-count=1000000000", + "-batch-size=5", // split the total count into many small batches + } + cmd := exec.Command(args[0], args[1:]...) + logger := &bytes.Buffer{} + cmd.Stdout = logger + cmd.Stderr = logger + + t.Logf("start %s", strings.Join(args, " ")) + require.NoError(t, cmd.Start(), "args: %v", args) + time.Sleep(3 * time.Second) + + defer func() { + if t.Failed() { + t.Logf("dump log:\n: %s", logger.String()) + } + }() + + t.Logf("simulate power failure") + cmd.Process.Kill() + require.Error(t, cmd.Wait()) + require.NoError(t, flakey.powerFailure("")) + + st, err := os.Stat(dbPath) + require.NoError(t, err) + t.Logf("db size: %d", st.Size()) + + t.Logf("verify data") + output, err := exec.Command("bbolt", "check", dbPath).CombinedOutput() + require.NoError(t, err, "bbolt check output: %s", string(output)) +} + +// initFlakeyDevice sets up a flakey device with an ext4 filesystem and mounts it on a temporary directory.
+func initFlakeyDevice(t *testing.T, mntOpt string) *flakeyDevice { + imgDir := t.TempDir() + rootDir := t.TempDir() + fsType := dmflakey.FSTypeEXT4 + + flakey, err := dmflakey.InitFlakey("bbolt-failpoint", imgDir, fsType) + require.NoError(t, err, "init flakey device") + t.Cleanup(func() { + assert.NoError(t, flakey.Teardown()) + }) + + err = unix.Mount(flakey.DevicePath(), rootDir, string(fsType), 0, mntOpt) + require.NoError(t, err, "init rootfs with flakey device") + t.Cleanup(func() { + assert.NoError(t, unmount(rootDir)) + }) + + return &flakeyDevice{ + fsType: fsType, + flakey: flakey, + + rootDir: rootDir, + } +} + +type flakeyDevice struct { + fsType dmflakey.FSType + flakey dmflakey.Flakey + + rootDir string +} + +// rootfs returns the rootfs where flakey device mounts. +func (f *flakeyDevice) rootfs() string { + return f.rootDir +} + +// powerFailure drops all the pending writes and remount the rootfs. +func (f *flakeyDevice) powerFailure(mntOpt string) error { + if err := f.flakey.DropWrites(); err != nil { + return fmt.Errorf("failed to drop_writes: %w", err) + } + + if err := unmount(f.rootDir); err != nil { + return fmt.Errorf("failed to unmount rootfs %s: %w", f.rootDir, err) + } + + if err := f.flakey.AllowWrites(); err != nil { + return fmt.Errorf("failed to allow_writes: %w", err) + } + + if err := unix.Mount(f.flakey.DevicePath(), f.rootDir, string(f.fsType), 0, mntOpt); err != nil { + return fmt.Errorf("failed to mount rootfs %s: %w", f.rootDir, err) + } + return nil +} + +func unmount(target string) error { + for i := 0; i < 50; i++ { + if err := unix.Unmount(target, 0); err != nil { + switch err { + case unix.EBUSY: + time.Sleep(500 * time.Millisecond) + continue + case unix.EINVAL: + default: + return fmt.Errorf("failed to umount %s: %w", target, err) + } + } + return nil + } + return unix.EBUSY +}