Skip to content

Commit

Permalink
add command "downloader torrent_cat" (#8824)
Browse files Browse the repository at this point in the history
  • Loading branch information
AskAlexSharov authored Nov 27, 2023
1 parent fd6f529 commit 8cfafa4
Show file tree
Hide file tree
Showing 5 changed files with 128 additions and 44 deletions.
73 changes: 67 additions & 6 deletions cmd/downloader/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"strings"
"time"

"github.com/anacrolix/torrent/metainfo"
"github.com/c2h5oh/datasize"
grpc_middleware "github.com/grpc-ecosystem/go-grpc-middleware"
grpc_recovery "github.com/grpc-ecosystem/go-grpc-middleware/recovery"
Expand Down Expand Up @@ -60,6 +61,7 @@ var (
filePath string
forceRebuild bool
forceVerify bool
forceVerifyFiles []string
downloaderApiAddr string
natSetting string
torrentVerbosity int
Expand Down Expand Up @@ -94,20 +96,24 @@ func init() {
rootCmd.Flags().BoolVar(&disableIPV6, "downloader.disable.ipv6", utils.DisableIPV6.Value, utils.DisableIPV6.Usage)
rootCmd.Flags().BoolVar(&disableIPV4, "downloader.disable.ipv4", utils.DisableIPV4.Value, utils.DisableIPV6.Usage)
rootCmd.Flags().BoolVar(&seedbox, "seedbox", false, "seedbox determines to either download .torrent from webseed or not")
rootCmd.PersistentFlags().BoolVar(&forceVerify, "verify", false, "Force verify data files if have .torrent files")
rootCmd.PersistentFlags().BoolVar(&forceVerify, "verify", false, "Verify files. All by default, or passed by --verify.files")
rootCmd.PersistentFlags().StringArrayVar(&forceVerifyFiles, "verify.files", nil, "Limit list of files to verify")

withDataDir(createTorrent)
withFile(createTorrent)
rootCmd.AddCommand(createTorrent)

rootCmd.AddCommand(torrentCat)
rootCmd.AddCommand(torrentMagnet)

withDataDir(printTorrentHashes)
printTorrentHashes.PersistentFlags().BoolVar(&forceRebuild, "rebuild", false, "Force re-create .torrent files")
printTorrentHashes.Flags().StringVar(&targetFile, "targetfile", "", "write output to file")
if err := printTorrentHashes.MarkFlagFilename("targetfile"); err != nil {
panic(err)
}

rootCmd.AddCommand(createTorrent)
rootCmd.AddCommand(printTorrentHashes)

}

func withDataDir(cmd *cobra.Command) {
Expand All @@ -132,8 +138,10 @@ var rootCmd = &cobra.Command{
debug.Exit()
},
PersistentPreRun: func(cmd *cobra.Command, args []string) {
logger = debug.SetupCobra(cmd, "downloader")
logger.Info("Build info", "git_branch", params.GitBranch, "git_tag", params.GitTag, "git_commit", params.GitCommit)
if cmd.Name() != "torrent_cat" {
logger = debug.SetupCobra(cmd, "downloader")
logger.Info("Build info", "git_branch", params.GitBranch, "git_tag", params.GitTag, "git_commit", params.GitCommit)
}
},
Run: func(cmd *cobra.Command, args []string) {
if err := Downloader(cmd.Context(), logger); err != nil {
Expand Down Expand Up @@ -199,9 +207,11 @@ func Downloader(ctx context.Context, logger log.Logger) error {
logger.Info("[snapshots] Start bittorrent server", "my_peer_id", fmt.Sprintf("%x", d.TorrentClient().PeerID()))

if forceVerify { // remove and create .torrent files (will re-read all snapshots)
if err = d.VerifyData(ctx); err != nil {
if err = d.VerifyData(ctx, forceVerifyFiles); err != nil {
return err
}
logger.Info("[snapshots] Verify done")
return nil
}

d.MainLoopInBackground(false)
Expand Down Expand Up @@ -251,6 +261,57 @@ var printTorrentHashes = &cobra.Command{
},
}

// torrentVerify defines the "torrent_verify" sub-command. It expects the path
// of a .torrent file as its first positional argument, loads the metainfo
// from that file and prints the value returned by HashInfoBytes followed by a
// newline.
//
// NOTE(review): the body is currently the same as torrentCat, and the visible
// part of init() does not add this command to rootCmd - confirm whether it is
// meant to be registered and whether it should do an actual verification.
var torrentVerify = &cobra.Command{
	Use:     "torrent_verify",
	Example: "go run ./cmd/downloader torrent_verify <path_to_torrent_file>",
	RunE: func(_ *cobra.Command, argv []string) error {
		// The .torrent path is mandatory.
		if len(argv) < 1 {
			return fmt.Errorf("please pass .torrent file path by first argument")
		}

		torrentPath := argv[0]
		meta, loadErr := metainfo.LoadFromFile(torrentPath)
		if loadErr != nil {
			return fmt.Errorf("LoadFromFile: %w, file=%s", loadErr, torrentPath)
		}

		infoHash := meta.HashInfoBytes()
		fmt.Printf("%s\n", infoHash)
		return nil
	},
}
// torrentCat defines the "torrent_cat" sub-command. It expects the path of a
// .torrent file as its first positional argument, loads the metainfo from
// that file and prints the value returned by HashInfoBytes followed by a
// newline. Any load failure is returned wrapped together with the file path.
var torrentCat = &cobra.Command{
	Use:     "torrent_cat",
	Example: "go run ./cmd/downloader torrent_cat <path_to_torrent_file>",
	RunE: func(_ *cobra.Command, argv []string) error {
		// The .torrent path is mandatory.
		if len(argv) < 1 {
			return fmt.Errorf("please pass .torrent file path by first argument")
		}

		torrentPath := argv[0]
		meta, loadErr := metainfo.LoadFromFile(torrentPath)
		if loadErr != nil {
			return fmt.Errorf("LoadFromFile: %w, file=%s", loadErr, torrentPath)
		}

		infoHash := meta.HashInfoBytes()
		fmt.Printf("%s\n", infoHash)
		return nil
	},
}
// torrentMagnet defines the "torrent_magnet" sub-command. It expects the path
// of a .torrent file as its first positional argument, loads the metainfo
// from that file and prints the string form of the magnet link built by
// Magnet(nil, nil), followed by a newline.
var torrentMagnet = &cobra.Command{
	Use:     "torrent_magnet",
	Example: "go run ./cmd/downloader torrent_magnet <path_to_torrent_file>",
	RunE: func(_ *cobra.Command, argv []string) error {
		// The .torrent path is mandatory.
		if len(argv) < 1 {
			return fmt.Errorf("please pass .torrent file path by first argument")
		}

		torrentPath := argv[0]
		meta, loadErr := metainfo.LoadFromFile(torrentPath)
		if loadErr != nil {
			return fmt.Errorf("LoadFromFile: %w, file=%s", loadErr, torrentPath)
		}

		magnetLink := meta.Magnet(nil, nil).String()
		fmt.Printf("%s\n", magnetLink)
		return nil
	},
}

func doPrintTorrentHashes(ctx context.Context, logger log.Logger) error {
dirs := datadir.New(datadirCli)
if err := datadir.ApplyMigrations(dirs); err != nil {
Expand Down
68 changes: 41 additions & 27 deletions cmd/downloader/readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ Flag `--snapshots` is compatible with `--prune` flag
# It will dump blocks from Database to .seg files:
erigon snapshots retire --datadir=<your_datadir>

# Create .torrent files (Downloader will seed automatically all .torrent files)
# Create .torrent files (you can think about them as "checksum")
downloader torrent_create --datadir=<your_datadir>

# output format is compatible with https://github.com/ledgerwatch/erigon-snapshot
downloader torrent_hashes --rebuild --datadir=<your_datadir>
downloader torrent_hashes --datadir=<your_datadir>

# Start downloader (seeds automatically)
# Start downloader (read all .torrent files, and download/seed data)
downloader --downloader.api.addr=127.0.0.1:9093 --datadir=<your_datadir>

# Erigon is not required for snapshots seeding. But Erigon with --snapshots also does seeding.
```

Additional info:
Expand Down Expand Up @@ -109,52 +109,66 @@ Technical details:
- To prevent attack - .idx creation using random Seed - all nodes will have
different .idx file (and same .seg files)
- If you add/remove any .seg file manually, you also need to
remove `<your_datadir>/snapshots/db` folder
remove `<your_datadir>/downloader` folder

## How to verify that .seg files have the same checksum as current .torrent files

```
# Use it if you see weird behavior, bugs, bans, hardware issues, etc...
downloader --verify --datadir=<your_datadir>
downloader --verify --verify.files=v1-1-2-transaction.seg --datadir=<your_datadir>
```

## Faster rsync
## Create cheap seedbox

Usually Erigon's network is self-sufficient - peers automatically produce and
seed snapshots. But a new network or a new type of snapshots needs a bootstrapping
step - no peers have these files yet.

**Seedbox** - a machine which only seeds archive files:

- Doesn't need synced erigon
- Can work on very cheap disks, cpu, ram
- It works exactly like an Erigon node - it downloads archive files and seeds them

```
rsync -aP --delete -e "ssh -T -o Compression=no -x" <src> <dst>
downloader --seedbox --datadir=<your> --chain=mainnet
```

## Release details

Start automatic commit of new hashes to branch `master`
Seedbox can fall back to **Webseed** - an HTTP URL to centralized infrastructure. For example: a private S3 bucket with
signed_urls, or any HTTP server with files. Main idea: Erigon's decentralized infrastructure has higher priority than
the centralized one (which is used as **support/fallback**).

```
crontab -e
@hourly cd <erigon_source_dir> && ./cmd/downloader/torrent_hashes_update.sh <your_datadir> <network_name> 1>&2 2>> ~/erigon_cron.log
# Erigon has default webseed URLs - and you can create your own
downloader --datadir=<your> --chain=mainnet --webseed=<webseed_url>
# See also: `downloader --help` of `--webseed` flag. There is an option to pass it by `datadir/webseed.toml` file
```

It does push to branch `auto`, before release - merge `auto` to `main` manually
---------

## Create seedbox to support network
## Utilities

```
# Can run on empty datadir
downloader --datadir=<your> --chain=mainnet
downloader torrent_cat /path/to.torrent
downloader torrent_magnet /path/to.torrent
```

## Launch new network or new type of snapshots
## Faster rsync

Usually Erigon's network is self-sufficient - peers automatically produce and
seed snapshots. But a new network or a new type of snapshots needs a bootstrapping
step - no peers have these files yet.
```
rsync -aP --delete -e "ssh -T -o Compression=no -x" <src> <dst>
```

**WebSeed** - is centralized file-storage - used to Bootstrap network. For
example S3 with signed_url.
## Release details

Erigon dev team can share existing **webseed_url**. Or you can create own.
Start automatic commit of new hashes to branch `master`

```
downloader --datadir=<your> --chain=mainnet --webseed=<webseed_url>
crontab -e
@hourly cd <erigon_source_dir> && ./cmd/downloader/torrent_hashes_update.sh <your_datadir> <network_name> 1>&2 2>> ~/erigon_cron.log
```

It does push to branch `auto`, before release - merge `auto` to `main` manually

# See also: `downloader --help` of `--webseed` flag. There is an option to pass it by `datadir/webseed.toml` file.
```
20 changes: 13 additions & 7 deletions erigon-lib/downloader/downloader.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ import (
"github.com/ledgerwatch/erigon-lib/kv"
"github.com/ledgerwatch/erigon-lib/kv/mdbx"
"github.com/ledgerwatch/log/v3"
"golang.org/x/exp/slices"
"golang.org/x/sync/errgroup"
"golang.org/x/sync/semaphore"
)
Expand Down Expand Up @@ -372,7 +373,7 @@ func (d *Downloader) ReCalcStats(interval time.Duration) {
stats.Progress = 0
} else {
stats.Progress = float32(float64(100) * (float64(stats.BytesCompleted) / float64(stats.BytesTotal)))
if stats.Progress == 100 && !stats.Completed {
if int(stats.Progress) == 100 && !stats.Completed {
stats.Progress = 99.99
}
}
Expand All @@ -382,7 +383,7 @@ func (d *Downloader) ReCalcStats(interval time.Duration) {
d.stats = stats
}

func (d *Downloader) verifyFile(ctx context.Context, t *torrent.Torrent, completePieces *atomic.Uint64) error {
func VerifyFile(ctx context.Context, t *torrent.Torrent, completePieces *atomic.Uint64) error {
select {
case <-ctx.Done():
return ctx.Err()
Expand All @@ -408,15 +409,20 @@ func (d *Downloader) verifyFile(ctx context.Context, t *torrent.Torrent, complet
return g.Wait()
}

func (d *Downloader) VerifyData(ctx context.Context) error {
func (d *Downloader) VerifyData(ctx context.Context, onlyFiles []string) error {
total := 0
torrents := d.torrentClient.Torrents()
_torrents := d.torrentClient.Torrents()
torrents := make([]*torrent.Torrent, 0, len(_torrents))
for _, t := range torrents {
select {
case <-t.GotInfo():
if len(onlyFiles) > 0 && !slices.Contains(onlyFiles, t.Name()) {
continue
}
torrents = append(torrents, t)
total += t.NumPieces()
default:
continue
case <-ctx.Done():
return ctx.Err()
}
}

Expand Down Expand Up @@ -449,7 +455,7 @@ func (d *Downloader) VerifyData(ctx context.Context) error {
for _, t := range torrents {
t := t
g.Go(func() error {
return d.verifyFile(ctx, t, completedPieces)
return VerifyFile(ctx, t, completedPieces)
})
}

Expand Down
2 changes: 1 addition & 1 deletion erigon-lib/downloader/downloader_grpc_server.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ func (s *GrpcServer) Delete(ctx context.Context, request *proto_downloader.Delet
}

func (s *GrpcServer) Verify(ctx context.Context, request *proto_downloader.VerifyRequest) (*emptypb.Empty, error) {
err := s.d.VerifyData(ctx)
err := s.d.VerifyData(ctx, nil)
if err != nil {
return nil, err
}
Expand Down
9 changes: 6 additions & 3 deletions turbo/logging/logging.go
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,12 @@ func SetupLoggerCmd(filePrefix string, cmd *cobra.Command) log.Logger {

dirPath := cmd.Flags().Lookup(LogDirPathFlag.Name).Value.String()
if dirPath == "" {
datadir := cmd.Flags().Lookup("datadir").Value.String()
if datadir != "" {
dirPath = filepath.Join(datadir, "logs")
datadirFlag := cmd.Flags().Lookup("datadir")
if datadirFlag != nil {
datadir := datadirFlag.Value.String()
if datadir != "" {
dirPath = filepath.Join(datadir, "logs")
}
}
}

Expand Down

0 comments on commit 8cfafa4

Please sign in to comment.