From e710b259fe2650ec50b4c77a5c020e57cca59c28 Mon Sep 17 00:00:00 2001 From: sslivkoff Date: Wed, 9 Aug 2023 11:10:26 -0700 Subject: [PATCH] use clap fork to properly parse hyphenated block ranges (#25) --- Cargo.lock | 38 ++++++------- crates/cli/Cargo.toml | 2 +- crates/cli/src/args.rs | 12 ++-- crates/cli/src/main.rs | 2 +- crates/cli/src/parse/blocks.rs | 85 ++++++++++++++++++++++++++-- crates/python/src/collect_adapter.rs | 2 +- crates/python/src/freeze_adapter.rs | 2 +- 7 files changed, 109 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5bea3002..ed38ff0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -574,33 +574,33 @@ dependencies = [ ] [[package]] -name = "clap" -version = "4.3.10" +name = "clap_builder_cryo" +version = "4.3.21-cryo" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "384e169cc618c613d5e3ca6404dda77a8685a63e08660dcc64abaf7da7cb0c7a" +checksum = "e9e3a8989ce9e79041f7681f82e669c1a06724975fb8f5b7bd60e359e572da76" dependencies = [ - "clap_builder", - "clap_derive", - "once_cell", + "anstream", + "anstyle", + "clap_lex_cryo", + "strsim", ] [[package]] -name = "clap_builder" -version = "4.3.10" +name = "clap_cryo" +version = "4.3.21-cryo" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef137bbe35aab78bdb468ccfba75a5f4d8321ae011d34063770780545176af2d" +checksum = "053a8216426fdf2f5ce7fa46197e3517adf8b1c0ab96d3dbf73469bc15f34ad6" dependencies = [ - "anstream", - "anstyle", - "clap_lex", - "strsim", + "clap_builder_cryo", + "clap_derive_cryo", + "once_cell", ] [[package]] -name = "clap_derive" -version = "4.3.2" +name = "clap_derive_cryo" +version = "4.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b8cd2b2a819ad6eec39e8f1d6b53001af1e5469f8c177579cdaeb313115b825f" +checksum = "fd1cd2cebdb1ec98182edb745382a2b65d6bc254782de26316e4366d83d39988" dependencies = [ "heck", "proc-macro2", @@ -609,10 +609,10 @@ dependencies = [ ] [[package]] -name = "clap_lex" +name = "clap_lex_cryo" version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2da6da31387c7e4ef160ffab6d5e7f00c42626fe39aea70a7b0f1773f7dd6c1b" +checksum = "98979652585ac7c8d6267e363229b6a26bc4bf469c69e96442f85830ebc89f45" [[package]] name = "coins-bip32" @@ -849,7 +849,7 @@ version = "0.1.0" dependencies = [ "anstyle", "chrono", - "clap", + "clap_cryo", "color-print", "colored", "cryo_freeze", diff --git a/crates/cli/Cargo.toml b/crates/cli/Cargo.toml index bfb440e4..0f481e4c 100644 --- a/crates/cli/Cargo.toml +++ b/crates/cli/Cargo.toml @@ -18,7 +18,7 @@ path = "src/main.rs" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -clap = { version = "4.3.4", features = ["derive", "color", "unstable-styles"] } +clap_cryo = { version = "4.3.21-cryo", features = ["derive", "color", "unstable-styles"] } color-print = "0.3.4" ethers = "2.0.7" hex = "0.4.3" diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index c671810e..24090585 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -1,17 +1,17 @@ -use clap::Parser; +use clap_cryo::Parser; use color_print::cstr; /// Command line arguments #[derive(Parser, Debug)] -#[command(name = "cryo", author, version, about = get_about_str(), long_about = None, styles=get_styles(), after_help=get_after_str())] +#[command(name = "cryo", author, version, about = get_about_str(), long_about = None, styles=get_styles(), after_help=get_after_str(), allow_negative_numbers = true)] pub struct Args { /// datatype to collect #[arg(required = true, help=get_datatype_help(), num_args(1..))] pub datatype: Vec, /// Block numbers, see syntax below - #[arg(short, long, allow_hyphen_values(true), help_heading = "Content Options")] - pub blocks: Option, + #[arg(short, long, allow_negative_numbers = true, help_heading = "Content Options", num_args(1..))] + pub blocks: Option>, /// Transaction hashes, see syntax below #[arg( @@ -165,14 +165,14 @@ pub struct Args { pub inner_request_size: u64, } -pub(crate) fn get_styles() -> clap::builder::Styles { +pub(crate) fn get_styles() -> clap_cryo::builder::Styles { let white = anstyle::Color::Rgb(anstyle::RgbColor(255, 255, 255)); let green = anstyle::Color::Rgb(anstyle::RgbColor(0, 225, 0)); let grey = anstyle::Color::Rgb(anstyle::RgbColor(170, 170, 170)); let title = anstyle::Style::new().bold().fg_color(Some(green)); let arg = anstyle::Style::new().bold().fg_color(Some(white)); let comment = anstyle::Style::new().fg_color(Some(grey)); - clap::builder::Styles::styled() + clap_cryo::builder::Styles::styled() .header(title) .error(comment) .usage(title) diff --git a/crates/cli/src/main.rs b/crates/cli/src/main.rs index a733f4d3..eeeb7c14 100644 --- a/crates/cli/src/main.rs +++ b/crates/cli/src/main.rs @@ -1,6 +1,6 @@ //! cryo_cli is a cli for cryo_freeze -use clap::Parser; +use clap_cryo::Parser; mod args; mod parse; diff --git a/crates/cli/src/parse/blocks.rs b/crates/cli/src/parse/blocks.rs index 0ca4762e..72312061 100644 --- a/crates/cli/src/parse/blocks.rs +++ b/crates/cli/src/parse/blocks.rs @@ -1,6 +1,7 @@ use std::sync::Arc; use ethers::prelude::*; +use polars::prelude::*; use cryo_freeze::{BlockChunk, Chunk, ChunkData, ParseError, Subchunk}; @@ -10,13 +11,87 @@ pub(crate) async fn parse_blocks( args: &Args, provider: Arc>, ) -> Result)>, ParseError> { - // parse inputs into BlockChunks - let block_chunks = match &args.blocks { - Some(inputs) => parse_block_inputs(inputs, &provider).await?, - None => return Err(ParseError::ParseError("could not parse block inputs".to_string())), + let (files, explicit_numbers): (Vec<&String>, Vec<&String>) = match &args.blocks { + Some(blocks) => blocks.iter().partition(|tx| std::path::Path::new(tx).exists()), + None => return Err(ParseError::ParseError("no blocks specified".to_string())), }; - postprocess_block_chunks(block_chunks, args, provider).await + let mut file_chunks = if !files.is_empty() { + let mut file_chunks = Vec::new(); + for path in files { + let column = if path.contains(':') { + path.split(':') + .last() + .ok_or(ParseError::ParseError("could not parse txs path column".to_string()))? + } else { + "block_number" + }; + let integers = read_integer_column(path, column) + .map_err(|_e| ParseError::ParseError("could not read input".to_string()))?; + let chunk = BlockChunk::Numbers(integers); + let chunk_label = path + .split("__") + .last() + .and_then(|s| s.strip_suffix(".parquet").map(|s| s.to_string())); + file_chunks.push((Chunk::Block(chunk), chunk_label)); + } + file_chunks + } else { + Vec::new() + }; + + let explicit_chunks = if !explicit_numbers.is_empty() { + // parse inputs into BlockChunks + let mut block_chunks = Vec::new(); + for explicit_number in explicit_numbers { + let outputs = parse_block_inputs(explicit_number, &provider).await?; + block_chunks.extend(outputs.into_iter()); + } + postprocess_block_chunks(block_chunks, args, provider).await? + } else { + Vec::new() + }; + + file_chunks.extend(explicit_chunks.into_iter()); + Ok(file_chunks) +} + +fn read_integer_column(path: &str, column: &str) -> Result, ParseError> { + let file = std::fs::File::open(path) + .map_err(|_e| ParseError::ParseError("could not open file path".to_string()))?; + + let df = ParquetReader::new(file) + .with_columns(Some(vec![column.to_string()])) + .finish() + .map_err(|_e| ParseError::ParseError("could not read data from column".to_string()))?; + + let series = df + .column(column) + .map_err(|_e| ParseError::ParseError("could not get column".to_string()))? + .unique() + .map_err(|_e| ParseError::ParseError("could not get column".to_string()))?; + + println!("{:?}", series); + match series.u32() { + Ok(ca) => ca + .into_iter() + .map(|v| { + v.ok_or_else(|| ParseError::ParseError("block number missing".to_string())) + .map(|data| data.into()) + }) + .collect(), + Err(_e) => match series.u64() { + Ok(ca) => ca + .into_iter() + .map(|v| { + v.ok_or_else(|| ParseError::ParseError("block number missing".to_string())) + }) + .collect(), + Err(_e) => { + Err(ParseError::ParseError("could not convert to integer column".to_string())) + } + }, + } } async fn postprocess_block_chunks( diff --git a/crates/python/src/collect_adapter.rs b/crates/python/src/collect_adapter.rs index cf85184b..35ae5729 100644 --- a/crates/python/src/collect_adapter.rs +++ b/crates/python/src/collect_adapter.rs @@ -48,7 +48,7 @@ use cryo_freeze::collect; pub fn _collect( py: Python<'_>, datatype: String, - blocks: Option, + blocks: Option>, txs: Option>, align: bool, reorg_buffer: u64, diff --git a/crates/python/src/freeze_adapter.rs b/crates/python/src/freeze_adapter.rs index e7670771..146e4418 100644 --- a/crates/python/src/freeze_adapter.rs +++ b/crates/python/src/freeze_adapter.rs @@ -49,7 +49,7 @@ use cryo_cli::{run, Args}; pub fn _freeze( py: Python<'_>, datatype: Vec, - blocks: Option, + blocks: Option>, txs: Option>, align: bool, reorg_buffer: u64,