From 78d5a801802774d78f1f4c4c430baa4e45eaa8ac Mon Sep 17 00:00:00 2001 From: MrFixThis Date: Mon, 25 Sep 2023 14:20:08 -0500 Subject: [PATCH 1/2] refactor: Enhance code quality and cli module Improved screenshot capturing logic, enhanced helper functions and improved CLI argument parsing specification --- Cargo.lock | 7 ++ Cargo.toml | 6 +- src/cli/args.rs | 107 +++++++++++++---- src/cli/mod.rs | 60 +++------ src/cli/screenshot.rs | 274 +++++++++++++++++++++--------------------- src/log.rs | 18 ++- src/main.rs | 27 ++--- 7 files changed, 261 insertions(+), 238 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8d616a4..dd0b844 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "anyhow" +version = "1.0.75" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4668cab20f66d8d020e1fbc0ebe47217433c1b6c8f2040faf858554e394ace6" + [[package]] name = "async-tungstenite" version = "0.22.2" @@ -586,6 +592,7 @@ checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" name = "hxn" version = "0.1.6" dependencies = [ + "anyhow", "chromiumoxide", "clap", "colored", diff --git a/Cargo.toml b/Cargo.toml index 9129e36..d87c948 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,11 +22,7 @@ futures = "0.3.28" clap = { version = "4.4.4", features = ["derive", "string"] } columns = "0.1.0" colored = "2.0.4" - -[[bin]] -name = "hxn" -path = "src/main.rs" - +anyhow = "1.0.75" [profile.dev] opt-level = 0 diff --git a/src/cli/args.rs b/src/cli/args.rs index 85797d7..8993cea 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -1,44 +1,101 @@ -use crate::cli::splash; -use clap::Parser; +use super::ascii; +use clap::{Args, Parser}; -#[derive(Parser)] -#[command(author, version, about = splash() )] -#[command(propagate_version = true)] -#[command(arg_required_else_help = true)] -pub struct Cli { - #[arg(required = false, short, long)] - /// a single url or a file containing multiple urls +#[derive(Debug, Args)] +#[group(required = false, multiple = false, conflicts_with = "stdin")] +pub struct Input { + /// Website URL + #[arg(short, long)] pub url: Option, - #[arg(short, long, default_value = "hxnshots")] + /// Path of the file containing URLs + #[arg(short, long)] + pub file_path: Option, +} + +#[derive(Debug, Parser)] +#[command( + author, + version, + about = ascii::splash(), + propagate_version = true, + arg_required_else_help = true +)] +pub struct Cli { + /// Browser binary path + #[arg(short, long, default_value = "/usr/bin/chrome")] + pub binary_path: String, + + /// Read urls from the standard input + #[arg(long)] + pub stdin: bool, + + #[command(flatten)] + pub input: Input, + /// Output directory to save screenshots + #[arg(short, long, default_value = "hxnshots")] pub outdir: String, - #[arg(short, long, default_value = "4")] /// Maximum number of parallel tabs - pub tabs: Option, - - #[arg(short, long, default_value = "/usr/bin/google-chrome")] - /// Browser binary path - pub binary_path: String, + #[arg(short, long, default_value = "4")] + pub tabs: usize, - #[arg(short = 'x', long, default_value = "1440")] /// Width of the website // URL - pub width: Option, + #[arg(short = 'x', long, default_value = "1440")] + pub width: u32, - #[arg(short = 'y', long, default_value = "900")] /// Height of the website // URL - pub height: Option, + #[arg(short = 'y', long, default_value = "900")] + pub height: u32, - #[arg(long, default_value = "10")] /// Define timeout for urls + #[arg(long, default_value = "10")] pub timeout: u64, - #[arg(long)] /// Silent mode (suppress all console output) + #[arg(long)] pub silent: bool, +} - #[arg(long)] - /// Read urls from the standard in - pub stdin: bool, +#[cfg(test)] +mod tests { + use clap::error::ErrorKind; + + use super::*; + + #[test] + fn test_no_input_urls() { + let args = Cli::try_parse_from(["-b my_browser"]); + assert!(args.is_err()); + assert_eq!( + args.unwrap_err().kind(), + ErrorKind::DisplayHelpOnMissingArgumentOrSubcommand + ); + } + + #[test] + fn test_stdin_source_mutual_exclusion() { + let args = Cli::try_parse_from([ + "-b my_browser", + "--stdin", + "-u https://example.com", + "-f /my/file", + ]); + assert!(args.is_err()); + assert_eq!(args.unwrap_err().kind(), ErrorKind::ArgumentConflict); + } + + #[test] + fn test_url_mutual_exclusion_with_file_path() { + let args = Cli::try_parse_from(["-b my_browser", "-u https://example.com", "-f /my/file"]); + assert!(args.is_err()); + assert_eq!(args.unwrap_err().kind(), ErrorKind::ArgumentConflict); + } + + #[test] + fn test_file_path_as_source() { + let args = Cli::try_parse_from(["-b my_browser", "-f /my/file"]); + assert!(args.is_ok()); + } } diff --git a/src/cli/mod.rs b/src/cli/mod.rs index 9da7be2..7ba8ee7 100644 --- a/src/cli/mod.rs +++ b/src/cli/mod.rs @@ -1,52 +1,28 @@ pub mod args; -pub mod ascii; +mod ascii; pub mod screenshot; -pub use args::*; -pub use ascii::*; -pub use screenshot::*; pub mod hxn_helper { - - use std::io::BufRead; - - /// https://www.youtube.com/watch?v=K_wnB9ibCMg&t=1078s - /// Reads user input from stdin line by line - pub fn read_urls_from_stdin() -> Vec { - let mut input = String::new(); - let mut urls = Vec::new(); - - loop { - input.clear(); - match std::io::stdin().lock().read_line(&mut input) { - Ok(0) => break, // EOF reached - Ok(_) => urls.push(input.trim().to_string()), - Err(err) => panic!("Error reading from stdin: {}", err), - } - } - - urls + use std::{io, path::Path}; + + /// Reads user's input from stdin line by line. + #[inline] + pub fn read_urls_from_stdin() -> anyhow::Result> { + Ok(io::read_to_string(io::stdin().lock())? + .lines() + .map(|url| url.trim().to_owned()) + .collect()) } - #[allow(dead_code)] - pub fn read_urls_from_file(url: &Option) -> Vec { - let mut urls = Vec::new(); - - if let Some(url) = url { - if std::path::Path::new(url).exists() { - if let Ok(file) = std::fs::File::open(url) { - let lines = std::io::BufReader::new(file).lines().map_while(Result::ok); - urls = lines.collect(); - } else { - urls = vec![url.clone()]; - } - } else { - urls = vec![url.clone()]; - } - } - - urls + /// Reads URLs from a file. + #[inline] + pub fn read_urls_from_file>(file_path: T) -> anyhow::Result> { + Ok(std::fs::read_to_string(file_path)? + .lines() + .map(|url| url.trim().to_owned()) + .collect()) } - #[allow(dead_code)] + #[allow(unused)] pub fn read_ports() {} } diff --git a/src/cli/screenshot.rs b/src/cli/screenshot.rs index 2ac682a..095ab64 100644 --- a/src/cli/screenshot.rs +++ b/src/cli/screenshot.rs @@ -1,124 +1,92 @@ -use crate::cli::ascii::{BAR, RESET}; -use crate::log::error; -use chromiumoxide::browser::{Browser, BrowserConfig}; -use chromiumoxide::handler::viewport::Viewport; -use colored::{Color, Colorize}; -use futures::StreamExt; -use std::{ - env, - io::{BufRead, BufReader}, - path::Path, -}; -use tokio::{fs, time::timeout}; - -use chromiumoxide::cdp::browser_protocol::page::{ - CaptureScreenshotFormat, CaptureScreenshotParams, +use super::args::{Cli, Input}; +use super::ascii::{BAR, RESET}; +use crate::log; +use anyhow::Context; +use chromiumoxide::{ + browser::{Browser, BrowserConfig}, + cdp::browser_protocol::page::{CaptureScreenshotFormat, CaptureScreenshotParams}, + handler::viewport::Viewport, }; -use chromiumoxide::Page; +use colored::{Color, Colorize}; use columns::Columns; -use core::time::Duration; -use reqwest::get; +use futures::StreamExt; +use reqwest::StatusCode; +use std::sync::Arc; +use std::{env, path::Path, time::Duration}; +use tokio::{fs, task, time}; +use url::Url; -#[allow(clippy::too_many_arguments)] pub async fn run( - url: Option, - outdir: Option, - tabs: Option, - binary_path: String, - width: Option, - height: Option, - timeout: u64, - silent: bool, - stdin: bool, -) -> Result<(), Box> { - if !Path::new(&binary_path).exists() { - error("Unble to locate browser binary"); - - std::process::exit(0); + Cli { + binary_path, + input: Input { url, file_path }, + stdin, + outdir, + tabs, + width, + height, + timeout, + silent, + }: Cli, +) -> anyhow::Result<()> { + let browser = Path::new(&binary_path); + if !browser.exists() { + return Err(anyhow::Error::msg(format!( + "Unable to locate browser binary {binary_path}" + ))); } - let outdir = match outdir { - Some(dir) => dir, - None => "hxnshots".to_string(), - }; - - let viewport_width = width.unwrap_or(1440); - let viewport_height = height.unwrap_or(900); let (browser, mut handler) = Browser::launch( BrowserConfig::builder() .no_sandbox() - .window_size(viewport_width, viewport_height) - .chrome_executable(Path::new(&binary_path)) + .window_size(width, height) + .chrome_executable(browser) .viewport(Viewport { - width: viewport_width, - height: viewport_height, + width, + height, device_scale_factor: None, emulating_mobile: false, is_landscape: false, has_touch: false, }) - .build()?, + .build() + .map_err(anyhow::Error::msg)?, ) - .await?; - - let _handle = tokio::task::spawn(async move { - loop { - let _ = handler.next().await; + .await + .context(format!("Error instantiating browser {binary_path}"))?; + let browser = Arc::new(browser); + + task::spawn(async move { + while let Some(h) = handler.next().await { + if h.is_err() { + break; + } } }); - if fs::metadata(&outdir).await.is_err() { - fs::create_dir(&outdir).await?; + let dump_dir = Path::new(&outdir); + if !dump_dir.exists() { + // TODO: Check error cases for reporting + fs::create_dir(dump_dir).await?; } - let urls: Vec; - - match stdin { - true => { - urls = crate::cli::hxn_helper::read_urls_from_stdin(); - } - - false => { - if let Some(url) = &url { - if Path::new(url).exists() { - let file = std::fs::File::open(url)?; - let lines = BufReader::new(file).lines().map_while(Result::ok); - urls = lines.collect(); - } else { - urls = vec![url.clone()]; - } - } else { - urls = vec![]; + if stdin { + env::set_current_dir(dump_dir)?; + let urls = super::hxn_helper::read_urls_from_stdin()?; + take_screenshot_in_bulk(&browser, urls, tabs, timeout, silent).await?; + } else { + match (url, file_path) { + (None, Some(file_path)) => { + let urls = super::hxn_helper::read_urls_from_file(file_path)?; + env::set_current_dir(dump_dir)?; + take_screenshot_in_bulk(&browser, urls, tabs, timeout, silent).await?; } - } - } - - let mut url_chunks = Vec::new(); - - for chunk in urls.chunks(tabs.unwrap_or(4)) { - let mut urls = Vec::new(); - for url in chunk { - if let Ok(url) = url::Url::parse(url) { - urls.push(url); + (Some(url), None) => { + env::set_current_dir(dump_dir)?; + take_screenshot(&browser, url, timeout, silent).await?; } + _ => unreachable!(), } - url_chunks.push(urls); - } - - env::set_current_dir(Path::new(&outdir))?; - - let mut handles = Vec::new(); - - for chunk in url_chunks { - let n_tab = browser.new_page("about:blank").await?; - let h = tokio::spawn(take_screenshots(n_tab, chunk, silent, timeout)); - handles.push(h); - } - - for handle in handles { - handle - .await? - .expect("Something went wrong while waiting for taking screenshot and saving to file"); } println!( @@ -132,48 +100,82 @@ pub async fn run( Ok(()) } -async fn take_screenshots( - page: Page, - urls: Vec, +async fn take_screenshot_in_bulk( + browser: &Arc, + urls: Vec, + tabs: usize, + timeout: u64, silent: bool, - timeout_value: u64, -) -> Result<(), Box> { - for url in urls { - let url = url.as_str(); - if let Ok(Ok(_res)) = timeout(Duration::from_secs(timeout_value), get(url)).await { - let filename = url.replace("://", "-").replace('/', "_") + ".png"; - page.goto(url) - .await? - .save_screenshot( - CaptureScreenshotParams::builder() - .format(CaptureScreenshotFormat::Png) - .build(), - filename, - ) - .await?; - - let info = Columns::from(vec![ - format!("{RESET}").split('\n').collect::>(), - vec![ - &format!(" {BAR}").bold().blue(), - &format!(" 🔗 URL = {}", url.red()), - &format!( - " 🏠 Title = {}", - page.get_title().await?.unwrap_or_default().purple() - ), - &format!(" 🔥 Status = {}", _res.status()).green(), - ], - ]) - .set_tabsize(0) - .make_columns(); - if !silent { - println!("{info}"); +) -> anyhow::Result<()> { + let url_chunks: Vec> = urls.chunks(tabs).map(ToOwned::to_owned).collect(); + let mut handles = Vec::with_capacity(url_chunks.len()); + + for urls in url_chunks { + let browser = Arc::clone(browser); + let handle = tokio::spawn(async move { + for url in urls { + if let Err(error) = take_screenshot(&browser, url, timeout, silent).await { + log::warn(error.to_string()); + } } - } else { - error("Please increase timout value by --timeout flag"); - println!("[-] Timed out URL = {}", url); - } + }); + + handles.push(handle); + } + + for handle in handles { + handle.await?; + } + + Ok(()) +} + +async fn take_screenshot( + browser: &Browser, + url: String, + timeout: u64, + silent: bool, +) -> anyhow::Result<()> { + let parsed_url = Url::parse(&url)?; + let res = time::timeout( + Duration::from_secs(timeout), + reqwest::get(parsed_url.clone()), + ) + .await + .context(format!("[-] Timed out URL = {url}"))??; + + let filename = format!("{}.png", url.replace("://", "-").replace('/', "_")); + let page = browser.new_page(parsed_url).await?; + page.save_screenshot( + CaptureScreenshotParams::builder() + .format(CaptureScreenshotFormat::Png) + .build(), + filename, + ) + .await?; + + if !silent { + let title = page.get_title().await.unwrap_or_default().unwrap(); + show_info(url, title, res.status()); } + page.close().await?; + Ok(()) } + +fn show_info(url: String, title: String, status: StatusCode) { + let info = Columns::from(vec![ + RESET.split('\n').collect::>(), + vec![ + &BAR.bold().blue(), + &format!(" 🔗 URL = {}", url.red()), + &format!(" 🏠 Title = {}", title.purple()), + &format!(" 🔥 Status = {}", status).green(), + ], + ]) + .set_tabsize(0) + .make_columns(); + + println!("{info}"); +} diff --git a/src/log.rs b/src/log.rs index 84fa646..9a6990d 100644 --- a/src/log.rs +++ b/src/log.rs @@ -1,27 +1,25 @@ +#![allow(unused)] + use colored::{Color, Colorize}; /// Prints the given message to the console and aborts the process. -#[allow(dead_code)] -pub fn abort(msg: &str) -> ! { +pub fn abort(msg: String) -> ! { error(msg); std::process::exit(1); } -#[allow(dead_code)] -pub fn info(msg: &str, color: Color) { +pub fn info(msg: String, color: Color) { println!("{}: {}", "info".bold().color(color), msg); } -pub fn error(msg: &str) { - println!("{}: {}", "error".bold().color(Color::Red), msg); +pub fn error(msg: String) { + eprintln!("{}: {}", "error".bold().color(Color::Red), msg); } -#[allow(dead_code)] -pub fn success(msg: &str) { +pub fn success(msg: String) { println!("{}: {}", "success".bold().color(Color::Green), msg); } -#[allow(dead_code)] -pub fn warn(msg: &str) { +pub fn warn(msg: String) { println!("{}: {}", "warning".bold().color(Color::Yellow), msg); } diff --git a/src/main.rs b/src/main.rs index a2365b3..23c5851 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,26 +1,13 @@ -use { - crate::cli::{args, screenshot::run}, - clap::Parser, -}; - mod cli; mod log; +use clap::Parser; +use cli::{args, screenshot}; + #[tokio::main] -async fn main() -> Result<(), Box> { +async fn main() { let cli = args::Cli::parse(); - run( - cli.url, - Some(cli.outdir), - cli.tabs, - cli.binary_path, - cli.width, - cli.height, - cli.timeout, - cli.silent, - cli.stdin, - ) - .await?; - - Ok(()) + if let Err(error) = screenshot::run(cli).await { + log::error(error.to_string()) + } } From f28577d1a9ded242cbb2276c88ebad608bbbd038 Mon Sep 17 00:00:00 2001 From: MrFixThis Date: Mon, 25 Sep 2023 14:53:35 -0500 Subject: [PATCH 2/2] chore(README): Specify new --file-path | -f option --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 986c393..39e5281 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,7 @@ Built from the ground up for ease of use, performance, beautiful ui and portabil   ```bash - hxn -b $(which brave) -u urls.txt + hxn -b $(which brave) -f urls.txt ``` ![many](https://github.com/pwnwriter/haylxon/assets/90331517/86f987d9-0961-4247-841e-18aee6aaf53f)