Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flag to force html mode #673

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -217,6 +217,7 @@ FLAGS:
--exclude-private Exclude private IP address ranges from checking
--glob-ignore-case Ignore case when expanding filesystem path glob inputs
--help Prints help information
--html Treat the input as HTML
--include-verbatim Find links in verbatim sections like `pre`- and `code` blocks
-i, --insecure Proceed for server connections considered insecure (invalid TLS)
-n, --no-progress Do not show progress bar.
Expand Down Expand Up @@ -273,9 +274,9 @@ ARGS:
### Ignoring links

You can exclude links from getting checked by specifying regex patterns
with `--exclude` (e.g. `--exclude example\.(com|org)`).
with `--exclude` (e.g. `--exclude example\.(com|org)`).
If a file named `.lycheeignore` exists in the current working directory, its
contents are excluded as well. The file allows you to list multiple regular
contents are excluded as well. The file allows you to list multiple regular
expressions for exclusion (one pattern per line).

### Caching
Expand Down
3 changes: 3 additions & 0 deletions fixtures/configs/smoketest.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ include_verbatim = false
# Ignore case of paths when matching glob patterns.
glob_ignore_case = false

# Treat the input as HTML.
html = false

# Exclude URLs from checking (supports regex).
exclude = [ '.*\.github.com\.*' ]

Expand Down
24 changes: 21 additions & 3 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use crate::parse::{parse_base, parse_statuscodes};
use anyhow::{anyhow, Context, Error, Result};
use const_format::{concatcp, formatcp};
use lychee_lib::{
Base, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_WAIT_TIME_SECS,
DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
Base, FileType, Input, DEFAULT_MAX_REDIRECTS, DEFAULT_MAX_RETRIES,
DEFAULT_RETRY_WAIT_TIME_SECS, DEFAULT_TIMEOUT_SECS, DEFAULT_USER_AGENT,
};
use secrecy::{ExposeSecret, SecretString};
use serde::Deserialize;
Expand Down Expand Up @@ -122,14 +122,26 @@ impl LycheeOptions {
// but we'd get no access to `glob_ignore_case`.
/// Get parsed inputs from options.
pub(crate) fn inputs(&self) -> Result<Vec<Input>> {
// With `html` set, pass `FileType::Html` as the hint for every input;
// otherwise pass no hint at all.
let file_type_hint = self.config.html.then(|| FileType::Html);
let excluded = if self.config.exclude_path.is_empty() {
None
} else {
Some(self.config.exclude_path.clone())
};
self.raw_inputs
.iter()
.map(|s| Input::new(s, None, self.config.glob_ignore_case, excluded.clone()))
.map(|s| {
Input::new(
s,
file_type_hint,
self.config.glob_ignore_case,
excluded.clone(),
)
})
.collect::<Result<_, _>>()
.context("Cannot parse inputs from arguments")
}
Expand Down Expand Up @@ -319,6 +331,11 @@ pub(crate) struct Config {
#[serde(default)]
pub(crate) glob_ignore_case: bool,

/// Treat the input as HTML
#[structopt(long)]
#[serde(default)]
pub(crate) html: bool,

/// Output file of status report
#[structopt(short, long, parse(from_os_str))]
#[serde(default)]
Expand Down Expand Up @@ -393,6 +410,7 @@ impl Config {
skip_missing: false;
include_verbatim: false;
glob_ignore_case: false;
html: false;
output: None;
require_https: false;
}
Expand Down
23 changes: 17 additions & 6 deletions lychee-lib/src/types/input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use crate::{helpers, ErrorKind, Result};
use async_stream::try_stream;
use futures::stream::Stream;
use glob::glob_with;
use jwalk::WalkDir;
use jwalk::WalkDirGeneric;
use reqwest::Url;
use serde::Serialize;
use shellexpand::tilde;
Expand Down Expand Up @@ -198,7 +198,7 @@ impl Input {
}
InputSource::FsPath(ref path) => {
if path.is_dir() {
for entry in WalkDir::new(path).skip_hidden(true)
for entry in WalkDirGeneric::<((usize), (Option<FileType>))>::new(path).skip_hidden(true)
.process_read_dir(move |_, _, _, children| {
children.retain(|child| {
let entry = match child.as_ref() {
Expand All @@ -224,19 +224,24 @@ impl Input {
}
return valid_extension(&entry.path());
});
children.first_mut().map(|child| {
if let Ok(entry) = child {
entry.client_state = self.file_type_hint;
}
});
}) {
let entry = entry?;
if entry.file_type().is_dir() {
continue;
}
let content = Self::path_content(entry.path()).await?;
let content = Self::path_content(entry.path(), entry.client_state).await?;
yield content
}
} else {
if self.is_excluded_path(path) {
return ();
}
let content = Self::path_content(path).await;
let content = Self::path_content(path, self.file_type_hint).await;
match content {
Err(_) if skip_missing => (),
Err(e) => Err(e)?,
Expand Down Expand Up @@ -301,7 +306,7 @@ impl Input {
if self.is_excluded_path(&path) {
continue;
}
let content: InputContent = Self::path_content(&path).await?;
let content: InputContent = Self::path_content(&path, self.file_type_hint).await?;
yield content;
}
Err(e) => eprintln!("{e:?}"),
Expand All @@ -325,13 +330,19 @@ impl Input {
/// Will return `Err` if file contents can't be read
pub async fn path_content<P: Into<PathBuf> + AsRef<Path> + Clone>(
path: P,
file_type_hint: Option<FileType>,
) -> Result<InputContent> {
let path = path.into();
let content = tokio::fs::read_to_string(&path)
.await
.map_err(|e| ErrorKind::ReadFileInput(e, path.clone()))?;
// An explicit hint takes precedence; without one, derive the file type
// from the path via `FileType::from`.
let file_type = file_type_hint.unwrap_or_else(|| FileType::from(&path));
let input_content = InputContent {
file_type: FileType::from(&path),
file_type,
source: InputSource::FsPath(path),
content,
};
Expand Down
3 changes: 3 additions & 0 deletions lychee.example.toml
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@ include_verbatim = false
# Ignore case of paths when matching glob patterns.
glob_ignore_case = false

# Treat the input as HTML.
html = false

# Exclude URLs from checking (supports regex).
exclude = [ '.*\.github.com\.*' ]

Expand Down