Skip to content

Commit

Permalink
Add a way to handle "pretty URLs", i.e. URIs without .html extension (
Browse files Browse the repository at this point in the history
#1422)

In many circumstances (GitHub Pages, Apache configured with MultiViews,
etc), web servers process URIs by appending the `.html` file extension
when no file is found at the path specified by the URI but a `.html`
file corresponding to that path _is_ found.

To allow Lychee to use the fast, offline method of checking such files
locally via the `file://` scheme, let's handle this scenario gracefully
by adding the `--fallback-extensions=html` option.

Note: This new option can take a list of file extensions to use; the
first one for which a corresponding file is found is then used.

Signed-off-by: Johannes Schindelin <johannes.schindelin@gmx.de>
  • Loading branch information
dscho committed Jun 11, 2024
1 parent 255164c commit 8c6eee9
Show file tree
Hide file tree
Showing 7 changed files with 83 additions and 5 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,13 @@ Options:
--remap <REMAP>
Remap URI matching pattern to different URI
--fallback-extensions <FALLBACK_EXTENSIONS>
Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi
--header <HEADER>
Custom request header
Expand Down
10 changes: 10 additions & 0 deletions fixtures/fallback-extensions/index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="other">other</a>
</body>
</html>
10 changes: 10 additions & 0 deletions fixtures/fallback-extensions/other.htm
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>For Testing pretty URLs</title>
</head>
<body>
<a href="index">index</a>
</body>
</html>
1 change: 1 addition & 0 deletions lychee-bin/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ pub(crate) fn create(cfg: &Config, cookie_jar: Option<&Arc<CookieStoreMutex>>) -
.require_https(cfg.require_https)
.cookie_jar(cookie_jar.cloned())
.include_fragments(cfg.include_fragments)
.fallback_extensions(cfg.fallback_extensions.clone())
.build()
.client()
.context("Failed to create request client")
Expand Down
14 changes: 14 additions & 0 deletions lychee-bin/src/options.rs
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,19 @@ pub(crate) struct Config {
#[arg(long)]
pub(crate) remap: Vec<String>,

/// Automatically append file extensions to `file://` URIs as needed
#[serde(default)]
#[arg(
long,
value_delimiter = ',',
long_help = "Test the specified file extensions for URIs when checking files locally.
Multiple extensions can be separated by commas. Extensions will be checked in
order of appearance.
Example: --fallback-extensions html,htm,php,asp,aspx,jsp,cgi"
)]
pub(crate) fallback_extensions: Vec<String>,

/// Custom request header
#[arg(long)]
#[serde(default)]
Expand Down Expand Up @@ -439,6 +452,7 @@ impl Config {
exclude_loopback: false;
exclude_mail: false;
remap: Vec::<String>::new();
fallback_extensions: Vec::<String>::new();
header: Vec::<String>::new();
timeout: DEFAULT_TIMEOUT_SECS;
retry_wait_time: DEFAULT_RETRY_WAIT_TIME_SECS;
Expand Down
13 changes: 13 additions & 0 deletions lychee-bin/tests/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1556,4 +1556,17 @@ mod cli {
// 3 failures because of missing fragments
.stdout(contains("3 Errors"));
}

// Runs the CLI against the `fallback-extensions` fixture directory and expects
// a clean result. In that fixture `index.html` links to `other` (provided as
// `other.htm`) and `other.htm` links to `index` (provided as `index.html`), so
// neither link names a file extension.
#[test]
fn test_fallback_extensions() {
let mut cmd = main_command();
let input = fixtures_path().join("fallback-extensions");

// Both `htm` and `html` are passed so that each extension-less link can be
// matched to its fixture file; the run must succeed and report "0 Errors".
cmd.arg("--verbose")
.arg("--fallback-extensions=htm,html")
.arg(input)
.assert()
.success()
.stdout(contains("0 Errors"));
}
}
33 changes: 28 additions & 5 deletions lychee-lib/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ pub struct ClientBuilder {
/// make sure rules don't conflict with each other.
remaps: Option<Remaps>,

/// Automatically append file extensions to `file://` URIs as needed
fallback_extensions: Vec<String>,

/// Links matching this set of regular expressions are **always** checked.
///
/// This has higher precedence over [`ClientBuilder::excludes`], **but**
Expand Down Expand Up @@ -384,6 +387,7 @@ impl ClientBuilder {
reqwest_client,
github_client,
remaps: self.remaps,
fallback_extensions: self.fallback_extensions,
filter,
max_retries: self.max_retries,
retry_wait_time: self.retry_wait_time,
Expand Down Expand Up @@ -412,6 +416,9 @@ pub struct Client {
/// Optional remapping rules for URIs matching pattern.
remaps: Option<Remaps>,

/// Automatically append file extensions to `file://` URIs as needed
fallback_extensions: Vec<String>,

/// Rules to decide whether each link should be checked or ignored.
filter: Filter,

Expand Down Expand Up @@ -655,14 +662,30 @@ impl Client {
let Ok(path) = uri.url.to_file_path() else {
return ErrorKind::InvalidFilePath(uri.clone()).into();
};
if !path.exists() {

if path.exists() {
if self.include_fragments {
return self.check_fragment(&path, uri).await;
}
return Status::Ok(StatusCode::OK);
}

if path.extension().is_some() {
return ErrorKind::InvalidFilePath(uri.clone()).into();
}
if self.include_fragments {
self.check_fragment(&path, uri).await
} else {
Status::Ok(StatusCode::OK)

// if the path has no file extension, try to append some
let mut path_buf = path.clone();
for ext in &self.fallback_extensions {
path_buf.set_extension(ext);
if path_buf.exists() {
if self.include_fragments {
return self.check_fragment(&path_buf, uri).await;
}
return Status::Ok(StatusCode::OK);
}
}
ErrorKind::InvalidFilePath(uri.clone()).into()
}

/// Checks a `file` URI's fragment.
Expand Down

0 comments on commit 8c6eee9

Please sign in to comment.