Skip to content

Commit

Permalink
Implement improved compat sorting when multiple artifacts are compati…
Browse files Browse the repository at this point in the history
…ble (#45)
  • Loading branch information
filiptibell committed Jul 16, 2024
1 parent 6a2860c commit a6b84cc
Show file tree
Hide file tree
Showing 7 changed files with 219 additions and 43 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## Unreleased

### Fixed

- Fixed `selene` and other tools not being installable because their releases contain multiple archives / binaries ([#45])

[#45]: https://github.com/rojo-rbx/rokit/pull/45

## `0.1.7` - July 15th, 2024

### Fixed
Expand Down
4 changes: 3 additions & 1 deletion lib/descriptor/arch.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::env::consts::ARCH as CURRENT_ARCH;

use super::{char_is_word_separator, executable_parsing::parse_executable, OS};
use crate::util::str::char_is_word_separator;

use super::{executable_parsing::parse_executable, OS};

// Matching substrings - these can be partial matches, eg. "wordwin64" will match as x64 arch
// These will take priority over full word matches, and should be as precise as possible
Expand Down
4 changes: 0 additions & 4 deletions lib/descriptor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,6 @@ impl FromStr for Descriptor {
}
}

fn char_is_word_separator(c: char) -> bool {
c == '-' || c == '_' || c.is_ascii_whitespace()
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
4 changes: 3 additions & 1 deletion lib/descriptor/os.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
use std::env::consts::OS as CURRENT_OS;

use super::{char_is_word_separator, executable_parsing::parse_executable};
use crate::util::str::char_is_word_separator;

use super::executable_parsing::parse_executable;

// Matching substrings - these can be partial matches, eg. "wordwin64" will match as windows OS
// These will take priority over full word matches, and should be as precise as possible
Expand Down
67 changes: 30 additions & 37 deletions lib/sources/artifact/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,10 @@ use super::{

mod format;
mod provider;
mod sorting;
mod util;

use self::sorting::sort_preferred_artifact;
use self::util::split_filename_and_extensions;

pub use self::format::ArtifactFormat;
Expand Down Expand Up @@ -112,7 +114,7 @@ impl Artifact {
}

/**
Sorts the given artifacts by system compatibility.
Sorts the given artifacts by their compatibility with the current system.
See also:
Expand All @@ -121,53 +123,39 @@ impl Artifact {
- [`Descriptor::sort_by_preferred_compat`]
*/
pub fn sort_by_system_compatibility(artifacts: impl AsRef<[Self]>) -> Vec<Self> {
let current_desc = Descriptor::current_system();

let mut compatible_artifacts = artifacts
.as_ref()
.iter()
.filter_map(|artifact| {
let name = artifact.name.as_deref()?;
if let Some(asset_desc) = Descriptor::detect(name) {
if current_desc.is_compatible_with(&asset_desc) {
Some((asset_desc, artifact))
} else {
None
}
} else {
None
}
})
.collect::<Vec<_>>();

compatible_artifacts.sort_by(|(desc_a, _), (desc_b, _)| {
current_desc.sort_by_preferred_compat(desc_a, desc_b)
});

compatible_artifacts
.into_iter()
.map(|(_, artifact)| artifact.clone())
.collect()
Self::sort_by_system_compatibility_inner(artifacts, false)
}

/**
Tries to find a partially compatible artifact, to be used as a fallback
during artifact selection if [`Artifact::sort_by_system_compatibility`]
finds no system-compatible artifacts to use.
Returns `None` if more than one artifact is partially compatible.
Note that this not is guaranteed to be compatible with the current
system, the contents of the artifact should be checked before use.
*/
pub fn find_partially_compatible_fallback(artifacts: impl AsRef<[Self]>) -> Option<Self> {
Self::sort_by_system_compatibility_inner(artifacts, true)
.into_iter()
.next()
}

fn sort_by_system_compatibility_inner(
artifacts: impl AsRef<[Self]>,
allow_partial_compatibility: bool,
) -> Vec<Self> {
let current_desc = Descriptor::current_system();

let os_compatible_artifacts = artifacts
let mut compatible_artifacts = artifacts
.as_ref()
.iter()
.filter_map(|artifact| {
let name = artifact.name.as_deref()?;
if let Some(asset_desc) = Descriptor::detect(name) {
if current_desc.os() == asset_desc.os() {
Some(artifact)
let is_fully_compatible = current_desc.is_compatible_with(&asset_desc);
let is_os_compatible = current_desc.os() == asset_desc.os();
if is_fully_compatible || (allow_partial_compatibility && is_os_compatible) {
Some((asset_desc, artifact))
} else {
None
}
Expand All @@ -177,10 +165,15 @@ impl Artifact {
})
.collect::<Vec<_>>();

if os_compatible_artifacts.len() == 1 {
Some(os_compatible_artifacts[0].clone())
} else {
None
}
compatible_artifacts.sort_by(|(desc_a, artifact_a), (desc_b, artifact_b)| {
current_desc
.sort_by_preferred_compat(desc_a, desc_b)
.then_with(|| sort_preferred_artifact(artifact_a, artifact_b))
});

compatible_artifacts
.into_iter()
.map(|(_, artifact)| artifact.clone())
.collect()
}
}
166 changes: 166 additions & 0 deletions lib/sources/artifact/sorting.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
use std::cmp::Ordering;

use semver::Version;

use crate::{
descriptor::{Arch, OS},
tool::ToolId,
util::str::char_is_word_separator,
};

use super::Artifact;

/**
Helper function to sort which artifact is preferred, based on
heuristics such as which tool name mentions the artifact name
more closely, and is probably more desirable for a user.
This **currently** means that, if a tool is named `tool`,
and we have these two artifacts:
- Artifact A: `tool-v1.0.0-x86_64-linux`
- Artifact B: `tool-with-extras-in-name-v1.0.0-x86_64-linux`
Then A would be preferred, because it mentions
the tool name more precisely, and nothing else.
Note that this sorting method is subject to change
and should not be directly exposed in a public API.
*/
pub(super) fn sort_preferred_artifact(artifact_a: &Artifact, artifact_b: &Artifact) -> Ordering {
let count_a = count_non_tool_mentions(
artifact_a.name.as_deref().unwrap_or_default(),
artifact_a.tool_spec.id(),
);
let count_b = count_non_tool_mentions(
artifact_b.name.as_deref().unwrap_or_default(),
artifact_b.tool_spec.id(),
);
count_a.cmp(&count_b)
}

fn count_non_tool_mentions(name: impl AsRef<str>, tool_id: &ToolId) -> usize {
let name = name.as_ref();
if name.trim().is_empty() {
return 0;
}

let name_words = name
.split(char_is_word_separator)
.filter(|s| word_is_not_arch_or_os_or_version_or_numeric(s))
.collect::<Vec<_>>();
let tool_words = tool_id
.name()
.split(char_is_word_separator)
.collect::<Vec<_>>();

#[allow(clippy::cast_possible_wrap)]
let len_diff = ((name_words.len() as isize) - (tool_words.len() as isize)).unsigned_abs();

let mut word_diff = 0;
for (name_word, tool_word) in name_words.into_iter().zip(tool_words) {
if !name_word.eq_ignore_ascii_case(tool_word) {
word_diff += 1;
}
}

len_diff + word_diff
}

fn word_is_not_arch_or_os_or_version_or_numeric(word: impl AsRef<str>) -> bool {
let word = word.as_ref();
Arch::detect(word).is_none()
&& OS::detect(word).is_none()
&& word.trim_start_matches('v').parse::<Version>().is_err()
&& !word.chars().all(char::is_numeric)
}

#[cfg(test)]
mod tests {
use super::*;

fn new_id(author: &str, name: &str) -> ToolId {
format!("{author}/{name}").parse().unwrap()
}

fn test_no_mentions(name: &str, tool_name: &str) {
let tool_id = new_id("author", tool_name);
assert_eq!(
count_non_tool_mentions(name, &tool_id),
0,
"Expected no non-tool mentions in name: {name}"
);
}

fn test_some_mentions(name: &str, tool_name: &str) {
let tool_id = new_id("author", tool_name);
assert_ne!(
count_non_tool_mentions(name, &tool_id),
0,
"Expected non-tool mentions in name: {name}"
);
}

#[test]
fn name_mention_check_tool_valid() {
// Single word tools
test_no_mentions("tool", "tool");
test_no_mentions("tool-linux", "tool");
test_no_mentions("tool-v1.0.0", "tool");
test_no_mentions("tool-1.0.0-x86_64-linux", "tool");
test_no_mentions("tool-x86_64-linux", "tool");
test_no_mentions("tool-x86_64-linux-v1.0.0", "tool");
// Multiple word tools
test_no_mentions("super-tool", "super-tool");
test_no_mentions("super-tool-linux", "super-tool");
test_no_mentions("super-tool-v1.0.0", "super-tool");
test_no_mentions("super-tool-1.0.0-x86_64-linux", "super-tool");
test_no_mentions("super-mega-tool", "super-mega-tool");
test_no_mentions("super-mega-tool-linux", "super-mega-tool");
test_no_mentions("super-mega-tool-v1.0.0", "super-mega-tool");
test_no_mentions("super-mega-tool-1.0.0-x86_64-linux", "super-mega-tool");
}

#[test]
fn name_mention_check_tool_invalid() {
// Contains similar but not exact word
test_some_mentions("tooling", "tool");
test_some_mentions("tooling-linux", "tool");
test_some_mentions("tooling-v1.0.0", "tool");
test_some_mentions("tooling-1.0.0-x86_64-linux", "tool");
test_some_mentions("tooling-x86_64-linux", "tool");
test_some_mentions("tooling-x86_64-linux-v1.0.0", "tool");
// Contains the exact word, but also others
test_some_mentions("super-tool", "tool");
test_some_mentions("super-tool-linux", "tool");
test_some_mentions("super-tool-v1.0.0", "tool");
test_some_mentions("super-tool-1.0.0-x86_64-linux", "tool");
test_some_mentions("super-mega-tool", "tool");
test_some_mentions("super-mega-tool-linux", "tool");
test_some_mentions("super-mega-tool-v1.0.0", "tool");
test_some_mentions("super-mega-tool-1.0.0-x86_64-linux", "tool");
}

#[test]
fn name_mention_check_case_insensitive() {
test_no_mentions("Tool-x86_64-linux", "tool");
test_no_mentions("tOOl-x86_64-linux", "tool");
test_no_mentions("TOOL-x86_64-linux", "tool");
test_some_mentions("Tooling-x86_64-linux", "tool");
test_some_mentions("tOOling-x86_64-linux", "tool");
test_some_mentions("TOOLING-x86_64-linux", "tool");
}

#[test]
fn name_mention_check_real_tools() {
// Valid
test_no_mentions("wally-v0.3.2-linux.zip", "wally");
test_no_mentions("lune-0.8.6-macos-aarch64.zip", "lune");
test_no_mentions("selene-0.27.1-linux.zip", "selene");
// Invalid
test_some_mentions("selene-light-0.27.1-linux.zip", "selene");
// Valid - but multiple words
test_no_mentions("sentry-cli-linux-i686-2.32.1", "sentry-cli");
test_no_mentions("selene-light-0.27.1-linux.zip", "selene-light");
}
}
9 changes: 9 additions & 0 deletions lib/util/str.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,14 @@
use std::fmt;

/**
Checks if the given character is a "word" separator.
For internal use only.
*/
pub(crate) const fn char_is_word_separator(c: char) -> bool {
c.is_ascii_whitespace() || matches!(c, '-' | '_')
}

/**
A case-insensitive string wrapper.
Expand Down

0 comments on commit a6b84cc

Please sign in to comment.