From ba0d6c973994915faef00e0e15e2957ff2b286b7 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Thu, 4 Jul 2024 18:34:35 +0100 Subject: [PATCH 01/10] Update generate-copyright This tool now scans for cargo dependencies and includes any important looking license files. We do this because cargo package metadata is not sufficient - the Apache-2.0 license says you have to include any NOTICE file, for example. And authors != copyright holders (cargo has the former, we must include the latter). --- Cargo.lock | 2 + src/bootstrap/src/core/build_steps/run.rs | 2 + src/tools/collect-license-metadata/Cargo.toml | 2 + .../collect-license-metadata/src/main.rs | 5 + src/tools/generate-copyright/Cargo.toml | 3 + .../generate-copyright/src/cargo_metadata.rs | 196 ++++++++++++++++++ src/tools/generate-copyright/src/main.rs | 122 +++++++++-- 7 files changed, 320 insertions(+), 12 deletions(-) create mode 100644 src/tools/generate-copyright/src/cargo_metadata.rs diff --git a/Cargo.lock b/Cargo.lock index a4b4e49f82c2e..0f3a106512d20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1408,6 +1408,8 @@ dependencies = [ "anyhow", "serde", "serde_json", + "tempfile", + "thiserror", ] [[package]] diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index fde1693646a8b..29d7bcc425bea 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -217,6 +217,8 @@ impl Step for GenerateCopyright { let mut cmd = builder.tool_cmd(Tool::GenerateCopyright); cmd.env("LICENSE_METADATA", &license_metadata); cmd.env("DEST", &dest); + cmd.env("OUT_DIR", &builder.out); + cmd.env("CARGO", &builder.initial_cargo); cmd.run(builder); dest diff --git a/src/tools/collect-license-metadata/Cargo.toml b/src/tools/collect-license-metadata/Cargo.toml index d0820cfc2a0e4..edf9e5c5393ea 100644 --- a/src/tools/collect-license-metadata/Cargo.toml +++ b/src/tools/collect-license-metadata/Cargo.toml @@ -2,6 +2,8 @@ name = "collect-license-metadata" version = "0.1.0" edition = "2021" +description = "Runs the reuse tool and caches the output, so rust toolchain devs don't need to have reuse installed" +license = "MIT OR Apache-2.0" [dependencies] anyhow = "1.0.65" diff --git a/src/tools/collect-license-metadata/src/main.rs b/src/tools/collect-license-metadata/src/main.rs index ca6aa01d78c04..dce36bb17b600 100644 --- a/src/tools/collect-license-metadata/src/main.rs +++ b/src/tools/collect-license-metadata/src/main.rs @@ -8,6 +8,11 @@ use anyhow::Error; use crate::licenses::LicensesInterner; +/// The entry point to the binary. +/// +/// You should probably let `bootstrap` execute this program instead of running it directly. +/// +/// Run `x.py run collect-license-metadata` fn main() -> Result<(), Error> { let reuse_exe: PathBuf = std::env::var_os("REUSE_EXE").expect("Missing REUSE_EXE").into(); let dest: PathBuf = std::env::var_os("DEST").expect("Missing DEST").into(); diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index 899ef0f8a6c26..bf643876a042b 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -2,6 +2,7 @@ name = "generate-copyright" version = "0.1.0" edition = "2021" +description = "Produces a manifest of all the copyrighted materials in the Rust Toolchain" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html @@ -9,3 +10,5 @@ edition = "2021" anyhow = "1.0.65" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" +thiserror = "1" +tempfile = "3" diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs new file mode 100644 index 0000000000000..721a6b1c6e627 --- /dev/null +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -0,0 +1,196 @@ +//! Gets metadata about a workspace from Cargo + +use std::collections::{BTreeMap, BTreeSet}; +use std::ffi::{OsStr, OsString}; +use std::path::Path; + +/// Describes how this module can fail +#[derive(Debug, thiserror::Error)] +pub enum Error { + #[error("Failed to run cargo metadata: {0:?}")] + LaunchingMetadata(#[from] std::io::Error), + #[error("Failed get output from cargo metadata: {0:?}")] + GettingMetadata(String), + #[error("Failed parse JSON output from cargo metadata: {0:?}")] + ParsingJson(#[from] serde_json::Error), + #[error("Failed find expected JSON element {0} in output from cargo metadata")] + MissingJsonElement(&'static str), + #[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")] + MissingJsonElementForPackage(String, String), + #[error("Failed to run cargo vendor: {0:?}")] + LaunchingVendor(std::io::Error), + #[error("Failed to complete cargo vendor")] + RunningVendor, +} + +/// Describes one of our dependencies +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct Dependency { + /// The name of the package + pub name: String, + /// The version number + pub version: String, + /// The license it is under + pub license: String, + /// The list of authors from the package metadata + pub authors: Vec, + /// A list of important files from the package, with their contents. + /// + /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive. + pub notices: BTreeMap, +} + +/// Use `cargo` to get a list of dependencies and their license data. +/// +/// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can +/// grab the license files. +/// +/// Any dependency with a path beginning with `root_path` is ignored, as we +/// assume `reuse` has covered it already. +pub fn get( + cargo: &Path, + dest: &Path, + root_path: &Path, + manifest_paths: &[&Path], +) -> Result, Error> { + let mut temp_set = BTreeSet::new(); + // Look at the metadata for each manifest + for manifest_path in manifest_paths { + if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) { + panic!("cargo_manifest::get requires a path to a Cargo.toml file"); + } + let metadata_json = get_metadata_json(cargo, manifest_path)?; + let packages = metadata_json["packages"] + .as_array() + .ok_or_else(|| Error::MissingJsonElement("packages array"))?; + for package in packages { + let package = + package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?; + let manifest_path = package + .get("manifest_path") + .and_then(|v| v.as_str()) + .map(Path::new) + .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?; + if manifest_path.starts_with(&root_path) { + // it's an in-tree dependency and reuse covers it + continue; + } + // otherwise it's an out-of-tree dependency + let get_string = |field_name: &str, package_name: &str| { + package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| { + Error::MissingJsonElementForPackage( + format!("package.{field_name}"), + package_name.to_owned(), + ) + }) + }; + let name = get_string("name", "unknown")?; + let license = get_string("license", name)?; + let version = get_string("version", name)?; + let authors_list = package + .get("authors") + .and_then(|v| v.as_array()) + .ok_or_else(|| Error::MissingJsonElement("package.authors"))?; + let authors: Vec = + authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect(); + temp_set.insert(Dependency { + name: name.to_owned(), + version: version.to_owned(), + license: license.to_owned(), + authors, + notices: BTreeMap::new(), + }); + } + } + + // Now do a cargo-vendor and grab everything + let vendor_path = dest.join("vendor"); + println!("Vendoring deps into {}...", vendor_path.display()); + run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; + + // Now for each dependency we found, go and grab any important looking files + let mut output = BTreeSet::new(); + for mut dep in temp_set { + load_important_files(&mut dep, &vendor_path)?; + output.insert(dep); + } + + Ok(output) +} + +/// Get cargo-metdata for a package, as JSON +fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result { + let metadata_output = std::process::Command::new(cargo) + .arg("metadata") + .arg("--format-version=1") + .arg("--all-features") + .arg("--manifest-path") + .arg(manifest_path) + .env("RUSTC_BOOTSTRAP", "1") + .output() + .map_err(|e| Error::LaunchingMetadata(e))?; + if !metadata_output.status.success() { + return Err(Error::GettingMetadata( + String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"), + )); + } + let json = serde_json::from_slice(&metadata_output.stdout)?; + Ok(json) +} + +/// Run cargo-vendor, fetching into the given dir +fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> { + let mut vendor_command = std::process::Command::new(cargo); + vendor_command.env("RUSTC_BOOTSTRAP", "1"); + vendor_command.arg("vendor"); + vendor_command.arg("--quiet"); + vendor_command.arg("--versioned-dirs"); + for manifest_path in manifest_paths { + vendor_command.arg("-s"); + vendor_command.arg(manifest_path); + } + vendor_command.arg(dest); + + let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?; + + if !vendor_status.success() { + return Err(Error::RunningVendor); + } + + Ok(()) +} + +/// Add important files off disk into this dependency. +/// +/// Maybe one-day Cargo.toml will contain enough information that we don't need +/// to do this manual scraping. +fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> { + let name_version = format!("{}-{}", dep.name, dep.version); + println!("Scraping notices for {}...", name_version); + let dep_vendor_path = vendor_root.join(name_version); + for entry in std::fs::read_dir(dep_vendor_path)? { + let entry = entry?; + let metadata = entry.metadata()?; + let path = entry.path(); + if let Some(filename) = path.file_name() { + let lc_filename = filename.to_ascii_lowercase(); + let lc_filename_str = lc_filename.to_string_lossy(); + let mut keep = false; + for m in ["copyright", "licence", "license", "author", "notice"] { + if lc_filename_str.contains(m) { + keep = true; + break; + } + } + if keep { + if metadata.is_dir() { + // scoop up whole directory + } else if metadata.is_file() { + println!("Scraping {}", filename.to_string_lossy()); + dep.notices.insert(filename.to_owned(), std::fs::read_to_string(path)?); + } + } + } + } + Ok(()) +} diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index dce1a558697e6..6191cd158bc9b 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -1,54 +1,114 @@ use std::io::Write; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use anyhow::Error; +mod cargo_metadata; + +/// The entry point to the binary. +/// +/// You should probably let `bootstrap` execute this program instead of running it directly. +/// +/// Run `x.py run generate-metadata` fn main() -> Result<(), Error> { - let dest = env_path("DEST")?; + let dest_file = env_path("DEST")?; + let out_dir = env_path("OUT_DIR")?; + let cargo = env_path("CARGO")?; let license_metadata = env_path("LICENSE_METADATA")?; - let metadata: Metadata = serde_json::from_slice(&std::fs::read(&license_metadata)?)?; + let collected_tree_metadata: Metadata = + serde_json::from_slice(&std::fs::read(&license_metadata)?)?; + + let root_path = std::path::absolute(".")?; + let workspace_paths = [ + Path::new("./Cargo.toml"), + Path::new("./src/tools/cargo/Cargo.toml"), + Path::new("./library/std/Cargo.toml"), + ]; + let collected_cargo_metadata = + cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?; let mut buffer = Vec::new(); - render_recursive(&metadata.files, &mut buffer, 0)?; - std::fs::write(&dest, &buffer)?; + writeln!(buffer, "# COPYRIGHT for Rust")?; + writeln!(buffer)?; + writeln!( + buffer, + "This file describes the copyright and licensing information for the source code within The Rust Project git tree, and the third-party dependencies used when building the Rust toolchain (including the Rust Standard Library)" + )?; + writeln!(buffer)?; + writeln!(buffer, "## Table of Contents")?; + writeln!(buffer)?; + writeln!(buffer, "* [In-tree files](#in-tree-files)")?; + writeln!(buffer, "* [Out-of-tree files](#out-of-tree-files)")?; + // writeln!(buffer, "* [License Texts](#license-texts)")?; + writeln!(buffer)?; + + writeln!(buffer, "## In-tree files")?; + writeln!(buffer)?; + writeln!( + buffer, + "The following licenses cover the in-tree source files that were used in this release:" + )?; + writeln!(buffer)?; + render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0)?; + + writeln!(buffer)?; + + writeln!(buffer, "## Out-of-tree files")?; + writeln!(buffer)?; + writeln!( + buffer, + "The following licenses cover the out-of-tree crates that were used in this release:" + )?; + writeln!(buffer)?; + render_deps(collected_cargo_metadata.iter(), &mut buffer)?; + + std::fs::write(&dest_file, &buffer)?; Ok(()) } -fn render_recursive(node: &Node, buffer: &mut Vec, depth: usize) -> Result<(), Error> { +/// Recursively draw the tree of files/folders we found on disk and their licenses, as +/// markdown, into the given Vec. +fn render_tree_recursive(node: &Node, buffer: &mut Vec, depth: usize) -> Result<(), Error> { let prefix = std::iter::repeat("> ").take(depth + 1).collect::(); match node { Node::Root { children } => { for child in children { - render_recursive(child, buffer, depth)?; + render_tree_recursive(child, buffer, depth)?; } } Node::Directory { name, children, license } => { - render_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?; + render_tree_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?; if !children.is_empty() { writeln!(buffer, "{prefix}")?; writeln!(buffer, "{prefix}*Exceptions:*")?; for child in children { writeln!(buffer, "{prefix}")?; - render_recursive(child, buffer, depth + 1)?; + render_tree_recursive(child, buffer, depth + 1)?; } } } Node::Group { files, directories, license } => { - render_license(&prefix, directories.iter().chain(files.iter()), Some(license), buffer)?; + render_tree_license( + &prefix, + directories.iter().chain(files.iter()), + Some(license), + buffer, + )?; } Node::File { name, license } => { - render_license(&prefix, std::iter::once(name), Some(license), buffer)?; + render_tree_license(&prefix, std::iter::once(name), Some(license), buffer)?; } } Ok(()) } -fn render_license<'a>( +/// Draw a series of sibling files/folders, as markdown, into the given Vec. +fn render_tree_license<'a>( prefix: &str, names: impl Iterator, license: Option<&License>, @@ -67,11 +127,47 @@ fn render_license<'a>( Ok(()) } +/// Render a list of out-of-tree dependencies as markdown into the given Vec. +fn render_deps<'a, 'b>( + deps: impl Iterator, + buffer: &'b mut Vec, +) -> Result<(), Error> { + for dep in deps { + let authors_list = dep.authors.join(", ").replace("<", "\\<").replace(">", "\\>"); + let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version); + writeln!(buffer)?; + writeln!( + buffer, + "### [{name} {version}]({url})", + name = dep.name, + version = dep.version, + url = url, + )?; + writeln!(buffer)?; + writeln!(buffer, "* Authors: {}", authors_list)?; + writeln!(buffer, "* License: {}", dep.license)?; + for (name, contents) in &dep.notices { + writeln!(buffer)?; + writeln!(buffer, "#### {}", name.to_string_lossy())?; + writeln!(buffer)?; + writeln!(buffer, "
Click to expand")?; + writeln!(buffer)?; + writeln!(buffer, "```")?; + writeln!(buffer, "{}", contents)?; + writeln!(buffer, "```")?; + writeln!(buffer)?; + writeln!(buffer, "
")?; + } + } + Ok(()) +} +/// Describes a tree of metadata for our filesystem tree #[derive(serde::Deserialize)] struct Metadata { files: Node, } +/// Describes one node in our metadata tree #[derive(serde::Deserialize)] #[serde(rename_all = "kebab-case", tag = "type")] pub(crate) enum Node { @@ -81,12 +177,14 @@ pub(crate) enum Node { Group { files: Vec, directories: Vec, license: License }, } +/// A License has an SPDX license name and a list of copyright holders. #[derive(serde::Deserialize)] struct License { spdx: String, copyright: Vec, } +/// Grab an environment variable as a PathBuf, or fail nicely. fn env_path(var: &str) -> Result { if let Some(var) = std::env::var_os(var) { Ok(var.into()) From 204e3eadf1323ebd886ee159b193e231ec4906c9 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Mon, 29 Jul 2024 11:41:02 +0100 Subject: [PATCH 02/10] generate-copyright: Produce HTML, not Markdown This format works better with large amounts of structured data. We also mark which deps are in the stdlib --- src/bootstrap/src/core/build_steps/run.rs | 2 +- .../generate-copyright/src/cargo_metadata.rs | 81 +++++---- src/tools/generate-copyright/src/main.rs | 172 ++++++++++-------- 3 files changed, 152 insertions(+), 103 deletions(-) diff --git a/src/bootstrap/src/core/build_steps/run.rs b/src/bootstrap/src/core/build_steps/run.rs index 29d7bcc425bea..65d635c0bd69f 100644 --- a/src/bootstrap/src/core/build_steps/run.rs +++ b/src/bootstrap/src/core/build_steps/run.rs @@ -212,7 +212,7 @@ impl Step for GenerateCopyright { let license_metadata = builder.ensure(CollectLicenseMetadata); // Temporary location, it will be moved to the proper one once it's accurate. - let dest = builder.out.join("COPYRIGHT.md"); + let dest = builder.out.join("COPYRIGHT.html"); let mut cmd = builder.tool_cmd(Tool::GenerateCopyright); cmd.env("LICENSE_METADATA", &license_metadata); diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs index 721a6b1c6e627..eda53c73c0af6 100644 --- a/src/tools/generate-copyright/src/cargo_metadata.rs +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -1,6 +1,6 @@ //! Gets metadata about a workspace from Cargo -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::ffi::{OsStr, OsString}; use std::path::Path; @@ -23,13 +23,18 @@ pub enum Error { RunningVendor, } -/// Describes one of our dependencies +/// Uniquely describes a package on crates.io #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] -pub struct Dependency { +pub struct Package { /// The name of the package pub name: String, /// The version number pub version: String, +} + +/// Extra data about a package +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub struct PackageMetadata { /// The license it is under pub license: String, /// The list of authors from the package metadata @@ -40,20 +45,44 @@ pub struct Dependency { pub notices: BTreeMap, } -/// Use `cargo` to get a list of dependencies and their license data. +/// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data. /// /// This will involve running `cargo vendor` into `${BUILD}/vendor` so we can /// grab the license files. /// /// Any dependency with a path beginning with `root_path` is ignored, as we /// assume `reuse` has covered it already. -pub fn get( +pub fn get_metadata_and_notices( cargo: &Path, dest: &Path, root_path: &Path, manifest_paths: &[&Path], -) -> Result, Error> { - let mut temp_set = BTreeSet::new(); +) -> Result, Error> { + let mut output = get_metadata(cargo, root_path, manifest_paths)?; + + // Now do a cargo-vendor and grab everything + let vendor_path = dest.join("vendor"); + println!("Vendoring deps into {}...", vendor_path.display()); + run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; + + // Now for each dependency we found, go and grab any important looking files + for (package, metadata) in output.iter_mut() { + load_important_files(package, metadata, &vendor_path)?; + } + + Ok(output) +} + +/// Use `cargo metadata` to get a list of dependencies and their license data. +/// +/// Any dependency with a path beginning with `root_path` is ignored, as we +/// assume `reuse` has covered it already. +pub fn get_metadata( + cargo: &Path, + root_path: &Path, + manifest_paths: &[&Path], +) -> Result, Error> { + let mut output = BTreeMap::new(); // Look at the metadata for each manifest for manifest_path in manifest_paths { if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) { @@ -71,7 +100,7 @@ pub fn get( .and_then(|v| v.as_str()) .map(Path::new) .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?; - if manifest_path.starts_with(&root_path) { + if manifest_path.starts_with(root_path) { // it's an in-tree dependency and reuse covers it continue; } @@ -93,28 +122,14 @@ pub fn get( .ok_or_else(|| Error::MissingJsonElement("package.authors"))?; let authors: Vec = authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect(); - temp_set.insert(Dependency { - name: name.to_owned(), - version: version.to_owned(), - license: license.to_owned(), - authors, - notices: BTreeMap::new(), - }); + let package = Package { name: name.to_owned(), version: version.to_owned() }; + output.insert( + package.clone(), + PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() }, + ); } } - // Now do a cargo-vendor and grab everything - let vendor_path = dest.join("vendor"); - println!("Vendoring deps into {}...", vendor_path.display()); - run_cargo_vendor(cargo, &vendor_path, manifest_paths)?; - - // Now for each dependency we found, go and grab any important looking files - let mut output = BTreeSet::new(); - for mut dep in temp_set { - load_important_files(&mut dep, &vendor_path)?; - output.insert(dep); - } - Ok(output) } @@ -128,7 +143,7 @@ fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result Resu } vendor_command.arg(dest); - let vendor_status = vendor_command.status().map_err(|e| Error::LaunchingVendor(e))?; + let vendor_status = vendor_command.status().map_err(Error::LaunchingVendor)?; if !vendor_status.success() { return Err(Error::RunningVendor); @@ -164,8 +179,12 @@ fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Resu /// /// Maybe one-day Cargo.toml will contain enough information that we don't need /// to do this manual scraping. -fn load_important_files(dep: &mut Dependency, vendor_root: &Path) -> Result<(), Error> { - let name_version = format!("{}-{}", dep.name, dep.version); +fn load_important_files( + package: &Package, + dep: &mut PackageMetadata, + vendor_root: &Path, +) -> Result<(), Error> { + let name_version = format!("{}-{}", package.name, package.version); println!("Scraping notices for {}...", name_version); let dep_vendor_path = vendor_root.join(name_version); for entry in std::fs::read_dir(dep_vendor_path)? { diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index 6191cd158bc9b..efccba0651e4d 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -1,3 +1,4 @@ +use std::collections::BTreeMap; use std::io::Write; use std::path::{Path, PathBuf}; @@ -5,6 +6,33 @@ use anyhow::Error; mod cargo_metadata; +static TOP_BOILERPLATE: &str = r##" + + + + + Copyright notices for The Rust Toolchain + + + +

Copyright notices for The Rust Toolchain

+ +

This file describes the copyright and licensing information for the source +code within The Rust Project git tree, and the third-party dependencies used +when building the Rust toolchain (including the Rust Standard Library).

+ +

Table of Contents

+ +"##; + +static BOTTOM_BOILERPLATE: &str = r#" + + +"#; + /// The entry point to the binary. /// /// You should probably let `bootstrap` execute this program instead of running it directly. @@ -26,43 +54,28 @@ fn main() -> Result<(), Error> { Path::new("./library/std/Cargo.toml"), ]; let collected_cargo_metadata = - cargo_metadata::get(&cargo, &out_dir, &root_path, &workspace_paths)?; + cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?; + + let stdlib_set = + cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?; let mut buffer = Vec::new(); - writeln!(buffer, "# COPYRIGHT for Rust")?; - writeln!(buffer)?; - writeln!( - buffer, - "This file describes the copyright and licensing information for the source code within The Rust Project git tree, and the third-party dependencies used when building the Rust toolchain (including the Rust Standard Library)" - )?; - writeln!(buffer)?; - writeln!(buffer, "## Table of Contents")?; - writeln!(buffer)?; - writeln!(buffer, "* [In-tree files](#in-tree-files)")?; - writeln!(buffer, "* [Out-of-tree files](#out-of-tree-files)")?; - // writeln!(buffer, "* [License Texts](#license-texts)")?; - writeln!(buffer)?; - - writeln!(buffer, "## In-tree files")?; - writeln!(buffer)?; + writeln!(buffer, "{}", TOP_BOILERPLATE)?; + writeln!( buffer, - "The following licenses cover the in-tree source files that were used in this release:" + r#"

In-tree files

The following licenses cover the in-tree source files that were used in this release:

"# )?; - writeln!(buffer)?; - render_tree_recursive(&collected_tree_metadata.files, &mut buffer, 0)?; - - writeln!(buffer)?; + render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?; - writeln!(buffer, "## Out-of-tree files")?; - writeln!(buffer)?; writeln!( buffer, - "The following licenses cover the out-of-tree crates that were used in this release:" + r#"

Out-of-tree dependencies

The following licenses cover the out-of-tree crates that were used in this release:

"# )?; - writeln!(buffer)?; - render_deps(collected_cargo_metadata.iter(), &mut buffer)?; + render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?; + + writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?; std::fs::write(&dest_file, &buffer)?; @@ -71,56 +84,51 @@ fn main() -> Result<(), Error> { /// Recursively draw the tree of files/folders we found on disk and their licenses, as /// markdown, into the given Vec. -fn render_tree_recursive(node: &Node, buffer: &mut Vec, depth: usize) -> Result<(), Error> { - let prefix = std::iter::repeat("> ").take(depth + 1).collect::(); - +fn render_tree_recursive(node: &Node, buffer: &mut Vec) -> Result<(), Error> { + writeln!(buffer, r#"
"#)?; match node { Node::Root { children } => { for child in children { - render_tree_recursive(child, buffer, depth)?; + render_tree_recursive(child, buffer)?; } } Node::Directory { name, children, license } => { - render_tree_license(&prefix, std::iter::once(name), license.as_ref(), buffer)?; + render_tree_license(std::iter::once(name), license.as_ref(), buffer)?; if !children.is_empty() { - writeln!(buffer, "{prefix}")?; - writeln!(buffer, "{prefix}*Exceptions:*")?; + writeln!(buffer, "

Exceptions:

")?; for child in children { - writeln!(buffer, "{prefix}")?; - render_tree_recursive(child, buffer, depth + 1)?; + render_tree_recursive(child, buffer)?; } } } Node::Group { files, directories, license } => { - render_tree_license( - &prefix, - directories.iter().chain(files.iter()), - Some(license), - buffer, - )?; + render_tree_license(directories.iter().chain(files.iter()), Some(license), buffer)?; } Node::File { name, license } => { - render_tree_license(&prefix, std::iter::once(name), Some(license), buffer)?; + render_tree_license(std::iter::once(name), Some(license), buffer)?; } } + writeln!(buffer, "
")?; Ok(()) } /// Draw a series of sibling files/folders, as markdown, into the given Vec. fn render_tree_license<'a>( - prefix: &str, names: impl Iterator, license: Option<&License>, buffer: &mut Vec, ) -> Result<(), Error> { + writeln!(buffer, "

File/Directory: ")?; for name in names { - writeln!(buffer, "{prefix}**`{name}`** ")?; + writeln!(buffer, "{name}")?; } + writeln!(buffer, "

")?; + if let Some(license) = license { - writeln!(buffer, "{prefix}License: `{}`", license.spdx)?; + writeln!(buffer, "

License: {}

", license.spdx)?; for copyright in license.copyright.iter() { - writeln!(buffer, "{prefix}Copyright: {copyright}")?; + writeln!(buffer, "

Copyright: {copyright}

")?; } } @@ -128,36 +136,48 @@ fn render_tree_license<'a>( } /// Render a list of out-of-tree dependencies as markdown into the given Vec. -fn render_deps<'a, 'b>( - deps: impl Iterator, - buffer: &'b mut Vec, +fn render_deps( + all_deps: &BTreeMap, + stdlib_set: &BTreeMap, + buffer: &mut Vec, ) -> Result<(), Error> { - for dep in deps { - let authors_list = dep.authors.join(", ").replace("<", "\\<").replace(">", "\\>"); - let url = format!("https://crates.io/crates/{}/{}", dep.name, dep.version); + for (package, metadata) in all_deps { + let authors_list = if metadata.authors.is_empty() { + "None Specified".to_owned() + } else { + metadata.authors.join(", ") + }; + let url = format!("https://crates.io/crates/{}/{}", package.name, package.version); writeln!(buffer)?; writeln!( buffer, - "### [{name} {version}]({url})", - name = dep.name, - version = dep.version, - url = url, + r#"

📦 {name}-{version}

"#, + name = package.name, + version = package.version, )?; - writeln!(buffer)?; - writeln!(buffer, "* Authors: {}", authors_list)?; - writeln!(buffer, "* License: {}", dep.license)?; - for (name, contents) in &dep.notices { - writeln!(buffer)?; - writeln!(buffer, "#### {}", name.to_string_lossy())?; - writeln!(buffer)?; - writeln!(buffer, "
Click to expand")?; - writeln!(buffer)?; - writeln!(buffer, "```")?; - writeln!(buffer, "{}", contents)?; - writeln!(buffer, "```")?; - writeln!(buffer)?; - writeln!(buffer, "
")?; + writeln!(buffer, r#"

URL: {url}

"#,)?; + writeln!( + buffer, + "

In libstd: {}

", + if stdlib_set.contains_key(package) { "Yes" } else { "No" } + )?; + writeln!(buffer, "

Authors: {}

", escape_html(&authors_list))?; + writeln!(buffer, "

License: {}

", escape_html(&metadata.license))?; + writeln!(buffer, "

Notices: ")?; + if metadata.notices.is_empty() { + writeln!(buffer, "None")?; + } else { + for (name, contents) in &metadata.notices { + writeln!( + buffer, + "

{}", + name.to_string_lossy() + )?; + writeln!(buffer, "
\n{}\n
", contents)?; + writeln!(buffer, "
")?; + } } + writeln!(buffer, "

")?; } Ok(()) } @@ -192,3 +212,13 @@ fn env_path(var: &str) -> Result { anyhow::bail!("missing environment variable {var}") } } + +/// Escapes any invalid HTML characters +fn escape_html(input: &str) -> String { + static MAPPING: [(char, &str); 3] = [('&', "&"), ('<', "<"), ('>', ">")]; + let mut output = input.to_owned(); + for (ch, s) in &MAPPING { + output = output.replace(*ch, s); + } + output +} From 56f84796a4e819be108721f723d8c1b229e5dbdd Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 30 Jul 2024 13:54:48 +0100 Subject: [PATCH 03/10] generate-copyright: Fix typo --- src/tools/generate-copyright/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index efccba0651e4d..af69ab8c8bf36 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -37,7 +37,7 @@ static BOTTOM_BOILERPLATE: &str = r#" /// /// You should probably let `bootstrap` execute this program instead of running it directly. /// -/// Run `x.py run generate-metadata` +/// Run `x.py run generate-copyright` fn main() -> Result<(), Error> { let dest_file = env_path("DEST")?; let out_dir = env_path("OUT_DIR")?; From dbab595d78f12c0514cfe2ac4c7c9d083445c14f Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 30 Jul 2024 13:56:17 +0100 Subject: [PATCH 04/10] generate-copyright: use cargo-metadata --- Cargo.lock | 1 + src/tools/generate-copyright/Cargo.toml | 1 + .../generate-copyright/src/cargo_metadata.rs | 80 +++++-------------- 3 files changed, 20 insertions(+), 62 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f3a106512d20..eeaeff79ebb25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1406,6 +1406,7 @@ name = "generate-copyright" version = "0.1.0" dependencies = [ "anyhow", + "cargo_metadata 0.18.1", "serde", "serde_json", "tempfile", diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index bf643876a042b..c94cc35fb5036 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -12,3 +12,4 @@ serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" thiserror = "1" tempfile = "3" +cargo_metadata = "0.18.1" diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs index eda53c73c0af6..655d73715e036 100644 --- a/src/tools/generate-copyright/src/cargo_metadata.rs +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -7,16 +7,10 @@ use std::path::Path; /// Describes how this module can fail #[derive(Debug, thiserror::Error)] pub enum Error { - #[error("Failed to run cargo metadata: {0:?}")] - LaunchingMetadata(#[from] std::io::Error), - #[error("Failed get output from cargo metadata: {0:?}")] - GettingMetadata(String), - #[error("Failed parse JSON output from cargo metadata: {0:?}")] - ParsingJson(#[from] serde_json::Error), - #[error("Failed find expected JSON element {0} in output from cargo metadata")] - MissingJsonElement(&'static str), - #[error("Failed find expected JSON element {0} in output from cargo metadata for package {1}")] - MissingJsonElementForPackage(String, String), + #[error("I/O Error: {0:?}")] + Io(#[from] std::io::Error), + #[error("Failed get output from cargo-metadata: {0:?}")] + GettingMetadata(#[from] cargo_metadata::Error), #[error("Failed to run cargo vendor: {0:?}")] LaunchingVendor(std::io::Error), #[error("Failed to complete cargo vendor")] @@ -88,44 +82,26 @@ pub fn get_metadata( if manifest_path.file_name() != Some(OsStr::new("Cargo.toml")) { panic!("cargo_manifest::get requires a path to a Cargo.toml file"); } - let metadata_json = get_metadata_json(cargo, manifest_path)?; - let packages = metadata_json["packages"] - .as_array() - .ok_or_else(|| Error::MissingJsonElement("packages array"))?; - for package in packages { - let package = - package.as_object().ok_or_else(|| Error::MissingJsonElement("package object"))?; - let manifest_path = package - .get("manifest_path") - .and_then(|v| v.as_str()) - .map(Path::new) - .ok_or_else(|| Error::MissingJsonElement("package.manifest_path"))?; + let metadata = cargo_metadata::MetadataCommand::new() + .cargo_path(cargo) + .env("RUSTC_BOOTSTRAP", "1") + .manifest_path(manifest_path) + .exec()?; + for package in metadata.packages { + let manifest_path = package.manifest_path.as_path(); if manifest_path.starts_with(root_path) { // it's an in-tree dependency and reuse covers it continue; } // otherwise it's an out-of-tree dependency - let get_string = |field_name: &str, package_name: &str| { - package.get(field_name).and_then(|v| v.as_str()).ok_or_else(|| { - Error::MissingJsonElementForPackage( - format!("package.{field_name}"), - package_name.to_owned(), - ) - }) - }; - let name = get_string("name", "unknown")?; - let license = get_string("license", name)?; - let version = get_string("version", name)?; - let authors_list = package - .get("authors") - .and_then(|v| v.as_array()) - .ok_or_else(|| Error::MissingJsonElement("package.authors"))?; - let authors: Vec = - authors_list.iter().filter_map(|v| v.as_str()).map(|s| s.to_owned()).collect(); - let package = Package { name: name.to_owned(), version: version.to_owned() }; + let package_id = Package { name: package.name, version: package.version.to_string() }; output.insert( - package.clone(), - PackageMetadata { license: license.to_owned(), authors, notices: BTreeMap::new() }, + package_id, + PackageMetadata { + license: package.license.unwrap_or_else(|| String::from("Unspecified")), + authors: package.authors, + notices: BTreeMap::new(), + }, ); } } @@ -133,26 +109,6 @@ pub fn get_metadata( Ok(output) } -/// Get cargo-metdata for a package, as JSON -fn get_metadata_json(cargo: &Path, manifest_path: &Path) -> Result { - let metadata_output = std::process::Command::new(cargo) - .arg("metadata") - .arg("--format-version=1") - .arg("--all-features") - .arg("--manifest-path") - .arg(manifest_path) - .env("RUSTC_BOOTSTRAP", "1") - .output() - .map_err(Error::LaunchingMetadata)?; - if !metadata_output.status.success() { - return Err(Error::GettingMetadata( - String::from_utf8(metadata_output.stderr).expect("UTF-8 output from cargo"), - )); - } - let json = serde_json::from_slice(&metadata_output.stdout)?; - Ok(json) -} - /// Run cargo-vendor, fetching into the given dir fn run_cargo_vendor(cargo: &Path, dest: &Path, manifest_paths: &[&Path]) -> Result<(), Error> { let mut vendor_command = std::process::Command::new(cargo); From f7e6bf61a9c3492115d19dc112071adea90a7038 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 30 Jul 2024 19:39:06 +0100 Subject: [PATCH 05/10] generate-copyright: use rinja to format the output I can't find a way to derive rinja::Template for Node - I think because it is a recursive type. So I rendered it manually using html_escape. --- src/tools/generate-copyright/Cargo.toml | 6 +- .../generate-copyright/src/cargo_metadata.rs | 12 +- src/tools/generate-copyright/src/main.rs | 226 +++++++----------- .../templates/COPYRIGHT.html | 54 +++++ 4 files changed, 146 insertions(+), 152 deletions(-) create mode 100644 src/tools/generate-copyright/templates/COPYRIGHT.html diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index c94cc35fb5036..c00292cf33108 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -8,8 +8,10 @@ description = "Produces a manifest of all the copyrighted materials in the Rust [dependencies] anyhow = "1.0.65" +cargo_metadata = "0.18.1" +html-escape = "0.2.13" +rinja = "0.2.0" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" -thiserror = "1" tempfile = "3" -cargo_metadata = "0.18.1" +thiserror = "1" diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs index 655d73715e036..d02b9eeb6f922 100644 --- a/src/tools/generate-copyright/src/cargo_metadata.rs +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -1,7 +1,7 @@ //! Gets metadata about a workspace from Cargo use std::collections::BTreeMap; -use std::ffi::{OsStr, OsString}; +use std::ffi::OsStr; use std::path::Path; /// Describes how this module can fail @@ -36,7 +36,9 @@ pub struct PackageMetadata { /// A list of important files from the package, with their contents. /// /// This includes *COPYRIGHT*, *NOTICE*, *AUTHOR*, *LICENSE*, and *LICENCE* files, case-insensitive. - pub notices: BTreeMap, + pub notices: BTreeMap, + /// If this is true, this dep is in the Rust Standard Library + pub is_in_libstd: Option, } /// Use `cargo metadata` and `cargo vendor` to get a list of dependencies and their license data. @@ -101,6 +103,7 @@ pub fn get_metadata( license: package.license.unwrap_or_else(|| String::from("Unspecified")), authors: package.authors, notices: BTreeMap::new(), + is_in_libstd: None, }, ); } @@ -161,8 +164,9 @@ fn load_important_files( if metadata.is_dir() { // scoop up whole directory } else if metadata.is_file() { - println!("Scraping {}", filename.to_string_lossy()); - dep.notices.insert(filename.to_owned(), std::fs::read_to_string(path)?); + let filename = filename.to_string_lossy(); + println!("Scraping {}", filename); + dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?); } } } diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index af69ab8c8bf36..03b789b739298 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -1,37 +1,17 @@ use std::collections::BTreeMap; -use std::io::Write; use std::path::{Path, PathBuf}; use anyhow::Error; +use rinja::Template; mod cargo_metadata; -static TOP_BOILERPLATE: &str = r##" - - - - - Copyright notices for The Rust Toolchain - - - -

Copyright notices for The Rust Toolchain

- -

This file describes the copyright and licensing information for the source -code within The Rust Project git tree, and the third-party dependencies used -when building the Rust toolchain (including the Rust Standard Library).

- -

Table of Contents

- -"##; - -static BOTTOM_BOILERPLATE: &str = r#" - - -"#; +#[derive(Template)] +#[template(path = "COPYRIGHT.html")] +struct CopyrightTemplate { + in_tree: Node, + dependencies: BTreeMap, +} /// The entry point to the binary. /// @@ -53,150 +33,114 @@ fn main() -> Result<(), Error> { Path::new("./src/tools/cargo/Cargo.toml"), Path::new("./library/std/Cargo.toml"), ]; - let collected_cargo_metadata = + let mut collected_cargo_metadata = cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?; let stdlib_set = cargo_metadata::get_metadata(&cargo, &root_path, &[Path::new("./library/std/Cargo.toml")])?; - let mut buffer = Vec::new(); + for (key, value) in collected_cargo_metadata.iter_mut() { + value.is_in_libstd = Some(stdlib_set.contains_key(key)); + } - writeln!(buffer, "{}", TOP_BOILERPLATE)?; + let template = CopyrightTemplate { + in_tree: collected_tree_metadata.files, + dependencies: collected_cargo_metadata, + }; - writeln!( - buffer, - r#"

In-tree files

The following licenses cover the in-tree source files that were used in this release:

"# - )?; - render_tree_recursive(&collected_tree_metadata.files, &mut buffer)?; + let output = template.render()?; - writeln!( - buffer, - r#"

Out-of-tree dependencies

The following licenses cover the out-of-tree crates that were used in this release:

"# - )?; - render_deps(&collected_cargo_metadata, &stdlib_set, &mut buffer)?; + std::fs::write(&dest_file, output)?; - writeln!(buffer, "{}", BOTTOM_BOILERPLATE)?; + Ok(()) +} - std::fs::write(&dest_file, &buffer)?; +/// Describes a tree of metadata for our filesystem tree +#[derive(serde::Deserialize)] +struct Metadata { + files: Node, +} +/// Describes one node in our metadata tree +#[derive(serde::Deserialize)] +#[serde(rename_all = "kebab-case", tag = "type")] +pub(crate) enum Node { + Root { children: Vec }, + Directory { name: String, children: Vec, license: Option }, + File { name: String, license: License }, + Group { files: Vec, directories: Vec, license: License }, +} + +fn with_box(fmt: &mut std::fmt::Formatter<'_>, inner: F) -> std::fmt::Result +where + F: FnOnce(&mut std::fmt::Formatter<'_>) -> std::fmt::Result, +{ + writeln!(fmt, r#"
"#)?; + inner(fmt)?; + writeln!(fmt, "
")?; Ok(()) } -/// Recursively draw the tree of files/folders we found on disk and their licenses, as -/// markdown, into the given Vec. -fn render_tree_recursive(node: &Node, buffer: &mut Vec) -> Result<(), Error> { - writeln!(buffer, r#"
"#)?; - match node { - Node::Root { children } => { - for child in children { - render_tree_recursive(child, buffer)?; +impl std::fmt::Display for Node { + fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Node::Root { children } => { + if children.len() > 1 { + with_box(fmt, |f| { + for child in children { + writeln!(f, "{child}")?; + } + Ok(()) + }) + } else { + for child in children { + writeln!(fmt, "{child}")?; + } + Ok(()) + } } - } - Node::Directory { name, children, license } => { - render_tree_license(std::iter::once(name), license.as_ref(), buffer)?; - if !children.is_empty() { - writeln!(buffer, "

Exceptions:

")?; - for child in children { - render_tree_recursive(child, buffer)?; + Node::Directory { name, children, license } => with_box(fmt, |f| { + render_tree_license(std::iter::once(name), license.as_ref(), f)?; + if !children.is_empty() { + writeln!(f, "

Exceptions:

")?; + for child in children { + writeln!(f, "{child}")?; + } } + Ok(()) + }), + Node::Group { files, directories, license } => with_box(fmt, |f| { + render_tree_license(directories.iter().chain(files.iter()), Some(license), f) + }), + Node::File { name, license } => { + with_box(fmt, |f| render_tree_license(std::iter::once(name), Some(license), f)) } } - Node::Group { files, directories, license } => { - render_tree_license(directories.iter().chain(files.iter()), Some(license), buffer)?; - } - Node::File { name, license } => { - render_tree_license(std::iter::once(name), Some(license), buffer)?; - } } - writeln!(buffer, "
")?; - - Ok(()) } -/// Draw a series of sibling files/folders, as markdown, into the given Vec. +/// Draw a series of sibling files/folders, as HTML, into the given formatter. fn render_tree_license<'a>( names: impl Iterator, license: Option<&License>, - buffer: &mut Vec, -) -> Result<(), Error> { - writeln!(buffer, "

File/Directory: ")?; + f: &mut std::fmt::Formatter<'_>, +) -> std::fmt::Result { + writeln!(f, "

File/Directory: ")?; for name in names { - writeln!(buffer, "{name}")?; + writeln!(f, "{}", html_escape::encode_text(&name))?; } - writeln!(buffer, "

")?; + writeln!(f, "

")?; if let Some(license) = license { - writeln!(buffer, "

License: {}

", license.spdx)?; + writeln!(f, "

License: {}

", html_escape::encode_text(&license.spdx))?; for copyright in license.copyright.iter() { - writeln!(buffer, "

Copyright: {copyright}

")?; + writeln!(f, "

Copyright: {}

", html_escape::encode_text(©right))?; } } Ok(()) } -/// Render a list of out-of-tree dependencies as markdown into the given Vec. -fn render_deps( - all_deps: &BTreeMap, - stdlib_set: &BTreeMap, - buffer: &mut Vec, -) -> Result<(), Error> { - for (package, metadata) in all_deps { - let authors_list = if metadata.authors.is_empty() { - "None Specified".to_owned() - } else { - metadata.authors.join(", ") - }; - let url = format!("https://crates.io/crates/{}/{}", package.name, package.version); - writeln!(buffer)?; - writeln!( - buffer, - r#"

📦 {name}-{version}

"#, - name = package.name, - version = package.version, - )?; - writeln!(buffer, r#"

URL: {url}

"#,)?; - writeln!( - buffer, - "

In libstd: {}

", - if stdlib_set.contains_key(package) { "Yes" } else { "No" } - )?; - writeln!(buffer, "

Authors: {}

", escape_html(&authors_list))?; - writeln!(buffer, "

License: {}

", escape_html(&metadata.license))?; - writeln!(buffer, "

Notices: ")?; - if metadata.notices.is_empty() { - writeln!(buffer, "None")?; - } else { - for (name, contents) in &metadata.notices { - writeln!( - buffer, - "

{}", - name.to_string_lossy() - )?; - writeln!(buffer, "
\n{}\n
", contents)?; - writeln!(buffer, "
")?; - } - } - writeln!(buffer, "

")?; - } - Ok(()) -} -/// Describes a tree of metadata for our filesystem tree -#[derive(serde::Deserialize)] -struct Metadata { - files: Node, -} - -/// Describes one node in our metadata tree -#[derive(serde::Deserialize)] -#[serde(rename_all = "kebab-case", tag = "type")] -pub(crate) enum Node { - Root { children: Vec }, - Directory { name: String, children: Vec, license: Option }, - File { name: String, license: License }, - Group { files: Vec, directories: Vec, license: License }, -} - /// A License has an SPDX license name and a list of copyright holders. #[derive(serde::Deserialize)] struct License { @@ -212,13 +156,3 @@ fn env_path(var: &str) -> Result { anyhow::bail!("missing environment variable {var}") } } - -/// Escapes any invalid HTML characters -fn escape_html(input: &str) -> String { - static MAPPING: [(char, &str); 3] = [('&', "&"), ('<', "<"), ('>', ">")]; - let mut output = input.to_owned(); - for (ch, s) in &MAPPING { - output = output.replace(*ch, s); - } - output -} diff --git a/src/tools/generate-copyright/templates/COPYRIGHT.html b/src/tools/generate-copyright/templates/COPYRIGHT.html new file mode 100644 index 0000000000000..ccb177a54d419 --- /dev/null +++ b/src/tools/generate-copyright/templates/COPYRIGHT.html @@ -0,0 +1,54 @@ + + + + + Copyright notices for The Rust Toolchain + + + +

Copyright notices for The Rust Toolchain

+ +

This file describes the copyright and licensing information for the source +code within The Rust Project git tree, and the third-party dependencies used +when building the Rust toolchain (including the Rust Standard Library).

+ +

Table of Contents

+ + +

In-tree files

+ +

The following licenses cover the in-tree source files that were used in this +release:

+ +{{ in_tree|safe }} + +

Out-of-tree dependencies

+ +

The following licenses cover the out-of-tree crates that were used in this +release:

+ +{% for (key, value) in dependencies %} +

📦 {{key.name}}-{{key.version}}

+

URL: https://crates.io/crates/{{ key.name }}/{{ key.version }}

+

In libstd: {% if value.is_in_libstd.unwrap() %} Yes {% else %} No {% endif %}

+

Authors: {{ value.authors|join(", ") }}

+

License: {{ value.license }}

+ {% let len = value.notices.len() %} + {% if len > 0 %} +

Notices: + {% for (notice_name, notice_text) in value.notices %} +

+ {{ notice_name }} +
+{{ notice_text }}
+                
+
+ {% endfor %} +

+ {% endif %} +{% endfor %} + + \ No newline at end of file From 37ab09010cb627df6b3ffb2d4e95c2cacaf93efb Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 30 Jul 2024 19:39:38 +0100 Subject: [PATCH 06/10] REUSE.toml: Copyright text isn't parsed as Markdown. --- REUSE.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/REUSE.toml b/REUSE.toml index 1a30d8016c9ea..efd705552478d 100644 --- a/REUSE.toml +++ b/REUSE.toml @@ -163,7 +163,7 @@ SPDX-License-Identifier = "MIT OR Apache-2.0" path = "src/llvm-project/**" precedence = "override" SPDX-FileCopyrightText = [ - "2003-2019 by the contributors listed in [CREDITS.TXT](https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)", + "2003-2019 by the contributors listed in CREDITS.TXT (https://github.com/rust-lang/llvm-project/blob/7738295178045041669876bf32b0543ec8319a5c/llvm/CREDITS.TXT)", "2010 Apple Inc", "2003-2019 University of Illinois at Urbana-Champaign.", ] From 30ac7c9a817793a86d870eeab5a8238acb29b186 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Wed, 31 Jul 2024 19:26:44 +0100 Subject: [PATCH 07/10] generate-copyright: Render Node with rinja too. --- Cargo.lock | 46 +++++++++++- src/tools/generate-copyright/Cargo.toml | 2 - src/tools/generate-copyright/src/main.rs | 73 +------------------ .../generate-copyright/templates/Node.html | 71 ++++++++++++++++++ 4 files changed, 115 insertions(+), 77 deletions(-) create mode 100644 src/tools/generate-copyright/templates/Node.html diff --git a/Cargo.lock b/Cargo.lock index eeaeff79ebb25..54454e84b5a97 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1407,9 +1407,9 @@ version = "0.1.0" dependencies = [ "anyhow", "cargo_metadata 0.18.1", + "rinja 0.2.0", "serde", "serde_json", - "tempfile", "thiserror", ] @@ -3100,6 +3100,18 @@ dependencies = [ "walkdir", ] +[[package]] +name = "rinja" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2d47a46d7729e891c8accf260e9daa02ae6d570aa2a94fb1fb27eb5364a2323" +dependencies = [ + "humansize", + "num-traits", + "percent-encoding", + "rinja_derive 0.2.0", +] + [[package]] name = "rinja" version = "0.3.0" @@ -3107,7 +3119,24 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd" dependencies = [ "itoa", - "rinja_derive", + "rinja_derive 0.3.0", +] + +[[package]] +name = "rinja_derive" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "44dae9afe59d58ed8d988d67d1945f3638125d2fd2104058399382e11bd3ea2a" +dependencies = [ + "basic-toml", + "mime", + "mime_guess", + "once_map", + "proc-macro2", + "quote", + "rinja_parser 0.2.0", + "serde", + "syn 2.0.67", ] [[package]] @@ -3123,11 +3152,20 @@ dependencies = [ "once_map", "proc-macro2", "quote", - "rinja_parser", + "rinja_parser 0.3.0", "serde", "syn 2.0.67", ] +[[package]] +name = "rinja_parser" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b1771c78cd5d3b1646ef8d8f2ed100db936e8b291d3cc06e92a339ff346858c" +dependencies = [ + "nom", +] + [[package]] name = "rinja_parser" version = "0.3.0" @@ -4606,7 +4644,7 @@ dependencies = [ "minifier", "pulldown-cmark 0.9.6", "regex", - "rinja", + "rinja 0.3.0", "rustdoc-json-types", "serde", "serde_json", diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index c00292cf33108..d200e2ec9f1e1 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -9,9 +9,7 @@ description = "Produces a manifest of all the copyrighted materials in the Rust [dependencies] anyhow = "1.0.65" cargo_metadata = "0.18.1" -html-escape = "0.2.13" rinja = "0.2.0" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" -tempfile = "3" thiserror = "1" diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index 03b789b739298..37de24648d56d 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -62,8 +62,9 @@ struct Metadata { } /// Describes one node in our metadata tree -#[derive(serde::Deserialize)] +#[derive(serde::Deserialize, rinja::Template)] #[serde(rename_all = "kebab-case", tag = "type")] +#[template(path = "Node.html")] pub(crate) enum Node { Root { children: Vec }, Directory { name: String, children: Vec, license: Option }, @@ -71,76 +72,6 @@ pub(crate) enum Node { Group { files: Vec, directories: Vec, license: License }, } -fn with_box(fmt: &mut std::fmt::Formatter<'_>, inner: F) -> std::fmt::Result -where - F: FnOnce(&mut std::fmt::Formatter<'_>) -> std::fmt::Result, -{ - writeln!(fmt, r#"
"#)?; - inner(fmt)?; - writeln!(fmt, "
")?; - Ok(()) -} - -impl std::fmt::Display for Node { - fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - match self { - Node::Root { children } => { - if children.len() > 1 { - with_box(fmt, |f| { - for child in children { - writeln!(f, "{child}")?; - } - Ok(()) - }) - } else { - for child in children { - writeln!(fmt, "{child}")?; - } - Ok(()) - } - } - Node::Directory { name, children, license } => with_box(fmt, |f| { - render_tree_license(std::iter::once(name), license.as_ref(), f)?; - if !children.is_empty() { - writeln!(f, "

Exceptions:

")?; - for child in children { - writeln!(f, "{child}")?; - } - } - Ok(()) - }), - Node::Group { files, directories, license } => with_box(fmt, |f| { - render_tree_license(directories.iter().chain(files.iter()), Some(license), f) - }), - Node::File { name, license } => { - with_box(fmt, |f| render_tree_license(std::iter::once(name), Some(license), f)) - } - } - } -} - -/// Draw a series of sibling files/folders, as HTML, into the given formatter. -fn render_tree_license<'a>( - names: impl Iterator, - license: Option<&License>, - f: &mut std::fmt::Formatter<'_>, -) -> std::fmt::Result { - writeln!(f, "

File/Directory: ")?; - for name in names { - writeln!(f, "{}", html_escape::encode_text(&name))?; - } - writeln!(f, "

")?; - - if let Some(license) = license { - writeln!(f, "

License: {}

", html_escape::encode_text(&license.spdx))?; - for copyright in license.copyright.iter() { - writeln!(f, "

Copyright: {}

", html_escape::encode_text(©right))?; - } - } - - Ok(()) -} - /// A License has an SPDX license name and a list of copyright holders. #[derive(serde::Deserialize)] struct License { diff --git a/src/tools/generate-copyright/templates/Node.html b/src/tools/generate-copyright/templates/Node.html new file mode 100644 index 0000000000000..a71a1bf3b73d7 --- /dev/null +++ b/src/tools/generate-copyright/templates/Node.html @@ -0,0 +1,71 @@ +{% match self %} + +{% when Node::Root { children } %} + +{% for child in children %} +{{ child|safe }} +{% endfor %} + +{% when Node::Directory { name, children, license } %} + +
+ +

+ File/Directory: {{ name }} +

+ + {% if let Some(license) = license %} + +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} + + {% endif %} + + {% if !children.is_empty() %} + +

Exceptions:

+ {% for child in children %} + {{ child|safe }} + {% endfor %} + + {% endif %} + +
+ +{% when Node::File { name, license } %} + +
+

+ File/Directory: {{ name }} +

+ +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} +
+ +{% when Node::Group { files, directories, license } %} + +
+ +

+ File/Directory: + {% for name in files %} + {{ name }} + {% endfor %} + {% for name in directories %} + {{ name }} + {% endfor %} +

+ +

License: {{ license.spdx }}

+ {% for copyright in license.copyright.iter() %} +

Copyright: {{ copyright }}

+ {% endfor %} + +
+ +{% endmatch %} From 5277b67b6977e3fcef64b0ce21cecd3a5dc9c22a Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 6 Aug 2024 11:02:11 +0100 Subject: [PATCH 08/10] generate-copyright: gather files inside interesting folders --- .../generate-copyright/src/cargo_metadata.rs | 50 ++++++++++++------- 1 file changed, 33 insertions(+), 17 deletions(-) diff --git a/src/tools/generate-copyright/src/cargo_metadata.rs b/src/tools/generate-copyright/src/cargo_metadata.rs index d02b9eeb6f922..c85e4aa371a2a 100644 --- a/src/tools/generate-copyright/src/cargo_metadata.rs +++ b/src/tools/generate-copyright/src/cargo_metadata.rs @@ -2,7 +2,7 @@ use std::collections::BTreeMap; use std::ffi::OsStr; -use std::path::Path; +use std::path::{Path, PathBuf}; /// Describes how this module can fail #[derive(Debug, thiserror::Error)] @@ -15,6 +15,8 @@ pub enum Error { LaunchingVendor(std::io::Error), #[error("Failed to complete cargo vendor")] RunningVendor, + #[error("Bad path {0:?} whilst scraping files")] + Scraping(PathBuf), } /// Uniquely describes a package on crates.io @@ -150,24 +152,38 @@ fn load_important_files( let entry = entry?; let metadata = entry.metadata()?; let path = entry.path(); - if let Some(filename) = path.file_name() { - let lc_filename = filename.to_ascii_lowercase(); - let lc_filename_str = lc_filename.to_string_lossy(); - let mut keep = false; - for m in ["copyright", "licence", "license", "author", "notice"] { - if lc_filename_str.contains(m) { - keep = true; - break; - } + let Some(filename) = path.file_name() else { + return Err(Error::Scraping(path)); + }; + let lc_filename = filename.to_ascii_lowercase(); + let lc_filename_str = lc_filename.to_string_lossy(); + let mut keep = false; + for m in ["copyright", "licence", "license", "author", "notice"] { + if lc_filename_str.contains(m) { + keep = true; + break; } - if keep { - if metadata.is_dir() { - // scoop up whole directory - } else if metadata.is_file() { - let filename = filename.to_string_lossy(); - println!("Scraping {}", filename); - dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?); + } + if keep { + if metadata.is_dir() { + for inner_entry in std::fs::read_dir(entry.path())? { + let inner_entry = inner_entry?; + if inner_entry.metadata()?.is_file() { + let inner_filename = inner_entry.file_name(); + let inner_filename_str = inner_filename.to_string_lossy(); + let qualified_filename = + format!("{}/{}", lc_filename_str, inner_filename_str); + println!("Scraping {}", qualified_filename); + dep.notices.insert( + qualified_filename.to_string(), + std::fs::read_to_string(inner_entry.path())?, + ); + } } + } else if metadata.is_file() { + let filename = filename.to_string_lossy(); + println!("Scraping {}", filename); + dep.notices.insert(filename.to_string(), std::fs::read_to_string(path)?); } } } From 4e24e9b1adb8ffd57d7315028407922c44069f26 Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 6 Aug 2024 12:03:37 +0100 Subject: [PATCH 09/10] Update to rinja 0.3 --- Cargo.lock | 49 ++++--------------------- src/tools/generate-copyright/Cargo.toml | 2 +- 2 files changed, 8 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 54454e84b5a97..67fee5b3f059f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1407,7 +1407,7 @@ version = "0.1.0" dependencies = [ "anyhow", "cargo_metadata 0.18.1", - "rinja 0.2.0", + "rinja", "serde", "serde_json", "thiserror", @@ -3100,43 +3100,17 @@ dependencies = [ "walkdir", ] -[[package]] -name = "rinja" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2d47a46d7729e891c8accf260e9daa02ae6d570aa2a94fb1fb27eb5364a2323" -dependencies = [ - "humansize", - "num-traits", - "percent-encoding", - "rinja_derive 0.2.0", -] - [[package]] name = "rinja" version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6d3762e3740cdbf2fd2be465cc2c26d643ad17353cc2e0223d211c1b096118bd" dependencies = [ + "humansize", "itoa", - "rinja_derive 0.3.0", -] - -[[package]] -name = "rinja_derive" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44dae9afe59d58ed8d988d67d1945f3638125d2fd2104058399382e11bd3ea2a" -dependencies = [ - "basic-toml", - "mime", - "mime_guess", - "once_map", - "proc-macro2", - "quote", - "rinja_parser 0.2.0", - "serde", - "syn 2.0.67", + "num-traits", + "percent-encoding", + "rinja_derive", ] [[package]] @@ -3152,20 +3126,11 @@ dependencies = [ "once_map", "proc-macro2", "quote", - "rinja_parser 0.3.0", + "rinja_parser", "serde", "syn 2.0.67", ] -[[package]] -name = "rinja_parser" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b1771c78cd5d3b1646ef8d8f2ed100db936e8b291d3cc06e92a339ff346858c" -dependencies = [ - "nom", -] - [[package]] name = "rinja_parser" version = "0.3.0" @@ -4644,7 +4609,7 @@ dependencies = [ "minifier", "pulldown-cmark 0.9.6", "regex", - "rinja 0.3.0", + "rinja", "rustdoc-json-types", "serde", "serde_json", diff --git a/src/tools/generate-copyright/Cargo.toml b/src/tools/generate-copyright/Cargo.toml index d200e2ec9f1e1..404101abd41bf 100644 --- a/src/tools/generate-copyright/Cargo.toml +++ b/src/tools/generate-copyright/Cargo.toml @@ -9,7 +9,7 @@ description = "Produces a manifest of all the copyrighted materials in the Rust [dependencies] anyhow = "1.0.65" cargo_metadata = "0.18.1" -rinja = "0.2.0" +rinja = "0.3.0" serde = { version = "1.0.147", features = ["derive"] } serde_json = "1.0.85" thiserror = "1" From 99579f3ec1ee070d3d8203df1168f948b05b48dc Mon Sep 17 00:00:00 2001 From: Jonathan Pallant Date: Tue, 6 Aug 2024 12:12:57 +0100 Subject: [PATCH 10/10] Apparently library/std is now part of a workspace at library/ --- src/tools/generate-copyright/src/main.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/generate-copyright/src/main.rs b/src/tools/generate-copyright/src/main.rs index 37de24648d56d..afa75d0d67140 100644 --- a/src/tools/generate-copyright/src/main.rs +++ b/src/tools/generate-copyright/src/main.rs @@ -31,7 +31,7 @@ fn main() -> Result<(), Error> { let workspace_paths = [ Path::new("./Cargo.toml"), Path::new("./src/tools/cargo/Cargo.toml"), - Path::new("./library/std/Cargo.toml"), + Path::new("./library/Cargo.toml"), ]; let mut collected_cargo_metadata = cargo_metadata::get_metadata_and_notices(&cargo, &out_dir, &root_path, &workspace_paths)?;