fix(core): optimize daemon output glob matching #27775

Merged · 7 commits · Sep 6, 2024
21 changes: 6 additions & 15 deletions packages/nx/src/migrations/update-15-0-0/prefix-outputs.ts
@@ -28,11 +28,7 @@ export default async function (tree: Tree) {
continue;
}

-      try {
-        validateOutputs(target.outputs);
-      } catch (e) {
-        target.outputs = transformLegacyOutputs(project.root, e);
-      }
+      target.outputs = transformLegacyOutputs(project.root, target.outputs);
}
try {
updateProjectConfiguration(tree, projectName, project);
@@ -44,11 +40,10 @@ export default async function (tree: Tree) {
(json) => {
for (const target of Object.values(json.nx?.targets ?? {})) {
if (target.outputs) {
-          try {
-            validateOutputs(target.outputs);
-          } catch (e) {
-            target.outputs = transformLegacyOutputs(project.root, e);
-          }
+          target.outputs = transformLegacyOutputs(
+            project.root,
+            target.outputs
+          );
}
}

@@ -64,11 +59,7 @@ export default async function (tree: Tree) {
if (!target.outputs) {
continue;
}
-    try {
-      validateOutputs(target.outputs);
-    } catch (e: any) {
-      target.outputs = transformLegacyOutputs('{projectRoot}', e);
-    }
+    target.outputs = transformLegacyOutputs('{projectRoot}', target.outputs);
}

updateNxJson(tree, nxJson);
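Note on this migration file: transformation is no longer driven off a thrown validation error. `transformLegacyOutputs` now receives the raw outputs directly, and validation (`validateOutputs`) lives separately in the native layer. The new `getTransformableOutputs` export (see index.d.ts below) suggests the selection of outputs that need fixing also happens natively. A hedged sketch of that filtering rule, using a simplified prefix test instead of the real regex, with illustrative values:

```rust
// Sketch only: which outputs are "transformable" (missing a
// {workspaceRoot}/{projectRoot} token)? Not an excerpt from the PR.
fn main() {
    let outputs = vec![
        "dist".to_string(),                 // no token -> transformable
        "{workspaceRoot}/dist".to_string(), // prefixed -> left alone
        "!{projectRoot}/tmp".to_string(),   // negated prefix -> left alone
    ];
    let transformable: Vec<String> = outputs
        .into_iter()
        .filter(|o| !o.starts_with('{') && !o.starts_with("!{"))
        .collect();
    assert_eq!(transformable, vec!["dist".to_string()]);
}
```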
89 changes: 73 additions & 16 deletions packages/nx/src/native/cache/expand_outputs.rs
@@ -1,9 +1,11 @@
+use hashbrown::HashMap;
use std::path::{Path, PathBuf};
+use tracing::trace;

-use crate::native::glob::{build_glob_set, contains_glob_pattern};
+use crate::native::glob::{build_glob_set, contains_glob_pattern, glob_transform::partition_glob};
use crate::native::logger::enable_logger;
use crate::native::utils::Normalize;
-use crate::native::walker::nx_walker_sync;
+use crate::native::walker::{nx_walker, nx_walker_sync};

#[napi]
pub fn expand_outputs(directory: String, entries: Vec<String>) -> anyhow::Result<Vec<String>> {
@@ -70,13 +72,33 @@ where
Ok(found_paths)
}

+fn partition_globs_into_map(globs: Vec<String>) -> anyhow::Result<HashMap<String, Vec<String>>> {
+    globs
+        .iter()
+        .map(|glob| partition_glob(glob))
+        // Right now we have an iterator where each item is (root: String, patterns: String[]).
+        // We want a singular root, with the patterns mapped to it.
+        .fold(
+            Ok(HashMap::<String, Vec<String>>::new()),
+            |map_result, parsed_glob| {
+                let mut map = map_result?;
+                let (root, patterns) = parsed_glob?;
+                let entry = map.entry(root).or_insert(vec![]);
+                entry.extend(patterns);
+                Ok(map)
+            },
+        )
+}

#[napi]
/// Expands the given outputs into a list of existing files.
/// This is used when hashing outputs
pub fn get_files_for_outputs(
directory: String,
entries: Vec<String>,
) -> anyhow::Result<Vec<String>> {
+    enable_logger();

let directory: PathBuf = directory.into();

let mut globs: Vec<String> = vec![];
@@ -86,7 +108,9 @@ pub fn get_files_for_outputs(
let path = directory.join(&entry);

if !path.exists() {
-            globs.push(entry);
+            if contains_glob_pattern(&entry) {
+                globs.push(entry);
+            }
} else if path.is_dir() {
directories.push(entry);
} else {
@@ -95,28 +119,41 @@ pub fn get_files_for_outputs(
}

if !globs.is_empty() {
-        // todo(jcammisuli): optimize this as nx_walker_sync is very slow on the root directory. We need to change this to only search smaller directories
-        let glob_set = build_glob_set(&globs)?;
-        let found_paths = nx_walker_sync(&directory, None).filter_map(|path| {
-            if glob_set.is_match(&path) {
-                Some(path.to_normalized_string())
-            } else {
-                None
-            }
-        });
+        let partitioned_globs = partition_globs_into_map(globs)?;
+        for (root, patterns) in partitioned_globs {
+            let root_path = directory.join(&root);
+            let glob_set = build_glob_set(&patterns)?;
+            trace!("walking directory: {:?}", root_path);
+
+            let found_paths: Vec<String> = nx_walker(&root_path, false)
+                .filter_map(|file| {
+                    if glob_set.is_match(&file.normalized_path) {
+                        Some(
+                            // root_path contains full directory,
+                            // root is only the leading dirs from glob
+                            PathBuf::from(&root)
+                                .join(&file.normalized_path)
+                                .to_normalized_string(),
+                        )
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+
-        files.extend(found_paths);
+            files.extend(found_paths);
+        }
}

if !directories.is_empty() {
[Review thread on this line]
Collaborator: Can we use the multi-threaded walker below?
Collaborator: We can do this some other time.
for dir in directories {
let dir = PathBuf::from(dir);
let dir_path = directory.join(&dir);
-        let files_in_dir = nx_walker_sync(&dir_path, None).filter_map(|e| {
-            let path = dir_path.join(&e);
+        let files_in_dir = nx_walker(&dir_path, false).filter_map(|e| {
+            let path = dir_path.join(&e.normalized_path);

if path.is_file() {
-                Some(dir.join(e).to_normalized_string())
+                Some(dir.join(e.normalized_path).to_normalized_string())
} else {
None
}
@@ -221,4 +258,24 @@ mod test {
]
);
}

+    #[test]
+    fn should_get_files_for_outputs_with_glob() {
+        let temp = setup_fs();
+        let entries = vec![
+            "packages/nx/src/native/*.node".to_string(),
+            "folder/nested-folder".to_string(),
+            "test.txt".to_string(),
+        ];
+        let mut result = get_files_for_outputs(temp.display().to_string(), entries).unwrap();
+        result.sort();
+        assert_eq!(
+            result,
+            vec![
+                "folder/nested-folder",
+                "packages/nx/src/native/nx.darwin-arm64.node",
+                "test.txt"
+            ]
+        );
+    }
}
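Note on the hunks above: the old code did one synchronous walk of the entire output directory (`nx_walker_sync(&directory, None)`) and tested every visited path against the combined glob set, which is what the removed `todo(jcammisuli)` comment complained about. The new code splits each glob into its static leading directories plus a pattern remainder, groups patterns that share a root, and walks only those subtrees with the multi-threaded `nx_walker`. A minimal, self-contained sketch of the grouping step, with a simplified `split_static_root` standing in for the real `partition_glob` and illustrative globs:

```rust
use std::collections::HashMap;

// Stand-in for partition_glob: split "dist/apps/**/*.js" into
// ("dist/apps", ["**/*.js"]) at the first wildcard segment.
fn split_static_root(glob: &str) -> (String, Vec<String>) {
    let mut root = Vec::new();
    let mut rest = Vec::new();
    for segment in glob.split('/') {
        if rest.is_empty() && !segment.chars().any(|c| "*?[{!(".contains(c)) {
            root.push(segment);
        } else {
            rest.push(segment);
        }
    }
    (root.join("/"), vec![rest.join("/")])
}

fn main() {
    // Same fold as partition_globs_into_map: accumulate patterns per root.
    let globs = ["dist/apps/**/*.js", "dist/apps/**/*.map", "coverage/**/lcov.info"];
    let map = globs.iter().fold(
        HashMap::<String, Vec<String>>::new(),
        |mut map, glob| {
            let (root, patterns) = split_static_root(glob);
            map.entry(root).or_insert_with(Vec::new).extend(patterns);
            map
        },
    );
    assert_eq!(map["dist/apps"], vec!["**/*.js", "**/*.map"]);
    assert_eq!(map["coverage"], vec!["**/lcov.info"]);
}
```

Each walk then starts at `directory.join(root)` rather than at the output root, and matched paths are re-prefixed with `root` so the results stay relative to `directory`.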
3 changes: 2 additions & 1 deletion packages/nx/src/native/cache/mod.rs
@@ -1,3 +1,4 @@
+pub mod cache;
pub mod expand_outputs;
pub mod file_ops;
-pub mod cache;
+pub mod validate_outputs;
77 changes: 77 additions & 0 deletions packages/nx/src/native/cache/validate_outputs.rs
@@ -0,0 +1,77 @@
use itertools::Itertools;
use regex::Regex;

use crate::native::glob::{contains_glob_pattern, glob_transform::partition_glob};

const ALLOWED_WORKSPACE_ROOT_OUTPUT_PREFIXES: [&str; 2] = ["!{workspaceRoot}", "{workspaceRoot}"];

fn is_missing_prefix(output: &str) -> bool {
    let re = Regex::new(r"^!?\{[\s\S]+\}").expect("Output pattern regex should compile");

    !re.is_match(output)
}

#[napi]
pub fn validate_outputs(outputs: Vec<String>) -> anyhow::Result<()> {
    let outputs_len = outputs.len();
    let mut missing_prefix = Vec::with_capacity(outputs_len);
    let mut workspace_globs = Vec::with_capacity(outputs_len);

    for output in outputs.iter() {
        if is_missing_prefix(output) {
            missing_prefix.push(output);
        } else {
            for prefix in ALLOWED_WORKSPACE_ROOT_OUTPUT_PREFIXES.iter() {
                if let Some(trimmed) = output.strip_prefix(prefix) {
                    if contains_glob_pattern(&trimmed) {
                        let (root, _) = partition_glob(&trimmed)?;
                        if root.is_empty() {
                            workspace_globs.push(output);
                        }
                    }
                }
            }
        }
    }

    if workspace_globs.is_empty() && missing_prefix.is_empty() {
        return Ok(());
    }

    let mut error_message = String::new();
    if !missing_prefix.is_empty() {
        error_message.push_str(&format!(
            "The following outputs are invalid: \n - {}\n\nRun `nx repair` to fix this.",
            missing_prefix.iter().join("\n - ")
        ));
    }
    if !workspace_globs.is_empty() {
        error_message.push_str(&format!(
            "The following outputs are defined by a glob pattern from the workspace root: \n - {}\n\nThese can be slow, replace them with a more specific pattern.",
            workspace_globs.iter().join("\n - ")
        ));
    }

    Err(anyhow::anyhow!(error_message))
}

#[napi]
pub fn get_transformable_outputs(outputs: Vec<String>) -> Vec<String> {
    outputs
        .into_iter()
        .filter(|output| is_missing_prefix(output))
        .collect()
}

#[cfg(test)]
mod test {
    use super::is_missing_prefix;

    #[test]
    fn test_is_missing_prefix() {
        assert!(is_missing_prefix("dist"));
        assert!(is_missing_prefix("!dist"));
        assert!(!is_missing_prefix("{workspaceRoot}/dist"));
        assert!(!is_missing_prefix("!{workspaceRoot}/dist"));
    }
}
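Two kinds of outputs get flagged in this new file. `is_missing_prefix` uses the regex `^!?\{[\s\S]+\}` to require a leading interpolation token such as `{workspaceRoot}` or `{projectRoot}`, optionally negated with `!`. Separately, a prefixed output whose glob partitions to an empty static root (e.g. `{workspaceRoot}/**/*.js`) is reported as a slow workspace-root glob rather than an invalid one. A standalone sketch of just the prefix check, runnable with only the `regex` crate (the sample outputs are illustrative):

```rust
use regex::Regex;

fn main() {
    // Same pattern as is_missing_prefix: an optional leading `!`,
    // then a `{...}` interpolation token.
    let re = Regex::new(r"^!?\{[\s\S]+\}").expect("Output pattern regex should compile");

    assert!(!re.is_match("dist"));                   // missing prefix -> flagged
    assert!(!re.is_match("!dist"));                  // negation alone does not help
    assert!(re.is_match("{workspaceRoot}/dist"));    // prefixed -> accepted
    assert!(re.is_match("!{projectRoot}/coverage")); // negated prefix -> accepted
}
```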
2 changes: 1 addition & 1 deletion packages/nx/src/native/glob.rs
@@ -1,6 +1,6 @@
mod glob_group;
mod glob_parser;
-mod glob_transform;
+pub mod glob_transform;

use crate::native::glob::glob_transform::convert_glob;
use globset::{GlobBuilder, GlobSet, GlobSetBuilder};
65 changes: 62 additions & 3 deletions packages/nx/src/native/glob/glob_transform.rs
@@ -2,15 +2,16 @@ use crate::native::glob::glob_group::GlobGroup;
use crate::native::glob::glob_parser::parse_glob;
use itertools::Itertools;
use std::collections::HashSet;
+use itertools::Either::{Left, Right};
+use super::contains_glob_pattern;

#[derive(Debug)]
enum GlobType {
Negative(String),
Positive(String),
}

-pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
-    let (negated, parsed) = parse_glob(glob)?;
+fn convert_glob_segments(negated: bool, parsed: Vec<Vec<GlobGroup>>) -> Vec<String> {
let mut built_segments: Vec<Vec<GlobType>> = Vec::new();
for (index, glob_segment) in parsed.iter().enumerate() {
let is_last = index == parsed.len() - 1;
@@ -58,7 +59,12 @@ pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
.into_iter()
.collect::<Vec<_>>();
globs.sort();
-    Ok(globs)
+    globs
}

+pub fn convert_glob(glob: &str) -> anyhow::Result<Vec<String>> {
+    let (negated, parsed) = parse_glob(glob)?;
+    Ok(convert_glob_segments(negated, parsed))
+}

fn build_segment(
@@ -107,6 +113,31 @@ fn build_segment(
}
}

+pub fn partition_glob(glob: &str) -> anyhow::Result<(String, Vec<String>)> {
+    let (negated, groups) = parse_glob(glob)?;
+    // Partition glob into leading directories and patterns that should be matched
+    let mut has_patterns = false;
+    let (leading_dir_segments, pattern_segments): (Vec<String>, _) = groups
+        .into_iter()
+        .filter(|group| !group.is_empty())
+        .partition_map(|group| {
+            match &group[0] {
+                GlobGroup::NonSpecial(value) if !contains_glob_pattern(&value) && !has_patterns => {
+                    Left(value.to_string())
+                }
+                _ => {
+                    has_patterns = true;
+                    Right(group)
+                }
+            }
+        });
+
+    Ok((
+        leading_dir_segments.join("/"),
+        convert_glob_segments(negated, pattern_segments),
+    ))
+}

#[cfg(test)]
mod test {
use super::convert_glob;
@@ -233,4 +264,32 @@ mod test {
]
);
}

+    #[test]
+    fn should_partition_glob_with_leading_dirs() {
+        let (leading_dirs, globs) = super::partition_glob("dist/app/**/!(README|LICENSE).(js|ts)").unwrap();
+        assert_eq!(leading_dirs, "dist/app");
+        assert_eq!(globs, ["!**/{README,LICENSE}.{js,ts}", "**/*.{js,ts}",]);
+    }
+
+    #[test]
+    fn should_partition_glob_with_leading_dirs_and_simple_patterns() {
+        let (leading_dirs, globs) = super::partition_glob("dist/app/**/*.css").unwrap();
+        assert_eq!(leading_dirs, "dist/app");
+        assert_eq!(globs, ["**/*.css"]);
+    }
+
+    #[test]
+    fn should_partition_glob_with_leading_dirs_dirs_and_patterns() {
+        let (leading_dirs, globs) = super::partition_glob("dist/app/**/js/*.js").unwrap();
+        assert_eq!(leading_dirs, "dist/app");
+        assert_eq!(globs, ["**/js/*.js"]);
+    }
+
+    #[test]
+    fn should_partition_glob_with_leading_dirs_and_no_patterns() {
+        let (leading_dirs, globs) = super::partition_glob("dist/app/").unwrap();
+        assert_eq!(leading_dirs, "dist/app");
+        assert_eq!(globs, [] as [String; 0]);
+    }
}
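The subtle part of `partition_glob` is the `has_patterns` latch: once any segment contains a wildcard, every later segment stays on the pattern side even when it is itself static, which is why `dist/app/**/js/*.js` keeps `js` in the pattern (third test above). A simplified sketch of that rule on plain path segments, assuming a segment is "static" when it has no glob metacharacters (the real code works on parsed `GlobGroup`s and re-converts the remainder with `convert_glob_segments`):

```rust
// Simplified model of partition_glob's split.
fn partition_simple(glob: &str) -> (String, String) {
    let mut has_patterns = false;
    let mut roots: Vec<&str> = Vec::new();
    let mut patterns: Vec<&str> = Vec::new();

    for segment in glob.split('/').filter(|s| !s.is_empty()) {
        let is_static = !segment.chars().any(|c| "*?[]{}!()".contains(c));
        if is_static && !has_patterns {
            roots.push(segment);
        } else {
            // The latch: a static segment after the first wildcard
            // (like "js" in "dist/app/**/js/*.js") stays in the pattern.
            has_patterns = true;
            patterns.push(segment);
        }
    }
    (roots.join("/"), patterns.join("/"))
}

fn main() {
    assert_eq!(
        partition_simple("dist/app/**/js/*.js"),
        ("dist/app".to_string(), "**/js/*.js".to_string())
    );
    assert_eq!(
        partition_simple("dist/app/"),
        ("dist/app".to_string(), String::new())
    );
}
```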
4 changes: 4 additions & 0 deletions packages/nx/src/native/index.d.ts
@@ -150,6 +150,8 @@ export declare function getBinaryTarget(): string
*/
export declare function getFilesForOutputs(directory: string, entries: Array<string>): Array<string>

+export declare function getTransformableOutputs(outputs: Array<string>): Array<string>

export declare function hashArray(input: Array<string>): string

export interface HashDetails {
@@ -263,6 +265,8 @@ export interface UpdatedWorkspaceFiles {
externalReferences: NxWorkspaceFilesExternals
}

+export declare function validateOutputs(outputs: Array<string>): void

export interface WatchEvent {
path: string
type: EventType