diff --git a/.gitignore b/.gitignore index 521d8f4fb421c2..9411522c4f6a2a 100644 --- a/.gitignore +++ b/.gitignore @@ -158,6 +158,7 @@ /git-show-branch /git-show-index /git-show-ref +/git-sparse-checkout /git-stage /git-stash /git-status diff --git a/Documentation/config/core.txt b/Documentation/config/core.txt index edf3a223308b63..6c322a57446120 100644 --- a/Documentation/config/core.txt +++ b/Documentation/config/core.txt @@ -660,8 +660,11 @@ core.gvfs:: -- core.sparseCheckout:: - Enable "sparse checkout" feature. See section "Sparse checkout" in - linkgit:git-read-tree[1] for more information. + Enable "sparse checkout" feature. If "false", then sparse-checkout + is disabled. If "true", then sparse-checkout is enabled with the full + .gitignore pattern set. If "cone", then sparse-checkout is enabled with + a restricted pattern set. See linkgit:git-sparse-checkout[1] for more + information. core.abbrev:: Set the length object names are abbreviated to. If diff --git a/Documentation/git-clone.txt b/Documentation/git-clone.txt index 34011c2940ad4b..0fe91d2f04766b 100644 --- a/Documentation/git-clone.txt +++ b/Documentation/git-clone.txt @@ -15,7 +15,7 @@ SYNOPSIS [--dissociate] [--separate-git-dir ] [--depth ] [--[no-]single-branch] [--no-tags] [--recurse-submodules[=]] [--[no-]shallow-submodules] - [--[no-]remote-submodules] [--jobs ] [--] + [--[no-]remote-submodules] [--jobs ] [--sparse] [--] [] DESCRIPTION @@ -156,6 +156,12 @@ objects from the source repository into a pack in the cloned repository. used, neither remote-tracking branches nor the related configuration variables are created. +--sparse:: + Initialize the sparse-checkout file so the working + directory starts with only the files in the root + of the repository. The sparse-checkout file can be + modified to grow the working directory as needed. + --mirror:: Set up a mirror of the source repository. This implies `--bare`. 
Compared to `--bare`, `--mirror` not only maps local branches of the diff --git a/Documentation/git-read-tree.txt b/Documentation/git-read-tree.txt index d2718426085613..da33f84f33d2c5 100644 --- a/Documentation/git-read-tree.txt +++ b/Documentation/git-read-tree.txt @@ -436,7 +436,7 @@ support. SEE ALSO -------- linkgit:git-write-tree[1]; linkgit:git-ls-files[1]; -linkgit:gitignore[5] +linkgit:gitignore[5]; linkgit:git-sparse-checkout[1]; GIT --- diff --git a/Documentation/git-sparse-checkout.txt b/Documentation/git-sparse-checkout.txt new file mode 100644 index 00000000000000..7ade82737079f3 --- /dev/null +++ b/Documentation/git-sparse-checkout.txt @@ -0,0 +1,146 @@ +git-sparse-checkout(1) +======================= + +NAME +---- +git-sparse-checkout - Initialize and modify the sparse-checkout +configuration, which reduces the checkout to a set of directories +given by a list of prefixes. + + +SYNOPSIS +-------- +[verse] +'git sparse-checkout [options]' + + +DESCRIPTION +----------- + +Initialize and modify the sparse-checkout configuration, which reduces +the checkout to a set of directories given by a list of prefixes. + + +COMMANDS +-------- +'list':: + Provide a list of the contents in the sparse-checkout file. + +'init':: + Enable the `core.sparseCheckout` setting. If the + sparse-checkout file does not exist, then populate it with + patterns that match every file in the root directory and + no other directories, and then remove all directories tracked + by Git. Add patterns to the sparse-checkout file to + repopulate the working directory. + +'add':: + Add a set of patterns to the sparse-checkout file, as given over + stdin. Updates the working directory to match the new patterns. + +'disable':: + Remove the sparse-checkout file, set `core.sparseCheckout` to + `false`, and restore the working directory to include all files. + +SPARSE CHECKOUT +---------------- + +"Sparse checkout" allows populating the working directory sparsely. 
+It uses the skip-worktree bit (see linkgit:git-update-index[1]) to tell +Git whether a file in the working directory is worth looking at. If +the skip-worktree bit is set, then the file is ignored in the working +directory. Git will not populate the contents of those files, which +makes a sparse checkout helpful when working in a repository with many +files, but only a few are important to the current user. + +The `$GIT_DIR/info/sparse-checkout` file is used to define the +skip-worktree reference bitmap. When Git updates the working +directory, it resets the skip-worktree bit in the index based on this +file. If an entry +matches a pattern in this file, skip-worktree will not be set on +that entry. Otherwise, skip-worktree will be set. + +Then it compares the new skip-worktree value with the previous one. If +skip-worktree turns from set to unset, it will add the corresponding +file back. If it turns from unset to set, that file will be removed. + +To repopulate the working directory with all files, use the +`git sparse-checkout disable` command. + +Sparse checkout support in 'git read-tree' and similar commands is +disabled by default. You need to set `core.sparseCheckout` to `true` +in order to have sparse checkout support. + +## FULL PATTERN SET + +By default, the sparse-checkout file uses the same syntax as `.gitignore` +files. + +While `$GIT_DIR/info/sparse-checkout` is usually used to specify what +files are in, you can also specify what files are _not_ in, using +negate patterns. For example, to remove the file `unwanted`: + +---------------- +/* +!unwanted +---------------- + +## CONE PATTERN SET + +The full pattern set allows for arbitrary pattern matches and complicated +inclusion/exclusion rules. These can result in O(N*M) pattern matches when +updating the index, where N is the number of patterns and M is the number +of paths in the index. 
To combat this performance issue, a more restricted +pattern set is allowed when `core.sparseCheckout` is set to `cone`. + +The accepted patterns in the cone pattern set are: + +1. *Recursive:* All paths inside a directory are included. + +2. *Parent:* All files immediately inside a directory are included. + +In addition to the above two patterns, we also expect that all files in the +root directory are included. If a recursive pattern is added, then all +leading directories are added as parent patterns. + +By default, when running `git sparse-checkout init`, the root directory is +added as a parent pattern. At this point, the sparse-checkout file contains +the following patterns: + +``` +/* +!/*/* +``` + +This says "include everything in root, but nothing two levels below root." +If we then add the folder `A/B/C` as a recursive pattern, the folders `A` and +`A/B` are added as parent patterns. The resulting sparse-checkout file is +now + +``` +/* +!/*/* +/A/* +!/A/*/* +/A/B/* +!/A/B/*/* +/A/B/C/* +``` + +Here, order matters, so the negative patterns are overridden by the positive +patterns that appear lower in the file. + +If `core.sparseCheckout=cone`, then Git will parse the sparse-checkout file +expecting patterns of these types. Git will warn if the patterns do not match. +If the patterns do match the expected format, then Git will use faster hash- +based algorithms to compute inclusion in the sparse-checkout. 
+ +SEE ALSO +-------- + +linkgit:git-read-tree[1] +linkgit:gitignore[5] + +GIT +--- +Part of the linkgit:git[1] suite diff --git a/Makefile b/Makefile index 7bc28b0f145332..bbf7fa076e7c24 100644 --- a/Makefile +++ b/Makefile @@ -1136,6 +1136,7 @@ BUILTIN_OBJS += builtin/shortlog.o BUILTIN_OBJS += builtin/show-branch.o BUILTIN_OBJS += builtin/show-index.o BUILTIN_OBJS += builtin/show-ref.o +BUILTIN_OBJS += builtin/sparse-checkout.o BUILTIN_OBJS += builtin/stash.o BUILTIN_OBJS += builtin/stripspace.o BUILTIN_OBJS += builtin/submodule--helper.o diff --git a/builtin.h b/builtin.h index 5cf5df69f72fd5..2b25a80cde37b4 100644 --- a/builtin.h +++ b/builtin.h @@ -225,6 +225,7 @@ int cmd_shortlog(int argc, const char **argv, const char *prefix); int cmd_show(int argc, const char **argv, const char *prefix); int cmd_show_branch(int argc, const char **argv, const char *prefix); int cmd_show_index(int argc, const char **argv, const char *prefix); +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix); int cmd_status(int argc, const char **argv, const char *prefix); int cmd_stash(int argc, const char **argv, const char *prefix); int cmd_stripspace(int argc, const char **argv, const char *prefix); diff --git a/builtin/clone.c b/builtin/clone.c index 7783e58b3c8abf..b074de5be14134 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -60,6 +60,7 @@ static const char *real_git_dir; static char *option_upload_pack = "git-upload-pack"; static int option_verbosity; static int option_progress = -1; +static int option_sparse_checkout; static enum transport_family family; static struct string_list option_config = STRING_LIST_INIT_NODUP; static struct string_list option_required_reference = STRING_LIST_INIT_NODUP; @@ -147,6 +148,8 @@ static struct option builtin_clone_options[] = { OPT_PARSE_LIST_OBJECTS_FILTER(&filter_options), OPT_BOOL(0, "remote-submodules", &option_remote_submodules, N_("any cloned submodules will use their remote-tracking branch")), + OPT_BOOL(0, 
"sparse", &option_sparse_checkout, + N_("initialize sparse-checkout file to include only files at root")), OPT_END() }; @@ -734,6 +737,27 @@ static void update_head(const struct ref *our, const struct ref *remote, } } +static int git_sparse_checkout_init(const char *repo) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "-C", repo, "sparse-checkout", "init", NULL); + + /* + * We must apply the setting in the current process + * for the later checkout to use the sparse-checkout file. + */ + core_sparse_checkout = SPARSE_CHECKOUT_FULL; + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to initialize sparse-checkout")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + static int checkout(int submodule_progress) { struct object_id oid; @@ -1108,6 +1132,9 @@ int cmd_clone(int argc, const char **argv, const char *prefix) if (option_required_reference.nr || option_optional_reference.nr) setup_reference(); + if (option_sparse_checkout && git_sparse_checkout_init(repo)) + return 1; + remote = remote_get(option_origin); strbuf_addf(&default_refspec, "+%s*:%s*", src_ref_prefix, diff --git a/builtin/reset.c b/builtin/reset.c index 71463ef63845f9..e4af64d3e85adf 100644 --- a/builtin/reset.c +++ b/builtin/reset.c @@ -151,7 +151,7 @@ static void update_index_from_diff(struct diff_queue_struct *q, * directory so that they will have the right content and the next * status call will show modified or untracked files correctly. 
*/ - if (core_apply_sparse_checkout && !file_exists(two->path)) + if (core_sparse_checkout && !file_exists(two->path)) { pos = cache_name_pos(two->path, strlen(two->path)); if ((pos >= 0 && ce_skip_worktree(active_cache[pos])) && (is_missing || !was_missing)) diff --git a/builtin/sparse-checkout.c b/builtin/sparse-checkout.c new file mode 100644 index 00000000000000..0a4e101ddd2ab1 --- /dev/null +++ b/builtin/sparse-checkout.c @@ -0,0 +1,389 @@ +#include "builtin.h" +#include "config.h" +#include "dir.h" +#include "parse-options.h" +#include "pathspec.h" +#include "repository.h" +#include "run-command.h" +#include "strbuf.h" +#include "string-list.h" + +static char const * const builtin_sparse_checkout_usage[] = { + N_("git sparse-checkout [init|add|list|disable]"), + NULL +}; + +static const char * const builtin_sparse_checkout_init_usage[] = { + N_("git sparse-checkout init [--cone]"), + NULL +}; + +struct opts_sparse_checkout { + const char *subcommand; + int read_stdin; + int cone; +} opts; + +static char *get_sparse_checkout_filename(void) +{ + return git_pathdup("info/sparse-checkout"); +} + +static void write_excludes_to_file(FILE *fp, struct exclude_list *el) +{ + int i; + + for (i = 0; i < el->nr; i++) { + struct exclude *x = el->excludes[i]; + + if (x->flags & EXC_FLAG_NEGATIVE) + fprintf(fp, "!"); + + fprintf(fp, "%s", x->pattern); + + if (x->flags & EXC_FLAG_MUSTBEDIR) + fprintf(fp, "/"); + + fprintf(fp, "\n"); + } +} + +static void write_cone_to_file(FILE *fp, struct exclude_list *el) +{ + int i; + struct exclude_entry *entry; + struct hashmap_iter iter; + struct string_list sl = STRING_LIST_INIT_DUP; + + hashmap_iter_init(&el->parent_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + 
string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + + if (!strcmp(pattern, "")) + fprintf(fp, "/*\n!/*/*\n"); + else + fprintf(fp, "/%s/*\n!/%s/*/*\n", pattern, pattern); + } + + string_list_clear(&sl, 0); + + hashmap_iter_init(&el->recursive_hashmap, &iter); + while ((entry = hashmap_iter_next(&iter))) { + char *pattern = xstrdup(entry->pattern); + char *converted = pattern; + if (pattern[0] == '/') + converted++; + if (pattern[entry->patternlen - 1] == '/') + pattern[entry->patternlen - 1] = 0; + string_list_insert(&sl, converted); + free(pattern); + } + + string_list_sort(&sl); + string_list_remove_duplicates(&sl, 0); + + for (i = 0; i < sl.nr; i++) { + char *pattern = sl.items[i].string; + fprintf(fp, "/%s/*\n", pattern); + } +} + +static int sparse_checkout_list(int argc, const char **argv) +{ + struct exclude_list el; + char *sparse_filename; + int res; + + memset(&el, 0, sizeof(el)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL); + free(sparse_filename); + + if (res < 0) { + warning(_("failed to parse sparse-checkout file; it may not exist")); + return 0; + } + + write_excludes_to_file(stdout, &el); + clear_exclude_list(&el); + + return 0; +} + +static int sc_read_tree(void) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "read-tree", "-m", "-u", "HEAD", NULL); + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to update index with new sparse-checkout paths")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +static int sc_set_config(enum sparse_checkout_mode mode) +{ + struct argv_array argv = ARGV_ARRAY_INIT; + int result = 0; + argv_array_pushl(&argv, "config", "--add", "core.sparseCheckout", NULL); + + switch (mode) { + case SPARSE_CHECKOUT_FULL: + argv_array_pushl(&argv, "true", NULL); + break; + + case 
SPARSE_CHECKOUT_CONE: + argv_array_pushl(&argv, "cone", NULL); + break; + + case SPARSE_CHECKOUT_NONE: + argv_array_pushl(&argv, "false", NULL); + break; + + default: + die(_("invalid config mode")); + } + + if (run_command_v_opt(argv.argv, RUN_GIT_CMD)) { + error(_("failed to enable core.sparseCheckout")); + result = 1; + } + + argv_array_clear(&argv); + return result; +} + +static int delete_directory(const struct object_id *oid, struct strbuf *base, + const char *pathname, unsigned mode, int stage, void *context) +{ + struct strbuf dirname = STRBUF_INIT; + struct stat sb; + + strbuf_addstr(&dirname, the_repository->worktree); + strbuf_addch(&dirname, '/'); + strbuf_addstr(&dirname, pathname); + + if (stat(dirname.buf, &sb) || !(sb.st_mode & S_IFDIR)) + return 0; + + if (remove_dir_recursively(&dirname, 0)) + warning(_("failed to remove directory '%s'"), + dirname.buf); + + strbuf_release(&dirname); + return 0; +} + +static int sparse_checkout_init(int argc, const char **argv) +{ + struct tree *t; + struct object_id oid; + struct exclude_list el; + static struct pathspec pathspec; + char *sparse_filename; + FILE *fp; + int res; + enum sparse_checkout_mode mode; + + static struct option builtin_sparse_checkout_init_options[] = { + OPT_BOOL(0, "cone", &opts.cone, + N_("initialize the sparse-checkout in cone mode")), + OPT_END(), + }; + + argc = parse_options(argc, argv, NULL, + builtin_sparse_checkout_init_options, + builtin_sparse_checkout_init_usage, 0); + + mode = opts.cone ? SPARSE_CHECKOUT_CONE : SPARSE_CHECKOUT_FULL; + + if (sc_set_config(mode)) + return 1; + + memset(&el, 0, sizeof(el)); + + sparse_filename = get_sparse_checkout_filename(); + res = add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL); + + /* If we already have a sparse-checkout file, use it. 
*/ + if (res >= 0) { + free(sparse_filename); + goto reset_dir; + } + + /* initial mode: all blobs at root */ + fp = fopen(sparse_filename, "w"); + free(sparse_filename); + fprintf(fp, "/*\n!/*/*\n"); + fclose(fp); + + /* remove all directories in the root, if tracked by Git */ + if (get_oid("HEAD", &oid)) { + /* assume we are in a fresh repo */ + return 0; + } + + t = parse_tree_indirect(&oid); + + parse_pathspec(&pathspec, PATHSPEC_ALL_MAGIC & + ~(PATHSPEC_FROMTOP | PATHSPEC_LITERAL), + PATHSPEC_PREFER_CWD, + "", NULL); + + if (read_tree_recursive(the_repository, t, "", 0, 0, &pathspec, + delete_directory, NULL)) + return 1; + +reset_dir: + return sc_read_tree(); +} + +static void insert_recursive_pattern(struct exclude_list *el, struct strbuf *path) +{ + struct exclude_entry *e = xmalloc(sizeof(struct exclude_entry)); + e->patternlen = path->len; + e->pattern = strbuf_detach(path, NULL); + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + hashmap_add(&el->recursive_hashmap, e); + + while (e->patternlen) { + char *slash = strrchr(e->pattern, '/'); + char *oldpattern = e->pattern; + size_t newlen; + + if (!slash) + break; + + newlen = slash - e->pattern; + e = xmalloc(sizeof(struct exclude_entry)); + e->patternlen = newlen; + e->pattern = xstrndup(oldpattern, newlen); + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get(&el->parent_hashmap, e, NULL)) + hashmap_add(&el->parent_hashmap, e); + } +} + +static int sparse_checkout_add(int argc, const char **argv) +{ + struct exclude_list el; + char *sparse_filename; + FILE *fp; + struct strbuf line = STRBUF_INIT; + + memset(&el, 0, sizeof(el)); + + sparse_filename = get_sparse_checkout_filename(); + add_excludes_from_file_to_list(sparse_filename, "", 0, &el, NULL); + + fp = fopen(sparse_filename, "w"); + + if (core_sparse_checkout == SPARSE_CHECKOUT_FULL) { + write_excludes_to_file(fp, &el); + + while (!strbuf_getline(&line, stdin)) { + strbuf_trim(&line); + fprintf(fp, "%s\n", 
line.buf); + } + } else if (core_sparse_checkout == SPARSE_CHECKOUT_CONE) { + while (!strbuf_getline(&line, stdin)) { + strbuf_trim(&line); + + strbuf_trim_trailing_dir_sep(&line); + + if (!line.len) + continue; + + if (line.buf[0] == '/') + strbuf_remove(&line, 0, 1); + + if (!line.len) + continue; + + insert_recursive_pattern(&el, &line); + } + + write_cone_to_file(fp, &el); + } + + fclose(fp); + free(sparse_filename); + + clear_exclude_list(&el); + + return sc_read_tree(); +} + +static int sparse_checkout_disable(int argc, const char **argv) +{ + char *sparse_filename; + FILE *fp; + + if (sc_set_config(SPARSE_CHECKOUT_FULL)) + die(_("failed to change config")); + + sparse_filename = get_sparse_checkout_filename(); + fp = fopen(sparse_filename, "w"); + fprintf(fp, "/*\n"); + fclose(fp); + + if (sc_read_tree()) + die(_("error while refreshing working directory")); + + unlink(sparse_filename); + free(sparse_filename); + + return sc_set_config(SPARSE_CHECKOUT_NONE); +} + +int cmd_sparse_checkout(int argc, const char **argv, const char *prefix) +{ + static struct option builtin_sparse_checkout_options[] = { + OPT_END(), + }; + + if (argc == 2 && !strcmp(argv[1], "-h")) + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); + + git_config(git_default_config, NULL); + argc = parse_options(argc, argv, prefix, + builtin_sparse_checkout_options, + builtin_sparse_checkout_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + + if (argc > 0) { + if (!strcmp(argv[0], "list")) + return sparse_checkout_list(argc, argv); + if (!strcmp(argv[0], "init")) + return sparse_checkout_init(argc, argv); + if (!strcmp(argv[0], "add")) + return sparse_checkout_add(argc, argv); + if (!strcmp(argv[0], "disable")) + return sparse_checkout_disable(argc, argv); + } + + usage_with_options(builtin_sparse_checkout_usage, + builtin_sparse_checkout_options); +} diff --git a/cache.h b/cache.h index b2d900a9a51a32..4ab70eeba9baf0 100644 --- a/cache.h +++ b/cache.h @@ -865,7 
+865,6 @@ extern char *git_replace_ref_base; extern int fsync_object_files; extern int core_preload_index; -extern int core_apply_sparse_checkout; extern const char *core_virtualfilesystem; extern int core_gvfs; extern int precomposed_unicode; @@ -873,6 +872,13 @@ extern int protect_hfs; extern int protect_ntfs; extern const char *core_fsmonitor; +enum sparse_checkout_mode { + SPARSE_CHECKOUT_NONE = 0, + SPARSE_CHECKOUT_FULL = 1, + SPARSE_CHECKOUT_CONE = 2, +}; +enum sparse_checkout_mode core_sparse_checkout; + /* * Include broken refs in all ref iterations, which will * generally choke dangerous operations rather than letting diff --git a/config.c b/config.c index 897212f175d839..e12c5472d82c11 100644 --- a/config.c +++ b/config.c @@ -1366,11 +1366,22 @@ static int git_default_core_config(const char *var, const char *value, void *cb) } if (!strcmp(var, "core.sparsecheckout")) { + int result = git_parse_maybe_bool(value); + /* virtual file system relies on the sparse checkout logic so force it on */ - if (core_virtualfilesystem) - core_apply_sparse_checkout = 1; - else - core_apply_sparse_checkout = git_config_bool(var, value); + if (core_virtualfilesystem) { + core_sparse_checkout = SPARSE_CHECKOUT_FULL; + return 0; + } + + if (result < 0) { + core_sparse_checkout = SPARSE_CHECKOUT_NONE; + + if (!strcasecmp(value, "cone")) + core_sparse_checkout = SPARSE_CHECKOUT_CONE; + } else + core_sparse_checkout = result; + return 0; } @@ -2393,7 +2404,7 @@ int git_config_get_virtualfilesystem(void) free(default_index_file); if (should_run_hook) { /* virtual file system relies on the sparse checkout logic so force it on */ - core_apply_sparse_checkout = 1; + core_sparse_checkout = SPARSE_CHECKOUT_FULL; return 1; } core_virtualfilesystem = NULL; diff --git a/dir.c b/dir.c index 97f31e7ab2e858..d4267e234b69a5 100644 --- a/dir.c +++ b/dir.c @@ -600,6 +600,99 @@ void parse_exclude_pattern(const char **pattern, *patternlen = len; } +static int el_hashmap_cmp(const void 
*unused_cmp_data, + const void *a, const void *b, const void *key) +{ + const struct exclude_entry *ee1 = a; + const struct exclude_entry *ee2 = b; + + return strncmp(ee1->pattern, ee2->pattern, ee1->patternlen); +} + +static void add_exclude_to_hashsets(struct exclude_list *el, struct exclude *x) +{ + struct exclude_entry *e; + char *truncated; + char *data = NULL; + + if (!el->use_cone_patterns) + return; + + if (x->patternlen >= 4 && + !strcmp(x->pattern + x->patternlen - 4, "/*/*")) { + if (!(x->flags & EXC_FLAG_NEGATIVE)) { + /* Not a cone pattern. */ + el->use_cone_patterns = 0; + warning(_("unrecognized pattern: '%s'"), x->pattern); + goto clear_hashmaps; + } + + truncated = xstrdup(x->pattern); + truncated[x->patternlen - 4] = 0; + + e = xmalloc(sizeof(struct exclude_entry)); + e->pattern = truncated; + e->patternlen = x->patternlen - 4; + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + if (!hashmap_get(&el->recursive_hashmap, e, NULL)) { + /* We did not see the "parent" included */ + warning(_("unrecognized negative pattern: '%s'"), x->pattern); + free(truncated); + goto clear_hashmaps; + } + + hashmap_add(&el->parent_hashmap, e); + hashmap_remove(&el->recursive_hashmap, e, &data); + free(data); + return; + } + + if (x->patternlen >= 2 && + !strcmp(x->pattern + x->patternlen - 2, "/*")) { + if (x->flags & EXC_FLAG_NEGATIVE) { + warning(_("unrecognized negative pattern: '%s'"), x->pattern); + goto clear_hashmaps; + } + + e = xmalloc(sizeof(struct exclude_entry)); + + truncated = xstrdup(x->pattern); + truncated[x->patternlen - 2] = 0; + e->pattern = truncated; + e->patternlen = x->patternlen - 2; + hashmap_entry_init(e, memhash(e->pattern, e->patternlen)); + + hashmap_add(&el->recursive_hashmap, e); + + if (hashmap_get(&el->parent_hashmap, e, NULL)) { + /* we already included this at the parent level */ + warning(_("your sparse-checkout file may have issues: pattern '%s' is repeated"), + x->pattern); + hashmap_remove(&el->parent_hashmap, e, 
&data); + free(data); + } + return; + } + +clear_hashmaps: + hashmap_free(&el->parent_hashmap, 1); + hashmap_free(&el->recursive_hashmap, 1); + el->use_cone_patterns = 0; +} + +static int hashmap_contains_path(struct hashmap *map, + struct strbuf *pattern) +{ + struct exclude_entry e; + + /* Check straight mapping */ + e.pattern = pattern->buf; + e.patternlen = pattern->len; + hashmap_entry_init(&e, memhash(e.pattern, e.patternlen)); + return !!hashmap_get(map, &e, NULL); +} + void add_exclude(const char *string, const char *base, int baselen, struct exclude_list *el, int srcpos) { @@ -624,6 +717,8 @@ void add_exclude(const char *string, const char *base, ALLOC_GROW(el->excludes, el->nr + 1, el->alloc); el->excludes[el->nr++] = x; x->el = el; + + add_exclude_to_hashsets(el, x); } static int read_skip_worktree_file_from_index(const struct index_state *istate, @@ -892,6 +987,10 @@ static int add_excludes_from_buffer(char *buf, size_t size, int i, lineno = 1; char *entry; + el->use_cone_patterns = core_sparse_checkout == SPARSE_CHECKOUT_CONE ? 1 : 0; + hashmap_init(&el->recursive_hashmap, el_hashmap_cmp, NULL, 0); + hashmap_init(&el->parent_hashmap, el_hashmap_cmp, NULL, 0); + el->filebuf = buf; if (skip_utf8_bom(&buf, size)) @@ -1114,13 +1213,16 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname, /* * Scan the list and let the last match determine the fate. - * Return 1 for exclude, 0 for include and -1 for undecided. + * Return 0 for exclude, 1 for include and -1 for undecided. 
*/ int is_excluded_from_list(const char *pathname, int pathlen, const char *basename, int *dtype, struct exclude_list *el, struct index_state *istate) { struct exclude *exclude; + struct strbuf parent_pathname = STRBUF_INIT; + int result = 0; + const char *slash_pos; /* * The virtual file system data is used to prevent git from traversing @@ -1133,11 +1235,50 @@ int is_excluded_from_list(const char *pathname, if (is_excluded_from_virtualfilesystem(pathname, pathlen, *dtype) > 0) return 1; - exclude = last_exclude_matching_from_list(pathname, pathlen, basename, - dtype, el, istate); - if (exclude) - return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; - return -1; /* undecided */ + if (!el->use_cone_patterns) { + exclude = last_exclude_matching_from_list(pathname, pathlen, basename, + dtype, el, istate); + + if (exclude) + return exclude->flags & EXC_FLAG_NEGATIVE ? 0 : 1; + + return -1; /* undecided */ + } + + strbuf_addch(&parent_pathname, '/'); + strbuf_add(&parent_pathname, pathname, pathlen); + slash_pos = strrchr(parent_pathname.buf, '/'); + + if (slash_pos == parent_pathname.buf) { + /* include every file in root */ + result = 1; + goto done; + } + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + + if (hashmap_contains_path(&el->parent_hashmap, &parent_pathname)) { + result = 1; + goto done; + } + + while (parent_pathname.len) { + if (hashmap_contains_path(&el->recursive_hashmap, + &parent_pathname)) { + result = -1; + goto done; + } + + slash_pos = strrchr(parent_pathname.buf, '/'); + if (slash_pos == parent_pathname.buf) + break; + + strbuf_setlen(&parent_pathname, slash_pos - parent_pathname.buf); + } + +done: + strbuf_release(&parent_pathname); + return result; } static struct exclude *last_exclude_matching_from_lists(struct dir_struct *dir, diff --git a/dir.h b/dir.h index 680079bbe3241f..2d3356d1c0a131 100644 --- a/dir.h +++ b/dir.h @@ -4,6 +4,7 @@ /* See Documentation/technical/api-directory-listing.txt */ #include "cache.h" 
+#include "hashmap.h" #include "strbuf.h" struct dir_entry { @@ -37,6 +38,13 @@ struct exclude { int srcpos; }; +/* used for hashmaps for cone patterns */ +struct exclude_entry { + struct hashmap_entry ent; + char *pattern; + size_t patternlen; +}; + /* * Each excludes file will be parsed into a fresh exclude_list which * is appended to the relevant exclude_list_group (either EXC_DIRS or @@ -55,6 +63,25 @@ struct exclude_list { const char *src; struct exclude **excludes; + + /* + * While scanning the excludes, we attempt to match the patterns + * with a more restricted set that allows us to use hashsets for + * matching logic, which is faster than the linear lookup in the + * excludes array above. If non-zero, that check succeeded. + */ + unsigned use_cone_patterns; + + /* + * Stores paths where everything starting with those paths + * is included. + */ + struct hashmap recursive_hashmap; + + /* + * Used to check single-level parents of blobs. + */ + struct hashmap parent_hashmap; }; /* diff --git a/environment.c b/environment.c index 23ca0ffdcf5e8c..a9086559a56c32 100644 --- a/environment.c +++ b/environment.c @@ -68,7 +68,7 @@ enum push_default_type push_default = PUSH_DEFAULT_UNSPECIFIED; enum object_creation_mode object_creation_mode = OBJECT_CREATION_MODE; char *notes_ref_name; int grafts_replace_parents = 1; -int core_apply_sparse_checkout; +enum sparse_checkout_mode core_sparse_checkout; int core_gvfs; const char *core_virtualfilesystem; int merge_log_config = -1; diff --git a/git.c b/git.c index 098a5e6a7c846d..f26a79e84aff08 100644 --- a/git.c +++ b/git.c @@ -642,6 +642,7 @@ static struct cmd_struct commands[] = { { "show-branch", cmd_show_branch, RUN_SETUP }, { "show-index", cmd_show_index }, { "show-ref", cmd_show_ref, RUN_SETUP }, + { "sparse-checkout", cmd_sparse_checkout, RUN_SETUP | NEED_WORK_TREE }, { "stage", cmd_add, RUN_SETUP | NEED_WORK_TREE }, /* * NEEDSWORK: Until the builtin stash is thoroughly robust and no diff --git 
a/t/t1091-sparse-checkout-builtin.sh b/t/t1091-sparse-checkout-builtin.sh new file mode 100755 index 00000000000000..3412bafdff5521 --- /dev/null +++ b/t/t1091-sparse-checkout-builtin.sh @@ -0,0 +1,195 @@ +#!/bin/sh + +test_description='sparse checkout builtin tests' + +. ./test-lib.sh + +test_expect_success 'setup' ' + git init repo && + ( + cd repo && + echo "initial" >a && + mkdir folder1 folder2 deep && + mkdir deep/deeper1 deep/deeper2 && + mkdir deep/deeper1/deepest && + cp a folder1 && + cp a folder2 && + cp a deep && + cp a deep/deeper1 && + cp a deep/deeper2 && + cp a deep/deeper1/deepest && + git add . && + git commit -m "initial commit" + ) +' + +test_expect_success 'git sparse-checkout list (empty)' ' + git -C repo sparse-checkout list >list 2>err && + test_line_count = 0 list && + test_i18ngrep "failed to parse sparse-checkout file; it may not exist" err +' + +test_expect_success 'git sparse-checkout list (populated)' ' + test_when_finished rm -f repo/.git/info/sparse-checkout && + cat >repo/.git/info/sparse-checkout <<-EOF && + /folder1/* + /deep/ + **/a + !*bin* + EOF + git -C repo sparse-checkout list >list && + cat >expect <<-EOF && + /folder1/* + /deep/ + **/a + !*bin* + EOF + test_cmp expect list +' + +test_expect_success 'git sparse-checkout init' ' + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/* + EOF + test_cmp expect repo/.git/info/sparse-checkout && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckout=true" config && + ls repo >dir && + echo a >expect && + test_cmp expect dir +' + +test_expect_success 'git sparse-checkout list after init' ' + git -C repo sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/* + EOF + test_cmp expect actual +' + +test_expect_success 'init with existing sparse-checkout' ' + echo "/folder1/*" >> repo/.git/info/sparse-checkout && + git -C repo sparse-checkout init && + cat >expect <<-EOF && + /* + !/*/* + /folder1/* + EOF + test_cmp expect 
repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + EOF + test_cmp expect dir +' + +test_expect_success 'clone --sparse' ' + git clone --sparse repo clone && + git -C clone sparse-checkout list >actual && + cat >expect <<-EOF && + /* + !/*/* + EOF + test_cmp expect actual && + ls clone >dir && + echo a >expect && + test_cmp expect dir +' + +test_expect_success 'add to existing sparse-checkout' ' + echo "/folder2/*" | git -C repo sparse-checkout add && + cat >expect <<-EOF && + /* + !/*/* + /folder1/* + /folder2/* + EOF + git -C repo sparse-checkout list >actual && + test_cmp expect actual && + test_cmp expect repo/.git/info/sparse-checkout && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: match patterns' ' + git -C repo config --replace-all core.sparseCheckout cone && + rm -rf repo/a repo/folder1 repo/folder2 && + git -C repo read-tree -mu HEAD && + git -C repo reset --hard && + ls repo >dir && + cat >expect <<-EOF && + a + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: warn on bad pattern' ' + test_when_finished mv sparse-checkout repo/.git/info && + cp repo/.git/info/sparse-checkout . 
&& + echo "!/deep/deeper/*" >>repo/.git/info/sparse-checkout && + git -C repo read-tree -mu HEAD 2>err && + test_i18ngrep "unrecognized negative pattern" err +' + +test_expect_success 'sparse-checkout disable' ' + git -C repo sparse-checkout disable && + test_path_is_missing repo/.git/info/sparse-checkout && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckout=false" config && + ls repo >dir && + cat >expect <<-EOF && + a + deep + folder1 + folder2 + EOF + test_cmp expect dir +' + +test_expect_success 'cone mode: init and add' ' + git -C repo sparse-checkout init --cone && + git -C repo config --list >config && + test_i18ngrep "core.sparsecheckout=cone" config && + ls repo >dir && + echo a >expect && + test_cmp expect dir && + echo deep/deeper1/deepest | git -C repo sparse-checkout add && + ls repo >dir && + cat >expect <<-EOF && + a + deep + EOF + ls repo/deep >dir && + cat >expect <<-EOF && + a + deeper1 + EOF + ls repo/deep/deeper1 >dir && + cat >expect <<-EOF && + a + deepest + EOF + test_cmp expect dir && + cat >expect <<-EOF && + /* + !/*/* + /deep/* + !/deep/*/* + /deep/deeper1/* + !/deep/deeper1/*/* + /deep/deeper1/deepest/* + EOF + test_cmp expect repo/.git/info/sparse-checkout +' +test_done \ No newline at end of file diff --git a/unpack-trees.c b/unpack-trees.c index fbfef582b95cf5..25de65dc4e21d4 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -1434,6 +1434,7 @@ static int clear_ce_flags(struct index_state *istate, xsnprintf(label, sizeof(label), "clear_ce_flags/0x%08lx_0x%08lx", (unsigned long)select_mask, (unsigned long)clear_mask); trace2_region_enter("unpack_trees", label, the_repository); + rval = clear_ce_flags_1(istate, istate->cache, istate->cache_nr, @@ -1503,7 +1504,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options trace_performance_enter(); memset(&el, 0, sizeof(el)); - if (!core_apply_sparse_checkout || !o->update) + if (!core_sparse_checkout || !o->update) o->skip_sparse_checkout 
= 1; if (!o->skip_sparse_checkout) { if (core_virtualfilesystem) {