Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce an fsck API #4

Closed
wants to merge 19 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions Documentation/config.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1250,6 +1250,25 @@ filter.<driver>.smudge::
object to a worktree file upon checkout. See
linkgit:gitattributes[5] for details.

fsck.<msg-id>::
Allows overriding the message type (error, warn or ignore) of a
specific message ID such as `missingEmail`.
+
For convenience, fsck prefixes the error/warning with the message ID;
e.g. the message "missingEmail: invalid author/committer line - missing
email" means that setting `fsck.missingEmail = ignore` will hide that issue.
+
This feature is intended to support working with legacy repositories
which cannot be repaired without disruptive changes.

fsck.skipList::
The path to a sorted list of object names (i.e. one SHA-1 per
line) that are known to be broken in a non-fatal way and should
be ignored. This feature is useful when an established project
should be accepted despite early commits containing errors that
can be safely ignored, such as invalid committer email addresses.
Note: corrupt objects cannot be skipped with this setting.

gc.aggressiveDepth::
The depth parameter used in the delta compression
algorithm used by 'git gc --aggressive'. This defaults
Expand Down Expand Up @@ -2205,6 +2224,28 @@ receive.fsckObjects::
Defaults to false. If not set, the value of `transfer.fsckObjects`
is used instead.

receive.fsck.<msg-id>::
When `receive.fsckObjects` is set to true, errors can be switched
to warnings and vice versa by configuring the `receive.fsck.<msg-id>`
setting, where `<msg-id>` is the fsck message ID and the value
is one of `error`, `warn` or `ignore`. For convenience, fsck prefixes
the error/warning with the message ID; e.g. the message "missingEmail:
invalid author/committer line - missing email" means that setting
`receive.fsck.missingEmail = ignore` will hide that issue.
+
This feature is intended to support working with legacy repositories
which would otherwise be rejected on push when `receive.fsckObjects = true`,
allowing the host to accept repositories with certain known issues but
still catch other issues.

receive.fsck.skipList::
The path to a sorted list of object names (i.e. one SHA-1 per
line) that are known to be broken in a non-fatal way and should
be ignored. This feature is useful when an established project
should be accepted despite early commits containing errors that
can be safely ignored, such as invalid committer email addresses.
Note: corrupt objects cannot be skipped with this setting.

receive.unpackLimit::
If the number of objects received in a push is below this
limit then the objects will be unpacked into loose object
Expand Down
7 changes: 6 additions & 1 deletion Documentation/git-fsck.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ SYNOPSIS
[verse]
'git fsck' [--tags] [--root] [--unreachable] [--cache] [--no-reflogs]
[--[no-]full] [--strict] [--verbose] [--lost-found]
[--[no-]dangling] [--[no-]progress] [<object>*]
[--[no-]dangling] [--[no-]progress] [--connectivity-only] [<object>*]

DESCRIPTION
-----------
Expand Down Expand Up @@ -60,6 +60,11 @@ index file, all SHA-1 references in `refs` namespace, and all reflogs
object pools. This is now default; you can turn it off
with --no-full.

--connectivity-only::
Check only the connectivity of tags, commits and tree objects. By
not unpacking blobs, this speeds up the operation, at the
expense of missing corrupt objects or other problematic issues.

--strict::
Enable more strict checking, namely to catch a file mode
recorded with g+w bit set, which was created by older
Expand Down
78 changes: 53 additions & 25 deletions builtin/fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,11 @@ static int show_tags;
static int show_unreachable;
static int include_reflogs = 1;
static int check_full = 1;
static int connectivity_only;
static int check_strict;
static int keep_cache_objects;
static struct fsck_options fsck_walk_options = FSCK_OPTIONS_DEFAULT;
static struct fsck_options fsck_obj_options = FSCK_OPTIONS_DEFAULT;
static struct object_id head_oid;
static const char *head_points_at;
static int errors_found;
Expand All @@ -44,39 +47,52 @@ static int show_dangling = 1;
#define DIRENT_SORT_HINT(de) ((de)->d_ino)
#endif

static void objreport(struct object *obj, const char *severity,
const char *err, va_list params)
static int fsck_config(const char *var, const char *value, void *cb)
{
fprintf(stderr, "%s in %s %s: ",
severity, typename(obj->type), sha1_to_hex(obj->sha1));
vfprintf(stderr, err, params);
fputs("\n", stderr);
if (strcmp(var, "fsck.skiplist") == 0) {
const char *path;
struct strbuf sb = STRBUF_INIT;

if (git_config_pathname(&path, var, value))
return 1;
strbuf_addf(&sb, "skiplist=%s", path);
free((char *)path);
fsck_set_msg_types(&fsck_obj_options, sb.buf);
strbuf_release(&sb);
return 0;
}

if (skip_prefix(var, "fsck.", &var)) {
fsck_set_msg_type(&fsck_obj_options, var, value);
return 0;
}

return git_default_config(var, value, cb);
}

static void objreport(struct object *obj, const char *msg_type,
const char *err)
{
fprintf(stderr, "%s in %s %s: %s\n",
msg_type, typename(obj->type), sha1_to_hex(obj->sha1), err);
}

__attribute__((format (printf, 2, 3)))
static int objerror(struct object *obj, const char *err, ...)
static int objerror(struct object *obj, const char *err)
{
va_list params;
va_start(params, err);
errors_found |= ERROR_OBJECT;
objreport(obj, "error", err, params);
va_end(params);
objreport(obj, "error", err);
return -1;
}

__attribute__((format (printf, 3, 4)))
static int fsck_error_func(struct object *obj, int type, const char *err, ...)
static int fsck_error_func(struct object *obj, int type, const char *message)
{
va_list params;
va_start(params, err);
objreport(obj, (type == FSCK_WARN) ? "warning" : "error", err, params);
va_end(params);
objreport(obj, (type == FSCK_WARN) ? "warning" : "error", message);
return (type == FSCK_WARN) ? 0 : 1;
}

static struct object_array pending;

static int mark_object(struct object *obj, int type, void *data)
static int mark_object(struct object *obj, int type, void *data, struct fsck_options *options)
{
struct object *parent = data;

Expand Down Expand Up @@ -119,7 +135,7 @@ static int mark_object(struct object *obj, int type, void *data)

static void mark_object_reachable(struct object *obj)
{
mark_object(obj, OBJ_ANY, NULL);
mark_object(obj, OBJ_ANY, NULL, NULL);
}

static int traverse_one_object(struct object *obj)
Expand All @@ -132,7 +148,7 @@ static int traverse_one_object(struct object *obj)
if (parse_tree(tree) < 0)
return 1; /* error already displayed */
}
result = fsck_walk(obj, mark_object, obj);
result = fsck_walk(obj, obj, &fsck_walk_options);
if (tree)
free_tree_buffer(tree);
return result;
Expand All @@ -158,7 +174,7 @@ static int traverse_reachable(void)
return !!result;
}

static int mark_used(struct object *obj, int type, void *data)
static int mark_used(struct object *obj, int type, void *data, struct fsck_options *options)
{
if (!obj)
return 1;
Expand All @@ -179,6 +195,8 @@ static void check_reachable_object(struct object *obj)
if (!(obj->flags & HAS_OBJ)) {
if (has_sha1_pack(obj->sha1))
return; /* it is in pack - forget about it */
if (connectivity_only && has_sha1_file(obj->sha1))
return;
printf("missing %s %s\n", typename(obj->type), sha1_to_hex(obj->sha1));
errors_found |= ERROR_REACHABLE;
return;
Expand Down Expand Up @@ -296,9 +314,9 @@ static int fsck_obj(struct object *obj)
fprintf(stderr, "Checking %s %s\n",
typename(obj->type), sha1_to_hex(obj->sha1));

if (fsck_walk(obj, mark_used, NULL))
if (fsck_walk(obj, NULL, &fsck_obj_options))
objerror(obj, "broken links");
if (fsck_object(obj, NULL, 0, check_strict, fsck_error_func))
if (fsck_object(obj, NULL, 0, &fsck_obj_options))
return -1;

if (obj->type == OBJ_TREE) {
Expand Down Expand Up @@ -621,6 +639,7 @@ static struct option fsck_opts[] = {
OPT_BOOL(0, "cache", &keep_cache_objects, N_("make index objects head nodes")),
OPT_BOOL(0, "reflogs", &include_reflogs, N_("make reflogs head nodes (default)")),
OPT_BOOL(0, "full", &check_full, N_("also consider packs and alternate objects")),
OPT_BOOL(0, "connectivity-only", &connectivity_only, N_("check only connectivity")),
OPT_BOOL(0, "strict", &check_strict, N_("enable more strict checking")),
OPT_BOOL(0, "lost-found", &write_lost_and_found,
N_("write dangling objects in .git/lost-found")),
Expand All @@ -638,6 +657,12 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)

argc = parse_options(argc, argv, prefix, fsck_opts, fsck_usage, 0);

fsck_walk_options.walk = mark_object;
fsck_obj_options.walk = mark_used;
fsck_obj_options.error_func = fsck_error_func;
if (check_strict)
fsck_obj_options.strict = 1;

if (show_progress == -1)
show_progress = isatty(2);
if (verbose)
Expand All @@ -648,8 +673,11 @@ int cmd_fsck(int argc, const char **argv, const char *prefix)
include_reflogs = 0;
}

git_config(fsck_config, NULL);

fsck_head_link();
fsck_object_dir(get_object_directory());
if (!connectivity_only)
fsck_object_dir(get_object_directory());

prepare_alt_odb();
for (alt = alt_odb_list; alt; alt = alt->next) {
Expand Down
13 changes: 9 additions & 4 deletions builtin/index-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,7 @@ static int nr_threads;
static int from_stdin;
static int strict;
static int do_fsck_object;
static struct fsck_options fsck_options = FSCK_OPTIONS_STRICT;
static int verbose;
static int show_stat;
static int check_self_contained_and_connected;
Expand Down Expand Up @@ -192,7 +193,7 @@ static void cleanup_thread(void)
#endif


static int mark_link(struct object *obj, int type, void *data)
static int mark_link(struct object *obj, int type, void *data, struct fsck_options *options)
{
if (!obj)
return -1;
Expand Down Expand Up @@ -838,10 +839,9 @@ static void sha1_object(const void *data, struct object_entry *obj_entry,
if (!obj)
die(_("invalid %s"), typename(type));
if (do_fsck_object &&
fsck_object(obj, buf, size, 1,
fsck_error_function))
fsck_object(obj, buf, size, &fsck_options))
die(_("Error in object"));
if (fsck_walk(obj, mark_link, NULL))
if (fsck_walk(obj, NULL, &fsck_options))
die(_("Not all child objects of %s are reachable"), sha1_to_hex(obj->sha1));

if (obj->type == OBJ_TREE) {
Expand Down Expand Up @@ -1615,6 +1615,7 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
usage(index_pack_usage);

check_replace_refs = 0;
fsck_options.walk = mark_link;

reset_pack_idx_option(&opts);
git_config(git_index_pack_config, &opts);
Expand All @@ -1632,6 +1633,10 @@ int cmd_index_pack(int argc, const char **argv, const char *prefix)
} else if (!strcmp(arg, "--strict")) {
strict = 1;
do_fsck_object = 1;
} else if (skip_prefix(arg, "--strict=", &arg)) {
strict = 1;
do_fsck_object = 1;
fsck_set_msg_types(&fsck_options, arg);
} else if (!strcmp(arg, "--check-self-contained-and-connected")) {
strict = 1;
check_self_contained_and_connected = 1;
Expand Down
28 changes: 26 additions & 2 deletions builtin/receive-pack.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "tag.h"
#include "gpg-interface.h"
#include "sigchain.h"
#include "fsck.h"

static const char receive_pack_usage[] = "git receive-pack <git-dir>";

Expand All @@ -36,6 +37,7 @@ static enum deny_action deny_current_branch = DENY_UNCONFIGURED;
static enum deny_action deny_delete_current = DENY_UNCONFIGURED;
static int receive_fsck_objects = -1;
static int transfer_fsck_objects = -1;
static struct strbuf fsck_msg_types = STRBUF_INIT;
static int receive_unpack_limit = -1;
static int transfer_unpack_limit = -1;
static int advertise_atomic_push = 1;
Expand Down Expand Up @@ -115,6 +117,26 @@ static int receive_pack_config(const char *var, const char *value, void *cb)
return 0;
}

if (strcmp(var, "receive.fsck.skiplist") == 0) {
const char *path;

if (git_config_pathname(&path, var, value))
return 1;
strbuf_addf(&fsck_msg_types, "%cskiplist=%s",
fsck_msg_types.len ? ',' : '=', path);
free((char *)path);
return 0;
}

if (skip_prefix(var, "receive.fsck.", &var)) {
if (is_valid_msg_type(var, value))
strbuf_addf(&fsck_msg_types, "%c%s=%s",
fsck_msg_types.len ? ',' : '=', var, value);
else
warning("Skipping unknown msg id '%s'", var);
return 0;
}

if (strcmp(var, "receive.fsckobjects") == 0) {
receive_fsck_objects = git_config_bool(var, value);
return 0;
Expand Down Expand Up @@ -1490,7 +1512,8 @@ static const char *unpack(int err_fd, struct shallow_info *si)
if (quiet)
argv_array_push(&child.args, "-q");
if (fsck_objects)
argv_array_push(&child.args, "--strict");
argv_array_pushf(&child.args, "--strict%s",
fsck_msg_types.buf);
child.no_stdout = 1;
child.err = err_fd;
child.git_cmd = 1;
Expand All @@ -1508,7 +1531,8 @@ static const char *unpack(int err_fd, struct shallow_info *si)
argv_array_pushl(&child.args, "index-pack",
"--stdin", hdr_arg, keep_arg, NULL);
if (fsck_objects)
argv_array_push(&child.args, "--strict");
argv_array_pushf(&child.args, "--strict%s",
fsck_msg_types.buf);
if (fix_thin)
argv_array_push(&child.args, "--fix-thin");
child.out = -1;
Expand Down
Loading