Skip to content

Commit

Permalink
fsck: don't hard die on invalid object types
Browse files Browse the repository at this point in the history
Change the error fsck emits on invalid object types, such as:

    $ git hash-object --stdin -w -t garbage --literally </dev/null
    <OID>

From the very ungraceful error of:

    $ git fsck
    fatal: invalid object type
    $

To:

    $ git fsck
    error: <OID>: object is of unknown type 'garbage': <OID_PATH>
    [ other fsck output ]

We'll still exit with non-zero, but now we'll finish the rest of the
traversal. The tests that's being added here asserts that we'll still
complain about other fsck issues (e.g. an unrelated dangling blob).

To do this we need to pass down the "OBJECT_INFO_ALLOW_UNKNOWN_TYPE"
flag from read_loose_object() through to parse_loose_header(). Since
the read_loose_object() function is only used in builtin/fsck.c we can
simply change it to accept a "struct object_info" (which contains the
OBJECT_INFO_ALLOW_UNKNOWN_TYPE in its flags). See
f6371f9 (sha1_file: add read_loose_object() function, 2017-01-13)
for the introduction of read_loose_object().

Since we'll need a "struct strbuf" to hold the "type_name" let's pass
it to the for_each_loose_file_in_objdir() callback to avoid allocating
a new one for each loose object in the iteration. It also makes the
memory management simpler than sticking it in fsck_loose() itself, as
we'll only need to strbuf_reset() it, with no need to do a
strbuf_release() before each "return".

Before this commit we'd never check the "type" if read_loose_object()
failed, but now we do. We therefore need to initialize it to OBJ_NONE
to be able to tell the difference between e.g. its
unpack_loose_header() having failed, and us getting past that and into
parse_loose_header().

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@gmail.com>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
  • Loading branch information
avar authored and gitster committed Oct 1, 2021
1 parent dccb32b commit 31deb28
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 30 deletions.
37 changes: 31 additions & 6 deletions builtin/fsck.c
Original file line number Diff line number Diff line change
Expand Up @@ -592,18 +592,36 @@ static void get_default_heads(void)
}
}

struct for_each_loose_cb
{
struct progress *progress;
struct strbuf obj_type;
};

static int fsck_loose(const struct object_id *oid, const char *path, void *data)
{
struct for_each_loose_cb *cb_data = data;
struct object *obj;
enum object_type type;
enum object_type type = OBJ_NONE;
unsigned long size;
void *contents;
int eaten;
struct object_info oi = OBJECT_INFO_INIT;
int err = 0;

if (read_loose_object(path, oid, &type, &size, &contents) < 0) {
strbuf_reset(&cb_data->obj_type);
oi.type_name = &cb_data->obj_type;
oi.sizep = &size;
oi.typep = &type;

if (read_loose_object(path, oid, &contents, &oi) < 0)
err = error(_("%s: object corrupt or missing: %s"),
oid_to_hex(oid), path);
if (type != OBJ_NONE && type < 0)
err = error(_("%s: object is of unknown type '%s': %s"),
oid_to_hex(oid), cb_data->obj_type.buf, path);
if (err < 0) {
errors_found |= ERROR_OBJECT;
error(_("%s: object corrupt or missing: %s"),
oid_to_hex(oid), path);
return 0; /* keep checking other objects */
}

Expand Down Expand Up @@ -639,15 +657,21 @@ static int fsck_cruft(const char *basename, const char *path, void *data)
return 0;
}

static int fsck_subdir(unsigned int nr, const char *path, void *progress)
static int fsck_subdir(unsigned int nr, const char *path, void *data)
{
struct for_each_loose_cb *cb_data = data;
struct progress *progress = cb_data->progress;
display_progress(progress, nr + 1);
return 0;
}

static void fsck_object_dir(const char *path)
{
struct progress *progress = NULL;
struct for_each_loose_cb cb_data = {
.obj_type = STRBUF_INIT,
.progress = progress,
};

if (verbose)
fprintf_ln(stderr, _("Checking object directory"));
Expand All @@ -656,9 +680,10 @@ static void fsck_object_dir(const char *path)
progress = start_progress(_("Checking object directories"), 256);

for_each_loose_file_in_objdir(path, fsck_loose, fsck_cruft, fsck_subdir,
progress);
&cb_data);
display_progress(progress, 256);
stop_progress(&progress);
strbuf_release(&cb_data.obj_type);
}

static int fsck_head_link(const char *head_ref_name,
Expand Down
18 changes: 6 additions & 12 deletions object-file.c
Original file line number Diff line number Diff line change
Expand Up @@ -2520,18 +2520,15 @@ static int check_stream_oid(git_zstream *stream,

int read_loose_object(const char *path,
const struct object_id *expected_oid,
enum object_type *type,
unsigned long *size,
void **contents)
void **contents,
struct object_info *oi)
{
int ret = -1;
void *map = NULL;
unsigned long mapsize;
git_zstream stream;
char hdr[MAX_HEADER_LEN];
struct object_info oi = OBJECT_INFO_INIT;
oi.typep = type;
oi.sizep = size;
unsigned long *size = oi->sizep;

*contents = NULL;

Expand All @@ -2547,15 +2544,13 @@ int read_loose_object(const char *path,
goto out;
}

if (parse_loose_header(hdr, &oi) < 0) {
if (parse_loose_header(hdr, oi) < 0) {
error(_("unable to parse header of %s"), path);
git_inflate_end(&stream);
goto out;
}
if (*type < 0)
die(_("invalid object type"));

if (*type == OBJ_BLOB && *size > big_file_threshold) {
if (*oi->typep == OBJ_BLOB && *size > big_file_threshold) {
if (check_stream_oid(&stream, hdr, *size, path, expected_oid) < 0)
goto out;
} else {
Expand All @@ -2566,8 +2561,7 @@ int read_loose_object(const char *path,
goto out;
}
if (check_object_signature(the_repository, expected_oid,
*contents, *size,
type_name(*type))) {
*contents, *size, oi->type_name->buf)) {
error(_("hash mismatch for %s (expected %s)"), path,
oid_to_hex(expected_oid));
free(*contents);
Expand Down
6 changes: 3 additions & 3 deletions object-store.h
Original file line number Diff line number Diff line change
Expand Up @@ -236,16 +236,16 @@ int force_object_loose(const struct object_id *oid, time_t mtime);

/*
* Open the loose object at path, check its hash, and return the contents,
* use the "oi" argument to assert things about the object, or e.g. populate its
* type, and size. If the object is a blob, then "contents" may return NULL,
* to allow streaming of large blobs.
*
* Returns 0 on success, negative on error (details may be written to stderr).
*/
int read_loose_object(const char *path,
const struct object_id *expected_oid,
enum object_type *type,
unsigned long *size,
void **contents);
void **contents,
struct object_info *oi);

/* Retry packed storage after checking packed and loose storage */
#define HAS_OBJECT_RECHECK_PACKED 1
Expand Down
18 changes: 9 additions & 9 deletions t/t1450-fsck.sh
Original file line number Diff line number Diff line change
Expand Up @@ -85,11 +85,10 @@ test_expect_success 'object with hash and type mismatch' '
cmt=$(echo bogus | git commit-tree $tree) &&
git update-ref refs/heads/bogus $cmt &&
cat >expect <<-\EOF &&
fatal: invalid object type
EOF
test_must_fail git fsck 2>actual &&
test_cmp expect actual
test_must_fail git fsck 2>out &&
grep "^error: hash mismatch for " out &&
grep "^error: $oid: object is of unknown type '"'"'garbage'"'"'" out
)
'

Expand Down Expand Up @@ -910,19 +909,20 @@ test_expect_success 'detect corrupt index file in fsck' '
test_i18ngrep "bad index file" errors
'

test_expect_success 'fsck hard errors on an invalid object type' '
test_expect_success 'fsck error and recovery on invalid object type' '
git init --bare garbage-type &&
(
cd garbage-type &&
git hash-object --stdin -w -t garbage --literally </dev/null &&
garbage_blob=$(git hash-object --stdin -w -t garbage --literally </dev/null) &&
cat >err.expect <<-\EOF &&
fatal: invalid object type
EOF
test_must_fail git fsck >out 2>err &&
test_cmp err.expect err &&
test_must_be_empty out
grep -e "^error" -e "^fatal" err >errors &&
test_line_count = 1 errors &&
grep "$garbage_blob: object is of unknown type '"'"'garbage'"'"':" err
)
'

Expand Down

0 comments on commit 31deb28

Please sign in to comment.