Skip to content

Commit

Permalink
serialize-status: serialize global and repo-local exclude file metadata
Browse files Browse the repository at this point in the history
Changes to the global or repo-local excludes files can change the
results returned by "git status" for untracked files.  Therefore,
it is important that the exclude-file values used during serialization
are still current at the time of deserialization.

Teach "git status --serialize" to report metadata on the user's global
excludes file (which defaults to "$XDG_CONFIG_HOME/git/ignore") and on the
repo-local excludes file (which is ".git/info/exclude").  Serialize
will record the pathnames and mtimes for these files in the serialization
header (next to the mtime data for the .git/index file).

Teach "git status --deserialize" to validate this new metadata.  If either
exclude file has changed since the serialization-cache-file was written,
then deserialize will reject the cache file and force a full/normal status
run.

Signed-off-by: Jeff Hostetler <jeffhost@microsoft.com>
  • Loading branch information
jeffhostetler authored and dscho committed Aug 16, 2023
1 parent 61fb8b2 commit 4180c3f
Show file tree
Hide file tree
Showing 3 changed files with 213 additions and 0 deletions.
85 changes: 85 additions & 0 deletions wt-status-deserialize.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
#include "pkt-line.h"
#include "trace.h"
#include "statinfo.h"
#include "path.h"

static struct trace_key trace_deserialize = TRACE_KEY_INIT(DESERIALIZE);

Expand Down Expand Up @@ -69,12 +70,69 @@ static int my_validate_index(const struct cache_time *mtime_reported)
return DESERIALIZE_OK;
}

/*
* Use the given key and exclude pathname to compute a serialization header
* reflecting the current contents on disk. See if that matches the value
* computed for this key when the cache was written. Reject the cache if
* anything has changed.
*/
static int my_validate_excludes(const char *path, const char *key, const char *line)
{
struct strbuf sb = STRBUF_INIT;
int r;

wt_serialize_compute_exclude_header(&sb, key, path);

r = (strcmp(line, sb.buf) ? DESERIALIZE_ERR : DESERIALIZE_OK);

if (r == DESERIALIZE_ERR)
trace_printf_key(&trace_deserialize,
"%s changed [cached '%s'][observed '%s']",
key, line, sb.buf);

strbuf_release(&sb);
return r;
}

/*
 * Validate the "core_excludes" header line of the status cache.
 *
 * dir.c:setup_standard_excludes() picks the global excludes file from
 * the "core.excludesFile" setting (held in the global "excludes_file"
 * variable) or, when that is unset, from the default
 * xdg_config_home("ignore") location. That setup happens during
 * wt_status_collect_untracked(), which deserialization is trying to
 * avoid calling, so the same choice is reproduced here.
 *
 * Returns the result of my_validate_excludes() for the chosen path.
 */
static int my_parse_core_excludes(const char *line)
{
	char *default_path;
	int result;

	/* An explicitly configured file takes precedence. */
	if (excludes_file)
		return my_validate_excludes(excludes_file, "core_excludes", line);

	default_path = xdg_config_home("ignore");
	result = my_validate_excludes(default_path, "core_excludes", line);
	free(default_path);

	return result;
}

/*
 * Validate the "repo_excludes" header line of the status cache against
 * the repo-local excludes file, whose pathname is built with
 * git_pathdup("info/exclude").
 *
 * Returns the result of my_validate_excludes() for that path.
 */
static int my_parse_repo_excludes(const char *line)
{
	char *exclude_path;
	int result;

	exclude_path = git_pathdup("info/exclude");
	result = my_validate_excludes(exclude_path, "repo_excludes", line);
	free(exclude_path);

	return result;
}

static int wt_deserialize_v1_header(struct wt_status *s, int fd)
{
struct cache_time index_mtime;
int line_len, nr_fields;
const char *line;
const char *arg;
int have_required_index_mtime = 0;
int have_required_core_excludes = 0;
int have_required_repo_excludes = 0;

/*
* parse header lines up to the first flush packet.
Expand All @@ -90,6 +148,20 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd)
nr_fields, line);
return DESERIALIZE_ERR;
}
have_required_index_mtime = 1;
continue;
}

if (skip_prefix(line, "core_excludes ", &arg)) {
if (my_parse_core_excludes(line) != DESERIALIZE_OK)
return DESERIALIZE_ERR;
have_required_core_excludes = 1;
continue;
}
if (skip_prefix(line, "repo_excludes ", &arg)) {
if (my_parse_repo_excludes(line) != DESERIALIZE_OK)
return DESERIALIZE_ERR;
have_required_repo_excludes = 1;
continue;
}

Expand Down Expand Up @@ -174,6 +246,19 @@ static int wt_deserialize_v1_header(struct wt_status *s, int fd)
return DESERIALIZE_ERR;
}

if (!have_required_index_mtime) {
trace_printf_key(&trace_deserialize, "missing '%s'", "index_mtime");
return DESERIALIZE_ERR;
}
if (!have_required_core_excludes) {
trace_printf_key(&trace_deserialize, "missing '%s'", "core_excludes");
return DESERIALIZE_ERR;
}
if (!have_required_repo_excludes) {
trace_printf_key(&trace_deserialize, "missing '%s'", "repo_excludes");
return DESERIALIZE_ERR;
}

return my_validate_index(&index_mtime);
}

Expand Down
120 changes: 120 additions & 0 deletions wt-status-serialize.c
Original file line number Diff line number Diff line change
@@ -1,13 +1,131 @@
#include "git-compat-util.h"
#include "environment.h"
#include "hex.h"
#include "repository.h"
#include "wt-status.h"
#include "pkt-line.h"
#include "trace.h"
#include "read-cache-ll.h"
#include "path.h"

static struct trace_key trace_serialize = TRACE_KEY_INIT(SERIALIZE);

/*
 * Compute the header record for an exclude file. Any previous content
 * of `sb` is discarded and replaced with one of:
 *
 *     <key> SP "U (unset)"                   -- `path` is NULL or empty
 *     <key> SP "E (not-found)" SP <path>     -- file does not exist
 *     <key> SP "E (other)" SP <path>         -- stat failed for another reason
 *     <key> SP "F" SP <sec> SP <nsec> SP <path>  -- file exists; mtime recorded
 *
 * No trailing LF is appended; the caller adds one when writing the
 * record to the cache.
 *
 * The file is examined with stat() rather than lstat(): when the
 * exclude file is a symbolic link, it is the mtime of the file the
 * link points at that changes when the patterns are edited, so that is
 * the value the cache must be compared against.
 */
void wt_serialize_compute_exclude_header(struct strbuf *sb,
					 const char *key,
					 const char *path)
{
	struct stat st;
	struct stat_data sd;

	memset(&sd, 0, sizeof(sd));

	strbuf_setlen(sb, 0);

	if (!path || !*path) {
		strbuf_addf(sb, "%s U (unset)", key);
	} else if (stat(path, &st) == -1) {
		/* errno is inspected immediately, before any other call can clobber it. */
		if (is_missing_file_error(errno))
			strbuf_addf(sb, "%s E (not-found) %s", key, path);
		else
			strbuf_addf(sb, "%s E (other) %s", key, path);
	} else {
		fill_stat_data(&sd, &st);
		strbuf_addf(sb, "%s F %d %d %s",
			    key, sd.sd_mtime.sec, sd.sd_mtime.nsec, path);
	}
}

static void append_exclude_info(int fd, const char *path, const char *key)
{
struct strbuf sb = STRBUF_INIT;

wt_serialize_compute_exclude_header(&sb, key, path);

packet_write_fmt(fd, "%s\n", sb.buf);

strbuf_release(&sb);
}

/*
 * Write the pathname and mtime of the core/global excludes file to the
 * status cache header (key "core_excludes").
 *
 * A change in the global excludes file will/may change the results
 * reported by status, so the deserialize code must be able to reject
 * the status cache if that file has changed since the cache was written.
 *
 * The "core.excludesFile" setting is held in the global "excludes_file"
 * variable. See dir.c:setup_standard_excludes(), which is expected to
 * have filled in the default during wt_status_collect_untracked().
 *
 * If "excludes_file" is nevertheless still NULL here, fall back to
 * xdg_config_home("ignore") -- the same default the deserializer uses
 * when it validates this record -- so that both sides describe the same
 * file and the record can actually match.
 * NOTE(review): presumably this happens when the untracked-file scan
 * is skipped; confirm against wt_status_collect_untracked().
 */
static void append_core_excludes_file_info(int fd)
{
	char *fallback = NULL;
	const char *path = excludes_file;

	if (!path)
		path = fallback = xdg_config_home("ignore");

	append_exclude_info(fd, path, "core_excludes");

	free(fallback);
}

/*
 * Write the pathname and mtime of the per-repo excludes file
 * (.git/info/exclude) to the status cache header (key "repo_excludes").
 *
 * Like the global excludes file, this file can change the results
 * reported by status, so the deserialize code needs to be able to
 * reject the status cache when it changes.
 *
 * See dir.c:setup_standard_excludes() and git_path_info_excludes().
 * The pathname construction is replicated here because of the static
 * variables/functions used in dir.c.
 */
static void append_repo_excludes_file_info(int fd)
{
	char *exclude_path;

	exclude_path = git_pathdup("info/exclude");
	append_exclude_info(fd, exclude_path, "repo_excludes");
	free(exclude_path);
}

/*
* WARNING: The status cache attempts to preserve the essential in-memory
* status data after a status scan into a "serialization" (aka "status cache")
* file. It allows later "git status --deserialize=<foo>" instances to
* just print the cached status results without scanning the workdir (and
* without reading the index).
*
* The status cache file is valid as long as:
* [1] the set of functional command line options are the same (think "-u").
* [2] repo-local and user-global configuration settings are compatible.
* [3] nothing in the workdir has changed.
*
* We rely on:
* [1.a] We remember the relevant (functional, non-display) command line
* arguments in the status cache header.
* [2.a] We use the mtime of the .git/index to detect staging changes.
* [2.b] We use the mtimes of the excludes files to detect changes that
* might affect untracked file reporting.
*
* But we need external help to verify [3].
* [] This includes changes to tracked files.
* [] This includes changes to tracked .gitignore files that might change
* untracked file reporting.
* [] This includes the creation of new, untracked per-directory .gitignore
* files that might change untracked file reporting.
*
* [3.a] On GVFS repos, we rely on the GVFS service (mount) daemon to
* watch the filesystem and invalidate (delete) the status cache
* when anything changes inside the workdir.
*
* [3.b] TODO This problem is not solved for non-GVFS repos.
* [] It is possible that the untracked-cache index extension
* could help with this but that requires status to read the
* index to load the extension.
* [] It is possible that the new fsmonitor facility could also
* provide this information, but that too requires reading the
* index.
*/

/*
* Write V1 header fields.
*/
Expand All @@ -20,6 +138,8 @@ static void wt_serialize_v1_header(struct wt_status *s, int fd)
packet_write_fmt(fd, "index_mtime %d %d\n",
s->repo->index->timestamp.sec,
s->repo->index->timestamp.nsec);
append_core_excludes_file_info(fd);
append_repo_excludes_file_info(fd);

/*
* Write data from wt_status to qualify this status report.
Expand Down
8 changes: 8 additions & 0 deletions wt-status.h
Original file line number Diff line number Diff line change
Expand Up @@ -233,4 +233,12 @@ void wt_status_serialize_v1(int fd, struct wt_status *s);
int wt_status_deserialize(const struct wt_status *cmd_s,
const char *path);

/*
 * A helper routine shared by serialize and deserialize to compute
 * metadata for the user-global and repo-local excludes files.
 *
 * Replaces the content of `sb` with a header record that starts with
 * `key` and describes the file at `path`. Serialize writes this record
 * into the status cache header; deserialize recomputes it and compares
 * it with the cached record to detect a changed excludes file.
 *
 * `path` may be NULL or empty, in which case the record marks the
 * file as unset.
 */
void wt_serialize_compute_exclude_header(struct strbuf *sb,
const char *key,
const char *path);

#endif /* STATUS_H */

0 comments on commit 4180c3f

Please sign in to comment.