Skip to content

Commit

Permalink
survey: calculate stats on refs and print results
Browse files Browse the repository at this point in the history
Calculate stats on the set of refs. This includes the number of
branches, plain and annotated tags, remotes, and etc.  Calculate
the number of packed vs loose refs.  Calculate the size of the
set of refnames.

Print results on the console in JSON format.

Add Trace2 logging of the results as a data_json event.

Signed-off-by: Jeff Hostetler <jeffhostetler@github.com>
  • Loading branch information
jeffhostetler authored and dscho committed Sep 18, 2024
1 parent ca6d3d8 commit 4c9a2e2
Showing 1 changed file with 344 additions and 1 deletion.
345 changes: 344 additions & 1 deletion builtin/survey.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
#include "builtin.h"
#include "config.h"
#include "json-writer.h"
#include "object-store.h"
#include "parse-options.h"
#include "progress.h"
#include "ref-filter.h"
#include "refs.h"
#include "strbuf.h"
#include "strmap.h"
#include "strvec.h"
#include "trace2.h"

Expand Down Expand Up @@ -50,7 +55,7 @@ static struct survey_opts survey_opts = {
.verbose = 0,
.show_progress = -1, /* defaults to isatty(2) */

.refs.want_all_refs = -1,
.refs.want_all_refs = 0,

.refs.want_branches = -1, /* default these to undefined */
.refs.want_tags = -1,
Expand Down Expand Up @@ -150,6 +155,43 @@ static void survey_load_config(void)
git_config(survey_load_config_cb, NULL);
}

/*
* Stats on the set of refs that we found.
*/
struct survey_stats_refs {
uint32_t cnt_total;
uint32_t cnt_lightweight_tags;
uint32_t cnt_annotated_tags;
uint32_t cnt_branches;
uint32_t cnt_remotes;
uint32_t cnt_detached;
uint32_t cnt_other;

uint32_t cnt_symref;

uint32_t cnt_packed;
uint32_t cnt_loose;

/*
* Measure the length of the refnames. We can look for
* potential platform limits. The partial sums may help us
* estimate the size of a haves/wants conversation, since each
* refname and a SHA must be transmitted.
*/
size_t len_max_local_refname;
size_t len_sum_local_refnames;
size_t len_max_remote_refname;
size_t len_sum_remote_refnames;

struct strintmap refsmap;
};

struct survey_stats {
struct survey_stats_refs refs;
};

static struct survey_stats survey_stats = { 0 };

static void do_load_refs(struct ref_array *ref_array)
{
struct ref_filter filter = REF_FILTER_INIT;
Expand Down Expand Up @@ -202,6 +244,178 @@ static void do_load_refs(struct ref_array *ref_array)
ref_sorting_release(sorting);
}

/*
* If we want this type of ref, increment counters and return 1.
*/
static int maybe_count_ref(struct repository *r, struct ref_array_item *p)
{
struct survey_refs_wanted *rw = &survey_opts.refs;
struct survey_stats_refs *prs = &survey_stats.refs;
struct object_id peeled;

/*
* Classify the ref using the `kind` value. Note that
* p->kind was populated by `ref_kind_from_refname()`
* based strictly on the refname. This only knows about
* the basic stock categories and returns FILTER_REFS_OTHERS
* for notes, stashes, and any custom namespaces (like
* "refs/prefetch/").
*/
switch (p->kind) {
case FILTER_REFS_TAGS:
if (rw->want_all_refs || rw->want_tags) {
/*
* NEEDSWORK: Both types of tags have the same
* "refs/tags/" prefix. Do we want to count them
* in separate buckets in the refsmap?
*/
strintmap_incr(&prs->refsmap, "refs/tags/", 1);

if (!peel_iterated_oid(r, &p->objectname, &peeled))
prs->cnt_annotated_tags++;
else
prs->cnt_lightweight_tags++;

return 1;
}
return 0;

case FILTER_REFS_BRANCHES:
if (rw->want_all_refs || rw->want_branches) {
strintmap_incr(&prs->refsmap, "refs/heads/", 1);

prs->cnt_branches++;
return 1;
}
return 0;

case FILTER_REFS_REMOTES:
if (rw->want_all_refs || rw->want_remotes) {
/*
* For the refsmap, group them by the "refs/remotes/<remote>/".
* For example:
* "refs/remotes/origin/..."
*/
if (starts_with(p->refname, "refs/remotes/")) {
struct strbuf buf = STRBUF_INIT;
int begin = strlen("refs/remotes/");
size_t j;

strbuf_addstr(&buf, p->refname);
for (j = begin; j < buf.len; j++) {
if (buf.buf[j] == '/') {
strbuf_setlen(&buf, j+1);
break;
}
}
strintmap_incr(&prs->refsmap, buf.buf, 1);
strbuf_release(&buf);
}

prs->cnt_remotes++;
return 1;
}
return 0;

case FILTER_REFS_OTHERS:
if (rw->want_all_refs || rw->want_other) {
/*
* For the refsmap, group them by their "refs/<class>/".
* For example:
* "refs/notes/..."
* "refs/stash/..."
* "refs/<custom>/..."
*/
if (starts_with(p->refname, "refs/")) {
struct strbuf buf = STRBUF_INIT;
int begin = strlen("refs/");
size_t j;

strbuf_addstr(&buf, p->refname);
for (j = begin; j < buf.len; j++) {
if (buf.buf[j] == '/') {
strbuf_setlen(&buf, j+1);
break;
}
}
strintmap_incr(&prs->refsmap, buf.buf, 1);
strbuf_release(&buf);
}

prs->cnt_other++;
return 1;
}
return 0;

case FILTER_REFS_DETACHED_HEAD:
if (rw->want_all_refs || rw->want_detached) {
strintmap_incr(&prs->refsmap, p->refname, 1);

prs->cnt_detached++;
return 1;
}
return 0;

default:
if (rw->want_all_refs) {
strintmap_incr(&prs->refsmap, p->refname, 1); /* probably "HEAD" */

return 1;
}
return 0;
}
}

/*
* Calculate stats on the set of refs that we found.
*/
static void do_calc_stats_refs(struct repository *r, struct ref_array *ref_array)
{
struct survey_stats_refs *prs = &survey_stats.refs;
int k;

strintmap_init(&prs->refsmap, 0);

for (k = 0; k < ref_array->nr; k++) {
struct ref_array_item *p = ref_array->items[k];
size_t len;

if (!maybe_count_ref(r, p))
continue;

prs->cnt_total++;

/*
* SymRefs are somewhat orthogonal to the above
* classification (e.g. "HEAD" --> detached
* and "refs/remotes/origin/HEAD" --> remote) so
* our totals will already include them.
*/
if (p->flag & REF_ISSYMREF)
prs->cnt_symref++;

/*
* Where/how is the ref stored in GITDIR.
*/
if (p->flag & REF_ISPACKED)
prs->cnt_packed++;
else
prs->cnt_loose++;

len = strlen(p->refname);

if (p->kind == FILTER_REFS_REMOTES) {
prs->len_sum_remote_refnames += len;
if (len > prs->len_max_remote_refname)
prs->len_max_remote_refname = len;
} else {
prs->len_sum_local_refnames += len;
if (len > prs->len_max_local_refname)
prs->len_max_local_refname = len;
}
}
}

/*
* The REFS phase:
*
Expand All @@ -224,9 +438,135 @@ static void survey_phase_refs(struct repository *r)
do_load_refs(&ref_array);
trace2_region_leave("survey", "phase/refs", the_repository);

trace2_region_enter("survey", "phase/calcstats", the_repository);
do_calc_stats_refs(r, &ref_array);
trace2_region_leave("survey", "phase/calcstats", the_repository);

ref_array_clear(&ref_array);
}

static void json_refs_section(struct json_writer *jw_top, int pretty, int want_trace2)
{
struct survey_stats_refs *prs = &survey_stats.refs;
struct json_writer jw_refs = JSON_WRITER_INIT;
int k;

jw_object_begin(&jw_refs, pretty);
{
jw_object_intmax(&jw_refs, "count", prs->cnt_total);

jw_object_inline_begin_object(&jw_refs, "count_by_type");
{
if (survey_opts.refs.want_branches)
jw_object_intmax(&jw_refs, "branches", prs->cnt_branches);
if (survey_opts.refs.want_tags) {
jw_object_intmax(&jw_refs, "lightweight_tags", prs->cnt_lightweight_tags);
jw_object_intmax(&jw_refs, "annotated_tags", prs->cnt_annotated_tags);
}
if (survey_opts.refs.want_remotes)
jw_object_intmax(&jw_refs, "remotes", prs->cnt_remotes);
if (survey_opts.refs.want_detached)
jw_object_intmax(&jw_refs, "detached", prs->cnt_detached);
if (survey_opts.refs.want_other)
jw_object_intmax(&jw_refs, "other", prs->cnt_other);

/*
* SymRefs are somewhat orthogonal to
* the above classification
* (e.g. "HEAD" --> detached and
* "refs/remotes/origin/HEAD" -->
* remote) so the above classified
* counts will already include them,
* but it is less confusing to display
* them here than to create a whole
* new section.
*/
if (prs->cnt_symref)
jw_object_intmax(&jw_refs, "symrefs", prs->cnt_symref);
}
jw_end(&jw_refs);

jw_object_inline_begin_object(&jw_refs, "count_by_storage");
{
jw_object_intmax(&jw_refs, "loose_refs", prs->cnt_loose);
jw_object_intmax(&jw_refs, "packed_refs", prs->cnt_packed);
}
jw_end(&jw_refs);

jw_object_inline_begin_object(&jw_refs, "refname_length");
{
if (prs->len_sum_local_refnames) {
jw_object_intmax(&jw_refs, "max_local", prs->len_max_local_refname);
jw_object_intmax(&jw_refs, "sum_local", prs->len_sum_local_refnames);
}
if (prs->len_sum_remote_refnames) {
jw_object_intmax(&jw_refs, "max_remote", prs->len_max_remote_refname);
jw_object_intmax(&jw_refs, "sum_remote", prs->len_sum_remote_refnames);
}
}
jw_end(&jw_refs);

jw_object_inline_begin_array(&jw_refs, "requested");
{
for (k = 0; k < survey_vec_refs_wanted.nr; k++)
jw_array_string(&jw_refs, survey_vec_refs_wanted.v[k]);
}
jw_end(&jw_refs);

jw_object_inline_begin_array(&jw_refs, "count_by_class");
{
struct hashmap_iter iter;
struct strmap_entry *entry;

strintmap_for_each_entry(&prs->refsmap, &iter, entry) {
const char *key = entry->key;
intptr_t count = (intptr_t)entry->value;
int value = count;
jw_array_inline_begin_object(&jw_refs);
{
jw_object_string(&jw_refs, "class", key);
jw_object_intmax(&jw_refs, "count", value);
}
jw_end(&jw_refs);
}
}
jw_end(&jw_refs);
}
jw_end(&jw_refs);

if (jw_top)
jw_object_sub_jw(jw_top, "refs", &jw_refs);

if (want_trace2)
trace2_data_json("survey", the_repository, "refs", &jw_refs);

jw_release(&jw_refs);
}

static void survey_print_json(void)
{
struct json_writer jw_top = JSON_WRITER_INIT;
int pretty = 1;

jw_object_begin(&jw_top, pretty);
{
json_refs_section(&jw_top, pretty, 0);
}
jw_end(&jw_top);

printf("%s\n", jw_top.json.buf);

jw_release(&jw_top);
}

static void survey_emit_trace2(void)
{
if (!trace2_is_enabled())
return;

json_refs_section(NULL, 0, 1);
}

int cmd_survey(int argc, const char **argv, const char *prefix)
{
survey_load_config();
Expand All @@ -241,6 +581,9 @@ int cmd_survey(int argc, const char **argv, const char *prefix)

survey_phase_refs(the_repository);

survey_emit_trace2();
survey_print_json();

strvec_clear(&survey_vec_refs_wanted);

return 0;
Expand Down

0 comments on commit 4c9a2e2

Please sign in to comment.