diff --git a/builtin/survey.c b/builtin/survey.c index 9d7faa71ebc75d..8e1a0e4e871246 100644 --- a/builtin/survey.c +++ b/builtin/survey.c @@ -1,8 +1,13 @@ #include "builtin.h" #include "config.h" +#include "json-writer.h" +#include "object-store.h" #include "parse-options.h" #include "progress.h" #include "ref-filter.h" +#include "refs.h" +#include "strbuf.h" +#include "strmap.h" #include "strvec.h" #include "trace2.h" @@ -50,7 +55,7 @@ static struct survey_opts survey_opts = { .verbose = 0, .show_progress = -1, /* defaults to isatty(2) */ - .refs.want_all_refs = -1, + .refs.want_all_refs = 0, .refs.want_branches = -1, /* default these to undefined */ .refs.want_tags = -1, @@ -150,6 +155,43 @@ static void survey_load_config(void) git_config(survey_load_config_cb, NULL); } +/* + * Stats on the set of refs that we found. + */ +struct survey_stats_refs { + uint32_t cnt_total; + uint32_t cnt_lightweight_tags; + uint32_t cnt_annotated_tags; + uint32_t cnt_branches; + uint32_t cnt_remotes; + uint32_t cnt_detached; + uint32_t cnt_other; + + uint32_t cnt_symref; + + uint32_t cnt_packed; + uint32_t cnt_loose; + + /* + * Measure the length of the refnames. We can look for + * potential platform limits. The partial sums may help us + * estimate the size of a haves/wants conversation, since each + * refname and a SHA must be transmitted. + */ + size_t len_max_local_refname; + size_t len_sum_local_refnames; + size_t len_max_remote_refname; + size_t len_sum_remote_refnames; + + struct strintmap refsmap; +}; + +struct survey_stats { + struct survey_stats_refs refs; +}; + +static struct survey_stats survey_stats = { 0 }; + static void do_load_refs(struct ref_array *ref_array) { struct ref_filter filter = REF_FILTER_INIT; @@ -202,6 +244,178 @@ static void do_load_refs(struct ref_array *ref_array) ref_sorting_release(sorting); } +/* + * If we want this type of ref, increment counters and return 1. + */ +static int maybe_count_ref(struct repository *r, struct ref_array_item *p) +{ + struct survey_refs_wanted *rw = &survey_opts.refs; + struct survey_stats_refs *prs = &survey_stats.refs; + struct object_id peeled; + + /* + * Classify the ref using the `kind` value. Note that + * p->kind was populated by `ref_kind_from_refname()` + * based strictly on the refname. This only knows about + * the basic stock categories and returns FILTER_REFS_OTHERS + * for notes, stashes, and any custom namespaces (like + * "refs/prefetch/"). + */ + switch (p->kind) { + case FILTER_REFS_TAGS: + if (rw->want_all_refs || rw->want_tags) { + /* + * NEEDSWORK: Both types of tags have the same + * "refs/tags/" prefix. Do we want to count them + * in separate buckets in the refsmap? + */ + strintmap_incr(&prs->refsmap, "refs/tags/", 1); + + if (!peel_iterated_oid(r, &p->objectname, &peeled)) + prs->cnt_annotated_tags++; + else + prs->cnt_lightweight_tags++; + + return 1; + } + return 0; + + case FILTER_REFS_BRANCHES: + if (rw->want_all_refs || rw->want_branches) { + strintmap_incr(&prs->refsmap, "refs/heads/", 1); + + prs->cnt_branches++; + return 1; + } + return 0; + + case FILTER_REFS_REMOTES: + if (rw->want_all_refs || rw->want_remotes) { + /* + * For the refsmap, group them by the "refs/remotes//". + * For example: + * "refs/remotes/origin/..." + */ + if (starts_with(p->refname, "refs/remotes/")) { + struct strbuf buf = STRBUF_INIT; + int begin = strlen("refs/remotes/"); + size_t j; + + strbuf_addstr(&buf, p->refname); + for (j = begin; j < buf.len; j++) { + if (buf.buf[j] == '/') { + strbuf_setlen(&buf, j+1); + break; + } + } + strintmap_incr(&prs->refsmap, buf.buf, 1); + strbuf_release(&buf); + } + + prs->cnt_remotes++; + return 1; + } + return 0; + + case FILTER_REFS_OTHERS: + if (rw->want_all_refs || rw->want_other) { + /* + * For the refsmap, group them by their "refs//". + * For example: + * "refs/notes/..." + * "refs/stash/..." + * "refs//..." + */ + if (starts_with(p->refname, "refs/")) { + struct strbuf buf = STRBUF_INIT; + int begin = strlen("refs/"); + size_t j; + + strbuf_addstr(&buf, p->refname); + for (j = begin; j < buf.len; j++) { + if (buf.buf[j] == '/') { + strbuf_setlen(&buf, j+1); + break; + } + } + strintmap_incr(&prs->refsmap, buf.buf, 1); + strbuf_release(&buf); + } + + prs->cnt_other++; + return 1; + } + return 0; + + case FILTER_REFS_DETACHED_HEAD: + if (rw->want_all_refs || rw->want_detached) { + strintmap_incr(&prs->refsmap, p->refname, 1); + + prs->cnt_detached++; + return 1; + } + return 0; + + default: + if (rw->want_all_refs) { + strintmap_incr(&prs->refsmap, p->refname, 1); /* probably "HEAD" */ + + return 1; + } + return 0; + } +} + +/* + * Calculate stats on the set of refs that we found. + */ +static void do_calc_stats_refs(struct repository *r, struct ref_array *ref_array) +{ + struct survey_stats_refs *prs = &survey_stats.refs; + int k; + + strintmap_init(&prs->refsmap, 0); + + for (k = 0; k < ref_array->nr; k++) { + struct ref_array_item *p = ref_array->items[k]; + size_t len; + + if (!maybe_count_ref(r, p)) + continue; + + prs->cnt_total++; + + /* + * SymRefs are somewhat orthogonal to the above + * classification (e.g. "HEAD" --> detached + * and "refs/remotes/origin/HEAD" --> remote) so + * our totals will already include them. + */ + if (p->flag & REF_ISSYMREF) + prs->cnt_symref++; + + /* + * Where/how is the ref stored in GITDIR. + */ + if (p->flag & REF_ISPACKED) + prs->cnt_packed++; + else + prs->cnt_loose++; + + len = strlen(p->refname); + + if (p->kind == FILTER_REFS_REMOTES) { + prs->len_sum_remote_refnames += len; + if (len > prs->len_max_remote_refname) + prs->len_max_remote_refname = len; + } else { + prs->len_sum_local_refnames += len; + if (len > prs->len_max_local_refname) + prs->len_max_local_refname = len; + } + } +} + /* * The REFS phase: * @@ -224,9 +438,135 @@ static void survey_phase_refs(struct repository *r) do_load_refs(&ref_array); trace2_region_leave("survey", "phase/refs", the_repository); + trace2_region_enter("survey", "phase/calcstats", the_repository); + do_calc_stats_refs(r, &ref_array); + trace2_region_leave("survey", "phase/calcstats", the_repository); + ref_array_clear(&ref_array); } +static void json_refs_section(struct json_writer *jw_top, int pretty, int want_trace2) +{ + struct survey_stats_refs *prs = &survey_stats.refs; + struct json_writer jw_refs = JSON_WRITER_INIT; + int k; + + jw_object_begin(&jw_refs, pretty); + { + jw_object_intmax(&jw_refs, "count", prs->cnt_total); + + jw_object_inline_begin_object(&jw_refs, "count_by_type"); + { + if (survey_opts.refs.want_branches) + jw_object_intmax(&jw_refs, "branches", prs->cnt_branches); + if (survey_opts.refs.want_tags) { + jw_object_intmax(&jw_refs, "lightweight_tags", prs->cnt_lightweight_tags); + jw_object_intmax(&jw_refs, "annotated_tags", prs->cnt_annotated_tags); + } + if (survey_opts.refs.want_remotes) + jw_object_intmax(&jw_refs, "remotes", prs->cnt_remotes); + if (survey_opts.refs.want_detached) + jw_object_intmax(&jw_refs, "detached", prs->cnt_detached); + if (survey_opts.refs.want_other) + jw_object_intmax(&jw_refs, "other", prs->cnt_other); + + /* + * SymRefs are somewhat orthogonal to + * the above classification + * (e.g. "HEAD" --> detached and + * "refs/remotes/origin/HEAD" --> + * remote) so the above classified + * counts will already include them, + * but it is less confusing to display + * them here than to create a whole + * new section. + */ + if (prs->cnt_symref) + jw_object_intmax(&jw_refs, "symrefs", prs->cnt_symref); + } + jw_end(&jw_refs); + + jw_object_inline_begin_object(&jw_refs, "count_by_storage"); + { + jw_object_intmax(&jw_refs, "loose_refs", prs->cnt_loose); + jw_object_intmax(&jw_refs, "packed_refs", prs->cnt_packed); + } + jw_end(&jw_refs); + + jw_object_inline_begin_object(&jw_refs, "refname_length"); + { + if (prs->len_sum_local_refnames) { + jw_object_intmax(&jw_refs, "max_local", prs->len_max_local_refname); + jw_object_intmax(&jw_refs, "sum_local", prs->len_sum_local_refnames); + } + if (prs->len_sum_remote_refnames) { + jw_object_intmax(&jw_refs, "max_remote", prs->len_max_remote_refname); + jw_object_intmax(&jw_refs, "sum_remote", prs->len_sum_remote_refnames); + } + } + jw_end(&jw_refs); + + jw_object_inline_begin_array(&jw_refs, "requested"); + { + for (k = 0; k < survey_vec_refs_wanted.nr; k++) + jw_array_string(&jw_refs, survey_vec_refs_wanted.v[k]); + } + jw_end(&jw_refs); + + jw_object_inline_begin_array(&jw_refs, "count_by_class"); + { + struct hashmap_iter iter; + struct strmap_entry *entry; + + strintmap_for_each_entry(&prs->refsmap, &iter, entry) { + const char *key = entry->key; + intptr_t count = (intptr_t)entry->value; + int value = count; + jw_array_inline_begin_object(&jw_refs); + { + jw_object_string(&jw_refs, "class", key); + jw_object_intmax(&jw_refs, "count", value); + } + jw_end(&jw_refs); + } + } + jw_end(&jw_refs); + } + jw_end(&jw_refs); + + if (jw_top) + jw_object_sub_jw(jw_top, "refs", &jw_refs); + + if (want_trace2) + trace2_data_json("survey", the_repository, "refs", &jw_refs); + + jw_release(&jw_refs); +} + +static void survey_print_json(void) +{ + struct json_writer jw_top = JSON_WRITER_INIT; + int pretty = 1; + + jw_object_begin(&jw_top, pretty); + { + json_refs_section(&jw_top, pretty, 0); + } + jw_end(&jw_top); + + printf("%s\n", jw_top.json.buf); + + jw_release(&jw_top); +} + +static void survey_emit_trace2(void) +{ + if (!trace2_is_enabled()) + return; + + json_refs_section(NULL, 0, 1); +} + int cmd_survey(int argc, const char **argv, const char *prefix) { survey_load_config(); @@ -241,6 +581,9 @@ int cmd_survey(int argc, const char **argv, const char *prefix) survey_phase_refs(the_repository); + survey_emit_trace2(); + survey_print_json(); + strvec_clear(&survey_vec_refs_wanted); return 0;