Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
jagprog5 committed Sep 11, 2023
1 parent 9a3a1c4 commit 88d5508
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 35 deletions.
34 changes: 27 additions & 7 deletions src/args.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,8 @@ struct Arguments {
// disable or allow warning
bool can_drop_warn = true;

// set when --is-bounded is given. once arguments are handled, a line ("yes" or "no")
// is printed indicating the result of mem_is_bounded(), and then the program exits
bool is_bounded_query = false;

// a special case where the tokens can be sent directly to the output as they are received
bool is_direct_output() const { //
return !tui && !sort && !flip && !tail;
Expand All @@ -117,6 +119,13 @@ struct Arguments {
return is_direct_output() && !unique;
}

// whether the output vector is filled by insertion with any excess elements discarded.
// requires an output end bound (out_end) and that truncate_no_bound is not set. if
// uniqueness is requested then sorting must also be on, with the same comparison type
bool mem_is_bounded() const {
    if (!out_end.has_value() || truncate_no_bound) {
        return false;
    }
    if (!unique) {
        return true;
    }
    return sort && unique_type == sort_type;
}

void drop_warning() {
if (this->can_drop_warn) {
this->can_drop_warn = false;
Expand Down Expand Up @@ -315,6 +324,9 @@ void print_help_message() {
" must have at least one digit. parse failures are smallest\n"
" -i, --ignore-case\n"
" make the positional argument case-insensitive\n"
" --is-bounded\n"
" prints a line indicating if the memory usage is bounded due to\n"
" truncation, then exits. not related to --unique-limit\n"
" --load-factor <positive float, default: " choose_xstr(UNIQUE_LOAD_FACTOR_DEFAULT) ">\n"
" if a hash table is used for uniqueness, set the max load factor\n"
" --locale <locale>\n"
Expand Down Expand Up @@ -375,12 +387,11 @@ void print_help_message() {
" on tui confirmed selection, do not exit; but still flush the\n"
" current selection to the output as a batch\n"
" --truncate-no-bound\n"
" if truncation is specified (--out/--tail) and uniqueness is not\n"
" specified, then choose only retains the relevant n values in\n"
" memory. This is only faster for small values of n, as elements\n"
" are shifted within this storage space. If n is large, this\n"
" option should be used to disable this optimization, leading to\n"
" faster speed but more space used\n"
" if truncation is specified (--out/--tail) then choose may retain\n"
" only the relevant n values in memory. see --is-bounded. this is\n"
" faster for small values of n, as elements are shifted within\n"
" this storage space. If n is large, this option should be used to\n"
" disable this optimization\n"
" -u, --unique\n"
" remove duplicate input tokens. leaves first occurrences. applied\n"
" before sorting\n"
Expand All @@ -396,7 +407,8 @@ void print_help_message() {
" --unique-general-numeric\n"
" apply uniqueness general numerically. implies -u\n"
" --unique-limit <#tokens>\n"
" implies -u. forget least recently used tokens\n"
" implies -u. forget least recently used tokens. ignore if memory\n"
" is bounded due to truncation (see --is-bounded)\n"
" --unique-use-set\n"
" implies -u. apply uniqueness with a tree instead of a hash table\n"
" --use-delimiter\n"
Expand Down Expand Up @@ -527,6 +539,7 @@ Arguments handle_args(int argc, char* const* argv, FILE* input = NULL, FILE* out
{"flip", no_argument, NULL, 0},
{"flush", no_argument, NULL, 0},
{"ignore-case", no_argument, NULL, 'i'},
{"is-bounded", no_argument, NULL, 0},
{"multi", no_argument, NULL, 'm'},
{"multiline", no_argument, NULL, 0},
{"match", no_argument, NULL, 0},
Expand Down Expand Up @@ -776,6 +789,8 @@ Arguments handle_args(int argc, char* const* argv, FILE* input = NULL, FILE* out
} else if (strcmp("unique-general-numeric", name) == 0) {
ret.unique = true;
ret.unique_type = general_numeric;
} else if (strcmp("is-bounded", name) == 0) {
ret.is_bounded_query = true;
} else if (strcmp("multiline", name) == 0) {
uncompiled_output.re_options &= ~PCRE2_LITERAL;
uncompiled_output.re_options |= PCRE2_MULTILINE;
Expand Down Expand Up @@ -998,6 +1013,11 @@ Arguments handle_args(int argc, char* const* argv, FILE* input = NULL, FILE* out
}
}

if (ret.is_bounded_query) {
int exit_code = puts(ret.mem_is_bounded() ? "yes" : "no") < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
exit(exit_code);
}

if (isatty(fileno(ret.input))) {
int exit_code = puts("Try 'choose --help' for more information.") < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
exit(exit_code);
Expand Down
16 changes: 10 additions & 6 deletions src/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -920,30 +920,34 @@ BOOST_AUTO_TEST_CASE(out_tail) {
}

BOOST_AUTO_TEST_CASE(sort_unique_out) {
    // --sort and --unique with --out=2: the input holds four distinct tokens, but
    // only the first two in sorted order are expected in the output.
    // NOTE(review): presumably OutputSizeBoundFixture checks that the output vector
    // never exceeds the given size (2 here) - confirm against its definition
    OutputSizeBoundFixture f(2);
    choose_output out = run_choose("this\nis\nis\na\na\ntest", {"--sort", "--unique", "--out=2"});
    choose_output correct_output{to_vec("a\nis\n")};
    BOOST_REQUIRE_EQUAL(out, correct_output);
}

BOOST_AUTO_TEST_CASE(sort_unique_out_diff) {
// uniqueness is requested numerically (--unique-numeric) alongside plain --sort and
// --out=2. no output size bound fixture is used in this case.
// NOTE(review): presumably this covers the unique and sort comparison types being
// different, where mem_is_bounded() would be false - confirm
choose_output out = run_choose("5\n-5\n-5\n-10\n-10\n0", {"--sort", "--unique-numeric", "--out=2"});
choose_output correct_output{to_vec("-10\n-5\n")};
BOOST_REQUIRE_EQUAL(out, correct_output);
}

BOOST_AUTO_TEST_CASE(sort_unique_out_min) {
    // --sort and --unique with --out=1,2: of the sorted distinct tokens, only "is"
    // is expected in the output.
    // NOTE(review): presumably OutputSizeBoundFixture checks that the output vector
    // never exceeds the given size (2 here) - confirm against its definition
    OutputSizeBoundFixture f(2);
    choose_output out = run_choose("this\nis\nis\na\na\ntest", {"--sort", "--unique", "--out=1,2"});
    choose_output correct_output{to_vec("is\n")};
    BOOST_REQUIRE_EQUAL(out, correct_output);
}

BOOST_AUTO_TEST_CASE(sort_unique_tail) {
    // --sort and --unique with --tail=2: only the last two sorted distinct tokens
    // ("test" then "this") are expected in the output.
    // NOTE(review): presumably OutputSizeBoundFixture checks that the output vector
    // never exceeds the given size (2 here) - confirm against its definition
    OutputSizeBoundFixture f(2);
    choose_output out = run_choose("this\nis\nis\na\na\ntest", {"--sort", "--unique", "--tail=2"});
    choose_output correct_output{to_vec("test\nthis\n")};
    BOOST_REQUIRE_EQUAL(out, correct_output);
}

BOOST_AUTO_TEST_CASE(sort_unique_tail_min) {
OutputSizeBoundFixture f(4);
OutputSizeBoundFixture f(2);
choose_output out = run_choose("this\nis\nis\na\na\ntest", {"--sort", "--unique", "--tail=1,2"});
choose_output correct_output{to_vec("test\n")};
BOOST_REQUIRE_EQUAL(out, correct_output);
Expand Down
41 changes: 19 additions & 22 deletions src/token.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ struct TokenOutputStream {
}
};

// exception type for a termination request. leads to an exit unless this is a unit test
struct termination_request : std::exception {};

namespace {
Expand Down Expand Up @@ -243,14 +243,7 @@ std::vector<Token> create_tokens(choose::Arguments& args) {
const bool sort = args.sort;
const Comparison sort_type = args.sort_type;
const bool sort_reversed = args.sort_reverse;

// the elements in output are being inserted with any excess being discarded.
// this branch is incompatible with uniqueness since the data structures point
// within output, and if things are moving around then the iterators' elements
// are also moved. this makes sense anyway, since if uniqueness is specified,
// then choose needs to keep track of what has been seen before (can't be
// bounded). this var also could be named "output_size_is_bounded"
const bool output_is_shifting = args.out_end.has_value() && !unique && !args.truncate_no_bound;
const bool mem_is_bounded = args.mem_is_bounded();

char subject[args.buf_size]; // match buffer
size_t subject_size = 0; // how full is the buffer
Expand Down Expand Up @@ -439,7 +432,7 @@ std::vector<Token> create_tokens(choose::Arguments& args) {
#ifndef CHOOSE_DISABLE_FIELD
t.set_field(args.field, field_data);
#endif
if (!output_is_shifting) {
if (!mem_is_bounded) {
// typical case
output.push_back(std::move(t));
if (unique) {
Expand All @@ -450,24 +443,28 @@ std::vector<Token> create_tokens(choose::Arguments& args) {
}
}
} else {
// output size is bounded.
// precondition here is that unique is false. but everything else should be handled
// output size is bounded. from --tail or --out
if (sort) {
// note that the sorting is reversed if tail is used. so this
// handles tail and non tail cases. see UncompiledCodes.
// always a stable sort, given insertion position
auto insertion_pos = std::upper_bound(output.begin(), output.end(), t, sort_comparison);
if (likely(output.size() == *args.out_end)) {
// a faster branch that avoids any vector realloc logic.
// it's basically a fixed length buffer at this point
while (insertion_pos < output.end()) {
std::swap(*insertion_pos++, t);
if (!unique || (insertion_pos == output.begin() || !consecutive_equality_predicate(insertion_pos[-1], t))) {
// uniqueness is not used, or t does not yet exist in output
if (likely(output.size() == *args.out_end)) {
while (insertion_pos < output.end()) {
std::swap(*insertion_pos++, t);
}
return false;
} else {
output.insert(insertion_pos, std::move(t));
}
return false;
} else {
output.insert(insertion_pos, std::move(t));
// unique is being used and t already exists in the output
return false;
}
} else {
// unsorted memory bounded case.
// precondition unique is false (can't be applied in a mem bounded way)
if (tail && likely(output.size() == *args.out_end)) {
// same reasoning as above. fixed length buffer being moved around
auto it = output.rbegin();
Expand Down Expand Up @@ -575,7 +572,7 @@ std::vector<Token> create_tokens(choose::Arguments& args) {
} else {
check_unique_then_append(); // result ignored
// handle the case mentioned in check_unique_then_append
if (output_is_shifting && !sort && !tail) {
if (mem_is_bounded && !sort && !tail) {
if (output.size() == *args.out_end) {
return true;
}
Expand Down Expand Up @@ -877,7 +874,7 @@ std::vector<Token> create_tokens(choose::Arguments& args) {
}
} else {
// truncate the ends, leaving only the beginning elements
if (output_is_shifting) {
if (mem_is_bounded) {
// sort and end truncation has already been applied
} else {
// truncate the end
Expand Down

0 comments on commit 88d5508

Please sign in to comment.