Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use experimental make_strings_children for strings replace/filter/translate #15586

Merged
merged 1 commit into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 7 additions & 6 deletions cpp/src/strings/char_types/char_types.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/char_types/char_types.hpp>
#include <cudf/strings/detail/char_tables.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utf8.hpp>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
Expand Down Expand Up @@ -130,8 +130,9 @@ struct filter_chars_fn {
string_character_types const types_to_remove;
string_character_types const types_to_keep;
string_view const d_replacement; ///< optional replacement for removed characters
int32_t* d_offsets{}; ///< size of the output string stored here during first pass
char* d_chars{}; ///< this is null only during the first pass
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Returns true if the given character should be replaced.
Expand All @@ -150,7 +151,7 @@ struct filter_chars_fn {
__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_column.element<string_view>(idx);
Expand All @@ -165,7 +166,7 @@ struct filter_chars_fn {
nbytes += d_newchar.size_bytes() - char_size;
if (out_ptr) out_ptr = cudf::strings::detail::copy_string(out_ptr, d_newchar);
}
if (!out_ptr) d_offsets[idx] = nbytes;
if (!out_ptr) { d_sizes[idx] = nbytes; }
}
};

Expand Down Expand Up @@ -202,7 +203,7 @@ std::unique_ptr<column> filter_characters_of_type(strings_column_view const& str

// this utility calls filterer to build the offsets and chars columns
auto [offsets_column, chars] =
cudf::strings::detail::make_strings_children(filterer, strings_count, stream, mr);
cudf::strings::detail::experimental::make_strings_children(filterer, strings_count, stream, mr);

// return new strings column
return make_strings_column(strings_count,
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/strings/filter_chars.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -57,8 +57,9 @@ struct filter_fn {
rmm::device_uvector<char_range>::iterator table_begin;
rmm::device_uvector<char_range>::iterator table_end;
string_view const d_replacement;
int32_t* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

/**
* @brief Return true if this character should be removed.
Expand Down Expand Up @@ -87,7 +88,7 @@ struct filter_fn {
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_strings.element<string_view>(idx);
Expand All @@ -104,7 +105,7 @@ struct filter_fn {
else
nbytes += d_newchar.size_bytes() - char_size;
}
if (!out_ptr) d_offsets[idx] = nbytes;
if (!out_ptr) { d_sizes[idx] = nbytes; }
}
};

Expand Down Expand Up @@ -141,7 +142,7 @@ std::unique_ptr<column> filter_characters(
// this utility calls the filter_fn to build the offsets and chars columns
filter_fn ffn{*d_strings, keep_characters, table.begin(), table.end(), d_replacement};
auto [offsets_column, chars] =
cudf::strings::detail::make_strings_children(ffn, strings.size(), stream, mr);
cudf::strings::detail::experimental::make_strings_children(ffn, strings.size(), stream, mr);

return make_strings_column(strings_count,
std::move(offsets_column),
Expand Down
15 changes: 9 additions & 6 deletions cpp/src/strings/replace/multi.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/strings/detail/replace.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/replace.hpp>
Expand Down Expand Up @@ -404,13 +404,14 @@ struct replace_multi_fn {
column_device_view const d_strings;
column_device_view const d_targets;
column_device_view const d_repls;
int32_t* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) { d_offsets[idx] = 0; }
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_strings.element<string_view>(idx);
Expand Down Expand Up @@ -443,9 +444,11 @@ struct replace_multi_fn {
++spos;
}
if (out_ptr) // copy remainder
{
memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos);
else
d_offsets[idx] = bytes;
} else {
d_sizes[idx] = bytes;
}
}
};

Expand All @@ -459,7 +462,7 @@ std::unique_ptr<column> replace_string_parallel(strings_column_view const& input
auto d_targets = column_device_view::create(targets.parent(), stream);
auto d_replacements = column_device_view::create(repls.parent(), stream);

auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children(
replace_multi_fn{*d_strings, *d_targets, *d_replacements}, input.size(), stream, mr);

return make_strings_column(input.size(),
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/strings/replace/replace.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/algorithm.cuh>
#include <cudf/strings/detail/replace.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/replace.hpp>
Expand Down Expand Up @@ -345,13 +345,14 @@ struct replace_fn {
string_view d_target;
string_view d_replacement;
cudf::size_type maxrepl;
cudf::size_type* d_offsets{};
cudf::size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) { d_offsets[idx] = 0; }
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_strings.element<string_view>(idx);
Expand Down Expand Up @@ -384,7 +385,7 @@ struct replace_fn {
if (out_ptr) { // copy remainder
memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos);
} else {
d_offsets[idx] = bytes;
d_sizes[idx] = bytes;
}
}
};
Expand All @@ -398,7 +399,7 @@ std::unique_ptr<column> replace_string_parallel(strings_column_view const& input
{
auto d_strings = column_device_view::create(input.parent(), stream);

auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children(
replace_fn{*d_strings, d_target, d_replacement, maxrepl}, input.size(), stream, mr);

return make_strings_column(input.size(),
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/strings/replace/replace_slice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/detail/replace.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/replace.hpp>
#include <cudf/strings/string_view.cuh>
Expand All @@ -45,13 +45,14 @@ struct replace_slice_fn {
string_view const d_repl;
size_type const start;
size_type const stop;
size_type* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) { d_offsets[idx] = 0; }
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_strings.element<string_view>(idx);
Expand All @@ -69,7 +70,7 @@ struct replace_slice_fn {
in_ptr + end,
d_str.size_bytes() - end);
} else {
d_offsets[idx] = d_str.size_bytes() + d_repl.size_bytes() - (end - begin);
d_sizes[idx] = d_str.size_bytes() + d_repl.size_bytes() - (end - begin);
}
}
};
Expand All @@ -94,7 +95,7 @@ std::unique_ptr<column> replace_slice(strings_column_view const& input,
auto d_strings = column_device_view::create(input.parent(), stream);

// this utility calls the given functor to build the offsets and chars columns
auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children(
replace_slice_fn{*d_strings, d_repl, start, stop}, input.size(), stream, mr);

return make_strings_column(input.size(),
Expand Down
11 changes: 6 additions & 5 deletions cpp/src/strings/translate.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/strings/translate.hpp>
Expand Down Expand Up @@ -52,13 +52,14 @@ struct translate_fn {
column_device_view const d_strings;
rmm::device_uvector<translate_table>::iterator table_begin;
rmm::device_uvector<translate_table>::iterator table_end;
int32_t* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
string_view const d_str = d_strings.element<string_view>(idx);
Expand All @@ -80,7 +81,7 @@ struct translate_fn {
}
if (chr && out_ptr) out_ptr += from_char_utf8(chr, out_ptr);
}
if (!d_chars) d_offsets[idx] = bytes;
if (!d_chars) { d_sizes[idx] = bytes; }
}
};

Expand Down Expand Up @@ -111,7 +112,7 @@ std::unique_ptr<column> translate(strings_column_view const& strings,

auto d_strings = column_device_view::create(strings.parent(), stream);

auto [offsets_column, chars] = make_strings_children(
auto [offsets_column, chars] = cudf::strings::detail::experimental::make_strings_children(
translate_fn{*d_strings, table.begin(), table.end()}, strings.size(), stream, mr);

return make_strings_column(strings.size(),
Expand Down
Loading