Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Use experimental make_strings_children for strings join/url_encode/slice #15598

Merged
merged 1 commit into from
Apr 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions cpp/src/strings/combine/join.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/combine.hpp>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/strings_column_factories.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
Expand Down Expand Up @@ -84,8 +85,9 @@ struct join_base_fn {
* This functor is suitable for make_strings_children
*/
struct join_fn : public join_base_fn {
size_type* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

join_fn(column_device_view const d_strings,
string_view d_separator,
Expand All @@ -106,7 +108,7 @@ struct join_fn : public join_base_fn {
} else {
bytes += d_str.size_bytes() + d_sep.size_bytes();
}
if (!d_chars) { d_offsets[idx] = bytes; }
if (!d_chars) { d_sizes[idx] = bytes; }
}
};

Expand Down Expand Up @@ -148,7 +150,7 @@ std::unique_ptr<column> join_strings(strings_column_view const& input,
if ((input.size() == input.null_count()) ||
((input.chars_size(stream) / (input.size() - input.null_count())) <=
AVG_CHAR_BYTES_THRESHOLD)) {
return std::get<1>(make_strings_children(
return std::get<1>(experimental::make_strings_children(
join_fn{*d_strings, d_separator, d_narep}, input.size(), stream, mr))
.release();
}
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/strings/combine/join_list_elements.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/lists/lists_column_view.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/strings_column_view.hpp>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -60,11 +60,12 @@ struct compute_size_and_concatenate_fn {
separator_on_nulls const separate_nulls;
output_if_empty_list const empty_list_policy;

size_type* d_offsets{nullptr};
size_type* d_sizes{nullptr};

// If d_chars == nullptr: only compute sizes and validities of the output strings.
// If d_chars != nullptr: only concatenate strings.
char* d_chars{nullptr};
cudf::detail::input_offsetalator d_offsets;

[[nodiscard]] __device__ bool output_is_null(size_type const idx,
size_type const start_idx,
Expand All @@ -84,7 +85,7 @@ struct compute_size_and_concatenate_fn {
auto const end_idx = list_offsets[idx + 1];

if (!d_chars && output_is_null(idx, start_idx, end_idx)) {
d_offsets[idx] = 0;
d_sizes[idx] = 0;
return;
}

Expand Down Expand Up @@ -120,7 +121,7 @@ struct compute_size_and_concatenate_fn {

// If there are all null elements, the output should be the same as having an empty list input:
// a null or an empty string
if (!d_chars) { d_offsets[idx] = has_valid_element ? size_bytes : 0; }
if (!d_chars) { d_sizes[idx] = has_valid_element ? size_bytes : 0; }
}
};

Expand Down Expand Up @@ -208,7 +209,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
separate_nulls,
empty_list_policy};

auto [offsets_column, chars] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [offsets_column, chars] = experimental::make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
Expand Down Expand Up @@ -283,7 +284,7 @@ std::unique_ptr<column> join_list_elements(lists_column_view const& lists_string
separate_nulls,
empty_list_policy};

auto [offsets_column, chars] = make_strings_children(comp_fn, num_rows, stream, mr);
auto [offsets_column, chars] = experimental::make_strings_children(comp_fn, num_rows, stream, mr);
auto [null_mask, null_count] =
cudf::detail::valid_if(thrust::counting_iterator<size_type>(0),
thrust::counting_iterator<size_type>(num_rows),
Expand Down
9 changes: 5 additions & 4 deletions cpp/src/strings/convert/convert_lists.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <cudf/column/column_device_view.cuh>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/strings/convert/convert_lists.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/utilities/default_stream.hpp>
Expand Down Expand Up @@ -66,8 +66,9 @@ struct format_lists_fn {
string_view const d_na_rep;
stack_item* d_stack;
size_type const max_depth;
size_type* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ column_device_view get_nested_child(size_type idx)
{
Expand Down Expand Up @@ -184,7 +185,7 @@ struct format_lists_fn {
}
}

if (!d_chars) d_offsets[idx] = bytes;
if (!d_chars) { d_sizes[idx] = bytes; }
}
};

Expand Down Expand Up @@ -217,7 +218,7 @@ std::unique_ptr<column> format_list_column(lists_column_view const& input,
auto const d_separators = column_device_view::create(separators.parent(), stream);
auto const d_na_rep = na_rep.value(stream);

auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
auto [offsets_column, chars] = experimental::make_strings_children(
format_lists_fn{*d_input, *d_separators, d_na_rep, stack_buffer.data(), depth},
input.size(),
stream,
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/strings/convert/convert_urls.cu
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
#include <cudf/detail/utilities/cuda.cuh>
#include <cudf/detail/utilities/integer_utils.hpp>
#include <cudf/strings/convert/convert_urls.hpp>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/detail/utilities.cuh>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -50,8 +50,9 @@ namespace {
//
struct url_encoder_fn {
column_device_view const d_strings;
size_type* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

// utility to create 2-byte hex characters from single binary byte
__device__ void byte_to_hex(uint8_t byte, char* hex)
Expand Down Expand Up @@ -80,7 +81,7 @@ struct url_encoder_fn {
__device__ void operator()(size_type idx)
{
if (d_strings.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}

Expand Down Expand Up @@ -117,7 +118,7 @@ struct url_encoder_fn {
}
}
}
if (!d_chars) d_offsets[idx] = nbytes;
if (!d_chars) { d_sizes[idx] = nbytes; }
}
};

Expand All @@ -132,8 +133,8 @@ std::unique_ptr<column> url_encode(strings_column_view const& input,

auto d_column = column_device_view::create(input.parent(), stream);

auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
url_encoder_fn{*d_column}, input.size(), stream, mr);
auto [offsets_column, chars] =
experimental::make_strings_children(url_encoder_fn{*d_column}, input.size(), stream, mr);

return make_strings_column(input.size(),
std::move(offsets_column),
Expand Down
13 changes: 7 additions & 6 deletions cpp/src/strings/slice.cu
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
#include <cudf/detail/null_mask.hpp>
#include <cudf/detail/nvtx/ranges.hpp>
#include <cudf/scalar/scalar_device_view.cuh>
#include <cudf/strings/detail/strings_children.cuh>
#include <cudf/strings/detail/strings_children_ex.cuh>
#include <cudf/strings/slice.hpp>
#include <cudf/strings/string_view.cuh>
#include <cudf/strings/strings_column_view.hpp>
Expand Down Expand Up @@ -79,19 +79,20 @@ struct substring_fn {
numeric_scalar_device_view<size_type> const d_start;
numeric_scalar_device_view<size_type> const d_stop;
numeric_scalar_device_view<size_type> const d_step;
int32_t* d_offsets{};
size_type* d_sizes{};
char* d_chars{};
cudf::detail::input_offsetalator d_offsets;

__device__ void operator()(size_type idx)
{
if (d_column.is_null(idx)) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
auto const d_str = d_column.template element<string_view>(idx);
auto const length = d_str.length();
if (length == 0) {
if (!d_chars) d_offsets[idx] = 0;
if (!d_chars) { d_sizes[idx] = 0; }
return;
}
size_type const step = d_step.is_valid() ? d_step.value() : 1;
Expand Down Expand Up @@ -131,7 +132,7 @@ struct substring_fn {
}
itr += step;
}
if (!d_chars) d_offsets[idx] = bytes;
if (!d_chars) { d_sizes[idx] = bytes; }
}
};

Expand Down Expand Up @@ -205,7 +206,7 @@ std::unique_ptr<column> slice_strings(strings_column_view const& strings,
auto const d_stop = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(stop));
auto const d_step = get_scalar_device_view(const_cast<numeric_scalar<size_type>&>(step));

auto [offsets, chars] = make_strings_children(
auto [offsets, chars] = experimental::make_strings_children(
substring_fn{*d_column, d_start, d_stop, d_step}, strings.size(), stream, mr);

return make_strings_column(strings.size(),
Expand Down
Loading