Skip to content

Commit

Permalink
fix nullable decimal column will core when comparison with other valu…
Browse files Browse the repository at this point in the history
…es (apache#46)

* fix nullable decimal column will core when comparison with other values
  • Loading branch information
yangzhg authored and HappenLee committed Aug 10, 2021
1 parent c4f5c15 commit d2a0b07
Show file tree
Hide file tree
Showing 7 changed files with 51 additions and 19 deletions.
3 changes: 2 additions & 1 deletion be/src/vec/columns/column.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,8 @@ class IColumn : public COW<IColumn> {
* limit - if isn't 0, then only first limit elements of the result column could be sorted.
* nan_direction_hint - see above.
*/
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint, Permutation & res) const = 0;
virtual void getPermutation(bool reverse, size_t limit, int nan_direction_hint,
Permutation& res) const = 0;

/** Copies each element according offsets parameter.
* (i-th element should be copied offsets[i] - offsets[i - 1] times.)
Expand Down
6 changes: 3 additions & 3 deletions be/src/vec/columns/column_nullable.h
Original file line number Diff line number Diff line change
Expand Up @@ -162,8 +162,8 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {
size_t size = getNullMapData().size();
const UInt8* null_pos = getNullMapData().data();
const UInt8* null_pos_end = getNullMapData().data() + size;
#ifdef __SSE2__
/** A slightly more optimized version.
#ifdef __SSE2__
/** A slightly more optimized version.
* Based on the assumption that often pieces of consecutive values
* completely pass or do not pass the filter.
* Therefore, we will optimistically check the parts of `SIMD_BYTES` values.
Expand All @@ -174,7 +174,7 @@ class ColumnNullable final : public COWHelper<IColumn, ColumnNullable> {

while (null_pos < null_end_sse) {
int mask = _mm_movemask_epi8(_mm_cmpgt_epi8(
_mm_loadu_si128(reinterpret_cast<const __m128i*>(null_pos)), zero16));
_mm_loadu_si128(reinterpret_cast<const __m128i*>(null_pos)), zero16));

if (0 != mask) {
return true;
Expand Down
45 changes: 38 additions & 7 deletions be/src/vec/core/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@

#include "vec/core/block.h"

#include <fmt/format.h>

#include <iomanip>
#include <iterator>
#include <memory>

#include "fmt/format.h"
#include "common/status.h"
#include "gen_cpp/data.pb.h"
#include "vec/columns/column_const.h"
#include "vec/columns/column_nullable.h"
Expand Down Expand Up @@ -427,7 +429,10 @@ std::string Block::dumpData(size_t row_limit) const {
// content
for (size_t row_num = 0; row_num < rows() && row_num < row_limit; ++row_num) {
for (size_t i = 0; i < columns(); ++i) {
std::string s = data[i].to_string(row_num);
std::string s = "";
if (data[i].column) {
s = data[i].to_string(row_num);
}
if (s.length() > headers_size[i]) {
s = s.substr(0, headers_size[i] - 3) + "...";
}
Expand Down Expand Up @@ -722,11 +727,7 @@ void Block::updateHash(SipHash& hash) const {
for (const auto& col : data) col.column->updateHashWithValue(row_no, hash);
}

void Block::filter_block(Block* block, int filter_column_id, int column_to_keep) {
ColumnPtr filter_column = block->getByPosition(filter_column_id).column;
const IColumn::Filter& filter =
assert_cast<const doris::vectorized::ColumnVector<UInt8>&>(*filter_column).getData();

void filter_block_internal(Block* block, const IColumn::Filter& filter, int column_to_keep) {
auto count = countBytesInFilter(filter);
if (count == 0) {
block->getByPosition(0).column = block->getByPosition(0).column->cloneEmpty();
Expand All @@ -741,6 +742,36 @@ void Block::filter_block(Block* block, int filter_column_id, int column_to_keep)
}
}
}

Status Block::filter_block(Block* block, int filter_column_id, int column_to_keep) {
ColumnPtr filter_column = block->getByPosition(filter_column_id).column;
if (auto* nullable_column = checkAndGetColumn<ColumnNullable>(*filter_column)) {
ColumnPtr nested_column = nullable_column->getNestedColumnPtr();

MutableColumnPtr mutable_holder = (*std::move(nested_column)).mutate();

ColumnUInt8* concrete_column = typeid_cast<ColumnUInt8*>(mutable_holder.get());
if (!concrete_column) {
return Status::InvalidArgument(
"Illegal type " + filter_column->getName() +
" of column for filter. Must be UInt8 or Nullable(UInt8).");
}
const NullMap& null_map = nullable_column->getNullMapData();
IColumn::Filter& filter = concrete_column->getData();

size_t size = filter.size();
for (size_t i = 0; i < size; ++i) {
filter[i] = filter[i] && !null_map[i];
}
filter_block_internal(block, filter, column_to_keep);
} else {
const IColumn::Filter& filter =
assert_cast<const doris::vectorized::ColumnVector<UInt8>&>(*filter_column)
.getData();
filter_block_internal(block, filter, column_to_keep);
}
return Status::OK();
}
void Block::serialize(PBlock* pblock) const {
for (auto c = cbegin(); c != cend(); ++c) {
PColumn* pc = pblock->add_columns();
Expand Down
9 changes: 6 additions & 3 deletions be/src/vec/core/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@
#include "vec/core/columns_with_type_and_name.h"
#include "vec/core/names_and_types.h"

namespace doris::vectorized {
namespace doris {
class Status;
namespace vectorized {

/** Container for set of columns for bunch of rows in memory.
* This is unit of data processing.
Expand Down Expand Up @@ -158,7 +160,7 @@ class Block {
/** Get block data in string. */
std::string dumpData(size_t row_limit = 100) const;

static void filter_block(Block* block, int filter_conlumn_id, int column_to_keep);
static Status filter_block(Block* block, int filter_conlumn_id, int column_to_keep);
// serialize block to PRowBatch
void serialize(PBlock* pblock) const;

Expand Down Expand Up @@ -263,4 +265,5 @@ class MutableBlock {
// add_rows(Block* block,PODArray<Int32>& group, int group_num);
};

} // namespace doris::vectorized
} // namespace vectorized
} // namespace doris
2 changes: 1 addition & 1 deletion be/src/vec/core/column_with_type_and_name.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ String ColumnWithTypeAndName::dumpStructure() const {
return out.str();
}
std::string ColumnWithTypeAndName::to_string(size_t row_num) const {
return type->to_string(*column.get(), row_num);
return type->to_string(*column->convertToFullColumnIfConst().get(), row_num);
}

} // namespace doris::vectorized
2 changes: 0 additions & 2 deletions be/src/vec/functions/functions_comparison.h
Original file line number Diff line number Diff line change
Expand Up @@ -528,8 +528,6 @@ class FunctionComparison : public IFunction {
// check_decimal_overflow(decimalCheckComparisonOverflow(context))
FunctionComparison() {}

bool useDefaultImplementationForNulls() const override { return false; }

private:
// const Context & context;
// bool check_decimal_overflow = true;
Expand Down
3 changes: 1 addition & 2 deletions be/src/vec/sink/mysql_result_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,7 @@ Status MysqlResultWriter::_add_one_column(const ColumnPtr& column_ptr) {
}

if constexpr (type == TYPE_TINYINT) {
buf_ret = _vec_buffers[i]->push_tinyint(
assert_cast<const ColumnVector<Int8>&>(*column).getData()[i]);
buf_ret = _vec_buffers[i]->push_tinyint(static_cast<int8_t>(column->getBool(i)));
}
if constexpr (type == TYPE_SMALLINT) {
buf_ret = _vec_buffers[i]->push_smallint(
Expand Down

0 comments on commit d2a0b07

Please sign in to comment.