Skip to content

Commit 69dd613

Browse files
storage: fix block rows not match when filter column is the first non-empty column in the block (#9483)
ref #9472 storage: fix block rows not match when the filter column is the first non-empty column in the block Signed-off-by: Lloyd-Pottiger <yan1579196623@gmail.com> Co-authored-by: JaySon <tshent@qq.com>
1 parent 92e8dac commit 69dd613

5 files changed

Lines changed: 49 additions & 35 deletions

File tree

dbms/src/Core/Block.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,11 +541,9 @@ Block hstackBlocks(Blocks && blocks, const Block & header)
541541
return {};
542542

543543
Block res = header.cloneEmpty();
544-
size_t num_rows = blocks.front().rows();
545544
auto rs_result = DM::RSResult::All;
546545
for (const auto & block : blocks)
547546
{
548-
RUNTIME_CHECK_MSG(block.rows() == num_rows, "Cannot hstack blocks with different number of rows");
549547
for (const auto & elem : block)
550548
{
551549
if (likely(res.has(elem.name)))
@@ -556,6 +554,7 @@ Block hstackBlocks(Blocks && blocks, const Block & header)
556554
rs_result = rs_result && block.getRSResult();
557555
}
558556
res.setRSResult(rs_result);
557+
res.checkNumberOfRows();
559558

560559
return res;
561560
}

dbms/src/Core/Block.h

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,11 +180,14 @@ using BucketBlocksListMap = std::map<Int32, BlocksList>;
180180
/// Join blocks by columns
181181
/// The schema of the output block is the same as the header block.
182182
/// The columns not in the header block will be ignored.
183-
/// For example:
184-
/// header: (a UInt32, b UInt32, c UInt32, d UInt32)
185-
/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
186-
/// block2: (d UInt32), rows: 3
187-
/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
183+
/// NOTE: The input blocks can have columns with different sizes,
184+
/// but the columns in the header block must have the same size,
185+
/// Otherwise, an exception will be thrown.
186+
/// Example:
187+
/// header: (a UInt32, b UInt32, c UInt32, d UInt32)
188+
/// block1: (a UInt32, b UInt32, c UInt32, e UInt32), rows: 3
189+
/// block2: (d UInt32), rows: 3
190+
/// result: (a UInt32, b UInt32, c UInt32, d UInt32), rows: 3
188191
Block hstackBlocks(Blocks && blocks, const Block & header);
189192

190193
/// Join blocks by rows

dbms/src/DataStreams/FilterTransformAction.cpp

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,8 @@ FilterTransformAction::FilterTransformAction(
3939
/// Determine position of filter column.
4040
expression->execute(header);
4141

42-
filter_column = header.getPositionByName(filter_column_name);
43-
auto & column_elem = header.safeGetByPosition(filter_column);
42+
filter_column_position = header.getPositionByName(filter_column_name);
43+
auto & column_elem = header.safeGetByPosition(filter_column_position);
4444

4545
/// Isn't the filter already constant?
4646
if (column_elem.column)
@@ -76,9 +76,10 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
7676

7777
if (block.getRSResult().allMatch())
7878
{
79-
block.insert(
80-
filter_column,
81-
header.safeGetByPosition(filter_column)); // Make some checks on block structure happy.
79+
auto filter_column = header.safeGetByPosition(filter_column_position).cloneEmpty();
80+
filter_column.column = filter_column.type->createColumnConst(block.rows(), static_cast<UInt64>(1));
81+
// Make some checks on block structure happy.
82+
block.insert(filter_column_position, std::move(filter_column));
8283
if (return_filter)
8384
res_filter = nullptr;
8485
return true;
@@ -95,7 +96,7 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
9596

9697
size_t columns = block.columns();
9798
size_t rows = block.rows();
98-
ColumnPtr column_of_filter = block.safeGetByPosition(filter_column).column;
99+
ColumnPtr column_of_filter = block.safeGetByPosition(filter_column_position).column;
99100

100101
/** It happens that at the stage of analysis of expressions (in sample_block) the columns-constants have not been calculated yet,
101102
* and now - are calculated. That is, not all cases are covered by the code above.
@@ -139,8 +140,8 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
139140
if (filtered_rows == rows)
140141
{
141142
/// Replace the column with the filter by a constant.
142-
block.safeGetByPosition(filter_column).column
143-
= block.safeGetByPosition(filter_column).type->createColumnConst(filtered_rows, static_cast<UInt64>(1));
143+
auto filter_column = block.safeGetByPosition(filter_column_position);
144+
filter_column.column = filter_column.type->createColumnConst(filtered_rows, static_cast<UInt64>(1));
144145
/// No need to touch the rest of the columns.
145146
return true;
146147
}
@@ -150,7 +151,7 @@ bool FilterTransformAction::transform(Block & block, FilterPtr & res_filter, boo
150151
{
151152
ColumnWithTypeAndName & current_column = block.safeGetByPosition(i);
152153

153-
if (i == filter_column)
154+
if (i == filter_column_position)
154155
{
155156
/// The column with filter itself is replaced with a column with a constant `1`, since after filtering, nothing else will remain.
156157
/// NOTE User could pass column with something different than 0 and 1 for filter.

dbms/src/DataStreams/FilterTransformAction.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ struct FilterTransformAction
4141
private:
4242
Block header;
4343
ExpressionActionsPtr expression;
44-
size_t filter_column;
44+
size_t filter_column_position;
4545

4646
ConstantFilterDescription constant_filter_description;
4747
IColumn::Filter * filter = nullptr;

dbms/src/Storages/DeltaMerge/LateMaterializationBlockInputStream.cpp

Lines changed: 29 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,32 @@
2121
namespace DB::DM
2222
{
2323

24+
namespace
25+
{
26+
27+
void filterFilterColumnBlock(
28+
const Block & header,
29+
Block & block,
30+
const IColumn::Filter & filter,
31+
size_t passed_count,
32+
const String & filter_column_name)
33+
{
34+
ColumnPtr filter_column;
35+
for (auto & col : block)
36+
{
37+
if (col.name == filter_column_name)
38+
{
39+
filter_column = col.column;
40+
continue;
41+
}
42+
col.column = col.column->filter(filter, passed_count);
43+
}
44+
if (header.has(filter_column_name))
45+
filter_column = filter_column->filter(filter, passed_count);
46+
}
47+
48+
} // namespace
49+
2450
LateMaterializationBlockInputStream::LateMaterializationBlockInputStream(
2551
const ColumnDefines & columns_to_read,
2652
const String & filter_column_name_,
@@ -69,12 +95,7 @@ Block LateMaterializationBlockInputStream::read()
6995
{
7096
col.column = col.column->filter(col_filter, passed_count);
7197
}
72-
for (auto & col : filter_column_block)
73-
{
74-
if (col.name == filter_column_name)
75-
continue;
76-
col.column = col.column->filter(col_filter, passed_count);
77-
}
98+
filterFilterColumnBlock(header, filter_column_block, col_filter, passed_count, filter_column_name);
7899
}
79100
return hstackBlocks({std::move(filter_column_block), std::move(rest_column_block)}, header);
80101
}
@@ -112,12 +133,7 @@ Block LateMaterializationBlockInputStream::read()
112133
// so only if the number of rows left after filtering out is large enough,
113134
// we can skip some packs of the next block, call readWithFilter to get the next block.
114135
rest_column_block = rest_column_stream->readWithFilter(*filter);
115-
for (auto & col : filter_column_block)
116-
{
117-
if (col.name == filter_column_name)
118-
continue;
119-
col.column = col.column->filter(*filter, passed_count);
120-
}
136+
filterFilterColumnBlock(header, filter_column_block, *filter, passed_count, filter_column_name);
121137
}
122138
else if (filter_out_count > 0)
123139
{
@@ -128,12 +144,7 @@ Block LateMaterializationBlockInputStream::read()
128144
{
129145
col.column = col.column->filter(*filter, passed_count);
130146
}
131-
for (auto & col : filter_column_block)
132-
{
133-
if (col.name == filter_column_name)
134-
continue;
135-
col.column = col.column->filter(*filter, passed_count);
136-
}
147+
filterFilterColumnBlock(header, filter_column_block, *filter, passed_count, filter_column_name);
137148
}
138149
else
139150
{

0 commit comments

Comments
 (0)