Skip to content

Commit 8d69295

Browse files
gengliqi, ti-chi-bot
authored and committed
This is an automated cherry-pick of pingcap#10530
Signed-off-by: ti-chi-bot <ti-community-prow-bot@tidb.io>
1 parent b50ff41 commit 8d69295

9 files changed

Lines changed: 354 additions & 8 deletions

File tree

dbms/src/Flash/Coprocessor/GenSchemaAndColumn.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15+
#include <DataStreams/GeneratedColumnPlaceholderBlockInputStream.h>
1516
#include <Flash/Coprocessor/GenSchemaAndColumn.h>
1617
#include <Storages/DeltaMerge/DeltaMergeDefines.h>
1718
#include <Storages/MutableSupport.h>
@@ -91,14 +92,23 @@ NamesAndTypes genNamesAndTypes(const TiDBTableScan & table_scan, const StringRef
9192
return genNamesAndTypes(table_scan.getColumns(), column_prefix);
9293
}
9394

94-
std::tuple<DM::ColumnDefinesPtr, int> genColumnDefinesForDisaggregatedRead(const TiDBTableScan & table_scan)
95+
std::tuple<DM::ColumnDefinesPtr, int, std::vector<std::tuple<UInt64, String, DataTypePtr>>> genColumnDefinesForDisaggregatedRead(
96+
const TiDBTableScan & table_scan)
9597
{
9698
auto column_defines = std::make_shared<DM::ColumnDefines>();
9799
int extra_table_id_index = InvalidColumnID;
98100
column_defines->reserve(table_scan.getColumnSize());
101+
std::vector<std::tuple<UInt64, String, DataTypePtr>> generated_column_infos;
99102
for (Int32 i = 0; i < table_scan.getColumnSize(); ++i)
100103
{
101104
const auto & column_info = table_scan.getColumns()[i];
105+
if (column_info.hasGeneratedColumnFlag())
106+
{
107+
const auto & data_type = getDataTypeByColumnInfoForComputingLayer(column_info);
108+
const auto & col_name = GeneratedColumnPlaceholderBlockInputStream::getColumnName(i);
109+
generated_column_infos.push_back(std::make_tuple(i, col_name, data_type));
110+
continue;
111+
}
102112
// Now the upper level seems treat disagg read as an ExchangeReceiver output, so
103113
// use this as output column prefix.
104114
// Even if the id is pk_column or extra_table_id, we still output it as
@@ -114,10 +124,13 @@ std::tuple<DM::ColumnDefinesPtr, int> genColumnDefinesForDisaggregatedRead(const
114124
break;
115125
case ExtraTableIDColumnID:
116126
{
127+
<<<<<<< HEAD
117128
column_defines->emplace_back(DM::ColumnDefine{
118129
ExtraTableIDColumnID,
119130
output_name, // MutableSupport::extra_table_id_column_name
120131
MutableSupport::extra_table_id_column_type});
132+
=======
133+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
121134
extra_table_id_index = i;
122135
break;
123136
}
@@ -130,7 +143,7 @@ std::tuple<DM::ColumnDefinesPtr, int> genColumnDefinesForDisaggregatedRead(const
130143
break;
131144
}
132145
}
133-
return {std::move(column_defines), extra_table_id_index};
146+
return {std::move(column_defines), extra_table_id_index, std::move(generated_column_infos)};
134147
}
135148

136149
ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_types)

dbms/src/Flash/Coprocessor/GenSchemaAndColumn.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,8 @@ NamesAndTypes genNamesAndTypes(const TiDB::ColumnInfos & column_infos, const Str
3333
ColumnsWithTypeAndName getColumnWithTypeAndName(const NamesAndTypes & names_and_types);
3434
NamesAndTypes toNamesAndTypes(const DAGSchema & dag_schema);
3535

36-
// The column defines and `extra table id index`
37-
std::tuple<DM::ColumnDefinesPtr, int> genColumnDefinesForDisaggregatedRead(const TiDBTableScan & table_scan);
36+
// The column defines, `extra table id index` and `generated columns info` for disaggregated read.
37+
std::tuple<DM::ColumnDefinesPtr, int, std::vector<std::tuple<UInt64, String, DataTypePtr>>> genColumnDefinesForDisaggregatedRead(
38+
const TiDBTableScan & table_scan);
3839

3940
} // namespace DB

dbms/src/Storages/StorageDisaggregated.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,8 +105,15 @@ class StorageDisaggregated : public IStorage
105105
std::shared_ptr<disaggregated::EstablishDisaggTaskRequest> buildEstablishDisaggTaskReq(
106106
const Context & db_context,
107107
const pingcap::coprocessor::BatchCopTask & batch_cop_task);
108+
<<<<<<< HEAD
108109
DM::RSOperatorPtr buildRSOperator(const Context & db_context, const DM::ColumnDefinesPtr & columns_to_read);
109110
std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr> packSegmentReadTasks(
111+
=======
112+
std::tuple<DM::RSOperatorPtr, DM::ColumnRangePtr> buildRSOperatorAndColumnRange(
113+
const Context & db_context,
114+
const DM::ColumnDefinesPtr & columns_to_read);
115+
std::tuple<std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr>, DM::ColumnDefinesPtr> packSegmentReadTasks(
116+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
110117
const Context & db_context,
111118
DM::SegmentReadTasks && read_tasks,
112119
const DM::ColumnDefinesPtr & column_defines,
@@ -154,5 +161,12 @@ class StorageDisaggregated : public IStorage
154161
const FilterConditions & filter_conditions;
155162

156163
std::unique_ptr<DAGExpressionAnalyzer> analyzer;
164+
<<<<<<< HEAD
165+
=======
166+
static constexpr auto ZONE_LABEL_KEY = "zone";
167+
std::optional<String> zone_label;
168+
// For generated column, just need a placeholder, and TiDB will fill this column.
169+
std::vector<std::tuple<UInt64, String, DataTypePtr>> generated_column_infos;
170+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
157171
};
158172
} // namespace DB

dbms/src/Storages/StorageDisaggregatedRemote.cpp

Lines changed: 109 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,18 @@ BlockInputStreams StorageDisaggregated::readThroughS3(const Context & db_context
103103
{
104104
// Build InputStream according to the remote segment read tasks
105105
DAGPipeline pipeline;
106+
<<<<<<< HEAD
106107
buildRemoteSegmentInputStreams(db_context, buildReadTaskWithBackoff(db_context), num_streams, pipeline);
108+
=======
109+
buildRemoteSegmentInputStreams(
110+
db_context,
111+
buildReadTaskWithBackoff(db_context, scan_context),
112+
num_streams,
113+
pipeline,
114+
scan_context);
115+
// handle generated column if necessary.
116+
executeGeneratedColumnPlaceholder(generated_column_infos, log, pipeline);
117+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
107118

108119
NamesAndTypes source_columns;
109120
source_columns.reserve(table_scan.getColumnSize());
@@ -131,8 +142,16 @@ void StorageDisaggregated::readThroughS3(
131142
exec_context,
132143
group_builder,
133144
db_context,
145+
<<<<<<< HEAD
134146
buildReadTaskWithBackoff(db_context),
135147
num_streams);
148+
=======
149+
buildReadTaskWithBackoff(db_context, scan_context),
150+
num_streams,
151+
scan_context);
152+
// handle generated column if necessary.
153+
executeGeneratedColumnPlaceholder(exec_context, group_builder, generated_column_infos, log);
154+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
136155

137156
NamesAndTypes source_columns;
138157
auto header = group_builder.getCurrentHeader();
@@ -496,12 +515,23 @@ DM::RSOperatorPtr StorageDisaggregated::buildRSOperator(
496515
return DM::RSOperator::build(dag_query, table_scan.getColumns(), *columns_to_read, enable_rs_filter, log);
497516
}
498517

518+
<<<<<<< HEAD
499519
std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr> StorageDisaggregated::packSegmentReadTasks(
500520
const Context & db_context,
501521
DM::SegmentReadTasks && read_tasks,
502522
const DM::ColumnDefinesPtr & column_defines,
503523
size_t num_streams,
504524
int extra_table_id_index)
525+
=======
526+
std::tuple<std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr>, DM::ColumnDefinesPtr> StorageDisaggregated::
527+
packSegmentReadTasks(
528+
const Context & db_context,
529+
DM::SegmentReadTasks && read_tasks,
530+
const DM::ColumnDefinesPtr & column_defines,
531+
const DM::ScanContextPtr & scan_context,
532+
size_t num_streams,
533+
int extra_table_id_index)
534+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
505535
{
506536
const auto & executor_id = table_scan.getTableScanExecutorID();
507537

@@ -520,20 +550,29 @@ std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr> StorageDisagg
520550
push_down_filter);
521551
const UInt64 start_ts = sender_target_mpp_task_id.gather_id.query_id.start_ts;
522552
const auto enable_read_thread = db_context.getSettingsRef().dt_enable_read_thread;
553+
<<<<<<< HEAD
554+
=======
555+
const auto & final_columns_defines = push_down_executor && push_down_executor->extra_cast
556+
? push_down_executor->columns_after_cast
557+
: column_defines;
558+
RUNTIME_CHECK(num_streams > 0, num_streams);
559+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
523560
LOG_INFO(
524561
log,
525562
"packSegmentReadTasks: enable_read_thread={} read_mode={} is_fast_scan={} keep_order={} task_count={} "
526-
"num_streams={} column_defines={}",
563+
"num_streams={} column_defines={} final_columns_defines={}",
527564
enable_read_thread,
528565
magic_enum::enum_name(read_mode),
529566
table_scan.isFastScan(),
530567
table_scan.keepOrder(),
531568
read_tasks.size(),
532569
num_streams,
533-
*column_defines);
570+
*column_defines,
571+
*final_columns_defines);
534572

535573
if (enable_read_thread)
536574
{
575+
<<<<<<< HEAD
537576
return std::make_shared<DM::SegmentReadTaskPool>(
538577
extra_table_id_index,
539578
*column_defines,
@@ -561,6 +600,44 @@ std::variant<DM::Remote::RNWorkersPtr, DM::SegmentReadTaskPoolPtr> StorageDisagg
561600
.read_mode = read_mode,
562601
},
563602
num_streams);
603+
=======
604+
// Under disagg arch, now we use blocking IO to read data from cloud storage. So it require more active
605+
// segments to fully utilize the read threads.
606+
const size_t read_thread_num_active_seg = 10 * num_streams;
607+
return {
608+
std::make_shared<DM::SegmentReadTaskPool>(
609+
extra_table_id_index,
610+
*final_columns_defines,
611+
push_down_executor,
612+
start_ts,
613+
db_context.getSettingsRef().max_block_size,
614+
read_mode,
615+
std::move(read_tasks),
616+
/*after_segment_read*/ [](const DM::DMContextPtr &, const DM::SegmentPtr &) {},
617+
log->identifier(),
618+
/*enable_read_thread*/ true,
619+
num_streams,
620+
read_thread_num_active_seg,
621+
context.getDAGContext()->getKeyspaceID(),
622+
context.getDAGContext()->getResourceGroupName()),
623+
final_columns_defines};
624+
}
625+
else
626+
{
627+
return {
628+
DM::Remote::RNWorkers::create(
629+
db_context,
630+
std::move(read_tasks),
631+
{
632+
.log = log->getChild(executor_id),
633+
.columns_to_read = final_columns_defines,
634+
.start_ts = start_ts,
635+
.push_down_executor = push_down_executor,
636+
.read_mode = read_mode,
637+
},
638+
num_streams),
639+
final_columns_defines};
640+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
564641
}
565642
}
566643

@@ -598,15 +675,29 @@ void StorageDisaggregated::buildRemoteSegmentInputStreams(
598675
DAGPipeline & pipeline)
599676
{
600677
// Build the input streams to read blocks from remote segments
678+
<<<<<<< HEAD
601679
auto [column_defines, extra_table_id_index] = genColumnDefinesForDisaggregatedRead(table_scan);
602680
auto packed_read_tasks
603681
= packSegmentReadTasks(db_context, std::move(read_tasks), column_defines, num_streams, extra_table_id_index);
604682
RUNTIME_CHECK(num_streams > 0, num_streams);
683+
=======
684+
DM::ColumnDefinesPtr column_defines;
685+
int extra_table_id_index;
686+
std::tie(column_defines, extra_table_id_index, generated_column_infos)
687+
= genColumnDefinesForDisaggregatedRead(table_scan);
688+
auto [packed_read_tasks, final_column_defines] = packSegmentReadTasks(
689+
db_context,
690+
std::move(read_tasks),
691+
column_defines,
692+
scan_context,
693+
num_streams,
694+
extra_table_id_index);
695+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
605696
pipeline.streams.reserve(num_streams);
606697

607698
InputStreamBuilder builder{
608699
.tracing_id = log->identifier(),
609-
.columns_to_read = column_defines,
700+
.columns_to_read = final_column_defines,
610701
.extra_table_id_index = extra_table_id_index,
611702
};
612703
for (size_t stream_idx = 0; stream_idx < num_streams; ++stream_idx)
@@ -661,14 +752,28 @@ void StorageDisaggregated::buildRemoteSegmentSourceOps(
661752
size_t num_streams)
662753
{
663754
// Build the input streams to read blocks from remote segments
755+
<<<<<<< HEAD
664756
auto [column_defines, extra_table_id_index] = genColumnDefinesForDisaggregatedRead(table_scan);
665757
auto packed_read_tasks
666758
= packSegmentReadTasks(db_context, std::move(read_tasks), column_defines, num_streams, extra_table_id_index);
759+
=======
760+
DM::ColumnDefinesPtr column_defines;
761+
int extra_table_id_index;
762+
std::tie(column_defines, extra_table_id_index, generated_column_infos)
763+
= genColumnDefinesForDisaggregatedRead(table_scan);
764+
auto [packed_read_tasks, final_column_defines] = packSegmentReadTasks(
765+
db_context,
766+
std::move(read_tasks),
767+
column_defines,
768+
scan_context,
769+
num_streams,
770+
extra_table_id_index);
771+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530))
667772

668773
RUNTIME_CHECK(num_streams > 0, num_streams);
669774
SrouceOpBuilder builder{
670775
.tracing_id = log->identifier(),
671-
.column_defines = column_defines,
776+
.column_defines = final_column_defines,
672777
.extra_table_id_index = extra_table_id_index,
673778
.exec_context = exec_context,
674779
};

dbms/src/TableFunctions/CMakeLists.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,5 +18,16 @@ add_headers_and_sources(tiflash_table_functions .)
1818
list(REMOVE_ITEM tiflash_table_functions_sources ITableFunction.cpp TableFunctionFactory.cpp)
1919
list(REMOVE_ITEM tiflash_table_functions_headers ITableFunction.h TableFunctionFactory.h)
2020

21+
<<<<<<< HEAD:dbms/src/TableFunctions/CMakeLists.txt
2122
add_library(tiflash_table_functions ${tiflash_table_functions_sources})
2223
target_link_libraries(tiflash_table_functions tiflash_storages_system dbms ${Poco_Foundation_LIBRARY})
24+
=======
25+
tikv-worker-url = "tikv-worker0:19000"
26+
27+
enable-telemetry = false
28+
temp-dir = "/data/tmp"
29+
[performance]
30+
tcp-keep-alive = true
31+
[security]
32+
enable-sem = false
33+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530)):tests/docker/next-gen-config/tidb.toml

release-linux-llvm/dockerfiles/misc/install_cmake.sh

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,16 @@
1313
# See the License for the specific language governing permissions and
1414
# limitations under the License.
1515

16+
<<<<<<< HEAD:release-linux-llvm/dockerfiles/misc/install_cmake.sh
17+
=======
18+
[storage]
19+
# No space is reserved at all for testing
20+
reserve-space = "0"
21+
# Enable keyspace and ttl for next-gen
22+
api-version = 2
23+
enable-ttl = true
24+
low-space-threshold = 0
25+
>>>>>>> a5e14033f8 (Fix three schema mismatch bugs under disaggregated arch (#10530)):tests/docker/next-gen-config/tikv.toml
1626

1727
# Install cmake for CI/CD.
1828
# Require: wget
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Copyright 2025 PingCAP, Inc.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
# object storage for next-gen
16+
[dfs]
17+
prefix = "tikv"
18+
s3-endpoint = "http://minio0:9000"
19+
s3-key-id = "minioadmin"
20+
s3-secret-key = "minioadmin"
21+
s3-bucket = "tiflash-test"
22+
s3-region = "local"
23+
24+
[schema-manager]
25+
dir = "/data/schemas"
26+
enabled = true
27+
keyspace-refresh-interval = "10s"
28+
schema-refresh-threshold = 1

0 commit comments

Comments
 (0)