@@ -1250,8 +1250,7 @@ BlockInputStreams DeltaMergeStore::read(
12501250 const RuntimeFilteList & runtime_filter_list,
12511251 int rf_max_wait_time_ms,
12521252 const String & tracing_id,
1253- bool keep_order,
1254- bool is_fast_scan,
1253+ const DMReadOptions & read_opts,
12551254 size_t expected_block_size,
12561255 const SegmentIdSet & read_segments,
12571256 size_t extra_table_id_index,
@@ -1261,7 +1260,7 @@ BlockInputStreams DeltaMergeStore::read(
12611260 auto dm_context = newDMContext (db_context, db_settings, tracing_id, scan_context);
12621261
12631262 // If keep order is required, disable read thread.
1264- auto enable_read_thread = db_context.getSettingsRef ().dt_enable_read_thread && !keep_order;
1263+ auto enable_read_thread = db_context.getSettingsRef ().dt_enable_read_thread && !read_opts. keep_order ;
12651264 // SegmentReadTaskScheduler and SegmentReadTaskPool use table_id + segment id as unique ID when read thread is enabled.
12661265 // 'try_split_task' can result in several read tasks with the same id that can cause some trouble.
12671266 // Also, too many read tasks of a segment with different small ranges is not good for data sharing cache.
@@ -1281,7 +1280,7 @@ BlockInputStreams DeltaMergeStore::read(
12811280
12821281 GET_METRIC (tiflash_storage_read_tasks_count).Increment (tasks.size ());
12831282 size_t final_num_stream = std::max (1 , std::min (num_streams, tasks.size ()));
1284- auto read_mode = getReadMode (db_context, is_fast_scan, keep_order, filter);
1283+ auto read_mode = getReadMode (db_context, read_opts. is_fast_scan , read_opts. keep_order , filter);
12851284 const auto & final_columns_to_read = filter && filter->extra_cast ? *filter->columns_after_cast : columns_to_read;
12861285 auto read_task_pool = std::make_shared<SegmentReadTaskPool>(
12871286 extra_table_id_index,
@@ -1334,10 +1333,10 @@ BlockInputStreams DeltaMergeStore::read(
13341333 " Read create stream done, keep_order={} dt_enable_read_thread={} enable_read_thread={} "
13351334 " is_fast_scan={} is_push_down_filter_empty={} pool_id={} num_streams={} columns_to_read={} "
13361335 " final_columns_to_read={}" ,
1337- keep_order,
1336+ read_opts. keep_order ,
13381337 db_context.getSettingsRef ().dt_enable_read_thread ,
13391338 enable_read_thread,
1340- is_fast_scan,
1339+ read_opts. is_fast_scan ,
13411340 filter == nullptr || filter->before_where == nullptr ,
13421341 read_task_pool->pool_id ,
13431342 final_num_stream,
@@ -1360,8 +1359,7 @@ void DeltaMergeStore::read(
13601359 const RuntimeFilteList & runtime_filter_list,
13611360 int rf_max_wait_time_ms,
13621361 const String & tracing_id,
1363- bool keep_order,
1364- bool is_fast_scan,
1362+ const DMReadOptions & read_opts,
13651363 size_t expected_block_size,
13661364 const SegmentIdSet & read_segments,
13671365 size_t extra_table_id_index,
@@ -1371,7 +1369,7 @@ void DeltaMergeStore::read(
13711369 auto dm_context = newDMContext (db_context, db_settings, tracing_id, scan_context);
13721370
13731371 // If keep order is required, disable read thread.
1374- auto enable_read_thread = db_context.getSettingsRef ().dt_enable_read_thread && !keep_order;
1372+ auto enable_read_thread = db_context.getSettingsRef ().dt_enable_read_thread && !read_opts. keep_order ;
13751373 // SegmentReadTaskScheduler and SegmentReadTaskPool use table_id + segment id as unique ID when read thread is enabled.
13761374 // 'try_split_task' can result in several read tasks with the same id that can cause some trouble.
13771375 // Also, too many read tasks of a segment with different small ranges is not good for data sharing cache.
@@ -1390,9 +1388,24 @@ void DeltaMergeStore::read(
13901388 };
13911389
13921390 GET_METRIC (tiflash_storage_read_tasks_count).Increment (tasks.size ());
1393- size_t final_num_stream
1394- = enable_read_thread ? std::max (1 , num_streams) : std::max (1 , std::min (num_streams, tasks.size ()));
1395- auto read_mode = getReadMode (db_context, is_fast_scan, keep_order, filter);
1391+ size_t final_num_stream = 0 ;
1392+ if (enable_read_thread)
1393+ {
1394+ // When there are only a few tasks under `enable_read_thread`, creating too many source ops
1395+ // means the table scan cannot keep up with the compute layer and only adds concurrency
1396+ // overhead. So we limit final_num_stream to tasks.size() * 4 when the read thread is enabled
1397+ // and the query reads multiple partitions.
1398+ if (read_opts.has_multiple_partitions )
1399+ final_num_stream = std::min (num_streams, tasks.size () * 4 );
1400+ else
1401+ final_num_stream = num_streams;
1402+ final_num_stream = std::max (1 , final_num_stream);
1403+ }
1404+ else
1405+ {
1406+ final_num_stream = std::max (1 , std::min (num_streams, tasks.size ()));
1407+ }
1408+ auto read_mode = getReadMode (db_context, read_opts.is_fast_scan , read_opts.keep_order , filter);
13961409 const auto & final_columns_to_read = filter && filter->extra_cast ? *filter->columns_after_cast : columns_to_read;
13971410 auto read_task_pool = std::make_shared<SegmentReadTaskPool>(
13981411 extra_table_id_index,
@@ -1454,10 +1467,10 @@ void DeltaMergeStore::read(
14541467 " Read create PipelineExec done, keep_order={} dt_enable_read_thread={} enable_read_thread={} "
14551468 " is_fast_scan={} is_push_down_filter_empty={} pool_id={} num_streams={} columns_to_read={} "
14561469 " final_columns_to_read={}" ,
1457- keep_order,
1470+ read_opts. keep_order ,
14581471 db_context.getSettingsRef ().dt_enable_read_thread ,
14591472 enable_read_thread,
1460- is_fast_scan,
1473+ read_opts. is_fast_scan ,
14611474 filter == nullptr || filter->before_where == nullptr ,
14621475 read_task_pool->pool_id ,
14631476 final_num_stream,
0 commit comments