Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@ url = "2.5.4"
uuid = { version = "1.10.0", features = ["std", "serde", "v4", "v7"] }
volo-thrift = "0.10"
walkdir = "2.3.2"
watcher = { version = "0.4.1" }
watcher = { version = "0.4.2" }
wiremock = "0.6"
wkt = "0.11.1"
xorf = { version = "0.11.0", default-features = false, features = ["binary-fuse"] }
Expand Down Expand Up @@ -661,5 +661,5 @@ sub-cache = { git = "https://github.com/databendlabs/sub-cache", tag = "v0.2.1"
tantivy = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370" }
tantivy-common = { git = "https://github.com/datafuse-extras/tantivy", rev = "7502370", package = "tantivy-common" }
tantivy-jieba = { git = "https://github.com/datafuse-extras/tantivy-jieba", rev = "0e300e9" }
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.1" }
watcher = { git = "https://github.com/databendlabs/watcher", tag = "v0.4.2" }
xorfilter-rs = { git = "https://github.com/datafuse-extras/xorfilter", tag = "databend-alpha.4" }
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ use crate::leveled_store::rotbl_codec::RotblCodec;
use crate::leveled_store::util;
use crate::marked::Marked;
use crate::state_machine::ExpireKey;
use crate::utils::add_cooperative_yielding;

/// The data to compact.
///
Expand Down Expand Up @@ -141,6 +142,8 @@ impl<'a> CompactingData<'a> {
// Filter out tombstone
let normal_strm = coalesce.try_filter(|(_k, v)| future::ready(v.is_normal()));

let normal_strm = add_cooperative_yielding(normal_strm, "compact");

Ok((sys_data, normal_strm.boxed()))
}
}
29 changes: 19 additions & 10 deletions src/meta/raft-store/src/state_machine_api_ext.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ use crate::leveled_store::map_api::MarkedOf;
use crate::marked::Marked;
use crate::state_machine::ExpireKey;
use crate::state_machine_api::StateMachineApi;
use crate::utils::add_cooperative_yielding;
use crate::utils::prefix_right_bound;

#[async_trait::async_trait]
Expand Down Expand Up @@ -111,7 +112,9 @@ pub trait StateMachineApiExt: StateMachineApi {

let strm = strm
// Return only keys with the expected prefix
.try_take_while(move |(k, _)| future::ready(Ok(k.starts_with(&p))))
.try_take_while(move |(k, _)| future::ready(Ok(k.starts_with(&p))));

let strm = add_cooperative_yielding(strm, format!("list_kv: {prefix}"))
// Skip tombstone
.try_filter_map(|(k, marked)| future::ready(Ok(marked_to_seqv(k, marked))));

Expand All @@ -121,10 +124,15 @@ pub trait StateMachineApiExt: StateMachineApi {
/// Return a range of kv entries.
async fn range_kv<R>(&self, rng: R) -> Result<IOResultStream<(String, SeqV)>, io::Error>
where R: RangeBounds<String> + Send + Sync + Clone + 'static {
let strm = self.map_ref().str_map().range(rng).await?;
let left = rng.start_bound().cloned();
let right = rng.end_bound().cloned();

// Skip tombstone
let strm = strm.try_filter_map(|(k, marked)| future::ready(Ok(marked_to_seqv(k, marked))));
let leveled_map = self.map_ref();
let strm = leveled_map.str_map().range(rng).await?;

let strm = add_cooperative_yielding(strm, format!("range_kv: {left:?} to {right:?}"))
// Skip tombstone
.try_filter_map(|(k, marked)| future::ready(Ok(marked_to_seqv(k, marked))));

Ok(strm.boxed())
}
Expand Down Expand Up @@ -181,12 +189,13 @@ pub trait StateMachineApiExt: StateMachineApi {

let strm = self.map_ref().expire_map().range(start..end).await?;

let strm = strm
// Return only non-deleted records
.try_filter_map(|(k, marked)| {
let expire_entry = marked.unpack().map(|(v, _v_meta)| (k, v));
future::ready(Ok(expire_entry))
});
let strm =
add_cooperative_yielding(strm, format!("list_expire_index since {start} to {end}"))
// Return only non-deleted records
.try_filter_map(|(k, marked)| {
let expire_entry = marked.unpack().map(|(v, _v_meta)| (k, v));
future::ready(Ok(expire_entry))
});

Ok(strm.boxed())
}
Expand Down
38 changes: 38 additions & 0 deletions src/meta/raft-store/src/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,44 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::fmt;

use futures::Stream;
use futures_util::StreamExt;
use log::info;

/// Wrap `stream` so that it periodically yields to the tokio scheduler.
///
/// Long, tight stream-consumption loops can monopolize a runtime worker and
/// starve other tasks. This adapter inserts a `tokio::task::yield_now()`
/// before every 100th item (including the very first, index 0), and logs a
/// progress line every 5000 items so large scans are observable.
///
/// `stream_name` is only used in the log message to identify the stream.
pub(crate) fn add_cooperative_yielding<S, T>(
    stream: S,
    stream_name: impl fmt::Display + Send,
) -> impl Stream<Item = T>
where
    S: Stream<Item = T>,
    T: Send + 'static,
{
    stream.enumerate().then(move |(i, item)| {
        // Decide synchronously (when the closure runs) whether this item
        // crosses a yield boundary; the log also fires here, not on poll.
        let should_yield = i % 100 == 0;

        if should_yield && i % 5000 == 0 {
            info!("{stream_name} yield control to allow other tasks to run: index={i}");
        }

        async move {
            if should_yield {
                // Give other tasks on this worker a chance to run.
                tokio::task::yield_now().await;
            }
            item
        }
    })
}

/// Return the right bound of the prefix, so that `p..right` will cover all strings with prefix `p`.
///
/// If the right bound can not be built, return None.
Expand Down
21 changes: 17 additions & 4 deletions src/meta/service/src/api/grpc/grpc_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ use databend_common_meta_types::LogEntry;
use databend_common_meta_types::TxnReply;
use databend_common_meta_types::TxnRequest;
use databend_common_metrics::count::Count;
use display_more::DisplayOptionExt;
use fastrace::func_name;
use fastrace::func_path;
use fastrace::prelude::*;
Expand Down Expand Up @@ -449,8 +448,11 @@ impl MetaService for MetaServiceImpl {
let sm = &mn.raft_store.state_machine;
let sm = sm.write().await;

let weak_sender = mn.add_watcher(watch, tx.clone()).await?;
let sender_str = weak_sender.upgrade().map(|s| s.to_string());
info!("enter sm write lock for watch {}", watch);

let sender = mn.new_watch_sender(watch, tx.clone())?;
let sender_str = sender.to_string();
let weak_sender = mn.insert_watch_sender(sender);

// Build a closure to remove the stream tx from Dispatcher when the stream is dropped.
let on_drop = {
Expand All @@ -467,9 +469,15 @@ impl MetaService for MetaServiceImpl {
let snk = new_initialization_sink::<WatchTypes>(tx.clone(), ctx);
let strm = sm.range_kv(key_range).await?;

info!("created initialization stream for {}", sender_str);

let sndr = sender_str.clone();

let fu = async move {
try_forward(strm, snk, ctx).await;

info!("initialization flush complete for watcher {}", sndr);

// Send an empty message with `is_initialization=false` to indicate
// the end of the initialization flush.
tx.send(Ok(WatchResponse::new_initialization_complete()))
Expand All @@ -478,12 +486,17 @@ impl MetaService for MetaServiceImpl {
error!("failed to send flush complete message: {}", e);
})
.ok();

info!(
"finished sending initialization complete flag for watcher {}",
sndr
);
};
let fu = Box::pin(fu);

info!(
"sending initial flush Future to watcher {} via Dispatcher",
sender_str.display()
sender_str
);

mn.dispatcher_handle.send_future(fu);
Expand Down
40 changes: 24 additions & 16 deletions src/meta/service/src/meta_service/meta_node.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1174,26 +1174,34 @@ impl MetaNode {
}
}

pub(crate) async fn add_watcher(
pub(crate) fn insert_watch_sender(
&self,
sender: Arc<WatchStreamSender<WatchTypes>>,
) -> Weak<WatchStreamSender<WatchTypes>> {
let weak = Arc::downgrade(&sender);

self.dispatcher_handle
.request(move |dispatcher: &mut Dispatcher<WatchTypes>| {
dispatcher.insert_watch_stream_sender(sender);
});

weak
}

pub(crate) fn new_watch_sender(
&self,
request: WatchRequest,
tx: mpsc::Sender<Result<WatchResponse, Status>>,
) -> Result<Weak<WatchStreamSender<WatchTypes>>, Status> {
let stream_sender = self
.dispatcher_handle
.request_blocking(move |dispatcher: &mut Dispatcher<WatchTypes>| {
let key_range = match build_key_range(&request.key, &request.key_end) {
Ok(kr) => kr,
Err(e) => return Err(Status::invalid_argument(e.to_string())),
};

let interested = event_filter_from_filter_type(request.filter_type());
Ok(dispatcher.add_watcher(key_range, interested, tx))
})
.await
.map_err(|_e| Status::internal("watch-event-Dispatcher closed"))??;
) -> Result<Arc<WatchStreamSender<WatchTypes>>, Status> {
let key_range = match build_key_range(&request.key, &request.key_end) {
Ok(kr) => kr,
Err(e) => return Err(Status::invalid_argument(e.to_string())),
};

let interested = event_filter_from_filter_type(request.filter_type());

Ok(stream_sender)
let sender = Dispatcher::new_watch_stream_sender(key_range.clone(), interested, tx);
Ok(sender)
}

/// Get a kvapi::KVApi implementation.
Expand Down
Loading