@@ -20,6 +20,7 @@ use omicron_test_utils::dev::poll::wait_for_condition;
2020use oximeter:: Datum ;
2121use oximeter:: Measurement ;
2222use oximeter:: TimeseriesSchema ;
23+ use oximeter_collector:: ForcedCollectionError ;
2324use serde:: de:: DeserializeOwned ;
2425use slog:: Logger ;
2526use std:: borrow:: Cow ;
@@ -220,10 +221,28 @@ impl<'a, N> MetricsQuerier<'a, N> {
220221 {
221222 let result = wait_for_condition (
222223 || async {
223- self . ctx
224- . oximeter
225- . try_force_collect ( )
226- . expect ( "sent trigger to force oximeter collection" ) ;
224+ // On faster CI runners we encountered a lot of flakiness caused
225+ // by try_force_collect() returning the QueueFull error.
226+ //
227+ // At the time of writing this, the method just puts a message
228+ // in a bounded channel (with a capacity of 4), with QueueFull
229+ // indicating that the channel reached capacity.
230+ //
231+ // wait_for_condition() re-runs this closure every second until the
232+ // condition matches, so if the queued collections are not done within
233+ // four seconds, try_force_collect() will error with QueueFull.
234+ //
235+ // In those cases, rather than failing the test we should just
236+ // respect the backpressure and try again the next iteration.
237+ match self . ctx . oximeter . try_force_collect ( ) {
238+ Ok ( ( ) ) => { }
239+ Err ( ForcedCollectionError :: QueueFull ) => {
240+ return Err ( CondCheckError :: < ( ) > :: NotYet ) ;
241+ }
242+ Err ( e) => {
243+ panic ! ( "failed to start oximeter collection: {e:?}" ) ;
244+ }
245+ }
227246
228247 let page = objects_list_page_authz :: < U > (
229248 & self . ctx . external_client ,
@@ -274,10 +293,28 @@ impl<'a, N> MetricsQuerier<'a, N> {
274293 {
275294 let result = wait_for_condition (
276295 || async {
277- self . ctx
278- . oximeter
279- . try_force_collect ( )
280- . expect ( "sent trigger to force oximeter collection" ) ;
296+ // On faster CI runners we encountered a lot of flakiness caused
297+ // by try_force_collect() returning the QueueFull error.
298+ //
299+ // At the time of writing this, the method just puts a message
300+ // in a bounded channel (with a capacity of 4), with QueueFull
301+ // indicating that the channel reached capacity.
302+ //
303+ // wait_for_condition() re-runs this closure every second until the
304+ // condition matches, so if the queued collections are not done within
305+ // four seconds, try_force_collect() will error with QueueFull.
306+ //
307+ // In those cases, rather than failing the test we should just
308+ // respect the backpressure and try again the next iteration.
309+ match self . ctx . oximeter . try_force_collect ( ) {
310+ Ok ( ( ) ) => { }
311+ Err ( ForcedCollectionError :: QueueFull ) => {
312+ return Err ( CondCheckError :: < ( ) > :: NotYet ) ;
313+ }
314+ Err ( e) => {
315+ panic ! ( "failed to start oximeter collection: {e:?}" ) ;
316+ }
317+ }
281318
282319 let tables = match self
283320 . execute_query_once ( endpoint, query. to_string ( ) )
0 commit comments