Skip to content

Commit 9bf2a57

Browse files
committed
fix(cpp_trees): address PR #1 review
- ResetRobotStateAction: latch fault_reported_ to coalesce up to 6 kRobotResetFailed callback paths into a single FAILED event. - MoveManipulatorAction: emit reportFaultPassed for kPlannerCollision Detected and kPlannerEstopTriggered on the early path of onStart when the corresponding blackboard flag reads false, so the fault clears after recovery. Guard the per-attempt PASSED on success with current_try_>0; emit kPlannerRetryAttempt PASSED in onHalted. - WaitForInputAction: track timeout_reported_ so reportFaultPassed fires only when a FAILED preceded it (onRunning success and onHalted), instead of accumulating spurious PASSED events. - PublishJointStateAction, ResetTrajectories: document the empty FaultReporting inheritance as reserved for future failure modes. - installFaultReporter: accept optional service_name override for namespaced test rigs and multi-robot setups.
1 parent e50cbbd commit 9bf2a57

6 files changed

Lines changed: 124 additions & 38 deletions

File tree

manymove_cpp_trees/include/manymove_cpp_trees/action_nodes_gripper.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,11 @@ class GripperTrajAction : public BT::StatefulActionNode, public FaultReporting
136136
// PublishJointStateAction
137137
// =======================================================
138138

139+
// NOTE: this node currently has no fault sites — the call is fire-and-forget
140+
// via the publisher API and cannot fail in a way worth reporting. The
141+
// FaultReporting base is kept so that any future failure mode (e.g. detection
142+
// of a downstream subscriber drop) can emit through the same channel as the
143+
// rest of the BT without changing the inheritance.
139144
class PublishJointStateAction : public BT::SyncActionNode, public FaultReporting
140145
{
141146
public:

manymove_cpp_trees/include/manymove_cpp_trees/action_nodes_planner.hpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,11 @@ class MoveManipulatorAction : public BT::StatefulActionNode, public FaultReporti
113113
*
114114
* It takes a comma-separated list of move_ids and for each, sets 'trajectory_{id}' to empty
115115
* and 'validity_{id}' to false in the blackboard.
116+
*
117+
* NOTE: this node has no fault sites — blackboard writes do not fail in a way
118+
* worth reporting. The FaultReporting base is kept so that any future failure
119+
* mode (e.g. missing move_id list, malformed input) can emit through the same
120+
* channel as the rest of the BT without changing the inheritance.
116121
*/
117122
class ResetTrajectories : public BT::SyncActionNode, public FaultReporting
118123
{

manymove_cpp_trees/include/manymove_cpp_trees/action_nodes_signals.hpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,14 @@ class ResetRobotStateAction : public BT::StatefulActionNode, public FaultReporti
248248
bool reset_goal_sent_;
249249
bool load_goal_sent_;
250250

251+
// One failed reset can fan out across rejection + result-failure callbacks
252+
// for unload-trajectory, reset-robot-state and load-trajectory (up to 6
253+
// call sites). Without coalescing, those would all emit
254+
// kRobotResetFailed at kSeverityError (>= bypass_severity), driving the
255+
// AUTOSAR-DEM debounce counter past confirmation_threshold on a single
256+
// logical incident. Latch on first emission per run; reset in onStart.
257+
bool fault_reported_;
258+
251259
std::string computed_controller_name_;
252260

253261
ResetRobotState::Result action_result_;
@@ -313,6 +321,12 @@ class WaitForInputAction : public BT::StatefulActionNode, public FaultReporting
313321
// last read from the server
314322
bool last_success_;
315323
int last_value_;
324+
325+
// Track whether kSignalWaitInputTimeout was actually emitted this run so
326+
// onHalted only fires reportFaultPassed when a FAILED preceded it.
327+
// Without this, every halt accumulated a spurious PASSED, which biases
328+
// LocalFilter::should_forward_passed counters in the medkit reporter.
329+
bool timeout_reported_;
316330
};
317331

318332
} // namespace manymove_cpp_trees

manymove_cpp_trees/include/manymove_cpp_trees/main_imports_helper.hpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -106,10 +106,15 @@ namespace manymove_cpp_trees
106106
//
107107
// source_id defaults to the node's fully-qualified name, which matches the
108108
// SOVD apps[].ros_binding linkage used by the medkit gateway.
109-
inline void installFaultReporter(BT::Blackboard::Ptr blackboard, rclcpp::Node::SharedPtr node)
109+
//
110+
// service_name lets callers remap the FaultManager service for namespaced
111+
// test rigs or multi-robot setups (e.g. "/robot1/fault_manager/report_fault").
112+
inline void installFaultReporter(
113+
BT::Blackboard::Ptr blackboard, rclcpp::Node::SharedPtr node,
114+
const std::string & service_name = "/fault_manager/report_fault")
110115
{
111116
auto reporter = std::make_shared<ros2_medkit_fault_reporter::FaultReporter>(
112-
node, node->get_fully_qualified_name());
117+
node, node->get_fully_qualified_name(), service_name);
113118
blackboard->set(manymove_cpp_trees::kFaultReporterBlackboardKey, reporter);
114119
}
115120

manymove_cpp_trees/src/action_nodes_planner.cpp

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,10 @@ BT::NodeStatus MoveManipulatorAction::onStart()
112112
"collision detected on " + robot_prefix_ + " before motion start");
113113
return BT::NodeStatus::FAILURE;
114114
}
115+
// Heal any prior CONFIRMED collision once the flag has been cleared.
116+
// Without this, the fault lingers in FaultManager after the operator /
117+
// recovery cleared the underlying condition on hardware.
118+
reportFaultPassed(fault_codes::kPlannerCollisionDetected);
115119

116120
// Read move_id.
117121
if (!getInput<std::string>("move_id", move_id_)) {
@@ -138,6 +142,9 @@ BT::NodeStatus MoveManipulatorAction::onStart()
138142
"stop_execution flag set on " + robot_prefix_ + " before motion start");
139143
return BT::NodeStatus::FAILURE;
140144
}
145+
// Heal any prior CONFIRMED e-stop once the flag has been cleared by the
146+
// operator. Without this, the CRITICAL fault lingers after release.
147+
reportFaultPassed(fault_codes::kPlannerEstopTriggered);
141148

142149
return BT::NodeStatus::RUNNING;
143150
}
@@ -227,8 +234,13 @@ BT::NodeStatus MoveManipulatorAction::onRunning()
227234
setHMIMessage(config().blackboard, robot_prefix_, "", "grey");
228235

229236
RCLCPP_INFO(node_->get_logger(), "[MoveManipulatorAction] success => returning SUCCESS");
230-
// Heal the per-attempt soft fault if any retry was reported earlier.
231-
reportFaultPassed(fault_codes::kPlannerRetryAttempt);
237+
// Heal the per-attempt soft fault only when a retry actually occurred.
238+
// On first-attempt success current_try_ is still 0 — no FAILED was
239+
// ever emitted, and a stray PASSED biases
240+
// LocalFilter::should_forward_passed in the medkit reporter.
241+
if (current_try_ > 0) {
242+
reportFaultPassed(fault_codes::kPlannerRetryAttempt);
243+
}
232244
return BT::NodeStatus::SUCCESS;
233245
} else {
234246
config().blackboard->set("trajectory_" + move_id_, trajectory_msgs::msg::JointTrajectory());
@@ -291,6 +303,14 @@ void MoveManipulatorAction::onHalted()
291303

292304
// HMI message
293305
setHMIMessage(config().blackboard, robot_prefix_, "MOTION HALTED", "red");
306+
307+
// Heal the per-attempt soft fault on halt, mirroring WaitForInputAction::
308+
// onHalted and WaitForObjectAction::onHalted. Without this, a halted
309+
// retry loop leaves a lingering kPlannerRetryAttempt soft fault in
310+
// FaultManager until the next successful attempt.
311+
if (current_try_ > 0) {
312+
reportFaultPassed(fault_codes::kPlannerRetryAttempt);
313+
}
294314
}
295315

296316
void MoveManipulatorAction::goalResponseCallback(

manymove_cpp_trees/src/action_nodes_signals.cpp

Lines changed: 71 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -488,7 +488,11 @@ ResetRobotStateAction::ResetRobotStateAction(
488488
goal_sent_(false),
489489
result_received_(false),
490490
unload_traj_success_(false),
491-
load_traj_success_(false)
491+
load_traj_success_(false),
492+
unload_goal_sent_(false),
493+
reset_goal_sent_(false),
494+
load_goal_sent_(false),
495+
fault_reported_(false)
492496
{
493497
if (!config.blackboard) {
494498
throw BT::RuntimeError("ResetRobotStateAction: no blackboard provided.");
@@ -553,6 +557,7 @@ BT::NodeStatus ResetRobotStateAction::onStart()
553557
unload_goal_sent_ = false;
554558
reset_goal_sent_ = false;
555559
load_goal_sent_ = false;
560+
fault_reported_ = false;
556561

557562
// Step 1: Call UnloadTrajController action
558563
UnloadTrajController::Goal unload_traj_goal;
@@ -657,10 +662,13 @@ void ResetRobotStateAction::goalResponseCallback(
657662
name().c_str());
658663
action_result_.success = false;
659664
result_received_ = true;
660-
reportFault(
661-
fault_codes::kRobotResetFailed, kSeverityError,
662-
std::string("ResetRobotStateAction [") + name() +
663-
"]: reset_robot_state goal rejected by server");
665+
if (!fault_reported_) {
666+
fault_reported_ = true;
667+
reportFault(
668+
fault_codes::kRobotResetFailed, kSeverityError,
669+
std::string("ResetRobotStateAction [") + name() +
670+
"]: reset_robot_state goal rejected by server");
671+
}
664672
} else {
665673
RCLCPP_INFO(
666674
node_->get_logger(), "ResetRobotStateAction [%s]: ResetRobotState Goal ACCEPTED by server.",
@@ -676,10 +684,13 @@ void ResetRobotStateAction::resultCallback(
676684
} else {
677685
action_result_.success = false;
678686
action_result_.message = "ResetRobotState aborted or failed";
679-
reportFault(
680-
fault_codes::kRobotResetFailed, kSeverityError,
681-
std::string("ResetRobotStateAction [") + name() +
682-
"]: reset_robot_state action aborted or failed");
687+
if (!fault_reported_) {
688+
fault_reported_ = true;
689+
reportFault(
690+
fault_codes::kRobotResetFailed, kSeverityError,
691+
std::string("ResetRobotStateAction [") + name() +
692+
"]: reset_robot_state action aborted or failed");
693+
}
683694
}
684695
result_received_ = true;
685696
}
@@ -692,10 +703,13 @@ void ResetRobotStateAction::goalResponseCallbackUnloadTraj(
692703
node_->get_logger(),
693704
"ResetRobotStateAction [%s]: UnloadTrajController Goal REJECTED by server.", name().c_str());
694705
unload_traj_success_ = false;
695-
reportFault(
696-
fault_codes::kRobotResetFailed, kSeverityError,
697-
std::string("ResetRobotStateAction [") + name() +
698-
"]: unload_trajectory_controller goal rejected by server");
706+
if (!fault_reported_) {
707+
fault_reported_ = true;
708+
reportFault(
709+
fault_codes::kRobotResetFailed, kSeverityError,
710+
std::string("ResetRobotStateAction [") + name() +
711+
"]: unload_trajectory_controller goal rejected by server");
712+
}
699713
} else {
700714
RCLCPP_INFO(
701715
node_->get_logger(),
@@ -708,18 +722,22 @@ void ResetRobotStateAction::resultCallbackUnloadTraj(
708722
{
709723
if (wrapped_result.code == rclcpp_action::ResultCode::SUCCEEDED) {
710724
unload_traj_success_ = wrapped_result.result->success;
711-
if (!unload_traj_success_) {
725+
if (!unload_traj_success_ && !fault_reported_) {
726+
fault_reported_ = true;
712727
reportFault(
713728
fault_codes::kRobotResetFailed, kSeverityError,
714729
std::string("ResetRobotStateAction [") + name() +
715730
"]: unload_trajectory_controller reported success=false");
716731
}
717732
} else {
718733
unload_traj_success_ = false;
719-
reportFault(
720-
fault_codes::kRobotResetFailed, kSeverityError,
721-
std::string("ResetRobotStateAction [") + name() +
722-
"]: unload_trajectory_controller action aborted or failed");
734+
if (!fault_reported_) {
735+
fault_reported_ = true;
736+
reportFault(
737+
fault_codes::kRobotResetFailed, kSeverityError,
738+
std::string("ResetRobotStateAction [") + name() +
739+
"]: unload_trajectory_controller action aborted or failed");
740+
}
723741
}
724742
}
725743

@@ -731,10 +749,13 @@ void ResetRobotStateAction::goalResponseCallbackLoadTraj(
731749
node_->get_logger(),
732750
"ResetRobotStateAction [%s]: LoadTrajController Goal REJECTED by server.", name().c_str());
733751
load_traj_success_ = false;
734-
reportFault(
735-
fault_codes::kRobotResetFailed, kSeverityError,
736-
std::string("ResetRobotStateAction [") + name() +
737-
"]: load_trajectory_controller goal rejected by server");
752+
if (!fault_reported_) {
753+
fault_reported_ = true;
754+
reportFault(
755+
fault_codes::kRobotResetFailed, kSeverityError,
756+
std::string("ResetRobotStateAction [") + name() +
757+
"]: load_trajectory_controller goal rejected by server");
758+
}
738759
} else {
739760
RCLCPP_INFO(
740761
node_->get_logger(),
@@ -747,18 +768,22 @@ void ResetRobotStateAction::resultCallbackLoadTraj(
747768
{
748769
if (wrapped_result.code == rclcpp_action::ResultCode::SUCCEEDED) {
749770
load_traj_success_ = wrapped_result.result->success;
750-
if (!load_traj_success_) {
771+
if (!load_traj_success_ && !fault_reported_) {
772+
fault_reported_ = true;
751773
reportFault(
752774
fault_codes::kRobotResetFailed, kSeverityError,
753775
std::string("ResetRobotStateAction [") + name() +
754776
"]: load_trajectory_controller reported success=false");
755777
}
756778
} else {
757779
load_traj_success_ = false;
758-
reportFault(
759-
fault_codes::kRobotResetFailed, kSeverityError,
760-
std::string("ResetRobotStateAction [") + name() +
761-
"]: load_trajectory_controller action aborted or failed");
780+
if (!fault_reported_) {
781+
fault_reported_ = true;
782+
reportFault(
783+
fault_codes::kRobotResetFailed, kSeverityError,
784+
std::string("ResetRobotStateAction [") + name() +
785+
"]: load_trajectory_controller action aborted or failed");
786+
}
762787
}
763788
}
764789

@@ -768,7 +793,8 @@ WaitForInputAction::WaitForInputAction(
768793
goal_sent_(false),
769794
result_received_(false),
770795
last_success_(false),
771-
last_value_(0)
796+
last_value_(0),
797+
timeout_reported_(false)
772798
{
773799
// Obtain the ROS2 node from the blackboard
774800
if (!config.blackboard) {
@@ -786,6 +812,7 @@ BT::NodeStatus WaitForInputAction::onStart()
786812
result_received_ = false;
787813
last_success_ = false;
788814
last_value_ = 0;
815+
timeout_reported_ = false;
789816

790817
// Read ports
791818
if (!getInput<std::string>("io_type", io_type_)) {
@@ -863,8 +890,13 @@ BT::NodeStatus WaitForInputAction::onRunning()
863890
std::to_string(desired_value_),
864891
"green");
865892

866-
// Pair: clear timeout filter once the desired value is observed.
867-
reportFaultPassed(fault_codes::kSignalWaitInputTimeout);
893+
// Pair: only clear if we actually emitted the timeout earlier. A
894+
// first-pass success has no FAILED to heal and a stray PASSED biases
895+
// LocalFilter::should_forward_passed.
896+
if (timeout_reported_) {
897+
reportFaultPassed(fault_codes::kSignalWaitInputTimeout);
898+
timeout_reported_ = false;
899+
}
868900
return BT::NodeStatus::SUCCESS;
869901
}
870902

@@ -884,6 +916,7 @@ BT::NodeStatus WaitForInputAction::onRunning()
884916
fault_codes::kSignalWaitInputTimeout, kSeverityWarn,
885917
"WaitForInput " + io_type_ + "[" + std::to_string(ionum_) + "] timed out after " +
886918
std::to_string(timeout_) + "s, last_value=" + std::to_string(last_value_));
919+
timeout_reported_ = true;
887920
return BT::NodeStatus::FAILURE;
888921
}
889922
}
@@ -914,10 +947,14 @@ void WaitForInputAction::onHalted()
914947
action_client_->async_cancel_all_goals();
915948
}
916949
// Heal a previously raised wait-timeout: when the subtree is halted
917-
// mid-wait, the timeout condition no longer holds. Without this,
918-
// any earlier `kSignalWaitInputTimeout` would linger in FaultManager
919-
// until the next successful wait.
920-
reportFaultPassed(fault_codes::kSignalWaitInputTimeout);
950+
// mid-wait, the timeout condition no longer holds. Only emit if we
951+
// actually reported the FAILED earlier — otherwise the PASSED would
952+
// accumulate in LocalFilter::should_forward_passed and bias subsequent
953+
// healing decisions.
954+
if (timeout_reported_) {
955+
reportFaultPassed(fault_codes::kSignalWaitInputTimeout);
956+
timeout_reported_ = false;
957+
}
921958
goal_sent_ = false;
922959
result_received_ = false;
923960
}

0 commit comments

Comments
 (0)