bit-bots · sWintermoor · Jun 29, 2025 · Jul 4, 2025 · Jul 4, 2025 · Jul 4, 2025
diff --git a/.gitignore b/.gitignore
@@ -206,7 +206,7 @@ bitbots_docs_internal
 
 # Neural Network Model Path
 /src/bitbots_vision/models/
-/src/bitbots_motion/bitbots_rl_motion/rl_walk_models/
+/src/bitbots_motion/bitbots_rl_motion/models/
 
 **/.*.
 .idea/*

diff --git a/pixi.lock b/pixi.lock
diff --git a/src/bitbots_behavior/bitbots_blackboard/bitbots_blackboard/capsules/kick_capsule.py b/src/bitbots_behavior/bitbots_blackboard/bitbots_blackboard/capsules/kick_capsule.py
@@ -1,6 +1,7 @@
 from enum import Flag
 from typing import Optional
 
+from geometry_msgs.msg import PoseStamped
 from rclpy.action import ActionClient
 from rclpy.callback_groups import ReentrantCallbackGroup
 from rclpy.duration import Duration
@@ -34,6 +35,7 @@ class WalkKickTargets(Flag):
         RIGHT = True
 
     walk_kick_pub: Publisher
+    rl_kick_pub: Publisher
 
     def __init__(self, node, blackboard):
         super().__init__(node, blackboard)
@@ -42,6 +44,7 @@ def __init__(self, node, blackboard):
         """
         self.walk_kick_pub = self._node.create_publisher(Bool, "/kick", 1)
         # self.connect_dynamic_kick()  Do not connect if dynamic_kick is disabled
+        self.rl_kick_pub = self._node.create_publisher(PoseStamped, "/rl_command/kick_command", 1)
 
     def walk_kick(self, target: WalkKickTargets) -> None:
         """
@@ -81,6 +84,13 @@ def dynamic_kick(self, goal: Kick.Goal) -> None:
         self.last_goal = goal
         self.last_goal_sent = self._node.get_clock().now()
 
+    def rl_kick(self, ball_pose: PoseStamped) -> None:
+        """
+        Trigger the RL kick by publishing the given pose on the RL kick publisher.
+
+        :param ball_pose: Pose of the ball, it is published unchanged
+        """
+        self.rl_kick_pub.publish(ball_pose)
+
     def __feedback_cb(self, feedback):
         self.last_feedback: Kick.Feedback = feedback.feedback
         self.last_feedback_received = self._node.get_clock().now()

diff --git a/...ts_behavior/bitbots_body_behavior/bitbots_body_behavior/behavior_dsd/actions/kick_ball.py b/...ts_behavior/bitbots_body_behavior/bitbots_body_behavior/behavior_dsd/actions/kick_ball.py
@@ -2,6 +2,7 @@
 from bitbots_blackboard.capsules.kick_capsule import KickCapsule
 from bitbots_utils.transforms import quat_from_yaw
 from dynamic_stack_decider.abstract_action_element import AbstractActionElement
+from geometry_msgs.msg import PoseStamped
 
 from bitbots_msgs.action import Kick
 
@@ -123,3 +124,33 @@ def perform(self, reevaluate=False):
                 self._goal_sent = True
             else:
                 self.pop()
+
+
+# TODO: Fix integration
+class RLKick(AbstractKickAction):
+    """
+    Sends the current ball position to the RL kick and pops itself afterwards.
+    """
+
+    def __init__(self, blackboard, dsd, parameters):
+        super().__init__(blackboard, dsd, parameters)
+
+        # Kick related settings of the behavior config (perform() does not read them)
+        behavior_config = self.blackboard.config
+        self.kick_length = behavior_config["kick_cost_kick_length"]
+        self.angular_range = behavior_config["kick_cost_angular_range"]
+        self.max_kick_angle = behavior_config["max_kick_angle"]
+        self.num_kick_angles = behavior_config["num_kick_angles"]
+        self.penalty_kick_angle = behavior_config["penalty_kick_angle"]
+
+    def perform(self, reevaluate=False):
+        """
+        Build a stamped pose from the ball position and hand it to the kick capsule.
+
+        :param reevaluate: Not read by this action
+        """
+        world_model = self.blackboard.world_model
+
+        ball_pose = PoseStamped()
+        ball_pose.header.stamp = self.blackboard.node.get_clock().now().to_msg()
+        ball_pose.header.frame_id = world_model.base_footprint_frame
+
+        # Ball position from the world model: u is written to x, v to y
+        position = ball_pose.pose.position
+        position.x, position.y = world_model.get_ball_position_uv()
+        position.z = 0.0
+
+        # Identity quaternion, the orientation isn't used
+        orientation = ball_pose.pose.orientation
+        orientation.x = orientation.y = orientation.z = 0.0
+        orientation.w = 1.0
+
+        self.blackboard.kick.rl_kick(ball_pose)
+        self.pop()
diff --git a/src/bitbots_lowlevel/bitbots_ros_control/config/wolfgang.yaml b/src/bitbots_lowlevel/bitbots_ros_control/config/wolfgang.yaml
@@ -25,7 +25,7 @@ wolfgang_hardware_interface:
     servos:
       # specifies which information should be read
       read_position: true
-      read_velocity: false
+      read_velocity: true
       read_effort: false
       read_pwm: false
       read_volt_temp: true # this also corresponds for the error byte
@@ -61,7 +61,7 @@ wolfgang_hardware_interface:
         Position_D_Gain: 0 #2800 # [/16] 0~16,383
         Position_I_Gain: 0 #180000 # [/ 65,536] 0~16,383
         #If robot starts to tremble, reduce Position_P_Gain
-        Position_P_Gain: 1200 #1100 # [/ 128] 0~16,383
+        Position_P_Gain: 300 #1100 # [/ 128] 0~16,383
         Feedforward_2nd_Gain: 0 # [/4]
         Feedforward_1st_Gain: 0 # [/4]
         Profile_Acceleration: 0 # 0 for infinite
@@ -89,7 +89,7 @@ wolfgang_hardware_interface:
         Position_D_Gain: 0 #2800 # [/16] 0~16,383
         Position_I_Gain: 0 #180000 # [/ 65,536] 0~16,383
         #If robot starts to tremble, reduce Position_P_Gain
-        Position_P_Gain: 800 #1100 # [/ 128] 0~16,383
+        Position_P_Gain: 1100 #1100 # [/ 128] 0~16,383
         Feedforward_2nd_Gain: 0 # [/4]
         Feedforward_1st_Gain: 0 # [/4]
         Profile_Acceleration: 0 # 0 for infinite
@@ -117,7 +117,7 @@ wolfgang_hardware_interface:
         Position_D_Gain: 0 #2800 # [/16] 0~16,383
         Position_I_Gain: 0 #180000 # [/ 65,536] 0~16,383
         #If robot starts to tremble, reduce Position_P_Gain
-        Position_P_Gain: 4000 #1100 # [/ 128] 0~16,383
+        Position_P_Gain: 600 #1100 # [/ 128] 0~16,383
         Feedforward_2nd_Gain: 0 # [/4]
         Feedforward_1st_Gain: 0 # [/4]
         Profile_Acceleration: 0 # 0 for infinite
@@ -145,7 +145,7 @@ wolfgang_hardware_interface:
         Position_D_Gain: 0 #2800 # [/16] 0~16,383
         Position_I_Gain: 0 #180000 # [/ 65,536] 0~16,383
         #If robot starts to tremble, reduce Position_P_Gain
-        Position_P_Gain: 500 #1100 # [/ 128] 0~16,383
+        Position_P_Gain: 100 #1100 # [/ 128] 0~16,383
         Feedforward_2nd_Gain: 0 # [/4]
         Feedforward_1st_Gain: 0 # [/4]
         Profile_Acceleration: 0 # 0 for infinite

diff --git a/src/bitbots_misc/bitbots_bringup/launch/motion.launch b/src/bitbots_misc/bitbots_bringup/launch/motion.launch
@@ -2,7 +2,8 @@
 <launch>
     <arg name="sim" default="false"/>
     <arg name="viz" default="false"/>
-    <arg name="walking" default="true" description="start the walking" />
+    <arg name="walking" default="false" description="start the walking" /> <!--should be removed-->
+    <arg name="rl_policies" default="true" description="start the policies" />
     <arg name="torqueless_mode" default="false" description="start without torque, for example for testing the falling detection"/>
     <arg name="tts" default="true" description="Whether to speak" />
 
@@ -38,7 +39,7 @@
         <arg name="sim" value="$(var sim)" />
     </include>
 
-    <!-- launch the walking -->
+    <!-- launch the old walking - exists for debug purpose-->
     <group if="$(var walking)">
         <include file="$(find-pkg-share bitbots_quintic_walk)/launch/quintic_walk.launch">
             <arg name="sim" value="$(var sim)"/>
@@ -58,6 +59,12 @@
         </include>
     -->
 
+
+    <!-- launch rl policies, can be disabled with the rl_policies argument -->
+    <group if="$(var rl_policies)">
+        <include file="$(find-pkg-share bitbots_rl_motion)/launch/rl_motion.launch">
+            <arg name="sim" value="$(var sim)"/>
+        </include>
+    </group>
+
     <!-- launch dynup -->
     <include file="$(find-pkg-share bitbots_dynup)/launch/dynup.launch">
         <arg name="sim" value="$(var sim)"/>

diff --git a/src/bitbots_misc/bitbots_bringup/package.xml b/src/bitbots_misc/bitbots_bringup/package.xml
@@ -28,6 +28,7 @@
     <exec_depend>bitbots_localization</exec_depend>
     <exec_depend>bitbots_odometry</exec_depend>
     <exec_depend>bitbots_quintic_walk</exec_depend>
+    <exec_depend>bitbots_rl_motion</exec_depend>
     <exec_depend>bitbots_robot_description</exec_depend>
     <exec_depend>bitbots_ros_control</exec_depend>
     <exec_depend>bitbots_utils</exec_depend>

diff --git a/src/bitbots_motion/bitbots_dynup/config/dynup_config.yaml b/src/bitbots_motion/bitbots_dynup/config/dynup_config.yaml
@@ -33,21 +33,21 @@ bitbots_dynup:
       trunk_height:
         type: double
         description: "End pose trunk height. Depends on walkready of walking."
-        default_value: 0.38
+        default_value: 0.372
         validation:
           bounds<>: [0.0, 1.0]
       trunk_pitch:
         type: double
         description: "End pose trunk pitch. Depends on walkready of walking."
-        default_value: 0.2
+        default_value: 11.459
         validation:
-          bounds<>: [0.0, 1.0]
+          bounds<>: [-40.0, 40.0]
       trunk_x_final:
         type: double
         description: "End pose position of the trunk in x direction. Depends on walkready of walking."
-        default_value: 0.001
+        default_value: -0.0839
         validation:
-          bounds<>: [0.0, 1.0]
+          bounds<>: [-0.2, 0.2]
       arm_side_offset_back:
         type: double
         description: "End pose arm side offset back. Depends on walkready of walking."

diff --git a/src/bitbots_motion/bitbots_rl_motion/CMakeLists.txt b/src/bitbots_motion/bitbots_rl_motion/CMakeLists.txt
diff --git a/src/bitbots_motion/bitbots_rl_motion/README.md b/src/bitbots_motion/bitbots_rl_motion/README.md
@@ -0,0 +1,27 @@
+## General
+
+The package contains a framework which is a capsule for the application of policies on a robot.
+
+## Framework structure
+
+The code is divided into five sections: Configs, Handlers, Nodes, Launch and the rest.
+
+The Nodes-folder contains all relevant ROS-Nodes regarding policy models. These nodes are responsible for starting the policies correctly, feeding them with correct data and publishing their outputs correctly.
+The name of the node describes for which kind of policy it is suitable. 
+The RL Node is a special case. All other nodes are subclasses of the RL Node. It centralizes the execution loop and minimizes boilerplate code.
+
+The Handlers-folder contains all handlers. A handler is a specific type of object which is responsible for processing external data such that they are comprehensible for the policy models. All handlers are subclasses of the Handler class.
+
+The Configs-folder contains all robot/policy specific configurations. Files in the Configs-folder should be in .yaml-format. They also contain the paths to the onnx-policy models.
+
+The Launch-folder contains a launch file which starts all relevant policy nodes.
+
+phase.py and previous_action.py are two files, which do not fall in any of the aforementioned categories. 
+phase.py defines a PhaseObject, which is responsible for the phase management. previous_action.py defines a PreviousActionObject, which is responsible for saving and providing the previous action.
+Both files are located in the bitbots_rl_motion folder. 
+
+## Execution
+
+For proper starting you need a policy model and a config file. The config file should have the same structure as the wolfgang_dribbling_model_config.yaml file.
+Furthermore, you have to create or adjust a node file to your needs. walk_node.py can be used for orientation. If changes are made to the RL_Node class, they should be announced.
+Finally, you define which nodes and policies you want to use in the launch file.
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/__init__.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/__init__.py
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/phase.py
@@ -0,0 +1,37 @@
+import numpy as np
+from rclpy.node import Node
+
+# Please check the code in rl_node.py before changing anything here.
+
+
+class PhaseObject(Node):
+    """
+    Stores the phase state of a policy.
+
+    Keeps the current phase, the phase increment per control step and the
+    phase observation.
+    """
+
+    # Initial phase, every instance works on its own copy (see __init__)
+    _phase: np.ndarray = np.array([0.0, np.pi], dtype=np.float32)
+    # Phase increment per control step, None if the config defines no phase
+    _phase_dt: float
+
+    def __init__(self, config):
+        """
+        :param config: Policy configuration. An optional "phase" entry provides
+            "control_dt" and "gait_frequency".
+        """
+        # NOTE(review): Node.__init__ is not called by this class, so
+        # Node.get_logger() cannot be relied on. A named rclpy logger is used
+        # instead. Consider dropping the Node base class entirely.
+        from rclpy.logging import get_logger
+
+        # .get() lets a config without a "phase" entry reach the fallback branch
+        phase_config = config.get("phase")
+        if phase_config:
+            self._control_dt = phase_config["control_dt"]
+            self._gait_frequency = phase_config["gait_frequency"]
+            self._phase_dt = 2 * np.pi * self._gait_frequency * self._control_dt
+        else:
+            self._control_dt = None
+            self._gait_frequency = None
+            self._phase_dt = None
+            get_logger("phase_object").warning("No phase was found! Using policy without phase!")
+
+        # Copy the class level default so in-place changes never leak into other instances
+        self._phase = type(self)._phase.copy()
+        self._obs_phase = None
+
+    def set_phase(self, new_phase):
+        """
+        :param new_phase: Phase that replaces the stored one
+        """
+        self._phase = new_phase
+
+    def set_obs_phase(self, new_obs_phase):
+        """
+        :param new_obs_phase: Phase observation that replaces the stored one
+        """
+        self._obs_phase = new_obs_phase
+
+    def get_phase(self):
+        """
+        :return: The stored phase
+        """
+        return self._phase
+
+    def get_phase_dt(self):
+        """
+        :return: The phase increment per control step, None if no phase is configured
+        """
+        return self._phase_dt
+
+    def get_obs_phase(self):
+        """
+        :return: The stored phase observation, None until set_obs_phase() was called
+        """
+        return self._obs_phase
diff --git a/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/previous_action.py b/src/bitbots_motion/bitbots_rl_motion/bitbots_rl_motion/previous_action.py
@@ -0,0 +1,14 @@
+import numpy as np
+
+
+class PreviousActionObject:
+    """
+    Stores the previous action of a policy.
+    """
+
+    def __init__(self, config):
+        """
+        :param config: Policy configuration. The number of entries in
+            config["joints"]["ordered_relevant_joint_names"] defines the action size.
+        """
+        # Start with an all-zero action, one entry per relevant joint
+        self._previous_action: np.ndarray = np.zeros(
+            len(config["joints"]["ordered_relevant_joint_names"]), dtype=np.float32
+        )
+
+    def set_previous_action(self, new_previous_action):
+        """
+        :param new_previous_action: Action that replaces the stored one
+        """
+        # Write to the attribute that get_previous_action() returns
+        self._previous_action = new_previous_action
+
+    def get_previous_action(self):
+        """
+        :return: The stored previous action
+        """
+        return self._previous_action
diff --git a/src/bitbots_motion/bitbots_rl_motion/config/rl_walk_sim.yaml b/src/bitbots_motion/bitbots_rl_motion/config/rl_walk_sim.yaml