@@ -320,40 +320,41 @@ def _create_buffers(self) -> None: # noqa: C901
320320 self ._spatial_tendon_limit = TimestampedBuffer ((N , T_spa , 2 ), dev , wp .float32 )
321321 self ._spatial_tendon_rest_length = TimestampedBuffer ((N , T_spa ), dev , wp .float32 )
322322
323- # -- Pinned-host CPU staging buffers for CPU-only bindings on a non-CPU sim .
323+ # -- CPU staging buffers for CPU-only bindings.
324324 # Pre-allocate all of them so there is no per-step allocation on the hot path.
325325 # These are keyed by tensor_type in self._cpu_staging_buffers; _binding_read
326326 # selects the right one at read time. The sizes must match the binding shapes
327- # (flat float32).
328- if dev != "cpu" :
329- self ._cpu_body_mass = wp .zeros ((N , L ), dtype = wp .float32 , device = "cpu" , pinned = True )
330- self ._cpu_body_coms = wp .zeros ((N , L , 7 ), dtype = wp .float32 , device = "cpu" , pinned = True )
331- self ._cpu_body_inertia = wp .zeros ((N , L , 9 ), dtype = wp .float32 , device = "cpu" , pinned = True )
332- self ._cpu_joint_stiffness = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
333- self ._cpu_joint_damping = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
334- self ._cpu_joint_position_limit = wp .zeros ((N , D , 2 ), dtype = wp .float32 , device = "cpu" , pinned = True )
335- self ._cpu_joint_velocity_limit = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
336- self ._cpu_joint_effort_limit = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
337- self ._cpu_joint_armature = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
338- self ._cpu_joint_friction_static = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
339- self ._cpu_joint_friction_dynamic = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
340- self ._cpu_joint_friction_viscous = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = True )
341- if T_fix > 0 :
342- self ._cpu_fixed_tendon_stiffness = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = True )
343- self ._cpu_fixed_tendon_damping = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = True )
344- self ._cpu_fixed_tendon_limit_stiffness = wp .zeros (
345- (N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = True
346- )
347- self ._cpu_fixed_tendon_rest_length = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = True )
348- self ._cpu_fixed_tendon_offset = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = True )
349- self ._cpu_fixed_tendon_pos_limits = wp .zeros ((N , T_fix , 2 ), dtype = wp .float32 , device = "cpu" , pinned = True )
350- if T_spa > 0 :
351- self ._cpu_spatial_tendon_stiffness = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = True )
352- self ._cpu_spatial_tendon_damping = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = True )
353- self ._cpu_spatial_tendon_limit_stiffness = wp .zeros (
354- (N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = True
355- )
356- self ._cpu_spatial_tendon_offset = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = True )
327+ # (flat float32). On a GPU sim the buffers are pinned-host (page-locked) so
328+ # the wheel can dispatch async copies; on a CPU sim the staging copy is
329+ # functionally redundant but the buffer must still exist for the write
330+ # helpers, so we allocate unpinned and pay only the intra-CPU memcpy.
331+ pinned = dev != "cpu"
332+ self ._cpu_body_mass = wp .zeros ((N , L ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
333+ self ._cpu_body_coms = wp .zeros ((N , L , 7 ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
334+ self ._cpu_body_inertia = wp .zeros ((N , L , 9 ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
335+ self ._cpu_joint_stiffness = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
336+ self ._cpu_joint_damping = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
337+ self ._cpu_joint_position_limit = wp .zeros ((N , D , 2 ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
338+ self ._cpu_joint_velocity_limit = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
339+ self ._cpu_joint_effort_limit = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
340+ self ._cpu_joint_armature = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
341+ self ._cpu_joint_friction_static = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
342+ self ._cpu_joint_friction_dynamic = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
343+ self ._cpu_joint_friction_viscous = wp .zeros ((N , D ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
344+ if T_fix > 0 :
345+ self ._cpu_fixed_tendon_stiffness = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
346+ self ._cpu_fixed_tendon_damping = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
347+ self ._cpu_fixed_tendon_limit_stiffness = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
348+ self ._cpu_fixed_tendon_rest_length = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
349+ self ._cpu_fixed_tendon_offset = wp .zeros ((N , T_fix ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
350+ self ._cpu_fixed_tendon_pos_limits = wp .zeros ((N , T_fix , 2 ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
351+ if T_spa > 0 :
352+ self ._cpu_spatial_tendon_stiffness = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
353+ self ._cpu_spatial_tendon_damping = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
354+ self ._cpu_spatial_tendon_limit_stiffness = wp .zeros (
355+ (N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = pinned
356+ )
357+ self ._cpu_spatial_tendon_offset = wp .zeros ((N , T_spa ), dtype = wp .float32 , device = "cpu" , pinned = pinned )
357358
358359 # Read initial joint/body properties from bindings (one-time CPU reads).
359360 self ._read_initial_properties ()
0 commit comments