Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 25 additions & 25 deletions mujoco_warp/_src/block_cholesky.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@ def blocked_cholesky_func(
A: wp.array2d[float],
matrix_size: int,
# Out:
L: wp.array2d[float],
U: wp.array2d[float],
):
"""Computes the Cholesky factorization of a symmetric positive definite matrix A in blocks.

It returns a lower-triangular matrix L such that A = L L^T.
It returns an upper-triangular matrix U such that A = U^T U.
"""
# Process the matrix in blocks along its leading dimension.
for k in range(0, matrix_size, block_size):
Expand All @@ -41,24 +41,24 @@ def blocked_cholesky_func(
A_kk_tile = wp.tile_load(A, shape=(block_size, block_size), offset=(k, k), storage="shared")

for j in range(0, k, block_size):
L_block = wp.tile_load(L, shape=(block_size, block_size), offset=(k, j), storage="shared")
wp.tile_matmul(L_block, wp.tile_transpose(L_block), A_kk_tile, alpha=-1.0)
U_block = wp.tile_load(U, shape=(block_size, block_size), offset=(j, k), storage="shared")
wp.tile_matmul(wp.tile_transpose(U_block), U_block, A_kk_tile, alpha=-1.0)

# Compute the Cholesky factorization for the block
wp.tile_cholesky_inplace(A_kk_tile)
wp.tile_store(L, A_kk_tile, offset=(k, k))
wp.tile_cholesky_inplace(A_kk_tile, fill_mode="upper")
wp.tile_store(U, A_kk_tile, offset=(k, k))

# Process the blocks below the current block
# Process the blocks to the right of the current block
for i in range(end, matrix_size, block_size):
A_ik_tile = wp.tile_load(A, shape=(block_size, block_size), offset=(i, k), storage="shared")
A_ki_tile = wp.tile_load(A, shape=(block_size, block_size), offset=(k, i), storage="shared")

for j in range(0, k, block_size):
L_tile = wp.tile_load(L, shape=(block_size, block_size), offset=(i, j), storage="shared")
L_2_tile = wp.tile_load(L, shape=(block_size, block_size), offset=(k, j), storage="shared")
wp.tile_matmul(L_tile, wp.tile_transpose(L_2_tile), A_ik_tile, alpha=-1.0)
U_jk_tile = wp.tile_load(U, shape=(block_size, block_size), offset=(j, k), storage="shared")
U_ji_tile = wp.tile_load(U, shape=(block_size, block_size), offset=(j, i), storage="shared")
wp.tile_matmul(wp.tile_transpose(U_jk_tile), U_ji_tile, A_ki_tile, alpha=-1.0)

wp.tile_lower_solve_inplace(A_kk_tile, wp.tile_transpose(A_ik_tile))
wp.tile_store(L, A_ik_tile, offset=(i, k))
wp.tile_lower_solve_inplace(wp.tile_transpose(A_kk_tile), A_ki_tile)
wp.tile_store(U, A_ki_tile, offset=(k, i))

return blocked_cholesky_func

Expand All @@ -68,41 +68,41 @@ def create_blocked_cholesky_solve_func(block_size: int, matrix_size_static: int)
@wp.func
def blocked_cholesky_solve_func(
# In:
L: wp.array2d[float],
U: wp.array2d[float],
b: wp.array2d[float],
matrix_size: int,
# Out:
x: wp.array2d[float],
):
"""Block Cholesky factorization and solve.

Solves A x = b given the Cholesky factor L (A = L L^T) using blocked forward and backward
Solves A x = b given the Cholesky factor U (A = U^T U) using blocked forward and backward
substitution.
"""
rhs_tile = wp.tile_load(b, shape=(matrix_size_static, 1), offset=(0, 0), storage="shared", bounds_check=False)

# Forward substitution: solve L y = b
# Forward substitution: solve U^T y = b
for i in range(0, matrix_size, block_size):
rhs_view = wp.tile_view(rhs_tile, shape=(block_size, 1), offset=(i, 0))
for j in range(0, i, block_size):
L_block = wp.tile_load(L, shape=(block_size, block_size), offset=(i, j), storage="shared")
U_block = wp.tile_load(U, shape=(block_size, block_size), offset=(j, i), storage="shared")
y_block = wp.tile_view(rhs_tile, shape=(block_size, 1), offset=(j, 0))
wp.tile_matmul(L_block, y_block, rhs_view, alpha=-1.0)
wp.tile_matmul(wp.tile_transpose(U_block), y_block, rhs_view, alpha=-1.0)

L_tile = wp.tile_load(L, shape=(block_size, block_size), offset=(i, i), storage="shared")
wp.tile_lower_solve_inplace(L_tile, rhs_view)
U_tile = wp.tile_load(U, shape=(block_size, block_size), offset=(i, i), storage="shared")
wp.tile_lower_solve_inplace(wp.tile_transpose(U_tile), rhs_view)

# Backward substitution: solve L^T x = y
# Backward substitution: solve U x = y
for i in range(matrix_size - block_size, -1, -block_size):
i_end = i + block_size
tmp_tile = wp.tile_view(rhs_tile, shape=(block_size, 1), offset=(i, 0))
for j in range(i_end, matrix_size, block_size):
L_tile = wp.tile_load(L, shape=(block_size, block_size), offset=(j, i), storage="shared")
U_tile = wp.tile_load(U, shape=(block_size, block_size), offset=(i, j), storage="shared")
x_tile = wp.tile_view(rhs_tile, shape=(block_size, 1), offset=(j, 0))
wp.tile_matmul(wp.tile_transpose(L_tile), x_tile, tmp_tile, alpha=-1.0)
L_tile = wp.tile_load(L, shape=(block_size, block_size), offset=(i, i), storage="shared")
wp.tile_matmul(U_tile, x_tile, tmp_tile, alpha=-1.0)
U_tile = wp.tile_load(U, shape=(block_size, block_size), offset=(i, i), storage="shared")

wp.tile_upper_solve_inplace(wp.tile_transpose(L_tile), tmp_tile)
wp.tile_upper_solve_inplace(U_tile, tmp_tile)

wp.tile_store(x, rhs_tile, offset=(0, 0), bounds_check=False)

Expand Down
4 changes: 2 additions & 2 deletions mujoco_warp/_src/forward.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,8 +385,8 @@ def euler_dense(
qm_integration_tile = wp.tile_diag_add(M_tile, damping_scaled)

Ma_tile = wp.tile_load(efc_Ma_in[worldid], shape=(TILE_SIZE,), offset=(dofid,))
L_tile = wp.tile_cholesky(qm_integration_tile)
qacc_tile = wp.tile_cholesky_solve(L_tile, Ma_tile)
L_tile = wp.tile_cholesky(qm_integration_tile, fill_mode="upper")
qacc_tile = wp.tile_cholesky_solve(L_tile, Ma_tile, fill_mode="upper")
wp.tile_store(qacc_out[worldid], qacc_tile, offset=(dofid))

return euler_dense
Expand Down
15 changes: 12 additions & 3 deletions mujoco_warp/_src/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def _check_friction(name: str, id_: int, condim: int, friction, checks):
jnt_limited_slide_hinge = mjm.jnt_limited & np.isin(mjm.jnt_type, (mujoco.mjtJoint.mjJNT_SLIDE, mujoco.mjtJoint.mjJNT_HINGE))
m.jnt_limited_slide_hinge_adr = np.nonzero(jnt_limited_slide_hinge)[0]
m.jnt_limited_ball_adr = np.nonzero(mjm.jnt_limited & (mjm.jnt_type == mujoco.mjtJoint.mjJNT_BALL))[0]
m.dof_tri_row, m.dof_tri_col = np.tril_indices(mjm.nv)
m.dof_tri_row, m.dof_tri_col = np.triu_indices(mjm.nv)

# precompute body_isdofancestor: which DOFs affect each body
# TODO: Investigate alternative approach such as bitmap
Expand Down Expand Up @@ -640,6 +640,15 @@ def _check_margin(name, t1, t2, margin):
elemid += 1
j = mjm.dof_parentid[j]
m.qM_fullm_elemid = qM_fullm_elemid
m.qM_fullm_upper_i, m.qM_fullm_upper_j, m.qM_fullm_upper_elemid = [], [], []
for j in range(mjm.nv):
for i in range(j, mjm.nv):
elemid = qM_fullm_elemid[i, j]
if elemid == -1:
continue
m.qM_fullm_upper_i.append(j)
m.qM_fullm_upper_j.append(i)
m.qM_fullm_upper_elemid.append(elemid)

# indices for sparse qD_fullm (used in RNE derivatives)
# D-structure is the full square sparsity pattern (both upper and lower triangle)
Expand Down Expand Up @@ -1290,10 +1299,10 @@ def put_data(
qM = np.zeros((mjm.nv, mjm.nv))
if check_version("mujoco>=3.8.1.dev910242375"):
mujoco.mju_sym2dense(qM, mjd.M, mjm.M_rownnz, mjm.M_rowadr, mjm.M_colind)
qLD = np.linalg.cholesky(qM) if (mjd.M != 0.0).any() and (mjd.qLD != 0.0).any() else np.zeros((mjm.nv, mjm.nv))
qLD = np.linalg.cholesky(qM).T if (mjd.M != 0.0).any() and (mjd.qLD != 0.0).any() else np.zeros((mjm.nv, mjm.nv))
else:
mujoco.mj_fullM(mjm, qM, mjd.qM)
qLD = np.linalg.cholesky(qM) if (mjd.qM != 0.0).any() and (mjd.qLD != 0.0).any() else np.zeros((mjm.nv, mjm.nv))
qLD = np.linalg.cholesky(qM).T if (mjd.qM != 0.0).any() and (mjd.qLD != 0.0).any() else np.zeros((mjm.nv, mjm.nv))
padding = sizes["nv_pad"] - mjm.nv
qM_padded = np.pad(qM, ((0, padding), (0, padding)), mode="constant", constant_values=0.0)
d.qM = wp.array(np.full((nworld, sizes["nv_pad"], sizes["nv_pad"]), qM_padded), dtype=float)
Expand Down
20 changes: 10 additions & 10 deletions mujoco_warp/_src/smooth.py
Original file line number Diff line number Diff line change
Expand Up @@ -1082,7 +1082,7 @@ def cholesky_factorize(

dofid = adr[nodeid]
M_tile = wp.tile_load(qM_in[worldid], shape=(TILE_SIZE, TILE_SIZE), offset=(dofid, dofid))
L_tile = wp.tile_cholesky(M_tile)
L_tile = wp.tile_cholesky(M_tile, fill_mode="upper")
wp.tile_store(L_out[worldid], L_tile, offset=(dofid, dofid))

return cholesky_factorize
Expand Down Expand Up @@ -2842,14 +2842,14 @@ def cholesky_solve(
dofid = adr[nodeid]
y_slice = wp.tile_load(y[worldid], shape=(TILE_SIZE,), offset=(dofid,))
L_tile = wp.tile_load(L[worldid], shape=(TILE_SIZE, TILE_SIZE), offset=(dofid, dofid))
x_slice = wp.tile_cholesky_solve(L_tile, y_slice)
x_slice = wp.tile_cholesky_solve(L_tile, y_slice, fill_mode="upper")
wp.tile_store(x[worldid], x_slice, offset=(dofid,))

return cholesky_solve


def _solve_LD_dense(m: Model, d: Data, L: wp.array3d[float], x: wp.array2d[float], y: wp.array2d[float]):
"""Computes dense backsubstitution: x = inv(L'*L)*y."""
"""Computes dense backsubstitution: x = inv(U.T @ U) * y."""
for tile in m.qM_tiles:
wp.launch_tiled(
_tile_cholesky_solve(tile),
Expand All @@ -2868,9 +2868,9 @@ def solve_LD(
x: wp.array2d[float],
y: wp.array2d[float],
):
"""Computes backsubstitution to solve a linear system of the form x = inv(L'*D*L) * y.
"""Computes backsubstitution for the inertia factorization.

L and D are the factors from the Cholesky factorization of the inertia matrix.
Sparse models use MuJoCo's L'*D*L factors; dense models use an upper Cholesky factor U.

This function dispatches to either a sparse or dense solver depending on Model options.

Expand Down Expand Up @@ -2922,9 +2922,9 @@ def cholesky_factorize_solve(
M_tile = wp.tile_load(M[worldid], shape=(TILE_SIZE, TILE_SIZE), offset=(dofid, dofid))
y_slice = wp.tile_load(y[worldid], shape=(TILE_SIZE,), offset=(dofid,))

L_tile = wp.tile_cholesky(M_tile)
L_tile = wp.tile_cholesky(M_tile, fill_mode="upper")
wp.tile_store(L[worldid], L_tile, offset=(dofid, dofid))
x_slice = wp.tile_cholesky_solve(L_tile, y_slice)
x_slice = wp.tile_cholesky_solve(L_tile, y_slice, fill_mode="upper")
wp.tile_store(x[worldid], x_slice, offset=(dofid,))

return cholesky_factorize_solve
Expand All @@ -2949,9 +2949,9 @@ def _factor_solve_i_dense(


def factor_solve_i(m, d, M, L, D, x, y):
"""Factorizes and solves the linear system: x = inv(L'*D*L) * y or x = inv(L'*L) * y.
"""Factorizes and solves the inertia-like linear system.

M is an inertia-like matrix and L, D are its Cholesky-like factors.
Sparse models use MuJoCo's L'*D*L factors; dense models use an upper Cholesky factor U.

This function first factorizes the matrix M (sparse or dense depending on model options),
then solves the system for x given right-hand side y.
Expand All @@ -2960,7 +2960,7 @@ def factor_solve_i(m, d, M, L, D, x, y):
m: The model containing factorization and sparsity information.
d: The data object containing workspace and factorization results.
M: The inertia-like matrix to factorize.
L: Output lower-triangular factor from the factorization (sparse or dense).
L: Output sparse factor or dense upper Cholesky factor.
D: Output diagonal factor from the factorization (only used for sparse).
x: Output array for the solution.
y: Input right-hand side array.
Expand Down
15 changes: 10 additions & 5 deletions mujoco_warp/_src/smooth_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -212,11 +212,16 @@ def test_crb(self, jacobian):
@parameterized.parameters(mujoco.mjtJacobian.mjJAC_SPARSE, mujoco.mjtJacobian.mjJAC_DENSE)
def test_factor_m(self, jacobian):
"""Tests factor_m."""
_, mjd, m, d = test_data.fixture("pendula.xml", overrides={"opt.jacobian": jacobian})
mjm, mjd, m, d = test_data.fixture("pendula.xml", overrides={"opt.jacobian": jacobian})

qLD = d.qLD.numpy()[0].copy()
d.qLDiagInv.fill_(wp.inf)
if jacobian == mujoco.mjtJacobian.mjJAC_DENSE:
qM = np.zeros((mjm.nv, mjm.nv))
if check_version("mujoco>=3.8.1.dev910242375"):
mujoco.mju_sym2dense(qM, mjd.M, mjm.M_rownnz, mjm.M_rowadr, mjm.M_colind)
else:
mujoco.mj_fullM(mjm, qM, mjd.qM)
qLD = np.linalg.cholesky(qM).T
wp_qLD = qLD.copy()
wp_qLD[wp_qLD != 0.0] = np.inf
wp.copy(d.qLD, wp.array(wp_qLD, dtype=float))
Expand All @@ -229,7 +234,7 @@ def test_factor_m(self, jacobian):
_assert_eq(d.qLD.numpy()[0, 0], mjd.qLD, "qLD (sparse)")
_assert_eq(d.qLDiagInv.numpy()[0], mjd.qLDiagInv, "qLDiagInv")
else:
_assert_eq(d.qLD.numpy()[0], qLD, "qLD (dense)")
_assert_eq(d.qLD.numpy()[0], qLD, "qLD (dense upper)")

@parameterized.parameters(mujoco.mjtJacobian.mjJAC_SPARSE, mujoco.mjtJacobian.mjJAC_DENSE)
def test_solve_m(self, jacobian):
Expand Down Expand Up @@ -507,8 +512,8 @@ def test_factor_solve_i(self, jacobian):
_assert_eq(d.qLD.numpy()[0].reshape(-1), mjd.qLD, "qLD")
_assert_eq(d.qLDiagInv.numpy()[0], mjd.qLDiagInv, "qLDiagInv")
else:
qLD = np.linalg.cholesky(qM)
_assert_eq(d.qLD.numpy()[0], qLD, "qLD")
qLD = np.linalg.cholesky(qM).T
_assert_eq(d.qLD.numpy()[0], qLD, "qLD upper")

def test_tendon_armature(self):
mjm, mjd, m, d = test_data.fixture("tendon/armature.xml", keyframe=0)
Expand Down
Loading
Loading