Skip to content

Commit 961409c

Browse files
committed
feat(simd): add Sub/Div for F32x8 + F64x4 (AVX2 arithmetic completeness)
F32x8 and F64x4 previously had only Add and Mul. The AVX2 fallback for F32x16 needs all four arithmetic ops on the 256-bit types. This change is additive only — no existing code changed. https://claude.ai/code/session_01BTATTRUACijvsK4hqmKUBR
1 parent 03f1aa0 commit 961409c

1 file changed

Lines changed: 60 additions & 0 deletions

File tree

src/simd_avx512.rs

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1259,6 +1259,36 @@ impl MulAssign for F32x8 {
12591259
}
12601260
}
12611261

1262+
impl Sub for F32x8 {
1263+
type Output = Self;
1264+
#[inline(always)]
1265+
fn sub(self, rhs: Self) -> Self {
1266+
Self(unsafe { _mm256_sub_ps(self.0, rhs.0) })
1267+
}
1268+
}
1269+
1270+
impl SubAssign for F32x8 {
1271+
#[inline(always)]
1272+
fn sub_assign(&mut self, rhs: Self) {
1273+
self.0 = unsafe { _mm256_sub_ps(self.0, rhs.0) };
1274+
}
1275+
}
1276+
1277+
impl Div for F32x8 {
1278+
type Output = Self;
1279+
#[inline(always)]
1280+
fn div(self, rhs: Self) -> Self {
1281+
Self(unsafe { _mm256_div_ps(self.0, rhs.0) })
1282+
}
1283+
}
1284+
1285+
impl DivAssign for F32x8 {
1286+
#[inline(always)]
1287+
fn div_assign(&mut self, rhs: Self) {
1288+
self.0 = unsafe { _mm256_div_ps(self.0, rhs.0) };
1289+
}
1290+
}
1291+
12621292
impl fmt::Debug for F32x8 {
12631293
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
12641294
write!(f, "F32x8({:?})", self.to_array())
@@ -1360,6 +1390,36 @@ impl MulAssign for F64x4 {
13601390
}
13611391
}
13621392

1393+
impl Sub for F64x4 {
1394+
type Output = Self;
1395+
#[inline(always)]
1396+
fn sub(self, rhs: Self) -> Self {
1397+
Self(unsafe { _mm256_sub_pd(self.0, rhs.0) })
1398+
}
1399+
}
1400+
1401+
impl SubAssign for F64x4 {
1402+
#[inline(always)]
1403+
fn sub_assign(&mut self, rhs: Self) {
1404+
self.0 = unsafe { _mm256_sub_pd(self.0, rhs.0) };
1405+
}
1406+
}
1407+
1408+
impl Div for F64x4 {
1409+
type Output = Self;
1410+
#[inline(always)]
1411+
fn div(self, rhs: Self) -> Self {
1412+
Self(unsafe { _mm256_div_pd(self.0, rhs.0) })
1413+
}
1414+
}
1415+
1416+
impl DivAssign for F64x4 {
1417+
#[inline(always)]
1418+
fn div_assign(&mut self, rhs: Self) {
1419+
self.0 = unsafe { _mm256_div_pd(self.0, rhs.0) };
1420+
}
1421+
}
1422+
13631423
impl fmt::Debug for F64x4 {
13641424
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
13651425
write!(f, "F64x4({:?})", self.to_array())

0 commit comments

Comments
 (0)