@@ -608,6 +608,44 @@ public static Vector128<short> MultiplyHigh(Vector128<short> left, Vector128<sho
608608 return Vector128 . Narrow ( prodLo , prodHi ) ;
609609 }
610610
/// <summary>
/// Multiplies the packed unsigned 16-bit integers in <paramref name="left"/> by the corresponding
/// packed unsigned 16-bit integers in <paramref name="right"/>, forming unsigned 32-bit intermediate
/// products, and keeps only the upper 16 bits of each product.
/// </summary>
/// <param name="left">
/// The first vector of packed unsigned 16-bit integers to multiply.
/// </param>
/// <param name="right">
/// The second vector of packed unsigned 16-bit integers to multiply.
/// </param>
/// <returns>
/// A vector whose elements are the upper 16 bits of each 32-bit product of the corresponding
/// elements of <paramref name="left"/> and <paramref name="right"/>.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> MultiplyHigh(Vector128<ushort> left, Vector128<ushort> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.MultiplyHigh(left, right);
    }

    // Software fallback: zero-extend every 16-bit lane to 32 bits so that the
    // full product (at most 0xFFFF * 0xFFFF = 0xFFFE0001) fits without overflow.
    (Vector128<uint> lhsLower, Vector128<uint> lhsUpper) = Vector128.Widen(left);
    (Vector128<uint> rhsLower, Vector128<uint> rhsUpper) = Vector128.Widen(right);

    // Multiply lane by lane, then shift right by 16 to move the high word of each
    // product into the low half of its lane. The lanes are unsigned, so the shift
    // fills with zeros (logical shift).
    Vector128<uint> highLower = (lhsLower * rhsLower) >> 16;
    Vector128<uint> highUpper = (lhsUpper * rhsUpper) >> 16;

    // Pack the two 32-bit vectors back into a single vector of 16-bit lanes.
    return Vector128.Narrow(highLower, highUpper);
}
648+
611649 /// <summary>
612650 /// Unpack and interleave 64-bit integers from the high half of <paramref name="left"/> and <paramref name="right"/>
613651 /// and store the results in the result.
@@ -927,7 +965,7 @@ public static Vector128<sbyte> UnpackLow(Vector128<sbyte> left, Vector128<sbyte>
927965 /// The second vector containing packed signed 16-bit integers to subtract.
928966 /// </param>
929967 /// <returns>
930- /// A vector containing the results of subtracting packed unsigned 16-bit integers
968+ /// A vector containing the results of subtracting packed signed 16-bit integers
931969 /// </returns>
932970 [ MethodImpl ( MethodImplOptions . AggressiveInlining ) ]
933971 public static Vector128 < short > SubtractSaturate ( Vector128 < short > left , Vector128 < short > right )
@@ -967,7 +1005,57 @@ public static Vector128<short> SubtractSaturate(Vector128<short> left, Vector128
9671005 }
9681006
9691007 /// <summary>
970- /// Add packed unsigned 8-bit integers in <paramref name="right"/> from packed unsigned 8-bit integers
1008+ /// Subtract packed unsigned 16-bit integers in <paramref name="right"/> from packed unsigned 16-bit integers
1009+ /// in <paramref name="left"/> using saturation, and store the results.
1010+ /// </summary>
1011+ /// <param name="left">
1012+ /// The first vector containing packed unsigned 16-bit integers to subtract from.
1013+ /// </param>
1014+ /// <param name="right">
1015+ /// The second vector containing packed unsigned 16-bit integers to subtract.
1016+ /// </param>
1017+ /// <returns>
1018+ /// A vector containing the results of subtracting packed unsigned 16-bit integers
1019+ /// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> SubtractSaturate(Vector128<ushort> left, Vector128<ushort> right)
{
    // Prefer the hardware saturating-subtract instruction where one is available.
    if (Sse2.IsSupported)
    {
        return Sse2.SubtractSaturate(left, right);
    }

    if (AdvSimd.IsSupported)
    {
        return AdvSimd.SubtractSaturate(left, right);
    }

    if (PackedSimd.IsSupported)
    {
        return PackedSimd.SubtractSaturate(left, right);
    }

    // Software fallback for unsigned saturation: the result must be 0 whenever
    // right > left. Subtracting in widened *unsigned* lanes and clamping afterwards
    // does not work, because the difference wraps to a huge positive value and the
    // clamp then yields ushort.MaxValue instead of 0.
    //
    // Instead compute max(left, right) - right per lane:
    //   left >= right  ->  left  - right  (the exact difference, never negative)
    //   left <  right  ->  right - right  = 0 (saturated at the lower bound)
    // The subtraction can never underflow, so no widening or clamping is needed.
    return Vector128.Max(left, right) - right;
}
1056+
1057+ /// <summary>
1058+ /// Add packed unsigned 8-bit integers in <paramref name="right"/> to packed unsigned 8-bit integers
9711059 /// in <paramref name="left"/> using saturation, and store the results.
9721060 /// </summary>
9731061 /// <param name="left">
@@ -1015,6 +1103,55 @@ public static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte>
10151103 return Vector128 . Narrow ( sumLo , sumHi ) ;
10161104 }
10171105
/// <summary>
/// Adds the packed unsigned 16-bit integers in <paramref name="right"/> to the packed unsigned
/// 16-bit integers in <paramref name="left"/> using unsigned saturation, and returns the results.
/// </summary>
/// <param name="left">
/// The first vector of packed unsigned 16-bit integers to add to.
/// </param>
/// <param name="right">
/// The second vector of packed unsigned 16-bit integers to add.
/// </param>
/// <returns>
/// A vector containing the saturated sums of the packed unsigned 16-bit integers.
/// </returns>
[MethodImpl(MethodImplOptions.AggressiveInlining)]
public static Vector128<ushort> AddSaturate(Vector128<ushort> left, Vector128<ushort> right)
{
    if (Sse2.IsSupported)
    {
        return Sse2.AddSaturate(left, right);
    }

    if (AdvSimd.IsSupported)
    {
        return AdvSimd.AddSaturate(left, right);
    }

    if (PackedSimd.IsSupported)
    {
        return PackedSimd.AddSaturate(left, right);
    }

    // Software fallback: zero-extend each 16-bit lane to 32 bits so the sum
    // (at most 2 * 65535) cannot overflow the lane.
    (Vector128<uint> lhsLower, Vector128<uint> lhsUpper) = Vector128.Widen(left);
    (Vector128<uint> rhsLower, Vector128<uint> rhsUpper) = Vector128.Widen(right);

    // Bounds of the unsigned 16-bit range, expressed in 32-bit lanes.
    Vector128<uint> floor = Vector128<uint>.Zero;
    Vector128<uint> ceiling = Vector128.Create((uint)ushort.MaxValue);

    // Add, then clamp each lane to the unsigned 16-bit range.
    Vector128<uint> totalLower = Clamp(lhsLower + rhsLower, floor, ceiling);
    Vector128<uint> totalUpper = Clamp(lhsUpper + rhsUpper, floor, ceiling);

    // Pack the two 32-bit vectors back into a single vector of unsigned 16-bit lanes.
    return Vector128.Narrow(totalLower, totalUpper);
}
1154+
10181155 /// <summary>
10191156 /// Subtract packed unsigned 8-bit integers in <paramref name="right"/> from packed unsigned 8-bit integers
10201157 /// in <paramref name="left"/> using saturation, and store the results.
0 commit comments