@@ -165,7 +165,6 @@ unsigned char *rans_compress_O0_32x16(unsigned char *in,
165165 } else {
166166 // Branchless version optimises poorly with gcc unless we have
167167 // AVX2 capability, so have a custom rewrite of it.
168- uint16_t * ptr16 = (uint16_t * )ptr ;
169168 for (i = (in_size & ~(NX - 1 )); likely (i > 0 ); i -= NX ) {
170169 // Unrolled copy of below, because gcc doesn't optimise this
171170 // well in the original form.
@@ -197,15 +196,15 @@ unsigned char *rans_compress_O0_32x16(unsigned char *in,
197196 int c1 = rp [3 - 1 ] > sy [1 ]-> x_max ;
198197
199198#ifdef HTSCODECS_LITTLE_ENDIAN
200- ptr16 [ -1 ] = rp [3 - 0 ]; ptr16 -= c0 ;
201- ptr16 [ -1 ] = rp [3 - 1 ]; ptr16 -= c1 ;
199+ memcpy ( & ptr [ -2 ], & rp [3 - 0 ], 2 ); ptr -= c0 * 2 ;
200+ memcpy ( & ptr [ -2 ], & rp [3 - 1 ], 2 ); ptr -= c1 * 2 ;
202201#else
203- (( uint8_t * ) & ptr16 [ -1 ])[ 0 ] = rp [3 - 0 ];
204- (( uint8_t * ) & ptr16 [ -1 ])[ 1 ] = rp [3 - 0 ]>>8 ;
205- ptr16 -= c0 ;
206- (( uint8_t * ) & ptr16 [ -1 ])[ 0 ] = rp [3 - 1 ];
207- (( uint8_t * ) & ptr16 [ -1 ])[ 1 ] = rp [3 - 1 ]>>8 ;
208- ptr16 -= c1 ;
202+ ptr [ -2 ] = rp [3 - 0 ];
203+ ptr [ - 1 ] = rp [3 - 0 ]>>8 ;
204+ ptr -= c0 * 2 ;
205+ ptr [ -2 ] = rp [3 - 1 ];
206+ ptr [ - 1 ] = rp [3 - 1 ]>>8 ;
207+ ptr -= c1 * 2 ;
209208#endif
210209
211210 rp [3 - 0 ] = c0 ? rp [3 - 0 ]>>16 : rp [3 - 0 ];
@@ -217,15 +216,15 @@ unsigned char *rans_compress_O0_32x16(unsigned char *in,
217216 int c2 = rp [3 - 2 ] > sy [2 ]-> x_max ;
218217 int c3 = rp [3 - 3 ] > sy [3 ]-> x_max ;
219218#ifdef HTSCODECS_LITTLE_ENDIAN
220- ptr16 [ -1 ] = rp [3 - 2 ]; ptr16 -= c2 ;
221- ptr16 [ -1 ] = rp [3 - 3 ]; ptr16 -= c3 ;
219+ memcpy ( & ptr [ -2 ], & rp [3 - 2 ], 2 ); ptr -= c2 * 2 ;
220+ memcpy ( & ptr [ -2 ], & rp [3 - 3 ], 2 ); ptr -= c3 * 2 ;
222221#else
223- (( uint8_t * ) & ptr16 [ -1 ])[ 0 ] = rp [3 - 2 ];
224- (( uint8_t * ) & ptr16 [ -1 ])[ 1 ] = rp [3 - 2 ]>>8 ;
225- ptr16 -= c2 ;
226- (( uint8_t * ) & ptr16 [ -1 ])[ 0 ] = rp [3 - 3 ];
227- (( uint8_t * ) & ptr16 [ -1 ])[ 1 ] = rp [3 - 3 ]>>8 ;
228- ptr16 -= c3 ;
222+ ptr [ -2 ] = rp [3 - 2 ];
223+ ptr [ - 1 ] = rp [3 - 2 ]>>8 ;
224+ ptr -= c2 * 2 ;
225+ ptr [ -2 ] = rp [3 - 3 ];
226+ ptr [ - 1 ] = rp [3 - 3 ]>>8 ;
227+ ptr -= c3 * 2 ;
229228#endif
230229 rp [3 - 2 ] = c2 ? rp [3 - 2 ]>>16 : rp [3 - 2 ];
231230 rp [3 - 3 ] = c3 ? rp [3 - 3 ]>>16 : rp [3 - 3 ];
@@ -239,7 +238,6 @@ unsigned char *rans_compress_O0_32x16(unsigned char *in,
239238 }
240239 if (z < -1 ) abort ();
241240 }
242- ptr = (uint8_t * )ptr16 ;
243241 }
244242 for (z = NX - 1 ; z >= 0 ; z -- )
245243 RansEncFlush (& ransN [z ], & ptr );
@@ -476,7 +474,6 @@ unsigned char *rans_compress_O1_32x16(unsigned char *in,
476474 i32 [i ] = & in [iN [i ]];
477475
478476 for (; likely (i32 [0 ] >= in ); ) {
479- uint16_t * ptr16 = (uint16_t * )ptr ;
480477 for (z = NX - 1 ; z >= 0 ; z -= 4 ) {
481478 RansEncSymbol * sy [4 ];
482479 int k ;
@@ -490,12 +487,12 @@ unsigned char *rans_compress_O1_32x16(unsigned char *in,
490487 for (k = 0 ; k < 4 ; k ++ ) {
491488 int c = ransN [z - k ] > sy [k ]-> x_max ;
492489#ifdef HTSCODECS_LITTLE_ENDIAN
493- ptr16 [ -1 ] = ransN [z - k ];
490+ memcpy ( & ptr [ -2 ], & ransN [z - k ], 2 ) ;
494491#else
495- (( uint8_t * ) & ptr16 [ -1 ])[ 0 ] = ransN [z - k ];
496- (( uint8_t * ) & ptr16 [ -1 ])[ 1 ] = ransN [z - k ]>>8 ;
492+ ptr [ -2 ] = ransN [z - k ];
493+ ptr [ - 1 ] = ransN [z - k ]>>8 ;
497494#endif
498- ptr16 -= c ;
495+ ptr -= c * 2 ;
499496 //ransN[z-k] >>= c<<4;
500497 ransN [z - k ] = c ? ransN [z - k ]>>16 : ransN [z - k ];
501498 }
@@ -506,7 +503,6 @@ unsigned char *rans_compress_O1_32x16(unsigned char *in,
506503 ransN [z - k ] += sy [k ]-> bias + q * sy [k ]-> cmpl_freq ;
507504 }
508505 }
509- ptr = (uint8_t * )ptr16 ;
510506 }
511507
512508 for (z = NX - 1 ; z >=0 ; z -- )
0 commit comments