Skip to content

Commit c595e79

Browse files
committed
update
1 parent e63b58f commit c595e79

4 files changed

Lines changed: 3 additions & 128 deletions

File tree

src/game/client/videoservices/video_ffmpeg.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -441,10 +441,10 @@ void VideoFFmpegPlayer::PrintInfo( std::string &filePath ) noexcept {
441441

442442
ConColorMsg( Color( 77, 166, 255, 150 ), " Decoder:\n" );
443443
if ( decoder != nullptr ) {
444-
ConColorMsg( Color( 153, 204, 255, 255 ), " Decoder: %s (%s)\n", decoder->name ? decoder->name : "unknown", decoder->long_name ? decoder->long_name : "unknown" );
444+
ConColorMsg( Color( 153, 204, 255, 255 ), " Decoder: %s (%s)\n", decoder->name ? decoder->name : "unknown", decoder->long_name ? decoder->long_name : "unknown" );
445445
}
446446
else {
447-
ConColorMsg( Color( 153, 204, 255, 255 ), " Decoder: unknown\n" );
447+
ConColorMsg( Color( 153, 204, 255, 255 ), " Decoder: unknown\n" );
448448
}
449449
ConColorMsg( Color( 153, 204, 255, 150 ), " Pixel Format: %s\n", pixFmt ? pixFmt : "unknown" );
450450
ConColorMsg( Color( 153, 204, 255, 255 ), " Color range: %s\n", range_str ? range_str : "unknown" );

src/game/client/videoservices/video_material_simd.cpp

Lines changed: 1 addition & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -46,15 +46,6 @@ VMSM_INLINE void VideoMaterialSIMD::RuntimeDispatch() noexcept
4646
else {
4747
m_memcpy_fn = memcpy_sse2;
4848
}
49-
50-
// Conversion function dispatch
51-
/**/
52-
if ( CPUHasAVX2() ) {
53-
m_convert_u16_u8_rrs_fn = convert_u16_u8_rrs_avx2;
54-
}
55-
else {
56-
m_convert_u16_u8_rrs_fn = convert_u16_u8_rrs_sse2;
57-
}
5849
}
5950

6051
// --------------------------------------------------------------------------------------------
@@ -63,7 +54,6 @@ VMSM_INLINE void VideoMaterialSIMD::RuntimeDispatch() noexcept
6354
// src - source buffer: AVFrame uint8_t/uint16_t, 32/64-byte aligned
6455
// --------------------------------------------------------------------------------------------
6556

66-
6757
#define MASK_64B 0x3F // 6: 64-byte blocks (used with __m512i, AVX-512 register = 512 bits)
6858
#define MASK_32B 0x1F // 5: 32-byte blocks (used with __m256i, AVX/AVX2 register = 256 bits)
6959
#define MASK_16B 0xF // 4: 16-byte blocks (used with __m128i, SSE/SSE2 register = 128 bits)
@@ -119,7 +109,7 @@ VMSM_INLINE void VideoMaterialSIMD::memcpy_sse2( uint8_t *VMSM_RESTRICT dst, uin
119109
mov edi, d
120110

121111
align_loop :
122-
movdqa xmm0, [ esi ]
112+
movdqa xmm0, [ esi ]
123113
movdqa xmm1, [ esi + 16 ]
124114
movdqa xmm2, [ esi + 32 ]
125115
movdqa xmm3, [ esi + 48 ]
@@ -142,97 +132,6 @@ VMSM_INLINE void VideoMaterialSIMD::memcpy_sse2( uint8_t *VMSM_RESTRICT dst, uin
142132
memcpy( d, s, rem );
143133
}
144134

145-
// --------------------------------------------------------------------------------------------
146-
// SIMD Convert U16 to U8 - Right Shift Implementations for Source Engine Video Materials
147-
// dst: Source Engine: IVTFTexture uint8_t, 1byte per pixel, 16-byte aligned
148-
// src: FFmpeg: AVFrame uint16_t, 2byte per pixel, 32/64-byte aligned
149-
// --------------------------------------------------------------------------------------------
150-
151-
//-----------------------------------------------------------------
152-
// SIMD Convert_U16_U8_RS: AVX2 STREAM Aligned(dst)-Unaligned(src)
153-
//-----------------------------------------------------------------
154-
155-
VMSM_INLINE void VideoMaterialSIMD::convert_u16_u8_rrs_avx2(
156-
uint8_t *VMSM_RESTRICT dst, const uint16_t *VMSM_RESTRICT src, size_t bts, int sft ) noexcept
157-
{
158-
const int bs = 1 << ( sft - 1 );
159-
const __m256i rnd = _mm256_set1_epi16( bs );
160-
161-
size_t bts2a = ( 64 - ( reinterpret_cast< uintptr_t >( dst ) & MASK_64B ) ) & MASK_64B;
162-
if ( bts2a > 0 ) {
163-
for ( size_t i = 0; i < bts2a; ++i ) {
164-
dst[ i ] = static_cast< uint8_t >( ( src[ i ] + bs ) >> sft );
165-
}
166-
dst += bts2a;
167-
src += bts2a;
168-
bts -= bts2a;
169-
}
170-
171-
const __m256i *s = reinterpret_cast< const __m256i * >( src );
172-
__m256i *d = reinterpret_cast< __m256i * >( dst );
173-
174-
size_t blx = bts >> 6;
175-
const size_t rem = bts & MASK_64B;
176-
177-
while ( blx-- )
178-
{
179-
__m256i v01 = _mm256_lddqu_si256( s++ );
180-
__m256i v02 = _mm256_lddqu_si256( s++ );
181-
__m256i v03 = _mm256_lddqu_si256( s++ );
182-
__m256i v04 = _mm256_lddqu_si256( s++ );
183-
184-
v01 = _mm256_srli_epi16( _mm256_add_epi16( v01, rnd ), sft );
185-
v02 = _mm256_srli_epi16( _mm256_add_epi16( v02, rnd ), sft );
186-
v03 = _mm256_srli_epi16( _mm256_add_epi16( v03, rnd ), sft );
187-
v04 = _mm256_srli_epi16( _mm256_add_epi16( v04, rnd ), sft );
188-
189-
_mm256_stream_si256( d++, _mm256_permute4x64_epi64( _mm256_packus_epi16( v01, v02 ), 0xD8 ) );
190-
_mm256_stream_si256( d++, _mm256_permute4x64_epi64( _mm256_packus_epi16( v03, v04 ), 0xD8 ) );
191-
}
192-
_mm_sfence();
193-
if ( rem > 0 ) {
194-
for ( size_t i = 0; i < rem; ++i ) {
195-
reinterpret_cast< uint8_t * >( d )[ i ] =
196-
static_cast< uint8_t >( ( reinterpret_cast< const uint16_t * >( s )[ i ] + bs ) >> sft );
197-
}
198-
}
199-
}
200-
201-
//-----------------------------------------------------------------
202-
// SIMD Convert_U16_U8_RS: SSE2 STREAM Aligned(dst)-Aligned(src)
203-
//-----------------------------------------------------------------
204-
205-
VMSM_INLINE void VideoMaterialSIMD::convert_u16_u8_rrs_sse2(
206-
uint8_t *VMSM_RESTRICT dst, const uint16_t *VMSM_RESTRICT src, size_t bts, int sft ) noexcept
207-
{
208-
const int bs = 1 << ( sft - 1 );
209-
const __m128i rnd = _mm_set1_epi16( bs );
210-
211-
const __m128i *s = reinterpret_cast< const __m128i * >( src );
212-
__m128i *d = reinterpret_cast< __m128i * >( dst );
213-
214-
size_t blx = bts >> 4;
215-
const size_t rem = bts & MASK_16B;
216-
217-
while ( blx-- )
218-
{
219-
__m128i v01 = _mm_load_si128( s++ );
220-
__m128i v02 = _mm_load_si128( s++ );
221-
222-
v01 = _mm_srli_epi16( _mm_add_epi16( v01, rnd ), sft );
223-
v02 = _mm_srli_epi16( _mm_add_epi16( v02, rnd ), sft );
224-
225-
_mm_stream_si128( d++, _mm_packus_epi16( v01, v02 ) );
226-
}
227-
_mm_sfence();
228-
if ( rem > 0 ) {
229-
for ( size_t i = 0; i < rem; ++i ) {
230-
reinterpret_cast< uint8_t * >( d )[ i ] =
231-
static_cast< uint8_t >( ( reinterpret_cast< const uint16_t * >( s )[ i ] + bs ) >> sft );
232-
}
233-
}
234-
}
235-
236135
// --------------------------------------------------------------------------------------------
237136
// CPU Feature Detection
238137
// These functions check for the presence of specific SIMD instruction sets

src/game/client/videoservices/video_material_simd.h

Lines changed: 0 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -51,19 +51,6 @@ struct VideoMaterialSIMD final {
5151
m_memcpy_fn( dst, src, bts );
5252
}
5353

54-
/**
55-
* @brief Convert uint16_t array to uint8_t with right shift
56-
* @param dst - destination buffer: Valve Source Engine: IVTFTexture I8, uint8_t, 16-byte aligned
57-
* @param src - source buffer: AVFrame uint16_t, 32/64-byte aligned
58-
* @param bts - Number of bytes to copy
59-
* @param sft - Right shift amount (0-15)
60-
*/
61-
VMSM_INLINE void Convert_U16_U8_RRS(
62-
uint8_t *VMSM_RESTRICT dst, const uint16_t *VMSM_RESTRICT src, size_t bts, int sft ) const noexcept
63-
{
64-
m_convert_u16_u8_rrs_fn( dst, src, bts, sft );
65-
}
66-
6754
// --------------------------------------------------------------------------------------------
6855
// CPU feature queries
6956
// --------------------------------------------------------------------------------------------
@@ -84,11 +71,9 @@ struct VideoMaterialSIMD final {
8471

8572
// Function pointer types
8673
using MemcpyFn = void( * )( uint8_t *VMSM_RESTRICT, uint8_t *VMSM_RESTRICT, size_t );
87-
using ConvertShiftFn = void( * )( uint8_t *VMSM_RESTRICT, const uint16_t *VMSM_RESTRICT, size_t, int );
8874

8975
// Function pointers (jump table)
9076
MemcpyFn m_memcpy_fn = nullptr;
91-
ConvertShiftFn m_convert_u16_u8_rrs_fn = nullptr;
9277

9378
// --------------------------------------------------------------------------------------------
9479
// Memcpy implementations
@@ -98,13 +83,4 @@ struct VideoMaterialSIMD final {
9883
static VMSM_INLINE void memcpy_avx2( uint8_t *VMSM_RESTRICT dst, uint8_t *VMSM_RESTRICT src, size_t bts ) noexcept;
9984
// SSE2
10085
static VMSM_INLINE void memcpy_sse2( uint8_t *VMSM_RESTRICT dst, uint8_t *VMSM_RESTRICT src, size_t bts ) noexcept;
101-
102-
// --------------------------------------------------------------------------------------------
103-
// Convert_U16_U8_RRS implementations
104-
// --------------------------------------------------------------------------------------------
105-
106-
// AVX2
107-
static VMSM_INLINE void convert_u16_u8_rrs_avx2( uint8_t *VMSM_RESTRICT dst, const uint16_t *VMSM_RESTRICT src, size_t bts, int sft ) noexcept;
108-
// SSE2
109-
static VMSM_INLINE void convert_u16_u8_rrs_sse2( uint8_t *VMSM_RESTRICT dst, const uint16_t *VMSM_RESTRICT src, size_t bts, int sft ) noexcept;
11086
};

src/lib/public/shaderlib.lib

0 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)