11; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2- ; 2021-02-23 : Igor Pavlov : Public domain
2+ ; 2024-06-18 : Igor Pavlov : Public domain
33;
44; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
55; function for check at link time.
@@ -17,11 +17,41 @@ include 7zAsm.asm
1717
1818MY_ASM_START
1919
20- _TEXT $ LZMADECOPT SEGMENT ALIGN( 64 ) 'CODE'
20+ ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is defined, we use additional SEGMENT with 64-byte alignment.
21+ ; if Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT is not defined, we use default SEGMENT (where default 16-byte alignment of segment is expected).
22+ ; The performance is almost identical in our tests.
23+ ; But the performance can depend on the position of lzmadec code inside instruction cache
24+ ; or micro-op cache line (depending on low address bits in 32-byte/64-byte cache lines).
25+ ; And 64-byte alignment provides a more consistent speed regardless
26+ ; of the code's position in the executable.
27+ ; But also it's possible that code without Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT can be
28+ ; slightly faster than 64-byte aligned code in some cases, if the offset of lzmadec
29+ ; code in a 64-byte block after compilation provides better speed for some reason.
30+ ; Note that Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT adds an extra section to the ELF file.
31+ ; If you don't want to get that extra section, do not define Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT (the default block below defines it when it is undefined, so that default must be removed as well).
32+
33+ ifndef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT ; supply a default only when the symbol is not already defined
34+ if (IS_LINUX gt 0 ) ; IS_LINUX is not defined in this view - presumably it comes from 7zAsm.asm; TODO confirm
35+ Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1
36+ else
37+ Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT equ 1 ; same value as the IS_LINUX branch: both paths define the symbol
38+ endif
39+ endif ; NOTE(review): the symbol is always defined past this point, so the "else" side of later "ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT" tests is only reachable if one of the equ lines above is removed
2140
41+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT ; segment variant: the code is placed in its own segment declared with ALIGN(64)
42+ _TEXT $ LZMADECOPT SEGMENT ALIGN( 64 ) 'CODE' ; opened here; the matching ENDS is near the end of the file under the same ifdef
2243MY_ALIGN macro num:req
2344 align num ; emit exactly the alignment requested by the caller
45+ ; align 16 ; commented-out alternative that would ignore num and always align to 16
2446endm
47+ else ; default-segment variant: no extra segment is opened
48+ MY_ALIGN macro num:req
49+ ; We expect that ".text" is aligned to 16 bytes.
50+ ; So we don't need larger alignment inside our function.
51+ align 16 ; num is required by the macro signature but not used: alignment is always 16 here
52+ endm
53+ endif
54+
2555
2656MY_ALIGN_16 macro
2757 MY_ALIGN 16
@@ -610,7 +640,11 @@ PARAM_lzma equ REG_ABI_PARAM_0
610640PARAM_limit equ REG_ABI_PARAM_1
611641PARAM_bufLimit equ REG_ABI_PARAM_2
612642
643+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT ; segment variant: no alignment directive is emitted before the procedure
613644; MY_ALIGN_64 ; left disabled
645+ else
646+ MY_ALIGN_16 ; default-segment variant: align before the procedure entry (MY_ALIGN_16 invokes MY_ALIGN 16)
647+ endif
614648MY_PROC LzmaDec_DecodeReal_3 , 3
615649MY_PUSH_PRESERVED_ABI_REGS
616650
@@ -1298,6 +1332,8 @@ fin:
12981332MY_POP_PRESERVED_ABI_REGS
12991333MY_ENDP
13001334
1335+ ifdef Z7_LZMA_DEC_OPT_ASM_USE_SEGMENT ; close the segment only when the matching ifdef near the top of the file opened it
13011336_TEXT $ LZMADECOPT ENDS
1337+ endif
13021338
13031339end
0 commit comments