Skip to content

Commit 27b4ca6

Browse files
ZERICO2005 authored and mateoconlechuga committed
improved shifting by 48-63 in llshr
1 parent e72c315 commit 27b4ca6

1 file changed

Lines changed: 32 additions & 3 deletions

File tree

src/crt/llshr.src

Lines changed: 32 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,32 +9,62 @@
99

1010
__llshru: ; unsigned right-shift entry point; presumably the 64-bit value is in BC:DE:HL with BC as the top 16 bits - confirm against the CRT calling convention
1111
; Suboptimal for large shift amounts
12+
; shift == 0 : 26F + 10R + 6W + 2
13+
; shift [ 1, 47] : 64F + 18R + 16W + 3 + (shift - 1) * (24F + 2R + 2W + 3)
14+
; shift == 48 : 42F + 10R + 16W + 2
15+
; shift [49, 63] : 42F + 10R + 19W + 2 + __ishrs
16+
; max CC (shift 47): 1168F + 110R + 108W + 141
1217
push af ; save AF; popped again at .L.finish
1318
push iy ; save IY; popped again at .L.finish
1419
ld iy, 0
1520
add iy, sp ; IY = SP, so the stack can be addressed with (iy + d)
1621
ld a, (iy + 9) ; A = shift count; presumably the byte just past saved IY, saved AF and the return address (3 bytes each) - confirm
22+
cp a, 48 ; carry is clear when A >= 48 (unsigned compare)
23+
jr nc, .L.llshru_48_63 ; A >= 48: take the fast path, entering it with carry clear (zero fill)
1724
or a, a ; set Z if the shift count is zero
1825
jr z, .L.finish ; shifting by 0 leaves the value untouched: just restore IY/AF
1926
push de ; DE and HL are saved here; the code that uses and restores them is outside this hunk
2027
push hl
2128
srl b ; first step shifts a 0 into bit 7 of B (logical), where the signed loop uses sra b
2229
jr .L.hijack_llshru ; enter the shared loop body just after its sra b
2330

31+
.L.llshrs_48_63: ; signed entry for shift counts >= 48 (reached from __llshrs via jr nc after cp a, 48)
32+
rlc b ; rotate left: carry = bit 7 of B
33+
rrc b ; rotate back: B is restored and carry again holds the original bit 7
34+
; Carry = Sign
35+
.L.llshru_48_63: ; <-- Carry is cleared (reached via jr nc after cp a, 48)
36+
sbc hl, hl ; HL = all zeros if carry is clear, all ones if set; carry keeps its value
37+
ex de, hl ; DE = fill pattern; the old DE now in HL is discarded (ex does not touch flags)
38+
sbc hl, hl ; HL = fill pattern again, from the carry that is still intact
39+
ld l, c
40+
ld h, b ; low two bytes of HL = old B:C; the byte above them keeps the fill
41+
sub a, 48 ; A = shift - 48; Z is set when the shift was exactly 48
42+
ld c, a ; C = remaining shift count (ld leaves the flags from sub intact)
43+
; this can be converted to call __ishrs if needed
44+
call nz, __ishrs ; only called when the remaining count is non-zero; __ishrs is defined elsewhere, presumably an arithmetic right shift of HL by C - confirm. NOTE(review): the comment above names the routine already called here - confirm what conversion was meant
45+
ld b, e ; B = low byte of the fill pattern in DE (assumes __ishrs leaves E unchanged - confirm)
46+
ld c, e ; C = same fill byte
47+
jr .L.finish ; DE/HL were never pushed on this path, so rejoin after the pop de at the shared pop iy / pop af
48+
2449
__llshrs:
2550
; Suboptimal for large shift amounts
51+
; shift == 0 : 26F + 10R + 6W + 2
52+
; shift [ 1, 47] : 61F + 18R + 16W + 3 + (shift - 1) * (24F + 2R + 2W + 3)
53+
; shift == 48 : 46F + 10R + 16W + 2
54+
; shift [49, 63] : 46F + 10R + 19W + 2 + __ishrs
55+
; max CC (shift 47): 1165F + 110R + 108W + 141
2656
push af
2757
push iy
2858
ld iy, 0
2959
add iy, sp
3060
ld a, (iy + 9)
61+
cp a, 48
62+
jr nc, .L.llshrs_48_63
3163
or a, a
3264
jr z, .L.finish
3365
push de
3466
push hl
3567

36-
.local __llshr_common
37-
__llshr_common:
3868
.L.loop:
3969
sra b
4070
.L.hijack_llshru:
@@ -55,7 +85,6 @@ __llshr_common:
5585
ld l, e
5686
ld h, d
5787
pop de
58-
.local .L.finish
5988
.L.finish:
6089
pop iy
6190
pop af

0 commit comments

Comments
 (0)