Skip to content

Commit 68bb040

Browse files
ZERICO2005 and runer112
committed
optimize lshr(u/s) with the power of ex (sp), hl
Co-authored-by: Zachary Wassall <runer112@gmail.com>
1 parent 9b344b1 commit 68bb040

1 file changed

Lines changed: 19 additions & 26 deletions

File tree

src/crt/lshr.src

Lines changed: 19 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -15,36 +15,32 @@ __lshrs:
1515
; Suboptimal for large shift amounts
1616
; CC:
1717
; L == 0: 4F + 3R + 0W + 1
18-
; L >= 1: 34F + 13R + 10W + 2 + (10F + 1) * (L - 1)
19-
; Max CC: 334F + 13R + 10W + 32
18+
; L >= 1: 27F + 15R + 12W + 2 + (10F + 1) * (L - 1)
19+
; Max CC: 327F + 15R + 12W + 32
2020
inc l
2121
dec l
2222
ret z
23-
push hl
2423
push de
24+
ld e, a
25+
ld a, c
2526
push bc
26-
ld d, b
2727
ld b, l
28-
ld hl, 2
29-
add hl, sp
30-
ld e, a
31-
ld a, (hl)
28+
inc sp
29+
ex (sp), hl
3230
.L.loop:
3331
sra e ; A
3432
.local __lshrs.hijack_lshru
3533
__lshrs.hijack_lshru:
36-
rra ; UBC
37-
rr d ; B
38-
rr c ; C
34+
rr h ; UBC
35+
rr l ; B
36+
rra ; C
3937
djnz .L.loop
40-
ld (hl), a
41-
ld l, c
38+
ex (sp), hl
39+
dec sp
4240
pop bc
43-
ld b, d
44-
ld c, l
41+
ld c, a
4542
ld a, e
4643
pop de
47-
pop hl
4844
ret
4945

5046
.endif
@@ -64,22 +60,19 @@ __lshru:
6460
; Suboptimal for large shift amounts
6561
; CC:
6662
; L == 0: 4F + 3R + 0W + 1
67-
; L >= 1: 36F + 13R + 10W + 2 + (10F + 1) * (L - 1)
68-
; Max CC: 336F + 13R + 10W + 32
63+
; L >= 1: 30F + 15R + 12W + 2 + (10F + 1) * (L - 1)
64+
; Max CC: 330F + 15R + 12W + 32
6965
inc l
7066
dec l
7167
ret z
72-
push hl
7368
push de
69+
ld e, a
70+
ld a, c
7471
push bc
75-
ld d, b
7672
ld b, l
77-
ld hl, 2
78-
add hl, sp ; clears carry
79-
rra ; cheaper than doing srl e later on
80-
ld e, a
81-
ld a, (hl)
82-
; srl e
73+
inc sp
74+
ex (sp), hl
75+
srl e ; A
8376
jr __lshrs.hijack_lshru
8477

8578
.endif

0 commit comments

Comments
 (0)