Skip to content

Commit 9b344b1

Browse files
committed
optimized lshrs/lshru (worst case 61.8% faster)
1 parent d921ea1 commit 9b344b1

1 file changed

Lines changed: 33 additions & 19 deletions

File tree

src/crt/lshr.src

Lines changed: 33 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -11,31 +11,39 @@
1111
.else
1212

1313
__lshrs:
14+
; (int32_t)A:UBC >>= L
1415
; Suboptimal for large shift amounts
15-
; CC: if C!=0: C*(15*r(PC)+3*r(SPL)+3*w(SPL)+4)+13*r(PC)+9*r(SPL)+6*w(SPL)+1
16-
; if C==0: 4*r(PC)+3*r(SPL)+2
16+
; CC:
17+
; L == 0: 4F + 3R + 0W + 1
18+
; L >= 1: 34F + 13R + 10W + 2 + (10F + 1) * (L - 1)
19+
; Max CC: 334F + 13R + 10W + 32
1720
inc l
1821
dec l
1922
ret z
2023
push hl
24+
push de
2125
push bc
26+
ld d, b
2227
ld b, l
23-
or a, a
24-
sbc hl, hl
28+
ld hl, 2
2529
add hl, sp
30+
ld e, a
31+
ld a, (hl)
2632
.L.loop:
27-
sra a
33+
sra e ; A
2834
.local __lshrs.hijack_lshru
2935
__lshrs.hijack_lshru:
30-
inc hl
31-
inc hl
32-
rr (hl)
33-
dec hl
34-
rr (hl)
35-
dec hl
36-
rr (hl)
36+
rra ; UBC
37+
rr d ; B
38+
rr c ; C
3739
djnz .L.loop
40+
ld (hl), a
41+
ld l, c
3842
pop bc
43+
ld b, d
44+
ld c, l
45+
ld a, e
46+
pop de
3947
pop hl
4048
ret
4149

@@ -52,20 +60,26 @@ __lshrs.hijack_lshru:
5260
.else
5361

5462
__lshru:
63+
; (uint32_t)A:UBC >>= L
5564
; Suboptimal for large shift amounts
56-
; CC: if C!=0: C*(15*r(PC)+3*r(SPL)+3*w(SPL)+4)+17*r(PC)+9*r(SPL)+6*w(SPL)+2
57-
; if C==0: 4*r(PC)+3*r(SPL)+2
65+
; CC:
66+
; L == 0: 4F + 3R + 0W + 1
67+
; L >= 1: 36F + 13R + 10W + 2 + (10F + 1) * (L - 1)
68+
; Max CC: 336F + 13R + 10W + 32
5869
inc l
5970
dec l
6071
ret z
6172
push hl
73+
push de
6274
push bc
75+
ld d, b
6376
ld b, l
64-
or a, a
65-
sbc hl, hl
66-
add hl, sp
67-
.L.loop:
68-
srl a
77+
ld hl, 2
78+
add hl, sp ; clears carry
79+
rra ; cheaper than doing srl e later on
80+
ld e, a
81+
ld a, (hl)
82+
; srl e
6983
jr __lshrs.hijack_lshru
7084

7185
.endif

0 commit comments

Comments
 (0)