Skip to content

Commit 2108183

Browse files
ZERICO2005 authored and mateoconlechuga committed
optimize 8 bit denominator case for fallback lldvrmu
1 parent 7a82a0b commit 2108183

1 file changed

Lines changed: 60 additions & 3 deletions

File tree

src/crt/lldvrmu.src

Lines changed: 60 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,23 +116,39 @@ __lldvrmu.hijack:
116116

117117
__lldvrmu:
118118
__lldvrmu.hijack:
119+
; Fallback routine that does not disable interrupts or use shadow registers.
120+
; Uses a special fast path for 8-bit denominators (0 - 255).
119121
; Atrociously slow.
120122

121123
push hl
122124
ex (sp), ix
123125
ld iy, 0
124126
add iy, sp
125127

128+
; test if the denominator is less than 256 (fast path)
126129
push bc
130+
; test bits [8, 55]
131+
ld hl, (iy + 16)
132+
ld bc, (iy + 19)
133+
adc hl, bc
134+
jr nz, .L.not_8_bit
135+
; note: the 24-bit sum is also zero when it wraps around; carry is then set and bits [8, 55] are non-zero
136+
ld a, l ; ld a, 0
137+
; test if bits [56, 63] are non-zero or if carry was set from before
138+
sbc a, (iy + 22)
139+
jr nc, .L.denominator_is_8_bit
140+
.L.not_8_bit:
141+
; denominator >= 256
142+
127143
push de
128-
; or a, a
144+
or a, a
129145
sbc hl, hl
130146
ex de, hl
131147
sbc hl, hl
132148
ld c, l
133149
ld b, l
134-
ld a, 64
135-
jr .L.start
150+
ld a, 64 + 1
151+
; jr .L.start
136152

137153
; (iy + 21) = denominator [48:63]
138154
; (iy + 18) = denominator [24:47]
@@ -148,6 +164,7 @@ __lldvrmu.hijack:
148164
; SP = iy - 6
149165

150166
.L.loop:
167+
; worst-case CC per iter: 85F + 38R + 20W + 2
151168
dec a
152169
jr z, .L.finish
153170
.L.start:
@@ -229,4 +246,44 @@ __lldvrmu.hijack:
229246
pop ix
230247
ret
231248

249+
.L.denominator_is_8_bit:
250+
; Fast path for an 8-bit denominator: shift-and-subtract division, one dividend bit per iteration.
; On entry A is zero and carry is cleared; A accumulates the 8-bit remainder.
; The 64-bit dividend is shifted left through IX (low 24 bits), HL, E and D (top byte).
251+
ex de, hl ; HL = caller's UDE (dividend bits [24, 47]); the old HL ends up in DE and is overwritten next
252+
pop de ; DE = value saved by the earlier push bc (dividend bits [48, 63])
253+
ld c, (iy + 15) ; denominator
254+
ld b, 64 ; loop counter: one iteration per dividend bit
255+
.L.loop_8_bit:
256+
; worst-case CC per iter: 20F + 1
257+
add ix, ix ; UHL
258+
adc hl, hl ; UDE
259+
rl e ; C
260+
rl d ; B
261+
rla ; remainder
262+
jr c, .L.bit_1 ; a bit was shifted out of A: remainder >= 256 > C, so it must be reduced
263+
cp a, c
264+
jr c, .L.bit_0 ; remainder < denominator: quotient bit stays 0
265+
.L.bit_1:
266+
sub a, c ; remainder -= denominator (result fits in 8 bits again)
267+
inc ixl ; set quotient bit 0, which add ix, ix just left clear
268+
.L.bit_0:
269+
djnz .L.loop_8_bit
270+
; B is zero here
271+
272+
; store the 64-bit quotient over the stack slots the denominator was read from
273+
ld (iy + 15), ix
274+
ld (iy + 18), hl
275+
ld (iy + 21), e
276+
ld (iy + 22), d
277+
278+
; return the 8-bit remainder zero-extended in registers BC:UDE:UHL (nothing is written to memory here)
279+
ex.s de, hl ; zero UHL and UDE
280+
ld c, b
281+
ld e, b
282+
ld d, b
283+
ld h, b
284+
ld l, a ; remainder
285+
286+
pop ix ; restore the caller's IX that was saved on the stack at routine entry
287+
ret
288+
232289
.endif

0 commit comments

Comments
 (0)