Skip to content

Commit 314b2ae

Browse files
committed
added __llshift_alt
1 parent 0e354c9 commit 314b2ae

7 files changed

Lines changed: 126 additions & 29 deletions

File tree

src/crt/dtof.src

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -75,8 +75,8 @@ __dtof:
7575
add hl, hl
7676
add hl, hl
7777
; offset = -$380 - -$47F = $FF = -1 ; therefore decrement
78-
; H = exponent + 1
79-
ld l, 29 ; f64_mant_bits - f32_mant_bits = 52 - 23 = 29
78+
ld l, h ; L = exponent + 1
79+
ld h, 29 ; f64_mant_bits - f32_mant_bits = 52 - 23 = 29
8080
ex (sp), hl ; (SP) = exponent/shift, HL = lo24
8181

8282
; clear exponent
@@ -101,14 +101,15 @@ __dtof:
101101
and a, $1F
102102
.L.round_up:
103103
.L.round_down:
104-
call __llshru
104+
call __llshru_alt
105105
; B, C, and UDE are zero here
106+
pop bc ; C = exponent + 1
106107
or a, a
107108
jr z, .L.no_round
108109
inc hl ; does not overflow
109110
.L.no_round:
110-
pop af ; a = exponent + 1, flags = 29 = ---5H3V-C
111-
sbc a, b ; decrement exponent and clear carry
111+
ld a, c
112+
dec a
112113
rra
113114
jr nc, .L.even_exponent
114115
ld bc, $800000
@@ -216,5 +217,5 @@ __dtof:
216217

217218
.extern __lland
218219
.extern __llcmpzero
219-
.extern __llshru
220+
.extern __llshru_alt
220221
.extern __lshru

src/crt/dtoll.src

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -47,19 +47,19 @@ __dtoull:
4747
inc a ; A is one less than it should be here to allow for the CPL trick in shift_right
4848
; C is [16, 31]
4949
cp a, c ; only call __llshl if the shift amount is [0, 63]
50-
ld l, a
50+
ld h, a
5151
ex (sp), hl
5252
; shift is non-zero and [1, 11] in the non-UB case
53-
call c, __llshl
53+
call c, __llshl_alt
5454
jr .finish
5555
.shift_right:
5656
; A is [-1, -52]
5757
; expon is [0, 51]
5858
cpl
59-
ld l, a
59+
ld h, a
6060
ex (sp), hl
6161
; shift is [0, 51]
62-
call __llshru
62+
call __llshru_alt
6363
.finish:
6464
pop af ; reset SP
6565
pop af
@@ -85,5 +85,5 @@ __dtoull:
8585
jr .finish_zero_or_one
8686

8787
.extern __llneg
88-
.extern __llshl
89-
.extern __llshru
88+
.extern __llshl_alt
89+
.extern __llshru_alt

src/crt/llshl_alt.src

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
.assume adl=1
2+
3+
.section .text
4+
.global __llshl_alt
5+
.type __llshl_alt, @function
6+
7+
__llshl_alt:
8+
; Suboptimal for large shift amounts
; Shifts the 64-bit value held in B:C:UDE:UHL left, one bit per loop
; iteration. The iteration count is read from the stack at SP+10 (after the
; two pushes below). Assuming 3-byte stack slots (ADL mode, per the
; `.assume adl=1` in this file), that is the middle byte of the word the
; caller left on top of its stack; the callers in this commit place the
; count there with `ld h, a` followed by `ex (sp), hl` or `push hl`.
; A and the flags are saved on entry and restored before returning.
9+
push af ; save A and flags; restored by the pop af before ret
10+
ld a, b ; A carries the top byte while B serves as the djnz counter
11+
12+
push hl ; HL is borrowed to address the stack
13+
ld hl, 10
14+
add hl, sp
15+
ld b, (hl) ; B = shift count byte at SP+10
16+
pop hl ; restore the value's low part
17+
18+
inc b ; pre-increment: the loop is entered at its djnz
19+
jr .L.begin
20+
21+
.L.loop:
22+
add hl, hl ; shift UHL left, carry out of its top bit
23+
ex de, hl
24+
adc hl, hl ; shift UDE left, carry in from UHL
25+
ex de, hl
26+
rl c ; carry continues into C ...
27+
rla ; ... and into A (the stand-in for B)
28+
.L.begin:
29+
djnz .L.loop
30+
31+
ld b, a ; move the shifted top byte back into B
32+
pop af
33+
ret

src/crt/llshr_alt.src

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
.assume adl=1
2+
3+
.section .text
4+
5+
; Export the labels this file actually defines (__llshru_alt, __llshrs_alt).
; Callers reference `__llshru_alt` via `.extern`, so the exported names must
; carry the `_alt` suffix and match the `.type` directives below.
.global __llshru_alt
6+
.type __llshru_alt, @function
7+
.global __llshrs_alt
8+
.type __llshrs_alt, @function
9+
10+
__llshru_alt:
11+
; Suboptimal for large shift amounts
; Logical (zero-filling) right shift of the 64-bit value in B:C:UDE:UHL,
; one bit per loop iteration. The count is read from (IY + 10) with IY = SP
; after the two pushes below. Assuming 3-byte stack slots (ADL mode, per the
; `.assume adl=1` in this file), that is the middle byte of the word the
; caller left on top of its stack. A and the flags are saved and restored.
12+
push af
13+
push iy
14+
ld iy, 0
15+
add iy, sp ; IY = SP, used to address the stack below
16+
ld a, (iy + 10) ; A = shift count
17+
or a, a
18+
jr z, .L.finish ; count of zero: leave the value untouched
19+
push de ; UDE saved at IY-3 .. IY-1 (upper byte at IY-1)
20+
push hl ; UHL saved at IY-6 .. IY-4 (upper byte at IY-4)
21+
srl b ; first step shifts a zero into bit 7 of B ...
22+
jr .L.hijack_llshru_alt ; ... then joins the shared loop after its sra b
23+
24+
__llshrs_alt:
25+
; Suboptimal for large shift amounts
; Arithmetic (sign-preserving) variant: same prologue as __llshru_alt, but
; it falls straight into the shared loop, so every step uses sra b.
26+
push af
27+
push iy
28+
ld iy, 0
29+
add iy, sp ; IY = SP, used to address the stack below
30+
ld a, (iy + 10) ; A = shift count
31+
or a, a
32+
jr z, .L.finish ; count of zero: leave the value untouched
33+
push de ; UDE saved at IY-3 .. IY-1 (upper byte at IY-1)
34+
push hl ; UHL saved at IY-6 .. IY-4 (upper byte at IY-4)
35+
36+
.local __llshr_alt_common
37+
__llshr_alt_common:
38+
.L.loop:
39+
sra b ; keeps bit 7 of B; after the srl entry that bit is already 0
40+
.L.hijack_llshru_alt:
41+
rr c ; carry ripples down through each lower byte in turn
42+
rr (iy - 1) ; upper byte of UDE, rotated in its stack slot
43+
rr d
44+
rr e
45+
rr (iy - 4) ; upper byte of UHL, rotated in its stack slot
46+
rr h
47+
rr l
48+
dec a
49+
jr nz, .L.loop
50+
51+
; Reassemble the 24-bit registers: the upper bytes were shifted on the
; stack, the lower two bytes of each were shifted in the registers.
ld (iy - 3), e ; store shifted E into the saved UDE slot
52+
ld (iy - 2), d ; store shifted D into the saved UDE slot
53+
ex de, hl ; D:E = shifted H:L
54+
pop hl ; upper byte of HL = shifted byte from the stack (low bytes stale)
55+
ld l, e ; fill in the shifted low bytes
56+
ld h, d
57+
pop de ; UDE = fully shifted value rebuilt in its stack slot
58+
.local .L.finish
59+
.L.finish:
60+
pop iy
61+
pop af ; restore A and flags saved on entry
62+
ret

src/crt/ltod.src

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,13 +63,13 @@ __lltod:
6363
.else
6464
.L.no_round_inexact:
6565
.endif
66-
ld h, b
66+
ld l, b
6767
ld a, c
68-
ld l, c
68+
ld h, c
6969
pop bc
7070

7171
ex (sp), hl ; (SP) = shift
72-
call __llshru
72+
call __llshru_alt
7373
add a, 51 - 1 ; compensate for the implicit mantissa bit
7474

7575
; BC/exponent = [$434*, $43E*]
@@ -138,13 +138,13 @@ __ltod:
138138
; Maximum exponent: $434 (2^52)
139139
; It is assumed that A is [0, 51] here, or [-52, -1] before adding 52
140140
push hl
141-
ld l, a
141+
ld h, a
142142
ex (sp), hl ; (SP) = shift
143-
call __llshl
144-
ex (sp), hl ; (SP) = shifted HL, L = shift
143+
call __llshl_alt
144+
ex (sp), hl ; (SP) = shifted HL, H = shift
145145

146146
ld a, 51
147-
sub a, l
147+
sub a, h
148148

149149
; exponent = ($400 + (base2_logarithm - 1)) << 4
150150
; BC = $4PPM
@@ -173,8 +173,8 @@ __ltod:
173173
.extern __lneg
174174
.extern __lctlz
175175
.extern __llctlz
176-
.extern __llshl
177-
.extern __llshru
176+
.extern __llshl_alt
177+
.extern __llshru_alt
178178
.extern __llneg
179179
.extern __lladd_1
180180
.extern ___fe_cur_env

src/softfloat/s_normSubnormalF64Sig.src

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,9 @@ _softfloat_normSubnormalF64Sig:
2626
inc hl
2727
ld (iy + 0), hl
2828
; z.sig = sig<<shiftDist;
29-
ld l, a
29+
ld h, a
3030
ex (sp), hl
31-
call __llshl
31+
call __llshl_alt
3232
pop af ; reset SP
3333
ld (iy + 2), hl
3434
ld (iy + 5), de
@@ -37,6 +37,6 @@ _softfloat_normSubnormalF64Sig:
3737
lea hl, iy + 0 ; ABI struct ptr
3838
ret
3939

40-
.extern __llshl
40+
.extern __llshl_alt
4141
.extern __llctlz
4242
.extern __ineg

src/softfloat/s_shiftRightJam64.src

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,13 +33,14 @@ _softfloat_shiftRightJam64:
3333
ld de, (iy + 7)
3434
ld bc, (iy + 10)
3535
jr nc, .L.overflow_shift
36+
ld h, l
3637
push hl ; >> shift
3738
sub a, l ; A = 0 - L
3839
and a, $3F
39-
ld l, a
40+
ld h, a
4041
push hl ; << shift
4142
ld hl, (iy + 4)
42-
call __llshl
43+
call __llshl_alt
4344
xor a, a
4445
call __llcmpzero
4546
jr z, .L.no_round
@@ -49,7 +50,7 @@ _softfloat_shiftRightJam64:
4950
ld hl, (iy + 4)
5051
ld de, (iy + 7)
5152
ld bc, (iy + 10)
52-
call __llshru
53+
call __llshru_alt
5354
or a, l
5455
ld l, a
5556
pop af ; reset SP
@@ -70,5 +71,5 @@ _softfloat_shiftRightJam64:
7071
ret
7172

7273
.extern __llcmpzero
73-
.extern __llshl
74-
.extern __llshru
74+
.extern __llshl_alt
75+
.extern __llshru_alt

0 commit comments

Comments
 (0)