toolchain/src/libc/ldexpf.src at b37490f38acd6fdcb61ddafa6193e5cc1304b571 · CE-Programming/toolchain · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
	.assume	adl=1

	.include	"errno.inc"
	.include	"fenv.inc"

	.section	.text
	; Four exported names; all of them resolve to the same routine (they are
	; either aliased with .set below or placed on the same address as labels).
	.global	_ldexpf
	.type	_ldexpf, @function
	.global	_ldexp
	.type	_ldexp, @function
	; when FLT_RADIX == 2, scalbn is equivalent to ldexp
	.global	_scalbnf
	.type	_scalbnf, @function
	.global	_scalbn
	.type	_scalbn, @function

; When PREFER_OS_LIBC is defined, no code is assembled here: _ldexpf is bound
; to the fixed address 0x0220DC and the other three names alias it.
; NOTE(review): presumably 0x0220DC is an OS-provided routine — confirm.
.ifdef PREFER_OS_LIBC

	.set	_ldexpf, 0x0220DC
	.set	_ldexp, _ldexpf
	.set	_scalbnf, _ldexpf
	.set	_scalbn, _ldexpf

.else

; (set to 0 or 1) avoid returning negative zero on underflow with TI's floats
; When 1, the underflow path clears bit 7 of the sign/exponent byte before the
; sign is extracted, so an underflowed result is always +0.
	.equ	__ldexpf_avoid_negative_zero, 1

; ldexpf behaviour:
; - signed zero, infinity, and NaN inputs are returned unmodified
; - ERRNO and FE_INEXACT are set if a finite value becomes zero or infinite
; - FE_INEXACT is set if rounding occurred
;-------------------------------------------------------------------------------

; Handles inputs whose exponent field is zero (zero or subnormal) and hosts
; .L.ret_self, the shared exit for inputs that are returned unmodified.
; State on entry from _ldexpf:
;   A = 0, carry = sign bit, HL = mantissa << 1, BC = 0,
;   E = original sign/exponent byte (iy + 6), IY = SP at function entry.
.L.maybe_subnormal:
	; A = zero, carry = signbit
	rra	; restore signbit and clear carry
	; adds nothing (BC = 0, carry = 0); only used to set Z when the shifted
	; mantissa is zero
	adc	hl, bc	; BC is zero
.L.ret_self:
	; Also entered from _ldexpf with Z set when the exponent field is all ones.
	; On return HL holds the untouched low three bytes of the input and E still
	; holds the untouched top byte.
	ld	hl, (iy + 3)	; mant
	ret	z	; return zero/inf/NaN
	; Not zero: the input is subnormal. HL = mantissa, A = sign in bit 7.
	dec	bc	; BC is now -1
; .L.subnormal_input:
	; BC is -1 here
	bit	7, (iy + 11)	; scale sign
	ld	de, (iy + 9)	; scale
	jr	nz, .L.move_subnormal_down	; negative scale: shift further right
; .L.move_subnormal_up:
	; Non-negative scale: shift the mantissa left one bit per iteration.
	; The loop ends either when a set bit is carried out of HL (the value has
	; become normal) or when the scale was already zero before the decrement.
	; Neither `add hl, hl` nor `add hl, bc` changes Z, so Z from the `bit`
	; test above is still set throughout.
.L.norm_loop:
	add	hl, hl
	jr	c, .L.normalized
	ex	de, hl
	add	hl, bc	; --scale
	ex	de, hl
	jr	c, .L.norm_loop	; carry = scale was non-zero before the decrement
; .L.still_subnormal:
	; DE is -1 here
	; saves 8F for this path at a cost of 3 bytes:
	; (disabled; note that the jump target below is spelled
	; _ldexpf.finish_subnormal, while this file defines .L.finish_subnormal)
.if 0
	inc	de	; ld e, 0
	jr	_ldexpf.finish_subnormal
.endif
.L.normalized:
	; DE + 1 is the exponent value handed to .L.scale_up_subnormal
	; (0 when the scale ran out before the mantissa became normal).
	inc	de	; don't touch the Z flag
	ex	de, hl	; HL = exponent value, DE = shifted mantissa
	; Z is set here
	jr	.L.scale_up_subnormal

;-------------------------------------------------------------------------------
; When the input and output are normal:
; scaling up  : 60F + 12R + 4W + 2
; scaling down: 60F + 12R + 4W + 4
; Entry point shared by _scalbn, _scalbnf, _ldexp and _ldexpf.
; NOTE(review): presumably implements float ldexpf(float x, int scale) —
; confirm against the C prototypes.
;
; Inputs (read relative to SP at entry):
;   (sp + 3 .. sp + 5)  low three bytes of x: mantissa plus the exponent LSB
;   (sp + 6)            top byte of x: sign bit and upper seven exponent bits
;   (sp + 9 .. sp + 11) scale, a 24-bit signed integer
; Output:
;   HL = low three bytes of the result, E = top byte of the result
; Side effects visible in this file:
;   - may store ERANGE to _errno and OR flag bits into ___fe_cur_env
;   - the slow paths modify the argument bytes at (iy + 5) / (iy + 6) in place
;   - scratch space just below the entry SP is used through push / (iy - 1)
_scalbn:
_scalbnf:
_ldexp:
_ldexpf:
	ld	iy, 0
	lea	bc, iy + 0	; BC = 0
	add	iy, sp	; IY = SP, used to address the arguments
	ld	hl, (iy + 3)	; mant
	add	hl, hl	; HL = mantissa << 1, carry = exponent LSB
	ld	a, (iy + 6)	; expon
	ld	e, a		; signbit
	adc	a, a	; A = 8-bit exponent field, carry = sign bit
	jr	z, .L.maybe_subnormal	; exponent field is zero
	ld	c, a	; BC = exponent field
	inc	a
	jr	z, .L.ret_self	; inf NaN
	ld	a, e		; signbit
	ld	de, (iy + 9)	; scale
	ex	de, hl	; HL = scale, DE = mantissa << 1
	add	hl, bc	; add expon
	bit	7, (iy + 11)	; scale sign
	jr	nz, .L.scale_down
.L.scale_up:
	; HL is [1, $8000FD]
	ld	c, b	; ld bc, 0
	dec	bc	; ld bc, -1
	; Also entered from .L.normalized with BC = -1, HL = exponent value,
	; DE = shifted mantissa and A = sign in bit 7.
.L.scale_up_subnormal:	; <-- HL is [0, $7FFFFE]
	inc	c	; ld bc, $FFFF00
	inc	c	; ld bc, $FFFF01 ; BC is -255 ; sets NZ
	; ld	bc, -255
	add	hl, bc	; carry when the new exponent is >= 255
	jr	c, .L.overflow_to_inf	; taken with NZ set and B = $FF
	; sbc	hl, bc	; restore hl
	; -255 followed by -1 is -256, so L is again the low byte of the new exponent
	dec	l	; we only care about the low 8 bits
	ex	de, hl	; E = new exponent, HL = mantissa << 1
.L.finish_subnormal:
	push	hl	; mantissa << 1 now sits at (iy - 3 .. iy - 1)
	; Repack: A bit 7 = sign, E = exponent, top of stack = mantissa << 1.
	; The rotate chain shifts sign:exponent:mantissa right by one bit so the
	; exponent LSB lands in the top bit of the 24-bit HL result.
.L.finish:
	rla	; extract signbit
	rr	e	; E = sign + upper seven exponent bits, carry = exponent LSB
	rr	(iy - 1)	; rotate the upper byte of the pushed value in place
	pop	hl
	rr	h
	rr	l
	ret

;-------------------------------------------------------------------------------

; Subnormal input with a negative scale, followed by the shared tail that
; raises ERANGE / floating-point exception flags and packs the result.
; State on entry: HL = mantissa, DE = scale (negative), BC = -1.
.L.move_subnormal_down:
	; DE = scale
	; BC is -1 here
	; first we need to test that the result won't be zero
	; NOTE(review): __ictlz is external; presumably it returns the count of
	; leading zero bits of HL in A and leaves BC/DE intact — confirm.
	call	__ictlz
	ex	de, hl	; HL = scale
	; A is [1, 23]
	; return zero if (scale < clz_result - 24) or (clz_result - 25 >= scale)
	add	a, -24	; A is [-23, -1] and carry is cleared
	ld	c, a	; sign extend A
	ld	a, l	; low byte of scale
	sbc	hl, bc	; carry when scale < clz_result - 24
	cpl	; A = -scale - 1, used by .L.shru_common as shift count - 1
	jr	nc, .L.shru_common
; .underflow:
	inc	b	; ld b, 0 ; sets Z
	; The A loaded on the next line is only used on the overflow (NZ) path;
	; the underflow path below overwrites it.
.L.overflow_to_inf:	; <-- NZ is set when infinite
	ld	a, FE_OVERFLOW_MASK | FE_INEXACT_MASK
.L.underflow_to_zero:	; <-- Z is set when underflowing to zero
.L.raise_erange:
	ld	hl, $800000	; with B = $FF this packs to the infinity encoding
	jr	nz, .L.overflow
	ld	a, FE_UNDERFLOW_MASK | FE_INEXACT_MASK
	add	hl, hl	; ld hl, 0
.if __ldexpf_avoid_negative_zero
	; prevents negative zero from being emitted on underflow
	; (clears the sign bit in the argument slot; it is re-read below)
	res	7, (iy + 6)
.endif
.L.overflow:
	ex	de, hl	; keep the result's low three bytes in DE
	ld	hl, ERANGE
	ld	(_errno), hl
	; Entered with A = flag bits to raise and DE = result's low three bytes.
.L.raise_inexact:
	ld	hl, ___fe_cur_env
	or	a, (hl)	; merge the new flags into the byte at ___fe_cur_env
	ld	(hl), a
	; Entered with DE = result's low three bytes and B = $FF or $00.
.L.result_is_exact:
	ld	a, (iy + 6)	; expon
	rla	; extract signbit
	ex	de, hl	; HL = result's low three bytes
	; B is $FF if infinite and $00 otherwise
	rr	b	; B = sign in bit 7 over the old B shifted right
	ld	e, b	; E = top byte of the result
	ret

;-------------------------------------------------------------------------------
; Normal input with a negative scale.
; State on entry: HL = exponent + scale, DE = mantissa << 1, BC = exponent
; field (so B = 0), A = original sign/exponent byte.
.L.scale_down:
	push	de	; mant <<= 1
	ld	e, l	; shift amount
	; HL is not INT_MIN here
	dec	hl
	add	hl, hl	; carry = sign of (exponent + scale - 1)
	jr	nc, .L.finish	; expon > 0
;-------------------------------------------------------------------------------
; The result is subnormal or zero: the mantissa must be shifted right.
.L.shru_to_subnormal:
	; Z is set here
	; (Z comes from the xor below and is still set at the `jr nc`, since
	; `add hl, bc` and `pop` leave it alone)
	xor	a, a
	ld	c, 48	; ld bc, 24 << 1
	add	hl, bc	; no carry when exponent + scale - 1 is below -24
	pop	hl	; reset SP
	jr	nc, .L.underflow_to_zero	; taken with Z set and B = 0
	sub	a, e	; A = -(exponent + scale), low byte
	; writes the implicit bit into the argument slot so the reload below sees it
	set	7, (iy + 5)	; set implicit mantissa bit
	; Shifts the 24-bit mantissa right by A + 1 bits and rounds to nearest even.
	; Also entered from .L.move_subnormal_down with A = -scale - 1.
.L.shru_common:
	; A should be [0, 23]
	ld	b, a
	ld	hl, (iy + 3)	; mantissa
	push	hl	; ld (iy - 3), hl
	xor	a, a	; A = 0 (sticky accumulator), carry = 0
	inc	b
	; shift amount will be [1, 24]
	ld	d, a	; ld d, 0
	ld	c, (iy - 1)	; C = upper byte of the mantissa; C:H:L is shifted
.L.shru_loop:
	; adds the bit shifted out by the previous iteration, so A ends up
	; counting every lost bit except the last one
	adc	a, d	; collect sticky bits
	srl	c
	rr	h
	rr	l
	djnz	.L.shru_loop
	; Here: carry = last bit shifted out (round bit), A = sticky count, B = 0.
	; The loads and pop below do not change the flags.
	ld	(iy - 1), c	; patch the upper byte of the pushed value
	pop	de	; upper byte of DE = C
	ld	d, h
	ld	e, l	; DE = shifted mantissa

	; round upwards to even if (round && (guard || sticky))
	jr	nc, .L.no_round
	; we must ensure that FE_INEXACT is raised since rounding has occurred
	or	a, a
	jr	nz, .L.round_up	; round bit and sticky bits set: above the halfway point
	inc	a	; ld a, 1
	and	a, e	; test guard bit
	jr	z, .L.no_round_inexact	; exact tie and already even: keep, but inexact
.L.round_up:
	inc	de	; round upwards to even (won't overflow)
.L.no_round:
	; Via `jr nc`: carry = 0, so Z here means no sticky bits were lost.
	; Via .L.round_up: A is non-zero, so the result is never zero.
	adc	a, a	; test the sticky and round bits
	jr	z, .L.result_is_exact
	; carry won't be set
.L.no_round_inexact:
	; we need to raise ERANGE if the mantissa was rounded down to zero
	ld	a, c	; UDE
	or	a, d
	or	a, e	; Z when all three result bytes are zero
	ld	a, FE_INEXACT_MASK	; ld leaves the flags from the `or` intact
	jr	nz, .L.raise_inexact
	; Z is set here (the result is zero), so .L.raise_erange takes its
	; underflow-to-zero path
	jr	.L.raise_erange

	.extern	_errno
	.extern	___fe_cur_env
	.extern	__ictlz

.endif