|
9 | 9 | __imulhu: |
; Summary (describes the '+' rows of this listing):
;   Forms the byte-wise partial products X[i]*Y[j] (i, j = 0..2) with mlt, where
;   X[0-2] = l, h, hlu and Y[0-2] = c, b, bcu, and accumulates them into sum[0-5].
;   On return hl holds sum[3-5]; af, bc, de, ix and iy are popped back to their entry values.
10 | 10 | ; TODO: Optimize this routine as this is mostly just a copy paste of __i48mulu with some stuff removed. |
11 | 11 | ; |
12 | | -; CC: 118*r(PC)+39*r(SPL)+38*w(SPL)+37 |
13 | | -; CC: 117 bytes | 118F + 39R + 38W + 37 |
14 | | - push de |
| 12 | +; CC: 113*r(PC)+41*r(SPL)+34*w(SPL)+37 |
| 13 | +; CC: 112 bytes | 113F + 41R + 34W + 37 |
| 14 | + |
15 | 15 | ; backup af |
16 | 16 | push af |
17 | 17 | push ix |
18 | | - ld ix, 0 |
19 | | - add ix, sp |
20 | 18 |
|
21 | 19 | ; On stack to get upper byte when needed |
22 | 20 | push de ; de will also be used to perform the actual multiplication |
23 | 21 | push hl |
24 | 22 | push iy |
25 | 23 | push bc |
26 | 24 |
|
27 | | - ; bc = a[0], a[1] |
28 | | - ld a, l ; a = b[0] |
29 | | - ld iy, (ix - 5) ; iy = b[1], b[2] |
| 25 | + ld ix, 0 |
| 26 | + push ix ; upper bytes of sum at (ix + 0) |
| 27 | + add ix, sp |
30 | 28 |
|
31 | | - ; or a, a ; carry is already cleared |
32 | | - sbc hl, hl |
33 | | - push hl ; upper bytes of sum at -15 |
34 | 29 | ; Stack Use: |
35 | | - ; ix-1 : deu b[5] |
36 | | - ; ix-2 : d b[4] |
37 | | - ; ix-3 : e b[3] |
38 | | - ; ix-4 : hlu b[2] |
39 | | - ; ix-5 : h b[1] |
40 | | - ; ix-6 : l b[0] |
41 | | - ; ix-7 : iyu a[5] |
42 | | - ; ix-8 : iyh a[4] |
43 | | - ; ix-9 : iyl a[3] |
44 | | - ; ix-10 : bcu a[2] |
45 | | - ; ix-11 : b a[1] |
46 | | - ; ix-12 : c a[0] |
47 | | - ; ix-13 : sum[5] |
48 | | - ; ix-14 : sum[4] |
49 | | - ; ix-15 : sum[3] |
50 | | - ; ix-16 : sum[2] |
51 | | - ; ix-17 : sum[1] |
52 | | - ; ix-18 : sum[0] |
| 30 | + ; ix + 14 : deu X[5] |
| 31 | + ; ix + 13 : d X[4] |
| 32 | + ; ix + 12 : e X[3] |
| 33 | + ; ix + 11 : hlu X[2] |
| 34 | + ; ix + 10 : h X[1] |
| 35 | + ; ix + 9 : l X[0] |
| 36 | + ; ix + 8 : iyu Y[5] |
| 37 | + ; ix + 7 : iyh Y[4] |
| 38 | + ; ix + 6 : iyl Y[3] |
| 39 | + ; ix + 5 : bcu Y[2] |
| 40 | + ; ix + 4 : b Y[1] |
| 41 | + ; ix + 3 : c Y[0] |
| 42 | + ; ix + 2 : sum[5] |
| 43 | + ; ix + 1 : sum[4] |
| 44 | + ; ix + 0 : sum[3] |
| 45 | + ; ix - 1 : sum[2] |
| 46 | + ; ix - 2 : sum[1] |
| 47 | + ; ix - 3 : sum[0] |
| 48 | + |
| 49 | + ld iy, (ix + 10) ; iy = X[1], X[2] |
| 50 | + ; bc = Y[0], Y[1] |
| 51 | + ld a, l ; a = X[0] |
53 | 52 |
|
54 | 53 | ; ====================================================================== |
55 | 54 | ; sum[0-1] |
56 | 55 |
|
57 | | - ; a[0]*b[0] |
58 | | - ld d, c ; d = a[0] |
59 | | - ld e, a ; e = b[0] |
60 | | - mlt de |
61 | | - push de ; lower bytes of sum at -18 |
| 56 | + ; X[0]*Y[0] |
| 57 | + ; l = X[0] |
| 58 | + ld h, c ; h = Y[0] |
| 59 | + mlt hl |
| 60 | + push hl ; lower bytes of sum at (ix - 3) |
62 | 61 |
|
63 | 62 | ; ====================================================================== |
64 | 63 | ; sum[1-2] |
65 | | - ld l, d ; hl will store current partial sum |
| 64 | + ld l, h ; hl will store current partial sum |
| 65 | + ld h, 0 |
; NOTE(review): the '+' rows never clear hlu explicitly before using hl as the running sum;
; presumably mlt leaves the upper byte of its register pair zero -- confirm against the CPU manual.
66 | 66 |
|
67 | | - ; a[1]*b[0] |
68 | | - ld d, b ; d = a[1] |
69 | | - ld e, a ; e = b[0] |
| 67 | + ; X[0]*Y[1] |
| 68 | + ld e, a ; e = X[0] |
| 69 | + ld d, b ; d = Y[1] |
70 | 70 | mlt de |
71 | 71 | add hl, de |
72 | 72 |
|
73 | | - ; a[0]*b[1] |
74 | | - ld d, c ; d = a[0] |
75 | | - ld e, iyl ; e = b[1] |
| 73 | + ; X[1]*Y[0] |
| 74 | + ld e, iyl ; e = X[1] |
| 75 | + ld d, c ; d = Y[0] |
76 | 76 | mlt de |
77 | 77 | add hl, de |
78 | 78 |
|
79 | | - ld (ix - 17), hl |
| 79 | + ld (ix - 2), hl |
80 | 80 |
|
81 | 81 | ; ====================================================================== |
82 | 82 | ; sum[2-3] |
83 | | - ld hl, (ix - 16) ; hl will store current partial sum |
| 83 | + ld hl, (ix - 1) ; hl will store current partial sum |
84 | 84 |
|
85 | | - ; a[0]*b[2] |
86 | | - ld d, c ; d = a[0] |
87 | | - ld e, iyh ; e = b[2] |
| 85 | + ; X[2]*Y[0] |
| 86 | + ld e, iyh ; e = X[2] |
| 87 | + ld d, c ; d = Y[0] |
88 | 88 | mlt de |
89 | 89 | add hl, de |
90 | 90 |
|
91 | | - ; a[1]*b[1] |
92 | | - ld d, b ; d = a[1] |
93 | | - ld e, iyl ; e = b[1] |
| 91 | + ; X[1]*Y[1] |
| 92 | + ld e, iyl ; e = X[1] |
| 93 | + ld d, b ; d = Y[1] |
94 | 94 | mlt de |
95 | 95 | add hl, de |
96 | 96 |
|
97 | | - ; a[2]*b[0] |
98 | | - ld d, (ix - 10) ; d = a[2] |
99 | | - ld e, a ; e = b[0] |
| 97 | + ; X[0]*Y[2] |
| 98 | + ld e, a ; e = X[0] |
| 99 | + ld d, (ix + 5) ; d = Y[2] |
| 100 | + ld c, d ; c = Y[2] (kept across mlt de; Y[0] in c is not used again) |
100 | 101 | mlt de |
101 | 102 | add hl, de |
| 103 | + ld d, c ; d = Y[2] (put back for X[1]*Y[2] below) |
102 | 104 |
|
103 | | - ld (ix - 16), hl |
| 105 | + ld (ix - 1), hl |
104 | 106 |
|
105 | 107 | ; ====================================================================== |
106 | 108 | ; sum[3-4] |
107 | | - ld hl, (ix - 15) ; hl will store current partial sum |
| 109 | + ld hl, (ix + 0) ; hl will store current partial sum |
108 | 110 |
|
109 | | - ; a[1]*b[2] |
110 | | - ld d, b ; d = a[1] |
111 | | - ld e, iyh ; e = b[2] |
112 | | - mlt de |
113 | | - add hl, de |
| 111 | + ; X[2]*Y[1] |
| 112 | + ld c, iyh ; c = X[2] |
| 113 | + ; b = Y[1] |
| 114 | + mlt bc |
| 115 | + add hl, bc |
114 | 116 |
|
115 | | - ; a[2]*b[1] |
116 | | - ld d, (ix - 10) ; d = a[2] |
117 | | - ld e, iyl ; e = b[1] |
| 117 | + ; X[1]*Y[2] |
| 118 | + ld e, iyl ; e = X[1] |
| 119 | + ; d = Y[2] |
118 | 120 | mlt de |
119 | 121 | add hl, de |
120 | 122 |
|
121 | | - ld (ix - 15), hl |
| 123 | + ld (ix + 0), hl |
122 | 124 |
|
123 | 125 | ; ====================================================================== |
124 | 126 | ; sum[4-5] |
125 | | - ld hl, (ix - 14) ; hl will store current partial sum |
| 127 | + ld hl, (ix + 1) ; hl will store current partial sum |
126 | 128 |
|
127 | | - ; a[2]*b[2] |
128 | | - ld d, (ix - 10) ; d = a[2] |
129 | | - ld e, iyh ; e = b[2] |
| 129 | + ; X[2]*Y[2] |
| 130 | + ld e, iyh ; e = X[2] |
| 131 | + ld d, (ix + 5) ; d = Y[2] |
130 | 132 | mlt de |
131 | 133 | add hl, de |
132 | 134 |
|
133 | | - ld (ix - 14), l |
134 | | - ld (ix - 13), h |
| 135 | + ld a, l ; a = sum[4]; held in a instead of doing ld (ix + 1), l |
| 136 | + ld (ix + 2), h |
135 | 137 |
|
136 | 138 | ; clean up stack and restore registers |
137 | | - pop de |
| 139 | + pop hl ; discard sum[0-2] (reset SP) |
138 | 140 | pop hl |
; '+' rows: the popped h is the stack copy of sum[4], which was not updated above; the next row patches it from a
| 141 | + ld h, a |
139 | 142 | pop bc |
140 | 143 | pop iy |
141 | 144 |
|
142 | | - ld sp, ix |
| 145 | + pop de ; discard the saved copy of hl (reset SP) |
| 146 | + pop de ; restore DE |
| 147 | + |
143 | 148 | pop ix |
144 | 149 | pop af |
145 | | - pop de |
146 | 150 | ret |
0 commit comments