chiavdf/src/gcd_128.h at eae35d176cdf2328753f4896b3f24efab2dcafe8 · Chia-Network/chiavdf · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
#ifndef GCD_128_H
#define GCD_128_H

bool gcd_128(
    array<uint128, 2>& ab, array<array<uint64, 2>, 2>& uv_uint64, int& uv_uint64_parity, bool is_lehmer, uint128 ab_threshold=0
) {
    static int test_asm_counter=0;
    ++test_asm_counter;

    bool test_asm_run=true;
    bool test_asm_print=false; //(test_asm_counter%1000==0);
    bool debug_output=false;

    if (debug_output) {
        cerr.setf(ios::fixed, ios::floatfield);
        //cerr.setf(ios::showpoint);
    }

    assert(ab[0]>=ab[1] && ab[1]>=0);

    uv_uint64={
        array<uint64,2>{1, 0},
        array<uint64,2>{0, 1}
    };
    uv_uint64_parity=0;

    array<uint128, 2> ab_start=ab;

    bool progress=false;
    int iter=0;

    while (true) {
        if (debug_output) print(
            "======== 1:", iter,
            uint64(ab[0]), uint64(ab[0]>>64), uint64(ab[1]), uint64(ab[1]>>64),
            uint64(ab_threshold), uint64(ab_threshold>>64)
        );

        if (ab[1]<=ab_threshold) {
            break;
        }

        assert(ab[0]>=ab[1] && ab[1]>=0);

        int a_zeros=0;

        //this uses CMOV
        if ((ab[0]>>64)!=0) {
            uint64 a_high(ab[0]>>64);
            assert(a_high!=0);
            a_zeros=__builtin_clzll(a_high);
        } else {
            uint64 a_low(ab[0]);
            assert(a_low!=0);
            a_zeros=64+__builtin_clzll(a_low);
        }

        int a_num_bits=128-a_zeros;
        if (is_lehmer) {
            const int min_bits=96;
            if (a_num_bits<min_bits) {
                a_num_bits=min_bits;
            }
        }

        int shift_amount=a_num_bits-gcd_base_bits;
        if (shift_amount<0) {
            shift_amount=0;
        }

        if (debug_output) print( "2:", a_zeros, a_num_bits, shift_amount );

        //print( "    gcd_128", a_num_bits );

        vector2 ab_double{
            double(uint64(ab[0]>>shift_amount)),
            double(uint64(ab[1]>>shift_amount))
        };
        double ab_threshold_double(uint64(ab_threshold>>shift_amount));

        if (debug_output) print( "3:", ab_double[0], ab_double[1], ab_threshold_double, is_lehmer || (shift_amount!=0) );

        vector2 ab_double_2=ab_double;

        //this doesn't need to be exact
        //all of the comparisons with threshold are >, so this shouldn't be required
        //if (shift_amount!=0) {
        //    ++ab_threshold_double;
        //}

        //void gcd_64(vector2 start_a, pair<matrix2, vector2>& res, int& num_iterations, bool approximate, int max_iterations) {
        //}

        matrix2 uv_double;
        if (!gcd_base_continued_fraction(ab_double, uv_double, is_lehmer || (shift_amount!=0), ab_threshold_double)) {
            print( "        gcd_128 break 1" ); //this is fine
            break;
        }

        if (debug_output) print( "4:", uv_double[0][0], uv_double[1][0], uv_double[0][1], uv_double[1][1], ab_double[0], ab_double[1] );

        if (0) {
            matrix2 uv_double_2;
            if (!gcd_base_continued_fraction_2(ab_double_2, uv_double_2, is_lehmer || (shift_amount!=0), ab_threshold_double)) {
                print( "        gcd_128 break 2" );
                break;
            }

            assert(uv_double==uv_double_2);
            assert(ab_double==ab_double_2);
        }

        array<array<uint64,2>,2> uv_double_int={
            array<uint64,2>{uint64(abs(uv_double[0][0])), uint64(abs(uv_double[0][1]))},
            array<uint64,2>{uint64(abs(uv_double[1][0])), uint64(abs(uv_double[1][1]))}
        };

        int uv_double_parity=(uv_double[1][1]<0)? 1 : 0; //sign bit

        array<array<uint64, 2>, 2> uv_uint64_new;
        if (iter==0) {
            uv_uint64_new=uv_double_int;
        } else {
            if (!multiply_exact(uv_double_int, uv_uint64, uv_uint64_new)) {
                print( "        gcd_128 slow 1" ); //calculated a bunch of quotients and threw all of them away, which is bad
                break;
            }
        }

        int uv_uint64_parity_new=uv_uint64_parity^uv_double_parity;
        bool even=(uv_uint64_parity_new==0);

        if (debug_output) print(
            "5:", uv_uint64_new[0][0], uv_uint64_new[1][0], uv_uint64_new[0][1], uv_uint64_new[1][1], uv_uint64_parity_new
        );

        uint64 uv_00=uv_uint64_new[0][0];
        uint64 uv_01=uv_uint64_new[0][1];
        uint64 uv_10=uv_uint64_new[1][0];
        uint64 uv_11=uv_uint64_new[1][1];

        uint128 a_new_1=ab_start[0]; a_new_1*=uv_00; //a_new_1.set_negative(!even);
        uint128 a_new_2=ab_start[1]; a_new_2*=uv_01; //a_new_2.set_negative(even);
        uint128 b_new_1=ab_start[1]; b_new_1*=uv_11; //b_new_1.set_negative(!even);
        uint128 b_new_2=ab_start[0]; b_new_2*=uv_10; //b_new_2.set_negative(even);

        //CMOV
        //print( "        gcd_128 even", even );
        if (!even) {
            swap(a_new_1, a_new_2);
            swap(b_new_1, b_new_2);
        }

        uint128 a_new_s=a_new_1-a_new_2;
        uint128 b_new_s=b_new_1-b_new_2;

        //if this assert hit, one of the quotients is wrong. the base case is not supposed to return incorrect quotients
        //assert(a_new_s>=b_new_s && b_new_s>=0);
        //commenting this out because a and b can be 128 bits now

        //if (!(a_new_s>=b_new_s && b_new_s>=0)) {
            //print( "        gcd_128 slow 2" );
            //break;
        //}

        uint128 a_new(a_new_s);
        uint128 b_new(b_new_s);

        if (debug_output) print( "6:", uint64(a_new), uint64(a_new>>64), uint64(b_new), uint64(b_new>>64) );

        if (is_lehmer) {
            assert(a_new>=b_new);
            uint128 ab_delta=a_new-b_new;

            // even:
            // +uv_00 -uv_01
            // -uv_10 +uv_11

            uint128 u_delta=uint128(uv_10)+uint128(uv_00); //even: negative. odd: positive
            uint128 v_delta=uint128(uv_11)+uint128(uv_01); //even: positive. odd: negative

            // uv_10 is negative if even, positive if odd
            // uv_11 is positive if even, negative if odd
            bool passed_even=(b_new>=uint128(uv_10) && ab_delta>=v_delta);
            bool passed_odd=(b_new>=uint128(uv_11) && ab_delta>=u_delta);

            if (debug_output) print( "7:", passed_even, passed_odd );

            //CMOV
            if (!(even? passed_even : passed_odd)) {
                print( "        gcd_128 slow 5" ); //throwing away a bunch of quotients because the last one is bad
                break;
            }
        }

        if (a_new<=ab_threshold) {
            if (debug_output) print( "8:" );
            print( "        gcd_128 slow 6" ); //still throwing away quotients
            break;
        }

        ab={a_new, b_new};
        uv_uint64=uv_uint64_new;
        uv_uint64_parity=uv_uint64_parity_new;
        progress=true;

        ++iter;
        if (iter>=gcd_128_max_iter) {
            if (debug_output) print( "9:" );
            break; //this is the only way to exit the loop without wasting quotients
        }

        //todo break;
    }

    #if defined(TEST_ASM) && !defined(ARCH_ARM)
    #ifndef GENERATE_ASM_TRACKING_DATA
    if (test_asm_run) {
        if (test_asm_print) {
            print( "test asm gcd_128", test_asm_counter );
        }

        asm_code::asm_func_gcd_128_data asm_data;

        asm_data.ab_start_0_0=uint64(ab_start[0]);
        asm_data.ab_start_0_8=uint64(ab_start[0]>>64);
        asm_data.ab_start_1_0=uint64(ab_start[1]);
        asm_data.ab_start_1_8=uint64(ab_start[1]>>64);

        asm_data.is_lehmer=uint64(is_lehmer);
        asm_data.ab_threshold_0=uint64(ab_threshold);
        asm_data.ab_threshold_8=uint64(ab_threshold>>64);

        int error_code=hasAVX2()?
		asm_code::asm_avx2_func_gcd_128(&asm_data):
	        asm_code::asm_cel_func_gcd_128(&asm_data);

        assert(error_code==0);
        assert(asm_data.u_0==uv_uint64[0][0]);
        assert(asm_data.u_1==uv_uint64[1][0]);
        assert(asm_data.v_0==uv_uint64[0][1]);
        assert(asm_data.v_1==uv_uint64[1][1]);
        assert(asm_data.parity==uv_uint64_parity);
        assert(asm_data.no_progress==int(!progress));
    }
    #endif
    #endif

    return progress;
}

// end Headerguard GCD_128_H
#endif