@@ -17,167 +17,156 @@ constant static const ulong RC[] = {
1717 0x8000000000008080ul , 0x0000000080000001ul , 0x8000000080008008ul ,
1818};
1919
20-
2120ulong swap_endian_64 (ulong value ) {
22- return ((value & 0x00000000000000FFULL ) << 56 ) |
23- ((value & 0x000000000000FF00ULL ) << 40 ) |
24- ((value & 0x0000000000FF0000ULL ) << 24 ) |
25- ((value & 0x00000000FF000000ULL ) << 8 ) |
26- ((value & 0x000000FF00000000ULL ) >> 8 ) |
27- ((value & 0x0000FF0000000000ULL ) >> 24 ) |
28- ((value & 0x00FF000000000000ULL ) >> 40 ) |
29- ((value & 0xFF00000000000000ULL ) >> 56 );
21+ return ((value & 0x00000000000000FFULL ) << 56 ) |
22+ ((value & 0x000000000000FF00ULL ) << 40 ) |
23+ ((value & 0x0000000000FF0000ULL ) << 24 ) |
24+ ((value & 0x00000000FF000000ULL ) << 8 ) |
25+ ((value & 0x000000FF00000000ULL ) >> 8 ) |
26+ ((value & 0x0000FF0000000000ULL ) >> 24 ) |
27+ ((value & 0x00FF000000000000ULL ) >> 40 ) |
28+ ((value & 0xFF00000000000000ULL ) >> 56 );
3029}
3130
31+ kernel void sha3 (global ulong * buffer , ulong nonce_start , ulong difficulty ,
32+ uint num_rounds , global ulong * output_1 ) {
33+
34+ // output_1[0] = 0;
35+ // output_1[1] = 0;
36+ ulong state [25 ];
37+ for (uint i = 0 ; i < num_rounds ; i ++ ) {
38+
39+ for (uint j = 0 ; j < 25 ; j ++ ) {
40+ state [j ] = 0 ;
41+ }
42+ state [0 ] = nonce_start + get_global_id (0 ) + i * get_global_size (0 );
43+ state [1 ] = buffer [1 ];
44+ state [2 ] = buffer [2 ];
45+ state [3 ] = buffer [3 ];
46+
47+ state [4 ] = buffer [4 ];
48+ state [5 ] = buffer [5 ];
49+
50+ state [16 ] ^= 0x8000000000000000ul ;
51+
52+ uint r , x , y , t ;
53+ ulong tmp , current , C [5 ];
54+ for (r = 0 ; r < 24 ; ++ r ) {
55+ for (x = 0 ; x < 5 ; ++ x ) {
56+ C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
57+ state [x + 20 ];
58+ }
59+ for (x = 0 ; x < 5 ; ++ x ) {
60+ tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
61+ for (y = 0 ; y < 5 ; ++ y ) {
62+ state [x + y * 5 ] ^= tmp ;
63+ }
64+ }
65+ current = state [1 ];
66+ for (t = 0 ; t < 24 ; ++ t ) {
67+ tmp = state [pos [t ]];
68+ state [pos [t ]] = rotate (current , rot [t ]);
69+ current = tmp ;
70+ }
71+ for (y = 0 ; y < 25 ; y += 5 ) {
72+ for (x = 0 ; x < 5 ; ++ x )
73+ C [x ] = state [y + x ];
74+ for (x = 0 ; x < 5 ; ++ x ) {
75+ state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
76+ }
77+ }
78+ state [0 ] ^= RC [r ];
79+ }
80+
81+ for (uint j = 4 ; j < 25 ; j ++ ) {
82+ state [j ] = 0 ;
83+ }
84+ state [4 ] = 0x06 ;
85+ state [16 ] = 0x8000000000000000ul ;
86+
87+ for (r = 0 ; r < 24 ; ++ r ) {
88+ for (x = 0 ; x < 5 ; ++ x ) {
89+ C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
90+ state [x + 20 ];
91+ }
92+ for (x = 0 ; x < 5 ; ++ x ) {
93+ tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
94+ for (y = 0 ; y < 5 ; ++ y ) {
95+ state [x + y * 5 ] ^= tmp ;
96+ }
97+ }
98+ current = state [1 ];
99+ for (t = 0 ; t < 24 ; ++ t ) {
100+ tmp = state [pos [t ]];
101+ state [pos [t ]] = rotate (current , rot [t ]);
102+ current = tmp ;
103+ }
104+ for (y = 0 ; y < 25 ; y += 5 ) {
105+ for (x = 0 ; x < 5 ; ++ x )
106+ C [x ] = state [y + x ];
107+ for (x = 0 ; x < 5 ; ++ x ) {
108+ state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
109+ }
110+ }
111+ state [0 ] ^= RC [r ];
112+ }
113+
114+ for (uint j = 4 ; j < 25 ; j ++ ) {
115+ state [j ] = 0 ;
116+ }
117+ state [4 ] = 0x06 ;
118+ state [16 ] = 0x8000000000000000ul ;
119+
120+ // round 3
121+ for (r = 0 ; r < 24 ; ++ r ) {
122+ for (x = 0 ; x < 5 ; ++ x ) {
123+ C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
124+ state [x + 20 ];
125+ }
126+ for (x = 0 ; x < 5 ; ++ x ) {
127+ tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
128+ for (y = 0 ; y < 5 ; ++ y ) {
129+ state [x + y * 5 ] ^= tmp ;
130+ }
131+ }
132+ current = state [1 ];
133+ for (t = 0 ; t < 24 ; ++ t ) {
134+ tmp = state [pos [t ]];
135+ state [pos [t ]] = rotate (current , rot [t ]);
136+ current = tmp ;
137+ }
138+ for (y = 0 ; y < 25 ; y += 5 ) {
139+ for (x = 0 ; x < 5 ; ++ x )
140+ C [x ] = state [y + x ];
141+ for (x = 0 ; x < 5 ; ++ x ) {
142+ state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
143+ }
144+ }
145+ state [0 ] ^= RC [r ];
146+ }
32147
33- kernel void sha3 (global ulong * buffer ,
34- ulong nonce_start , ulong difficulty ,
35- uint num_rounds , global ulong * output_1
36- ) {
37-
38- output_1 [0 ] = 0 ;
39- output_1 [1 ] = 0 ;
40- ulong state [25 ];
41- for (uint i = 0 ;i < num_rounds ; i ++ ) {
42-
43- for (uint j = 0 ; j < 25 ; j ++ ) {
44- state [j ] = 0 ;
45- }
46- state [0 ] = nonce_start + get_global_id (0 ) + i * get_global_size (0 );
47- state [1 ] = buffer [1 ];
48- state [2 ] = buffer [2 ];
49- state [3 ] = buffer [3 ];
50-
51- state [4 ] = buffer [4 ];
52- state [5 ] = buffer [5 ];
53-
54- state [16 ] ^= 0x8000000000000000ul ;
55-
56-
57-
58-
59-
60- uint r , x , y , t ;
61- ulong tmp , current , C [5 ];
62- for (r = 0 ; r < 24 ; ++ r ) {
63- for (x = 0 ; x < 5 ; ++ x ) {
64- C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
65- state [x + 20 ];
66- }
67- for (x = 0 ; x < 5 ; ++ x ) {
68- tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
69- for (y = 0 ; y < 5 ; ++ y ) {
70- state [x + y * 5 ] ^= tmp ;
71- }
72- }
73- current = state [1 ];
74- for (t = 0 ; t < 24 ; ++ t ) {
75- tmp = state [pos [t ]];
76- state [pos [t ]] = rotate (current , rot [t ]);
77- current = tmp ;
78- }
79- for (y = 0 ; y < 25 ; y += 5 ) {
80- for (x = 0 ; x < 5 ; ++ x )
81- C [x ] = state [y + x ];
82- for (x = 0 ; x < 5 ; ++ x ) {
83- state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
84- }
85- }
86- state [0 ] ^= RC [r ];
87- }
88-
89-
90- for (uint j = 4 ; j < 25 ; j ++ ) {
91- state [j ] = 0 ;
92- }
93- state [4 ] = 0x06 ;
94- state [16 ] = 0x8000000000000000ul ;
95-
96- for (r = 0 ; r < 24 ; ++ r ) {
97- for (x = 0 ; x < 5 ; ++ x ) {
98- C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
99- state [x + 20 ];
100- }
101- for (x = 0 ; x < 5 ; ++ x ) {
102- tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
103- for (y = 0 ; y < 5 ; ++ y ) {
104- state [x + y * 5 ] ^= tmp ;
105- }
106- }
107- current = state [1 ];
108- for (t = 0 ; t < 24 ; ++ t ) {
109- tmp = state [pos [t ]];
110- state [pos [t ]] = rotate (current , rot [t ]);
111- current = tmp ;
112- }
113- for (y = 0 ; y < 25 ; y += 5 ) {
114- for (x = 0 ; x < 5 ; ++ x )
115- C [x ] = state [y + x ];
116- for (x = 0 ; x < 5 ; ++ x ) {
117- state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
118- }
119- }
120- state [0 ] ^= RC [r ];
121- }
122-
123-
124- for (uint j = 4 ; j < 25 ; j ++ ) {
125- state [j ] = 0 ;
126- }
127- state [4 ] = 0x06 ;
128- state [16 ] = 0x8000000000000000ul ;
129-
130- // round 3
131- for (r = 0 ; r < 24 ; ++ r ) {
132- for (x = 0 ; x < 5 ; ++ x ) {
133- C [x ] = state [x ] ^ state [x + 5 ] ^ state [x + 10 ] ^ state [x + 15 ] ^
134- state [x + 20 ];
135- }
136- for (x = 0 ; x < 5 ; ++ x ) {
137- tmp = C [(x + 4 ) % 5 ] ^ rotate (C [(x + 1 ) % 5 ], 1ul );
138- for (y = 0 ; y < 5 ; ++ y ) {
139- state [x + y * 5 ] ^= tmp ;
140- }
141- }
142- current = state [1 ];
143- for (t = 0 ; t < 24 ; ++ t ) {
144- tmp = state [pos [t ]];
145- state [pos [t ]] = rotate (current , rot [t ]);
146- current = tmp ;
147- }
148- for (y = 0 ; y < 25 ; y += 5 ) {
149- for (x = 0 ; x < 5 ; ++ x )
150- C [x ] = state [y + x ];
151- for (x = 0 ; x < 5 ; ++ x ) {
152- state [x + y ] = C [x ] ^ (~C [(x + 1 ) % 5 ] & C [(x + 2 ) % 5 ]);
153- }
154- }
155- state [0 ] ^= RC [r ];
156- }
157-
158-
159- // check difficulty
160- ulong swap = swap_endian_64 (state [0 ]);
161- if (swap < difficulty ) {
162- output_1 [0 ] = nonce_start + get_global_id (0 ) + i * get_global_size (0 );
163- output_1 [1 ] = swap ;
164- }
165- else {
148+ // check difficulty
149+ ulong swap = swap_endian_64 (state [0 ]);
150+ if (swap < difficulty ) {
166151 if (output_1 [1 ] == 0 || output_1 [1 ] > swap ) {
167- output_1 [1 ] = swap ;
152+ output_1 [0 ] = nonce_start + get_global_id (0 ) + i * get_global_size (0 );
153+ output_1 [1 ] = swap ;
168154 }
169- // if (output_1[1] < nonce_start+ get_global_id(0)) {
170- // output_1[1] = nonce_start + get_global_id(0);
171- // }
172- }
155+ } else {
156+ if (output_1 [1 ] == 0 || output_1 [1 ] > swap ) {
157+ // output_1[0] = nonce_start + get_global_id(0) + i *
158+ // get_global_size(0);
159+ output_1 [1 ] = swap ;
160+ }
161+ // if (output_1[1] < nonce_start+ get_global_id(0)) {
162+ // output_1[1] = nonce_start + get_global_id(0);
163+ // }
164+ }
173165
174- // output_1[0] = difficulty;
175- // output_1[0] = nonce_start + get_global_id(0) ;
166+ // output_1[0] = difficulty;
167+ // output_1[0] = nonce_start + get_global_id(0) ;
176168 // output_1[0] = 1;
177- }
178-
179-
180-
169+ }
181170
182171 // // Compare difficulty
183172 // bool le = true;
@@ -225,7 +214,8 @@ for (uint i = 0;i< num_rounds; i++) {
225214 // // n -= d
226215 // int r = 0;
227216 // for (int j = 31; j >= 0; --j) {
228- // // There is no temporary overflow, because in OpenCL uchar + uchar is
217+ // // There is no temporary overflow, because in OpenCL uchar + uchar
218+ // is
229219 // // ulong (not really sure, but it's bigger than uchar)
230220 // if (n[j] < output_buffer[j] + r) {
231221 // n[j] = n[j] - r - output_buffer[j];
0 commit comments