@@ -51,6 +51,7 @@ const float PI = 3.141591f;
5151const float TWO_PI_OVER_3 = 2.0f*PI*0.66666f;
5252const float TWO_OVER_PI = 2.0f / PI;
5353
54+ const vec2 ZERO = vec2(0.0,0.0);
5455const vec2 CPI = vec2(PI,0.0f);
5556const vec2 ONE = vec2(1.0f,0.0f);
5657const vec2 MINUS_ONE = vec2(-1.0f,0.0f);
@@ -99,7 +100,7 @@ vec2 cexp(vec2 z){
99100}
100101
101102vec2 clog(vec2 z){
102- return vec2(log(length(z)), carg(z));
103+ return vec2(log(length(z)), carg(z).x );
103104}
104105
105106vec2 cpow(vec2 a, vec2 b){
@@ -265,54 +266,64 @@ vec2 im(vec2 z){
265266
266267const int GAMMA_PRECISION = 7;
267268
269+ const float INF = 1.0/0.0;
270+
268271const float GAMMA_COEFFICIENTS[7] = float[](
269272 1.000000000190015, 76.18009172947146, -86.50532032941677,
270273 24.01409824083091, -1.231739572450155, 1.208650973866179e-3, -5.395239384953e-6
271274);
272275
273276const vec2 SQUARE_ROOT_TWO_PI = vec2(sqrt(PI * 2.0f),0.0f);
274277
275- vec2 cgamma (vec2 z){
276- vec2 sum = vec2(GAMMA_COEFFICIENTS[0],0.0);
278+ vec2 clngamma (vec2 z) {
279+ vec2 sum = vec2(GAMMA_COEFFICIENTS[0], 0.0);
277280 for(int i = 1; i < 7; ++i) {
278281 sum = cadd(sum, cdiv(vec2(GAMMA_COEFFICIENTS[i], 0.0), cadd(z, vec2(float(i), 0.0))));
279282 }
280- vec2 t = cadd(z, vec2(5.5, 0.0));
281- vec2 exp_term = cmult(cpow(t, cadd(z, vec2(0.5, 0.0))), cexp(-t));
282- vec2 result_pos = cdiv(cmult(cmult(exp_term, SQUARE_ROOT_TWO_PI), sum),z);
283283
284- vec2 z_neg = cneg(z);
285- vec2 sum_neg = vec2(GAMMA_COEFFICIENTS[0],0.0);
286- for(int i = 1; i < 7; ++i) {
287- sum_neg = cadd(sum_neg, cdiv(vec2(GAMMA_COEFFICIENTS[i], 0.0), cadd(z_neg, vec2(float(i), 0.0))));
288- }
289- vec2 t_neg = cadd(z_neg,vec2(5.5,0.0));
290- vec2 exp_term_neg = cmult(cpow(t_neg, cadd(z_neg, vec2(0.5, 0.0))), cexp(-t_neg));
291- vec2 gamma_1_minus_z = cmult(cmult(exp_term_neg, SQUARE_ROOT_TWO_PI), sum_neg);
292- vec2 pi_z = cmult(z,vec2(PI,0.0));
293- vec2 result_neg = cdiv(vec2(PI,0.0),cmult(csin(pi_z),gamma_1_minus_z));
284+ vec2 t = cadd(z, vec2(5.5, 0.0));
285+ vec2 log_term = cmult(cadd(z, vec2(0.5, 0.0)), clog(t));
286+ const vec2 ln_sqrt_2pi = vec2(0.9189385332, 0.0);
287+ return cadd(csub(cadd(ln_sqrt_2pi, log_term), t), clog(sum));
288+ }
294289
295- float is_positive = step(0.5,z.x);
296- return mix(result_neg,result_pos,is_positive);
290+ vec2 cgamma(vec2 z) {
291+ if(z.x > 0.5)
292+ return cexp(clngamma(z));
293+ vec2 gamma_1_z = cexp(clngamma(ONE-z));
294+ vec2 denom = cmult(gamma_1_z,csin(cmult(CPI,z)));
295+ vec2 result = cdiv(CPI,denom);
296+ if(isnan(result.x)){
297+ result.x = 0.0;
298+ }
299+ if(isnan(result.y)){
300+ result.y = 0.0;
301+ }
302+ return result;
297303}
298304
299- const int ZETA_PRECISION = 10;
300- const float ZETA_COEFFICIENTS [10] = float[](1.0, 513.0, 23041.0, 263169.0, 1153025.0, 2306049.0, 2191361.0, 950273.0, 171537.0, 9217.0);
305+ const float ZETA_WEIGHTS[10] = float[](
306+ 1.000000, 0.999023, 0.989258, 0.945312, 0.828125,
307+ 0.623047, 0.376953, 0.171875, 0.054688, 0.010742
308+ );
301309
302310vec2 czeta_main_branch(vec2 s){
303311 vec2 sum = vec2(0.0, 0.0);
304312 float sign = 1.0;
313+
305314 for(int n = 0; n < 10; ++n){
306- vec2 n_to_neg_s = cpow(vec2(float(n+1),0.0), cmult(s, MINUS_ONE));
307-
308- n_to_neg_s = cmult(n_to_neg_s, vec2(sign * ZETA_COEFFICIENTS[ ZETA_PRECISION - 1 - n], 0.0));
309- sum = cadd(sum,n_to_neg_s);
315+ vec2 n_to_neg_s = cpow(vec2(float(n+1), 0.0), cmult(s, MINUS_ONE));
316+
317+ n_to_neg_s = cmult(n_to_neg_s, vec2(sign * ZETA_WEIGHTS[n], 0.0));
318+ sum = cadd(sum, n_to_neg_s);
319+
310320 sign = -sign;
311321 }
312- vec2 eta = cmult(sum,vec2(1.0/ZETA_COEFFICIENTS[ZETA_PRECISION - 1], 0.0));
313- vec2 one_minus_s = csub(ONE,s);
314- vec2 two_pow = cpow(vec2(2.0,0.0),one_minus_s);
315- return cdiv(eta,csub(ONE,two_pow));
322+
323+ vec2 one_minus_s = csub(ONE, s);
324+ vec2 two_pow = cpow(vec2(2.0, 0.0), one_minus_s);
325+
326+ return cdiv(sum, csub(ONE, two_pow));
316327}
317328
318329vec2 czeta_negative_branch(vec2 s){
@@ -382,6 +393,7 @@ void main(){
382393
383394 #define INTERPRETER_ASSIGNEMENT HERE
384395 vec2 func_value = run_stack(operator_stack,constant_stack,z);
396+ func_value = clamp(func_value, -1e38, 1e38);
385397 #define END_INTERPRETER_ASSIGNEMENT HERE
386398
387399 #define INJECTION_POINT HERE
0 commit comments