Skip to content

Commit 14352ff

Browse files
committed
speed up using numeric approximations along the tree
1 parent 1cb168c commit 14352ff

3 files changed

Lines changed: 215 additions & 21 deletions

File tree

panicmage

114 Bytes
Binary file not shown.

panicmage.c

Lines changed: 54 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -530,8 +530,11 @@ if (printtree_flag == 1){
530530
if (skipall_flag == 0) {
531531

532532

533-
ex *theogfs_symb_fast;
534-
theogfs_symb_fast = new ex[leaves];
533+
// ex *theogfs_symb_fast;
534+
// theogfs_symb_fast = new ex[leaves];
535+
float *theogfs_fast;
536+
theogfs_fast = new float[leaves];
537+
535538

536539
// only estimate if -t or -r or -c is not set to custom value
537540
if (estimate_flag == 1){
@@ -540,28 +543,53 @@ if (skipall_flag == 0) {
540543
estimate(&theta_hat,&rho_hat,para);
541544
}
542545
else{
543-
//compute the symbolic formula in advance
544-
// do this only once and give the functions to my_f_symbolic
546+
// // // // // // this part works well for smaller trees
547+
// //compute the symbolic formula in advance
548+
// // do this only once and give the functions to my_f_symbolic
549+
// cout << "Initializing the tree structure...\n";
550+
// rootTree(intree,NULL);
551+
// unprob_symb(intree);
552+
// initialize_tree(intree,leaves);
553+
// cout << "done.\nComputing probabilities along the tree...\n";
554+
// // cout << "We did get past the initialize tree\n";
555+
// // check_probs(tree,anzahl,para->rhoS);
556+
// comp_pkfhs(intree,leaves,x);
557+
// cout << "done.\nEstimation of theta and rho...\n";
558+
// // cout << "We computed the probabillties within the tree\n";
559+
// // check_probs(tree,anzahl,para->rhoS)
560+
// treegfs_symbolic_fast(intree,leaves,theogfs_symb_fast,x);
561+
// for(i=0; i<leaves; i++){
562+
// paraS->symbolicgfs[i] = theogfs_symb_fast[i];
563+
// // printf("\n\n\n\n");
564+
// // cout << theogfs_symb_fast[i];
565+
// // // // //
566+
567+
568+
// // // // // // this part works faster for larger trees
569+
// //compute the pkfhs and the gfs along the tree during estimation
570+
// this is faster than using the symbolic structure where many formulas appear many times
571+
// my_f_numeric will need the tree to compute the pkfhs along the tree
545572
cout << "Initializing the tree structure...\n";
546573
rootTree(intree,NULL);
547-
unprob_symb(intree);
548-
initialize_tree(intree,leaves);
549-
cout << "done.\nComputing probabilities along the tree...\n";
574+
unprob(intree);
575+
initialize_tree_numeric(intree,leaves);
576+
cout << "done.\nTesting to computing probabilities along the tree...\n";
550577
// cout << "We did get past the initialize tree\n";
551578
// check_probs(tree,anzahl,para->rhoS);
552-
comp_pkfhs(intree,leaves,x);
579+
comp_pkfhs_numeric(intree,leaves,2.8);
553580
cout << "done.\nEstimation of theta and rho...\n";
554581
// cout << "We computed the probabillties within the tree\n";
555582
// check_probs(tree,anzahl,para->rhoS)
556-
treegfs_symbolic_fast(intree,leaves,theogfs_symb_fast,x);
557-
for(i=0; i<leaves; i++){
558-
paraS->symbolicgfs[i] = theogfs_symb_fast[i];
559-
// printf("\n\n\n\n");
560-
// cout << theogfs_symb_fast[i];
561-
}
583+
// treegfs_symbolic_fast(intree,leaves,theogfs_symb_fast,x);
584+
// for(i=0; i<leaves; i++){
585+
// paraS->symbolicgfs[i] = theogfs_symb_fast[i];
586+
// // printf("\n\n\n\n");
587+
// // cout << theogfs_symb_fast[i];
588+
// }
589+
// // // // //
562590

563591
// cout << "We successfully called treegfs_symbolic_fast\n";
564-
estimate_symbolic(&theta_hat,&rho_hat,paraS);
592+
estimate_numeric(&theta_hat,&rho_hat,para);
565593
}
566594
}
567595
//////////////////* END estimate theta and rho*//////////////////////////////////////////
@@ -588,11 +616,15 @@ if (skipall_flag == 0) {
588616
// // compute the GFS given the tree
589617
// treegfs(intree,leaves,est_gfs_giventree_theo,rho_hat); //this is not fast
590618
rootTree(intree,NULL);
591-
treegfs_symbolic_fast(intree,leaves,theogfs_symb_fast,x);
592-
int inc;
593-
for (inc = 0; inc < leaves; inc++){
594-
est_gfs_giventree_theo[inc] = to_double(ex_to<numeric>(theogfs_symb_fast[inc].subs(x == rho_hat).evalf()));
595-
}
619+
// treegfs_symbolic_fast(intree,leaves,theogfs_symb_fast,x); // this is fast but can only very slowly be evaluated
620+
unprob(intree);
621+
initialize_tree_numeric(intree,leaves);
622+
comp_pkfhs_numeric(intree,leaves,rho_hat);
623+
treegfs_numeric_fast(intree,leaves,theogfs_fast,rho_hat);
624+
// int inc;
625+
// for (inc = 0; inc < leaves; inc++){
626+
// est_gfs_giventree_theo[inc] = to_double(ex_to<numeric>(theogfs_symb_fast[inc].subs(x == rho_hat).evalf()));
627+
// }
596628

597629

598630

@@ -602,7 +634,8 @@ if (skipall_flag == 0) {
602634
printf("Input GFS:\n");
603635
printgfs(input_gfs,leaves,1.,1.);
604636
printf("Estimated GFS given the tree:\n");
605-
printgfs(est_gfs_giventree_theo,leaves,theta_hat,rho_hat);
637+
// printgfs(est_gfs_giventree_theo,leaves,theta_hat,rho_hat);
638+
printgfs(theogfs_fast,leaves,theta_hat,rho_hat);
606639
}
607640

608641

source/treesymbolic.h

Lines changed: 161 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,59 @@ double my_f_symbolic (const gsl_vector *v, void *params)
540540

541541

542542

543+
double my_f_numeric (const gsl_vector *v, void *params)
544+
{
545+
extern int g_includecoreflag;
546+
// cout << "This is the numeric estimation...\n";
547+
double rho, theta, sum = 0.;
548+
Params *para = (Params *)params;
549+
550+
int anzahl = para->anzahl;
551+
Node *tree = para->tree;
552+
553+
rho = gsl_vector_get(v, 0);
554+
theta = gsl_vector_get(v, 1);
555+
556+
rootTree(tree,NULL);
557+
unprob(tree);
558+
initialize_tree_numeric(tree,anzahl);
559+
comp_pkfhs_numeric(tree,anzahl,rho);
560+
561+
562+
// rootTree(tree,NULL);
563+
float *theogfs;
564+
theogfs = (float *)malloc(anzahl*sizeof(float));
565+
// treegfs(tree,anzahl,theogfs,rho);
566+
treegfs_numeric_fast(tree,anzahl,theogfs,rho);
567+
568+
float *datagfs;
569+
datagfs = (float *)malloc(anzahl*sizeof(float));
570+
571+
int i;
572+
for (i = 0; i < anzahl; i++) {
573+
datagfs[i] = para->datagfs[i];
574+
}
575+
576+
//return lsqu(datagfs,theogfs,anzahl-1,theta,rho);
577+
//return llh(datagfs, theogfs, anzahl-1,theta,rho);
578+
if(g_includecoreflag == 1){
579+
return llh(datagfs, theogfs, anzahl,theta,rho);
580+
}else{
581+
return llh(datagfs, theogfs, anzahl-1,theta,rho);
582+
}
583+
}
584+
585+
586+
587+
588+
589+
590+
591+
592+
593+
594+
595+
543596

544597
// // try to compute the gfs in a clever way:
545598
// rootTree(intree,NULL);
@@ -688,6 +741,114 @@ void estimate_symbolic(float *theta_hat, float *rho_hat, Params_symbolic *paraS)
688741

689742

690743

744+
/*estimate the parameters theta and rho using the numeric precomputation
745+
of pkfhs (probabilities to keep from here in k individuals) */
746+
747+
// Params are:
748+
// Params *para;
749+
// para = malloc(sizeof(Params) + number * sizeof(double));
750+
// para->anzahl = number;
751+
// para->tree = tree;
752+
// }
753+
754+
void estimate_numeric(float *theta_hat, float *rho_hat, Params *para){
755+
756+
const gsl_multimin_fminimizer_type *T = gsl_multimin_fminimizer_nmsimplex2;
757+
gsl_multimin_fminimizer *s = NULL;
758+
gsl_vector *ss, *x;
759+
gsl_multimin_function minex_func;
760+
761+
size_t iter = 0;
762+
int status;
763+
double size;
764+
765+
/*optional set the starting points to a first simple estimate*/
766+
767+
int i, n;
768+
float c1, c2, firsttheta, firstrho;
769+
c1 = 0.;
770+
c2 = 0.;
771+
n = para->anzahl;
772+
for (i = 0; i < n; i++){
773+
c1 += (i+1.)/n * para->datagfs[i];
774+
c2 += (i+1.)*(n-i-1.)/(n*(n-1.)) * para->datagfs[i];
775+
}
776+
firstrho = c2/(c1-c2);
777+
firsttheta = c2 + firstrho*c2;
778+
if (firstrho > 20.) firstrho = 20.;
779+
if (firstrho < 0.1) firstrho = 0.1;
780+
if (firsttheta > 10000.) firsttheta = 10000.;
781+
if (firsttheta < 50.) firsttheta = 50;
782+
783+
784+
/* Starting point */
785+
x = gsl_vector_alloc (2);
786+
787+
// gsl_vector_set (x, 0, 1.); // fixed starting point rho
788+
// gsl_vector_set (x, 1, 2000.0); // fixed starting point theta
789+
790+
gsl_vector_set (x, 0, firstrho); // estimated starting point rho
791+
gsl_vector_set (x, 1, firsttheta); // estimated starting point theta
792+
793+
// printf("firsttheta = %.0f\tfirstrho = %.2f\n",firsttheta, firstrho);
794+
795+
796+
/* Set initial step sizes to 1 */
797+
ss = gsl_vector_alloc (2);
798+
gsl_vector_set_all (ss, 1.0);
799+
800+
/* Initialize method and iterate */
801+
minex_func.n = 2;
802+
minex_func.f = my_f_numeric;
803+
minex_func.params = para;
804+
805+
s = gsl_multimin_fminimizer_alloc (T, 2);
806+
gsl_multimin_fminimizer_set (s, &minex_func, x, ss);
807+
808+
do
809+
{
810+
iter++;
811+
status = gsl_multimin_fminimizer_iterate(s);
812+
813+
if (status)
814+
break;
815+
816+
size = gsl_multimin_fminimizer_size (s);
817+
status = gsl_multimin_test_size (size, 1e-1);
818+
819+
if (status == GSL_SUCCESS)
820+
{
821+
printf ("converged to minimum at\n");
822+
}
823+
824+
printf ("%5zu %10.3e %10.3e f() = %7.3f size = %.3f\n",
825+
iter,
826+
gsl_vector_get (s->x, 0),
827+
gsl_vector_get (s->x, 1),
828+
s->fval, size);
829+
}
830+
while(status == GSL_CONTINUE && iter < 100);
831+
832+
// printf("\n");
833+
if (iter != 100) {
834+
theta_hat[0] = (float) gsl_vector_get (s->x, 1);
835+
rho_hat[0] = (float) gsl_vector_get (s->x, 0);
836+
}
837+
else {
838+
printf("WARNING: minimizer did not converge: \t size: %.2f \t\t theta: %.2f \t\t rho: %.2f\n", size, (float) gsl_vector_get (s->x, 1), (float) gsl_vector_get (s->x, 0) );
839+
// theta_hat[0] = 0;
840+
// rho_hat[0] = 0;
841+
theta_hat[0] = (float) gsl_vector_get (s->x, 1);
842+
rho_hat[0] = (float) gsl_vector_get (s->x, 0);
843+
}
844+
gsl_vector_free(x);
845+
gsl_vector_free(ss);
846+
gsl_multimin_fminimizer_free (s);
847+
}
848+
849+
850+
851+
691852

692853

693854

0 commit comments

Comments
 (0)