@@ -417,7 +417,7 @@ def _compute_cumulative_distribution(
417417 covariates : np .ndarray ,
418418 treatment_arms : np .ndarray ,
419419 outcomes : np .array ,
420- ) -> np .ndarray :
420+ ) -> Tuple [ np .ndarray , np . ndarray , np . ndarray ] :
421421 """
422422 Compute the cumulative distribution values.
423423
@@ -429,47 +429,37 @@ def _compute_cumulative_distribution(
429429 outcomes (np.ndarray): An array of outcomes in the observed data
430430
431431 Returns:
432- np.ndarray: Estimated cumulative distribution values.
432+ Tuple of numpy arrays:
433+ - np.ndarray: Unconditional cumulative distribution values.
434+ - np.ndarray: Adjusted cumulative distribution for each observation.
435+ - np.ndarray: Conditional cumulative distribution for each observation.
433436 """
434437 n_records = outcomes .shape [0 ]
435438 n_loc = locations .shape [0 ]
436- superset_prediction = np .zeros ((n_records , n_loc ))
437439 prediction = np .zeros ((n_records , n_loc ))
438440 treatment_mask = treatment_arms == target_treatment_arm
439441
440442 strata = self .strata
441443 s_list = np .unique (strata )
442- s_dict = {}
444+ w_s = {}
443445 for s in s_list :
444446 s_mask = strata == s
445- s_dict [s ] = (s_mask & treatment_mask ).sum () / s_mask .sum ()
447+ w_s [s ] = (s_mask & treatment_mask ).sum () / s_mask .sum ()
446448 n_obs = outcomes .shape [0 ]
447449 n_loc = locations .shape [0 ]
448450 for i , outcome in enumerate (locations ):
449451 for j in range (n_obs ):
450452 s = strata [j ]
451- prediction [j , i ] = (
452- (outcomes [j ] <= outcome ) / s_dict [s ] * treatment_mask [j ]
453- )
453+ prediction [j , i ] = (outcomes [j ] <= outcome ) / w_s [s ] * treatment_mask [j ]
454454
455- pred = {}
456- for j in range (n_obs ):
457- s = strata [j ]
458- s_mask = s == strata
459- if s in pred :
460- superset_prediction [j ] = pred [s ]
461- else :
462- superset_prediction [j ] = prediction [s_mask ].mean (axis = 0 )
463- pred [s ] = superset_prediction [j ]
464-
465- for i , outcome in enumerate (locations ):
466- for j in range (n_obs ):
467- s = strata [j ]
468- prediction [j , i ] = (
469- (outcomes [j ] <= outcome ) - superset_prediction [j , i ]
470- ) / s_dict [s ] * treatment_mask [j ] + superset_prediction [j , i ]
455+ unconditional_pred = {s : prediction [s == strata ].mean (axis = 0 ) for s in s_list }
456+ conditional_prediction = np .array ([unconditional_pred [s ] for s in strata ])
457+ weights = np .array ([w_s [s ] for s in strata ])[:, np .newaxis ]
458+ prediction = (
459+ (outcomes [:, np .newaxis ] <= locations ) - conditional_prediction
460+ ) / weights * treatment_mask [:, np .newaxis ] + conditional_prediction
471461
472- return prediction .mean (axis = 0 ), prediction , superset_prediction
462+ return prediction .mean (axis = 0 ), prediction , conditional_prediction
473463
474464 def _compute_interval_probability (
475465 self ,
@@ -478,57 +468,52 @@ def _compute_interval_probability(
478468 covariates : np .ndarray ,
479469 treatment_arms : np .ndarray ,
480470 outcomes : np .array ,
481- ) -> np .ndarray :
482- """Compute the cumulative distribution values .
471+ ) -> Tuple [ np .ndarray , np . ndarray , np . ndarray ] :
472+ """Compute the interval probabilities .
483473
484474 Args:
485475 target_treatment_arm (int): The index of the treatment arm.
486- locations (np.ndarray): Scalar values to be used for computing the cumulative distribution .
476+ locations (np.ndarray): Scalar values to be used for computing the interval probabilities .
487477 covariates: (np.ndarray): An array of covariates variables in the observed data.
488478 treatment_arm (np.ndarray): An array of treatment arms in the observed data.
489479 outcomes (np.ndarray): An array of outcomes in the observed data
490480
491481 Returns:
492- np.ndarray: Estimated cumulative distribution values.
482+ Tuple of numpy arrays:
483+ - np.ndarray: Estimated unconditional interval probabilities.
483+ - np.ndarray: Adjusted interval probabilities for each observation.
484+ - np.ndarray: Conditional interval probabilities for each observation.
493486 """
494487 n_records = outcomes .shape [0 ]
495488 n_loc = locations .shape [0 ]
496- superset_prediction = np .zeros ((n_records , n_loc ))
497489 prediction = np .zeros ((n_records , n_loc ))
498490 treatment_mask = treatment_arms == target_treatment_arm
499491
500492 strata = self .strata
501493 s_list = np .unique (strata )
502- s_dict = {}
494+ w_s = {}
503495 for s in s_list :
504496 s_mask = strata == s
505- s_dict [s ] = (s_mask & treatment_mask ).sum () / s_mask .sum ()
497+ w_s [s ] = (s_mask & treatment_mask ).sum () / s_mask .sum ()
506498 n_obs = outcomes .shape [0 ]
507499 n_loc = locations .shape [0 ]
508500 for i , outcome in enumerate (locations ):
509501 for j in range (n_obs ):
510502 s = strata [j ]
511- prediction [j , i ] = (
512- (outcomes [j ] <= outcome ) / s_dict [s ] * treatment_mask [j ]
513- )
503+ prediction [j , i ] = (outcomes [j ] <= outcome ) / w_s [s ] * treatment_mask [j ]
514504
515- for j in range (n_obs ):
516- s = strata [j ]
517- s_mask = s == strata
518- superset_prediction [j ] = prediction [s_mask ].mean (axis = 0 )
505+ unconditional_pred = {s : prediction [s == strata ].mean (axis = 0 ) for s in s_list }
506+ conditional_prediction = np .array ([unconditional_pred [s ] for s in strata ])
507+ weights = np .array ([w_s [s ] for s in strata ])[:, np .newaxis ]
508+ prediction = (
509+ (outcomes [:, np .newaxis ] <= locations ) - conditional_prediction
510+ ) / weights * treatment_mask [:, np .newaxis ] + conditional_prediction
519511
520- for i , outcome in enumerate (locations ):
521- for j in range (n_obs ):
522- s = strata [j ]
523- prediction [j , i ] = (
524- (outcomes [j ] <= outcome ) - superset_prediction [j , i ]
525- ) / s_dict [s ] * treatment_mask [j ] + superset_prediction [j , i ]
526- return prediction .mean (axis = 0 ), superset_prediction
527512 cdf = prediction .mean (axis = 0 )
528513 return (
529514 cdf [1 :] - cdf [:- 1 ],
530515 prediction [:, 1 :] - prediction [:, :- 1 ],
531- superset_prediction [:, 1 :] - superset_prediction [:, :- 1 ],
516+ conditional_prediction [:, 1 :] - conditional_prediction [:, :- 1 ],
532517 )
533518
534519
@@ -596,7 +581,7 @@ def _compute_cumulative_distribution(
596581 covariates : np .ndarray ,
597582 treatment_arms : np .ndarray ,
598583 outcomes : np .array ,
599- ) -> np .ndarray :
584+ ) -> Tuple [ np .ndarray , np . ndarray , np . ndarray ] :
600585 """
601586 Compute the cumulative distribution values.
602587
@@ -608,7 +593,10 @@ def _compute_cumulative_distribution(
608593 outcomes (np.ndarray): An array of outcomes in the observed data
609594
610595 Returns:
611- Tuple[np.ndarray, np.ndarray, np.ndarray]: Estimated cumulative distribution values, prediction for each observation, and superset prediction for each observation.
596+ Tuple of numpy arrays:
597+ - np.ndarray: Unconditional cumulative distribution values.
598+ - np.ndarray: Adjusted cumulative distribution for each observation.
599+ - np.ndarray: Conditional cumulative distribution for each observation.
612600 """
613601 n_records = outcomes .shape [0 ]
614602 n_loc = locations .shape [0 ]
@@ -619,7 +607,7 @@ def _compute_cumulative_distribution(
619607 strata = self .strata
620608 s_list = np .unique (strata )
621609 if self .is_multi_task :
622- binominal = (outcomes .reshape (- 1 , 1 ) <= locations ) * 1 # (n_records, n_loc)
610+ binomial = (outcomes .reshape (- 1 , 1 ) <= locations ) * 1 # (n_records, n_loc)
623611 for fold in range (self .folds ):
624612 fold_mask = (folds != fold ) & treatment_mask
625613 for s in s_list :
@@ -628,51 +616,51 @@ def _compute_cumulative_distribution(
628616 superset_mask = (folds == fold ) & s_mask
629617 subset_train_mask = (folds != fold ) & s_mask & treatment_mask
630618 covariates_train = covariates [subset_train_mask ]
631- binominal_train = binominal [subset_train_mask ]
632- if len (np .unique (binominal_train )) > 1 :
619+ binomial_train = binomial [subset_train_mask ]
620+ if len (np .unique (binomial_train )) > 1 :
633621 self .model = deepcopy (self .base_model )
634- self .model .fit (covariates_train , binominal_train )
622+ self .model .fit (covariates_train , binomial_train )
635623
636624 pred = self ._compute_model_prediction (
637625 self .model , covariates [superset_mask ]
638626 )
639627 prediction [superset_mask ] = (
640628 pred
641629 + treatment_mask [superset_mask ].reshape (- 1 , 1 )
642- * (binominal [superset_mask ] - pred )
630+ * (binomial [superset_mask ] - pred )
643631 / weight
644632 )
645633 superset_prediction [superset_mask ] = pred
646634 else :
647635 for i , location in enumerate (locations ):
648- binominal = (outcomes <= location ) * 1 # (n_records)
636+ binomial = (outcomes <= location ) * 1 # (n_records)
649637 for fold in range (self .folds ):
650638 fold_mask = (folds != fold ) & treatment_mask
651639 covariates_train = covariates [fold_mask ]
652- binominal_train = binominal [fold_mask ]
640+ binomial_train = binomial [fold_mask ]
653641 # Pool the records across strata and train the model
654- if len (np .unique (binominal_train )) > 1 :
642+ if len (np .unique (binomial_train )) > 1 :
655643 self .model = deepcopy (self .base_model )
656- self .model .fit (covariates_train , binominal_train )
644+ self .model .fit (covariates_train , binomial_train )
657645 for s in s_list :
658646 s_mask = strata == s
659647 weight = (s_mask & treatment_mask ).sum () / s_mask .sum ()
660648 superset_mask = (folds == fold ) & s_mask
661649 subset_train_mask = (folds != fold ) & s_mask & treatment_mask
662650 covariates_train = covariates [subset_train_mask ]
663- binominal_train = binominal [subset_train_mask ]
651+ binomial_train = binomial [subset_train_mask ]
664652 # TODO: revisit the logic here
665- if len (np .unique (binominal_train )) > 1 :
653+ if len (np .unique (binomial_train )) > 1 :
666654 # self.model = deepcopy(self.base_model)
667- # self.model.fit(covariates_train, binominal_train )
655+ # self.model.fit(covariates_train, binomial_train )
668656 pass
669657 else :
670- pred = binominal_train [0 ]
658+ pred = binomial_train [0 ]
671659 superset_prediction [superset_mask , i ] = pred
672660 prediction [superset_mask , i ] = (
673661 pred
674662 + treatment_mask [superset_mask ]
675- * (binominal [superset_mask ] - pred )
663+ * (binomial [superset_mask ] - pred )
676664 / weight
677665 )
678666 continue
@@ -682,7 +670,7 @@ def _compute_cumulative_distribution(
682670 prediction [superset_mask , i ] = (
683671 pred
684672 + treatment_mask [superset_mask ]
685- * (binominal [superset_mask ] - pred )
673+ * (binomial [superset_mask ] - pred )
686674 / weight
687675 )
688676 superset_prediction [superset_mask , i ] = pred
@@ -696,9 +684,9 @@ def _compute_interval_probability(
696684 covariates : np .ndarray ,
697685 treatment_arms : np .ndarray ,
698686 outcomes : np .array ,
699- ) -> np .ndarray :
687+ ) -> Tuple [ np .ndarray , np . ndarray , np . ndarray ] :
700688 """
701- Compute the cumulative distribution values .
689+ Compute the interval probabilities .
702690
703691 Args:
704692 target_treatment_arm (int): The index of the treatment arm.
@@ -708,7 +696,10 @@ def _compute_interval_probability(
708696 outcomes (np.ndarray): An array of outcomes in the observed data
709697
710698 Returns:
711- np.ndarray: Estimated cumulative distribution values.
699+ Tuple of numpy arrays:
700+ - np.ndarray: Unconditional interval probabilities.
701+ - np.ndarray: Adjusted interval probabilities for each observation.
702+ - np.ndarray: Conditional interval probabilities for each observation.
712703 """
713704 n_records = outcomes .shape [0 ]
714705 n_loc = locations .shape [0 ]
@@ -720,28 +711,28 @@ def _compute_interval_probability(
720711 s_list = np .unique (strata )
721712 binominals = (outcomes [:, np .newaxis ] <= locations ) * 1 # (n_records, n_loc)
722713 for i in range (len (locations ) - 1 ):
723- binominal = binominals [:, i + 1 ] - binominals [:, i ]
714+ binomial = binominals [:, i + 1 ] - binominals [:, i ]
724715 for fold in range (self .folds ):
725716 fold_mask = (folds != fold ) & treatment_mask
726717 covariates_train = covariates [fold_mask ]
727- binominal_train = binominal [fold_mask ]
728- if len (np .unique (binominal_train )) > 1 :
718+ binomial_train = binomial [fold_mask ]
719+ if len (np .unique (binomial_train )) > 1 :
729720 self .model = deepcopy (self .base_model )
730- self .model .fit (covariates_train , binominal_train )
721+ self .model .fit (covariates_train , binomial_train )
731722 for s in s_list :
732723 s_mask = strata == s
733724 wight = (s_mask & treatment_mask ).sum () / s_mask .sum ()
734725 superset_mask = (folds == fold ) & s_mask
735726 subset_train_mask = (folds != fold ) & s_mask & treatment_mask
736727 covariates_train = covariates [subset_train_mask ]
737- binominal_train = binominal [subset_train_mask ]
738- if len (np .unique (binominal_train )) == 1 :
739- pred = binominal_train [0 ]
728+ binomial_train = binomial [subset_train_mask ]
729+ if len (np .unique (binomial_train )) == 1 :
730+ pred = binomial_train [0 ]
740731 superset_prediction [superset_mask , i ] = pred
741732 prediction [superset_mask , i ] = (
742733 pred
743734 + treatment_mask [superset_mask ]
744- * (binominal [superset_mask ] - pred )
735+ * (binomial [superset_mask ] - pred )
745736 / wight
746737 )
747738 continue
@@ -751,7 +742,7 @@ def _compute_interval_probability(
751742 prediction [superset_mask , i ] = (
752743 pred
753744 + treatment_mask [superset_mask ]
754- * (binominal [superset_mask ] - pred )
745+ * (binomial [superset_mask ] - pred )
755746 / wight
756747 )
757748 superset_prediction [superset_mask , i ] = pred
0 commit comments