soedinglab
diff --git a/‎AFfine/ig_pipeline.py‎
Lines changed: 573 additions & 0 deletions b/‎AFfine/ig_pipeline.py‎
Lines changed: 573 additions & 0 deletions
diff --git a/‎AFfine/predict_utils_ig.py‎
Lines changed: 377 additions & 0 deletions b/‎AFfine/predict_utils_ig.py‎
Lines changed: 377 additions & 0 deletions
diff --git a/‎AFfine/rank_pep_plddt.py‎
Lines changed: 56 additions & 0 deletions b/‎AFfine/rank_pep_plddt.py‎
Lines changed: 56 additions & 0 deletions
diff --git a/‎AFfine/run_prediction.py‎
Lines changed: 26 additions & 18 deletions b/‎AFfine/run_prediction.py‎
Lines changed: 26 additions & 18 deletions
@@ -0,0 +1,56 @@
+import numpy as np
+import json
+import sys
+import os
+
+def rank_peptide_plddt(npz_path, query_chainseq, output_path=None):
+    """
+    Rank sampled structures by mean peptide pLDDT.
+    
+    Args:
+        npz_path: path to *_sampling_results.npz
+        query_chainseq: e.g. "MKTL.../AVSL..." (peptide is last chain)
+        output_path: where to save JSON (default: same dir as npz)
+    """
+    data = np.load(npz_path)
+    all_plddt = data['all_plddt']        # [N_samples, N_res_padded]
+    plddt_mean = data.get('plddt_mean')  # [N_res_padded] from single prediction
+
+    chain_lens = [len(c) for c in query_chainseq.split('/')]
+    pep_start = sum(chain_lens[:-1])
+    pep_end = sum(chain_lens)
+
+    # Non-sampled: use the first prediction's pLDDT (or plddt_mean)
+    # plddt_mean is average across samples, so use all_plddt[0] isn't right either
+    # Better: load from the regular prediction. But plddt_mean works as proxy.
+    non_sampled_pep = float(np.mean(plddt_mean[pep_start:pep_end]))
+
+    # Per-sample peptide mean pLDDT
+    sampled = {}
+    for i in range(all_plddt.shape[0]):
+        pep_plddt = float(np.mean(all_plddt[i, pep_start:pep_end]))
+        sampled[i] = round(pep_plddt, 3)
+
+    # Sort by pLDDT descending
+    ranked = dict(sorted(sampled.items(), key=lambda x: x[1], reverse=True))
+
+    result = {
+        "non_sampled_peptide_plddt": round(non_sampled_pep, 3),
+        "n_samples": len(ranked),
+        "sampled_ranked": {str(k): v for k, v in ranked.items()},
+    }
+
+    if output_path is None:
+        output_path = npz_path.replace('_sampling_results.npz', '_pep_plddt_ranked.json')
+
+    with open(output_path, 'w') as f:
+        json.dump(result, f, indent=2)
+    print(f"Saved: {output_path}")
+    return result
+
+
+if __name__ == '__main__':
+    npz_path = sys.argv[1]
+    chainseq = sys.argv[2]
+    out = sys.argv[3] if len(sys.argv) > 3 else None
+    rank_peptide_plddt(npz_path, chainseq, out)
@@ -17,8 +17,12 @@
 import warnings
 warnings.filterwarnings('ignore', category=FutureWarning)
 warnings.filterwarnings('ignore', category=DeprecationWarning)
+from rank_pep_plddt import rank_peptide_plddt
 ###
 import argparse
+from run_prediction_ig_patch import (
+    add_ig_pipeline_args, setup_ig_pipeline, process_target_with_ig_pipeline,
+)
 
 parser = argparse.ArgumentParser(
     description="Run simple template-based alphafold inference",
@@ -71,6 +75,10 @@
 parser.add_argument('--no_initial_guess', action='store_true', default=False, help='When active, no intial guess is used to direct modeling and only template is used.')
 parser.add_argument('--return_all_outputs', action='store_true', default=False, help='Save all alphafold outputs including evoformer output')
 parser.add_argument('--use_msa', action='store_true', default=False, help='If Enabled, use MSA for prediction. If not, only template is used.')
+parser = add_ig_pipeline_args(parser)
+parser.add_argument('--pep_sampling', type=str, default=None,
+    help='Peptide sampling scope: "all", "anchors", or comma-separated '
+         '1-indexed peptide positions e.g. "2,5,9"')
 args = parser.parse_args()
 
 import os
@@ -108,7 +116,7 @@
     num_recycle = args.num_recycles[0],
     args = args
 )
-
+ig_config = setup_ig_pipeline(args)
 final_dfl = []
 for counter, targetl in targets.iterrows():
     print('START:', counter, 'of', targets.shape[0])
@@ -191,25 +199,25 @@
         msa = [query_sequence] + msa
 
 
-   
-
-    all_metrics = predict_utils.run_alphafold_prediction(
-        query_sequence=query_sequence,
-        msa=msa,
-        deletion_matrix=deletion_matrix,
-        chainbreak_sequence=query_chainseq,
-        template_features=all_template_features,
-        model_runners=model_runners,
-        out_prefix=outfile_prefix,
-        crop_size=crop_size,
-        dump_pdbs = not (args.no_pdbs or args.terse),
-        dump_metrics = not args.terse,
-        template_pdb_dict = template_pdb_dict, # added by Amir for getting pandora data
-        no_initial_guess=args.no_initial_guess,
-        return_all_outputs=args.return_all_outputs
+    # ── Build peptide mask per-target if sampling requested ──
+    all_metrics = process_target_with_ig_pipeline(
+            args, ig_config, targetl, query_sequence, query_chainseq,
+            all_template_features, model_runners, outfile_prefix,
+            crop_size, msa, deletion_matrix,
+        )
+    # -------- End of V2 after sampling mode -------------- #
+    all_metrics = process_target_with_ig_pipeline(
+        args, ig_config, targetl, query_sequence, query_chainseq,
+        all_template_features, model_runners, outfile_prefix,
+        crop_size, msa, deletion_matrix,
     )
 
-
+    # ── Rank sampled structures by peptide pLDDT ──
+    if getattr(args, 'pep_sampling', None) is not None:
+        for model_name in args.model_names:
+            npz_path = f'{outfile_prefix}_{model_name}_sampling_results.npz'
+            if os.path.exists(npz_path):
+                rank_peptide_plddt(npz_path, query_chainseq)
     outl = targetl.copy()
     for model_name, metrics in all_metrics.items():
         plddts = metrics['plddt']