MakeSummaryTable gets read files from bam header

Peter Combs · Peter Combs · commit eb7440b7f85f · 2014-11-21T11:46:32.000-08:00
This is way better than trying to guess the read file names from the
constantly changing formats the sequencing center gives the data in.

There's still a bit of a risk in that it doesn't actually count all the
reads: every file except the last is assumed to hold 4e6 reads, and only
the last file is read through. So if there are multiple "leftovers" files
(for instance, if adjacent slices are computationally merged), then it
won't deal with them properly.
diff --git a/MakeSummaryTable.py b/MakeSummaryTable.py
@@ -149,10 +149,10 @@ def get_stagenum(name, series, dir):
         else:
             skip = sf.mapped < args.strip_low_reads
     if args.strip_low_map_rate and args.has_params and not skip:
-        rfs = sorted(glob(path.join('sequence',
-                                    '*{}*'.format(params.ix[old_dirname]['Index']),
-                                    '*_R1_*.fastq.gz'))
-                    )
+        rfs = [entry for entry in
+               sf.header['PG'][0]['CL'].split()
+              if entry.endswith('.gz') or entry.endswith('.fastq')][0]
+        rfs = sorted(rfs.split(','))
         total_reads = 4e6 * (len(rfs) - 1)
         for i, line in enumerate(gzip.open(rfs[-1])):
             pass