Skip to content

Commit fd9d3ec

Browse files
author
Peter Combs
committed
Merge branch 'HbRNAi'
2 parents 742f693 + f6a04d5 commit fd9d3ec

5 files changed

Lines changed: 93 additions & 23 deletions

File tree

MakeSummaryTable.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
2) -c -- also include confidence intervals
77
88
"""
9-
from __future__ import division
9+
from __future__ import division, print_function
1010
import pandas
1111
from os import path
1212
from glob import glob
@@ -132,8 +132,10 @@ def get_stagenum(name, series, dir):
132132
stage=params.ix[dirname]['Stage'],
133133
num=get_stagenum(dirname, params.index,
134134
params.ix[dirname, 'Direction']))
135-
print dirname, '=', new_dirname
135+
print(dirname, '=', new_dirname)
136136
dirname = new_dirname
137+
else:
138+
print(dirname)
137139

138140
skip = False
139141
if args.strip_low_reads:
@@ -158,16 +160,17 @@ def get_stagenum(name, series, dir):
158160
pass
159161
total_reads += i//4
160162
skip += (reads / total_reads) < (args.strip_low_map_rate / 100)
161-
print(reads, total_reads, reads/total_reads,
162-
args.strip_low_map_rate / 100)
163+
if skip:
164+
print(reads, total_reads, reads/total_reads,
165+
args.strip_low_map_rate / 100)
163166

164167
if skip:
165168
if args.strip_as_nan:
166169
from numpy import nan
167-
print "NaNing", dirname
170+
print("NaNing", dirname, "\t{:,} reads".format(reads))
168171
table.ix[:] = nan
169172
else:
170-
print "Skipping", dirname
173+
print("Skipping", dirname)
171174
continue
172175
if df is None:
173176
df = pandas.DataFrame({dirname+"_FPKM": table.ix[:, args.column]})

Makefile

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,31 +13,32 @@ MELVERSION = $(word 1, $(subst _FB, ,$(MELRELEASE)))
1313
VIRVERSION = $(word 1, $(subst _FB, ,$(VIRRELEASE)))
1414
MELDATE = $(word 2, $(subst _FB, ,$(MELRELEASE)))
1515

16-
MELFASTA = prereqs/dmel-all-chromosome-$(MELVERSION).fasta
17-
VIRFASTA = prereqs/dvir-all-chromosome-$(VIRVERSION).fasta
16+
PREREQDIR = prereqs
17+
MELFASTA = $(PREREQDIR)/dmel-all-chromosome-$(MELVERSION).fasta
18+
VIRFASTA = $(PREREQDIR)/dvir-all-chromosome-$(VIRVERSION).fasta
1819

1920
REFDIR = Reference
2021

2122
MELFASTA2= $(REFDIR)/dmel_prepend.fasta
2223
VIRFASTA2= $(REFDIR)/dvir_prepend.fasta
2324

24-
ORTHOLOGS = prereqs/gene_orthologs_fb_$(MELDATE).tsv
25+
ORTHOLOGS = $(PREREQDIR)/gene_orthologs_fb_$(MELDATE).tsv
2526

26-
CERFASTA = prereqs/S288C_reference_sequence_R64-1-1_20110203.fsa
27+
CERFASTA = $(PREREQDIR)/S288C_reference_sequence_R64-1-1_20110203.fsa
2728
CERFASTA2= $(REFDIR)/scer_prepend.fasta
2829

29-
MELGFF = prereqs/dmel-all-$(MELVERSION).gff
30+
MELGFF = $(PREREQDIR)/dmel-all-$(MELVERSION).gff
3031
MELGTF = $(REFDIR)/mel_good.gtf
31-
VIRGFF = prereqs/dvir-all-$(VIRVERSION).gff
32+
VIRGFF = $(PREREQDIR)/dvir-all-$(VIRVERSION).gff
3233
VIRGTF = $(REFDIR)/vir_good.gtf
33-
CERGFF = prereqs/saccharomyces_cerevisiae_R64-1-1_20110208.gff
34+
CERGFF = $(PREREQDIR)/saccharomyces_cerevisiae_R64-1-1_20110208.gff
3435
MELVIRGTF= $(REFDIR)/melvir.gtf
3536
MELVIRGTF_FILT= $(REFDIR)/melvir_withgenename.gtf
3637
MELVIRFASTA=$(REFDIR)/melvir.fa
3738
MELALLGTF = $(REFDIR)/mel_all.gtf
3839
MELBADGTF = $(REFDIR)/mel_bad.gtf
3940

40-
GENEMAPTABLE = gene_map_table_fb_$(MELDATE).tsv
41+
GENEMAPTABLE = $(PREREQDIR)/gene_map_table_fb_$(MELDATE).tsv
4142

4243

4344
all : $(ANALYSIS_DIR)/summary.tsv $(REFDIR)/$(MELMAJORVERSION) $(REFDIR)/$(MELVERSION)
@@ -124,11 +125,11 @@ $(MELBADGTF): $(MELALLGTF) | $(REFDIR)
124125
> $@
125126

126127

127-
$(MELFASTA): $(REFDIR)/$(MELMAJORVERSION) | $(REFDIR)
128+
$(MELFASTA): $(REFDIR)/$(MELMAJORVERSION) | $(REFDIR) $(PREREQDIR)
128129
wget -O $@.gz ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/dmel_$(MELRELEASE)/fasta/dmel-all-chromosome-$(MELVERSION).fasta.gz
129130
gunzip --force $@.gz
130131

131-
$(MELGFF): $(REFDIR)/$(MELVERSION) | $(REFDIR)
132+
$(MELGFF): $(REFDIR)/$(MELVERSION) | $(REFDIR) $(PREREQDIR)
132133
wget -O $@.gz ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/dmel_$(MELRELEASE)/gff/dmel-all-$(MELVERSION).gff.gz
133134
gunzip --force $@.gz
134135

@@ -179,7 +180,7 @@ $(REFDIR)/DmelDvir/Genome : $(MELVIRGTF) | $(REFDIR)/DmelDvir $(MELFASTA2) $(VI
179180
--genomeFastaFiles $(MELFASTA2) $(VIRFASTA2) \
180181
--sjdbGTFfile $(MELVIRGTF)
181182

182-
$(ORTHOLOGS) :
183+
$(ORTHOLOGS) : | $(PREREQDIR)
183184
wget -O $@.gz -i ftp.flybase.org/releases/FB$(MELDATE)/precomputed_files/genes/gene_orthologs_fb_$(MELDATE).tsv.gz
184185
gunzip --force $@.gz
185186

@@ -190,6 +191,8 @@ $(REFDIR)/DmelDvir:
190191

191192
$(REFDIR)/DmelScer:
192193
mkdir $@
194+
$(PREREQDIR):
195+
mkdir $@
193196

194197
Reference/DmelDper:
195198
mkdir $@
@@ -200,8 +203,9 @@ Reference/DmelDvir:
200203
Reference/DmelDmoj:
201204
mkdir $@
202205

206+
203207
$(GENEMAPTABLE):
204-
wget ftp://ftp.flybase.net/releases/$(MELDATE)/precomputed_files/genes/$(GENEMAPTABLE).gz \
208+
wget ftp://ftp.flybase.net/releases/FB$(MELDATE)/precomputed_files/genes/$(notdir $(GENEMAPTABLE)).gz \
205209
-O $(GENEMAPTABLE).gz
206210
gunzip --force $(GENEMAPTABLE).gz
207211

PlotUtils.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@ def svg_heatmap(data, filename, row_labels=None, box_size=4,
144144
draw_box=False, draw_name=False, data_names=None,
145145
max_width=np.inf,
146146
spacers=None,
147+
hatch_nan=True, hatch_size=20,
147148
first_col='', last_col=''):
148149
"""
149150
Draw heatmap as an SVG file stored in filename
@@ -200,9 +201,9 @@ def svg_heatmap(data, filename, row_labels=None, box_size=4,
200201
pat = dwg.pattern(id='hatch', insert=(0, 0), size=(25, 25),
201202
patternUnits='userSpaceOnUse')
202203
g = pat.add(dwg.g(style="fill:none; stroke:#B0B0B0; stroke-width:1"))
203-
g.add(dwg.path(('M0,0', 'l20,20')))
204-
g.add(dwg.path(('M10,0 l10,10'.split())))
205-
g.add(dwg.path(('M0,10 l10,10'.split())))
204+
g.add(dwg.path(('M0,0', 'l{hatch},{hatch}'.format(hatch=hatch_size))))
205+
g.add(dwg.path(('M{hatch2},0 l{hatch2},{hatch2}'.format(hatch2=hatch_size/2).split())))
206+
g.add(dwg.path(('M0,{hatch2} l{hatch2},{hatch2}'.format(hatch2=hatch_size/2).split())))
206207

207208
dwg.add(pat)
208209

@@ -302,7 +303,7 @@ def svg_heatmap(data, filename, row_labels=None, box_size=4,
302303
.format(*[int(255*x) for x in
303304
c_cmap(norm_data.ix[i, j])])))
304305
dwg.add(g)
305-
if hatch:
306+
if hatch_nan and hatch:
306307
g.add(dwg.rect((x_start + box_size*j,
307308
y_start + i*box_height),
308309
(box_size, box_height),

Utils.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,3 +31,21 @@ def to_number(dataval):
3131

3232
except ValueError:
3333
return dataval
34+
35+
36+
def contains(string_or_iterable):
37+
if isinstance(string_or_iterable, str):
38+
return lambda x: string_or_iterable in x
39+
else:
40+
return lambda x: any(i in x for i in string_or_iterable)
41+
42+
def startswith(string_or_iterable):
43+
if not isinstance(string_or_iterable, str):
44+
string_or_iterable = tuple(string_or_iterable)
45+
return lambda x: x.startswith(string_or_iterable)
46+
47+
def sel_contains(string_or_iterable):
48+
return dict(crit=contains(string_or_iterable), axis=1)
49+
50+
def sel_startswith(string_or_iterable):
51+
return dict(crit=startswith(string_or_iterable), axis=1)

configure

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,12 +39,47 @@ star_str = ('$(ANALYSIS_DIR)/{label}/accepted_hits.bam: '
3939
mappers = {'star': star_str, 'tophat' : tophat_str}
4040

4141
glob_specs = [
42+
'{seqdir}/*/*{index_seq}*_{{read}}_*.fastq*',
4243
'{seqdir}/*{label}{index}/*_{{read}}_*.fastq*',
4344
# '{seqdir}/*{index}/*_{{read}}_*.fastq*',
4445
'{seqdir}/*/*{label}*/*_{{read}}_*.fastq*',
4546
'{seqdir}/*index{index}/*_{{read}}*.fastq*',
4647
]
4748

49+
index_converter = {
50+
'S501':'TAGATCGC',
51+
'S502':'CTCTCTAT',
52+
'S503':'TATCCTCT',
53+
'S504':'AGAGTAGA',
54+
'S505':'GTAAGGAG',
55+
'S506':'ACTGCATA',
56+
'S507':'AAGGAGTA',
57+
'S508':'CTAAGCCT',
58+
#'N701':'TCGCCTTA',
59+
#'N702':'CTAGTACG',
60+
#'N703':'TTCTGCCT',
61+
#'N704':'GCTCAGGA',
62+
#'N705':'AGGAGTCC',
63+
#'N706':'CATGCCTA',
64+
#'N707':'GTAGAGAG',
65+
#'N708':'CCTCTCTG',
66+
#'N709':'AGCGTAGC',
67+
#'N710':'CAGCCTCG',
68+
#'N711':'TGCCTCTT',
69+
#'N712':'TCCTCTAC',
70+
'N701':'TAAGGCGA',
71+
'N702':'CGTACTAG',
72+
'N703':'AGGCAGAA',
73+
'N704':'TCCTGAGC',
74+
'N705':'GGACTCCT',
75+
'N706':'TAGGCATG',
76+
'N707':'CTCTCTAC',
77+
'N708':'CAGAGAGG',
78+
'N709':'GCTACGCT',
79+
'N710':'CGAGGCTG',
80+
'N711':'AAGAGGCA',
81+
'N712':'GTAGAGGA',
82+
}
4883

4984
def parse_arguments():
5085
from argparse import ArgumentParser
@@ -70,6 +105,10 @@ def parse_arguments():
70105
return args
71106

72107

108+
def convert_indices(index_spec):
109+
for index in index_converter:
110+
index_spec = index_spec.replace(index, index_converter[index])
111+
return index_spec
73112

74113
args = parse_arguments()
75114
config_file = pd.read_table(args.parameters,
@@ -99,7 +138,12 @@ for i, row in config_file.iterrows():
99138
species = row['CarrierSpecies']
100139
for glob_spec in glob_specs:
101140
glob_spec = (glob_spec
102-
.format(seqdir=args.seqdir, label=label, index=index, id=mbepc))
141+
.format(seqdir=args.seqdir,
142+
label=label,
143+
index=index,
144+
index_seq=convert_indices(index),
145+
id=mbepc,
146+
))
103147
rf1 = glob(glob_spec.format(read='R1'))
104148
if rf1 != []:
105149
out.write("GLOBSPEC = {}\n".format(glob_spec))

0 commit comments

Comments
 (0)