Skip to content

Commit 1c47873

Browse files
Create dichotomizer.pl
1 parent f248977 commit 1c47873

1 file changed

Lines changed: 65 additions & 0 deletions

File tree

dichotomizer.pl

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# dichotomizer.pl
2+
# a simle script that produces an occurence table for protein homologs
3+
# the database is a set of blast-formatted proteomes (.fas files)
4+
# for queries it reads files with numbers (1-Z) as names files have the extension .query
5+
# it performs a blast search where the score cut off is set defined by query length
6+
# by pcruzm@biosustain.dtu.dk 25-oct-23
7+
# the output is a table with n rows and Z columns, 1=found 0=not found
8+
9+
#reading a directory
10+
@FILES=`ls *.fas`;
11+
foreach(@FILES){
12+
chomp $_;
13+
$cont=0;
14+
print "\n$_\t";
15+
#blasting the queries $cont on the genome $_
16+
while ($cont < 597){
17+
$cont++;
18+
#extracting protein length
19+
$seq_len=`grep ">" -v $cont.query |wc -c`;
20+
#the score cut off is defined for a given $seq_len
21+
if ($seq_len > 6000){
22+
$score_cutoff=4000;
23+
}
24+
if ( 5000<= $seq_len <= 5999){
25+
$score_cutoff= 3000;
26+
}
27+
if ( 3000<= $seq_len <= 4999){
28+
$score_cutoff= 2500;
29+
}
30+
if ( 2000<= $seq_len <= 2999){
31+
$score_cutoff= 2000;
32+
}
33+
if ( 1000<= $seq_len <= 1999){
34+
$score_cutoff= 1000;
35+
}
36+
if ( 700<= $seq_len <= 999 ){
37+
$score_cutoff= 500 ;
38+
}
39+
if ( 500<= $seq_len <= 699 ){
40+
$score_cutoff= 500 ;
41+
}
42+
if ( 300<= $seq_len <= 499 ){
43+
$score_cutoff= 300 ;
44+
}
45+
if ( 200<= $seq_len <= 299 ){
46+
$score_cutoff= 200 ;
47+
}
48+
if ( 100<= $seq_len <= 199 ){
49+
$score_cutoff= 150 ;
50+
}
51+
if ( 50<= $seq_len <= 99 ){
52+
$score_cutoff= 100 ;
53+
}
54+
if ($seq_len < 49){
55+
$score_cutoff=50;
56+
}
57+
$search=`blastp -query $cont.query -db $_ -evalue 0.000001 -outfmt 6 -max_target_seqs 1 2\>\/dev\/null |awk '(\$12 \> $score_cutoff)'|cut -f 2`;
58+
if ($search=~/peg/){
59+
print "1\t";
60+
}
61+
else {
62+
print "0\t";
63+
}
64+
}
65+
}#foreach files

0 commit comments

Comments
 (0)