File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ # dichotomizer.pl
2+ # a simle script that produces an occurence table for protein homologs
3+ # the database is a set of blast-formatted proteomes (.fas files)
4+ # for queries it reads files with numbers (1-Z) as names files have the extension .query
5+ # it performs a blast search where the score cut off is set defined by query length
6+ # by pcruzm@biosustain.dtu.dk 25-oct-23
7+ # the output is a table with n rows and Z columns, 1=found 0=not found
8+
9+ # reading a directory
10+ @FILES =` ls *.fas` ;
11+ foreach (@FILES ){
12+ chomp $_ ;
13+ $cont =0;
14+ print " \n $_ \t " ;
15+ # blasting the queries $cont on the genome $_
16+ while ($cont < 597){
17+ $cont ++;
18+ # extracting protein length
19+ $seq_len =` grep ">" -v $cont .query |wc -c` ;
20+ # the score cut off is defined for a given $seq_len
21+ if ($seq_len > 6000){
22+ $score_cutoff =4000;
23+ }
24+ if ( 5000<= $seq_len <= 5999){
25+ $score_cutoff = 3000;
26+ }
27+ if ( 3000<= $seq_len <= 4999){
28+ $score_cutoff = 2500;
29+ }
30+ if ( 2000<= $seq_len <= 2999){
31+ $score_cutoff = 2000;
32+ }
33+ if ( 1000<= $seq_len <= 1999){
34+ $score_cutoff = 1000;
35+ }
36+ if ( 700<= $seq_len <= 999 ){
37+ $score_cutoff = 500 ;
38+ }
39+ if ( 500<= $seq_len <= 699 ){
40+ $score_cutoff = 500 ;
41+ }
42+ if ( 300<= $seq_len <= 499 ){
43+ $score_cutoff = 300 ;
44+ }
45+ if ( 200<= $seq_len <= 299 ){
46+ $score_cutoff = 200 ;
47+ }
48+ if ( 100<= $seq_len <= 199 ){
49+ $score_cutoff = 150 ;
50+ }
51+ if ( 50<= $seq_len <= 99 ){
52+ $score_cutoff = 100 ;
53+ }
54+ if ($seq_len < 49){
55+ $score_cutoff =50;
56+ }
57+ $search =` blastp -query $cont .query -db $_ -evalue 0.000001 -outfmt 6 -max_target_seqs 1 2\>\/ dev\/ null |awk '(\$ 12 \> $score_cutoff )'|cut -f 2` ;
58+ if ($search =~/ peg/ ){
59+ print " 1\t " ;
60+ }
61+ else {
62+ print " 0\t " ;
63+ }
64+ }
65+ }# foreach files
You can’t perform that action at this time.
0 commit comments