-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathexclude_mono.pl
More file actions
executable file
·104 lines (87 loc) · 2.2 KB
/
exclude_mono.pl
File metadata and controls
executable file
·104 lines (87 loc) · 2.2 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
#!/usr/bin/env perl
use strict;
use warnings;
use Getopt::Long qw(GetOptions);
my ($inputFile,$outputFile,$excludeFile);
GetOptions('input=s' => \$inputFile,'output=s' => \$outputFile,'exclude=s' => \$excludeFile);
my %vars_to_exclude;
my $fh;
$\="\n";
open ($fh, "<", $excludeFile) or die "[Error] File with variants to exclude ($excludeFile) could not be opened. Exiting.";
while(my $v=<$fh>){
chomp $v;
$vars_to_exclude{$v}=1;
}
close($fh);
open ($fh, "<", $inputFile) or die "[Error] Input file ($inputFile) could not be opened. Exiting.";
my %H;
my $flag=0;
while(my $line=<$fh>){
chomp $line;
my @a=split(/\t/,$line);
if ($a[1] eq "1"){
# this line contains variant IDs
# next line contains weights
$flag=1;
$H{$a[0]}->{"vars"}=join("\t",@a[2..$#a]);
}
elsif($a[1] eq "0"){
if ($flag==1){
$H{$a[0]}->{"weights"}=join("\t",@a[2..$#a]);
}
else{
$H{$a[0]}->{"vars"}=join("\t",@a[2..$#a]);
$H{$a[0]}->{"weights"}="NA";
}
$flag=0;
}
else{
die "[Error] The second field in $line is neither 0 nor 1. Exiting.";
}
}
close($fh);
my %mono;
foreach my $gene (keys(%H)){
my $vars=$H{$gene}->{"vars"};
my @a=split(/\t/,$vars);
my $c=0;
for (my $i=0;$i<scalar(@a);$i++){
if(exists($vars_to_exclude{$a[$i]})){
$a[$i]="NA";
$c++;
}
}
my $rem=scalar(@a)-$c;
$mono{$gene}=1 if($rem<2);
$H{$gene}->{"vars"}=join("\t",@a);
}
$,="\t";
open ($fh, ">", $outputFile) or die "[Error] Output ($outputFile) could not be opened. Exiting.";
foreach my $gene (keys(%H)){
if (exists($mono{$gene})){
print STDERR $gene;
}
else{
my $vars=$H{$gene}->{"vars"};
my $weights=$H{$gene}->{"weights"};
my @a=split(/\t/,$vars);
my @c=();
my @d=();
for (my $i=0;$i<scalar(@a);$i++){
push(@c,$a[$i]) unless($a[$i] eq "NA");
}
if ($weights eq "NA"){
print $fh $gene,"0",join("\t",@c);
}
else{
my @b=split(/\t/,$weights);
die "[Error] Numbers of variants and weights for $gene are different. Exiting." if scalar(@b)!=scalar(@b);
for (my $i=0;$i<scalar(@b);$i++){
push(@d,$b[$i]) unless($a[$i] eq "NA");
}
print $fh $gene,"1",join("\t",@c);
print $fh $gene,"0",join("\t",@d);
}
}
}
close($fh);