-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathrunGeneCNVAndPloidyQuery
More file actions
executable file
·105 lines (83 loc) · 3.4 KB
/
runGeneCNVAndPloidyQuery
File metadata and controls
executable file
·105 lines (83 loc) · 3.4 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#!/usr/bin/perl
use strict;
use lib "$ENV{GUS_HOME}/lib/perl";
use Getopt::Long;
use GUS::ObjRelP::DbiDatabase;
use GUS::Supported::GusConfig;
use CBIL::Util::PropertySet;
my ($gusConfigFile,$taxonId,$orthoGroupFile,$geneSourceIdOrthologFile,$chrsForCalcsFile);
&GetOptions("taxonId=s" => \$taxonId,
"orthoGroupFile=s" => \$orthoGroupFile,
"geneSourceIdOrthologFile=s" => \$geneSourceIdOrthologFile,
"gusConfigFile=s" => \$gusConfigFile,
"chrsForCalcsFile=s" => \$chrsForCalcsFile);
my $proteinToGeneSql = "
SELECT aas.source_id AS protein_source_id,
gf.source_id AS gene_source_id
FROM dots.AASequence aas
JOIN dots.TranslatedAASequence tas ON aas.aa_sequence_id = tas.aa_sequence_id
JOIN dots.TranslatedAAFeature taf ON taf.aa_sequence_id = tas.aa_sequence_id
JOIN dots.Transcript t ON taf.na_feature_id = t.na_feature_id
JOIN dots.GeneFeature gf ON t.parent_id = gf.na_feature_id
WHERE aas.subclass_view = 'TranslatedAASequence'
AND aas.taxon_id = $taxonId
AND aas.taxon_id IN (
SELECT taxon_id
FROM apidb.organism
WHERE is_annotated_genome = 1
)
";
my $chrsForCalcsSql = "select ns.source_id from dots.nasequence ns, sres.ontologyterm ot where ot.name = 'chromosome' and ot.ontology_term_id = ns.sequence_ontology_id and ns.taxon_id = $taxonId";
#$gusConfigFile = $ENV{GUS_HOME}."/config/gus.config";
die "Config file $gusConfigFile does not exist" unless -e $gusConfigFile;
my @properties = ();
my $gusConfig = CBIL::Util::PropertySet -> new ($gusConfigFile, \@properties, 1);
my $db = GUS::ObjRelP::DbiDatabase-> new($gusConfig->{props}->{dbiDsn},
$gusConfig->{props}->{databaseLogin},
$gusConfig->{props}->{databasePassword},
0,0,1, # verbose, no insert, default
$gusConfig->{props}->{coreSchemaName});
my $dbh = $db->getQueryHandle();
my $proteinToGeneStmt = $dbh->prepare($proteinToGeneSql);
$proteinToGeneStmt->execute();
my %proteinToGene;
while (my @row = $proteinToGeneStmt->fetchrow_array()){
$proteinToGene{$row[0]} = $row[1];
}
my %proteinToGroup;
open(GROUPS, "<$orthoGroupFile") or die "Cannot open $orthoGroupFile: $!";
while (my $line = <GROUPS>) {
chomp $line;
my ($groupId, $proteinList) = split(/:\s*/, $line, 2);
next unless defined $proteinList;
foreach my $protein (split(/\s+/, $proteinList)) {
$proteinToGroup{$protein} = $groupId;
}
}
close GROUPS;
my @proteinsWithNoGroup;
open(GENE, ">$geneSourceIdOrthologFile") or die "Cannot open $geneSourceIdOrthologFile: $!";
while (my ($protein, $gene) = each %proteinToGene) {
my $group = $proteinToGroup{$protein};
unless ($group) {
(my $altProtein = $protein) =~ s/:/\_/g;
$group = $proteinToGroup{$altProtein};
}
if ($group) {
print GENE "$gene\t$group\n";
} else {
push @proteinsWithNoGroup, $protein;
}
}
close GENE;
if (@proteinsWithNoGroup) {
print STDERR "The following proteins have no group assignment in $orthoGroupFile:\n" . join("\n", @proteinsWithNoGroup) . "\n";
}
my $chrsForCalcs = $dbh->prepare($chrsForCalcsSql);
$chrsForCalcs->execute();
open(CHRS, ">$chrsForCalcsFile") or die "Cannot open $chrsForCalcsFile: $!";
while (my @row = $chrsForCalcs->fetchrow_array()){
print CHRS "$row[0]\t\n";
}
close CHRS;
$dbh->disconnect();