| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package NCBIx::eUtils::GeneAliases; |
|
2
|
1
|
|
|
1
|
|
20614
|
use Class::Std; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
use Class::Std::Utils; |
|
4
|
|
|
|
|
|
|
use LWP::Simple; |
|
5
|
|
|
|
|
|
|
|
|
6
|
|
|
|
|
|
|
use warnings; |
|
7
|
|
|
|
|
|
|
use strict; |
|
8
|
|
|
|
|
|
|
use Carp; |
|
9
|
|
|
|
|
|
|
|
|
10
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.9.0'); |
|
11
|
|
|
|
|
|
|
|
|
12
|
|
|
|
|
|
|
# Base URL of the NCBI E-utilities CGI programs. START() copies it into a new
# object's utils_url attribute. NCBI serves E-utilities over HTTPS from the
# dedicated eutils host, so LWP needs HTTPS support (LWP::Protocol::https)
# installed to fetch from it.
our $utils = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils";

# Package-level page size. Objects carry their own retmax attribute with the
# same default; no code visible in this file reads this variable directly.
our $retmax = 500;

# Field labels that occur in the text docsum. get_aliases() inserts a line
# break in front of each label so every field starts a line of its own.
# 'and Name:' comes before 'Name:' so the longer label is split off first.
our @keywords = (
    'Official Symbol:',
    'and Name:',
    'Name:',
    'Other Aliases:',
    'Other Designations:',
    'Chromosome:',
    'Location:',
    'Annotation:',
    'MIM:',
    'Genomic context:',
    'Macronuclear:',
    'GeneID:',
);
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
{ |
|
17
|
|
|
|
|
|
|
# Per-object attributes, stored inside-out by Class::Std. The accessor and
# constructor-argument names are spelled out because Class::Std only generates
# get_*/set_* methods for :get<name>/:set<name>; the subs in this file call
# get_utils_url(), set_utils_url() and get_retmax().

# Base URL of the E-utilities service; START() populates it from $utils.
my %utils_url_of :ATTR( :get<utils_url> :set<utils_url> :default<''> :init_arg<utils_url> );

# Number of records requested per efetch call in _get_docsums().
my %retmax_of :ATTR( :get<retmax> :set<retmax> :default<'500'> :init_arg<retmax> );
|
19
|
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Class::Std initialisation hook for a newly constructed object.
#
# Arguments (unpacked from @_): the object, its identifier and the
# constructor argument hash reference. Only the object is used.
#
# Fills utils_url with the package-wide $utils base URL, but only while the
# attribute is still empty, so a URL already supplied for this object is not
# overwritten. Returns nothing.
sub START {
    my ($self, $ident, $arg_ref) = @_;

    my $current_url = $self->get_utils_url();
    if ( !defined $current_url || $current_url eq '' ) {
        $self->set_utils_url( $utils );
    }

    return;
}
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Look up alternative names for a gene.
#
# Arguments:
#   $gene_id - search term handed to _get_docsums(). A name equal to it is
#              left out of what is collected from the "Official Symbol" field.
#
# Returns the sorted list of distinct names found in the "Official Symbol:"
# and "Other Aliases:" fields of the docsum text, or an empty list when
# _get_docsums() returned nothing.
sub get_aliases {
    my ( $self, $gene_id ) = @_;

    # Get NCBI records for gene
    my $gene_alts = $self->_get_docsums( $gene_id );
    return () if !$gene_alts;

    # Flatten the text, then start a new line in front of every known field
    # label so each field can be matched from the start of its own line.
    $gene_alts =~ s/\n/ /g;
    foreach my $keyword ( @keywords ) {
        # \Q..\E: the labels are literal text, not patterns.
        $gene_alts =~ s/\Q$keyword\E/\n$keyword/g;
    }

    my %gene_names;
    foreach my $alt_line ( split /\n/, $gene_alts ) {
        next if $alt_line !~ m/^(Official Symbol|Other Aliases):(.*)$/;
        my ( $field, $names ) = ( $1, $2 );

        # Names are separated by commas, semicolons and/or whitespace.
        foreach my $name ( split /[\s,;]+/, $names ) {
            # Test length rather than truth so that a name of "0" is kept.
            next if !length $name;

            # The query term itself is dropped from the Official Symbol
            # field only; aliases are kept as they come.
            next if $field eq 'Official Symbol' && $name eq $gene_id;

            $gene_names{$name} = 1;
        }
    }

    return sort keys %gene_names;
}
|
61
|
|
|
|
|
|
|
|
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# Fetch the text docsums of all Gene records matching a search term.
#
# Arguments:
#   $gene_id - search term. It is placed in the esearch URL verbatim, so it
#              must already be URL-safe.
#
# Runs one esearch request with usehistory=y, then pages through the stored
# result set with efetch, retmax records per request, sleeping after every
# request. Returns the concatenated efetch output. Returns '' when nothing
# matched, and '' after a carp when the search reply could not be fetched or
# parsed.
sub _get_docsums {
    my ( $self, $gene_id ) = @_;
    my $gene_data  = '';
    my $page_size  = $self->get_retmax();
    my $utils_url  = $self->get_utils_url();
    my $identity   = '&tool=cpan_ncbix_eutils_genealiases&email=roger@iosea.com';

    # A page size that is not a positive integer would keep the paging loop
    # from ever advancing, so fall back to the attribute's default.
    if ( !defined $page_size || $page_size !~ m/\A\d+\z/ || $page_size < 1 ) {
        carp 'retmax is not a positive integer; using 500';
        $page_size = 500;
    }

    # Get the query
    my $esearch = $utils_url . '/esearch.fcgi?'
        . "db=gene&retmax=1&usehistory=y&term=$gene_id"
        . $identity;
    my $esearch_result = get( $esearch );
    sleep(3);

    # get() hands back undef when the request fails.
    if ( !defined $esearch_result ) {
        carp 'esearch request failed';
        return $gene_data;
    }

    # Parse the count, query_key, and webenv. Each value is taken from its own
    # XML element and only used if that match succeeded, so neither unrelated
    # text nor stale capture variables can leak into the efetch URL.
    my ($count)     = $esearch_result =~ m{<Count>(\d+)</Count>};
    my ($query_key) = $esearch_result =~ m{<QueryKey>(\d+)</QueryKey>};
    my ($web_env)   = $esearch_result =~ m{<WebEnv>(\S+)</WebEnv>};

    if ( !defined $count ) {
        carp 'esearch reply contained no <Count> element';
        return $gene_data;
    }
    return $gene_data if $count == 0;

    if ( !defined $query_key || !defined $web_env ) {
        carp 'esearch reply contained no <QueryKey>/<WebEnv> elements';
        return $gene_data;
    }

    for ( my $retstart = 0; $retstart < $count; $retstart += $page_size ) {
        my $efetch = $utils_url . '/efetch.fcgi?'
            . "rettype=docsum&retmode=text&retstart=$retstart&retmax=$page_size&"
            . "db=gene&query_key=$query_key&WebEnv=$web_env"
            . $identity;

        my $page = get( $efetch );
        sleep(2);

        # Skip a failed page rather than appending undef; later pages are
        # still attempted.
        if ( !defined $page ) {
            carp "efetch request failed at retstart=$retstart";
            next;
        }
        $gene_data .= $page;
    }

    return $gene_data;
}
|
98
|
|
|
|
|
|
|
} |
|
99
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
|
101
|
|
|
|
|
|
|
__END__ |