File Coverage

blib/lib/Bio/Phylo/Parsers/Abstract.pm
Criterion Covered Total %
statement 84 126 66.6
branch 33 60 55.0
condition 3 12 25.0
subroutine 25 28 89.2
pod n/a
total 145 226 64.1


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Abstract;
2 33     33   211 use strict;
  33         60  
  33         760  
3 33     33   137 use warnings;
  33         58  
  33         668  
4 33     33   134 use base 'Bio::Phylo::IO';
  33         54  
  33         2943  
5 33     33   190 use IO::Handle;
  33         58  
  33         1259  
6 33     33   179 use Bio::Phylo::Util::Exceptions 'throw';
  33         58  
  33         1353  
7 33     33   173 use Bio::Phylo::Util::CONSTANT '/looks_like/';
  33         58  
  33         5208  
8 33     33   203 use Bio::Phylo::Util::Logger ':simple';
  33         59  
  33         3458  
9 33     33   7463 use Bio::Phylo::Factory;
  33         70  
  33         182  
10              
11             =head1 NAME
12              
13             Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO
14              
15             =head1 DESCRIPTION
16              
17             This package is subclassed by all other packages within Bio::Phylo::Parsers::.*.
18             There is no direct usage.
19              
20             =cut
21              
22             my $factory = Bio::Phylo::Factory->new;
23             my $logger = Bio::Phylo::Util::Logger->new;
24              
25             # argument is a file name, which we open
26             sub _open_file {
27 1     1   2 my $file_name = shift;
28 1   50     7 my $encoding = shift || '';
29 1 50       67 open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!;
30 0         0 return $handle;
31             }
32              
33             # argument is a string, which, at perl version >5.8,
34             # we can treat as a handle by opening it by reference
35             sub _open_string {
36 108     108   228 my $string_value = shift;
37 108   50     565 my $encoding = shift || '';
38 30 50   30   197 open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!;
  30         56  
  30         194  
  108         2627  
39 108         15614 return $handle;
40             }
41              
42             # argument is a url,
43             sub _open_url {
44 0     0   0 my $url = shift;
45 0   0     0 my $encoding = shift || '';
46 0         0 my $handle;
47              
48             # we need to use LWP::UserAgent to fetch the resource, but
49             # we don't "use" it at the top of the module because that
50             # would make it a required dependency
51 0         0 eval { require LWP::UserAgent };
  0         0  
52 0 0       0 if ($@) {
53 0         0 throw 'ExtensionError' =>
54             "Providing a -url argument requires\nsuccesful loading "
55             . "of LWP::UserAgent.\nHowever, there was an error when "
56             . "I\ntried that:\n"
57             . $@;
58             }
59              
60             # apparently it's installed, so let's instantiate a client
61 0         0 my $ua = LWP::UserAgent->new;
62 0         0 $ua->timeout(10);
63 0         0 $ua->env_proxy;
64              
65             # fetch the resource, get an HTTP::Response object
66 0         0 my $response = $ua->get($url);
67              
68             # i.e. 200, or 304 (unchanged cache)
69 0 0 0     0 if ( $response->is_success or $response->status_line =~ /304/ ) {
70              
71             # content is a string, so we create a handle in the same way
72             # as when the argument was a string
73 0         0 $handle = _open_string( $response->content, $encoding );
74             }
75             else {
76 0         0 throw 'NetworkError' => $response->status_line;
77             }
78 0         0 return $handle;
79             }
80              
81             # deal with all possible data sources, return
82             # a handle to whatever it is or throw an exception
83             sub _open_handle {
84 116     116   373 my %args = @_;
85 116         213 my $handle;
86 116 100       562 if ( $args{'-handle'} ) {
    100          
    50          
    0          
87 7         36 binmode $args{'-handle'}, ":utf8";
88 7         13 $handle = $args{'-handle'};
89             }
90             elsif ( $args{'-file'} ) {
91 1         6 $handle = _open_file( $args{'-file'}, $args{'-encoding'} );
92             }
93             elsif ( $args{'-string'} ) {
94 108         484 $handle = _open_string( $args{'-string'}, $args{'-encoding'} );
95             }
96             elsif ( $args{'-url'} ) {
97 0         0 $handle = _open_url( $args{'-url'}, $args{'-encoding'} );
98             }
99             else {
100 0         0 throw 'BadArgs' => 'No data source provided!';
101             }
102            
103             # check to see if the data source contains anything
104             #if ( eof $handle ) {
105             # throw 'NoData' => "Source is empty!";
106             #}
107 115         636 return $handle;
108             }
109              
110             # open a Bio::Phylo::Project if asked (if the -as_project flag
111             # was provided.) If the user has supplied one (the -project flag)
112             # simply return that or undefined otherwise.
113             sub _open_project {
114 113     113   421 my ( $fac, %args ) = @_;
115 113 50       501 if ( $args{'-project'} ) {
    100          
116 0         0 return $args{'-project'};
117             }
118             elsif ( $args{'-as_project'} ) {
119 24         188 return $fac->create_project;
120             }
121             else {
122 89         1149 return undef;
123             }
124             }
125              
126             # this constructor is called by the Bio::Phylo::IO::parse
127             # subroutine
128             sub _new {
129 116     116   262 my $class = shift;
130 116         375 my %args = looks_like_hash @_;
131            
132             # we need to guess the format
133 116 100       422 if ( $class eq __PACKAGE__ ) {
134 2 100       8 if ( my $format = _guess_format(_open_handle(%args)) ) {
135 1         4 $class = 'Bio::Phylo::Parsers::' . ucfirst($format);
136 1         3 return looks_like_class($class)->_new(%args);
137             }
138             else {
139 1         6 throw 'BadArgs' => "No format specified and unable to guess!";
140             }
141             }
142              
143             # factory is either user supplied or a private static
144 114   33     812 my $fac = $args{'-factory'} || $factory;
145              
146             # values of these object fields will be accessed
147             # by child classes through the appropriate protected
148             # getters
149             return bless {
150             '_fac' => $fac,
151             '_handle' => _open_handle(%args),
152             '_proj' => _open_project( $fac, %args ),
153             '_args' => \%args, # for child-specific arguments
154             '_encoding' => $args{'-encoding'},
155             '_handlers' => $args{'-handlers'},
156 114         559 '_flush' => $args{'-flush'},
157             }, $class;
158             }
159              
160             # child classes can override this to specify
161             # that their return value is a single scalar
162             # (e.g. a tree block, as is the case for newick),
163             # instead of an array of blocks
164 29     29   82 sub _return_is_scalar { 0 }
165              
166             # this is called by Bio::Phylo::IO::parse, and
167             # in turn it calls the _parse method of whatever
168             # the concrete child instance is.
169             sub _process {
170 113     113   269 my $self = shift;
171 113 100       442 if ( $self->_return_is_scalar ) {
172 84         288 my $result = $self->_parse;
173 84 100       455 if ( my $p = $self->_project ) {
174 10 50       43 if ( my $meta = $self->_project_meta ) {
175 0         0 $p->add_meta($_) for @{ $meta };
  0         0  
176             }
177 10         66 return $p->insert($result);
178             }
179             else {
180 74         849 return $result;
181             }
182             }
183             else {
184 29         112 my @result = $self->_parse;
185 27 100       192 if ( my $p = $self->_project ) {
186 14 50       79 if ( my $meta = $self->_project_meta ) {
187 0         0 $p->add_meta($_) for @{ $meta };
  0         0  
188             }
189 14         77 return $p->insert(@result);
190             }
191             else {
192 13         173 return [@result];
193             }
194             }
195             }
196              
197             # once this is called, the handle will have read to
198             # the end of the stream, so it needs to be rewound
199             # if we want to read from the top
200             sub _string {
201 0     0   0 my $self = shift;
202 0         0 my $handle = $self->_handle;
203 0         0 my $string = do { local $/; <$handle> };
  0         0  
  0         0  
204 0         0 return $string;
205             }
206       24     sub _project_meta {};
207 159199     159199   520895 sub _logger { $logger }
208 111     111   464 sub _project { shift->{'_proj'} }
209 113     113   514 sub _handle { shift->{'_handle'} }
210 5355     5355   15960 sub _factory { shift->{'_fac'} }
211 18984     18984   37698 sub _args { shift->{'_args'} }
212 0     0   0 sub _encoding { shift->{'_encoding'} }
213 586     586   3127 sub _flush { shift->{'_flush'} }
214             sub _handlers {
215 584     584   1088 my ( $self, $type ) = @_;
216 584 50       1699 if ( my $h = $self->{'_handlers'} ) {
217 584 50       1798 return defined $type ? $h->{$type} : $h;
218             }
219             }
220              
221             sub _guess_format {
222 2     2   4 my $handle = shift;
223 2         62 my $line = $handle->getline;
224 2         47 my $format;
225 2 100       22 if ( $line =~ /^#nexus/i ) {
    50          
    50          
    50          
    50          
    50          
    50          
    50          
226 1         3 $format = 'nexus';
227             }
228             elsif ( $line =~ /^<[^>]*nexml/ ) {
229 0         0 $format = 'nexml';
230             }
231             elsif ( $line =~ /^<[^>]*phyloxml/ ) {
232 0         0 $format = 'phyloxml';
233             }
234             elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) {
235 0         0 $format = 'phylip';
236             }
237             elsif ( $line =~ /^>/ ) {
238 0         0 $format = 'fasta';
239             }
240             elsif ( $line =~ /^\@/ ) {
241 0         0 $format = 'fastq';
242             }
243             elsif ( $line =~ /^\s*\(/ ) {
244 0         0 $format = 'newick';
245 0 0       0 if ( $line =~ /{/ ) {
246 0         0 $format = 'figtree';
247             }
248             }
249             elsif ( $line =~ /<\? xml/ ) {
250 0         0 $line = $handle;
251 0 0       0 if ( $line =~ /^<[^>]*nexml/ ) {
    0          
252 0         0 $format = 'nexml';
253             }
254             elsif ( $line =~ /^<[^>]*phyloxml/ ) {
255 0         0 $format = 'phyloxml';
256             }
257             }
258 2         6 seek( $handle, 0, 0 );
259 2         7 return $format;
260             }
261              
262             # podinherit_insert_token
263              
264             =head1 SEE ALSO
265              
266             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
267             for any user or developer questions and discussions.
268              
269             =over
270              
271             =item L<Bio::Phylo::IO>
272              
273             The parsers are called by the L<Bio::Phylo::IO> object.
274             Look there for examples.
275              
276             =item L<Bio::Phylo::Manual>
277              
278             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
279              
280             =back
281              
282             =head1 CITATION
283              
284             If you use Bio::Phylo in published research, please cite it:
285              
286             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
287             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
288             I<BMC Bioinformatics> B<12>:63.
289             L<http://dx.doi.org/10.1186/1471-2105-12-63>
290              
291             =cut
292              
293             1;