File Coverage

blib/lib/Bio/Phylo/Parsers/Abstract.pm
Criterion Covered Total %
statement 81 123 65.8
branch 33 60 55.0
condition 3 12 25.0
subroutine 24 27 88.8
pod n/a
total 141 222 63.5


line stmt bran cond sub pod time code
1             package Bio::Phylo::Parsers::Abstract;
2 33     33   225 use strict;
  33         68  
  33         907  
3 33     33   161 use base 'Bio::Phylo::IO';
  33         63  
  33         3172  
4 33     33   227 use IO::Handle;
  33         74  
  33         1310  
5 33     33   183 use Bio::Phylo::Util::Exceptions 'throw';
  33         74  
  33         1374  
6 33     33   186 use Bio::Phylo::Util::CONSTANT '/looks_like/';
  33         69  
  33         5480  
7 33     33   211 use Bio::Phylo::Util::Logger ':simple';
  33         59  
  33         3501  
8 33     33   8115 use Bio::Phylo::Factory;
  33         78  
  33         188  
9              
10             =head1 NAME
11              
12             Bio::Phylo::Parsers::Abstract - Superclass for parsers used by Bio::Phylo::IO
13              
14             =head1 DESCRIPTION
15              
16             This package is subclassed by all other packages within Bio::Phylo::Parsers::.*.
17             There is no direct usage.
18              
19             =cut
20              
21             my $factory = Bio::Phylo::Factory->new;
22             my $logger = Bio::Phylo::Util::Logger->new;
23              
24             # argument is a file name, which we open
25             sub _open_file {
26 1     1   2 my $file_name = shift;
27 1   50     8 my $encoding = shift || '';
28 1 50       73 open my $handle, "<${encoding}", $file_name or throw 'FileError' => $!;
29 0         0 return $handle;
30             }
31              
32             # argument is a string, which, at perl version >5.8,
33             # we can treat as a handle by opening it by reference
34             sub _open_string {
35 108     108   222 my $string_value = shift;
36 108   50     552 my $encoding = shift || '';
37 30 50   30   191 open my $handle, "<${encoding}", \$string_value or throw 'FileError' => $!;
  30         57  
  30         189  
  108         2527  
38 108         15736 return $handle;
39             }
40              
41             # argument is a url,
42             sub _open_url {
43 0     0   0 my $url = shift;
44 0   0     0 my $encoding = shift || '';
45 0         0 my $handle;
46              
47             # we need to use LWP::UserAgent to fetch the resource, but
48             # we don't "use" it at the top of the module because that
49             # would make it a required dependency
50 0         0 eval { require LWP::UserAgent };
  0         0  
51 0 0       0 if ($@) {
52 0         0 throw 'ExtensionError' =>
53             "Providing a -url argument requires\nsuccesful loading "
54             . "of LWP::UserAgent.\nHowever, there was an error when "
55             . "I\ntried that:\n"
56             . $@;
57             }
58              
59             # apparently it's installed, so let's instantiate a client
60 0         0 my $ua = LWP::UserAgent->new;
61 0         0 $ua->timeout(10);
62 0         0 $ua->env_proxy;
63              
64             # fetch the resource, get an HTTP::Response object
65 0         0 my $response = $ua->get($url);
66              
67             # i.e. 200, or 304 (unchanged cache)
68 0 0 0     0 if ( $response->is_success or $response->status_line =~ /304/ ) {
69              
70             # content is a string, so we create a handle in the same way
71             # as when the argument was a string
72 0         0 $handle = _open_string( $response->content, $encoding );
73             }
74             else {
75 0         0 throw 'NetworkError' => $response->status_line;
76             }
77 0         0 return $handle;
78             }
79              
80             # deal with all possible data sources, return
81             # a handle to whatever it is or throw an exception
82             sub _open_handle {
83 116     116   357 my %args = @_;
84 116         209 my $handle;
85 116 100       658 if ( $args{'-handle'} ) {
    100          
    50          
    0          
86 7         36 binmode $args{'-handle'}, ":utf8";
87 7         14 $handle = $args{'-handle'};
88             }
89             elsif ( $args{'-file'} ) {
90 1         6 $handle = _open_file( $args{'-file'}, $args{'-encoding'} );
91             }
92             elsif ( $args{'-string'} ) {
93 108         491 $handle = _open_string( $args{'-string'}, $args{'-encoding'} );
94             }
95             elsif ( $args{'-url'} ) {
96 0         0 $handle = _open_url( $args{'-url'}, $args{'-encoding'} );
97             }
98             else {
99 0         0 throw 'BadArgs' => 'No data source provided!';
100             }
101            
102             # check to see if the data source contains anything
103             #if ( eof $handle ) {
104             # throw 'NoData' => "Source is empty!";
105             #}
106 115         554 return $handle;
107             }
108              
109             # open a Bio::Phylo::Project if asked (if the -as_project flag
110             # was provided.) If the user has supplied one (the -project flag)
111             # simply return that or undefined otherwise.
112             sub _open_project {
113 113     113   483 my ( $fac, %args ) = @_;
114 113 50       457 if ( $args{'-project'} ) {
    100          
115 0         0 return $args{'-project'};
116             }
117             elsif ( $args{'-as_project'} ) {
118 24         194 return $fac->create_project;
119             }
120             else {
121 89         1002 return undef;
122             }
123             }
124              
125             # this constructor is called by the Bio::Phylo::IO::parse
126             # subroutine
127             sub _new {
128 116     116   255 my $class = shift;
129 116         342 my %args = looks_like_hash @_;
130            
131             # we need to guess the format
132 116 100       405 if ( $class eq __PACKAGE__ ) {
133 2 100       7 if ( my $format = _guess_format(_open_handle(%args)) ) {
134 1         3 $class = 'Bio::Phylo::Parsers::' . ucfirst($format);
135 1         3 return looks_like_class($class)->_new(%args);
136             }
137             else {
138 1         3 throw 'BadArgs' => "No format specified and unable to guess!";
139             }
140             }
141              
142             # factory is either user supplied or a private static
143 114   33     813 my $fac = $args{'-factory'} || $factory;
144              
145             # values of these object fields will be accessed
146             # by child classes through the appropriate protected
147             # getters
148             return bless {
149             '_fac' => $fac,
150             '_handle' => _open_handle(%args),
151             '_proj' => _open_project( $fac, %args ),
152             '_args' => \%args, # for child-specific arguments
153             '_encoding' => $args{'-encoding'},
154             '_handlers' => $args{'-handlers'},
155 114         542 '_flush' => $args{'-flush'},
156             }, $class;
157             }
158              
159             # child classes can override this to specify
160             # that their return value is a single scalar
161             # (e.g. a tree block, as is the case for newick),
162             # instead of an array of blocks
163 29     29   79 sub _return_is_scalar { 0 }
164              
165             # this is called by Bio::Phylo::IO::parse, and
166             # in turn it calls the _parse method of whatever
167             # the concrete child instance is.
168             sub _process {
169 113     113   252 my $self = shift;
170 113 100       409 if ( $self->_return_is_scalar ) {
171 84         269 my $result = $self->_parse;
172 84 100       452 if ( my $p = $self->_project ) {
173 10 50       52 if ( my $meta = $self->_project_meta ) {
174 0         0 $p->add_meta($_) for @{ $meta };
  0         0  
175             }
176 10         48 return $p->insert($result);
177             }
178             else {
179 74         780 return $result;
180             }
181             }
182             else {
183 29         106 my @result = $self->_parse;
184 27 100       192 if ( my $p = $self->_project ) {
185 14 50       70 if ( my $meta = $self->_project_meta ) {
186 0         0 $p->add_meta($_) for @{ $meta };
  0         0  
187             }
188 14         68 return $p->insert(@result);
189             }
190             else {
191 13         142 return [@result];
192             }
193             }
194             }
195              
196             # once this is called, the handle will have read to
197             # the end of the stream, so it needs to be rewound
198             # if we want to read from the top
199             sub _string {
200 0     0   0 my $self = shift;
201 0         0 my $handle = $self->_handle;
202 0         0 my $string = do { local $/; <$handle> };
  0         0  
  0         0  
203 0         0 return $string;
204             }
205       24     sub _project_meta {};
206 140686     140686   420861 sub _logger { $logger }
207 111     111   418 sub _project { shift->{'_proj'} }
208 113     113   465 sub _handle { shift->{'_handle'} }
209 4795     4795   18291 sub _factory { shift->{'_fac'} }
210 16464     16464   29876 sub _args { shift->{'_args'} }
211 0     0   0 sub _encoding { shift->{'_encoding'} }
212 586     586   5321 sub _flush { shift->{'_flush'} }
213             sub _handlers {
214 584     584   1967 my ( $self, $type ) = @_;
215 584 50       3649 if ( my $h = $self->{'_handlers'} ) {
216 584 50       2680 return defined $type ? $h->{$type} : $h;
217             }
218             }
219              
220             sub _guess_format {
221 2     2   5 my $handle = shift;
222 2         57 my $line = $handle->getline;
223 2         46 my $format;
224 2 100       23 if ( $line =~ /^#nexus/i ) {
    50          
    50          
    50          
    50          
    50          
    50          
    50          
225 1         3 $format = 'nexus';
226             }
227             elsif ( $line =~ /^<[^>]*nexml/ ) {
228 0         0 $format = 'nexml';
229             }
230             elsif ( $line =~ /^<[^>]*phyloxml/ ) {
231 0         0 $format = 'phyloxml';
232             }
233             elsif ( $line =~ /^\s*\d+\s+\d+\s*$/ ) {
234 0         0 $format = 'phylip';
235             }
236             elsif ( $line =~ /^>/ ) {
237 0         0 $format = 'fasta';
238             }
239             elsif ( $line =~ /^\@/ ) {
240 0         0 $format = 'fastq';
241             }
242             elsif ( $line =~ /^\s*\(/ ) {
243 0         0 $format = 'newick';
244 0 0       0 if ( $line =~ /{/ ) {
245 0         0 $format = 'figtree';
246             }
247             }
248             elsif ( $line =~ /<\? xml/ ) {
249 0         0 $line = $handle;
250 0 0       0 if ( $line =~ /^<[^>]*nexml/ ) {
    0          
251 0         0 $format = 'nexml';
252             }
253             elsif ( $line =~ /^<[^>]*phyloxml/ ) {
254 0         0 $format = 'phyloxml';
255             }
256             }
257 2         5 seek( $handle, 0, 0 );
258 2         7 return $format;
259             }
260              
261             # podinherit_insert_token
262              
263             =head1 SEE ALSO
264              
265             There is a mailing list at L<https://groups.google.com/forum/#!forum/bio-phylo>
266             for any user or developer questions and discussions.
267              
268             =over
269              
270             =item L<Bio::Phylo::IO>
271              
272             The parsers are called by the L<Bio::Phylo::IO> object.
273             Look there for examples.
274              
275             =item L<Bio::Phylo::Manual>
276              
277             Also see the manual: L<Bio::Phylo::Manual> and L<http://rutgervos.blogspot.com>.
278              
279             =back
280              
281             =head1 CITATION
282              
283             If you use Bio::Phylo in published research, please cite it:
284              
285             B<Rutger A Vos>, B<Jason Caravas>, B<Klaas Hartmann>, B<Mark A Jensen>
286             and B<Chase Miller>, 2011. Bio::Phylo - phyloinformatic analysis using Perl.
287             I<BMC Bioinformatics> B<12>:63.
288             L<http://dx.doi.org/10.1186/1471-2105-12-63>
289              
290             =cut
291              
292             1;