File Coverage

Bio/SeqIO/excel.pm
Criterion Covered Total %
statement 49 49 100.0
branch 11 14 78.5
condition 1 2 50.0
subroutine 10 10 100.0
pod 2 2 100.0
total 73 77 94.8


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::excel
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp
7             #
8              
9             #
10             # (c) Hilmar Lapp, hlapp at gmx.net, 2005.
11             # (c) GNF, Genomics Institute of the Novartis Research Foundation, 2005.
12             #
13             # You may distribute this module under the same terms as perl itself.
14             # Refer to the Perl Artistic License (see the license accompanying this
15             # software package, or see http://www.perl.com/language/misc/Artistic.html)
16             # for the terms under which you may use, modify, and redistribute this module.
17             #
18             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
19             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
20             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
21             #
22              
23             # POD documentation - main docs before the code
24              
25             =head1 NAME
26              
27             Bio::SeqIO::excel - sequence input/output stream from a
28             MSExcel-formatted table
29              
30             =head1 SYNOPSIS
31              
32             #It is probably best not to use this object directly, but
33             #rather go through the SeqIO handler system. Go:
34              
35             $stream = Bio::SeqIO->new(-file => $filename, -format => 'excel');
36              
37             while ( my $seq = $stream->next_seq() ) {
38             # do something with $seq
39             }
40              
41             =head1 DESCRIPTION
42              
43             This class transforms records in a MS Excel workbook file into
44             Bio::Seq objects. It is derived from the table format module and
45             merely defines additional properties and overrides the way to get data
46             from the file and advance to the next record.
47              
48             The module permits specifying which columns hold which type of
49             annotation. The semantics of certain attributes, if present, are
50             pre-defined, e.g., accession number and sequence. Additional
51             attributes may be added to the annotation bundle. See
52             L for a complete list of parameters and
53             capabilities.
54              
55             You may also specify the worksheet from which to obtain the data, and
56             after finishing one worksheet you may change the name to keep reading
57             from another worksheet (in the same file).
58              
59             This module depends on Spreadsheet::ParseExcel to parse the underlying
60             Excel file.
61              
62             =head1 FEEDBACK
63              
64             =head2 Mailing Lists
65              
66             User feedback is an integral part of the evolution of this and other
67             Bioperl modules. Send your comments and suggestions preferably to one
68             of the Bioperl mailing lists. Your participation is much appreciated.
69              
70             bioperl-l@bioperl.org - General discussion
71             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
72              
73             =head2 Support
74              
75             Please direct usage questions or support issues to the mailing list:
76              
77             I
78              
79             rather than to the module maintainer directly. Many experienced and
80             reponsive experts will be able look at the problem and quickly
81             address it. Please include a thorough description of the problem
82             with code and data examples if at all possible.
83              
84             =head2 Reporting Bugs
85              
86             Report bugs to the Bioperl bug tracking system to help us keep track
87             the bugs and their resolution.
88              
89             Bug reports can be submitted via email or the web:
90              
91             https://github.com/bioperl/bioperl-live/issues
92              
93             =head1 AUTHOR - Hilmar Lapp
94              
95             Email hlapp at gmx.net
96              
97             =head1 APPENDIX
98              
99             The rest of the documentation details each of the object
100             methods. Internal methods are usually preceded with a _
101              
102             =cut
103              
104             # Let the code begin...
105              
106             package Bio::SeqIO::excel;
107 2     2   584 use strict;
  2         4  
  2         47  
108              
109 2     2   317 use Bio::SeqIO;
  2         4  
  2         54  
110 2     2   10 use Spreadsheet::ParseExcel;
  2         2  
  2         42  
111             #use Spreadsheet::ParseExcel::Workbook;
112              
113 2     2   7 use base qw(Bio::SeqIO::table);
  2         2  
  2         730  
114              
115             =head2 new
116              
117             Title : new
118             Usage : $stream = Bio::SeqIO->new(-file => $filename, -format => 'excel')
119             Function: Returns a new seqstream
120             Returns : A Bio::SeqIO stream for a MS Excel format
121              
122             Args : Supports the same named parameters as Bio::SeqIO::table,
123             except -delim, which obviously does not apply to a binary
124             format. In addition, the following parameters are supported.
125              
126             -worksheet the name of the worksheet holding the table;
127             if unspecified the first worksheet will be
128             used
129              
130              
131             =cut
132              
133             sub _initialize {
134 2     2   8 my($self,@args) = @_;
135              
136             # chained initialization
137 2         8 $self->SUPER::_initialize(@args);
138              
139             # our own parameters
140 2         8 my ($worksheet) = $self->_rearrange([qw(WORKSHEET)], @args);
141              
142             # store options and apply defaults
143 2   50     12 $self->worksheet($worksheet || 0);
144              
145             }
146              
147             =head2 worksheet
148              
149             Title : worksheet
150             Usage : $obj->worksheet($newval)
151             Function: Get/set the name of the worksheet holding the table. The
152             worksheet name may also be a numeric index.
153              
154             You may change the value during parsing at any time in
155             order to start reading from a different worksheet (in the
156             same file).
157              
158             Example :
159             Returns : value of worksheet (a scalar)
160             Args : on set, new value (a scalar or undef, optional)
161              
162              
163             =cut
164              
165             sub worksheet{
166 3     3 1 4 my $self = shift;
167              
168 3 100       7 if (@_) {
169 2         3 my $sheetname = shift;
170             # on set we reset the parser here in order to allow reading
171             # from multiple worksheets in a row
172 2 50       8 $self->_worksheet(undef) if defined($sheetname);
173 2         6 return $self->{'worksheet'} = $sheetname;
174             }
175 1         4 return $self->{'worksheet'};
176             }
177              
178             =head2 close
179              
180             Title : close
181             Usage :
182             Function: Close and/or release the resources used by this parser instance.
183              
184             We override this here in order to free up the worksheet and
185             other related objects.
186              
187             Example :
188             Returns :
189             Args :
190              
191              
192             =cut
193              
194             sub close{
195 2     2 1 3 my $self = shift;
196              
197 2         6 $self->_worksheet(undef);
198             # make sure we chain to the inherited method
199 2         23 $self->SUPER::close(@_);
200             }
201              
202             =head1 Internal methods
203              
204             All methods with a leading underscore are not meant to be part of the
205             'official' API. They are for use by this module only, consider them
206             private unless you are a developer trying to modify this module.
207              
208             =cut
209              
210             =head2 _worksheet
211              
212             Title : _worksheet
213             Usage : $obj->_worksheet($newval)
214             Function: Get/set the worksheet object to be used for accessing cells.
215             Example :
216             Returns : value of _worksheet (a Spreadsheet::ParseExcel::Worksheet object)
217             Args : on set, new value (a Spreadsheet::ParseExcel::Worksheet
218             object or undef, optional)
219              
220              
221             =cut
222              
223             sub _worksheet{
224 28     28   20 my $self = shift;
225              
226 28 100       47 return $self->{'_worksheet'} = shift if @_;
227 23         21 return $self->{'_worksheet'};
228             }
229              
230             =head2 _next_record
231              
232             Title : _next_record
233             Usage :
234             Function: Navigates the underlying file to the next record.
235              
236             We override this here in order to adapt navigation to data
237             in an Excel worksheet.
238              
239             Example :
240             Returns : TRUE if the navigation was successful and FALSE
241             otherwise. Unsuccessful navigation will usually be treated
242             as an end-of-file condition.
243             Args :
244              
245              
246             =cut
247              
248             sub _next_record{
249 12     12   13 my $self = shift;
250              
251 12         15 my $wsheet = $self->_worksheet();
252 12 100       19 if (! defined($wsheet)) {
253             # worksheet hasn't been initialized yet, do so now
254 1         2 my $wbook = Spreadsheet::ParseExcel::Workbook->Parse($self->_fh);
255 1         14948 $wsheet = $wbook->Worksheet($self->worksheet);
256             # store the result
257 1         12 $self->_worksheet($wsheet);
258             # re-initialize the current row
259 1         1 $self->{'_row'} = -1;
260             }
261              
262             # we need a valid worksheet to continue
263 12 50       19 return unless defined($wsheet);
264              
265             # check whether we are at or beyond the last defined row
266 12         27 my ($minrow, $maxrow) = $wsheet->RowRange();
267 12 100       67 return if $self->{'_row'} >= $maxrow;
268              
269             # we don't check for empty rows here as in order to do that we'd
270             # have to know in which column to look
271             # so, just advance to the next row
272 11         11 $self->{'_row'}++;
273              
274             # done
275 11         11 return 1;
276             }
277              
278             =head2 _get_row_values
279              
280             Title : _get_row_values
281             Usage :
282             Function: Get the values for the current line (or row) as an array in
283             the order of columns.
284              
285             We override this here in order to adapt access to column
286             values to data contained in an Excel worksheet.
287              
288             Example :
289             Returns : An array of column values for the current row.
290             Args :
291              
292              
293             =cut
294              
295             sub _get_row_values{
296 11     11   10 my $self = shift;
297              
298             # obtain the range of columns - we use all that are defined
299 11         13 my $wsheet = $self->_worksheet();
300 11         19 my ($colmin,$colmax) = $wsheet->ColRange();
301              
302             # build the array of columns for the current row
303 11         46 my @cols = ();
304 11         9 my $row = $self->{'_row'};
305 11         17 for (my $i = $colmin; $i <= $colmax; $i++) {
306 110         335 my $cell = $wsheet->Cell($row, $i);
307 110 50       626 push(@cols, defined($cell) ? $cell->Value : $cell);
308             }
309             # done
310 11         63 return @cols;
311             }
312              
313             1;