File Coverage

Bio/SeqIO/table.pm
Criterion Covered Total %
statement 194 200 97.0
branch 89 102 87.2
condition 34 53 64.1
subroutine 22 23 95.6
pod 10 10 100.0
total 349 388 89.9


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::SeqIO::table
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Hilmar Lapp
7             #
8             # You may distribute this module under the same terms as perl itself.
9             # Refer to the Perl Artistic License (see the license accompanying this
10             # software package, or see http://www.perl.com/language/misc/Artistic.html)
11             # for the terms under which you may use, modify, and redistribute this module.
12             #
13             # THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR IMPLIED
14             # WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
15             # MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
16             #
17              
18             # POD documentation - main docs before the code
19              
20             =head1 NAME
21              
22             Bio::SeqIO::table - sequence input/output stream from a delimited table
23              
24             =head1 SYNOPSIS
25              
26             # Do not to use this object directly, use Bio::SeqIO, for example:
27              
28             $in = Bio::SeqIO->new(-file => $filename, -format => 'table');
29              
30             while ( my $seq = $in->next_seq() ) {
31             # do something with $seq
32             }
33              
34             =head1 DESCRIPTION
35              
36             This class transforms records in a table-formatted text file into
37             Bio::Seq objects.
38              
39             A table-formatted text file of sequence records for the purposes of
40             this module is defined as a text file with each row corresponding to a
41             sequence, and the attributes of the sequence being in different
42             columns. Columns are delimited by a common delimiter, for instance tab
43             or comma.
44              
45             The module permits specifying which columns hold which type of
46             annotation. The semantics of certain attributes, if present, are
47             pre-defined, e.g., accession number and sequence. Additional
48             attributes may be added to the annotation bundle.
49              
50             =head1 FEEDBACK
51              
52             =head2 Mailing Lists
53              
54             User feedback is an integral part of the evolution of this and other
55             Bioperl modules. Send your comments and suggestions preferably to one
56             of the Bioperl mailing lists. Your participation is much appreciated.
57              
58             bioperl-l@bioperl.org - General discussion
59             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
60              
61             =head2 Support
62              
63             Please direct usage questions or support issues to the mailing list:
64              
65             I
66              
67             rather than to the module maintainer directly. Many experienced and
68             reponsive experts will be able look at the problem and quickly
69             address it. Please include a thorough description of the problem
70             with code and data examples if at all possible.
71              
72             =head2 Reporting Bugs
73              
74             Report bugs to the Bioperl bug tracking system to help us keep track
75             the bugs and their resolution.
76              
77             Bug reports can be submitted via email or the web:
78              
79             https://github.com/bioperl/bioperl-live/issues
80              
81             =head1 AUTHOR - Hilmar Lapp
82              
83             Email hlapp at gmx.net
84              
85             =head1 APPENDIX
86              
87             The rest of the documentation details each of the object
88             methods. Internal methods are usually preceded with a _
89              
90             =cut
91              
92             # Let the code begin...
93              
94             package Bio::SeqIO::table;
95 2     2   885 use strict;
  2         3  
  2         53  
96              
97 2     2   491 use Bio::Species;
  2         5  
  2         56  
98 2     2   496 use Bio::Seq::SeqFactory;
  2         4  
  2         47  
99 2     2   511 use Bio::Annotation::Collection;
  2         4  
  2         49  
100 2     2   12 use Bio::Annotation::SimpleValue;
  2         3  
  2         47  
101              
102 2     2   9 use base qw(Bio::SeqIO);
  2         3  
  2         2256  
103              
104             =head2 new
105              
106             Title : new
107             Usage : $stream = Bio::SeqIO->new(-file => $filename, -format => 'table')
108             Function: Returns a new seqstream
109             Returns : A Bio::SeqIO stream for a table format
110             Args : Named parameters:
111              
112             -file Name of file to read
113             -fh Filehandle to attach to
114             -comment Leading character(s) introducing a comment line
115             -header the number of header lines to skip; the first
116             non-comment header line will be used to obtain
117             column names; column names will be used as the
118             default tags for attaching annotation.
119             -delim The delimiter for columns as a regular expression;
120             consecutive occurrences of the delimiter will
121             not be collapsed.
122             -display_id The one-based index of the column containing
123             the display ID of the sequence
124             -accession_number The one-based index of the column
125             containing the accession number of the sequence
126             -seq The one-based index of the column containing
127             the sequence string of the sequence
128             -desc The one-based index of the column containing
129             the description of the sequence
130             -species The one-based index of the column containing the
131             species for the sequence record; if not a
132             number, will be used as the static species
133             common to all records
134             -annotation If provided and a scalar (but see below), a
135             flag whether or not all additional columns are
136             to be preserved as annotation, the tags used
137             will either be 'colX' if there is no column
138             header and where X is the one-based column
139             index, and otherwise the column headers will be
140             used as tags;
141              
142             If a reference to an array, or a square
143             bracket-enclosed string of comma-delimited
144             values, only those columns (one-based index)
145             will be preserved as annotation, tags as before;
146              
147             If a reference to a hash, or a curly
148             braces-enclosed string of comma-delimited key
149             and value pairs in alternating order, the keys
150             are one-based column indexes to be preserved,
151             and the values are the tags under which the
152             annotation is to be attached; if not provided or
153             supplied as undef, no additional annotation will
154             be preserved.
155             -colnames A reference to an array of column labels, or a
156             string of comma-delimited labels, denoting the
157             columns to be converted into annotation; this is
158             an alternative to -annotation and will be
159             ignored if -annotation is also supplied with a
160             valid value.
161             -trim Flag determining whether or not all values should
162             be trimmed of leading and trailing white space
163             and double quotes
164              
165             Additional arguments may be used to e.g. set factories and
166             builders involved in the sequence object creation (see the
167             POD of Bio::SeqIO).
168              
169             =cut
170              
171             sub _initialize {
172 7     7   25 my($self,@args) = @_;
173              
174             # chained initialization
175 7         31 $self->SUPER::_initialize(@args);
176              
177             # our own parameters
178 7         42 my ($cmtchars,
179             $header,
180             $delim,
181             $display_id,
182             $desc,
183             $accnr,
184             $seq,
185             $taxon,
186             $useann,
187             $colnames,
188             $trim) =
189             $self->_rearrange([qw(COMMENT
190             HEADER
191             DELIM
192             DISPLAY_ID
193             DESC
194             ACCESSION_NUMBER
195             SEQ
196             SPECIES
197             ANNOTATION
198             COLNAMES
199             TRIM)
200             ], @args);
201              
202             # store options and apply defaults
203 7 50 33     32 $self->comment_char(defined($cmtchars) ? $cmtchars : "#")
    50          
204             if (!defined($self->comment_char)) || defined($cmtchars);
205 7 100 33     23 $self->delimiter(defined($delim) ? $delim : "\t")
    50          
206             if (!defined($self->delimiter)) || defined($delim);
207 7 100       23 $self->header($header) if defined($header);
208 7 100       24 $self->trim_values($trim) if defined($trim);
209              
210             # attribute columns
211 7         12 my $attrs = {};
212 7 100       20 $attrs->{-display_id} = $display_id if defined($display_id);
213 7 100       21 $attrs->{-accession_number} = $accnr if defined($accnr);
214 7 100       22 $attrs->{-seq} = $seq if defined($seq);
215 7 100       17 $attrs->{-desc} = $desc if defined($desc);
216 7 100       18 if (defined($taxon)) {
217 4 50 33     23 if (ref($taxon) || ($taxon =~ /^\d+$/)) {
218             # either a static object, or a column reference
219 0         0 $attrs->{-species} = $taxon;
220             } else {
221             # static species as a string
222 4         36 $attrs->{-species} = Bio::Species->new(
223             -classification => [reverse(split(' ',$taxon))]);
224             }
225             }
226 7         33 $self->attribute_map($attrs);
227              
228             # annotation columns, if any
229 7 100 66     35 if ($useann && !ref($useann)) {
230             # it's a scalar; check whether this is in fact an array or
231             # hash as a string rather than just a flag
232 3 100       15 if ($useann =~ /^\[(.*)\]$/) {
    50          
233 1         8 $useann = [split(/[,;]/,$1)];
234             } elsif ($useann =~ /^{(.*)}$/) {
235 0         0 $useann = {split(/[,;]/,$1)};
236             } # else it is probably indeed just a flag
237             }
238 7 100       17 if (ref($useann)) {
239 1         1 my $ann_map;
240 1 50       6 if (ref($useann) eq "ARRAY") {
241 1         3 my $has_header = ($self->header > 0);
242 1         2 $ann_map = {};
243 1         2 foreach my $i (@$useann) {
244 5 50       10 $ann_map->{$i} = $has_header ? undef : "col$i";
245             }
246             } else {
247             # no special handling necessary
248 0         0 $ann_map = $useann;
249             }
250 1         3 $self->annotation_map($ann_map);
251             } else {
252 6   100     35 $self->keep_annotation($useann || $colnames);
253             # annotation columns, if any
254 6 100 66     18 if ($colnames && !ref($colnames)) {
255             # an array as a string
256 1         7 $colnames =~ s/^\[(.*)\]$/$1/;
257 1         6 $colnames = [split(/[,;]/,$colnames)];
258             }
259 6 100       18 $self->annotation_columns($colnames) if ref($colnames);
260             }
261              
262             # make sure we have a factory defined
263 7 50       39 if(!defined($self->sequence_factory)) {
264 7         25 $self->sequence_factory(
265             Bio::Seq::SeqFactory->new(-verbose => $self->verbose(),
266             -type => 'Bio::Seq::RichSeq'));
267             }
268             }
269              
270             =head2 next_seq
271              
272             Title : next_seq
273             Usage : $seq = $stream->next_seq()
274             Function: returns the next sequence in the stream
275             Returns : Bio::Seq::RichSeq object
276             Args :
277              
278             =cut
279              
280             sub next_seq {
281 46     46 1 95 my $self = shift;
282              
283             # skip until not a comment and not an empty line
284 46         150 my $line_ok = $self->_next_record();
285              
286             # if there is a header but we haven't read past it yet then do so now
287 46 100 100     194 if ($line_ok && (! $self->_header_skipped) && $self->header) {
      66        
288 5         16 $line_ok = $self->_parse_header();
289 5         13 $self->_header_skipped(1);
290             }
291              
292             # return if we reached end-of-file
293 46 100       104 return unless $line_ok;
294              
295             # otherwise, parse the record
296              
297             # split into columns
298 42         105 my @cols = $self->_get_row_values();
299             # trim leading and trailing whitespace and quotes if desired
300 42 100       103 if ($self->trim_values) {
301 40         102 for(my $i = 0; $i < scalar(@cols); $i++) {
302 400 100       513 if ($cols[$i]) {
303             # trim off whitespace
304 380         505 $cols[$i] =~ s/^\s+//;
305 380         884 $cols[$i] =~ s/\s+$//;
306             # trim off double quotes
307 380         348 $cols[$i] =~ s/^"//;
308 380         651 $cols[$i] =~ s/"$//;
309             }
310             }
311             }
312              
313             # assign values for columns in the attribute map
314 42         103 my $attrmap = $self->_attribute_map;
315 42         73 my %params = ();
316 42         133 foreach my $attr (keys %$attrmap) {
317 168 100 66     650 if ((!ref($attrmap->{$attr})) && ($attrmap->{$attr} =~ /^\d+$/)) {
318             # this is a column index, add to instantiation parameters
319 128         258 $params{$attr} = $cols[$attrmap->{$attr}];
320             } else {
321             # not a column index; we assume it's a static value
322 40         72 $params{$attr} = $attrmap->{$attr};
323             }
324             }
325              
326             # add annotation columns to the annotation bundle
327 42         112 my $annmap = $self->_annotation_map;
328 42 100 66     146 if ($annmap && %$annmap) {
329 40         164 my $anncoll = Bio::Annotation::Collection->new();
330 40         143 foreach my $col (keys %$annmap) {
331 240 100       490 next unless $cols[$col]; # skip empty columns!
332             $anncoll->add_Annotation(
333             Bio::Annotation::SimpleValue->new(-value => $cols[$col],
334 212         563 -tagname=> $annmap->{$col}));
335             }
336 40         116 $params{'-annotation'} = $anncoll;
337             }
338              
339             # ask the object builder to add the slots that we've gathered
340 42         164 my $builder = $self->sequence_builder();
341 42         236 $builder->add_slot_value(%params);
342             # and instantiate the object
343 42         115 my $seq = $builder->make_object();
344              
345             # done!
346 42         170 return $seq;
347             }
348              
349             =head2 comment_char
350              
351             Title : comment_char
352             Usage : $obj->comment_char($newval)
353             Function: Get/set the leading character(s) designating a line as
354             a comment-line.
355             Example :
356             Returns : value of comment_char (a scalar)
357             Args : on set, new value (a scalar or undef, optional)
358              
359              
360             =cut
361              
362             sub comment_char{
363 53     53 1 81 my $self = shift;
364              
365 53 100       123 return $self->{'comment_char'} = shift if @_;
366 46         130 return $self->{'comment_char'};
367             }
368              
369             =head2 header
370              
371             Title : header
372             Usage : $obj->header($newval)
373             Function: Get/set the number of header lines to skip before the
374             rows containing actual sequence records.
375              
376             If set to zero or undef, means that there is no header and
377             therefore also no column headers.
378              
379             Example :
380             Returns : value of header (a scalar)
381             Args : on set, new value (a scalar or undef, optional)
382              
383              
384             =cut
385              
386             sub header{
387 16     16 1 22 my $self = shift;
388              
389 16 100       33 return $self->{'header'} = shift if @_;
390 11         25 return $self->{'header'};
391             }
392              
393             =head2 delimiter
394              
395             Title : delimiter
396             Usage : $obj->delimiter($newval)
397             Function: Get/set the column delimiter. This will in fact be
398             treated as a regular expression. Consecutive occurrences
399             will not be collapsed to a single one.
400              
401             Example :
402             Returns : value of delimiter (a scalar)
403             Args : on set, new value (a scalar or undef, optional)
404              
405              
406             =cut
407              
408             sub delimiter{
409 49     49 1 57 my $self = shift;
410              
411 49 100       115 return $self->{'delimiter'} = shift if @_;
412 42         118 return $self->{'delimiter'};
413             }
414              
415             =head2 attribute_map
416              
417             Title : attribute_map
418             Usage : $obj->attribute_map($newval)
419             Function: Get/set the map of sequence object initialization
420             attributes (keys) to one-based column index.
421              
422             Attributes will usually need to be prefixed by a dash, just
423             as if they were passed to the new() method of the sequence
424             class.
425              
426             Example :
427             Returns : value of attribute_map (a reference to a hash)
428             Args : on set, new value (a reference to a hash or undef, optional)
429              
430              
431             =cut
432              
433             sub attribute_map{
434 9     9 1 12 my $self = shift;
435              
436             # internally we store zero-based maps - so we need to convert back
437             # and forth here
438 9 100       20 if (@_) {
439 7         10 my $arg = shift;
440             # allow for and protect against undef
441 7 50       17 return delete $self->{'_attribute_map'} unless defined($arg);
442             # copy to avoid side-effects
443 7         25 my $attr_map = {%$arg};
444 7         50 foreach my $key (keys %$attr_map) {
445 20 100 66     82 if ((!ref($attr_map->{$key})) && ($attr_map->{$key} =~ /^\d+$/)) {
446 16         28 $attr_map->{$key}--;
447             }
448             }
449 7         18 $self->{'_attribute_map'} = $attr_map;
450             }
451             # there may not be a map
452 9 50       25 return unless exists($self->{'_attribute_map'});
453             # we need to copy in order not to override the stored map!
454 9         15 my %attr_map = %{$self->{'_attribute_map'}};
  9         36  
455 9         25 foreach my $key (keys %attr_map) {
456 28 100 66     137 if ((!ref($attr_map{$key})) && ($attr_map{$key} =~ /^\d+$/)) {
457 22         63 $attr_map{$key}++;
458             }
459             }
460 9         25 return \%attr_map;
461             }
462              
463             =head2 annotation_map
464              
465             Title : annotation_map
466             Usage : $obj->annotation_map($newval)
467             Function: Get/set the mapping between one-based column indexes
468             (keys) and annotation tags (values).
469              
470             Note that the map returned by this method may change after
471             the first next_seq() call if the file contains a column
472             header and no annotation keys have been predefined in the
473             map, because upon reading the column header line the tag
474             names will be set automatically.
475              
476             Note also that the map may reference columns that are used
477             as well in the sequence attribute map.
478              
479             Example :
480             Returns : value of annotation_map (a reference to a hash)
481             Args : on set, new value (a reference to a hash or undef, optional)
482              
483              
484             =cut
485              
486             sub annotation_map{
487 9     9 1 16 my $self = shift;
488              
489             # internally we store zero-based maps - so we need to convert back
490             # and forth here
491 9 100       17 if (@_) {
492 5         6 my $arg = shift;
493             # allow for and protect against undef
494 5 50       13 return delete $self->{'_annotation_map'} unless defined($arg);
495             # copy to avoid side-effects
496 5         20 my $ann_map = {%$arg};
497             # make sure we sort the keys numerically or otherwise we may
498             # clobber a key with a higher index
499 5         33 foreach my $key (sort { $a <=> $b } keys(%$ann_map)) {
  51         60  
500 29         44 $ann_map->{$key-1} = $ann_map->{$key};
501 29         34 delete $ann_map->{$key};
502             }
503 5         10 $self->{'_annotation_map'} = $ann_map;
504             # also make a note that we want to keep annotation
505 5         9 $self->keep_annotation(1);
506             }
507             # there may not be a map
508 9 100       27 return unless exists($self->{'_annotation_map'});
509             # we need to copy in order not to override the stored map!
510 6         9 my %ann_map = %{$self->{'_annotation_map'}};
  6         21  
511             # here we need to sort numerically in reverse order ...
512 6         23 foreach my $key (sort { $b <=> $a } keys(%ann_map)) {
  60         59  
513 34         46 $ann_map{$key+1} = $ann_map{$key};
514 34         34 delete $ann_map{$key};
515             }
516 6         21 return \%ann_map;
517             }
518              
519             =head2 keep_annotation
520              
521             Title : keep_annotation
522             Usage : $obj->keep_annotation($newval)
523             Function: Get/set flag whether or not to keep values from
524             additional columns as annotation.
525              
526             Additional columns are all those columns in the input file
527             that aren't referenced in the attribute map.
528              
529             Example :
530             Returns : value of keep_annotation (a scalar)
531             Args : on set, new value (a scalar or undef, optional)
532              
533              
534             =cut
535              
536             sub keep_annotation{
537 16     16 1 24 my $self = shift;
538              
539 16 100       47 return $self->{'keep_annotation'} = shift if @_;
540 5         13 return $self->{'keep_annotation'};
541             }
542              
543             =head2 annotation_columns
544              
545             Title : annotation_columns
546             Usage : $obj->annotation_columns($newval)
547             Function: Get/set the names (labels) of the columns to be used for
548             annotation.
549              
550             This is an alternative to using annotation_map. In order to
551             have any effect, it must be set before the first call of
552             next_seq(), and obviously there must be a header line (or
553             row) too giving the column labels.
554              
555             Example :
556             Returns : value of annotation_columns (a reference to an array)
557             Args : on set, new value (a reference to an array of undef, optional)
558              
559              
560             =cut
561              
562             sub annotation_columns{
563 4     4 1 7 my $self = shift;
564              
565 4 100       11 return $self->{'annotation_columns'} = shift if @_;
566 3         9 return $self->{'annotation_columns'};
567             }
568              
569             =head2 trim_values
570              
571             Title : trim_values
572             Usage : $obj->trim_values($newval)
573             Function: Get/set whether or not to trim leading and trailing
574             whitespace off all column values.
575             Example :
576             Returns : value of trim_values (a scalar)
577             Args : on set, new value (a scalar or undef, optional)
578              
579              
580             =cut
581              
582             sub trim_values{
583 50     50 1 76 my $self = shift;
584              
585 50 100       133 return $self->{'trim_values'} = shift if @_;
586 46         121 return $self->{'trim_values'};
587             }
588              
589             =head2 write_seq
590              
591             Title: write_seq
592             Usage: write_seq() is not implemented for table format output.
593              
594             =cut
595              
596             sub write_seq {
597 0     0 1 0 shift->throw("write_seq() not implemented for 'table' format");
598             }
599              
600             =head1 Internal methods
601              
602             All methods with a leading underscore are not meant to be part of the
603             'official' API. They are for use by this module only, consider them
604             private unless you are a developer trying to modify this module.
605              
606             =cut
607              
608             =head2 _attribute_map
609              
610             Title : _attribute_map
611             Usage : $obj->_attribute_map($newval)
612             Function: Get only. Same as attribute_map, but zero-based indexes.
613              
614             Note that any changes made to the returned map will change
615             the map used by this instance. You should know what you are
616             doing if you modify the returned value (or if you call this
617             method in the first place).
618              
619             Example :
620             Returns : value of _attribute_map (a reference to a hash)
621             Args : none
622              
623              
624             =cut
625              
626             sub _attribute_map{
627 42     42   62 my $self = shift;
628              
629 42         78 return $self->{'_attribute_map'};
630             }
631              
632             =head2 _annotation_map
633              
634             Title : _annotation_map
635             Usage : $obj->_annotation_map($newval)
636             Function: Get only. Same as annotation_map, but with zero-based indexes.
637              
638             Note that any changes made to the returned map will change
639             the map used by this instance. You should know what you are
640             doing if you modify the returned value (or if you call this
641             method in the first place).
642              
643             Example :
644             Returns : value of _annotation_map (a reference to a hash)
645             Args : none
646              
647              
648             =cut
649              
650             sub _annotation_map{
651 42     42   50 my $self = shift;
652              
653 42         80 return $self->{'_annotation_map'};
654             }
655              
656             =head2 _header_skipped
657              
658             Title : _header_skipped
659             Usage : $obj->_header_skipped($newval)
660             Function: Get/set the flag whether the header was already
661             read (and skipped) or not.
662             Example :
663             Returns : value of _header_skipped (a scalar)
664             Args : on set, new value (a scalar or undef, optional)
665              
666              
667             =cut
668              
669             sub _header_skipped{
670 47     47   71 my $self = shift;
671              
672 47 100       96 return $self->{'_header_skipped'} = shift if @_;
673 42         148 return $self->{'_header_skipped'};
674             }
675              
676             =head2 _next_record
677              
678             Title : _next_record
679             Usage :
680             Function: Navigates the underlying file to the next record.
681              
682             For row-based records in delimited text files, this will
683             skip all empty lines and lines with a leading comment
684             character.
685              
686             This method is here is to serve as a hook for other formats
687             that conceptually also represent tables but aren't
688             formatted as row-based text files.
689              
690             Example :
691             Returns : TRUE if the navigation was successful and FALSE
692             otherwise. Unsuccessful navigation will usually be treated
693             as an end-of-file condition.
694             Args :
695              
696              
697             =cut
698              
699             sub _next_record{
700 39     39   42 my $self = shift;
701              
702 39         75 my $cmtcc = $self->comment_char;
703 39         156 my $line = $self->_readline();
704              
705             # skip until not a comment and not an empty line
706 39   33     473 while (defined($line)
      66        
707             && (($cmtcc && ($line =~ /^\s*$cmtcc/))
708             || ($line =~ /^\s*$/))) {
709 0         0 $line = $self->_readline();
710             }
711              
712 39         113 return $self->{'_line'} = $line;
713             }
714              
715             =head2 _parse_header
716              
717             Title : _parse_header
718             Usage :
719             Function: Parse the table header and navigate past it.
720              
721             This method is called if the number of header rows has been
722             specified equal to or greater than one, and positioned at
723             the first header line (row). By default the first header
724             line (row) is used for setting column names, but additional
725             lines (rows) may be skipped too. Empty lines and comment
726             lines do not count as header lines (rows).
727              
728             This method will call _next_record() to navigate to the
729             next header line (row), if there is more than one header
730             line (row). Upon return, the file is presumed to be
731             positioned at the first record after the header.
732              
733             This method is here is to serve as a hook for other formats
734             that conceptually also represent tables but aren't
735             formatted as row-based text files.
736              
737             Note however that the only methods used to access file
738             content or navigate the position are _get_row_values() and
739             _next_record(), so it should usually suffice to override
740             those.
741              
742             Example :
743             Returns : TRUE if navigation past the header was successful and FALSE
744             otherwise. Unsuccessful navigation will usually be treated
745             as an end-of-file condition.
746             Args :
747              
748              
749             =cut
750              
751             sub _parse_header{
752 5     5   8 my $self = shift;
753              
754             # the first header line contains the column headers, see whether
755             # we need them
756 5 100       12 if ($self->keep_annotation) {
757 4         10 my @colnames = $self->_get_row_values();
758             # trim leading and trailing whitespace if desired
759 4 50       10 if ($self->trim_values) {
760             # trim off whitespace
761 4         11 @colnames = map { $_ =~ s/^\s+//; $_ =~ s/\s+$//; $_; } @colnames;
  40         45  
  40         50  
  40         49  
762             # trim off double quotes
763 4         8 @colnames = map { $_ =~ s/^"//; $_ =~ s/"$//; $_; } @colnames;
  40         35  
  40         35  
  40         44  
764             }
765             # build or complete annotation column map
766 4   100     12 my $annmap = $self->annotation_map || {};
767 4 100       11 if (! %$annmap) {
768             # check whether columns have been defined by name rather than index
769 3 100       13 if (my $anncols = $self->annotation_columns) {
770             # first sanity check: all column names must map
771 1         2 my %colmap = map { ($_,1); } @colnames;
  10         18  
772 1         3 foreach my $col (@$anncols) {
773 5 50       10 if (!exists($colmap{$col})) {
774 0         0 $self->throw("no such column labeled '$col'");
775             }
776             }
777             # now map to the column indexes
778 1         3 %colmap = map { ($_,1); } @$anncols;
  5         8  
779 1         4 for (my $i = 0; $i < scalar(@colnames); $i++) {
780 10 100       16 if (exists($colmap{$colnames[$i]})) {
781 5         13 $annmap->{$i+1} = $colnames[$i];
782             }
783             }
784             } else {
785             # no columns specified, default to all non-attribute columns
786 2         8 for (my $i = 0; $i < scalar(@colnames); $i++) {
787 20         42 $annmap->{$i+1} = $colnames[$i];
788             }
789             # subtract all attribute-referenced columns
790 2         4 foreach my $attrcol (values %{$self->attribute_map}) {
  2         5  
791 8 100 66     28 if ((!ref($attrcol)) && ($attrcol =~ /^\d+$/)) {
792 6         15 delete $annmap->{$attrcol};
793             }
794             }
795             }
796             } else {
797             # fill in where the tag names weren't pre-defined
798 1         4 for (my $i = 0; $i < scalar(@colnames); $i++) {
799 10 100 66     26 if (exists($annmap->{$i+1}) && ! defined($annmap->{$i+1})) {
800 5         12 $annmap->{$i+1} = $colnames[$i];
801             }
802             }
803             }
804 4         13 $self->annotation_map($annmap);
805             }
806              
807             # now read past the header
808 5         16 my $header_lines = $self->header;
809 5         10 my $line_ok = 1;
810 5   66     21 while (defined($line_ok) && ($header_lines > 0)) {
811 5         16 $line_ok = $self->_next_record();
812 5         17 $header_lines--;
813             }
814              
815 5         14 return $line_ok;
816             }
817              
818             =head2 _get_row_values
819              
820             Title : _get_row_values
821             Usage :
822             Function: Get the values for the current line (or row) as an array in
823             the order of columns.
824              
825             This method is here is to serve as a hook for other formats
826             that conceptually also represent tables but aren't
827             formatted as row-based text files.
828              
829             Example :
830             Returns : An array of column values for the current row.
831             Args :
832              
833              
834             =cut
835              
836             sub _get_row_values{
837 35     35   40 my $self = shift;
838 35         77 my $delim = $self->delimiter;
839 35         53 my $line = $self->{'_line'};
840 35         131 chomp($line);
841 35         284 my @cols = split(/$delim/,$line);
842 35         139 return @cols;
843             }
844              
845             1;