File Coverage

blib/lib/Bio/ToolBox/Data/Stream.pm
Criterion Covered Total %
statement 100 157 63.6
branch 39 88 44.3
condition 19 47 40.4
subroutine 11 13 84.6
pod 8 8 100.0
total 177 313 56.5


line stmt bran cond sub pod time code
1             package Bio::ToolBox::Data::Stream;
2             our $VERSION = '1.69';
3              
4             =head1 NAME
5              
6             Bio::ToolBox::Data::Stream - Read, Write, and Manipulate Data File Line by Line
7              
8             =head1 SYNOPSIS
9              
10             use Bio::ToolBox::Data;
11            
12             ### Open a pre-existing file
13             my $Stream = Bio::ToolBox::Data->new(
14             in => 'regions.bed',
15             stream => 1,
16             );
17            
18             # or directly
19             my $Stream = Bio::ToolBox::Data::Stream->new(
20             in => 'regions.bed',
21             );
22            
23             ### Open a new file for writing
24             my $Stream = Bio::ToolBox::Data::Stream->new(
25             out => 'output.txt',
26             columns => [qw(chromosome start stop name)],
27             );
28            
29            
30             ### Working line by line
31             while (my $line = $Stream->next_line) {
32             # get the positional information from the file data
33             # assuming that the input file had these identifiable columns
34             # each line is a Bio::ToolBox::Data::Feature item
35             my $seq_id = $line->seq_id;
36             my $start = $line->start;
37             my $stop = $line->end;
38            
39             # change values
40             $line->value(1, 100); # index, new value
41             }
42            
43            
44             ### Working with two file streams
45             my $inStream = Bio::ToolBox::Data::Stream->new(
46             file => 'regions.bed',
47             );
48             my $outStream = $inStream->duplicate('regions_ext100.bed');
49             my $sc = $inStream->start_column;
50             my $ec = $inStream->end_column;
51             while (my $line = $inStream->next_line) {
52             # adjust positions by 100 bp
53             my $s = $line->start;
54             my $e = $line->end;
55             $line->value($sc, $s - 100);
56             $line->value($ec, $e + 100);
57             $outStream->write_row($line);
58             }
59            
60            
61             ### Finishing
62             # close your file handles when you are done
63             $Stream->close_fh;
64              
65             =head1 DESCRIPTION
66              
67             This module works similarly to the L<Bio::ToolBox::Data> object, except that
68             rows are read from a file handle rather than a memory structure. This
69             allows very large files to be read, manipulated, and even written without
70             slurping the entire contents into memory.
71              
72             For an introduction to the L<Bio::ToolBox::Data> object and methods, refer to
73             its documentation and the L<Bio::ToolBox::Data::Feature> documentation.
74              
75             Typically, manipulations are only performed on one row at a time, not on an
76             entire table. Therefore, large scale table manipulations, such as sorting, are
77             not possible.
78              
79             A typical workflow consists of opening two Stream objects, one for reading and
80             one for writing. Rows are read, one at a time, from the read Stream, manipulated
81             as necessary, and then written to the write Stream. Each row is passed as a
82             L<Bio::ToolBox::Data::Feature> object. It can be manipulated as such, or the
83             corresponding values may be dumped as an array. Working with the row data
84             as an array is required when adding or deleting columns, since these manipulations
85             are not allowed with a Feature object. The write Stream can then be passed
86             either the Feature object or the array of values to be written.
87              
88              
89             =head1 METHODS
90              
91             =head2 Initializing the structure
92              
93             A new Bio::ToolBox::Data::Stream object may be generated directly, or indirectly
94             through the L<Bio::ToolBox::Data> module.
95              
96             =over 4
97              
98             =item new
99              
100             my $Stream = Bio::ToolBox::Data::Stream->new(
101             in => $filename,
102             );
103             my $Stream = Bio::ToolBox::Data->new(
104             stream => 1,
105             in => $filename,
106             );
107              
108             Options to the new function are listed below. Streams are inherently either
109             read or write mode, determined by the mode given through the options.
110              
111             =over 4
112              
113             =item in
114              
115             Provide the path of the file to open for reading. File types are
116             recognized by the extension, and compressed files (.gz) are supported. File
117             types supported include all those listed in L.
118              
119             =item out
120              
121             Provide the path of the file to open for writing. No check is made
122             for pre-existing files; if it exists it will be overwritten! A new data
123             object is prepared, therefore column names must be provided.
124              
125             =item noheader
126              
127             Boolean option indicating that the input file does not have file headers,
128             in which case dummy headers are provided. This is not necessary for
129             defined file types that don't normally have file headers, such as
130             BED, GFF, or UCSC files. Ignored for output files.
131              
132             =item columns
133              
134             my $Stream = Bio::ToolBox::Data::Stream->new(
135             out => $filename,
136             columns => [qw(Column1 Column2 ...)],
137             );
138              
139             When a new file is written, provide the names of the columns as an
140             anonymous array. If no columns are provided, then a completely empty
141             data structure is made. Columns must be added with the add_column()
142             method below.
143              
144             =item gff
145              
146             When writing a GFF file, provide a GFF version. When this is given, the
147             nine standard column names and metadata are automatically provided based
148             on the file format specification. Note that the column names are not
149             actually written in the file, but are maintained for internal use.
150             Acceptable versions include 1, 2, 2.5 (GTF), and 3 (GFF3).
151              
152             =item bed
153              
154             When writing a BED file, provide the number of bed columns that the file
155             will have. When this is given, the standard column names and metadata
156             will be automatically provided based on the standard file format
157             specification. Note that column names are not actually written to the file,
158             but are maintained for internal use. Acceptable values are integers from
159             3 to 12.
160              
161             =item ucsc
162              
163             When writing a UCSC-style file format, provide the number of bed columns
164             that the file will have. When this is given, the standard column names and
165             metadata will be automatically provided based on the file format specification.
166             Note that column names are not actually written to the file, but are maintained
167             for internal use. Acceptable values include 10 (refFlat without gene names),
168             11 (refFlat with gene names), 12 (knownGene gene prediction table), and 15
169             (an extended gene prediction or genePredExt table).
170              
171             =item gz
172              
173             Boolean value to change the compression status of the output file. If
174             overwriting an input file, the default is to maintain the compression status,
175             otherwise no compression. Pass a 0 for no compression, 1 for standard
176             gzip compression, or 2 for block gzip (bgzip) compression for tabix
177             compatibility.
178              
179             =back
180              
181             =item duplicate
182              
183             my $Out_Stream = $Stream->duplicate($new_filename);
184              
185             For an opened-to-read Stream object, you may duplicate the object as a new
186             opened-to-write Stream object that maintains the same columns and metadata.
187             A new different filename must be provided.
188              
189             =back
190              
191             =head2 General Metadata
192              
193             There is a variety of general metadata regarding the Data structure that
194             is available.
195              
196             The following methods may be used to access or set these
197             metadata properties. Note that metadata is only written at the beginning
198             of the file, and so must be set prior to iterating through the file.
199              
200             =over 4
201              
202             =item feature
203              
204             Returns or sets the name of the features used to collect
205             the list of features. The actual feature types are listed
206             in the table, so this metadata is merely descriptive.
207              
208             =item feature_type
209              
210             Returns one of three specific values describing the contents
211             of the data table inferred by the presence of specific column
212             names. This provides a clue as to whether the table features
213             represent genomic regions (defined by coordinate positions) or
214             named database features. The return values include:
215              
216             =over 4
217              
218             =item coordinate: Table includes at least chromosome and start
219              
220             =item named: Table includes name, type, and/or Primary_ID
221              
222             =item unknown: unrecognized
223              
224             =back
225              
226             =item program
227              
228             Returns or sets the name of the program generating the list.
229              
230             =item database
231              
232             Returns or sets the name or path of the database from which the
233             features were derived.
234              
235             =item gff
236              
237             Returns or sets the version of loaded GFF files. Supported versions
238             include 1, 2, 2.5 (GTF), and 3.
239              
240             =item bed
241              
242             Returns or sets the BED file version. Here, the BED version is simply
243             the number of columns.
244              
245             =item ucsc
246              
247             Returns or sets the UCSC file format version. Here, the version is
248             simply the number of columns. Supported versions include 10 (gene
249             prediction), 11 (refFlat, or gene prediction with gene name), 12
250             (knownGene table), 15 (extended gene prediction), or 16 (extended
251             gene prediction with bin).
252              
253             =item vcf
254              
255             Returns or sets the VCF file version number. VCF support is limited.
256              
257             =back
258              
259             =head2 File information
260              
261             These methods provide information about the file from which the
262             data table was loaded. This does not include parsed annotation tables.
263              
264             =over 4
265              
266             =item filename
267              
268             =item path
269              
270             =item basename
271              
272             =item extension
273              
274             Returns the filename, full path, basename, and extension of
275             the filename. Concatenating the last three values will reconstitute
276             the first original filename.
277              
278             =item add_file_metadata
279              
280             $Data->add_file_metadata('/path/to/file.txt');
281              
282             Add filename metadata. This will automatically parse the path,
283             basename, and recognized extension from the passed filename and
284             set the appropriate metadata attributes.
285              
286             =back
287              
288             =head2 Comments
289              
290             Comments are the other commented lines from a text file (lines
291             beginning with a #) that were not parsed as metadata.
292              
293             =over 4
294              
295             =item comments
296              
297             Returns a copy of the array containing commented lines.
298              
299             =item add_comment
300              
301             Appends the text string to the comment array.
302              
303             =item delete_comment
304              
305             Deletes a comment. Provide the array index of the comment to
306             delete. If an index is not provided, ALL comments will be deleted!
307              
308             =item vcf_headers
309              
310             For VCF files, this will partially parse the VCF headers into a
311             hash structure that can be queried or manipulated. Each header
312             line is parsed for the primary key, being the first word after the
313             ## prefix, e.g. INFO, FORMAT, FILTER, contig, etc. For the simple
314             values, they are stored as the value. For complex entries, such as
315             with INFO and FORMAT, a second level hash is created with the ID
316             extracted and used as the second level key. The value is always the
317             remainder of the string.
318              
319             For example, the following would be a simple parsed vcf header in
320             code representation.
321              
322             $vcf_header = {
323             FORMAT => {
324             GT => q(ID=GT,Number=1,Type=String,Description="Genotype"),
325             AD => q(ID=AD,Number=.,Type=Integer,Description="ref,alt Allelic depths"),
326             },
327             fileDate => 20150715,
328             }
329              
330             =item rewrite_vcf_headers
331              
332             If you have altered the vcf headers exported by the vcf_headers()
333             method, then this method will rewrite the hash structure as new
334             comment lines. Do this prior to writing the new file stream
335             or else you will lose your changed VCF header metadata.
336              
337             =back
338              
339             =head2 Column Metadata
340              
341             Information about the columns may be accessed. This includes the
342             names of the column and shortcuts to specific identifiable columns,
343             such as name and coordinates. In addition, each column may have
344             additional metadata. Each metadata is a series of key =E<gt>
345             value pairs. The minimum keys are 'index' (the 0-based index
346             of the column) and 'name' (the column header name). Additional
347             keys and values may be queried or set as appropriate. When the
348             file is written, these are stored as commented metadata lines at
349             the beginning of the file. Setting metadata is futile after
350             reading or writing has begun.
351              
352             =over 4
353              
354             =item list_columns
355              
356             Returns an array or array reference of the column names
357             in ascending (left to right) order.
358              
359             =item number_columns
360              
361             Returns the number of columns in the Data table.
362              
363             =item last_column
364              
365             Returns the array index of the last (rightmost) column in the
366             Data table.
367              
368             =item name
369              
370             $Stream->name($index, $new_name);
371             my $name = $Stream->name($i);
372              
373             Convenient method to return the name of the column given the
374             index number. A column may also be renamed by passing a new name.
375              
376             =item metadata
377              
378             $Stream->metadata($index, $key, $new_value);
379             my $value = $Stream->metadata($index, $key);
380              
381             Returns or sets the metadata value for a specific $key for a
382             specific column $index.
383              
384             This may also be used to add a new metadata key. Simply provide
385             the name of a new $key that is not present.
386              
387             If no key is provided, then a hash or hash reference is returned
388             representing the entire metadata for that column.
389              
390             =item copy_metadata
391              
392             $Stream->copy_metadata($source, $target);
393              
394             This method will copy the metadata (everything except name and
395             index) between the source column and target column. Returns 1 if
396             successful.
397              
398             =item delete_metadata
399              
400             $Stream->delete_metadata($index, $key);
401              
402             Deletes a column-specific metadata $key and value for a specific
403             column $index. If a $key is not provided, then all metadata keys
404             for that index will be deleted.
405              
406             =item find_column
407              
408             my $i = $Stream->find_column('Gene');
409             my $i = $Stream->find_column('^Gene$')
410              
411             Searches the column names for the specified column name. This
412             employs a case-insensitive grep search, so simple substitutions
413             may be made.
414              
415             =item chromo_column
416              
417             =item start_column
418              
419             =item stop_column
420              
421             =item strand_column
422              
423             =item name_column
424              
425             =item type_column
426              
427             =item id_column
428              
429             These methods will return the identified column best matching
430             the description. Returns C<undef> if that column is not present.
431             These use the L</find_column> method with a predefined list of
432             aliases.
433              
434             =back
435              
436             =head2 Modifying Columns
437              
438             These methods allow modification to the number and order of the
439             columns in a Stream object. These methods can only be employed
440             prior to opening a file handle for writing, i.e. before the first
441             L</write_row> method is called. This enables one, for example, to
442             duplicate a read-only Stream object to create a write-only Stream,
443             add or delete columns, and then begin the row iteration.
444              
445             =over 4
446              
447             =item add_column
448              
449             my $i = $Stream->add_column($name);
450              
451             Appends a new column at the rightmost position (highest
452             index). It adds the column header name and creates a
453             new column metadata hash. Pass a text string representing
454             the new column name. It returns the new column index if
455             successful.
456              
457             =item copy_column
458              
459             my $j = $Stream->copy_column($i);
460              
461             This will copy a column, appending the duplicate column at
462             the rightmost position (highest index). It will duplicate
463             column metadata as well. It will return the new index
464             position.
465              
466             =item delete_column
467              
468             Deletes one or more specified columns. Any remaining
469             columns rightwards will have their indices shifted
470             down appropriately. If you had identified one of the
471             shifted columns, you may need to re-find or calculate
472             its new index.
473              
474             =item reorder_column
475              
476             $Data->reorder_column($c,$b,$a,$a);
477              
478             Reorders columns into the specified order. Provide the
479             new desired order of indices. Columns could be duplicated
480             or deleted using this method. The columns will adopt their
481             new index numbers.
482              
483             =back
484              
485             =head2 Row Data Access
486              
487             Once a file Stream object has been opened, and metadata and/or
488             columns adjusted as necessary, then the file contents can be
489             iterated through, one row at a time. This is typically a one-way
490             direction. If you need to go back or start over, the easiest thing
491             to do is re-open the file as a new Stream object.
492              
493             There are two main methods, L</next_row> for reading and L</write_row>
494             for writing. They cannot and should not be used on the same Stream
495             object.
496              
497             =over 4
498              
499             =item next_row
500              
501             =item next_line
502              
503             =item read_line
504              
505             This method reads the next line in the file handle and returns a
506             L<Bio::ToolBox::Data::Feature> object. This object represents the
507             values in the current file row.
508              
509             Note that strand values and 0-based start coordinates are automatically
510             converted to BioPerl conventions if required by the file type.
511              
512             =item add_row
513              
514             =item add_line
515              
516             =item write_row
517              
518             =item write_line
519              
520             $Data->add_row(\@values);
521             $Data->add_row($Row); # Bio::ToolBox::Data::Feature object
522              
523             This method writes a new row or line to a file handle. The first
524             time this method is called the file handle is automatically opened for
525             writing. Up to this point, columns may be manipulated. After this point,
526             columns cannot be adjusted (otherwise the file structure becomes
527             inconsistent).
528              
529             This method may be implemented in one of three ways, based on the type
530             of data that is passed.
531              
532             =over 4
533              
534             =item * A Feature object
535              
536             A Feature object representing a row from another L<Bio::ToolBox::Data>
537             data table or Stream. The values from this object will be automatically
538             obtained. Modified strand and 0-based coordinates may be adjusted back
539             as necessary.
540              
541             =item * An array reference of values
542              
543             Pass an array reference of values. The number of elements should match the
544             number of expected columns. The values will be automatically joined using tabs.
545             This implementation should be used if you are using values from another Stream
546             and the number of columns has been modified.
547              
548             Manipulation of strand and 0-based starts may be performed if the
549             metadata indicates this should be done.
550              
551             =item * A string
552              
553             Pass a text string. This assumes the column values are already tab
554             concatenated. A new line character is appended if one is not included.
555             No data manipulation (strand or 0-based starts) or sanity checking of the
556             required number of columns is performed. Use with caution!
557              
558             =back
559              
560             =item iterate
561              
562             $Stream->iterate( sub {
563             my $row = shift;
564             my $number = $row->value($index);
565             my $log_number = log($number);
566             $row->value($index, $log_number);
567             } );
568              
569             A convenience method that will process a code reference for every line
570             in the file. Pass a subroutine or code reference. The subroutine will
571             receive the line as a L<Bio::ToolBox::Data::Feature> object, just as with
572             the L</next_row> method.
573              
574             =back
575              
576             =head2 File Handle methods
577              
578             The below methods work with the file handle. When you are finished with
579             a Stream, you should be kind and close the file handle properly.
580              
581             =over 4
582              
583             =item mode
584              
585             Returns the write mode of the Stream object. Read-only objects
586             return false (0) and write-only Stream objects return true (1).
587              
588             =item close_fh
589              
590             Closes the file handle.
591              
592             =item fh
593              
594             Returns the L compatible file handle object representing
595             the file handle. Use with caution.
596              
597             =back
598              
599             =head1 SEE ALSO
600              
601             L, L
602              
603             =cut
604              
605 2     2   1249 use strict;
  2         4  
  2         51  
606 2     2   7 use Carp qw(carp cluck croak confess);
  2         4  
  2         81  
607 2     2   9 use base 'Bio::ToolBox::Data::core';
  2         4  
  2         549  
608 2     2   678 use Bio::ToolBox::Data::Feature;
  2         5  
  2         2519  
609              
610             1;
611              
612              
613             #### Initialize ####
614              
615             sub new {
616 28     28 1 807 my $class = shift;
617 28         57 my %args = @_;
618            
619             # file arguments
620 28   50     69 $args{in} ||= $args{file} || undef;
      66        
621 28   100     138 $args{out} ||= undef;
622 28 50 66     56 unless ($args{in} or $args{out}) {
623 0         0 cluck "a filename must be specified with 'in' or 'out' argument keys!\n";
624 0         0 return;
625             }
626 28 50 66     93 if (defined $args{in} and defined $args{out}) {
627 0         0 cluck "cannot define both 'in' and 'out' arguments!\n";
628 0         0 return;
629             }
630 28   50     106 $args{noheader} ||= 0;
631            
632             # prepare object
633 28         97 my $self = $class->SUPER::new();
634            
635             # open an existing file for reading
636 28 100       69 if ($args{in}) {
    50          
637            
638             # check and open file
639 26         90 my $filename = $self->check_file($args{in});
640 26 50       60 unless ($filename) {
641 0         0 carp sprintf "file '%s' does not exist!", $args{in};
642 0         0 return;
643             }
644 26         86 $self->add_file_metadata($filename);
645 26 50       69 $self->open_to_read_fh or return;
646 26         53 $self->{mode} = 0; # read mode
647            
648             # parse column headers
649 26         111 $self->parse_headers($args{noheader});
650 26         66 $self->{line_count} = $self->{header_line_count};
651            
652             # add example row, this will get tossed when the first next_row() is called
653 26         49 $self->{data_table}->[1] = $self->{example};
654 26         50 delete $self->{example};
655             }
656            
657             # prepare to write to a new stream
658             elsif ($args{out}) {
659            
660             # add file name information
661 2         7 $self->add_file_metadata($args{out});
662            
663             # we will not open the file handle quite yet in case the user
664             # wants to modify metadata
665 2         3 $self->{mode} = 1; # set to write mode
666 2         2 $self->{fh} = undef;
667            
668             # get names of columns user may have passed
669 2         4 my @columns;
670 2 100       6 if (exists $args{columns}) {
    50          
671 1         2 @columns = @{ $args{columns} };
  1         3  
672             }
673             elsif (exists $args{datasets}) {
674 0         0 @columns = @{ $args{datasets} };
  0         0  
675             }
676            
677             # add the column names
678 2 100 33     13 if (@columns) {
    50 33        
    50 0        
    0          
679 1         2 foreach my $c (@columns) {
680 4         6 $self->add_column($c);
681             }
682             }
683             elsif (exists $args{gff} and $args{gff}) {
684             # use standard names for the number of columns indicated
685             # we trust that the user knows the subtle difference between gff versions
686 0         0 $self->add_gff_metadata($args{gff});
687 0 0       0 unless ($self->extension =~ /g[tf]f/) {
688             $self->{extension} = $args{gff} == 2.5 ? '.gtf' :
689 0 0       0 $args{gff} == 3 ? '.gff3' : '.gff';
    0          
690             }
691             }
692             elsif (exists $args{bed} and $args{bed}) {
693             # use standard names for the number of columns indicated
694 1 50 33     7 unless ($args{bed} =~ /^\d{1,2}$/ and $args{bed} >= 3) {
695 0         0 carp "bed parameter must be an integer 3-12!";
696 0         0 return;
697             }
698 1         4 $self->add_bed_metadata($args{bed});
699 1 50       3 unless ($self->extension =~ /bed|peak/) {
700 0         0 $self->{extension} = '.bed';
701             }
702             }
703             elsif (exists $args{ucsc} and $args{ucsc}) {
704             # a ucsc format such as refFlat, genePred, or genePredExt
705 0         0 my $u = $self->add_ucsc_metadata($args{ucsc});
706 0 0       0 unless ($u) {
707 0         0 carp "unrecognized number of columns for ucsc format!";
708 0         0 return;
709             };
710 0 0       0 unless ($self->extension =~ /ucsc|ref+lat|genepred/) {
711 0         0 $self->{extension} = '.ucsc';
712             }
713             }
714             # else it will be an empty object with no columns
715            
716             # append gz if necessary
717 2 0 33     7 if (exists $args{gz} and $args{gz} and $self->extension !~ /gz$/) {
      0        
718 0         0 $self->{extension} .= '.gz';
719             }
720            
721             # rebuild the filename after modifying the extension
722 2         6 $self->{filename} = $self->{path} . $self->{basename} . $self->{extension};
723            
724             # add feature
725 2   50     11 $args{feature} ||= $args{features} || undef;
      33        
726 2 100       5 $self->feature($args{feature}) unless $self->feature;
727             }
728            
729 28         97 return $self;
730             }
731              
732              
733             sub duplicate {
734 1     1 1 343 my ($self, $filename) = @_;
735 1 50       4 unless ($filename) {
736 0         0 carp "a new filename must be provided!";
737 0         0 return;
738             }
739 1 50       3 if ($filename eq $self->filename) {
740 0         0 carp "provided filename is not unique from that in metadata!";
741 0         0 return;
742             }
743            
744             # duplicate the data structure
745 1         4 my $columns = $self->list_columns;
746 1 50       3 my $Dup = $self->new(
747             'out' => $filename,
748             'columns' => $columns,
749             ) or return;
750            
751             # copy the metadata
752 1         3 for (my $i = 0; $i < $self->number_columns; $i++) {
753             # column metadata
754 4         9 my %md = $self->metadata($i);
755 4         11 $Dup->{$i} = \%md;
756             }
757 1         3 foreach (qw(feature program db bed gff vcf ucsc headers)) {
758             # various keys
759 8         19 $Dup->{$_} = $self->{$_};
760             }
761 1         6 my @comments = $self->comments;
762 1         2 push @{$Dup->{comments}}, @comments;
  1         2  
763            
764 1         4 return $Dup;
765             }
766              
767              
768              
769             ### Column manipulation
770              
771             sub add_column {
772 4     4 1 5 my ($self, $name) = @_;
773 4 50       6 return unless $name;
774 4 50       7 unless ($self->mode) {
775 0         0 cluck "We have a read-only Stream object, cannot add columns";
776 0         0 return;
777             }
778 4 50       17 if (defined $self->{fh}) {
779             # Stream file handle is opened
780 0         0 cluck "Cannot modify columns when a Stream file handle is opened!";
781 0         0 return;
782             }
783            
784 4         8 my $column = $self->number_columns;
785 4         9 $self->{$column} = {
786             'name' => $name,
787             'index' => $column,
788             };
789 4         6 $self->{data_table}->[0][$column] = $name;
790 4         4 $self->{number_columns}++;
791 4 50       8 delete $self->{column_indices} if exists $self->{column_indices};
792 4 50 33     7 if ($self->gff or $self->bed or $self->ucsc or $self->vcf) {
      33        
      33        
793             # check if we maintain integrity, at least insofar what we test
794 0         0 $self->verify(1); # silence so user doesn't get these messages
795             }
796 4         6 return $column;
797             }
798              
799             sub copy_column {
800 0     0 1 0 my $self = shift;
801 0 0       0 unless ($self->mode) {
802 0         0 confess "We have a read-only Stream object, cannot add columns";
803             }
804 0 0       0 if (defined $self->{fh}) {
805             # Stream file handle is opened
806 0         0 confess "Cannot modify columns when a Stream file handle is opened!";
807             }
808 0         0 my $index = shift;
809 0 0       0 return unless defined $index;
810            
811 0         0 my $new_index = $self->add_column( $self->name($index) );
812 0         0 $self->copy_metadata($index, $new_index);
813 0         0 return $new_index;
814             }
815              
816              
817              
818             #### Row Access ####
819              
820             *next_line = *read_line = \&next_row;
821              
822             sub next_row {
823 201     201 1 212 my $self = shift;
824 201 50       268 if ($self->{mode}) {
825 0         0 confess "Stream object is write-only! cannot read";
826             }
827            
828             # read and add the next line in the file
829 201 100       2662 my $line = $self->{fh}->getline or return;
830 196         3042 $self->{line_count}++;
831 196 50       295 if (substr($line,0,1) eq '#') {
832             # we shouldn't have internal comment lines, but just in case....
833             # could be a gff3 pragma
834 0         0 $self->add_comment($line);
835 0         0 return $self->next_row;
836             }
837            
838             # add the current line to the data table as row 1
839 196         176 pop @{ $self->{data_table} }; # remove the old line
  196         215  
840 196         444 $self->add_data_line($line);
841            
842             # return the feature
843 196         375 return Bio::ToolBox::Data::Feature->new(
844             'data' => $self,
845             'index' => 1,
846             );
847             }
848              
849              
850             *add_row = *add_line = *write_line = \&write_row;
851              
852             sub write_row {
853 78     78 1 160 my $self = shift;
854 78         72 my $data = shift;
855 78 50       105 unless ($self->{mode}) {
856 0         0 confess "Stream object is read-only! cannot write";
857             }
858            
859             # open the file handle if it hasn't been opened yet
860 78 100       92 unless (defined $self->{fh}) {
861             # we first write a standard empty data file with metadata and headers
862 2         5 my $newfile = $self->write_file($self->filename);
863 2 50       7 unless ($newfile) {
864 0         0 die "unable to write file!";
865             }
866            
867             # just in case the filename is changed when writing the file
868 2 50       7 if ($newfile ne $self->filename) {
869 0         0 $self->add_file_metadata($newfile);
870             }
871            
872             # then we re-open the file for appending
873 2 50       7 my $fh = $self->open_to_write_fh($newfile, undef, 1) or
874             die "unable to append to file $newfile!";
875 2         6 $self->{fh} = $fh;
876             }
877            
878             # identify what kind of data we are dealing with
879 78         127 my $data_ref = ref $data;
880 78 100       103 if ($data_ref eq 'Bio::ToolBox::Data::Feature') {
    50          
881             # user passed a Feature object
882 39         67 $self->{fh}->print( join("\t", ($data->row_values)), "\n" );
883             }
884             elsif ($data_ref eq 'ARRAY') {
885             # user passed an array of values
886 39         100 $self->{fh}->print( join("\t", @$data), "\n");
887             }
888             else {
889             # assume the passed data is a string
890             # make sure it has a newline
891 0 0       0 unless ($data =~ /\n$/) {
892 0         0 $data .= "\n";
893             }
894 0         0 $self->{fh}->print($data);
895             }
896 78         398 return 1;
897             }
898              
899             sub iterate {
900 0     0 1 0 my $self = shift;
901 0         0 my $code = shift;
902 0 0       0 unless (ref $code eq 'CODE') {
903 0         0 cluck "iterate_function() method requires a code reference!";
904 0         0 return;
905             }
906 0         0 while (my $row = $self->next_row) {
907 0         0 &$code($row);
908             }
909 0         0 return 1;
910             }
911              
912              
913              
914              
915             #### File handle ####
916              
917             sub mode {
918 4     4 1 5 my $self = shift;
919 4         6 return $self->{mode};
920             }
921              
922             sub DESTROY {
923 28     28   19143 my $self = shift;
924 28         97 $self->close_fh;
925             }
926              
927              
928             ####################################################
929              
930             __END__