File Coverage

Bio/SeqIO.pm
Criterion Covered Total %
statement 122 149 81.8
branch 75 108 69.4
condition 21 38 55.2
subroutine 18 26 69.2
pod 10 10 100.0
total 246 331 74.3


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::SeqIO
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Ewan Birney
6             # and Lincoln Stein
7             #
8             # Copyright Ewan Birney
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12             # _history
13             # October 18, 1999 Largely rewritten by Lincoln Stein
14              
15             # POD documentation - main docs before the code
16              
17             =head1 NAME
18              
19             Bio::SeqIO - Handler for SeqIO Formats
20              
21             =head1 SYNOPSIS
22              
23             use Bio::SeqIO;
24              
25             $in = Bio::SeqIO->new(-file => "inputfilename" ,
26             -format => 'Fasta');
27             $out = Bio::SeqIO->new(-file => ">outputfilename" ,
28             -format => 'EMBL');
29              
30             while ( my $seq = $in->next_seq() ) {
31             $out->write_seq($seq);
32             }
33              
34             # Now, to actually get at the sequence object, use the standard Bio::Seq
35             # methods (look at Bio::Seq if you don't know what they are)
36              
37             use Bio::SeqIO;
38              
39             $in = Bio::SeqIO->new(-file => "inputfilename" ,
40             -format => 'genbank');
41              
42             while ( my $seq = $in->next_seq() ) {
43             print "Sequence ",$seq->id, " first 10 bases ",
44             $seq->subseq(1,10), "\n";
45             }
46              
47              
48             # The SeqIO system does have a filehandle binding. Most people find this
49             # a little confusing, but it does mean you can write the world's
50             # smallest reformatter
51              
52             use Bio::SeqIO;
53              
54             $in = Bio::SeqIO->newFh(-file => "inputfilename" ,
55             -format => 'Fasta');
56             $out = Bio::SeqIO->newFh(-format => 'EMBL');
57              
58             # World's shortest Fasta<->EMBL format converter:
59             print $out $_ while <$in>;
60              
61              
62             =head1 DESCRIPTION
63              
64             Bio::SeqIO is a handler module for the formats in the SeqIO set (eg,
65             Bio::SeqIO::fasta). It is the officially sanctioned way of getting at
66             the format objects, which most people should use.
67              
68             The Bio::SeqIO system can be thought of like biological file handles.
69             They are attached to filehandles with smart formatting rules (eg,
70             genbank format, or EMBL format, or binary trace file format) and
71             can either read or write sequence objects (Bio::Seq objects, or
72             more correctly, Bio::SeqI implementing objects, of which Bio::Seq is
73             one such object). If you want to know what to do with a Bio::Seq
74             object, read L.
75              
76             The idea is that you request a stream object for a particular format.
77             All the stream objects have a notion of an internal file that is read
78             from or written to. A particular SeqIO object instance is configured
79             for either input or output. A specific example of a stream object is
80             the Bio::SeqIO::fasta object.
81              
82             Each stream object has functions
83              
84             $stream->next_seq();
85              
86             and
87              
88             $stream->write_seq($seq);
89              
90             As an added bonus, you can recover a filehandle that is tied to the
91             SeqIO object, allowing you to use the standard EE and print
92             operations to read and write sequence objects:
93              
94             use Bio::SeqIO;
95              
96             $stream = Bio::SeqIO->newFh(-format => 'Fasta',
97             -fh => \*ARGV);
98             # read from standard input or the input filenames
99              
100             while ( $seq = <$stream> ) {
101             # do something with $seq
102             }
103              
104             and
105              
106             print $stream $seq; # when stream is in output mode
107              
108             This makes the simplest ever reformatter
109              
110             #!/usr/bin/perl
111             use strict;
112             my $format1 = shift;
113             my $format2 = shift || die
114             "Usage: reformat format1 format2 < input > output";
115              
116             use Bio::SeqIO;
117              
118             my $in = Bio::SeqIO->newFh(-format => $format1, -fh => \*ARGV );
119             my $out = Bio::SeqIO->newFh(-format => $format2 );
120             # Note: you might want to quote -format to keep older
121             # perl's from complaining.
122              
123             print $out $_ while <$in>;
124              
125              
126             =head1 CONSTRUCTORS
127              
128             =head2 Bio::SeqIO-Enew()
129              
130             $seqIO = Bio::SeqIO->new(-file => 'seqs.fasta', -format => $format);
131             $seqIO = Bio::SeqIO->new(-fh => \*FILEHANDLE, -format => $format);
132             $seqIO = Bio::SeqIO->new(-string => $string , -format => $format);
133             $seqIO = Bio::SeqIO->new(-format => $format);
134              
135             The new() class method constructs a new Bio::SeqIO object. The returned object
136             can be used to retrieve or print Seq objects. new() accepts the following
137             parameters:
138              
139             =over 5
140              
141             =item -file
142              
143             A file path to be opened for reading or writing. The usual Perl
144             conventions apply:
145              
146             'file' # open file for reading
147             '>file' # open file for writing
148             '>>file' # open file for appending
149             '+
150              
151             To read from or write to a piped command, open a filehandle and use the -fh
152             option.
153              
154             =item -fh
155              
156             You may use new() with a opened filehandle, provided as a glob reference. For
157             example, to read from STDIN:
158              
159             my $seqIO = Bio::SeqIO->new(-fh => \*STDIN);
160              
161             A string filehandle is handy if you want to modify the output in the
162             memory, before printing it out. The following program reads in EMBL
163             formatted entries from a file and prints them out in fasta format with
164             some HTML tags:
165              
166             use Bio::SeqIO;
167             use IO::String;
168             my $in = Bio::SeqIO->new(-file => "emblfile",
169             -format => 'EMBL');
170             while ( my $seq = $in->next_seq() ) {
171             # the output handle is reset for every file
172             my $stringio = IO::String->new($string);
173             my $out = Bio::SeqIO->new(-fh => $stringio,
174             -format => 'fasta');
175             # output goes into $string
176             $out->write_seq($seq);
177             # modify $string
178             $string =~ s|(>)(\w+)|$1$2|g;
179             # print into STDOUT
180             print $string;
181             }
182              
183             Filehandles can also be used to read from or write to a piped command:
184              
185             use Bio::SeqIO;
186             #convert .fastq.gz to .fasta
187             open my $zcat, 'zcat seq.fastq.gz |' or die $!;
188             my $in=Bio::SeqIO->new(-fh=>$zcat,
189             -format=>'fastq');
190             my $out=Bio::SeqIO->new(-file=>'>seq.fasta',
191             -format=>'fasta');
192             while (my $seq=$in->next_seq) {
193             $out->write_seq($seq)
194             }
195              
196             =item -string
197              
198             A string to read the sequences from. For example:
199              
200             my $string = ">seq1\nACGCTAGCTAGC\n";
201             my $seqIO = Bio::SeqIO->new(-string => $string);
202              
203             =item -format
204              
205             Specify the format of the file. Supported formats include fasta,
206             genbank, embl, swiss (SwissProt), Entrez Gene and tracefile formats
207             such as abi (ABI) and scf. There are many more, for a complete listing
208             see the SeqIO HOWTO (L).
209              
210             If no format is specified and a filename is given then the module will
211             attempt to deduce the format from the filename suffix. If there is no
212             suffix that Bioperl understands then it will attempt to guess the
213             format based on file content. If this is unsuccessful then SeqIO will
214             throw a fatal error.
215              
216             The format name is case-insensitive: 'FASTA', 'Fasta' and 'fasta' are
217             all valid.
218              
219             Currently, the tracefile formats (except for SCF) require installation
220             of the external Staden "io_lib" package, as well as the
221             Bio::SeqIO::staden::read package available from the bioperl-ext
222             repository.
223              
224             =item -alphabet
225              
226             Sets the alphabet ('dna', 'rna', or 'protein'). When the alphabet is
227             set then Bioperl will not attempt to guess what the alphabet is. This
228             may be important because Bioperl does not always guess correctly.
229              
230             =item -flush
231              
232             By default, all files (or filehandles) opened for writing sequences
233             will be flushed after each write_seq() (making the file immediately
234             usable). If you do not need this facility and would like to marginally
235             improve the efficiency of writing multiple sequences to the same file
236             (or filehandle), pass the -flush option '0' or any other value that
237             evaluates as defined but false:
238              
239             my $gb = Bio::SeqIO->new(-file => "
240             -format => "gb");
241             my $fa = Bio::SeqIO->new(-file => ">gball.fa",
242             -format => "fasta",
243             -flush => 0); # go as fast as we can!
244             while($seq = $gb->next_seq) { $fa->write_seq($seq) }
245              
246             =item -seqfactory
247              
248             Provide a Bio::Factory::SequenceFactoryI object. See the sequence_factory() method.
249              
250             =item -locfactory
251              
252             Provide a Bio::Factory::LocationFactoryI object. See the location_factory() method.
253              
254             =item -objbuilder
255              
256             Provide a Bio::Factory::ObjectBuilderI object. See the object_builder() method.
257              
258             =back
259              
260             =head2 Bio::SeqIO-EnewFh()
261              
262             $fh = Bio::SeqIO->newFh(-fh => \*FILEHANDLE, -format=>$format);
263             $fh = Bio::SeqIO->newFh(-format => $format);
264             # etc.
265              
266             This constructor behaves like new(), but returns a tied filehandle
267             rather than a Bio::SeqIO object. You can read sequences from this
268             object using the familiar EE operator, and write to it using
269             print(). The usual array and $_ semantics work. For example, you can
270             read all sequence objects into an array like this:
271              
272             @sequences = <$fh>;
273              
274             Other operations, such as read(), sysread(), write(), close(), and
275             printf() are not supported.
276              
277             =head1 OBJECT METHODS
278              
279             See below for more detailed summaries. The main methods are:
280              
281             =head2 $sequence = $seqIO-Enext_seq()
282              
283             Fetch the next sequence from the stream, or nothing if no more.
284              
285             =head2 $seqIO-Ewrite_seq($sequence [,$another_sequence,...])
286              
287             Write the specified sequence(s) to the stream.
288              
289             =head2 TIEHANDLE(), READLINE(), PRINT()
290              
291             These provide the tie interface. See L for more details.
292              
293             =head1 FEEDBACK
294              
295             =head2 Mailing Lists
296              
297             User feedback is an integral part of the evolution of this and other
298             Bioperl modules. Send your comments and suggestions preferably to one
299             of the Bioperl mailing lists.
300              
301             Your participation is much appreciated.
302              
303             bioperl-l@bioperl.org - General discussion
304             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
305              
306             =head2 Support
307              
308             Please direct usage questions or support issues to the mailing list:
309              
310             bioperl-l@bioperl.org
311              
312             rather than to the module maintainer directly. Many experienced and
313             responsive experts will be able look at the problem and quickly
314             address it. Please include a thorough description of the problem
315             with code and data examples if at all possible.
316              
317             =head2 Reporting Bugs
318              
319             Report bugs to the Bioperl bug tracking system to help us keep track
320             the bugs and their resolution. Bug reports can be submitted via the
321             web:
322              
323             https://github.com/bioperl/bioperl-live/issues
324              
325             =head1 AUTHOR - Ewan Birney, Lincoln Stein
326              
327             Email birney@ebi.ac.uk
328             lstein@cshl.org
329              
330             =head1 APPENDIX
331              
332             The rest of the documentation details each of the object
333             methods. Internal methods are usually preceded with a _
334              
335             =cut
336              
337             #' Let the code begin...
338              
339             package Bio::SeqIO;
340              
341 82     82   23241 use strict;
  82         143  
  82         3262  
342 82     82   350 use warnings;
  82         165  
  82         2274  
343              
344 82     82   21047 use Bio::Factory::FTLocationFactory;
  82         203  
  82         2477  
345 82     82   23754 use Bio::Seq::SeqBuilder;
  82         187  
  82         2166  
346 82     82   23838 use Bio::Tools::GuessSeqFormat;
  82         203  
  82         2563  
347 82     82   549 use Symbol;
  82         145  
  82         5064  
348              
349 82     82   579 use parent qw(Bio::Root::Root Bio::Root::IO Bio::Factory::SequenceStreamI);
  82         146  
  82         663  
350              
351             my %valid_alphabet_cache;
352              
353              
354             =head2 new
355              
356             Title : new
357             Usage : $stream = Bio::SeqIO->new(-file => 'sequences.fasta',
358             -format => 'fasta');
359             Function: Returns a new sequence stream
360             Returns : A Bio::SeqIO stream initialised with the appropriate format
361             Args : Named parameters indicating where to read the sequences from or to
362             write them to:
363             -file => filename, OR
364             -fh => filehandle to attach to, OR
365             -string => string
366              
367             Additional arguments, all with reasonable defaults:
368             -format => format of the sequences, usually auto-detected
369             -alphabet => 'dna', 'rna', or 'protein'
370             -flush => 0 or 1 (default: flush filehandles after each write)
371             -seqfactory => sequence factory
372             -locfactory => location factory
373             -objbuilder => object builder
374              
375             See L
376              
377             =cut
378              
379             my $entry = 0;
380              
381             sub new {
382 843     843 1 12525 my ($caller, @args) = @_;
383 843   33     3612 my $class = ref($caller) || $caller;
384              
385             # or do we want to call SUPER on an object if $caller is an
386             # object?
387 843 100       3992 if( $class =~ /Bio::SeqIO::(\S+)/ ) {
388 420         1894 my ($self) = $class->SUPER::new(@args);
389 420         2108 $self->_initialize(@args);
390 420         3141 return $self;
391             } else {
392 423         1847 my %params = @args;
393 423         1736 @params{ map { lc $_ } keys %params } = values %params; # lowercase keys
  1056         3019  
394              
395 423 100 100     2322 unless( defined $params{-file} ||
      100        
396             defined $params{-fh} ||
397             defined $params{-string} ) {
398             $class->throw("file argument provided, but with an undefined value")
399 3 100       10 if exists $params{'-file'};
400             $class->throw("fh argument provided, but with an undefined value")
401 2 100       7 if exists $params{'-fh'};
402             $class->throw("string argument provided, but with an undefined value")
403 1 50       3 if exists($params{'-string'});
404             # $class->throw("No file, fh, or string argument provided"); # neither defined
405             }
406              
407             # Determine or guess sequence format and variant
408 421         1054 my $format = $params{'-format'};
409 421 100       1263 if (! $format ) {
410 27 100       80 if ($params{-file}) {
    100          
    100          
411             # Guess from filename extension, and then from file content
412             $format = $class->_guess_format( $params{-file} ) ||
413 24   100     105 Bio::Tools::GuessSeqFormat->new(-file => $params{-file} )->guess;
414             } elsif ($params{-fh}) {
415             # Guess from filehandle content
416 1         8 $format = Bio::Tools::GuessSeqFormat->new(-fh => $params{-fh} )->guess;
417             } elsif ($params{-string}) {
418             # Guess from string content
419 1         9 $format = Bio::Tools::GuessSeqFormat->new(-text => $params{-string})->guess;
420             }
421             }
422              
423             # changed 1-3-11; no need to print out an empty string (only way this
424             # exception is triggered) - cjfields
425 420 100       1206 $class->throw("Could not guess format from file, filehandle or string")
426             if not $format;
427 418         1038 $format = "\L$format"; # normalize capitalization to lower case
428              
429 418 100       1657 if ($format =~ /-/) {
430 55         204 ($format, my $variant) = split('-', $format, 2);
431 55         123 $params{-variant} = $variant;
432             }
433              
434 418 50       1679 return unless( $class->_load_format_module($format) );
435 418         3928 return "Bio::SeqIO::$format"->new(%params);
436             }
437             }
438              
439              
440             =head2 newFh
441              
442             Title : newFh
443             Usage : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
444             Function: Does a new() followed by an fh()
445             Example : $fh = Bio::SeqIO->newFh(-file=>$filename,-format=>'Format')
446             $sequence = <$fh>; # read a sequence object
447             print $fh $sequence; # write a sequence object
448             Returns : filehandle tied to the Bio::SeqIO::Fh class
449             Args :
450              
451             See L
452              
453             =cut
454              
455             sub newFh {
456 0     0 1 0 my $class = shift;
457 0 0       0 return unless my $self = $class->new(@_);
458 0         0 return $self->fh;
459             }
460              
461              
462             =head2 fh
463              
464             Title : fh
465             Usage : $obj->fh
466             Function: Get or set the IO filehandle
467             Example : $fh = $obj->fh; # make a tied filehandle
468             $sequence = <$fh>; # read a sequence object
469             print $fh $sequence; # write a sequence object
470             Returns : filehandle tied to Bio::SeqIO class
471             Args : none
472              
473             =cut
474              
475             sub fh {
476 0     0 1 0 my $self = shift;
477 0   0     0 my $class = ref($self) || $self;
478 0         0 my $s = Symbol::gensym;
479 0         0 tie $$s,$class,$self;
480 0         0 return $s;
481             }
482              
483              
484             # _initialize is chained for all SeqIO classes
485              
486             sub _initialize {
487 436     436   1311 my($self, @args) = @_;
488              
489             # flush is initialized by the Root::IO init
490              
491 436         2462 my ($seqfact,$locfact,$objbuilder, $alphabet) =
492             $self->_rearrange([qw(SEQFACTORY
493             LOCFACTORY
494             OBJBUILDER
495             ALPHABET)
496             ], @args);
497              
498 436 50       2149 $locfact = Bio::Factory::FTLocationFactory->new(-verbose => $self->verbose)
499             if ! $locfact;
500 436 50       1885 $objbuilder = Bio::Seq::SeqBuilder->new(-verbose => $self->verbose)
501             unless $objbuilder;
502 436         1942 $self->sequence_builder($objbuilder);
503 436         1655 $self->location_factory($locfact);
504              
505             # note that this should come last because it propagates the sequence
506             # factory to the sequence builder
507 436 50       1075 $seqfact && $self->sequence_factory($seqfact);
508              
509             #bug 2160
510 436 50       1001 $alphabet && $self->alphabet($alphabet);
511              
512             # initialize the IO part
513 436         1732 $self->_initialize_io(@args);
514             }
515              
516              
517             =head2 next_seq
518              
519             Title : next_seq
520             Usage : $seq = stream->next_seq
521             Function: Reads the next sequence object from the stream and returns it.
522              
523             Certain driver modules may encounter entries in the stream
524             that are either misformatted or that use syntax not yet
525             understood by the driver. If such an incident is
526             recoverable, e.g., by dismissing a feature of a feature
527             table or some other non-mandatory part of an entry, the
528             driver will issue a warning. In the case of a
529             non-recoverable situation an exception will be thrown. Do
530             not assume that you can resume parsing the same stream
531             after catching the exception. Note that you can always turn
532             recoverable errors into exceptions by calling
533             $stream->verbose(2).
534              
535             Returns : a Bio::Seq sequence object, or nothing if no more sequences
536             are available
537              
538             Args : none
539              
540             See L, L, L
541              
542             =cut
543              
544             sub next_seq {
545 0     0 1 0 my ($self, $seq) = @_;
546 0         0 $self->throw("Sorry, you cannot read from a generic Bio::SeqIO object.");
547             }
548              
549              
550             =head2 write_seq
551              
552             Title : write_seq
553             Usage : $stream->write_seq($seq)
554             Function: writes the $seq object into the stream
555             Returns : 1 for success and 0 for error
556             Args : Bio::Seq object
557              
558             =cut
559              
560             sub write_seq {
561 0     0 1 0 my ($self, $seq) = @_;
562 0         0 $self->throw("Sorry, you cannot write to a generic Bio::SeqIO object.");
563             }
564              
565              
566             =head2 format
567              
568             Title : format
569             Usage : $format = $stream->format()
570             Function: Get the sequence format
571             Returns : sequence format, e.g. fasta, fastq
572             Args : none
573              
574             =cut
575              
576             # format() method inherited from Bio::Root::IO
577              
578              
579             =head2 alphabet
580              
581             Title : alphabet
582             Usage : $self->alphabet($newval)
583             Function: Set/get the molecule type for the Seq objects to be created.
584             Example : $seqio->alphabet('protein')
585             Returns : value of alphabet: 'dna', 'rna', or 'protein'
586             Args : newvalue (optional)
587             Throws : Exception if the argument is not one of 'dna', 'rna', or 'protein'
588              
589             =cut
590              
591             sub alphabet {
592 80     80 1 141 my ($self, $value) = @_;
593              
594 80 100       155 if ( defined $value) {
595 2         4 $value = lc $value;
596 2 50       6 unless ($valid_alphabet_cache{$value}) {
597             # instead of hard-coding the allowed values once more, we check by
598             # creating a dummy sequence object
599 2         4 eval {
600 2         9 require Bio::PrimarySeq;
601 2         7 my $seq = Bio::PrimarySeq->new( -verbose => $self->verbose,
602             -alphabet => $value );
603             };
604 2 50       6 if ($@) {
605 0         0 $self->throw("Invalid alphabet: $value\n. See Bio::PrimarySeq for allowed values.");
606             }
607 2         5 $valid_alphabet_cache{$value} = 1;
608             }
609 2         4 $self->{'alphabet'} = $value;
610             }
611 80         165 return $self->{'alphabet'};
612             }
613              
614              
615             =head2 _load_format_module
616              
617             Title : _load_format_module
618             Usage : *INTERNAL SeqIO stuff*
619             Function: Loads up (like use) a module at run time on demand
620             Example :
621             Returns :
622             Args :
623              
624             =cut
625              
626             sub _load_format_module {
627 418     418   1196 my ($self, $format) = @_;
628 418         1127 my $module = "Bio::SeqIO::" . $format;
629 418         663 my $ok;
630              
631 418         734 eval {
632 418         2251 $ok = $self->_load_module($module);
633             };
634 418 50       1162 if ( $@ ) {
635 0         0 print STDERR <
636             $self: $format cannot be found
637             Exception $@
638             For more information about the SeqIO system please see the SeqIO docs.
639             This includes ways of checking for formats at compile time, not run time
640             END
641             ;
642             }
643 418         1384 return $ok;
644             }
645              
646              
647             =head2 _concatenate_lines
648              
649             Title : _concatenate_lines
650             Usage : $s = _concatenate_lines($line, $continuation_line)
651             Function: Private. Concatenates two strings assuming that the second stems
652             from a continuation line of the first. Adds a space between both
653             unless the first ends with a dash.
654              
655             Takes care of either arg being empty.
656             Example :
657             Returns : A string.
658             Args :
659              
660             =cut
661              
662             sub _concatenate_lines {
663 516     516   1237 my ($self, $s1, $s2) = @_;
664 516 50 66     1490 $s1 .= " " if($s1 && ($s1 !~ /-$/) && $s2);
      66        
665 516 100       1705 return ($s1 ? $s1 : "") . ($s2 ? $s2 : "");
    50          
666             }
667              
668              
669             =head2 _filehandle
670              
671             Title : _filehandle
672             Usage : $obj->_filehandle($newval)
673             Function: This method is deprecated. Call _fh() instead.
674             Example :
675             Returns : value of _filehandle
676             Args : newvalue (optional)
677              
678             =cut
679              
680             sub _filehandle {
681 7     7   13 my ($self,@args) = @_;
682 7         19 return $self->_fh(@args);
683             }
684              
685              
686             =head2 _guess_format
687              
688             Title : _guess_format
689             Usage : $obj->_guess_format($filename)
690             Function: guess format based on file suffix
691             Example :
692             Returns : guessed format of filename (lower case)
693             Args :
694             Notes : formats that _filehandle() will guess include fasta,
695             genbank, scf, pir, embl, raw, gcg, ace, bsml, swissprot,
696             fastq and phd/phred
697              
698             =cut
699              
700             sub _guess_format {
701 24     24   40 my $class = shift;
702 24 50       75 return unless $_ = shift;
703              
704 24 50       125 return 'abi' if /\.ab[i1]$/i;
705 24 100       87 return 'ace' if /\.ace$/i;
706 23 50       75 return 'alf' if /\.alf$/i;
707 23 50       127 return 'bsml' if /\.(bsm|bsml)$/i;
708 23 50       80 return 'ctf' if /\.ctf$/i;
709 23 100       89 return 'embl' if /\.(embl|ebl|emb|dat)$/i;
710 22 50       74 return 'entrezgene' if /\.asn$/i;
711 22 50       70 return 'exp' if /\.exp$/i;
712 22 100       114 return 'fasta' if /\.(fasta|fast|fas|seq|fa|fsa|nt|aa|fna|faa)$/i;
713 15 100       73 return 'fastq' if /\.fastq$/i;
714 13 100       49 return 'gcg' if /\.gcg$/i;
715 12 100       75 return 'genbank' if /\.(gb|gbank|genbank|gbk|gbs)$/i;
716 8 50       36 return 'phd' if /\.(phd|phred)$/i;
717 8 100       37 return 'pir' if /\.pir$/i;
718 7 50       33 return 'pln' if /\.pln$/i;
719 7 50       28 return 'qual' if /\.qual$/i;
720 7 50       28 return 'raw' if /\.txt$/i;
721 7 50       31 return 'scf' if /\.scf$/i;
722             # from Strider 1.4 Release Notes: The file name extensions used by
723             # Strider 1.4 are ".xdna", ".xdgn", ".xrna" and ".xprt" for DNA,
724             # DNA Degenerate, RNA and Protein Sequence Files, respectively
725 7 50       30 return 'strider' if /\.(xdna|xdgn|xrna|xprt)$/i;
726 7 100       48 return 'swiss' if /\.(swiss|sp)$/i;
727 6 50       91 return 'ztr' if /\.ztr$/i;
728             }
729              
730              
731             sub DESTROY {
732 431     431   34084 my $self = shift;
733 431         2535 $self->close();
734             }
735              
736              
737             sub TIEHANDLE {
738 0     0   0 my ($class,$val) = @_;
739 0         0 return bless {'seqio' => $val}, $class;
740             }
741              
742              
743             sub READLINE {
744 0     0   0 my $self = shift;
745 0 0 0     0 return $self->{'seqio'}->next_seq() || undef unless wantarray;
746 0         0 my (@list, $obj);
747 0         0 push @list, $obj while $obj = $self->{'seqio'}->next_seq();
748 0         0 return @list;
749             }
750              
751              
752             sub PRINT {
753 0     0   0 my $self = shift;
754 0         0 $self->{'seqio'}->write_seq(@_);
755             }
756              
757              
758             =head2 sequence_factory
759              
760             Title : sequence_factory
761             Usage : $seqio->sequence_factory($seqfactory)
762             Function: Get/Set the Bio::Factory::SequenceFactoryI
763             Returns : Bio::Factory::SequenceFactoryI
764             Args : [optional] Bio::Factory::SequenceFactoryI
765              
766             =cut
767              
768             sub sequence_factory {
769 1565     1565 1 2973 my ($self, $obj) = @_;
770 1565 100       3122 if( defined $obj ) {
771 413 50 33     3678 if( ! ref($obj) || ! $obj->isa('Bio::Factory::SequenceFactoryI') ) {
772 0         0 $self->throw("Must provide a valid Bio::Factory::SequenceFactoryI object to ".ref($self)."::sequence_factory()");
773             }
774 413         1073 $self->{'_seqio_seqfactory'} = $obj;
775 413         1022 my $builder = $self->sequence_builder();
776 413 50 66     4195 if($builder && $builder->can('sequence_factory') &&
      66        
777             (! $builder->sequence_factory())) {
778 411         997 $builder->sequence_factory($obj);
779             }
780             }
781 1565         5637 $self->{'_seqio_seqfactory'};
782             }
783              
784              
785             =head2 object_factory
786              
787             Title : object_factory
788             Usage : $obj->object_factory($newval)
789             Function: This is an alias to sequence_factory with a more generic name.
790             Example :
791             Returns : value of object_factory (a scalar)
792             Args : on set, new value (a scalar or undef, optional)
793              
794             =cut
795              
796             sub object_factory{
797 0     0 1 0 return shift->sequence_factory(@_);
798             }
799              
800              
801             =head2 sequence_builder
802              
803             Title : sequence_builder
804             Usage : $seqio->sequence_builder($seqfactory)
805             Function: Get/Set the Bio::Factory::ObjectBuilderI used to build sequence
806             objects. This applies to rich sequence formats only, e.g. genbank
807             but not fasta.
808              
809             If you do not set the sequence object builder yourself, it
810             will in fact be an instance of L, and
811             you may use all methods documented there to configure it.
812              
813             Returns : a Bio::Factory::ObjectBuilderI compliant object
814             Args : [optional] a Bio::Factory::ObjectBuilderI compliant object
815              
816             =cut
817              
818             sub sequence_builder {
819 1054     1054 1 1869 my ($self, $obj) = @_;
820 1054 100       2159 if( defined $obj ) {
821 436 50 33     3099 if( ! ref($obj) || ! $obj->isa('Bio::Factory::ObjectBuilderI') ) {
822 0         0 $self->throw("Must provide a valid Bio::Factory::ObjectBuilderI object to ".ref($self)."::sequence_builder()");
823             }
824 436         1002 $self->{'_object_builder'} = $obj;
825             }
826 1054         2271 $self->{'_object_builder'};
827             }
828              
829              
830             =head2 location_factory
831              
832             Title : location_factory
833             Usage : $seqio->location_factory($locfactory)
834             Function: Get/Set the Bio::Factory::LocationFactoryI object to be used for
835             location string parsing
836             Returns : a Bio::Factory::LocationFactoryI implementing object
837             Args : [optional] on set, a Bio::Factory::LocationFactoryI implementing
838             object.
839              
840             =cut
841              
842             sub location_factory {
843 9416     9416 1 14976 my ($self,$obj) = @_;
844 9416 100       16546 if( defined $obj ) {
845 436 50 33     2929 if( ! ref($obj) || ! $obj->isa('Bio::Factory::LocationFactoryI') ) {
846 0         0 $self->throw("Must provide a valid Bio::Factory::LocationFactoryI" .
847             " object to ".ref($self)."->location_factory()");
848             }
849 436         893 $self->{'_seqio_locfactory'} = $obj;
850             }
851 9416         21153 $self->{'_seqio_locfactory'};
852             }
853              
854             1;
855