File Coverage

Bio/SeqIO/largefasta.pm

Criterion	Covered	Total	%
statement	50	54	92.5
branch	12	20	60.0
condition	6	12	50.0
subroutine	7	7	100.0
pod	2	2	100.0
total	77	95	81.0

line	stmt	bran	cond	sub	pod	time	code
1							# BioPerl module for Bio::SeqIO::largefasta
2							#
3							# Please direct questions and support issues to <bioperl-l@bioperl.org>
4							#
5							# Cared for by Jason Stajich
6							#
7							# Copyright Jason Stajich
8							#
9							# You may distribute this module under the same terms as perl itself
10							# _history
11							#
12							# POD documentation - main docs before the code
13
14							=head1 NAME
15
16							Bio::SeqIO::largefasta - method i/o on very large fasta sequence files
17
18							=head1 SYNOPSIS
19
20							Do not use this module directly. Use it via the Bio::SeqIO class.
21
22							=head1 DESCRIPTION
23
24							This object can transform Bio::Seq objects to and from fasta flat
25							file databases.
26
27							This module handles very large sequence files by using the
28							Bio::Seq::LargePrimarySeq module to store all the sequence data in
29							a file. This can be a problem if you have limited disk space on your
30							computer because this will effectively cause 2 copies of the sequence
31							file to reside on disk for the life of the
32							Bio::Seq::LargePrimarySeq object. The default location for this is
33							specified by the L<File::Spec>-E<gt>tmpdir routine which is usually /tmp
34							on UNIX. If a sequence file is larger than the swap space (capacity
35							of the /tmp dir) this could cause problems for the machine. It is
36							possible to set the directory where the temporary file is located by
37							adding the following line to your code BEFORE calling next_seq. See
38							L<Bio::Seq::LargePrimarySeq> for more information.
39
40							$Bio::Seq::LargePrimarySeq::DEFAULT_TEMP_DIR = 'newdir';
41
42							=head1 FEEDBACK
43
44							=head2 Mailing Lists
45
46							User feedback is an integral part of the evolution of this and other
47							Bioperl modules. Send your comments and suggestions preferably to one
48							of the Bioperl mailing lists. Your participation is much appreciated.
49
50							bioperl-l@bioperl.org - General discussion
51							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52
53							=head2 Support
54
55							Please direct usage questions or support issues to the mailing list:
56
57							I<bioperl-l@bioperl.org>
58
59							rather than to the module maintainer directly. Many experienced and
60							responsive experts will be able to look at the problem and quickly
61							address it. Please include a thorough description of the problem
62							with code and data examples if at all possible.
63
64							=head2 Reporting Bugs
65
66							Report bugs to the Bioperl bug tracking system to help us keep track
67							of the bugs and their resolution. Bug reports can be submitted via the web:
68
69							https://github.com/bioperl/bioperl-live/issues
70
71							=head1 AUTHORS - Jason Stajich
72
73							Email: jason@bioperl.org
74
75							=head1 APPENDIX
76
77							The rest of the documentation details each of the object
78							methods. Internal methods are usually preceded with a _
79
80							=cut
81
82							# Let the code begin...
83
84							package Bio::SeqIO::largefasta;
85	1			1		411	use vars qw($FASTALINELEN);
	1					2
	1					36
86	1			1		4	use strict;
	1					1
	1					17
87
88	1			1		208	use Bio::Seq::SeqFactory;
	1					2
	1					30
89
90							$FASTALINELEN = 60;
91	1			1		4	use base qw(Bio::SeqIO);
	1					2
	1					287
92
93							sub _initialize {
94	3			3		8	my($self,@args) = @_;
95	3					12	$self->SUPER::_initialize(@args);
96	3	50				10	if( ! defined $self->sequence_factory ) {
97	3					9	$self->sequence_factory(Bio::Seq::SeqFactory->new
98							(-verbose => $self->verbose(),
99							-type => 'Bio::Seq::LargePrimarySeq'));
100							}
101							}
102
103							=head2 next_seq
104
105							Title : next_seq
106							Usage : $seq = $stream->next_seq()
107							Function: returns the next sequence in the stream
108							Returns : A Bio::Seq::LargePrimarySeq object
109							Args : NONE
110
111							=cut
112
113							sub next_seq {
114	2			2	1	508	my ($self) = @_;
115							# local $/ = "\n";
116	2					6	my $largeseq = $self->sequence_factory->create();
117	2					4	my ($id,$fulldesc,$entry);
118	2					2	my $count = 0;
119	2					2	my $seen = 0;
120	2					13	while( defined ($entry = $self->_readline) ) {
121	5624	50	66			13999	if( $seen == 1 && $entry =~ /^\s*>/ ) {
122	0					0	$self->_pushback($entry);
123	0					0	return $largeseq;
124							}
125							# if ( ($entry eq '>') || eof($self->_fh) ) { $seen = 1; next; }
126	5624	50				8976	if ( ($entry eq '>') ) { $seen = 1; next; }
	0	100				0
	0					0
127							elsif( $entry =~ /\s*>(.+?)$/ ) {
128	2					3	$seen = 1;
129	2	50				13	($id,$fulldesc) = ($1 =~ /^\s*(\S+)\s*(.*)$/)
130							or $self->warn("Can't parse fasta header");
131	2					12	$largeseq->display_id($id);
132	2					7	$largeseq->primary_id($id);
133	2					6	$largeseq->desc($fulldesc);
134							} else {
135	5622					17766	$entry =~ s/\s+//g;
136	5622					9869	$largeseq->add_sequence_as_string($entry);
137							}
138	5624	50	66			11621	(++$count % 1000 == 0 && $self->verbose() > 0) && print "line $count\n";
139							}
140	2	50				10	return unless $seen;
141	2					9	return $largeseq;
142							}
143
144							=head2 write_seq
145
146							Title : write_seq
147							Usage : $stream->write_seq(@seq)
148							Function: writes the $seq object into the stream
149							Returns : 1 for success and 0 for error
150							Args : Bio::Seq object
151
152
153							=cut
154
155							sub write_seq {
156	1			1	1	482	my ($self,@seq) = @_;
157	1					2	foreach my $seq (@seq) {
158	1					8	my $top = $seq->id();
159	1	50	33			12	if ($seq->can('desc') and my $desc = $seq->desc()) {
160	1					3	$desc =~ s/\n//g;
161	1					3	$top .= " $desc";
162							}
163	1					7	$self->_print (">",$top,"\n");
164	1					3	my $end = $seq->length();
165	1					2	my $start = 1;
166	1					4	while( $start <= $end ) {
167	2811					2812	my $stop = $start + $FASTALINELEN - 1;
168	2811	100				3271	$stop = $end if( $stop > $end );
169	2811					3841	$self->_print($seq->subseq($start,$stop), "\n");
170	2811					4153	$start += $FASTALINELEN;
171							}
172							}
173
174	1	50	33			7	$self->flush if $self->_flush_on_write && defined $self->_fh;
175	1					9	return 1;
176							}
177
178							1;