File Coverage

Bio/Tools/TandemRepeatsFinder.pm

Criterion	Covered	Total	%
statement	57	58	98.2
branch	16	18	88.8
condition			n/a
subroutine	11	11	100.0
pod	3	3	100.0
total	87	90	96.6

line	stmt	bran	sub	pod	time	code
1
2						=head1 NAME
3
4						Bio::Tools::TandemRepeatsFinder - a parser for Tandem Repeats Finder output
5
6						=head1 SYNOPSIS
7
8						use Bio::Tools::TandemRepeatsFinder;
9
10						# create parser
11						my $parser = Bio::Tools::TandemRepeatsFinder->new(-file => 'tandem_repeats.out');
12
13						# loop through results
14						while( my $feat = $parser->next_result ) {
15
16						# print the source sequence id, start, end, percent matches, and the consensus sequence
17						my ($percent_matches) = $feat->get_tag_values('percent_matches');
18						my ($consensus_sequence) = $feat->get_tag_values('consensus_sequence');
19						print $feat->seq_id()."\t".$feat->start()."\t".$feat->end()."\t$percent_matches\t$consensus_sequence\n";
20
21						}
22
23						=head1 DESCRIPTION
24
25						A parser for Tandem Repeats Finder output.
26						Written and tested for version 4.00
27
28						Location, seq_id, and score are stored in Bio::SeqFeature::Generic feature.
29						All other data is stored in tags. The available tags are
30
31						period_size
32						copy_number
33						consensus_size
34						percent_matches
35						percent_indels
36						percent_a
37						percent_c
38						percent_g
39						percent_t
40						entropy
41						consensus_sequence
42						repeat_sequence
43						run_parameters
44						sequence_description
45
46						The run_parameters are stored in a hashref with the following keys:
47
48						match_weight
49						mismatch_weight
50						indel_weight
51						match_prob
52						indel_prob
53						min_score
54						max_period_size
55
56						=head1 FEEDBACK
57
58						=head2 Mailing Lists
59
60						User feedback is an integral part of the evolution of this and other
61						Bioperl modules. Send your comments and suggestions preferably to
62						the Bioperl mailing list. Your participation is much appreciated.
63
64						bioperl-l@bioperl.org - General discussion
65						http://bioperl.org/wiki/Mailing_lists - About the mailing lists
66
67						=head2 Support
68
69						Please direct usage questions or support issues to the mailing list:
70
71						I<bioperl-l@bioperl.org>
72
73						rather than to the module maintainer directly. Many experienced and
74						responsive experts will be able to look at the problem and quickly
75						address it. Please include a thorough description of the problem
76						with code and data examples if at all possible.
77
78						=head2 Reporting Bugs
79
80						Report bugs to the Bioperl bug tracking system to help us keep track
81						of the bugs and their resolution. Bug reports can be submitted via
82						the web:
83
84						https://github.com/bioperl/bioperl-live/issues
85
86						=head1 AUTHOR - Eric Just
87
88						Email e-just@northwestern.edu
89
90						=head1 APPENDIX
91
92						The rest of the documentation details each of the object methods.
93						Internal methods are usually preceded with a _
94
95						=cut
96
97						package Bio::Tools::TandemRepeatsFinder;
98	1		1		549	use strict;
	1				2
	1				27
99	1		1		4	use constant DEBUG => 0;
	1				2
	1				57
100	1		1		301	use Bio::SeqFeature::Generic;
	1				6
	1				57
101
102	1		1		9	use base qw(Bio::Root::Root Bio::Root::IO);
	1				2
	1				668
103
104						=head2 new
105
106						Title : new
107						Usage : my $obj = Bio::Tools::TandemRepeatsFinder->new();
108						Function: Builds a new Bio::Tools::TandemRepeatsFinder object
109						Returns : Bio::Tools::TandemRepeatsFinder
110						Args : -fh/-file => $val, for initing input, see Bio::Root::IO
111
112						=cut
113
114						sub new {
115	3		3	1	35	my ( $class, @args ) = @_;
116
117	3				22	my $self = $class->SUPER::new(@args);
118	3				16	$self->_initialize_io(@args);
119
120	3				21	return $self;
121						}
122
123						=head2 version
124
125						Title : version
126						Usage : $self->version( $version )
127						Function: get/set the version of Tandem Repeats finder that was used in analysis
128						Returns : value of version
129						Args : new value (optional)
130
131						=cut
132
133						sub version {
134	3		3	1	6	my ( $self, $value ) = @_;
135	3	50			7	if ( defined $value ) {
136	3				8	$self->{'version'} = $value;
137						}
138	3				6	return $self->{'version'};
139						}
140
141						=head2 _current_seq_id
142
143						Title : _current_seq_id
144						Usage : $self->_current_seq_id( $current_seq_id )
145						Function: get/set the _current_seq_id
146						Returns : value of _current_seq_id
147						Args : new value (optional)
148
149						=cut
150
151						sub _current_seq_id {
152	8		8		16	my ( $self, $value ) = @_;
153	8	100			18	if ( defined $value ) {
154	4				8	$self->{'_current_seq_id'} = $value;
155						}
156	8				42	return $self->{'_current_seq_id'};
157						}
158
159						=head2 _current_seq_description
160
161						Title : _current_seq_description
162						Usage : $self->_current_seq_description( $current_seq_description )
163						Function: get/set the _current_seq_description
164						Returns : value of _current_seq_description
165						Args : new value (optional)
166
167						=cut
168
169						sub _current_seq_description {
170	8		8		13	my ( $self, $value ) = @_;
171	8	100			19	if ( defined $value ) {
172	2				4	$self->{'_current_seq_description'} = $value;
173						}
174	8				49	return $self->{'_current_seq_description'};
175						}
176
177						=head2 _current_parameters
178
179						Title : _current_parameters
180						Usage : $self->_current_parameters( $parameters_hashref )
181						Function: get/set the _current_parameters
182						Returns : hashref representing current parameters parsed from results file
183						: keys are
184						match_weight
185						mismatch_weight
186						indel_weight
187						match_prob
188						indel_prob
189						min_score
190						max_period_size
191						Args : parameters hashref (optional)
192
193						=cut
194
195						sub _current_parameters {
196	8		8		14	my ( $self, $value ) = @_;
197	8	100			17	if ( defined $value ) {
198	4				9	$self->{'_current_parameters'} = $value;
199						}
200	8				23	return $self->{'_current_parameters'};
201						}
202
203						=head2 next_result
204
205						Title : next_result
206						Usage : my $r = $trf->next_result()
207						Function: Get the next result set from parser data
208						Returns : Bio::SeqFeature::Generic
209						Args : none
210
211						=cut
212
213						sub next_result {
214	6		6	1	2753	my ($self) = @_;
215	6				27	while ( defined( $_ = $self->_readline() ) ) {
216
217						# Parse Version line
218	117	100			342	if (/^Version (.+)/) {
		100
		100
		100
219	3				9	my $version = $1;
220	3				3	$self->warn("parsed version: $version\n") if DEBUG;
221	3	50			13	$self->warn( qq{ Bio::Tools::TandemRepeatsFinder was written and tested for Tandem Repeats Masker Version 4.00 output
222						You appear to be using Verion $version. Use at your own risk.}) if ($version != 4);
223	3				8	$self->version($version);
224						}
225
226						# Parse Sequence identifier
227						# i.e. Sequence: DDB0215018 \|Masked Chromosomal Sequence\| Chr 2f
228						elsif ( /^Sequence: ([^\s]+)\s(.+)?/ ) {
229	4				12	my $seq_id = $1;
230	4				10	my $seq_description = $2;
231	4				7	$self->warn("parsed sequence_id: $seq_id\n") if DEBUG;
232	4				14	$self->_current_seq_id($seq_id);
233	4				11	$self->_current_seq_description($seq_description);
234						}
235
236						# Parse Parameters
237						# i.e. Parameters: 2 7 7 80 10 50 12
238						elsif (/^Parameters: (.+)/) {
239	4				9	my $params = $1;
240	4				4	$self->warn("parsed parameters: $params\n") if DEBUG;
241
242	4				28	my @param_array = split /\s/, $params;
243
244	4				27	my $param_hash = {
245						match_weight => $param_array[0],
246						mismatch_weight => $param_array[1],
247						indel_weight => $param_array[2],
248						match_prob => $param_array[3],
249						indel_prob => $param_array[4],
250						min_score => $param_array[5],
251						max_period_size => $param_array[6]
252						};
253	4				14	$self->_current_parameters($param_hash);
254						}
255
256						# Parse Data
257						# i.e. 13936 13960 12 2.1 12 100 0 50 16 8 52 24 1.70 T TTTTTTTTTT
258						elsif (/^\d+\s\d+\s\d+/) {
259
260						# call internal method to create Bio::SeqFeature::Generic
261						# to represent tandem repeat
262	4				13	return $self->_create_feature($_);
263						}
264
265	0				0	elsif (DEBUG) {
266						$self->warn( "UNPARSED LINE:\n" . $_ );
267						}
268						}
269	2				6	return;
270						}
271
272						=head2 _create_feature
273
274						Title : _create_feature
275						Usage : internal method used by 'next_result'
276						Function: Takes a line from the results file and creates a bioperl object
277						Returns : Bio::SeqFeature::Generic
278						Args : a data line from the results file
279
280						=cut
281
282						sub _create_feature {
283	4		4		8	my ( $self, $line ) = @_;
284
285						# split the line and store into named variables
286	4				33	my @element = split /\s/, $line;
287						my (
288	4				20	$start, $end, $period_size,
289						$copy_number, $consensus_size, $percent_matches,
290						$percent_indels, $score, $percent_a,
291						$percent_c, $percent_g, $percent_t,
292						$entropy, $consensus_sequence, $repeat_sequence
293						) = @element;
294
295						# create tag hash from data in line
296	4				16	my $tags = {
297						period_size => $period_size,
298						copy_number => $copy_number,
299						consensus_size => $consensus_size,
300						percent_matches => $percent_matches,
301						percent_indels => $percent_indels,
302						percent_a => $percent_a,
303						percent_c => $percent_c,
304						percent_g => $percent_g,
305						percent_t => $percent_t,
306						entropy => $entropy,
307						consensus_sequence => $consensus_sequence,
308						repeat_sequence => $repeat_sequence,
309						run_parameters => $self->_current_parameters(),
310						sequence_description => $self->_current_seq_description()
311						};
312
313						# create feature from start/end etc
314	4				13	my $feat = Bio::SeqFeature::Generic->new(
315						-seq_id => $self->_current_seq_id(),
316						-score => $score,
317						-start => $start,
318						-end => $end,
319						-source_tag => 'Tandem Repeats Finder',
320						-primary_tag => 'tandem repeat',
321						-tag => $tags
322						);
323
324	4				30	return $feat;
325
326						}
327
328						1;
329