File Coverage

Bio/Tools/Match.pm
Criterion Covered Total %
statement 30 31 96.7
branch 1 2 50.0
condition 3 7 42.8
subroutine 6 6 100.0
pod 2 2 100.0
total 42 48 87.5


line stmt bran cond sub pod time code
1             # $Id: Match.pm,v 1.2 2007/06/14 18:01:52 nathan Exp $
2             #
3             # BioPerl module for Bio::Tools::Match
4             #
5             # Please direct questions and support issues to <bioperl-l@bioperl.org>
6             #
7             # Cared for by Sendu Bala
8             #
9             # Copyright Sendu Bala
10             #
11             # You may distribute this module under the same terms as perl itself
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Tools::Match - Parses output from Transfac's match(TM)
18              
19             =head1 SYNOPSIS
20              
21             use strict;
22              
23             use Bio::Tools::Match;
24              
25             my $parser = Bio::Tools::Match->new(-file => "match.out");
26            
27             while (my $feat = $parser->next_result) {
28             my $start = $feat->start;
29             my $end = $feat->end;
30             my $core_score = $feat->score;
31             my $matrix_score = ($feat->annotation->get_Annotations('matrix_score'))[0]->value;
32             my $matrix_id = ($feat->annotation->get_Annotations('matrix_id'))[0]->value;
33             }
34              
35             =head1 DESCRIPTION
36              
37             This module is used to parse the output from Transfac's match(TM) program. It
38             doesn't support the histogram output of match.
39              
40             Each result is a Bio::SeqFeature::Generic representing a single matrix match.
41              
42             =head1 FEEDBACK
43              
44             =head2 Mailing Lists
45              
46             User feedback is an integral part of the evolution of this and other
47             Bioperl modules. Send your comments and suggestions preferably to
48             the Bioperl mailing list. Your participation is much appreciated.
49              
50             bioperl-l@bioperl.org - General discussion
51             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
52              
53             =head2 Support
54              
55             Please direct usage questions or support issues to the mailing list:
56              
57             I<bioperl-l@bioperl.org>
58              
59             rather than to the module maintainer directly. Many experienced and
60             responsive experts will be able to look at the problem and quickly
61             address it. Please include a thorough description of the problem
62             with code and data examples if at all possible.
63              
64             =head2 Reporting Bugs
65              
66             Report bugs to the Bioperl bug tracking system to help us keep track
67             of the bugs and their resolution. Bug reports can be submitted via the
68             web:
69              
70             https://github.com/bioperl/bioperl-live/issues
71              
72             =head1 AUTHOR - Sendu Bala
73              
74             Email bix@sendu.me.uk
75              
76             =head1 APPENDIX
77              
78             The rest of the documentation details each of the object methods.
79             Internal methods are usually preceded with a _
80              
81             =cut
82              
83             # Let the code begin...
84              
85             package Bio::Tools::Match;
86 1     1   421 use strict;
  1         1  
  1         25  
87              
88 1     1   287 use Bio::SeqFeature::Generic;
  1         3  
  1         38  
89 1     1   6 use Bio::Annotation::SimpleValue;
  1         3  
  1         21  
90              
91 1     1   6 use base qw(Bio::Root::Root Bio::Root::IO);
  1         2  
  1         379  
92              
93              
94             =head2 new
95              
96             Title : new
97             Usage : my $obj = Bio::Tools::Match->new();
98             Function: Builds a new Bio::Tools::Match object
99             Returns : Bio::Tools::Match
100             Args : -file (or -fh) should contain the contents of a standard match output
101              
102             =cut
103              
104             sub new {
105 1     1 1 5 my ($class, @args) = @_;
106 1         13 my $self = $class->SUPER::new(@args);
107            
108 1         8 $self->_initialize_io(@args);
109            
110 1         8 return $self;
111             }
112              
113             =head2 next_result
114              
115             Title : next_result
116             Usage : $result = $obj->next_result();
117             Function: Returns the next result available from the input, or undef if there
118             are no more results.
119             Returns : Bio::SeqFeature::Generic object. Features are annotated with tags
120             for 'matrix_score', 'matrix_id' and a 'predicted' tag.
121             Args : none
122              
123             =cut
124              
125             sub next_result {
126 5     5 1 12 my ($self) = @_;
127            
128 5   50     28 my $line = $self->_readline || return;
129            
130 5 50 33     31 if (! $self->{found_seq_id} && $line =~ /^Inspecting sequence ID\s+(.+)/) {
131 0         0 $self->{found_seq_id} = $1;
132             }
133            
134 5         34 while ($line !~ /^\s\S+\s+\|\s+\d+/) {
135 7   50     11 $line = $self->_readline || return;
136             }
137            
138            
139             # The first column gives the TRANSFAC(r) identifier of the matching matrix,
140             # then comes the position and the strand where the respective match has been
141             # found. The core similarity score is given in column three, the matrix
142             # similarity score in column four. The last column gives the matching
143             # sequence.
144             #
145             #
146             #Search for sites by WeightMatrix library: /home/sendu/files/programs/transfac/cgi-bin/data/matrix.dat
147             #Sequence file: sequence.fa
148             #Site selection profile: mxprf Profile generated from /home/sendu/files/programs/transfac/cgi-bin/data/matrix.dat with default values.
149             #
150             #
151             #Inspecting sequence ID Homo_sapiens
152             #
153             # V$MYOD_01 | 5 (+) | 0.751 | 0.784 | ttaGAGGTggcg
154             # V$MYOD_01 | 5 (-) | 0.778 | 0.580 | ttagAGGTGgcg
155             # V$MYOD_01 | 30 (+) | 0.751 | 0.581 | gctCAGGCaccc
156             #[...]
157             # V$RORA_Q4 | 53610 (+) | 0.775 | 0.668 | tgtgggGGCCA
158             # V$RORA_Q4 | 53639 (+) | 0.775 | 0.636 | gtcgggGGACA
159             #
160             # Total sequences length=53654
161             #
162             # Total number of found sites=1735559
163             #
164             # Frequency of sites per nucleotide=32.347243
165            
166 5         53 my ($matrix_id, $start, $strand, $core_score, $matrix_score, $seq) = $line =~ /^\s(\S+)\s+\|\s+(\d+)\s+\(([+-])\)\s+\|\s+(\S+)\s+\|\s+(\S+)\s+\|\s+(\S+)/;
167             my $feat = Bio::SeqFeature::Generic->new(
168             -seq_id => $self->{found_seq_id},
169 5         53 -start => $start,
170             -end => $start + length($seq) - 1,
171             -strand => 1,
172             -score => $core_score,
173             -source => 'transfac_match');
174            
175 5         29 my $sv = Bio::Annotation::SimpleValue->new(-tagname => 'predicted', -value => 1);
176 5         50 $feat->annotation->add_Annotation($sv);
177 5         14 $sv = Bio::Annotation::SimpleValue->new(-tagname => 'matrix_score', -value => $matrix_score);
178 5         14 $feat->annotation->add_Annotation($sv);
179 5         21 $sv = Bio::Annotation::SimpleValue->new(-tagname => 'matrix_id', -value => $matrix_id);
180 5         16 $feat->annotation->add_Annotation($sv);
181            
182 5         14 return $feat;
183             }
184              
185             1;