File Coverage

Bio/Restriction/IO/withrefm.pm
Criterion Covered Total %
statement 63 66 95.4
branch 18 22 81.8
condition 3 6 50.0
subroutine 10 11 90.9
pod 2 2 100.0
total 96 107 89.7


line stmt bran cond sub pod time code
1             # BioPerl module for Bio::Restriction::IO::withrefm
2             #
3             # Please direct questions and support issues to
4             #
5             # Cared for by Rob Edwards
6             #
7             # Copyright Rob Edwards
8             #
9             # You may distribute this module under the same terms as perl itself
10             #
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Restriction::IO::withrefm - withrefm enzyme set
17              
18             =head1 SYNOPSIS
19              
20             Do not use this module directly. Use it via the Bio::Restriction::IO class.
21              
22             =head1 DESCRIPTION
23              
24             This is the most complete format of the REBASE files, and basically
25             includes all the data on each of the restriction enzymes.
26              
27              
28             =head1 FEEDBACK
29              
30             =head2 Mailing Lists
31              
32             User feedback is an integral part of the evolution of this and other
33             Bioperl modules. Send your comments and suggestions preferably to the
34             Bioperl mailing lists Your participation is much appreciated.
35              
36             bioperl-l@bioperl.org - General discussion
37             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
38              
39             =head2 Support
40              
41             Please direct usage questions or support issues to the mailing list:
42              
43             I
44              
45             rather than to the module maintainer directly. Many experienced and
46             reponsive experts will be able look at the problem and quickly
47             address it. Please include a thorough description of the problem
48             with code and data examples if at all possible.
49              
50             =head2 Reporting Bugs
51              
52             Report bugs to the Bioperl bug tracking system to help us keep track
53             the bugs and their resolution. Bug reports can be submitted via the
54             web:
55              
56             https://github.com/bioperl/bioperl-live/issues
57              
58             =head1 AUTHOR
59              
60             Rob Edwards, redwards@utmem.edu
61              
62             =head1 CONTRIBUTORS
63              
64             Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
65             Mark A. Jensen, maj-at-fortinbras-dot-us
66              
67             =head1 APPENDIX
68              
69             The rest of the documentation details each of the object
70             methods. Internal methods are usually preceded with a _
71              
72             =cut
73              
74             # Let the code begin...
75              
76             package Bio::Restriction::IO::withrefm;
77              
78 2     2   7 use vars qw(%WITH_REFM_FIELD);
  2         3  
  2         86  
79 2     2   7 use strict;
  2         1  
  2         35  
80              
81             #use Bio::Restriction::IO;
82 2     2   6 use Bio::Restriction::Enzyme;
  2         1  
  2         33  
83 2     2   7 use Bio::Restriction::EnzymeCollection;
  2         1  
  2         26  
84              
85 2     2   6 use Data::Dumper;
  2         2  
  2         92  
86              
87 2     2   8 use base qw(Bio::Restriction::IO::base);
  2         1  
  2         669  
88              
89             =head2 read
90              
91             Title : read
92             Usage : $renzs = $stream->read
93             Function: reads all the restrction enzymes from the stream
94             Returns : a Bio::Restriction::Restriction object
95             Args : none
96              
97             =cut
98              
99             sub read {
100 3     3 1 5 my $self = shift;
101              
102 3         23 my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
103              
104 3         11 local $/ = '<1>';
105 3         103 while (defined(my $entry=$self->_readline()) ) {
106              
107             # not an entry.
108 8049 100       16449 next unless $entry =~ /<2>/;
109              
110             #$self->debug("|$entry|\n");
111              
112             #
113             # Minimal information
114             #
115 8046         18438 my ($name) = $entry =~ /^(\S+)/;
116 8046         19503 my ($site) = $entry =~ /\<3\>([^\n]+)/;
117              
118 8046 100 33     35267 if ( ! defined $site || $site eq '' or $site eq '?') {
      66        
119 593 50       1349 $self->warn("$name: no site. Skipping") if $self->verbose > 1;
120 593         1341 next;
121             }
122              
123             # there are a couple of sequences that have multiple
124             # recognition sites eg M.PhiBssHII: ACGCGT,CCGCGG,RGCGCY,RCCGGY,GCGCGC
125             # TaqII : GACCGA(11/9),CACCCA(11/9)
126              
127 7453         6163 my @sequences;
128 7453 100       10710 if ($site =~ /\,/) {
129 14         39 @sequences = split (/\,/, $site);
130 14         42 $site=shift @sequences;
131             }
132              
133             # this regexp now parses all possible components
134             # $1 : (s/t) or undef
135             # $2 : [site]
136             # $3 : (m/n) or undef /maj
137              
138 2     2   10 no warnings; # avoid faulty 'uninitialized value' warnings
  2         3  
  2         849  
139             # occurring against the variables set by
140             # regexp matching (unless anyone has other ideas...)
141              
142 7453         20045 my ($precut, $recog, $postcut) = ( $site =~ m/^(?:\((-?\w+\/-?\w+)\))?([\w^]+)(?:\((-?\w+\/-?\w+)\))?/ );
143              
144              
145             #
146             # prototype / isoschizomers
147             #
148              
149 7453         21076 my ($isoschizomers) = $entry =~ /<2>([^\n]+)/;
150 7453         62393 my @isos = split(/\,/,$isoschizomers);
151 7453 100       9641 my $is_prototype = (@isos ? 1 : 0);
152              
153             #
154             # microbe
155             #
156 7453         18791 my ($microbe) = $entry =~ /<5>([^\n]+)/;
157              
158             #
159             # source
160             #
161 7453         17028 my ($source) = $entry =~ /<6>([^\n]+)/;
162              
163             #
164             # vendors
165             #
166 7453         16397 my ($vendors) = $entry =~ /<7>([^\n]+)/;
167 7453         9431 my @vendors = split(/ */, $vendors);
168              
169              
170             #
171             # references
172             #
173 7453         29537 my ($refs) = $entry =~ /<8>(.+)<1>/s;
174 7453         7818 my @refs = map {split /\n+/} $refs;
  7453         21590  
175              
176 2     2   10 use warnings;
  2         3  
  2         578  
177            
178             # when enz is constructed, site() will contain original characters,
179             # but recog() will contain a regexp if required.../maj
180 7453         84647 my $re = Bio::Restriction::Enzyme->new(
181             -name => $name,
182             -site => $recog,
183             -recog => $recog,
184             -precut => $precut,
185             -postcut => $postcut,
186             -is_prototype => $is_prototype,
187             -isoschizomers => [@isos],
188             -source => $source,
189             -vendors => [@vendors],
190             -references => [@refs],
191             -xln_sub => \&_xln_sub
192             );
193              
194             #
195             # methylation: easier to set here during parsing/maj
196             #
197              
198 7453         29678 my ($meth) = $entry =~ /<4>([^\n]+)/;
199 7453         5769 my @meths;
200 7453 100       9897 if ($meth) {
201             # this can be either X(Y) or X(Y),X2(Y2)
202             # where X is the base and y is the type of methylation
203 754 100       2806 if ( $meth =~ /(\S+)\((\d+)\),(\S+)\((\d+)\)/ ) { # two msites per site
    50          
204             #my ($p1, $m1, $p2, $m2) = ($1, $2, $3, $4);
205 47         138 $re->methylation_sites($self->_meth($re,$1, $2),
206             $self->_meth($re,$3,$4));
207             }
208             elsif ($meth =~ /(\S+)\((\d+)\)/ ) { # one msite per site or more sites
209 707         1961 $re->methylation_sites( $self->_meth($re,$1,$2) );
210 707         1396 @meths = split (/\, /, $meth);
211 707         835 $meth=shift @meths;
212             } else {
213 0 0       0 $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
214             }
215             }
216              
217             # the _make_multicuts function now takes place in the
218             # Enzyme constructor / maj
219              
220             #
221             # create special types of Enzymes
222             # (because of object cloning in _make_multisites, this happens
223             # after everything else is set /maj)
224             # (with the removal of the collection from the arglist, this
225             # call (or its code) could now be placed in the constructor,
226             # which is safer (since this has to happen last),
227             # but it requires the methylation info, which
228             # is more natural to get out here in the parsing./maj
229              
230 7453 100       9264 $self->_make_multisites($re, \@sequences, \@meths, \&_xln_sub) if @sequences;
231              
232 7453         14631 $renzs->enzymes($re);
233              
234              
235             }
236              
237 3         27 return $renzs;
238             }
239              
240             =head2 _xln_sub
241              
242             Title : _xln_sub
243             Function: Translates withrefm coords to Bio::Restriction coords
244             Args : Bio::Restriction::Enzyme object, scalar integer (cut posn)
245             Note : Used internally; pass as a coderef to the B:R::Enzyme
246             constructor
247              
248             =cut
249              
250             sub _xln_sub {
251 998     998   1037 my ($z,$c) = @_;
252 998 100       2398 return ($c < 0 ? $c : length($z->string)+$c);
253             }
254              
255             =head2 write
256              
257             Title : write
258             Usage : $stream->write($renzs)
259             Function: writes restriction enzymes into the stream
260             Returns : 1 for success and 0 for error
261             Args : a Bio::Restriction::Enzyme
262             or a Bio::Restriction::EnzymeCollection object
263              
264             =cut
265              
266             sub write {
267 0     0 1   my ($self,@h) = @_;
268 0           $self->throw_not_implemented;
269             }
270              
271             1;