File Coverage

Bio/Restriction/IO/base.pm
Criterion Covered Total %
statement 68 105 64.7
branch 11 32 34.3
condition 8 24 33.3
subroutine 12 18 66.6
pod 4 4 100.0
total 103 183 56.2


line stmt bran cond sub pod time code
1             #
2             # BioPerl module for Bio::Restriction::IO::base
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Rob Edwards
7             #
8             # Copyright Rob Edwards
9             #
10             # You may distribute this module under the same terms as perl itself
11             #
12              
13             # POD documentation - main docs before the code
14              
15             =head1 NAME
16              
17             Bio::Restriction::IO::base - base enzyme set
18              
19             =head1 SYNOPSIS
20              
21             Do not use this module directly. Use it via the Bio::Restriction::IO class.
22              
23             =head1 DESCRIPTION
24              
25              
26             This class defines some base methods for restriction enzyme input and
27             at the same time gives a base list of common enzymes.
28              
29             =head1 FEEDBACK
30              
31             =head2 Mailing Lists
32              
33             User feedback is an integral part of the evolution of this and other
34             Bioperl modules. Send your comments and suggestions preferably to the
35             Bioperl mailing lists Your participation is much appreciated.
36              
37             bioperl-l@bioperl.org - General discussion
38             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
39              
40             =head2 Support
41              
42             Please direct usage questions or support issues to the mailing list:
43              
44             I
45              
46             rather than to the module maintainer directly. Many experienced and
47             reponsive experts will be able look at the problem and quickly
48             address it. Please include a thorough description of the problem
49             with code and data examples if at all possible.
50              
51             =head2 Reporting Bugs
52              
53             Report bugs to the Bioperl bug tracking system to help us keep track
54             the bugs and their resolution. Bug reports can be submitted via the
55             web:
56              
57             https://github.com/bioperl/bioperl-live/issues
58              
59             =head1 AUTHOR
60              
61             Rob Edwards, redwards@utmem.edu
62              
63             =head1 CONTRIBUTORS
64              
65             Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
66             Mark A. Jensen, maj-at-fortinbras-dot-us
67              
68             =head1 APPENDIX
69              
70             The rest of the documentation details each of the object
71             methods. Internal methods are usually preceded with a _
72              
73             =cut
74              
75             # Let the code begin...
76              
77             package Bio::Restriction::IO::base;
78              
79 4     4   24 use strict;
  4         6  
  4         112  
80              
81 4     4   343 use Bio::Restriction::Enzyme;
  4         10  
  4         114  
82 4     4   483 use Bio::Restriction::EnzymeCollection;
  4         8  
  4         86  
83 4     4   909 use Bio::Restriction::Enzyme::MultiCut;
  4         8  
  4         205  
84 4     4   1251 use Bio::Restriction::Enzyme::MultiSite;
  4         8  
  4         125  
85              
86 4     4   27 use base qw(Bio::Restriction::IO);
  4         9  
  4         5381  
87              
88             my $offset; # class variable
89              
90             sub new {
91 11     11 1 35 my($class, @args) = @_;
92 11 50       29 $class = ref $class ? ref $class : $class;
93 11         46 my $self = bless {}, $class;
94 11         42 $self->_initialize(@args);
95 11         65 return $self;
96             }
97              
98              
99             {
100            
101             my %FILE_FORMAT = (
102             #'itype2' => 'itype2', # itype2 format doesn't work with 'current'
103             #'8' => 'itype2',
104             'withrefm' => 'withrefm',
105             '31' => 'withrefm',
106             #'bairoch' => 'bairoch', # bairoch format doesn't work with 'current'
107             #'19' => 'bairoch',
108             #'macvector' => 'bairoch',
109             #'vectorNTI' => 'bairoch',
110             'neo' => 'neos',
111             'prototype' => 'proto'
112             );
113              
114             sub _initialize {
115 11     11   26 my($self,@args) = @_;
116 11         79 my ($current, $url, $file, $fh, $format, $verbose) =
117             $self->_rearrange([qw(CURRENT URL FILE FH FORMAT VERBOSE)],@args);
118 11 50       33 $verbose || 0;
119 11         50 $self->verbose($verbose);
120 11 50 33     39 if ($current && $format) {
121 0 0 0     0 $self->throw("Can't use -current with file, fh, or url set") if ($url || $file || $fh);
      0        
122 0 0       0 $self->throw("Format $format not retrievable using 'current'") if (!exists $FILE_FORMAT{$format});
123 0         0 my $io = $self->new(-url => 'ftp://ftp.neb.com/pub/rebase/VERSION');
124 0         0 chomp (my $version = $io->_readline);
125 0         0 push @args, (-url => "ftp://ftp.neb.com/pub/rebase/$FILE_FORMAT{$format}.$version", -retries => 1);
126             }
127              
128 11         37 $self->_companies;
129 11 50       57 return unless $self->SUPER::_initialize(@args);
130             }
131              
132             }
133              
134             =head2 read
135              
136             Title : read
137             Usage : $renzs = $stream->read
138             Function: reads all the restrction enzymes from the stream
139             Returns : a Bio::Restriction::Restriction object
140             Args : none
141              
142             =cut
143              
144             sub read {
145 5     5 1 7 my $self = shift;
146              
147 5         31 my $renzs = Bio::Restriction::EnzymeCollection->new(-empty => 1);
148 5   66     44 seek DATA,($offset||=tell DATA), 0;
149 5         70 while () {
150 2660         3641 chomp;
151 2660 50       6606 next if /^\s*$/;
152 2660         9651 my ($name, $site, $cut) = split /\s+/;
153 2660         7732 my $re = Bio::Restriction::Enzyme->new(-name => $name,
154             -site => $site,
155             -cut => $cut);
156 2660         7363 $renzs->enzymes($re);
157             }
158 5         58 return $renzs;
159             }
160              
161             =head2 _xln_sub
162              
163             Title : _xln_sub
164             Function: Translates withrefm coords to Bio::Restriction coords
165             Args : Bio::Restriction::Enzyme object, scalar integer (cut posn)
166             Note : Used internally; pass as a coderef to the B:R::Enzyme
167             constructor
168             Note : It is convenient for each format module to have its own
169             version of this; not currently demanded by the interface.
170             =cut
171              
172             sub _xln_sub { # for base.pm, a no-op
173 0     0   0 my ($z,$c) = @_;
174 0         0 return $c;
175             }
176              
177              
178             =head2 write
179              
180             Title : write
181             Usage : $stream->write($renzs)
182             Function: writes restriction enzymes into the stream
183             Returns : 1 for success and 0 for error
184             Args : a Bio::Restriction::Enzyme
185             or a Bio::Restriction::EnzymeCollection object
186              
187             =cut
188              
189             sub write {
190 0     0 1 0 my $self = shift;
191 0         0 foreach (@_) {
192 0         0 map { printf "%s\t%s\t%s\n", $_->name, $_->string, $_->cut
193 0 0       0 } sort {$a->name cmp $b->name} $_->each_enzyme
  0         0  
194             if $_->isa('Bio::Restriction::EnzymeCollection');
195 0 0       0 printf "%s\t%s\t%s\n", $_->name, $_->string, $_->cut
196             if $_->isa('Bio::Restriction::Enzyme');
197             }
198             }
199              
200             =head2 verify_prototype
201              
202             Title : verify_prototype
203             Purpose : checks enzyme against current prototype list (retrieved remotely)
204             Returns : returns TRUE if enzyme is prototype
205             Argument : Bio::Restriction::EnzymeI
206             Comments : This is an auxiliary method to retrieve and check an enzyme
207             as a prototype. It retrieves the current list, stores it
208             as a singleton instance, then uses it to check the prototype
209             and modify is_prototype() to true or false. Use as follows:
210              
211             my $col = $io->read;
212             for my $enz ($col->each_enzyme) {
213             print $enz->name.":".$enz->site."\n";
214             print "\t".$io->verify_prototype($enz)."\n";
215             }
216              
217             =cut
218              
219             my $protodb;
220              
221             sub verify_prototype {
222 0     0 1 0 my ($self, $enz) = @_;
223 0 0 0     0 $self->throw("Must pass a Bio::Restriction::EnzymeI") unless
      0        
224             $enz && ref $enz && $enz->isa("Bio::Restriction::EnzymeI");
225 0 0       0 if (!defined $protodb) {
226 0         0 my $io = Bio::Restriction::IO->new(-format => 'prototype',
227             -current => 1);
228 0         0 $protodb = $io->read;
229             }
230 0 0       0 if ($protodb->get_enzyme($enz->name)) {
231 0         0 $enz->is_prototype(1);
232             } else {
233 0         0 $enz->is_prototype(0);
234             }
235 0         0 $enz->is_prototype;
236             }
237              
238             =head2 Common REBASE parsing methods
239              
240             The rest of the methods in this file are to be used by other REBASE
241             parsers. They are not to be used outside subclasses of this base
242             class. (They are 'protected' in the sense the word is used in Java.)
243              
244             =cut
245              
246             =head2 _cuts_from_site
247              
248             Title : _cuts_from_site
249             Usage : ($site, $cut, $comp_cut) = _cuts_from_site('ACGCGT(4/5)');
250             Function: Separates cut positions from a single site string.
251             Does nothing to site if it does not have the cut string
252             Returns : array of site_string, forward_cut_position, reverse_cut_position
253             Args : recognition site string
254             Note : Not used in withrefm refactor/maj
255              
256             =cut
257              
258             sub _cuts_from_site {
259 0     0   0 my ($self, $site) = @_;
260 0         0 my ($cut, $comp_cut) = $site =~ /\((-?\d+)\/(-?\d+)\)/;
261 0         0 $site =~ s/\(.*\)$//;
262 0         0 return ($site, $cut, $comp_cut);
263             }
264              
265              
266             =head2 _meth
267              
268             Title : _meth
269             Usage : ($pos, $meth) = $self->_meth('2(5)');
270             Function: Separates methylation position and coce from a string.
271             Adjusts the position depending on enzyme site length
272             and symmetry
273             Returns : array of position and methylation code
274             Args : 1. reference to Enzyme object
275             2. methylation description string
276              
277             =cut
278              
279             sub _meth {
280 828     828   2519 my ($self, $re, $meth) = @_;
281              
282 828         1186 $meth =~ /(\S+)\((\d+)\)/;
283 828         1411 my ($pos, $m) = ($1, $2);
284 828 100       1480 $pos = 0 if $pos eq '?';
285 828 100 100     1797 $pos = $re->seq->length + $pos if $pos and ! $re->palindromic;
286 828         2455 return ($pos, $m);
287              
288 0 0       0 $self->warn("Unknown methylation format [$meth]") if $self->verbose >0;
289             }
290              
291              
292             =head2 _coordinate_shift_to_cut
293              
294             Title : _coordinate_shift_to_cut
295             Usage : $cut = $self->_coordinate_shift_to_cut($oricut, offset);
296             Function: Adjust cut position coordinates to start from the
297             first nucleotides of site
298             Returns : Cut position in correct coordinates
299             Args : 1. Original cut position
300             2. Length of the recognition site
301             Note : Not used in withrefm.pm refactor/maj
302              
303             =cut
304              
305             sub _coordinate_shift_to_cut {
306 0     0   0 my ($self, $cut, $site_length) = @_;
307 0         0 return $cut + $site_length;
308             }
309              
310              
311             =head2 _make_multisites
312              
313             Title : _make_multisites
314             Usage : $self->_make_multisites($first_enzyme, \@sites, \@mets)
315             Function: Bless a Bio::Restriction::Enzyme into
316             Bio::Restriction::Enzyme::MultiSite and clone it as many
317             times as there are alternative sites.
318             Returns : nothing, does in place editing
319             Args : 1. a Bio::Restriction::Enzyme
320             2. reference to an array of recognition site strings
321             3. reference to an array of methylation code strings, optional
322              
323             =cut
324              
325             # removed the enzyme collection from arg list /maj
326              
327             sub _make_multisites {
328 15     15   38 my ($self, $re, $sites, $meths, $xln_sub) = @_;
329              
330 15         50 bless $re, 'Bio::Restriction::Enzyme::MultiSite';
331              
332 15         24 my $count = 0;
333 15         24 while ($count < scalar @{$sites}) {
  41         83  
334             # this should probably be refactored to use the constructor
335             # too, rather than the clone/accessor method /maj
336             # my $re2 = $re->clone;
337             # my $re2;
338              
339 26         34 my $site = @{$sites}[$count];
  26         62  
340 26         125 my ($precut, $recog, $postcut) = ( $site =~ m/^(?:\((\w+\/\w+)\))?([\w^]+)(?:\((\w+\/\w+)\))?/ );
341            
342             # set the site attribute
343             # $re2->site($recog);
344              
345             # set the recog attribute (which will make the regexp transformation
346             # if necessary:
347             # $re2->recog($recog);
348             # $recog = $re2->string;
349            
350             # no warnings; # avoid 'uninitialized value' warning against $postcut
351             # my ($cut, $comp_cut) = ( $postcut =~ /(-?\d+)\/(-?\d+)/ );
352             # use warnings;
353            
354             # note the following hard codes the coordinate transformation
355             # used for rebase/itype2 : this method will break on the
356             # base.pm format.
357             # if ($cut) {
358             # $re2->cut($cut + length $recog);
359             # $re2->complementary_cut($comp_cut + length $recog);
360             # }
361            
362 26         78 my $re2 = Bio::Restriction::Enzyme::MultiSite->new(
363             -name => $re->name,
364             -site => $recog,
365             -recog => $recog,
366             -precut => $precut,
367             -postcut => $postcut,
368             -xln_sub => $xln_sub
369             );
370              
371 26 100 66     97 if ($meths and @$meths) {
372 22         61 $re2->purge_methylation_sites;
373 22         35 $re2->methylation_sites($self->_meth($re2, @{$meths}[$count]));
  22         48  
374             }
375              
376 26         72 $re->others($re2);
377 26         43 $count++;
378             }
379              
380 15         38 foreach my $enz ($re->others) {
381 26         49 $enz->others($re, grep {$_ ne $enz} $re->others);
  66         148  
382             }
383              
384 15         24 1;
385             }
386              
387             =head2 _make_multicuts
388              
389             Title : _make_multicuts
390             Usage : $self->_make_multicuts($first_enzyme, $precuts)
391             Function:
392              
393             Bless a Bio::Restriction::Enzyme into
394             Bio::Restriction::Enzyme::MultiCut and clone it. The precut
395             string is processed to replase the cut sites in the cloned
396             object. Both objects refer to each other through others() method.
397              
398             Returns : nothing, does in place editing
399             Args : 1. a Bio::Restriction::Enzyme
400             2. precut string, e.g. '12/7'
401              
402              
403             The examples we have of multiply cutting enzymes cut only four
404             times. This protected method deals only with a string of two
405             integers separated with a slash, e.g. '12/7'. The numbers represent the positions
406             BEFORE the start of the recognition site, i.e. negative positions.
407              
408             =cut
409              
410             # removed the enzyme collection from arg list /maj
411              
412             sub _make_multicuts {
413 0     0   0 my ($self, $re, $precut) = @_;
414              
415 0         0 bless $re, 'Bio::Restriction::Enzyme::MultiCut';
416            
417 0         0 my ($cut, $comp_cut) = $precut =~ /(-?\d+)\/(-?\d+)/;
418            
419 0         0 my $re2 = $re->clone;
420              
421 0         0 $re2->cut("-$cut");
422 0         0 $re2->complementary_cut("-$comp_cut");
423              
424 0         0 $re->others($re2);
425              
426 0         0 1;
427             }
428              
429             =head2 _companies
430              
431             Title : _companies
432             Purpose : Defines the companies that we know about
433             Returns : A hash
434             Argument : Nothing
435             Comments : An internal method to define the companies that we know about
436             REBASE uses a code, and this converts the code to the real name
437             (e.g. A = Amersham Pharmacia Biotech)
438              
439             =cut
440              
441             sub _companies {
442             # this is just so it is easy to set up the codes that REBASE uses
443 11     11   18 my $self=shift;
444 11         131 my %companies=(
445             'A'=>'Amersham Pharmacia Biotech (1/03)',
446             'C'=>'Minotech Biotechnology (6/01)',
447             'E'=>'Stratagene (1/03)',
448             'F'=>'Fermentas AB (1/03)',
449             'G'=>'Qbiogene (1/03)',
450             'H'=>'American Allied Biochemical, Inc. (10/98)',
451             'I'=>'SibEnzyme Ltd. (1/03)',
452             'J'=>'Nippon Gene Co., Ltd. (6/00)',
453             'K'=>'Takara Shuzo Co. Ltd. (1/03)',
454             'M'=>'Roche Applied Science (1/03)',
455             'N'=>'New England Biolabs (1/03)',
456             'O'=>'Toyobo Biochemicals (11/98)',
457             'P'=>'Megabase Research Products (5/99)',
458             'Q'=>'CHIMERx (1/03)',
459             'R'=>'Promega Corporation (1/03)',
460             'S'=>'Sigma Chemical Corporation (1/03)',
461             'U'=>'Bangalore Genei (1/03)',
462             'V'=>'MRC-Holland (1/03)',
463             'X'=>'EURx Ltd. (1/03)');
464 11         26 $self->{company}=\%companies;
465             }
466              
467             1;
468              
469             __DATA__