File Coverage

Bio/Restriction/Analysis.pm

Criterion	Covered	Total	%
statement	262	356	73.6
branch	102	170	60.0
condition	4	9	44.4
subroutine	23	30	76.6
pod	13	14	92.8
total	404	579	69.7

line	stmt	bran	cond	sub	pod	time	code
1							#
2							# BioPerl module Bio::Restriction::Analysis
3							#
4							# Please direct questions and support issues to
5							#
6							# Cared for by Rob Edwards
7							#
8							# You may distribute this module under the same terms as perl itself
9
10							## POD Documentation:
11
12							=head1 NAME
13
14							Bio::Restriction::Analysis - cutting sequences with restriction
15							enzymes
16
17							=head1 SYNOPSIS
18
19							# analyze a DNA sequence for restriction enzymes
20							use Bio::Restriction::Analysis;
21							use Bio::PrimarySeq;
22							use Data::Dumper;
23
24							# get a DNA sequence from somewhere
25							my $seq = Bio::PrimarySeq->new
26							(-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTC',
27							-primary_id => 'synopsis',
28							-molecule => 'dna');
29
30							# now start an analysis.
31							# this is using the default set of enzymes
32							my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);
33
34							# find unique cutters. This returns a
35							# Bio::Restriction::EnzymeCollection object
36							my $enzymes = $ra->unique_cutters;
37							print "Unique cutters: ", join (', ',
38							map {$_->name} $enzymes->unique_cutters), "\n";
39
40							# AluI is one them. Where does it cut?
41							# This is will return an array of the sequence strings
42
43							my $enz = 'AluI';
44							my @frags = $ra->fragments($enz);
45							# how big are the fragments?
46							print "AluI fragment lengths: ", join(' & ', map {length $_} @frags), "\n";
47
48							# You can also bypass fragments and call sizes directly:
49							# to see all the fragment sizes
50							print "All sizes: ", join " ", $ra->sizes($enz), "\n";
51							# to see all the fragment sizes sorted by size like on a gel
52							print "All sizes, sorted ", join (" ", $ra->sizes($enz, 0, 1)), "\n";
53
54							# how many times does each enzyme cut
55							my $cuts = $ra->cuts_by_enzyme('BamHI');
56							print "BamHI cuts $cuts times\n";
57
58							# How many enzymes do not cut at all?
59							print "There are ", scalar $ra->zero_cutters->each_enzyme,
60							" enzymes that do not cut\n";
61
62							# what about enzymes that cut twice?
63							my $two_cutters = $ra->cutters(2);
64							print join (" ", map {$_->name} $two_cutters->each_enzyme),
65							" cut the sequence twice\n";
66
67							# what are all the enzymes that cut, and how often do they cut
68							printf "\n%-10s%s\n", 'Enzyme', 'Number of Cuts';
69							my $all_cutters = $ra->cutters;
70							map {
71							printf "%-10s%s\n", $_->name, $ra->cuts_by_enzyme($_->name)
72							} $all_cutters->each_enzyme;
73
74							# Finally, we can interact the restriction enzyme object by
75							# retrieving it from the collection object see the docs for
76							# Bio::Restriction::Enzyme.pm
77							my $enzobj = $enzymes->get_enzyme($enz);
78
79
80							=head1 DESCRIPTION
81
82							Bio::Restriction::Analysis describes the results of cutting a DNA
83							sequence with restriction enzymes.
84
85							To use this module you can pass a sequence object and optionally a
86							Bio::Restriction::EnzymeCollection that contains the enzyme(s) to cut the
87							sequences with. There is a default set of enzymes that will be loaded
88							if you do not pass in a Bio::Restriction::EnzymeCollection.
89
90							To cut a sequence, set up a Restriction::Analysis object with a sequence
91							like this:
92
93							use Bio::Restriction::Analysis;
94							my $ra = Bio::Restriction::Analysis->new(-seq=>$seqobj);
95
96							or
97
98							my $ra = Bio::Restriction::Analysis->new
99							(-seq=>$seqobj, -enzymes=>$enzs);
100
101							Then, to get the fragments for a particular enzyme use this:
102
103							@fragments = $ra->fragments('EcoRI');
104
105							Note that the naming of restriction enzymes is that the last numbers
106							are usually Roman numbers (I, II, III, etc). You may want to use
107							something like this:
108
109							# get a reference to an array of unique (single) cutters
110							$singles = $re->unique_cutters;
111							foreach my $enz ($singles->each_enzyme) {
112							@fragments = $re->fragments($enz);
113							... do something here ...
114							}
115
116							Note that if your sequence is circular, the first and last fragment
117							will be joined so that they are the appropriate length and sequence
118							for further analysis. This fragment will also be checked for cuts
119							by the enzyme(s). However, this will change the start of the
120							sequence!
121
122							There are two separate algorithms used depending on whether your
123							enzyme has ambiguity. The non-ambiguous algorithm is a lot faster,
124							and if you are using very large sequences you should try and use
125							this algorithm. If you have a large sequence (e.g. genome) and
126							want to use ambgiuous enzymes you may want to make separate
127							Bio::Restriction::Enzyme objects for each of the possible
128							alternatives and make sure that you do not set is_ambiguous!
129
130							This version should correctly deal with overlapping cut sites
131							in both ambiguous and non-ambiguous enzymes.
132
133							I have tried to write this module with speed and memory in mind
134							so that it can be effectively used for large (e.g. genome sized)
135							sequence. This module only stores the cut positions internally,
136							and calculates everything else on an as-needed basis. Therefore
137							when you call fragment_maps (for example), there may be another
138							delay while these are generated.
139
140							=head1 FEEDBACK
141
142							=head2 Mailing Lists
143
144							User feedback is an integral part of the evolution of this and other
145							Bioperl modules. Send your comments and suggestions preferably to one
146							of the Bioperl mailing lists. Your participation is much appreciated.
147
148							bioperl-l@bioperl.org - General discussion
149							http://bioperl.org/wiki/Mailing_lists - About the mailing lists
150
151							=head2 Support
152
153							Please direct usage questions or support issues to the mailing list:
154
155							I
156
157							rather than to the module maintainer directly. Many experienced and
158							reponsive experts will be able look at the problem and quickly
159							address it. Please include a thorough description of the problem
160							with code and data examples if at all possible.
161
162							=head2 Reporting Bugs
163
164							Report bugs to the Bioperl bug tracking system to help us keep track
165							the bugs and their resolution. Bug reports can be submitted via the
166							web:
167
168							https://github.com/bioperl/bioperl-live/issues
169
170							=head1 AUTHOR
171
172							Rob Edwards, redwards@utmem.edu,
173							Steve Chervitz, sac@bioperl.org
174
175							=head1 CONTRIBUTORS
176
177							Heikki Lehvaslaiho, heikki-at-bioperl-dot-org
178							Mark A. Jensen, maj-at-fortinbras-dot-us
179
180							=head1 COPYRIGHT
181
182							Copyright (c) 2003 Rob Edwards. Some of this work is Copyright (c)
183							1997-2002 Steve A. Chervitz. All Rights Reserved.
184
185							This module is free software; you can redistribute it and/or modify it
186							under the same terms as Perl itself.
187
188							=head1 SEE ALSO
189
190							L,
191							L
192
193							=head1 APPENDIX
194
195							Methods beginning with a leading underscore are considered private and
196							are intended for internal use by this module. They are not considered
197							part of the public interface and are described here for documentation
198							purposes only.
199
200							=cut
201
202							package Bio::Restriction::Analysis;
203	3			3		3680	use Bio::Restriction::EnzymeCollection;
	3					6
	3					65
204	3			3		13	use strict;
	3					3
	3					42
205	3			3		8	use Data::Dumper;
	3					3
	3					105
206
207	3			3		10	use base qw(Bio::Root::Root);
	3					4
	3					167
208	3			3		11	use Scalar::Util qw(blessed);
	3					4
	3					6085
209
210							=head1 new
211
212							Title : new
213							Function : Initializes the restriction enzyme object
214							Returns : The Restriction::Analysis object
215							Arguments :
216
217							$re_anal->new(-seq=$seqobj,
218							-enzymes=>Restriction::EnzymeCollection object)
219							-seq requires a Bio::PrimarySeq object
220							-enzymes is optional.
221							If omitted it will use the default set of enzymes
222
223							This is the place to start. Pass in a sequence, and you will be able
224							to get the fragments back out. Several other things are available
225							like the number of zero cutters or single cutters.
226
227							=cut
228
229							sub new {
230	4			4	1	790	my($class, @args) = @_;
231	4					23	my $self = $class->SUPER::new(@args);
232	4					26	my ($seq,$enzymes) =
233							$self->_rearrange([qw(
234							SEQ
235							ENZYMES
236							)], @args);
237
238	4	50				22	$seq && $self->seq($seq);
239
240							$enzymes ? $self->enzymes($enzymes)
241	4	100				24	: ($self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new );
242
243							# keep track of status
244	4					6	$self->{'_cut'} = 0;
245
246							# left these here because we want to reforce a _cut if someone
247							# just calls new
248	4					10	$self->{maximum_cuts} = 0;
249
250	4					9	$self->{'_number_of_cuts_by_enzyme'} = {};
251	4					12	$self->{'_number_of_cuts_by_cuts'} = {};
252	4					13	$self->{'_fragments'} = {};
253	4					6	$self->{'_cut_positions'} = {}; # cut position is the real position
254	4					9	$self->{'_frag_map_list'} = {};
255
256	4					26	return $self;
257
258							}
259
260							=head1 Methods to set parameters
261
262							=cut
263
264							=head2 seq
265
266							Title : seq
267							Usage : $ranalysis->seq($newval);
268							Function : get/set method for the sequence to be cut
269							Example : $re->seq($seq);
270							Returns : value of seq
271							Args : A Bio::PrimarySeqI dna object (optional)
272
273							=cut
274
275							sub seq {
276	16			16	1	31	my $self = shift;
277	16	100				53	if (@_) {
278	6					7	my $seq = shift;
279	6	50				42	$self->throw('Need a sequence object ['. ref $seq. ']')
280							unless $seq->isa('Bio::PrimarySeqI');
281	6	50				20	$self->throw('Need a DNA sequence object ['. $seq->alphabet. ']')
282							unless $seq->alphabet eq 'dna';
283
284	6					21	$self->{'_seq'} = $seq;
285	6					11	$self->{'_cut'} = 0;
286							}
287	16					40	return $self->{'_seq'};
288							}
289
290							=head2 enzymes
291
292							Title : enzymes
293							Usage : $re->enzymes($newval)
294							Function : gets/Set the restriction enzyme enzymes
295							Example : $re->enzymes('EcoRI')
296							Returns : reference to the collection
297							Args : an array of Bio::Restriction::EnzymeCollection and/or
298							Bio::Restriction::Enzyme objects
299
300
301							The default object for this method is
302							Bio::Restriction::EnzymeCollection. However, you can also pass it a
303							list of Bio::Restriction::Enzyme objects - even mixed with Collection
304							objects. They will all be stored into one collection.
305
306							=cut
307
308							sub enzymes {
309	2			2	1	4	my $self = shift;
310	2	50				6	if (@_) {
311							$self->{'_enzymes'} = Bio::Restriction::EnzymeCollection->new (-empty => 1)
312	2	50				13	unless $self->{'_enzymes'};
313	2					6	$self->{'_enzymes'}->enzymes(@_);
314	2					5	$self->{'_cut'} = 0;
315							}
316	2					7	return $self->{'_enzymes'};
317							}
318
319
320							=head1 Perform the analysis
321
322							=cut
323
324							=head2 cut
325
326							Title : cut
327							Usage : $re->cut()
328							Function : Cut the sequence with the enzymes
329							Example : $re->cut(); $re->cut('single'); or $re->cut('multiple', $enzymecollection);
330							Returns : $self
331							Args : 'single' (optional), 'multiple' with enzyme collection.
332
333							An explicit cut method is needed to pass arguments to it.
334
335							There are two varieties of cut. Single is the default, and need
336							not be explicitly called. This cuts the sequence with each
337							enzyme separately.
338
339							Multiple cuts a sequence with more than one enzyme. You must pass
340							it a Bio::Restriction::EnzymeCollection object of the set
341							of enzymes that you want to use in the double digest. The results
342							will be stored as an enzyme named "multiple_digest", so you can
343							use all the retrieval methods to get the data.
344
345							If you want to use the default setting there is no need to call cut
346							directly. Every method in the class that needs output checks the
347							object's internal status and recalculates the cuts if needed.
348
349							Note: cut doesn't now re-initialize everything before figuring
350							out cuts. This is so that you can do multiple digests, or add more
351							data or whatever. You'll have to use new to reset everything.
352
353							See also the comments in above about ambiguous and non-ambiguous
354							sequences.
355
356							=cut
357
358							sub cut {
359	10			10	1	18	my ($self, $opt, $ec) = @_;
360
361							# for the moment I have left this as a separate routine so
362							# the user calls cut rather than _cuts. This also initializes
363							# some stuff we need to use.
364
365	10	50				28	$self->throw("A sequence must be supplied")
366							unless $self->seq;
367
368	10	100	66			49	if ($opt && uc($opt) eq "MULTIPLE") {
369	3	50				8	$self->throw("You must supply a separate enzyme collection for multiple digests") unless $ec;
370	3					11	$self->_multiple_cuts($ec); # multiple digests
371							} else {
372							# reset some of the things that we save
373	7					13	$self->{maximum_cuts} = 0;
374	7					12	$self->{'_number_of_cuts_by_enzyme'} = {};
375	7					1141	$self->{'_number_of_cuts_by_cuts'} = {};
376	7					288	$self->{'_fragments'} = {};
377	7					12	$self->{'_cut_positions'} = {}; # cut position is the real position
378	7					861	$self->{'_frag_map_list'} = {};
379	7					34	$self->_cuts;
380							}
381
382	10					1358	$self->{'_cut'} = 1;
383	10					37	return $self;
384							}
385
386							=head2 mulitple_digest
387
388							Title : multiple_digest
389							Function : perform a multiple digest on a sequence
390							Returns : $self so you can go and get any of the other methods
391							Arguments : An enzyme collection
392
393							Multiple digests can use 1 or more enzymes, and the data is stored
394							in as if it were an enzyme called multiple_digest. You can then
395							retrieve information about multiple digests from any of the other
396							methods.
397
398							You can use this method in place of $re->cut('multiple', $enz_coll);
399
400							=cut
401
402							sub multiple_digest {
403	0			0	0	0	my ($self, $ec)=@_;
404	0					0	return $self->cut('multiple', $ec);
405							}
406
407							=head1 Query the results of the analysis
408
409							=cut
410
411							=head2 positions
412
413							Title : positions
414							Function : Retrieve the positions that an enzyme cuts at
415							Returns : An array of the positions that an enzyme cuts at
416							: or an empty array if the enzyme doesn't cut
417							Arguments: An enzyme name to retrieve the positions for
418							Comments : The cut occurs after the base specified.
419
420							=cut
421
422							sub positions {
423	8			8	1	329	my ($self, $enz) = @_;
424	8	50				27	$self->cut unless $self->{'_cut'};
425	8	50				20	$self->throw('no enzyme selected to get positions for')
426							unless $enz;
427
428							return defined $self->{'_cut_positions'}->{$enz} ?
429	8	50				22	@{$self->{'_cut_positions'}->{$enz}} :
	8					44
430							();
431							}
432
433							=head2 fragments
434
435							Title : fragments
436							Function : Retrieve the fragments that we cut
437							Returns : An array of the fragments retrieved.
438							Arguments: An enzyme name to retrieve the fragments for
439
440							For example this code will retrieve the fragments for all enzymes that
441							cut your sequence
442
443							my $all_cutters = $analysis->cutters;
444							foreach my $enz ($$all_cutters->each_enzyme}) {
445							@fragments=$analysis->fragments($enz);
446							}
447
448							=cut
449
450							sub fragments {
451	18			18	1	4327	my ($self, $enz) = @_;
452	18	100				57	$self->cut unless $self->{'_cut'};
453	18	50				44	$self->throw('no enzyme selected to get fragments for')
454							unless $enz;
455	18					23	my @fragments;
456	18					42	for ($self->fragment_maps($enz)) {push @fragments, $_->{seq}}
	46					56
457	18					91	return @fragments;
458							}
459
460							=head2 fragment_maps
461
462							Title : fragment_maps
463							Function : Retrieves fragment sequences with start and end
464							points. Useful for feature construction.
465
466							Returns : An array containing a hash reference for each fragment,
467							containing the start point, end point and DNA
468							sequence. The hash keys are 'start', 'end' and
469							'seq'. Returns an empty array if not defined.
470
471							Arguments : An enzyme name, enzyme object,
472							or enzyme collection to retrieve the fragments for.
473
474							If passes an enzyme collection it will return the result of a multiple
475							digest. This : will also cause the special enzyme 'multiple_digest' to
476							be created so you can get : other information about this multiple
477							digest. (TMTOWTDI).
478
479							There is a minor problem with this and $self-Efragments that I
480							haven't got a good answer for (at the moment). If the sequence is not
481							cut, do we return undef, or the whole sequence?
482
483							For linear fragments it would be good to return the whole
484							sequence. For circular fragments I am not sure.
485
486							At the moment it returns the whole sequence with start of 1 and end of
487							length of the sequence. For example:
488
489							use Bio::Restriction::Analysis;
490							use Bio::Restriction::EnzymeCollection;
491							use Bio::PrimarySeq;
492
493							my $seq = Bio::PrimarySeq->new
494							(-seq =>'AGCTTAATTCATTAGCTCTGACTGCAACGGGCAATATGTCTCTGTGTGGATCCAAAAAAGAGTGAGCTTCTGAT',
495							-primary_id => 'synopsis',
496							-molecule => 'dna');
497
498							my $ra = Bio::Restriction::Analysis->new(-seq=>$seq);
499
500							my @gel;
501							my @bam_maps = $ra->fragment_maps('BamHI');
502							foreach my $i (@bam_maps) {
503							my $start = $i->{start};
504							my $end = $i->{end};
505							my $sequence = $i->{seq};
506							push @gel, "$start--$sequence--$end";
507							@gel = sort {length $b <=> length $a} @gel;
508							}
509							print join("\n", @gel) . "\n";
510
511							=cut
512
513							sub fragment_maps {
514	21			21	1	24	my ($self, $enz) = @_;
515	21	100				45	$self->cut unless $self->{'_cut'};
516	21	50				41	$self->throw('no enzyme selected to get fragment maps for')
517							unless $enz;
518
519							# we are going to generate this on an as-needed basis rather than
520							# for every enzyme this should cut down on the amount of
521							# duplicated data we are trying to save in memory and make this
522							# faster and easier for large sequences, e.g. genome analysis
523
524	21					21	my @cut_positions;
525	21	100	66			144	if (ref $enz eq '' && exists $self->{'_cut_positions'}->{$enz}) {
		100
		100
526	13					15	@cut_positions=@{$self->{'_cut_positions'}->{$enz}};
	13					33
527							} elsif ($enz->isa("Bio::Restriction::EnzymeI")) {
528	5					5	@cut_positions=@{$self->{'_cut_positions'}->{$enz->name}};
	5					15
529							} elsif ($enz->isa("Bio::Restriction::EnzymeCollection")) {
530	2					7	$self->cut('multiple', $enz);
531	2					2	@cut_positions=@{$self->{'_cut_positions'}->{'multiple_digest'}};
	2					10
532							}
533
534	21	100				45	unless (defined($cut_positions[0])) {
535							# it doesn't cut
536							# return the whole sequence
537							# this should probably have the is_circular command
538							my %map=(
539							'start' => 1,
540							'end' => $self->{'_seq'}->length,
541	4					14	'seq' => $self->{'_seq'}->seq
542							);
543	4					6	push (@{$self->{'_frag_map_list'}->{$enz}}, \%map);
	4					12
544							return defined $self->{'_frag_map_list'}->{$enz} ?
545	4	50				11	@{$self->{'_frag_map_list'}->{$enz}} : ();
	4					11
546							}
547
548	17					44	@cut_positions=sort {$a <=> $b} @cut_positions;
	109					83
549	17					35	push my @cuts, $cut_positions[0];
550	17					21	foreach my $i (@cut_positions) {
551	72	100				123	push @cuts, $i if $i != $cuts[$#cuts];
552							}
553
554	17					20	my $start=1; my $stop; my %seq; my %stop;
	17					18
	0					0
555	17					22	foreach $stop (@cuts) {
556	67	100				93	next if !$stop; # cuts at beginning of sequence
557	66					144	$seq{$start}=$self->{'_seq'}->subseq($start, $stop);
558	66					86	$stop{$start}=$stop;
559	66					68	$start=$stop+1;
560							}
561	17					40	$stop=$self->{'_seq'}->length;
562	17	50				33	if ($start > $stop) {
563							# borderline case. The enzyme cleaved at the end of the sequence
564							# what do I do now?
565							}
566							else {
567	17					34	$seq{$start}=$self->{'_seq'}->subseq($start, $stop);
568	17					33	$stop{$start}=$stop;
569							}
570
571	17	100				43	if ($self->{'_seq'}->is_circular) {
572							# join the first and last fragments
573	6					13	$seq{$start}.=$seq{'1'};
574	6					10	delete $seq{'1'};
575	6					9	$stop{$start}=$stop{'1'};
576	6					6	delete $stop{'1'};
577							}
578
579	17					61	foreach my $start (sort {$a <=> $b} keys %seq) {
	132					131
580							my %map=(
581							'start' => $start,
582							'end' => $stop{$start},
583	77					166	'seq' => $seq{$start}
584							);
585	77					49	push (@{$self->{'_frag_map_list'}->{$enz}}, \%map);
	77					201
586							}
587
588							return defined $self->{'_frag_map_list'}->{$enz} ?
589	17	50				43	@{$self->{'_frag_map_list'}->{$enz}} : ();
	17					86
590							}
591
592
593							=head2 sizes
594
595							Title : sizes
596							Function : Retrieves an array with the sizes of the fragments
597							Returns : Array that has the sizes of the fragments ordered from
598							largest to smallest like they would appear in a gel.
599							Arguments: An enzyme name to retrieve the sizes for is required and
600							kilobases to the nearest 0.1 kb, else it will be in
601							bp. If the optional third entry is set the results will
602							be sorted.
603
604							This is designed to make it easy to see what fragments you should get
605							on a gel!
606
607							You should be able to do these:
608
609							# to see all the fragment sizes,
610							print join "\n", $re->sizes($enz), "\n";
611							# to see all the fragment sizes sorted
612							print join "\n", $re->sizes($enz, 0, 1), "\n";
613							# to see all the fragment sizes in kb sorted
614							print join "\n", $re->sizes($enz, 1, 1), "\n";
615
616							=cut
617
618							sub sizes {
619	3			3	1	7	my ($self, $enz, $kb, $sort) = @_;
620	3	50				8	$self->throw('no enzyme selected to get fragments for')
621							unless $enz;
622
623	3	100				11	if (blessed($enz)) {
624	1	50				5	$self->throw("Enzyme must be enzyme name or a Bio::Restriction::EnzymeI, not ".ref($enz))
625							if !$enz->isa('Bio::Restriction::EnzymeI');
626	1					2	$enz = $enz->name;
627							}
628	3	50				8	$self->cut unless $self->{'_cut'};
629	3					3	my @frag; my $lastsite=0;
	3					4
630
631	3					3	foreach my $site (@{$self->{'_cut_positions'}->{$enz}}) {
	3					8
632	9	50				12	$kb ? push (@frag, (int($site-($lastsite))/100)/10)
633							: push (@frag, $site-($lastsite));
634	9					10	$lastsite=$site;
635							}
636							$kb ? push (@frag, (int($self->{'_seq'}->length-($lastsite))/100)/10)
637	3	50				13	: push (@frag, $self->{'_seq'}->length-($lastsite));
638	3	100				6	if ($self->{'_seq'}->is_circular) {
639	1					2	my $first=shift @frag;
640	1					2	my $last=pop @frag;
641	1					2	push @frag, ($first+$last);
642							}
643	3	50				4	$sort ? @frag = sort {$b <=> $a} @frag : 1;
	0					0
644
645	3					13	return @frag;
646							}
647
648							=head1 How many times does enzymes X cut?
649
650							=cut
651
652							=head2 cuts_by_enzyme
653
654							Title : cuts_by_enzyme
655							Function : Return the number of cuts for an enzyme
656							Returns : An integer with the number of times each enzyme cuts.
657							Returns 0 if doesn't cut or undef if not defined
658							Arguments : An enzyme name string
659
660
661							=cut
662
663							sub cuts_by_enzyme {
664	4			4	1	6	my ($self, $enz)=@_;
665
666	4	50				10	$self->throw("Need an enzyme name")
667							unless defined $enz;
668	4	50				10	$self->cut unless $self->{'_cut'};
669	4					14	return $self->{'_number_of_cuts_by_enzyme'}->{$enz};
670							}
671
672							=head1 Which enzymes cut the sequence N times?
673
674							=cut
675
676							=head2 cutters
677
678							Title : cutters
679							Function : Find enzymes that cut a given number of times
680							Returns : a Bio::Restriction::EnzymeCollection
681							Arguments : 1. exact time or lower limit,
682							non-negative integer, optional
683							2. upper limit, non-negative integer,
684							larger or equalthan first, optional
685
686
687							If no arguments are given, the method returns all enzymes that do cut
688							the sequence. The argument zero, '0', is same as method
689							zero_cutters(). The argument one, '1', corresponds to unique_cutters.
690							If either of the limits is larger than number of cuts any enzyme cuts the
691							sequence, the that limit is automagically lowered. The method max_cuts()
692							gives the largest number of cuts.
693
694							See Also : L,
695							L, L
696
697							=cut
698
699							sub cutters {
700	5			5	1	7	my ($self, $a, $z) = @_;
701
702	5	100				15	$self->cut unless $self->{'_cut'};
703
704	5					5	my ($start, $end);
705	5	100				11	if (defined $a) {
706	4	50				19	$self->throw("Need a non-zero integer [$a]")
707							unless $a =~ /^[+]?\d+$/;
708	4					5	$start = $a;
709							} else {
710	1					2	$start = 1;
711							}
712	5	50				11	$start = $self->{'maximum_cuts'} if $start > $self->{'maximum_cuts'};
713
714	5	50				11	if (defined $z) {
		100
715	0	0	0			0	$self->throw("Need a non-zero integer no smaller than start [0]")
716							unless $z =~ /^[+]?\d+$/ and $z >= $a;
717	0					0	$end = $z;
718							}
719							elsif (defined $a) {
720	4					5	$end = $start;
721							} else {
722	1					2	$end = $self->{'maximum_cuts'};
723							}
724	5	50				13	$end = $self->{'maximum_cuts'} if $end > $self->{'maximum_cuts'};
725	5					22	my $set = Bio::Restriction::EnzymeCollection->new(-empty => 1);
726
727							#return an empty set if nothing cuts
728	5	50				10	return $set unless $self->{'maximum_cuts'};
729
730	5					11	for (my $i=$start; $i<=$end; $i++) {
731	8					30	$set->enzymes( @{$self->{_number_of_cuts_by_cuts}->{$i}} )
732	13	100				25	if defined $self->{_number_of_cuts_by_cuts}->{$i};
733							}
734
735	5					22	return $set;
736							}
737
738
739							=head2 unique_cutters
740
741							Title : unique_cutters
742							Function : A special case if cutters() where enzymes only cut once
743							Returns : a Bio::Restriction::EnzymeCollection
744							Arguments : -
745
746
747							See also: L, L
748
749							=cut
750
751							sub unique_cutters {
752	2			2	1	8	shift->cutters(1);
753							}
754
755							=head2 zero_cutters
756
757							Title : zero_cutters
758							Function : A special case if cutters() where enzymes don't cut the sequence
759							Returns : a Bio::Restriction::EnzymeCollection
760							Arguments : -
761
762							See also: L, L
763
764							=cut
765
766							sub zero_cutters {
767	1			1	1	3	shift->cutters(0);
768							}
769
770							=head2 max_cuts
771
772							Title : max_cuts
773							Function : Find the most number of cuts
774							Returns : The number of times the enzyme that cuts most cuts.
775							Arguments : None
776
777							This is not a very practical method, but if you are curious...
778
779							=cut
780
781	1			1	1	4	sub max_cuts { return shift->{maximum_cuts} }
782
783							=head1 Internal methods
784
785							=cut
786
787							=head2 _cuts
788
789							Title : _cuts
790							Function : Figures out which enzymes we know about and cuts the sequence.
791							Returns : Nothing.
792							Arguments : None.
793							Comments : An internal method. This will figure out where the sequence
794							should be cut, and provide the appropriate results.
795
796							=cut
797
798							sub _cuts {
799	7			7		11	my $self = shift;
800
801	7					34	my $target_seq=uc $self->{'_seq'}->seq; # I have been burned on this before :)
802
803
804							# first, find out all the enzymes that we have
805	7					30	foreach my $enz ($self->{'_enzymes'}->each_enzyme) {
806	9575					6150	my @all_cuts;
807	9575	100				22804	my @others = $enz->others if $enz->can("others");
808	9575					8692	foreach my $enzyme ($enz, @others) {
809							# cut the sequence
810							# _make_cuts handles all cases (amibiguous, non-ambiguous) X
811							# (palindromic X non-palindromic)
812							#
813	9641					11358	my $cut_positions = $self->_make_cuts($target_seq, $enzyme);
814
815	9641					7988	push @all_cuts, @$cut_positions;
816
817							#### need to refactor circular handling....
818							####
819
820							# deal with is_circular sequences
821	9641	100				18008	if ($self->{'_seq'}->is_circular) {
822	4286					6352	$cut_positions=$self->_circular($target_seq, $enzyme);
823	4286					4811	push @all_cuts, @$cut_positions;
824							}
825							# non-symmetric cutters (most external cutters, e.g.) need
826							# special handling
827	9641	100				14507	unless ($enzyme->is_symmetric) {
828							# do all of above with explicit use of the
829							# enzyme's 'complementary_cut'...
830
831	868					1304	$cut_positions = $self->_make_cuts($target_seq, $enzyme, 'COMP');
832	868					991	push @all_cuts, @$cut_positions;
833							# deal with is_circular sequences
834	868	100				1714	if ($self->{'_seq'}->is_circular) {
835	432					870	$cut_positions=$self->_circular($target_seq, $enzyme, 'COMP');
836	432					842	push @all_cuts, @$cut_positions;
837							}
838							}
839							}
840
841	9575	100				11060	if (defined $all_cuts[0]) {
842							# now just remove any duplicate cut sites
843	1717					2843	@all_cuts = sort {$a <=> $b} @all_cuts;
	1350					1668
844	1717					1225	push @{$self->{'_cut_positions'}->{$enz->name}}, $all_cuts[0];
	1717					2952
845	1717					2054	foreach my $i (@all_cuts) {
846	565					840	push @{$self->{'_cut_positions'}->{$enz->name}}, $i
847	2636	100				1690	if $i != ${$self->{'_cut_positions'}->{$enz->name}}[$#{$self->{'_cut_positions'}->{$enz->name}}];
	2636					3198
	2636					3456
848							}
849							} else {
850							# this just fixes an eror when @all_cuts is not defined!
851	7858					5521	@{$self->{'_cut_positions'}->{$enz->name}}=();
	7858					12019
852							}
853
854							# note I have removed saving any other information except the
855							# cut_positions this should significantly decrease the amount
856							# of memory that is required for large sequences. It should
857							# also speed things up dramatically, because fragments and
858							# fragment maps are only calculated for those enzymes they are
859							# needed for.
860
861							# finally, save minimal information about each enzyme
862	9575					7517	my $number_of_cuts=scalar @{$self->{'_cut_positions'}->{$enz->name}};
	9575					12827
863							# now just store the number of cuts
864	9575					14128	$self->{_number_of_cuts_by_enzyme}->{$enz->name}=$number_of_cuts;
865	9575					7675	push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, $enz);
	9575					13517
866	9575	100				17667	if ($number_of_cuts > $self->{maximum_cuts}) {
867	24					48	$self->{maximum_cuts}=$number_of_cuts;
868							}
869
870							}
871							}
872
873							=head2 _enzyme_sites
874
875							Title : _enzyme_sites
876							Function : An internal method to figure out the two sides of an enzyme
877							Returns : The sequence before the cut and the sequence after the cut
878							Arguments : A Bio::Restriction::Enzyme object,
879							$comp : boolean, calculate based on $enz->complementary_cut()
880							if true, $enz->cut() if false
881							Status : NOW DEPRECATED - maj
882
883							=cut
884
885							sub _enzyme_sites {
886	0			0		0	my ($self, $enz, $comp )=@_;
887							# get the cut site
888							# I have reworked this so that it uses $enz->cut to get the site
889
890	0	0				0	my $site= ( $comp ? $enz->complementary_cut : $enz->cut );
891							# split it into the two fragments for the sequence before and after.
892	0	0				0	$site=0 unless defined $site;
893
894							# the default values just stop an error from an undefined
895							# string. But they don't affect the split.
896	0					0	my ($beforeseq, $afterseq)= ('.', '.');
897
898							# extra-site cutting
899							# the before seq is going to be the entire site
900							# the after seq is empty
901							# BUT, need to communicate how to cut within the sample sequence
902							# relative to the end of the site (do through $enz->cut), and
903							# ALSO, need to check length of sample seq so that if cut falls
904							# outside the input sequence, we have a warning/throw. /maj
905
906							# pre-site cutting
907							# need to handle negative site numbers
908
909	0	0				0	if ($site <= 0) { # <= to handle pre-site cutting
		0
910	0					0	$afterseq=$enz->string;
911							}
912							elsif ($site >= $enz->seq->length) { # >= to handle extrasite cutters/maj
913	0					0	$beforeseq=$enz->string;
914							}
915							else { # $site < $enz->seq->length
916	0					0	$beforeseq=$enz->seq->subseq(1, $site);
917	0					0	$afterseq=$enz->seq->subseq($site+1, $enz->seq->length);
918							}
919							# if the enzyme is ambiguous we need to convert this into a perl string
920	0	0				0	if ($enz->is_ambiguous) {
921	0					0	$beforeseq=$self->_expanded_string($beforeseq);
922	0					0	$afterseq =$self->_expanded_string($afterseq);
923							}
924
925	0					0	return ($beforeseq, $afterseq);
926							}
927
928
929							=head2 _non_pal_enz
930
931							Title : _non_pal_enz
932							Function : Analyses non_palindromic enzymes for cuts in both ways
933							(in fact, delivers only minus strand cut positions in the
934							plus strand coordinates/maj)
935							Returns : A reference to an array of cut positions
936							Arguments: The sequence to check and the enzyme object
937							NOW DEPRECATED/maj
938
939							=cut
940
941							sub _non_pal_enz {
942	0			0		0	my ($self, $target_seq, $enz) =@_;
943							# add support for non-palindromic sequences
944							# the enzyme is not the same forwards and backwards
945
946	0					0	my $site=$enz->complementary_cut;
947							# complementary_cut is in plus strand coordinates
948
949							# we are going to rc the sequence, so complementary_cut becomes length-complementary_cut
950
951							# I think this is wrong; cut sites are a matter of position with respect
952							# to the plus strand: the recognition site is double stranded and
953							# directly identifiable on the plus strand sequence. /maj
954
955							# what really needs doing is to keep track of plus strand and minus strand
956							# nicks separately./maj
957
958	0					0	my ($beforeseq, $afterseq)=('.', '.');
959
960							# now, for extra-site cuts, $site > length...so...?/maj
961	0					0	my $new_left_cut=$enz->seq->length-$site;
962							# there is a problem when this is actually zero
963
964	0	0				0	if ($new_left_cut == 0) {$afterseq=$enz->seq->revcom->seq}
	0	0				0
965	0					0	elsif ($new_left_cut == $enz->seq->length) {$beforeseq=$enz->seq->revcom->seq}
966							else {
967							# this can't be right./maj
968	0					0	$beforeseq=$enz->seq->revcom->subseq(1, ($enz->seq->length-$site));
969	0					0	$afterseq=$enz->seq->revcom->subseq(($enz->seq->length-$site), $enz->seq->length);
970							}
971
972							# do this correctly, in the context of the current code design,
973							# by providing a "complement" argument to _ambig_cuts and _nonambig_cuts,
974							# use these explicitly rather than this wrapper./maj
975
976	0					0	my $results=[];
977	0	0				0	if ($enz->is_ambiguous) {
978	0					0	$results= $self->_ambig_cuts($beforeseq, $afterseq, $target_seq, $enz);
979							} else {
980	0					0	$results= $self->_nonambig_cuts($beforeseq, $afterseq, $target_seq, $enz);
981							}
982
983							# deal with is_circular
984	0					0	my $more_results=[];
985							$more_results=$self->_circular($beforeseq, $afterseq, $enz)
986	0	0				0	if ($self->{'_seq'}->is_circular);
987
988	0					0	return [@$more_results, @$results];
989							}
990
991							=head2 _ambig_cuts
992
993							Title : _ambig_cuts
994							Function : An internal method to localize the cuts in the sequence
995							Returns : A reference to an array of cut positions
996							Arguments : The separated enzyme site, the target sequence, and the enzyme object
997							Comments : This is a slow implementation but works for ambiguous sequences.
998							Whenever possible, _nonambig_cuts should be used as it is a lot faster.
999
1000							=cut
1001
1002							# we have problems here when the cut is extrasite: $beforeseq/$afterseq do
1003							# not define the cut site then! I am renaming this to _ambig_cuts_depr,
1004							# providing a more compact method that correctly handles extrasite cuts
1005							# below /maj
1006
1007							sub _ambig_cuts_depr {
1008	0			0		0	my ($self, $beforeseq, $afterseq, $target_seq, $enz) = @_;
1009
1010							# cut the sequence. This is done with split so we can use
1011							# regexp.
1012	0					0	$target_seq = uc $target_seq;
1013	0					0	my @cuts = split /($beforeseq)($afterseq)/i, $target_seq;
1014							# now the array has extra elements --- the before and after!
1015							# we have:
1016							# element 0 sequence
1017							# element 1 3' end
1018							# element 2 5' end of next sequence
1019							# element 3 sequence
1020							# ....
1021
1022							# we need to loop through the array and add the ends to the
1023							# appropriate parts of the sequence
1024
1025	0					0	my $i=0;
1026	0					0	my @re_frags;
1027	0	0				0	if ($#cuts) { # there is >1 element
1028	0					0	while ($i<$#cuts) {
1029	0					0	my $joinedseq;
1030							# the first sequence is a special case
1031	0	0				0	if ($i == 0) {
1032	0					0	$joinedseq=$cuts[$i].$cuts[$i+1];
1033							} else {
1034	0					0	$joinedseq=$cuts[$i-1].$cuts[$i].$cuts[$i+1];
1035							}
1036							# now deal with overlapping sequences
1037							# we can do this through a regular regexp as we only
1038							# have a short fragment to look through
1039
1040	0					0	while ($joinedseq =~ /$beforeseq$afterseq/) {
1041	0					0	$joinedseq =~ s/^(.*?$beforeseq)($afterseq)/$2/;
1042	0					0	push @re_frags, $1;
1043							}
1044	0					0	push @re_frags, $joinedseq;
1045	0					0	$i+=3;
1046							}
1047
1048							# I don't think we want the last fragment in. It is messing up the _circular
1049							# part of things. So I deleted this part of the code :)
1050
1051							} else {
1052							# if we don't cut, leave the array empty
1053	0					0	return [];
1054							} # the sequence was not cut.
1055
1056							# now @re_frags has the fragments of all the sequences
1057							# but some people want to have this return the lengths
1058							# of the fragments.
1059
1060							# in theory the actual cut sites should be the length
1061							# of the fragments in @re_frags
1062
1063							# note, that now this is the only data that we are saving. We
1064							# will have to go back add regenerate re_frags. The reason is
1065							# that we can use this in _circular easier
1066
1067	0					0	my @cut_positions = map {length($_)} @re_frags;
	0					0
1068
1069							# the cut positions are right now the lengths of the sequence, but
1070							# we need to add them all onto each other
1071
1072	0					0	for (my $i=1; $i<=$#cut_positions; $i++) {
1073	0					0	$cut_positions[$i]+=$cut_positions[$i-1];
1074							}
1075
1076							# in one of those oddities in life, 2 fragments mean an enzyme cut once
1077							# so $#re_frags is the number of cuts
1078	0					0	return \@cut_positions;
1079							}
1080
1081							# new version/maj
1082
1083							sub _ambig_cuts {
1084	0			0		0	my ($self, $before, $after, $target, $enz, $comp) = @_;
1085	0	0				0	my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);
1086	0					0	local $_ = uc $target;
1087	0					0	my @cuts;
1088	0					0	my $recog = $enz->recog;
1089	0					0	my $site_re = qr/($recog)/;
1090	0					0	push @cuts, pos while (/$site_re/g);
1091	0					0	$_ = $_ - length($enz->recog) + $cut_site for @cuts;
1092	0					0	return [@cuts];
1093							}
1094
1095							=head2 _nonambig_cuts
1096
1097							Title : _nonambig_cuts
1098							Function : Figures out which enzymes we know about and cuts the sequence.
1099							Returns : Nothing.
1100							Arguments : The separated enzyme site, the target sequence, and the enzyme object
1101
1102							An internal method. This will figure out where the sequence should be
1103							cut, and provide the appropriate results. This is a much faster
1104							implementation because it doesn't use a regexp, but it can not deal
1105							with ambiguous sequences
1106
1107							=cut
1108
1109							# now, DO want the enzyme object.../maj
1110
1111							sub _nonambig_cuts {
1112	0			0		0	my ($self, $beforeseq, $afterseq, $target_seq, $enz, $comp) = @_;
1113	0	0				0	my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);
1114	0	0				0	if ($beforeseq eq ".") {$beforeseq = ''}
	0					0
1115	0	0				0	if ($afterseq eq ".") {$afterseq = ''}
	0					0
1116	0					0	$target_seq = uc $target_seq;
1117							# my $index_posn=index($target_seq, $beforeseq.$afterseq);
1118	0					0	my $index_posn=index($target_seq, $enz->recog);
1119	0	0				0	return [] if ($index_posn == -1); # there is no match to the sequence
1120
1121							# there is at least one cut site
1122	0					0	my @cuts;
1123	0					0	while ($index_posn > -1) {
1124							# extrasite cutting issue here...
1125							# think we want $index_posn+$enz->cut
1126							# push (@cuts, $index_posn+length($beforeseq));
1127	0					0	push (@cuts, $index_posn+$cut_site);
1128							# $index_posn=index($target_seq, $beforeseq.$afterseq, $index_posn+1);
1129	0					0	$index_posn=index($target_seq, $enz->recog, $index_posn+1);
1130							}
1131
1132	0					0	return \@cuts;
1133							}
1134
1135							=head2 _make_cuts
1136
1137							Title : _make_cuts
1138							Usage : $an->_make_cuts( $target_sequence, $enzyme, $complement_q )
1139							Function: Returns an array of cut sites on target seq, using enzyme
1140							on the plus strand ($complement_q = 0) or minus strand
1141							($complement_q = 1); follows Enzyme objects in
1142							$enzyme->others()
1143							Returns : array of scalar integers
1144							Args : sequence string, B:R:Enzyme object, boolean
1145
1146							=cut
1147
1148							sub _make_cuts {
1149	3			3		16	no warnings qw( uninitialized );
	3					3
	3					2017
1150
1151	15249			15249		18169	my ($self, $target, $enz, $comp) = @_;
1152	15249					21189	local $_ = uc $target;
1153
1154	15249					10747	my @cuts;
1155
1156	15249	50				32501	my @enzs = map { $_ \|\| () } ($enz, $enz->can('others') ? $enz->others : ());
	15481	100				32923
1157							ENZ:
1158	15249					14179	foreach $enz (@enzs) {
1159	15481					23409	my $recog = $enz->recog;
1160	15481	100				26542	my $cut_site = ($comp ? $enz->complementary_cut : $enz->cut);
1161	15481					10644	my @these_cuts;
1162
1163	15481	100				30646	if ( $recog =~ /[^\w]/ ) { # "ambig"
1164	5810					44908	my $site_re = qr/($recog)/;
1165	5810					30176	push @these_cuts, pos while (/$site_re/g);
1166	5810					8316	$_ = $_ - length($enz->string) + $cut_site for @these_cuts;
1167	5810	100				10025	if (!$enz->is_palindromic) {
1168	864					1459	pos = 0;
1169	864					791	my @these_rev_cuts;
1170	864					1238	$recog = $enz->revcom_recog;
1171	864	100				1240	$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut);
1172	864					3742	$site_re = qr/($recog)/;
1173	864					3515	push @these_rev_cuts, pos while (/$site_re/g);
1174	864					1277	$_ = $_ - length($enz->string) + $cut_site for @these_rev_cuts;
1175	864					1148	push @these_cuts, @these_rev_cuts;
1176							}
1177							}
1178							else { # "nonambig"
1179	9671					13714	my $index_posn=index($_, $recog);
1180	9671					13915	while ($index_posn > -1) {
1181	1547					1750	push (@these_cuts, $index_posn+$cut_site);
1182	1547					2916	$index_posn=index($_, $recog, $index_posn+1);
1183							}
1184	9671	100				13417	if (!$enz->is_palindromic) {
1185	1943					3028	$recog = $enz->revcom_recog;
1186	1943	100				3023	$cut_site = length($enz->string) - ($comp ? $enz->cut : $enz->complementary_cut);
1187	1943					2962	$index_posn=index($_, $recog);
1188	1943					3346	while ($index_posn > -1) {
1189	246					262	push @these_cuts, $index_posn+$cut_site;
1190	246					476	$index_posn=index($_, $recog, $index_posn+1);
1191							}
1192							}
1193							}
1194	15481					20834	push @cuts, @these_cuts;
1195							}
1196	15249					22747	return [@cuts];
1197							}
1198
1199							=head2 _multiple_cuts
1200
1201							Title : _multiple_cuts
1202							Function : Figures out multiple digests
1203							Returns : An array of the cut sites for multiply digested DNA
1204							Arguments : A Bio::Restriction::EnzymeCollection object
1205							Comments : Double digests is one subset of this, but you can use
1206							as many enzymes as you want.
1207
1208							=cut
1209
1210							sub _multiple_cuts {
1211	3			3		5	my ($self, $ec)=@_;
1212	3	50				10	$self->cut unless $self->{'_cut'};
1213
1214							# now that we are using positions rather than fragments
1215							# this is really easy
1216	3					7	my @cuts;
1217	3					13	foreach my $enz ($ec->each_enzyme) {
1218	15					26	push @cuts, @{$self->{'_cut_positions'}->{$enz->name}}
1219	15	50				33	if defined $self->{'_cut_positions'}->{$enz->name};
1220							}
1221	3					16	@{$self->{'_cut_positions'}->{'multiple_digest'}}=sort {$a <=> $b} @cuts;
	3					15
	87					65
1222
1223	3					6	my $number_of_cuts;
1224
1225	3					5	$number_of_cuts=scalar @{$self->{'_cut_positions'}->{'multiple_digest'}};
	3					8
1226	3					7	$self->{_number_of_cuts_by_enzyme}->{'multiple_digest'}=$number_of_cuts;
1227	3					5	push (@{$self->{_number_of_cuts_by_cuts}->{$number_of_cuts}}, 'multiple_digest');
	3					11
1228	3	50				16	if ($number_of_cuts > $self->{maximum_cuts}) {
1229	0					0	$self->{maximum_cuts}=$number_of_cuts;
1230							}
1231							}
1232
1233
1234							=head2 _circular
1235
1236							Title : _circular
1237							Function : Identifies cuts at the join of the end of the target with
1238							the beginning of the target
1239							Returns : array of scalar integers ( cut sites near join, if any )
1240							Arguments : scalar string (target sequence), Bio::Restriction::Enzyme obj
1241
1242							=cut
1243
1244							sub _circular {
1245	4718			4718		4972	my ($self, $target, $enz, $comp) = @_;
1246	4718					6780	$target=uc $target;
1247	4718	50				7535	my $patch_len = ( length $target > 20 ? 10 : int( length($target)/2 ) );
1248
1249	4718					7850	my ($first, $last) =
1250							(substr($target, 0, $patch_len),substr($target, -$patch_len));
1251	4718					5328	my $patch=$last.$first;
1252
1253							# now find the cut sites
1254
1255	4718					6205	my $cut_positions = $self->_make_cuts($patch, $enz, $comp);
1256
1257							# the enzyme doesn't cut in the new fragment
1258	4718	50				7124	return [] if (!$cut_positions);
1259
1260							# now we are going to add things to _cut_positions
1261							# in this shema it doesn't matter if the site is there twice -
1262							# we will take care of that later. Because we are using position
1263							# rather than frag or anything else, we can just
1264							# remove duplicates.
1265	4718					3476	my @circ_cuts;
1266	4718					5665	foreach my $cut (@$cut_positions) {
1267	275	100				867	if ($cut == length($last)) {
		100
1268							# the cut is actually at position 0, but we're going to call this the
1269							# length of the sequence so we don't confuse no cuts with a 0 cut
1270							# push (@circ_cuts, $self->{'_seq'}->length);
1271	2					8	push (@circ_cuts, 0);
1272
1273							}
1274							elsif ($cut < length($last)) {
1275							# the cut is before the end of the sequence
1276							#check
1277	150					512	push (@circ_cuts, $self->{'_seq'}->length - (length($last) - $cut));
1278							}
1279							else {
1280							# the cut is at the start of the sequence (position >=1)
1281
1282							# note, we put this at the beginning of the array rather than the end!
1283	123					374	unshift (@circ_cuts, $cut-length($last));
1284							}
1285							}
1286	4718					8302	return \@circ_cuts;
1287							}
1288
1289
1290
1291
1292
1293							=head2 _expanded_string
1294
1295							Title : _expanded_string
1296							Function : Expand nucleotide ambiguity codes to their representative letters
1297							Returns : The full length string
1298							Arguments : The string to be expanded.
1299
1300							Stolen from the original RestrictionEnzyme.pm
1301
1302							=cut
1303
1304
1305							sub _expanded_string {
1306	0			0			my ($self, $str) = @_;
1307
1308	0						$str =~ s/N\|X/\./g;
1309	0						$str =~ s/R/\[AG\]/g;
1310	0						$str =~ s/Y/\[CT\]/g;
1311	0						$str =~ s/S/\[GC\]/g;
1312	0						$str =~ s/W/\[AT\]/g;
1313	0						$str =~ s/M/\[AC\]/g;
1314	0						$str =~ s/K/\[TG\]/g;
1315	0						$str =~ s/B/\[CGT\]/g;
1316	0						$str =~ s/D/\[AGT\]/g;
1317	0						$str =~ s/H/\[ACT\]/g;
1318	0						$str =~ s/V/\[ACG\]/g;
1319
1320	0						return $str;
1321							}
1322
1323
1324							1;