File Coverage

Bio/Tools/SeqPattern.pm
Criterion Covered Total %
statement 174 244 71.3
branch 39 72 54.1
condition 5 17 29.4
subroutine 17 18 94.4
pod 3 7 42.8
total 238 358 66.4


line stmt bran cond sub pod time code
1             #
2             # bioperl module for Bio::Tools::SeqPattern
3             #
4             # Please direct questions and support issues to
5             #
6             # Cared for by Steve Chervitz (sac-at-bioperl.org)
7             #
8             # Copyright Steve Chervitz
9             #
10             # You may distribute this module under the same terms as perl itself
11              
12             # POD documentation - main docs before the code
13              
14             =head1 NAME
15              
16             Bio::Tools::SeqPattern - represent a sequence pattern or motif
17              
18             =head1 SYNOPSIS
19              
20             use Bio::Tools::SeqPattern;
21              
22             my $pat1 = 'T[GA]AA...TAAT';
23             my $pattern1 = Bio::Tools::SeqPattern->new(-SEQ =>$pat1, -TYPE =>'Dna');
24              
25             my $pat2 = '[VILM]R(GXX){3,2}...[^PG]';
26             my $pattern2 = Bio::Tools::SeqPattern->new(-SEQ =>$pat2, -TYPE =>'Amino');
27              
28             =head1 DESCRIPTION
29              
30             L module encapsulates generic data and
31             methods for manipulating regular expressions describing nucleic or
32             amino acid sequence patterns (a.k.a, "motifs"), such as the ones produced by
33             L.
34              
35             L is a concrete class that inherits from L.
36              
37             This class grew out of a need to have a standard module for doing routine
38             tasks with sequence patterns such as:
39              
40             -- Forming a reverse-complement version of a nucleotide sequence pattern
41             -- Expanding patterns containing ambiguity codes
42             -- Checking for invalid regexp characters
43             -- Untainting yet preserving special characters in the pattern
44              
45             Other features to look for in the future:
46              
47             -- Full pattern syntax checking
48             -- Conversion between expanded and condensed forms of the pattern
49              
50             =head1 MOTIVATIONS
51              
52             A key motivation for L is to have a way to
53             generate a reverse complement of a nucleotide sequence pattern.
54             This makes possible simultaneous pattern matching on both sense and
55             anti-sense strands of a query sequence.
56              
57             In principle, one could do such a search more inefficiently by testing
58             against both sense and anti-sense versions of a sequence.
59             It is entirely equivalent to test a regexp containing both sense and
60             anti-sense versions of the *pattern* against one copy of the sequence.
61             The latter approach is much more efficient since:
62              
63             1) You need only one copy of the sequence.
64             2) Only one regexp is executed.
65             3) Regexp patterns are typically much smaller than sequences.
66              
67             Patterns can be quite complex and it is often difficult to
68             generate the reverse complement pattern. The Bioperl SeqPattern.pm
69             addresses this problem, providing a convenient set of tools
70             for working with biological sequence regular expressions.
71              
72             Not all patterns have been tested. If you discover a pattern that
73             is not handled properly by Bio::Tools::SeqPattern.pm, please
74             send me some email (sac@bioperl.org). Thanks.
75              
76             =head1 OTHER FEATURES
77              
78             =head2 Extended Alphabet Support
79              
80             This module supports the same set of ambiguity codes for nucleotide
81             sequences as supported by L. These ambiguity codes
82             define the behavior or the L method.
83              
84             ------------------------------------------
85             Symbol Meaning Nucleic Acid
86             ------------------------------------------
87             A A (A)denine
88             C C (C)ytosine
89             G G (G)uanine
90             T T (T)hymine
91             U U (U)racil
92             M A or C a(M)ino group
93             R A or G pu(R)ine
94             W A or T (W)eak bond
95             S C or G (S)trong bond
96             Y C or T p(Y)rimidine
97             K G or T (K)eto group
98             V A or C or G
99             H A or C or T
100             D A or G or T
101             B C or G or T
102             X G or A or T or C
103             N G or A or T or C
104             . G or A or T or C
105              
106              
107              
108             ------------------------------------------
109             Symbol Meaning
110             ------------------------------------------
111             A Alanine
112             C Cysteine
113             D Aspartic Acid
114             E Glutamic Acid
115             F Phenylalanine
116             G Glycine
117             H Histidine
118             I Isoleucine
119             K Lysine
120             L Leucine
121             M Methionine
122             N Asparagine
123             P Proline
124             Q Glutamine
125             R Arginine
126             S Serine
127             T Threonine
128             V Valine
129             W Tryptophan
130             Y Tyrosine
131              
132             B Aspartic Acid, Asparagine
133             Z Glutamic Acid, Glutamine
134             X Any amino acid
135             . Any amino acid
136              
137              
138             =head2 Multiple Format Support
139              
140             Ultimately, this module should be able to build SeqPattern.pm objects
141             using a variety of pattern formats such as ProSite, Blocks, Prints, GCG, etc.
142             Currently, this module only supports patterns using a grep-like syntax.
143              
144             =head1 USAGE
145              
146             A simple demo script called seq_pattern.pl is included in the examples/
147             directory of the central Bioperl distribution.
148              
149             =head1 SEE ALSO
150              
151             L - Lightweight sequence object.
152              
153             L - The IUPAC code for degenerate residues and their
154             conversion to a regular expression.
155              
156             =head1 FEEDBACK
157              
158             =head2 Mailing Lists
159              
160             User feedback is an integral part of the evolution of this and other
161             Bioperl modules. Send your comments and suggestions preferably to one
162             of the Bioperl mailing lists. Your participation is much appreciated.
163              
164             bioperl-l@bioperl.org - General discussion
165             http://bioperl.org/wiki/Mailing_lists - About the mailing lists
166              
167             =head2 Support
168              
169             Please direct usage questions or support issues to the mailing list:
170              
171             I
172              
173             rather than to the module maintainer directly. Many experienced and
174             reponsive experts will be able look at the problem and quickly
175             address it. Please include a thorough description of the problem
176             with code and data examples if at all possible.
177              
178             =head2 Reporting Bugs
179              
180             Report bugs to the Bioperl bug tracking system to help us keep track
181             the bugs and their resolution. Bug reports can be submitted via the
182             web:
183              
184             https://github.com/bioperl/bioperl-live/issues
185              
186             =head1 AUTHOR
187              
188             Steve Chervitz, sac-at-bioperl.org
189              
190             =head1 COPYRIGHT
191              
192             Copyright (c) 1997-8 Steve Chervitz. All Rights Reserved.
193             This module is free software; you can redistribute it and/or
194             modify it under the same terms as Perl itself.
195              
196             =cut
197              
198             #
199             ##
200             ###
201             #### END of main POD documentation.
202             ###
203             ##
204             #'
205             # CREATED : 28 Aug 1997
206              
207              
208             package Bio::Tools::SeqPattern;
209              
210 2     2   531 use base qw(Bio::Root::Root);
  2         2  
  2         335  
211 2     2   8 use strict;
  2         2  
  2         33  
212 2     2   5 use vars qw ($ID);
  2         2  
  2         4561  
213             $ID = 'Bio::Tools::SeqPattern';
214              
215             ## These constants may be more appropriate in a Bio::Dictionary.pm
216             ## type of class.
217             my $PURINES = 'AG';
218             my $PYRIMIDINES = 'CT';
219             my $BEE = 'DN';
220             my $ZED = 'EQ';
221             my $Regexp_chars = '\w,.\*()\[\]<>\{\}^\$'; # quoted for use in regexps
222              
223             ## Package variables used in reverse complementing.
224             my (%Processed_braces, %Processed_asterics);
225              
226             #####################################################################################
227             ## CONSTRUCTOR ##
228             #####################################################################################
229              
230             =head1 new
231              
232             Title : new
233             Usage : my $seqpat = Bio::Tools::SeqPattern->new();
234             Purpose : Verifies that the type is correct for superclass (Bio::Seq.pm)
235             : and calls superclass constructor last.
236             Returns : n/a
237             Argument : Parameters passed to new()
238             Throws : Exception if the pattern string (seq) is empty.
239             Comments : The process of creating a new SeqPattern.pm object
240             : ensures that the pattern string is untained.
241              
242             See Also : L,
243             L
244              
245             =cut
246              
247             #----------------
248             sub new {
249             #----------------
250 78     78 1 301 my($class, %param) = @_;
251              
252 78         203 my $self = $class->SUPER::new(%param);
253 78         228 my ($seq,$type) = $self->_rearrange([qw(SEQ TYPE)], %param);
254              
255 78 50       149 $seq || $self->throw("Empty pattern.");
256 78         72 my $t;
257             # Get the type ready for Bio::Seq.pm
258 78 100       203 if ($type =~ /nuc|[dr]na/i) {
    50          
259 71         67 $t = 'Dna';
260             } elsif ($type =~ /amino|pep|prot/i) {
261 7         6 $t = 'Amino';
262             }
263 78         87 $seq =~ tr/a-z/A-Z/; #ps 8/8/00 Canonicalize to upper case
264 78         123 $self->str($seq);
265 78         85 $self->type($t);
266              
267 78         177 return $self;
268             }
269              
270              
271             =head1 alphabet_ok
272              
273             Title : alphabet_ok
274             Usage : $mypat->alphabet_ok;
275             Purpose : Checks for invalid regexp characters.
276             : Overrides Bio::Seq::alphabet_ok() to allow
277             : additional regexp characters ,.*()[]<>{}^$
278             : in addition to the standard genetic alphabet.
279             : Also untaints the pattern and sets the sequence
280             : object's sequence to the untained string.
281             Returns : Boolean (1 | 0)
282             Argument : n/a
283             Throws : Exception if the pattern contains invalid characters.
284             Comments : Does not call the superclass method.
285             : Actually permits any alphanumeric, not just the
286             : standard genetic alphabet.
287              
288             =cut
289              
290             #----------------'
291             sub alphabet_ok {
292             #----------------
293 0     0 0 0 my( $self) = @_;
294              
295 0 0       0 return 1 if $self->{'_alphabet_checked'};
296              
297 0         0 $self->{'_alphabet_checked'} = 1;
298              
299 0         0 my $pat = $self->seq();
300              
301 0 0       0 if($pat =~ /[^$Regexp_chars]/io) {
302 0         0 $self->throw("Pattern contains invalid characters: $pat",
303             'Legal characters: a-z,A-Z,0-9,,.*()[]<>{}^$ ');
304             }
305              
306             # Untaint pattern (makes code taint-safe).
307 0         0 $pat =~ /([$Regexp_chars]+)/io;
308 0         0 $self->setseq(uc($1));
309             # print STDERR "\npattern ok: $pat\n";
310 0         0 1;
311             }
312              
313             =head1 expand
314              
315             Title : expand
316             Usage : $seqpat_object->expand();
317             Purpose : Expands the sequence pattern using special ambiguity codes.
318             Example : $pat = $seq_pat->expand();
319             Returns : String containing fully expanded sequence pattern
320             Argument : n/a
321             Throws : Exception if sequence type is not recognized
322             : (i.e., is not one of [DR]NA, Amino)
323              
324             See Also : L, L<_expand_pep>(), L<_expand_nuc>()
325              
326             =cut
327              
328             #----------
329             sub expand {
330             #----------
331 2     2 0 3 my $self = shift;
332              
333 2 100       3 if($self->type =~ /[DR]na/i) { $self->_expand_nuc(); }
  1 50       2  
334 1         2 elsif($self->type =~ /Amino/i) { $self->_expand_pep(); }
335             else{
336 0         0 $self->throw("Don't know how to expand ${\$self->type} patterns.\n");
  0         0  
337             }
338             }
339              
340              
341             =head1 _expand_pep
342              
343             Title : _expand_pep
344             Usage : n/a; automatically called by expand()
345             Purpose : Expands peptide patterns
346             Returns : String (the expanded pattern)
347             Argument : String (the unexpanded pattern)
348             Throws : n/a
349              
350             See Also : L(), L<_expand_nuc>()
351              
352             =cut
353              
354             #----------------
355             sub _expand_pep {
356             #----------------
357 1     1   2 my ($self,$pat) = @_;
358 1   33     4 $pat ||= $self->str;
359 1         2 $pat =~ s/X/./g;
360 1         2 $pat =~ s/^
361 1         1 $pat =~ s/>$/\$/;
362              
363             ## Avoid nested situations: [bmnq] --/--> [[$ZED]mnq]
364             ## Yet correctly deal with: fze[bmnq] ---> f[$BEE]e[$ZEDmnq]
365 1 50       3 if($pat =~ /\[\w*[BZ]\w*\]/) {
366 0         0 $pat =~ s/\[(\w*)B(\w*)\]/\[$1$ZED$2\]/g;
367 0         0 $pat =~ s/\[(\w*)Z(\w*)\]/\[$1$BEE$2\]/g;
368 0         0 $pat =~ s/B/\[$ZED\]/g;
369 0         0 $pat =~ s/Z/\[$BEE\]/g;
370             } else {
371 1         4 $pat =~ s/B/\[$ZED\]/g;
372 1         4 $pat =~ s/Z/\[$BEE\]/g;
373             }
374 1         1 $pat =~ s/\((.)\)/$1/g; ## Doing these last since:
375 1         3 $pat =~ s/\[(.)\]/$1/g; ## Pattern could contain [B] (for example)
376              
377 1         4 return $pat;
378             }
379              
380              
381              
382             =head1 _expand_nuc
383              
384             Title : _expand_nuc
385             Purpose : Expands nucleotide patterns
386             Returns : String (the expanded pattern)
387             Argument : String (the unexpanded pattern)
388             Throws : n/a
389              
390             See Also : L(), L<_expand_pep>()
391              
392             =cut
393              
394             #---------------
395             sub _expand_nuc {
396             #---------------
397 2     2   2 my ($self,$pat) = @_;
398              
399 2   66     7 $pat ||= $self->str;
400 2         11 $pat =~ s/N|X/./g;
401 2         3 $pat =~ s/pu/R/ig;
402 2         3 $pat =~ s/py/Y/ig;
403 2         3 $pat =~ s/U/T/g;
404 2         2 $pat =~ s/^
405 2         2 $pat =~ s/>$/\$/;
406              
407             ## Avoid nested situations: [ya] --/--> [[ct]a]
408             ## Yet correctly deal with: sg[ya] ---> [gc]g[cta]
409 2 50       4 if($pat =~ /\[\w*[RYSWMK]\w*\]/) {
410 0         0 $pat =~ s/\[(\w*)R(\w*)\]/\[$1$PURINES$2\]/g;
411 0         0 $pat =~ s/\[(\w*)Y(\w*)\]/\[$1$PYRIMIDINES$2\]/g;
412 0         0 $pat =~ s/\[(\w*)S(\w*)\]/\[$1GC$2\]/g;
413 0         0 $pat =~ s/\[(\w*)W(\w*)\]/\[$1AT$2\]/g;
414 0         0 $pat =~ s/\[(\w*)M(\w*)\]/\[$1AC$2\]/g;
415 0         0 $pat =~ s/\[(\w*)K(\w*)\]/\[$1GT$2\]/g;
416 0         0 $pat =~ s/\[(\w*)V(\w*)\]/\[$1ACG$2\]/g;
417 0         0 $pat =~ s/\[(\w*)H(\w*)\]/\[$1ACT$2\]/g;
418 0         0 $pat =~ s/\[(\w*)D(\w*)\]/\[$1AGT$2\]/g;
419 0         0 $pat =~ s/\[(\w*)B(\w*)\]/\[$1CGT$2\]/g;
420 0         0 $pat =~ s/R/\[$PURINES\]/g;
421 0         0 $pat =~ s/Y/\[$PYRIMIDINES\]/g;
422 0         0 $pat =~ s/S/\[GC\]/g;
423 0         0 $pat =~ s/W/\[AT\]/g;
424 0         0 $pat =~ s/M/\[AC\]/g;
425 0         0 $pat =~ s/K/\[GT\]/g;
426 0         0 $pat =~ s/V/\[ACG\]/g;
427 0         0 $pat =~ s/H/\[ACT\]/g;
428 0         0 $pat =~ s/D/\[AGT\]/g;
429 0         0 $pat =~ s/B/\[CGT\]/g;
430             } else {
431 2         9 $pat =~ s/R/\[$PURINES\]/g;
432 2         2 $pat =~ s/Y/\[$PYRIMIDINES\]/g;
433 2         3 $pat =~ s/S/\[GC\]/g;
434 2         2 $pat =~ s/W/\[AT\]/g;
435 2         3 $pat =~ s/M/\[AC\]/g;
436 2         1 $pat =~ s/K/\[GT\]/g;
437 2         2 $pat =~ s/V/\[ACG\]/g;
438 2         2 $pat =~ s/H/\[ACT\]/g;
439 2         2 $pat =~ s/D/\[AGT\]/g;
440 2         1 $pat =~ s/B/\[CGT\]/g;
441             }
442 2         5 $pat =~ s/\((.)\)/$1/g; ## Doing thses last since:
443 2         3 $pat =~ s/\[(.)\]/$1/g; ## Pattern could contain [y] (for example)
444              
445 2         6 return $pat;
446             }
447              
448              
449              
450             =head1 revcom
451              
452             Title : revcom
453             Usage : revcom([1]);
454             Purpose : Forms a pattern capable of recognizing the reverse complement
455             : version of a nucleotide sequence pattern.
456             Example : $pattern_object->revcom();
457             : $pattern_object->revcom(1); ## returns expanded rev complement pattern.
458             Returns : Object reference for a new Bio::Tools::SeqPattern containing
459             : the revcom of the current pattern as its sequence.
460             Argument : (1) boolean (optional) (default= false)
461             : true : expand the pattern before rev-complementing.
462             : false: don't expand pattern before or after rev-complementing.
463             Throws : Exception if called for amino acid sequence pattern.
464             Comments : This method permits the simultaneous searching of both
465             : sense and anti-sense versions of a nucleotide pattern
466             : by means of a grep-type of functionality in which any
467             : number of patterns may be or-ed into the recognition
468             : pattern.
469             : Overrides Bio::Seq::revcom() and calls it first thing.
470             : The order of _fixpat() calls is critical.
471              
472             See Also : L, L, L, L, L, L
473              
474             =cut
475              
476             #-----------'
477             sub revcom {
478             #-----------
479 34     34 0 79 my($self,$expand) = @_;
480              
481 34 100       40 if ($self->type !~ /Dna|Rna/i) {
482 1         2 $self->throw("Can't get revcom for ${\$self->type} sequence types.\n");
  1         2  
483             }
484             # return $self->{'_rev'} if defined $self->{'_rev'};
485              
486 33   100     91 $expand ||= 0;
487 33         38 my $str = $self->str;
488 33         38 $str =~ tr/acgtrymkswhbvdnxACGTRYMKSWHBVDNX/tgcayrkmswdvbhnxTGCAYRKMSWDVBHNX/;
489 33         37 my $rev = CORE::reverse $str;
490 33         32 $rev =~ tr/[](){}<>/][)(}{>
491              
492 33 100       42 if($expand) {
493 1         3 $rev = $self->_expand_nuc($rev);
494             # print "\nExpanded: $rev\n";
495             }
496              
497 33         37 %Processed_braces = ();
498 33         28 %Processed_asterics = ();
499              
500 33         38 my $fixrev = _fixpat_1($rev);
501             # print "FIX 1: $fixrev";;
502              
503 33         43 $fixrev = _fixpat_2($fixrev);
504             # print "FIX 2: $fixrev";;
505              
506 33         43 $fixrev = _fixpat_3($fixrev);
507             # print "FIX 3: $fixrev";;
508              
509 33         45 $fixrev = _fixpat_4($fixrev);
510             # print "FIX 4: $fixrev";;
511              
512 33         41 $fixrev = _fixpat_5($fixrev);
513             # print "FIX 5: $fixrev";;
514              
515             ##### Added by ps 8/7/00 to allow non-greedy matching
516 33         43 $fixrev = _fixpat_6($fixrev);
517             # print "FIX 6: $fixrev";;
518              
519             # $self->{'_rev'} = $fixrev;
520              
521 33         59 return Bio::Tools::SeqPattern->new(-seq =>$fixrev, -type =>$self->type);
522             }
523              
524             =head1 backtranslate
525              
526             Title : backtranslate
527             Usage : backtranslate();
528             Purpose : Produce a degenerate oligonucleotide whose translation would produce
529             : the original protein motif.
530             Example : $pattern_object->backtranslate();
531             Returns : Object reference for a new Bio::Tools::SeqPattern containing
532             : the reverse translation of the current pattern as its sequence.
533             Throws : Exception if called for nucleotide sequence pattern.
534              
535             =cut
536              
537             sub backtranslate {
538 7     7 0 1076 my $self = shift;
539            
540             # _load_module loads dynamically, caches call if successful
541 7         19 $self->_load_module('Bio::Tools::SeqPattern::Backtranslate');
542 7         160 Bio::Tools::SeqPattern::Backtranslate->import("_reverse_translate_motif");
543              
544 7 100       15 if ($self->type ne 'Amino') {
545 1         2 $self->throw(
546 1         2 "Can't get backtranslate for ${\$self->type} sequence types.\n"
547             );
548             }
549              
550 6         15 return __PACKAGE__->new(
551             -SEQ => _reverse_translate_motif($self->str),
552             -TYPE => 'Dna',
553             );
554             }
555              
556             =head1 _fixpat_1
557              
558             Title : _fixpat_1
559             Usage : n/a; called automatically by revcom()
560             Purpose : Utility method for revcom()
561             : Converts all {7,5} --> {5,7} (Part I)
562             : and [T^] --> [^T] (Part II)
563             : and *N --> N* (Part III)
564             Returns : String (the new, partially reversed pattern)
565             Argument : String (the expanded pattern)
566             Throws : n/a
567              
568             See Also : L()
569              
570             =cut
571              
572             #--------------
573             sub _fixpat_1 {
574             #--------------
575 33     33   29 my $pat = shift;
576              
577             ## Part I:
578 33         25 my (@done,@parts);
579 33         22 while(1) {
580 37 100       72 $pat =~ /(.*)\{(\S+?)\}(.*)/ or do{ push @done, $pat; last; };
  33         34  
  33         34  
581 4         13 $pat = $1.'#{'.reverse($2).'}'.$3;
582             # print "1: $1\n2: $2\n3: $3\n";
583             # print "modified pat: $pat";;
584 4         9 @parts = split '#', $pat;
585 4         3 push @done, $parts[1];
586 4         4 $pat = $parts[0];
587             # print "done: $parts[1]<---\nnew pat: $pat<---";;
588 4 50       5 last if not $pat;
589             }
590 33         44 $pat = join('', reverse @done);
591              
592             ## Part II:
593 33         32 @done = ();
594 33         20 while(1) {
595 117 100       303 $pat =~ /(.*)\[(\S+?)\](.*)/ or do{ push @done, $pat; last; };
  19         19  
  19         15  
596 98         188 $pat = $1.'#['.reverse($2).']'.$3;
597             # print "1: $1\n2: $2\n3: $3\n";
598             # print "modified pat: $pat";;
599 98         133 @parts = split '#', $pat;
600 98         76 push @done, $parts[1];
601 98         65 $pat = $parts[0];
602             # print "done: $parts[1]<---\nnew pat: $pat<---";;
603 98 100       120 last if not $pat;
604             }
605 33         40 $pat = join('', reverse @done);
606              
607             ## Part III:
608 33         35 @done = ();
609 33         26 while(1) {
610 33 50       53 $pat =~ /(.*)\*([\w.])(.*)/ or do{ push @done, $pat; last; };
  33         27  
  33         29  
611 0         0 $pat = $1.'#'.$2.'*'.$3;
612 0         0 $Processed_asterics{$2}++;
613             # print "1: $1\n2: $2\n3: $3\n";
614             # print "modified pat: $pat";;
615 0         0 @parts = split '#', $pat;
616 0         0 push @done, $parts[1];
617 0         0 $pat = $parts[0];
618             # print "done: $parts[1]<---\nnew pat: $pat<---";;
619 0 0       0 last if not $pat;
620             }
621 33         61 return join('', reverse @done);
622              
623             }
624              
625              
626             =head1 _fixpat_2
627              
628             Title : _fixpat_2
629             Usage : n/a; called automatically by revcom()
630             Purpose : Utility method for revcom()
631             : Converts all {5,7}Y ---> Y{5,7}
632             : and {10,}. ---> .{10,}
633             Returns : String (the new, partially reversed pattern)
634             Argument : String (the expanded, partially reversed pattern)
635             Throws : n/a
636              
637             See Also : L()
638              
639             =cut
640              
641             #--------------
642             sub _fixpat_2 {
643             #--------------
644 33     33   27 my $pat = shift;
645              
646 33         77 local($^W) = 0;
647 33         25 my (@done,@parts,$braces);
648 33         26 while(1) {
649             # $pat =~ s/(.*)([^])])(\{\S+?\})([\w.])(.*)/$1$2#$4$3$5/ or do{ push @done, $pat; last; };
650 37 100       83 $pat =~ s/(.*)(\{\S+?\})([\w.])(.*)/$1#$3$2$4/ or do{ push @done, $pat; last; };
  33         29  
  33         60  
651 4         6 $braces = $2;
652 4         9 $braces =~ s/[{}]//g;
653 4         8 $Processed_braces{"$3$braces"}++;
654             # print "modified pat: $pat";;
655 4         5 @parts = split '#', $pat;
656 4         4 push @done, $parts[1];
657 4         5 $pat = $parts[0];
658             # print "done: $parts[1]<---\nnew pat: $pat<---";;
659 4 50       4 last if not $pat;
660             }
661 33         78 return join('', reverse @done);
662             }
663              
664              
665             =head1 _fixpat_3
666              
667             Title : _fixpat_3
668             Usage : n/a; called automatically by revcom()
669             Purpose : Utility method for revcom()
670             : Converts all {5,7}(XXX) ---> (XXX){5,7}
671             Returns : String (the new, partially reversed pattern)
672             Argument : String (the expanded, partially reversed pattern)
673             Throws : n/a
674              
675             See Also : L()
676              
677             =cut
678              
679             #-------------
680             sub _fixpat_3 {
681             #-------------
682 33     33   24 my $pat = shift;
683              
684 33         28 my (@done,@parts,$braces,$newpat,$oldpat);
685 33         26 while(1) {
686             # $pat =~ s/(.+)(\{\S+\})(\(\w+\))(.*)/$1#$3$2$4/ or do{ push @done, $pat; last; };
687 36 100       81 if( $pat =~ /(.*)(.)(\{\S+\})(\(\w+\))(.*)/) {
    50          
688 3         11 $newpat = "$1#$2$4$3$5";
689             ##ps $oldpat = "$1#$2$3$4$5";
690             # print "1: $1\n2: $2\n3: $3\n4: $4\n5: $5\n";
691             ##ps $braces = $3;
692             ##ps $braces =~ s/[{}]//g;
693             ##ps if( exists $Processed_braces{"$2$braces"} || exists $Processed_asterics{$2}) {
694             ##ps $pat = $oldpat; # Don't change it. Already processed.
695             # print "saved pat: $pat";;
696             ##ps } else {
697             # print "new pat: $newpat";;
698 3         3 $pat = $newpat; # Change it.
699             ##ps }
700             } elsif( $pat =~ /^(\{\S+\})(\(\w+\))(.*)/) {
701 0         0 $pat = "#$2$1$3";
702             } else {
703 33         29 push @done, $pat; last;
  33         33  
704             }
705 3         5 @parts = split '#', $pat;
706 3         4 push @done, $parts[1];
707 3         2 $pat = $parts[0];
708             # print "done: $parts[1]<---\nnew pat: $pat<---";;
709 3 50       5 last if not $pat;
710             }
711 33         56 return join('', reverse @done);
712             }
713              
714              
715             =head1 _fixpat_4
716              
717             Title : _fixpat_4
718             Usage : n/a; called automatically by revcom()
719             Purpose : Utility method for revcom()
720             : Converts all {5,7}[XXX] ---> [XXX]{5,7}
721             Returns : String (the new, partially reversed pattern)
722             Argument : String (the expanded, partially reversed pattern)
723             Throws : n/a
724              
725             See Also : L()
726              
727             =cut
728              
729             #---------------
730             sub _fixpat_4 {
731             #---------------
732 33     33   25 my $pat = shift;
733              
734 33         26 my (@done,@parts,$braces,$newpat,$oldpat);
735 33         28 while(1) {
736             # $pat =~ s/(.*)(\{\S+\})(\[\w+\])(.*)/$1#$3$2$4/ or do{ push @done, $pat; last; };
737             # $pat =~ s/(.*)([^\w.])(\{\S+\})(\[\w+\])(.*)/$1$2#$4$3$5/ or do{ push @done, $pat; last; };
738 33 50       61 if( $pat =~ /(.*)(.)(\{\S+\})(\[\w+\])(.*)/) {
    50          
739 0         0 $newpat = "$1#$2$4$3$5";
740 0         0 $oldpat = "$1#$2$3$4$5";
741             # print "1: $1\n2: $2\n3: $3\n4: $4\n5: $5\n";
742 0         0 $braces = $3;
743 0         0 $braces =~ s/[{}]//g;
744 0 0 0     0 if( (defined $braces and defined $2) and
      0        
      0        
745             exists $Processed_braces{"$2$braces"} || exists $Processed_asterics{$2}) {
746 0         0 $pat = $oldpat; # Don't change it. Already processed.
747             # print "saved pat: $pat";;
748             } else {
749 0         0 $pat = $newpat; # Change it.
750             # print "new pat: $pat";;
751             }
752             } elsif( $pat =~ /^(\{\S+\})(\[\w+\])(.*)/) {
753 0         0 $pat = "#$2$1$3";
754             } else {
755 33         26 push @done, $pat; last;
  33         26  
756             }
757              
758 0         0 @parts = split '#', $pat;
759 0         0 push @done, $parts[1];
760 0         0 $pat = $parts[0];
761             # print "done: $parts[1]<---\nnew pat: $pat<---";;
762 0 0       0 last if not $pat;
763             }
764 33         45 return join('', reverse @done);
765             }
766              
767              
768             =head1 _fixpat_5
769              
770             Title : _fixpat_5
771             Usage : n/a; called automatically by revcom()
772             Purpose : Utility method for revcom()
773             : Converts all *[XXX] ---> [XXX]*
774             : and *(XXX) ---> (XXX)*
775             Returns : String (the new, partially reversed pattern)
776             Argument : String (the expanded, partially reversed pattern)
777             Throws : n/a
778              
779             See Also : L()
780              
781             =cut
782              
783             #--------------
784             sub _fixpat_5 {
785             #--------------
786 33     33   28 my $pat = shift;
787              
788 33         21 my (@done,@parts,$newpat,$oldpat);
789 33         30 while(1) {
790             # $pat =~ s/(.*)(\{\S+\})(\[\w+\])(.*)/$1#$3$2$4/ or do{ push @done, $pat; last; };
791             # $pat =~ s/(.*)([^\w.])(\{\S+\})(\[\w+\])(.*)/$1$2#$4$3$5/ or do{ push @done, $pat; last; };
792 33 50       65 if( $pat =~ /(.*)(.)\*(\[\w+\]|\(\w+\))(.*)/) {
    50          
793 0         0 $newpat = "$1#$2$3*$4";
794 0         0 $oldpat = "$1#$2*$3$4";
795             # print "1: $1\n2: $2\n3: $3\n4: $4\n";
796 0 0       0 if( exists $Processed_asterics{$2}) {
797 0         0 $pat = $oldpat; # Don't change it. Already processed.
798             # print "saved pat: $pat";;
799             } else {
800 0         0 $pat = $newpat; # Change it.
801             # print "new pat: $pat";;
802             }
803             } elsif( $pat =~ /^\*(\[\w+\]|\(\w+\))(.*)/) {
804 0         0 $pat = "#$1*$3";
805             } else {
806 33         27 push @done, $pat; last;
  33         24  
807             }
808              
809 0         0 @parts = split '#', $pat;
810 0         0 push @done, $parts[1];
811 0         0 $pat = $parts[0];
812             # print "done: $parts[1]<---\nnew pat: $pat<---";;
813 0 0       0 last if not $pat;
814             }
815 33         48 return join('', reverse @done);
816             }
817              
818              
819              
820              
821              
822             ############################
823             #
824             # PS: Added 8/7/00 to allow non-greedy matching patterns
825             #
826             ######################################
827              
828             =head1 _fixpat_6
829              
830             Title : _fixpat_6
831             Usage : n/a; called automatically by revcom()
832             Purpose : Utility method for revcom()
833             : Converts all ?Y{5,7} ---> Y{5,7}?
834             : and ?(XXX){5,7} ---> (XXX){5,7}?
835             : and ?[XYZ]{5,7} ---> [XYZ]{5,7}?
836             Returns : String (the new, partially reversed pattern)
837             Argument : String (the expanded, partially reversed pattern)
838             Throws : n/a
839              
840             See Also : L()
841              
842             =cut
843              
844             #--------------
845             sub _fixpat_6 {
846             #--------------
847 33     33   23 my $pat = shift;
848 33         27 my (@done,@parts);
849              
850 33         29 @done = ();
851 33         25 while(1) {
852 33 50       52 $pat =~ /(.*)\?(\[\w+\]|\(\w+\)|\w)(\{\S+?\})?(.*)/ or do{ push @done, $pat; last; };
  33         26  
  33         27  
853 0 0       0 my $quantifier = $3 ? $3 : ""; # Shut up warning if no explicit quantifier
854 0         0 $pat = $1.'#'.$2.$quantifier.'?'.$4;
855             # $pat = $1.'#'.$2.$3.'?'.$4;
856              
857             # print "1: $1\n2: $2\n3: $3\n";
858             # print "modified pat: $pat";;
859 0         0 @parts = split '#', $pat;
860 0         0 push @done, $parts[1];
861 0         0 $pat = $parts[0];
862             # print "done: $parts[1]<---\nnew pat: $pat<---";;
863 0 0       0 last if not $pat;
864             }
865 33         52 return join('', reverse @done);
866              
867             }
868              
869             =head2 str
870              
871             Title : str
872             Usage : $obj->str($newval)
873             Function:
874             Returns : value of str
875             Args : newvalue (optional)
876              
877              
878             =cut
879              
880             sub str{
881 158     158 1 1639 my $obj = shift;
882 158 100       223 if( @_ ) {
883 78         61 my $value = shift;
884 78         84 $obj->{'str'} = $value;
885             }
886 158         192 return $obj->{'str'};
887              
888             }
889              
890             =head2 type
891              
892             Title : type
893             Usage : $obj->type($newval)
894             Function:
895             Returns : value of type
896             Args : newvalue (optional)
897              
898              
899             =cut
900              
901             sub type{
902 157     157 1 117 my $obj = shift;
903 157 100       207 if( @_ ) {
904 78         68 my $value = shift;
905 78         80 $obj->{'type'} = $value;
906             }
907 157         319 return $obj->{'type'};
908              
909             }
910              
911             1;
912              
913             __END__