File Coverage

blib/lib/TIGR/FASTA/Record.pm
Criterion Covered Total %
statement 10 95 10.5
branch 0 30 0.0
condition 0 54 0.0
subroutine 4 15 26.6
pod 11 11 100.0
total 25 205 12.2


line stmt bran cond sub pod time code
1             # $Id: FASTArecord.pm,v 1.1 2004/04/28 15:03:43 aphillip Exp $
2              
3             package TIGR::FASTA::Record;
4             {
5              
6             =head1 NAME
7              
8             TIGR::FASTA::Record - TIGR::FASTA::Record class describing FASTA records
9              
10             =head1 SYNOPSIS
11              
12             use TIGR::FASTA::Record;
13             my $obj_instance = new TIGR::FASTA::Record ($record_header,
14             $record_data);
15              
16             =head1 DESCRIPTION
17              
18             This module provides an object definition for a FASTA record. It verifies
19             data entry on creation, and returns information queried on the record.
20              
21             =cut
22              
23             BEGIN {
24 1     1   48 require 5.006_00;
25             }
26              
27 1     1   6 use strict;
  1         1  
  1         25  
28 1     1   256 use TIGR::FASTA::Grammar ':public';
  1         2  
  1         169  
29 1     1   6 use TIGR::FASTA::Grammar ':private';
  1         1  
  1         1012  
30              
31             ## external variables
32              
33             my $OUTPUT_LINE_LENGTH_REF = \$TIGR::FASTA::Grammar::OUTPUT_LINE_LENGTH;
34             my $UNBOUND_FASTA_SEPARATOR = $TIGR::FASTA::Grammar::UNBOUND_FASTA_SEPARATOR;
35            
36             ## internal variables and identifiers
37              
38             our $REVISION = (qw$Revision: 1.1 $)[-1];
39             our $VERSION = '1.2';
40             our $VERSION_STRING = "$VERSION (Build $REVISION)";
41             our @DEPEND = ();
42              
43             ## prototypes
44              
45             sub new($$);
46             sub equals($);
47             sub getHeader();
48             sub getIdentifier();
49             sub getData();
50             sub size();
51             sub toString();
52             sub reverseComplement($);
53             sub reverseComplementData();
54             sub subSequence($;$);
55             sub unGap();
56              
57             ## implementation
58              
59             =over
60              
61             =item $obj_instance = new TIGR::FASTA::Record ($header, $data_rec);
62              
63             This method returns a new instance of a TIGR::FASTA::Record object. It takes
64             a record header, C<$header>, and the record data, C<$data_rec> as parameters.
65             The record header may optionally contain the leading header character, a
66             C<> symbol. Both parameters are parsed. A new object instance is
67             returned on success. If parsing fails or a record cannot be created, this
68             method returns undefined.
69              
70             =cut
71              
72              
73             sub new($$) {
74 0     0 1   my $pkg = shift;
75 0           my $header = shift;
76 0           my $data_rec = shift;
77 0           my $self = undef;
78 0           my $identifier = undef;
79              
80 0 0 0       if ( ( defined ($header) ) &&
      0        
      0        
      0        
81             ( $header =~ s[^($UNBOUND_FASTA_SEPARATOR){0,1}(.*)] # accept no
82             [$UNBOUND_FASTA_SEPARATOR$2] ) && # separator
83             ( defined ($data_rec) ) &&
84             ( defined($identifier = _headerToIdentifier($header)) ) &&
85             #
86             # parsable id.
87 0           ( isValidFASTAdata($data_rec) != 0 ) ) { # valid data?
88 0           $self = {};
89             bless $self, $pkg;
90 0            
91 0           chomp($header);
92             chomp($data_rec);
93 0            
94 0           $self->{header} = $header;
95 0           $self->{identifier} = $identifier;
96 0           $self->{data_rec} = $data_rec;
97             $self->{rec_size} = length($data_rec); # compute size
98 0           }
99             return $self;
100             }
101              
102              
103             =item $result = $obj_instance->equals($fasta_obj);
104              
105             This method takes a I object as a parameter. It compares
106             the passed object with the calling object's internal structures to test for
107             equality. If the two objects are equal, this method returns true (1).
108             Otherwise, this method returns false (undefined).
109              
110             =cut
111              
112              
113 0     0 1   sub equals($) {
114 0           my $self = shift;
115 0           my $other_obj = shift;
116             my $return_val = undef;
117 0 0 0        
118             if ( ( defined ($other_obj) ) &&
119             ( $self->toString() eq $other_obj->toString() )
120 0           ) {
121             $return_val = 1;
122             }
123 0           else {
124             $return_val = undef;
125             }
126 0            
127             return $return_val;
128             }
129              
130              
131             =item $identifier = $obj_instance->getHeader();
132              
133             This method returns the header string for the record.
134              
135             =cut
136              
137              
138 0     0 1   sub getHeader() {
139 0           my $self = shift;
140 0 0         my $header = $self->{header}; # should always be defined
141             if (! isValidFASTAheader($header) ) {
142 0           # need to prepend an $UNBOUND_FASTA_SEPARATOR
143             $header = $UNBOUND_FASTA_SEPARATOR . $header;
144 0           }
145             return $header;
146             }
147              
148              
149             =item $identifier = $obj_instance->getIdentifier();
150              
151             This method returns the identifier string for the record.
152              
153             =cut
154              
155              
156 0     0 1   sub getIdentifier() {
157 0           my $self = shift;
158             return $self->{identifier}; # should always be defined
159             }
160              
161              
162             =item $data_contents = $obj_instance->getData();
163              
164             This method returns the data contents of the record as an
165             uninterrupted string.
166              
167             =cut
168              
169              
170 0     0 1   sub getData() {
171 0           my $self = shift;
172             return $self->{data_rec}; # should always be defined
173             }
174              
175              
176             =item $size_of_rec = $obj_instance->size();
177              
178             This method returns the size of the data string contained by the
179             record.
180              
181             =cut
182              
183              
184 0     0 1   sub size() {
185 0           my $self = shift;
186             return $self->{rec_size}; # should always be defined
187             }
188              
189              
190             =item $str_rep = $obj_instance->toString();
191              
192             This method returns a string representation of the FASTA record.
193             This string representation conforms to the TIGR definition of a
194             FASTA record.
195              
196             =cut
197              
198              
199 0     0 1   sub toString() {
200 0           my $self = shift;
201 0           my $data_copy = $self->{data_rec};
202 0           my $str_rep = $self->getHeader() . "\n";
203 0           my $seg = undef;
204 0           my @segments = ();
205 0           my $last_seg = undef;
206             my $skip = undef;
207 0   0        
208             while((defined($seg = substr $data_copy, 0,
209 0           $$OUTPUT_LINE_LENGTH_REF,'')) && ($seg ne '')) {
210             $str_rep .= $seg . "\n";
211 0           }
212             return $str_rep;
213             }
214              
215             =item $rev_compl_str = $obj_instance->reverseComplement($NA_strand);
216              
217             This method takes in a string which represents a Nucleotide strand and
218             returns the reverse complement of the strand. If the string does not
219             represent a Nucleotide strand or is undefined, the method returns undefined.
220             When an empty string is passed into the method, we get an empty string on
221             return.
222              
223             =cut
224              
225            
226 0     0 1   sub reverseComplement($) {
227 0           my $self = shift;
228 0           my $NA_strand = shift;
229 0           my $error_condition = 1;
230 0           my $rev_compl_str = undef;
231             my $validData = 0;
232 0 0 0      
      0        
233             if((defined ($NA_strand)) &&
234             ($validData = _isNucleotideData($NA_strand)) && ($validData == 1)) {
235 0          
236             $rev_compl_str = '';
237 0 0         # taking the complement of the sequence
    0          
238 0           if(!defined( $NA_strand =~ tr/AaCcTtGgUuMmRrWwSsYyKkVvHhDdBbXxNn.-/TtGgAaCcAaKkYyWwSsRrMmBbDdHhVvXxNn.-/)) {
239             $error_condition = undef;
240             }
241             # taking the reverse of the complemented sequence
242 0           elsif(!defined($rev_compl_str = reverse($NA_strand))) {
243             $error_condition = undef;
244             }
245             }
246 0           else {
247             $error_condition = undef;
248 0 0         }
249             return ( defined ( $error_condition ) ) ? $rev_compl_str : undef;
250             }
251              
252              
253             =item $rev_compl_str = $obj_instance->reverseComplementData();
254              
255             This method returns the reverse complement of the FASTA record data. If the
256             FASTA record data does not represent a Nucleotide strand or is undefined, the
257             method returns undefined.
258              
259             =cut
260              
261              
262 0     0 1   sub reverseComplementData() {
263 0           my $self = shift;
264 0           my $NA_strand = $self->{data_rec};
265 0           my $error_condition = 1;
266 0           my $rev_compl_str = undef;
267             my $validData = 0;
268 0 0 0      
      0        
269             if((defined ($NA_strand)) &&
270             ($validData = _isNucleotideData($NA_strand)) && ($validData == 1)) {
271 0          
272             $rev_compl_str = '';
273 0 0         # taking the complement of the sequence
    0          
274 0           if(!defined( $NA_strand =~ tr/AaCcTtGgUuMmRrWwSsYyKkVvHhDdBbXxNn.-/TtGgAaCcAaKkYyWwSsRrMmBbDdHhVvXxNn.-/)) {
275             $error_condition = undef;
276             }
277             # taking the reverse of the complemented sequence
278 0           elsif(!defined($rev_compl_str = reverse($NA_strand))) {
279             $error_condition = undef;
280             }
281             }
282 0           else {
283             $error_condition = undef;
284 0 0         }
285             return ( defined ( $error_condition ) ) ? $rev_compl_str : undef;
286             }
287              
288              
289             =item $data_substr = $obj_instance->subSequence($startpos, $length);
290              
291             This method behaves like substr(). This method takes in two numbers $startpos
292             and $length and returns a substring of the record data. The $startpos for the
293             first base in the data is 0. The $length is optional. The substring is
294             extracted starting at $startpos characters from the front of the data string.
295             If $startpos is negative, the substring starts that far from the end of the
296             string instead. If $length is omitted, everything to the end of the string is
297             returned. If $length is negative, the length is calculated to leave that many
298             characters off the end of the string. Otherwise, $length indicates the length
299             of the substring to extract. If $startpos is undefined the function returns
300             undefined. If either $startpos or $length are greater than the data length, the
301             method returns undefined
302              
303             =cut
304              
305            
306 0     0 1   sub subSequence($;$) {
307 0           my $self = shift;
308 0           my ($startpos, $length) = @_;
309 0           my $data_str = $self->getData();
310 0           my $data_substr = undef;
311             my $strlen = length($data_str);
312 0 0 0      
    0 0        
      0        
      0        
      0        
      0        
      0        
313             if((defined ($data_str)) && (defined ($startpos)) &&
314             (defined ($length)) && (abs($startpos) < $strlen) &&
315             (abs($length) <= $strlen)) {
316 0           #both parameters are specified
317             $data_substr = substr($data_str, $startpos, $length);
318             }
319             elsif((defined ($data_str)) && (defined ($startpos)) &&
320             (!defined ($length)) && (abs($startpos) < $strlen) ) {
321 0           #only one parameter is specified.
322             $data_substr = substr($data_str, $startpos);
323             }
324             else {
325             #either $data_str is undefined or $startpos is undefined so do
326             #nothing.
327 0           }
328             return $data_substr;
329             }
330              
331             =item $result = $obj_instance->unGap();
332              
333             This method removes all the gaps('-' characters) from the record data. The
334             record size is changed after the gaps are removed. The method returns 1 on
335             success and undefined otherwise.
336              
337             =cut
338              
339            
340 0     0 1   sub unGap() {
341 0           my $self = shift;
342 0           my $data_str = $self->getData();
343 0 0 0       my $result = 1;
    0          
344             if((defined ($data_str)) && ($data_str =~ /[-]+/)) {
345 0           #removing the gaps in the data.
346 0           $data_str =~ s/[-]+//g;
347 0           $self->{data_rec} = $data_str;
348             $self->{rec_size} = length($data_str);
349            
350             }
351 0           elsif(!defined ($data_str)) {
352             $result = undef;
353 0           }
354             return $result;
355             }
356              
357             =back
358              
359              
360              
361             =head1 USAGE
362              
363             To use this module, load it using the C function. The object must
364             be initialized with a header and a data string. An example follows.
365             Please refer to the C package usage for more examples.
366              
367             #!/usr/local/bin/perl -w
368              
369             use strict;
370             use TIGR::FASTA::Record;
371              
372             MAIN:
373             {
374              
375             # set up a simple example without using the leading carot.
376             my $header1 = "ORF00001 The first ORF in the series";
377             my $data1 =
378             "MEEISTPEGGVLVPISIETEVKRAYIDYSMSVIVSRALPDVRDGLKPVHRRILYAMEEKG" .
379             "LRFSGPTRKCAKIVGDVLGSFHPHGDASVYDALVRLGQDFSLRYPVIHPQGNFGTIGGDP" .
380             "PAAYRYTEAKMARIAESMVEDIKKETVSFVPNFDDSDVEPTVLPGRFPFLLANGSSGIAV" .
381             "GMTTNMPPHNLREIAAAISAYIENPNLSIQELCDCINGPDFPTGGIIFGKNGIRQSYETG" .
382             "RGKIVVRARFTIETDSKGRDTIIFTEVPYQVNTTMLVMRIGELARAKVIEGIANVNDETS" .
383             "DRTGLRIVVELKKGTPAQVVLNHLFAKTPLQSSFNVINLALVEGRPRMLTLKDLVRYFVE" .
384             "HRVDVVTRRAHFELRKAQERIHLVRALIRALDAIDKIITLIRHSQNTELAKQRLREQFDF" .
385             "DNVQAQAIVDMQMKRLTGLEVESLRTELKDLTELISSLEELLTSPQKVLGVVKKETRDIA" .
386             "DMFGDDRRTDIVSNEIEYLDVEDFIQKEEMVILISHLGYIKRVPVSAYRNQNRGGKGSSS" .
387             "ANLAAHDFISQIFTASTHDYVMFVTSRGRAYWLKVYGIPESGRANRGSHIKSLLMVATDE" .
388             "EITAIVSLREFSNKSYVFMATARGVVKKVTTDNFVNAKTRGIIALKLSGGDTLVSAVLVQ" .
389             "DEDEVMLITRQGKALRMSGREVREMGRNSSGVIGIKLTSEDLVAGVLRVSEQRKVLIMTE" .
390             "NGYGKRVSFSEFSVHGRGTAGQKIYTQTDRKGAIIGALAVLDTDECMCITGQGKTIRVDV" .
391             "CAISVLGRGAQGVRVLDIEPSDLVVGLSCVMQG";
392              
393             my $fasta_record = new TIGR::FASTA::Record $header1, $data1;
394             if ( defined ( $fasta_record ) ) {
395             print STDOUT "Sequence " . $fasta_record->getIdentifier() . " is " .
396             $fasta_record->size() . " residues.\n";
397             print STDOUT $fasta_record->toString();
398             }
399             else {
400             die "Invalid FASTA record 1";
401             }
402            
403             # but this header is also valid.
404             my $header2 = ">ORF00001 The first ORF in the series";
405             my $data2 =
406             "MEEISTPEGGVLVPISIETEVKRAYIDYSMSVIVSRALPDVRDGLKPVHRRILYAMEEKG" .
407             "LRFSGPTRKCAKIVGDVLGSFHPHGDASVYDALVRLGQDFSLRYPVIHPQGNFGTIGGDP" .
408             "PAAYRYTEAKMARIAESMVEDIKKETVSFVPNFDDSDVEPTVLPGRFPFLLANGSSGIAV" .
409             "GMTTNMPPHNLREIAAAISAYIENPNLSIQELCDCINGPDFPTGGIIFGKNGIRQSYETG" .
410             "RGKIVVRARFTIETDSKGRDTIIFTEVPYQVNTTMLVMRIGELARAKVIEGIANVNDETS" .
411             "DRTGLRIVVELKKGTPAQVVLNHLFAKTPLQSSFNVINLALVEGRPRMLTLKDLVRYFVE" .
412             "HRVDVVTRRAHFELRKAQERIHLVRALIRALDAIDKIITLIRHSQNTELAKQRLREQFDF" .
413             "DNVQAQAIVDMQMKRLTGLEVESLRTELKDLTELISSLEELLTSPQKVLGVVKKETRDIA" .
414             "DMFGDDRRTDIVSNEIEYLDVEDFIQKEEMVILISHLGYIKRVPVSAYRNQNRGGKGSSS" .
415             "ANLAAHDFISQIFTASTHDYVMFVTSRGRAYWLKVYGIPESGRANRGSHIKSLLMVATDE" .
416             "EITAIVSLREFSNKSYVFMATARGVVKKVTTDNFVNAKTRGIIALKLSGGDTLVSAVLVQ" .
417             "DEDEVMLITRQGKALRMSGREVREMGRNSSGVIGIKLTSEDLVAGVLRVSEQRKVLIMTE" .
418             "NGYGKRVSFSEFSVHGRGTAGQKIYTQTDRKGAIIGALAVLDTDECMCITGQGKTIRVDV" .
419             "CAISVLGRGAQGVRVLDIEPSDLVVGLSCVMQG";
420              
421             my $fasta_record2 = new TIGR::FASTA::Record $header2, $data2;
422             if ( defined ( $fasta_record2 ) ) {
423             print STDOUT "Sequence " . $fasta_record2->getIdentifier() . " is " .
424             $fasta_record2->size() . " residues.\n";
425             print STDOUT $fasta_record2->toString();
426             }
427             else {
428             die "Invalid FASTA record 2";
429             }
430              
431             # this entry fails; note the 'J' in the second line of data (8th char.)
432             my $header3 = "ORF00001 The first ORF in the series";
433             my $data3 =
434             "MEEISTPEGGVLVPISIETEVKRAYIDYSMSVIVSRALPDVRDGLKPVHRRILYAMEEKG" .
435             "LRFSGPTJKCAKIVGDVLGSFHPHGDASVYDALVRLGQDFSLRYPVIHPQGNFGTIGGDP" .
436             "PAAYRYTEAKMARIAESMVEDIKKETVSFVPNFDDSDVEPTVLPGRFPFLLANGSSGIAV" .
437             "GMTTNMPPHNLREIAAAISAYIENPNLSIQELCDCINGPDFPTGGIIFGKNGIRQSYETG" .
438             "RGKIVVRARFTIETDSKGRDTIIFTEVPYQVNTTMLVMRIGELARAKVIEGIANVNDETS" .
439             "DRTGLRIVVELKKGTPAQVVLNHLFAKTPLQSSFNVINLALVEGRPRMLTLKDLVRYFVE" .
440             "HRVDVVTRRAHFELRKAQERIHLVRALIRALDAIDKIITLIRHSQNTELAKQRLREQFDF" .
441             "DNVQAQAIVDMQMKRLTGLEVESLRTELKDLTELISSLEELLTSPQKVLGVVKKETRDIA" .
442             "DMFGDDRRTDIVSNEIEYLDVEDFIQKEEMVILISHLGYIKRVPVSAYRNQNRGGKGSSS" .
443             "ANLAAHDFISQIFTASTHDYVMFVTSRGRAYWLKVYGIPESGRANRGSHIKSLLMVATDE" .
444             "EITAIVSLREFSNKSYVFMATARGVVKKVTTDNFVNAKTRGIIALKLSGGDTLVSAVLVQ" .
445             "DEDEVMLITRQGKALRMSGREVREMGRNSSGVIGIKLTSEDLVAGVLRVSEQRKVLIMTE" .
446             "NGYGKRVSFSEFSVHGRGTAGQKIYTQTDRKGAIIGALAVLDTDECMCITGQGKTIRVDV" .
447             "CAISVLGRGAQGVRVLDIEPSDLVVGLSCVMQG";
448              
449             my $fasta_record3 = new TIGR::FASTA::Record $header3, $data3;
450             if ( defined ( $fasta_record3 ) ) {
451             print STDOUT "Sequence " . $fasta_record3->getIdentifier() . " is " .
452             $fasta_record3->size() . " residues.\n";
453             print STDOUT $fasta_record3->toString();
454             }
455             else {
456             die "Invalid FASTA record 3";
457             }
458             }
459              
460             =cut
461              
462             }
463              
464             1;