File Coverage

blib/lib/FASTX/Seq.pm
Criterion Covered Total %
statement 196 208 94.2
branch 72 98 73.4
condition 34 54 62.9
subroutine 34 36 94.4
pod 29 29 100.0
total 365 425 85.8


line stmt bran cond sub pod time code
1             package FASTX::Seq;
2             #ABSTRACT: A class for representing a sequence for FASTX::Reader
3              
4 46     46   225208 use 5.012;
  46         192  
5 46     46   260 use warnings;
  46         114  
  46         1395  
6 46     46   304 use Carp qw(confess);
  46         92  
  46         2165  
7 46     46   2413 use Data::Dumper;
  46         22863  
  46         2419  
8             $Data::Dumper::Sortkeys = 1;
9 46     46   329 use File::Basename;
  46         98  
  46         157448  
10              
11             $FASTX::Seq::VERSION = $FASTX::Reader::VERSION;
12             $FASTX::Seq::DEFAULT_QUALITY = 'I';
13             $FASTX::Seq::DEFAULT_LINE_LEN = 0;
14             $FASTX::Seq::DEFAULT_OFFSET = 33;
15             require Exporter;
16             our @ISA = qw(Exporter);
17              
18              
19              
20             sub new {
21 81     81 1 5352 my $class = shift @_;
22 81         173 my ($seq, $name, $comment, $qual, $offset, $line_len, $default_quality);
23 81 100       219 if (not defined $_[0]) {
24 1         294 confess "ERROR FASTX::Seq: Sequence missing, record cannot be created.\n";
25             }
26             # Descriptive instantiation with parameters -param => value
27 80 100       350 if (substr($_[0], 0, 1) eq '-') {
    50          
28 15         76 my %data = @_;
29             # Try parsing
30 15         75 for my $i (keys %data) {
31 53 100       331 if ($i =~ /^-(seq|sequence)/) {
    100          
    100          
    100          
    100          
    100          
    50          
32 15         40 $seq = $data{$i};
33             } elsif ($i =~ /^-(name|id)/) {
34 12         29 $name = $data{$i};
35             } elsif ($i =~ /^-(comment|desc|description)/) {
36 7         15 $comment = $data{$i};
37             } elsif ($i =~ /^-qual(ity)?/) {
38 12         35 $qual = $data{$i};
39             } elsif ($i =~ /^-offset/) {
40 4         13 $offset = $data{$i};
41             } elsif ($i =~ /^-line_len/) {
42 2         6 $line_len = $data{$i};
43             } elsif ($i =~ /^-default_quality/) {
44 0         0 $default_quality = $data{$i};
45             } else {
46 1         264 confess "ERROR FASTX::Seq: Unknown parameter $i\n";
47             }
48             }
49             } elsif (not defined $seq) {
50             # Positional instantiation
51             # check number of arguments
52 65 50 33     384 confess "ERROR FASTX::Seq: Wrong number of arguments\n" if (scalar(@_) < 1 || scalar(@_) > 4);
53 65         189 ($seq, $name, $comment, $qual) = @_;
54             }
55            
56             # Required NOT empty
57 79 50       178 if (not defined $seq) {
58 0         0 confess "ERROR FASTX::Seq: Sequence missing, record cannot be created\n";
59             }
60              
61             # If quality is provided, must match sequence length
62 79 100 66     499 if ( defined $seq && defined $qual
      100        
63             && (length($seq) != length($qual))) {
64 2         451 confess "Sequence/quality length mismatch";
65             }
66            
67 77         261 my $self = bless {}, $class;
68            
69              
70 77   100     361 $self->{name} = $name // undef;
71 77         177 $self->{seq} = $seq;
72 77   100     290 $self->{comment} = $comment // undef;
73 77   100     332 $self->{qual} = $qual // undef;
74            
75             # Store defaults
76 77   33     347 $self->{default_quality} = $default_quality // $FASTX::Seq::DEFAULT_QUALITY;
77 77   66     293 $self->{line_len} = $line_len // $FASTX::Seq::DEFAULT_LINE_LEN;
78 77   66     226 $self->{offset} = $offset // $FASTX::Seq::DEFAULT_OFFSET;
79            
80 77         330 return $self;
81            
82             }
83              
84              
85             sub copy {
86 10     10 1 34 my ($self) = @_;
87 10         24 my $copy = __PACKAGE__->new($self->seq, $self->name, $self->comment, $self->qual);
88 10         30 $copy->{default_quality} = $self->default_quality;
89 10         25 $copy->line_len = $self->line_len;
90 10         31 $copy->offset = $self->offset;
91 10         28 return $copy;
92             }
93              
94              
95             sub seq : lvalue {
96             # Update sequence
97 93     93 1 3828 my ($self, $new_val) = @_;
98 93 100       243 $self->{seq} = $new_val if (defined $new_val);
99 93         435 return $self->{seq};
100             }
101              
102              
103              
104             sub name : lvalue {
105             # Update name
106 14     14 1 27 my ($self, $new_val) = @_;
107 14 50       32 $self->{seq} = $new_val if (defined $new_val);
108 14         50 return $self->{name};
109             }
110              
111              
112              
113             sub qual : lvalue {
114             # Update quality
115 56     56 1 746 my ($self, $new_val) = @_;
116 56 50       113 $self->{qual} = $new_val if (defined $new_val);
117 56         205 return $self->{qual};
118             }
119              
120              
121             sub comment : lvalue {
122             # Update comment
123 11     11 1 25 my ($self, $new_val) = @_;
124 11 50       40 $self->{comment} = $new_val if (defined $new_val);
125 11         36 return $self->{comment};
126             }
127              
128              
129             sub offset : lvalue {
130             # Update offset
131 29     29 1 326 my ($self, $new_val) = @_;
132 29 50 66     81 confess "ERROR FASTX::Seq: offset must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
133 29 100       63 $self->{offset} = $new_val if (defined $new_val);
134 29         109 return $self->{offset};
135             }
136              
137              
138             sub line_len : lvalue {
139             # Update line_len
140 20     20 1 32 my ($self, $new_val) = @_;
141 20 50 33     58 confess "ERROR FASTX::Seq: line_len must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
142 20 50       45 $self->{line_len} = $new_val if (defined $new_val);
143 20         43 return $self->{line_len};
144             }
145              
146              
147             sub default_quality : lvalue {
148             # Update default_quality
149 10     10 1 36 my ($self, $new_val) = @_;
150 10 50 33     28 confess "ERROR FASTX::Seq: default_quality must be a single character" if (defined $new_val && length($new_val) != 1);
151 10 50       32 $self->{default_quality} = $new_val if (defined $new_val);
152 10         48 return $self->{default_quality};
153             }
154              
155              
156             sub len {
157             # Update comment
158 23     23 1 5078 my ($self) = @_;
159 23         100 return length($self->{seq});
160             }
161              
162              
163              
164              
165              
166             sub rev {
167             # Update comment
168 2     2 1 4 my ($self) = @_;
169 2         5 $self->{seq} = reverse($self->{seq});
170 2 50       8 $self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
171 2         5 return $self;
172             }
173              
174              
175              
176             sub rc {
177             # Update comment
178 1     1 1 2 my ($self) = @_;
179 1         3 $self->rev();
180 1 50       6 if ($self->{seq} =~ /U/i) {
181 0         0 $self->{seq} =~ tr/ACGURYSWKMBDHVacguryswkmbdhv/UGCAYRSWMKVHDBugcayrswmkvhdb/;
182             } else {
183 1         3 $self->{seq} =~ tr/ACGTRYSWKMBDHVacgtryswkmbdhv/TGCAYRSWMKVHDBtgcayrswmkvhdb/;
184             }
185             #$self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
186 1         2 return $self;
187             }
188              
189              
190             sub slice {
191             # Update comment
192 7     7 1 22 my ($self, $from, $len) = @_;
193 7         13 my $new_seq;
194             my $new_qual;
195 7 100       19 if (defined($len)) {
196 4         10 $new_seq = substr($self->{seq}, $from, $len);
197 4 100       12 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from, $len) : undef;
198             } else {
199 3         10 $new_seq = substr($self->{seq}, $from);
200 3 100       15 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from) : undef;
201             }
202 7         23 return __PACKAGE__->new($new_seq, $self->{name}, $self->{comment}, $new_qual);
203             }
204              
205              
206             sub char2qual {
207 42     42 1 1003 my ($self, $quality_encoded, $offset) = @_;
208             # Check quality_encoded is a single character
209 42 50       83 confess "Quality encoded character must be a single character" if (length($quality_encoded) != 1);
210 42 50       84 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
211 42         147 return unpack("C*", $quality_encoded) - $offset;
212             }
213              
214              
215             sub qual2char {
216 2     2 1 11 my ($self, $quality_integer, $offset) = @_;
217 2 50       12 confess "Quality integer must be an integer value" if ($quality_integer !~ /^\d+$/);
218 2 50       5 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
219 2         11 return chr($quality_integer + $offset);
220             }
221              
222              
223              
224             sub qualities {
225 5     5 1 11 my ($self) = @_;
226 5         11 my @qualities;
227 5 50       13 if (defined $self->{qual}) {
228 5         15 for (my $i = 0; $i < length($self->{qual}); $i++) {
229 40         91 push @qualities, $self->char2qual(substr($self->{qual}, $i, 1), $self->{offset});
230             }
231             }
232 5         19 return @qualities;
233             }
234              
235              
236             sub min_qual {
237 1     1 1 1174 my ($self) = @_;
238 1         3 my @qualities = $self->qualities();
239             # Calculate minimum quality
240 1         6 @qualities = sort {$a <=> $b} @qualities;
  13         18  
241 1         7 return $qualities[0];
242             }
243              
244              
245              
246             sub max_qual {
247 1     1 1 303 my ($self) = @_;
248 1         4 my @qualities = $self->qualities();
249             # Calculate minimum quality
250 1         4 @qualities = sort {$b <=> $a} @qualities;
  13         18  
251 1         2 return $qualities[0];
252             }
253              
254              
255             sub trim_after : lvalue {
256 1     1 1 327 my ($self, $qual_value) = @_;
257 1 50       10 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
258             # Detect first base with quality lower or equal than $qual_value
259 1         2 my $i = 0;
260 1         5 my @qualities = $self->qualities();
261 1         6 for ($i = 0; $i < scalar(@qualities); $i++) {
262 7 100       16 if ($qualities[$i] <= $qual_value) {
263 1         2 last;
264             }
265             }
266             # Trim sequence and quality
267 1         5 my $slice = $self->slice(0, $i);
268 1         5 $self->{seq} = $slice->{seq};
269 1         3 $self->{qual} = $slice->{qual};
270 1         11 return $self;
271             }
272              
273              
274              
275             sub trim_until : lvalue {
276 1     1 1 7 my ($self, $qual_value) = @_;
277 1 50       8 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
278             # Detect first base with quality lower or equal than $qual_value
279 1         4 my $i = 0;
280 1         4 my @qualities = $self->qualities();
281 1         4 for ($i = 0; $i < scalar(@qualities); $i++) {
282 3 100       9 if ($qualities[$i] >= $qual_value) {
283 1         2 last;
284             }
285             }
286             # Trim sequence and quality
287 1         4 my $slice = $self->slice($i);
288 1         6 $self->{seq} = $slice->{seq};
289 1         3 $self->{qual} = $slice->{qual};
290            
291 1         6 return $self;
292             }
293              
294             sub _kmer2num {
295 22     22   36 my ($kmer) = @_;
296              
297 22         53 my %baseVal = ('T' => 0, 'C' => 1, 'A' => 2, 'G' => 3, 'U' => 0);
298 22         28 my $klen = length($kmer);
299 22         37 my $num = 0;
300              
301 22         38 for my $i (0..($klen - 1)) {
302 66 50       120 if (exists $baseVal{substr($kmer, $i, 1)}) {
303 66         93 my $p = 4 ** ($klen - 1 - $i);
304 66         105 $num += $p * $baseVal{substr($kmer, $i, 1)};
305             } else {
306 0         0 $num = -1;
307 0         0 last;
308             }
309             }
310              
311 22         47 return $num;
312             }
313              
314              
315             sub translate {
316 5     5 1 23 my ($self, $code_number) = @_;
317              
318 5         14 my $record = $self->copy();
319             # Default genetic code if not specified
320 5   50     9 $code_number //= 11;
321            
322 5         52 my @code_map = (
323             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #1
324             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
325             "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
326             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
327             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
328             "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
329             "",
330             "",
331             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
332             "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
333             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #11 Bact, Ar and Plant Plast
334             "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
335             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
336             "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
337             "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #15
338             "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
339             "", "", "", "",
340             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
341             "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
342             "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
343             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
344             "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
345             "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
346             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
347             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
348             "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
349             "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
350             "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
351             );
352            
353 5         11 my $code = $code_map[$code_number - 1];
354             # Check sequence is nucleotidic or RNA
355 5 50       26 confess "ERROR FASTX::Seq: Sequence must be nucleotidic" if ($self->{seq} !~ /^[ACGTU]+$/i);
356 5         10 my @nucleotides = split //, $self->seq;
357 5         7 my @translated_sequence;
358            
359 5         15 for (my $i = 0; $i < scalar(@nucleotides) - 2; $i += 3) {
360 22         43 my $codon = $nucleotides[$i] . $nucleotides[$i+1] . $nucleotides[$i+2];
361 22         36 my $num = _kmer2num($codon); # You need to define this function
362 22 50       38 if ($num != -1) {
363 22         62 push @translated_sequence, substr($code, $num, 1);
364            
365             } else {
366 0         0 push @translated_sequence, '-';
367            
368             }
369             }
370            
371 5         12 my $translated_seq = join('', @translated_sequence);
372            
373 5         12 $record->seq($translated_seq);
374             # Remove qual if present
375 5         12 $record->qual(undef);
376 5         34 return $record;
377             }
378              
379              
380             sub string {
381             # Update comment
382 0     0 1 0 my ($self, @args) = @_;
383 0 0       0 if (defined $self->{qual}) {
384 0         0 return $self->asfastq(@args);
385             } else {
386 0         0 return $self->asfasta(@args);
387             }
388            
389             }
390              
391              
392             sub as_string {
393             # Update comment
394 0     0 1 0 my ($self) = @_;
395 0         0 return $self->string();
396             }
397              
398              
399              
400             sub asfasta {
401             # Update comment
402 15     15 1 3695 my ($self, $len) = @_;
403            
404 15   50     54 my $name = $self->{name} // "sequence";
405 15 100 66     79 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
406 15         63 return ">" . $name . $comment . "\n" . _split_string($self->{seq}, $len) . "\n";
407             }
408              
409              
410             sub asfastq {
411             # Update comment
412 25     25 1 1594 my ($self, $user_quality) = @_;
413 25         62 my $quality = $self->{qual};
414              
415 25 100       106 if (defined $user_quality) {
    100          
416             # User requests new quality
417 15 100       32 if (length($user_quality) == 1 ) {
418             # And it's valid!
419 10         24 $quality = $user_quality x length($self->{seq});
420             } else {
421 5         1119 say STDERR "[WARNING] FASTX::Seq->as_fastq(): Provide a _char_ as quality, not a value (", $user_quality,"): defaulting to $FASTX::Seq::DEFAULT_QUALITY";
422 5         41 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
423             }
424             } elsif (not defined $quality) {
425 6         27 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
426             }
427            
428            
429 25   50     76 my $name = $self->{name} // "sequence";
430 25 100 66     157 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
431            
432 25         169 return "@" . $name . $comment . "\n" . $self->{seq} . "\n+\n" . $quality . "\n";
433             }
434              
435              
436              
437              
438             sub as_fasta {
439             # Update comment
440 5     5 1 26 my ($self, @args) = @_;
441 5         10 return $self->asfasta(@args);
442             }
443              
444              
445             sub as_fastq {
446             # Update comment
447 10     10 1 5963 my ($self, @args) = @_;
448 10         28 return $self->asfastq(@args);
449             }
450              
451              
452             sub is_fasta {
453             # Return true if record has no quality
454 13     13 1 88 my ($self) = @_;
455 13 100 66     124 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 0 : 1;
456             }
457              
458              
459             sub is_fastq {
460             # Update comment
461 7     7 1 32 my ($self) = @_;
462 7 100 66     57 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 1 : 0;
463             }
464              
465              
466             sub _split_string {
467 15     15   30 my ($string, $width) = @_;
468 15 100 66     51 if (not defined $width or $width == 0) {
469 10         50 return "$string";
470             }
471              
472 5         10 my $output_string;
473 5         15 for (my $i=0; $i
474 74         96 my $frag = substr($string, $i, $width);
475 74         139 $output_string.=$frag."\n";
476             }
477 5         9 chomp($output_string);
478 5         50 return $output_string;
479             }
480              
481             1;
482              
483             __END__