File Coverage

blib/lib/FASTX/Seq.pm
Criterion Covered Total %
statement 197 209 94.2
branch 73 100 73.0
condition 34 54 62.9
subroutine 34 36 94.4
pod 29 29 100.0
total 367 428 85.7


line stmt bran cond sub pod time code
1             package FASTX::Seq;
2             #ABSTRACT: A class for representing a sequence for FASTX::Reader
3              
4 46     46   213715 use 5.012;
  46         190  
5 46     46   254 use warnings;
  46         106  
  46         1361  
6 46     46   228 use Carp qw(confess);
  46         91  
  46         2068  
7 46     46   2178 use Data::Dumper;
  46         21830  
  46         2305  
8             $Data::Dumper::Sortkeys = 1;
9 46     46   302 use File::Basename;
  46         104  
  46         151361  
10              
11             $FASTX::Seq::VERSION = $FASTX::Reader::VERSION;
12             $FASTX::Seq::DEFAULT_QUALITY = 'I';
13             $FASTX::Seq::DEFAULT_LINE_LEN = 0;
14             $FASTX::Seq::DEFAULT_OFFSET = 33;
15             $FASTX::Seq::QUIET = $ENV{'FASTX_QUIET'} // 0;
16             require Exporter;
17             our @ISA = qw(Exporter);
18              
19              
20              
21             sub new {
22 81     81 1 6015 my $class = shift @_;
23 81         166 my ($seq, $name, $comment, $qual, $offset, $line_len, $default_quality);
24 81 100       190 if (not defined $_[0]) {
25 1         229 confess "ERROR FASTX::Seq: Sequence missing, record cannot be created.\n";
26             }
27             # Descriptive instantiation with parameters -param => value
28 80 100       322 if (substr($_[0], 0, 1) eq '-') {
    50          
29 15         71 my %data = @_;
30             # Try parsing
31 15         54 for my $i (keys %data) {
32 53 100       292 if ($i =~ /^-(seq|sequence)/) {
    100          
    100          
    100          
    100          
    100          
    50          
33 15         47 $seq = $data{$i};
34             } elsif ($i =~ /^-(name|id)/) {
35 12         31 $name = $data{$i};
36             } elsif ($i =~ /^-(comment|desc|description)/) {
37 7         12 $comment = $data{$i};
38             } elsif ($i =~ /^-qual(ity)?/) {
39 13         34 $qual = $data{$i};
40             } elsif ($i =~ /^-offset/) {
41 3         10 $offset = $data{$i};
42             } elsif ($i =~ /^-line_len/) {
43 2         5 $line_len = $data{$i};
44             } elsif ($i =~ /^-default_quality/) {
45 0         0 $default_quality = $data{$i};
46             } else {
47 1         289 confess "ERROR FASTX::Seq: Unknown parameter $i\n";
48             }
49             }
50             } elsif (not defined $seq) {
51             # Positional instantiation
52             # check number of arguments
53 65 50 33     348 confess "ERROR FASTX::Seq: Wrong number of arguments\n" if (scalar(@_) < 1 || scalar(@_) > 4);
54 65         171 ($seq, $name, $comment, $qual) = @_;
55             }
56            
57             # Required NOT empty
58 79 50       188 if (not defined $seq) {
59 0         0 confess "ERROR FASTX::Seq: Sequence missing, record cannot be created\n";
60             }
61              
62             # If quality is provided, must match sequence length
63 79 100 66     481 if ( defined $seq && defined $qual
      100        
64             && (length($seq) != length($qual))) {
65 2         383 confess "Sequence/quality length mismatch";
66             }
67            
68 77         247 my $self = bless {}, $class;
69            
70              
71 77   100     413 $self->{name} = $name // undef;
72 77         177 $self->{seq} = $seq;
73 77   100     305 $self->{comment} = $comment // undef;
74 77   100     230 $self->{qual} = $qual // undef;
75            
76             # Store defaults
77 77   33     336 $self->{default_quality} = $default_quality // $FASTX::Seq::DEFAULT_QUALITY;
78 77   66     233 $self->{line_len} = $line_len // $FASTX::Seq::DEFAULT_LINE_LEN;
79 77   66     236 $self->{offset} = $offset // $FASTX::Seq::DEFAULT_OFFSET;
80            
81 77         279 return $self;
82            
83             }
84              
85              
86             sub copy {
87 10     10 1 35 my ($self) = @_;
88 10         24 my $copy = __PACKAGE__->new($self->seq, $self->name, $self->comment, $self->qual);
89 10         32 $copy->{default_quality} = $self->default_quality;
90 10         19 $copy->line_len = $self->line_len;
91 10         46 $copy->offset = $self->offset;
92 10         25 return $copy;
93             }
94              
95              
96             sub seq : lvalue {
97             # Update sequence
98 93     93 1 4980 my ($self, $new_val) = @_;
99 93 100       194 $self->{seq} = $new_val if (defined $new_val);
100 93         422 return $self->{seq};
101             }
102              
103              
104              
105             sub name : lvalue {
106             # Update name
107 14     14 1 26 my ($self, $new_val) = @_;
108 14 50       32 $self->{seq} = $new_val if (defined $new_val);
109 14         47 return $self->{name};
110             }
111              
112              
113              
114             sub qual : lvalue {
115             # Update quality
116 56     56 1 719 my ($self, $new_val) = @_;
117 56 50       105 $self->{qual} = $new_val if (defined $new_val);
118 56         207 return $self->{qual};
119             }
120              
121              
122             sub comment : lvalue {
123             # Update comment
124 11     11 1 23 my ($self, $new_val) = @_;
125 11 50       24 $self->{comment} = $new_val if (defined $new_val);
126 11         44 return $self->{comment};
127             }
128              
129              
130             sub offset : lvalue {
131             # Update offset
132 29     29 1 325 my ($self, $new_val) = @_;
133 29 50 66     70 confess "ERROR FASTX::Seq: offset must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
134 29 100       53 $self->{offset} = $new_val if (defined $new_val);
135 29         107 return $self->{offset};
136             }
137              
138              
139             sub line_len : lvalue {
140             # Update line_len
141 20     20 1 38 my ($self, $new_val) = @_;
142 20 50 33     48 confess "ERROR FASTX::Seq: line_len must be a positive integer" if (defined $new_val && $new_val !~ /^\d+$/);
143 20 50       41 $self->{line_len} = $new_val if (defined $new_val);
144 20         50 return $self->{line_len};
145             }
146              
147              
148             sub default_quality : lvalue {
149             # Update default_quality
150 10     10 1 19 my ($self, $new_val) = @_;
151 10 50 33     47 confess "ERROR FASTX::Seq: default_quality must be a single character" if (defined $new_val && length($new_val) != 1);
152 10 50       25 $self->{default_quality} = $new_val if (defined $new_val);
153 10         26 return $self->{default_quality};
154             }
155              
156              
157             sub len {
158             # Update comment
159 23     23 1 5121 my ($self) = @_;
160 23         93 return length($self->{seq});
161             }
162              
163              
164              
165              
166              
167             sub rev {
168             # Update comment
169 2     2 1 4 my ($self) = @_;
170 2         8 $self->{seq} = reverse($self->{seq});
171 2 50       7 $self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
172 2         5 return $self;
173             }
174              
175              
176              
177             sub rc {
178             # Update comment
179 1     1 1 3 my ($self) = @_;
180 1         2 $self->rev();
181 1 50       4 if ($self->{seq} =~ /U/i) {
182 0         0 $self->{seq} =~ tr/ACGURYSWKMBDHVacguryswkmbdhv/UGCAYRSWMKVHDBugcayrswmkvhdb/;
183             } else {
184 1         3 $self->{seq} =~ tr/ACGTRYSWKMBDHVacgtryswkmbdhv/TGCAYRSWMKVHDBtgcayrswmkvhdb/;
185             }
186             #$self->{qual} = reverse($self->{qual}) if (defined reverse($self->{qual}));
187 1         2 return $self;
188             }
189              
190              
191             sub slice {
192             # Update comment
193 7     7 1 22 my ($self, $from, $len) = @_;
194 7         13 my $new_seq;
195             my $new_qual;
196 7 100       17 if (defined($len)) {
197 4         11 $new_seq = substr($self->{seq}, $from, $len);
198 4 100       14 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from, $len) : undef;
199             } else {
200 3         8 $new_seq = substr($self->{seq}, $from);
201 3 100       9 $new_qual = defined($self->{qual}) ? substr($self->{qual}, $from) : undef;
202             }
203 7         21 return __PACKAGE__->new($new_seq, $self->{name}, $self->{comment}, $new_qual);
204             }
205              
206              
207             sub char2qual {
208 42     42 1 966 my ($self, $quality_encoded, $offset) = @_;
209             # Check quality_encoded is a single character
210 42 50       109 confess "Quality encoded character must be a single character" if (length($quality_encoded) != 1);
211 42 50       70 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
212 42         146 return unpack("C*", $quality_encoded) - $offset;
213             }
214              
215              
216             sub qual2char {
217 2     2 1 12 my ($self, $quality_integer, $offset) = @_;
218 2 50       13 confess "Quality integer must be an integer value" if ($quality_integer !~ /^\d+$/);
219 2 50       5 $offset = defined $offset ? $offset : $FASTX::Seq::DEFAULT_OFFSET;
220 2         9 return chr($quality_integer + $offset);
221             }
222              
223              
224              
225             sub qualities {
226 5     5 1 12 my ($self) = @_;
227 5         7 my @qualities;
228 5 50       15 if (defined $self->{qual}) {
229 5         12 for (my $i = 0; $i < length($self->{qual}); $i++) {
230 40         101 push @qualities, $self->char2qual(substr($self->{qual}, $i, 1), $self->{offset});
231             }
232             }
233 5         16 return @qualities;
234             }
235              
236              
237             sub min_qual {
238 1     1 1 1179 my ($self) = @_;
239 1         2 my @qualities = $self->qualities();
240             # Calculate minimum quality
241 1         5 @qualities = sort {$a <=> $b} @qualities;
  13         18  
242 1         4 return $qualities[0];
243             }
244              
245              
246              
247             sub max_qual {
248 1     1 1 300 my ($self) = @_;
249 1         3 my @qualities = $self->qualities();
250             # Calculate minimum quality
251 1         4 @qualities = sort {$b <=> $a} @qualities;
  13         19  
252 1         3 return $qualities[0];
253             }
254              
255              
256             sub trim_after : lvalue {
257 1     1 1 286 my ($self, $qual_value) = @_;
258 1 50       9 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
259             # Detect first base with quality lower or equal than $qual_value
260 1         2 my $i = 0;
261 1         3 my @qualities = $self->qualities();
262 1         4 for ($i = 0; $i < scalar(@qualities); $i++) {
263 7 100       16 if ($qualities[$i] <= $qual_value) {
264 1         3 last;
265             }
266             }
267             # Trim sequence and quality
268 1         4 my $slice = $self->slice(0, $i);
269 1         9 $self->{seq} = $slice->{seq};
270 1         3 $self->{qual} = $slice->{qual};
271 1         27 return $self;
272             }
273              
274              
275              
276             sub trim_until : lvalue {
277 1     1 1 6 my ($self, $qual_value) = @_;
278 1 50       9 confess "Quality integer must be an integer value" if ($qual_value !~ /^\d+$/);
279             # Detect first base with quality lower or equal than $qual_value
280 1         2 my $i = 0;
281 1         4 my @qualities = $self->qualities();
282 1         8 for ($i = 0; $i < scalar(@qualities); $i++) {
283 3 100       8 if ($qualities[$i] >= $qual_value) {
284 1         2 last;
285             }
286             }
287             # Trim sequence and quality
288 1         2 my $slice = $self->slice($i);
289 1         3 $self->{seq} = $slice->{seq};
290 1         2 $self->{qual} = $slice->{qual};
291            
292 1         4 return $self;
293             }
294              
295             sub _kmer2num {
296 22     22   36 my ($kmer) = @_;
297              
298 22         51 my %baseVal = ('T' => 0, 'C' => 1, 'A' => 2, 'G' => 3, 'U' => 0);
299 22         37 my $klen = length($kmer);
300 22         27 my $num = 0;
301              
302 22         35 for my $i (0..($klen - 1)) {
303 66 50       110 if (exists $baseVal{substr($kmer, $i, 1)}) {
304 66         91 my $p = 4 ** ($klen - 1 - $i);
305 66         105 $num += $p * $baseVal{substr($kmer, $i, 1)};
306             } else {
307 0         0 $num = -1;
308 0         0 last;
309             }
310             }
311              
312 22         43 return $num;
313             }
314              
315              
316             sub translate {
317 5     5 1 14 my ($self, $code_number) = @_;
318              
319 5         13 my $record = $self->copy();
320             # Default genetic code if not specified
321 5   50     24 $code_number //= 11;
322            
323 5         26 my @code_map = (
324             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #1
325             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSS**VVVVAAAADDEEGGGG",
326             "FFLLSSSSYY**CCWWTTTTPPPPHHQQRRRRIIMMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
327             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
328             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSSSVVVVAAAADDEEGGGG",
329             "FFLLSSSSYYQQCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
330             "",
331             "",
332             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
333             "FFLLSSSSYY**CCCWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
334             "FFLLSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #11 Bact, Ar and Plant Plast
335             "FFLLSSSSYY**CC*WLLLSPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
336             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNKKSSGGVVVVAAAADDEEGGGG",
337             "FFLLSSSSYYY*CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
338             "FFLLSSSSYY*QCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG", #15
339             "FFLLSSSSYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
340             "", "", "", "",
341             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIMMTTTTNNNKSSSSVVVVAAAADDEEGGGG",
342             "FFLLSS*SYY*LCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
343             "FF*LSSSSYY**CC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
344             "FFLLSSSSYY**CCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSSKVVVVAAAADDEEGGGG",
345             "FFLLSSSSYY**CCGWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
346             "FFLLSSSSYY**CC*WLLLAPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
347             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
348             "FFLLSSSSYYQQCCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
349             "FFLLSSSSYYYYCC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
350             "FFLLSSSSYYEECC*WLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG",
351             "FFLLSSSSYYEECCWWLLLLPPPPHHQQRRRRIIIMTTTTNNKKSSRRVVVVAAAADDEEGGGG"
352             );
353            
354 5         8 my $code = $code_map[$code_number - 1];
355             # Check sequence is nucleotidic or RNA
356 5 50       25 confess "ERROR FASTX::Seq: Sequence must be nucleotidic" if ($self->{seq} !~ /^[ACGTU]+$/i);
357 5         11 my @nucleotides = split //, $self->seq;
358 5         9 my @translated_sequence;
359            
360 5         14 for (my $i = 0; $i < scalar(@nucleotides) - 2; $i += 3) {
361 22         52 my $codon = $nucleotides[$i] . $nucleotides[$i+1] . $nucleotides[$i+2];
362 22         30 my $num = _kmer2num($codon); # You need to define this function
363 22 50       37 if ($num != -1) {
364 22         57 push @translated_sequence, substr($code, $num, 1);
365            
366             } else {
367 0         0 push @translated_sequence, '-';
368            
369             }
370             }
371            
372 5         11 my $translated_seq = join('', @translated_sequence);
373            
374 5         13 $record->seq($translated_seq);
375             # Remove qual if present
376 5         11 $record->qual(undef);
377 5         22 return $record;
378             }
379              
380              
381             sub string {
382             # Update comment
383 0     0 1 0 my ($self, @args) = @_;
384 0 0       0 if (defined $self->{qual}) {
385 0         0 return $self->asfastq(@args);
386             } else {
387 0         0 return $self->asfasta(@args);
388             }
389            
390             }
391              
392              
393             sub as_string {
394             # Update comment
395 0     0 1 0 my ($self) = @_;
396 0         0 return $self->string();
397             }
398              
399              
400              
401             sub asfasta {
402             # Update comment
403 15     15 1 4201 my ($self, $len) = @_;
404            
405 15   50     52 my $name = $self->{name} // "sequence";
406 15 100 66     83 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
407 15         101 return ">" . $name . $comment . "\n" . _split_string($self->{seq}, $len) . "\n";
408             }
409              
410              
411             sub asfastq {
412             # Update comment
413 25     25 1 1571 my ($self, $user_quality) = @_;
414 25         92 my $quality = $self->{qual};
415              
416 25 100       71 if (defined $user_quality) {
    100          
417             # User requests new quality
418 15 100       41 if (length($user_quality) == 1 ) {
419             # And it's valid!
420 10         24 $quality = $user_quality x length($self->{seq});
421             } else {
422 5 50       11 if (not $FASTX::Seq::QUIET) {
423 5         102 say STDERR "[WARNING] FASTX::Seq->as_fastq(): Provide a _char_ as quality, not a value (", $user_quality,"): defaulting to $FASTX::Seq::DEFAULT_QUALITY";
424             }
425 5         26 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
426             }
427             } elsif (not defined $quality) {
428 6         17 $quality = $FASTX::Seq::DEFAULT_QUALITY x length($self->{seq});
429             }
430            
431            
432 25   50     60 my $name = $self->{name} // "sequence";
433 25 100 66     152 my $comment = (defined $self->{comment} and length($self->{comment}) > 0) ? " " . $self->{comment} : "";
434            
435 25         171 return "@" . $name . $comment . "\n" . $self->{seq} . "\n+\n" . $quality . "\n";
436             }
437              
438              
439              
440              
441             sub as_fasta {
442             # Update comment
443 5     5 1 20 my ($self, @args) = @_;
444 5         12 return $self->asfasta(@args);
445             }
446              
447              
448             sub as_fastq {
449             # Update comment
450 10     10 1 6099 my ($self, @args) = @_;
451 10         27 return $self->asfastq(@args);
452             }
453              
454              
455             sub is_fasta {
456             # Return true if record has no quality
457 13     13 1 67 my ($self) = @_;
458 13 100 66     92 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 0 : 1;
459             }
460              
461              
462             sub is_fastq {
463             # Update comment
464 7     7 1 22 my ($self) = @_;
465 7 100 66     54 return ((defined $self->{qual}) > 0 and length($self->{qual}) == length($self->{seq})) ? 1 : 0;
466             }
467              
468              
469             sub _split_string {
470 15     15   32 my ($string, $width) = @_;
471 15 100 66     56 if (not defined $width or $width == 0) {
472 10         47 return "$string";
473             }
474              
475 5         10 my $output_string;
476 5         15 for (my $i=0; $i
477 74         110 my $frag = substr($string, $i, $width);
478 74         162 $output_string.=$frag."\n";
479             }
480 5         9 chomp($output_string);
481 5         20 return $output_string;
482             }
483              
484             1;
485              
486             __END__