File Coverage

lib/Bio/Tradis/AddTagsToSeq.pm
Criterion Covered Total %
statement 37 86 43.0
branch 3 16 18.7
condition 0 3 0.0
subroutine 9 10 90.0
pod 0 1 0.0
total 49 116 42.2


line stmt bran cond sub pod time code
1             package Bio::Tradis::AddTagsToSeq;
2             $Bio::Tradis::AddTagsToSeq::VERSION = '1.3.3';
3             # ABSTRACT: Takes a BAM file and creates a new BAM with tr and tq tags added to the sequence and quality strings.
4              
5              
6 1     1   110301 use Moose;
  1         404720  
  1         7  
7 1     1   7800 use Bio::Seq;
  1         47479  
  1         30  
8 1     1   325 use Bio::Tradis::Parser::Bam;
  1         5  
  1         59  
9 1     1   12 use File::Basename;
  1         2  
  1         95  
10              
11 1     1   6 no warnings qw(uninitialized);
  1         2  
  1         864  
12              
13             has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
14             has 'samtools_exec' => ( is => 'rw', isa => 'Str', default => 'samtools' );
15             has 'bamfile' => ( is => 'rw', isa => 'Str', required => 1 );
16             has 'outfile' => (
17             is => 'rw',
18             isa => 'Str',
19             required => 0,
20             default => sub {
21             my ($self) = @_;
22             my $o = $self->bamfile;
23             $o =~ s/\.bam/\.tr\.bam/;
24             $o =~ s/\.cram/\.tr\.cram/;
25             return $o;
26             }
27             );
28              
29             has '_file_extension' => ( is => 'rw', isa => 'Str', lazy => 1, builder => '_build__file_extension' );
30             has 'extension_to_output_switch' => ( is => 'rw', isa => 'HashRef', default => sub{ {cram => '-C', bam => '-b'} } );
31              
32             sub _build__file_extension
33             {
34 1     1   3 my ($self) = @_;
35 1         20 my($filename, $dirs, $suffix) = fileparse($self->bamfile,qr/[^.]*/);
36 1         25 return lc($suffix);
37             }
38              
39             sub add_tags_to_seq {
40 1     1 0 2 my ($self) = @_;
41              
42             #set up BAM parser
43 1         24 my $filename = $self->bamfile;
44 1         19 my $outfile = $self->outfile;
45              
46             #open temp file in SAM format and output headers from current BAM to it
47 1         114 my @now = localtime();
48 1         10 my $timeStamp = sprintf("%04d%02d%02d%02d%02d%02d",
49             $now[5]+1900, $now[4]+1, $now[3],
50             $now[2], $now[1], $now[0]);
51 1         4 my $tmp_sam = "tmp.$timeStamp.sam";
52 1 50       28 print STDERR "Reading ".uc($self->_file_extension)." header\n" if($self->verbose);
53 1         20 system($self->samtools_exec." view -H $filename > $tmp_sam");
54 1         51 open( TMPFILE, ">>$tmp_sam" );
55              
56             #open BAM file
57 1 50       63 print STDERR "Reading ".uc($self->_file_extension)." file\n" if($self->verbose);
58 1         26 my $pars = Bio::Tradis::Parser::Bam->new( file => $filename, samtools_exec => $self->samtools_exec );
59 1         6 my $read_info = $pars->read_info;
60              
61 1         5 while ( $pars->next_read ) {
62 0         0 my $read_info = $pars->read_info;
63 0         0 my $line = ${$read_info}{READ};
  0         0  
64              
65             # get tags, seq, qual and cigar str
66 0         0 my $trtag = ${$read_info}{tr};
  0         0  
67 0         0 my $tqtag = ${$read_info}{tq};
  0         0  
68              
69 0         0 my $seq_tagged = ${$read_info}{SEQ};
  0         0  
70 0         0 my $qual_tagged = ${$read_info}{QUAL};
  0         0  
71 0         0 my $cigar_update = ${$read_info}{CIGAR};
  0         0  
72              
73             #Check if seq is mapped & rev complement. If so, reformat.
74 0         0 my $mapped = $pars->is_mapped;
75 0         0 my $rev = $pars->is_reverse;
76 0 0 0     0 if ( $mapped && $rev ) {
77              
78             # The transposon is not reverse complimented but the genomic read is
79              
80             # reverse the genomic quality scores.
81 0         0 $qual_tagged = reverse($qual_tagged);
82              
83             # Add the transposon quality score on the beginning
84 0         0 $qual_tagged = $tqtag . $qual_tagged;
85              
86             # Reverse the whole quality string.
87 0         0 $qual_tagged = reverse($qual_tagged);
88              
89             # Reverse the genomic sequence
90 0         0 my $genomic_seq_obj =
91             Bio::Seq->new( -seq => $seq_tagged, -alphabet => 'dna' );
92 0         0 my $reversed_genomic_seq_obj = $genomic_seq_obj->revcom;
93              
94             # Add on the tag sequence
95 0         0 $seq_tagged = $trtag . $reversed_genomic_seq_obj->seq;
96              
97             # Reverse the tag+genomic sequence to get it back into the correct orentation.
98 0         0 my $genomic_and_tag_seq_obj =
99             Bio::Seq->new( -seq => $seq_tagged, -alphabet => 'dna' );
100 0         0 $seq_tagged = $genomic_and_tag_seq_obj->revcom->seq;
101              
102             }
103             else {
104             #print STDERR "$line\n" if(!defined($tqtag));
105 0         0 $seq_tagged = $trtag . $seq_tagged;
106 0         0 $qual_tagged = $tqtag . $qual_tagged;
107             }
108              
109 0 0       0 if ($mapped) {
110 0         0 my $cigar = length($seq_tagged);
111 0         0 $cigar_update = $cigar . 'M';
112             }
113             else {
114 0         0 $cigar_update = '*';
115             }
116              
117             # replace updated fields and print to TMPFILE
118 0         0 my @cols = split( " ", $line );
119 0         0 $cols[5] = $cigar_update;
120 0         0 $cols[9] = $seq_tagged;
121 0         0 $cols[10] = $qual_tagged;
122              
123 0         0 print TMPFILE join( "\t", @cols ) . "\n";
124             }
125 0         0 $pars->close_file_handle;
126 0         0 close TMPFILE;
127              
128             #convert tmp.sam to bam
129 0 0       0 print STDERR "Convert SAM to ".uc($self->_file_extension)."\n" if($self->verbose);
130            
131            
132 0         0 system($self->samtools_exec." view -h -S ".$self->_output_switch." -o $outfile $tmp_sam");
133              
134 0 0       0 if ( $self->_number_of_lines_in_bam_file($outfile) !=
135             $self->_number_of_lines_in_bam_file($filename) )
136             {
137 0         0 die
138             "The number of lines in the input and output files don't match, so something's gone wrong\n";
139             }
140              
141             #remove tmp file
142 0         0 unlink("$tmp_sam");
143 0         0 return 1;
144             }
145              
146             sub _output_switch
147             {
148 1     1   2 my ( $self ) = @_;
149 1 50       29 if(defined($self->extension_to_output_switch->{$self->_file_extension}))
150             {
151 1         21 return $self->extension_to_output_switch->{$self->_file_extension};
152             }
153             else
154             {
155 0           return '';
156             }
157             }
158              
159             sub _number_of_lines_in_bam_file {
160 0     0     my ( $self, $filename ) = @_;
161 0 0         open( my $fh, '-|', $self->samtools_exec." view $filename | wc -l" )
162             or die "Couldn't open file :" . $filename;
163 0           my $number_of_lines_in_file = <$fh>;
164 0           $number_of_lines_in_file =~ s!\W!!gi;
165 0           return $number_of_lines_in_file;
166             }
167              
168             __PACKAGE__->meta->make_immutable;
169 1     1   8 no Moose;
  1         2  
  1         9  
170             1;
171              
172             __END__
173              
174             =pod
175              
176             =encoding UTF-8
177              
178             =head1 NAME
179              
180             Bio::Tradis::AddTagsToSeq - Takes a BAM file and creates a new BAM with tr and tq tags added to the sequence and quality strings.
181              
182             =head1 VERSION
183              
184             version 1.3.3
185              
186             =head1 SYNOPSIS
187              
188             Bio::Tradis::AddTagsToSeq parses BAM files, adds given tags to the start of the sequence and creates temporary SAM file,
189             which is then converted to BAM
190              
191             use Bio::Tradis::AddTagsToSeq;
192            
193             my $pipeline = Bio::Tradis::AddTagsToSeq->new(bamfile => 'abc');
194             $pipeline->add_tags_to_seq();
195              
196             =head1 NAME
197              
198             Bio::Tradis::AddTagsToSeq
199              
200             =head1 PARAMETERS
201              
202             =head2 Required
203              
204             C<bamfile> - path to/name of file containing reads and tags
205              
206             =head2 Optional
207              
208             C<outfile> - name to assign to output BAM. Defaults to C<file.tr.bam> for an input file named C<file.bam>
209              
210             =head1 METHODS
211              
212             C<add_tags_to_seq> - add TraDIS tags to reads. For unmapped reads, the tag
213             is added to the start of the read sequence and quality
214             strings. For reads where the flag indicates that it is
215             mapped and reverse complemented, the reverse complemented
216             tags are added to the end of the read strings.
217             This is because many conversion tools (e.g. picard) takes
218             the read orientation into account and will re-reverse the
219             mapped/rev comp reads during conversion, leaving all tags
220             in the correct orientation at the start of the sequences
221             in the resulting FastQ file.
222              
223             =head1 AUTHOR
224              
225             Carla Cummins <path-help@sanger.ac.uk>
226              
227             =head1 COPYRIGHT AND LICENSE
228              
229             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
230              
231             This is free software, licensed under:
232              
233             The GNU General Public License, Version 3, June 2007
234              
235             =cut