File Coverage

lib/Bio/Tradis/AddTagsToSeq.pm
Criterion Covered Total %
statement 34 83 40.9
branch 3 16 18.7
condition 0 3 0.0
subroutine 9 10 90.0
pod 0 1 0.0
total 46 113 40.7


line stmt bran cond sub pod time code
1             package Bio::Tradis::AddTagsToSeq;
2             $Bio::Tradis::AddTagsToSeq::VERSION = '1.3.2';
3             # ABSTRACT: Takes a BAM file and creates a new BAM with tr and tq tags added to the sequence and quality strings.
4              
5              
6 1     1   105762 use Moose;
  1         401101  
  1         7  
7 1     1   7583 use Bio::Seq;
  1         47243  
  1         32  
8 1     1   324 use Bio::Tradis::Parser::Bam;
  1         4  
  1         45  
9 1     1   9 use File::Basename;
  1         2  
  1         92  
10              
11 1     1   6 no warnings qw(uninitialized);
  1         2  
  1         830  
12              
13             has 'verbose' => ( is => 'rw', isa => 'Bool', default => 0 );
14             has 'samtools_exec' => ( is => 'rw', isa => 'Str', default => 'samtools' );
15             has 'bamfile' => ( is => 'rw', isa => 'Str', required => 1 );
16             has 'outfile' => (
17             is => 'rw',
18             isa => 'Str',
19             required => 0,
20             default => sub {
21             my ($self) = @_;
22             my $o = $self->bamfile;
23             $o =~ s/\.bam/\.tr\.bam/;
24             $o =~ s/\.cram/\.tr\.cram/;
25             return $o;
26             }
27             );
28              
29             has '_file_extension' => ( is => 'rw', isa => 'Str', lazy => 1, builder => '_build__file_extension' );
30             has 'extension_to_output_switch' => ( is => 'rw', isa => 'HashRef', default => sub{ {cram => '-C', bam => '-b'} } );
31              
32             sub _build__file_extension
33             {
34 1     1   3 my ($self) = @_;
35 1         20 my($filename, $dirs, $suffix) = fileparse($self->bamfile,qr/[^.]*/);
36 1         25 return lc($suffix);
37             }
38              
39             sub add_tags_to_seq {
40 1     1 0 3 my ($self) = @_;
41              
42             #set up BAM parser
43 1         23 my $filename = $self->bamfile;
44 1         18 my $outfile = $self->outfile;
45              
46             #open temp file in SAM format and output headers from current BAM to it
47 1 50       20 print STDERR "Reading ".uc($self->_file_extension)." header\n" if($self->verbose);
48 1         18 system($self->samtools_exec." view -H $filename > tmp.sam");
49 1         45 open( TMPFILE, '>>tmp.sam' );
50              
51             #open BAM file
52 1 50       61 print STDERR "Reading ".uc($self->_file_extension)." file\n" if($self->verbose);
53 1         26 my $pars = Bio::Tradis::Parser::Bam->new( file => $filename, samtools_exec => $self->samtools_exec );
54 1         7 my $read_info = $pars->read_info;
55              
56 1         6 while ( $pars->next_read ) {
57 0         0 my $read_info = $pars->read_info;
58 0         0 my $line = ${$read_info}{READ};
  0         0  
59              
60             # get tags, seq, qual and cigar str
61 0         0 my $trtag = ${$read_info}{tr};
  0         0  
62 0         0 my $tqtag = ${$read_info}{tq};
  0         0  
63              
64 0         0 my $seq_tagged = ${$read_info}{SEQ};
  0         0  
65 0         0 my $qual_tagged = ${$read_info}{QUAL};
  0         0  
66 0         0 my $cigar_update = ${$read_info}{CIGAR};
  0         0  
67              
68             #Check if seq is mapped & rev complement. If so, reformat.
69 0         0 my $mapped = $pars->is_mapped;
70 0         0 my $rev = $pars->is_reverse;
71 0 0 0     0 if ( $mapped && $rev ) {
72              
73             # The transposon is not reverse complimented but the genomic read is
74              
75             # reverse the genomic quality scores.
76 0         0 $qual_tagged = reverse($qual_tagged);
77              
78             # Add the transposon quality score on the beginning
79 0         0 $qual_tagged = $tqtag . $qual_tagged;
80              
81             # Reverse the whole quality string.
82 0         0 $qual_tagged = reverse($qual_tagged);
83              
84             # Reverse the genomic sequence
85 0         0 my $genomic_seq_obj =
86             Bio::Seq->new( -seq => $seq_tagged, -alphabet => 'dna' );
87 0         0 my $reversed_genomic_seq_obj = $genomic_seq_obj->revcom;
88              
89             # Add on the tag sequence
90 0         0 $seq_tagged = $trtag . $reversed_genomic_seq_obj->seq;
91              
92             # Reverse the tag+genomic sequence to get it back into the correct orentation.
93 0         0 my $genomic_and_tag_seq_obj =
94             Bio::Seq->new( -seq => $seq_tagged, -alphabet => 'dna' );
95 0         0 $seq_tagged = $genomic_and_tag_seq_obj->revcom->seq;
96              
97             }
98             else {
99             #print STDERR "$line\n" if(!defined($tqtag));
100 0         0 $seq_tagged = $trtag . $seq_tagged;
101 0         0 $qual_tagged = $tqtag . $qual_tagged;
102             }
103              
104 0 0       0 if ($mapped) {
105 0         0 my $cigar = length($seq_tagged);
106 0         0 $cigar_update = $cigar . 'M';
107             }
108             else {
109 0         0 $cigar_update = '*';
110             }
111              
112             # replace updated fields and print to TMPFILE
113 0         0 my @cols = split( " ", $line );
114 0         0 $cols[5] = $cigar_update;
115 0         0 $cols[9] = $seq_tagged;
116 0         0 $cols[10] = $qual_tagged;
117              
118 0         0 print TMPFILE join( "\t", @cols ) . "\n";
119             }
120 0         0 $pars->close_file_handle;
121 0         0 close TMPFILE;
122              
123             #convert tmp.sam to bam
124 0 0       0 print STDERR "Convert SAM to ".uc($self->_file_extension)."\n" if($self->verbose);
125            
126            
127 0         0 system($self->samtools_exec." view -h -S ".$self->_output_switch." -o $outfile tmp.sam");
128              
129 0 0       0 if ( $self->_number_of_lines_in_bam_file($outfile) !=
130             $self->_number_of_lines_in_bam_file($filename) )
131             {
132 0         0 die
133             "The number of lines in the input and output files don't match, so something's gone wrong\n";
134             }
135              
136             #remove tmp file
137 0         0 unlink("tmp.sam");
138 0         0 return 1;
139             }
140              
141             sub _output_switch
142             {
143 1     1   3 my ( $self ) = @_;
144 1 50       27 if(defined($self->extension_to_output_switch->{$self->_file_extension}))
145             {
146 1         21 return $self->extension_to_output_switch->{$self->_file_extension};
147             }
148             else
149             {
150 0           return '';
151             }
152             }
153              
154             sub _number_of_lines_in_bam_file {
155 0     0     my ( $self, $filename ) = @_;
156 0 0         open( my $fh, '-|', $self->samtools_exec." view $filename | wc -l" )
157             or die "Couldn't open file :" . $filename;
158 0           my $number_of_lines_in_file = <$fh>;
159 0           $number_of_lines_in_file =~ s!\W!!gi;
160 0           return $number_of_lines_in_file;
161             }
162              
163             __PACKAGE__->meta->make_immutable;
164 1     1   7 no Moose;
  1         3  
  1         8  
165             1;
166              
167             __END__
168              
169             =pod
170              
171             =encoding UTF-8
172              
173             =head1 NAME
174              
175             Bio::Tradis::AddTagsToSeq - Takes a BAM file and creates a new BAM with tr and tq tags added to the sequence and quality strings.
176              
177             =head1 VERSION
178              
179             version 1.3.2
180              
181             =head1 SYNOPSIS
182              
183             Bio::Tradis::AddTagsToSeq parses BAM files, adds given tags to the start of the sequence and creates temporary SAM file,
184             which is then converted to BAM
185              
186             use Bio::Tradis::AddTagsToSeq;
187            
188             my $pipeline = Bio::Tradis::AddTagsToSeq->new(bamfile => 'abc');
189             $pipeline->add_tags_to_seq();
190              
191             =head1 NAME
192              
193             Bio::Tradis::AddTagsToSeq
194              
195             =head1 PARAMETERS
196              
197             =head2 Required
198              
199             C<bamfile> - path to/name of file containing reads and tags
200              
201             =head2 Optional
202              
203             C<outfile> - name to assign to output BAM. Defaults to C<file.tr.bam> for an input file named C<file.bam>
204              
205             =head1 METHODS
206              
207             C<add_tags_to_seq> - add TraDIS tags to reads. For unmapped reads, the tag
208             is added to the start of the read sequence and quality
209             strings. For reads where the flag indicates that it is
210             mapped and reverse complemented, the reverse complemented
211             tags are added to the end of the read strings.
212             This is because many conversion tools (e.g. picard) takes
213             the read orientation into account and will re-reverse the
214             mapped/rev comp reads during conversion, leaving all tags
215             in the correct orientation at the start of the sequences
216             in the resulting FastQ file.
217              
218             =head1 AUTHOR
219              
220             Carla Cummins <path-help@sanger.ac.uk>
221              
222             =head1 COPYRIGHT AND LICENSE
223              
224             This software is Copyright (c) 2013 by Wellcome Trust Sanger Institute.
225              
226             This is free software, licensed under:
227              
228             The GNU General Public License, Version 3, June 2007
229              
230             =cut