File Coverage

blib/lib/CracTools/App/Command/Extract.pm
Criterion Covered Total %
statement 33 276 11.9
branch 0 128 0.0
condition 0 51 0.0
subroutine 11 25 44.0
pod 13 13 100.0
total 57 493 11.5


line stmt bran cond sub pod time code
1             package CracTools::App::Command::Extract;
2             {
3             $CracTools::App::Command::Extract::DIST = 'CracTools';
4             }
5             # ABSTRACT: Extract events identified by CRAC.
6             # PODNAME: cractools extract
7             $CracTools::App::Command::Extract::VERSION = '1.22';
8 1     1   21304 use CracTools::App -command;
  1         3  
  1         12  
9              
10 1     1   3772 use strict;
  1         2  
  1         20  
11 1     1   5 use warnings;
  1         9  
  1         35  
12              
13 1     1   4 use Carp;
  1         2  
  1         70  
14 1     1   513 use CracTools;
  1         4  
  1         28  
15 1     1   650 use CracTools::Utils;
  1         2  
  1         31  
16 1     1   617 use CracTools::SAMReader;
  1         2  
  1         28  
17 1     1   6 use CracTools::SAMReader::SAMline;
  1         2  
  1         33  
18 1     1   890 use Parallel::ForkManager 0.7.6;
  1         38062  
  1         45  
19              
20 1     1   8 use constant CHUNK_SIZE => 10000000;
  1         2  
  1         60  
21 1     1   6 use constant MIN_GAP_LENGTH => 0;
  1         2  
  1         4098  
22              
23 0     0 1   sub usage_desc { "cractools extract file.bam [regions] [-r ref.fa] [-p nb_threads] [--splices splice.bed] [--mutations file.vcf] [--chimeras chimeras.tsv]" }
24              
25             sub opt_spec {
26             return (
27 0     0 1   [ "p=i", "Number of process to run", { default => 1 } ],
28             [ "r=s", "Reference file (for VCF purpose)", ],
29             [ "splices|s=s", "Bed file where splices will be extracted." ],
30             [ "chimeras|c=s", "Tabulated file where chimeras will be extracted" ],
31             [ "mutations|m=s", "VCF file where mutations will be extracted" ],
32             [ "coverless-splices", "Consider splice that have no cover" ],
33             [ "stranded", "Strand specific protocol", { default => 'False' } ],
34             );
35             }
36              
37             sub validate_args {
38 0     0 1   my ($self, $opt, $args) = @_;
39 0           my %valid_options = map { $_->[0] => $_->[1] } $self->opt_spec;
  0            
40 0 0         $self->usage_error("Missing BAM file to extract") if @$args < 1;
41 0           for my $name ( @$args ) {
42 0 0         $self->usage_error("$name is not a valid option") if $name =~ /^-/;
43             }
44             }
45              
46             sub execute {
47 0     0 1   my ($self, $opt, $args) = @_;
48              
49 0           my $help = $opt->{help};
50 0           my $man = $opt->{man};
51 0           my $verbose = $opt->{verbose};
52 0           my $splices_file = $opt->{splices};
53 0           my $mutations_file = $opt->{mutations};
54 0           my $chimeras_file = $opt->{chimeras};
55 0           my $coverless_splices = $opt->{coverless_splices};
56 0           my $is_stranded = $opt->{stranded};
57 0           my $ref_file = $opt->{r};
58 0 0         my $nb_process = defined $opt->{p}? $opt->{p} : 1;
59              
60 0           my $bam_file = shift @{$args};
  0            
61 0 0         pod2usage(-verbose => 1) unless defined $bam_file;
62 0           my $bam_reader = CracTools::SAMReader->new($bam_file);
63 0           my $crac_version = $bam_reader->getCracVersionNumber();
64 0           my @regions = @{$args};
  0            
65 0           my $min_gap_length = MIN_GAP_LENGTH;
66              
67 0 0         if(@regions == 0) {
68             # If we need to explore the whole genome
69             # we split it in CHUNK_SIZE regions
70 0           my $it = CracTools::Utils::bamFileIterator($bam_file,"-H");
71 0           while (my $line = $it->()) {
72 0 0         next if $line !~ /^\@SQ/;
73 0           my ($chr,$length) = $line =~ /^\@SQ\s+SN:(\S+)\s+LN:(\d+)/;
74 0           for(my $i = 0; $i < $length/CHUNK_SIZE; $i++) {
75 0           push(@regions,"$chr:".($i*CHUNK_SIZE)."-".(($i+1)*CHUNK_SIZE));
76             }
77             }
78             }
79              
80             # Create Fork pool
81 0           my $pm = Parallel::ForkManager->new($nb_process);
82              
83 0           my %chimeras;
84              
85             # data structure retrieval and handling
86             $pm -> run_on_finish ( # called BEFORE the first call to start()
87             sub {
88 0     0     my ($pid, $exit_code, $ident, $exit_signal, $core_dump, $data_structure_reference) = @_;
89              
90             # retrieve chimeras from childs
91 0 0         if (defined($data_structure_reference)) {
92 0           my $region_chimeras = $data_structure_reference->{chimeras};
93 0           foreach my $key (keys %{$region_chimeras}) {
  0            
94 0           my $chim_key = $key;
95 0           my ($chr1,$pos1,$strand1,$chr2,$pos2,$strand2) = split("@",$chim_key);
96 0           my $reverse_key = join("@",$chr2,$pos2,$strand2*-1,$chr1,$pos1,$strand1*-1);
97 0 0 0       if(!$is_stranded && defined $chimeras{$reverse_key}) {
98 0           $chim_key = $reverse_key;
99             }
100 0 0         if(defined $chimeras{$chim_key}) {
101 0           push(@{$chimeras{$chim_key}{reads}},@{$region_chimeras->{$key}->{reads}});
  0            
  0            
102 0 0         $chimeras{$chim_key}{score} += $region_chimeras->{$key}->{score} if defined $region_chimeras->{$key}->{score};
103             } else {
104 0           $chimeras{$chim_key}{reads} = $region_chimeras->{$key}->{reads};
105 0 0         $chimeras{$chim_key}{score} = $region_chimeras->{$key}->{score} if defined $region_chimeras->{$key}->{score};
106             }
107             }
108             }
109             }
110 0           );
111              
112 0           my $nb_region = 0;
113             # Loop over regions
114             REGION:
115 0           foreach my $region (@regions) {
116 0           $nb_region++;
117              
118             # Fork regions
119 0 0         $pm->start() and next REGION;
120              
121             # Open filehandles on output files
122 0 0         my $splices_fh = CracTools::Utils::getWritingFileHandle($splices_file.".".$nb_region) if defined $splices_file;
123 0 0         my $mutations_fh = CracTools::Utils::getWritingFileHandle($mutations_file.".".$nb_region) if defined $mutations_file;
124              
125 0           my($region_chr,$region_start,$region_end) = $region =~ /(\S+):(\d+)-(\d+)/;
126             # Declare hashes that will store events
127 0           my %splices;
128             my %mutations;
129 0           my %region_chimeras;
130 0           my $bam_it = CracTools::Utils::bamFileIterator($bam_file,$region);
131 0           while(my $raw_line = $bam_it->()) {
132 0           my $line = CracTools::SAMReader::SAMline->new($raw_line);
133 0 0         extractSplicesFromSAMline(\%splices,
134             $line,
135             $is_stranded,
136             $min_gap_length,
137             $coverless_splices,
138             $region_chr,
139             $region_start,
140             $region_end,
141             ) if defined $splices_fh;
142 0 0         extractMutationsFromSAMline(\%mutations,
143             $line,
144             $is_stranded,
145             $region_chr,
146             $region_start,
147             $region_end,
148             $bam_file,
149             $ref_file,
150             $crac_version,
151             ) if defined $mutations_fh;
152 0 0         extractChimerasFromSAMline(\%region_chimeras,
153             $line,
154             $is_stranded,
155             $region_chr,
156             $region_start,
157             $region_end,
158             ) if defined $chimeras_file;
159             }
160 0 0         printSplices(\%splices,$splices_fh) if defined $splices_fh;
161 0 0         printMutations(\%mutations,$mutations_fh) if defined $mutations_fh;
162 0           $pm->finish(0,{chimeras => \%region_chimeras});
163             }
164 0           $pm->wait_all_children;
165              
166 0 0         my $chimeras_fh = CracTools::Utils::getWritingFileHandle($chimeras_file) if defined $chimeras_file;
167 0 0         print $chimeras_fh "#",join("\t",qw( chr1 pos1 strand1 chr2 pos2 strand2 score reads cover)),"\n" if defined $chimeras_fh;
168 0 0         printChimeras(\%chimeras,$chimeras_fh) if defined $chimeras_fh;
169              
170             # Merge Splice and Mutation files
171 0 0         my $splices_fh = CracTools::Utils::getWritingFileHandle($splices_file) if defined $splices_file;
172 0 0         my $mutations_fh = CracTools::Utils::getWritingFileHandle($mutations_file) if defined $mutations_file;
173             # Print headers on output files
174 0 0         print $splices_fh "track name=junctions\n" if defined $splices_fh;
175 0 0         print $mutations_fh "##fileformat=VCFv4.1\n",
176             "###source=$CracTools::DIST (v $CracTools::VERSION)\n",
177             "###INFO=\n",
178             "###INFO=\n",
179             "###INFO=\n",
180             "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\n" if defined $mutations_fh;
181              
182 0           for (my $region_id = 1; $region_id <= $nb_region; $region_id++) {
183 0 0         if(defined $splices_file) {
184 0           my $fh = CracTools::Utils::getReadingFileHandle($splices_file.".".$region_id);
185 0           while(my $line = <$fh>) { print $splices_fh $line; }
  0            
186 0           unlink $splices_file.".".$region_id;
187             }
188 0 0         if(defined $mutations_file) {
189 0           my $fh = CracTools::Utils::getReadingFileHandle($mutations_file.".".$region_id);
190 0           while(my $line = <$fh>) { print $mutations_fh $line; }
  0            
191 0           unlink $mutations_file.".".$region_id;
192             }
193             }
194             }
195              
196              
197             sub extractSplicesFromSAMline {
198 0     0 1   my ($splices,$line,$is_stranded,$min_gap_length,$coverless_splices,$region_chr,$region_start,$region_end) = @_;
199             # Next for secondary alignements
200 0 0 0       if (!$line->isFlagged(256) && !$line->isFlagged(2048)) {
201             # Loop over splices
202 0           foreach my $splice (@{$line->events('Junction')}) {
  0            
203             # Only report splices that belong to the query regions
204             # with a gap > MIN_GAP_LENGTH
205             # and with a 'normal' type (ie. not 'coverless')
206             next if ($splice->{loc}->{pos} >= $region_end
207             || $splice->{loc}->{pos} < $region_start
208             || $splice->{loc}->{chr} ne $region_chr
209 0 0 0       || $splice->{gap} < $min_gap_length
      0        
      0        
210             );
211              
212 0 0 0       next if $splice->{type} eq 'coverless' && !$coverless_splices;
213              
214 0           my @nb = $line->cigar =~ /[D|N|M|X|=](\d+)/g;
215 0           my $mapping_length = 0;
216 0           map {$mapping_length += $_} @nb;
  0            
217             # TODO max_pos could be calculated in order to not integrate
218             # an other splice that is following the current splice
219 0           my $max_pos = $line->pos + $mapping_length;
220 0           my $key = $splice->{loc}->{chr}."@".$splice->{loc}->{pos}."@".$splice->{gap};
221             # If this is a new splice we record all information
222 0 0         if(!defined $splices->{$key}) {
223 0           $splices->{$key}{pos} = $splice->{loc}->{pos};
224 0           $splices->{$key}{min} = $line->pos;
225 0           $splices->{$key}{max} = $max_pos;
226 0           $splices->{$key}{chr} = $splice->{loc}->{chr};
227 0           $splices->{$key}{gap} = $splice->{gap};
228 0           $splices->{$key}{cpt} = 1;
229 0           $splices->{$key}{seq} = $line->seq;
230             # If we are stranded we extract the right splice strand taking in account
231             # PE specificity
232 0 0         if($is_stranded) {
233 0 0         if($line->isFlagged($CracTools::SAMReader::SAMline::flags{FIRST_SEGMENT})) {
234 0           $splices->{$key}{strand} = CracTools::Utils::convertStrand($splice->{loc}->{strand}*-1);
235             } else {
236 0           $splices->{$key}{strand} = CracTools::Utils::convertStrand($splice->{loc}->{strand});
237             }
238             } else {
239 0           $splices->{$key}{strand} = "+";
240             }
241             #if($is_stranded) {
242             # if((!$line->isFlagged(16) && $line->isFlagged(64)) || ($line->isFlagged(16) && $line->isFlagged(128))) {
243             # $splices->{$key}{strand} = "-";
244             # } else {
245             # $splices->{$key}{strand} = "+";
246             # }
247             ## If we are not stranded we print the splice on the forward strand
248             #} else {
249             # $splices->{$key}{strand} = "+";
250             #}
251             # If this is not a new splice we just update informations
252             } else {
253 0           $splices->{$key}{cpt}++;
254 0 0         $splices->{$key}{max} = $max_pos if $splices->{$key}{max} < $max_pos;
255 0 0         $splices->{$key}{min} = $line->pos if $splices->{$key}{min} < $line->pos;
256             }
257             }
258             }
259             }
260              
261             # TODO add support of not stranded RNA-Seq
262             sub printSplices {
263 0     0 1   my $splices = shift;
264 0           my $output_fh = shift;
265 0 0         foreach my $splice (sort {$a->{chr} cmp $b->{chr} || $a->{pos} <=> $b->{pos}} values %{$splices}) {
  0            
  0            
266              
267             print $output_fh join "\t", $splice->{chr},
268             $splice->{min},
269             $splice->{max},
270             "CRAC_SPLICE_CALLING",
271             $splice->{cpt},
272             $splice->{strand},
273             $splice->{min},
274             $splice->{max},
275             0,
276             2,
277             ($splice->{pos}-$splice->{min}).",".($splice->{max}-($splice->{pos}+$splice->{gap})),
278 0           "0,".(($splice->{pos}+$splice->{gap})-$splice->{min}), "\n";
279             }
280             }
281              
282             sub extractMutationsFromSAMline {
283 0     0 1   my ($mutations,$line,$is_stranded,$region_chr,$region_start,$region_end,$bam_file,$ref_file,$crac_version) = @_;
284             # Next for secondary alignements
285 0 0 0       if(!$line->isFlagged(256) && !$line->isFlagged(2048)) {
286             # If read has SNP
287 0           foreach my $snp (@{$line->events('SNP')}) {
  0            
288             # TODO make sure the SNP is contained in the region
289 0           my ($chr,$pos) = ($snp->{loc}->{chr},$snp->{loc}->{pos});
290              
291             # We only add SNPS that correspond to the current region
292 0 0 0       next if $pos >= $region_end || $pos < $region_start;
293              
294             # This correspond to a 1bp deletion
295 0 0         if($snp->{actual} eq '?') {
    0          
296 0           $pos--;
297 0           $snp->{expected} = getSeqOrNs($chr,$pos,2,$ref_file);
298 0           $snp->{actual} = substr $snp->{expected}, 0, 1;
299             # This correspond to a 1bp insertion
300             } elsif($snp->{expected} eq '?') {
301             #$pos--; # Crac Already gives the position before the insertion...
302             # Get deleted seq on the reference to avoid cases where a insertion
303             # and a substitution are merged and CRAC has some difficulties
304             # to handle that...
305             #$snp->{actual} = substr $line->seq, $snp->{pos}-1, 2;
306 0           $snp->{actual} = getSeqOrNs($chr,$pos,1,$ref_file).substr $line->seq,$snp->{pos}, 1;
307 0           $snp->{expected} = substr $snp->{actual}, 0, 1;
308             }
309              
310             # Uniq Hash key for SNP
311 0           my $key = 'SNP'.$chr."@".$pos;
312              
313             addMutation(
314             mutations => $mutations,
315             bam_file => $bam_file,
316             key => $key,
317             chr => $chr,
318             pos => $pos,
319             reference => $snp->{expected},
320             alternative => $snp->{actual},
321             crac_score => $snp->{score},
322 0           read_id => $line->qname,
323             );
324             }
325              
326             # If read has a small deletions
327 0           foreach my $del (@{$line->events('Del')}) {
  0            
328 0           my ($chr,$pos) = ($del->{loc}->{chr},$del->{loc}->{pos});
329              
330             # We only add deletions that correspond to the current region
331 0 0 0       next if $pos >= $region_end || $pos < $region_start;
332              
333             # Uniq hash key for deletion
334 0           my $key = 'Del'.$chr."@".$pos;#."@".$del->{nb};
335            
336             # Because VCF needs 1 base before the deletion
337             # but crac gives the position before the deletion so we
338             # do not need this
339 0 0 0       if(defined $crac_version && CracTools::Utils::isVersionGreaterOrEqual($crac_version,'2.4.0')) {
340 0           $pos--;
341             }
342             #$pos--;
343              
344             # Extract deleted genome sequence from reference if available
345 0           my $reference = getSeqOrNs($chr,$pos,$del->{nb}+1,$ref_file);
346 0           my $alternative = substr $reference, 0, 1;
347              
348             addMutation(
349             mutations => $mutations,
350             bam_file => $bam_file,
351             key => $key,
352             chr => $chr,
353             pos => $pos,
354             reference => $reference,
355             alternative => $alternative,
356             crac_score => $del->{score},
357 0           read_id => $line->qname,
358             );
359             }
360              
361             # If read has a small insertions
362 0           foreach my $ins (@{$line->events('Ins')}) {
  0            
363 0           my ($chr,$pos) = ($ins->{loc}->{chr},$ins->{loc}->{pos});
364              
365             # We only add insertions that correspond to the current region
366 0 0 0       next if $pos >= $region_end || $pos < $region_start;
367              
368             # Uniq hash key for insertion
369 0           my $key = 'Ins'.$chr."@".$pos;#."@".$ins->{nb};
370              
371             # Because VCF needs 1 base before the insertion
372 0 0 0       if(defined $crac_version && !CracTools::Utils::isVersionGreaterOrEqual($crac_version,'2.4.0')) {
373 0           $pos--;
374             }
375              
376             # CRAC gives the position in the read after the insertion...
377 0           my $inserted_sequence;
378 0 0 0       if(defined $crac_version && CracTools::Utils::isVersionGreaterOrEqual($crac_version,'2.4.0')) {
379 0           $inserted_sequence = substr $line->seq, $ins->{pos}, $ins->{nb};
380             } else {
381 0           $inserted_sequence = substr $line->seq, $ins->{pos}-$ins->{nb}+1, $ins->{nb};
382             }
383              
384 0           my $alternative = getSeqOrNs($chr,$pos,1,$ref_file).$inserted_sequence;
385 0           my $reference = substr $alternative, 0, 1;
386              
387             addMutation(
388             mutations => $mutations,
389             bam_file => $bam_file,
390             key => $key,
391             chr => $chr,
392             pos => $pos,
393             reference => $reference,
394             alternative => $alternative,
395             crac_score => $ins->{score},
396 0           read_id => $line->qname,
397             );
398             }
399             }
400             }
401              
402             sub printMutations {
403 0     0 1   my $mutations = shift;
404 0           my $output_fh = shift;
405 0 0         foreach my $mut (sort {$a->{chr} cmp $b->{chr} || $a->{pos} <=> $b->{pos}} values %{$mutations}) {
  0            
  0            
406             print $output_fh join("\t",$mut->{chr},
407             $mut->{pos}+1, # to be 1-based
408             '.',
409             $mut->{reference},
410 0           (join ",", keys %{$mut->{alternative}}),
411             '.',
412             'PASS',
413 0           'DP='.$mut->{total}.';AF='.(join ",", map{$_/$mut->{total}} values %{$mut->{alternative}}).';CS='.$mut->{crac_score}),"\n";
  0            
  0            
414             }
415             }
416              
417             ## SUBROUTINES
418             sub addMutation {
419 0     0 1   my %args = @_;
420              
421             # Convert sequences to the uppercase
422 0           $args{reference} = uc $args{reference};
423 0           $args{alternative} = uc $args{alternative};
424            
425 0           my $MUTATIONS = $args{mutations};
426 0           my $key = $args{key};
427             #my $key = $args{chr}."@".$args{pos};
428 0           my $mut = $MUTATIONS->{$key};
429              
430 0 0         if(defined $mut) {
431             # If this mutation position already exists but the
432             # reference sequence is not the same (shorter or longer)
433             # we need to update the alternative
434 0 0         if($mut->{reference} ne $args{reference}) {
435             # If the current reference sequence is larger that the new one
436             # then we only need to update the alternative sequence of
437             # the new mutation
438 0 0         if(length($args{reference}) < length($mut->{reference})) {
    0          
439 0           $args{alternative} = $args{alternative}.substr($mut->{reference},length($args{reference}));
440             } elsif(length($args{reference}) > length($mut->{reference})) {
441             # If the new mutation reference is larger that the old one,
442             # we need to update all the previously added alternatives
443 0           foreach my $alt (keys %{$mut->{alternative}}) {
  0            
444 0           my $new_alt = $alt.substr($args{reference},length($mut->{reference}));
445 0           $mut->{alternative}{$new_alt} = $mut->{alternative}{$alt};
446 0           delete $mut->{alternative}{$alt};
447             }
448             # Then change the reference sequence
449 0           $mut->{reference} = $args{reference};
450             } else {
451             # If both reference are not equal but have the same length
452             # then we have a problem sir
453 0           carp "Reference (".$args{reference}.") is different than the previous one (".$mut->{reference}.") for read (".$args{read_id}.")";
454 0           return 0;
455             }
456             }
457              
458             # Finally we can add the new alternative to the current mutation entry
459 0 0         if(defined $MUTATIONS->{$key}{alternative}{$args{alternative}}) {
460 0           $MUTATIONS->{$key}{alternative}{$args{alternative}}++;
461             } else {
462 0           $MUTATIONS->{$key}{alternative}{$args{alternative}} = 1;
463             }
464             } else {
465 0           $MUTATIONS->{$key}{chr} = $args{chr};
466 0           $MUTATIONS->{$key}{pos} = $args{pos};
467 0           $MUTATIONS->{$key}{reference} = $args{reference};
468 0           $MUTATIONS->{$key}{alternative}{$args{alternative}} = 1;
469 0           $MUTATIONS->{$key}{crac_score} = $args{crac_score};
470 0           $MUTATIONS->{$key}{total} = countReadCoverFromRegion($args{bam_file},$args{chr},$args{pos});
471 0 0         $MUTATIONS->{$key}{total} = 1 if $MUTATIONS->{$key}{total} == 0; # Because of a bug in Crac 1.5.0 where chimeric alignements have been baddly positionned
472             }
473             }
474              
475             sub countReadCoverFromRegion {
476 0     0 1   my ($bam_file,$chr,$pos1,$pos2) = @_;
477 0 0         $pos2 = $pos1 if !defined $pos2;
478 0           my $nb_total = 0; # Start at 0 because we will also count the current read
479 0           my $overlap_it = CracTools::Utils::bamFileIterator($bam_file,"$chr:$pos1-$pos2");
480 0           while(my $line = $overlap_it->()) {
481 0           $nb_total++;
482             }
483 0           return $nb_total;
484             }
485              
486             # TODO Create some kind of buffer to avoid repeating a query that have already been
487             # submited
488             my %retrieved_seq_buffer = ();
489             sub getSeqOrNs {
490 0     0 1   my ($chr,$pos,$length,$ref_file) = @_;
491             # Init seq with buffer
492 0           my $seq = $retrieved_seq_buffer{"$chr-$pos-$length"};
493              
494 0 0 0       if(defined $ref_file && !defined $seq) {
495             # Retrieve the seq from the reference
496 0           $seq = CracTools::Utils::getSeqFromIndexedRef($ref_file,$chr,$pos,$length,'raw');
497             # We update the buffer
498 0 0         $retrieved_seq_buffer{"$chr-$pos-$length"} = $seq if defined $seq;
499             }
500              
501             # If no seq is available we put N's instead
502 0 0         if(!defined $seq) {
503 0           $seq .= 'N' for(1..$length);
504             }
505 0           return $seq;
506             }
507              
508             sub extractChimerasFromSAMline {
509 0     0 1   my ($chimeras,$line,$is_stranded,$region_chr,$region_start,$region_end) = @_;
510             # Next for secondary alignements
511 0 0 0       if (!$line->isFlagged(256) && !$line->isFlagged(2048)) {
512             # Loop over splices
513 0           foreach my $chimera (@{$line->events('chimera')}) {
  0            
514 0           my ($chr1,$pos1,$strand1) = @{$chimera->{loc1}}{'chr','pos','strand'};
  0            
515 0           my ($chr2,$pos2,$strand2) = @{$chimera->{loc2}}{'chr','pos','strand'};
  0            
516              
517 0           my $key = join("@",$chr1,$pos1,$strand1,$chr2,$pos2,$strand2);
518 0           my $reverse_key = join("@",$chr2,$pos2,$strand2*-1,$chr1,$pos1,$strand1*-1);
519              
520 0 0 0       if(!$is_stranded && defined $chimera->{$reverse_key}) {
    0 0        
521 0           $key = $reverse_key;
522             }elsif($is_stranded && $line->isFlagged($CracTools::SAMReader::SAMline::flags{FIRST_SEGMENT})) {
523 0           $key = $reverse_key;
524             }
525              
526 0 0         if(defined $chimeras->{$key}) {
527 0           push(@{$chimeras->{$key}->{reads}},$line->qname);
  0            
528 0 0         $chimeras->{$key}->{score} += $chimera->{score} if defined $chimera->{score};
529             } else {
530 0           $chimeras->{$key}->{reads} = [$line->qname];
531 0 0         $chimeras->{$key}->{score} = $chimera->{score} if defined $chimera->{score};
532             }
533             }
534             }
535             }
536              
537             sub printChimeras {
538 0     0 1   my $chimeras = shift;
539 0           my $output_fh = shift;
540             #foreach my $chimera (sort {$a->{chr1} <=> $b->{chr1} || $a->{pos1} <=> $b->{pos1}} values %{$chimeras}) {
541 0           foreach my $chim_key (keys %{$chimeras}) {
  0            
542            
543 0           my($chr1,$pos1,$strand1,$chr2,$pos2,$strand2) = split("@",$chim_key);
544              
545             print $output_fh join "\t", $chr1,
546             $pos1,
547             CracTools::Utils::convertStrand($strand1),
548             $chr2,
549             $pos2,
550             CracTools::Utils::convertStrand($strand2),
551 0           defined $chimeras->{$chim_key}->{score}? $chimeras->{$chim_key}->{score}/@{$chimeras->{$chim_key}->{reads}} : 'N/A',
552 0           join(",",@{$chimeras->{$chim_key}->{reads}}),
553 0 0         scalar @{$chimeras->{$chim_key}->{reads}}
  0            
554             , "\n";
555             }
556             }
557              
558             1;
559              
560             __END__