File Coverage

blib/lib/Bio/ViennaNGS/SpliceJunc.pm
Criterion Covered Total %
statement 33 242 13.6
branch 0 94 0.0
condition 0 6 0.0
subroutine 11 16 68.7
pod 5 5 100.0
total 49 363 13.5


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2015-02-06 16:29:08 mtw>
3              
4             package Bio::ViennaNGS::SpliceJunc;
5              
6 1     1   3749 use Exporter;
  1         2  
  1         48  
7 1     1   4 use version; our $VERSION = qv('0.12_15');
  1         1  
  1         8  
8 1     1   64 use strict;
  1         7  
  1         37  
9 1     1   5 use warnings;
  1         0  
  1         30  
10 1     1   4 use Data::Dumper;
  1         2  
  1         41  
11 1     1   375 use Bio::ViennaNGS;
  1         1  
  1         33  
12 1     1   5 use Bio::ViennaNGS::Fasta;
  1         1  
  1         20  
13 1     1   4 use IPC::Cmd qw(can_run run);
  1         1  
  1         48  
14 1     1   5 use File::Basename;
  1         1  
  1         52  
15 1     1   4 use Path::Class;
  1         1  
  1         40  
16 1     1   4 use Carp;
  1         5  
  1         2302  
17              
18             our @ISA = qw(Exporter);
19             our @EXPORT = ();
20             our @EXPORT_OK = qw(bed6_ss_from_bed12 bed6_ss_from_rnaseq
21             bed6_ss_to_bed12 intersect_sj ss_isCanonical);
22              
23             # bed6_ss_from_bed12( $bed12,$dest,$window,$can,$fastaO)
24             #
25             # Extracts splice junctions from BED12 annotation.
26             #
27             # Writes a BED6 file for each transcript found in the BED12, listing
28             # all splice sites of this transcript, optionally flanking it with a
29             # window of +/-$window nt.
30             sub bed6_ss_from_bed12{
31 0     0 1   my ($bed12,$dest,$window,$can,$fastaobjR) = @_;
32 0           my ($i,$tr_name,$pos5,$pos3);
33 0           my ($splicesites,$c,$totalsj,$cansj) = 0x4;
34 0           my @bedline = ();
35 0           my $this_function = (caller(0))[3];
36              
37 0 0         croak "ERROR [$this_function] $bed12 does not exists\n"
38             unless (-e $bed12);
39 0 0         croak "ERROR [$this_function] $dest does not exist"
40             unless (-d $dest);
41              
42 0 0         open(BED12IN, "< $bed12") or croak $!;
43 0           while(<BED12IN>){
44 0           chomp;
45 0           my ($chr,$chromStart,$chromEnd,$name,$score,$strand,$thickStart,
46             $thickEnd,$itemRgb,$blockCount,$blockSizes,$blockStarts) = split("\t");
47 0           my @blockSize = split(/,/,$blockSizes);
48 0           my @blockStart = split(/,/,$blockStarts);
49 0 0         unless (scalar @blockSize == scalar @blockStart){
50 0           croak "ERROR: unequal element count in blockStarts and blockSizes\n";
51             }
52 0           my $fn = sprintf("%s_%d-%d_%s.annotatedSS.bed6",$chr,$chromStart,$chromEnd,$name);
53 0           my $bed6_fn = file($dest,$fn);
54 0           my $tr_count = 1;
55              
56 0 0         if ($blockCount >1){ # only transcripts with 2 or more exons (!!!)
57              
58 0 0         open(BED6OUT, "> $bed6_fn") or croak "cannot open BED6OUT $!";
59              
60 0           for ($i=0;$i<$blockCount-1;$i++){
61 0           $totalsj++;
62 0           $pos5 = $chromStart+$blockStart[$i]+$blockSize[$i];
63 0           $pos3 = $chromStart+$blockStart[$i+1];
64 0 0         if($can){
65 0           $c = ss_isCanonical($chr,$pos5,$pos3,$fastaobjR);
66 0           $cansj++;
67             }
68 0           $tr_name = sprintf("%s.%02d",$name,$tr_count++);
69 0           @bedline = join("\t",$chr,eval($pos5-$window),
70             eval($pos5+$window),$tr_name,$c,$strand);
71 0           print BED6OUT "@bedline\n";
72 0           @bedline = join("\t",$chr,eval($pos3-$window),
73             eval($pos3+$window),$tr_name,$c,$strand);
74 0           print BED6OUT "@bedline\n";
75             } # end for
76              
77 0           close(BED6OUT);
78             } # end if
79             } # end while
80 0           close(BED12IN);
81             }
82              
83             # bed6_ss_from_rnaseq ($bed_in,$dest,$window,$mincov,$can,$fastaO)
84             #
85             # Extracts splice junctions from mapped RNA-seq data
86             #
87             # Writes a BED6 file for each splice junction present in the input,
88             # optionally flanking it with a window of +/-$window nt. Only splice
89             # junctions supported by at least $mcov reads are considered.
90             sub bed6_ss_from_rnaseq{
91 0     0 1   my ($bed_in,$dest,$window,$mcov,$can,$fastaobjR) = @_;
92 0           my ($reads,$proper,$passed,$pos5,$pos3);
93 0           my $c = 0;
94 0           my @bedline = ();
95 0           my $this_function = (caller(0))[3];
96              
97 0 0         croak "ERROR [$this_function] $bed_in does not exist\n"
98             unless (-e $bed_in);
99 0 0         croak "ERROR [$this_function] $dest does not exist"
100             unless (-d $dest);
101              
102 0 0         open(INBED, "< $bed_in") or croak $!;
103 0           while(<INBED>){
104 0           chomp;
105 0           my ($chr, $start, $end, $info, $score, $strand) = split("\t");
106 0           $end = $end-2; # required for segemehl's BED6 files
107              
108 0 0         if ($info =~ /^splits\:(\d+)\:(\d+)\:(\d+):(\w):(\w)/){
109 0           $reads = $1;
110 0           $proper = $4;
111 0           $passed = $5;
112 0 0         next unless ($proper eq 'N');
113 0 0         next unless ($passed =~ /[PFM]$/);
114 0 0         next if($reads < $mcov);
115             }
116             else {
117 0           croak "ERROR [$this_function] unsupported INFO field in input BED:\n$info\n";
118             }
119 0           $pos5 = $start;
120 0           $pos3 = $end;
121 0 0         if($can){$c = ss_isCanonical($chr,$pos5,$pos3,$fastaobjR);}
  0            
122 0           my $fn = sprintf("%s_%d-%d.mappedSS.bed6",$chr,$start,$end);
123 0           my $bed6_fn = file($dest,$fn);
124 0           open(BED6OUT, "> $bed6_fn");
125 0           @bedline = join("\t",$chr,eval($start-$window),
126             eval($start+$window),$info,$c,$strand);
127 0           print BED6OUT "@bedline\n";
128 0           @bedline = join("\t",$chr,eval($end-$window),
129             eval($end+$window),$info,$c,$strand);
130 0           print BED6OUT "@bedline\n";
131 0           close(BED6OUT);
132             }
133 0           close(INBED);
134             }
135              
136             # bed6_ss_to_bed12 ($bed_in,$dest,$window,$mcov)
137             #
138             # Produce BED12 from BED6 file holdig splice junctions from mapped
139             # RNA-seq data
140             sub bed6_ss_to_bed12{
141 0     0 1   my ($bed_in,$dest,$window,$mcov,$circ) = @_;
142 0           my ($reads,$proper,$passed,$pos5,$pos3,$basename,$fn,$bed12_fn);
143 0           my @result = ();
144 0           my @bedline = ();
145 0           my $this_function = (caller(0))[3];
146              
147 0 0         croak "ERROR [$this_function] $bed_in does not exist\n"
148             unless (-e $bed_in);
149 0 0         croak "ERROR [$this_function] $dest does not exist"
150             unless (-d $dest);
151              
152 0           $basename = fileparse($bed_in,qr/\.[^.]*/);
153 0           $fn = $basename.".bed12";
154 0           $bed12_fn = file($dest,$fn);
155 0           open(BED12OUT, "> $bed12_fn");
156              
157 0 0         open(INBED, "< $bed_in") or croak $!;
158 0           while(<INBED>){
159 0           chomp;
160 0           my ($chr, $start, $end, $info, $score, $strand) = split("\t");
161              
162 0 0         if ($info =~ /^splits\:(\d+)\:(\d+)\:(\d+):(\w):(\w)/){
163 0           $reads = $1;
164 0           $proper = $4;
165 0           $passed = $5;
166 0 0         if ($circ == 1){ # skip 'N' (normal), 'L' (left) and 'R' (right)
167 0 0         next unless ($proper =~ /[C]$/);
168             }
169             else { # skip 'L', 'R' and 'C'
170 0 0         next unless ($proper =~ /[N]$/);
171             }
172 0 0         next unless ($passed =~ /[PM]$/); # ignore 'F'
173 0 0         next if($reads < $mcov);
174             }
175             else {
176 0           croak "ERROR [$this_function] unsupported INFO field in input BED:\n$info\n";
177             }
178 0           $pos5 = $start;
179 0           $pos3 = $end;
180              
181 0           @bedline = join("\t",$chr,eval($start-$window),
182             eval($end+$window),$info,$score,$strand,$start,$end,
183             "0","2","1,1","0,".eval($end-$start-1));
184 0           print BED12OUT "@bedline\n";
185             }
186 0           close(INBED);
187 0           close(BED12OUT);
188              
189 0           push (@result, $bed12_fn);
190 0           return @result;
191             }
192              
193             # intersect_sj($p_annot,$p_mapped,$dest,$prefix,$window,$mil)
194             #
195             # Intersect splice junctions determined by RNA-seq with annotated
196             # splice junctions. Determine novel and existing splice junctions.
197             #
198             # Writes BED6 files for existing and novel splice junctions to $dest
199             # and reurns an array with the absolute path to the two resulting BED
200             # files
201             sub intersect_sj{
202 0     0 1   my ($p_annot,$p_mapped,$dest,$prefix,$window,$mil) = @_;
203 0           my ($dha,$dhm);
204 0           my $processed = 0;
205 0           my @junctions = ();
206 0           my @transcript_beds = ();
207 0           my %asj = (); # annotated splice junctions hash
208 0 0         my $bedtools = can_run('bedtools') or croak "bedtools not found";
209 0 0         my $sortBed = can_run('sortBed') or croak "sortBed not found";
210 0           my $this_function = (caller(0))[3];
211              
212 0 0         croak "ERROR [$this_function] $p_annot does not exist\n"
213             unless (-d $p_annot);
214 0 0         croak "ERROR [$this_function] $p_mapped does not exist\n"
215             unless (-d $p_mapped);
216 0 0         croak "ERROR [$this_function] $dest does not exist\n"
217             unless (-d $dest);
218              
219             # get a list of all files in $p_annot
220 0 0         opendir($dha, $p_annot) or croak "Cannot opendir $p_annot: $!";
221 0           while(readdir $dha) { push @transcript_beds, $_; }
  0            
222 0           closedir($dha);
223              
224             # get a list of all splice junctions seen in RNA-seq data
225 0 0         opendir($dhm, $p_mapped) or croak "Cannot opendir $p_mapped: $!";
226 0           @junctions = grep { /^(chr\d+)\_(\d+)-(\d+)\.mappedSS\.bed6/ } readdir($dhm);
  0            
227              
228             # process splice junctions seen in RNA-seq data
229 0           foreach my $file (@junctions){
230 0           $processed++;
231 0 0         croak "Unexpected file name pattern\n" unless ($file =~ /(chr\d+)\_(\d+)-(\d+)/);
232 0           my $sc = $1;
233 0           my $s5 = $2;
234 0           my $s3 = $3;
235 0           my $pattern = $sc."_".$s5."-".$s3;
236              
237 0 0 0       my @annotated_beds = grep { /^(chr\d+)\_(\d+)-(\d+)/ && $2<=$s5 && $3>=$s3 && $sc eq $1} @transcript_beds;
  0   0        
238             #print"\t intersecting against ".(eval $#annotated_beds+1)." transcripts: @annotated_beds \n";
239              
240             # intersect currently opened SJ against all transcripts in @annotated_beds
241 0           foreach my $i (@annotated_beds){
242 0           my $a = file($p_mapped,$file);
243 0           my $b = file($p_annot,$i);
244 0           my $intersect_cmd = "$bedtools intersect -a $a -b $b -c -nobuf";
245 0           open(INTERSECT, $intersect_cmd."|");
246 0           while(<INTERSECT>){
247 0           chomp;
248 0 0         if ($_ =~ /1$/) { $asj{$pattern} = 1;}
  0            
249             }
250 0           close(INTERSECT);
251             }
252 0 0         if ( $processed % 1000 == 0){
253 0           print STDERR "processed $processed splice junctions\n";
254             }
255             }
256 0           closedir($dhm);
257              
258             # go through the mapped splice junctions files once again and
259             # separate novel from existing splice junctions
260 0 0         if (length($prefix)>0){$prefix .= ".";}
  0            
261 0           my $outname_exist = file($dest, $prefix."exist.SS.bed");
262 0           my $outname_exist_u = file($dest, $prefix."exist.SS.u.bed");
263 0           my $outname_novel = file($dest, $prefix."novel.SS.bed");
264 0           my $outname_novel_u = file($dest, $prefix."novel.SS.u.bed");
265 0           open (EXISTOUT, "> $outname_exist_u");
266 0           open (NOVELOUT, "> $outname_novel_u");
267              
268             # write new ones to NOVELOUT; existing ones to EXISTOUT
269 0           foreach my $file (@junctions){
270 0 0         if ($file =~ m/^(chr\d+\_\d+-\d+)/){
271 0           my $pattern = $1;
272 0           my $fn = file($p_mapped,$file);
273 0 0         open (SJ, "< $fn") or croak "Cannot open $fn $!";
274 0           while(<SJ>){
275 0           chomp;
276 0           $_ = m/^(chr\w+)\s(\d+)\s(\d+)\s(splits:\d+:\d+:\d+:\w:\w)\s(\d+)\s([+-01])/;
277 0           my $chr = $1;
278 0           my $start = $2;
279 0           my $end = $3;
280 0           my $name = $4;
281 0           my $score = $5;
282 0           my $strand = $6;
283 0           my @bedline = join("\t",$chr,eval($start+$window),
284             eval($start+$window+1),$name,$score,$strand);
285 0 0         if (exists $asj{$pattern}){ # annotated splice junction
286 0           print EXISTOUT "@bedline\n";
287             }
288             else { # novel splice junction
289 0           print NOVELOUT "@bedline\n";
290             }
291             } # end while
292 0           close(SJ);
293             } # end if
294 0           else{ carp "Error with parsing BED6 junction file names __FILE__ __LINE__\n";}
295             }
296 0           close(EXISTOUT);
297 0           close(NOVELOUT);
298              
299             # sort the resulting bed files
300 0           my $cmd = "$bedtools sort -i $outname_exist_u > $outname_exist";
301 0           my ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
302             run( command => $cmd, verbose => 0 );
303 0 0         if( !$success ) {
304 0           print STDERR "ERROR [$this_function] Call to $bedtools unsuccessful\n";
305 0           print STDERR "ERROR: this is what the command printed:\n";
306 0           print join "", @$full_buf;
307 0           croak $!;
308             }
309 0           unlink($outname_exist_u);
310              
311 0           $cmd = "$bedtools sort -i $outname_novel_u > $outname_novel";
312 0           ( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
313             run( command => $cmd, verbose => 0 );
314 0 0         if( !$success ) {
315 0           print STDERR "ERROR [$this_function] Call to $bedtools unsuccessful\n";
316 0           print STDERR "ERROR: this is what the command printed:\n";
317 0           print join "", @$full_buf;
318 0           croak $!;
319             }
320 0           unlink($outname_novel_u);
321              
322 0           printf STDERR "processed $processed splice junctions\n";
323 0           my @result = ();
324 0           push(@result, $outname_exist);
325 0           push(@result, $outname_novel);
326 0           return @result;
327             }
328              
329             # ss_isCanonical ( $chr,$p5,$p3,$fastaO )
330              
331             # Checks whether a given splice junction is canonical, ie. whether the
332             # first and last two nucleotides of the enclosed intron correspond to
333             # a certain nucleotide motif. $chr is the chromosome name, $p5 and $p3
334             # the 5' and 3' ends of the splice junction and $fastaobjR is a
335             # Bio::PrimarySeq::Fasta object holding the underlying reference
336             # genome.
337             #
338             # Th most common canonical splice junction motif is GT-AG (shown
339             # below). Other canonical motifs are GC->AG and AT->AC.
340             #
341             # ------------------>
342             # 5'===]GT..........AG[====3'
343             #
344             # <-----------------
345             # 3'===]GA..........TG[====5'
346             #
347             sub ss_isCanonical{
348 0     0 1   my ($chr,$p5,$p3,$fo) = @_;
349 0           my ($seqL,$seqR,$pattern);
350 0           my $ss_motif_length = 2;
351 0           my $this_function = (caller(0))[3];
352 0           my $c = -1;
353              
354 0           $seqL = $fo->stranded_subsequence($chr,$p5+1,$p5+$ss_motif_length,"+");
355 0           $seqR = $fo->stranded_subsequence($chr,$p3-$ss_motif_length+1,$p3,"+");
356              
357 0           $pattern = sprintf("%s|%s",$seqL,$seqR);
358             #print STDERR "[$this_function] p5->p3 ($p5 -- $p3) $seqL -- $seqR\n";
359              
360 0 0         if ($pattern =~ /^GT|AG$/) { $c = 1000;}
  0 0          
    0          
    0          
    0          
    0          
361 0           elsif ($pattern =~ /^CT|AC$/) { $c = 1000; }
362 0           elsif ($pattern =~ /^GC|AG$/) { $c = 1000; }
363 0           elsif ($pattern =~ /^CT|GC$/) { $c = 1000; }
364 0           elsif ($pattern =~ /^AT|AC$/) { $c = 1000; }
365 0           elsif ($pattern =~ /^GT|AT$/) { $c = 1000; }
366 0           else { $c = 0;}
367              
368 0           return $c;
369             }
370              
371             1;
372             __END__
373              
374             =head1 NAME
375              
376             Bio::ViennaNGS::SpliceJunc - Perl extension for alternative splicing
377             analysis
378              
379             =head1 SYNOPSIS
380              
381             use Bio::ViennaNGS::SpliceJunc;
382             use Bio::ViennaNGS::Fasta;
383              
384             # get a Bio::ViennaNGS::Fasta object
385             my $fastaO = Bio::ViennaNGS::Fasta->new($fasta_in);
386              
387             # Extract annotated splice sites from BED12
388             bed6_ss_from_bed12($bed12_in,$dest_annot,$window,$want_canonical,$fastaO);
389              
390             # Extract mapped splice junctions from RNA-seq data
391             bed6_ss_from_rnaseq($bed_in,$dest_ss,$window,$mincov,$want_canonical,$fastaO);
392              
393             # Check for each splice junction seen in RNA-seq if it overlaps with
394             # any annotated splice junction
395             @res = intersect_sj($dest_annot,$dest_ss,$dest,$prefix,$window,$mil);
396              
397             # Convert splice junctions seen in RNA-seq data to BED12
398             @res = bed6_ss_to_bed12($s_in,$outdir,$window,$mincov,$want_circular);
399              
400             # Check whether a splice junction is canonical
401             $c = ss_isCanonical($chr,$pos5,$pos3,$fastaO);
402              
403             =head1 DESCRIPTION
404              
405             L<Bio::ViennaNGS::SpliceJunc> is a Perl module for alternative
406             splicing (AS) analysis. It provides routines for identification,
407             characterization and visualization of novel and existing (annotated)
408             splice junctions from RNA-seq data.
409              
410             Identification of novel splice junctions is based on intersecting
411             potentially novel splice junctions from RNA-seq data with annotated
412             splice junctions.
413              
414             =head2 SUBROUTINES
415              
416             =over
417              
418             =item bed6_ss_from_bed12($bed12,$dest,$window,$can,$fastaO)
419              
420             Extracts splice junctions from a BED12 file (provided via argument
421             C<$bed12>), writes a BED6 file for each transcript to C<$dest>,
422             containing all its splice junctions. If C<$can> is 1, canonical splice
423             junctions are reported in the 'name' field of the output BED6 file.
424             Output splice junctions can be flanked by a window of +/- C<$window>
425             nt. C<$fastaO> is a L<Bio::ViennaNGS::Fasta> object. Each splice
426             junction is represented as two bed lines in the output BED6.
427              
428             =item bed6_ss_from_rnaseq($bed_in,$dest,$window,$mcov,$can,$fastaO)
429              
430             Extracts splice junctions from mapped RNA-seq data. The input BED6
431             file should contain coordinates of introns in the following syntax:
432              
433             chr1 3913 3996 splits:97:97:97:N:P 0 +
434              
435             The fourth column in this BED file (correponding to the 'name' field
436             according to the L<BED
437             specification|http://genome.ucsc.edu/FAQ/FAQformat.html#format1>)
438             should be a colon-separated string of six elements, where the first
439             element should be 'splits' and the second element is assumed to hold
440             the number of reads supporting this splice junction. The fifth element
441             indicates the splice junction type: A capital 'N' determines a normal
442             splice junction, whereas 'C' indicates circular and 'T' indicates
443             trans-splice junctions, respectively. Only normal splice junctions
444             ('N') are considered, the rest is skipped. Elements 3, 4 and 6 are not
445             further processed.
446              
447             We recommend using
448             F<segemehl|http://www.bioinf.uni-leipzig.de/Software/segemehl/> for
449             generating this type of BED6 files. This routine is, however, not
450             limited to F<segemehl> output. BED6 files containing splice junction
451             information from other short read mappers or third-party sources will
452             be processed if they are formatted as described above.
453              
454             This routine writes a BED6 file for each splice junction provided in
455             the input to C<$dest>. Output splice junctions can be flanked by a
456             window of +/- C<$window> nt. Canonical splice junctions are reported
457             in the 'name' field of the output BED6 file if C<$can> is 1 and
458             C<$featO> is a L<Bio::ViennaNGS::Fasta> object. Each splice junction
459             is represented as two BED lines in the output BED6. Only splice
460             junctions that are supported by at least C<$mcov> reads are reported.
461              
462             =item bed6_ss_to_bed12($bed_in,$dest,$window,$mcov,$circ)
463              
464             Produce BED12 output for splice junctions found in RNA-seq data. Input
465             BED6 files (provided via C<$bed_in>) are supposed to conform to the
466             F<segemehl|http://www.bioinf.uni-leipzig.de/Software/segemehl/>
467             standard format for reporting splice junctions, which has the
468             following syntax:
469              
470             chr1 3913 3996 splits:97:97:97:N:P 0 +
471              
472             See L<bed6_ss_rom_rnaseq> for details.
473              
474             C<$dest> is the output path. Output splice junctions can optionally be
475             flanked by a window of +/- C<$window> nt. Only splice junctions that
476             are supported by at least C<$mcov> reads are reported. If C<$circ> is
477             1, B<circular> splice junctions are reported (if present in the
478             input), else B<normal> splice junctions are processed.
479              
480             =item intersect_sj($p_annot,$p_mapped,$dest,$prefix,$window,$mil)
481              
482             Intersects all splice junctions identified in an RNA-seq experiment
483             with annotated splice junctions. Identifies and characterizes novel
484             and existing splice junctions. Each BED6 file in C<$p_mapped> is
485             intersected with those transcript splice junction BED6 files in
486             C<$p_annot>, whose genomic location spans the query splice
487             junction. This is to prevent the tool from intersecting each splice
488             site found in the mapped RNA-seq data with B<all> annotated
489             transcripts. C<$mil> specifies a maximum intron length.
490              
491             The intersection operations are performed with F<bedtools intersect>
492             from the L<BEDtools|https://github.com/arq5x/bedtools2> suite). BED
493             sorting operations are performed with F<bedtools sort>.
494              
495             Writes two BED6 files to C<$dest> (optionally prefixed by C<$prefix>), which
496             contain novel and existing splice junctions, respectively.
497              
498             =item ss_isCanonical($chr,$p5,$p3,$fo)
499              
500             Checks whether a given splice junction is canonical, ie. whether the
501             first and last two nucleotides of the enclosed intron correspond to a
502             certain nucleotide motif. C<$chr> is the chromosome name, C<$p5> and
503             C<$p3> the 5' and 3' ends of the splice junction and C<$fo> is a
504             L<Bio::ViennaNGS::Fasta> object holding the underlying reference
505             genome
506              
507             This routine does not explicitly consider standedness in the sense
508             that splice junction motifs are evaluated in terms of the forward
509             strand of the underlying reference sequence. This is best explained by
510             an example: Consider the splice junction motif GU->G on the reverse
511             strand. In 5' to 3' direction of the forward strandm this junction
512             reads CT->AC. A splice junction is canonical if its motif corresponds
513             to one of the following cases:
514              
515             5'===]GT|CT....AG|AC[====3' ie GT->AG or CT->AC
516             5'===]GC|CT....AG|GC[====3' ie GC->AG or CT->GC
517             5'===]AT|GT....AC|AT[====3' ie AT->AC or GT->AT
518              
519             =back
520              
521             =head1 DEPENDENCIES
522              
523             This modules depends on the following Perl modules:
524              
525             =over
526              
527             =item L<Bio::ViennaNGS>
528              
529             =item L<Bio::ViennaNGS::Fasta>
530              
531             =item L<IPC::Cmd>
532              
533             =item L<Path::Class>
534              
535             =item L<Carp>
536              
537             =back
538              
539             L<Bio::ViennaNGS::SpliceJunc> uses third-party tools for computing
540             intersections of BED files: F<bedtools intersect> from the
541             L<BEDtools|http://bedtools.readthedocs.org/en/latest/content/tools/intersect.html>
542             suite is used to compute overlaps and F<bedtools sort> is used to sort
543             BED output files. Make sure that those third-party utilities are
544             available on your system, and that hey can be found and executed by
545             the perl interpreter. We recommend installing the latest version of
546             L<BEDtools|https://github.com/arq5x/bedtools2> on your system.
547              
548             =head1 SEE ALSO
549              
550             L<Bio::ViennaNGS>
551              
552             =head1 AUTHOR
553              
554             Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>
555              
556             =head1 COPYRIGHT AND LICENSE
557              
558             Copyright (C) 2015 Michael T. Wolfinger E<lt>michael@wolfinger.euE<gt>
559              
560             This library is free software; you can redistribute it and/or modify
561             it under the same terms as Perl itself, either Perl version 5.10.0 or,
562             at your option, any later version of Perl 5 you may have available.
563              
564             This software is distributed in the hope that it will be useful, but
565             WITHOUT ANY WARRANTY; without even the implied warranty of
566             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
567             General Public License for more details.
568              
569              
570             =cut