File Coverage

blib/lib/Bio/ViennaNGS/UCSC.pm
Criterion Covered Total %
statement 39 396 9.8
branch 0 108 0.0
condition n/a
subroutine 13 30 43.3
pod 2 17 11.7
total 54 551 9.8


line stmt bran cond sub pod time code
1             # -*-CPerl-*-
2             # Last changed Time-stamp: <2015-02-06 16:29:26 mtw>
3              
4             package Bio::ViennaNGS::UCSC;
5              
6 1     1   1377 use Exporter;
  1         2  
  1         47  
7 1     1   7 use version; our $VERSION = qv('0.12_15');
  1         2  
  1         7  
8 1     1   88 use strict;
  1         2  
  1         35  
9 1     1   6 use warnings;
  1         2  
  1         29  
10 1     1   732 use Template;
  1         24690  
  1         38  
11 1     1   11 use Cwd;
  1         2  
  1         82  
12 1     1   6 use File::Basename;
  1         2  
  1         72  
13 1     1   7 use IPC::Cmd qw(can_run run);
  1         2  
  1         65  
14 1     1   6 use File::Share ':all';
  1         1  
  1         208  
15 1     1   7 use Path::Class;
  1         2  
  1         72  
16 1     1   6 use Data::Dumper;
  1         121  
  1         67  
17 1     1   7 use Carp;
  1         3  
  1         71  
18 1     1   6 use Bio::ViennaNGS::Util qw(fetch_chrom_sizes);
  1         2  
  1         6403  
19              
20             our @ISA = qw(Exporter);
21              
22             our @EXPORT_OK = qw( make_assembly_hub make_track_hub );
23              
24             our @EXPORT = ();
25              
26             #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#
27             #^^^^^^^^^^^ Subroutines ^^^^^^^^^^#
28             #^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^#
29              
30             sub make_assembly_hub{
31 0     0 1   my ($fasta_path, $filesdir, $basedir, $baseURL, $big_wig_ids, $log) = @_;
32 0           my ($basename,$dir,$ext);
33 0           my $this_function = (caller(0))[3];
34             #check arguments
35 0 0         croak ("ERROR [$this_function] \$fasta_path does not exist\n")
36             unless (-e $fasta_path);
37 0 0         croak ("ERROR [$this_function] \$basedir does not exist\n")
38             unless (-d $basedir);
39 0 0         croak ("ERROR [$this_function]: no URL (network location for upload to UCSC) provided")
40             unless(defined $baseURL);
41              
42 0 0         if (defined $log){
43 0 0         open(LOG, ">>", $log) or croak "$!";
44             }
45              
46 0 0         unless ($baseURL =~ /\/$/) { $baseURL .= "/"; }
  0            
47              
48 0           my $tmp_path = dist_file('Bio-ViennaNGS', "hub.txt" );
49 0           ($basename,$dir,$ext) = fileparse($tmp_path,qr/\..*/);
50 0           my $template_path = dir($dir,"template-AssemblyHub");
51              
52 0 0         croak ("ERROR [$this_function] template directory not found\n")
53             unless (-d $template_path);
54 0 0         my $faToTwoBit = can_run('faToTwoBit') or
55             croak ("ERROR [$this_function] faToTwoBit is not installed!");
56              
57 0 0         my $bedToBigBed = can_run('bedToBigBed') or
58             croak ("ERROR [$this_function] bedToBigBed is not installed!");
59              
60             # bedfiles path
61 0           my $parsedHeaderRef = parse_fasta_header($fasta_path,$this_function);
62 0           my @parsedHeader = @$parsedHeaderRef;
63 0           my $unchecked_accession = $parsedHeader[0];
64 0           my $scientificName = $parsedHeader[1];
65 0           my $accession = valid_ncbi_accession($unchecked_accession);
66             # create assembly hub directory structure
67 0           my $assembly_hub_name = "assemblyHub";
68 0           my $assembly_hub_directory = dir($basedir, $assembly_hub_name);
69 0           my $genome_assembly_name = $accession;
70 0           my $genome_assembly_directory = dir($assembly_hub_directory,$genome_assembly_name);
71 0           mkdir $assembly_hub_directory;
72 0           mkdir $genome_assembly_directory;
73 0 0         if (defined $log){
74 0           print LOG "LOG Base directory: $assembly_hub_directory\n";
75 0           print LOG "LOG Assembly Hub directory: $genome_assembly_directory\n";
76             }
77              
78             #2-bit fasta file conversion
79 0           my $fa_modified = file($assembly_hub_directory, $accession.".fa");
80 0           modify_fasta_header($fasta_path,$fa_modified,$accession);
81 0           my $twoBit = file($genome_assembly_directory, $accession.".2bit");
82 0           my $fastaToTwobit_cmd = "$faToTwoBit $fa_modified $twoBit";
83 0 0         if (defined $log){ print LOG "LOG [$this_function] $fastaToTwobit_cmd\n";}
  0            
84              
85 0           my( $success, $error_message, $full_buf, $stdout_buf, $stderr_buf ) =
86             run( command => $fastaToTwobit_cmd, verbose => 0 );
87 0 0         if( !$success ) {
88 0           print STDERR "ERROR [$this_function] External command call unsuccessful\n";
89 0           print STDERR "ERROR: this is what the command printed:\n";
90 0           print join "", @$full_buf;
91 0           croak $!;
92             }
93              
94             #template definition
95 0           my $template = Template->new({
96             INCLUDE_PATH => ["$template_path"],
97             RELATIVE=>1,
98             });
99              
100             #construct hub.txt
101 0           my $hubtxt_path = file($assembly_hub_directory,"hub.txt")->stringify;
102 0           my $hubtxt_file = "hub.txt";
103 0           my $hubtxt_vars =
104             {
105             hubName => $accession,
106             shortLabel => $accession,
107             longLabel => $accession,
108             genomesFile => "genome.txt",
109             email => 'email',
110             descriptionURL => "$baseURL" . "description.html"
111             };
112 0 0         $template->process($hubtxt_file,$hubtxt_vars,"$hubtxt_path") ||
113             croak "Template process failed: ", $template->error(), "\n";
114              
115             #construct genome.txt
116 0           my $genometxt_path = file($assembly_hub_directory, "genome.txt")->stringify;
117 0           my $genometxt_file = "genome.txt";
118 0           my $genometxt_vars =
119             {
120             genome => $accession,
121             trackDb => file($accession, "trackDb.txt"),
122             groups => file($accession, "groups.txt"),
123             description => "$accession",
124             twoBitPath => file($accession,$accession.".2bit"),
125             organism => "organism",
126             defaultPos => $accession,
127             orderKey => "10",
128             scientificName => "$scientificName",
129             htmlPath => file($accession,"description.html")
130             };
131 0 0         $template->process($genometxt_file,$genometxt_vars,$genometxt_path) or
132             croak "Template process failed: ", $template->error(), "\n";
133              
134             #construct description.html
135 0           my $description_html_path = file($genome_assembly_directory, "description.html")->stringify;
136 0           my $description_html_file = "description.html";
137 0           my $description_html_vars =
138             {
139             imageLink => "imageLink",
140             imageSource => "imageSource",
141             imageAlternative => "imageAlternative",
142             taxonomicName => "taxonomicName",
143             imageOrigin => "imageOrigin",
144             imageOriginDescription => "imageOriginDescription",
145             ucscId => "ucscId",
146             sequencingId => "sequencingId",
147             assemblyDate => "assemblyDate",
148             genbankAccessionId => "genbankAccessionId",
149             ncbiGenomeInformationLink => "ncbiGenomeInformationLink",
150             ncbiGenomeInformationDescription => "ncbiGenomeInformationDescription",
151             ncbiAssemblyInformationLink => "ncbiAssemblyInformationLink",
152             ncbiAssemblyInformationDescription => "ncbiAssemblyInformationDescription",
153             bioProjectInformationLink => "bioProjectInformationLink",
154             bioProjectInformationDescription => "bioProjectInformationDescription",
155             sequenceAnnotationLink => "sequenceAnnotationLink"
156             };
157 0 0         $template->process($description_html_file,$description_html_vars,$description_html_path) or
158             croak "Template process failed: ", $template->error(), "\n";
159              
160 0           my $groups = make_group("annotation", "Annotation", "1", "0");
161              
162             #construct group.txt
163 0           my $group_txt_path = file($genome_assembly_directory, "groups.txt")->stringify;
164 0           my $group_txt_file = 'groups.txt';
165 0           my $group_txt_vars =
166             {
167             groups => "$groups",
168             };
169 0 0         $template->process($group_txt_file,$group_txt_vars,$group_txt_path) or
170             croak "Template process failed: ", $template->error(), "\n";
171              
172             #construct big bed
173 0           my $tracksList;
174             #Bigbeds are only created from infolder
175 0 0         unless($filesdir =~ /-/){
176 0           my $chromosome_size = retrieve_chromosome_size($fasta_path);
177 0           my $chromosome_size_filepath = file($genome_assembly_directory,"$accession.chrom.sizes");
178 0           write_chromosome_size_file($chromosome_size_filepath,$accession,$chromosome_size);
179 0           convert_tracks($filesdir, $genome_assembly_directory, $accession, $bedToBigBed, $chromosome_size_filepath);
180 0           my @trackfiles = retrieve_tracks($genome_assembly_directory, $baseURL, $assembly_hub_name, $accession);
181            
182 0           foreach my $track (@trackfiles){
183 0           my $trackString = make_track(@$track);
184 0           $tracksList .= $trackString;
185             }
186             }
187            
188             #big wigs
189 0           my $bigwig_tracks_string = "";
190 0 0         unless($big_wig_ids=~/^-$/){
191 0           $bigwig_tracks_string = retrieve_bigwig_tracks($genome_assembly_directory, $baseURL, $assembly_hub_name, $accession, $big_wig_ids);
192             }
193 0           $tracksList .= $bigwig_tracks_string;
194             #construct trackDb.txt
195 0           my $trackDb_txt_path = file($genome_assembly_directory, "trackDb.txt")->stringify;
196 0           my $trackDb_txt_file = 'trackDb.txt';
197 0           my $trackDb_txt_vars =
198             {
199             tracks => "$tracksList"
200             };
201 0 0         $template->process($trackDb_txt_file,$trackDb_txt_vars,$trackDb_txt_path) or
202             croak "Template process failed: ", $template->error(), "\n";
203              
204 0 0         if (defined $log){
205 0           print LOG "LOG Assembly Hub created\n";
206 0           close(LOG);
207             }
208             }
209              
210             sub make_track_hub{
211 0     0 1   my ($species, $filesdir, $basedir, $baseURL, $chrom_sizes_file, $big_wig_ids, $log) = @_;
212 0           my ($basename,$dir,$ext);
213 0           my $this_function = (caller(0))[3];
214              
215             #check arguments
216 0 0         croak ("ERROR [$this_function] \no species provided\n")
217             unless ($species);
218 0 0         croak ("ERROR [$this_function] \$basedir does not exist\n")
219             unless (-d $basedir);
220 0 0         croak ("ERROR [$this_function]: no URL (network location for upload to UCSC) provided")
221             unless(defined $baseURL);
222              
223 0 0         if (defined $log){
224 0 0         open(LOG, ">>", $log) or croak "$!";
225             }
226              
227 0 0         unless ($baseURL =~ /\/$/) { $baseURL .= "/"; }
  0            
228              
229 0           my $tmp_path = dist_file('Bio-ViennaNGS', "hub.txt" );
230 0           ($basename,$dir,$ext) = fileparse($tmp_path,qr/\..*/);
231 0           my $template_path = dir($dir,"template-TrackHub");
232              
233 0 0         croak ("ERROR [$this_function] template directory not found\n")
234             unless (-d $template_path);
235 0 0         my $faToTwoBit = can_run('faToTwoBit') or
236             croak ("ERROR [$this_function] faToTwoBit is not installed!");
237              
238 0 0         my $bedToBigBed = can_run('bedToBigBed') or
239             croak ("ERROR [$this_function] bedToBigBed is not installed!");
240              
241             # create track hub directory structure
242 0           my $track_hub_name = "trackHub";
243 0           my $track_hub_directory = dir($basedir, $track_hub_name);
244 0           my $genome_assembly_name = $species;
245 0           my $genome_assembly_directory = dir($track_hub_directory,$genome_assembly_name);
246 0           mkdir $track_hub_directory;
247 0           mkdir $genome_assembly_directory;
248 0 0         if (defined $log){
249 0           print LOG "LOG Base directory: $track_hub_directory\n";
250 0           print LOG "LOG Track Hub directory: $genome_assembly_directory\n";
251             }
252              
253             #template definition
254 0           my $template = Template->new({
255             INCLUDE_PATH => ["$template_path"],
256             RELATIVE=>1,
257             });
258              
259             #construct hub.txt
260 0           my $hubtxt_path = file($track_hub_directory,"hub.txt")->stringify;
261 0           my $hubtxt_file = "hub.txt";
262 0           my $hubtxt_vars =
263             {
264             hubName => $species,
265             shortLabel => $species,
266             longLabel => $species,
267             genomesFile => "genome.txt",
268             email => 'email',
269             descriptionURL => "$baseURL" . "description.html"
270             };
271 0 0         $template->process($hubtxt_file,$hubtxt_vars,"$hubtxt_path") ||
272             croak "Template process failed: ", $template->error(), "\n";
273              
274             #construct genome.txt
275 0           my $genometxt_path = file($track_hub_directory, "genome.txt")->stringify;
276 0           my $genometxt_file = "genome.txt";
277 0           my $genometxt_vars =
278             {
279             genome => $species,
280             trackDb => file($species, "trackDb.txt"),
281             groups => file($species, "groups.txt"),
282             description => "$species",
283             twoBitPath => file($species,$species.".2bit"),
284             organism => "organism",
285             defaultPos => $species,
286             orderKey => "10",
287             scientificName => "scientificName",
288             htmlPath => file($species,"description.html")
289             };
290 0 0         $template->process($genometxt_file,$genometxt_vars,$genometxt_path) or
291             croak "Template process failed: ", $template->error(), "\n";
292              
293 0           my $tracksList;
294             #Bigbeds are only created from infolder
295 0 0         unless($filesdir =~ /-/){
296 0 0         if(-e $chrom_sizes_file){
297 0           convert_tracks($filesdir, $genome_assembly_directory, $species, $bedToBigBed, $chrom_sizes_file);
298             }else{
299 0           my $chromosome_sizes = fetch_chrom_sizes($species);
300 0           my $chromosome_size_filepath = file($genome_assembly_directory,"$species.chrom.sizes");
301 0           write_chromosome_sizes_file($chromosome_size_filepath,$chromosome_sizes);
302 0           convert_tracks($filesdir, $genome_assembly_directory, $species, $bedToBigBed, $chromosome_size_filepath);
303             }
304 0           my @trackfiles = retrieve_tracks($genome_assembly_directory, $baseURL, $track_hub_name, $species);
305              
306             #big beds
307 0           foreach my $track (@trackfiles){
308 0           my $trackString = make_track(@$track);
309 0           $tracksList .= $trackString;
310             }
311             }
312             #big wigs
313 0           my $bigwig_tracks_string = "";
314 0 0         unless($big_wig_ids=~/^-$/){
315 0           $bigwig_tracks_string = retrieve_bigwig_tracks($genome_assembly_directory, $baseURL, $track_hub_name, $species, $big_wig_ids);
316             }
317 0           $tracksList .= $bigwig_tracks_string;
318             #construct trackDb.txt
319 0           my $trackDb_txt_path = file($genome_assembly_directory, "trackDb.txt")->stringify;
320 0           my $trackDb_txt_file = 'trackDb.txt';
321 0           my $trackDb_txt_vars =
322             {
323             tracks => "$tracksList"
324             };
325 0 0         $template->process($trackDb_txt_file,$trackDb_txt_vars,$trackDb_txt_path) or
326             croak "Template process failed: ", $template->error(), "\n";
327              
328 0 0         if (defined $log){
329 0           print LOG "LOG Track Hub created\n";
330 0           close(LOG);
331             }
332             }
333              
334             sub convert_tracks{
335 0     0 0   my ($filesdir,$genome_assembly_directory,$accession,$bedToBigBed,$chromosome_size_filepath) = @_;
336 0           my $currentDirectory = getcwd;
337 0 0         chdir $filesdir or croak $!;
338 0           my @bedfiles = <*.bed>;
339 0           foreach my $bedfile (@bedfiles){
340 0           my $bed_file_path = file($filesdir,$bedfile);
341 0           my $filename = $bedfile;
342 0           $filename =~ s/.bed$//;
343 0           my $bigbed_file = $filename . ".bb";
344 0           my $bigbed_file_path = file($genome_assembly_directory,$bigbed_file);
345 0           `$bedToBigBed $bed_file_path $chromosome_size_filepath $bigbed_file_path`;
346             }
347 0 0         chdir $currentDirectory or croak $!;
348 0           return 1;
349             }
350              
351             sub retrieve_tracks{
352 0     0 0   my ($directoryPath,$baseURL,$assembly_hub_name,$accession) = @_;
353 0           my $currentDirectory = getcwd;
354 0 0         chdir $directoryPath or croak $!;
355 0           my @trackfiles = <*.bb>;
356 0           my @tracks;
357 0           my $counter = 0;
358 0           foreach my $trackfile (@trackfiles){
359 0           my $filename = $trackfile;
360 0           $filename =~ s/.bb$//;
361 0           my @filenameSplit = split(/\./, $filename);
362 0           my $fileaccession = $filenameSplit[0];
363 0           my $tag = $filenameSplit[1];
364 0           my $id = lc($tag);
365 0           my $track = "refseq_" . $id;
366 0           my $dir = dir($baseURL,$assembly_hub_name,$accession);
367             #my $bigDataUrl = file($dir, $trackfile);
368 0           my $bigDataUrl = file($trackfile);
369 0           my $shortLabel = "RefSeq " . $tag;
370 0           my $longLabel = "RefSeq " . $tag;
371 0           my $type = "bigBed 12 .";
372 0           my $autoScale = "off";
373 0           my $bedNameLabel = "Gene Id";
374 0           my $searchIndex = "name";
375 0           my $colorByStrand = retrieve_color($counter);
376 0           my $visibility = "pack";
377 0           my $group = "annotation";
378 0           my $priority = "10";
379 0           my @track = ($tag,$track,$bigDataUrl,$shortLabel,$longLabel,$type,$autoScale,$bedNameLabel,$searchIndex,$colorByStrand,$visibility,$group,$priority);
380 0           my $trackreference = \@track;
381 0           push(@tracks, $trackreference);
382 0           $counter++;
383             }
384 0 0         chdir $currentDirectory or croak $!;
385 0           return @tracks;
386             }
387              
388              
389             sub retrieve_bigwig_tracks{
390 0     0 0   my ($directoryPath,$baseURL,$assembly_hub_name,$accession,$bigwigpaths) = @_;
391 0           my $currentDirectory = getcwd;
392 0 0         chdir $directoryPath or croak $!;
393 0           my @big_wig_array = split('\#', $bigwigpaths);
394 0           my $bigwigtracks = "";
395 0           foreach my $bigwig_entry (@big_wig_array){
396 0 0         if($bigwig_entry=~/,/){
397             #multi bigwig container detected
398 0           my @big_wig_pair = split(',', $bigwig_entry);
399 0           my $pos_wig;
400             my $neg_wig;
401 0 0         if ($big_wig_pair[0] =~ /\.pos\./){
    0          
402 0           $pos_wig = $big_wig_pair[0];
403             }elsif($big_wig_pair[1] =~ /\.pos\./){
404 0           $pos_wig = $big_wig_pair[1];
405             }else{
406 0           my $this_function = (caller(0))[3];
407 0           croak ("ERROR [$this_function] \no positive big wig file for container provided\n");
408             }
409 0 0         if ($big_wig_pair[0] =~ /\.neg\./){
    0          
410 0           $neg_wig = $big_wig_pair[0];
411             }elsif($big_wig_pair[1] =~ /\.neg\./){
412 0           $neg_wig = $big_wig_pair[1];
413             }else{
414 0           my $this_function = (caller(0))[3];
415 0           croak ("ERROR [$this_function] \no negative big wig file for container provided\n");
416             }
417 0           my ($basename1,$dir1,$ext1) = fileparse($pos_wig,qr/\..*/);
418 0           my ($basename2,$dir2,$ext2) = fileparse($pos_wig,qr/\..*/);
419             #construct container
420 0           my $id = lc($basename1);
421 0           my $tag = $id;
422 0           my $track = $id;
423 0           my $shortLabel = $tag;
424 0           my $longLabel = $tag;
425 0           my $type = "bigWig";
426 0           my $autoScale = "on";
427 0           my $visibility = "full";
428 0           my $priority = "1500";
429 0           my $container_string = make_multi_bigwig_container_track($tag, $track, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority);
430 0           $bigwigtracks .= $container_string;
431             #construct positive track
432 0           my $track1 = $track . "_pos";
433 0           my $bigDataUrl1 = $pos_wig;
434 0           my $shortLabel1 = $track1;
435 0           my $longLabel1 = $track1;
436 0           my $type1 = "bigWig";
437 0           my $parent1 = $track;
438 0           my $color1 = "133,154,0";
439 0           my $track1_string = make_bigwig_container_track($track1, $bigDataUrl1, $shortLabel1, $longLabel1, $type1, $parent1, $color1);
440 0           $bigwigtracks .= $track1_string;
441             #construct negative track
442 0           my $track2 = $track . "_neg";
443 0           my $bigDataUrl2 = $neg_wig;
444 0           my $shortLabel2 = $track2;
445 0           my $longLabel2 = $track2;
446 0           my $type2 = "bigWig";
447 0           my $parent2 = $track;
448 0           my $color2 = "220,51,47";
449 0           my $track2_string = make_bigwig_container_track($track2, $bigDataUrl2, $shortLabel2, $longLabel2, $type2, $parent2, $color2);
450 0           $bigwigtracks .= $track2_string;
451             }else{
452             #construct single wig
453 0           my ($basename,$dir,$ext) = fileparse($bigwig_entry,qr/\..*/);
454 0           my $id = lc($basename);
455 0           my $tag = $id;
456 0           my $track = $id;
457 0           my $bigDataUrl = $bigwig_entry;
458 0           my $shortLabel = $tag;
459 0           my $longLabel = $tag;
460 0           my $type = "bigWig";
461 0           my $autoScale = "on";
462 0           my $visibility = "full";
463 0           my $priority = "1500";
464 0           my $color = "38,140,210";
465 0           my $track_string = make_bigwig_track($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority, $color);
466 0           $bigwigtracks .= $track_string;
467             }
468             }
469 0 0         chdir $currentDirectory or croak $!;
470 0           return $bigwigtracks;
471             }
472              
473              
474             sub retrieve_color{
475 0     0 0   my $counter = shift;
476 0           my $digitnumber = length($counter);
477 0           my $colorcode;
478 0 0         if($digitnumber>1){
479 0           $colorcode = $counter % 10;
480             }else{
481 0           $colorcode = $counter;
482             }
483 0           my @color;
484             #green
485 0           $color[0] = "133,154,0 133,154,0";
486             #cyan
487 0           $color[1] = "42,162,152 42,162,152";
488             #blue
489 0           $color[2] = "38,140,210 38,140,210";
490             #violet
491 0           $color[3] = "108,114,196 108,114,196";
492             #magenta
493 0           $color[4] = "211,55,130 211,55,130";
494             #red
495 0           $color[5] = "220,51,47 220,51,47";
496             #orange
497 0           $color[6] = "203,76,22 203,76,22";
498             #yellow
499 0           $color[7] = "181,138,0 181,138,0";
500             #black
501 0           $color[8] = "0,44,54 0,44,54";
502             #grey
503 0           $color[9] = "88,111,117 88,111,117";
504              
505 0           return $color[$colorcode];
506             }
507              
508             sub make_group{
509 0     0 0   my ($name, $label, $priority, $defaultIsClosed) = @_;
510 0           my $group ="name $name\nlabel $label\npriority $priority\ndefaultIsClosed $defaultIsClosed\n";
511 0           return $group;
512             }
513              
514             sub make_track{
515 0     0 0   my ($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $bedNameLabel, $searchIndex, $colorByStrand, $visibility, $group, $priority) = @_;
516 0           my $trackEntry ="#$tag\ntrack $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nautoScale $autoScale\nbedNameLabel $bedNameLabel\nsearchIndex $searchIndex\ncolorByStrand $colorByStrand\nvisibility $visibility\ngroup $group\npriority $priority\n\n";
517 0           return $trackEntry;
518             }
519              
520             sub make_multi_bigwig_container_track{
521 0     0 0   my ($tag, $track, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority) = @_;
522 0           my $trackEntry ="#$tag\ntrack $track\ncontainer multiWig\nnoInherit on\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nconfigureable on\nvisibility $visibility\naggregate transparentOverlay\nshowSubtrackColorOnUi on\nautoScale $autoScale\nwindowingFunction maximum\npriority $priority\nalwaysZero on\nyLineMark 0\nyLineOnOff on\nmaxHeightPixels 125:125:11\n\n";
523 0           return $trackEntry;
524             }
525              
526             sub make_bigwig_container_track{
527 0     0 0   my ($track, $bigDataUrl, $shortLabel, $longLabel, $type, $parent, $color) = @_;
528 0           my $trackEntry = "track $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nparent $parent\ncolor $color\n\n";
529 0           return $trackEntry;
530             }
531              
532             sub make_bigwig_track{
533 0     0 0   my ($tag, $track, $bigDataUrl, $shortLabel, $longLabel, $type, $autoScale, $visibility, $priority, $color) = @_;
534 0           my $trackEntry ="#$tag\ntrack $track\nbigDataUrl $bigDataUrl\nshortLabel $shortLabel\nlongLabel $longLabel\ntype $type\nvisibility $visibility\nautoScale $autoScale\npriority $priority\nalwaysZero on\nyLineMark 0\nyLineOnOff on\nmaxHeightPixels 125:125:11\ncolor $color\n\n";
535 0           return $trackEntry;
536             }
537              
538             sub valid_ncbi_accession{
539             # receives a NCBI accession ID, with or without version number
540             # returns NCBI accession ID without version number
541 0     0 0   my $acc = shift;
542 0 0         if ($acc =~ /^(N[CST]\_\d{6})\.\d+?$/){
    0          
543 0           return $acc; # NCBI accession ID without version
544             }
545             elsif ($acc =~ /^(N[CST]\_\d{6})$/){
546 0           return $1; # NCBI accession ID without version
547             }
548             else {
549 0           return 0;
550             }
551             }
552              
553             sub parse_fasta_header{
554 0     0 0   my $filepath = shift;
555 0           my $this_function = shift;
556 0           open my $file, '<', "$filepath";
557 0           my $fastaheader = <$file>;
558 0           chomp $fastaheader;
559 0           close $file;
560             #>gi|556503834|ref|NC_000913.3| Escherichia coli str. K-12 substr. MG1655
561 0 0         if($fastaheader=~/^>gi/){
562 0           print LOG "#NCBI fasta header detected\n";
563 0           my @headerfields = split(/\|/, $fastaheader);
564 0           my $accession = $headerfields[3];
565 0           my $scientificName = $headerfields[4];
566 0           my @ids;
567 0           push(@ids,$accession);
568 0           push(@ids,$scientificName);
569 0           return \@ids;
570             }else{
571 0           $fastaheader=~s/^>//;
572 0 0         if(valid_ncbi_accession($fastaheader)){
573 0           print LOG "#Header contains just valid NCBI accession number\n";
574 0           my @ids;
575 0           push(@ids,$fastaheader);
576 0           push(@ids,"scientific name not set");
577 0           return \@ids;
578             }else{
579 0           print LOG "#No valid accession/ ncbi header\n";
580 0           croak ("ERROR [$this_function] \$fasta_path does not contain a valid accession/ ncbi header\n");
581             }
582             }
583             }
584              
585             sub write_chromosome_size_file{
586 0     0 0   my $filepath = shift;
587 0           my $chromosome_name = shift;
588 0           my $chromosome_size = shift;
589 0           my $entry = $chromosome_name . "\t" . $chromosome_size . "\n";
590 0           open CHROMFILE, '>', "$filepath";
591 0           print CHROMFILE $entry;
592 0           close CHROMFILE;
593 0           return 1;
594             }
595              
596             sub write_chromosome_sizes_file{
597 0     0 0   my $filepath = shift;
598 0           my $chromosome_sizes_reference = shift;
599 0           my %chromosome_sizes = %{$chromosome_sizes_reference};
  0            
600 0           open CHROMFILE, '>', "$filepath";
601 0           foreach my $chromosome_name ( keys %chromosome_sizes){
602 0           my $chromosome_size = $chromosome_sizes{$chromosome_name};
603 0           my $entry = $chromosome_name . "\t" . $chromosome_size . "\n";
604 0           print CHROMFILE $entry;
605             }
606 0           close CHROMFILE;
607 0           return 1;
608             }
609              
610             sub retrieve_chromosome_size{
611 0     0 0   my $inputFilepath = shift;
612 0           open INFILE, '<', $inputFilepath;
613 0           my @newfasta;
614 0           my $chromosome_size = 0;
615 0           my $header_skipped = 0;
616 0           while (<INFILE>) {
617 0 0         if($header_skipped){
618 0           chomp;
619 0           $chromosome_size += length($_);
620             }else{
621 0           $header_skipped = 1;
622             }
623             }
624 0           close INFILE;
625 0           return $chromosome_size;
626             }
627              
628             sub modify_fasta_header{
629 0     0 0   my $inputFilepath = shift;
630 0           my $outputFilepath = shift;
631 0           my $header = shift;
632              
633 0           open INFILE, '<', "$inputFilepath";
634 0           my @newfasta;
635 0           while (<INFILE>) {
636 0           push(@newfasta, $_);
637             }
638 0           close INFILE;
639             #@newfasta = @newfasta[ 1 .. $#newfasta ];
640 0           shift @newfasta;
641 0           unshift(@newfasta,">".$header."\n");
642              
643 0           open OUTFILE, '>', "$outputFilepath";
644 0           foreach my $line (@newfasta) {
645 0           print OUTFILE $line;
646             }
647 0           close OUTFILE;
648 0           return 1;
649             }
650              
651             1;
652             __END__
653              
654             =head1 NAME
655              
656             Bio::ViennaNGS::UCSC - Perl extension for easy UCSC Genome Browser
657             integration.
658              
659             =head1 SYNOPSIS
660              
661             use Bio::ViennaNGS::UCSC;
662              
663             =head1 DESCRIPTION
664              
665             ViennaNGS::UCSC is a Perl extension for managing routine tasks with the
666             UCSC Genome Browser. It comes with a set of utilities that serve as
667             reference implementation of the routines implemented in the library. All
668             utilities are located in the 'scripts' folder.
669             The main functionality is provided by the make_assembly_hub function.
670              
671             =head2 EXPORT
672              
673             Routines:
674             make_assembly_hub, make_track_hub
675              
676             Variables:
677             none
678              
679             =head3 make_assembly_hub()
680              
681             Build assembly hubs for the UCSC genome browser.
682             This function takes 4 parameters:
683             1. absolute path of input fasta file(e.g. /home/user/input.fa)
684             2. path to the ouput directory (e.g. /home/user/assemblyhubs/)
685             3. base URL where the output folder will be placed for upload to the UCSC genome browser (e.g. http://www.foo.com/folder/)
686             4. path for the log file (/home/user/logs/assemblyhubconstructionlog)
687              
688             =head3 make_track_hub()
689              
690             Build track hubs for the UCSC genome browser.
691             This function takes 4 parameters:
692             1. chromosome id as used in existing ucsc assembly hub (e.g. chr1)
693             2. path to the ouput directory (e.g. /home/user/assemblyhubs/)
694             3. base URL where the output folder will be placed for upload to the UCSC genome browser (e.g. http://www.foo.com/folder/)
695             4. path for the log file (/home/user/logs/assemblyhubconstructionlog)
696              
697              
698             =head1 SEE ALSO
699              
700             =over
701              
702             =item L<Bio::ViennaNGS>
703              
704             =back
705              
706             =head1 AUTHORS
707              
708             =over
709              
710             =item Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>
711              
712             =item Florian Eggenhofer, E<lt>florian.eggenhofer@univie.ac.atE<gt>
713              
714             =back
715              
716             =head1 COPYRIGHT AND LICENSE
717              
718             Copyright (C) 2015 Michael T. Wolfinger, E<lt>michael@wolfinger.euE<gt>
719              
720             This library is free software; you can redistribute it and/or modify
721             it under the same terms as Perl itself, either Perl version 5.10.0 or,
722             at your option, any later version of Perl 5 you may have available.
723              
724             This program is distributed in the hope that it will be useful,
725             but WITHOUT ANY WARRANTY; without even the implied warranty of
726             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
727              
728              
729             =cut