|  line  | 
 stmt  | 
 bran  | 
 cond  | 
 sub  | 
 pod  | 
 time  | 
 code  | 
| 
1
 | 
  
 
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::ToolBox::Data;  | 
| 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 our $VERSION = '1.68';  | 
| 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 NAME  | 
| 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Bio::ToolBox::Data - Reading, writing, and manipulating data structure  | 
| 
7
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SYNOPSIS  | 
| 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
10
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   use Bio::ToolBox::Data;  | 
| 
11
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
12
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Open a pre-existing file  | 
| 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # a data table with same columns as input file  | 
| 
14
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(  | 
| 
15
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         file    => 'coordinates.bed',  | 
| 
16
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
17
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Parse a GTF, GFF3, refFlat, or genePred gene table  | 
| 
19
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # data table with names and references to fully parsed   | 
| 
20
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # SeqFeature transcript objects with exon SeqFeature subfeatures  | 
| 
21
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(  | 
| 
22
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    file    => 'annotation.gtf.gz',  | 
| 
23
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    parse   => 1,  | 
| 
24
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    feature => 'transcript',  | 
| 
25
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    subfeature => 'exon'  | 
| 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
27
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### New gene list from a Bio::DB::SeqFeature::Store database  | 
| 
29
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # data table with name and reference ID to database SeqFeature objects  | 
| 
30
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(  | 
| 
31
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         db      => 'hg19.sqlite',  | 
| 
32
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         feature => 'gene:ensGene',  | 
| 
33
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
34
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
       | 
| 
35
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Get a specific value  | 
| 
37
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $value = $Data->value($row, $column);  | 
| 
38
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Replace or add a value  | 
| 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->value($row, $column, $new_value);  | 
| 
41
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
42
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Add a column  | 
| 
43
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $new_index = $Data->add_column('Data2');  | 
| 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Find a column  | 
| 
46
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $old_index = $Data->find_column('Data1');  | 
| 
47
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
48
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Return a specific row as an object  | 
| 
49
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $row = $Data->get_row($i); # Bio::ToolBox::Data::Feature  | 
| 
50
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
51
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Iterate through a Data structure one row at a time  | 
| 
52
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $stream = $Data->row_stream;  | 
| 
53
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (my $row = $stream->next_row) {  | 
| 
54
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # each row is returned as Bio::ToolBox::Data::Feature object  | 
| 
55
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # get the positional information from the file data  | 
| 
56
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # assuming that the input file had these identifiable columns  | 
| 
57
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $seq_id = $row->seq_id;  | 
| 
58
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $start  = $row->start;  | 
| 
59
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $stop   = $row->end;  | 
| 
60
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    | 
| 
61
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # work with the referenced SeqFeature object  | 
| 
62
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $seqf = $row->seqfeature;  | 
| 
63
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    | 
| 
64
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # generate a new Bio::Seq object from the database using   | 
| 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # these coordinates   | 
| 
66
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $region = $db->segment($seq_id, $start, $stop);  | 
| 
67
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	    | 
| 
68
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  # modify a row value  | 
| 
69
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $value = $row->value($old_index);  | 
| 
70
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  my $new_value = $value + 1;  | 
| 
71
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   	  $row->value($new_index, $new_value);  | 
| 
72
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
73
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
74
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### Iterate through a Data table with a code reference  | 
| 
75
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->iterate(\&my_code);  | 
| 
76
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
77
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   ### write the data to file  | 
| 
79
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $success = $Data->write_file(  | 
| 
80
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        filename     => 'new_data.txt',  | 
| 
81
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        gz           => 1,  | 
| 
82
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   );  | 
| 
83
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "wrote new file $success\n"; # file is new_data.txt.gz  | 
| 
84
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
85
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 DESCRIPTION  | 
| 
86
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
87
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This module works with the primary Bio::ToolBox Data structure. Simply, it   | 
| 
88
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is a complex data structure representing a tabbed-delimited table (array   | 
| 
89
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of arrays), with plenty of options for metadata. Many common bioinformatic   | 
| 
90
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file formats are simply tabbed-delimited text files (think BED, GFF, VCF).   | 
| 
91
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Each row is a feature or genomic interval, and each column is a piece of   | 
| 
92
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 information about that feature, such as name, type, and/or coordinates.   | 
| 
93
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 We can append to that file additional columns of information, perhaps   | 
| 
94
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 scores from genomic data sets. We can record metadata regarding how   | 
| 
95
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and where we obtained that data. Finally, we can write the updated   | 
| 
96
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 table to a new file.  | 
| 
97
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
98
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 METHODS  | 
| 
99
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Initializing the structure  | 
| 
101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item new  | 
| 
105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Initialize a new Data structure. This generally requires options,   | 
| 
107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 provided as an array of key =E values. A new list of features   | 
| 
108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 may be obtained from an annotation database or an existing file   | 
| 
109
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 may be loaded. If you do not pass any options, a new empty   | 
| 
110
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 structure will be generated for you to populate.   | 
| 
111
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These are the options available.  | 
| 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
115
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item file  | 
| 
117
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item in  | 
| 
119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(file => $file);  | 
| 
121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Provide the path and name to an existing tabbed-delimited text   | 
| 
123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file from which to load the contents. This is a shortcut to the   | 
| 
124
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 load_file() method. See that method for more details.  | 
| 
125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
126
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item stream  | 
| 
127
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(file => $file, stream => 1);  | 
| 
129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
130
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Boolean option indicating that the file should be opened as a file    | 
| 
131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 stream. A L object will be returned. This   | 
| 
132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is a convenience method.   | 
| 
133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item noheader  | 
| 
135
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
136
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(file => $file, noheader => 1);  | 
| 
137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Boolean option indicating that the file does not have file headers,   | 
| 
139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in which case dummy headers are provided. This is not necessary for   | 
| 
140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 defined file types that don't normally have file headers, such as   | 
| 
141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BED, GFF, or UCSC files.  | 
| 
142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item parse  | 
| 
144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(file => $file, parse => 1);  | 
| 
146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Boolean option indicating that a gene annotation table or file should   | 
| 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be parsed into SeqFeature objects and a general table of names and IDs   | 
| 
149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 representing those objects be generated. The annotation file may   | 
| 
150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be specified in one of two ways: Through the file option above,   | 
| 
151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or in the database metadata of an existing table file representing   | 
| 
152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 previously parsed objects.  | 
| 
153
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item db  | 
| 
155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(db => 'hg19', feature => 'gene');  | 
| 
157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Provide the name of the database from which to collect the   | 
| 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 features. It may be a short name, whereupon it is checked in   | 
| 
160
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the L configuration file F<.biotoolbox.cfg> for   | 
| 
161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 connection information. Alternatively, a path to a database   | 
| 
162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file or directory may be given.   | 
| 
163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you already have an opened L database   | 
| 
165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object, you can simply pass that. See L for   | 
| 
166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 more information. However, this in general should be discouraged,   | 
| 
167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 since the name of the database will not be properly recorded when   | 
| 
168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 saving to file. It is better to simply pass the name of database   | 
| 
169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 again; multiple connections to the same database are smartly handled   | 
| 
170
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in the background.  | 
| 
171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item feature  | 
| 
173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(file => $filename, feature => 'gene');  | 
| 
175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For de novo lists from an annotation database, provide the GFF   | 
| 
177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 type or type:source (columns 3 and 2) for collection. A comma   | 
| 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 delimited string may be accepted for databases. For parsed files,   | 
| 
179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 only a simple string is accepted.  | 
| 
180
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For a list of genomic intervals across the genome, specify a   | 
| 
182
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 feature of 'genome' with a database object.  | 
| 
183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item subfeature  | 
| 
185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When parsing annotation files such as GTF, one or more subfeature   | 
| 
187
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 types, e.g. C or C, may be specified as a comma-delimited   | 
| 
188
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string. This ensures that the subfeatures will be parsed into   | 
| 
189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SeqFeature objects. Otherwise, only the top level features will be   | 
| 
190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parsed. This expedites parsing by skipping unwanted features.  | 
| 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item win  | 
| 
193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item step  | 
| 
195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(db => $dbname, win => $integer);  | 
| 
197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If generating a list of genomic intervals, optionally provide   | 
| 
199
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the window and step values. The default is 500 bp.  | 
| 
200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item chrskip  | 
| 
202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Provide a regular expression compatible or C string for skipping or   | 
| 
204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 excluding specific or classes of chromosomes, for example the mitochondrial   | 
| 
205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 chromosome or unmapped contigs. This works with both feature collection   | 
| 
206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and genomic intervals. The default is to take all chromosomes.  | 
| 
207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When no file is given or database given to search, then a new,   | 
| 
211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 empty Data object is returned. In this case, you may optionally   | 
| 
212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specify the names of the columns or indicate a specific file   | 
| 
213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 format structure to assign column names. The following options can   | 
| 
214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 then be provided.  | 
| 
215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
217
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item columns  | 
| 
219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item datasets  | 
| 
221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $Data = Bio::ToolBox::Data->new(columns => [qw(Column1 Column2 ...)] );  | 
| 
223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
224
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Provide the column names in advance as an anonymous array.   | 
| 
225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item gff  | 
| 
227
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the GFF version of the file: 1, 2, 2.5 (GTF), or 3.  | 
| 
229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item bed  | 
| 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the number of BED columns (3-12).  | 
| 
233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item ucsc   | 
| 
235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the number of columns to indicate the type of UCSC format. These   | 
| 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 include 10 (refFlat without gene names), 11 (refFlat with gene names),   | 
| 
238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 12 (knownGene gene prediction table), and 15   | 
| 
239
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (an extended gene prediction or genePredExt table).  | 
| 
240
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
241
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
243
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If successful, the method will return a Bio::ToolBox::Data object.  | 
| 
244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item duplicate  | 
| 
246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This will create a new Data object containing the same column   | 
| 
248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 headers and metadata, but lacking the table content, i.e. no   | 
| 
249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rows of data. File name metadata, if present in the original, is   | 
| 
250
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not preserved. The purpose here, for example, is to allow one   | 
| 
251
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to selectively copy rows from one Data object to another.  | 
| 
252
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item parse_table  | 
| 
254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->parse_table($file)  | 
| 
256
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->parse_table( {  | 
| 
257
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          file => $file,   | 
| 
258
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          feature => 'gene',  | 
| 
259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          subfeature => 'exon',  | 
| 
260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
          chrskip => 'chrM|contig',  | 
| 
261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   } );  | 
| 
262
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
263
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This will parse a gene annotation table into SeqFeature objects.   | 
| 
264
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If this is called from an empty Data object, then the table will   | 
| 
265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be filled with the SeqFeature object names and IDs. If this is   | 
| 
266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 called from a non-empty Data object, then the table's contents   | 
| 
267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 will be associated with the SeqFeature objects using their name and   | 
| 
268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ID. The stored SeqFeature objects can be retrieved using the   | 
| 
269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L method.  | 
| 
270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the method a single argument. This may be either a simple   | 
| 
272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 scalar to a filename to be parsed, or a reference to hash of   | 
| 
273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 one or more argument options. Possible options include:  | 
| 
274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
276
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
277
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item file  | 
| 
278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The file name of the gene table to be parsed. This may   | 
| 
280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be a GFF, GFF3, GTF, or any of the UCSC gene table formats.   | 
| 
281
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These will be parsed using Bio::ToolBox::parser::* adapters.  | 
| 
282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item feature  | 
| 
284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A regular expression compatible string or C string to match   | 
| 
286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the top features C to keep. The C is not   | 
| 
287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 checked. The default is 'gene', although a transcript type could   | 
| 
288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 alternatively be specified (in which case transcripts are not   | 
| 
289
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parsed in gene features).  | 
| 
290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item subfeature  | 
| 
292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
293
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A regular expression compatible string or C string to match   | 
| 
294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 any sub features C to parse. The C is not checked.  | 
| 
295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Typically these include exon, CDS, or UTR. The default is nothing.  | 
| 
296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item chrskip  | 
| 
298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 A regular expression compatible string or C string to match   | 
| 
300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 chromosomes to be skipped or excluded. Any feature with a matching   | 
| 
301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C chromosome name will be skipped.  | 
| 
302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
303
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
307
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 General Metadata  | 
| 
308
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 There is a variety of general metadata regarding the Data   | 
| 
310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 structure. The following methods may be used to access or set these   | 
| 
311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 metadata properties. Some of these are stored as comment lines at   | 
| 
312
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the beginning of the file, and will be read or set when the file is   | 
| 
313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 loaded.  | 
| 
314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over  | 
| 
316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
317
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item feature  | 
| 
318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
319
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->feature('something');  | 
| 
320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $feature = $Data->feature;  | 
| 
321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the name of the features used to collect   | 
| 
323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the list of features. The actual feature types are listed   | 
| 
324
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in the table, so this metadata is merely descriptive.  | 
| 
325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item feature_type  | 
| 
327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
328
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns one of three specific values describing the contents   | 
| 
329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the data table inferred by the presence of specific column   | 
| 
330
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 names. This provides a clue as to whether the table features   | 
| 
331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 represent genomic regions (defined by coordinate positions) or   | 
| 
332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 named database features. The return values include:  | 
| 
333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * coordinate  | 
| 
337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Table includes at least chromosome and start columns.  | 
| 
339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * named  | 
| 
341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Table includes name, type, and/or Primary_ID, possibly   | 
| 
343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 referring to named database features.  | 
| 
344
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
345
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item * unknown  | 
| 
346
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Table is unrecognized format.  | 
| 
348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
350
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
351
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item program  | 
| 
352
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the name of the program generating the list.  | 
| 
354
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
355
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item database  | 
| 
356
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
357
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the name or path of the database from which the   | 
| 
358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 features were derived.  | 
| 
359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
360
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item gff  | 
| 
361
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the version of loaded GFF files. Supported versions   | 
| 
363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 included 1, 2, 2.5 (GTF), and 3.  | 
| 
364
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
365
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item bed  | 
| 
366
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the BED file version. Here, the BED version is simply   | 
| 
368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the number of columns.  | 
| 
369
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item ucsc  | 
| 
371
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
372
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the UCSC file format version. Here, the version is   | 
| 
373
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 simply the number of columns. Supported versions include 10 (gene   | 
| 
374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 prediction), 11 (refFlat, or gene prediction with gene name), 12   | 
| 
375
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 (knownGene table), 15 (extended gene prediction), or 16 (extended   | 
| 
376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 gene prediction with bin).  | 
| 
377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item vcf  | 
| 
379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the VCF file version number. VCF support is limited.  | 
| 
381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
384
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 File information  | 
| 
385
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
386
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods provide information about the file from which the   | 
| 
387
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data table was loaded. This does not include parsed annotation tables.  | 
| 
388
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item filename  | 
| 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
393
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item path  | 
| 
394
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item basename  | 
| 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
397
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item extension  | 
| 
398
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
399
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the filename, full path, basename, and extension of   | 
| 
400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the filename. Concatenating the last three values will reconstitute   | 
| 
401
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the first original filename.  | 
| 
402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
403
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item add_file_metadata  | 
| 
404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->add_file_metadata('/path/to/file.txt');  | 
| 
406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
407
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Add filename metadata. This will automatically parse the path,   | 
| 
408
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 basename, and recognized extension from the passed filename and   | 
| 
409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 set the appropriate metadata attributes.  | 
| 
410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
413
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Metadata comments  | 
| 
414
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Comments are any other commented lines from a text file (lines   | 
| 
416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 beginning with a #) that were not parsed as general metadata.  | 
| 
417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
419
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item comments  | 
| 
421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns a copy of the array containing commented lines. Each   | 
| 
423
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 comment line becomes an element in the array.  | 
| 
424
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item add_comment  | 
| 
426
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
427
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Appends the text string to the comment array.  | 
| 
428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item delete_comment  | 
| 
430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Deletes a comment. Provide the array index of the comment to   | 
| 
432
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 delete. If an index is not provided, ALL comments will be deleted!  | 
| 
433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item vcf_headers  | 
| 
435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
436
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For VCF files, this will partially parse the VCF headers into a   | 
| 
437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 hash structure that can be queried or manipulated. Each header   | 
| 
438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 line is parsed for the primary key, being the first word after the   | 
| 
439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ## prefix, e.g. INFO, FORMAT, FILTER, contig, etc. For the simple   | 
| 
440
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 values, they are stored as the value. For complex entries, such as   | 
| 
441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with INFO and FORMAT, a second level hash is created with the ID   | 
| 
442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extracted and used as the second level key. The value is always the   | 
| 
443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 always the remainder of the string.  | 
| 
444
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For example, the following would be a simple parsed vcf header in   | 
| 
446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 code representation.  | 
| 
447
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
448
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $vcf_header = {  | 
| 
449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      FORMAT => {  | 
| 
450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         GT = q(ID=GT,Number=1,Type=String,Description="Genotype"),  | 
| 
451
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
         AD = q(ID=AD,Number=.,Type=Integer,Description="ref,alt Allelic depths"),  | 
| 
452
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      },  | 
| 
453
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      fileDate => 20150715,  | 
| 
454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
455
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item rewrite_vcf_headers  | 
| 
457
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If you have altered the vcf headers exported by the vcf_headers()   | 
| 
459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method, then this method will rewrite the hash structure as new   | 
| 
460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 comment lines. Do this prior to writing or saving the Data sturcture  | 
| 
461
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or else you will lose your changed VCF header metadata.  | 
| 
462
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
463
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
464
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
465
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 The Data table  | 
| 
466
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
467
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The Data table is the array of arrays containing all of the   | 
| 
468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 actual information. Rows and columns are indexed using 0-based   | 
| 
469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 indexing as with all Perl arrays. Row 0 is always the column   | 
| 
470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 header row containing the column names, regardless whether an   | 
| 
471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 actual header name existed in the original file format (e.g.   | 
| 
472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 BED or GFF formats). Any individual table "cell" can be   | 
| 
473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specified as C<[$row][$column]>.   | 
| 
474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
475
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
476
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
477
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item list_columns  | 
| 
478
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns an array or array reference of the column names   | 
| 
480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in ascending (left to right) order.  | 
| 
481
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item number_columns  | 
| 
483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
484
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the number of columns in the Data table.   | 
| 
485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item last_column  | 
| 
487
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the array index number for the last (right most)   | 
| 
489
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 column. This number is always 1 less than the value   | 
| 
490
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 returned by number_columns() due to 0-based indexing.  | 
| 
491
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item last_row  | 
| 
493
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
494
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the array index number of the last row.   | 
| 
495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Since the header row is index 0, this is also the   | 
| 
496
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 number of actual content rows.  | 
| 
497
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
498
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item column_values  | 
| 
499
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $values = $Data->column_values($i);  | 
| 
501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns an array or array reference representing the values   | 
| 
503
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in the specified column. This includes the column header as   | 
| 
504
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the first element. Pass the method the column index.  | 
| 
505
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item add_column  | 
| 
507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # add empty column with name  | 
| 
509
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $i = $Data->add_column($name);  | 
| 
510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     | 
| 
511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # add entire column  | 
| 
512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $i = $Data->add_column(\@array);  | 
| 
513
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Appends a new column to the Data table at the   | 
| 
515
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rightmost position (highest index). It adds the column   | 
| 
516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 header name and creates a new column metadata hash.   | 
| 
517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the method one of two possibilities. Pass a text   | 
| 
518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 string representing the new column name, in which case   | 
| 
519
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 no data will be added to the column. Alternatively, pass   | 
| 
520
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 an array reference, and the contents of the array will   | 
| 
521
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 become the column data. If the Data table already has   | 
| 
522
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 rows, then the passed array reference must have the same   | 
| 
523
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 number of elements.  | 
| 
524
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
525
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It returns the new column index if successful.  | 
| 
526
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item copy_column  | 
| 
528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
529
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $j = $Data->copy_column($i);  | 
| 
530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This will copy a column, appending the duplicate column at   | 
| 
532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the rightmost position (highest index). It will duplicate   | 
| 
533
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 column metadata as well. It will return the new index   | 
| 
534
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 position.  | 
| 
535
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item delete_column  | 
| 
537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->delete_column($i, $j);  | 
| 
539
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
540
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Deletes one or more specified columns. Any remaining   | 
| 
541
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 columns rightwards will have their indices shifted   | 
| 
542
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 down appropriately. If you had identified one of the   | 
| 
543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 shifted columns, you may need to re-find or calculate   | 
| 
544
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 its new index.  | 
| 
545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item reorder_column  | 
| 
547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
548
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->reorder_column($c,$b,$a,$a);  | 
| 
549
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Reorders columns into the specified order. Provide the   | 
| 
551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 new desired order of indices. Columns could be duplicated   | 
| 
552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or deleted using this method. The columns will adopt their   | 
| 
553
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 new index numbers.  | 
| 
554
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
555
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item add_row  | 
| 
556
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->add_row(\@values);  | 
| 
558
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->add_row($Row); # Bio::ToolBox::Data::Feature object  | 
| 
559
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
560
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Add a new row of data values to the end of the Data table.   | 
| 
561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Optionally provide either a reference to an array of values   | 
| 
562
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 to put in the row, or pass a L   | 
| 
563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Row object, such as one obtained from another Data object.   | 
| 
564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the number of columns do not match, the array is filled   | 
| 
565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 up with null values for missing columns, or excess values   | 
| 
566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are dropped.  | 
| 
567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item get_row  | 
| 
569
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
570
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->get_row($i); # Bio::ToolBox::Data::Feature object  | 
| 
571
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns the specified row as a L   | 
| 
573
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object.  | 
| 
574
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
575
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item delete_row  | 
| 
576
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->delete_row($i, $j);  | 
| 
578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Deletes one or more specified rows. Rows are spliced out   | 
| 
580
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 highest to lowest index to avoid issues. Be very careful   | 
| 
581
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 deleting rows while simultaneously iterating through the   | 
| 
582
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 table!  | 
| 
583
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item row_values  | 
| 
585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $row_values = $Data->row_values($i);  | 
| 
587
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
588
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns a copy of an array for the specified row index.   | 
| 
589
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Modifying this returned array does not migrate back to the   | 
| 
590
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data table; Use the L method below instead.  | 
| 
591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item value  | 
| 
593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
594
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $value = $Data->value($row, $column);  | 
| 
595
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->value($row, $column, $new_value);  | 
| 
596
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
597
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the value at a specific row or column index.  | 
| 
598
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Index positions are 0-based (header row is index 0).   | 
| 
599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
602
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Column Metadata  | 
| 
603
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Each column has metadata. Each metadata is a series of key =E   | 
| 
605
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value pairs. The minimum keys are 'index' (the 0-based index   | 
| 
606
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of the column) and 'name' (the column header name). Additional   | 
| 
607
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 keys and values may be queried or set as appropriate. When the   | 
| 
608
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file is written, these are stored as commented metadata lines at   | 
| 
609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the beginning of the file.  | 
| 
610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item name  | 
| 
614
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->name($index, $new_name);  | 
| 
616
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $name = $Data->name($i);  | 
| 
617
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
618
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Convenient method to return the name of the column given the   | 
| 
619
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 index number. A column may also be renamed by passing a new name.  | 
| 
620
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item metadata  | 
| 
622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->metadata($index, $key, $new_value);  | 
| 
624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $value = $Data->metadata($index, $key)  | 
| 
625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
626
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Returns or sets the metadata value for a specific $key for a   | 
| 
627
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 specific column $index.  | 
| 
628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This may also be used to add a new metadata key. Simply provide   | 
| 
630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the name of a new $key that is not present  | 
| 
631
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If no key is provided, then a hash or hash reference is returned   | 
| 
633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 representing the entire metadata for that column.  | 
| 
634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
635
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item copy_metadata  | 
| 
636
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
637
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->copy_metadata($source, $target);  | 
| 
638
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
639
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will copy the metadata (everything except name and   | 
| 
640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 index) between the source column and target column. Returns 1 if   | 
| 
641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 successful.    | 
| 
642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
643
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item delete_metadata  | 
| 
644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
645
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->delete_metadata($index, $key);  | 
| 
646
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
647
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Deletes a column-specific metadata $key and value for a specific   | 
| 
648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 column $index. If a $key is not provided, then all metadata keys   | 
| 
649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for that index will be deleted.  | 
| 
650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
651
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item find_column  | 
| 
652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
653
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $i = $Data->find_column('Gene');  | 
| 
654
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $i = $Data->find_column('^Gene$')  | 
| 
655
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Searches the column names for the specified column name. This   | 
| 
657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 employs a case-insensitive grep search, so simple substitutions   | 
| 
658
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 may be made.  | 
| 
659
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item chromo_column  | 
| 
661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
662
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item start_column  | 
| 
663
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item stop_column  | 
| 
665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
666
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item strand_column  | 
| 
667
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item name_column  | 
| 
669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
670
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item type_column  | 
| 
671
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item id_column  | 
| 
673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods will return the identified column best matching   | 
| 
675
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the description. Returns C if that column is not present.   | 
| 
676
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These use the L method with a predefined list of   | 
| 
677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 aliases.  | 
| 
678
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
679
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
680
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
681
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Working with databases  | 
| 
682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These are methods for working primarily with annotation databases.  | 
| 
684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
687
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item open_database  | 
| 
688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
689
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is wrapper method that tries to do the right thing and passes   | 
| 
690
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 on to either L or L methods.   | 
| 
691
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Basically a legacy method for L.  | 
| 
692
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
693
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item open_meta_database  | 
| 
694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Open the database that is listed in the metadata. Returns the   | 
| 
696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 database connection. Pass a true value to force a new database   | 
| 
697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 connection to be opened, rather than returning a cached connection   | 
| 
698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object (useful when forking).  | 
| 
699
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
700
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item open_new_database  | 
| 
701
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Convenience method for opening a second or new database that is   | 
| 
703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 not specified in the metadata, useful for data collection. This   | 
| 
704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is a shortcut to L.  | 
| 
705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the database name.  | 
| 
706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
707
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 SeqFeature Objects  | 
| 
710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SeqFeature objects corresponding to data rows can be stored in the Data   | 
| 
712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object. This can be useful if the SeqFeature object is not readily   | 
| 
713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 available from a database or is processor intensive in generating or   | 
| 
714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parsing. Note that storing large numbers of objects will increase memory   | 
| 
715
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 usage.   | 
| 
716
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
717
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SeqFeature objects are usually either L,   | 
| 
718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, or L objects, depending   | 
| 
719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 upon their source. More information can obtained from the L   | 
| 
720
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 abstract API.  | 
| 
721
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
722
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
723
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item store_seqfeature  | 
| 
725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->store_seqfeature($row_index, $seqfeature);  | 
| 
727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
728
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Stores the SeqFeature object for the given row index. Only one SeqFeature   | 
| 
729
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object can be stored per row.  | 
| 
730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item get_seqfeature  | 
| 
732
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
733
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $feature = $Data->get_seqfeature($row_index);  | 
| 
734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
735
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Retrieves the SeqFeature object for the given row index.  | 
| 
736
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item delete_seqfeature  | 
| 
738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->store_seqfeature($row_index);  | 
| 
740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
741
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Removes the SeqFeature object for the given row index.   | 
| 
742
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item collapse_gene_transcripts  | 
| 
744
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $success = $Data->collapse_gene_transcripts;  | 
| 
746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will iterate through a Data table and collapse multiple alternative   | 
| 
748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 transcript subfeatures of stored gene SeqFeature objects in the table. Exons   | 
| 
749
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 of multiple transcripts will be merged, maximizing exon size and minimizing   | 
| 
750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 intron size. Genes with only one transcript will not be affected. Stored   | 
| 
751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 SeqFeature objects that are do not have a C of "gene" are silently   | 
| 
752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 skipped. Refer to the L method   | 
| 
753
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 for more details. The number of rows successfully collapsed is returned.   | 
| 
754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item add_transcript_length  | 
| 
756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $length_index = $Data->add_transcript_length;  | 
| 
758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $length_index = $Data->add_transcript_length('cds');  | 
| 
759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will generate a new column in the Data table representing the   | 
| 
761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 length of a transcript, i.e. the sum of the length of subfeatures for   | 
| 
762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the stored SeqFeature object in the Data table. The default subfeature is   | 
| 
763
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C; however, alternative subfeature types may be passed to the method   | 
| 
764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and used. These include C, C<5p_utr>, and C<3p_utr> for CDS, the 5C<'> UTR,   | 
| 
765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and the 3C<'> UTR, respectively. See the corresponding transcript length   | 
| 
766
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 methods in L for more information. If a length   | 
| 
767
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is not calculated, for example the feature C is a "gene",   | 
| 
768
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 then the simple length of the feature is recorded.   | 
| 
769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The name of the new column is one of "Merged_Transcript_Length" for exon,   | 
| 
771
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 "Transcript_CDS_Length", "Transcript_5p_UTR_Length", or "Transcript_3p_UTR_Length".   | 
| 
772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The index of the new length column is returned.   | 
| 
773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Data Table Functions  | 
| 
777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods alter the Data table en masse.   | 
| 
779
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
782
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item verify  | 
| 
783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will verify the Data structure, including the metadata and the   | 
| 
785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data table. It ensures that the table has the correct number of rows and   | 
| 
786
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 columns as described in the metadata, and that each column has the basic   | 
| 
787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 metadata.   | 
| 
788
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
789
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the Data structure is marked as a GFF or BED structure, then the table   | 
| 
790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is checked that the structure matches the proper format. If not, for   | 
| 
791
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 example when additional columns have been added, then the GFF or BED value   | 
| 
792
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is set to null.   | 
| 
793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method is automatically called prior to writing the Data table to file.  | 
| 
795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item sort_data  | 
| 
797
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
798
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->sort_data($index, 'i'); # increasing sort  | 
| 
799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
800
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will sort the Data table by the values in the indicated column.   | 
| 
801
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 It will automatically determine whether the contents of the column are   | 
| 
802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 numbers or alphanumeric, and will sort accordingly, either numerically or   | 
| 
803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 asciibetically. The first non-null value in the column is used to determine.   | 
| 
804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The sort may fail if the values are not consistent. Pass a second optional   | 
| 
805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 value to indicate the direction of the sort. The value should be either   | 
| 
806
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 'i' for 'increasing' or 'd' for 'decreasing'. The default order is   | 
| 
807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 increasing.   | 
| 
808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item gsort_data  | 
| 
810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will sort the Data table by increasing genomic coordinates. It   | 
| 
812
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 requires the presence of chromosome and start (or position) columns,   | 
| 
813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 identified by their column names. These are automatically identified.   | 
| 
814
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Failure to find these columns mean a failure to sort the table. Chromosome   | 
| 
815
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 names are sorted first by their digits (e.g. chr2 before chr10), and then   | 
| 
816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 alphanumerically. Base coordinates are sorted by increasing value.   | 
| 
817
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Identical positions are kept in their original order.  | 
| 
818
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item splice_data  | 
| 
820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
821
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $Data = Bio::ToolBox::Data->new(file => $file);  | 
| 
822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $pm = Parallel::ForkManager->new(4);  | 
| 
823
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	for my $i (1..4) {  | 
| 
824
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$pm->start and next;  | 
| 
825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		### in child ###  | 
| 
826
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$Data->splice_data($i, 4);  | 
| 
827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# do something with this portion  | 
| 
828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# then save to a temporary unique file  | 
| 
829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$Data->save("$file_$i");  | 
| 
830
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$pm->finish;  | 
| 
831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$pm->wait_all_children;  | 
| 
833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# reload children files  | 
| 
834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	$Data->reload_children(glob "$file_*");  | 
| 
835
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will splice the Data table into C<$total_parts> number of pieces,   | 
| 
837
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 retaining the C<$current_part> piece. The other parts are discarded. This   | 
| 
838
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method is intended to be used when a program is forked into separate   | 
| 
839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 processes, allowing each child process to work on a subset of the original   | 
| 
840
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Data table.   | 
| 
841
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Two values are passed to the method. The first is the current part number,   | 
| 
843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1-based. The second value is the total number of parts that the table   | 
| 
844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 should be divided, corresponding to the number of concurrent processes.   | 
| 
845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 One easy approach to forking is to use L. The   | 
| 
846
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 above example shows how to fork into four concurrent processes.  | 
| 
847
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Since each forked child process is separate from their parent process,   | 
| 
849
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 their contents must be reloaded into the current Data object. The   | 
| 
850
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L documentation recommends going through a disk   | 
| 
851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file intermediate. Therefore, write each child Data object to file using   | 
| 
852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a unique name. Once all children have been reaped, they can be reloaded   | 
| 
853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 into the current Data object using the L method.  | 
| 
854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
855
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Remember that if you fork your script into child processes, any database   | 
| 
856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 connections must be re-opened; they are typically not clone safe. If you   | 
| 
857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 have an existing database connection by using the L method,   | 
| 
858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 it should be automatically re-opened for you when you use the L   | 
| 
859
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method, but you will need to call L again in the child   | 
| 
860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 process to obtain the new database object.  | 
| 
861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item reload_children  | 
| 
863
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Discards the current data table in memory and reloads two or more files   | 
| 
865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 written from forked children processes. Provide the name of the child   | 
| 
866
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 files in the order you want them loaded. The files will be automatically   | 
| 
867
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 deleted if successfully loaded. Returns the number of lines reloaded on   | 
| 
868
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 success.  | 
| 
869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
870
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
871
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 File Functions  | 
| 
873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The Data table may be read in from a file or written out as a file. In   | 
| 
875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 all cases, it is a tab-delimited text file, whether as an ad hoc table   | 
| 
876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 or a specific bioinformatic format, e.g. BED, GFF, etc. Multiple common   | 
| 
877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 file formats are supported. Column headers are assumed, except in those   | 
| 
878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 cases where it is not, e.g. BED, GFF, etc. Metadata may be included as   | 
| 
879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 commented lines at the beginning of the file, prefixed with a C<#> symbol.  | 
| 
880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Reading and writing gzip compressed files is fully supported.  | 
| 
881
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
883
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item load_file  | 
| 
885
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   $Data->load_file($filename);  | 
| 
887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
888
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This will load a file into a new, empty Data table. This function is   | 
| 
889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 called automatically when a filename is provided to the L function.   | 
| 
890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The existence of the file is first checked (appending common missing   | 
| 
891
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extensions as necessary), metadata and column headers processed and/or   | 
| 
892
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 generated from default settings, the content loaded into the table, and   | 
| 
893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the structure verified. Error messages may be printed if the structure or   | 
| 
894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 format is inconsistent or doesn't match the expected format, e.g a file   | 
| 
895
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 with a F<.bed> extension doesn't match the UCSC specification.  | 
| 
896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the name of the filename.  | 
| 
897
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item taste_file  | 
| 
899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $flavor = $Data->taste_file($filename);  | 
| 
901
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   # returns gff, bed, ucsc, or undef  | 
| 
902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
903
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Tastes, or checks, a file for a certain flavor, or known gene file formats.   | 
| 
904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This is based on file extension, metadata headers, and/or file content   | 
| 
905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 in the first 10 lines or so. Returns a string based on the file format.  | 
| 
906
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Values include gff, bed, ucsc, or undefined. Useful for determining if   | 
| 
907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the file represents a known gene table format that lacks a defined file   | 
| 
908
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extension, e.g. UCSC formats.  | 
| 
909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item save  | 
| 
911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item write_file  | 
| 
913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $success = $Data->save;  | 
| 
915
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $success = $Data->save('my_file.txt');  | 
| 
916
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $success = $Data->save(filename => $file, gz => 1);  | 
| 
917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   print "file $success was saved!\n";  | 
| 
918
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
919
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the file name to be written. If no file name is passed, then   | 
| 
920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the filename and path stored in the metadata are used, if present.  | 
| 
921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
922
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 These methods will write the Data structure out to file. It will   | 
| 
923
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 be first verified as to proper structure. Opened BED and GFF files   | 
| 
924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are checked to see if their structure is maintained. If so, they   | 
| 
925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 are written in the same format; if not, they are written as regular   | 
| 
926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 tab-delimited text files.   | 
| 
927
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You may pass additional options.  | 
| 
929
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
930
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item filename  | 
| 
933
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
934
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Optionally pass a new filename. Required for new objects; previous   | 
| 
935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 opened files may be overwritten if a new name is not provided. If   | 
| 
936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 necessary, the file extension may be changed; for example, BED files   | 
| 
937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 that no longer match the defined format lose the .bed and gain a .txt   | 
| 
938
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 extension. Compression may or add or strip .gz as appropriate. If   | 
| 
939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 a path is not provided, the current working directory is used.  | 
| 
940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item gz  | 
| 
942
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Boolean value to change the compression status of the output file. If   | 
| 
944
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 overwriting an input file, the default is maintain the compression status,   | 
| 
945
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 otherwise no compression. Pass a 0 for no compression, 1 for standard   | 
| 
946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 gzip compression, or 2 for block gzip (bgzip) compression for tabix   | 
| 
947
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 compatibility.  | 
| 
948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
950
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 If the file save is successful, it will return the full path and   | 
| 
952
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 name of the saved file, complete with any changes to the file extension.  | 
| 
953
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item summary_file  | 
| 
955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Write a separate file summarizing columns of data (mean values).   | 
| 
957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The mean value of each column becomes a row value, and each column   | 
| 
958
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 header becomes a row identifier (i.e. the table is transposed). The   | 
| 
959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 best use of this is to summarize the mean profile of windowed data   | 
| 
960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 collected across a feature. See the Bio::ToolBox scripts   | 
| 
961
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L and L as examples.   | 
| 
962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 For data from L where the columns are expressed   | 
| 
963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 as percentile bins, the reported midpoint column is automatically   | 
| 
964
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 converted based on a length of 1000 bp.  | 
| 
965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
966
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 You may pass these options. They are optional.  | 
| 
967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item filename  | 
| 
971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass an optional new filename. The default is to take the basename   | 
| 
973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and append "_summed" to it.  | 
| 
974
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item startcolumn  | 
| 
976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item stopcolumn  | 
| 
978
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Provide the starting and ending columns to summarize. The default   | 
| 
980
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 start is the leftmost column without a recognized standard name.   | 
| 
981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The default ending column is the last rightmost column. Indexes are   | 
| 
982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 0-based.  | 
| 
983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item dataset  | 
| 
985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass a string that is the name of the dataset. This could be collected   | 
| 
987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from the metadata, if present. This will become the name of the score   | 
| 
988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 column if defined.  | 
| 
989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
991
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The name of the summarized column is either the provided dataset name,   | 
| 
993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 the defined basename in the metadata of the Data structure, or a generic   | 
| 
994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 name. If successful, it will return the name of the file saved.  | 
| 
995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
997
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Verifying Datasets  | 
| 
999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 When working with row Features and collecting scores, the dataset   | 
| 
1001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 from which you are collecting must be verified prior to collection.   | 
| 
1002
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This ensures that the proper database adaptor is available and loaded,   | 
| 
1003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 and that the dataset is correctly specified (otherwise nothing would be   | 
| 
1004
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 collected). This verification is normally performed transparently when   | 
| 
1005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 you call L or   | 
| 
1006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.  | 
| 
1007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 However, datasets may be explicitly verified prior to calling the score   | 
| 
1008
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 methods.   | 
| 
1009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
1011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item verify_dataset  | 
| 
1013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
  my $dataset = $Data->verify_dataset($dataset, $database);  | 
| 
1015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1016
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Pass the name of the dataset (GFF type or type:source) for a GFF3-based   | 
| 
1017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 database, e.g. , or path and file name for a   | 
| 
1018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 data file, e.g. Bam, BigWig, BigBed, or USeq file. If a separate database   | 
| 
1019
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 is being used, pass the name or opened database object as a second   | 
| 
1020
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 parameter. For more advance options, see   | 
| 
1021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L.   | 
| 
1022
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 The name of the verified dataset, with a prefix if necessary, is returned.  | 
| 
1024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head2 Efficient Data Access  | 
| 
1028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Most of the time we need to iterate over the Data table, one row   | 
| 
1030
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 at a time, collecting data or processing information. These methods   | 
| 
1031
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 simplify the process.  | 
| 
1032
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =over 4  | 
| 
1034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1035
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item iterate  | 
| 
1036
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1037
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     $Data->iterate( sub {  | 
| 
1038
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        my $row = shift;  | 
| 
1039
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        my $number = $row->value($index);  | 
| 
1040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        my $log_number = log($number);  | 
| 
1041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
        $row->value($index, $log_number);  | 
| 
1042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
     } );  | 
| 
1043
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1044
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This method will process a code reference on every row in the data   | 
| 
1045
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 table. Pass a subroutine or code reference. The subroutine will   | 
| 
1046
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 receive the row as a L object. With this   | 
| 
1047
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object, you can retrieve values, set values, and add new values.   | 
| 
1048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item row_stream  | 
| 
1050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1051
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 This returns an C object, which has one   | 
| 
1052
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 method, C. Call this method repeatedly until it returns   | 
| 
1053
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 C to work through each row of data.  | 
| 
1054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Users of the C family of database adaptors may recognize the   | 
| 
1056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 analogy to the C method.  | 
| 
1057
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =item next_row  | 
| 
1059
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   my $stream = $Data->row_stream;  | 
| 
1061
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   while (my $row = $stream->next_row) {  | 
| 
1062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # each $row is a Bio::ToolBox::Data::Feature object  | 
| 
1063
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # representing the row in the data table  | 
| 
1064
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      my $value = $row->value($index);  | 
| 
1065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
      # do something with $value  | 
| 
1066
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
   }  | 
| 
1067
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Called from a C object, it returns a   | 
| 
1069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L row object. If SeqFeature objects are   | 
| 
1070
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 associated with the row, perhaps from a parsed input annotation file,   | 
| 
1071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 then they are automatically associated with the row object. (Previous   | 
| 
1072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 versions required separately calling the seqfeature() row method to   | 
| 
1073
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 perform this.)  | 
| 
1074
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =back  | 
| 
1076
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =head1 SEE ALSO  | 
| 
1078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1079
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, L, L,  | 
| 
1080
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, L, L, L,   | 
| 
1081
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 L, L  | 
| 
1082
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 =cut  | 
| 
1084
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1085
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
4197
 | 
 use strict;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
118
 | 
    | 
| 
1086
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
21
 | 
 use Carp qw(carp cluck croak confess);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
233
 | 
    | 
| 
1087
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
20
 | 
 use List::Util qw(sum0);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
208
 | 
    | 
| 
1088
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
19
 | 
 use base 'Bio::ToolBox::Data::core';  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1680
 | 
    | 
| 
1089
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
204
 | 
 use Bio::ToolBox::db_helper qw(  | 
| 
1090
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	get_new_feature_list    | 
| 
1091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	get_new_genome_list  | 
| 
1092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	get_db_feature  | 
| 
1093
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
26
 | 
 );  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
1094
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
22
 | 
 use Bio::ToolBox::utility qw(simplify_dataset_name);  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
155
 | 
    | 
| 
1095
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
22
 | 
 use Module::Load;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
    | 
| 
1096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 1;  | 
| 
1098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### Initialize  | 
| 
1101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1102
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
1103
 | 
45
 | 
 
 | 
 
 | 
  
45
  
 | 
  
1
  
 | 
7983
 | 
 	my $class = shift;  | 
| 
1104
 | 
45
 | 
 
 | 
 
 | 
 
 | 
 
 | 
120
 | 
 	my %args  = @_;  | 
| 
1105
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check for important arguments  | 
| 
1107
 | 
45
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
411
 | 
 	$args{features} ||= $args{feature} || 'gene';  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1108
 | 
45
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
272
 | 
 	$args{stream} ||= $args{Stream} || 0;  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1109
 | 
45
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
245
 | 
 	$args{file} ||= $args{in} || undef;  | 
| 
 
 | 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1110
 | 
45
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
172
 | 
 	$args{parse} ||= 0;  | 
| 
1111
 | 
45
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
217
 | 
 	$args{noheader} ||= 0;  | 
| 
1112
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check for stream  | 
| 
1114
 | 
45
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
112
 | 
 	if ($args{stream}) {  | 
| 
1115
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		$class = "Bio::ToolBox::Data::Stream";  | 
| 
1116
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 		load($class);  | 
| 
1117
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
156
 | 
 		return $class->new(@_);  | 
| 
1118
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1119
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# initialize  | 
| 
1121
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
179
 | 
 	my $self = $class->SUPER::new();  | 
| 
1122
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# prepare a new table based on the provided arguments  | 
| 
1124
 | 
44
 | 
  
100
  
 | 
  
100
  
 | 
 
 | 
 
 | 
257
 | 
 	if ($args{file} and $args{parse}) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1125
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# parse from file  | 
| 
1126
 | 
7
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
39
 | 
 		$args{subfeature} ||= '';  | 
| 
1127
 | 
7
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 		unless ( $self->parse_table(\%args) ) {  | 
| 
1128
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
104
 | 
 			my $l = $self->load_file($args{file});  | 
| 
1129
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 			return unless $l;  | 
| 
1130
 | 
5
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
19
 | 
 			if ($self->database =~ /^Parsed:(.+)$/ and $self->feature_type eq 'named') {  | 
| 
1131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# looks like the loaded file was from a previously parsed table  | 
| 
1132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# let's try this again  | 
| 
1133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# this may die if it doesn't work  | 
| 
1134
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 				$args{file} = $1;  | 
| 
1135
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 				$args{feature} = $self->feature;  | 
| 
1136
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 				$self->parse_table(\%args);   | 
| 
1137
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1139
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1140
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($args{file}) {  | 
| 
1141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# load from file  | 
| 
1142
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 		unless ( $self->load_file($args{file}, $args{noheader}) ) {  | 
| 
1143
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return;  | 
| 
1144
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif (exists $args{db} and $args{features}) {  | 
| 
1147
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# generate new list  | 
| 
1148
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->feature($args{features});  | 
| 
1149
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->database($args{db});  | 
| 
1150
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$args{data} = $self;  | 
| 
1151
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $result;  | 
| 
1152
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if ($args{features} eq 'genome') {  | 
| 
1153
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$result = get_new_genome_list(%args);  | 
| 
1154
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {  | 
| 
1156
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$result = get_new_feature_list(%args);  | 
| 
1157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1158
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		unless ($result) {  | 
| 
1159
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			carp " Cannot generate new $args{features} list!";  | 
| 
1160
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return;  | 
| 
1161
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1162
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1163
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# a new empty structure  | 
| 
1165
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1166
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# check to see if user provided column names  | 
| 
1167
 | 
32
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
192
 | 
 		$args{columns} ||= $args{datasets} || undef;  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1168
 | 
32
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
203
 | 
 		if (defined $args{columns}) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1169
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 			 foreach my $c ( @{ $args{columns} } ) {  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
1170
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 			 	$self->add_column($c);  | 
| 
1171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			 }  | 
| 
1172
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 			 $self->{feature} = $args{feature} if exists $args{feature};  | 
| 
1173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# or possibly a specified format structure   | 
| 
1176
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		elsif (exists $args{gff} and $args{gff}) {  | 
| 
1177
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# use standard names for the number of columns indicated  | 
| 
1178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# we trust that the user knows the subtle difference between gff versions  | 
| 
1179
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->add_gff_metadata($args{gff});  | 
| 
1180
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			unless ($self->extension =~ /g[tf]f/) {  | 
| 
1181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$self->{extension} = $args{gff} == 2.5 ? '.gtf' :   | 
| 
1182
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					$args{gff} == 3 ? '.gff3' : '.gff';  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		elsif (exists $args{bed} and $args{bed}) {  | 
| 
1186
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# use standard names for the number of columns indicated  | 
| 
1187
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			unless ($args{bed} =~ /^\d{1,2}$/ and $args{bed} >= 3) {  | 
| 
1188
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				carp "bed parameter must be an integer 3-12!";  | 
| 
1189
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				return;  | 
| 
1190
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}	  | 
| 
1191
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->add_bed_metadata($args{bed});  | 
| 
1192
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			unless ($self->extension =~ /bed|peak/) {  | 
| 
1193
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{extension} = '.bed';  | 
| 
1194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1195
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1196
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		elsif (exists $args{ucsc} and $args{ucsc}) {  | 
| 
1197
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# a ucsc format such as refFlat, genePred, or genePredExt  | 
| 
1198
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $u = $self->add_ucsc_metadata($args{ucsc});  | 
| 
1199
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			unless ($u) {  | 
| 
1200
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				carp "unrecognized number of columns for ucsc format!";  | 
| 
1201
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				return;  | 
| 
1202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			};  | 
| 
1203
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			unless ($self->extension =~ /ucsc|ref+lat|genepred/) {  | 
| 
1204
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{extension} = '.ucsc';  | 
| 
1205
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1209
 | 
44
 | 
 
 | 
 
 | 
 
 | 
 
 | 
264
 | 
 	return $self;  | 
| 
1210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1211
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub duplicate {  | 
| 
1213
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1578
 | 
 	my $self = shift;  | 
| 
1214
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# duplicate the data structure  | 
| 
1216
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my $columns = $self->list_columns;  | 
| 
1217
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	my $Dupe = $self->new(  | 
| 
1218
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'columns' => $columns,  | 
| 
1219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	) or return;  | 
| 
1220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1221
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# copy the metadata  | 
| 
1222
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	for (my $i = 0; $i < $self->number_columns; $i++) {  | 
| 
1223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# column metadata  | 
| 
1224
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
21
 | 
 		my %md = $self->metadata($i);  | 
| 
1225
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
31
 | 
 		$Dupe->{$i} = \%md;  | 
| 
1226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1227
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	foreach (qw(feature program db bed gff ucsc vcf headers)) {  | 
| 
1228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# various keys  | 
| 
1229
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		$Dupe->{$_} = $self->{$_};  | 
| 
1230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1231
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my @comments = $self->comments;  | 
| 
1232
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	push @{$Dupe->{comments}}, @comments;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
    | 
| 
1233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1234
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 	return $Dupe;  | 
| 
1235
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub parse_table {  | 
| 
1238
 | 
18
 | 
 
 | 
 
 | 
  
18
  
 | 
  
1
  
 | 
4210
 | 
 	my $self = shift;  | 
| 
1239
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 	my $args = shift;  | 
| 
1240
 | 
18
 | 
 
 | 
 
 | 
 
 | 
 
 | 
30
 | 
 	my ($file, $feature, $subfeature, $simplify);  | 
| 
1241
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 	if (ref $args) {  | 
| 
1242
 | 
12
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
43
 | 
 		$file = $args->{file} || '';  | 
| 
1243
 | 
12
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
53
 | 
 		$feature = $args->{feature} || '';  | 
| 
1244
 | 
12
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
50
 | 
 		$subfeature = $args->{subfeature} || '';  | 
| 
1245
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$simplify = (exists $args->{simplify} and defined $args->{simplify}) ?   | 
| 
1246
 | 
12
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
40
 | 
 			$args->{simplify} : 1; # default is to simplify  | 
| 
1247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1249
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# no hash reference, assume just a file name  | 
| 
1250
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		$file = $args;  | 
| 
1251
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		undef $args;  | 
| 
1252
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 		$feature = undef;  | 
| 
1253
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 		$subfeature = '';  | 
| 
1254
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 		$simplify = 1;  | 
| 
1255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1256
 | 
18
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
49
 | 
 	unless ($file) {  | 
| 
1257
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "no annotation file provided to parse!";  | 
| 
1258
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# the file format determines the parser class  | 
| 
1262
 | 
18
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
61
 | 
 	my $flavor = $self->taste_file($file) or return;  | 
| 
1263
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
41
 | 
 	my $class = 'Bio::ToolBox::parser::' . $flavor;  | 
| 
1264
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 	eval {load $class};  | 
| 
 
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
56
 | 
    | 
| 
1265
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
1063
 | 
 	if ($@) {  | 
| 
1266
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unable to load $class! cannot parse $flavor!";  | 
| 
1267
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1268
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1270
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# open parser  | 
| 
1271
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
116
 | 
 	my $parser = $class->new() or return;  | 
| 
1272
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
45
 | 
 	$parser->open_file($file) or return;  | 
| 
1273
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 	my $typelist = $parser->typelist;  | 
| 
1274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1275
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# set feature based on the type list from the parser  | 
| 
1276
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 	unless ($feature) {  | 
| 
1277
 | 
6
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
49
 | 
 		if ($typelist =~ /gene/i) {  | 
| 
 
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1278
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 			$feature = 'gene';  | 
| 
1279
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		elsif ($typelist eq 'region') {  | 
| 
1281
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 			$feature = 'region';  | 
| 
1282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {  | 
| 
1284
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 			$feature = 'rna'; # generic RNA  | 
| 
1285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1288
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# set parser parameters  | 
| 
1289
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
 	$parser->simplify($simplify);  | 
| 
1290
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
34
 | 
 	if ($subfeature) {  | 
| 
1291
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		$parser->do_exon(1) if $subfeature =~ /exon/i;  | 
| 
1292
 | 
2
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
26
 | 
 		$parser->do_cds(1) if $subfeature =~ /cds/i;  | 
| 
1293
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 		$parser->do_utr(1) if $subfeature =~ /utr|untranslated/i;  | 
| 
1294
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 		$parser->do_codon(1) if $subfeature =~/codon/i;  | 
| 
1295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1296
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
57
 | 
 	if ($feature =~ /gene$/i) {  | 
| 
1297
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 		$parser->do_gene(1);  | 
| 
1298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1300
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 		$parser->do_gene(0);  | 
| 
1301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1302
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	my $mrna_check = 0;  | 
| 
1303
 | 
13
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
49
 | 
 	if (lc($feature) eq 'mrna' and $parser->typelist !~ /mrna/i and not $self->last_row) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1304
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# user requested mRNA for a new data file but it's not present in the type list  | 
| 
1305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# look for it the hard way by parsing CDS too - sigh  | 
| 
1306
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		load('Bio::ToolBox::GeneTools', 'is_coding');  | 
| 
1307
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$parser->do_cds(1);  | 
| 
1308
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$mrna_check = 1;  | 
| 
1309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# parse the table  | 
| 
1312
 | 
13
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
40
 | 
 	$parser->parse_file or return;  | 
| 
1313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# store the SeqFeature objects  | 
| 
1315
 | 
13
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
83
 | 
 	if ($self->last_row > 0) {  | 
| 
1316
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we already have a table, presumably representing the features  | 
| 
1317
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		my $count = 0;  | 
| 
1318
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		$self->iterate( sub {  | 
| 
1319
 | 
65
 | 
 
 | 
 
 | 
  
65
  
 | 
 
 | 
85
 | 
 			my $row = shift;  | 
| 
1320
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
170
 | 
 			my $f = $parser->find_gene(  | 
| 
1321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				name  => $row->name,  | 
| 
1322
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				id    => $row->id,  | 
| 
1323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			);  | 
| 
1324
 | 
65
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
144
 | 
 			if ($f) {  | 
| 
1325
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
155
 | 
 				$self->store_seqfeature($row->row_index, $f);  | 
| 
1326
 | 
65
 | 
 
 | 
 
 | 
 
 | 
 
 | 
200
 | 
 				$count++;  | 
| 
1327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1328
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
51
 | 
 		} );  | 
| 
1329
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 		unless ($count == $self->last_row) {  | 
| 
1330
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			die <
 | 
| 
1331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Not all features in the input file could be matched to a corresponding SeqFeature   | 
| 
1332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 object in the annotation file $file.  | 
| 
1333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 Double check your input and annotation files. You can create a new table by just   | 
| 
1334
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 providing your annotation file.  | 
| 
1335
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 PARSEFAIL  | 
| 
1336
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# create a new table  | 
| 
1340
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
37
 | 
 		$self->add_column('Primary_ID');  | 
| 
1341
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 		$self->add_column('Name');  | 
| 
1342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# check for chromosome exclude  | 
| 
1344
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		my $chr_exclude;  | 
| 
1345
 | 
8
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 		if ($args) {  | 
| 
1346
 | 
2
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
12
 | 
 			$chr_exclude = $args->{chrskip} || undef;  | 
| 
1347
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1348
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1349
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# fill table with features  | 
| 
1350
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 		while (my $f = $parser->next_top_feature) {  | 
| 
1351
 | 
120
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
206
 | 
 			if ($chr_exclude) {  | 
| 
1352
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				next if $f->seq_id =~ /$chr_exclude/i;  | 
| 
1353
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1354
 | 
120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
243
 | 
 			my $type = $f->type;  | 
| 
1355
 | 
120
 | 
  
100
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
243
 | 
 			if ($f->type =~ /$feature/i or ($mrna_check and is_coding($f)) ) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 66
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1356
 | 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
261
 | 
 				my $index = $self->add_row([ $f->primary_id, $f->display_name ]);  | 
| 
1357
 | 
113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
255
 | 
 				$self->store_seqfeature($index, $f);  | 
| 
1358
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1359
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1360
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
27
 | 
 		unless ($self->last_row) {  | 
| 
1361
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			printf " Zero '%s' features found!\n Check your feature or try generic features like gene, mRNA, or transcript\n",  | 
| 
1362
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$feature;  | 
| 
1363
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1364
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
52
 | 
 		$self->database("Parsed:$file");  | 
| 
1365
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
33
 | 
 		$self->feature($feature);  | 
| 
1366
 | 
8
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		$self->add_comment("Chromosomes excluded: $chr_exclude") if $chr_exclude;  | 
| 
1367
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1368
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# add input parsed file metadata  | 
| 
1369
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
44
 | 
 		$self->add_file_metadata($file);  | 
| 
1370
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# but delete some stuff, just want basename  | 
| 
1371
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		undef $self->{extension};  | 
| 
1372
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
18
 | 
 		undef $self->{filename};  | 
| 
1373
 | 
8
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 		undef $self->{path};  | 
| 
1374
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1375
 | 
13
 | 
 
 | 
 
 | 
 
 | 
 
 | 
300
 | 
 	return 1;  | 
| 
1376
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1377
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1378
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1379
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1380
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1381
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### Column manipulation  | 
| 
1382
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1383
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub column_values {  | 
| 
1384
 | 
2
 | 
 
 | 
 
 | 
  
2
  
 | 
  
1
  
 | 
1360
 | 
 	my ($self, $column) = @_;  | 
| 
1385
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	return unless defined $column;  | 
| 
1386
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	return unless exists $self->{$column}{name};  | 
| 
1387
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	my @values = map {$self->value($_, $column)} (0 .. $self->last_row);  | 
| 
 
 | 
159
 | 
 
 | 
 
 | 
 
 | 
 
 | 
240
 | 
    | 
| 
1388
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 	return wantarray ? @values : \@values;  | 
| 
1389
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1390
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1391
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_column {  | 
| 
1392
 | 
28
 | 
 
 | 
 
 | 
  
28
  
 | 
  
1
  
 | 
71
 | 
 	my ($self, $name) = @_;  | 
| 
1393
 | 
28
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
 	return unless $name;  | 
| 
1394
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
48
 | 
 	my $column = $self->{number_columns};  | 
| 
1395
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check for array of column data  | 
| 
1397
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 	my $name_ref = ref $name;  | 
| 
1398
 | 
28
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
94
 | 
 	if ($name_ref eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1399
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
16
 | 
 		if ($self->last_row > 1) {  | 
| 
1400
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# table has existing data beyond column headers  | 
| 
1401
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 			if ($self->last_row == (scalar @$name - 1)) {  | 
| 
1402
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# same number of elements, add it the table  | 
| 
1403
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 				$self->{$column} = {  | 
| 
1404
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					'name'  => $name->[0],  | 
| 
1405
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					'index' => $column,      | 
| 
1406
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				};  | 
| 
1407
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 				for (my $r = 0; $r <= $self->last_row; $r++) {  | 
| 
1408
 | 
158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
368
 | 
 					$self->{data_table}->[$r][$column] = $name->[$r];  | 
| 
1409
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
1410
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1411
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else {  | 
| 
1412
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# different number of elements  | 
| 
1413
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				cluck "array has different number of elements than Data table!\n";   | 
| 
1414
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				return;  | 
| 
1415
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1416
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1417
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {  | 
| 
1418
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# table has no existing data  | 
| 
1419
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->{$column} = {  | 
| 
1420
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				'name'  => $name->[0],  | 
| 
1421
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				'index' => $column,      | 
| 
1422
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			};  | 
| 
1423
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			for (my $i = 0; $i < scalar @$name; $i++) {  | 
| 
1424
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->value($i, $column, $name->[$i]);  | 
| 
1425
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1426
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->{last_row} = scalar @$name - 1;  | 
| 
1427
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->{headers} = 1; # boolean to indicate the table now has headers  | 
| 
1428
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1429
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1430
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($name_ref eq 'Bio::DB::GFF::Typename') {  | 
| 
1431
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# a Typename object that was selected from a SeqFeature::Store database  | 
| 
1432
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->{$column} = {  | 
| 
1433
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			'name'      => $name->asString,  | 
| 
1434
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			'index'     => $column,  | 
| 
1435
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
1436
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->{data_table}->[0][$column] = $name->asString;  | 
| 
1437
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1438
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($name_ref eq '') {  | 
| 
1439
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# just a name  | 
| 
1440
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
98
 | 
 		$self->{$column} = {  | 
| 
1441
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			'name'      => $name,  | 
| 
1442
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			'index'     => $column,  | 
| 
1443
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		};  | 
| 
1444
 | 
26
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
 		$self->{data_table}->[0][$column] = $name;  | 
| 
1445
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1446
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1447
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		cluck "unrecognized reference '$name_ref'! pass a scalar value or array reference";  | 
| 
1448
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1449
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1450
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1451
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
46
 | 
 	$self->{number_columns}++;  | 
| 
1452
 | 
28
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
59
 | 
 	delete $self->{column_indices} if exists $self->{column_indices};  | 
| 
1453
 | 
28
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
70
 | 
 	if ($self->gff or $self->bed or $self->ucsc or $self->vcf) {  | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1454
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# check if we maintain integrity, at least insofar what we test  | 
| 
1455
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->verify(1); # silence so user doesn't get these messages  | 
| 
1456
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1457
 | 
28
 | 
 
 | 
 
 | 
 
 | 
 
 | 
59
 | 
 	return $column;  | 
| 
1458
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1459
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1460
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub copy_column {  | 
| 
1461
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 	my $self = shift;  | 
| 
1462
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $index = shift;  | 
| 
1463
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	return unless defined $index;  | 
| 
1464
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $data = $self->column_values($index);  | 
| 
1465
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	my $new_index = $self->add_column($data);  | 
| 
1466
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 	$self->copy_metadata($index, $new_index);  | 
| 
1467
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	return $new_index;  | 
| 
1468
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1469
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1470
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1471
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1472
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### Rows and Data access  | 
| 
1473
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1474
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_row {  | 
| 
1475
 | 
231
 | 
 
 | 
 
 | 
  
231
  
 | 
  
1
  
 | 
331
 | 
 	my $self = shift;  | 
| 
1476
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
310
 | 
 	my @row_data;  | 
| 
1477
 | 
231
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
918
 | 
 	if ($_[0] and ref $_[0] eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1478
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
270
 | 
 		@row_data = @{ $_[0] };  | 
| 
 
 | 
191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
565
 | 
    | 
| 
1479
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1480
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($_[0] and ref $_[0] eq 'Bio::ToolBox::Data::Feature') {  | 
| 
1481
 | 
40
 | 
 
 | 
 
 | 
 
 | 
 
 | 
441
 | 
 		@row_data = $_[0]->row_values;  | 
| 
1482
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1483
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($_[0] and $_[0] =~ /\t/) {  | 
| 
1484
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@row_data = split /\t/, $_[0];  | 
| 
1485
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1486
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1487
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@row_data = map {'.'} (1 .. $self->{number_columns});  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1488
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1489
 | 
231
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
546
 | 
 	if (scalar @row_data > $self->{number_columns}) {  | 
| 
1490
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		cluck "row added has more elements than table columns! truncating row elements\n";   | 
| 
1491
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		splice @row_data, 0, $self->{number_columns};  | 
| 
1492
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1493
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
520
 | 
 	until (scalar @row_data == $self->{number_columns}) {  | 
| 
1494
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @row_data, '.';  | 
| 
1495
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1496
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
451
 | 
 	my $row_index = $self->{last_row} + 1;  | 
| 
1497
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
443
 | 
 	$self->{data_table}->[$row_index] = \@row_data;  | 
| 
1498
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
332
 | 
 	$self->{last_row}++;  | 
| 
1499
 | 
231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
631
 | 
 	return $row_index;  | 
| 
1500
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1501
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1502
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub get_row {  | 
| 
1503
 | 
5
 | 
 
 | 
 
 | 
  
5
  
 | 
  
1
  
 | 
692
 | 
 	my ($self, $row_number) = @_;  | 
| 
1504
 | 
5
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
23
 | 
 	return unless defined $self->{data_table}->[$row_number];  | 
| 
1505
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 	my @options = (  | 
| 
1506
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'data'      => $self,  | 
| 
1507
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'index'     => $row_number,  | 
| 
1508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
1509
 | 
5
 | 
  
100
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
30
 | 
 	if (  | 
| 
1510
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		exists $self->{SeqFeatureObjects} and  | 
| 
1511
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		defined $self->{SeqFeatureObjects}->[$row_number]   | 
| 
1512
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	) {  | 
| 
1513
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		push @options, 'feature', $self->{SeqFeatureObjects}->[$row_number];  | 
| 
1514
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1515
 | 
5
 | 
 
 | 
 
 | 
 
 | 
 
 | 
47
 | 
 	return Bio::ToolBox::Data::Feature->new(@options);	  | 
| 
1516
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1517
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1518
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub delete_row {  | 
| 
1519
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 	my $self = shift;  | 
| 
1520
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my @deleted = sort {$b <=> $a} @_;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1521
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	while (@deleted) {  | 
| 
1522
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		my $d = shift @deleted;  | 
| 
1523
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 		splice( @{ $self->{data_table} }, $d, 1);  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
1524
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 		$self->{last_row}--;  | 
| 
1525
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 		if (exists $self->{SeqFeatureObjects}) {  | 
| 
1526
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			splice( @{ $self->{SeqFeatureObjects} }, $d, 1);  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1527
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1528
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1529
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	return 1;  | 
| 
1530
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1531
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1532
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub row_values {  | 
| 
1533
 | 
237
 | 
 
 | 
 
 | 
  
237
  
 | 
  
1
  
 | 
371
 | 
 	my ($self, $row)  = @_;  | 
| 
1534
 | 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
327
 | 
 	my @data = @{ $self->{data_table}->[$row] };  | 
| 
 
 | 
237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
602
 | 
    | 
| 
1535
 | 
237
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
811
 | 
 	return wantarray ? @data : \@data;  | 
| 
1536
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1537
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1538
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub value {  | 
| 
1539
 | 
508
 | 
 
 | 
 
 | 
  
508
  
 | 
  
1
  
 | 
1978
 | 
 	my ($self, $row, $column, $value) = @_;  | 
| 
1540
 | 
508
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
1518
 | 
 	return unless (defined $row and defined $column);  | 
| 
1541
 | 
508
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
808
 | 
 	if (defined $value) {  | 
| 
1542
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$self->{data_table}->[$row][$column] = $value;  | 
| 
1543
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1544
 | 
508
 | 
 
 | 
 
 | 
 
 | 
 
 | 
1162
 | 
 	return $self->{data_table}->[$row][$column];  | 
| 
1545
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1546
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1547
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub row_stream {  | 
| 
1548
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
  
1
  
 | 
2430
 | 
 	my $self = shift;  | 
| 
1549
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
71
 | 
 	return Bio::ToolBox::Data::Iterator->new($self);  | 
| 
1550
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1551
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1552
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub iterate {  | 
| 
1553
 | 
6
 | 
 
 | 
 
 | 
  
6
  
 | 
  
1
  
 | 
654
 | 
 	my $self = shift;  | 
| 
1554
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
14
 | 
 	my $code = shift;  | 
| 
1555
 | 
6
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	unless (ref $code eq 'CODE') {  | 
| 
1556
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		confess "iterate_function() method requires a code reference!";  | 
| 
1557
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1558
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
20
 | 
 	my $stream = $self->row_stream;  | 
| 
1559
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
22
 | 
 	while (my $row = $stream->next_row) {  | 
| 
1560
 | 
143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
293
 | 
 		&$code($row);  | 
| 
1561
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1562
 | 
6
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 	return 1;  | 
| 
1563
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1564
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1565
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1566
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 #### Stored SeqFeature manipulation  | 
| 
1567
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1568
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub store_seqfeature {  | 
| 
1569
 | 
178
 | 
 
 | 
 
 | 
  
178
  
 | 
  
1
  
 | 
300
 | 
 	my ($self, $row_i, $seqfeature) = @_;  | 
| 
1570
 | 
178
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
614
 | 
 	unless (defined $row_i and ref($seqfeature)) {  | 
| 
1571
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		confess "must provide a row index and SeqFeature object!";  | 
| 
1572
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1573
 | 
178
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
386
 | 
 	confess "invalid row index" unless ($row_i <= $self->last_row);  | 
| 
1574
 | 
178
 | 
 
 | 
  
100
  
 | 
 
 | 
 
 | 
364
 | 
 	$self->{SeqFeatureObjects} ||= [];  | 
| 
1575
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
348
 | 
 	$self->{SeqFeatureObjects}->[$row_i] = $seqfeature;  | 
| 
1576
 | 
178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
383
 | 
 	return 1;  | 
| 
1577
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1578
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1579
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub delete_seqfeature {  | 
| 
1580
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my ($self, $row_i) = @_;  | 
| 
1581
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	confess "invalid row index" unless ($row_i <= $self->last_row);  | 
| 
1582
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return unless $self->{SeqFeatureObjects};  | 
| 
1583
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	undef $self->{SeqFeatureObjects}->[$row_i];  | 
| 
1584
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1585
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1586
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub collapse_gene_transcripts {  | 
| 
1587
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my $self = shift;  | 
| 
1588
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	unless ($self->feature_type eq 'named') {  | 
| 
1589
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "Table does not contain named features!";  | 
| 
1590
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1591
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1592
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1593
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# load module  | 
| 
1594
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $class = "Bio::ToolBox::GeneTools";  | 
| 
1595
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	eval {load $class, qw(collapse_transcripts)};  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1596
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if ($@) {  | 
| 
1597
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unable to load $class! cannot collapse transcripts!";  | 
| 
1598
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1599
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1600
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1601
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# collapse the transcripts  | 
| 
1602
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $success = 0;  | 
| 
1603
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if (exists $self->{SeqFeatureObjects}) {  | 
| 
1604
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we should have stored SeqFeature objects, probably from parsed table  | 
| 
1605
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for (my $i = 1; $i <= $self->last_row; $i++) {  | 
| 
1606
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $feature = $self->{SeqFeatureObjects}->[$i] or next;  | 
| 
1607
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $collSeqFeat = collapse_transcripts($feature) or next;  | 
| 
1608
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$success += $self->store_seqfeature($i, $collSeqFeat);  | 
| 
1609
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1610
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1611
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1612
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# no stored SeqFeature objects, probably names pointing to a database  | 
| 
1613
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we will have to fetch the feature from a database  | 
| 
1614
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $db = $self->open_meta_database(1) or  # force open a new db connection  | 
| 
1615
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			confess "No SeqFeature objects stored and no database connection!";  | 
| 
1616
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $name_i = $self->name_column;  | 
| 
1617
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $id_i = $self->id_column;  | 
| 
1618
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $type_i = $self->type_column;  | 
| 
1619
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for (my $i = 1; $i <= $self->last_row; $i++) {  | 
| 
1620
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			my $feature = get_db_feature(  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1621
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				db    => $db,  | 
| 
1622
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				name  => $self->value($i, $name_i) || undef,  | 
| 
1623
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				type  => $self->value($i, $type_i) || undef,  | 
| 
1624
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				id    => $self->value($i, $id_i) || undef,  | 
| 
1625
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			) or next;  | 
| 
1626
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $collSeqFeat = collapse_transcripts($feature) or next;  | 
| 
1627
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$success += $self->store_seqfeature($i, $collSeqFeat);  | 
| 
1628
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1629
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1630
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1631
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return $success;  | 
| 
1632
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1633
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1634
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub add_transcript_length {  | 
| 
1635
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my $self = shift;  | 
| 
1636
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	my $subfeature = shift || 'exon';  | 
| 
1637
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	unless (exists $self->{SeqFeatureObjects}) {  | 
| 
1638
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "no SeqFeature objects stored for collapsing!";  | 
| 
1639
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1640
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1641
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1642
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# load module  | 
| 
1643
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	eval {load "Bio::ToolBox::GeneTools", qw(get_transcript_length get_transcript_cds_length  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1644
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		get_transcript_5p_utr_length get_transcript_3p_utr_length)};  | 
| 
1645
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if ($@) {  | 
| 
1646
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unable to load Bio::ToolBox::GeneTools! cannot collapse transcripts!";  | 
| 
1647
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1648
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1649
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1650
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# determine name and calculation method  | 
| 
1651
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $length_calculator;  | 
| 
1652
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my $length_name;  | 
| 
1653
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if ($subfeature eq 'exon') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1654
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_calculator = \&get_transcript_length;  | 
| 
1655
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_name= 'Merged_Transcript_Length';  | 
| 
1656
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1657
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($subfeature eq 'cds') {  | 
| 
1658
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_calculator = \&get_transcript_cds_length;  | 
| 
1659
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_name= 'Transcript_CDS_Length';  | 
| 
1660
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1661
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($subfeature eq '5p_utr') {  | 
| 
1662
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_calculator = \&get_transcript_5p_utr_length;  | 
| 
1663
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_name= 'Transcript_5p_UTR_Length';  | 
| 
1664
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1665
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($subfeature eq '3p_utr') {  | 
| 
1666
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_calculator = \&get_transcript_3p_utr_length;  | 
| 
1667
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$length_name= 'Transcript_3p_UTR_Length';  | 
| 
1668
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1669
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1670
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unrecognized subfeature type '$subfeature'! No length calculated";  | 
| 
1671
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1672
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1673
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1674
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# add new column and calculate  | 
| 
1675
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $length_i = $self->add_column($length_name);  | 
| 
1676
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if (exists $self->{SeqFeatureObjects}) {  | 
| 
1677
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we should have stored SeqFeature objects, probably from parsed table  | 
| 
1678
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for (my $i = 1; $i <= $self->last_row; $i++) {  | 
| 
1679
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $feature = $self->{SeqFeatureObjects}->[$i] or next;  | 
| 
1680
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $length = &$length_calculator($feature);  | 
| 
1681
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			$self->{data_table}->[$i][$length_i] = $length || $feature->length;  | 
| 
1682
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1683
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1684
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1685
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# no stored SeqFeature objects, probably names pointing to a database  | 
| 
1686
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we will have to fetch the feature from a database  | 
| 
1687
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $db = $self->open_meta_database(1) or  # force open a new db connection  | 
| 
1688
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			confess "No SeqFeature objects stored and no database connection!";  | 
| 
1689
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $name_i = $self->name_column;  | 
| 
1690
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $id_i = $self->id_column;  | 
| 
1691
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $type_i = $self->type_column;  | 
| 
1692
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for (my $i = 1; $i <= $self->last_row; $i++) {  | 
| 
1693
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			my $feature = get_db_feature(  | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
 
 | 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1694
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				db    => $db,  | 
| 
1695
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				name  => $self->value($i, $name_i) || undef,  | 
| 
1696
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				type  => $self->value($i, $type_i) || undef,  | 
| 
1697
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				id    => $self->value($i, $id_i) || undef,  | 
| 
1698
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			) or next;  | 
| 
1699
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $length = &$length_calculator($feature);  | 
| 
1700
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			$self->{data_table}->[$i][$length_i] = $length || $feature->length;  | 
| 
1701
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->store_seqfeature($i, $feature);  | 
| 
1702
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# to store or not to store? May explode memory, but could save from   | 
| 
1703
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# expensive db calls later  | 
| 
1704
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1705
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1706
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1707
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return $length_i;  | 
| 
1708
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1709
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1710
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1711
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1712
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### Data structure manipulation  | 
| 
1713
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1714
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub sort_data {  | 
| 
1715
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
1860
 | 
 	my $self = shift;  | 
| 
1716
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $index = shift;  | 
| 
1717
 | 
1
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
5
 | 
 	my $direction = shift || 'i';  | 
| 
1718
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1719
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# confirm options  | 
| 
1720
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	return unless exists $self->{$index}{name};  | 
| 
1721
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	unless ($direction =~ /^[id]/i) {  | 
| 
1722
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unrecognized sort order '$direction'! Must be i or d";  | 
| 
1723
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1724
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1725
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1726
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Sample the dataset values  | 
| 
1727
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# this will be used to guess the sort method, below  | 
| 
1728
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $example; # an example of the dataset  | 
| 
1729
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	foreach (my $i = 1; $i <= $self->last_row; $i++) {  | 
| 
1730
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# we want to avoid null values, so keep trying  | 
| 
1731
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# null being . or any variation of N/A, NaN, inf  | 
| 
1732
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		my $v = $self->value($i, $index);  | 
| 
1733
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
10
 | 
 		if (defined $v and $v !~ /^(?:\.|n\/?a|nan|\-?inf)$/i) {  | 
| 
1734
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# a non-null value, take it  | 
| 
1735
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 			$example = $v;  | 
| 
1736
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 			last;  | 
| 
1737
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}   | 
| 
1738
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1739
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1740
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Determine sort method, either numerical or alphabetical  | 
| 
1741
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my $sortmethod;   | 
| 
1742
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	if ($example =~ /[a-z]/i) {   | 
| 
 
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1743
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# there are detectable letters  | 
| 
1744
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		$sortmethod = 'ascii';  | 
| 
1745
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}   | 
| 
1746
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($example =~ /^\-?\d+\.?\d*$/) {  | 
| 
1747
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# there are only digits, allowing for minus sign and a decimal point  | 
| 
1748
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# I don't think this allows for exponents, though  | 
| 
1749
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$sortmethod = 'numeric';  | 
| 
1750
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}   | 
| 
1751
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {   | 
| 
1752
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# unable to determine (probably alphanumeric), sort asciibetical  | 
| 
1753
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$sortmethod = 'ascii';  | 
| 
1754
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1755
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1756
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Re-order the datasets  | 
| 
1757
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Directly sorting the @data array is proving difficult. It keeps giving me  | 
| 
1758
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# a segmentation fault. So I'm using a different approach by copying the   | 
| 
1759
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# @data_table into a temporary hash.  | 
| 
1760
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# put data_table array into a temporary hash  | 
| 
1761
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# the hash key will the be dataset value,   | 
| 
1762
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# the hash value will be the reference the row data  | 
| 
1763
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	my %datahash;  | 
| 
1764
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1765
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# reorder numerically  | 
| 
1766
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 	if ($sortmethod eq 'numeric') {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1767
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for my $row (1..$self->last_row) {  | 
| 
1768
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $value = $self->value($row, $index);   | 
| 
1769
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			  | 
| 
1770
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# check to see whether this value exists or not  | 
| 
1771
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			while (exists $datahash{$value}) {  | 
| 
1772
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# add a really small number to bump it up and make it unique  | 
| 
1773
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# this, of course, presumes that none of the dataset values  | 
| 
1774
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# are really this small - this may be an entirely bad   | 
| 
1775
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# assumption!!!!! I suppose we could somehow calculate an   | 
| 
1776
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# appropriate value.... nah.  | 
| 
1777
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# don't worry, we're only modifying the value used for sorting,  | 
| 
1778
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# not the actual value  | 
| 
1779
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$value += 0.00000001;   | 
| 
1780
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1781
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# store the row data reference  | 
| 
1782
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$datahash{$value} = $self->{data_table}->[$row];   | 
| 
1783
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1784
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1785
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# re-fill the array based on the sort direction  | 
| 
1786
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if ($direction =~ /^i/i) {   | 
| 
1787
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# increasing sort  | 
| 
1788
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $i = 1; # keep track of the row, skip the header  | 
| 
1789
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			foreach (sort {$a <=> $b} keys %datahash) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1790
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# put back the reference to the anonymous array of row data  | 
| 
1791
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{data_table}->[$i] = $datahash{$_};  | 
| 
1792
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$i++; # increment for next row  | 
| 
1793
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1794
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}   | 
| 
1795
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {   | 
| 
1796
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# decreasing sort  | 
| 
1797
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $i = 1; # keep track of the row, skip the header  | 
| 
1798
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			foreach (sort {$b <=> $a} keys %datahash) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1799
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# put back the reference to the anonymous array of row data  | 
| 
1800
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{data_table}->[$i] = $datahash{$_};  | 
| 
1801
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$i++; # increment for next row  | 
| 
1802
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1803
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1804
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1805
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# summary prompt  | 
| 
1806
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		printf " Data table sorted numerically by the contents of %s\n",  | 
| 
1807
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$self->name($index);  | 
| 
1808
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}   | 
| 
1809
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1810
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# reorder asciibetically  | 
| 
1811
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($sortmethod eq 'ascii') {  | 
| 
1812
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		for my $row (1..$self->last_row) {  | 
| 
1813
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# get the value to sort by  | 
| 
1814
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
126
 | 
 			my $value = $self->value($row, $index);   | 
| 
1815
 | 
78
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
127
 | 
 			if (exists $datahash{$value}) {   | 
| 
1816
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# not unique  | 
| 
1817
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				my $n = 1;  | 
| 
1818
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				my $lookup = $value . sprintf("03%d", $n);  | 
| 
1819
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# we'll try to make a unique value by appending   | 
| 
1820
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# a number to the original value  | 
| 
1821
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				while (exists $datahash{$lookup}) {  | 
| 
1822
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 					# keep bumping up the number till it's unique  | 
| 
1823
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					$n++;  | 
| 
1824
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					$lookup = $value . sprintf("03%d", $n);  | 
| 
1825
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
1826
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$datahash{$lookup} = $self->{data_table}->[$row];  | 
| 
1827
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}   | 
| 
1828
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else {  | 
| 
1829
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# unique  | 
| 
1830
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
218
 | 
 				$datahash{$value} = $self->{data_table}->[$row];  | 
| 
1831
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1832
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1833
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1834
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# re-fill the array based on the sort direction  | 
| 
1835
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
12
 | 
 		if ($direction eq 'i' or $direction eq 'I') {   | 
| 
 
 | 
 
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1836
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# increasing  | 
| 
1837
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $i = 1; # keep track of the row  | 
| 
1838
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			foreach (sort {$a cmp $b} keys %datahash) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1839
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# put back the reference to the anonymous array of row data  | 
| 
1840
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$self->{data_table}->[$i] = $datahash{$_};  | 
| 
1841
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$i++; # increment for next row  | 
| 
1842
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1843
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}   | 
| 
1844
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		elsif ($direction eq 'd' or $direction eq 'D') {   | 
| 
1845
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# decreasing  | 
| 
1846
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 			my $i = 1; # keep track of the row  | 
| 
1847
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 			foreach (sort {$b cmp $a} keys %datahash) {  | 
| 
 
 | 
392
 | 
 
 | 
 
 | 
 
 | 
 
 | 
499
 | 
    | 
| 
1848
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# put back the reference to the anonymous array of row data  | 
| 
1849
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
109
 | 
 				$self->{data_table}->[$i] = $datahash{$_};  | 
| 
1850
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
93
 | 
 				$i++; # increment for next row  | 
| 
1851
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1852
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1853
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1854
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# summary prompt  | 
| 
1855
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 		printf " Data table sorted asciibetically by the contents of '%s'\n",  | 
| 
1856
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$self->name($index);  | 
| 
1857
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1858
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1859
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
17
 | 
 	return 1;  | 
| 
1860
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1861
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1862
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub gsort_data {  | 
| 
1863
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 	my $self = shift;  | 
| 
1864
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1865
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# identify indices  | 
| 
1866
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	unless ($self->feature_type eq 'coordinate') {  | 
| 
1867
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "no chromosome and start/position columns to sort!\n";  | 
| 
1868
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
1869
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1870
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my $chromo_i = $self->chromo_column;  | 
| 
1871
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my $start_i  = $self->start_column;  | 
| 
1872
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1873
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Load the data into a temporary hash  | 
| 
1874
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# The datalines will be put into a hash of hashes: The first key will be   | 
| 
1875
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# the chromosome name, the second hash will be the start value.  | 
| 
1876
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	#   | 
| 
1877
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# To deal with some chromosomes that don't have numbers (e.g. chrM), we'll  | 
| 
1878
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# use two separate hashes: one is for numbers, the other for strings  | 
| 
1879
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# when it comes time to sort, we'll put the numbers first, then strings  | 
| 
1880
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1881
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my %num_datahash;  | 
| 
1882
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my %str_datahash;  | 
| 
1883
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	for my $row (1 .. $self->last_row) {   | 
| 
1884
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1885
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
137
 | 
 		my $startvalue = $self->value($row, $start_i);  | 
| 
1886
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1887
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# put the dataline into the appropriate temporary hash  | 
| 
1888
 | 
78
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
134
 | 
 		if ($self->value($row, $chromo_i) =~ /^(?:chr)?(\d+)$/) {  | 
| 
1889
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# dealing with a numeric chromosome name  | 
| 
1890
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# restricting to either chr2 or just 2 but not 2-micron  | 
| 
1891
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $chromovalue = $1;  | 
| 
1892
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			while (exists $num_datahash{$chromovalue}{$startvalue}) {   | 
| 
1893
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# if another item already exists at this location  | 
| 
1894
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# add a really small number to bump it up and make it unique  | 
| 
1895
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$startvalue += 0.001;   | 
| 
1896
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1897
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$num_datahash{$chromovalue}{$startvalue} = $self->{data_table}->[$row];  | 
| 
1898
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}   | 
| 
1899
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {  | 
| 
1900
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# dealing with a non-numeric chromosome name  | 
| 
1901
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
125
 | 
 			my $chromovalue = $self->value($row, $chromo_i);  | 
| 
1902
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# use the entire chromosome name as key  | 
| 
1903
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
164
 | 
 			while (exists $str_datahash{$chromovalue}{$startvalue}) {   | 
| 
1904
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# if another item already exists at this location  | 
| 
1905
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# add a really small number to bump it up and make it unique  | 
| 
1906
 | 
36
 | 
 
 | 
 
 | 
 
 | 
 
 | 
174
 | 
 				$startvalue += 0.001;   | 
| 
1907
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
1908
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
281
 | 
 			$str_datahash{$chromovalue}{$startvalue} = $self->{data_table}->[$row];  | 
| 
1909
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1910
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1911
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1912
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1913
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Now re-load the data array with sorted data  | 
| 
1914
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# put the numeric chromosome data back first  | 
| 
1915
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $i = 1; # keep track of the row  | 
| 
1916
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	foreach my $chromovalue (sort {$a <=> $b} keys %num_datahash) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1917
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# first, numeric sort on increasing chromosome number  | 
| 
1918
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach my $startvalue (  | 
| 
1919
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			sort {$a <=> $b} keys %{ $num_datahash{$chromovalue} }   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1920
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		) {  | 
| 
1921
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# second, numeric sort on increasing position value  | 
| 
1922
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$self->{data_table}->[$i] = $num_datahash{$chromovalue}{$startvalue};  | 
| 
1923
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$i++; # increment for next row  | 
| 
1924
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1925
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1926
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# next put the string chromosome data back  | 
| 
1927
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	foreach my $chromovalue (sort {$a cmp $b} keys %str_datahash) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1928
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# first, ascii sort on increasing chromosome name  | 
| 
1929
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 		foreach my $startvalue (  | 
| 
1930
 | 
396
 | 
 
 | 
 
 | 
 
 | 
 
 | 
504
 | 
 			sort {$a <=> $b} keys %{ $str_datahash{$chromovalue} }   | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
    | 
| 
1931
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		) {  | 
| 
1932
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# second, numeric sort on increasing position value  | 
| 
1933
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
128
 | 
 			$self->{data_table}->[$i] = $str_datahash{$chromovalue}{$startvalue};  | 
| 
1934
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
101
 | 
 			$i++; # increment for next row  | 
| 
1935
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1936
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1937
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1938
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
13
 | 
 	return 1;  | 
| 
1939
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
1940
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1941
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub splice_data {  | 
| 
1942
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
4
 | 
 	my ($self, $part, $total_parts) = @_;  | 
| 
1943
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1944
 | 
1
 | 
  
 50
  
 | 
  
 33
  
 | 
 
 | 
 
 | 
10
 | 
 	unless ($part and $total_parts) {  | 
| 
1945
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		confess "ordinal part and total number of parts not passed\n";  | 
| 
1946
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1947
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	my $part_length = int($self->last_row / $total_parts);  | 
| 
1948
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1949
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check for SeqFeatureObjects array  | 
| 
1950
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if (exists $self->{SeqFeatureObjects}) {  | 
| 
1951
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# it needs to be the same length as the data table, it should be  | 
| 
1952
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		while (scalar @{$self->{SeqFeatureObjects}} < scalar @{$self->{data_table}}) {  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1953
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			push @{$self->{SeqFeatureObjects}}, undef;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1954
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1955
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1956
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
1957
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# splicing based on which part we do   | 
| 
1958
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
6
 | 
 	if ($part == 1) {  | 
| 
 
 | 
 
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
1959
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# remove all but the first part  | 
| 
1960
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		splice(   | 
| 
1961
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			@{$self->{'data_table'}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1962
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$part_length + 1   | 
| 
1963
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
1964
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if (exists $self->{SeqFeatureObjects}) {  | 
| 
1965
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			splice(   | 
| 
1966
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				@{$self->{SeqFeatureObjects}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1967
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$part_length + 1   | 
| 
1968
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			);  | 
| 
1969
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1970
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1971
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($part == $total_parts) {  | 
| 
1972
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# remove all but the last part  | 
| 
1973
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		splice(   | 
| 
1974
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 			@{$self->{'data_table'}},   | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
1975
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			1,  | 
| 
1976
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$part_length * ($total_parts - 1)   | 
| 
1977
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
1978
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 		if (exists $self->{SeqFeatureObjects}) {  | 
| 
1979
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			splice(   | 
| 
1980
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				@{$self->{SeqFeatureObjects}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1981
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				1,  | 
| 
1982
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$part_length * ($total_parts - 1)   | 
| 
1983
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			);  | 
| 
1984
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
1985
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
1986
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	else {  | 
| 
1987
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# splicing in the middle requires two rounds  | 
| 
1988
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1989
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# remove the last parts  | 
| 
1990
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		splice(   | 
| 
1991
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			@{$self->{'data_table'}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1992
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			($part * $part_length) + 1  | 
| 
1993
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
1994
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
1995
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# remove the first parts  | 
| 
1996
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		splice(   | 
| 
1997
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			@{$self->{'data_table'}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
1998
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			1,  | 
| 
1999
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$part_length * ($part - 1)   | 
| 
2000
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		);  | 
| 
2001
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2002
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if (exists $self->{SeqFeatureObjects}) {  | 
| 
2003
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			splice(   | 
| 
2004
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				@{$self->{SeqFeatureObjects}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2005
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				($part * $part_length) + 1  | 
| 
2006
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			);  | 
| 
2007
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			splice(   | 
| 
2008
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				@{$self->{SeqFeatureObjects}},   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2009
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				1,  | 
| 
2010
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$part_length * ($part - 1)   | 
| 
2011
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			);  | 
| 
2012
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2013
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2014
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2015
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# update last row metadata  | 
| 
2016
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
2
 | 
 	$self->{'last_row'} = scalar(@{$self->{'data_table'}}) - 1;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
    | 
| 
2017
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2018
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# re-open a new un-cached database connection  | 
| 
2019
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	if (exists $self->{db_connection}) {  | 
| 
2020
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		delete $self->{db_connection};  | 
| 
2021
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2022
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	return 1;  | 
| 
2023
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2024
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2025
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2026
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2027
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### File functions  | 
| 
2028
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2029
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub reload_children {  | 
| 
2030
 | 
1
 | 
 
 | 
 
 | 
  
1
  
 | 
  
1
  
 | 
3
 | 
 	my $self = shift;  | 
| 
2031
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my @files = @_;  | 
| 
2032
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	return unless @files;  | 
| 
2033
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2034
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# prepare Stream  | 
| 
2035
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	my $class = "Bio::ToolBox::Data::Stream";  | 
| 
2036
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	eval {load $class};  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
6
 | 
    | 
| 
2037
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
98
 | 
 	if ($@) {  | 
| 
2038
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unable to load $class! can't reload children!";  | 
| 
2039
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
2040
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2041
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2042
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# open first stream  | 
| 
2043
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my $first = shift @files;  | 
| 
2044
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
9
 | 
 	my $Stream = $class->new(in => $first);  | 
| 
2045
 | 
1
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	unless ($Stream) {  | 
| 
2046
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		carp "unable to load first child file $first";  | 
| 
2047
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
2048
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2049
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2050
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# destroy current data table and metadata  | 
| 
2051
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	$self->{data_table} = [];  | 
| 
2052
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	for (my $i = 0; $i < $self->number_columns; $i++) {  | 
| 
2053
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		delete $self->{$i};  | 
| 
2054
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2055
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2056
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# copy the metadata  | 
| 
2057
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	foreach (qw(program feature db bed gff ucsc headers number_columns last_row)) {  | 
| 
2058
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# various keys  | 
| 
2059
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
19
 | 
 		$self->{$_} = $Stream->{$_};  | 
| 
2060
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2061
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
 	for (my $i = 0; $i < $Stream->number_columns; $i++) {  | 
| 
2062
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# column metadata  | 
| 
2063
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
 		my %md = $Stream->metadata($i);  | 
| 
2064
 | 
4
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
 		$self->{$i} = \%md;  | 
| 
2065
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2066
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
4
 | 
 	my @comments = $Stream->comments;  | 
| 
2067
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	push @{$self->{other}}, @comments;  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
5
 | 
    | 
| 
2068
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2069
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# first row column headers  | 
| 
2070
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
3
 | 
 	$self->{data_table}->[0] = [ @{ $Stream->{data_table}->[0] } ];  | 
| 
 
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
15
 | 
    | 
| 
2071
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2072
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# load the data  | 
| 
2073
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 	while (my $row = $Stream->next_row) {  | 
| 
2074
 | 
39
 | 
 
 | 
 
 | 
 
 | 
 
 | 
81
 | 
 		$self->add_row($row);  | 
| 
2075
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2076
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
45
 | 
 	$Stream->close_fh;  | 
| 
2077
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2078
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# load remaining files  | 
| 
2079
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
24
 | 
 	foreach my $file (@files) {  | 
| 
2080
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
28
 | 
 		my $Stream = $class->new(in => $file);  | 
| 
2081
 | 
2
 | 
  
 50
  
 | 
 
 | 
 
 | 
 
 | 
8
 | 
 		if ($Stream->number_columns != $self->number_columns) {  | 
| 
2082
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			confess "fatal error: child file $file has a different number of columns!";  | 
| 
2083
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2084
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
7
 | 
 		while (my $row = $Stream->next_row) {  | 
| 
2085
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
158
 | 
 			my $a = $row->row_values;  | 
| 
2086
 | 
78
 | 
 
 | 
 
 | 
 
 | 
 
 | 
164
 | 
 			$self->add_row($a);  | 
| 
2087
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2088
 | 
2
 | 
 
 | 
 
 | 
 
 | 
 
 | 
75
 | 
 		$Stream->close_fh;  | 
| 
2089
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2090
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
25
 | 
 	$self->verify;  | 
| 
2091
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2092
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# clean up  | 
| 
2093
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
224
 | 
 	unlink($first, @files);  | 
| 
2094
 | 
1
 | 
 
 | 
 
 | 
 
 | 
 
 | 
10
 | 
 	return $self->last_row;  | 
| 
2095
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2096
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2097
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2098
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2099
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2100
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ### Export summary data file  | 
| 
2101
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub summary_file {  | 
| 
2102
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
  
1
  
 | 
0
 | 
 	my $self = shift;  | 
| 
2103
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2104
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Collect passed arguments  | 
| 
2105
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my %args = @_;   | 
| 
2106
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2107
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# parameters  | 
| 
2108
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# either one or more datasets can be summarized  | 
| 
2109
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	my $outfile = $args{'filename'} || undef;  | 
| 
2110
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my @datasets;  | 
| 
2111
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	if ($args{dataset} and ref $args{dataset} eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2112
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@datasets = @{ $args{dataset} };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2113
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2114
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($args{dataset} and ref $args{dataset} eq 'SCALAR') {  | 
| 
2115
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @datasets, $args{dataset};  | 
| 
2116
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2117
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my @startcolumns;  | 
| 
2118
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	if ($args{startcolumn} and ref $args{startcolumn} eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2119
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@startcolumns = @{ $args{startcolumn} };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2120
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2121
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($args{startcolumn} and ref $args{startcolumn} eq 'SCALAR') {  | 
| 
2122
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @startcolumns, $args{startcolumn};  | 
| 
2123
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2124
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my @endcolumns;  | 
| 
2125
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	$args{endcolumn} ||= $args{stopcolumn};  | 
| 
2126
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 	if ($args{endcolumn} and ref $args{endcolumn} eq 'ARRAY') {  | 
| 
 
 | 
 
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2127
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@endcolumns = @{ $args{endcolumn} };  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2128
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2129
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	elsif ($args{endcolumn} and ref $args{endcolumn} eq 'SCALAR') {  | 
| 
2130
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @endcolumns, $args{endcolumn};  | 
| 
2131
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2132
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2133
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2134
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Check required values  | 
| 
2135
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	unless ($self->verify) {  | 
| 
2136
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		cluck "bad data structure!";  | 
| 
2137
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		return;  | 
| 
2138
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2139
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	unless (defined $outfile) {  | 
| 
2140
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if ($self->basename) {  | 
| 
2141
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# use the opened file's filename if it exists  | 
| 
2142
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# prepend the path if it exists  | 
| 
2143
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# the extension will be added later  | 
| 
2144
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$outfile = $self->path . $self->basename;  | 
| 
2145
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2146
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		else {  | 
| 
2147
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			cluck "no filename passed to write_summary_data!\n";  | 
| 
2148
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			return;  | 
| 
2149
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2150
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2151
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2152
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Identify possible dataset columns  | 
| 
2153
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my %possibles;  | 
| 
2154
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my %skip = map {$_ => 1} qw (systematicname name id alias aliases type class   | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2155
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			geneclass chromosome chromo seq_id seqid start stop end gene strand   | 
| 
2156
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			length primary_id merged_transcript_length transcript_cds_length   | 
| 
2157
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			transcript_5p_utr_length transcript_3p_utr_length);  | 
| 
2158
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# walk through the dataset names  | 
| 
2159
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$possibles{unknown} = [];  | 
| 
2160
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	for (my $i = 0; $i < $self->number_columns; $i++) {  | 
| 
2161
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if (not exists $skip{ lc $self->{$i}{'name'} }) {  | 
| 
2162
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			my $d = $self->metadata($i, 'dataset') || undef;  | 
| 
2163
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if (defined $d) {  | 
| 
2164
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# we have what appears to be a dataset column  | 
| 
2165
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 				$possibles{$d} ||= [];  | 
| 
2166
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				push @{$possibles{$d}}, $i;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2167
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2168
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else {  | 
| 
2169
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# still an unknown possibility  | 
| 
2170
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				push @{$possibles{unknown}}, $i;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2171
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2172
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2173
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2174
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2175
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check datasets  | 
| 
2176
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	unless (@datasets) {  | 
| 
2177
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		if (scalar keys %possibles > 1) {  | 
| 
2178
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# we will always have the unknown category, so anything more than one   | 
| 
2179
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# means we found legitimate dataset columns  | 
| 
2180
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			delete $possibles{unknown};  | 
| 
2181
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2182
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@datasets = sort {$a cmp $b} keys %possibles;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2183
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2184
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2185
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check starts  | 
| 
2186
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if (scalar @startcolumns != scalar @datasets) {  | 
| 
2187
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@startcolumns = (); # ignore what we were given?  | 
| 
2188
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach my $d (@datasets) {  | 
| 
2189
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# take the first column with this dataset  | 
| 
2190
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			push @startcolumns, $possibles{$d}->[0];  | 
| 
2191
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2192
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2193
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2194
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# check stops  | 
| 
2195
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	if (scalar @endcolumns != scalar @datasets) {  | 
| 
2196
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		@endcolumns = (); # ignore what we were given?  | 
| 
2197
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		foreach my $d (@datasets) {  | 
| 
2198
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# take the last column with this dataset  | 
| 
2199
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			push @endcolumns, $possibles{$d}->[-1];  | 
| 
2200
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2201
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2202
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2203
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2204
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Prepare Data object to store the summed data  | 
| 
2205
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $summed_data = $self->new(  | 
| 
2206
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		feature => 'averaged_windows',   | 
| 
2207
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		columns => ['Window','Midpoint'],  | 
| 
2208
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
2209
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2210
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Go through each dataset  | 
| 
2211
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	foreach my $d (0 .. $#datasets) {  | 
| 
2212
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2213
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Prepare score column name  | 
| 
2214
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $data_name = simplify_dataset_name($datasets[$d]);  | 
| 
2215
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2216
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# add column  | 
| 
2217
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $i = $summed_data->add_column($data_name);  | 
| 
2218
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		$summed_data->metadata($i, 'dataset', $datasets[$d]);  | 
| 
2219
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2220
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# tag for remembering we're working with percentile bins  | 
| 
2221
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $do_percentile = 0;  | 
| 
2222
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2223
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# remember the row  | 
| 
2224
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		my $row = 1;  | 
| 
2225
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2226
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		# Collect summarized data  | 
| 
2227
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		for (  | 
| 
2228
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			my $column = $startcolumns[$d];  | 
| 
2229
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$column <= $endcolumns[$d];  | 
| 
2230
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			$column++  | 
| 
2231
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		) {   | 
| 
2232
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2233
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# determine the midpoint position of the window  | 
| 
2234
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# this assumes the column metadata has start and stop  | 
| 
2235
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $midpoint = int(sum0($self->metadata($column, 'start'),   | 
| 
2236
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				$self->metadata($column, 'stop')) / 2);   | 
| 
2237
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2238
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# convert midpoint to fraction of 1000 for plotting if necessary  | 
| 
2239
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if (substr($self->name($column), -1) eq '%') {  | 
| 
2240
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$midpoint *= 10; # midpoint * 0.01 * 1000 bp  | 
| 
2241
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$do_percentile++;  | 
| 
2242
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2243
 | 
  
0
  
 | 
  
  0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			if ($do_percentile and substr($self->name($column), -2) eq 'bp') {  | 
| 
2244
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# working on the extension after the percentile bins  | 
| 
2245
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$midpoint += 1000;  | 
| 
2246
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2247
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2248
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# collect the values in the column  | 
| 
2249
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my @values;  | 
| 
2250
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			for my $row (1..$self->last_row) {  | 
| 
2251
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				my $v = $self->value($row, $column);  | 
| 
2252
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				push @values, $v eq '.' ? 0 : $v;  # treat nulls as zero  | 
| 
2253
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2254
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2255
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# adjust if log value  | 
| 
2256
 | 
  
0
  
 | 
 
 | 
  
  0
  
 | 
 
 | 
 
 | 
0
 | 
 			my $log = $self->metadata($column, 'log2') || 0;  | 
| 
2257
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if ($log) {  | 
| 
2258
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				@values = map { 2 ** $_ } @values;  | 
| 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
    | 
| 
2259
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2260
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2261
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# determine mean value  | 
| 
2262
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			my $window_mean = sum0(@values) / scalar(@values);  | 
| 
2263
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if ($log) {   | 
| 
2264
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$window_mean = log($window_mean) / log(2);  | 
| 
2265
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2266
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		  | 
| 
2267
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			# push to summed output  | 
| 
2268
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			if ($d == 0) {  | 
| 
2269
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# this is the first dataset, so we need to add a row  | 
| 
2270
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$summed_data->add_row( [ $self->{$column}{'name'}, $midpoint, $window_mean ] );  | 
| 
2271
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2272
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			else {  | 
| 
2273
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# we're summarizing multiple datasets, we already have name midpoint  | 
| 
2274
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				# first do sanity check  | 
| 
2275
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				if ($summed_data->value($row, 1) != $midpoint) {  | 
| 
2276
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					carp("unable to summarize multiple datasets with nonequal columns of data!");  | 
| 
2277
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 					return;  | 
| 
2278
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 				}  | 
| 
2279
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 				$summed_data->value($row, $i, $window_mean);  | 
| 
2280
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 			}  | 
| 
2281
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 			$row++;  | 
| 
2282
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		}  | 
| 
2283
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2284
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2285
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2286
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	  | 
| 
2287
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	# Write summed data  | 
| 
2288
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	$outfile =~ s/\.txt(\.gz)?$//i; # strip any .txt or .gz extensions if present  | 
| 
2289
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	my $written_file = $summed_data->write_file(  | 
| 
2290
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'filename'  => $outfile . '_summary.txt',  | 
| 
2291
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'gz'        => 0,  | 
| 
2292
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
2293
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 	return $written_file;  | 
| 
2294
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2295
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2296
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2297
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2298
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2299
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2300
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ####################################################  | 
| 
2301
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2302
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 package Bio::ToolBox::Data::Iterator;  | 
| 
2303
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
22464
 | 
 use Bio::ToolBox::Data::Feature;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
12
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
142
 | 
    | 
| 
2304
 | 
3
 | 
 
 | 
 
 | 
  
3
  
 | 
 
 | 
34
 | 
 use Carp;  | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
8
 | 
    | 
| 
 
 | 
3
 | 
 
 | 
 
 | 
 
 | 
 
 | 
926
 | 
    | 
| 
2305
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2306
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub new {  | 
| 
2307
 | 
9
 | 
 
 | 
 
 | 
  
9
  
 | 
 
 | 
24
 | 
 	my ($class, $data) = @_;  | 
| 
2308
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
35
 | 
 	my %iterator = (  | 
| 
2309
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'index'     => 1,  | 
| 
2310
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'data'      => $data,  | 
| 
2311
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
2312
 | 
9
 | 
 
 | 
 
 | 
 
 | 
 
 | 
39
 | 
 	return bless \%iterator, $class;  | 
| 
2313
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2314
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2315
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub next_row {  | 
| 
2316
 | 
154
 | 
 
 | 
 
 | 
  
154
  
 | 
 
 | 
1985
 | 
 	my $self = shift;  | 
| 
2317
 | 
154
 | 
  
100
  
 | 
 
 | 
 
 | 
 
 | 
357
 | 
 	return if $self->{'index'} > $self->{data}->{last_row}; # no more  | 
| 
2318
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
199
 | 
 	my $i = $self->{'index'};  | 
| 
2319
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
207
 | 
 	$self->{'index'}++;  | 
| 
2320
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	my @options = (  | 
| 
2321
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		'data'      => $self->{data},  | 
| 
2322
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
294
 | 
 		'index'     => $i,  | 
| 
2323
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	);  | 
| 
2324
 | 
148
 | 
  
 50
  
 | 
  
 66
  
 | 
 
 | 
 
 | 
456
 | 
 	if (  | 
| 
2325
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		exists $self->{data}->{SeqFeatureObjects} and  | 
| 
2326
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 		defined $self->{data}->{SeqFeatureObjects}->[$i]   | 
| 
2327
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	) {  | 
| 
2328
 | 
  
0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
0
 | 
 		push @options, 'feature', $self->{data}->{SeqFeatureObjects}->[$i];  | 
| 
2329
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	}  | 
| 
2330
 | 
148
 | 
 
 | 
 
 | 
 
 | 
 
 | 
388
 | 
 	return Bio::ToolBox::Data::Feature->new(@options);	  | 
| 
2331
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2332
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2333
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 sub row_index {  | 
| 
2334
 | 
  
0
  
 | 
 
 | 
 
 | 
  
0
  
 | 
 
 | 
 
 | 
 	my $self = shift;  | 
| 
2335
 | 
  
0
  
 | 
  
  0
  
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	carp "row_index is a read only method" if @_;  | 
| 
2336
 | 
0
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 	return $self->{'index'};  | 
| 
2337
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 }  | 
| 
2338
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2339
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2340
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2341
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 ####################################################  | 
| 
2342
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
    | 
| 
2343
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 
 | 
 __END__  |