File Coverage

lib/Bio/Graphics/Wiggle.pm
Criterion Covered Total %
statement 262 421 62.2
branch 59 162 36.4
condition 35 105 33.3
subroutine 48 65 73.8
pod 18 40 45.0
total 422 793 53.2


line stmt bran cond sub pod time code
1             package Bio::Graphics::Wiggle;
2              
3             =head1 NAME
4              
5             Bio::Graphics::Wiggle -- Binary storage for dense genomic features
6              
7             =head1 SYNOPSIS
8              
9             # all positions are 1-based
10              
11             my $wig = Bio::Graphics::Wiggle->new('./test.wig',
12             $writeable,
13             { seqid => $seqid,
14             start => $start,
15             step => $step,
16             min => $min,
17             max => $max });
18              
19             $wig->erase;
20              
21             my $seqid = $wig->seqid('new_id');
22             my $max = $wig->max($new_max);
23             my $min = $wig->min($new_min);
24             my $step = $wig->step($new_step); # data stored at modulus step == 0; all else is blank
25              
26             $wig->set_value($position => $value); # store $value at position
27             $wig->set_values($position => \@values); # store array of values at position
28             $wig->set_range($start=>$end,$value); # store the same $value from $start to $end
29              
30             my $value = $wig->value($position); # fetch value from position
31             my $values = $wig->values($start,$end); # fetch range of data from $start to $end
32              
33             $wig->window(100); # sample window size
34             $wig->smoothing('mean'); # when sampling, compute the mean value across sample window
35             my $values = $wig->values($start,$end,$samples); # fetch $samples data points from $start to $end
36              
37              
38             =head1 DESCRIPTION
39              
40             IMPORTANT NOTE: This implementation is still not right. See
41             http://genomewiki.ucsc.edu/index.php/Wiggle for a more space-efficient
42             implementation.
43              
44             This module stores "wiggle" style quantitative genome data for display
45             in a genome browser application. The data for each chromosome (or
46             contig, or other reference sequence) is stored in a single file in the
47             following format:
48              
49             256 byte header
50             50 bytes seqid, zero-terminated C string
51             4 byte long integer, value of "step" (explained later)
52             4 byte perl native float, the "min" value
53             4 byte perl native float, the "max" value
54             4 byte long integer, value of "span"
55             4 byte perl native float, the mean
56             4 byte perl native float, the standard deviation
57             2 byte unsigned short, the version number (currently version 0)
58             4 byte long integer, sequence start position (in 0-based coordinates)
59             null padding to 256 bytes for future use
60              
61             The remainder of the file consists of 8-bit unsigned scaled integer
62             values. This means that all quantitative data will be scaled to 8-bit
63             precision!
64              
65             For a convenient method of creating Wiggle files from UCSC-type WIG
66             input and creating GFF3 output, please see
67             L<Bio::Graphics::Wiggle::Loader>.
68              
69             =head1 METHODS
70              
71             =head2 Constructor and Accessors
72              
73             =over 4
74              
75             =item $wig = Bio::Graphics::Wiggle->new($filename,$writeable,{options})
76              
77             Open/create a wiggle-format data file:
78              
79             $filename -- path to the file to open/create
80             $writeable -- boolean value indicating whether file is
81             writeable. Missing files will only be created
82             if $writeable set to a true value. If path is
83             empty (undef or empty string) and writeable is true,
84             new() will create a temporary file that will be
85             deleted when the object goes out of scope.
86             {options} -- hash ref of the following named options, only valid
87             when creating a new wig file with $writeable true.
88              
89             option name description default
90             ----------- ----- -------
91             seqid name/id of sequence empty name
92             min minimum value of data points 0
93             max maximum value of data points 255
94             step interval between data points 1
95             span width of data points value of "step"
96              
97             The "step" can be used to create sparse files to save space. By
98             default, step is set to 1, in which case a data value will be stored
99             at each base of the sequence. By setting step to 10, then each value
100             is taken to correspond to 10 bp, and the file will be 10x smaller.
101             For example, consider this step 5 data set:
102              
103             1 2 3 4 5 6 7 8 9 10 11 12 13 14
104             20 . . . . 60 . . . . 80 . . .
105              
106             We have stored the values "20" "60" and "80" at positions 1, 6 and 11,
107             respectively. When retrieving this data, it will appear as if
108             positions 1 through 5 have a value of 20, positions 6-10 have a value
109             of 60, and positions 11-14 have a value of 80. In the data file, we
110             store, positions 1,6,and 11 in adjacent bytes.
111              
112             Note that no locking is performed by this module. If you wish to allow
113             multi-user write access to the databases files, you will need to
114             flock() the files yourself.
115              
116             =item $seqid = $wig->seqid(['new_id'])
117              
118             =item $max = $wig->max([$new_max])
119              
120             =item $min = $wig->min([$new_min])
121              
122             =item $step = $wig->step([$new_step])
123              
124             =item $span = $wig->span([$new_span])
125              
126             =item $mean = $wig->mean([$new_mean]);
127              
128             =item $stdev = $wig->stdev([$new_stdev]);
129              
130             These accessors get or set the corresponding values. Setting is only
131             allowed if the file was opened for writing. Note that changing the
132             min, max and step after writing data to the file under another
133             parameter set will produce unexpected (and invalid) results, as the
134             existing data is not automatically updated to be consistent.
135              
136             =item $trim = $wig->trim([$new_trim]);
137              
138             The trim method sets the trimming method, which can be used to trim
139             out extreme values. Three methods are currently supported:
140              
141             none No trimming
142             stdev Trim 1 standard deviation above and below mean
143             stdevN Trim N standard deviations above and below the mean
144              
145             In "stdevN", any can be any positive integer.
146              
147             =back
148              
149             =head2 Setting Data
150              
151             =over 4
152              
153             =item $wig->set_value($position => $value)
154              
155             This method sets the value at $position to $value. If a step>1 is in
156             force, then $position will be rounded down to the nearest multiple of
157             step.
158              
159             =item $wig->set_range($start=>$end, $value)
160              
161             This method sets the value of all bases between $start and $end to
162             $value, honoring step.
163              
164             =item $sig->set_values($position => \@values)
165              
166             This method writes an array of values into the datababase beginning at
167             $position (or the nearest lower multiple of step). If step>1, then
168             values will be written at step intervals.
169              
170             =back
171              
172             =head2 Retrieving Data
173              
174             =over 4
175              
176             =item $value = $wig->value($position)
177              
178             Retrieve the single data item at position $position, or the nearest
179             lower multiple of $step if step>1.
180              
181             =item $values = $wig->values($start=>$end)
182              
183             Retrieve the values in the range $start to $end and return them as an
184             array ref. Note that you will always get an array of size
185             ($end-$start+1) even if step>1; the data in between the step intervals
186             will be filled in.
187              
188             =item $values = $wig->values($start=>$end,$samples)
189              
190             Retrieve a sampling of the values between $start and $end. Nothing
191             very sophisticated is done here; the code simply returns the number of
192             values indicated in $samples, smoothed according to the smoothing
193             method selected (default to "mean"), then selected at even intervals
194             from the range $start to $end. The return value is an arrayref of
195             exactly $samples values.
196              
197             =item $string = $wig->export_to_wif($start,$end)
198              
199             =item $string = $wig->export_to_wif64($start,$end)
200              
201             Export the region from start to end in the "wif" format. This data can
202             later be imported into another Bio::Graphics::Wiggle object. The first
203             version returns a binary string. The second version returns a base64
204             encoded version that is safe for ascii-oriented formata such as GFF3
205             and XML.
206              
207             =item $wig->import_from_wif($string)
208              
209             =item $wig->import_from_wif64($string)
210              
211             Import a wif format data string into the Bio::Graphics::Wiggle
212             object. The first version expects a binary string. The second version
213             expects a base64 encoded version that is safe for ascii-oriented
214             formata such as GFF3 and XML.
215              
216             =back
217              
218              
219             =cut
220              
221             # read/write genome tiling data, to be compatible with Jim Kent's WIG format
222 1     1   5 use strict;
  1         1  
  1         34  
223 1     1   6 use warnings;
  1         2  
  1         31  
224 1     1   910 use IO::File;
  1         1156  
  1         153  
225 1     1   7 use Carp 'croak','carp','confess';
  1         1  
  1         55  
226              
227 1     1   6 use constant HEADER_LEN => 256;
  1         2  
  1         79  
228             # seqid, step, min, max, span, mean, stdev, version, start
229 1     1   5 use constant HEADER => '(Z50LFFLFFSL)@'.HEADER_LEN;
  1         1  
  1         39  
230 1     1   5 use constant BODY => 'C';
  1         2  
  1         69  
231 1     1   10 use constant DEBUG => 0;
  1         2  
  1         52  
232 1     1   5 use constant DEFAULT_SMOOTHING => 'mean';
  1         2  
  1         36  
233 1     1   5 use constant VERSION => 0;
  1         2  
  1         5306  
234             our $VERSION = '1.0';
235              
236             sub new {
237 2     2 1 6 my $class = shift;
238 2         6 my ($path,$write,$options) = @_;
239 2   50     7 $path ||= ''; # to avoid uninit warning
240 2 50       152 my $mode = $write ? -e $path # if file already exists...
    100          
241             ? '+<' # ...open for read/write
242             : '+>' # ...else clobber and open a new one
243             : '<'; # read only
244 2         13 my $fh = $class->new_fh($path,$mode);
245 2 50 0     370 $fh or die (($path||'temporary file').": $!");
246              
247 2   100     10 $options ||= {};
248              
249 2   33     32 my $self = bless {fh => $fh,
250             write => $write,
251             dirty => scalar keys %$options
252             }, ref $class || $class;
253              
254 2   100     5 my $stored_options = eval {$self->_readoptions} || {};
255 2 50       49 $options->{start}-- if defined $options->{start}; # 1-based ==> 0-based coordinates
256 2         19 my %merged_options = (%$stored_options,%$options);
257             # warn "merged options = ",join ' ',%merged_options;
258 2   50     14 $merged_options{version}||= 0;
259 2   50     9 $merged_options{seqid} ||= 'chrUnknown';
260 2   50     7 $merged_options{min} ||= 0;
261 2   50     7 $merged_options{max} ||= 255;
262 2   50     6 $merged_options{mean} ||= 128;
263 2   50     52 $merged_options{stdev} ||= 255;
264 2   100     10 $merged_options{trim} ||= 'none';
265 2   50     10 $merged_options{step} ||= 1;
266 2   50     12 $merged_options{start} ||= 0;
267 2   33     7 $merged_options{span} ||= $merged_options{step};
268 2         8 $self->{options} = \%merged_options;
269 2 100       8 $self->_do_trim unless $self->trim eq 'none';
270 2         12 return $self;
271             }
272              
273             sub new_fh {
274 2     2 0 4 my $self = shift;
275 2         5 my ($path,$mode) = @_;
276 2 50       23 return $path ? IO::File->new($path,$mode)
277             : IO::File->new_tmpfile;
278             }
279              
280             sub end {
281 4     4 0 5 my $self = shift;
282 4 100       13 unless (defined $self->{end}) {
283 1         4 my $size = (stat($self->fh))[7];
284 1         4 my $data_len = $size - HEADER_LEN();
285 1 50       4 return unless $data_len>0; # undef end
286 1         4 $self->{end} = ($self->start-1) + $data_len * $self->step;
287             }
288 4         15 return $self->{end};
289             }
290              
291 2     2   5565 sub DESTROY { shift->write }
292              
293             sub erase {
294 0     0 0 0 my $self = shift;
295 0         0 $self->fh->truncate(HEADER_LEN);
296             }
297              
298 7631     7631 0 31428 sub fh { shift->{fh} }
299 3813     3813 0 5916 sub seek { shift->fh->seek(shift,0) }
300 0     0 0 0 sub tell { shift->fh->tell() }
301              
302             sub _option {
303 7660     7660   8763 my $self = shift;
304 7660         9325 my $option = shift;
305 7660         14091 my $d = $self->{options}{$option};
306 7660 100       14676 if (@_) {
307 2         5 $self->{dirty}++;
308 2         4 $self->{options}{$option} = shift;
309 2 50 66     11 delete $self->{scale} if $option eq 'min' or $option eq 'max';
310             }
311 7660         14806 return $d;
312             }
313              
314 0     0 0 0 sub version { shift->_option('version',@_) }
315 2     2 1 633 sub seqid { shift->_option('seqid',@_) }
316 4     4 1 9 sub min { shift->_option('min',@_) }
317 4     4 1 10 sub max { shift->_option('max',@_) }
318 3818     3818 1 7471 sub step { shift->_option('step',@_) }
319 9     9 1 25 sub span { shift->_option('span',@_) }
320 1     1 1 4 sub mean { shift->_option('mean',@_) }
321 1     1 1 4 sub stdev { shift->_option('stdev',@_) }
322 3     3 1 11 sub trim { shift->_option('trim',@_) }
323              
324             sub start { # slightly different because we have to deal with 1 vs 0-based coordinates
325 3818     3818 0 4775 my $self = shift;
326 3818         6633 my $start = $self->_option('start');
327 3818         4607 $start++; # convert into 1-based coordinates
328 3818 50       7061 if (@_) {
329 0         0 my $newstart = shift;
330 0         0 $self->_option('start',$newstart-1); # store in zero-based coordinates
331             }
332 3818         5792 return $start;
333             }
334              
335             sub smoothing {
336 0     0 0 0 my $self = shift;
337 0   0     0 my $d = $self->{smoothing} || DEFAULT_SMOOTHING;
338 0 0       0 $self->{smoothing} = shift if @_;
339 0         0 $d;
340             }
341              
342             sub write {
343 3     3 0 6 my $self = shift;
344 3 100 66     261 if ($self->{dirty} && $self->{write}) {
345 1         6 $self->_writeoptions($self->{options});
346 1         12 undef $self->{dirty};
347 1         4 $self->fh->flush;
348             }
349             }
350              
351             sub _readoptions {
352 2     2   5 my $self = shift;
353 2         7 my $fh = $self->fh;
354 2         4 my $header;
355 2         22 $fh->seek(0,0);
356 2 100       33 return unless $fh->read($header,HEADER_LEN) == HEADER_LEN;
357 1         46 return $self->_parse_header($header);
358             }
359              
360             sub _parse_header {
361 1     1   2 my $self = shift;
362 1         3 my $header = shift;
363 1         13 my ($seqid,$step,$min,$max,$span,
364             $mean,$stdev,$version,$start) = unpack(HEADER,$header);
365 1         13 return { seqid => $seqid,
366             step => $step,
367             span => $span,
368             min => $min,
369             max => $max,
370             mean => $mean,
371             stdev => $stdev,
372             version => $version,
373             start => $start,
374             };
375             }
376              
377             sub _generate_header {
378 1     1   3 my $self = shift;
379 1         2 my $options = shift;
380 1         3 return pack(HEADER,@{$options}{qw(seqid step min max span mean stdev version start)});
  1         14  
381             }
382              
383             sub _writeoptions {
384 1     1   2 my $self = shift;
385 1         3 my $options = shift;
386 1         3 my $fh = $self->fh;
387 1         5 my $header = $self->_generate_header($options);
388 1         7 $fh->seek(0,0);
389 1 50       30 $fh->print($header) or die "write failed: $!";
390             }
391              
392             sub _do_trim {
393 1     1   3 my $self = shift;
394              
395             # don't trim if there is no score range
396 1 50       4 ($self->max - $self->min) or return;
397            
398 1         6 my $trim = lc $self->trim;
399 1         1 my ($method,$arg);
400 1 50       7 if ($trim =~ /([a-z]+)(\d+)/) {
401 1         4 $method = "_trim_${1}";
402 1         4 $arg = $2;
403             }
404             else {
405 0         0 $method = "_trim_${trim}";
406             }
407 1 50       7 unless ($self->can($method)) {
408 0         0 carp "invalid trim method $trim";
409 0         0 return;
410             }
411            
412 1         4 $self->$method($arg);
413             }
414              
415             # trim n standard deviations from the mean
416             sub _trim_stdev {
417 1     1   2 my $self = shift;
418 1   50     5 my $factor = shift || 1;
419 1         4 my $mean = $self->mean;
420 1         4 my $stdev = $self->stdev * $factor;
421 1 50       3 my $min = $self->min > $mean - $stdev ? $self->min : $mean - $stdev;
422 1 50       3 my $max = $self->max < $mean + $stdev ? $self->max : $mean + $stdev;
423 1         3 warn "_trim_stdev (* $factor) : setting min to $min, max to $max (was ",$self->min,',',$self->max,')'
424             if DEBUG;
425 1         4 $self->min($min);
426 1         4 $self->max($max);
427             }
428              
429             sub set_value {
430 3809     3809 1 4472 my $self = shift;
431 3809 50       8451 croak "usage: \$wig->set_value(\$position => \$value)"
432             unless @_ == 2;
433 3809         8076 $self->value(@_);
434             }
435              
436             sub set_range {
437 0     0 1 0 my $self = shift;
438 0 0       0 croak "usage: \$wig->set_range(\$start_position => \$end_position, \$value)"
439             unless @_ == 3;
440 0         0 $self->value(@_);
441             }
442              
443             sub value {
444 3809     3809 1 4109 my $self = shift;
445 3809         4453 my $position = shift;
446              
447 3809         6153 my $offset = $self->_calculate_offset($position);
448 3809 50       8341 $offset >= HEADER_LEN or die "Tried to retrieve data from before start position";
449 3809 50       7386 $self->seek($offset) or die "Seek failed: $!";
450              
451 3809 50       84831 if (@_ == 2) {
    50          
452 0         0 my $end = shift;
453 0         0 my $new_value = shift;
454 0         0 my $step = $self->step;
455 0         0 my $scaled_value = $self->scale($new_value);
456 0 0       0 $self->fh->print(pack('C*',($scaled_value)x(($end-$position+1)/$step))) or die "Write failed: $!";
457 0 0 0     0 $self->{end} = $end if !exists $self->{end} || $self->{end} < $end;
458             }
459              
460             elsif (@_==1) {
461 3809         6223 my $new_value = shift;
462 3809         7360 my $scaled_value = $self->scale($new_value);
463 3809 50       13694 $self->fh->print(pack('C*',$scaled_value)) or die "Write failed: $!";
464 3809 50 66     47208 $self->{end} = $position if !exists $self->{end} || $self->{end} < $position;
465 3809         11010 return $new_value;
466             }
467              
468             else { # retrieving data
469 0         0 my $buffer;
470 0 0       0 $self->fh->read($buffer,1) or die "Read failed: $!";
471 0         0 my $scaled_value = unpack('C*',$buffer);
472              
473             # missing data, so look back at most span values to get it
474 0 0 0     0 if ($scaled_value == 0 && (my $span = $self->span) > 1) {
475 0         0 $offset = $self->_calculate_offset($position-$span+1);
476 0 0       0 $offset >= HEADER_LEN or die "Tried to retrieve data from before start position";
477 0 0       0 $self->seek($offset) or die "Seek failed: $!";
478              
479 0         0 $self->fh->read($buffer,$span/$self->step);
480 0         0 for (my $i=length($buffer)-2;$i>=0;$i--) {
481 0         0 my $val = substr($buffer,$i,1);
482 0 0       0 next if $val eq "\0";
483 0         0 $scaled_value = unpack('C*',$val);
484 0         0 last;
485             }
486              
487             }
488 0         0 return $self->unscale($scaled_value);
489             }
490             }
491              
492             sub _calculate_offset {
493 3813     3813   4870 my $self = shift;
494 3813         4457 my $position = shift;
495 3813         7291 my $step = $self->step;
496 3813         7878 my $start = $self->start;
497 3813         10199 return HEADER_LEN + int(($position-$start)/$step);
498             }
499              
500             sub set_values {
501 0     0 1 0 my $self = shift;
502 0 0 0     0 croak "usage: \$wig->set_values(\$position => \@values)"
503             unless @_ == 2 and ref $_[1] eq 'ARRAY';
504 0         0 $self->values(@_);
505             }
506              
507             # read or write a series of values
508             sub values {
509 4     4 1 11 my $self = shift;
510 4         7 my $start = shift;
511 4 50 33     18 if (ref $_[0] && ref $_[0] eq 'ARRAY') {
512 0         0 $self->_store_values($start,@_);
513             } else {
514 4         18 $self->_retrieve_values($start,@_);
515             }
516             }
517              
518             sub export_to_wif64 {
519 0     0 1 0 my $self = shift;
520 0         0 my $data = $self->export_to_wif(@_);
521 0 0       0 eval "require MIME::Base64"
522             unless MIME::Base64->can('encode_base64');
523 0         0 return MIME::Base64::encode_base64($data);
524             }
525             sub import_from_wif64 {
526 0     0 1 0 my $self = shift;
527 0         0 my $data = shift;
528              
529 0 0       0 eval "require MIME::Base64"
530             unless MIME::Base64->can('decode_base64');
531 0         0 return $self->import_from_wif(MIME::Base64::decode_base64($data));
532             }
533              
534             # subregion in "wiggle interchange format" (wif)
535             sub export_to_wif {
536 0     0 1 0 my $self = shift;
537 0         0 my ($start,$end) = @_;
538              
539             # get the 256 byte header
540 0         0 my $data = $self->_generate_header($self->{options});
541              
542             # add the range to the data (8 bytes overhead)
543 0         0 $data .= pack("L",$start);
544 0         0 $data .= pack("L",$end);
545              
546             # add the packed data for this range
547 0         0 $data .= $self->_retrieve_packed_range($start,$end-$start+1,$self->step);
548 0         0 return $data;
549             }
550              
551             sub export_to_bedgraph {
552 1     1 0 1162 my $self = shift;
553 1         2 my ($start,$end,$fh) = @_;
554 1         3 my $max_range = 100_000;
555              
556 1   50     7 $start ||= 1;
557 1   33     4 $end ||= $self->end;
558              
559 1         2 my $lines;
560 1         6 for (my $s=$start;$s<$end;$s+=$max_range) {
561 1         3 my $e = $s + $max_range - 1;
562 1 50       4 $e = $end if $e > $end;
563 1         5 my $b = $self->values($s,$e);
564 1         13 $lines .= $self->_bedgraph_lines($s,$b,$fh);
565             }
566              
567 1         8 return $lines;
568             }
569              
570             sub _bedgraph_lines {
571 1     1   4 my $self = shift;
572 1         3 my ($start,$values,$fh) = @_;
573 1         8 my $seqid = $self->seqid;
574 1         2 my $result;
575              
576 1         3 my ($last_val,$last_start,$end);
577 1         4 $last_start = $start-1; # 0 based indexing
578 1         6 for (my $i=0;$i<@$values;$i++) {
579 5000         19311 my $v = $values->[$i];
580              
581 5000 100       47273 if (!defined $v) {
582 2168 100       13169 if (defined $last_val) {
583 56         354 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val);
584 56         136 undef $last_val;
585             }
586 2168         4394 $last_start = $start+$i;
587 2168         23099 next;
588             }
589              
590 2832 50 66     44513 if (defined $last_val && $last_val != $v) {
591 0         0 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$start+$i-1,$last_val);
592 0         0 $last_start = $start+$i-1;
593             }
594              
595 2832         7334 $last_val = $v;
596 2832         20723 $end = $start+$i-1;
597             }
598 1 50       12 $result .= $self->_append_or_print_bedgraph($fh,$seqid,$last_start,$end+1,$last_val) if $last_val;
599 1         114 return $result;
600             }
601              
602             sub _append_or_print_bedgraph {
603 57     57   246 my $self = shift;
604 57         973 my ($fh,$seqid,$start,$end,$val) = @_;
605 57         5846 my $data = join("\t",$seqid,$start,$end,sprintf("%.2f",$val))."\n";
606 57 50       381 if ($fh) {
607 0         0 print $fh $data;
608 0         0 return '';
609             } else {
610 57         481 return $data;
611             }
612             }
613              
614             sub import_from_wif {
615 0     0 1 0 my $self = shift;
616 0         0 my $wifdata = shift;
617              
618             # BUG: should check that header is compatible
619 0         0 my $header = substr($wifdata,0,HEADER_LEN);
620 0         0 my $start = unpack('L',substr($wifdata,HEADER_LEN, 4));
621 0         0 my $end = unpack('L',substr($wifdata,HEADER_LEN+4,4));
622              
623 0         0 my $options = $self->_parse_header($header);
624 0   0     0 my $stored_options = eval {$self->_readoptions} || {};
625 0         0 my %merged_options = (%$stored_options,%$options);
626 0         0 $self->{options} = \%merged_options;
627 0         0 $self->{dirty}++;
628              
629             # write the data
630 0         0 $self->seek($self->_calculate_offset($start));
631 0 0       0 $self->fh->print(substr($wifdata,HEADER_LEN+8)) or die "write failed: $!";
632 0 0 0     0 $self->{end} = $end if !defined $self->{end} or $self->{end} < $end;
633             }
634              
635             sub _retrieve_values {
636 4     4   6 my $self = shift;
637 4         8 my ($start,$end,$samples) = @_;
638              
639 4         11 my $data_start = $self->start;
640 4         42 my $step = $self->step;
641 4         10 my $span = $self->span;
642              
643 4 50       14 croak "Value of start position ($start) is less than data start of $data_start"
644             unless $start >= $data_start;
645 4 50       15 croak "Value of end position ($end) is greater than data end of ",$self->end+$span,
646             unless $end <= $self->end + $span;
647              
648             # generate list of positions to sample from
649 4         10 my $length = $end-$start+1;
650 4   33     22 $samples ||= $length;
651              
652             # warn "samples = $samples, length=$length, span=$span, step=$step";
653              
654             # if the length is grossly greater than the samples, then we won't even
655             # bother fetching all the data, but just sample into the disk file
656 4 50 33     36 if ($length/$samples > 100 && $step == 1) {
657 0         0 my @result;
658             # my $window = 20*($span/$step);
659 0         0 my $interval = $length/$samples;
660             # my $window = 100*$interval/$span;
661 0         0 my $window = $interval/2;
662             # warn "window = $window, interval = $interval";
663 0         0 for (my $i=0;$i<$samples;$i++) {
664 0         0 my $packed_data = $self->_retrieve_packed_range(int($start+$i*$interval-$window),
665             int($window),
666             $step);
667 0         0 my @bases= grep {$_} unpack('C*',$packed_data);
  0         0  
668 0 0       0 if (@bases) {
669 0         0 local $^W = 0;
670 0         0 my $arry = $self->unscale(\@bases);
671 0         0 my $n = @$arry;
672 0         0 my $total = 0;
673 0         0 $total += $_ foreach @$arry;
674 0         0 my $mean = $total/$n;
675 0         0 my $max;
676 0 0 0     0 for (@$arry) { $max = $_ if !defined $max || $max < $_ }
  0         0  
677             # warn $start+$i*$interval,': ',join(',',map {int($_)} @$arry),
678             # " mean = $mean, max = $max";
679             # push @result,$mean;
680 0         0 push @result,$max;
681             } else {
682 0         0 push @result,0;
683             }
684             }
685 0         0 return \@result;
686             }
687              
688 4         16 my $packed_data = $self->_retrieve_packed_range($start,$length,$step);
689              
690 4         6 my @bases;
691 4         91 $#bases = $length-1;
692              
693 4 50       50 if ($step == $span) {
694             # in this case, we do not have any partially-empty
695             # steps, so can operate on the step-length data structure
696             # directly
697 0         0 @bases = unpack('C*',$packed_data);
698             }
699              
700             else {
701             # In this case some regions may have partially missing data,
702             # so we create an array equal to the length of the requested region,
703             # fill it in, and then sample it
704 4         16 for (my $i=0; $i<length $packed_data; $i++) {
705 5202         13606 my $index = $i * $step;
706 5202         10343 my $value = unpack('C',substr($packed_data,$i,1));
707 5202 100       23650 next unless $value; # ignore 0 values
708 61         1507 @bases[$index..$index+$span-1] = ($value) x $span;
709             }
710 4         17 $#bases = $length-1;
711             }
712              
713 4         25 my $r = $self->unscale(\@bases);
714 4         36 $r = $self->sample($r,$samples);
715 4 50 33     2599 $r = $self->smooth($r,$self->window * $samples/@bases)
716             if defined $self->window && $self->window>1;
717 4         121 return $r;
718             }
719              
720             sub _retrieve_packed_range {
721 4     4   5 my $self = shift;
722 4         95 my ($start,$length,$step) = @_;
723 4         9 my $span = $self->span;
724              
725 4         11 my $offset = $self->_calculate_offset($start);
726              
727 4         14 $self->seek($offset);
728 4         60 my $packed_data;
729 4         10 $self->fh->read($packed_data,$length/$step);
730              
731             # pad data up to required amount
732 4 50       88 $packed_data .= "\0" x ($length/$step-length($packed_data))
733             if length $packed_data < $length/$step;
734 4         157 return $packed_data;
735             }
736              
737              
738             sub sample {
739 4     4 0 9 my $self = shift;
740 4         9 my ($values,$samples) = @_;
741 4         8 my $length = @$values;
742 4         10 my $window_size = $length/$samples;
743              
744 4         6 my @samples;
745 4         389 $#samples = $samples-1;
746              
747 4 50       16 if ($window_size < 2) { # no data smoothing needed
748 4         183 @samples = map { $values->[$_*$window_size] } (0..$samples-1);
  5202         16953  
749             }
750             else {
751 0         0 my $smoothsub = $self->smoothsub;
752 0         0 for (my $i=0; $i<$samples; $i++) {
753 0         0 my $start = $i * $window_size;
754 0         0 my $end = $start + $window_size - 1;
755 0         0 my @window = @{$values}[$start..$end];
  0         0  
756              
757 0         0 my $value = $smoothsub->(\@window);
758 0         0 $samples[$i] = $value;
759             }
760             }
761              
762 4         858 return \@samples;
763             }
764              
765             sub smoothsub {
766 0     0 0 0 my $self = shift;
767            
768 0         0 my $smoothing = $self->smoothing;
769 0 0       0 my $smoothsub = $smoothing eq 'mean' ? \&sample_mean
    0          
    0          
    0          
770             :$smoothing eq 'max' ? \&sample_max
771             :$smoothing eq 'min' ? \&sample_min
772             :$smoothing eq 'none' ? \&sample_center
773             :croak("invalid smoothing type '$smoothing'");
774 0         0 return $smoothsub;
775             }
776              
777             sub smooth {
778 0     0 0 0 my ($self,$data,$window) = @_;
779              
780 0         0 my $smoothing = $self->smoothing;
781 0   0     0 $window ||= $self->window;
782              
783 0 0 0     0 return $data if $smoothing eq 'none' || $window < 2;
784              
785 0         0 my @data = @$data;
786 0         0 my $smoother = $self->smoothsub;
787 0 0       0 $window++ unless $window % 2;
788 0         0 my $offset = int($window/2);
789              
790 0         0 for (my $i=$offset; $i<@$data-$offset; $i++) {
791 0         0 my $start = $i - $offset;
792 0         0 my $end = $i + $offset;
793 0         0 my @subset = @data[$start..$end];
794 0         0 $data->[$i] = $smoother->(\@subset);
795             }
796              
797 0         0 return $data;
798             }
799              
800             sub window {
801 4     4 0 9 my $self = shift;
802 4         10 my $d = $self->{window};
803 4 50       347 $self->{window} = shift if @_;
804 4         20 $d;
805             }
806              
807             sub sample_mean {
808 0     0 0 0 my $values = shift;
809 0         0 my ($total,$items);
810 0         0 for my $v (@$values) {
811 0 0       0 next unless defined $v;
812 0         0 $items++;
813 0         0 $total+=$v;
814             }
815 0 0       0 return $items ? $total/$items : undef;
816             }
817              
818             sub sample_max {
819 0     0 0 0 my $values = shift;
820 0         0 my $max;
821 0         0 for my $v (@$values) {
822 0 0       0 next unless defined $v;
823 0 0 0     0 $max = $v if !defined $max or $max < $v;
824             }
825 0         0 return $max;
826             }
827              
828             sub sample_min {
829 0     0 0 0 my $values = shift;
830 0         0 my $min;
831 0         0 for my $v (@$values) {
832 0 0       0 next unless defined $v;
833 0 0 0     0 $min = $v if !defined $min or $min > $v;
834             }
835 0         0 return $min;
836             }
837              
838             sub sample_center {
839 0     0 0 0 my $values = shift;
840 0         0 return $values->[@$values/2];
841             }
842              
843             sub _store_values {
844 0     0   0 my $self = shift;
845 0         0 my ($position,$data) = @_;
846              
847             # where does data start
848 0         0 my $offset = $self->_calculate_offset($position);
849 0         0 my $fh = $self->fh;
850 0         0 my $step = $self->step;
851              
852 0         0 my $scaled = $self->scale($data);
853              
854 0         0 $self->seek($offset);
855 0         0 my $packed_data = pack('C*',@$scaled);
856 0 0       0 $fh->print($packed_data) or die "Write failed: $!";
857              
858 0         0 my $new_end = $position+@$data-1;
859 0 0 0     0 $self->{end} = $new_end if !exists $self->{end} || $self->{end} < $new_end;
860             }
861              
862             # zero means "no data"
863             # everything else is scaled from 1-255
864             sub scale {
865 3809     3809 0 4104 my $self = shift;
866 3809         4184 my $values = shift;
867 3809         6514 my $scale = $self->_get_scale;
868 3809         7978 my $min = $self->{options}{min};
869 3809 50 33     10110 if (ref $values && ref $values eq 'ARRAY') {
870 0         0 my @return = map {
871 0         0 my $i = ($_ - $min)/$scale;
872 0         0 my $v = 1 + int($i+0.5*($i<=>0)); # avoid call to round()
873 0 0       0 $v = 1 if $v < 1;
874 0 0       0 $v = 255 if $v > 255;
875 0         0 $v;
876             } @$values;
877 0         0 return \@return;
878             } else {
879 3809         12077 my $v = 1 + round (($values - $min)/$scale);
880 3809 50       8130 $v = 1 if $v < 1;
881 3809 50       7193 $v = 255 if $v > 255;
882 3809         7442 return $v;
883             }
884             }
885              
886             sub unscale {
887 4     4 0 7 my $self = shift;
888 4         6 my $values = shift;
889 4         12 my $scale = $self->_get_scale;
890 4         12 my $min = $self->{options}{min};
891              
892 4 50 33     32 if (ref $values && ref $values eq 'ARRAY') {
893 4 100       332 my @return = map {$_ ? (($_-1) * $scale + $min) : undef} @$values;
  5202         14863  
894 4         166 return \@return;
895             } else {
896 0 0       0 return $values ? ($values-1) * $scale + $min : undef;
897             }
898             }
899              
900             sub _get_scale {
901 3813     3813   4521 my $self = shift;
902 3813 100       8797 unless ($self->{scale}) {
903 2         6 my $min = $self->{options}{min};
904 2         26 my $max = $self->{options}{max};
905 2   50     10 my $range = $max - $min || 0.001; # can't be zero!
906 2         267 $self->{scale} = $range/254;
907             }
908 3813         6663 return $self->{scale};
909             }
910              
911             sub round {
912 3809     3809 0 10586 return int($_[0]+0.5*($_[0]<=>0));
913             }
914              
915              
916             1;
917              
918             __END__
919              
920             =head1 SEE ALSO
921              
922             L<Bio::Graphics::Wiggle::Loader>,
923             L<Bio::Graphics::Panel>,
924             L<Bio::Graphics::Glyph>,
925             L<Bio::Graphics::Feature>,
926             L<Bio::Graphics::FeatureFile>
927              
928             =head1 AUTHOR
929              
930             Lincoln Stein E<lt>lstein@cshl.orgE<gt>.
931              
932             Copyright (c) 2007 Cold Spring Harbor Laboratory
933              
934             This package and its accompanying libraries is free software; you can
935             redistribute it and/or modify it under the terms of the GPL (either
936             version 1, or at your option, any later version) or the Artistic
937             License 2.0. Refer to LICENSE for the full license text. In addition,
938             please see DISCLAIMER.txt for disclaimers of warranty.
939              
940             =cut