File Coverage

blib/lib/PDL/IO/FastRaw.pm
Criterion Covered Total %
statement 78 90 86.6
branch 18 34 52.9
condition 7 15 46.6
subroutine 12 14 85.7
pod 3 7 42.8
total 118 160 73.7


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             PDL::IO::FastRaw -- A simple, fast and convenient io format for PerlDL.
4              
5             =head1 VERSION
6              
7             This documentation refers to PDL::IO::FastRaw version 0.0.3.
8              
9             =head1 SYNOPSIS
10              
11             use PDL;
12             use PDL::IO::FastRaw;
13              
14             writefraw($pdl,"fname"); # write a raw file
15              
16             $pdl2 = readfraw("fname"); # read a raw file
17             $pdl2 = PDL->readfraw("fname");
18              
19             $pdl3 = mapfraw("fname2",{ReadOnly => 1}); # mmap a file, don't read yet
20              
21             $pdl4 = maptextfraw("fname3",{...}); # map a text file into a 1-D pdl.
22              
23              
24             =head1 DESCRIPTION
25              
26             This is a very simple and fast io format for PerlDL.
27             The disk data consists of two files, a header metadata file
28             in ASCII and a binary file consisting simply of consecutive
29             bytes, shorts or whatever.
30              
31             It is hoped that this will not only make for a simple PerlDL module
32             for saving and retrieving these files but also make it easy
33             for other programs to use these files.
34              
35             The format of the ASCII header is simply
36              
37             <typeid>
38             <ndims>
39             <dim0> <dim1> ...
40              
41             You should probably stick with the default header name. You may want
42             to specify your own header, however, such as when you have a large
43             collection of data files with identical dimensions and data types.
44             Under these circumstances, simply specify the C<Header> option in the
45             options hash.
46              
47             The binary files are in general
48             NOT interchangeable between different architectures since the binary
49             file is simply dumped from the memory region of the piddle.
50             This is what makes the approach efficient.
51              
52             It is also possible to mmap the file which can give a large
53             speedup in certain situations as well as save a lot of memory
54             by using a disk file as virtual memory. When a file is mapped,
55             parts of it are read only as they are accessed in the memory
56             (or as the kernel decides: if you are reading the pages in order,
57             it may well preread some for you).
58              
59             Note that memory savings and copy-on-write are operating-system
60             dependent - see Core.xs and your operating system documentation
61             for exact semantics of whatever. Basically, if you write to a
62             mmapped file without C<ReadOnly>, the change will be reflected
63             in the file immediately. C<ReadOnly> doesn't really make it impossible
64             to write to the piddle but maps the memory privately so the file
65             will not be changed when you change the piddle. Be aware though
66             that mmapping a 40Mb file without C<ReadOnly> spends no virtual
67             memory but with C<ReadOnly> it does reserve 40Mb.
68              
69             =head2 Example: Converting ASCII to raw
70              
71             You have a whole slew of data files in ASCII from an experiment
72             that you ran in your lab. You're still tweaking the analysis
73             and plots, so you'd like if your data could load as fast as
74             possible. Eventually you'll read the data into your scripts
75             using C<readfraw>, but the first thing you might do is create
76             a script that converts all the data files to raw files:
77              
78             #!/usr/bin/perl
79             # Assumes that the data files end with a .asc or .dat extension
80             # and saves the raw file output with a .bdat extension.
81             # call with
82             # >./convert_to_raw.pl file1.dat file2.dat ...
83             # or
84             # >./convert_to_raw.pl *.dat
85            
86             use PDL;
87             use PDL::IO::FastRaw; # for saving raw files
88             use PDL::IO::Misc; # for reading ASCII files with rcols
89             while(shift) { # run through the entire supplied list of file names
90             ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
91             print "Saving contents of $_ to $newName\n";
92             $data = rcols($_);
93             writefraw($data, $newName);
94             }
95              
96              
97             =head2 Example: readfraw
98              
99             Now that you've gotten your data into a raw file format, you can
100             start working on your analysis scripts. If your scripts used C<rcols>
101             in the past, the reading portion of the script should go much,
102             much faster now:
103              
104             #!/usr/bin/perl
105             # My plotting script.
106             # Assume I've specified the files to plot on the command line like
107             # >./plot_script.pl file1.bdat file2.bdat ...
108             # or
109             # >./plot_script.pl *.bdat
110            
111             use PDL;
112             use PDL::IO::FastRaw;
113             while(shift) { # run through the entire supplied list of file names
114             $data = readfraw($_);
115             my_plot_func($data);
116             }
117              
118             =head2 Example: Custom headers
119              
120             In the first example, I allow C<writefraw> to use the standard header
121             file name, which would be C<file.bdat.hdr>. However, I often measure
122             time series that have identical length, so all of those header files
123             are redundant. To fix that, I simply pass the Header option to the
124             C<writefraw> command. A modified script would look like this:
125              
126             #!/usr/bin/perl
127             # Assumes that the data files end with a .asc or .dat extension
128             # and saves the raw file output with a .bdat extension.
129             # call with
130             # >./convert_to_raw.pl [-hHeaderFile] <fileName> [-hHeaderFile] <fileName> ...
131            
132             use PDL;
133             use PDL::IO::FastRaw; # for saving raw files
134             use PDL::IO::Misc; # for reading ASCII files with rcols
135             my $header_file = undef;
136             CL_OPTION: while($_ = shift @ARGV) { # run through the entire list of command-line options
137             if(/-h(.*)/) {
138             $header_file = $1;
139             next CL_OPTION;
140             }
141             ($newName = $_) =~ s/\.(asc|dat)/.bdat/;
142             print "Saving contents of $_ to $newName\n";
143             $data = rcols($_);
144             writefraw($data, $newName, {Header => $header_file});
145             }
146              
147             Modifying the read script is left as an exercise for the reader. :]
148              
149              
150             =head2 Example: Using mapfraw
151              
152             Sometimes you'll want to use C<mapfraw> rather than the read/write
153             functions. In fact, the original author of the module doesn't
154             use the read/write functions anymore, preferring to always use
155             C<mapfraw>. How would you go about doing this?
156              
157             Assuming you've already saved your data into the raw format, the
158             only change you would have to make to the script in example 2 would
159             be to change the call to C<readfraw> to C<mapfraw>. That's it.
160             You will probably see differences in performance, though I (David
161             Mertens) couldn't tell you about them because I haven't played
162             around with C<mapfraw> much myself.
163              
164             What if you eschew the use of C<writefraw> and prefer to only use
165             C<mapfraw>? How would you save your data to a raw format? In that
166             case, you would have to create a C<mapfraw> piddle with the correct
167             dimensions first using
168              
169             $piddle_on_hd = mapfraw('fname', {Creat => 1, Dims => [dim1, dim2, ...]});
170              
171             Note that you must specify the dimensions and you must tell
172             C<mapfraw> to create the new piddle for you by setting the
173             C<Creat> option to a true value, not C<Create> (note the missing
174             final 'e').
175              
176              
177             =head1 FUNCTIONS
178              
179             =head2 readfraw
180              
181             =for ref
182              
183             Read a raw format binary file
184              
185             =for usage
186              
187             $pdl2 = readfraw("fname");
188             $pdl2 = PDL->readfraw("fname");
189             $pdl2 = readfraw("fname", {Header => 'headerfname'});
190              
191             =for options
192              
193             The C<readfraw> command
194             supports the following option:
195              
196             =over 8
197              
198             =item Header
199              
200             Specify the header file name.
201              
202             =back
203              
204             =head2 writefraw
205              
206             =for ref
207              
208             Write a raw format binary file
209              
210             =for usage
211              
212             writefraw($pdl,"fname");
213             writefraw($pdl,"fname", {Header => 'headerfname'});
214              
215             =for options
216              
217             The C<writefraw> command
218             supports the following option:
219              
220             =over 8
221              
222             =item Header
223              
224             Specify the header file name.
225              
226             =back
227              
228             =head2 mapfraw
229              
230             =for ref
231              
232             Memory map a raw format binary file (see the module docs also)
233              
234             =for usage
235              
236             $pdl3 = mapfraw("fname2",{ReadOnly => 1});
237              
238             =for options
239              
240             The C<mapfraw> command
241             supports the following options (not all combinations make sense):
242              
243             =over 8
244              
245             =item Dims, Datatype
246              
247             If creating a new file or if you want to specify your own header
248             data for the file, you can give an array reference and a scalar,
249             respectively.
250              
251             =item Creat
252              
253             Create the file. Also writes out a header for the file.
254              
255             =item Trunc
256              
257             Set the file size. Automatically enabled with C<Creat>. NOTE: This also
258             clears the file to all zeroes.
259              
260             =item ReadOnly
261              
262             Disallow writing to the file.
263              
264             =item Header
265              
266             Specify the header file name.
267              
268             =back
269              
270             =head2 maptextfraw
271              
272             =for ref
273              
274             Memory map a text file (see the module docs also).
275              
276             Note that this function maps the raw format so if you are
277             using an operating system which does strange things to e.g.
278             line delimiters upon reading a text file, you get the raw (binary)
279             representation.
280              
281             The file doesn't really need to be text but it is just mapped
282             as one large binary chunk.
283              
284             This function is just a convenience wrapper which first C<stat>s
285             the file and sets the dimensions and datatype.
286              
287             =for usage
288              
289             $pdl4 = maptextfraw("fname", {options});
290              
291             =for options
292              
293             The options other than Dims, Datatype of C<mapfraw> are
294             supported.
295              
296             =head1 BUGS
297              
298             Should be documented better. C<writefraw> and C<readfraw> should
299             also have options (the author nowadays only uses C<mapfraw> ;)
300              
301             =head1 AUTHOR
302              
303             Copyright (C) Tuomas J. Lukka 1997.
304             All rights reserved. There is no warranty. You are allowed
305             to redistribute this software / documentation under certain
306             conditions. For details, see the file COPYING in the PDL
307             distribution. If this file is separated from the PDL distribution,
308             the copyright notice should be included in the file.
309              
310              
311             =cut
312              
313             package PDL::IO::FastRaw;
314              
315             ## use version; our $VERSION = qv('0.0.3');
316             our $VERSION = '0.000003';
317             $VERSION = eval $VERSION;
318              
319             BEGIN {
320 1     1   1038 our $have_file_map = 0;
321              
322 1     1   75 eval "use File::Map 0.57 qw(:all)";
  1         8  
  1         37  
  1         11  
323 1 50       242 $have_file_map = 1 unless $@;
324             }
325              
326             require Exporter;
327 1     1   7 use PDL::Core '';
  1         3  
  1         5  
328 1     1   7 use PDL::Exporter;
  1         2  
  1         5  
329 1     1   471 use FileHandle;
  1         930  
  1         5  
330              
331             @PDL::IO::FastRaw::ISA = qw/PDL::Exporter/;
332              
333             @EXPORT_OK = qw/writefraw readfraw mapfraw maptextfraw/;
334             %EXPORT_TAGS = (Func=>[@EXPORT_OK]);
335              
336             # Exported functions
337              
338             *writefraw = \&PDL::writefraw;
339 4     4 1 1004 sub readfraw {PDL->readfraw(@_)}
340 3     3 1 47 sub mapfraw {PDL->mapfraw(@_)}
341 0     0 1 0 sub maptextfraw {PDL->maptextfraw(@_)}
342              
343             sub _read_frawhdr {
344 6     6   13 my($name,$opts) = @_;
345 6   66     32 my $hname = $opts->{Header} || "$name.hdr";
346 6 50       29 my $h = new FileHandle "$hname"
347             or barf "Couldn't open '$hname' for reading";
348 6         608 chomp(my $tid = <$h>);
349 6         20 chomp(my $ndims = <$h>);
350 6 50       10 chomp(my $str = <$h>); if(!defined $str) {barf("Format error in '$hname'");}
  6         17  
  0         0  
351 6         23 my @dims = split ' ',$str;
352 6 50       31 if($#dims != $ndims-1) {
353 0         0 barf("Format error reading fraw header file '$hname'");
354             }
355             return {
356 6         101 Type => $tid,
357             Dims => \@dims,
358             NDims => $ndims
359             };
360             }
361              
362             sub _writefrawhdr {
363 3     3   10 my($pdl,$name,$opts) = @_;
364 3   66     20 my $hname = $opts->{Header} || "$name.hdr";
365 3 50       23 my $h = new FileHandle ">$hname"
366             or barf "Couldn't open '$hname' for writing";
367 3         363 print $h map {"$_\n"} ($pdl->get_datatype,
  9         167  
368             $pdl->getndims, (join ' ',$pdl->dims));
369             }
370              
371             sub PDL::writefraw {
372 2     2 0 19 my($pdl,$name,$opts) = @_;
373 2         6 _writefrawhdr($pdl,$name,$opts);
374 2 50       23 my $d = new FileHandle ">$name"
375             or barf "Couldn't open '$name' for writing";
376 2         198 binmode $d;
377 2         5 print $d ${$pdl->get_dataref};
  2         73  
378             }
379              
380             sub PDL::readfraw {
381 4     4 0 10 my $class = shift;
382 4         10 my($name,$opts) = @_;
383 4 50       23 my $d = new FileHandle "$name"
384             or barf "Couldn't open '$name' for reading";
385 4         368 binmode $d;
386 4         15 my $hdr = _read_frawhdr($name,$opts);
387 4         34 my $pdl = $class->zeroes ((new PDL::Type($hdr->{Type})), @{$hdr->{Dims}});
  4         21  
388 4         15 my $len = length ${$pdl->get_dataref};
  4         15  
389             # wrong.
390             # $d->sysread(${$pdl->get_dataref},$len) == $len
391             # or barf "Couldn't read enough data from '$name'";
392 4         8 my $index = 0;
393 4         7 my $data;
394             my $retlen;
395 4         21 while (($retlen = $d->sysread($data, $len)) != 0) {
396 4         75 substr(${$pdl->get_dataref},$index,$len) = $data;
  4         21  
397 4         9 $index += $retlen;
398 4         13 $len -= $retlen;
399             }
400 4         60 $pdl->upd_data();
401 4         65 return $pdl;
402             }
403              
404             sub PDL::mapfraw {
405 3     3 0 8 my $class = shift;
406 3         8 my($name,$opts) = @_;
407 3         5 my $hdr;
408 3 100       10 if($opts->{Dims}) {
409 1         3 my $datatype = $opts->{Datatype};
410 1 50       5 if(!defined $datatype) {$datatype = $PDL_D;}
  0         0  
411 1         3 $hdr->{Type} = $datatype;
412 1         2 $hdr->{Dims} = $opts->{Dims};
413 1         3 $hdr->{NDims} = scalar(@{$opts->{Dims}});
  1         3  
414             } else {
415 2         6 $hdr = _read_frawhdr($name,$opts);
416             }
417 3         16 $s = PDL::Core::howbig($hdr->{Type});
418 3         5 for(@{$hdr->{Dims}}) {
  3         11  
419 6         12 $s *= $_;
420             }
421 3         15 my $pdl = $class->zeroes(new PDL::Type($hdr->{Type}));
422 3         19 $pdl->setdims($hdr->{Dims});
423              
424 3 50 33     18 if ($have_file_map and not defined($PDL::force_use_mmap_code) ) {
425             $pdl->set_data_by_file_map(
426             $name,
427             $s,
428             1,
429             ($opts->{ReadOnly}?0:1),
430             ($opts->{Creat}?1:0),
431             (0644),
432 3 50 66     28 ($opts->{Creat} || $opts->{Trunc} ? 1:0)
    100          
    100          
433             );
434             } else {
435 0         0 warn "mapfraw: direct mmap support will be deprecated, please install File::Map\n";
436             $pdl->set_data_by_mmap(
437             $name,
438             $s,
439             1,
440             ($opts->{ReadOnly}?0:1),
441             ($opts->{Creat}?1:0),
442             (0644),
443 0 0 0     0 ($opts->{Creat} || $opts->{Trunc} ? 1:0)
    0          
    0          
444             );
445             }
446              
447 3 100       13 if($opts->{Creat}) {
448 1         5 _writefrawhdr($pdl,$name,$opts);
449             }
450 3         20 return $pdl;
451             }
452              
453             sub PDL::maptextfraw {
454 0     0 0   my($class, $name, $opts) = @_;
455 0           $opts = {%$opts}; # Copy just in case
456 0           my @s = stat $name;
457 0           $opts->{Dims} = [$s[7]];
458 0           $opts->{Datatype} = &PDL::byte;
459 0           return PDL::mapfraw($class, $name, $opts);
460             }
461              
462             1;