File Coverage

lib/BoutrosLab/TSVStream/IO/Role/Reader/Fixed.pm
Criterion Covered Total %
statement 124 129 96.1
branch 40 60 66.6
condition 11 15 73.3
subroutine 24 25 96.0
pod 1 7 14.2
total 200 236 84.7


line stmt bran cond sub pod time code
1             package BoutrosLab::TSVStream::IO::Role::Reader::Fixed;
2              
3             # safe Perl
4 8     8   478 use warnings;
  8         8  
  8         405  
5 8     8   32 use strict;
  8         10  
  8         147  
6 8     8   25 use Carp;
  8         8  
  8         487  
7              
8             =head1 NAME
9              
10             BoutrosLab::TSVStream::IO::Role::Reader::Fixed
11              
12             =cut
13              
14 8     8   36 use Moose::Role;
  8         9  
  8         45  
15 8     8   27561 use Moose::Util::TypeConstraints;
  8         8  
  8         57  
16 8     8   10832 use namespace::autoclean;
  8         12  
  8         45  
17 8     8   514 use List::MoreUtils qw(all);
  8         12  
  8         79  
18              
19             enum 'ReadHeaderType', [qw(auto none check)];
20              
21             has header => (
22             is => 'ro',
23             lazy => 1,
24             isa => 'ReadHeaderType',
25             default => 'auto'
26             );
27              
28             has extra_class_params => (
29             is => 'ro',
30             isa => 'ArrayRef[Str]',
31             default => sub { [] }
32             );
33              
34             has pre_header_pattern => (
35             is => 'ro',
36             isa => 'Maybe[RegexpRef]',
37             default => undef
38             );
39              
40             has _is_pre_header => (
41             is => 'ro',
42             isa => 'CodeRef',
43             lazy => 1,
44             builder => '_init_is_pre_header'
45             );
46              
47             sub _init_is_pre_header {
48 13     13   15 my $self = shift;
49 13 50       398 if (my $pat = $self->pre_header_pattern) {
50 38     38   106 sub { $_[0] =~ /$pat/ }
51 13         397 }
52             else {
53 0         0 $self->_is_comment
54             }
55             }
56              
57             has pre_headers => (
58             is => 'ro',
59             isa => 'ArrayRef[Str]',
60             init_arg => undef,
61             default => sub { [] }
62             );
63              
64             has _comments => (
65             is => 'ro',
66             isa => 'ArrayRef[Str]',
67             init_arg => undef,
68             writer => '_set_comments',
69             default => sub { [] }
70             );
71              
72             around BUILDARGS => sub {
73             my $orig = shift;
74             my $class = shift;
75             my $arg = ref($_[0]) ? $_[0] : { @_ };
76              
77             my %valid_arg = (
78             file => 1,
79             handle => 1,
80             header => 1,
81             class => 1,
82             comment => 1,
83             pre_comment => 1,
84             pre_header => 1,
85             header_fix => 1,
86             extra_class_params => 1,
87              
88             pre_header_pattern => 1,
89             comment_pattern => 1
90             );
91             $arg->{_valid_arg} = \%valid_arg;
92             $arg->{_open_mode} = '<';
93             $class->$orig( $arg );
94             };
95              
96             sub _read_no_header {
97 32     32   29 my $self = shift;
98 32         967 my $none = $self->header eq 'none';
99 32         70 ( $none, $none );
100             }
101              
102             sub _fill_dyn_fields {
103 32     32   33 return;
104             }
105              
106             sub _header {
107 95     95   93 my $self = shift;
108 95         84 my $stream_fields = shift;
109 95         2724 my $class_fields = $self->fields;
110             return $#$class_fields <= $#$stream_fields
111 95   100 204   775 && all { uc( $stream_fields->[$_] ) eq uc( $class_fields->[$_] ) } 0 .. $#$class_fields;
  204         403  
112             }
113              
114             sub BUILD {
115 119     119 0 126 my $self = shift;
116              
117 119         327 my ( $none, $ret ) = $self->_read_no_header;
118 119 50       231 return if $ret;
119              
120 119         98 my @pre;
121 119         127 my $stream_fields = [];
122 119         104 my $is_head = undef;
123 119 50       272 print "Starting pre-header checks\n" if $ENV{HEADER_PROCESS};
124 119 100       315 if (!$self->_peek) {
125 24         70 $self->_fill_dyn_fields( $none, 0, $stream_fields );
126             }
127             else {
128 95         239 while (my $line = $self->_read) {
129 121         96 my $is_pre;
130 121         138 my $lline = $line->{line};
131              
132             sub check1 {
133 0     0 0 0 my( $self, $test, $bool, $check, $line ) = @_;
134 0 0       0 if ($self->$bool) {
135 0 0       0 print " ",uc($test),($self->$check->($line) ? ":YES" : ":no "), "\n";
136             }
137             else {
138 0         0 print " ",lc($test), "\n";
139             }
140             }
141 121 50       206 print "Checking line: $lline\n" if $ENV{HEADER_PROCESS};
142 121 50       208 check1( $self, 'PH', pre_header => _is_pre_header => $lline ) if $ENV{HEADER_PROCESS};
143 121 50       189 check1( $self, 'PC', pre_comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
144 121 50       173 check1( $self, 'CO', comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
145              
146 121 100       3492 if ($self->pre_header) {
147 38         1085 $is_pre = $self->_is_pre_header->($lline);
148 38 50 0     1052 $is_pre ||= $self->_is_comment->($lline) if $self->pre_comment;
149             }
150             else {
151 83 100       2309 $is_pre = $self->_is_comment->($lline) if $self->comment;
152             }
153 121 100       213 if ($is_pre) {
154 26 50       57 print " -> pre\n" if $ENV{HEADER_PROCESS};
155 26         29 push @pre, $line;
156 26         64 next;
157             }
158 95         2720 $stream_fields = $self->header_fix->($line)->{fields};
159             # $stream_fields = $line->{fields};
160 95         225 $is_head = $self->_header($stream_fields);
161 95 50       335 print " -> NOT pre, none: $none, is_head: $is_head, header_proc: ",$self->header,"\n" if $ENV{HEADER_PROCESS};
162 95         265 $self->_fill_dyn_fields( $none, $is_head, $stream_fields );
163 95 100 100     636 if ($none or !$is_head && $self->header eq 'auto') {
      66        
164 24 50       57 print " *** put back\n" if $ENV{HEADER_PROCESS};
165 24         70 $self->_unread( @pre, $line );
166 24         667 return;
167             }
168 71         115 last;
169             }
170              
171 71 50       153 print " *** kept\n" if $ENV{HEADER_PROCESS};
172 71         2157 my $die = $self->_num_fields != scalar(@$stream_fields);
173              
174 71 100 100     280 if ($die || !$is_head) {
175 9         17 my $error = '';
176 9 100       23 $error = 'Headers do not match' if !$is_head;
177 9 100       27 $error .= ' and wrong number of fields' if $die;
178 9         23 $error =~ s/^ and w/W/;
179 9         35 $self->_croak( $error, $stream_fields );
180             }
181 62         57 push @{ $self->pre_headers }, ( map { $_->{line} } @pre );
  62         1881  
  18         335  
182             }
183             }
184              
185             sub read_comments {
186 6     6 0 2921 my $self = shift;
187 6         201 my $comments = $self->_comments;
188 6         212 $self->_set_comments( [] );
189 6         24 return $comments;
190             }
191              
192             sub _load_comments {
193 239     239   204 my $self = shift;
194 239 100       7895 return unless $self->comment;
195 6         168 my $comments = $self->_comments;
196 6         15 while (my $line = $self->_read) {
197 10 100       274 if (! $self->_is_comment->( $line->{line} )) {
198 4         11 $self->_unread($line);
199 4         6 return;
200             }
201 6         25 push @$comments, $line->{line};
202             }
203             }
204              
205             sub read {
206 239     239 1 70188 my $self = shift;
207 239         418 $self->_load_comments;
208 239 100       643 return unless my $values = $self->_read;
209 133         184 my $line = $values->{line};
210 133         157 $values = $values->{fields};
211 133         208 my $error;
212             my $obj;
213 133 50       3775 $error = 'Wrong number of fields' if scalar(@$values) != $self->_num_fields;
214              
215 133 50       210 unless ($error) {
216 133         131 eval {
217             $obj = $self->class->new(
218             field_values => $values,
219 133         3565 @{ $self->extra_class_params },
  133         3957  
220             $self->_read_config
221             );
222             };
223 133 50       829 $error = $@ if $@;
224             }
225              
226 133 50       205 $self->_croak( $error, $values ) if $error;
227              
228 133         370 return $obj;
229             }
230              
231             sub filter {
232 2     2 0 4 my ( $self, $filtersub ) = @_;
233 2         27 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
234             reader => $self,
235             filtersub => $filtersub
236             );
237             }
238              
239             package BoutrosLab::TSVStream::IO::Role::Reader::Filter;
240              
241             # safe Perl
242 8     8   12286 use warnings;
  8         10  
  8         205  
243 8     8   26 use strict;
  8         8  
  8         121  
244 8     8   23 use Carp;
  8         12  
  8         424  
245              
246 8     8   32 use Moose;
  8         12  
  8         49  
247              
248             has reader => (
249             is => 'ro',
250             isa => 'Object',
251             required => 1
252             );
253              
254             has filtersub => (
255             is => 'ro',
256             isa => 'CodeRef',
257             required => 1
258             );
259              
260             sub read {
261 7     7 0 2368 my $self = shift;
262 7         232 while (my $record = $self->reader->read) {
263 7 100       250 return $record if $self->filtersub->($record);
264             }
265 3         29 return;
266             }
267              
268             sub filter {
269 1     1 0 857 my ( $self, $filtersub ) = @_;
270 1         7 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
271             reader => $self,
272             filtersub => $filtersub
273             );
274             }
275              
276             =head1 SYNOPSIS
277              
278             $class->reader( ... );
279              
280             # ($class will use the role BoutrosLab::TSVStream which will provide
281             # the reader method, that method will return a Reader object with:
282             # ...
283             # return BoutrosLab::TSVStream::IO::Role::Reader::Fixed->new(
284             # handle => $fd, # (required)
285             # class => $class, # (required) class
286             # file => $file, # (optional) used (as filename) in error messages
287             # header => $str, # (optional) one of: check none (default 'check')
288             # );
289              
290             while (my $record = $reader->read) {
291             # ... $record is a $class object
292             # ... use $record->field1, $record->field2, etc. - all of the methods of $class object
293             }
294              
295             =head1 DESCRIPTION
296              
297             This object provides an iterator to read through the lines
298             of a data stream (C<$fd>), converting each from a line with
299             tab separated fields into an object of a class (C<$classs>)
300             that has attributes for those fields.
301              
302             Usually, the data stream will start with a line that has the
303             fieldnames in a tab separated list, and the rest of the stream
304             has lines that contain the field values in a tab separated list.
305              
306             Any error diagnostics will refer to the stream using the
307             C<$file> filename if it is provided.
308              
309             The C<$class> class will have a class attribute named
310             C<_fields>. Usually, this will be a read-only method that
311             returns a list of fieldnames that will be used to validate
312             the first line in the data stream (which should contain the
313             field names as the column vlues).
314              
315             A class C<$class> object will be created for each line.
316             The object will be initialized with a list of names and values
317             matching the fields and the contents .of the line.
318              
319             If C<header> is provided, it can be 'check', or 'none'.
320             This controls what is done to the handle initially.
321              
322             If 'check' is specified, the first line of the stream is read
323             and it is checked to ensure that it matches the C<fields> both
324             in name and order. The fields list must be complete. However,
325             it is permitted for the field names to mismatch by having
326             different capitalization - the comparison is not case sensitive.
327              
328             If 'none' is specified, the stream is not checked for a header
329             line. (You would use this option either if the file does not
330             have a header line, or if you are scanning from the middle of
331             a file handle that is no longer at the start of the file.)
332              
333             =cut
334              
335             =head1 ATTRIBUTES
336              
337             =head2 handle - the filehandle to be read
338              
339             =head2 file - the name of the stream, usually a filename, for diagnostic purposes
340              
341             =head2 class - the class that records transformed into
342              
343             =head2 fields - list of field names, usually provided by class
344              
345             handle, file, class and fields are provided by the ...::Base role
346              
347             =head2 header - 'auto', 'check', or 'none' (default 'auto')
348              
349             The C<'check'> setting causes the first line of the stream to
350             be read and validated against the C<fields> list. The field
351             names are accepted if they match (but differences in upper/lower
352             case are ignored). If they do not match, an exception is thrown.
353              
354             If the C<'none'> setting is provided, the stream should already be
355             positioned at a data value (i.e. the stream was previously opened and
356             is no longer positioned at the start, or else the stream was originally
357             created without a leading header line).
358              
359             The default C<'auto'> setting causes the first line to be read and
360             validated as for the C<'check'> setting, but if the line does not
361             match the list of fields it is assumed to instead be the first data
362             line of a stream that has no headers, and processing continues as
363             if the C<'none'> setting were specified instead.
364              
365             =head1 BUILDARGS
366              
367             The BUILDARGS opens a handle if only a file name was provided.
368              
369             =head1 BUILD
370              
371             The BUILD method handles any requirements for reading and processing a
372             header line.
373              
374             =head1 METHODS
375              
376             =head2 read - read a line from the stream end turn it into a class element
377              
378             =cut
379              
380             =head1 AUTHOR
381              
382             John Macdonald - Boutros Lab
383              
384             =head1 ACKNOWLEDGEMENTS
385              
386             Paul Boutros, Phd, PI - Boutros Lab
387              
388             The Ontario Institute for Cancer Research
389              
390             =cut
391              
392             1;