File Coverage

lib/BoutrosLab/TSVStream/IO/Role/Reader/Fixed.pm
Criterion Covered Total %
statement 124 129 96.1
branch 40 60 66.6
condition 11 15 73.3
subroutine 24 25 96.0
pod 1 7 14.2
total 200 236 84.7


line stmt bran cond sub pod time code
1             package BoutrosLab::TSVStream::IO::Role::Reader::Fixed;
2              
3             # safe Perl
4 8     8   517 use warnings;
  8         12  
  8         380  
5 8     8   33 use strict;
  8         9  
  8         132  
6 8     8   21 use Carp;
  8         8  
  8         407  
7              
8             =head1 NAME
9              
10             BoutrosLab::TSVStream::IO::Role::Reader::Fixed
11              
12             =cut
13              
14 8     8   27 use Moose::Role;
  8         10  
  8         49  
15 8     8   27947 use Moose::Util::TypeConstraints;
  8         13  
  8         59  
16 8     8   10492 use namespace::autoclean;
  8         12  
  8         53  
17 8     8   4555 use List::MoreUtils qw(all);
  8         58405  
  8         39  
18              
19             enum 'ReadHeaderType', [qw(auto none check)];
20              
21             has header => (
22             is => 'ro',
23             lazy => 1,
24             isa => 'ReadHeaderType',
25             default => 'auto'
26             );
27              
28             has extra_class_params => (
29             is => 'ro',
30             isa => 'ArrayRef[Str]',
31             default => sub { [] }
32             );
33              
34             has pre_header_pattern => (
35             is => 'ro',
36             isa => 'Maybe[RegexpRef]',
37             default => undef
38             );
39              
40             has _is_pre_header => (
41             is => 'ro',
42             isa => 'CodeRef',
43             lazy => 1,
44             builder => '_init_is_pre_header'
45             );
46              
47             sub _init_is_pre_header {
48 13     13   14 my $self = shift;
49 13 50       391 if (my $pat = $self->pre_header_pattern) {
50 38     38   116 sub { $_[0] =~ /$pat/ }
51 13         396 }
52             else {
53 0         0 $self->_is_comment
54             }
55             }
56              
57             has pre_headers => (
58             is => 'ro',
59             isa => 'ArrayRef[Str]',
60             init_arg => undef,
61             default => sub { [] }
62             );
63              
64             has _comments => (
65             is => 'ro',
66             isa => 'ArrayRef[Str]',
67             init_arg => undef,
68             writer => '_set_comments',
69             default => sub { [] }
70             );
71              
72             around BUILDARGS => sub {
73             my $orig = shift;
74             my $class = shift;
75             my $arg = ref($_[0]) ? $_[0] : { @_ };
76              
77             my %valid_arg = (
78             file => 1,
79             handle => 1,
80             header => 1,
81             class => 1,
82             comment => 1,
83             pre_comment => 1,
84             pre_header => 1,
85             header_fix => 1,
86             extra_class_params => 1,
87              
88             pre_header_pattern => 1,
89             comment_pattern => 1
90             );
91             $arg->{_valid_arg} = \%valid_arg;
92             $arg->{_open_mode} = '<';
93             $class->$orig( $arg );
94             };
95              
96             sub _read_no_header {
97 32     32   36 my $self = shift;
98 32         995 my $none = $self->header eq 'none';
99 32         80 ( $none, $none );
100             }
101              
102             sub _fill_dyn_fields {
103 32     32   31 return;
104             }
105              
106             sub _header {
107 95     95   107 my $self = shift;
108 95         75 my $stream_fields = shift;
109 95         2710 my $class_fields = $self->fields;
110             return $#$class_fields <= $#$stream_fields
111 95   100 204   810 && all { uc( $stream_fields->[$_] ) eq uc( $class_fields->[$_] ) } 0 .. $#$class_fields;
  204         434  
112             }
113              
114             sub BUILD {
115 119     119 0 120 my $self = shift;
116              
117 119         336 my ( $none, $ret ) = $self->_read_no_header;
118 119 50       212 return if $ret;
119              
120 119         95 my @pre;
121 119         128 my $stream_fields = [];
122 119         125 my $is_head = undef;
123 119 50       251 print "Starting pre-header checks\n" if $ENV{HEADER_PROCESS};
124 119 100       292 if (!$self->_peek) {
125 24         65 $self->_fill_dyn_fields( $none, 0, $stream_fields );
126             }
127             else {
128 95         233 while (my $line = $self->_read) {
129 121         122 my $is_pre;
130 121         138 my $lline = $line->{line};
131              
132             sub check1 {
133 0     0 0 0 my( $self, $test, $bool, $check, $line ) = @_;
134 0 0       0 if ($self->$bool) {
135 0 0       0 print " ",uc($test),($self->$check->($line) ? ":YES" : ":no "), "\n";
136             }
137             else {
138 0         0 print " ",lc($test), "\n";
139             }
140             }
141 121 50       209 print "Checking line: $lline\n" if $ENV{HEADER_PROCESS};
142 121 50       195 check1( $self, 'PH', pre_header => _is_pre_header => $lline ) if $ENV{HEADER_PROCESS};
143 121 50       196 check1( $self, 'PC', pre_comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
144 121 50       202 check1( $self, 'CO', comment => _is_comment => $lline ) if $ENV{HEADER_PROCESS};
145              
146 121 100       3387 if ($self->pre_header) {
147 38         1065 $is_pre = $self->_is_pre_header->($lline);
148 38 50 0     1009 $is_pre ||= $self->_is_comment->($lline) if $self->pre_comment;
149             }
150             else {
151 83 100       2299 $is_pre = $self->_is_comment->($lline) if $self->comment;
152             }
153 121 100       212 if ($is_pre) {
154 26 50       48 print " -> pre\n" if $ENV{HEADER_PROCESS};
155 26         31 push @pre, $line;
156 26         62 next;
157             }
158 95         2657 $stream_fields = $self->header_fix->($line)->{fields};
159             # $stream_fields = $line->{fields};
160 95         213 $is_head = $self->_header($stream_fields);
161 95 50       320 print " -> NOT pre, none: $none, is_head: $is_head, header_proc: ",$self->header,"\n" if $ENV{HEADER_PROCESS};
162 95         258 $self->_fill_dyn_fields( $none, $is_head, $stream_fields );
163 95 100 100     613 if ($none or !$is_head && $self->header eq 'auto') {
      66        
164 24 50       45 print " *** put back\n" if $ENV{HEADER_PROCESS};
165 24         62 $self->_unread( @pre, $line );
166 24         635 return;
167             }
168 71         93 last;
169             }
170              
171 71 50       168 print " *** kept\n" if $ENV{HEADER_PROCESS};
172 71         2148 my $die = $self->_num_fields != scalar(@$stream_fields);
173              
174 71 100 100     271 if ($die || !$is_head) {
175 9         16 my $error = '';
176 9 100       39 $error = 'Headers do not match' if !$is_head;
177 9 100       29 $error .= ' and wrong number of fields' if $die;
178 9         29 $error =~ s/^ and w/W/;
179 9         33 $self->_croak( $error, $stream_fields );
180             }
181 62         67 push @{ $self->pre_headers }, ( map { $_->{line} } @pre );
  62         1878  
  18         305  
182             }
183             }
184              
185             sub read_comments {
186 6     6 0 3718 my $self = shift;
187 6         220 my $comments = $self->_comments;
188 6         203 $self->_set_comments( [] );
189 6         25 return $comments;
190             }
191              
192             sub _load_comments {
193 239     239   243 my $self = shift;
194 239 100       8035 return unless $self->comment;
195 6         169 my $comments = $self->_comments;
196 6         16 while (my $line = $self->_read) {
197 10 100       274 if (! $self->_is_comment->( $line->{line} )) {
198 4         10 $self->_unread($line);
199 4         6 return;
200             }
201 6         25 push @$comments, $line->{line};
202             }
203             }
204              
205             sub read {
206 239     239 1 87367 my $self = shift;
207 239         421 $self->_load_comments;
208 239 100       571 return unless my $values = $self->_read;
209 133         181 my $line = $values->{line};
210 133         140 $values = $values->{fields};
211 133         217 my $error;
212             my $obj;
213 133 50       3825 $error = 'Wrong number of fields' if scalar(@$values) != $self->_num_fields;
214              
215 133 50       204 unless ($error) {
216 133         142 eval {
217             $obj = $self->class->new(
218             field_values => $values,
219 133         3656 @{ $self->extra_class_params },
  133         3805  
220             $self->_read_config
221             );
222             };
223 133 50       783 $error = $@ if $@;
224             }
225              
226 133 50       227 $self->_croak( $error, $values ) if $error;
227              
228 133         350 return $obj;
229             }
230              
231             sub filter {
232 2     2 0 4 my ( $self, $filtersub ) = @_;
233 2         28 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
234             reader => $self,
235             filtersub => $filtersub
236             );
237             }
238              
239             package BoutrosLab::TSVStream::IO::Role::Reader::Filter;
240              
241             # safe Perl
242 8     8   11664 use warnings;
  8         19  
  8         214  
243 8     8   29 use strict;
  8         10  
  8         127  
244 8     8   33 use Carp;
  8         11  
  8         488  
245              
246 8     8   33 use Moose;
  8         10  
  8         56  
247              
248             has reader => (
249             is => 'ro',
250             isa => 'Object',
251             required => 1
252             );
253              
254             has filtersub => (
255             is => 'ro',
256             isa => 'CodeRef',
257             required => 1
258             );
259              
260             sub read {
261 7     7 0 2377 my $self = shift;
262 7         221 while (my $record = $self->reader->read) {
263 7 100       196 return $record if $self->filtersub->($record);
264             }
265 3         27 return;
266             }
267              
268             sub filter {
269 1     1 0 904 my ( $self, $filtersub ) = @_;
270 1         7 return BoutrosLab::TSVStream::IO::Role::Reader::Filter->new(
271             reader => $self,
272             filtersub => $filtersub
273             );
274             }
275              
276             =head1 SYNOPSIS
277              
278             $class->reader( ... );
279              
280             # ($class will use the role BoutrosLab::TSVStream which will provide
281             # the reader method, that method will return a Reader object with:
282             # ...
283             # return BoutrosLab::TSVStream::IO::Role::Reader::Fixed->new(
284             # handle => $fd, # (required)
285             # class => $class, # (required) class
286             # file => $file, # (optional) used (as filename) in error messages
287             # header => $str, # (optional) one of: check none (default 'check')
288             # );
289              
290             while (my $record = $reader->read) {
291             # ... $record is a $class object
292             # ... use $record->field1, $record->field2, etc. - all of the methods of $class object
293             }
294              
295             =head1 DESCRIPTION
296              
297             This object provides an iterator to read through the lines
298             of a data stream (C<$fd>), converting each from a line with
299             tab separated fields into an object of a class (C<$classs>)
300             that has attributes for those fields.
301              
302             Usually, the data stream will start with a line that has the
303             fieldnames in a tab separated list, and the rest of the stream
304             has lines that contain the field values in a tab separated list.
305              
306             Any error diagnostics will refer to the stream using the
307             C<$file> filename if it is provided.
308              
309             The C<$class> class will have a class attribute named
310             C<_fields>. Usually, this will be a read-only method that
311             returns a list of fieldnames that will be used to validate
312             the first line in the data stream (which should contain the
313             field names as the column vlues).
314              
315             A class C<$class> object will be created for each line.
316             The object will be initialized with a list of names and values
317             matching the fields and the contents .of the line.
318              
319             If C<header> is provided, it can be 'check', or 'none'.
320             This controls what is done to the handle initially.
321              
322             If 'check' is specified, the first line of the stream is read
323             and it is checked to ensure that it matches the C<fields> both
324             in name and order. The fields list must be complete. However,
325             it is permitted for the field names to mismatch by having
326             different capitalization - the comparison is not case sensitive.
327              
328             If 'none' is specified, the stream is not checked for a header
329             line. (You would use this option either if the file does not
330             have a header line, or if you are scanning from the middle of
331             a file handle that is no longer at the start of the file.)
332              
333             =cut
334              
335             =head1 ATTRIBUTES
336              
337             =head2 handle - the filehandle to be read
338              
339             =head2 file - the name of the stream, usually a filename, for diagnostic purposes
340              
341             =head2 class - the class that records transformed into
342              
343             =head2 fields - list of field names, usually provided by class
344              
345             handle, file, class and fields are provided by the ...::Base role
346              
347             =head2 header - 'auto', 'check', or 'none' (default 'auto')
348              
349             The C<'check'> setting causes the first line of the stream to
350             be read and validated against the C<fields> list. The field
351             names are accepted if they match (but differences in upper/lower
352             case are ignored). If they do not match, an exception is thrown.
353              
354             If the C<'none'> setting is provided, the stream should already be
355             positioned at a data value (i.e. the stream was previously opened and
356             is no longer positioned at the start, or else the stream was originally
357             created without a leading header line).
358              
359             The default C<'auto'> setting causes the first line to be read and
360             validated as for the C<'check'> setting, but if the line does not
361             match the list of fields it is assumed to instead be the first data
362             line of a stream that has no headers, and processing continues as
363             if the C<'none'> setting were specified instead.
364              
365             =head1 BUILDARGS
366              
367             The BUILDARGS opens a handle if only a file name was provided.
368              
369             =head1 BUILD
370              
371             The BUILD method handles any requirements for reading and processing a
372             header line.
373              
374             =head1 METHODS
375              
376             =head2 read - read a line from the stream end turn it into a class element
377              
378             =cut
379              
380             =head1 AUTHOR
381              
382             John Macdonald - Boutros Lab
383              
384             =head1 ACKNOWLEDGEMENTS
385              
386             Paul Boutros, Phd, PI - Boutros Lab
387              
388             The Ontario Institute for Cancer Research
389              
390             =cut
391              
392             1;