File Coverage

lib/Parse/File/Taxonomy/Index.pm
Criterion Covered Total %
statement 18 212 8.4
branch 0 84 0.0
condition 0 36 0.0
subroutine 6 18 33.3
pod 5 8 62.5
total 29 358 8.1


line stmt bran cond sub pod time code
1             package Parse::File::Taxonomy::Index;
2 3     3   7402 use strict;
  3         6  
  3         115  
3 3     3   1454 use parent qw( Parse::File::Taxonomy );
  3         907  
  3         15  
4 3     3   146 use Carp;
  3         8  
  3         171  
5 3     3   12 use Text::CSV;
  3         5  
  3         15  
6 3     3   63 use Scalar::Util qw( reftype );
  3         44  
  3         176  
7             our $VERSION = '0.03';
8 3         7158 use Parse::File::Taxonomy::Auxiliary qw(
9             path_check_fields
10             components_check_fields
11 3     3   1228 );
  3         5  
12             #use Data::Dump;
13              
14             =head1 NAME
15              
16             Parse::File::Taxonomy::Index - Extract a taxonomy from a hierarchy inside a CSV file
17              
18             =head1 SYNOPSIS
19              
20             use Parse::File::Taxonomy::Index;
21              
22             $source = "./t/data/alpha.csv";
23             $obj = Parse::File::Taxonomy::Index->new( {
24             file => $source,
25             } );
26              
27             =cut
28              
29             =head1 METHODS
30              
31             =head2 C
32              
33             =over 4
34              
35             =item * Purpose
36              
37             Parse::File::Taxonomy::Index constructor.
38              
39             =item * Arguments
40              
41             Single hash reference. There are two possible interfaces: C and C.
42              
43             =over 4
44              
45             =item 1 C interface
46              
47             $source = "./t/data/delta.csv";
48             $obj = Parse::File::Taxonomy::Index->new( {
49             file => $source,
50             } );
51              
52             Elements in the hash reference are keyed on:
53              
54             =over 4
55              
56             =item * C
57              
58             Absolute or relative path to the incoming taxonomy file.
59             B for this interface.
60              
61             =item * C
62              
63             The name of the column in the header row under which each data record's unique
64             ID can be found. Defaults to C.
65              
66             =item * C
67              
68             The name of the column in the header row under which each data record's parent
69             ID can be found. (Will be empty in the case of top-level nodes, as they have
70             no parent.) Defaults to C.
71              
72             =item * C
73              
74             The name of the column in the header row under which, in each data record, there
75             is a found a string which differentiates that record from all other records
76             with the same parent ID. Defaults to C.
77              
78             =item * Text::CSV options
79              
80             Any other options which could normally be passed to Cnew()> will
81             be passed through to that module's constructor. On the recommendation of the
82             Text::CSV documentation, C is always set to a true value.
83              
84             =back
85              
86             =item 2 C interface
87              
88             $obj = Parse::File::Taxonomy::Index->new( {
89             components => {
90             fields => $fields,
91             data_records => $data_records,
92             }
93             } );
94              
95             Elements in this hash are keyed on:
96              
97             =over 4
98              
99             =item * C
100              
101             This element is B for the C interface. The value of this
102             element is a hash reference with two keys, C and C.
103             C is a reference to an array holding the field or column names for the
104             data set. C is a reference to an array of array references,
105             each of the latter arrayrefs holding one record or row from the data set.
106              
107             =back
108              
109             =back
110              
111             =item * Return Value
112              
113             Parse::File::Taxonomy::Index object.
114              
115             =item * Exceptions
116              
117             C will throw an exception under any of the following conditions:
118              
119             =over 4
120              
121             =item * Argument to C is not a reference.
122              
123             =item * Argument to C is not a hash reference.
124              
125             =item * Argument to C must have either 'file' or 'components' element but not both.
126              
127             =item * Lack columns in header row to match requirements.
128              
129             =item * Non-numeric entry in C or C column.
130              
131             =item * Duplicate entries in C column.
132              
133             =item * Number of fields in a data record does not match number in header row.
134              
135             =item * Empty string in a C column of a record.
136              
137             =item * Unable to locate a record whose C is the C of a different record.
138              
139             =item * No records with same C may share value of C column.
140              
141             =item * C interface
142              
143             =over 4
144              
145             =item * In the C interface, unable to locate the file which is the value of the C element.
146              
147             =item * The same field is found more than once in the header row of the
148             incoming taxonomy file.
149              
150             =item * Unable to open or close the incoming taxonomy file for reading.
151              
152             =back
153              
154             =item * C interface
155              
156             =over 4
157              
158             =item * In the C interface, C element must be a hash reference with C and C elements.
159              
160             =item * C element must be array reference.
161              
162             =item * C element must be reference to array of array references.
163              
164             =item * No duplicate fields in C element's array reference.
165              
166             =back
167              
168             =back
169              
170             =back
171              
172             =cut
173              
174             sub new {
175 0     0 1   my ($class, $args) = @_;
176 0           my $data;
177              
178 0 0 0       croak "Argument to 'new()' must be hashref"
179             unless (ref($args) and reftype($args) eq 'HASH');
180 0 0 0       croak "Argument to 'new()' must have either 'file' or 'components' element"
181             unless ($args->{file} or $args->{components});
182 0 0 0       croak "Argument to 'new()' must have either 'file' or 'components' element but not both"
183             if ($args->{file} and $args->{components});
184              
185 0 0         $data->{id_col} = $args->{id_col}
186             ? delete $args->{id_col}
187             : 'id';
188 0 0         $data->{parent_id_col} = $args->{parent_id_col}
189             ? delete $args->{parent_id_col}
190             : 'parent_id';
191 0 0         $data->{component_col} = $args->{component_col}
192             ? delete $args->{component_col}
193             : 'name';
194              
195 0 0         if ($args->{components}) {
196 0 0 0       croak "Value of 'components' element must be hashref"
197             unless (ref($args->{components}) and reftype($args->{components}) eq 'HASH');
198 0           for my $k ( qw| fields data_records | ) {
199 0 0         croak "Value of 'components' element must have '$k' key-value pair"
200             unless exists $args->{components}->{$k};
201 0 0 0       croak "Value of '$k' element must be arrayref"
202             unless (ref($args->{components}->{$k}) and
203             reftype($args->{components}->{$k}) eq 'ARRAY');
204             }
205 0           for my $row (@{$args->{components}->{data_records}}) {
  0            
206 0 0 0       croak "Each element in 'data_records' array must be arrayref"
207             unless (ref($row) and reftype($row) eq 'ARRAY');
208             }
209 0           _prepare_fields($data, $args->{components}->{fields}, 1);
210 0           my $these_data_records = $args->{components}->{data_records};
211 0           delete $args->{components};
212 0           _prepare_data_records($data, $these_data_records, $args);
213             }
214             else {
215 0 0         croak "Cannot locate file '$args->{file}'"
216             unless (-f $args->{file});
217 0           $data->{file} = delete $args->{file};
218 0           $args->{binary} = 1;
219 0 0         my $csv = Text::CSV->new ( $args )
220             or croak "Cannot use CSV: ".Text::CSV->error_diag ();
221 0 0         open my $IN, "<", $data->{file}
222             or croak "Unable to open '$data->{file}' for reading";
223 0           my $header_ref = $csv->getline($IN);
224 0           _prepare_fields($data, $header_ref);
225              
226 0           my $data_records = $csv->getline_all($IN);
227 0 0         close $IN or croak "Unable to close after reading";
228 0           _prepare_data_records($data, $data_records, $args);
229             }
230              
231 0           while (my ($k,$v) = each %{$args}) {
  0            
232 0           $data->{$k} = $v;
233             }
234 0           return bless $data, $class;
235             }
236              
237             sub _prepare_fields {
238 0     0     my ($data, $fields_ref, $components) = @_;
239 0 0         if (! $components) {
240 0           path_check_fields($data, $fields_ref);
241 0           _check_required_columns($data, $fields_ref);
242             }
243             else { # 'components' interface
244 0           components_check_fields($data, $fields_ref);
245 0           _check_required_columns($data, $fields_ref);
246             }
247 0           $data->{fields} = $fields_ref;
248 0           return $data;
249             }
250              
251             sub _check_required_columns {
252 0     0     my ($data, $fields_ref) = @_;
253 0           my %col2idx = map { $fields_ref->[$_] => $_ } (0 .. $#{$fields_ref});
  0            
  0            
254 0           my %missing_columns = ();
255 0           my %main_columns = map { $_ => 1 } ( qw| id_col parent_id_col component_col | );
  0            
256 0           for my $c ( keys %main_columns ) {
257 0 0         if (! exists $col2idx{$data->{$c}}) {
258 0           $missing_columns{$c} = $data->{$c};
259             }
260             }
261 0           my $error_msg = "Could not locate columns in header to match required arguments:";
262 0           for my $c (sort keys %missing_columns) {
263 0           $error_msg .= "\n $c: $missing_columns{$c}";
264             }
265 0 0         croak $error_msg if scalar keys %missing_columns;
266 0           $data->{fields} = $fields_ref;
267 0           for my $c (keys %main_columns) {
268 0           $data->{$c.'_idx'} = $col2idx{$data->{$c}};
269             }
270 0           return $data;
271             }
272              
273             sub _prepare_data_records {
274 0     0     my ($data, $data_records, $args) = @_;
275             # Confirm no duplicate entries in 'id_col'. DONE
276             # Confirm all rows have same number of columns as header. DONE
277 0           my $error_msg = '';
278 0           my $field_count = scalar(@{$data->{fields}});
  0            
279 0           my @non_numeric_id_records = ();
280 0           my %ids_seen = ();
281 0           my @bad_count_records = ();
282 0           my @nameless_component_records = ();
283 0           for my $rec (@{$data_records}) {
  0            
284 0 0         if ($rec->[$data->{id_col_idx}] !~ m/^\d+$/) {
285 0           push @non_numeric_id_records, [ $rec->[$data->{id_col_idx}], '' ];
286             }
287 0 0 0       if (length($rec->[$data->{parent_id_col_idx}]) and
288             $rec->[$data->{parent_id_col_idx}] !~ m/^\d+$/
289             ) {
290 0           push @non_numeric_id_records, [
291             $rec->[$data->{id_col_idx}],
292             $rec->[$data->{parent_id_col_idx}]
293             ];
294             }
295 0           $ids_seen{$rec->[$data->{id_col_idx}]}++;
296 0           my $this_row_count = scalar(@{$rec});
  0            
297 0 0         if ($this_row_count != $field_count) {
298 0           push @bad_count_records,
299             [ $rec->[$data->{id_col_idx}], $this_row_count ];
300             }
301 0 0         if (! length($rec->[$data->{component_col_idx}])) {
302 0           push @nameless_component_records, $rec->[$data->{id_col_idx}];
303             }
304             }
305 0           $error_msg = <
306             Non-numeric entries are not permitted in the '$data->{id_col}' or '$data->{parent_id_col}' columns.
307             The following records each violate this restriction one or two times:
308             NON_NUMERIC_IDS
309 0           for my $rec (@non_numeric_id_records) {
310 0           $error_msg .= " $data->{id_col}: $rec->[0]\t$data->{parent_id_col}: $rec->[1]\n";
311             }
312 0 0         croak $error_msg if @non_numeric_id_records;
313              
314 0           my @dupe_ids = ();
315 0           for my $id (sort keys %ids_seen) {
316 0 0         push @dupe_ids, $id if $ids_seen{$id} > 1;
317             }
318 0           $error_msg = <
319             No duplicate entries are permitted in the '$data->{id_col}'column.
320             The following entries appear the number of times shown:
321             ERROR_MSG_DUPE
322 0           for my $id (@dupe_ids) {
323 0           $error_msg .= " $id:" . sprintf(" %6s\n" => $ids_seen{$id});
324             }
325 0 0         croak $error_msg if @dupe_ids;
326              
327 0           $error_msg = <
328             Header row has $field_count columns. The following records
329             (identified by the value in their '$data->{id_col}' columns)
330             have different counts:
331             ERROR_MSG_WRONG_COUNT
332 0           for my $rec (@bad_count_records) {
333 0           $error_msg .= " $rec->[0]: $rec->[1]\n";
334             }
335 0 0         croak $error_msg if @bad_count_records;
336              
337 0           $error_msg = <
338             Each data record must have a non-empty string in its 'component' column.
339             The following records (identified by the value in their '$data->{id_col}' columns)
340             lack valid components:
341             NAMELESS_COMPONENT
342 0           for my $rec (@nameless_component_records) {
343 0           $error_msg .= " id: $rec\n";
344             }
345 0 0         croak $error_msg if @nameless_component_records;
346              
347 0           my %ids_missing_parents = ();
348 0           for my $rec (@{$data_records}) {
  0            
349 0           my $parent_id = $rec->[$data->{parent_id_col_idx}];
350 0 0 0       if ( (length($parent_id)) and (! $ids_seen{$parent_id}) ) {
351 0           $ids_missing_parents{$rec->[$data->{id_col_idx}]} = $parent_id;
352             }
353             }
354 0           $error_msg = <
355             For each record with a non-null value in the '$data->{parent_id_col}' column,
356             there must be a record with that value in the '$data->{id_col}' column.
357             The following records (identified by the value in their '$data->{id_col}' columns)
358             appear to to have parent IDs which do not have records of their own:
359             ERROR_MSG_MISSING_PARENT
360 0           for my $k (sort {$a <=> $b} keys %ids_missing_parents) {
  0            
361 0           $error_msg .= " $k: $ids_missing_parents{$k}\n";
362             }
363 0 0         croak $error_msg if scalar keys %ids_missing_parents;
364              
365 0           my %families = ();
366 0           for my $rec (@{$data_records}) {
  0            
367 0 0         if (length($rec->[$data->{parent_id_col_idx}])) {
368 0           $families{$rec->[$data->{parent_id_col_idx}]}{$rec->[$data->{component_col_idx}]}++;
369             }
370             }
371 0           $error_msg = <
372             No record with a non-null value in the '$data->{parent_id_col}' column
373             may have two children with the same value in the '$data->{component_col}' column.
374             The following are violations:
375             ERROR_MSG_SIBLINGS_NAMED_SAME
376              
377 0           my $same_names = 0;
378 0           for my $k (sort {$a <=> $b} keys %families) {
  0            
379 0           for my $l (sort keys %{$families{$k}}) {
  0            
380 0 0         if ($families{$k}{$l} > 1) {
381 0           $error_msg .= " $data->{parent_id_col}: $k|$data->{component_col}: $l|count of $data->{component_col}s: $families{$k}{$l}\n";
382 0           $same_names++;
383             }
384             }
385             }
386 0 0         croak $error_msg if $same_names;
387              
388 0           $data->{data_records} = $data_records;
389 0           return $data;
390             }
391              
392             =head2 C
393              
394             =over 4
395              
396             =item * Purpose
397              
398             Identify the names of the columns in the taxonomy.
399              
400             =item * Arguments
401              
402             my $fields = $self->fields();
403              
404             No arguments; the information is already inside the object.
405              
406             =item * Return Value
407              
408             Reference to an array holding a list of the columns as they appear in the
409             header row of the incoming taxonomy file.
410              
411             =item * Comment
412              
413             Read-only.
414              
415             =back
416              
417             # Implemented in lib/Parse/File/Taxonomy.pm
418              
419             =head2 C
420              
421             =over 4
422              
423             =item * Purpose
424              
425             Once the taxonomy has been validated, get a list of its data rows as a Perl
426             data structure.
427              
428             =item * Arguments
429              
430             $data_records = $self->data_records;
431              
432             None.
433              
434             =item * Return Value
435              
436             Reference to array of array references. The array will hold the data records
437             found in the incoming taxonomy file in their order in that file.
438              
439             =item * Comment
440              
441             Does not contain any information about the fields in the taxonomy, so you
442             should probably either (a) use in conjunction with C method above;
443             or (b) use C.
444              
445             =back
446              
447             # Implemented in lib/Parse/File/Taxonomy.pm
448              
449             =cut
450              
451             =head2 C
452              
453             =over 4
454              
455             =item * Purpose
456              
457             Identify the index position of a given field within the header row.
458              
459             =item * Arguments
460              
461             $index = $obj->get_field_position('income');
462              
463             Takes a single string holding the name of one of the fields (column names).
464              
465             =item * Return Value
466              
467             Integer representing the index position (counting from C<0>) of the field
468             provided as argument. Throws exception if the argument is not actually a
469             field.
470              
471             =back
472              
473             =cut
474              
475             # Implemented in lib/Parse/File/Taxonomy.pm
476              
477             =head2 Accessors
478              
479             The following methods provide information about key columns in a
480             Parse::File::Taxonomy::Path object. The key columns are those which hold the
481             ID, parent ID and component information. They take no arguments. The methods
482             whose names end in C<_idx> return integers, as they return the index position
483             of the column in the header row. The other methods return strings.
484              
485             $index_of_id_column = $self->id_col_idx;
486              
487             $name_of_id_column = $self->id_col;
488              
489             $index_of_parent_id_column = $self->parent_id_col_idx;
490              
491             $name_of_parent_id_column = $self->parent_id_col;
492              
493             $index_of_component_column = $self->component_col_idx;
494              
495             $name_of_component_column = $self->component_col;
496              
497             =cut
498              
499             sub id_col_idx {
500 0     0 0   my $self = shift;
501 0           return $self->{id_col_idx};
502             }
503              
504             sub id_col {
505 0     0 1   my $self = shift;
506 0           return $self->{id_col};
507             }
508              
509             sub parent_id_col_idx {
510 0     0 0   my $self = shift;
511 0           return $self->{parent_id_col_idx};
512             }
513              
514             sub parent_id_col {
515 0     0 1   my $self = shift;
516 0           return $self->{parent_id_col};
517             }
518              
519             sub component_col_idx {
520 0     0 0   my $self = shift;
521 0           return $self->{component_col_idx};
522             }
523              
524             sub component_col {
525 0     0 1   my $self = shift;
526 0           return $self->{component_col};
527             }
528              
529             =head2 C
530              
531             =over 4
532              
533             =item * Purpose
534              
535             Generate a new Perl data structure which holds the same information as a
536             Parse::File::Taxonomy::Index object but which expresses the route from the
537             root node to a given branch or leaf node as either a separator-delimited
538             string (as in the C column of a Parse::File::Taxonomy::Path object) or
539             as an array reference holding the list of names which delineate that route.
540              
541             Another way of expressing this: Transform a taxonomy-by-index to a
542             taxonomy-by-path.
543              
544             Example: Suppose we have a CSV file which serves as a taxonomy-by-index for
545             this data:
546              
547             "id","parent_id","name","is_actionable"
548             "1","","Alpha","0"
549             "2","","Beta","0"
550             "3","1","Epsilon","0"
551             "4","3","Kappa","1"
552             "5","1","Zeta","0"
553             "6","5","Lambda","1"
554             "7","5","Mu","0"
555             "8","2","Eta","1"
556             "9","2","Theta","1"
557              
558             Instead of having the route from the root node to a given node be represented
559             B by following Cs up the tree, suppose we want that
560             route to be represented by a string. Assuming that we work with default
561             column names, that would mean representing the information currently spread
562             out among the C, C and C columns in a single C
563             column which, by default, would hold an array reference.
564              
565             $source = "./t/data/theta.csv";
566             $obj = Parse::File::Taxonomy::Index->new( {
567             file => $source,
568             } );
569              
570             $taxonomy_with_path_as_array = $obj->pathify;
571              
572             Yielding:
573              
574             [
575             ["path", "is_actionable"],
576             [["", "Alpha"], 0],
577             [["", "Beta"], 0],
578             [["", "Alpha", "Epsilon"], 0],
579             [["", "Alpha", "Epsilon", "Kappa"], 1],
580             [["", "Alpha", "Zeta"], 0],
581             [["", "Alpha", "Zeta", "Lambda"], 1],
582             [["", "Alpha", "Zeta", "Mu"], 0],
583             [["", "Beta", "Eta"], 1],
584             [["", "Beta", "Theta"], 1],
585             ]
586              
587             If we wanted the path information represented as a string rather than an array
588             reference, we would say:
589              
590             $taxonomy_with_path_as_string = $obj->pathify( { as_string => 1 } );
591              
592             Yielding:
593              
594             [
595             ["path", "is_actionable"],
596             ["|Alpha", 0],
597             ["|Beta", 0],
598             ["|Alpha|Epsilon", 0],
599             ["|Alpha|Epsilon|Kappa", 1],
600             ["|Alpha|Zeta", 0],
601             ["|Alpha|Zeta|Lambda", 1],
602             ["|Alpha|Zeta|Mu", 0],
603             ["|Beta|Eta", 1],
604             ["|Beta|Theta", 1],
605             ]
606              
607             If we are providing a true value to the C key, we also get to
608             choose what character to use as the separator in the C column.
609              
610             $taxonomy_with_path_as_string_different_path_col_sep =
611             $obj->pathify( {
612             as_string => 1,
613             path_col_sep => '~~',
614             } );
615              
616             Yields:
617              
618             [
619             ["path", "is_actionable"],
620             ["~~Alpha", 0],
621             ["~~Beta", 0],
622             ["~~Alpha~~Epsilon", 0],
623             ["~~Alpha~~Epsilon~~Kappa", 1],
624             ["~~Alpha~~Zeta", 0],
625             ["~~Alpha~~Zeta~~Lambda", 1],
626             ["~~Alpha~~Zeta~~Mu", 0],
627             ["~~Beta~~Eta", 1],
628             ["~~Beta~~Theta", 1],
629             ]
630              
631             Finally, should we want the C column in the returned arrayref to be
632             named something other than I, we can provide a value to the C
633             key.
634              
635             [
636             ["foo", "is_actionable"],
637             [["", "Alpha"], 0],
638             [["", "Beta"], 0],
639             [["", "Alpha", "Epsilon"], 0],
640             [["", "Alpha", "Epsilon", "Kappa"], 1],
641             [["", "Alpha", "Zeta"], 0],
642             [["", "Alpha", "Zeta", "Lambda"], 1],
643             [["", "Alpha", "Zeta", "Mu"], 0],
644             [["", "Beta", "Eta"], 1],
645             [["", "Beta", "Theta"], 1],
646             ]
647              
648             item * Arguments
649              
650             Optional single hash reference. If provided, the following keys may be used:
651              
652             =over 4
653              
654             =item * C
655              
656             User-supplied name for column holding path information in the returned array
657             reference. Defaults to C.
658              
659             =item * C
660              
661             Boolean. If supplied with a true value, path information will be represented
662             as a separator-delimited string rather than an array reference.
663              
664             =item * C
665              
666             User-supplied string to be used to separate the parts of the route when
667             C is called with a true value. Not meaningful unless C
668             is true.
669              
670             =back
671              
672             =item * Return Value
673              
674             Reference to an array of array references. The first element in the array
675             will be a reference to an array of field names. Each succeeding element will
676             be a reference to an array holding data for one record in the original
677             taxonomy. The path data will be represented, by default, as an array
678             reference built up from the component (C) column in the original
679             taxonomy, but if C is selected, the path data in all non-header
680             elements will be a separator-delimited string.
681              
682             =back
683              
684             =cut
685              
686             sub pathify {
687 0     0 1   my ($self, $args) = @_;
688 0 0         if (defined $args) {
689 0 0 0       unless (ref($args) and (reftype($args) eq 'HASH')) {
690 0           croak "Argument to pathify() must be hash ref";
691             }
692 0           my %permissible_args = map { $_ => 1 } ( qw| path_col as_string path_col_sep | );
  0            
693 0           for my $k (keys %{$args}) {
  0            
694 0 0         croak "'$k' is not a recognized key for pathify() argument hashref"
695             unless $permissible_args{$k};
696             }
697 0 0 0       if ($args->{path_col_sep} and not $args->{as_string}) {
698 0           croak "Supplying a value for key 'path_col_step' is only valid when also supplying true value for 'as_string'";
699             }
700             }
701 0 0         $args->{path_col} = defined($args->{path_col}) ? $args->{path_col} : 'path';
702 0 0         if ($args->{as_string}) {
703 0 0         $args->{path_col_sep} = defined($args->{path_col_sep}) ? $args->{path_col_sep} : '|';
704             }
705              
706 0           my @rewritten = ();
707 0           my @fields_in = @{$self->fields};
  0            
708 0           my @fields_out = ( $args->{path_col} );
709 0           for my $f (@fields_in) {
710 0 0 0       unless (
      0        
711             ($f eq $self->id_col) or
712             ($f eq $self->parent_id_col) or
713             ($f eq $self->component_col)
714             ) {
715 0           push @fields_out, $f;
716             }
717             }
718 0           push @rewritten, \@fields_out;
719              
720 0           my %colsin2idx = map { $fields_in[$_] => $_ } (0 .. $#fields_in);
  0            
721              
722 0           my %hashed_data = map { $_->[$self->id_col_idx] => {
  0            
723             parent_id => $_->[$self->parent_id_col_idx],
724             component => $_->[$self->component_col_idx],
725 0           } } @{$self->data_records};
726              
727 0           my @this_path = ();
728 0           my $code;
729             $code = sub {
730 0     0     my $id = shift;
731 0           push @this_path, $hashed_data{$id}{component};
732 0           my $parent_id = $hashed_data{$id}{parent_id};
733 0 0         if (length($parent_id)) {
734 0           &{$code}($parent_id);
  0            
735             }
736             else {
737 0           push @this_path, '';
738             }
739 0           return;
740 0           };
741 0           for my $rec (@{$self->data_records}) {
  0            
742 0           my @new_record;
743 0           &{$code}($rec->[$self->id_col_idx]);
  0            
744 0           my $path_as_array_ref = [ reverse @this_path ];
745 0 0         if ($args->{as_string}) {
746 0           push @new_record,
747 0           join($args->{path_col_sep} => @{$path_as_array_ref});
748             }
749             else {
750 0           push @new_record, $path_as_array_ref;
751             }
752 0           for my $f (grep { $_ ne $args->{path_col} } @fields_out) {
  0            
753 0           push @new_record, $rec->[$colsin2idx{$f}];
754             }
755 0           push @rewritten, \@new_record;
756 0           @this_path = ();
757             }
758 0           return \@rewritten;
759             }
760              
761             1;
762              
763             # vim: formatoptions=crqot