File Coverage

lib/Parse/File/Taxonomy/Index.pm
Criterion Covered Total %
statement 212 212 100.0
branch 81 84 96.4
condition 36 36 100.0
subroutine 18 18 100.0
pod 5 8 62.5
total 352 358 98.3


line stmt bran cond sub pod time code
1             package Parse::File::Taxonomy::Index;
2 3     3   7756 use strict;
  3         5  
  3         112  
3 3     3   1598 use parent qw( Parse::File::Taxonomy );
  3         1068  
  3         15  
4 3     3   128 use Carp;
  3         5  
  3         178  
5 3     3   14 use Text::CSV;
  3         5  
  3         15  
6 3     3   68 use Scalar::Util qw( reftype );
  3         34  
  3         167  
7             our $VERSION = '0.04';
8 3         6540 use Parse::File::Taxonomy::Auxiliary qw(
9             path_check_fields
10             components_check_fields
11 3     3   1088 );
  3         6  
12             #use Data::Dump;
13              
14             =head1 NAME
15              
16             Parse::File::Taxonomy::Index - Extract a taxonomy from a hierarchy inside a CSV file
17              
18             =head1 SYNOPSIS
19              
20             use Parse::File::Taxonomy::Index;
21              
22             $source = "./t/data/alpha.csv";
23             $obj = Parse::File::Taxonomy::Index->new( {
24             file => $source,
25             } );
26              
27             =cut
28              
29             =head1 METHODS
30              
31             =head2 C
32              
33             =over 4
34              
35             =item * Purpose
36              
37             Parse::File::Taxonomy::Index constructor.
38              
39             =item * Arguments
40              
41             Single hash reference. There are two possible interfaces: C and C.
42              
43             =over 4
44              
45             =item 1 C interface
46              
47             $source = "./t/data/delta.csv";
48             $obj = Parse::File::Taxonomy::Index->new( {
49             file => $source,
50             } );
51              
52             Elements in the hash reference are keyed on:
53              
54             =over 4
55              
56             =item * C
57              
58             Absolute or relative path to the incoming taxonomy file.
59             B for this interface.
60              
61             =item * C
62              
63             The name of the column in the header row under which each data record's unique
64             ID can be found. Defaults to C.
65              
66             =item * C
67              
68             The name of the column in the header row under which each data record's parent
69             ID can be found. (Will be empty in the case of top-level nodes, as they have
70             no parent.) Defaults to C.
71              
72             =item * C
73              
74             The name of the column in the header row under which, in each data record, there
75             is a found a string which differentiates that record from all other records
76             with the same parent ID. Defaults to C.
77              
78             =item * Text::CSV options
79              
80             Any other options which could normally be passed to Cnew()> will
81             be passed through to that module's constructor. On the recommendation of the
82             Text::CSV documentation, C is always set to a true value.
83              
84             =back
85              
86             =item 2 C interface
87              
88             $obj = Parse::File::Taxonomy::Index->new( {
89             components => {
90             fields => $fields,
91             data_records => $data_records,
92             }
93             } );
94              
95             Elements in this hash are keyed on:
96              
97             =over 4
98              
99             =item * C
100              
101             This element is B for the C interface. The value of this
102             element is a hash reference with two keys, C and C.
103             C is a reference to an array holding the field or column names for the
104             data set. C is a reference to an array of array references,
105             each of the latter arrayrefs holding one record or row from the data set.
106              
107             =back
108              
109             =back
110              
111             =item * Return Value
112              
113             Parse::File::Taxonomy::Index object.
114              
115             =item * Exceptions
116              
117             C will throw an exception under any of the following conditions:
118              
119             =over 4
120              
121             =item * Argument to C is not a reference.
122              
123             =item * Argument to C is not a hash reference.
124              
125             =item * Argument to C must have either 'file' or 'components' element but not both.
126              
127             =item * Lack columns in header row to match requirements.
128              
129             =item * Non-numeric entry in C or C column.
130              
131             =item * Duplicate entries in C column.
132              
133             =item * Number of fields in a data record does not match number in header row.
134              
135             =item * Empty string in a C column of a record.
136              
137             =item * Unable to locate a record whose C is the C of a different record.
138              
139             =item * No records with same C may share value of C column.
140              
141             =item * C interface
142              
143             =over 4
144              
145             =item * In the C interface, unable to locate the file which is the value of the C element.
146              
147             =item * The same field is found more than once in the header row of the
148             incoming taxonomy file.
149              
150             =item * Unable to open or close the incoming taxonomy file for reading.
151              
152             =back
153              
154             =item * C interface
155              
156             =over 4
157              
158             =item * In the C interface, C element must be a hash reference with C and C elements.
159              
160             =item * C element must be array reference.
161              
162             =item * C element must be reference to array of array references.
163              
164             =item * No duplicate fields in C element's array reference.
165              
166             =back
167              
168             =back
169              
170             =back
171              
172             =cut
173              
174             sub new {
175 45     45 1 54294 my ($class, $args) = @_;
176 45         67 my $data;
177              
178 45 100 100     784 croak "Argument to 'new()' must be hashref"
179             unless (ref($args) and reftype($args) eq 'HASH');
180 43 100 100     295 croak "Argument to 'new()' must have either 'file' or 'components' element"
181             unless ($args->{file} or $args->{components});
182 42 100 100     356 croak "Argument to 'new()' must have either 'file' or 'components' element but not both"
183             if ($args->{file} and $args->{components});
184              
185 41 100       133 $data->{id_col} = $args->{id_col}
186             ? delete $args->{id_col}
187             : 'id';
188 41 100       99 $data->{parent_id_col} = $args->{parent_id_col}
189             ? delete $args->{parent_id_col}
190             : 'parent_id';
191 41 100       117 $data->{component_col} = $args->{component_col}
192             ? delete $args->{component_col}
193             : 'name';
194              
195 41 100       75 if ($args->{components}) {
196 22 100 100     323 croak "Value of 'components' element must be hashref"
197             unless (ref($args->{components}) and reftype($args->{components}) eq 'HASH');
198 20         34 for my $k ( qw| fields data_records | ) {
199 37 100       271 croak "Value of 'components' element must have '$k' key-value pair"
200             unless exists $args->{components}->{$k};
201 35 100 100     530 croak "Value of '$k' element must be arrayref"
202             unless (ref($args->{components}->{$k}) and
203             reftype($args->{components}->{$k}) eq 'ARRAY');
204             }
205 15         26 for my $row (@{$args->{components}->{data_records}}) {
  15         38  
206 173 100 100     741 croak "Each element in 'data_records' array must be arrayref"
207             unless (ref($row) and reftype($row) eq 'ARRAY');
208             }
209 13         39 _prepare_fields($data, $args->{components}->{fields}, 1);
210 9         15 my $these_data_records = $args->{components}->{data_records};
211 9         53 delete $args->{components};
212 9         22 _prepare_data_records($data, $these_data_records, $args);
213             }
214             else {
215 19 100       556 croak "Cannot locate file '$args->{file}'"
216             unless (-f $args->{file});
217 18         57 $data->{file} = delete $args->{file};
218 18         41 $args->{binary} = 1;
219 18 50       102 my $csv = Text::CSV->new ( $args )
220             or croak "Cannot use CSV: ".Text::CSV->error_diag ();
221 18 50       2162 open my $IN, "<", $data->{file}
222             or croak "Unable to open '$data->{file}' for reading";
223 18         78 my $header_ref = $csv->getline($IN);
224 18         26761 _prepare_fields($data, $header_ref);
225              
226 14         55 my $data_records = $csv->getline_all($IN);
227 14 50       58697 close $IN or croak "Unable to close after reading";
228 14         54 _prepare_data_records($data, $data_records, $args);
229             }
230              
231 13         23 while (my ($k,$v) = each %{$args}) {
  21         66  
232 8         20 $data->{$k} = $v;
233             }
234 13         77 return bless $data, $class;
235             }
236              
237             sub _prepare_fields {
238 31     31   55 my ($data, $fields_ref, $components) = @_;
239 31 100       73 if (! $components) {
240 18         65 path_check_fields($data, $fields_ref);
241 17         47 _check_required_columns($data, $fields_ref);
242             }
243             else { # 'components' interface
244 13         47 components_check_fields($data, $fields_ref);
245 12         26 _check_required_columns($data, $fields_ref);
246             }
247 23         41 $data->{fields} = $fields_ref;
248 23         30 return $data;
249             }
250              
251             sub _check_required_columns {
252 29     29   44 my ($data, $fields_ref) = @_;
253 29         53 my %col2idx = map { $fields_ref->[$_] => $_ } (0 .. $#{$fields_ref});
  228         346  
  29         72  
254 29         70 my %missing_columns = ();
255 29         57 my %main_columns = map { $_ => 1 } ( qw| id_col parent_id_col component_col | );
  87         164  
256 29         83 for my $c ( keys %main_columns ) {
257 87 100       187 if (! exists $col2idx{$data->{$c}}) {
258 6         12 $missing_columns{$c} = $data->{$c};
259             }
260             }
261 29         58 my $error_msg = "Could not locate columns in header to match required arguments:";
262 29         88 for my $c (sort keys %missing_columns) {
263 6         16 $error_msg .= "\n $c: $missing_columns{$c}";
264             }
265 29 100       860 croak $error_msg if scalar keys %missing_columns;
266 23         47 $data->{fields} = $fields_ref;
267 23         47 for my $c (keys %main_columns) {
268 69         170 $data->{$c.'_idx'} = $col2idx{$data->{$c}};
269             }
270 23         81 return $data;
271             }
272              
273             sub _prepare_data_records {
274 23     23   53 my ($data, $data_records, $args) = @_;
275             # Confirm no duplicate entries in 'id_col'. DONE
276             # Confirm all rows have same number of columns as header. DONE
277 23         41 my $error_msg = '';
278 23         23 my $field_count = scalar(@{$data->{fields}});
  23         53  
279 23         48 my @non_numeric_id_records = ();
280 23         45 my %ids_seen = ();
281 23         38 my @bad_count_records = ();
282 23         31 my @nameless_component_records = ();
283 23         36 for my $rec (@{$data_records}) {
  23         54  
284 295 100       735 if ($rec->[$data->{id_col_idx}] !~ m/^\d+$/) {
285 2         5 push @non_numeric_id_records, [ $rec->[$data->{id_col_idx}], '' ];
286             }
287 295 100 100     990 if (length($rec->[$data->{parent_id_col_idx}]) and
288             $rec->[$data->{parent_id_col_idx}] !~ m/^\d+$/
289             ) {
290 2         5 push @non_numeric_id_records, [
291             $rec->[$data->{id_col_idx}],
292             $rec->[$data->{parent_id_col_idx}]
293             ];
294             }
295 295         411 $ids_seen{$rec->[$data->{id_col_idx}]}++;
296 295         207 my $this_row_count = scalar(@{$rec});
  295         272  
297 295 100       390 if ($this_row_count != $field_count) {
298 6         10 push @bad_count_records,
299             [ $rec->[$data->{id_col_idx}], $this_row_count ];
300             }
301 295 100       512 if (! length($rec->[$data->{component_col_idx}])) {
302 3         7 push @nameless_component_records, $rec->[$data->{id_col_idx}];
303             }
304             }
305 23         91 $error_msg = <
306             Non-numeric entries are not permitted in the '$data->{id_col}' or '$data->{parent_id_col}' columns.
307             The following records each violate this restriction one or two times:
308             NON_NUMERIC_IDS
309 23         45 for my $rec (@non_numeric_id_records) {
310 4         10 $error_msg .= " $data->{id_col}: $rec->[0]\t$data->{parent_id_col}: $rec->[1]\n";
311             }
312 23 100       191 croak $error_msg if @non_numeric_id_records;
313              
314 22         40 my @dupe_ids = ();
315 22         193 for my $id (sort keys %ids_seen) {
316 278 100       430 push @dupe_ids, $id if $ids_seen{$id} > 1;
317             }
318 22         70 $error_msg = <
319             No duplicate entries are permitted in the '$data->{id_col}'column.
320             The following entries appear the number of times shown:
321             ERROR_MSG_DUPE
322 22         39 for my $id (@dupe_ids) {
323 4         15 $error_msg .= " $id:" . sprintf(" %6s\n" => $ids_seen{$id});
324             }
325 22 100       372 croak $error_msg if @dupe_ids;
326              
327 20         57 $error_msg = <
328             Header row has $field_count columns. The following records
329             (identified by the value in their '$data->{id_col}' columns)
330             have different counts:
331             ERROR_MSG_WRONG_COUNT
332 20         38 for my $rec (@bad_count_records) {
333 6         16 $error_msg .= " $rec->[0]: $rec->[1]\n";
334             }
335 20 100       392 croak $error_msg if @bad_count_records;
336              
337 18         47 $error_msg = <
338             Each data record must have a non-empty string in its 'component' column.
339             The following records (identified by the value in their '$data->{id_col}' columns)
340             lack valid components:
341             NAMELESS_COMPONENT
342 18         32 for my $rec (@nameless_component_records) {
343 3         7 $error_msg .= " id: $rec\n";
344             }
345 18 100       258 croak $error_msg if @nameless_component_records;
346              
347 17         33 my %ids_missing_parents = ();
348 17         16 for my $rec (@{$data_records}) {
  17         26  
349 217         199 my $parent_id = $rec->[$data->{parent_id_col_idx}];
350 217 100 100     605 if ( (length($parent_id)) and (! $ids_seen{$parent_id}) ) {
351 4         10 $ids_missing_parents{$rec->[$data->{id_col_idx}]} = $parent_id;
352             }
353             }
354 17         73 $error_msg = <
355             For each record with a non-null value in the '$data->{parent_id_col}' column,
356             there must be a record with that value in the '$data->{id_col}' column.
357             The following records (identified by the value in their '$data->{id_col}' columns)
358             appear to to have parent IDs which do not have records of their own:
359             ERROR_MSG_MISSING_PARENT
360 17         55 for my $k (sort {$a <=> $b} keys %ids_missing_parents) {
  2         12  
361 4         14 $error_msg .= " $k: $ids_missing_parents{$k}\n";
362             }
363 17 100       424 croak $error_msg if scalar keys %ids_missing_parents;
364              
365 15         23 my %families = ();
366 15         20 for my $rec (@{$data_records}) {
  15         28  
367 191 100       287 if (length($rec->[$data->{parent_id_col_idx}])) {
368 133         262 $families{$rec->[$data->{parent_id_col_idx}]}{$rec->[$data->{component_col_idx}]}++;
369             }
370             }
371 15         54 $error_msg = <
372             No record with a non-null value in the '$data->{parent_id_col}' column
373             may have two children with the same value in the '$data->{component_col}' column.
374             The following are violations:
375             ERROR_MSG_SIBLINGS_NAMED_SAME
376              
377 15         25 my $same_names = 0;
378 15         71 for my $k (sort {$a <=> $b} keys %families) {
  144         159  
379 88         72 for my $l (sort keys %{$families{$k}}) {
  88         152  
380 129 100       268 if ($families{$k}{$l} > 1) {
381 4         14 $error_msg .= " $data->{parent_id_col}: $k|$data->{component_col}: $l|count of $data->{component_col}s: $families{$k}{$l}\n";
382 4         7 $same_names++;
383             }
384             }
385             }
386 15 100       329 croak $error_msg if $same_names;
387              
388 13         29 $data->{data_records} = $data_records;
389 13         196 return $data;
390             }
391              
392             =head2 C
393              
394             =over 4
395              
396             =item * Purpose
397              
398             Identify the names of the columns in the taxonomy.
399              
400             =item * Arguments
401              
402             my $fields = $self->fields();
403              
404             No arguments; the information is already inside the object.
405              
406             =item * Return Value
407              
408             Reference to an array holding a list of the columns as they appear in the
409             header row of the incoming taxonomy file.
410              
411             =item * Comment
412              
413             Read-only.
414              
415             =back
416              
417             # Implemented in lib/Parse/File/Taxonomy.pm
418              
419             =head2 C
420              
421             =over 4
422              
423             =item * Purpose
424              
425             Once the taxonomy has been validated, get a list of its data rows as a Perl
426             data structure.
427              
428             =item * Arguments
429              
430             $data_records = $self->data_records;
431              
432             None.
433              
434             =item * Return Value
435              
436             Reference to array of array references. The array will hold the data records
437             found in the incoming taxonomy file in their order in that file.
438              
439             =item * Comment
440              
441             Does not contain any information about the fields in the taxonomy, so you
442             should probably either (a) use in conjunction with C method above;
443             or (b) use C.
444              
445             =back
446              
447             # Implemented in lib/Parse/File/Taxonomy.pm
448              
449             =cut
450              
451             =head2 C
452              
453             =over 4
454              
455             =item * Purpose
456              
457             Identify the index position of a given field within the header row.
458              
459             =item * Arguments
460              
461             $index = $obj->get_field_position('income');
462              
463             Takes a single string holding the name of one of the fields (column names).
464              
465             =item * Return Value
466              
467             Integer representing the index position (counting from C<0>) of the field
468             provided as argument. Throws exception if the argument is not actually a
469             field.
470              
471             =back
472              
473             =cut
474              
475             # Implemented in lib/Parse/File/Taxonomy.pm
476              
477             =head2 Accessors
478              
479             The following methods provide information about key columns in a
480             Parse::File::Taxonomy::Path object. The key columns are those which hold the
481             ID, parent ID and component information. They take no arguments. The methods
482             whose names end in C<_idx> return integers, as they return the index position
483             of the column in the header row. The other methods return strings.
484              
485             $index_of_id_column = $self->id_col_idx;
486              
487             $name_of_id_column = $self->id_col;
488              
489             $index_of_parent_id_column = $self->parent_id_col_idx;
490              
491             $name_of_parent_id_column = $self->parent_id_col;
492              
493             $index_of_component_column = $self->component_col_idx;
494              
495             $name_of_component_column = $self->component_col;
496              
497             =cut
498              
499             sub id_col_idx {
500 261     261 0 840 my $self = shift;
501 261         435 return $self->{id_col_idx};
502             }
503              
504             sub id_col {
505 79     79 1 75 my $self = shift;
506 79         227 return $self->{id_col};
507             }
508              
509             sub parent_id_col_idx {
510 134     134 0 109 my $self = shift;
511 134         203 return $self->{parent_id_col_idx};
512             }
513              
514             sub parent_id_col {
515 68     68 1 62 my $self = shift;
516 68         215 return $self->{parent_id_col};
517             }
518              
519             sub component_col_idx {
520 134     134 0 109 my $self = shift;
521 134         408 return $self->{component_col_idx};
522             }
523              
524             sub component_col {
525 57     57 1 46 my $self = shift;
526 57         162 return $self->{component_col};
527             }
528              
529             =head2 C
530              
531             =over 4
532              
533             =item * Purpose
534              
535             Generate a new Perl data structure which holds the same information as a
536             Parse::File::Taxonomy::Index object but which expresses the route from the
537             root node to a given branch or leaf node as either a separator-delimited
538             string (as in the C column of a Parse::File::Taxonomy::Path object) or
539             as an array reference holding the list of names which delineate that route.
540              
541             Another way of expressing this: Transform a taxonomy-by-index to a
542             taxonomy-by-path.
543              
544             Example: Suppose we have a CSV file which serves as a taxonomy-by-index for
545             this data:
546              
547             "id","parent_id","name","is_actionable"
548             "1","","Alpha","0"
549             "2","","Beta","0"
550             "3","1","Epsilon","0"
551             "4","3","Kappa","1"
552             "5","1","Zeta","0"
553             "6","5","Lambda","1"
554             "7","5","Mu","0"
555             "8","2","Eta","1"
556             "9","2","Theta","1"
557              
558             Instead of having the route from the root node to a given node be represented
559             B by following Cs up the tree, suppose we want that
560             route to be represented by a string. Assuming that we work with default
561             column names, that would mean representing the information currently spread
562             out among the C, C and C columns in a single C
563             column which, by default, would hold an array reference.
564              
565             $source = "./t/data/theta.csv";
566             $obj = Parse::File::Taxonomy::Index->new( {
567             file => $source,
568             } );
569              
570             $taxonomy_with_path_as_array = $obj->pathify;
571              
572             Yielding:
573              
574             [
575             ["path", "is_actionable"],
576             [["", "Alpha"], 0],
577             [["", "Beta"], 0],
578             [["", "Alpha", "Epsilon"], 0],
579             [["", "Alpha", "Epsilon", "Kappa"], 1],
580             [["", "Alpha", "Zeta"], 0],
581             [["", "Alpha", "Zeta", "Lambda"], 1],
582             [["", "Alpha", "Zeta", "Mu"], 0],
583             [["", "Beta", "Eta"], 1],
584             [["", "Beta", "Theta"], 1],
585             ]
586              
587             If we wanted the path information represented as a string rather than an array
588             reference, we would say:
589              
590             $taxonomy_with_path_as_string = $obj->pathify( { as_string => 1 } );
591              
592             Yielding:
593              
594             [
595             ["path", "is_actionable"],
596             ["|Alpha", 0],
597             ["|Beta", 0],
598             ["|Alpha|Epsilon", 0],
599             ["|Alpha|Epsilon|Kappa", 1],
600             ["|Alpha|Zeta", 0],
601             ["|Alpha|Zeta|Lambda", 1],
602             ["|Alpha|Zeta|Mu", 0],
603             ["|Beta|Eta", 1],
604             ["|Beta|Theta", 1],
605             ]
606              
607             If we are providing a true value to the C key, we also get to
608             choose what character to use as the separator in the C column.
609              
610             $taxonomy_with_path_as_string_different_path_col_sep =
611             $obj->pathify( {
612             as_string => 1,
613             path_col_sep => '~~',
614             } );
615              
616             Yields:
617              
618             [
619             ["path", "is_actionable"],
620             ["~~Alpha", 0],
621             ["~~Beta", 0],
622             ["~~Alpha~~Epsilon", 0],
623             ["~~Alpha~~Epsilon~~Kappa", 1],
624             ["~~Alpha~~Zeta", 0],
625             ["~~Alpha~~Zeta~~Lambda", 1],
626             ["~~Alpha~~Zeta~~Mu", 0],
627             ["~~Beta~~Eta", 1],
628             ["~~Beta~~Theta", 1],
629             ]
630              
631             Finally, should we want the C column in the returned arrayref to be
632             named something other than I, we can provide a value to the C
633             key.
634              
635             [
636             ["foo", "is_actionable"],
637             [["", "Alpha"], 0],
638             [["", "Beta"], 0],
639             [["", "Alpha", "Epsilon"], 0],
640             [["", "Alpha", "Epsilon", "Kappa"], 1],
641             [["", "Alpha", "Zeta"], 0],
642             [["", "Alpha", "Zeta", "Lambda"], 1],
643             [["", "Alpha", "Zeta", "Mu"], 0],
644             [["", "Beta", "Eta"], 1],
645             [["", "Beta", "Theta"], 1],
646             ]
647              
648             item * Arguments
649              
650             Optional single hash reference. If provided, the following keys may be used:
651              
652             =over 4
653              
654             =item * C
655              
656             User-supplied name for column holding path information in the returned array
657             reference. Defaults to C.
658              
659             =item * C
660              
661             Boolean. If supplied with a true value, path information will be represented
662             as a separator-delimited string rather than an array reference.
663              
664             =item * C
665              
666             User-supplied string to be used to separate the parts of the route when
667             C is called with a true value. Not meaningful unless C
668             is true.
669              
670             =back
671              
672             =item * Return Value
673              
674             Reference to an array of array references. The first element in the array
675             will be a reference to an array of field names. Each succeeding element will
676             be a reference to an array holding data for one record in the original
677             taxonomy. The path data will be represented, by default, as an array
678             reference built up from the component (C) column in the original
679             taxonomy, but if C is selected, the path data in all non-header
680             elements will be a separator-delimited string.
681              
682             =back
683              
684             =cut
685              
686             sub pathify {
687 15     15 1 14624 my ($self, $args) = @_;
688 15 100       38 if (defined $args) {
689 10 100 100     63 unless (ref($args) and (reftype($args) eq 'HASH')) {
690 2         320 croak "Argument to pathify() must be hash ref";
691             }
692 8         15 my %permissible_args = map { $_ => 1 } ( qw| path_col as_string path_col_sep | );
  24         45  
693 8         11 for my $k (keys %{$args}) {
  8         22  
694 12 100       124 croak "'$k' is not a recognized key for pathify() argument hashref"
695             unless $permissible_args{$k};
696             }
697 7 100 100     36 if ($args->{path_col_sep} and not $args->{as_string}) {
698 1         85 croak "Supplying a value for key 'path_col_step' is only valid when also supplying true value for 'as_string'";
699             }
700             }
701 11 100       31 $args->{path_col} = defined($args->{path_col}) ? $args->{path_col} : 'path';
702 11 100       23 if ($args->{as_string}) {
703 4 100       11 $args->{path_col_sep} = defined($args->{path_col_sep}) ? $args->{path_col_sep} : '|';
704             }
705              
706 11         14 my @rewritten = ();
707 11         10 my @fields_in = @{$self->fields};
  11         35  
708 11         20 my @fields_out = ( $args->{path_col} );
709 11         15 for my $f (@fields_in) {
710 72 100 100     85 unless (
      100        
711             ($f eq $self->id_col) or
712             ($f eq $self->parent_id_col) or
713             ($f eq $self->component_col)
714             ) {
715 39         54 push @fields_out, $f;
716             }
717             }
718 11         19 push @rewritten, \@fields_out;
719              
720 11         21 my %colsin2idx = map { $fields_in[$_] => $_ } (0 .. $#fields_in);
  72         120  
721              
722 127         164 my %hashed_data = map { $_->[$self->id_col_idx] => {
  11         25  
723             parent_id => $_->[$self->parent_id_col_idx],
724             component => $_->[$self->component_col_idx],
725 11         20 } } @{$self->data_records};
726              
727 11         22 my @this_path = ();
728 11         8 my $code;
729             $code = sub {
730 258     258   217 my $id = shift;
731 258         301 push @this_path, $hashed_data{$id}{component};
732 258         249 my $parent_id = $hashed_data{$id}{parent_id};
733 258 100       295 if (length($parent_id)) {
734 131         126 &{$code}($parent_id);
  131         183  
735             }
736             else {
737 127         115 push @this_path, '';
738             }
739 258         248 return;
740 11         41 };
741 11         12 for my $rec (@{$self->data_records}) {
  11         23  
742 127         103 my @new_record;
743 127         193 &{$code}($rec->[$self->id_col_idx]);
  127         155  
744 127         214 my $path_as_array_ref = [ reverse @this_path ];
745 127 100       179 if ($args->{as_string}) {
746 44         80 push @new_record,
747 44         37 join($args->{path_col_sep} => @{$path_as_array_ref});
748             }
749             else {
750 83         83 push @new_record, $path_as_array_ref;
751             }
752 127         121 for my $f (grep { $_ ne $args->{path_col} } @fields_out) {
  618         729  
753 491         681 push @new_record, $rec->[$colsin2idx{$f}];
754             }
755 127         149 push @rewritten, \@new_record;
756 127         203 @this_path = ();
757             }
758 11         57 return \@rewritten;
759             }
760              
761             1;
762              
763             # vim: formatoptions=crqot