| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | # | 
| 2 |  |  |  |  |  |  | # This file is part of TBX-Min | 
| 3 |  |  |  |  |  |  | # | 
| 4 |  |  |  |  |  |  | # This software is copyright (c) 2016 by Alan Melby. | 
| 5 |  |  |  |  |  |  | # | 
| 6 |  |  |  |  |  |  | # This is free software; you can redistribute it and/or modify it under | 
| 7 |  |  |  |  |  |  | # the same terms as the Perl 5 programming language system itself. | 
| 8 |  |  |  |  |  |  | # | 
| 9 |  |  |  |  |  |  | package TBX::Min; | 
| 10 | 8 |  |  | 8 |  | 266611 | use strict; | 
|  | 8 |  |  |  |  | 17 |  | 
|  | 8 |  |  |  |  | 294 |  | 
| 11 | 8 |  |  | 8 |  | 47 | use warnings; | 
|  | 8 |  |  |  |  | 16 |  | 
|  | 8 |  |  |  |  | 373 |  | 
| 12 |  |  |  |  |  |  | our $VERSION = '0.07'; # VERSION | 
| 13 |  |  |  |  |  |  | # ABSTRACT: Read, write and edit TBX-Min files | 
| 14 | 8 |  |  | 8 |  | 6674 | use subs qw(date_created directionality entries); | 
|  | 8 |  |  |  |  | 183 |  | 
|  | 8 |  |  |  |  | 40 |  | 
| 15 | 8 |  |  |  |  | 54 | use Class::Tiny qw( | 
| 16 |  |  |  |  |  |  | id | 
| 17 |  |  |  |  |  |  | description | 
| 18 |  |  |  |  |  |  | creator | 
| 19 |  |  |  |  |  |  | license | 
| 20 |  |  |  |  |  |  | source_lang | 
| 21 |  |  |  |  |  |  | target_lang | 
| 22 |  |  |  |  |  |  | date_created | 
| 23 |  |  |  |  |  |  | directionality | 
| 24 |  |  |  |  |  |  | entries | 
| 25 | 8 |  |  | 8 |  | 6829 | ); | 
|  | 8 |  |  |  |  | 27733 |  | 
| 26 | 8 |  |  | 8 |  | 33144 | use XML::Twig; | 
|  | 0 |  |  |  |  |  |  | 
|  | 0 |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | use autodie; | 
| 28 |  |  |  |  |  |  | use Path::Tiny; | 
| 29 |  |  |  |  |  |  | use Carp; | 
| 30 |  |  |  |  |  |  | use Import::Into; | 
| 31 |  |  |  |  |  |  | use DateTime::Format::ISO8601; | 
| 32 |  |  |  |  |  |  | use Try::Tiny; | 
| 33 |  |  |  |  |  |  | use TBX::Min::TermEntry; | 
| 34 |  |  |  |  |  |  | use TBX::Min::LangSet; | 
| 35 |  |  |  |  |  |  | use TBX::Min::TIG; | 
| 36 |  |  |  |  |  |  | use TBX::Min::NoteGrp; | 
| 37 |  |  |  |  |  |  | use TBX::Min::Note; | 
| 38 |  |  |  |  |  |  |  | 
| 39 |  |  |  |  |  |  | # Use Import::Into to export subclasses into caller | 
| 40 |  |  |  |  |  |  | sub import { | 
| 41 |  |  |  |  |  |  | my $target = caller; | 
| 42 |  |  |  |  |  |  | TBX::Min::TermEntry->import::into($target); | 
| 43 |  |  |  |  |  |  | TBX::Min::LangSet->import::into($target); | 
| 44 |  |  |  |  |  |  | TBX::Min::TIG->import::into($target); | 
| 45 |  |  |  |  |  |  | TBX::Min::NoteGrp->import::into($target); | 
| 46 |  |  |  |  |  |  | TBX::Min::Note->import::into($target); | 
| 47 |  |  |  |  |  |  | return; | 
| 48 |  |  |  |  |  |  | } | 
| 49 |  |  |  |  |  |  |  | 
| 50 |  |  |  |  |  |  | sub new_from_xml { | 
| 51 |  |  |  |  |  |  | my ($class, $data) = @_; | 
| 52 |  |  |  |  |  |  |  | 
| 53 |  |  |  |  |  |  | if(!$data){ | 
| 54 |  |  |  |  |  |  | croak 'missing required data argument'; | 
| 55 |  |  |  |  |  |  | } | 
| 56 |  |  |  |  |  |  |  | 
| 57 |  |  |  |  |  |  | my $fh = _get_handle($data); | 
| 58 |  |  |  |  |  |  |  | 
| 59 |  |  |  |  |  |  | # build a twig out of the input document | 
| 60 |  |  |  |  |  |  | my $twig = XML::Twig->new( | 
| 61 |  |  |  |  |  |  | output_encoding => 'UTF-8', | 
| 62 |  |  |  |  |  |  | # do_not_chain_handlers => 1, #can be important when things get complicated | 
| 63 |  |  |  |  |  |  | keep_spaces     => 0, | 
| 64 |  |  |  |  |  |  |  | 
| 65 |  |  |  |  |  |  | # these store new entries, langSets and tigs | 
| 66 |  |  |  |  |  |  | start_tag_handlers => { | 
| 67 |  |  |  |  |  |  | termEntry => \&_termEntryStart, | 
| 68 |  |  |  |  |  |  | langSet => \&_langStart,	#langset | 
| 69 |  |  |  |  |  |  | tig => \&_termGrpStart, #tig | 
| 70 |  |  |  |  |  |  | noteGrp => \&_noteGrpStart, #note group | 
| 71 |  |  |  |  |  |  | note => \&_noteStart, #note | 
| 72 |  |  |  |  |  |  | }, | 
| 73 |  |  |  |  |  |  |  | 
| 74 |  |  |  |  |  |  | TwigHandlers    => { | 
| 75 |  |  |  |  |  |  | TBX => \&_check_dialect, | 
| 76 |  |  |  |  |  |  | # header attributes become attributes of the TBX::Min object | 
| 77 |  |  |  |  |  |  | id => \&_headerAtt, | 
| 78 |  |  |  |  |  |  | description => \&_headerAtt, | 
| 79 |  |  |  |  |  |  | dateCreated => \&_date_created, | 
| 80 |  |  |  |  |  |  | creator => \&_headerAtt, | 
| 81 |  |  |  |  |  |  | license => \&_headerAtt, | 
| 82 |  |  |  |  |  |  | directionality => \&_directionality, | 
| 83 |  |  |  |  |  |  | languages => \&_languages, | 
| 84 |  |  |  |  |  |  |  | 
| 85 |  |  |  |  |  |  | # becomes part of the current TBX::Min::TermEntry object | 
| 86 |  |  |  |  |  |  | subjectField => sub { | 
| 87 |  |  |  |  |  |  | shift->{tbx_min_entries}->[-1]->subject_field($_->text)}, | 
| 88 |  |  |  |  |  |  |  | 
| 89 |  |  |  |  |  |  | # these become attributes of the current TBX::Min::TIG object | 
| 90 |  |  |  |  |  |  | term => sub {shift->{tbx_min_current_term_grp}->term($_->text)}, | 
| 91 |  |  |  |  |  |  | partOfSpeech => sub { | 
| 92 |  |  |  |  |  |  | shift->{tbx_min_current_term_grp}->part_of_speech($_->text)}, | 
| 93 |  |  |  |  |  |  | customer => sub { | 
| 94 |  |  |  |  |  |  | shift->{tbx_min_current_term_grp}->customer($_->text)}, | 
| 95 |  |  |  |  |  |  | termStatus => sub { | 
| 96 |  |  |  |  |  |  | shift->{tbx_min_current_term_grp}->status($_->text)}, | 
| 97 |  |  |  |  |  |  |  | 
| 98 |  |  |  |  |  |  | # these become attributes of the current TBX::Min::Note object | 
| 99 |  |  |  |  |  |  | noteKey => sub {shift->{tbx_min_current_note}->noteKey($_->text)}, | 
| 100 |  |  |  |  |  |  | noteValue => sub {shift->{tbx_min_current_note}->noteValue($_->text)}, | 
| 101 |  |  |  |  |  |  |  | 
| 102 |  |  |  |  |  |  | # delete the termEntry twig when finished to free up memory | 
| 103 |  |  |  |  |  |  | termEntry => sub {$_[0]->purge}, | 
| 104 |  |  |  |  |  |  | } | 
| 105 |  |  |  |  |  |  | ); | 
| 106 |  |  |  |  |  |  |  | 
| 107 |  |  |  |  |  |  | # use handlers to process individual tags, then grab the result | 
| 108 |  |  |  |  |  |  | $twig->parse($fh); | 
| 109 |  |  |  |  |  |  | my $self = $twig->{tbx_min_att}; | 
| 110 |  |  |  |  |  |  | $self->{entries} = $twig->{tbx_min_entries} || []; | 
| 111 |  |  |  |  |  |  | bless $self, $class; | 
| 112 |  |  |  |  |  |  | return $self; | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 |  |  |  |  |  |  | sub _get_handle { | 
| 116 |  |  |  |  |  |  | my ($data) = @_; | 
| 117 |  |  |  |  |  |  | my $fh; | 
| 118 |  |  |  |  |  |  | if((ref $data) eq 'SCALAR'){ | 
| 119 |  |  |  |  |  |  | open $fh, '<', $data; ## no critic(RequireBriefOpen) | 
| 120 |  |  |  |  |  |  | }else{ | 
| 121 |  |  |  |  |  |  | $fh = path($data)->filehandle('<'); | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | return $fh; | 
| 124 |  |  |  |  |  |  | } | 
| 125 |  |  |  |  |  |  |  | 
| 126 |  |  |  |  |  |  |  | 
| 127 |  |  |  |  |  |  | my %valid = map +($_=>1), Class::Tiny->get_all_attributes_for(__PACKAGE__); | 
| 128 |  |  |  |  |  |  | sub new { | 
| 129 |  |  |  |  |  |  | my ($class, $args) = @_; | 
| 130 |  |  |  |  |  |  | my $self; | 
| 131 |  |  |  |  |  |  | if((ref $args) eq 'HASH'){ | 
| 132 |  |  |  |  |  |  |  | 
| 133 |  |  |  |  |  |  | # validate arguments | 
| 134 |  |  |  |  |  |  | if(my @invalids = grep !$valid{$_}, sort keys %$args){ | 
| 135 |  |  |  |  |  |  | croak 'Invalid attributes for class: ' . | 
| 136 |  |  |  |  |  |  | join ' ', @invalids | 
| 137 |  |  |  |  |  |  | } | 
| 138 |  |  |  |  |  |  | if($args->{entries} && ref $args->{entries} ne 'ARRAY'){ | 
| 139 |  |  |  |  |  |  | croak q{Attribute 'entries' should be an array reference}; | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  | if(exists $args->{directionality}){ | 
| 142 |  |  |  |  |  |  | _validate_dir($args->{directionality}); | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | # validate datetime and store object, not string | 
| 146 |  |  |  |  |  |  | if(my $dt_string = $args->{date_created}){ | 
| 147 |  |  |  |  |  |  | $args->{date_created} = _parse_datetime($dt_string); | 
| 148 |  |  |  |  |  |  | } | 
| 149 |  |  |  |  |  |  |  | 
| 150 |  |  |  |  |  |  | $self = $args; | 
| 151 |  |  |  |  |  |  | }else{ | 
| 152 |  |  |  |  |  |  | $self = {}; | 
| 153 |  |  |  |  |  |  | } | 
| 154 |  |  |  |  |  |  | $self->{entries} ||= []; | 
| 155 |  |  |  |  |  |  | return bless $self, $class; | 
| 156 |  |  |  |  |  |  | } | 
| 157 |  |  |  |  |  |  |  | 
| 158 |  |  |  |  |  |  | sub date_created { | 
| 159 |  |  |  |  |  |  | my ($self, $date_created) = @_; | 
| 160 |  |  |  |  |  |  | if($date_created) { | 
| 161 |  |  |  |  |  |  | return $self->{date_created} = | 
| 162 |  |  |  |  |  |  | _parse_datetime($date_created); | 
| 163 |  |  |  |  |  |  | } | 
| 164 |  |  |  |  |  |  | if(my $dt = $self->{date_created}){ | 
| 165 |  |  |  |  |  |  | return $dt->iso8601; | 
| 166 |  |  |  |  |  |  | } | 
| 167 |  |  |  |  |  |  | return; | 
| 168 |  |  |  |  |  |  | } | 
| 169 |  |  |  |  |  |  |  | 
| 170 |  |  |  |  |  |  | sub _parse_datetime { | 
| 171 |  |  |  |  |  |  | my ($dt_string) = @_; | 
| 172 |  |  |  |  |  |  | my $dt; | 
| 173 |  |  |  |  |  |  | try{ | 
| 174 |  |  |  |  |  |  | $dt = DateTime::Format::ISO8601->parse_datetime($dt_string); | 
| 175 |  |  |  |  |  |  | }catch{ | 
| 176 |  |  |  |  |  |  | croak 'date is not in ISO8601 format'; | 
| 177 |  |  |  |  |  |  | }; | 
| 178 |  |  |  |  |  |  | return $dt; | 
| 179 |  |  |  |  |  |  | } | 
| 180 |  |  |  |  |  |  |  | 
| 181 |  |  |  |  |  |  | sub directionality { | 
| 182 |  |  |  |  |  |  | my ($self, $directionality) = @_; | 
| 183 |  |  |  |  |  |  | if(defined $directionality) { | 
| 184 |  |  |  |  |  |  | _validate_dir($directionality); | 
| 185 |  |  |  |  |  |  | return $self->{directionality} = $directionality; | 
| 186 |  |  |  |  |  |  | } | 
| 187 |  |  |  |  |  |  | return $self->{directionality}; | 
| 188 |  |  |  |  |  |  | } | 
| 189 |  |  |  |  |  |  |  | 
| 190 |  |  |  |  |  |  | sub _validate_dir { | 
| 191 |  |  |  |  |  |  | my ($dir) = @_; | 
| 192 |  |  |  |  |  |  | if($dir ne 'bidirectional' and $dir ne 'monodirectional'){ | 
| 193 |  |  |  |  |  |  | croak "Illegal directionality '$dir'"; | 
| 194 |  |  |  |  |  |  | } | 
| 195 |  |  |  |  |  |  | return; | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  |  | 
| 198 |  |  |  |  |  |  | sub entries { ## no critic(RequireArgUnpacking) | 
| 199 |  |  |  |  |  |  | my ($self) = @_; | 
| 200 |  |  |  |  |  |  | if (@_ > 1){ | 
| 201 |  |  |  |  |  |  | croak 'extra argument found (entries is a getter only)'; | 
| 202 |  |  |  |  |  |  | } | 
| 203 |  |  |  |  |  |  | return $self->{entries}; | 
| 204 |  |  |  |  |  |  | } | 
| 205 |  |  |  |  |  |  |  | 
| 206 |  |  |  |  |  |  | sub add_entry { | 
| 207 |  |  |  |  |  |  | my ($self, $termEntry) = @_; | 
| 208 |  |  |  |  |  |  | if( !$termEntry || !$termEntry->isa('TBX::Min::TermEntry') ){ | 
| 209 |  |  |  |  |  |  | croak 'argument to add_entry should be a TBx::Min::TermEntry'; | 
| 210 |  |  |  |  |  |  | } | 
| 211 |  |  |  |  |  |  | push @{$self->{entries}}, $termEntry; | 
| 212 |  |  |  |  |  |  | return; | 
| 213 |  |  |  |  |  |  | } | 
| 214 |  |  |  |  |  |  |  | 
| 215 |  |  |  |  |  |  | sub as_xml { | 
| 216 |  |  |  |  |  |  | my ($self) = @_; | 
| 217 |  |  |  |  |  |  |  | 
| 218 |  |  |  |  |  |  | # construct the whole document using XML::Twig::El's | 
| 219 |  |  |  |  |  |  | my $root = XML::Twig::Elt->new(TBX => {dialect => 'TBX-Min'}); | 
| 220 |  |  |  |  |  |  | my $header = XML::Twig::Elt->new('header')->paste($root); | 
| 221 |  |  |  |  |  |  |  | 
| 222 |  |  |  |  |  |  | # each of these header elements is a simple element with text | 
| 223 |  |  |  |  |  |  | for my $header_att ( | 
| 224 |  |  |  |  |  |  | qw(id creator license directionality description)){ | 
| 225 |  |  |  |  |  |  | next unless $self->{$header_att}; | 
| 226 |  |  |  |  |  |  | XML::Twig::Elt->new($header_att, | 
| 227 |  |  |  |  |  |  | $self->{$header_att})->paste(last_child => $header); | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  | if($self->source_lang || $self->target_lang){ | 
| 230 |  |  |  |  |  |  | my @atts; | 
| 231 |  |  |  |  |  |  | push @atts, (source => $self->source_lang) if $self->source_lang; | 
| 232 |  |  |  |  |  |  | push @atts, (target => $self->target_lang) if $self->target_lang; | 
| 233 |  |  |  |  |  |  | XML::Twig::Elt->new(languages => {@atts})->paste( | 
| 234 |  |  |  |  |  |  | last_child => $header) | 
| 235 |  |  |  |  |  |  | } | 
| 236 |  |  |  |  |  |  | if(my $dt = $self->{date_created}){ | 
| 237 |  |  |  |  |  |  | XML::Twig::Elt->new(dateCreated => $dt->iso8601)->paste( | 
| 238 |  |  |  |  |  |  | last_child => $header); | 
| 239 |  |  |  |  |  |  | } | 
| 240 |  |  |  |  |  |  |  | 
| 241 |  |  |  |  |  |  | my $body = XML::Twig::Elt->new('body')->paste(last_child => $root); | 
| 242 |  |  |  |  |  |  | for my $termEntry (@{$self->entries}){ | 
| 243 |  |  |  |  |  |  | my $entry_el = XML::Twig::Elt->new( | 
| 244 |  |  |  |  |  |  | termEntry => {$termEntry->id ? (id => $termEntry->id) : ()})-> | 
| 245 |  |  |  |  |  |  | paste(last_child => $body); | 
| 246 |  |  |  |  |  |  | my $entry_el_comment = XML::Twig::Elt->new( '#COMMENT', 'terminological entry')-> | 
| 247 |  |  |  |  |  |  | paste(last_child => $entry_el); | 
| 248 |  |  |  |  |  |  | if(my $sf = $termEntry->subject_field){ | 
| 249 |  |  |  |  |  |  | XML::Twig::Elt->new(subjectField => $sf)->paste( | 
| 250 |  |  |  |  |  |  | last_child => $entry_el); | 
| 251 |  |  |  |  |  |  | } | 
| 252 |  |  |  |  |  |  | for my $langGrp (@{$termEntry->lang_groups}){ | 
| 253 |  |  |  |  |  |  | my $lang_el = XML::Twig::Elt->new(langSet => | 
| 254 |  |  |  |  |  |  | {$langGrp->code ? ('xml:lang' => $langGrp->code) : ()} | 
| 255 |  |  |  |  |  |  | )->paste(last_child => $entry_el); | 
| 256 |  |  |  |  |  |  | for my $termGrp (@{$langGrp->term_groups}){ | 
| 257 |  |  |  |  |  |  | my $term_el = XML::Twig::Elt->new('tig')->paste( | 
| 258 |  |  |  |  |  |  | last_child => $lang_el); | 
| 259 |  |  |  |  |  |  | my $term_el_comment = XML::Twig::Elt->new( '#COMMENT', 'terminological information group')-> | 
| 260 |  |  |  |  |  |  | paste(last_child => $term_el); | 
| 261 |  |  |  |  |  |  | if (my $term = $termGrp->term){ | 
| 262 |  |  |  |  |  |  | XML::Twig::Elt->new(term => $term)->paste( | 
| 263 |  |  |  |  |  |  | last_child => $term_el); | 
| 264 |  |  |  |  |  |  | } | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | if (my $customer = $termGrp->customer){ | 
| 267 |  |  |  |  |  |  | XML::Twig::Elt->new(customer => $customer)->paste( | 
| 268 |  |  |  |  |  |  | last_child => $term_el); | 
| 269 |  |  |  |  |  |  | } | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | for my $noteGrp (@{$termGrp->note_groups}){ | 
| 272 |  |  |  |  |  |  | my $note_grp_el = XML::Twig::Elt->new('noteGrp')->paste( | 
| 273 |  |  |  |  |  |  | last_child => $term_el); | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | for my $note (@{$noteGrp->notes}){ | 
| 276 |  |  |  |  |  |  | my $note_el = XML::Twig::Elt->new('note')->paste( | 
| 277 |  |  |  |  |  |  | last_child => $note_grp_el); | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | if (my $noteKey = $note->noteKey){ | 
| 280 |  |  |  |  |  |  | XML::Twig::Elt->new(noteKey => $noteKey)->paste( | 
| 281 |  |  |  |  |  |  | last_child => $note_el); | 
| 282 |  |  |  |  |  |  | } | 
| 283 |  |  |  |  |  |  |  | 
| 284 |  |  |  |  |  |  | if (my $noteValue = $note->noteValue){ | 
| 285 |  |  |  |  |  |  | XML::Twig::Elt->new(noteValue => $noteValue)->paste( | 
| 286 |  |  |  |  |  |  | last_child => $note_el); | 
| 287 |  |  |  |  |  |  | } | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | } | 
| 290 |  |  |  |  |  |  | } | 
| 291 |  |  |  |  |  |  |  | 
| 292 |  |  |  |  |  |  | if (my $status = $termGrp->status){ | 
| 293 |  |  |  |  |  |  | XML::Twig::Elt->new(termStatus => $status )->paste( | 
| 294 |  |  |  |  |  |  | last_child => $term_el); | 
| 295 |  |  |  |  |  |  | } | 
| 296 |  |  |  |  |  |  |  | 
| 297 |  |  |  |  |  |  | if (my $pos = $termGrp->part_of_speech){ | 
| 298 |  |  |  |  |  |  | XML::Twig::Elt->new(partOfSpeech => $pos)->paste( | 
| 299 |  |  |  |  |  |  | last_child => $term_el); | 
| 300 |  |  |  |  |  |  | } | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | } # end tig | 
| 303 |  |  |  |  |  |  | } # end langSet | 
| 304 |  |  |  |  |  |  | } # end termEntry | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | # return pretty-printed string | 
| 307 |  |  |  |  |  |  | XML::Twig->set_pretty_print('indented'); | 
| 308 |  |  |  |  |  |  | my $TBXmin = \$root->sprint; | 
| 309 |  |  |  |  |  |  | $$TBXmin =~ s/>[\s\t\n]* |