File Coverage

blib/lib/File/ELAN.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package File::ELAN;
2              
3 1     1   25087 use 5.006;
  1         5  
  1         53  
4 1     1   12 use strict;
  1         2  
  1         51  
5 1     1   6 use warnings FATAL => 'all';
  1         7  
  1         63  
6 1     1   6 use Carp;
  1         1  
  1         105  
7              
8             =head1 NAME
9              
10             File::ELAN - Parsing and manipulating ELAN files
11              
12             =head1 VERSION
13              
14             Version 0.01
15              
16             =cut
17              
18             our $VERSION = '0.01';
19              
20              
21             =head1 SYNOPSIS
22              
23             ELAN (http://tla.mpi.nl/tools/tla-tools/elan/) is a tool for creating annotations
24             on video and audio resources. This module allows you to read and manipulate the
25             annotation files.
26              
27             use File::ELAN;
28              
29             my $elan = File::ELAN->read("test.eaf");
30             for my $annot ($elan->{annotations}{tier1}) {
31             ...
32             }
33              
34             $elan->{annotations}{newtier} = [ ... ];
35              
36             $elan->write("test2.eaf");
37              
38             =head1 SUBROUTINES/METHODS
39              
40             =head2 read($filename)
41              
42             Reads the annotations from the ELAN file into a data structure, to be found in
43             the C<{annotations}> hashref. C contains a hash of tiers, and each
44             tier contains an array of C.
45              
46             my @annotations_in_this_tier = @{$elan->{annotations}{tier_i_am_interested_in}};
47              
48             Each element of C<@annotations_in_this_tier> is a hashref which looks like this:
49              
50             {
51             id => "a123", # Annotation ID
52             start => 12.34, # Time value in seconds
53             end => 23.45,
54             value => "Hello" # Content of annotation
55             }
56              
57              
58             =cut
59              
60 1     1   478 use XML::LibXML;
  0            
  0            
61             use utf8;
62              
63             sub read {
64             my ($class, $eaf) = @_;
65             my $self = bless({}, $class);
66             my $dom = XML::LibXML->load_xml(location => $eaf) or die "Couldn't load ELAN file $eaf: $!";
67             $self->{dom} = $dom;
68             $self->_read_timeslots();
69             $self->_read_tiers();
70             #delete $self->{timeslots};
71             return $self;
72             }
73              
74             sub _read_timeslots {
75             my $self = shift;
76             my $ts = $self->{dom}->find("//*/TIME_SLOT");
77             $self->{timeslots} = {};
78             foreach my $node ($ts->get_nodelist) {
79             $self->{timeslots}{$node->getAttribute("TIME_SLOT_ID")} = $node->getAttribute("TIME_VALUE") / 1000;
80             }
81             }
82             sub _trim {my $foo = shift; $foo =~ s/^\s+//gsm; $foo =~ s/\s+$//gsm; return $foo; }
83             sub _read_tiers {
84             my $self = shift;
85             my $tiers = $self->{dom}->find("//*/TIER");
86             $self->{annotations} = {};
87             foreach my $tier ($tiers->get_nodelist) {
88             my $id = $tier->getAttribute("TIER_ID");
89             $self->{annotations}{$id} = [];
90             my $nl = $tier->find("*/ALIGNABLE_ANNOTATION") or die $!;
91             foreach my $node ($nl->get_nodelist) {
92             push @{$self->{annotations}{$id}}, {
93             id => $node->getAttribute("ANNOTATION_ID"),
94             start => $self->{timeslots}{$node->getAttribute("TIME_SLOT_REF1")},
95             end => $self->{timeslots}{$node->getAttribute("TIME_SLOT_REF2")},
96             value => _trim($node->textContent())
97             }
98             }
99             }
100             }
101              
102             =head2 write($filename)
103              
104             After fiddling with the annotations hash, you can call C to write the
105             information back to another ELAN file. You can create new tiers, and new annotations.
106             You don't have to mess about with timeslots; we work all that out for you. You also
107             don't need to give each annotation an C; one will be automatically assigned.
108              
109             =cut
110              
111             sub write {
112             my ($self, $fn) = @_;
113              
114             # Assign timeslots to everyone
115             my %timeslots;
116             for my $tier (values %{$self->{annotations}}) {
117             for (@$tier) {
118             $timeslots{ $_->{start} * 1000 } = 1;
119             $timeslots{ $_->{end} * 1000 } = 1;
120             }
121             }
122              
123             my $tsid = "ts1";
124             for (sort {$a <=> $b} keys %timeslots) { $timeslots{$_} = $tsid++; }
125             for my $tier (values %{$self->{annotations}}) {
126             for (@$tier) {
127             $_->{start} = $timeslots{$_->{start}*1000};
128             $_->{end} = $timeslots{$_->{end}*1000};
129             }
130             }
131              
132             # Clean out TIME_ORDER tag and write timeslots there.
133             my ($to) = $self->{dom}->find("//*/TIME_ORDER")->get_nodelist();
134             $to->removeChildNodes();
135             for (sort {$a <=> $b} keys %timeslots) {
136             my $node = XML::LibXML::Element->new( "TIME_SLOT" );
137             $node->setAttribute("TIME_SLOT_ID", $timeslots{$_});
138             $node->setAttribute("TIME_VALUE", $_);
139             $to->addChild($node);
140             }
141              
142             my ($lastid) = $self->{dom}->find('//*/PROPERTY[@NAME="lastUsedAnnotationId"]/text()')->get_nodelist;
143             # For each tier:
144             while (my ($tier_name, $annots) = each %{$self->{annotations}}) {
145             my ($tier) = $self->{dom}->find("//TIER[\@TIER_ID='$tier_name']")->get_nodelist();
146             if (!$tier) {
147             # Add a tier tag after the last one present or after TIME_ORDER if not
148             $tier = XML::LibXML::Element->new( "TIER" );
149             $tier->setAttribute("TIER_ID", $tier_name);
150             $tier->setAttribute("DEFAULT_LOCALE", "en");
151             $tier->setAttribute("LINGUISTIC_TYPE_REF", "default-lt");
152             my ($refnode) = $self->{dom}->find("(//TIME_ORDER|//TIER)[last()]")->get_nodelist();
153             if (!$refnode) { die "Couldn't find anywhere to add a new tier"}
154             $refnode->parentNode->insertAfter($tier, $refnode);
155             }
156             $tier->removeChildNodes();
157              
158             # Dump out all the annotations into the tier tag
159             for (@$annots) {
160             my $aa = XML::LibXML::Element->new( "ALIGNABLE_ANNOTATION" );
161             $aa->addNewChild("", "ANNOTATION_VALUE")->addChild($self->{dom}->createTextNode($_->{value}));
162             $aa->setAttribute("ANNOTATION_ID", $_->{id} || "a".$lastid++);
163             $aa->setAttribute("TIME_SLOT_REF1", $_->{start});
164             $aa->setAttribute("TIME_SLOT_REF2", $_->{end});
165             my $annot = XML::LibXML::Element->new( "ANNOTATION" );
166             $annot->appendChild($aa);
167             $tier->appendChild($annot);
168             }
169             }
170             # Update last ID
171             my ($lid) = $self->{dom}->find('//*/PROPERTY[@NAME="lastUsedAnnotationId"]')->get_nodelist();
172             $lid->removeChildNodes();
173             $lid->addChild($self->{dom}->createTextNode($lastid));
174              
175             open my $out, '>', $fn;
176             binmode $out;
177             $self->{dom}->toFH($out);
178             close $out;
179              
180             }
181              
182             1;
183              
184             =head1 AUTHOR
185              
186             Simon Cozens, C<< >>
187              
188             =head1 BUGS
189              
190             Please report any bugs or feature requests to C, or through
191             the web interface at L. I will be notified, and then you'll
192             automatically be notified of progress on your bug as I make changes.
193              
194             =head1 SUPPORT
195              
196             You can find documentation for this module with the perldoc command.
197              
198             perldoc File::ELAN
199              
200              
201             You can also look for information at:
202              
203             =over 4
204              
205             =item * RT: CPAN's request tracker (report bugs here)
206              
207             L
208              
209             =item * AnnoCPAN: Annotated CPAN documentation
210              
211             L
212              
213             =item * CPAN Ratings
214              
215             L
216              
217             =item * Search CPAN
218              
219             L
220              
221             =back
222              
223              
224             =head1 ACKNOWLEDGEMENTS
225              
226              
227             =head1 LICENSE AND COPYRIGHT
228              
229             Copyright 2014 Simon Cozens.
230              
231             This program is free software; you can redistribute it and/or modify it
232             under the terms of the the Artistic License (2.0). You may obtain a
233             copy of the full license at:
234              
235             L
236              
237             Any use, modification, and distribution of the Standard or Modified
238             Versions is governed by this Artistic License. By using, modifying or
239             distributing the Package, you accept this license. Do not use, modify,
240             or distribute the Package, if you do not accept this license.
241              
242             If your Modified Version has been derived from a Modified Version made
243             by someone other than you, you are nevertheless required to ensure that
244             your Modified Version complies with the requirements of this license.
245              
246             This license does not grant you the right to use any trademark, service
247             mark, tradename, or logo of the Copyright Holder.
248              
249             This license includes the non-exclusive, worldwide, free-of-charge
250             patent license to make, have made, use, offer to sell, sell, import and
251             otherwise transfer the Package with respect to any patent claims
252             licensable by the Copyright Holder that are necessarily infringed by the
253             Package. If you institute patent litigation (including a cross-claim or
254             counterclaim) against any party alleging that the Package constitutes
255             direct or contributory patent infringement, then this Artistic License
256             to you shall terminate on the date that such litigation is filed.
257              
258             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
259             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
260             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
261             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
262             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
263             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
264             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
265             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
266              
267              
268             =cut
269              
270             1; # End of File::ELAN