| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
1
|
|
|
1
|
|
43354
|
use 5.008; |
|
|
1
|
|
|
|
|
4
|
|
|
|
1
|
|
|
|
|
33
|
|
|
2
|
1
|
|
|
1
|
|
7
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
38
|
|
|
3
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
71
|
|
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
# ABSTRACT: a low-level reader for EBML files |
|
6
|
|
|
|
|
|
|
package Parse::Matroska::Reader; |
|
7
|
|
|
|
|
|
|
{ |
|
8
|
|
|
|
|
|
|
$Parse::Matroska::Reader::VERSION = '0.003'; |
|
9
|
|
|
|
|
|
|
} |
|
10
|
|
|
|
|
|
|
|
|
11
|
1
|
|
|
1
|
|
550
|
use Parse::Matroska::Definitions qw{elem_by_hexid}; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
76
|
|
|
12
|
1
|
|
|
1
|
|
590
|
use Parse::Matroska::Element; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
25
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
1
|
|
|
1
|
|
5
|
use Carp; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
46
|
|
|
15
|
1
|
|
|
1
|
|
4
|
use Scalar::Util qw{openhandle weaken}; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
86
|
|
|
16
|
1
|
|
|
1
|
|
870
|
use IO::Handle; |
|
|
1
|
|
|
|
|
7033
|
|
|
|
1
|
|
|
|
|
55
|
|
|
17
|
1
|
|
|
1
|
|
1048
|
use IO::File; |
|
|
1
|
|
|
|
|
2080
|
|
|
|
1
|
|
|
|
|
114
|
|
|
18
|
1
|
|
|
1
|
|
5
|
use List::Util qw{first}; |
|
|
1
|
|
|
|
|
3
|
|
|
|
1
|
|
|
|
|
50
|
|
|
19
|
1
|
|
|
1
|
|
920
|
use Encode; |
|
|
1
|
|
|
|
|
15902
|
|
|
|
1
|
|
|
|
|
96
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
1
|
|
|
1
|
|
9
|
use constant BIGINT_TRY => 'Pari,GMP,FastCalc'; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
75
|
|
|
22
|
1
|
|
|
1
|
|
1456
|
use Math::BigInt try => BIGINT_TRY; |
|
|
1
|
|
|
|
|
21088
|
|
|
|
1
|
|
|
|
|
6
|
|
|
23
|
1
|
|
|
1
|
|
19343
|
use Math::BigRat try => BIGINT_TRY; |
|
|
1
|
|
|
|
|
33987
|
|
|
|
1
|
|
|
|
|
5
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
sub new { |
|
26
|
1
|
|
|
1
|
1
|
720
|
my $class = shift; |
|
27
|
1
|
|
|
|
|
3
|
my $self = {}; |
|
28
|
1
|
|
|
|
|
3
|
bless $self, $class; |
|
29
|
|
|
|
|
|
|
|
|
30
|
1
|
50
|
|
|
|
9
|
$self->open(@_) if @_; |
|
31
|
1
|
|
|
|
|
150
|
return $self; |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub open { |
|
35
|
2
|
|
|
2
|
1
|
10
|
my ($self, $arg) = @_; |
|
36
|
2
|
50
|
33
|
|
|
34
|
$self->{fh} = openhandle($arg) || IO::File->new($arg, "<:raw") |
|
37
|
|
|
|
|
|
|
or croak "Can't open $arg: $!"; |
|
38
|
|
|
|
|
|
|
} |
|
39
|
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
sub close { |
|
41
|
1
|
|
|
1
|
1
|
530
|
my ($self) = @_; |
|
42
|
1
|
|
|
|
|
14
|
$self->{fh}->close; |
|
43
|
1
|
|
|
|
|
39
|
delete $self->{fh}; |
|
44
|
|
|
|
|
|
|
} |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# equivalent to $self->readlen(1), possibly faster |
|
47
|
|
|
|
|
|
|
sub _getc { |
|
48
|
50
|
|
|
50
|
|
52
|
my ($self) = @_; |
|
49
|
50
|
|
|
|
|
148
|
my $c = $self->{fh}->getc; |
|
50
|
50
|
0
|
33
|
|
|
286
|
croak "Can't do read of length 1: $!" if !defined $c && $!; |
|
51
|
50
|
|
|
|
|
98
|
return $c; |
|
52
|
|
|
|
|
|
|
} |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
sub readlen { |
|
55
|
66
|
|
|
66
|
1
|
71
|
my ($self, $len) = @_; |
|
56
|
66
|
|
|
|
|
53
|
my $data; |
|
57
|
66
|
|
|
|
|
161
|
my $readlen = $self->{fh}->read($data, $len); |
|
58
|
66
|
50
|
|
|
|
360
|
croak "Can't do read of length $len: $!" |
|
59
|
|
|
|
|
|
|
unless defined $readlen; |
|
60
|
66
|
|
|
|
|
222
|
return $data; |
|
61
|
|
|
|
|
|
|
} |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
# converts a byte string into an integer |
|
64
|
|
|
|
|
|
|
# we do so by converting the integer into a hex string (big-endian) |
|
65
|
|
|
|
|
|
|
# and then reading the hex-string into an integer |
|
66
|
|
|
|
|
|
|
sub _bin2int($) { |
|
67
|
35
|
|
|
35
|
|
80
|
my ($bin) = @_; |
|
68
|
|
|
|
|
|
|
# if the length is larger than 3 |
|
69
|
|
|
|
|
|
|
# the resulting integer might be larger than INT_MAX |
|
70
|
35
|
100
|
|
|
|
64
|
if (length($bin) > 3) { |
|
71
|
4
|
|
|
|
|
24
|
return Math::BigInt->from_hex(unpack("H*", $bin)); |
|
72
|
|
|
|
|
|
|
} |
|
73
|
31
|
|
|
|
|
89
|
return hex(unpack("H*", $bin)); |
|
74
|
|
|
|
|
|
|
} |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
# creates a floating-point number with the given mantissa and exponent |
|
77
|
|
|
|
|
|
|
sub _ldexp { |
|
78
|
1
|
|
|
1
|
|
380
|
my ($mantissa, $exponent) = @_; |
|
79
|
1
|
|
|
|
|
9
|
return $mantissa * Math::BigRat->new(2)**$exponent; |
|
80
|
|
|
|
|
|
|
} |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# NOTE: the read_* functions are hard to read because they're ports |
|
83
|
|
|
|
|
|
|
# of even harder to read python functions. |
|
84
|
|
|
|
|
|
|
# TODO: make them readable |
|
85
|
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
sub read_id { |
|
87
|
25
|
|
|
25
|
1
|
27
|
my ($self) = @_; |
|
88
|
25
|
|
|
|
|
48
|
my $t = $self->_getc; |
|
89
|
25
|
50
|
|
|
|
43
|
return undef unless defined $t; |
|
90
|
25
|
|
|
|
|
25
|
my $i = 0; |
|
91
|
25
|
|
|
|
|
23
|
my $mask = 1<<7; |
|
92
|
|
|
|
|
|
|
|
|
93
|
25
|
50
|
|
|
|
47
|
if (ord($t) == 0) { |
|
94
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: first byte of ID was \\0" |
|
95
|
|
|
|
|
|
|
} |
|
96
|
25
|
|
|
|
|
45
|
until (ord($t) & $mask) { |
|
97
|
37
|
|
|
|
|
43
|
++$i; |
|
98
|
37
|
|
|
|
|
64
|
$mask >>= 1; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
# return hex string of the bytes we just read |
|
101
|
25
|
|
|
|
|
53
|
return unpack "H*", ($t . $self->readlen($i)); |
|
102
|
|
|
|
|
|
|
} |
|
103
|
|
|
|
|
|
|
|
|
104
|
|
|
|
|
|
|
sub read_size { |
|
105
|
25
|
|
|
25
|
1
|
26
|
my ($self) = @_; |
|
106
|
25
|
|
|
|
|
37
|
my $t = $self->_getc; |
|
107
|
25
|
|
|
|
|
26
|
my $i = 0; |
|
108
|
25
|
|
|
|
|
22
|
my $mask = 1<<7; |
|
109
|
|
|
|
|
|
|
|
|
110
|
25
|
50
|
|
|
|
48
|
if (ord($t) == 0) { |
|
111
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: first byte of data size was \\0" |
|
112
|
|
|
|
|
|
|
} |
|
113
|
25
|
|
|
|
|
46
|
until (ord($t) & $mask) { |
|
114
|
12
|
|
|
|
|
8
|
++$i; |
|
115
|
12
|
|
|
|
|
20
|
$mask >>= 1; |
|
116
|
|
|
|
|
|
|
} |
|
117
|
25
|
|
|
|
|
44
|
$t = $t & chr($mask-1); # strip length bits (keep only significant bits) |
|
118
|
25
|
|
|
|
|
48
|
return ($i+1, _bin2int $t . $self->readlen($i)); |
|
119
|
|
|
|
|
|
|
} |
|
120
|
|
|
|
|
|
|
|
|
121
|
|
|
|
|
|
|
{ |
|
122
|
|
|
|
|
|
|
my $utf8 = find_encoding("UTF-8"); |
|
123
|
|
|
|
|
|
|
sub read_str { |
|
124
|
5
|
|
|
5
|
1
|
7
|
my ($self, $length) = @_; |
|
125
|
5
|
|
|
|
|
7
|
return $utf8->decode($self->readlen($length)); |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub read_uint { |
|
130
|
10
|
|
|
10
|
1
|
11
|
my ($self, $length) = @_; |
|
131
|
10
|
|
|
|
|
17
|
return _bin2int $self->readlen($length); |
|
132
|
|
|
|
|
|
|
} |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
sub read_sint { |
|
135
|
1
|
|
|
1
|
1
|
2
|
my ($self, $length) = @_; |
|
136
|
1
|
|
|
|
|
2
|
my $i = $self->read_uint($length); |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
# Apply 2's complement to the unsigned int |
|
139
|
1
|
|
|
|
|
130
|
my $mask = int(2 ** ($length * 8 - 1)); |
|
140
|
|
|
|
|
|
|
# if the most significant bit is set... |
|
141
|
1
|
50
|
|
|
|
3
|
if ($i & $mask) { |
|
142
|
|
|
|
|
|
|
# subtract the MSB twice |
|
143
|
0
|
|
|
|
|
0
|
$i -= 2 * $mask; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
1
|
|
|
|
|
303
|
return $i; |
|
146
|
|
|
|
|
|
|
} |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
sub read_float { |
|
149
|
1
|
|
|
1
|
1
|
3
|
my ($self, $length) = @_; |
|
150
|
1
|
|
|
|
|
2
|
my $i = $self->read_uint($length); |
|
151
|
1
|
|
|
|
|
104
|
my $f; |
|
152
|
|
|
|
|
|
|
|
|
153
|
1
|
|
|
1
|
|
3913
|
use bigrat try => BIGINT_TRY; |
|
|
1
|
|
|
|
|
8990
|
|
|
|
1
|
|
|
|
|
6
|
|
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
# These evil expressions reinterpret an unsigned int as IEEE binary floats |
|
156
|
1
|
50
|
|
|
|
6
|
if ($length == 4) { |
|
|
|
50
|
|
|
|
|
|
|
157
|
0
|
|
|
|
|
0
|
$f = _ldexp(($i & (1<<23 - 1)) + (1<<23), ($i>>23 & (1<<8 - 1)) - 150); |
|
158
|
0
|
0
|
|
|
|
0
|
$f = -$f if $i & (1<<31); |
|
159
|
|
|
|
|
|
|
} elsif ($length == 8) { |
|
160
|
1
|
|
|
|
|
175
|
$f = _ldexp(($i & (1<<52 - 1)) + (1<<52), ($i>>52 & (1<<12 - 1)) - 1075); |
|
161
|
1
|
50
|
|
|
|
1790
|
$f = -$f if $i & (1<<63); |
|
162
|
|
|
|
|
|
|
} else { |
|
163
|
0
|
|
|
|
|
0
|
croak "Matroska Syntax error: unsupported IEEE float byte size $length"; |
|
164
|
|
|
|
|
|
|
} |
|
165
|
|
|
|
|
|
|
|
|
166
|
1
|
|
|
|
|
87
|
return $f; |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
sub read_ebml_id { |
|
170
|
0
|
|
|
0
|
1
|
0
|
my ($self, $length) = @_; |
|
171
|
0
|
|
|
|
|
0
|
return elem_by_hexid(unpack("H*", $self->readlen($length))); |
|
172
|
|
|
|
|
|
|
} |
|
173
|
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
sub skip { |
|
175
|
3
|
|
|
3
|
1
|
4
|
my ($self, $len) = @_; |
|
176
|
3
|
50
|
33
|
|
|
17
|
return if $self->{fh}->can('seek') && $self->{fh}->seek($len, 1); |
|
177
|
0
|
|
|
|
|
0
|
$self->readlen($len); |
|
178
|
0
|
|
|
|
|
0
|
return; |
|
179
|
|
|
|
|
|
|
} |
|
180
|
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
sub getpos { |
|
182
|
73
|
|
|
73
|
1
|
79
|
my ($self) = @_; |
|
183
|
73
|
50
|
|
|
|
230
|
return undef unless $self->{fh}->can('getpos'); |
|
184
|
73
|
|
|
|
|
281
|
return $self->{fh}->getpos; |
|
185
|
|
|
|
|
|
|
} |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
sub setpos { |
|
188
|
23
|
|
|
23
|
1
|
30
|
my ($self, $pos) = @_; |
|
189
|
23
|
50
|
33
|
|
|
126
|
return undef unless $pos && $self->{fh}->can('setpos'); |
|
190
|
|
|
|
|
|
|
|
|
191
|
23
|
|
|
|
|
202
|
my $ret = $self->{fh}->setpos($pos); |
|
192
|
23
|
50
|
|
|
|
44
|
croak "Cannot seek to correct position" |
|
193
|
|
|
|
|
|
|
unless $self->getpos eq $pos; |
|
194
|
23
|
|
|
|
|
53
|
return $ret; |
|
195
|
|
|
|
|
|
|
} |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub read_element { |
|
198
|
25
|
|
|
25
|
1
|
33
|
my ($self, $read_bin) = @_; |
|
199
|
25
|
50
|
|
|
|
72
|
return undef if $self->{fh}->eof; |
|
200
|
|
|
|
|
|
|
|
|
201
|
25
|
|
|
|
|
293
|
my $elem_pos = $self->getpos; |
|
202
|
|
|
|
|
|
|
|
|
203
|
25
|
|
|
|
|
52
|
my $elid = $self->read_id; |
|
204
|
25
|
|
|
|
|
69
|
my $elem_def = elem_by_hexid($elid); |
|
205
|
25
|
|
|
|
|
51
|
my ($size_len, $content_len) = $self->read_size; |
|
206
|
25
|
|
|
|
|
243
|
my $full_len = length($elid)/2 + $size_len + $content_len; |
|
207
|
|
|
|
|
|
|
|
|
208
|
25
|
|
33
|
|
|
319
|
my $elem = Parse::Matroska::Element->new( |
|
|
|
|
33
|
|
|
|
|
|
209
|
|
|
|
|
|
|
elid => $elid, |
|
210
|
|
|
|
|
|
|
name => $elem_def && $elem_def->{name}, |
|
211
|
|
|
|
|
|
|
type => $elem_def && $elem_def->{valtype}, |
|
212
|
|
|
|
|
|
|
size_len => $size_len, |
|
213
|
|
|
|
|
|
|
content_len => $content_len, |
|
214
|
|
|
|
|
|
|
full_len => $full_len, |
|
215
|
|
|
|
|
|
|
reader => $self, |
|
216
|
|
|
|
|
|
|
elem_pos => $elem_pos, |
|
217
|
|
|
|
|
|
|
data_pos => $self->getpos, |
|
218
|
|
|
|
|
|
|
); |
|
219
|
25
|
|
|
|
|
58
|
weaken($elem->{reader}); |
|
220
|
|
|
|
|
|
|
|
|
221
|
25
|
50
|
|
|
|
44
|
if (defined $elem_def) { |
|
222
|
25
|
100
|
|
|
|
92
|
if ($elem->{type} eq 'sub') { |
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
223
|
7
|
|
|
|
|
15
|
$elem->{value} = []; |
|
224
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'str') { |
|
225
|
5
|
|
|
|
|
11
|
$elem->{value} = $self->read_str($content_len); |
|
226
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'ebml_id') { |
|
227
|
0
|
|
|
|
|
0
|
$elem->{value} = $self->read_ebml_id($content_len); |
|
228
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'uint') { |
|
229
|
8
|
|
|
|
|
19
|
$elem->{value} = $self->read_uint($content_len); |
|
230
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'sint') { |
|
231
|
1
|
|
|
|
|
5
|
$elem->{value} = $self->read_sint($content_len); |
|
232
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'float') { |
|
233
|
1
|
|
|
|
|
4
|
$elem->{value} = $self->read_float($content_len); |
|
234
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'skip') { |
|
235
|
0
|
|
|
|
|
0
|
$self->skip($content_len); |
|
236
|
|
|
|
|
|
|
} elsif ($elem->{type} eq 'binary') { |
|
237
|
3
|
50
|
|
|
|
6
|
if ($read_bin) { |
|
238
|
0
|
|
|
|
|
0
|
$elem->{value} = $self->readlen($content_len); |
|
239
|
|
|
|
|
|
|
} else { |
|
240
|
3
|
|
|
|
|
9
|
$self->skip($content_len); |
|
241
|
|
|
|
|
|
|
} |
|
242
|
|
|
|
|
|
|
} else { |
|
243
|
0
|
|
|
|
|
0
|
die "Matroska Definition error: type $elem->{valtype} unknown" |
|
244
|
|
|
|
|
|
|
} |
|
245
|
|
|
|
|
|
|
} else { |
|
246
|
0
|
|
|
|
|
0
|
$self->skip($content_len); |
|
247
|
|
|
|
|
|
|
} |
|
248
|
25
|
|
|
|
|
322
|
return $elem; |
|
249
|
|
|
|
|
|
|
} |
|
250
|
|
|
|
|
|
|
|
|
251
|
|
|
|
|
|
|
1; |
|
252
|
|
|
|
|
|
|
|
|
253
|
|
|
|
|
|
|
__END__ |