line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
package EPUB::Parser; |
2
|
14
|
|
|
14
|
|
302013
|
use 5.008005; |
|
14
|
|
|
|
|
35
|
|
3
|
14
|
|
|
14
|
|
45
|
use strict; |
|
14
|
|
|
|
|
12
|
|
|
14
|
|
|
|
|
220
|
|
4
|
14
|
|
|
14
|
|
47
|
use warnings; |
|
14
|
|
|
|
|
15
|
|
|
14
|
|
|
|
|
275
|
|
5
|
14
|
|
|
14
|
|
41
|
use Carp; |
|
14
|
|
|
|
|
14
|
|
|
14
|
|
|
|
|
645
|
|
6
|
14
|
|
|
14
|
|
4558
|
use EPUB::Parser::Util::EpubLoad; |
|
14
|
|
|
|
|
31
|
|
|
14
|
|
|
|
|
515
|
|
7
|
14
|
|
|
14
|
|
4889
|
use EPUB::Parser::Util::Archive; |
|
14
|
|
|
|
|
23
|
|
|
14
|
|
|
|
|
348
|
|
8
|
14
|
|
|
14
|
|
4923
|
use EPUB::Parser::File::OPF; |
|
0
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
use EPUB::Parser::File::Navi; |
10
|
|
|
|
|
|
|
use EPUB::Parser::Manager::Pages; |
11
|
|
|
|
|
|
|
use EPUB::Parser::Util::ShortcutMethod qw/ |
12
|
|
|
|
|
|
|
title creator language identifier |
13
|
|
|
|
|
|
|
items_by_media items_by_media_type |
14
|
|
|
|
|
|
|
toc_list |
15
|
|
|
|
|
|
|
/; |
16
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
our $VERSION = "0.06"; |
19
|
|
|
|
|
|
|
|
20
|
|
|
|
|
|
|
# Constructor.
#   $args - optional hashref; only the `epub_version` key is read, and
#           '3.0' is used when it is missing or false.
# Returns a blessed hashref whose `zip` and `opf` slots start out as empty
# strings.
sub new {
    my ($class, $args) = @_;
    $args ||= {};

    my %self = (
        zip          => '',
        opf          => '',
        epub_version => $args->{epub_version} || '3.0',
    );

    return bless \%self, $class;
}
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
# Lazy accessor for the OPF object.
# On first use an EPUB::Parser::File::OPF is built from this parser's
# `zip` and `epub_version` slots and cached in `$self->{opf}`; later calls
# return the cached value.
sub opf {
    my ($self) = @_;

    return $self->{opf} if $self->{opf};

    my %params = (
        zip          => $self->{zip},
        epub_version => $self->{epub_version},
    );
    $self->{opf} = EPUB::Parser::File::OPF->new(\%params);

    return $self->{opf};
}
39
|
|
|
|
|
|
|
|
40
|
|
|
|
|
|
|
# Lazy accessor for the navigation document object.
# On first use an EPUB::Parser::File::Navi is built from this parser's
# `zip` slot and the path reported by `$self->opf->nav_path`, then cached
# in `$self->{navi}`; later calls return the cached value.
sub navi {
    my ($self) = @_;

    return $self->{navi} if $self->{navi};

    # Built as a list so nav_path is still called in list context.
    my %params = (
        zip  => $self->{zip},
        path => $self->opf->nav_path,
    );
    $self->{navi} = EPUB::Parser::File::Navi->new(\%params);

    return $self->{navi};
}
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
# Fetch the contents of one member of the loaded EPUB archive.
#   $path - member path inside the archive; passed to the archive object
#           as { file_path => $path }.
# Returns whatever the archive's get_member_data returns for that path.
# Croaks when no archive has been loaded yet (the `zip` slot is still
# false), so the caller gets a clear message at its own call site instead
# of Perl's "Can't call method ... without a package or object reference".
sub data_from_path {
    my ($self, $path) = @_;

    croak('EPUB is not loaded; call load_file or load_binary first')
        unless $self->{zip};

    return $self->{zip}->get_member_data({ file_path => $path });
}
54
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Lazy accessor for the pages manager.
# On first use an EPUB::Parser::Manager::Pages is built from this parser's
# `opf` and `navi` accessors and cached in `$self->{pages_manager}`; later
# calls return the cached value.
sub pages_manager {
    my ($self) = @_;

    return $self->{pages_manager} if $self->{pages_manager};

    my %params = (
        opf  => $self->opf,
        navi => $self->navi,
    );
    $self->{pages_manager} = EPUB::Parser::Manager::Pages->new(\%params);

    return $self->{pages_manager};
}
62
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
|
64
|
|
|
|
|
|
|
# Shared loader behind load_file and load_binary.
#   $args   - passed unchanged to the EpubLoad class method
#   $method - name of the EPUB::Parser::Util::EpubLoad class method to call
# The loaded data is wrapped in an EPUB::Parser::Util::Archive and stored
# in `$self->{zip}`. If that slot already holds a true value, nothing is
# loaded and the existing archive is kept.
# Returns $self.
sub _load_epub {
    my ($self, $args, $method) = @_;

    unless ($self->{zip}) {
        my $data = EPUB::Parser::Util::EpubLoad->$method($args);
        $self->{zip} = EPUB::Parser::Util::Archive->new({ data => $data });
    }

    return $self;
}
72
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
# Load an EPUB through EPUB::Parser::Util::EpubLoad->load_file.
#   $args - passed through to the loader (the POD example uses
#           { file_path => 'sample.epub' }).
# Returns what _load_epub returns, i.e. the parser itself.
# Only the invocant and the first argument are forwarded, so the loader
# method name always lands in _load_epub's third slot regardless of how
# many arguments the caller supplied.
sub load_file {
    my ($self, $args) = @_;
    return _load_epub($self, $args, 'load_file');
}
74
|
|
|
|
|
|
|
# Load an EPUB through EPUB::Parser::Util::EpubLoad->load_binary.
#   $args - passed through to the loader (the POD example uses
#           { data => $binary_data }).
# Returns what _load_epub returns, i.e. the parser itself.
# Only the invocant and the first argument are forwarded, so the loader
# method name always lands in _load_epub's third slot regardless of how
# many arguments the caller supplied.
sub load_binary {
    my ($self, $args) = @_;
    return _load_epub($self, $args, 'load_binary');
}
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
1; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
__END__ |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
=encoding utf-8 |
81
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
=head1 NAME |
83
|
|
|
|
|
|
|
|
84
|
|
|
|
|
|
|
EPUB::Parser - EPUB Parser class |
85
|
|
|
|
|
|
|
|
86
|
|
|
|
|
|
|
=head1 SYNOPSIS |
87
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
use EPUB::Parser; |
89
|
|
|
|
|
|
|
my $ep = EPUB::Parser->new; |
90
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
# load epub |
92
|
|
|
|
|
|
|
$ep->load_file({ file_path => 'sample.epub' }); |
93
|
|
|
|
|
|
|
# or |
94
|
|
|
|
|
|
|
$ep->load_binary({ data => $binary_data }); |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# get opf version |
97
|
|
|
|
|
|
|
my $version = $ep->opf->guess_version; |
98
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
# get css. Return value is 'EPUB::Parser::Util::Archive::Iterator' object. |
100
|
|
|
|
|
|
|
my $itr = $ep->items_by_media_type({ regexp => qr{text/css}ix }); |
101
|
|
|
|
|
|
|
while ( my $zip_member = $itr->next ) { |
102
|
|
|
|
|
|
|
$zip_member->data; |
103
|
|
|
|
|
|
|
$zip_member->path; |
104
|
|
|
|
|
|
|
} |
105
|
|
|
|
|
|
|
|
106
|
|
|
|
|
|
|
# shortcut method. The iterator object contains image, audio and video item paths. |
107
|
|
|
|
|
|
|
my $itr = $ep->items_by_media; |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
# get list under <nav id="toc" epub:type="toc"> |
110
|
|
|
|
|
|
|
# todo: parse nested list |
111
|
|
|
|
|
|
|
for my $chapter ( $ep->toc_list ) { |
112
|
|
|
|
|
|
|
$chapter->{title}; |
113
|
|
|
|
|
|
|
$chapter->{href}; |
114
|
|
|
|
|
|
|
} |
115
|
|
|
|
|
|
|
|
116
|
|
|
|
|
|
|
# get cover image blob |
117
|
|
|
|
|
|
|
my $cover_img_path = $ep->opf->cover_image_path; |
118
|
|
|
|
|
|
|
$ep->data_from_path($cover_img_path); |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
# get page list from each chapter. |
121
|
|
|
|
|
|
|
my $collect_pages = $ep->pages_manager->get_page_from_each_chapter; |
122
|
|
|
|
|
|
|
# no_chapter_member => [ |
123
|
|
|
|
|
|
|
# 'OEBPS/cover.xhtml', |
124
|
|
|
|
|
|
|
# 'OEBPS/nav.xhtml' |
125
|
|
|
|
|
|
|
# ], |
126
|
|
|
|
|
|
|
# chapter_group => [ |
127
|
|
|
|
|
|
|
# [ |
128
|
|
|
|
|
|
|
# 'OEBPS/0_1.xhtml' |
129
|
|
|
|
|
|
|
# 'OEBPS/0_2.xhtml' |
130
|
|
|
|
|
|
|
# 'OEBPS/0_3.xhtml' |
131
|
|
|
|
|
|
|
# ], |
132
|
|
|
|
|
|
|
# [ |
133
|
|
|
|
|
|
|
# 'OEBPS/1_1.xhtml' |
134
|
|
|
|
|
|
|
# 'OEBPS/1_2.xhtml' |
135
|
|
|
|
|
|
|
# 'OEBPS/1_3.xhtml' |
136
|
|
|
|
|
|
|
# ], |
137
|
|
|
|
|
|
|
# .... |
138
|
|
|
|
|
|
|
# ] |
139
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
=head1 DESCRIPTION |
141
|
|
|
|
|
|
|
|
142
|
|
|
|
|
|
|
EPUB::Parser parses EPUB3 and returns Perl data structures. |
143
|
|
|
|
|
|
|
This module can only parse EPUB3. |
144
|
|
|
|
|
|
|
|
145
|
|
|
|
|
|
|
=head1 METHODS |
146
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=head2 new(\%opts) |
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
Constructor. |
150
|
|
|
|
|
|
|
Creates a new EPUB::Parser instance. Valid options are: |
151
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=over 4 |
153
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
=item epub_version |
155
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
EPUB::Parser->new({ epub_version => '3.0' }); |
157
|
|
|
|
|
|
|
epub_version defaults to 3.0; currently only 3.0 is supported. |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=back |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
=head2 opf |
162
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
Returns instance of L<EPUB::Parser::File::OPF>. |
164
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
=head2 navi |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
Returns instance of L<EPUB::Parser::File::Navi>. |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
=head2 data_from_path($path) |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
Gets the blob stored in the loaded EPUB at the path given in $path. |
172
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
=head2 pages_manager |
174
|
|
|
|
|
|
|
|
175
|
|
|
|
|
|
|
Returns instance of L<EPUB::Parser::Manager::Pages>. |
176
|
|
|
|
|
|
|
|
177
|
|
|
|
|
|
|
=head2 load_file({ file_path => 'sample.epub' }) |
178
|
|
|
|
|
|
|
|
179
|
|
|
|
|
|
|
load from EPUB file. |
180
|
|
|
|
|
|
|
|
181
|
|
|
|
|
|
|
=head2 load_binary({ data => $binary_data }) |
182
|
|
|
|
|
|
|
|
183
|
|
|
|
|
|
|
load from EPUB blob. |
184
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
=head1 LICENSE |
186
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
Copyright (C) tokubass. |
188
|
|
|
|
|
|
|
|
189
|
|
|
|
|
|
|
This library is free software; you can redistribute it and/or modify |
190
|
|
|
|
|
|
|
it under the same terms as Perl itself. |
191
|
|
|
|
|
|
|
|
192
|
|
|
|
|
|
|
=head1 AUTHOR |
193
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
tokubass E<lt>tokubass {at} cpan.orgE<gt> |
195
|
|
|
|
|
|
|
|
196
|
|
|
|
|
|
|
=cut |
197
|
|
|
|
|
|
|
|