File Coverage

blib/lib/HTML/SiteTear.pm
Criterion Covered Total %
statement 39 110 35.4
branch 0 32 0.0
condition n/a
subroutine 13 19 68.4
pod 2 5 40.0
total 54 166 32.5


line stmt bran cond sub pod time code
1             package HTML::SiteTear;
2              
3 1     1   23427 use 5.008;
  1         5  
  1         42  
4 1     1   7 use strict;
  1         2  
  1         35  
5 1     1   6 use warnings;
  1         6  
  1         30  
6 1     1   6 use File::Basename;
  1         1  
  1         111  
7 1     1   7 use File::Spec;
  1         2  
  1         26  
8 1     1   7 use File::Path;
  1         1  
  1         49  
9 1     1   6 use File::Find;
  1         1  
  1         54  
10 1     1   6 use Cwd;
  1         2  
  1         69  
11 1     1   5 use Carp;
  1         2  
  1         57  
12 1     1   11 use base qw(Class::Accessor);
  1         2  
  1         1058  
13             __PACKAGE__->mk_accessors( qw(source_path
14             site_root_path
15             site_root_url
16             target_path
17             member_files) );
18              
19 1     1   2598 use HTML::SiteTear::Root;
  1         3  
  1         8  
20 1     1   548 use HTML::SiteTear::Page;
  1         4  
  1         12  
21              
22 1     1   34 use Data::Dumper;
  1         2  
  1         1094  
23              
24             =head1 NAME
25              
26             HTML::SiteTear - Make a separated copy of a part of the site
27              
28             =head1 VERSION
29              
30             Version 1.44
31              
32             =cut
33              
34             our $VERSION = '1.44';
35              
36             =head1 SYMPOSIS
37              
38             use HTML::SiteTear;
39              
40             $p = HTML::SiteTear->new("/dev1/website/index.html");
41             $p->copy_to("/dev1/website2/newindex.html");
42              
43             =head1 DESCRIPTION
44              
45             This module is to make a separated copy of a part of web site in local file system. All linked files (HTML file, image file, javascript, cascading style shieet) from a source HTML file will be copied under a new page.
46              
47             This module is useful to make a destributable copy of a part of a web site.
48              
49             =head1 METHODS
50              
51             =head2 new
52              
53             $p = HTML::SiteTear->new($source_path);
54            
55             $p = HTML::SiteTear->new('source_path' => $source_path,
56             'site_root_path' => $root_path,
57             'site_root_url' => $url);
58              
59             $p = HTML::SiteTear->new('source_path' => $source_dir,
60             'member_files' => \@pathes);
61              
62             Make an instance of this module. The path to source HTML file "$source_path" is required as an arguemnt. See L about 'site_root_path' and 'site_root_url' parameters
63              
64             =cut
65              
66             our @DEFAULT_HTML_SUFFIXES = qw(.html .htm .xhtml);
67              
68             sub new {
69 0     0 1   my $class = shift @_;
70 0           my $self;
71 0 0         if (@_ == 1) {
72 0           $self = bless {'source_path' => shift @_}, $class;
73              
74             } else {
75 0           my %args = @_;
76 0           $self = $class->SUPER::new(\%args);
77             }
78            
79 0 0         $self->source_path or croak "source_path is not specified.\n";
80 0 0         (-e $self->source_path) or croak $self->source_path." is not found.\n";
81            
82 0 0         if (-d $self->source_path) {
83 0 0         unless (File::Spec->file_name_is_absolute($self->source_path)) {
84 0           my $cwd = fix_dir_path(cwd);
85 0           $self->source_path(
86             URI::file->new($self->source_path)->abs($cwd)->file);
87             }
88            
89 0 0         unless ($self->member_files) {
90 0           my @htmlfiles;
91             my $wanted = sub {
92 0     0     my $name = $_;
93 0 0         if (grep {$name =~ /\Q$_\E$/} @DEFAULT_HTML_SUFFIXES) {
  0            
94 0           push @htmlfiles, $File::Find::name;
95             }
96 0           };
97 0           find($wanted, $self->source_path);
98 0 0         if (@htmlfiles) {
99 0           $self->member_files(\@htmlfiles);
100             } else {
101 0           croak "Can't find HTML files under $self->source_path.\n";
102             }
103             }
104 0           $self->source_path(fix_dir_path($self->source_path));
105            
106             } else {
107 0 0         if ($self->member_files) {
108 0           croak $self->source_path.
109             " is not a directory. Must be a directory.\n";
110             }
111             }
112            
113 0           return $self;
114             }
115              
116             sub page_filter {
117 0     0 0   my ($class, $module) = @_;
118 0           return HTML::SiteTear::Page->page_filter($module);
119             }
120              
121             =head2 copy_to
122              
123             $p->copy_to($destination_path);
124              
125             Copy $source_path into $destination_path. All linked file in $source_path will be copied into directories under $destination_path
126              
127             =cut
128              
129             sub copy_to {
130             #print "start copy_to in SiteTear.pm\n";
131 0     0 1   my ($self, $destination_path) = @_;
132 0           my $source_path = $self->source_path;
133 0 0         if ($self->member_files) {
134 0           return $self->copy_to_dir($destination_path);
135             }
136              
137 0 0         if (-e $destination_path){
138 0 0         if (-d $destination_path) {
139 0           $destination_path = File::Spec->catfile($destination_path,
140             basename($source_path));
141             }
142             } else {
143 0           my ($name, $dir) = fileparse($destination_path);
144 0           mkpath($dir);
145 0 0         unless ($name) {
146 0           $destination_path = File::Spec->catfile($dir,
147             basename($source_path));
148             }
149             }
150            
151 0           $self->target_path($destination_path);
152 0           my $root = HTML::SiteTear::Root->new(%$self);
153 0           my $new_source_page = HTML::SiteTear::Page->new(
154             'parent' => $root,
155             'source_path' => $source_path);
156 0           $new_source_page->linkpath(basename($destination_path) );
157             #$new_source_page->link_uri(URI::file->new(Cwd::abs_path($destination_path)));
158 0           $new_source_page->link_uri(URI::file->new_abs($destination_path));
159 0           $new_source_page->copy_to_linkpath;
160 0           return $new_source_page;
161             }
162              
163             sub fix_dir_path {
164 0     0 0   my ($path) = @_;
165 0           return File::Spec->catfile($path, File::Spec->curdir);
166             }
167              
168             sub copy_to_dir {
169 0     0 0   my ($self, $destination_path) = @_;
170 0 0         if (-e $destination_path){
171 0 0         unless (-d $destination_path) {
172 0           croak $destination_path."is not directory.\n";
173             }
174             }
175              
176 0           $destination_path = fix_dir_path($destination_path);
177            
178 0           $self->target_path($destination_path);
179 0           my $root = HTML::SiteTear::Root->new(%$self);
180 0           my $source_root_uri = $root->source_root_uri;
181 0           my $dest_uri = URI::file->new($destination_path);
182 0           my @results;
183 0           foreach my $file (@{$self->member_files}) {
  0            
184 0           my $a_member_file = $file;
185 0 0         unless (File::Spec->file_name_is_absolute($a_member_file)) {
186 0           $a_member_file = URI::file->new($a_member_file)
187             ->abs($self->source_path)->file;
188             }
189 0           my $page = HTML::SiteTear::Page->new(
190             'parent' => $root,
191             'source_path' => $a_member_file);
192 0           my $rel_from_source_root = $page->source_uri->rel($source_root_uri);
193 0           my $abs_from_dest = $rel_from_source_root->abs($dest_uri);
194 0           $page->link_uri($abs_from_dest);
195 0           $page->copy_to_linkpath;
196 0           push @results, $page;
197             }
198 0           return \@results;
199             }
200              
201             =head1 ABSOLUTE LINK
202              
203             The default behavior of HTML::SiteTear follows all of links in HTML files. In some case, there are links should not be followd. For example, if theare is a link to the top page of the site, all of files in the site will be copyied. Such links should be converted to absolute links (e.g. "http://www.....").
204              
205             To convert links should not be followed into absolute links,
206              
207             =over
208              
209             =item *
210              
211             Give parameters of 'site_root_path' and 'site_root_url' to L method.
212              
213             =over
214              
215             =item 'site_root_path'
216              
217             A file path of the root of the site in the local file system.
218              
219             =item 'site_root_url'
220              
221             A URL corresponding to 'site_root_path' in WWW.
222              
223             =back
224              
225             =item *
226              
227             Relative links to upper level files from 'source_path' are automatically converted to absolute links.
228              
229             =item *
230              
231             To indicate links should be conveted to absolute links, enclose links in HTML files with specail comment tags and
232              
233             =back
234              
235             =head1 AUTHOR
236              
237             Tetsuro KURITA
238              
239             =cut
240              
241             1;