File Coverage

blib/lib/Mir/FileHandler.pm

Criterion	Covered	Total	%
statement	94	123	76.4
branch	28	68	41.1
condition	13	48	27.0
subroutine	21	26	80.7
pod	7	9	77.7
total	163	274	59.4

line	stmt	bran	cond	sub	pod	time	code
1							# ABSTRACT: A module to handle files and folders
2							package Mir::FileHandler;
3
4	4			4		560217	use 5.026;
	4					49
5	4			4		2280	use Moose;
	4					1925132
	4					31
6	4			4		33334	use DirHandle;
	4					7494
	4					133
7	4			4		30	use File::Find;
	4					11
	4					254
8	4			4		30	use File::Basename qw( dirname );
	4					7
	4					275
9	4			4		1805	use Log::Log4perl;
	4					67602
	4					39
10	4			4		226	use Digest::MD5 'md5_base64';
	4					8
	4					265
11	4			4		2740	use Cache::FileCache;
	4					170145
	4					7764
12
13							=head1 NAME
14
15							Mir::FileHandler - An Mir module to handle files and folders...
16
17							=head1 VERSION
18
19							Version 0.03
20
21							=cut
22
23							our $VERSION = '0.03';
24							our @rec_files = ();
25							our $files_as_hash = {};
26							our @types = ();
27
28							=head1 SYNOPSIS
29
30							use Mir::FileHandler;
31
32							# the oo way...
33							# get a new FileHandler obj for the passed root directory
34							my $o = Mir::FileHandler->new(
35							path => $path, # root folder to start with
36							);
37
38							# get plain files list inside the root directory
39							my $list = $o->plainfiles(); # or pass a path in input
40
41							# get plain files list from folder and sub-folders
42							my $list = $o->plainfiles_recursive( $path, $suffix, \&found );
43
44							# Traverses a directory tree and exec code for each file
45							$o->dir_walk(
46							path => $path,
47							code => $code,
48							);
49
50							# Traverses a directory tree and exec code for each file
51							# Stops after max success code execution
52							# the sub pointed by $code has to return 1 in
53							# case of success
54							# if cached_dir is set, at each iteration starts from
55							# what stored in cache, if something has been stored,
56							# otherwise starts from path
57							$o->clear_cache(); # to clear current dir stored in cache
58							$o->dir_walk_max(
59							code => $code, # not mandatory, code to exec for each file found
60							max => $max # not mandatory, max files successfully processed
61							);
62
63							=head1 EXPORT
64
65							=over
66
67							=item plainfiles
68
69							=item plainfiles_recursive
70
71							=back
72
73							=head1 SUBROUTINES/METHODS
74
75							=cut
76
77							#=============================================================
78							has 'path' => (
79							is => 'rw',
80							isa => 'Str',
81							required => 1
82							);
83
84							has 'cache_key' => (
85							is => 'rw',
86							isa => 'Str',
87							);
88
89							has 'cache' => (
90							is => 'ro',
91							isa => 'Cache::FileCache',
92							lazy => 1,
93							default => sub {
94							my $self = shift;
95							my $o = Cache::FileCache->new();
96							return $o;
97							}
98							);
99
100							has 'log' => (
101							is => 'rw',
102							isa => 'Log::Log4perl::Logger',
103							lazy => 1,
104							default => sub {
105							return Log::Log4perl->get_logger( __PACKAGE__ );
106							}
107							);
108
109							sub BUILD {
110	3			3	0	6	my $self = shift;
111	3					96	$self->cache_key( md5_base64( $self->path ) );
112							}
113
114							#=============================================================
115
116							=head2 plainfiles
117
118							=head3 INPUT
119
120							$path: path to look for (not mandatory, eventually takes the
121							one passed at construction time)
122
123							=head3 OUTPUT
124
125							An ArrayRef
126
127							=head3 DESCRIPTION
128
129							Returns the list of folder plain files.
130
131							=cut
132
133							#=============================================================
134							sub plainfiles {
135	1			1	1	2	my $self = shift;
136
137	1	50				34	return undef unless ( -e $self->path );
138	1					32	return [ _pf( $self->path ) ];
139							}
140
141							#=============================================================
142
143							=head2 _pf
144
145							=head3 INPUT
146
147							$path: the path to look for docs
148
149							=head3 OUTPUT
150
151							A sorted list of docs.
152
153							=head3 DESCRIPTION
154
155							Private function. Returns the sorted list of regular
156							files in current folder.
157
158							=cut
159
160							#=============================================================
161							sub _pf {
162	1	50		1		4	my $dir = shift or return undef;
163	1	50				10	my $dh = DirHandle->new($dir) or die "can't opendir $dir: $!";
164							return sort
165	1					22	grep { -f }
166	1					6	map { "$dir/$_" }
167	1					104	grep { !/^\./ }
	3					50
168							$dh->read();
169							}
170
171							#=============================================================
172
173							=head2 _get_path_from_cache
174
175							=head3 INPUT
176
177							None
178
179							=head3 OUTPUT
180
181							The cache path or undef
182
183							=head3 DESCRIPTION
184
185							Tries to retrieve a current path cached for the root one or
186							returns undef
187
188							=cut
189
190							#=============================================================
191							sub _get_path_from_cache {
192	3			3		5	my $self = shift;
193	3					100	return $self->cache->get( $self->cache_key );
194							}
195
196							sub _set_path_in_cache {
197	3			3		6	my ( $self, $path ) = @_;
198	3	50				19	return $self->clear_cache() unless ( $path );
199	0					0	return $self->cache->set( $self->cache_key, $path );
200							}
201
202							sub clear_cache {
203	6			6	0	13	my $self = shift;
204	6					178	$self->cache->remove( $self->cache_key );
205	6					940	return 1;
206							}
207
208							#=============================================================
209
210							=head2 dir_walk - traverses recursively a folder
211
212							=head3 INPUT
213
214							An hash with keys:
215							code: a coderef to apply to each file
216
217							=head3 OUTPUT
218
219							-1 if root file or folder are not existent, else
220							the number of valid files.
221
222							=head3 DESCRIPTION
223
224							Traverse a directory, triggering a sub for each file found.
225							The sub should return 1 if file is good, 0 otherwise.
226							It is meant to recursively process a folder in one shot.
227							This is the minimal logic to traverse a tree, for added,
228							features look at dir_walk_max.
229
230							=cut
231
232							#=============================================================
233							sub dir_walk {
234	1			1	1	4	my ( $self, %params ) = @_;
235	1					3	my $code = $params{code};
236	1	50				3	return undef unless $code;
237	1					34	return $self->_walk( $self->path, $code, 0);
238							}
239
240							#=============================================================
241
242							=head2 _walk
243
244							=head3 INPUT
245
246							$top: subtree root
247							$code: code to execute against each valid file
248							$count: number of valid files till now...
249
250							=head3 OUTPUT
251
252							The number of valid files.
253
254							=head3 DESCRIPTION
255
256							Traverse the subtree.
257
258							=cut
259
260							#=============================================================
261							sub _walk {
262	2			2		7	my ( $self, $top, $code, $count ) = @_;
263	2					3	my $DIR;
264	2	50				80	unless (opendir $DIR, $top) {
265	0					0	$self->log->error( "Couldn’t open directory $top: $!" );
266	0					0	return;
267							}
268	2					7	my $file;
269	2					43	while ($file = readdir $DIR) {
270	6					758	my $item = "$top/$file";
271	6	100				116	if ( -f $item ) {
		50
272	1					5	$count += $code->($item);
273							} elsif (-d $item ) {
274	5	100	100			42	next if $file eq '.' \|\| $file eq '..';
275	1					6	$count = $self->_walk( $item, $code, $count );
276							}
277							}
278	2					28	closedir $DIR;
279	2					18	return $count;
280							}
281
282							#=============================================================
283
284							=head2 dir_walk_max - traverses recursively a folder, stops after
285							max valid files found or all tree has been
286							traversed
287
288							=head3 INPUT
289
290							An hash with keys:
291							code: a coderef to apply to
292							max: max number of items to evaluate
293
294							=head3 OUTPUT
295
296							-1 if root file or folder are not existent, else
297							the number of valid files.
298
299							=head3 DESCRIPTION
300
301							Traverse a directory, triggering a sub for each file found.
302							The sub should return 1 if file is good, 0 otherwise.
303							The method stops when all files are consumed or max number
304							of good files is reached.
305
306
307							=cut
308
309							#=============================================================
310							sub dir_walk_max {
311	3			3	1	15	my ( $self, %params ) = @_;
312	3					7	my $code = $params{code};
313	3					7	my $max = $params{max};
314	3	50	33			20	return undef unless ( $code && $max );
315	3					9	my $rel_path = $self->_get_path_from_cache();
316	3					529	return $self->_walk_max( $self->path, 0, $max, $code, $rel_path );
317							}
318
319
320							#=============================================================
321
322							=head2 _walk_max
323
324							=head3 INPUT
325
326							root : the root folder to start with
327							count : number of valid items processed
328							rel_path : the relative path from root
329							max : max number of valid items
330							code : code to run against each item
331
332							=head3 OUTPUT
333
334							The number of valid items processed
335
336							=head3 DESCRIPTION
337
338							Inner loop to analize each item of the tree.
339							it calls recursively itself for each sub-tree.
340							Ends when 1 of these conditions is met:
341							- no more files found
342							- max number of valid items reached
343							this number is set to be 10 times the max number of valid items.
344
345
346							=cut
347
348							#=============================================================
349							sub _walk_max {
350	3			3		8	my ( $self, $root, $count, $max, $code, $rel_path ) = @_;
351
352	3	50				7	$self->log->info(sprintf("[%06d]:Scanning %s", $count, $rel_path)) if( $rel_path );
353	3	50				7	my $path = ( $rel_path ) ? "$root/$rel_path" : $root;
354	3					10	$self->_set_path_in_cache( $rel_path );
355
356	3					7	my $DIR;
357	3	50				116	unless (opendir $DIR, $path) {
358	0					0	$self->log->error( "Couldn’t open directory $path $!" );
359	0					0	return undef;
360							}
361
362	3					9	my $file;
363	3		100			79	while ( defined($file = readdir $DIR) && ( $count < $max ) ) {
364	11					94	my $item = "$path/$file";
365	11	100				152	if ( -f $item ) {
		50
366	9					32	$count += $code->($item);
367							} elsif (-d $item ) {
368	2	50	66			19	next if $file eq '.' \|\| $file eq '..';
369	0	0				0	$count = $self->_walk_max( $root, $count, $max, $code, ( $rel_path ) ? $rel_path.'/'.$file : $file );
370							}
371							}
372	3					77	closedir $DIR;
373	3					17	undef $DIR;
374	3	100				8	unless ( $file ) {
375	1	50				7	( $rel_path ) ? $self->_set_path_in_cache( join('/', splice(@{[split(m\|/\|, $rel_path)]},0,-1))) :
	0					0
376							$self->clear_cache();
377							}
378	3					6	undef $file;
379	3					20	return $count;
380							}
381
382							#=============================================================
383
384							=head2 plainfiles_recursive
385
386							=head3 INPUT
387
388							$path: a path to start from.
389							$avoid: arrayref con lista risorse da evitare
390							$suffix: arrayref con lista suffissi da processare
391							$found: ref sub callback per ogni risorsa
392
393							=head3 OUTPUT
394
395							An arrayref.
396
397							=head3 DESCRIPTION
398
399							Returns recursively the list of all files from passed folder.
400
401							=cut
402
403							#=============================================================
404							sub plainfiles_recursive {
405	1			1	1	9311	my ( $self, $path, $found ) = @_;
406
407	1	50	33			17	my $dir = ( $self && ref $self ) ? $path \|\| $self->{path} : $path;
			33
408	1	50				5	return undef unless $dir;
409
410	1	50				5	$found = \&_found unless $found;
411
412	1					3	@rec_files = ();
413	1					109	find ( $found, ( $dir ) );
414	1					8	return \@rec_files;
415							}
416
417							sub _found {
418	3	100		3		232	return if ( -d );
419	1	50	33			21	if ( -f && !/^\./ && !/\~$/ ) {
			33
420	1					34	push @rec_files, $File::Find::name;
421							}
422							}
423
424							sub _collect_as_hash {
425	0	0	0	0			return if ( -d && /\.svn/ );
426	0	0	0				if ( -f && !/^\./ && !/\~$/ ) {
			0
427	0	0					if ( m\|(\w+)\.(\w+)$\| ) {
428	0						push @{ $files_as_hash->{ $2 } }, $File::Find::name;
	0
429							}
430							}
431							}
432
433							sub _collect_of_type {
434	0	0	0	0			return if ( -d && /\.\w+/ );
435	0	0	0				if ( -f && !/^\./ && !/\~$/ ) {
			0
436	0	0					if ( m\|(\w+)\.(\w+)$\| ) {
437	0	0					if ( $2 ~~ @types ) {
438	0						push @rec_files, $File::Find::name;
439							}
440							}
441							}
442							}
443
444							#=============================================================
445
446							=head2 plainfiles_recursive_as_hash
447
448							=head3 INPUT
449
450							$path : root path (not mandatory if already passed at
451							construction time)
452
453							=head3 OUTPUT
454
455							An HashRef.
456
457							=head3 DESCRIPTION
458
459							Recursively collects files in an hash indexed by file suffix.
460
461							=cut
462
463							#=============================================================
464							sub plainfiles_recursive_as_hash {
465	0			0	1		my ( $self, $path ) = @_;
466
467	0	0	0				my $dir = ( $self && ref $self ) ? $path \|\| $self->{path} : $path;
			0
468	0	0					return undef unless $dir;
469
470	0						$files_as_hash = {};
471	0						$self->plainfiles_recursive( $dir, \&_collect_as_hash );
472
473							# foreach my $file ( @$rec_files ) {
474							# if ( $file =~ m\|(\w+)\.(\w+)$\| ) {
475							# push @{ $files_as_hash->{ $2 } }, $file;
476							# }
477							# }
478
479	0						return $files_as_hash;
480							}
481
482							#=============================================================
483
484							=head2 plainfiles_recursive_of_type
485
486							=head3 INPUT
487
488							@types: list of valid file suffixes
489
490							=head3 OUTPUT
491
492							An ArrayRef or undef in case of errors.
493
494							=head3 DESCRIPTION
495
496							Recursively collects all files with valid suffixes
497
498							=cut
499
500							#=============================================================
501							sub plainfiles_recursive_of_type {
502	0			0	1		my ( $self, @ptypes ) = @_;
503
504	0	0					return undef unless $self->{path};
505
506	0						@types = @ptypes;
507	0						@rec_files = ();
508	0						$self->plainfiles_recursive( $self->{path}, \&_collect_of_type );
509	0						return \@rec_files;
510							}
511
512							#=============================================================
513
514							=head2 process_dir
515
516							=head3 INPUT
517
518							$dir : dir to start with
519							$suffix : arrayref of valid file suffixes
520							$depth : depth in processing subdirs
521
522							=head3 OUTPUT
523
524							An arrayref
525
526							=head3 DESCRIPTION
527
528							Workflow:
529							get a single dir as input and the level of recursions
530							get the list of valid dir files and process them
531							get the list of dir direct subdirs
532							depth--
533							if depth > 0
534							call process_dir foreach subdir
535
536							=cut
537
538							#=============================================================
539				0	1		sub process_dir ($$) {
540
541							}
542
543							=head1 AUTHOR
544
545							Marco Masetti, C<< <marco.masetti at softeco.it> >>
546
547							=head1 BUGS
548
549							Please report any bugs or feature requests to C<bug-ishare-filehandler at rt.cpan.org>, or through
550							the web interface at L<http://rt.cpan.org/NoAuth/ReportBug.html?Queue=Mir-FileHandler>. I will be notified, and then you'll
551							automatically be notified of progress on your bug as I make changes.
552
553							=head1 SUPPORT
554
555							You can find documentation for this module with the perldoc command.
556
557							perldoc Mir::FileHandler
558
559							=head1 LICENSE AND COPYRIGHT
560
561							Copyright 2013 Marco Masetti.
562
563							This program is free software; you can redistribute it and/or modify it
564							under the terms of either: the GNU General Public License as published
565							by the Free Software Foundation; or the Artistic License.
566
567							See http://dev.perl.org/licenses/ for more information.
568
569							=cut
570
571	4			4		58	no Moose;
	4					11
	4					41
572							__PACKAGE__->meta->make_immutable;