File Coverage

blib/lib/File/Find/Duplicates.pm
Criterion Covered Total %
statement 33 33 100.0
branch 4 6 66.6
condition n/a
subroutine 7 7 100.0
pod 1 1 100.0
total 45 47 95.7


line stmt bran cond sub pod time code
1             package File::Find::Duplicates;
2              
3             =head1 NAME
4              
5             File::Find::Duplicates - Find duplicate files
6              
7             =head1 SYNOPSIS
8              
9             use File::Find::Duplicates;
10              
11             my @dupes = find_duplicate_files('/basedir1', '/basedir2');
12              
13             foreach my $dupeset (@dupes) {
14             printf "Files %s (of size %d) hash to %s\n",
15             join(", ", @{$dupeset->files}), $dupeset->size, $dupeset->md5;
16             }
17              
18             =head1 DESCRIPTION
19              
20             This module provides a way of finding duplicate files on your system.
21              
22             =head1 FUNCTIONS
23              
24             =head2 find_duplicate_files
25              
26             my @dupes = find_duplicate_files('/basedir1', '/basedir2');
27              
28             When passed a base directory (or list of such directories) it returns
29             a list of objects with the following methods:
30              
31             =head2 files
32              
33             A listref of the names of the duplicate files.
34              
35             =head2 size
36              
37             The size of the duplicate files.
38              
39             =head2 md5
40              
41             The md5 sum of the duplicate files.
42              
43             =head1 TODO
44              
45             Check the contents of tars, zipfiles etc to ensure none of these also
46             exist elsewhere (if so requested).
47              
48             =head1 SEE ALSO
49              
50             L<File::Find>, L<Digest::MD5>.
51              
52             =head1 AUTHOR
53              
54             Tony Bowden
55              
56             =head1 BUGS and QUERIES
57              
58             Please direct all correspondence regarding this module to:
59             bug-File-Find-Duplicates@rt.cpan.org
60              
61             =head1 COPYRIGHT AND LICENSE
62              
63             Copyright (C) 2001-2005 Tony Bowden.
64              
65             This program is free software; you can redistribute it and/or modify it under
66             the terms of the GNU General Public License; either version 2 of the License,
67             or (at your option) any later version.
68              
69             This program is distributed in the hope that it will be useful, but WITHOUT
70             ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
71             FOR A PARTICULAR PURPOSE.
72              
73             =cut
74              
# Pragmas: strict is mandatory; warnings added (was missing) so coding
# mistakes surface at runtime rather than passing silently.
use strict;
use warnings;

use File::Find;
use Digest::MD5;

require Exporter;

# `our` replaces the obsolete `use vars` pragma; these are genuine
# package globals required by the Exporter protocol.
our @ISA     = qw/Exporter/;
our @EXPORT  = qw/find_duplicate_files/;
our $VERSION = '1.00';

# Lightweight accessor class for one set of duplicates:
#   files -> arrayref of pathnames, size -> byte count, md5 -> hex digest.
use Class::Struct 'File::Find::Duplicates::Set' =>
	[ files => '@', size => '$', md5 => '$' ];
# Walk the given directories and return a list of
# File::Find::Duplicates::Set objects, one per group of files that share
# both their byte size and their MD5 digest.
#
# Arguments: one or more base directories (passed straight to File::Find).
# Returns:   list of Set objects (possibly empty); each has files (listref),
#            size (bytes) and md5 (hex digest) accessors.
sub find_duplicate_files {
	my (@dupes, %files);

	# First pass: bucket every plain file by size — only same-sized
	# files can possibly be duplicates, so this avoids hashing most files.
	# (stat(_))[7] reuses the stat buffer populated by the -f test.
	find sub {
		-f && push @{ $files{ (stat(_))[7] } }, $File::Find::name;
	}, @_;

	foreach my $size (sort { $b <=> $a } keys %files) {
		next unless @{ $files{$size} } > 1;
		my %md5;
		foreach my $file (@{ $files{$size} }) {
			# Three-arg open: the old two-arg form would treat a
			# filename starting with '>', '<' or '|' as a mode /
			# pipe, which is both a bug and a security hole.
			# Unreadable files are silently skipped, as before.
			open(my $fh, '<', $file) or next;
			binmode($fh);    # digest raw bytes, not line-ending-translated text
			push @{ $md5{ Digest::MD5->new->addfile($fh)->hexdigest } }, $file;
		}

		# Second pass: within a size bucket, any digest shared by
		# more than one file is a genuine duplicate set.
		push @dupes, map File::Find::Duplicates::Set->new(
			files => $md5{$_},
			size  => $size,
			md5   => $_,
		),
		grep @{ $md5{$_} } > 1, keys %md5;
	}
	return @dupes;
}
111              
            # A Perl module must evaluate to a true value when loaded;
            # this song-lyric string stands in for the customary `1;`.
            # Do not change it to a false/empty value or `require` will fail.
112             return q/
113             dissolving ... removing ... there is water at the bottom of the ocean
114             /;