File Coverage

blib/lib/Text/Stripper.pm
Criterion Covered Total %
statement 47 48 97.9
branch 19 26 73.0
condition n/a
subroutine 4 4 100.0
pod 0 1 0.0
total 70 79 88.6


line stmt bran cond sub pod time code
1             package Text::Stripper;
2              
3             # DOCUMENT: Text-Stripper, strips of text.
4             # VERSION: $Revision: 1.18 $
5             # DATE: $Date: 2007-06-14 20:00:01 $
6             # AUTHOR: M. Beranek
7             # COPYRIGHT: M. Beranek
8              
9 3     3   97132 use 5.006001;
  3         11  
  3         126  
10 3     3   18 use strict;
  3         8  
  3         105  
11 3     3   17 use warnings;
  3         17  
  3         13981  
12              
13             require Exporter;
14              
15             our @ISA = qw(Exporter);
16              
17             # Items to export into callers namespace by default. Note: do not export
18             # names by default without a very good reason. Use EXPORT_OK instead.
19             # Do not simply export all your public functions/methods/constants.
20              
21             # This allows declaration use Text::Stripper ':all';
22             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
23             # will save memory.
24             #our %EXPORT_TAGS = ( 'all' => [ qw(
25             # stripof
26             #) ] );
27             #
28             #our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
29              
30             our @EXPORT_OK = qw(
31             stripof
32             breakpoints
33             );
34              
35             #my $cvsRev = '$Revision: 1.18 $';
36             #$cvsRev =~ s/\$Revision:\s//g;
37             #$cvsRev =~ s/\s\$//g;
38              
39             our $VERSION = '1.16';
40              
41             # Possible breakpoints:
42             our @breakpoints = ( ' ', '\t', '\.', ',', ';', ':', '!',
43             '-', '\?', '\n', '\r', '\/', '\|', '\(', '\)' );
44              
45             # ----------------------------------------------------------------------
46             # Shortens text at a possible position.
47             # $text - text to be shortend
48             # $len - minimum length
49             # $tol - maximum tolerance
50             # $max - 0 = shorten as early as possible | 1 = shorten as late as possible
51             # $dots - 0 = no dots after shortening | 1 add dots after shortening
52             # ----------------------------------------------------------------------
53             sub stripof {
54            
55             # Parameter:
56 35 50   35 0 92 my $text = shift or return;
57 35 50       63 my $len = shift or return $text;
58 35 50       65 my $tol = shift or return substr($text, $len);
59 35 100       61 my $max = shift or 0;
60 35 50       60 my $dots = shift or 0;
61            
62             # Possible breakpoints:
63             #my @breakpoints = ( ' ', '\t', '.', ',', ';', ':', '!', '-', '?', '/', '|', '(', ')' );
64            
65             # minimum / maximum length:
66 35         44 my $maxLen = $len + $tol;
67 35         31 my $minLen = $len;
68            
69             # current length:
70 35         37 my $textLen = length( $text );
71            
72             # if search for latest break:
73 35 100       81 if( $max ){
74             # stop, if text is shorter than maximum:
75 20 100       38 if( $textLen <= $maxLen ){
76 7         22 return $text;
77             }
78             }
79            
80             # shortest possible text (will be in returned string always):
81 28         33 my $minText = substr( $text, 0, $minLen );
82            
83             # longest possible text:
84 28         45 my $maxText = substr( $text, 0, $maxLen );
85            
86             # text between minimum and maximum:
87 28         37 my $restText = substr( $text, $minLen, $tol );
88            
89             # buffer for return-string:
90 28         30 my $shortText = "";
91            
92             # buffer for additional-text:
93 28         29 my $addText = "";
94            
95             # find breakpoint as late as possible:
96 28 100       44 if( $max ){
97            
98             # we're just working on $resttext:
99 13         14 $addText = $restText;
100            
101             # previously hardcoded regexp:
102             # $addText =~ s/(.*)[ ,\t\.;:!-\?\/\|\(\)].+/$1/gi;
103            
104             # use regexp to find possible breakpoints. regexps are greedy,
105             # so they will find the last possible space:
106 13         28 my $regexpBreakpoints = join '', @breakpoints;
107             # print "X:".$regexpBreakpoints.":X";
108 13         137 $addText =~ s/(.*)[$regexpBreakpoints].*/$1/g;
109            
110             # if no space was found:
111 13 50       36 if( $addText eq '' ){
112             # return complete text:
113 0         0 $addText = $restText;
114             }
115             # return minimum + additional:
116 13         28 $shortText = "$minText$addText";
117             }
118            
119             # search for first possible break:
120             else {
121             # emty additional text:
122 15         17 $addText = "";
123            
124             # find first break:
125             # test all characters in the $restText
126 15         37 for( my $idx = 0; $idx < $tol; $idx ++ ){
127            
128             # current character:
129 39         46 my $char = substr( $restText, $idx, 1 );
130            
131             # is character a space?
132 39         40 my $isSpace = 0;
133            
134             # test if character matches on of the
135             # space-characters defined in @breakpoints:
136 39         51 foreach( @breakpoints ){
137            
138             # if caharcter matches space:
139 245 100       417 if( $char eq $_ ){
140            
141             # mark as space, skip rest of @breakpoints:
142 15         16 $isSpace = 1;
143 15         16 last;
144             }
145            
146            
147             }
148            
149             # if we didn't find a space:
150 39 100       63 if( ! $isSpace ){
151             # append the character to the buffer:
152 24         54 $addText .= $char;
153             }
154             # if we found a space:
155             else {
156             # stop here:
157 15         17 last;
158             }
159            
160             }
161             # return text = minimum-text + additional-text:
162 15         21 $shortText = "$minText$addText";
163             }
164            
165             # if we want some dots on the shortened text:
166 28 50       54 if( $dots ){
167             # only if text is really shorter than the original text:
168 28 50       58 if( length($shortText) < length($text) ){
169             # append dots:
170 28         35 $shortText .= "...";
171             }
172             }
173            
174             # return the shortened text:
175 28         152 return $shortText;
176            
177             }
178              
179              
180             1;
181             __END__