File Coverage

blib/lib/JavaScript/Squish.pm
Criterion Covered Total %
statement 254 304 83.5
branch 89 144 61.8
condition 31 54 57.4
subroutine 20 23 86.9
pod 19 19 100.0
total 413 544 75.9


line stmt bran cond sub pod time code
1              
2             package JavaScript::Squish;
3              
4             =head1 NAME
5              
6             JavaScript::Squish - Reduce/Compact JavaScript code to as few characters as possible.
7              
8             =head1 SYNOPSIS
9              
10             use JavaScript::Squish;
11             my $compacted = JavaScript::Squish->squish(
12             $javascript,
13             remove_comments_exceptions => qr/copyright/i )
14             or die $JavaScript::Squish::err_msg;
15              
16             # OR, to just do a few steps #
17              
18             my $c = JavaScript::Squish->new();
19             $c->data( $javascript );
20             $c->extract_strings_and_comments();
21             $c->replace_white_space();
22             my $new = $c->data();
23              
24             =head1 DESCRIPTION
25              
26             This module provides methods to compact javascript source down to just what is needed. It can remove all comments, put everything on one line (semi-)safely, and remove extra whitespace.
27              
28             Any one of the various compacting techniques can be applied individually, or with in any group.
29              
30             It also provides a means by which to extract all text literals or comments in separate arrays in the order they appear.
31              
32             Since JavaScript eats up bandwidth, this can be very helpful, and you can then be free to properly comment your JavaScript without fear of burning up too much bandwidth.
33              
34             =head2 EXPORT
35              
36             None by default.
37              
38             "squish" may be exported via "use JavaScript::Squish qw(squish);"
39              
40             =head1 METHODS
41              
42             =head2 Bsquish($js [, %options] )>
43              
44             Class method. This is a wrapper around all methods in here, to allow you to do all compacting operations in one call.
45              
46             my $squished = JavaScript::Squish->squish( $javascript );
47              
48             Current supported options:
49              
50             =over
51              
52             =item remove_comments_exceptions : array ref of regexp's
53              
54             Bsquish($js, remove_comments_exceptions =E [ qr/copyright/i ] )>
55              
56             Any comment strings matching any of the supplied regexp's will not be removed. This is the recommended way to retain copyright notices, while still compacting out all other comments.
57              
58             =back
59              
60             =head2 Bnew()>
61              
62             Constructor. Currently takes no options. Returns JavaScript::Squish object.
63              
64             NOTE: if you want to specify a "remove_comments_exceptions" option via one of these object, you must do so directly against the C method (SEE BELOW).
65              
66             =head2 B<$djc-Edata($js)>
67              
68             If the option C<$js> is passed in, this sets the javascript that will be worked on.
69              
70             If not passed in, this returns the javascript in whatever state it happens to be in (so you can step through, and pull the data out at any time).
71              
72             =head2 B<$djc-Estrings()>
73              
74             Returns all strings extracted by either C or C (NOTE: be sure to call one of the aforementioned extract methods prior to C, or you won't get anything back).
75              
76             =head2 B<$djc-Ecomments()>
77              
78             Returns all comments extracted by either C or C (NOTE: be sure to call one of the aforementioned extract methods prior to C, or you won't get anything back).
79              
80             =head2 B<$djc-Edetermine_line_ending()>
81              
82             Method to automatically determine the line ending character in the source data.
83              
84             =head2 B<$djc-Eeol_char("\n")>
85              
86             Method to set/override the line ending character which will be used to parse/join lines. Set to "\r\n" if you are working on a DOS / Windows formatted file.
87              
88             =head2 B<$djc-Eextract_strings_and_comments()>
89              
90             Finds all string literals (eg. things in quotes) and comments (// or /*...*/) and replaces them with tokens of the form "\0\0N\0\0" and "\0\0_N_\0\0" respectively, where N is the occurrance number in the file, and \0 is the null byte. The strings are stored inside the object so they may be resotred later.
91              
92             After calling this, you may retrieve a list of all extracted strings or comments using the C or C methods.
93              
94             =head2 B<$djc-Eextract_literal_strings()>
95              
96             This is a wrapper around C, which will restore all comments afterwards (if they had not been stripped prior to its call).
97              
98             NOTE: sets C<$djc-Estrings()>
99              
100             =head2 B<$djc-Eextract_comments()>
101              
102             This is a wrapper around C, which will restore all literal strings afterwards (if they had not been stripped prior to its call).
103              
104             NOTE: sets C<$djc-Ecomments()>
105              
106             =head2 B<$djc-Ereplace_white_space()>
107              
108             Per each line:
109              
110             =over
111              
112             =item * Removes all begining of line whitespace.
113              
114             =item * Removes all end of line whitespace.
115              
116             =item * Combined all series of whitespace into one space character (eg. s/\s+/ /g)
117              
118             =back
119              
120             Comments and string literals (if still embeded) are untouched.
121              
122             =head2 B<$djc-Eremove_blank_lines()>
123              
124             ...does what it says.
125              
126             Comments and string literals (if still embeded) are untouched.
127              
128             =head2 B<$djc-Ecombine_concats()>
129              
130             Removes any string literal concatenations. Eg.
131              
132             "bob and " + "sam " + someVar;
133              
134             Becomes:
135              
136             "bob and sam " + someVar
137              
138             Comments (if still embeded) are untouched.
139              
140             =head2 B<$djc-Ejoin_all()>
141              
142             Puts everything on one line.
143              
144             Coments begining with "//", if still embeded, are the exception, as they require a new line character at the end of the comment.
145              
146             =head2 B<$djc-Ereplace_extra_whitespace()>
147              
148             This removes any excess whitespace. Eg.
149              
150             if (someVar = "foo") {
151              
152             Becomes:
153              
154             if(someVar="foo"){
155              
156             Comments and string literals (if still embeded) are untouched.
157              
158             =head2 B<$djc-Eremove_comments(%options)>
159              
160             Current supported options:
161              
162             =over
163              
164             =item exceptions : array ref of regexp's
165              
166             B<$djc-Eremove_comments( exceptions =E [ qr/copyright/i ] )>
167              
168             Any comment strings matching any of the supplied regexp's will not be removed. This is the recommended way to retain copyright notices, while still compacting out all other comments.
169              
170             =back
171              
172             NOTE: this is destructive (ie. you cannot restore comments after this has been called).
173              
174             =head2 B<$djc-Erestore_comments()>
175              
176             All comments that were extracted with C<$djc-Eextract_strings_and_comments()> or C<$djc-Eextract_comments()> are restored. Comments retain all spacing and extra lines and such.
177              
178             =head2 B<$djc-Erestore_literal_strings()>
179              
180             All string literals that were extracted with C<$djc-Eextract_strings_and_comments()> or C<$djc-Eextract_comments()> are restored. String literals retain all spacing and extra lines and such.
181              
182             =head2 B<$djc-Ereplace_final_eol()>
183              
184             Prior to this being called, the end of line may not terminated with a new line character (especially after some of the steps above). This assures the data ends in at least one of whatever is set in C<$djc-Eeol_char()>.
185              
186             =head1 NOTES
187              
188             The following should only cause an issue in rare and odd situations... If the input file is in dos format (line termination with "\r\n" (ie. CR LF / Carriage return Line feed)), we'll attempt to make the output the same. If you have a mixture of embeded "\r\n" and "\n" characters (not escaped, those are still safe) then this script may get confused and make them all conform to whatever is first seen in the file.
189              
190             The line-feed stripping isn't as thorough as it could be. It matches the behavior of JSMIN, and goes one step better with replace_extra_whitespace(), but I'm certain there are edge cases that could be optimised further. This shouldn't cause a noticable increase in size though.
191              
192             =head1 TODO
193              
194             Function and variable renaming, and other more dangerous compating techniques.
195              
196             Currently, JavaScript::Squish::err_msg never gets set, as we die on any real errors. We should look into returning proper error codes and setting this if needed.
197              
198             Fix Bugs :-)
199              
200             =head1 BUGS
201              
202             There are a few bugs, which may rear their head in some minor situations.
203              
204             =over
205              
206             =item Statements not terminated by semi-colon.
207              
208             These should be ok now - leaving a note here because this hasn't been thoroughly tested (I don't have any javascript to test with that meets this criteria).
209              
210             This would affect statements like the following:
211              
212             i = 5.4
213             j = 42
214              
215             This used to become "i=5.4 j=42", and would generate an error along the lines of "expected ';' before statement".
216              
217             The linebreak should be retained now. Please let me know if you see otherwise.
218              
219             =item Ambiguous operator precidence
220              
221             Operator precidence may get screwed up in ambiguous statements. Eg. "x = y + ++b;" will be compacted into "x=y+++b;", which means something different.
222              
223             =back
224              
225             Still looking for them. If you find some, let us know.
226              
227             =head1 SEE ALSO
228              
229             =over
230              
231             =item Latest releases, bugzilla, cvs repository, etc:
232              
233             https://developer.berlios.de/projects/jscompactor/
234              
235             =item Simlar projects:
236              
237             http://crockford.com/javascript/jsmin
238             http://search.cpan.org/%7Epmichaux/JavaScript-Minifier/lib/JavaScript/Minifier.pm
239             http://dojotoolkit.org/docs/shrinksafe
240             http://dean.edwards.name/packer/
241              
242             =back
243              
244             =head1 AUTHOR
245              
246             Joshua I. Miller
247              
248             =head1 COPYRIGHT AND LICENSE
249              
250             Copyright (c) 2005 by CallTech Communications, Inc.
251              
252             This library is free software; you can redistribute it and/or modify
253             it under the same terms as Perl itself, either Perl version 5.8.3 or,
254             at your option, any later version of Perl 5 you may have available.
255              
256             =cut
257              
258 3     3   27560 use 5.00503;
  3         13  
  3         167  
259 3     3   17 use strict;
  3         4  
  3         142  
260 3     3   18 use Carp qw(croak carp);
  3         10  
  3         228  
261              
262             require Exporter;
263 3     3   22 use vars qw($VERSION @ISA @EXPORT @EXPORT_OK %EXPORT_TAGS);
  3         4  
  3         10647  
264             @ISA = qw(Exporter);
265              
266             %EXPORT_TAGS = ( 'all' => [ qw( squish ) ] );
267              
268             @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
269              
270             @EXPORT = qw( );
271              
272             $VERSION = '0.07';
273              
274             sub squish
275             {
276 1     1 1 1801 my $this = shift;
277              
278             # squish() can be used as a class method or instance method
279 1 50       6 unless (ref $this)
280             {
281 1         6 $this = $this->new();
282             }
283              
284             {
285 1 50       3 my $data = (ref($_[0]) eq 'SCALAR') ? ${(shift)} : shift;
  1         5  
  0         0  
286 1         6 $this->data($data);
287             }
288 1 50       6 my %opts = (ref($_[0]) eq 'HASH') ? %{$_[0]} : @_;
  0         0  
289              
290             # determine line ending
291 1 50       7 print STDERR "Determining line ending format (LF || CRLF)...\n" if $opts{DEBUG};
292 1         5 $this->determine_line_ending();
293              
294             # extract literal strings and comments
295 1 50       4 print STDERR "Replacing literal strings and comments...\n" if $opts{DEBUG};
296 1         5 $this->extract_strings_and_comments();
297              
298             # remove comments
299 1 50       5 print STDERR "Removing comments...\n" if $opts{DEBUG};
300 1         4 my %rc_opts = ();
301 1 50       4 $rc_opts{exceptions} = $opts{remove_comments_exceptions} if $opts{remove_comments_exceptions};
302 1         8 $this->remove_comments(%rc_opts);
303              
304             # replace white space
305 1 50       4 print STDERR "Replacing white space...\n" if $opts{DEBUG};
306 1         11 $this->replace_white_space();
307              
308             # remove blank lines
309 1 50       7 print STDERR "Removing blank lines...\n" if $opts{DEBUG};
310 1         4 $this->remove_blank_lines();
311              
312             # combine literal string concatenators
313 1 50       4 print STDERR "Combining literal string concatenators...\n" if $opts{DEBUG};
314 1         5 $this->combine_concats();
315              
316             # join all lines
317 1 50       3 print STDERR "Joining all lines...\n" if $opts{DEBUG};
318 1         4 $this->join_all();
319              
320             # replace extra extra whitespace
321 1 50       19 print STDERR "Replacing extra extra whitespace...\n" if $opts{DEBUG};
322 1         4 $this->replace_extra_whitespace();
323              
324             # restore literals
325 1 50       3 print STDERR "Restoring all literal strings...\n" if $opts{DEBUG};
326 1         5 $this->restore_literal_strings();
327              
328             # replace final EOL
329 1 50       4 print STDERR "Replace final EOL...\n" if $opts{DEBUG};
330 1         4 $this->replace_final_eol();
331              
332 1         3 return $this->data;
333             }
334              
335             sub new
336             {
337 2     2 1 2354 my $proto = shift;
338 2   33     18 my $class = ref($proto) || $proto;
339              
340 2         17 my $this = {
341             data => '',
342             strings => [ ],
343             comments => [ ],
344             eol => "\n",
345             _strings_extracted => 0, # status var
346             _comments_extracted => 0, # status var
347             };
348 2         8 bless $this, $class;
349              
350 2         6 return $this;
351             }
352              
353             sub data
354             {
355 61     61 1 693 my $this = shift;
356 61 100       99 if ($_[0]) {
357 23 50       58 my $data = (ref($_[0]) eq 'SCALAR') ? ${$_[0]} : $_[0];
  0         0  
358 23         84 $this->{data} = $_[0];
359             } else {
360 38         478 return $this->{data};
361             }
362             }
363              
364             sub strings
365             {
366 12     12 1 17 my $this = shift;
367 12 100       24 if ($_[0]) {
368 6         13 $this->{strings} = $_[0];
369             } else {
370 6         15 return $this->{strings};
371             }
372             }
373              
374             sub comments
375             {
376 12     12 1 15 my $this = shift;
377 12 100       24 if ($_[0]) {
378 6         15 $this->{comments} = $_[0];
379             } else {
380 6         15 return $this->{comments};
381             }
382             }
383              
384             sub eol_char
385             {
386 163     163 1 712 my $this = shift;
387 163 100       243 if ($_[0]) {
388 4         22 $this->{eol} = $_[0];
389             } else {
390 159         632 return $this->{eol};
391             }
392             }
393              
394             sub determine_line_ending
395             {
396 2     2 1 191 my $this = shift;
397              
398             # Where is the first LF character?
399 2         8 my $lf_position = index($this->data, "\n");
400 2 50       10 if ($lf_position == -1)
401             { # not found, set to default, cause it won't (shouldn't) matter
402 0         0 $this->eol_char("\n");
403             } else {
404 2 50       11 if ($lf_position == 0)
405             { # found at first char, so there is no prior character to observe
406 0         0 $this->eol_char("\n");
407             } else {
408             # Is the character immediately before it a CR?
409 2         7 my $test_cr = substr($this->data, ($lf_position -1),1);
410 2 50       9 if ($test_cr eq "\r")
411             {
412 0         0 $this->eol_char("\r\n");
413             } else {
414 2         8 $this->eol_char("\n");
415             }
416             }
417             }
418             }
419              
420             # extract_literal_strings() - wrapper around extract_strings_and_comments
421             sub extract_literal_strings
422             {
423 0     0 1 0 my $this = shift;
424              
425             # don't do it twice...
426 0 0       0 return if $this->{_strings_extracted};
427              
428             # save state of comments
429 0         0 my $comment_state = $this->{_comments_extracted};
430              
431 0         0 $this->extract_strings_and_comments();
432             # only restore comments if they weren't extraced when we started
433 0 0       0 $this->restore_comments() unless $comment_state;
434             }
435              
436             # extract_comments() - wrapper around extract_strings_and_comments
437             sub extract_comments
438             {
439 0     0 1 0 my $this = shift;
440              
441             # don't do it twice...
442 0 0       0 return if $this->{_comments_extracted};
443              
444             # save state of strings
445 0         0 my $string_state = $this->{_strings_extracted};
446              
447 0         0 $this->extract_strings_and_comments();
448             # only restore strings if they weren't extraced when we started
449 0 0       0 $this->restore_literal_strings() unless $string_state;
450             }
451              
452             sub extract_strings_and_comments
453             {
454 3     3 1 180 my $this = shift;
455              
456             # SAFETY CHECKS
457             # Can't extract strings twice, as the keep the
458             # quotes in the original when we extract them
459 3 100       13 if ($this->{_strings_extracted}) {
460 1         4 $this->restore_literal_strings();
461             }
462             # Restore comments, so that we still get them
463             # in the cache (this could be optimized out)
464             # NOTE: if they had called remove_comments(), then we'll
465             # officially lose all our history of comments here.
466 3 50       14 if ($this->{_comments_extracted}) {
467 0         0 $this->restore_comments();
468             }
469              
470             # reset the instance variables caching strings and comments:
471 3         10 $this->strings([]);
472 3         32 $this->comments([]);
473             # where we'll store the literals
474 3         10 my $strings = $this->strings();
475             # where we'll store the comments
476 3         8 my $comments = $this->comments();
477              
478 3         4 my ($escaped, $quoteChar, $inQuote);
479              
480 3         5 my $lastnws = ''; # last non-whitespace character
481 3         27 my $literal = ""; # literal strings we're building
482 3         5 my $t = ""; # replacement text
483              
484 3         8 my @lines = split(/\r?\n/, $this->data); # dos or unix... output is unix
485             # step through each line
486 3         19 LINE: for (my $i=0; $i<@lines; $i++)
487             {
488             # step through each character
489 141         387 LINE_CHAR: for (my $j=0; $j
490             {
491 1967         2521 my $c = substr($lines[$i],$j,1);
492 1967         2170 my $c2 = substr($lines[$i],$j,2);
493             # look for start of string (if not in one)
494 1967 100       2614 if (! $inQuote)
495             {
496             # double-slash comments
497 1427 100 100     8254 if ($c2 eq "//") {
    100 100        
    100 66        
498 16         32 my $comment = substr($lines[$i],$j);
499 16         17 my $key_num = scalar(@{$comments});
  16         22  
500 16         32 $t .= "\0\0".'_'.$key_num.'_'."\0\0";
501 16         38 $t .= $this->eol_char();
502 16         18 push(@{$comments}, $comment);
  16         41  
503 16         54 next LINE;
504              
505             # slash-star comments
506             } elsif ($c2 eq "/*") {
507 13         16 my $comment = "/*";
508 13         17 my $comstart = $j+2;
509 13         14 my $found_end = 0;
510 13         31 COMM_SEARCH1: for (my $k=($j+2); $k
511             {
512 359         383 my $end = substr($lines[$i],$k,2);
513 359 100       926 if ($end eq "*/") {
514 8         30 $comment .= substr($lines[$i],$comstart,($k+2 - $comstart));
515 8         9 $j = $k+1;
516 8         8 $found_end = 1;
517             #next LINE_CHAR;
518 8         12 last COMM_SEARCH1;
519             }
520             }
521              
522 13 100       31 if (! $found_end)
523             {
524 5         14 $comment .= substr($lines[$i],$comstart).$this->eol_char();
525 5         22 COMM_SEARCH2: for (my $l=($i+1); $l<@lines; $l++)
526             {
527 11         30 for (my $k=0; $k
528             {
529 321         375 my $end = substr($lines[$l],$k,2);
530 321 100       781 if ($end eq "*/") {
531 5         10 $comment .= substr($lines[$l],0,$k+2);
532 5         6 $i = $l;
533 5         6 $j = $k+1;
534 5         7 $found_end = 1;
535             #next LINE_CHAR;
536 5         13 last COMM_SEARCH2;
537             }
538             }
539 6         14 $comment .= $lines[$l].$this->eol_char();
540             }
541             }
542 13 50       23 if (! $found_end)
543             {
544 0         0 die "Unterminated /* */ style comment found around line[$i]\n";
545             } else {
546 13         13 my $key_num = scalar(@{$comments});
  13         22  
547 13         28 $t .= "\0\0".'_'.$key_num.'_'."\0\0";
548             #$t .= $this->eol_char();
549 13         15 push(@{$comments}, $comment);
  13         25  
550 13         43 next LINE_CHAR;
551             }
552              
553             # standard quoted strings, and bare regex's
554             # "/" is considered division if it's preceeded by: )._$\ or alphanum
555             } elsif ( $c eq '"' || $c eq "'" ||
556             ($c eq '/' && $lastnws !~ /[\)\.a-zA-Z0-9_\$\\]/) ) {
557 72         87 $inQuote = 1;
558 72         92 $escaped = 0;
559 72         106 $quoteChar = $c;
560 72         70 $t .= $c;
561 72         73 $literal = '';
562 72 50       253 $lastnws = $c unless $c =~ /\s/;
563              
564             # standard code
565             } else {
566 1326         1267 $t .= $c;
567 1326 100       4763 $lastnws = $c unless $c =~ /\s/;
568             }
569              
570             # else we're in a quote
571             } else {
572 540 100 66     1783 if ($c eq $quoteChar && !$escaped)
    50 33        
573             {
574 72         73 $inQuote = 0;
575 72         67 my $key_num = scalar(@{$strings});
  72         88  
576 72         119 $t .= "\0\0".$key_num."\0\0";
577 72         70 $t .= $c;
578 72         63 push(@{$strings}, $literal);
  72         149  
579 72 50       276 $lastnws = $c unless $c =~ /\s/;
580              
581             } elsif ($c eq "\\" && !$escaped) {
582 0         0 $escaped = 1;
583 0         0 $literal .= $c;
584 0 0       0 $lastnws = $c unless $c =~ /\s/;
585             } else {
586 468         431 $escaped = 0;
587 468         463 $literal .= $c;
588 468 100       1587 $lastnws = $c unless $c =~ /\s/;
589             }
590             }
591             }
592 125 100       181 if ($inQuote) {
593 6         17 $literal .= $this->eol_char();
594             } else {
595 119         208 $t .= $this->eol_char();
596             }
597             }
598              
599 3         11 $this->{_comments_extracted} = 1;
600 3         7 $this->{_strings_extracted} = 1;
601 3         12 $this->comments($comments);
602 3         10 $this->strings($strings);
603 3         12 $this->data($t);
604             }
605              
606             sub replace_white_space
607             {
608 2     2 1 455 my $this = shift;
609              
610             # can't do this if literal strings are still in the thing.
611 2         6 my $string_state = $this->{_strings_extracted};
612 2         4 my $comment_state = $this->{_comments_extracted};
613 2 50 33     557 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
614 0         0 $this->extract_strings_and_comments();
615             }
616              
617 2         7 my @lines = split(/\r?\n/, $this->data);
618              
619             # condense white space
620 2         10 foreach (@lines)
621             {
622 90         280 s/\s+/\ /g;
623 90         159 s/^\s//;
624 90         162 s/\s$//;
625             }
626              
627 2         9 $this->data( join($this->eol_char(), @lines) );
628              
629             # restore strings/comments if needed
630 2 50       6 unless ($string_state) {
631 0         0 $this->restore_literal_strings();
632             }
633 2 50       14 unless ($comment_state) {
634 0         0 $this->restore_comments();
635             }
636             }
637              
638             sub remove_blank_lines
639             {
640 2     2 1 166 my $this = shift;
641              
642             # can't do this if literal strings are still in the thing.
643 2         6 my $string_state = $this->{_strings_extracted};
644 2         3 my $comment_state = $this->{_comments_extracted};
645 2 50 33     15 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
646 0         0 $this->extract_strings_and_comments();
647             }
648              
649 2         7 my @lines = split(/\r?\n/, $this->data);
650 2         29 my @new_lines = ();
651 2         5 foreach (@lines)
652             {
653 90 100       204 next if /^\s*$/;
654 50         106 push(@new_lines,$_);
655              
656             }
657              
658 2         8 $this->data( join($this->eol_char(), @new_lines) );
659              
660             # restore strings/comments if needed
661 2 50       7 unless ($string_state) {
662 0         0 $this->restore_literal_strings();
663             }
664 2 50       16 unless ($comment_state) {
665 0         0 $this->restore_comments();
666             }
667             }
668              
669             sub combine_concats
670             {
671 2     2 1 155 my $this = shift;
672              
673             # can't do this if literal strings are still in the thing.
674 2         4 my $string_state = $this->{_strings_extracted};
675 2         4 my $comment_state = $this->{_comments_extracted};
676 2 50 33     17 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
677 0         0 $this->extract_strings_and_comments();
678             }
679              
680 2         5 my $data = $this->data;
681             # TODO: currently, we only concat two literals if
682             # they both use the same quote style. Eg.
683             # this: "foo " + "bar" == "foo bar"
684             # not : "foo " + 'bar' == "foo "+'bar'
685             # this just makes things easier to do w/ a regexp, but we should be
686             # able to do the second form as well (can't w/out lookahead and
687             # lookbehind searches).
688 2         29 $data =~ s/(['"])\s?\+\s?\1//g;
689 2         6 $this->data($data);
690              
691             # restore strings/comments if needed
692 2 50       6 unless ($string_state) {
693 0         0 $this->restore_literal_strings();
694             }
695 2 50       8 unless ($comment_state) {
696 0         0 $this->restore_comments();
697             }
698             }
699              
700             sub join_all
701             {
702 2     2 1 185 my $this = shift;
703              
704             # we can't join lines that contain "//" comments
705             # and we can't process unless strings are not there
706              
707 2         4 my $string_state = $this->{_strings_extracted};
708 2         4 my $comment_state = $this->{_comments_extracted};
709 2 50 33     22 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
710 0         0 $this->extract_strings_and_comments();
711             }
712              
713 2         3 my $last_eol;
714             my $newdata;
715 2         8 foreach my $line (split(/\r?\n/, $this->data))
716             {
717             # if we have a linebreak between these charsets (not counting spaces/other-newlines)
718             # we retain it so we don't break any code.
719 48         112 my ($first_char) = ($line =~ /^\s*(\S)/);
720 48 100 66     344 if (defined($last_eol) &&
    100 66        
      66        
      66        
721             ($last_eol =~ /[a-zA-Z0-9\\\$_}\])+\-"']/ || ord($last_eol) > 126) &&
722             ($first_char =~ /[a-zA-Z0-9\\\$_{[(+\-]/ || ord($first_char) > 126) )
723             {
724 6         8 $newdata .= "\n";
725             } elsif (defined $last_eol) {
726 40         43 $newdata .= " ";
727             }
728              
729 48         72 $newdata .= $line;
730              
731 48 50       274 if ($line =~ /(\S)\s*$/) {
732 48         89 $last_eol = $1;
733             }
734             }
735 2         9 $newdata =~ s/\ $//;
736 2         6 $this->data($newdata);
737              
738             # restore comments if they're supposed to be in here
739 2 50       6 unless ($comment_state) {
740 0         0 $this->restore_comments();
741             }
742              
743             # restore strings/comments if needed
744 2 50       8 unless ($string_state) {
745 0         0 $this->restore_literal_strings();
746             }
747             }
748              
749             sub replace_extra_whitespace
750             {
751 2     2 1 161 my $this = shift;
752              
753             # can't do this if literal strings are still in the thing.
754 2         5 my $string_state = $this->{_strings_extracted};
755 2         4 my $comment_state = $this->{_comments_extracted};
756 2 50 33     14 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
757 0         0 $this->extract_strings_and_comments();
758             }
759              
760 2         5 my $data = $this->data;
761             # remove unneccessary white space around operators, braces, parenthesis
762 2         71 $data =~ s/\s([\x21\x25\x26\x28\x29\x2a\x2b\x2c\x2d\x2f\x3a\x3b\x3c\x3d\x3e\x3f\x5b\x5d\x5c\x7b\x7c\x7d\x7e])/$1/g;
763 2         71 $data =~ s/([\x21\x25\x26\x28\x29\x2a\x2b\x2c\x2d\x2f\x3a\x3b\x3c\x3d\x3e\x3f\x5b\x5d\x5c\x7b\x7c\x7d\x7e])\s/$1/g;
764 2         12 $this->data($data);
765              
766             # restore strings/comments if needed
767 2 50       7 unless ($string_state) {
768 0         0 $this->restore_literal_strings();
769             }
770 2 50       8 unless ($comment_state) {
771 0         0 $this->restore_comments();
772             }
773             }
774              
775             sub remove_comments
776             {
777 3     3 1 513 my $this = shift;
778 3         12 my %opts = @_;
779 3         9 my @exceptions;
780 3 50 66     44 if (ref($opts{exceptions}) eq 'ARRAY') {
    100 66        
781 0         0 @exceptions = @{$opts{exceptions}};
  0         0  
782             } elsif ( ((ref($opts{exceptions}) eq 'Regexp') || (! ref($opts{exceptions})))
783             && $opts{exceptions} ) {
784 1         5 @exceptions = ( $opts{exceptions} );
785             }
786              
787             # can't do this if literal strings are still in the thing.
788 3         8 my $string_state = $this->{_strings_extracted};
789 3         7 my $comment_state = $this->{_comments_extracted};
790 3 100 66     34 unless ($this->{_strings_extracted} && $this->{_comments_extracted}) {
791 1         4 $this->extract_strings_and_comments();
792             }
793              
794 3         12 my $comments = $this->comments();
795              
796 3         11 my $data = $this->data;
797 3         9 my $exception_caught = 0;
798             # replace each of the comments
799 3         10 for (my $i=0; $i<@{$comments}; $i++)
  32         123  
800             {
801 29         37 my $comment = $comments->[$i];
802 29 100       50 if (grep { $comment =~ /$_/ } @exceptions)
  14         70  
803             {
804 1         2 $exception_caught++;
805 1         44 $data =~ s/\0\0\_($i)\_\0\0/$comment/g;
806             } else {
807 28         487 $data =~ s/\0\0\_($i)\_\0\0//g;
808             }
809             }
810 3 100       12 $this->{_comments_extracted} = 0 if $exception_caught;
811 3         9 $this->data($data);
812              
813             # restore strings if needed
814 3 50       19 unless ($string_state) {
815 0         0 $this->restore_literal_strings();
816             }
817             }
818              
819             sub restore_comments
820             {
821 0     0 1 0 my $this = shift;
822              
823 0 0       0 return unless $this->{_comments_extracted};
824              
825 0         0 my $comments = $this->comments();
826              
827 0         0 my $data = $this->data;
828             # replace each of the comments
829 0         0 for (my $i=0; $i<@{$comments}; $i++)
  0         0  
830             {
831 0         0 my $comment = $comments->[$i];
832 0         0 $data =~ s/\0\0\_($i)\_\0\0/$comment/g;
833             }
834 0         0 $this->{_comments_extracted} = 0;
835 0         0 $this->data($data);
836             }
837              
838             sub restore_literal_strings
839             {
840 3     3 1 149 my $this = shift;
841              
842 3 50       585 return unless $this->{_strings_extracted};
843              
844 3         9 my $strings = $this->strings();
845              
846 3         7 my $data = $this->data;
847             # replace each of the strings
848 3         7 for (my $i=0; $i<@{$strings}; $i++)
  75         160  
849             {
850 72         91 my $string = $strings->[$i];
851 72         1049 $data =~ s/\0\0($i)\0\0/$string/g;
852             }
853 3         7 $this->{_strings_extracted} = 0;
854 3         8 $this->data($data);
855             }
856              
857             sub replace_final_eol
858             {
859 2     2 1 151 my $this = shift;
860              
861 2         6 my $eol = $this->eol_char();
862 2         17 my $data = $this->data;
863 2 50       9 if ($data =~ /\r?\n$/) {
864 0         0 $data =~ s/\r?\n$/$eol/;
865             } else {
866 2         4 $data .= $eol;
867             }
868 2         20 $this->data($data);
869             }
870              
871              
872              
873             1;