File Coverage

blib/lib/SWF/Search.pm
Criterion Covered Total %
statement 71 165 43.0
branch 13 54 24.0
condition 6 17 35.2
subroutine 15 23 65.2
pod 5 7 71.4
total 110 266 41.3


line stmt bran cond sub pod time code
1             package SWF::Search;
2              
3 1     1   7926 use 5.006;
  1         4  
  1         38  
4 1     1   6 use strict;
  1         2  
  1         51  
5 1     1   6 use Carp;
  1         6  
  1         71  
6 1     1   996 use IO::File;
  1         14288  
  1         154  
7 1     1   10 use warnings;
  1         2  
  1         2080  
8              
9             our $VERSION = '0.01';
10              
11             =head1 NAME
12              
13             SWF::Search - Extract strings and information from Macromedia SWF files
14              
15             =head1 SYNOPSIS
16              
17             use SWF::Search;
18              
19             my $search = SWF::Search->new(File=>"mymovie.swf");
20              
21             my @found = $search("Spock");
22             my @text = $search->strings;
23              
24             =head1 DESCRIPTION
25              
26             This module allows the searching of Macromedia SWF files for text
27             strings. The supported methods of searching for text are currently
28             limited to strings within editable text fields, and frame labels.
29             Future versions will support strings created by font-shape based text,
30             and text used within actionscript expressions.
31              
32             This initial release also does not support the compressed SWF files
33             created by Flash MX.
34              
35             =head1 METHODS
36              
37             =over
38              
39             =item my $search = SWF::Search->new(File=>"mymovie.swf");
40              
41             Instantiates and returns a new search object. Valid options are:
42              
43             =over
44              
45             =item File => $filename
46              
47             Selects a SWF file to search on.
48              
49             =item CaseSensitive => 1/0
50              
51             Turns search case sensitivity on or off. (Off by default.)
52              
53             =item Debug => 1/0
54              
55             Turns debugging messages on or off. (Off by default.)
56              
57             =back
58              
59             =cut
60              
61             sub new {
62              
63             # usage: my $s = SWF::Search->new(File=>"mymovie.swf");
64             #
65             # The Instantiator. Takes options, returns an object. Surprise!
66             # Also does an initial parse of the given SWF file for kicks.
67              
68 1     1 1 130 my ($class, %args) = @_;
69            
70 1   33     17 my $self = bless {
71             _debug => $args{Debug},
72             _case_sensitive => $args{CaseSensitive},
73             _file => undef,
74             _fh => undef,
75             _bitbuf => '',
76             _strings => [],
77             _labels => [],
78             }, ref($class) || $class;
79              
80 1 50       11 $self->file($args{File}) if (defined $args{File});
81 0         0 return $self;
82             }
83              
84              
85             =item $s->search("string" [,Type=>I, %options]);
86              
87             Examines the SWF file for the given search term, and returns a list of
88             text strings containing the term. If no search term is given, the
89             method will return all found text strings in the file. A B option
90             may also be passed to the method, requesting that the search only be
91             performed on a certain kind of text. The currently supported text
92             types are B, B, and B
93             given, the method defaults to searching B text. Additional options
94             for the search may also be given. Currently, the only available option
95             flag is B; searches are case-insensitive by default.
96              
97             =cut
98              
99             sub search {
100 0     0 1 0 my ($self, $term, %opt) = @_;
101 0         0 my @str;
102              
103 0   0     0 for ($opt{Type} || 'All') {
104 0 0       0 /^(All|EditText)$/ && do {@str = (@str,$self->strings)};
  0         0  
105 0 0       0 /^(All|Label)$/ && do {@str = (@str,$self->labels)};
  0         0  
106             }
107              
108 0 0       0 return @str unless (defined $term);
109 0 0 0     0 if ($opt{CaseSensitive} || $self->{_case_sensitive}) {
110 0         0 return grep /$term/, @str;
111             }
112             else {
113 0         0 return grep /$term/i, @str;
114             }
115             }
116              
117              
118             =item $s->strings();
119              
120             Returns a list of text strings found in the SWF movie file.
121              
122             =cut
123              
124             sub strings {
125 0     0 1 0 my $self = shift;
126 0         0 return @{$self->{_strings}};
  0         0  
127             }
128              
129             =item $s->labels();
130              
131             Returns a list of frame label text found in the SWF movie file.
132              
133             =cut
134              
135             sub labels {
136 0     0 1 0 my $self = shift;
137 0         0 return @{$self->{_labels}};
  0         0  
138             }
139              
140              
141             =item $s->file($filename);
142              
143             Sets and parses the filename on which to search if an argument
144             is given, or returns the filename if called without an argument..
145              
146             =cut
147              
148             sub file {
149 2     2 1 4 my ($self, $val) = @_;
150 2 100       6 if (@_ > 1) {
151 1         7 $self->{_file} = $val;
152 1   50     9 my $fh = new IO::File($val, "r") || die "can't open $val: $!";
153 1         212 $self->fh($fh);
154 1         3 $self->{_strings} = [];
155 1         4 $self->_flush_bitbuf;
156 1         5 $self->_parse_file;
157             }
158 1         22 return $self->{_file};
159             }
160              
161              
162             sub _parse_file {
163 1     1   2 my $self = shift;
164              
165 1         4 $self->_read_header;
166              
167 0         0 while (my ($tagid,$taglen) = $self->_read_tag) {
168 0 0       0 $self->{_debug} && print "TAGID: $tagid = ".debug_tag($tagid)."\n";
169              
170 0 0       0 $tagid == 37 and do {$self->_parse_DefineEditText;next;};
  0         0  
  0         0  
171 0 0       0 $tagid == 43 and do {$self->_parse_FrameLabel; next;};
  0         0  
  0         0  
172              
173 0         0 $self->fh->read(undef,$taglen); # skip non-string tag
174             }
175             }
176              
177              
178             ### SWF reader methods
179             #
180             # In an actual SWF parser, these would be more complex. However,
181             # since we only care about text and such here, I've simplified
182             # things considerably for speed and laziness.
183              
184             sub _read_header {
185 1     1   3 my $self = shift;
186 1         3 my ($sig,$version,$length,$fsize,$frate,$fcount);
187            
188             # read header, check format, version and size
189              
190 1         3 $self->fh->read($sig,3);
191              
192 1 50       42 if ($sig eq "CWS") {
193 0         0 carp "Compressed SWF files not supported yet. Sorry.";
194 0         0 return;
195             }
196 1 50       4 if ($sig ne "FWS") {
197 0         0 carp "Unsupported file format. [$sig] Try again...";
198 0         0 return;
199             }
200            
201 1         4 $self->fh->read($version,1);
202 1 50       11 if (_bits2num(unpack("B*",$version))>5) {
203 0 0       0 $self->{_debug} &&
204             carp "There may be problems with Flash MX files. Be warned...";
205             }
206              
207 1         5 my ($file_length) = (stat($self->file))[7];
208 1         148 $self->fh->read($length,4);
209 1         8 $length = unpack("L",$length);
210            
211 1 50       5 if ($file_length != $length) {
212 0         0 carp "File length incorrect. Hrm.";
213 0         0 return;
214             }
215            
216             # read framesize rect, dump
217 1         5 $self->_read_rect;
218              
219             # dump framerate and framecount
220 1         3 $self->fh->read(undef,4);
221             }
222              
223             sub _read_tag {
224             # reads a tag structure, returns the tag id and length
225 0     0   0 my $self = shift;
226              
227 0         0 my $tag_head;
228 0         0 $self->fh->read($tag_head,2);
229 0         0 $tag_head = unpack("B*", $tag_head);
230              
231 0         0 my $tagid = _bits2num(substr($tag_head, 8, 8) .
232             substr($tag_head, 0, 2));
233 0         0 my $taglen = _bits2num(substr($tag_head, 2, 6));
234              
235 0 0       0 if ($tagid == 0) {
236 0         0 return;
237             }
238              
239 0 0       0 if ($taglen == 63) { # long tag
240 0         0 my $tmpbytes;
241 0         0 $self->fh->read($tmpbytes,4);
242 0         0 $taglen = unpack("L",$tmpbytes);
243             }
244            
245 0         0 return($tagid,$taglen);
246             }
247              
248             sub _read_rect {
249             # we don't really care what the contents of the rect are,
250             # so this is just for reading and dumping the right bits.
251              
252 1     1   2 my $self = shift;
253              
254 1         4 my $Nbits = $self->_read_bits(5);
255 1         2 $Nbits = _bits2num($Nbits);
256 1         3 my $Xmin = $self->_read_bits($Nbits);
257 1         3 my $Xmax = $self->_read_bits($Nbits);
258 1         3 my $Ymin = $self->_read_bits($Nbits);
259 1         26 my $Ymax = $self->_read_bits($Nbits);
260 1         3 $self->_flush_bitbuf;
261             }
262              
263             sub _read_string {
264             # reads and returns a null-terminated string from the filehandle
265              
266 0     0   0 my $self = shift;
267 0         0 my ($chr,$str);
268              
269 0         0 while ($self->fh->read($chr,1)) {
270 0 0       0 last if (ord($chr) == 0);
271 0         0 $str .= $chr;
272             }
273              
274 0 0       0 $str =~ s/[\n\cM\cJ]//g if (defined $str);
275 0         0 return $str;
276             }
277              
278             sub _read_bits {
279              
280             # reads the requested number of bits and returns them as a string
281              
282 5     5   8 my $self = shift;
283 5   50     21 my $numbits = shift || return;
284 5         6 my ($bits,$tmpbits);
285              
286 5 50 66     15 if (($self->_bitbuf eq "") && (($numbits % 8) == 0)) { # read whole bytes
287 0         0 $self->fh->read($bits,$numbits/8);
288 0         0 $bits = unpack("B*",$bits);
289             }
290             else {
291 5 50       12 if ($numbits > length($self->_bitbuf)) { # need to fill up bitBuffer
292 5         12 my $bytes2read = int(($numbits-length($self->_bitbuf))/8)+1;
293 5         13 $self->fh->read($tmpbits,$bytes2read);
294 5         29 $self->_bitbuf($self->_bitbuf.unpack("B*",$tmpbits));
295             }
296            
297             # read bits from the cache
298            
299 5         12 $bits = substr($self->_bitbuf,0,$numbits);
300 5         10 $self->_bitbuf(substr($self->_bitbuf,$numbits));
301             }
302            
303 5         13 return $bits;
304             }
305              
306             sub _bits2num {
307 2   50 2   10 my $bits = shift || return;
308 2         19 return unpack('N',pack("B32","0"x(32-length$bits).$bits));
309             }
310              
311              
312             sub _parse_FrameLabel {
313 0     0   0 my $self = shift;
314 0         0 my $Name = $self->_read_string;
315 0         0 push @{$self->{_labels}},$Name;
  0         0  
316             }
317              
318             sub _parse_DefineEditText {
319 0     0   0 my $self = shift;
320              
321 0         0 $self->fh->read(undef,2); # TextId
322 0         0 $self->_read_rect; # Bounds
323 0         0 my $HasText = $self->_read_bits(1);
324 0         0 my $WordWrap = $self->_read_bits(1);
325 0         0 my $Multiline = $self->_read_bits(1);
326 0         0 my $Password = $self->_read_bits(1);
327 0         0 my $ReadOnly = $self->_read_bits(1);
328 0         0 my $HasTextColor = $self->_read_bits(1);
329 0         0 my $HasMaxLength = $self->_read_bits(1);
330 0         0 my $HasFont = $self->_read_bits(1);
331 0         0 my $Reserved = $self->_read_bits(2);
332 0         0 my $HasLayout = $self->_read_bits(1);
333 0         0 my $NoSelect = $self->_read_bits(1);
334 0         0 my $Border = $self->_read_bits(1);
335 0         0 my $Reserved2 = $self->_read_bits(2);
336 0         0 my $UseOutlines = $self->_read_bits(1);
337              
338 0 0       0 if ($HasFont) {
339 0         0 $self->fh->read(undef,4); # FontId, Fontheight
340             }
341 0 0       0 if ($HasTextColor) {
342 0         0 $self->fh->read(undef,4); # TextColor
343             }
344 0 0       0 if ($HasMaxLength) {
345 0         0 $self->fh->read(undef,2); # MaxLength
346             }
347 0 0       0 if ($HasLayout) {
348 0         0 $self->fh->read(undef,9); # Align, LeftMargin, RightMargin,
349             # Indent, Leading
350             }
351 0         0 my $VariableName = $self->_read_string;
352 0 0       0 if ($HasText) {
353 0         0 my $InitialText = $self->_read_string;
354 0         0 push @{$self->{_strings}},$InitialText;
  0         0  
355             }
356             }
357              
358              
359             ### "private" methods
360              
361             sub fh {
362 10     10 0 14 my ($self, $val) = @_;
363 10 100       23 $self->{_fh} = $val if (@_ > 1);
364 10         50 return $self->{_fh};
365             }
366              
367             sub _bitbuf {
368 40     40   56 my ($self, $val) = @_;
369 40 100       83 $self->{_bitbuf} = $val if (@_ > 1);
370 40         130 return $self->{_bitbuf};
371             }
372              
373             sub _flush_bitbuf {
374 2     2   3 my $self = shift;
375 2         6 $self->{_bitbuf} = '';
376             }
377              
378              
379             =back
380              
381             =head1 AUTHOR
382              
383             Copyright 2002, Marc Majcher. All rights reserved.
384              
385             This library is free software; you may redistribute it and/or modify
386             it under the same terms as Perl itself.
387              
388             Address bug reports and comments to: swf-search@majcher.com
389              
390             This module is based on the OpenSWF specification and SDK released by
391             Macromedia at http://www.macromedia.com/software/flash/open/licensing/fileformat/
392              
393             =head1 SEE ALSO
394              
395             SWF::File
396              
397             =cut
398              
399              
400             sub debug_tag {
401 0     0 0   my $num = shift;
402 0           my %tag2num = (
403             Header => -1, # to make my life easier
404             End => 0,
405             ShowFrame => 1,
406             DefineShape => 2,
407             FreeCharacter => 3,
408             PlaceObject => 4,
409             RemoveObject => 5,
410             DefineBits => 6,
411             DefineButton => 7,
412             JPEGTables => 8,
413             SetBackgroundColor => 9,
414             DefineFont => 10,
415             DefineText => 11,
416             DoAction => 12,
417             DefineFontInfo => 13,
418             DefineSound => 14,
419             StartSound => 15,
420             DefineButtonSound => 17,
421             SoundStreamHead => 18,
422             SoundStreamBlock => 19,
423             DefineBitsLossless => 20,
424             DefineBitsJPEG2 => 21,
425             DefineShape2 => 22,
426             DefineButtonCxform => 23,
427             Protect => 24,
428             PathsArePostScript => 25,
429             PlaceObject2 => 26,
430             RemoveObject2 => 28,
431             SyncFrame => 29,
432             FreeAll => 31,
433             DefineShape3 => 32,
434             DefineText2 => 33,
435             DefineButton2 => 34,
436             DefineBitsJPEG3 => 35,
437             DefineBitsLossless2 => 36,
438             DefineEditText => 37,
439             DefineMovie => 38,
440             DefineSprite => 39,
441             NameCharacter => 40,
442             SerialNumber => 41,
443             DefineTextFormat => 42,
444             FrameLabel => 43,
445             SoundStreamHead2 => 45,
446             DefineMorphShape => 46,
447             FrameTag => 47,
448             DefineFont2 => 48,
449             GenCommand => 49,
450             DefineCommandObj => 50,
451             CharacterSet => 51,
452             FontRef => 52,
453              
454             #undocumented MX tags
455              
456             UnknownActionScript => 59,
457             NewFontInfo => 62,
458              
459             # tag 59:
460             # two bytes of unknown content,
461             # followed by actionscript bytecode
462             # (usually 0x88 constant pool)
463              
464             # tag 62 - new font info:
465             # font_id - UI16
466             # name_length - UI8
467             # name - name_length bytes
468             # unknown - UI16
469             # character codes for referenced fonts -
470             # UI16[nglyphs] - could be unicode
471            
472             );
473            
474 0           my %num2tag = reverse %tag2num;
475 0           return $num2tag{$num};
476             }
477              
478              
479             1;