File Coverage

blib/lib/WordPress/Grep.pm
Criterion Covered Total %
statement 17 132 12.8
branch 0 54 0.0
condition 0 12 0.0
subroutine 6 25 24.0
pod 3 3 100.0
total 26 226 11.5


line stmt bran cond sub pod time code
1             package WordPress::Grep;
2 1     1   837 use v5.14;
  1         3  
3 1     1   5 use strict;
  1         2  
  1         21  
4 1     1   5 use warnings;
  1         2  
  1         24  
5              
6 1     1   630 use utf8;
  1         15  
  1         6  
7              
8 1     1   39 use Carp qw(croak);
  1         1  
  1         52  
9 1     1   1582 use DBI;
  1         18595  
  1         1991  
10              
11             our $VERSION = '0.010_006';
12              
13             =encoding utf8
14              
15             =head1 NAME
16              
17             WordPress::Grep - Search Wordpress titles and content
18              
19             =head1 SYNOPSIS
20              
21             use WordPress::Grep;
22              
23             my $wp_grep = WordPress::Grep->connect(
24             # required
25             user => $user,
26             database => $db,
27              
28             # optional
29             password => $pass,
30              
31             # has defaults
32             host => 'localhost',
33             port => '3306',
34             );
35              
36             my $posts = $wp_grep->search(
37             sql_like => '....',
38             regex => qr/ ... /,
39             code => sub { ... },
40             include_columns => [ ], # not implemented
41             exclude_columns => [ ], # not implemented
42             );
43              
44             foreach my $post_id ( keys %$post ) {
45             printf "%4d %s\n",
46             $posts->{$post_id}{ID}, $posts->{$post_id}{post_title};
47             }
48              
49             =head1 DESCRIPTION
50              
51             [This is alpha software.]
52              
53             This module allows you to search through the posts in a WordPress
54             database by directly examining the C table. Forget about
55             these limited APIs. Use the power of Perl directly on the content.
56              
57             I've long wanted this tool to examine consistency in my posts. I want
58             to check my use of CSS and HTML across all posts to check what I may
59             need to change when I change how I do things. This sort of thing is hard
60             to do with existing tools and the WordPress API (although there is a
61             L).
62              
63             I want to go through all posts with all the power of Perl, so my
64             grep:
65              
66             =over 4
67              
68             =item 1 Takes an optional LIKE argument that it applies to C and C.
69              
70             =item 2 Takes an optional regex argument that it uses to filter the returned rows, keeping only the rows whose titles or content that satisfy the regex.
71              
72             =item 3 Takes a code argument that it uses to filter the returned rows, keeping only the rows which return true for that subroutine.
73              
74             =item 4 Returns the matching rows in the same form that C's C returns. The top-level key is the value in the
75             C column.
76              
77             =back
78              
79             Right now, there are some limitations based on my particular use:
80              
81             =over 4
82              
83             =item * I only select the C types.
84              
85             =item * I assume UTF-8 everywhere, including in the database.
86              
87             =item * Applying a regex or code filter always return (at least) the C and C.
88              
89             =item * The LIKE and regex filters only work on C and C. The code filter gets the entire row as a hash reference and can do what it likes.
90              
91             =back
92              
93             I've set up a slave of the MySQL server that runs my WordPress
94             installations. In that slave, I set up a read-only user for this tool.
95              
96             =head2 Methods
97              
98             =over 4
99              
100             =item connect
101              
102             Connect to the WordPress database. You must specify these parameters,
103             which should be the same ones in your I (although if
104             you need this tool frequently, consider setting up a read-only user
105             for this, or run it against a slave).
106              
107             user
108             database
109              
110             If you need a password, you'll have to provide that:
111              
112             password
113              
114             These parameters have defaults
115              
116             host defaults to localhost
117             port defaults to 3306
118             user defaults to root
119              
120             =cut
121              
122             sub connect {
123 0     0 1   my( $class, %args ) = @_;
124              
125 0           foreach my $required ( qw(database) ) {
126             croak "You must set '$required' in connect()"
127 0 0         unless defined $args{$required};
128             }
129              
130 0   0       $args{host} //= 'localhost';
131 0   0       $args{port} //= 3306;
132 0   0       $args{user} //= 'root';
133              
134 0           my $dsn = "dbi:mysql:db=$args{database};host=$args{host};port=$args{port}";
135              
136             #dbi:DriverName:database_name
137             #dbi:DriverName:database_name@hostname:port
138             #dbi:DriverName:database=database_name;host=hostname;port=port
139              
140 0           my $db = DBI->connect( $dsn, $args{user}, $args{password} );
141 0 0         croak "Could not connect to database [$args{host}:$args{port}]\n$DBI::Error"
142             unless defined $db;
143 0           my $self = bless {
144             db => $db,
145             args => \%args,
146             }, $class;
147 0           $self->_db_init;
148              
149 0           return $self;
150             }
151              
152             sub _db_init {
153 0     0     my( $self ) = @_;
154              
155 0           $self->_db_utf8;
156             }
157              
158             sub _db_utf8 {
159 0     0     my( $self ) = @_;
160              
161 0           my $sql = qq{SET NAMES 'utf8';};
162 0           $self->db->do($sql);
163 0           $self->db->{'mysql_enable_utf8'} = 1;
164             }
165              
166             =item db
167              
168             Return the db connection. This is a vanilla DBI connection to MySQL.
169             If you subclass this, you can do further setup by overriding C<_db_init>.
170              
171             =cut
172              
173 0     0 1   sub db { $_[0]->{db} }
174              
175             =item search
176              
177             The possible arguments:
178              
179             sql_like - a string
180             regex - a regular expression reference (qr//)
181             code - a subroutine reference
182              
183             categories - an array reference of category names
184             tags - an array reference of tags names
185              
186             This method first builds a query to search through the C
187             table.
188              
189             If you specify C, it limits the returned rows to those whose
190             C or C match that argument.
191              
192             If you specify C or C, another query annotates the
193             return rows with term information. If the C or C have
194             values, the return rows are reduced to those that have those categories
195             or tags. If you don't want to reduce the rows just yet, you can use C
196             to examine the row yourself.
197              
198             If you specify C, it filters the returned rows to those whose
199             C or C satisfy the regular expression.
200              
201             If you specify C, it filters the returned rows to those for
202             which the subroutine reference returns true. The coderef gets a hash
203             reference of the current row. It's up to you to decide what to do with
204             it.
205              
206             These filters are consecutive. You can specify any combination of them
207             but they always happen in that order. The C only gets the rows
208             that satisfied the C, and the C only gets the rows
209             that satisfied C and C.
210              
211             =cut
212              
213             sub search {
214 0     0 1   my( $self, %args ) = @_;
215              
216 0           $self->_set_args( \%args );
217 0           $self->_check_args;
218              
219 0           my $query = $self->_get_sql;
220              
221             # filter results by the LIKE, directly in the SQL
222 0 0         $query .= $self->_like_where_clause if defined $args{sql_like};
223 0           $self->_set_query( $query );
224              
225 0           my $posts = $self->_get_posts;
226              
227             # filter posts by the regex
228 0 0         if( defined $self->_args->{regex} ) {
229 0           my $re = $self->_args->{regex};
230 0           foreach my $post_id ( keys %$posts ) {
231             delete $posts->{$post_id} unless
232             (
233             $posts->{$post_id}{post_title} =~ m/$re/
234             or
235 0 0 0       $posts->{$post_id}{post_content} =~ m/$re/
236             );
237             }
238             }
239              
240             # filter posts by the sub
241 0 0         if( defined $args{code} ) {
242 0           foreach my $post_id ( keys %$posts ) {
243             delete $posts->{$post_id}
244 0 0         unless $args{code}->( $posts->{$post_id} );
245             }
246             }
247              
248 0           $self->_clear_search;
249              
250 0           return $posts;
251             }
252              
253 0 0   0     sub _query { exists $_[0]->{query} ? $_[0]->{query} : '' }
254             sub _set_query {
255 0     0     my( $self, $query ) = @_;
256              
257             #XXX Can I figure out the number dynamically in a better way?
258 0           my $param_count = () = $query =~ /\?/g;
259             $self->_set_bind_params( [ ( $self->_args->{sql_like} ) x $param_count ] )
260 0 0         if defined $self->_args->{sql_like};
261              
262 0           $self->{query} = $query;
263             }
264              
265 0 0   0     sub _bind_params { exists $_[0]->{bind_params} ? @{$_[0]->{bind_params}} : () }
  0            
266             sub _set_bind_params {
267 0 0   0     croak "_set_bind_params must be an array reference" unless
268             ref $_[1] eq ref [];
269 0           $_[0]->{bind_params} = $_[1];
270             }
271              
272 0 0   0     sub _args { exists $_[0]->{args} ? $_[0]->{args} : {} }
273             sub _set_args {
274 0 0   0     croak "_set_args must be a hash reference" unless
275             ref $_[1] eq ref {};
276 0           $_[0]->{args} = $_[1];
277             }
278              
279              
280             sub _clear_search {
281 0     0     my @clear_keys = qw( args sql bind_params );
282 0           delete @{ $_[0] }{ @clear_keys };
  0            
283             }
284              
285             sub _check_args {
286 0     0     my( $self ) = @_;
287              
288 0 0         if( exists $self->_args->{regex} ) {
289 0           croak "'regex' value must be a regex reference [@{[$self->_args->{regex}]}]"
290 0 0         unless ref $self->_args->{regex} eq ref qr//;
291             }
292              
293 0 0         if( exists $self->_args->{code} ) {
294             croak "'code' value must be a code reference"
295 0 0   0     unless ref $self->_args->{code} eq ref sub {};
296             }
297              
298 0           my @array_keys = qw( type categories tags include_columns exclude_columns );
299 0           foreach my $array_arg ( @array_keys ) {
300 0 0         next unless exists $self->_args->{$array_arg};
301             croak "'$array_arg' value must be an array reference"
302 0 0         unless ref $self->_args->{$array_arg} eq ref [];
303             }
304              
305 0           return 1;
306             }
307              
308             sub _get_sql {
309 0     0     'SELECT * FROM wp_posts WHERE post_type = "post"'
310             }
311              
312             sub _like_where_clause {
313 0     0     ' AND (post_title LIKE ? OR post_content LIKE ?)'
314             }
315              
316             sub _get_posts {
317 0     0     my( $self ) = @_;
318 0           my $sth = $self->db->prepare( $self->_query );
319 0 0         croak
320             "Could not create statement handle\n\n" .
321             "DBI Error: $DBI::Error\n\n" .
322 0           "Statement-----\n@{[$self->_query]}\n-----\n"
323             unless defined $sth;
324 0           $sth->execute( $self->_bind_params );
325              
326 0           my $posts = $sth->fetchall_hashref( 'ID' );
327              
328 0 0         if( $self->_include_terms ) {
329 0           my @post_ids = keys %$posts;
330 0           my $terms = $self->_get_terms;
331              
332 0           my %categories = map { $_, 1 } @{ $self->_args->{categories} };
  0            
  0            
333 0           my %tags = map { $_, 1 } @{ $self->_args->{tags} };
  0            
  0            
334              
335             # reduce the posts in
336 0           foreach my $post_key ( @post_ids ) {
337 0           my $this = $terms->{$post_key};
338              
339 0           my $found = 0;
340              
341             # if none are specified, include all
342 0 0 0       $found = 1 if( 0 == keys %tags && 0 == keys %categories );
343              
344             my %this_tags =
345 0           map { $this->{$_}{name}, $this->{$_}{term_taxonomy_id} }
346 0           grep { $this->{$_}{taxonomy} eq 'post_tag' }
  0            
347             keys %$this;
348              
349 0           my %Seen_tags;
350 0           my @found_tags = grep { ++$Seen_tags{$_} > 1 }
  0            
351             keys %this_tags, keys %tags;
352 0           $found += do {
353 0 0         if( $self->_args->{tags_and} ) { @found_tags == keys %tags }
  0            
354 0           else { scalar @found_tags }
355             };
356              
357             my %this_categories =
358 0           map { $this->{$_}{name}, $this->{$_}{term_taxonomy_id} }
359 0           grep { $this->{$_}{taxonomy} eq 'category' }
  0            
360             keys %$this;
361 0           my %Seen_categories;
362 0           my @found_categories = grep { ++$Seen_categories{$_} > 1 }
  0            
363             keys %this_categories, keys %categories;
364 0           $found += do {
365 0 0         if( $self->_args->{categories_and} ) { @found_categories == keys %categories }
  0            
366 0           else { scalar @found_categories }
367             };
368              
369 0 0         if( $found ) {
370 0           $posts->{$post_key}{terms} = $terms->{$post_key};
371 0           $posts->{$post_key}{tags} = [ keys %this_tags ];
372 0           $posts->{$post_key}{categories} = [ keys %this_categories ];
373             }
374             else {
375 0           delete $posts->{$post_key};
376             }
377              
378             }
379             }
380              
381 0           return $posts;
382             }
383              
384             sub _include_terms {
385 0 0   0     $_[0]->_args->{categories} or $_[0]->_args->{tags};
386             }
387              
388             sub _get_terms {
389 0     0     my( $self, $post_ids ) = @_;
390              
391 0           my $query =<<'SQL';
392             SELECT
393             wp_posts.ID,
394             wp_posts.post_title,
395             wp_terms.term_id,
396             wp_terms.name,
397             wp_term_taxonomy.term_taxonomy_id,
398             wp_term_taxonomy.parent,
399             wp_term_taxonomy.taxonomy
400             FROM
401             wp_posts
402             LEFT JOIN
403             wp_term_relationships ON wp_term_relationships.object_id = wp_posts.ID
404             LEFT JOIN
405             wp_term_taxonomy ON wp_term_taxonomy.term_taxonomy_id = wp_term_relationships.term_taxonomy_id
406             LEFT JOIN
407             wp_terms ON wp_terms.term_id = wp_term_taxonomy.term_id
408             WHERE
409             wp_term_taxonomy.taxonomy IS NOT NULL
410             SQL
411              
412 0           my $sth = $self->db->prepare( $query );
413 0 0         croak
414             "Could not create statement handle\n\n" .
415             "DBI Error: $DBI::Error\n\n" .
416             "Statement-----\n$query\n-----\n"
417             unless defined $sth;
418              
419 0           $sth->execute;
420              
421 0           my $terms = $sth->fetchall_hashref( [ qw( ID term_id) ] );
422             }
423              
424             =back
425              
426             =head1 TO DO
427              
428              
429             =head1 SEE ALSO
430              
431             L
432              
433             =head1 SOURCE AVAILABILITY
434              
435             This source is in Github:
436              
437             http://github.com/briandfoy/wordpress-grep/
438              
439             =head1 AUTHOR
440              
441             brian d foy, C<< >>
442              
443             =head1 COPYRIGHT AND LICENSE
444              
445             Copyright © 2013-2022, brian d foy . All rights reserved.
446              
447             You may redistribute this under the Artistic License 2.0.
448              
449             =cut
450              
451             1;