File Coverage

blib/lib/WWW/Mechanize/Chrome/DOMops.pm
Criterion Covered Total %
statement 14 143 9.7
branch 0 68 0.0
condition 0 33 0.0
subroutine 5 7 71.4
pod 2 2 100.0
total 21 253 8.3


line stmt bran cond sub pod time code
1             package WWW::Mechanize::Chrome::DOMops;
2              
3 3     3   1050195 use 5.006;
  3         33  
4 3     3   21 use strict;
  3         8  
  3         73  
5 3     3   12 use warnings;
  3         6  
  3         131  
6              
7 3     3   19 use Exporter qw(import);
  3         7  
  3         203  
8             our @EXPORT = qw(
9             zap
10             find
11             VERBOSE_DOMops
12             );
13              
14 3     3   1766 use Data::Roundtrip qw/perl2dump no-unicode-escape-permanently/;
  3         67660  
  3         24  
15            
16             our $VERSION = '0.03';
17              
18             # caller can set this to 0,1,2,3
19             our $VERBOSE_DOMops = 0;
20              
21             my $_aux_js_functions = <<'EOJ';
22             const getAllChildren = (htmlElement) => {
23             if( (htmlElement === null) || (htmlElement === undefined) ){
24             console.log("getAllChildren() : warning null input");
25             return [];
26             }
27             if( VERBOSE_DOMops > 1 ){ console.log("getAllChildren() : called for element '"+htmlElement+"' with tag '"+htmlElement.tagName+"' and id '"+htmlElement.id+"' ..."); }
28              
29             if (htmlElement.children.length === 0) return [htmlElement];
30              
31             let allChildElements = [];
32              
33             for (let i = 0; i < htmlElement.children.length; i++) {
34             let children = getAllChildren(htmlElement.children[i]);
35             if (children) allChildElements.push(...children);
36             }
37             allChildElements.push(htmlElement);
38              
39             return allChildElements;
40             };
41             EOJ
42              
43             # The input is a hashref of parameters
44             # the 'element-*' parameters specify some condition to be matched
45             # for example id to be such and such.
46             # The conditions can be combined either as a union (OR)
47             # or an intersection (AND). Default is intersection.
48             # The param || => 1 changes this to Union.
49             #
50             # returns -3 parameters error
51             # returns -2 if javascript failed
52             # returns -1 if one or more of the specified selectors failed to match
53             # returns >=0 : the number of elements matched
54             sub find {
55 0     0 1   my $params = $_[0];
56 0   0       my $parent = ( caller(1) )[3] || "N/A";
57 0           my $whoami = ( caller(0) )[3];
58              
59 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 0 ){ print STDOUT "$whoami (via $parent) : called ...\n" }
  0            
60              
61 0 0         my $amech_obj = exists($params->{'mech-obj'}) ? $params->{'mech-obj'} : undef;
62 0 0         if( ! $amech_obj ){
63 0           my $anerrmsg = "$whoami (via $parent) : a mech-object is required via 'mech-obj'.";
64 0           print STDERR $anerrmsg."\n";
65             return {
66 0           'status' => -3,
67             'message' => $anerrmsg
68             }
69             }
70 0 0         my $js_outfile = exists($params->{'js-outfile'}) ? $params->{'js-outfile'} : undef;
71              
72             # html element selectors:
73             # e.g. params->{'element-name'} = ['a','b'] or params->{'element-name'} = 'a'
74 0           my @known_selectors = ('element-name', 'element-class', 'element-tag', 'element-id', 'element-cssselector');
75 0           my (%selectors, $have_a_selector, $m);
76 0           for my $asel (@known_selectors){
77 0 0 0       next unless exists($params->{$asel}) and defined($params->{$asel});
78 0 0         if( ref($params->{$asel}) eq '' ){
    0          
79 0           $selectors{$asel} = '["' . $params->{$asel} . '"]';
80             } elsif( ref($params->{$asel}) eq 'ARRAY' ){
81 0           $selectors{$asel} = '["' . join('","', @{$params->{$asel}}) . '"]';
  0            
82             } else {
83 0           my $anerrmsg = "$whoami (via $parent) : error, parameter '$asel' expects a scalar or an ARRAYref and not '".ref($params->{$asel})."'.";
84 0           print STDERR $anerrmsg."\n";
85             return {
86 0           'status' => -3,
87             'message' => $anerrmsg
88             }
89             }
90 0           $have_a_selector = 1;
91 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 1 ){ print STDOUT "$whoami (via $parent) : found selector '$asel' with value '".$selectors{$asel}."'.\n" }
  0            
92             }
93 0 0         if( not $have_a_selector ){
94 0           my $anerrmsg = "$whoami (via $parent) : at least one selector must be specified by supplying one or more parameters from these: '".join("','", @known_selectors)."'.";
95 0           print STDERR $anerrmsg."\n";
96             return {
97 0           'status' => -3,
98             'message' => $anerrmsg
99             }
100             }
101              
102             # If specified it will add an ID to any html element which does not have an ID (field id).
103             # The ID will be prefixed by this string and have an incrementing counter postfixed
104 0           my $insert_id_if_none;
105 0 0 0       if( exists($params->{'insert-id-if-none-random'}) && defined($params->{'insert-id-if-none-random'}) ){
    0 0        
106             # we are given a prefix and also asked to add our own rands
107 0           $insert_id_if_none = $params->{'insert-id-if-none-random'} . int(rand(1_000_000)) . int(rand(1_000_000)) . int(rand(1_000_000));
108             } elsif( exists($params->{'insert-id-if-none'}) && defined($params->{'insert-id-if-none'}) ){
109             # we are given a prefix and no randomisation, both cases we will be adding the counter at the end
110 0           $insert_id_if_none = $params->{'insert-id-if-none'};
111             }
112              
113             # these callbacks are pieces of javascript code to execute but they should not have the function
114             # preamble or postamble, just the function content. The parameter 'htmlElement' is what
115             # we pass in and it is the currently matched HTML element.
116             # whatever the callback returns (including nothing = undef) will be recorded
117             # The callbacks are in an array with keys 'code' and 'name'.
118             # The callbacks are executed in the same order they have in this array
119             # the results are recorded in the same order in an array, one result for one htmlElement matched.
120             # callback(s) to execute for each html element matched in the 1st level (that is, not including children of match)
121 0           my @known_callbacks = ('find-cb-on-matched', 'find-cb-on-matched-and-their-children');
122 0           my %callbacks;
123 0           for my $acbname (@known_callbacks){
124 0 0 0       if( exists($params->{$acbname}) && defined($m=$params->{$acbname}) ){
125 0 0         if( ref($m) ne 'ARRAY' ){
126 0           my $anerrmsg = "$whoami (via $parent) : error callback parameter '$acbname' must be an array of hashes each containing a 'code' and a 'description' field. You supplied a '".ref($m)."'.";
127 0           print STDERR $anerrmsg."\n";
128 0           return { 'status' => -3, 'message' => $anerrmsg }
129             }
130 0           for my $acbitem (@$m){
131 0 0 0       if( ! exists($acbitem->{'code'}) || ! exists($acbitem->{'name'}) ){
132 0           my $anerrmsg = "$whoami (via $parent) : error callback parameter '$acbname' must be an array of hashes each containing a 'code' and a 'description' field.";
133 0           print STDERR $anerrmsg."\n";
134 0           return { 'status' => -3, 'message' => $anerrmsg }
135             }
136             }
137 0           $callbacks{$acbname} = $m;
138 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 0 ){ print STDOUT "$whoami (via $parent) : adding ".scalar(@$m)." callback(s) of type '$acbname' ...\n" }
  0            
139             }
140             }
141              
142             # each specifier yields a list each, how to combine this list?:
143             # intersection (default): specified with '||' => 0 or '&&' => 1 in params,
144             # the list is produced by the intersection set of all individual result sets (elements-by-name, by-id, etc.)
145             # This means an item must exist in ALL result sets which were specified by the caller.
146             # or
147             # union: specified with '||' => 1 or '&&' => 0 in params
148             # the list is produced by the union set of all individual result sets (elements-by-name, by-id, etc.)
149             # This means an item must exist in just one result set specified by the caller.
150             # Remember that the caller can specify elements by name ('element-name' => '...'), by id, by tag etc.
151             my $Union = (exists($params->{'||'}) && defined($params->{'||'}) && ($params->{'||'} == 1))
152 0   0       || (exists($params->{'&&'}) && defined($params->{'&&'}) && ($params->{'&&'} == 0))
153             || 0 # <<< default is intersection (superfluous but verbose)
154             ;
155              
156 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 1 ){ print "$whoami (via $parent) : using ".($Union?'UNION':'INTERSECTION')." to combine the matched elements.\n"; }
  0 0          
157             # there is no way to break a JS eval'ed via perl and return something back unless
158             # one uses gotos or an anonymous function, see
159             # https://www.perlmonks.org/index.pl?node_id=1232479
160             # Here we are preparing JS code to be eval'ed in the page
161              
162 0           my $cb_functions = "const cb_functions = {\n";
163 0           for my $acbname (@known_callbacks){
164 0 0         next unless exists $callbacks{$acbname};
165 0           $m = $callbacks{$acbname};
166 0           $cb_functions .= " \"${acbname}\" : [\n";
167 0           for my $acb (@$m){
168 0           my $code = $acb->{'code'};
169 0           my $name = $acb->{'name'}; # something to identify it with
170 0           $cb_functions .= <<EOJ;
171             {"code" : (htmlElement) => { ${code} }, "name" : "${name}"},
172             EOJ
173             }
174 0           $cb_functions =~ s/,\n$//m;
175 0           $cb_functions .= "\n ],\n";
176             }
177 0           $cb_functions =~ s/,\n*$//s;
178 0           $cb_functions .= "\n};";
179              
180 0           my $jsexec = '{ /* our own scope */' # <<< run it inside its own scope because multiple mech->eval() accumulate and global vars are re-declared etc.
181             . "\n\nconst VERBOSE_DOMops = ${WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops};\n\n"
182             . $_aux_js_functions . "\n\n"
183             . $cb_functions . "\n\n"
184             # the semicolon must be exactly after 'EOJ'!
185             . <<'EOJ';
186             // the return value of this anonymous function is what perl's eval will get back
187             (function(){
188             var retval = -1; // this is what we return
189             // returns -1 for when one of the element searches matched nothing
190             // returns 0 if after intersection/union nothing was found to delete
191             // returns >0 : the number of elements deleted
192             var anelem, anelems, i, j;
193             var allfound = [];
194             var allfound_including_children = [];
195             var elems = [];
196             EOJ
197 0           for my $asel (@known_selectors){ $jsexec .= "\telems['${asel}'] = null;\n"; }
  0            
198 0           $jsexec .= <<EOJ;
199             const union = ${Union};
200             EOJ
201 0 0         $jsexec .= "\tconst insert_id_if_none = ".(defined($insert_id_if_none) ? "'${insert_id_if_none}'" : "null").";\n";
202 0           $jsexec .= "\tconst known_callbacks = [\"" . join('", "', @known_callbacks) . "\"];\n";
203 0           my %selfuncs = (
204             'element-class' => 'document.getElementsByClassName',
205             'element-tag' => 'document.getElementsByTagName',
206             'element-name' => 'document.getElementsByName',
207             'element-id' => 'document.getElementById',
208             'element-cssselector' => 'document.querySelectorAll'
209             );
210 0           for my $aselname (keys %selectors){
211 0           my $selfunc = $selfuncs{$aselname};
212 0           my $aselvalue = $selectors{$aselname};
213 0           $jsexec .= <<EOJ;
214             // selector '${aselname}' was specified: ${aselvalue}
215             for(let asel of ${aselvalue}){
216             // this can return an array or a single html element (e.g. in ById)
217             if( VERBOSE_DOMops > 1 ){ console.log("$whoami (via $parent) via js-eval : selecting elements with this function '${selfunc}' ..."); }
218             let tmp = ${selfunc}(asel);
219             // if getElementsBy return an HTMLCollection,
220             // getElementBy (e.g. ById) returns an html element
221             // and querySelectorAll returns NodeList
222             // convert them all to an array:
223             if( (tmp === null) || (tmp === undefined) ){
224             if( VERBOSE_DOMops > 1 ){ console.log("$whoami (via $parent) : nothing matched."); }
225             continue;
226             }
227             anelems = (tmp.constructor.name === 'HTMLCollection') || (tmp.constructor.name === 'NodeList')
228             ? Array.prototype.slice.call(tmp) : [tmp]
229             ;
230             if( anelems == null ){
231             if( union == 0 ){
232             msg = "$whoami (via $parent) via js-eval : element(s) selected with ${aselname} '"+asel+"' not found, this specifier has failed and will not continue with the rest.";
233             console.log(msg);
234             return {"status":-1,"message":msg};
235             } else {
236             console.log("$whoami (via $parent) via js-eval : element(s) selected with ${aselname} '"+asel+"' not found (but because we are doing a union of the results, we continue with the other specifiers).");
237             continue;
238             }
239             }
240             if( anelems.length == 0 ){
241             if( union == 0 ){
242             msg = "$whoami (via $parent) via js-eval : element(s) selected with ${aselname} '"+asel+"' not found, this specifier has failed and will not continue with the rest.";
243             console.log(msg);
244             return {"status":-1,"message":msg};
245             } else {
246             console.log("$whoami (via $parent) via js-eval : element(s) selected with ${aselname} '"+asel+"' not found (but because we are doing a union of the results, we continue with the other specifiers).");
247             continue;
248             }
249             }
250             // now anelems is an array
251             if( elems["${aselname}"] === null ){
252             elems["${aselname}"] = anelems;
253             } else {
254             elems["${aselname}"] = elems["${aselname}"].length > 0 ? [...elems["${aselname}"], ...anelems] : anelems;
255             }
256             allfound = allfound.length > 0 ? [...allfound, ...anelems] : anelems;
257             if( VERBOSE_DOMops > 1 ){
258             console.log("$whoami (via $parent) via js-eval : found "+elems["${aselname}"].length+" elements selected with ${aselname} '"+asel+"'");
259             if( (VERBOSE_DOMops > 2) && (elems["${aselname}"].length>0) ){
260             for(let el of elems["${aselname}"]){ console.log(" tag: '"+el.tagName+"', id: '"+el.id+"'"); }
261             console.log("--- end of the elements selected with ${aselname}.");
262             }
263             }
264             }
265             EOJ
266             } # for my $aselname (keys %selectors){
267              
268             # if even one specified has failed, we do not reach this point, it returns -1
269 0 0         if( $Union ){
270             # union of all elements matched individually without duplicates:
271             # we just remove the duplicates from the allfound
272             # from https://stackoverflow.com/questions/9229645/remove-duplicate-values-from-js-array (by Christian Landgren)
273 0           $jsexec .= "\t// calculating the UNION of all elements found...\n";
274 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 1 ){ $jsexec .= "\t".'console.log("calculating the UNION of all elements found (without duplicates).\n");'."\n"; }
  0            
275 0           $jsexec .= "\t".'allfound.slice().sort(function(a,b){return a > b}).reduce(function(a,b){if (a.slice(-1)[0] !== b) a.push(b);return a;},[]);'."\n";
276             } else {
277             # intersection of all the elements matched individually
278 0           $jsexec .= "\t// calculating the INTERSECTION of all elements found...\n";
279 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 1 ){ $jsexec .= "\t".'console.log("calculating the INTERSECTION of all elements found per selector category (if any).\n");'."\n"; }
  0            
280 0           $jsexec .= "\tvar opts = ['".join("','", @known_selectors)."'];\n";
281 0           $jsexec .= <<'EOJ';
282             allfound = null;
283             var nopts = opts.length;
284             var n1, n2, I;
285             for(let i=0;i<nopts;i++){
286             n1 = opts[i];
287             if( (elems[n1] != null) && (elems[n1].length > 0) ){ allfound = elems[n1].slice(0); I = i; break; }
288             }
289             for(let j=0;j<nopts;j++){
290             if( j == I ) continue;
291             n2 = opts[j];
292             if( elems[n2] != null ){
293             var array2 = elems[n2];
294             // intersection of total and current
295             allfound = allfound.filter(function(n) {
296             return array2.indexOf(n) !== -1;
297             });
298             }
299             }
300             if( allfound === null ){ allfound = []; }
301             EOJ
302             } # if Union/Intersection
303              
304             # post-process and return
305 0           $jsexec .= <<'EOJ';
306             // first, make a separate list of all the children of those found (recursively all children)
307             for(let i=allfound.length;i-->0;){ allfound_including_children.push(...getAllChildren(allfound[i])); }
308             // second, add id to any html element which does not have any
309             if( insert_id_if_none !== null ){
310             let counter = 0;
311             for(let i=allfound.length;i-->0;){
312             let el = allfound[i];
313             if( el.id == '' ){ el.id = insert_id_if_none+'_'+counter++; }
314             }
315             for(let i=allfound_including_children.length;i-->0;){
316             let el = allfound_including_children[i];
317             if( el.id == '' ){ el.id = insert_id_if_none+'_'+counter++; }
318             }
319             // now that we are sure each HTML element has an ID we can remove duplicates if any
320             // basically there will not be duplicates in the 1st level but in all-levels there may be
321             let unis = {};
322             for(let i=allfound.length;i-->0;){
323             let el = allfound[i];
324             unis[el.id] = el;
325             }
326             allfound = Object.values(unis);
327             unis = {};
328             for(let i=allfound_including_children.length;i-->0;){
329             let el = allfound_including_children[i];
330             unis[el.id] = el;
331             }
332             allfound_including_children = Object.values(unis);
333             }
334              
335             if( VERBOSE_DOMops > 1 ){
336             console.log("Eventually matched "+allfound.length+" elements");
337             if( (VERBOSE_DOMops > 2) && (allfound.length>0) ){
338             console.log("---begin matched elements:");
339             for(let el of allfound){ console.log(" tag: '"+el.tagName+"', id: '"+el.id+"'"); }
340             console.log("---end matched elements.");
341             }
342             }
343             // now call the js callback function on those matched (not the children, if you want children then do it in the cb)
344             let cb_results = {};
345             for(let acbname of known_callbacks){
346             // this *crap* does not work: if( ! acbname in cb_functions ){ continue; }
347             // and caused me a huge waste of time
348             if( ! cb_functions[acbname] ){ continue; }
349             if( VERBOSE_DOMops > 1 ){ console.log("found callback for '"+acbname+"' and processing its code blocks ..."); }
350             let res1 = [];
351             let adata = acbname == 'find-cb-on-matched-and-their-children' ? allfound_including_children : allfound;
352             for(let acb of cb_functions[acbname]){
353             let res2 = [];
354             for(let i=0;i<adata.length;i++){
355             let el = adata[i];
356             if( VERBOSE_DOMops > 1 ){ console.log("executing callback of type '"+acbname+"' (name: '"+acb["name"]+"') on matched element tag '"+el.tagName+"' and id '"+el.id+"' ..."); }
357             let ares;
358             try {
359             ares = acb["code"](el);
360             } catch(err) {
361             msg = "error, call to the user-specified callback of type '"+acbname+"' (name: '"+acb["name"]+"') has failed with exception : "+err.message;
362             console.log(msg);
363             return {"status":-1,"message":msg};
364             }
365             res2.push({"name":acb["name"],"result":ares});
366             if( VERBOSE_DOMops > 1 ){ console.log("success executing callback of type '"+acbname+"' (name: '"+acb["name"]+"') on matched element tag '"+el.tagName+"' and id '"+el.id+"'. Result is '"+ares+"'."); }
367             }
368             res1.push(res2);
369             }
370             cb_results[acbname] = res1;
371             }
372              
373             // returned will be an array of arrays : [tag, id] for each html element matched
374             var returnedids = [], returnedids_of_children_too = [];
375             for(let i=allfound.length;i-->0;){
376             let el = allfound[i];
377             returnedids.push({"tag" : el.tagName, "id" : el.id});
378             }
379             for(let i=allfound_including_children.length;i-->0;){
380             let el = allfound_including_children[i];
381             returnedids_of_children_too.push({"tag" : el.tagName, "id" : el.id});
382             }
383              
384             let ret = {
385             "found" : {
386             "first-level" : returnedids,
387             "all-levels" : returnedids_of_children_too
388             },
389             "status" : returnedids.length
390             };
391             if( Object.keys(cb_results).length > 0 ){
392             ret["cb-results"] = cb_results;
393             }
394             console.dir(ret);
395              
396             return ret;
397             })(); // end of anonymous function and now execute it
398             } // end our eval scope
399             EOJ
400 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 2 ){ print "--begin javascript code to eval:\n\n${jsexec}\n\n--end javascript code.\n$whoami (via $parent) : evaluating above javascript code.\n" }
  0            
401              
402 0 0         if( defined $js_outfile ){
403 0 0         if( open(my $FH, '>', $js_outfile) ){ print $FH $jsexec; close $FH }
  0            
  0            
404 0           else { print STDERR "$whoami (via $parent) : warning, failed to open file '$js_outfile' for writing the output javascript code, skipping it ...\n" }
405             }
406 0           my ($retval, $typ);
407 0           eval { ($retval, $typ) = $amech_obj->eval($jsexec) };
  0            
408 0 0         if( $@ ){
409 0           print STDERR "--begin javascript to eval:\n\n${jsexec}\n\n--end javascript code.\n$whoami (via $parent) : eval of above javascript has failed: $@\n";
410             return {
411 0           'status' => -2,
412             'message' => "eval has failed: $@"
413             };
414             };
415 0 0         if( ! defined $retval ){
416 0           print STDERR "--begin javascript to eval:\n\n${jsexec}\n\n--end javascript code.\n$whoami (via $parent) : eval of above javascript has returned an undefined result.\n";
417             return {
418 0           'status' => -2,
419             'message' => "eval returned un undefined result."
420             };
421             }
422              
423 0           return $retval; # success
424             }
425              
426             # The input is a hashref of parameters
427             # the 'element-*' parameters specify some condition to be matched
428             # for example id to be such and such.
429             # The conditions can be combined either as a union (OR)
430             # or an intersection (AND). Default is intersection.
431             # The param || => 1 changes this to Union.
432             #
433             # returns a hash of results, which contains status
434             # status is -2 if javascript failed
435             # status is -1 if one or more of the specified selectors failed to match
436             # status is >=0 : the number of elements deleted
437             # an error 'message' if status < 0
438             # and various other items if status >= 0
439             sub zap {
440 0     0 1   my $params = $_[0];
441 0   0       my $parent = ( caller(1) )[3] || "N/A";
442 0           my $whoami = ( caller(0) )[3];
443              
444 0 0         if( $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops > 0 ){ print STDOUT "$whoami (via $parent) : called ...\n" }
  0            
445              
446 0 0         my $amech_obj = exists($params->{'mech-obj'}) ? $params->{'mech-obj'} : undef;
447 0 0         if( ! $amech_obj ){ print STDERR "$whoami (via $parent) : a mech-object is required via 'mech-obj'.\n"; return 0 }
  0            
  0            
448              
449             my $cbex = exists($params->{'find-cb-on-matched'}) && defined($params->{'find-cb-on-matched'})
450 0 0 0       ? [ @{$params->{'find-cb-on-matched'}} ] : [];
  0            
451             # execute our callback last, after all user-specified if any
452 0           push @$cbex, {
453             'code' => 'htmlElement.parentNode.removeChild(htmlElement); return 1;',
454             'name' => '_thezapper'
455             };
456 0           my %myparams = (
457             'find-cb-on-matched' => $cbex
458             );
459 0 0 0       if( ! (exists($params->{'insert-id-if-none-random'}) && defined($params->{'insert-id-if-none-random'}))
      0        
      0        
460             && ! (exists($params->{'insert-id-if-none'}) && defined($params->{'insert-id-if-none'}))
461             ){
462             # if no fixing of missing html element ids we ask for it and also let it be randomised
463 0           $myparams{'insert-id-if-none-random'} = '_domops_created_id';
464             }
465              
466 0           my $ret = find({
467             'mech-obj' => $amech_obj,
468             %$params,
469             # overwrite anything like these the user specified:
470             %myparams
471             });
472              
473 0 0         if( ! defined $ret ){
474 0           my $anerrmsg = perl2dump($params)."$whoami (via $parent) : error, call to find() has failed for above parameters.";
475 0           print STDERR $anerrmsg."\n";
476             return {
477 0           'status' => -2,
478             'message' => $anerrmsg
479             }
480             }
481 0 0         if( $ret->{'status'} < 0 ){
482 0           my $anerrmsg = perl2dump($params)."$whoami (via $parent) : error, call to find() has failed for above parameters with this error message: ".$ret->{'message'};
483 0           print STDERR $anerrmsg."\n";
484             return {
485 0           'status' => -2,
486             'message' => $anerrmsg
487             }
488             }
489              
490 0           return $ret; # success
491             }
492              
493             ## POD starts here
494              
495             =head1 NAME
496              
497             WWW::Mechanize::Chrome::DOMops - Operations on the DOM loaded in Chrome
498              
499             =head1 VERSION
500              
501             Version 0.03
502              
503             =head1 SYNOPSIS
504              
505             This module provides a set of tools to operate on the DOM of the
506             provided L<WWW::Mechanize::Chrome> object. Currently,
507             supported operations are:
508              
509             =over 4
510              
511             =item * C<find()> : finds HTML elements
512              
513             =item * C<zap()> : deletes HTML elements
514              
515             =back
516              
517             The selection of the HTML elements in the DOM
518             can be done in various ways:
519              
520             =over 4
521              
522             =item * by B<CSS selector>,
523              
524             =item * by B<tag>,
525              
526             =item * by B<class>.
527              
528             =item * by B<id>,
529              
530             =item * by B<name>.
531              
532             =back
533              
534             There is more information about this in section L<ELEMENT SELECTORS>.
535              
536             Here are some usage scenaria:
537              
538             use WWW::Mechanize::Chrome::DOMops qw/zap find VERBOSE_DOMops/;
539              
540             # increase verbosity: 0, 1, 2, 3
541             $WWW::Mechanize::Chrome::VERBOSE_DOMops = 3;
542              
543             # First, create a mech object and load a URL on it
544             # Note: you need google-chrome binary installed in your system!
545             my $mechobj = WWW::Mechanize::Chrome->new();
546             $mechobj->get('https://www.bbbbbbbbb.com');
547              
548             # find elements in the DOM, select by id, tag, name, or
549             # by CSS selector.
550             my $ret = find({
551             'mech-obj' => $mechobj,
552             # find elements whose class is in the provided
553             # scalar class name or array of class names
554             'element-class' => ['slanted-paragraph', 'class2', 'class3'],
555             # *OR* their tag is this:
556             'element-tag' => 'p',
557             # *OR* their name is this:
558             'element-name' => ['aname', 'name2'],
559             # *OR* their id is this:
560             'element-id' => ['id1', 'id2'],
561             # just provide a CSS selector and get done with it already
562             'element-cssselector' => 'a-css-selector',
563             # specifies that we should use the union of the above sets
564             # hence the *OR* in above comment
565             '||' => 1,
566             # this says to find all elements whose class
567             # is such-and-such AND element tag is such-and-such
568             # '&&' => 1 means to calculate the INTERSECTION of all
569             # individual matches.
570            
571             # optionally run javascript code on all those elements matched
572             'find-cb-on-matched' => [
573             {
574             'code' =><<'EOJS',
575             // the element to operate on is 'htmlElement'
576             console.log("operating on this element "+htmlElement.tagName);
577             // this is returned back in the results of find() under
578             // key "find-cb-on-matched"
579             return 1;
580             EOJS
581             'name' => 'func1'
582             }, {...}
583             ],
584             # optionally run javascript code on all those elements
585             # matched AND THEIR CHILDREN too!
586             'find-cb-on-matched-and-their-children' => [
587             {
588             'code' =><<'EOJS',
589             // the element to operate on is 'htmlElement'
590             console.log("operating on this element "+htmlElement.tagName);
591             // this is returned back in the results of find() under
592             // key "find-cb-on-matched" notice the complex data
593             return {"abc":"123",{"xyz":[1,2,3]}};
594             EOJS
595             'name' => 'func2'
596             }
597             ],
598             # optionally ask it to create a valid id for any HTML
599             # element returned which does not have an id.
600             # The text provided will be postfixed with a unique
601             # incrementing counter value
602             'insert-id-if-none' => '_prefix_id',
603             # or ask it to randomise that id a bit to avoid collisions
604             'insert-id-if-none-random' => '_prefix_id',
605              
606             # optionally, also output the javascript code to a file for debugging
607             'js-outfile' => 'output.js',
608             });
609              
610              
611             # Delete an element from the DOM
612             $ret = zap({
613             'mech-obj' => $mechobj,
614             'element-id' => 'paragraph-123'
615             });
616              
617             # Mass murder:
618             $ret = zap({
619             'mech-obj' => $mechobj,
620             'element-tag' => ['div', 'span', 'p'],
621             '||' => 1, # the union of all those matched with above criteria
622             });
623              
624             # error handling
625             if( $ret->{'status'} < 0 ){ die "error: ".$ret->{'message'} }
626             # status of -3 indicates parameter errors,
627             # -2 indicates that eval of javascript code inside the mech object
628             # has failed (syntax errors perhaps, which could have been introduced
629             # by user-specified callback
630             # -1 indicates that javascript code executed correctly but
631             # failed somewhere in its logic.
632              
633             print "Found " . $ret->{'status'} . " matches which are: "
634             # ... results are in $ret->{'found'}->{'first-level'}
635             # ... and also in $ret->{'found'}->{'all-levels'}
636             # the latter contains a recursive list of those
637             # found AND ALL their children
638              
639             =head1 EXPORT
640              
641             the sub to find element(s) in the DOM
642              
643             find()
644              
645             the sub to delete element(s) from the DOM
646              
647             zap()
648              
649             and the flag to denote verbosity (default is 0, no verbosity)
650              
651             $WWW::Mechanize::Chrome::DOMops::VERBOSE_DOMops
652              
653              
654             =head1 SUBROUTINES/METHODS
655              
656             =head2 find($params)
657              
658             It finds HTML elements in the DOM currently loaded on the
659             parameters-specified L<WWW::Mechanize::Chrome> object. The
660             parameters are:
661              
662             =over 4
663              
664             =item * C<mech-obj> : supply a L<WWW::Mechanize::Chrome>, required
665              
666             =item * C<insert-id-if-none> : some HTML elements simply do not have
667             an id (e.g. C<<p>>). If any of these elements is matched,
668             its tag and its id (empty string) will be returned.
669             By specifying this parameter (as a string, e.g. C<_replacing_empty_ids>)
670             all such elements matched will have their id set to
671             C<_replacing_empty_ids_X> where X is an incrementing counter
672             value starting from a random number. By running C<find()>
673             more than once on the same on the same DOM you are risking
674             having the same ID. So provide a different prefix every time.
675             Or use C<insert-id-if-none-random>, see below.
676              
677             =item * C<insert-id-if-none-random> : each time C<find()> is called
678             a new random base id will be created formed by the specified prefix (as with
679             C<insert-id-if-none>) plus a long random string plus the incrementing
680             counter, as above. This is supposed to be better at
681             avoiding collisions but it can not guarantee it.
682             If you are setting C<rand()>'s seed to the same number
683             before you call C<find()> then you are guaranteed to
684             have collisions.
685              
686             =item * C<find-cb-on-matched> : an array of
687             user-specified javascript code
688             to be run on each element matched in the order
689             the elements are returned and in the order of the javascript
690             code in the specified array. Each item of the array
691             is a hash with keys C<code> and C<name>. The former
692             contains the code to be run assuming that the
693             html element to operate on is named C<htmlElement>.
694             The code must end with a C<return> statement.
695             Basically the code is the body of a function
696             B<without> the preamble (signature and function name etc.)
697             and the postamble. Key C<name> is just for
698             making this process more descriptive and will
699             be printed on log messages and returned back with
700             the results. Here is an example:
701              
702             'find-cb-on-matched' : [
703             {
704             # this returns a complex data type
705             'code' => 'console.log("found id "+htmlElement.id); return {"a":"1","b":"2"};'
706             'name' => 'func1'
707             },
708             {
709             'code' => 'console.log("second func: found id "+htmlElement.id); return 1;'
710             'name' => 'func2'
711             },
712             ]
713              
714             =item * C<find-cb-on-matched-and-their-children> : exactly the same
715             as C<find-cb-on-matched> but it operates on all those HTML elements
716             matched and also all their children and children of children etc.
717              
718             =item * C<js-outfile> : optionally save the javascript
719             code (which is evaluated within the mech object) to a file.
720              
721             =item * C<element selectors> are covered in section L</ELEMENT SELECTORS>.
722              
723             =back
724              
725             B<JAVASCRIPT HELPERS>
726              
727             There is one javascript function available to all user-specified callbacks:
728              
729             =over 2
730              
731             =item * C<getAllChildren(anHtmlElement)> : it returns
732             back an array of HTML elements which are the children (at any depth)
733             of the given C<anHtmlElement>.
734              
735             =back
736              
737             B<RETURN VALUE>:
738              
739             The returned value is a hashref with at least a C<status> key
740             which is greater or equal to zero in case of success and
741             denotes the number of matched HTML elements. Or it is -3, -2 or
742             -1 in case of errors:
743              
744             =over 4
745              
746             =item * C<-3> : there is an error with the parameters passed to this sub.
747              
748             =item * C<-2> : there is a syntax error with the javascript code to evaluate
749             C<eval()> inside the mech object. Most likely this syntax error is
750             with user-specified callback code.
751              
752             =item * C<-1> : there is a logical error while running the javascript code.
753             For example a division by zero etc. This can be both in the callback code
754             as well as in the internal javascript code for edge cases not covered
755             by tests. Please report these.
756              
757             =back
758              
759             If C<status> is not negative, then this is success and its value
760             denotes the number of matched HTML elements. Which can be zero
761             or more. In this case the returned hash contains this
762              
763             "found" => {
764             "first-level" => [
765             {
766             "tag" => "NAV",
767             "id" => "nav-id-1"
768             }
769             ],
770             "all-levels" => [
771             {
772             "tag" => "NAV",
773             "id" => "nav-id-1"
774             },
775             {
776             "id" => "li-id-2",
777             "tag" => "LI"
778             },
779             ]
780             }
781              
782             Key C<first-level> contains those items matched directly while
783             key C<all-levels> contains those matched directly as well as those
784             matched because they are descendents (direct or indirect)
785             of each matched element.
786              
787             Each item representing a matched HTML element has two fields:
788             C<tag> and C<id>. Beware of missing C<id> or
789             use C<insert-id-if-none> or C<insert-id-if-none-random> to
790             fill in the missing ids.
791              
792             If C<find-cb-on-matched> or C<find-cb-on-matched-and-their-children>
793             were specified, then the returned result contains this additional data:
794              
795             "cb-results" => {
796             "find-cb-on-matched" => [
797             [
798             {
799             "name" => "func1",
800             "result" => {
801             "a" => 1,
802             "b" => 2
803             }
804             }
805             ],
806             [
807             {
808             "result" => 1,
809             "name" => "func2"
810             }
811             ]
812             ],
813             "find-cb-on-matched-and-their-children" => ...
814             },
815              
816             C<find-cb-on-matched> and/or C<find-cb-on-matched-and-their-children> will
817             be present depending on whether corresponding value in the input
818             parameters was specified or not. Each of these contain the return
819             result for running the callback on each HTML element in the same
820             order as returned under key C<found>.
821              
822             HTML elements allows for missing C<id>. So field C<id> can be empty
823             unless caller set the C<insert-id-if-none> input parameter which
824             will create a unique id for each HTML element matched but with
825             missing id. These changes will be saved in the DOM.
826             When this parameter is specified, the returned HTML elements will
827             be checked for duplicates because now all of them have an id field.
828             Therefore, if you did not specify this parameter results may
829             contain duplicate items and items with empty id field.
830             If you did specify this parameter then some elements of the DOM
831             (those matched by our selectors) will have their missing id
832             created and saved in the DOM.
833              
834             Another implication of using this parameter when
835             running it twice or more with the same value is that
836             you can get same ids. So, always supply a different
837             value to this parameter if run more than once on the
838             same DOM.
839              
840             =head2 zap($params)
841              
842             It removes HTML element(s) from the DOM currently loaded on the
843             parameters-specified L<WWW::Mechanize::Chrome> object. The params
844             are exactly the same as with L</find($params)> except that
845             C<insert-id-if-none> is ignored.
846              
847             C<zap()> is implemented as a C<find()> with
848             an additional callback for all elements matched
849             in the first level (not their children) as:
850              
851             'find-cb-on-matched' => {
852             'code' => 'htmlElement.parentNode.removeChild(htmlElement); return 1;',
853             'name' => '_thezapper'
854             };
855              
856              
857             B<RETURN VALUE>:
858              
859             Return value is exactly the same as with L</find($params)>
860              
861             =head1 ELEMENT SELECTORS
862              
863             C<Element selectors> are how one selects HTML elements from the DOM.
864             There are 5 ways to select HTML elements: by id, class, tag, name
865             or via a CSS selector. Multiple selectors can be specified
866             as well as multiple criteria in each selector (e.g. multiple
867             class names in a C<element-class> selector). The results
868             from each selector are combined into a list of
869             unique HTML elements (BEWARE of missing id fields) by
870             means of UNION or INTERSECTION of the individual matches
871              
872             These are the valid selectors:
873              
874             =over 2
875              
876             =item * C<element-class> : find DOM elements matching this class name
877              
878             =item * C<element-tag> : find DOM elements matching this element tag
879              
880             =item * C<element-id> : find DOM element matching this element id
881              
882             =item * C<element-name> : find DOM element matching this element name
883              
884             =item * C<element-cssselector> : find DOM element matching this CSS selector
885              
886             =back
887              
888             And one of these two must be used to combine the results
889             into a final list
890              
891             =over 2
892              
893             =item C<&&> : Intersection. When set to 1 the result is the intersection of all individual results.
894             Meaning that an element will make it to the final list if it was matched
895             by every selector specified. This is the default.
896              
897             =item C<||> : Union. When set to 1 the result is the union of all individual results.
898             Meaning that an element will make it to the final list if it was matched
899             by at least one of the selectors specified.
900              
901             =back
902              
903             =head1 DEPENDENCIES
904              
905             This module depends on L<WWW::Mechanize::Chrome> which, in turn,
906             depends on the C<google-chrome> executable be installed on the
907             host computer. See L<WWW::Mechanize::Chrome::Install> on
908             how to install the executable.
909              
910             =head1 AUTHOR
911              
912             Andreas Hadjiprocopis, C<< <bliako at cpan.org> >>
913              
914             =head1 BUGS
915              
916             Please report any bugs or feature requests to C<bug-www-mechanize-chrome-domops at rt.cpan.org>, or through
917             the web interface at L<https://rt.cpan.org/NoAuth/ReportBug.html?Queue=WWW-Mechanize-Chrome-DOMops>. I will be notified, and then you'll
918             automatically be notified of progress on your bug as I make changes.
919              
920             =head1 SUPPORT
921              
922             You can find documentation for this module with the perldoc command.
923              
924             perldoc WWW::Mechanize::Chrome::DOMops
925              
926              
927             You can also look for information at:
928              
929             =over 4
930              
931             =item * RT: CPAN's request tracker (report bugs here)
932              
933             L<https://rt.cpan.org/NoAuth/Bugs.html?Dist=WWW-Mechanize-Chrome-DOMops>
934              
935             =item * AnnoCPAN: Annotated CPAN documentation
936              
937             L<http://annocpan.org/dist/WWW-Mechanize-Chrome-DOMops>
938              
939             =item * CPAN Ratings
940              
941             L<https://cpanratings.perl.org/d/WWW-Mechanize-Chrome-DOMops>
942              
943             =item * Search CPAN
944              
945             L<https://metacpan.org/release/WWW-Mechanize-Chrome-DOMops>
946              
947             =back
948              
949             =head1 DEDICATIONS
950              
951             Almaz
952              
953              
954             =head1 ACKNOWLEDGEMENTS
955              
956             L<CORION> for publishing L<WWW::Mechanize::Chrome> and all its
957             contributors.
958              
959              
960             =head1 LICENSE AND COPYRIGHT
961              
962             Copyright 2019 Andreas Hadjiprocopis.
963              
964             This program is free software; you can redistribute it and/or modify it
965             under the terms of the the Artistic License (2.0). You may obtain a
966             copy of the full license at:
967              
968             L<http://www.perlfoundation.org/artistic_license_2_0>
969              
970             Any use, modification, and distribution of the Standard or Modified
971             Versions is governed by this Artistic License. By using, modifying or
972             distributing the Package, you accept this license. Do not use, modify,
973             or distribute the Package, if you do not accept this license.
974              
975             If your Modified Version has been derived from a Modified Version made
976             by someone other than you, you are nevertheless required to ensure that
977             your Modified Version complies with the requirements of this license.
978              
979             This license does not grant you the right to use any trademark, service
980             mark, tradename, or logo of the Copyright Holder.
981              
982             This license includes the non-exclusive, worldwide, free-of-charge
983             patent license to make, have made, use, offer to sell, sell, import and
984             otherwise transfer the Package with respect to any patent claims
985             licensable by the Copyright Holder that are necessarily infringed by the
986             Package. If you institute patent litigation (including a cross-claim or
987             counterclaim) against any party alleging that the Package constitutes
988             direct or contributory patent infringement, then this Artistic License
989             to you shall terminate on the date that such litigation is filed.
990              
991             Disclaimer of Warranty: THE PACKAGE IS PROVIDED BY THE COPYRIGHT HOLDER
992             AND CONTRIBUTORS "AS IS' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES.
993             THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR
994             PURPOSE, OR NON-INFRINGEMENT ARE DISCLAIMED TO THE EXTENT PERMITTED BY
995             YOUR LOCAL LAW. UNLESS REQUIRED BY LAW, NO COPYRIGHT HOLDER OR
996             CONTRIBUTOR WILL BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, OR
997             CONSEQUENTIAL DAMAGES ARISING IN ANY WAY OUT OF THE USE OF THE PACKAGE,
998             EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
999              
1000              
1001             =cut
1002              
1003             1; # End of WWW::Mechanize::Chrome::DOMops