File Coverage

blib/lib/WebService/ISBNDB/Iterator.pm
Criterion Covered Total %
statement 21 83 25.3
branch 0 20 0.0
condition 0 9 0.0
subroutine 7 13 53.8
pod 5 6 83.3
total 33 131 25.1


line stmt bran cond sub pod time code
1             ###############################################################################
2             #
3             # This file copyright (c) 2006-2008 by Randy J. Ray, all rights reserved
4             #
5             # See "LICENSE" in the documentation for licensing and redistribution terms.
6             #
7             ###############################################################################
8             #
9             # $Id: Iterator.pm 47 2008-04-06 10:12:34Z $
10             #
11             # Description: This class provides an iterator in the spirit of chapter 4
12             # of "Higher Order Perl", by Mark-Jason Dominus. Not all of
13             # this class follows his style to the letter, but the
14             # concepts here have their basis there.
15             #
16             # The role of the Iterator is to encapsulate a set of
17             # records returned by a call to the isbndb.com service. The
18             # set may be a disjoint set, such as the authors or subjects
19             # associated with a book. Or, the results may be from a call
20             # to a class' search() method, which can return potentially
21             # hundreds of records.
22             #
23             # Functions: BUILD
24             # first
25             # next
26             # all
27             # fetch_next_page
28             # reset
29             #
30             # Libraries: Class::Std
31             # Error
32             #
33             # Global Consts: $VERSION
34             #
35             ###############################################################################
36              
37             package WebService::ISBNDB::Iterator;
38              
39 1     1   17442 use 5.006;
  1         4  
  1         47  
40 1     1   6 use strict;
  1         3  
  1         39  
41 1     1   6 use warnings;
  1         2  
  1         39  
42 1     1   5 use vars qw($VERSION);
  1         2  
  1         47  
43              
44 1     1   5 use Class::Std;
  1         1  
  1         9  
45 1     1   98 use Error;
  1         1  
  1         8  
46              
47 1     1   52 use WebService::ISBNDB::API;
  1         1  
  1         8  
48              
49             $VERSION = "0.10";
50              
51             my %total_results : ATTR(:name :default<0> );
52             my %page_size : ATTR(:name :default<10>);
53             my %page_number : ATTR(:name :default<1> );
54             my %shown_results : ATTR(:name :default<0> );
55             my %contents : ATTR(:name :default<> );
56             my %request_args : ATTR(:get );
57             my %index : ATTR(:name :default<0> );
58             my %agent : ATTR(:name :default<> );
59             my %fetch_page_hook : ATTR(:name :default<> );
60             my %first_contents : ATTR;
61              
62             ###############################################################################
63             #
64             # Sub Name: BUILD
65             #
66             # Description: Validate "contents" (array-ref) and "request_args" (hash-ref),
67             # copy "request_args" into the attribute store (it has no
68             # explicit setter), default "agent" if it was not given, and
69             # keep a copy of the initial "contents" for use by reset().
70             #
71             # Arguments: NAME IN/OUT TYPE DESCRIPTION
72             # $self in ref Object
73             # $id in scalar The unique ID for the object
74             # $args in hashref The arguments passed to new()
75             #
76             # Globals: %agent
77             # %request_args
78             #
79             # Returns: Success: $self
80             # Failure: throws Error::Simple
81             #
82             ###############################################################################
83             sub BUILD
84             {
85 0     0 0   my ($self, $id, $args) = @_;
86              
87 0 0 0       throw Error::Simple("Argument 'contents' cannot be null")
88             unless ($args->{contents} and
89             (ref($args->{contents}) eq 'ARRAY'));
90 0 0 0       throw Error::Simple("Argument 'request_args' cannot be null")
91             unless ($args->{request_args} and
92             (ref($args->{request_args}) eq 'HASH'));
93             # Copy the args to the local attribute store, making sure to deep-copy any
94             # array-refs.
95 0           $request_args{$id} = {};
96 0           for (keys %{$args->{request_args}})
  0            
97             {
98 0 0         if (ref $args->{request_args}->{$_})
99             {
100 0           $request_args{$id}->{$_} = [ @{$args->{request_args}->{$_}} ];
  0            
101             }
102             else
103             {
104 0           $request_args{$id}->{$_} = $args->{request_args}->{$_};
105             }
106             }
107              
108 0 0         $args->{agent} = WebService::ISBNDB::API->get_default_agent()
109             unless $args->{agent};
110 0           $first_contents{$id} = [ @{$args->{contents}} ];
  0            
111              
112 0           $self;
113             }
114              
115             ###############################################################################
116             #
117             # Sub Name: first
118             #
119             # Description: Return the first element in the list of results for this
120             # iterator.
121             #
122             # Arguments: NAME IN/OUT TYPE DESCRIPTION
123             # $self in ref Object
124             #
125             # Globals: %first_contents
126             #
127             # Returns: Success: Object instance or undef
128             #
129             ###############################################################################
130             sub first
131             {
132 0     0 1   my $self = shift;
133              
134 0           $first_contents{ident $self}->[0];
135             }
136              
137             ###############################################################################
138             #
139             # Sub Name: next
140             #
141             # Description: Return the next element in the list, or undef if the
142             # iterator is exhausted. If the list is at the end, but there
143             # are pages yet to be fetched, get the next page.
144             #
145             # Arguments: NAME IN/OUT TYPE DESCRIPTION
146             # $self in ref Object
147             #
148             # Returns: Success: Object or undef
149             # Failure: throws Error::Simple
150             #
151             ###############################################################################
152             sub next
153             {
154 0     0 1   my $self = shift;
155              
156 0           my $index = $self->get_index;
157 0           my $contents = $self->get_contents;
158 0           my $total = $self->get_total_results;
159 0           my $page_size = $self->get_page_size;
160 0           my $retval;
161              
162 0 0 0       if ($index and ($index % $page_size == 0))
    0          
163             {
164             # We've gone past our internal cache, but there are still pages to be
165             # fetched from isbndb.com.
166 0           $self->fetch_next_page;
167             # Because the previous method changed the internal contents list, and
168             # $contents points to the same list, the next statement is perfectly
169             # fine.
170 0           $retval = $contents->[$index++ % $page_size];
171             }
172             elsif ($index % $page_size <= $#$contents)
173             {
174             # We still have enough data held internally in @$contents
175 0           $retval = $contents->[$index++ % $page_size];
176             }
177             else
178             {
179             # The iterator is out of elements.
180 0           $retval = undef;
181             }
182              
183             # Set new index
184 0           $self->set_index($index);
185              
186 0           $retval;
187             }
188              
189             ###############################################################################
190             #
191             # Sub Name: all
192             #
193             # Description: Return a list or list-reference of all the elements in this
194             # iterator. Leaves the iterator in an exhausted state, but
195             # always starts from the beginning (via a call to reset())
196             # regardless of where the iterator was before this call.
197             #
198             # Arguments: NAME IN/OUT TYPE DESCRIPTION
199             # $self in ref Object
200             #
201             # Returns: Success: list or list-reference, depending on wantarray
202             # Failure: throws Error::Simple
203             #
204             ###############################################################################
205             sub all
206             {
207 0     0 1   my $self = shift;
208              
209 0           my @all;
210 0           $self->reset;
211 0           while ($_ = $self->next)
212             {
213 0           push(@all, $_);
214             }
215              
216 0 0         return wantarray ? @all : \@all;
217             }
218              
219             ###############################################################################
220             #
221             # Sub Name: reset
222             #
223             # Description: Restore the initial contents, page number (1) and index (0),
224             # so that the next bump of the iterator starts at the beginning.
225             #
226             # Arguments: NAME IN/OUT TYPE DESCRIPTION
227             # $self in ref Object
228             #
229             # Returns: 0
230             #
231             ###############################################################################
232             sub reset
233             {
234 0     0 1   my $self = shift;
235              
236 0           my $contents = $self->get_contents; # Returns the actual list-reference
237             # Explicitly overwrite any current contents with the initial set
238 0           @$contents = @{$first_contents{ident $self}};
  0            
239 0           $self->set_page_number(1);
240 0           $self->set_index(0);
241              
242 0           0;
243             }
244              
245             ###############################################################################
246             #
247             # Sub Name: fetch_next_page
248             #
249             # Description: Retrieve the next page of results from the service and
250             # replace the current contents list with them.
251             #
252             # Arguments: NAME IN/OUT TYPE DESCRIPTION
253             # $self in ref Object
254             #
255             # Returns: Success: $self
256             # Failure: throws Error::Simple
257             #
258             ###############################################################################
259             sub fetch_next_page
260             {
261 0     0 1   my $self = shift;
262              
263 0           my $agent = $self->get_agent;
264 0           my $req_args = $self->get_request_args;
265 0           my $contents = $self->get_contents;
266 0           my $page_size = $self->get_page_size;
267 0           my $page_num = $self->get_page_number;
268 0           my $shown = $self->get_shown_results;
269              
270             # In theory, we shouldn't get called by next() when we've already read the
271             # last page from the source. However, it's better to be safe.
272 0 0         return $self if (($self->get_index + 1) == $self->get_total_results);
273              
274 0           my %args = %$req_args;
275 0           $args{page_number} = ++$page_num;
276              
277 0 0         if (ref(my $hook = $self->get_fetch_page_hook) eq 'CODE')
278             {
279 0           eval { $hook->($self, \%args); };
  0            
280 0 0         throw Error::Simple("Error invoking fetch-page hook: $@") if $@;
281             }
282              
283 0           my $newcontent = $agent->request_all($contents->[$#$contents], \%args);
284             # If the request failed, it already threw an uncaught exception
285 0           @$contents = @{$newcontent->get_contents}; # Overwrite @$contents
  0            
286             # Update the tracking values
287 0           $self->set_page_number($page_num);
288 0           $self->set_shown_results($newcontent->get_shown_results);
289              
290 0           $self;
291             }
292              
293             1;
294              
295             =pod
296              
297             =head1 NAME
298              
299             WebService::ISBNDB::Iterator - Iterator class for large result-sets
300              
301             =head1 SYNOPSIS
302              
303             # The search() method of API-derived classes returns an Iterator
304             $iter = WebService::ISBNDB::API->search(Books =>
305             { author =>
306             'poe_edgar_allan' });
307              
308             print $iter->get_total_results, " books found.\n";
309             while ($book = $iter->next)
310             {
311             print $book->get_title, "\n";
312             }
313              
314             # Reset the iterator
315             $iter->reset;
316              
317             # Do something else with all the elements found by the search
318             for ($iter->all)
319             {
320             ...
321             }
322              
323             =head1 DESCRIPTION
324              
325             This class provides an iterator object to abstract the results from a search.
326             Searches may return anywhere from no matches to thousands. Besides the fact
327             that trying to allocate all of that data at once could overwhelm system
328             memory, the B<isbndb.com> service returns data in "pages", rather than risk
329             sending an overwhelming response.
330              
331             The iterator stores information about the initial request, and as the user
332             progresses past the in-memory slice of data, it makes subsequent requests
333             behind the scenes to refresh the data until the end of the results-set is
334             reached.
335              
336             It is not expected that users will manually create iterators. Iterators will
337             be created as needed by the C<search> method in the API classes.
338              
339             =head1 METHODS
340              
341             Methods are broken in the following groups:
342              
343             =head2 Constructor
344              
345             =over 4
346              
347             =item new($ARGS)
348              
349             The constructor is based on the B<Class::Std> model. The argument it takes is
350             a hash-reference whose key/value pairs are attribute names and values. The
351             attributes are defined below, in L</"Accessor Methods">.
352              
353             The only I<required> attributes in the arguments list are C<request_args> and
354             C<contents>. The first
355             is the set of arguments used in the initial request made to the service. They
356             are reused when subsequent pages need to be fetched. The second is the initial
357             set of objects, fetched from the first page of results.
358              
359             =back
360              
361             =head2 Iterator Methods
362              
363             These methods are the general-use interface between the user and the iterator.
364             In most cases, an application will only need to use the methods listed here:
365              
366             =over 4
367              
368             =item first
369              
370             Return the first element in the results-set. Regardless of the current position
371             within the iterator, this is always the very first element (or C<undef>, if
372             there were no elements found by the search). This does not alter the position
373             of the internal pointer, or trigger any additional requests to the data
374             source.
375              
376             =item next
377              
378             Return the next element off the iterator, or C<undef> if the iterator is
379             exhausted. All elements returned by an iterator descend from
380             B<WebService::ISBNDB::API>. All elements in a given iterator will always be
381             from the same implementation class. The iterator does not explicitly
382             identify the class of the objects, since the application had to have had some
383             degree of knowledge before making the call to C<search>.
384              
385             =item all
386              
387             Returns the full set of results from the iterator, from the beginning
388             to the end (if the iterator has already been read some number of times, it
389             is reset before the list is constructed). The return value is the list of
390             elements when called in a list-context, or a list-reference of the elements
391             when called in a scalar context. The iterator will be in an exhausted state
392             after this returns.
393              
394             =item reset
395              
396             Resets the internal counter within the iterator to the beginning of the list.
397             This allows the iterator to be re-used when and if the user desires.
398              
399             =item fetch_next_page
400              
401             When a request (via next()) goes past the internal set of data, this method is
402             called to request the next page of results from the data source, until the
403             last page has been read. This method alters the C<contents>, C<page_number>
404             and C<shown_results> attributes. If the user has set a hook (via
405             set_fetch_page_hook()), it is called with the arguments for the request just
406             prior to the request itself. The arguments are those provided in the
407             C<request_args> attribute, plus a C<page_number> argument set to the page
408             that is being requested.
409              
410             =back
411              
412             =head2 Accessor Methods
413              
414             The accessor methods provide access to the internal attributes of the object.
415             These attributes are:
416              
417             =over 4
418              
419             =item total_results
420              
421             The total number of results in the result-set, not to be confused with the
422             number of results currently in memory.
423              
424             =item page_size
425              
426             The size of the "page" returned by the data source, in turn the maximum
427             number of elements held internally by the iterator at any given time. As the
428             index proceeds to the end of the in-memory list, a new page is fetched and
429             this many new elements replace the previous set internally.
430              
431             =item page_number
432              
433             The number of the page of results currently held within the iterator. When the
434             iterator fetches a new page, this is incremented. When the iterator is reset,
435             this is set to 1.
436              
437             =item shown_results
438              
439             The number of results currently held within the iterator. When the last page
440             of a results-set is fetched, it may have fewer than C<page_size> elements in
441             it. This attribute will always identify the number of elements currently kept
442             internally.
443              
444             =item contents
445              
446             The list reference used internally to store the current set of objects for
447             the page of results held by the iterator. Be careful with this value, as
448             changing its contents can change the internal state of the iterator.
449              
450             =item request_args
451              
452             The hash reference that stores the original request arguments used to fetch
453             the initial page of data from the data source. This is used to make any
454             additional requests for subsequent pages, as needed. Be careful with the
455             value, as changing its contents can affect the iterator's ability to fetch
456             further pages.
457              
458             =item index
459              
460             The integer value that marks the current position within the iterator. The
461             value is the position within the whole set of results, not just within the
462             single page held internally.
463              
464             =item agent
465              
466             The B<WebService::ISBNDB::Agent> instance that is used to fetch additional
467             pages as needed. It is generally set at object-construction time by the
468             API object that creates the iterator. If it is not specified in the
469             constructor, the C<get_default_agent> method of B<WebService::ISBNDB::API>
470             is called.
471              
472             =item fetch_page_hook
473              
474             If this attribute has a value that is a code-reference, the code-reference
475             is invoked with the arguments that are going to be passed to the C<request_all>
476             method of the C<agent>. The hook (or callback) will receive the iterator
477             object referent and the hash-reference of arguments, as if it had been called
478             as a method in this class. The arguments are those stored in C<request_args>
479             as well as one additional argument, C<page_number>, containing the number of
480             the page being requested.
481              
482             Note that the hook will B<not> be called for the first page fetched from the
483             data source. That is because that fetch is done outside the scope of the
484             iterator class, and the data from that initial fetch is provided when the
485             iterator is constructed.
486              
487             =back
488              
489             Note that for most of the attributes, only the "get" accessor is documented.
490             Users should not need to manually set any of the attributes (except for
491             C<fetch_page_hook>) unless they are sub-classing this class:
492              
493             =over 4
494              
495             =item get_total_results
496              
497             =item get_page_size
498              
499             =item get_page_number
500              
501             =item get_shown_results
502              
503             =item get_contents
504              
505             =item get_request_args
506              
507             =item get_index
508              
509             =item get_agent
510              
511             Return the relevant attribute's value. Note, again, that get_contents() and
512             get_request_args() return the actual reference value used internally. Changes
513             to the contents of those reference values may impact the behavior of the
514             iterator itself.
515              
516             =item set_fetch_page_hook($HOOK)
517              
518             Set a hook (callback) to be called each time the iterator has to fetch a new
519             page from the data source. The value is a code-reference, and is called with
520             the iterator object and a hash-reference of the request arguments as
521             parameters. Any return value is ignored. If the hook dies, an exception
522             is thrown by fetch_next_page() with the error message.
523              
524             =item get_fetch_page_hook
525              
526             Get the current hook value, if any.
527              
528             =back
529              
530             =head1 SEE ALSO
531              
532             L<WebService::ISBNDB::API>, L<Class::Std>
533              
534             =head1 AUTHOR
535              
536             Randy J. Ray E<lt>rjray@blackperl.comE<gt>
537              
538             =head1 LICENSE
539              
540             This module and the code within are
541             released under the terms of the Artistic License 2.0
542             (http://www.opensource.org/licenses/artistic-license-2.0.php). This
543             code may be redistributed under either the Artistic License or the GNU
544             Lesser General Public License (LGPL) version 2.1
545             (http://www.opensource.org/licenses/lgpl-license.php).
546              
547             =cut