File Coverage

blib/lib/DataLoader.pm
Criterion Covered Total %
statement 137 137 100.0
branch 50 50 100.0
condition 34 35 97.1
subroutine 27 27 100.0
pod 8 8 100.0
total 256 257 99.6


line stmt bran cond sub pod time code
1             package DataLoader;
2              
3             =encoding utf8
4              
5             =head1 NAME
6              
7             DataLoader - automatically batch and cache repeated data loads
8              
9             =head1 SYNOPSIS
10              
11             use DataLoader;
12             my $user_loader = DataLoader->new(sub {
13             my @user_ids = @_;
14             return getUsers(@user_ids); # a Mojo::Promise
15             });
16              
17             # Now fetch your data whenever (asynchronously)
18             my $data = Mojo::Promise->all(
19             $user_loader->load(1),
20             $user_loader->load(2),
21             $user_loader->load(2),
22             );
23              
24             # getUsers is called only once - with (1,2)
25              
26             =head1 DESCRIPTION
27              
28             L<DataLoader> is a generic utility to be used as part of your application's data
29             fetching layer. It provides a consistent API over various backends and reduces requests
30             to those backends via automatic batching and caching of data.
31              
32             It is primarily useful for GraphQL APIs where each resolver independently requests
33             the object(s) it wants; this loader ensures that those requests are batched together
34             and not repeated multiple times.
35              
36             It is a port of the JavaScript version available at L<https://github.com/graphql/dataloader>.
37              
38             =head2 Batching
39              
40             To get started, create a batch loading function that maps a list of keys (typically
41             strings/integers) to a L<Mojo::Promise> that returns a list of values.
42              
43             my $user_loader = DataLoader->new(\&myBatchGetUsers);
44              
45             Then load individual values from the loader. All individual loads that occur within a
46             single tick of the event loop will be batched together.
47              
48             $user_loader->load(1)
49             ->then(fun($user) { $user_loader->load($user->invitedById) })
50             ->then(fun($invitedBy) { say "User 1 was invited by ", $invitedBy->name });
51            
52             # Somewhere else in the application
53             $user_loader->load(2)
54             ->then(fun($user) { $user_loader->load($user->lastInvitedId) })
55             ->then(fun($lastInvited) { say "User 2 last invited ", $lastInvited->name });
56              
57             A naive application may have issued four round-trips to the backend for the required
58             information, but with DataLoader this application will make at most two.
59              
60             =head3 Batch function
61              
62             The batch loading function takes a list of keys as input, and returns a L<Mojo::Promise>
63             that resolves to a list of values. The ordering of the values should correspond to the
64             ordering of the keys, with any missing values filled in with C<undef>. For example, if
65             the input is C<(2,9,6,1)> and the backend service (e.g. database) returns:
66              
67             { id => 9, name => 'Chicago' }
68             { id => 1, name => 'New York' }
69             { id => 2, name => 'San Francisco' }
70              
71             The backend has returned results in a different order than we requested, and omitted a
72             result for key C<6>, presumably because no value exists for that key.
73              
74             We need to re-order these results to match the original input C<(2,9,6,1)>, and include
75             an undef result for C<6>:
76              
77             [
78             { id => 2, name => 'San Francisco' },
79             { id => 9, name => 'Chicago' },
80             undef,
81             { id => 1, name => 'New York' },
82             ]
83              
84             There are two typical error cases in the batch loading function. One is you get an error
85             that invalidates the whole batch, for example you do a DB query for all input rows, and
86             the DB fails to connect. In this case, simply C<die> and the error will be passed through
87             to all callers that are waiting for values included in this batch. The error is assumed
88             to be transient, and nothing will be cached.
89              
90             The second case is where some of the batch succeeds but some fails. In this case, use
91             C<< DataLoader->error >> to create error objects, and mix them in with the successful
92             values:
93              
94             [
95             { id => 2, name => 'San Francisco' }, # this succeeded
96             DataLoader->error("no permission"), # this failed (id 9)
97             undef, # this item is missing (id 6)
98             { id => 1, name => 'New York' }, # this succeeded
99             ]
100              
101             Now callers that have called C<< load(9) >> will get an exception. Callers for id 6
102             will receive C<undef> and callers for ids 1 and 2 will get hashrefs of data. Additionally,
103             these errors will be cached (see 'Caching Errors' below).
104              
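Putting this together, here is a minimal sketch of a batch loading function, assuming
L<Mojo::Pg> and a hypothetical C<cities> table (the connection string and table are
illustrative only; adapt the query to your own backend):

    use Mojo::Pg;
    my $pg = Mojo::Pg->new($ENV{DATABASE_URL});    # hypothetical connection string

    my $city_loader = DataLoader->new(sub {
        my @ids = @_;
        return $pg->db->select_p('cities', undef, {id => \@ids})->then(sub {
            my ($results) = @_;
            # Index the rows by id, then re-order to match @ids;
            # ids with no matching row become undef automatically.
            my %by_id = map { $_->{id} => $_ } @{$results->hashes};
            return map { $by_id{$_} } @ids;
        });
    });
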
105             =head2 Caching
106              
107             DataLoader provides a simple memoization cache for all loads that occur within a single
108             request for your application. Multiple loads of the same key result in only one
109             backend request, and additionally, the same object in memory is returned each time,
110             reducing memory use.
111              
112             my $user_loader = DataLoader->new(...);
113             my $promise1a = $user_loader->load(1);
114             my $promise1b = $user_loader->load(1);
115             is( refaddr($promise1a), refaddr($promise1b) ); # same object
116              
117             =head3 Caching per-Request
118              
119             The suggested way to use DataLoader is to create a new loader when a request (for example,
120             a GraphQL request) begins, and destroy it once the request ends. This prevents duplicate
121             backend operations and provides a consistent view of data across the request.
122              
123             Using the same loader for multiple requests is not recommended as it may result in cached
124             data being returned unexpectedly, or sensitive data being leaked to other users who should
125             not be able to view it.
126              
127             The default cache used by DataLoader is a simple hashref that stores all values for all
128             keys loaded during the lifetime of the request; it is useful when request lifetime is
129             short. If other behaviour is desired, see the C<cache_hashref> constructor parameter.
130              
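As a sketch of the per-request pattern (C<batch_get_users> here is a hypothetical batch
function, and the surrounding framework code is up to you):

    # At the start of each request, build a fresh set of loaders...
    my $loaders = {
        user => DataLoader->new(\&batch_get_users),
    };

    # ...pass them to the code resolving this request, and let them go out of
    # scope when the request ends, discarding the per-request cache with them.
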
131             =head3 Clearing Cache
132              
133             It is sometimes necessary to clear values from the cache, for example after running an
134             SQL UPDATE or similar, to prevent out of date values from being used. This can be done
135             with the C<clear> method.
136              
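For example (a sketch, where C<$db> stands in for whatever database handle your
application uses):

    $db->query('UPDATE users SET name = ? WHERE id = ?', $new_name, $user_id);
    $user_loader->clear($user_id);    # the next load($user_id) hits the backend again
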
137             =head3 Caching Errors
138              
139             If the batch load fails (throws an exception or returns a rejected Promise), the requested
140             values will not be cached. However, if the batch function returns a C<DataLoader::Error>
141             instance for individual value(s), those errors will be cached to avoid frequently loading
142             the same error.
143              
144             If you want to avoid this, you can catch the Promise error and clear the cache immediately
145             afterwards, e.g.
146              
147             $user_loader->load(1)->catch(fun ($error) {
148             if ($should_clear_error) {
149             $user_loader->clear(1);
150             }
151             die $error; # or whatever
152             });
153              
154             =head3 Priming the Cache
155              
156             It is also possible to prime the cache with data. For example if you fetch a user by ID,
157             you could also prime a username-based cache:
158              
159             $user_by_id->load(1)->then(fun ($user) {
160             $user_by_name->prime($user->name, $user);
161             ...
162             });
163              
164             If your backend query includes additional data, you could cache that too:
165              
166             for my $tag (@{$user->tags}) {
167             $tag_loader->prime($tag->id, $tag->name);
168             }
169              
170             If you update a value in the backend, you can update the cache to save queries later:
171              
172             $user = $user->update(favourite_color => 'red');
173             $user_cache->clear($user->id)->prime($user->id, $user);
174              
175             =head2 Using outside of GraphQL
176              
177             L<DataLoader> assumes the use of L<Mojolicious>, specifically its promise implementation
178             L<Mojo::Promise>. The L<Mojo::Reactor::EV> backend is recommended (and is automatically
179             used provided you have L<EV> installed) for optimal batching, although other backends will
180             also work.
181              
182             With the EV backend, DataLoader will work fine with any L<Mojo::IOLoop>-based code. See the
183             unit tests of this module for examples.
184              
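For instance, a plain script can block on the returned promise with C<wait> (a sketch;
C<batch_get_users> is a hypothetical batch function returning hashrefs of user data):

    my $loader = DataLoader->new(\&batch_get_users);

    $loader->load_many(1, 2, 3)->then(sub {
        my @users = @_;
        print $_->{name}, "\n" for @users;
    })->wait;    # runs Mojo::IOLoop until the promise has settled
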
185             It would be possible to write a version of DataLoader that depends only on AnyEvent/EV
186             and does not depend on Mojolicious. Let me know if there is interest.
187              
188             =head1 METHODS
189              
190             =over
191              
192             =cut
193              
194 10     10   851884 use v5.14;
  10         119  
195 10     10   53 use warnings;
  10         19  
  10         283  
196              
197 10     10   45 use Carp qw(croak);
  10         38  
  10         508  
198 10     10   4791 use Data::Dump qw(dump);
  10         52488  
  10         592  
199 10     10   4840 use Mojo::IOLoop;
  10         3327255  
  10         72  
200 10     10   506 use Mojo::Promise;
  10         25  
  10         115  
201 10     10   358 use Scalar::Util qw(blessed);
  10         21  
  10         474  
202              
203 10     10   4580 use DataLoader::Error;
  10         33  
  10         16273  
204              
205             our $VERSION = '0.02';
206              
207             =item new ( batch_load_function, %options )
208              
209             Creates a public API for loading data from a particular back-end with unique keys,
210             such as the C<id> column of an SQL table. You must provide a batch loading function
211             (described above).
212              
213             Each instance gets, by default, a unique memoized cache of all loads made during the
214             lifetime of the object. Consider a different cache for long-lived applications, and
215             consider a new instance per request if each request has users with different access
216             permissions or where fresh data is desired for each request.
217              
218             Options (a combined sketch follows this list):
219              
220             =over
221              
222             =item batch (true)
223              
224             Set to false to disable batching: the batch load function will be invoked once for
225             each key.
226              
227             =item max_batch_size (Infinity)
228              
229             If set, limit the maximum number of items to pass to the batch load function at once.
230              
231             If unset (undef or missing), there will be no limit.
232              
233             =item cache (true)
234              
235             Set to false to disable caching, which will create a new Promise and new key in the
236             batch load function for every load of the same key. (This means the batch loader function
237             may be called with duplicate keys).
238              
239             =item cache_key_func (identity function)
240              
241             Maps a load key C<$_> to a cache key. Useful when using objects as keys and two
242             different objects should be considered equivalent, or to handle case-
243             insensitivity, etc.
244              
245             For example: C<< cache_key_func => sub { lc } >> for case-insensitive comparisons.
246              
247             Consider two objects equivalent as long as their id is the same:
248              
249             ... cache_key_func => sub { $_->{id} }
250              
251             Compare the content of objects:
252              
253             use Storable;
254             ... cache_key_func => sub { freeze($_) }
255              
256             =item cache_hashref ({})
257              
258             Pass a custom hashref for caching. You can tie this hashref to any tie module to get
259             custom behaviour (such as LRU). (L<CHI> support will be considered if there is interest)
260              
261             =back
262              
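A combined sketch of these options (C<batch_get_users> and the values shown are
illustrative only):

    my $loader = DataLoader->new(
        \&batch_get_users,                # hypothetical batch loading function
        max_batch_size => 50,             # split large queues into batches of 50 keys
        cache_key_func => sub { lc },     # treat keys case-insensitively
    );
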
263             =cut
264              
265             sub new {
266 52     52 1 279694 my ($class, $batch_load_func, %opts) = @_;
267              
268 52   100     326 my $do_batch = delete $opts{batch} // 1;
269 52   100     189 my $max_batch_size = delete $opts{max_batch_size} // undef;
270 52   100     160 my $do_cache = delete $opts{cache} // 1;
271 52   100     164 my $cache_key_func = delete $opts{cache_key_func} // undef;
272 52   100     157 my $cache_map = delete $opts{cache_hashref} // {};
273              
274 52 100       150 if (keys %opts) {
275 1         14 croak "unknown option " . join(', ', sort keys %opts);
276             }
277              
278 51 100 100     219 if ((ref $batch_load_func || '') ne 'CODE') {
279 2         5 croak "batch_load_func must be a function that accepts a list of keys"
280             . " and returns a Mojo::Promise resolving to a list of values, but"
281             . " got: " . dump($batch_load_func);
282             }
283 49 100 100     143 if (defined $cache_key_func && (ref $cache_key_func || '') ne 'CODE') {
      100        
284 2         16 croak "cache_key_func must be a function that returns the cache key for key=\$_";
285             }
286 47 100 100     188 if (!ref $cache_map || ref $cache_map ne 'HASH') {
287 2         25 croak "cache_hashref must be a HASH ref (tied or plain)";
288             }
289 45 100       93 if (defined $max_batch_size) {
290 4 100       31 $max_batch_size =~ /^\d+$/ or croak "max_batch_size must be a positive integer";
291 3 100       17 $max_batch_size > 0 or croak "max_batch_size cannot be zero";
292             }
293              
294 43         302 return bless {
295             batch_load_func => $batch_load_func,
296             do_batch => $do_batch,
297             max_batch_size => $max_batch_size,
298             do_cache => $do_cache,
299             cache_key_func => $cache_key_func,
300             promise_cache => $cache_map,
301             queue => [],
302             }, $class;
303             }
304              
305             =item load ( key )
306              
307             Loads a key, returning a L<Mojo::Promise> for the value represented by that key.
308              
309             =cut
310              
311             # False-positive on Perl::Critic < 1.119 due to @_ size tests
312             ## no critic (RequireArgUnpacking)
313             sub load {
314 132     132 1 59291 my ($self, $key) = @_;
315              
316 132 100       350 @_ >= 2 or croak "load: key is required";
317 131 100       259 defined $key or croak "load: key must be defined";
318 130 100       262 @_ == 2 or croak "load: too many arguments, expected 1";
319              
320 129         267 my $cache_key = $self->_cache_key($key);
321              
322             # If caching and there is a cache-hit, return cached promise
323 129 100 100     585 if ($self->{do_cache} && (my $promise = $self->{promise_cache}{$cache_key})) {
324 27         134 return $promise;
325             }
326              
327             # Otherwise, produce a new Promise for this value
328 102         393 my $promise = Mojo::Promise->new;
329              
330             # JS code calls new Promise((resolve, reject) => ...) with the below code
331             # but this should be equivalent.
332 102         3351 push @{$self->{queue}}, [$key, $promise];
  102         262  
333              
334             # Determine if a dispatch of this queue should be scheduled.
335             # A single dispatch should be scheduled per queue at the time when the queue
336             # changes from 'empty' to 'full'
337 102 100       177 if (@{$self->{queue}} == 1) {
  102         278  
338 63 100       148 if ($self->{do_batch}) {
339             # Schedule next tick, to allow all batch calls this frame to be batched
340             # together.
341              
342             # We prefer an idle watcher as it will execute after all Promises are
343             # resolved (batching as much as possible). But Mojo::IOLoop's API does
344             # not provide this. And we cannot assume AnyEvent can be used.
345             # The best we can do is detect the EV backend and use EV::idle.
346 60 100       245 if (Mojo::IOLoop->singleton->reactor->isa('Mojo::Reactor::EV')) {
347             # Capture the lexical inside the coderef to keep it alive until
348             # the callback is finished.
349 59         635 my $w; $w = EV::idle(sub {
350 59     59   17686 $self->_dispatch_queue;
351 59         7988 undef $w;
352 59         399 });
353             }
354             else {
355             # We fall back to next_tick, which is less efficient.
356 1     1   33 Mojo::IOLoop->next_tick(sub { $self->_dispatch_queue });
  1         797  
357             }
358             }
359             else {
360             # Dispatch immediately
361 3         8 $self->_dispatch_queue;
362             }
363             }
364              
365             # If caching, cache this promise
366 102 100       876 if ($self->{do_cache}) {
367 80         237 $self->{promise_cache}{$cache_key} = $promise;
368             }
369              
370 102         440 return $promise;
371             }
372              
373             =item load_many ( @keys )
374              
375             Loads multiple keys, returning a Promise that resolves to a list of values.
376              
377             Equivalent to C<< DataLoader->all(map { $loader->load($_) } @keys) >>.
378              
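For example (one value per key, in the same order as the keys):

    $loader->load_many(1, 2, 3)->then(sub {
        my ($user1, $user2, $user3) = @_;
        ...
    });
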
379             =cut
380              
381             sub load_many {
382 7     7 1 19581 my ($self, @keys) = @_;
383              
384 7         20 return $self->all(map { $self->load($_) } @keys);
  14         25  
385             }
386              
387             =item clear ( key )
388              
389             Clears the value at C<key> from the cache, if it exists. Returns itself for method
390             chaining.
391              
392             =cut
393              
394             sub clear {
395 17     17 1 26588 my ($self, $key) = @_;
396              
397 17         42 my $cache_key = $self->_cache_key($key);
398 17         61 delete $self->{promise_cache}{$cache_key};
399              
400 17         76 return $self;
401             }
402              
403             =item clear_all ()
404              
405             Clears the entire cache. To be used when some event results in unknown invalidations
406             across this particular L<DataLoader>. Returns itself for method chaining.
407              
408             =cut
409              
410             sub clear_all {
411 4     4 1 5530 my ($self) = @_;
412              
413 4         10 %{$self->{promise_cache}} = ();
  4         22  
414              
415 4         19 return $self;
416             }
417              
418             =item prime ( key, value )
419              
420             Primes the cache with the provided key and value. If the key already exists, no
421             change is made. (To forcefully prime the cache, clear the key first with
422             C<< $loader->clear($key)->prime($key, $value) >>.) Returns itself for method chaining.
423              
424             If you want to prime an error value, use C<< DataLoader->error($message) >> as the
425             second argument.
426              
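For example, to cache a rejection for a key without ever calling the batch function
(the key and message here are illustrative):

    $user_loader->prime(42, DataLoader->error("user 42 is not visible"));
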
427             =cut
428              
429             sub prime {
430 11     11 1 9199 my ($self, $key, $value) = @_;
431              
432 11         26 my $cache_key = $self->_cache_key($key);
433              
434             # (Test coverage) There is no situation where the cache is unprimed AND we
435             # fail to populate it with a Promise, so mark uncoverable.
436             # uncoverable condition false
437 11 100 66     57 $self->{promise_cache}{$cache_key} //= (
438             _is_error_object($value) ? Mojo::Promise->reject($value->message)
439             : Mojo::Promise->resolve($value)
440             );
441              
442 11         734 return $self;
443             }
444              
445             =item DataLoader->error( @message )
446              
447             Shorthand for C<< DataLoader::Error->new(@message) >>. Should be used by the batch
448             loading function to indicate particular items of data that could not be loaded. The
449             error will be propagated to the C<load> caller(s) for the data. Can also be used
450             with C<prime>.
451              
452             =cut
453              
454             sub error {
455 7     7 1 83 my ($class, @data) = @_;
456 7         28 return DataLoader::Error->new(@data);
457             }
458              
459             =item DataLoader->all( @promises )
460              
461             Alternative to Mojo::Promise's C<all> that assumes all promises return a single
462             argument only, and will return a list of single return values for all promises,
463             in the same order as the promises.
464              
465             For example:
466              
467             Mojo::Promise->all( Mojo::Promise->resolve(1), Mojo::Promise->resolve(2) );
468              
469             resolves to C<[[1], [2]]>, but:
470              
471             DataLoader->all( Mojo::Promise->resolve(1), Mojo::Promise->resolve(2) );
472              
473             resolves to C<[1, 2]>.
474              
475             Additionally, C<< Mojo::Promise->all() >> will die with "unable to call 'clone' on
476             undefined value" (or similar), while C<< DataLoader->all() >> returns a Promise that
477             resolves to the empty list.
478              
479             Throws an exception if any promise passed as an argument resolves to a list of more
480             than one return value.
481              
482             =cut
483              
484             sub all {
485 33     33 1 23937 my ($class, @promises) = @_;
486              
487 33 100       91 if (!@promises) {
488 3         16 return Mojo::Promise->resolve();
489             }
490             else {
491 30         108 my $all = $promises[0]->clone;
492 30         1186 my @results;
493 30         45 my $remaining = @promises;
494 30         107 for my $i (0..$#promises) {
495             $promises[$i]->then(
496             sub {
497             # Only consider first argument
498 78 100   78   19885 @_ > 1 && $all->reject("all: got promise with multiple return values");
499 78         254 $results[$i] = $_[0];
500 78 100       295 $all->resolve(@results) if --$remaining <= 0;
501             },
502 1     1   132 sub { $all->reject(@_) }
503 79         3108 );
504             }
505 30         1795 return $all;
506             }
507             }
508              
509             =back
510              
511             =cut
512              
513             # ---------------
514             # Private methods
515             # ---------------
516              
517             # Schedule a data load for all items in the queue, splitting up if needed.
518             # The schedule is async; no return value.
519             sub _dispatch_queue {
520 63     63   112 my $self = shift;
521              
522 63         91 my @queue = @{$self->{queue}};
  63         186  
523 63         167 $self->{queue} = [];
524              
525 63         115 my $max_batch_size = $self->{max_batch_size};
526 63 100 100     210 if ($max_batch_size && @queue > $max_batch_size) {
527             # Need to split the queue into multiple batches
528 1         9 while(my @batch = splice @queue, 0, $max_batch_size) {
529 2         141 $self->_dispatch_queue_batch(@batch);
530             }
531             }
532             else {
533 62         158 $self->_dispatch_queue_batch(@queue);
534             }
535             }
536              
537             # Schedule a data load for a batch of items. Returns nothing.
538             sub _dispatch_queue_batch {
539 64     64   151 my ($self, @queue) = @_;
540              
541 64         126 my @keys = map { $_->[0] } @queue;
  102         263  
542              
543             # Actually schedule the data load
544 64         110 my $batch_promise = eval { $self->{batch_load_func}->(@keys) };
  64         194  
545 64 100 100     9336 if ($@) {
    100          
546 1         5 return $self->_failed_dispatch(\@queue, $@);
547             }
548             elsif (!$batch_promise || !blessed $batch_promise || !$batch_promise->can('then')) {
549 3         9 return $self->_failed_dispatch(\@queue,
550             "DataLoader batch function did not return a Promise!");
551             }
552              
553             # Await the resolution of the call of batch_load_func
554             $batch_promise->then(sub {
555 58     58   65991 my @values = @_;
556              
557 58 100       215 if (@values != @keys) {
558 1         9 die "DataLoader batch function returned the wrong number of keys:"
559             . " returned " . @values . ", expected " . @keys . "\n"
560             . "values: " . dump(@values) . "\n"
561             . "keys: " . dump(@keys) . "\n";
562             }
563              
564             # Step through each value, resolving or rejecting each Promise
565 57         215 for my $i (0..$#queue) {
566 94         3068 my (undef, $promise) = @{$queue[$i]};
  94         204  
567 94         173 my $value = $values[$i];
568 94 100       215 if (_is_error_object($value)) {
569 5         18 $promise->reject($value->message);
570             }
571             else {
572 89         251 $promise->resolve($value);
573             }
574             }
575             })->catch(sub {
576 3     3   2957 my $error = shift;
577 3         11 $self->_failed_dispatch(\@queue, $error);
578 60         438 });
579             }
580              
581             # Called when a batch fails. Clear all affected keys from the cache (so we don't cache
582             # the error response) and reject each Promise so callers get an exception.
583             sub _failed_dispatch {
584 7     7   16 my ($self, $queue, $error) = @_;
585 7         14 for my $job (@$queue) {
586 8         92 my ($key, $promise) = @$job;
587 8         22 $self->clear($key);
588 8         22 $promise->reject($error);
589             }
590             }
591              
592             # Indicates if the value is a dataloader error object.
593             sub _is_error_object {
594 102     102   172 my ($object) = @_;
595 102   100     534 return blessed($object) && $object->isa('DataLoader::Error');
596             }
597              
598             # Returns the cache_key for a key
599             sub _cache_key {
600 157     157   311 my ($self, $key) = @_;
601 157 100       450 return $key if !defined $self->{cache_key_func};
602 9         15 return do { local $_ = $key; $self->{cache_key_func}->() };
  9         14  
  9         23  
603             }
604              
605             1;