File Coverage

blib/lib/DataLoader.pm
Criterion     Covered    Total        %
statement         135      137     98.5
branch             49       50     98.0
condition          35       35    100.0
subroutine         26       27     96.3
pod                 8        8    100.0
total             253      257     98.4


line stmt bran cond sub pod time code
1             package DataLoader;
2              
3             =encoding utf8
4              
5             =head1 NAME
6              
7             DataLoader - automatically batch and cache repeated data loads
8              
9             =head1 SYNOPSIS
10              
11             use DataLoader;
12             my $user_loader = DataLoader->new(sub {
13                 my @user_ids = @_;
14                 return getUsers(@user_ids); # a Mojo::Promise
15             });
16              
17             # Now fetch your data whenever (asynchronously)
18             my $data = Mojo::Promise->all(
19                 $user_loader->load(1),
20                 $user_loader->load(2),
21                 $user_loader->load(2),
22             );
23              
24             # getUsers is called only once - with (1,2)
25              
26             =head1 DESCRIPTION
27              
28             L<DataLoader> is a generic utility to be used as part of your application's data
29             fetching layer. It provides a consistent API over various backends and reduces requests
30             to those backends via automatic batching and caching of data.
31              
32             It is primarily useful for GraphQL APIs where each resolver independently requests
33             the object(s) it needs; this loader ensures those requests are batched together and
34             not repeated.
35              
36             It is a port of the JavaScript version available at L<https://github.com/graphql/dataloader>.
37              
38             =head2 Batching
39              
40             To get started, create a batch loading function that maps a list of keys (typically
41             strings/integers) to a L<Mojo::Promise> that resolves to a list of values.
42              
43             my $user_loader = DataLoader->new(\&myBatchGetUsers);
44              
45             Then load individual values from the loader. All individual loads that occur within a
46             single tick of the event loop will be batched together.
47              
48             $user_loader->load(1)
49                 ->then(fun ($user) { $user_loader->load($user->invitedById) })
50                 ->then(fun ($invitedBy) { say "User 1 was invited by ", $invitedBy->name });
51            
52             # Somewhere else in the application
53             $user_loader->load(2)
54                 ->then(fun ($user) { $user_loader->load($user->lastInvitedId) })
55                 ->then(fun ($lastInvited) { say "User 2 last invited ", $lastInvited->name });
56              
57             A naive application may have issued four round-trips to the backend for the required
58             information, but with DataLoader this application will make at most two.
59              
60             =head3 Batch function
61              
62             The batch loading function takes a list of keys as input, and returns a L<Mojo::Promise>
63             that resolves to a list of values. The ordering of the values should correspond to the
64             ordering of the keys, with any missing values filled in with C<undef>. For example, if
65             the input is C<(2,9,6,1)> and the backend service (e.g. database) returns:
66              
67             { id => 9, name => 'Chicago' }
68             { id => 1, name => 'New York' }
69             { id => 2, name => 'San Francisco' }
70              
71             The backend has returned results in a different order than we requested, and omitted a
72             result for key C<6>, presumably because no value exists for that key.
73              
74             We need to re-order these results to match the original input C<(2,9,6,1)>, and include
75             an undef result for C<6>:
76              
77             [
78             { id => 2, name => 'San Francisco' },
79             { id => 9, name => 'Chicago' },
80             undef,
81             { id => 1, name => 'New York' },
82             ]
83              
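As a rough sketch (not part of the module), a batch function for such a backend could do this
reordering itself, assuming a hypothetical C<get_cities_by_ids> helper that returns a
L<Mojo::Promise> resolving to the rows above in arbitrary order:

    sub batch_get_cities {
        my @ids = @_;
        return get_cities_by_ids(@ids)->then(sub {
            my @rows = @_;
            # Index the rows by id, then emit one slot per requested key,
            # leaving undef where the backend returned nothing.
            my %by_id = map { ($_->{id} => $_) } @rows;
            return map { $by_id{$_} } @ids;
        });
    }
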
84             There are two typical error cases in the batch loading function. The first is an error
85             that invalidates the whole batch, for example a DB query for all input rows fails because
86             the DB cannot be reached. In this case, simply C<die> and the error will be passed through
87             to all callers that are waiting for values included in this batch. The error is assumed
88             to be transient, and nothing will be cached.
89              
90             The second case is where part of the batch succeeds and part fails. In this case, use
91             C<< DataLoader->error >> to create error objects, and mix them in with the successful
92             values:
93              
94             [
95             { id => 2, name => 'San Francisco' }, # this succeeded
96             DataLoader->error("no permission"), # this failed (id 9)
97             undef, # this item is missing (id 6)
98             { id => 1, name => 'New York' }, # this succeeded
99             ]
100              
101             Now callers that have called C<< load(9) >> will get an exception. Callers for id 6
102             will receive C<undef>, and callers for ids 1 and 2 will get hashrefs of data. Additionally,
103             these errors will be cached (see 'Caching Errors' below).
104              
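Extending the sketch above, per-key failures can be reported without failing the whole batch
(C<can_view> is an illustrative permission check, not part of this module):

    sub batch_get_cities {
        my @ids = @_;
        return get_cities_by_ids(@ids)->then(sub {
            my %by_id = map { ($_->{id} => $_) } @_;
            return map {
                  !exists $by_id{$_}    ? undef
                : !can_view($by_id{$_}) ? DataLoader->error("no permission")
                :                         $by_id{$_}
            } @ids;
        });
    }
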
105             =head2 Caching
106              
107             DataLoader provides a simple memoization cache for all loads that occur within a single
108             request for your application. Multiple loads of the same key result in only one
109             backend request; additionally, the same object in memory is returned each time,
110             reducing memory use.
111              
112             my $user_loader = DataLoader->new(...);
113             my $promise1a = $user_loader->load(1);
114             my $promise1b = $user_loader->load(1);
115             is( refaddr($promise1a), refaddr($promise1b) ); # same object
116              
117             =head3 Caching per-Request
118              
119             The suggested way to use DataLoader is to create a new loader when a request (for example,
120             a GraphQL request) begins, and destroy it once the request ends. This prevents duplicate
121             backend operations and provides a consistent view of data across the request.
122              
123             Using the same loader for multiple requests is not recommended as it may result in cached
124             data being returned unexpectedly, or sensitive data being leaked to other users who should
125             not be able to view it.
126              
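As one possible arrangement (a sketch, assuming a Mojolicious application and an illustrative
C<batch_get_users> function), a helper can lazily build the loaders in the stash so that each
request gets its own set:

    # In startup():
    $app->helper(loaders => sub {
        my $c = shift;
        $c->stash->{loaders} //= {
            users => DataLoader->new(\&batch_get_users),
        };
    });

    # In an action or GraphQL resolver:
    $c->loaders->{users}->load($user_id)->then(sub { ... });
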
127             The default cache used by DataLoader is a simple hashref that stores all values for all
128             keys loaded during the lifetime of the request; it is useful when request lifetime is
129             short. If other behaviour is desired, see the C<cache_hashref> constructor parameter.
130              
131             =head3 Clearing Cache
132              
133             It is sometimes necessary to clear values from the cache, for example after running an
134             SQL UPDATE or similar, to prevent out of date values from being used. This can be done
135             with the C<clear> method.
136              
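For example (a sketch; C<update_user_name> stands in for whatever performs the write):

    update_user_name($id, $new_name)->then(sub {
        $user_loader->clear($id);   # the next load($id) will hit the backend again
    });
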
137             =head3 Caching Errors
138              
139             If the batch load fails (throws an exception or returns a rejected Promise), the requested
140             values will not be cached. However, if the batch function returns a C<DataLoader::Error>
141             instance for individual value(s), those errors will be cached to avoid frequently loading
142             the same error.
143              
144             If you want to avoid this, you can catch the Promise error and clear the cache immediately
145             afterwards, e.g.
146              
147             $user_loader->load(1)->catch(fun ($error) {
148                 if ($should_clear_error) {
149                     $user_loader->clear(1);
150                 }
151                 die $error; # or whatever
152             });
153              
154             =head1 METHODS
155              
156             =over
157              
158             =cut
159              
160 9     9   802938 use v5.14;
  9         171  
161 9     9   53 use warnings;
  9         17  
  9         306  
162              
163 9     9   48 use Carp qw(croak);
  9         35  
  9         514  
164 9     9   4312 use Data::Dump qw(dump);
  9         50024  
  9         680  
165 9     9   4925 use Mojo::IOLoop;
  9         3111267  
  9         80  
166 9     9   478 use Mojo::Promise;
  9         34  
  9         125  
167 9     9   356 use Scalar::Util qw(blessed);
  9         24  
  9         495  
168              
169 9     9   4618 use DataLoader::Error;
  9         26  
  9         14825  
170              
171             our $VERSION = '0.01';
172              
173             =item new ( batch_load_function, %options )
174              
175             Creates a public API for loading data from a particular back-end with unique keys,
176             such as the C<id> column of an SQL table. You must provide a batch loading function
177             (described above).
178              
179             Each instance gets, by default, a unique memoized cache of all loads made during the
180             lifetime of the object. Consider a different cache for long-lived applications, and
181             consider a new instance per request if requests are made by users with different access
182             permissions or where fresh data is desired for each request.
183              
184             Options:
185              
186             =over
187              
188             =item batch (true)
189              
190             Set to false to disable batching: the batch load function will be invoked once for
191             each key.
192              
193             =item max_batch_size (Infinity)
194              
195             If set, limits the maximum number of keys to pass to the batch load function at once.
196              
197             =item cache (true)
198              
199             Set to false to disable caching, which will create a new Promise and new key in the
200             batch load function for every load of the same key. (This means the batch load function
201             may be called with duplicate keys.)
202              
203             =item cache_key_func (identity function)
204              
205             Maps a load key C<$_> to a cache key. Useful when using objects as keys and two
206             different objects should be considered equivalent, or to handle case-
207             insensitivity, etc.
208              
209             For example: C<< cache_key_func => sub { lc } >> for case-insensitive comparisons
210              
211             Compare objects as long as their id is the same:
212              
213             ... cache_key_func => sub { $_->{id} }
214              
215             Compare the content of objects:
216              
217             use Storable qw(freeze);
218             ... cache_key_func => sub { freeze($_) }
219              
220             =item cache_hashref ({})
221              
222             Pass a custom hashref for caching. You can tie this hashref to any tie module to get
223             custom behaviour such as LRU eviction (see the sketch after this list). Support for dedicated cache modules will be considered if there is interest.
224              
225             =back
226              
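As a rough sketch of combining these options (C<batch_get_users> is illustrative, and L<Tie::Cache>
is assumed here only as an example of a size-limited tied hash):

    use DataLoader;
    use Tie::Cache;

    # Keep at most 500 cached promises (eviction handled by the tied hash)
    tie my %lru, 'Tie::Cache', 500;

    my $user_loader = DataLoader->new(
        \&batch_get_users,
        max_batch_size => 100,            # at most 100 keys per batch call
        cache_key_func => sub { lc },     # case-insensitive keys
        cache_hashref  => \%lru,
    );
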
227             =cut
228              
229             sub new {
230 51     51 1 312575 my ($class, $batch_load_func, %opts) = @_;
231              
232 51   100     458 my $do_batch = delete $opts{batch} // 1;
233 51   100     246 my $max_batch_size = delete $opts{max_batch_size} // undef;
234 51   100     224 my $do_cache = delete $opts{cache} // 1;
235 51   100     210 my $cache_key_func = delete $opts{cache_key_func} // undef;
236 51   100     251 my $cache_map = delete $opts{cache_hashref} // {};
237              
238 51 100       193 if (keys %opts) {
239 1         29 croak "unknown option " . join(', ', sort keys %opts);
240             }
241              
242 50 100 100     258 if ((ref $batch_load_func || '') ne 'CODE') {
243 2         15 croak "batch_load_func must be a function that accepts a list of keys"
244             . " and returns a Mojo::Promise resolving to a list of values, but"
245             . " got: " . dump($batch_load_func);
246             }
247 48 100 100     233 if (defined $cache_key_func && (ref $cache_key_func || '') ne 'CODE') {
      100        
248 2         31 croak "cache_key_func must be a function that returns the cache key for key=\$_";
249             }
250 46 100 100     277 if (!ref $cache_map || ref $cache_map ne 'HASH') {
251 2         40 croak "cache_hashref must be a HASH ref (tied or plain)";
252             }
253 44 100       144 if (defined $max_batch_size) {
254 4 100       56 $max_batch_size =~ /^\d+$/ or croak "max_batch_size must be a positive integer";
255 3 100       24 $max_batch_size > 0 or croak "max_batch_size cannot be zero";
256             }
257              
258 42         383 return bless {
259             batch_load_func => $batch_load_func,
260             do_batch => $do_batch,
261             max_batch_size => $max_batch_size,
262             do_cache => $do_cache,
263             cache_key_func => $cache_key_func,
264             promise_cache => $cache_map,
265             queue => [],
266             }, $class;
267             }
268              
269             =item load ( key )
270              
271             Loads a key, returning a L<Mojo::Promise> for the value represented by that key.
272              
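For example, using the loader from the SYNOPSIS:

    $user_loader->load(42)->then(sub {
        my ($user) = @_;
        say $user->{name};
    })->catch(sub {
        my ($err) = @_;
        warn "load failed: $err";
    });
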
273             =cut
274              
275             sub load {
276 131     131 1 64246 my ($self, $key) = @_;
277              
278 131 100       415 @_ >= 2 or croak "load: key is required";
279 130 100       292 defined $key or croak "load: key must be defined";
280 129 100       316 @_ == 2 or croak "load: too many arguments, expected 1";
281              
282 128         334 my $cache_key = $self->_cache_key($key);
283              
284             # If caching and there is a cache-hit, return cached promise
285 128 100 100     640 if ($self->{do_cache} && (my $promise = $self->{promise_cache}{$cache_key})) {
286 27         145 return $promise;
287             }
288              
289             # Otherwise, produce a new Promise for this value
290 101         537 my $promise = Mojo::Promise->new;
291              
292             # JS code calls new Promise((resolve, reject) => ...) with the below code
293             # but this should be equivalent.
294 101         3501 push @{$self->{queue}}, [$key, $promise];
  101         280  
295              
296             # Determine if a dispatch of this queue should be scheduled.
297             # A single dispatch should be scheduled per queue at the time when the queue
298             # changes from 'empty' to 'full'
299 101 100       168 if (@{$self->{queue}} == 1) {
  101         294  
300 62 100       176 if ($self->{do_batch}) {
301             # Schedule next tick, to allow all batch calls this frame to be batched
302             # together.
303              
304             # We prefer an idle watcher as it will execute after all Promises are
305             # resolved (batching as much as possible). But Mojo::IOLoop's API does
306             # not provide this. And we cannot assume AnyEvent can be used.
307             # The best we can do is detect the EV backend and use EV::idle.
308 59 50       362 if (Mojo::IOLoop->singleton->reactor->isa('Mojo::Reactor::EV')) {
309             # Capture the lexical inside the coderef to keep it alive until
310             # the callback is finished.
311 59         847 my $w; $w = EV::idle(sub {
312 59     59   19978 $self->_dispatch_queue;
313 59         8446 undef $w;
314 59         550 });
315             }
316             else {
317             # We fall back to next_tick, which is less efficient.
318 0     0   0 Mojo::IOLoop->next_tick(sub { $self->_dispatch_queue });
  0         0  
319             }
320             }
321             else {
322             # Dispatch immediately
323 3         9 $self->_dispatch_queue;
324             }
325             }
326              
327             # If caching, cache this promise
328 101 100       736 if ($self->{do_cache}) {
329 79         226 $self->{promise_cache}{$cache_key} = $promise;
330             }
331              
332 101         416 return $promise;
333             }
334              
335             =item load_many ( @keys )
336              
337             Loads multiple keys, returning a Promise that resolves to a list of values.
338              
339             Equivalent to C<< DataLoader->all(map { $loader->load($_) } @keys) >>.
340              
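For example:

    $user_loader->load_many(1, 2, 3)->then(sub {
        my @users = @_;
        say $_->{name} for @users;
    });
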
341             =cut
342              
343             sub load_many {
344 7     7 1 21291 my ($self, @keys) = @_;
345              
346 7         49 return $self->all(map { $self->load($_) } @keys);
  14         30  
347             }
348              
349             =item clear ( key )
350              
351             Clears the value at C<key> from the cache, if it exists. Returns itself for method
352             chaining.
353              
354             =cut
355              
356             sub clear {
357 17     17 1 26753 my ($self, $key) = @_;
358              
359 17         65 my $cache_key = $self->_cache_key($key);
360 17         79 delete $self->{promise_cache}{$cache_key};
361              
362 17         45 return $self;
363             }
364              
365             =item clear_all ()
366              
367             Clears the entire cache. To be used when some event results in unknown invalidations
368             across this particular L<DataLoader>. Returns itself for method chaining.
369              
370             =cut
371              
372             sub clear_all {
373 4     4 1 6316 my ($self) = @_;
374              
375 4         8 %{$self->{promise_cache}} = ();
  4         31  
376              
377 4         18 return $self;
378             }
379              
380             =item prime ( key, value )
381              
382             Primes the cache with the provided key and value. If the key already exists, no
383             change is made. (To forcefully prime the cache, clear the key first with
384             C<< $loader->clear($key)->prime($key, $value) >>.) Returns itself for method chaining.
385              
386             If you want to prime an error value, use C<< DataLoader->error($message) >> as the
387             second argument.
388              
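For example, a request that creates a record can seed the cache so that later loads in the same
request are free (a sketch; C<create_user> is illustrative):

    my $user = create_user(name => 'Alice');
    $user_loader->prime($user->{id}, $user);

    # Prime a known failure:
    $user_loader->prime(999, DataLoader->error("no such user"));
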
389             =cut
390              
391             sub prime {
392 11     11 1 9945 my ($self, $key, $value) = @_;
393              
394 11         34 my $cache_key = $self->_cache_key($key);
395              
396             # (Test coverage) There is no situation where the cache is unprimed AND we
397             # fail to populate it with a Promise, so mark uncoverable.
398             # uncoverable condition false
399 11 100 66     70 $self->{promise_cache}{$cache_key} //= (
400             _is_error_object($value) ? Mojo::Promise->reject($value->message)
401             : Mojo::Promise->resolve($value)
402             );
403              
404 11         1130 return $self;
405             }
406              
407             =item DataLoader->error( @message )
408              
409             Shorthand for C<< DataLoader::Error->new(@message) >>. Should be used by the batch
410             loading function to indicate particular items of data that could not be loaded. The
411             error will be propagated to the C<load> caller(s) for the data. Can also be used
412             with C<prime>.
413              
414             =cut
415              
416             sub error {
417 7     7 1 86 my ($class, @data) = @_;
418 7         33 return DataLoader::Error->new(@data);
419             }
420              
421             =item DataLoader->all( @promises )
422              
423             Alternative to Mojo::Promise's C<all> that assumes all promises return a single
424             argument only, and will return a list of single return values for all promises,
425             in the same order as the promises.
426              
427             For example:
428              
429             Mojo::Promise->all( Mojo::Promise->resolve(1), Mojo::Promise->resolve(2) );
430              
431             resolves to C<[[1], [2]]>, but:
432              
433             DataLoader->all( Mojo::Promise->resolve(1), Mojo::Promise->resolve(2) );
434              
435             resolves to C<[1, 2]>.
436              
437             Additionally, C<< Mojo::Promise->all() >> will die with "unable to call 'clone' on
438             undefined value" (or similar), while C<< DataLoader->all() >> returns a Promise that
439             resolves to the empty list.
440              
441             The returned Promise is rejected if any promise passed as an argument resolves to a list
442             of more than one return value.
443              
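For example, loading several keys and receiving one value per promise, in order:

    DataLoader->all(map { $user_loader->load($_) } @ids)->then(sub {
        my @users = @_;   # one value per promise, in the same order as @ids
        say $_->{name} for @users;
    });
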
444             =cut
445              
446             sub all {
447 33     33 1 25401 my ($class, @promises) = @_;
448              
449 33 100       134 if (!@promises) {
450 3         31 return Mojo::Promise->resolve();
451             }
452             else {
453 30         176 my $all = $promises[0]->clone;
454 30         1339 my @results;
455 30         85 my $remaining = @promises;
456 30         125 for my $i (0..$#promises) {
457             $promises[$i]->then(
458             sub {
459             # Only consider first argument
460 78 100   78   20885 @_ > 1 && $all->reject("all: got promise with multiple return values");
461 78         272 $results[$i] = $_[0];
462 78 100       285 $all->resolve(@results) if --$remaining <= 0;
463             },
464 1     1   160 sub { $all->reject(@_) }
465 79         3592 );
466             }
467 30         1909 return $all;
468             }
469             }
470              
471             =back
472              
473             =cut
474              
475             # ---------------
476             # Private methods
477             # ---------------
478              
479             # Schedule a data load for all items in the queue, splitting up if needed.
480             # The schedule is async; no return value.
481             sub _dispatch_queue {
482 62     62   143 my $self = shift;
483              
484 62         105 my @queue = @{$self->{queue}};
  62         227  
485 62         184 $self->{queue} = [];
486              
487 62         139 my $max_batch_size = $self->{max_batch_size};
488 62 100 100     248 if ($max_batch_size && @queue > $max_batch_size) {
489             # Need to split the queue into multiple batches
490 1         14 while(my @batch = splice @queue, 0, $max_batch_size) {
491 2         145 $self->_dispatch_queue_batch(@batch);
492             }
493             }
494             else {
495 61         178 $self->_dispatch_queue_batch(@queue);
496             }
497             }
498              
499             # Schedule a data load for a batch of items. Returns nothing.
500             sub _dispatch_queue_batch {
501 63     63   183 my ($self, @queue) = @_;
502              
503 63         152 my @keys = map { $_->[0] } @queue;
  101         285  
504              
505             # Actually schedule the data load
506 63         115 my $batch_promise = eval { $self->{batch_load_func}->(@keys) };
  63         226  
507 63 100 100     10525 if ($@) {
    100          
508 1         4 return $self->_failed_dispatch(\@queue, $@);
509             }
510             elsif (!$batch_promise || !blessed $batch_promise || !$batch_promise->can('then')) {
511 3         15 return $self->_failed_dispatch(\@queue,
512             "DataLoader batch function did not return a Promise!");
513             }
514              
515             # Await the resolution of the call of batch_load_func
516             $batch_promise->then(sub {
517 57     57   66085 my @values = @_;
518              
519 57 100       259 if (@values != @keys) {
520 1         15 die "DataLoader batch function returned the wrong number of keys:"
521             . " returned " . @values . ", expected " . @keys . "\n"
522             . "values: " . dump(@values) . "\n"
523             . "keys: " . dump(@keys) . "\n";
524             }
525              
526             # Step through each value, resolving or rejecting each Promise
527 56         246 for my $i (0..$#queue) {
528 93         3205 my (undef, $promise) = @{$queue[$i]};
  93         268  
529 93         168 my $value = $values[$i];
530 93 100       252 if (_is_error_object($value)) {
531 5         19 $promise->reject($value->message);
532             }
533             else {
534 88         271 $promise->resolve($value);
535             }
536             }
537             })->catch(sub {
538 3     3   3078 my $error = shift;
539 3         16 $self->_failed_dispatch(\@queue, $error);
540 59         452 });
541             }
542              
543             # Called when a batch fails. Clear all items from the queue (so we don't cache the error
544             # response) and reject the Promise so callers get an exception.
545             sub _failed_dispatch {
546 7     7   25 my ($self, $queue, $error) = @_;
547 7         22 for my $job (@$queue) {
548 8         70 my ($key, $promise) = @$job;
549 8         29 $self->clear($key);
550 8         27 $promise->reject($error);
551             }
552             }
553              
554             # Indicates if the value is a dataloader error object.
555             sub _is_error_object {
556 101     101   200 my ($object) = @_;
557 101   100     560 return blessed($object) && $object->isa('DataLoader::Error');
558             }
559              
560             # Returns the cache_key for a key
561             sub _cache_key {
562 156     156   300 my ($self, $key) = @_;
563 156 100       515 return $key if !defined $self->{cache_key_func};
564 9         13 return do { local $_ = $key; $self->{cache_key_func}->() };
  9         14  
  9         21  
565             }
566              
567             1;