File Coverage

blib/lib/SQL/Inserter.pm
Criterion Covered Total %
statement 153 153 100.0
branch 52 52 100.0
condition 27 27 100.0
subroutine 20 20 100.0
pod 4 4 100.0
total 256 256 100.0


line stmt bran cond sub pod time code
1             package SQL::Inserter;
2              
3 5     5   1047510 use 5.008;
  5         52  
4 5     5   55 use strict;
  5         9  
  5         121  
5 5     5   25 use warnings;
  5         8  
  5         151  
6              
7 5     5   28 use Carp;
  5         9  
  5         320  
8 5     5   31 use Exporter 'import';
  5         19  
  5         9358  
9              
10             =head1 NAME
11              
12             SQL::Inserter - Efficient buffered DBI inserter and fast INSERT SQL builder
13              
14             =head1 VERSION
15              
16             Version 0.02
17              
18             =cut
19              
20             our $VERSION = '0.02';
21              
22             our @EXPORT_OK = qw(simple_insert multi_insert_sql);
23              
24             =head1 SYNOPSIS
25              
26             use SQL::Inserter;
27              
28             my $sql = SQL::Inserter->new(
29             dbh => $dbh,
30             table => 'table',
31             cols => [qw/col1 col2.../],
32             buffer => 100? # Default buffer is 100 rows
33             );
34              
35             # Fastest method: pass single or multiple rows of data as an array
36             $sql->insert($col1_row1, $col2_row1, $col1_row2...);
37              
38             # You can manually flush the buffer at any time with no argument on insert
39             # (otherwise there is auto-flush on the object's destruction)
40             $sql->insert();
41              
42             # Alternative, pass a single row as a hash, allows SQL code passed as
43             # references instead of values (no need to define cols in constructor)
44             $sql->insert({
45             column1 => $data1,
46             column2 => \'NOW()',
47             ...
48             });
49              
50             # There are also functions to just get the SQL statement and its bind vars
51             # similar to SQL::Abstract or SQL::Maker insert, but with much less overhead:
52             my ($sql, @bind) = simple_insert($table, {col1=>$val...});
53              
54             # Multi-row variant:
55             my ($sql, @bind) = simple_insert($table, [{col1=>$val1...},{col1=>$val2...},...]);
56              
57             # Or, construct an SQL statement with placeholders for a given number of rows:
58             my $sql = multi_insert_sql('table', [qw/col1 col2.../], $num_of_rows);
59              
60             =head1 DESCRIPTION
61              
62             SQL::Inserter's main lightweight OO interface will let you do L inserts as
63             efficiently as possible by managing a multi-row buffer and prepared statements.
64              
65             You only have to select the number of rows for the buffered writes (default is 100)
66             and choose whether to pass your data in arrays (fastest, requires all data to be bind
67             values, will execute the same prepared statement every time the buffer is full) or
68             hashes (allows SQL code apart from plain values).
69              
70             It also provides lightweight functions that return the SQL queries to be used manually,
71             similar to C, but much faster.
72              
73             C and C variants supported for MySQL/MariaDB.
74              
75             =head1 EXPORTS
76              
77             On request: C C.
78              
79             =head1 CONSTRUCTOR
80              
81             =head2 C
82              
83             my $sql = SQL::Inserter->new(
84             dbh => $dbh,
85             table => $table,
86             cols => \@column_names?,
87             buffer => 100?,
88             duplicates => $ignore_or_update?,
89             null_undef => $convert_undef_to_NULL?
90             );
91              
92             Creates an object to insert data to a specific table. Buffering is enabled by default
93             and anything left on it will be written when the object falls out of scope / is destroyed.
94              
95             Required parameters:
96              
97             =over 4
98              
99             =item * C : A L database handle.
100              
101             =item * C : The name of the db table to insert to. See L if you
102             are using a restricted word for a table name.
103              
104             =back
105              
106             Optional parameters:
107              
108             =over 4
109              
110             =item * C : The names of the columns to insert. It is required if arrays are
111             used to pass the data. With hashes they are optional (the order will be followed
112             if they are defined). See L if you are using any restricted words for
113             column names.
114              
115             =item * C : Max number of rows to be held in buffer before there is a write.
116             The buffer flushes (writes contents) when the object is destroyed. Setting it to 1
117             writes each row separately (least efficient). For small rows you can set buffer to
118             thousands. The default is a (conservative) 100 which works with big data rows.
119              
120             =item * C : For MySQL, define as C<'ignore'> or C<'update'> to get an
121             C or C query respectively. See L
122             for details on the latter.
123              
124             =item * C : Applies to the hash inserts only. If true, any undefined
125             values will be converted to SQL's C (similar to the C default).
126             The default behaviour will leave an undef as the bind variable, which may either
127             create an empty string in the db or give an error depending on your column type and
128             db settings.
129              
130             =back
131              
132             =cut
133              
134             sub new {
135 13     13 1 36451 my $class = shift;
136 13         46 my %args = @_;
137              
138 13         22 my $self = {};
139 13         25 bless($self, $class);
140              
141 13   100     265 $self->{dbh} = $args{dbh} || croak("dbh parameter (db handle) required.");
142 12   100     131 $self->{table} = $args{table} || croak("table parameter required.");
143 11         26 $self->{cols} = $args{cols};
144 11   100     40 $self->{buffer} = $args{buffer} || 100;
145 11         18 $self->{dupes} = $args{duplicates};
146 11         21 $self->{null} = $args{null_undef};
147 11 100       36 if ($self->{dupes}) {
148 2 100       6 $self->{ignore} = 1 if $self->{dupes} eq "ignore";
149 2 100       7 $self->{update} = 1 if $self->{dupes} eq "update";
150             }
151 11         41 $self->_cleanup();
152              
153 11         49 return $self;
154             }
155              
156             =head1 METHODS
157              
158             =head2 insert
159              
160             # Fastest array method. Only bind data is passed.
161             my $ret = $sql->insert(@column_data_array);
162              
163             # Alternative, allows SQL code as values in addition to bind variables
164             my $ret = $sql->insert(\%row_data);
165              
166             # No parameters will force emtying of buffer (db write)
167             my $ret = $sql->insert();
168              
169             The main insert method. Returns the return value of the last C statement
170             if there was one called, 0 otherwise (buffer not full.
171              
172             It works in two main modes, by passing an array or a hashref:
173              
174             =over 4
175              
176             =item Array mode
177              
178             Pass the data for one or more rows in a flat array, buffering will work automatically
179             based on your C settings. Obviously your C<@column_data_array> has to contain
180             a multiple of the number of C defined on the constructor.
181              
182             This is the fastest mode, but it only allows simple bind values. Any undefined values
183             will be passed directly to DBI->execute, which may or may not be what you expect -
184             there will not be any explicit conversion to SQL C.
185              
186             =item Hash mode
187              
188             Pass a reference to a hash containing the column names & values for a single row
189             of data. If C was not defined on the constructor, the columns from the first
190             data row will be used instead. For subsequent rows any extra columns will be disregarded
191             and any missing columns will be considered to have an C (which can be
192             automatically converted to C if the C option was set).
193              
194             =item Flushing the buffer
195              
196             Calling C with no arguments forces a write to the db, flushing the buffer.
197             You don't have to call this manually as the buffer will be flushed when the object
198             is destroyed (e.g. your object falls out of scope).
199              
200             =item Mixing modes
201              
202             You can theoretically mix modes, but only when the buffer is empty e.g. you can start
203             with the array mode, flush the buffer and continue with hash mode (C will be
204             defined from the array mode). Or you can start with hash mode (so C will be defined
205             from the very first hash), and after flushing the buffer you can switch to array mode.
206              
207             =back
208              
209             =cut
210              
211             sub insert {
212 33     33 1 6701 my $self = shift;
213              
214 33 100 100     171 return $self->_hash_insert(@_) if $_[0] and ref($_[0]);
215              
216 18         34 my $ret = 0;
217 18 100       50 if (@_) {
    100          
218              
219             croak("Calling insert without a hash requires cols defined in constructor")
220 14 100       124 unless $self->{cols};
221            
222             croak("Insert arguments must be multiple of cols")
223 13 100       20 if scalar(@_) % scalar @{$self->{cols}};
  13         178  
224              
225             croak("Insert was previously called with hash argument (still in buffer)")
226 11 100       105 if $self->{hash_buffer};
227              
228 10         26 while (@_) {
229 14         19 my $rows = scalar(@_) / scalar @{$self->{cols}};
  14         30  
230 14         28 my $left = $self->{buffer} - $self->{buffer_counter}; # Space left in buffer
231              
232 14 100       34 if ($rows > $left) { # Can't fit buffer
233 4         11 my $max = $left * scalar @{$self->{cols}};
  4         13  
234 4         6 push @{$self->{bind}}, splice(@_,0,$max);
  4         29  
235 4         9 $self->{buffer_counter} = $self->{buffer};
236             } else {
237 10         16 push @{$self->{bind}}, splice(@_);
  10         27  
238 10         18 $self->{buffer_counter} += $rows;
239             }
240 14 100       45 $ret = $self->_write_full_buffer() if $self->{buffer_counter} == $self->{buffer};
241             }
242             } elsif ($self->{buffer_counter}) { # Empty the buffer
243 2         21 $ret = $self->_empty_buffer();
244             }
245 14         56 return $ret;
246             }
247              
248             =head1 ATTRIBUTES
249              
250             =head2 C
251              
252             my $val = $sql->{last_retval}
253              
254             The return value of the last DBI C is stored in this attribute. On a successful
255             insert it should contain the number of rows of that statement. Note that an C
256             call, depending on the buffering, may call C zero, one or more times.
257              
258             =head2 C
259              
260             my $total = $sql->{row_total}
261              
262             Basically a running total of the return values, for successful inserts it shows you
263             how many rows were inserted into the database. It will be undef if no C has
264             been called.
265              
266             =head2 C
267              
268             my $count = $sql->{buffer_counter}
269              
270             Check how many un-inserted data rows the buffer currently holds.
271              
272             =head1 FUNCTIONS
273              
274             =head2 simple_insert
275              
276             # Single row
277             my ($sql, @bind) = simple_insert($table, \%fieldvals, \%options);
278              
279             # Multi-row
280             my ($sql, @bind) = simple_insert($table, [\%fieldvals_row1,...], \%options);
281              
282             Returns the SQL statement and bind variable array for a hash containing the row
283             columns and values. Values are treated as bind variables unless they are references
284             to SQL code strings. E.g. :
285              
286             my ($sql, @bind) = simple_insert('table', {foo=>"bar",when=>\"NOW()"});
287             ### INSERT INTO table (foo, when) VALUES (?,NOW())
288              
289             The function also accepts an array of hashes to allow multi-row inserts:
290              
291             my ($sql, @bind) = simple_insert('table', [{foo=>"foo"},{foo=>"bar"}]);
292             ### INSERT INTO table (foo) VALUES (?),(?)
293              
294             The first row (element in array) needs to contain the superset of all the columns
295             that you want to insert, if some of your rows have undefined column data.
296              
297             Options:
298              
299             =over 4
300            
301             =item * C : If true, any undefined values will be converted to SQL's
302             C (similar to the C default). The default behaviour will leave
303             an undef as the bind variable, which may either create an empty string in the db or
304             give an error depending on your column type and db settings.
305              
306             =item * C : For MySQL, define as C<'ignore'> or C<'update'> to get an
307             C or C query respectively. See L
308             for details on the latter.
309              
310             =back
311              
312             =cut
313              
314             sub simple_insert {
315 10     10 1 13027 my $table = shift;
316 10         15 my $fields = shift;
317 10         13 my $opt = shift;
318              
319 10         18 my ($placeh, @bind, @cols);
320 10 100       26 if (ref($fields) eq 'ARRAY') {
321 3         13 @cols = keys %{$fields->[0]};
  3         17  
322 3         4 my @rows;
323 3         7 foreach my $f (@$fields) {
324 5         12 my ($row, @b) = _row_placeholders($f, \@cols, $opt->{null_undef});
325 5         10 push @rows, $row;
326 5         21 push @bind, @b;
327             }
328 3         8 $placeh = join(",\n", @rows);
329             } else {
330 7         21 @cols = keys %$fields;
331 7         24 ($placeh, @bind) = _row_placeholders($fields, \@cols, $opt->{null_undef});
332             }
333              
334             return _create_insert_sql(
335             $table, \@cols, $placeh, $opt->{duplicates}
336 10         45 ), @bind;
337             }
338              
339             =head2 multi_insert_sql
340              
341             my $sql = multi_insert_sql(
342             $table,
343             \@columns, # names of table columns
344             $num_of_rows?, # default = 1
345             $duplicates? # can be set as ignore/update in case of duplicate key (MySQL)
346             );
347              
348             Builds bulk insert query (single insert is possible too), with ability for
349             ignore/on duplicate key update variants for MySQL.
350              
351             Requires at least the name of the table C<$table> and an arrayref with the column
352             names C<\@columns>. See L if you want to quote table or column names.
353              
354             Optional parameters:
355              
356             =over 4
357            
358             =item * C<$num_of_rows> : By default it returns SQL with bind value placeholders
359             for a single row. You can define any number of rows to use with multi-row bind
360             variable arrays.
361              
362             =item * C<$duplicate> : For MySQL, passing C<'ignore'> as the 4th argument returns
363             an C query. Passing C<'update'> as the argument returns a query
364             containing an `ON DUPLICATE KEY UPDATE` clause (see L for further details).
365              
366             =back
367              
368             =cut
369              
370             sub multi_insert_sql {
371 18     18 1 6214 my $table = shift;
372 18         32 my $columns = shift;
373 18   100     59 my $num_rows = shift || 1;
374 18         25 my $dupe = shift;
375              
376 18 100 100     105 return unless $table && $columns && @$columns;
      100        
377              
378 15         95 my $placeholders =
379             join(",\n", ('(' . join(',', ('?') x @$columns) . ')') x $num_rows);
380              
381 15         39 return _create_insert_sql($table, $columns, $placeholders, $dupe);
382             }
383              
384             ## Private methods
385              
386             sub _hash_insert {
387 15     15   26 my $self = shift;
388 15         20 my $fields = shift;
389 15         21 my $ret = 0;
390              
391             croak("Insert was previously called with an array argument (still in buffer)")
392 15 100 100     151 if $self->{buffer_counter} && !$self->{hash_buffer};
393              
394 14         22 $self->{buffer_counter}++;
395 14 100       32 $self->{cols} = [keys %$fields] if !defined($self->{cols});
396 14         31 my ($row, @bind) = _row_placeholders($fields, $self->{cols}, $self->{null});
397 14         28 push @{$self->{hash_buffer}}, $row;
  14         31  
398 14         19 push @{$self->{bind}}, @bind;
  14         29  
399              
400 14 100       38 $ret = $self->_write_hash_buffer() if $self->{buffer_counter} == $self->{buffer};
401              
402 14         52 return $ret;
403             }
404              
405             sub _write_full_buffer {
406 9     9   18 my $self = shift;
407              
408             $self->{full_buffer_insert} = $self->_prepare_full_buffer_insert()
409 9 100       41 if !$self->{full_buffer_insert};
410              
411 9         50 $self->_execute($self->{full_buffer_insert});
412 9         19 $self->_cleanup();
413              
414 9         21 return $self->{last_retval};
415             }
416              
417             sub _prepare_full_buffer_insert {
418 6     6   9 my $self = shift;
419             $self->{full_buffer_insert} = $self->{dbh}->prepare(
420 6         14 multi_insert_sql(map {$self->{$_}} qw/table cols buffer dupes/)
  24         50  
421             );
422             }
423              
424             sub _empty_buffer {
425 5     5   11 my $self = shift;
426              
427 5 100       24 return $self->_write_hash_buffer() if $self->{hash_buffer};
428              
429 3         15 my $rows = scalar(@{$self->{bind}}) / scalar @{$self->{cols}};
  3         8  
  3         9  
430             my $sth = $self->{dbh}->prepare(
431             multi_insert_sql(
432             $self->{table},
433             $self->{cols},
434             $rows,
435             $self->{dupes}
436             )
437 3         15 );
438 3         29 $self->_execute($sth);
439 3         11 $self->_cleanup();
440              
441 3         8 return $self->{last_retval};
442             }
443              
444             sub _write_hash_buffer {
445 9     9   16 my $self = shift;
446              
447 9         14 my $placeh = join(",\n", @{$self->{hash_buffer}});
  9         24  
448             my $sth = $self->{dbh}->prepare(
449             _create_insert_sql(
450             $self->{table}, $self->{cols}, $placeh, $self->{dupes}
451             )
452 9         25 );
453 9         54 $self->_execute($sth);
454 9         24 $self->_cleanup();
455              
456 9         22 return $self->{last_retval};
457             }
458              
459             sub _execute {
460 21     21   78 my $self = shift;
461 21         29 my $sth = shift;
462              
463 21 100       51 $self->{row_total} = 0 if !defined($self->{row_total});
464 21         32 $self->{last_retval} = $sth->execute(@{$self->{bind}});
  21         52  
465 21 100       142 $self->{row_total} += $self->{last_retval} if $self->{last_retval};
466             }
467              
468             sub _cleanup {
469 32     32   50 my $self = shift;
470 32         56 $self->{bind} = undef;
471 32         67 $self->{hash_buffer} = undef;
472 32         57 $self->{buffer_counter} = 0;
473             }
474              
475             sub DESTROY {
476 13     13   18902 my $self = shift;
477             # Empty buffer
478 13 100       78 $self->_empty_buffer() if $self->{buffer_counter};
479             }
480              
481             ## Private functions
482              
483             sub _create_insert_sql {
484 38     38   3070 my $table = shift;
485 38         55 my $columns = shift;
486 38         79 my $placeh = shift;
487 38   100     121 my $dupe = shift || "";
488              
489 38 100       86 my $ignore = ($dupe eq 'ignore') ? ' IGNORE' : '';
490 38         79 my $cols = join(',', @$columns);
491 38         92 my $sql = "INSERT$ignore INTO $table ($cols)\nVALUES $placeh";
492              
493 38 100       97 $sql .= _on_duplicate_key_update($columns) if $dupe eq 'update';
494              
495 38         141 return $sql;
496             }
497              
498             sub _row_placeholders {
499 34     34   12176 my $fields = shift;
500 34         48 my $cols = shift;
501 34         51 my $null = shift;
502 34         48 my @bind = ();
503 34         71 my $sql = "(";
504              
505 34         49 my $val;
506              
507 34         61 foreach my $key (@$cols) {
508 56 100 100     154 $fields->{$key} = \"NULL" if $null && !defined($fields->{$key});
509              
510 56 100       115 if (ref($fields->{$key})) {
511 12         31 $val = ${$fields->{$key}};
  12         26  
512             } else {
513 44         65 $val = "?";
514 44         71 push @bind, $fields->{$key};
515             }
516 56         107 $sql .= "$val,";
517             }
518              
519 34 100       93 chop($sql) if @$cols;
520              
521 34         131 return "$sql)", @bind;
522             }
523              
524             sub _on_duplicate_key_update {
525 8     8   3879 my $columns = shift;
526             return "\nON DUPLICATE KEY UPDATE "
527 8         33 . join(',', map {"$_=VALUES($_)"} @$columns);
  11         52  
528             }
529              
530             =head1 NOTES
531              
532             =head2 Using reserved words as object names
533              
534             If you are using reserved words as table/column names (which is strongly discouraged),
535             just include the appropriate delimiter in the C or C parameter. E.g. for
536             MySQL with columns named C and C you can do:
537              
538             cols => [qw/`from` `to`/]
539              
540             For PostgreSQL or Oracle you'd do C<[qw/"from" "to"/]>, for SQL Server C<[qw/[from] [to]/]> etc.
541              
542             =head2 On duplicate key update
543              
544             The C 'update'> option creates an C clause
545             for the query. E.g.:
546              
547             my $sql = multi_insert_sql('table_name', [qw/col1 col2/], 2, 'update');
548              
549             will produce:
550              
551             ## INSERT INTO table_name (col1,col2) VALUES (?,?),(?,?) ON DUPLICATE KEY UPDATE col1=VALUES(col1),col2=VALUES(col2)
552              
553             Note that as of MySQL 8.0.20 the C in C is deprecated (row alias is
554             used instead), so this functionality might need to be updated some day if C is
555             removed completely.
556              
557             =head2 Output whitespace
558              
559             No spaces are added to the output string beyond the minimum. However, there is a new
560             line (C<\n>) added for each row of value placeholders - mainly to easily count the
561             number of rows from the string.
562             Also, the C clause is on a new line.
563              
564             =head2 Error handling
565              
566             The module does not do any error handling on C/C statements,
567             you should use L's C and C.
568              
569             =head2 Performance
570              
571             The OO interface has minimal overhead. The only consideration is that if your rows
572             do not contain particularly large amounts of data, you may want to increase the buffer
573             size which is at a modest 100 rows.
574              
575             Internally, to construct the prepared statements it uses similar logic to the public
576             functions. C is of particular interest as it is a minimalistic function
577             that may replace (similar interface / feature set) the C functions from
578             C or C while being over 40x faster than the former and
579             around 3x faster than the latter. The included C script gives
580             an idea (results on an M1 Pro Macbook):
581              
582             Compare SQL::Abstract, SQL::Maker, simple_insert:
583             Rate Abstract Abstract cached Maker Maker cached simple_insert
584             Abstract 4207/s -- -6% -90% -91% -98%
585             Abstract cached 4482/s 7% -- -90% -90% -98%
586             Maker 44245/s 952% 887% -- -4% -76%
587             Maker cached 46205/s 998% 931% 4% -- -75%
588             simple_insert 187398/s 4355% 4081% 324% 306% --
589            
590             Compare simple_insert, multi_insert_sql for single row:
591             Rate simple_insert multi_insert_sql
592             simple_insert 190037/s -- -76%
593             multi_insert_sql 797596/s 320% --
594              
595             =head1 AUTHOR
596              
597             Dimitrios Kechagias, C<< >>
598              
599             =head1 BUGS
600              
601             Please report any bugs or feature requests either on L (preferred), or on RT
602             (via the email , or L).
603              
604             I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
605              
606             =head1 GIT
607              
608             L
609              
610             =head1 CPAN
611              
612             L
613              
614             =head1 LICENSE AND COPYRIGHT
615              
616             Copyright (C) 2023, SpareRoom
617              
618             This is free software; you can redistribute it and/or modify it under
619             the same terms as the Perl 5 programming language system itself.
620              
621             =cut
622              
623             1; # End of SQL::Inserter