File Coverage

blib/lib/SQL/Inserter.pm
Criterion Covered Total %
statement 153 153 100.0
branch 52 52 100.0
condition 27 27 100.0
subroutine 20 20 100.0
pod 4 4 100.0
total 256 256 100.0


line stmt bran cond sub pod time code
1             package SQL::Inserter;
2              
3 5     5   1024703 use 5.008;
  5         44  
4 5     5   27 use strict;
  5         9  
  5         112  
5 5     5   23 use warnings;
  5         13  
  5         111  
6              
7 5     5   23 use Carp;
  5         10  
  5         312  
8 5     5   30 use Exporter 'import';
  5         9  
  5         9399  
9              
10             =head1 NAME
11              
12             SQL::Inserter - Efficient buffered DBI inserter and fast INSERT SQL builder
13              
14             =head1 VERSION
15              
16             Version 0.01
17              
18             =cut
19              
20             our $VERSION = '0.00_3';
21              
22             our @EXPORT_OK = qw(simple_insert multi_insert_sql);
23              
24             =head1 SYNOPSIS
25              
26             use SQL::Inserter;
27              
28             my $sql = SQL::Inserter->new(
29             dbh => $dbh,
30             table => 'table',
31             cols => [qw/column1 column2.../]?,
32             buffer => 100? # No. of rows for multi-row insert
33             );
34              
35             # Fastest method: pass single or multiple rows of data as an array
36             $sql->insert($col1_row1, $col2_row1, $col1_row2...);
37              
38             # You can manually flush the buffer at any time with no argument on insert
39             # (otherwise there is auto-flush on the object's destruction)
40             $sql->insert();
41              
42             # Alternative, pass a single row as a hash, allows SQL code passed as references
43             # instead of values
44             $sql->insert({
45             column1 => $data1,
46             column2 => \'NOW()',
47             ...
48             });
49              
50             # There are also functions to just get the SQL statement and its bind vars
51             # similar to SQL::Abstract or SQL::Maker insert, but with much less overhead:
52             my ($sql, @bind) = simple_insert($table, {col1=>$val...});
53              
54             # Multi-row variant:
55             my ($sql, @bind) = simple_insert($table, [{col1=>$row1_val...},{col1=>$row2_val...},...]);
56              
57             # Or, construct an SQL statement with placeholders for a given number of rows:
58             my $sql = multi_insert_sql('table', [qw/col1 col2.../], $num_of_rows);
59              
60             =head1 DESCRIPTION
61              
62             SQL::Inserter's main lightweight OO interface will let you do L inserts as
63             efficiently as possible by managing a multi-row buffer and prepared statements.
64              
65             You only have to select the number of rows for the buffered writes (default is 100)
66             and choose whether to pass your data in arrays (fastest, requires all data to be bind
67             values, will execute the same prepared statement every time the buffer is full) or
68             hashes (allows SQL code apart from plain values).
69              
70             It also provides lightweight functions that return the SQL queries to be used manually,
71             similar to C, but much faster.
72              
73             =head1 EXPORTS
74              
75             On request: C C.
76              
77             =head1 CONSTRUCTOR
78              
79             =head2 C
80              
81             my $sql = SQL::Inserter->new(
82             dbh => $dbh,
83             table => $table,
84             cols => \@column_names?,
85             buffer => 100?,
86             duplicates => $ignore_or_update?,
87             null_undef => $convert_undef_to_NULL?
88             );
89              
90             Creates an object to insert data to a specific table. Buffering is enabled by default
91             and anything left on it will be written when the object falls out of scope / is destroyed.
92              
93             Required parameters:
94              
95             =over 4
96              
97             =item * C : A L database handle.
98              
99             =item * C : The name of the db table to insert to. See L if you
100             are using a restricted word for a table name.
101              
102             =back
103              
104             Optional parameters:
105              
106             =over 4
107              
108             =item * C : The names of the columns to insert. It is required if arrays are
109             used to pass the data. With hashes they are optional (the order will be followed
110             if they are defined). See L if you are using any restricted words for
111             column names.
112              
113             =item * C : Max number of rows to be held in buffer before there is a write.
114             The buffer flushes (writes contents) when the object is destroyed. Setting it to 1
115             writes each row separately (least efficient). For small rows you can set buffer to
116             thousands. The default is a (conservative) 100 which works with big data rows.
117              
118             =item * C : For MySQL, define as C<'ignore'> or C<'update'> to get an
119             C or C query respectively. See L
120             for details on the latter.
121              
122             =item * C : Applies to the hash inserts only. If true, any undefined
123             values will be converted to SQL's C (similar to the C default).
124             The default behaviour will leave an undef as the bind variable, which may either
125             create an empty string in the db or give an error depending on your column type and
126             db settings.
127              
128             =back
129              
130             =cut
131              
132             sub new {
133 13     13 1 34806 my $class = shift;
134 13         41 my %args = @_;
135              
136 13         25 my $self = {};
137 13         22 bless($self, $class);
138              
139 13   100     282 $self->{dbh} = $args{dbh} || croak("dbh parameter (db handle) required.");
140 12   100     130 $self->{table} = $args{table} || croak("table parameter required.");
141 11         27 $self->{cols} = $args{cols};
142 11   100     34 $self->{buffer} = $args{buffer} || 100;
143 11         16 $self->{dupes} = $args{duplicates};
144 11         22 $self->{null} = $args{null_undef};
145 11 100       27 if ($self->{dupes}) {
146 2 100       8 $self->{ignore} = 1 if $self->{dupes} eq "ignore";
147 2 100       7 $self->{update} = 1 if $self->{dupes} eq "update";
148             }
149 11         28 $self->_cleanup();
150              
151 11         45 return $self;
152             }
153              
154             =head1 METHODS
155              
156             =head2 insert
157              
158             # Fastest array method. Only bind data is passed.
159             my $ret = $sql->insert(@column_data_array);
160              
161             # Alternative, allows SQL code as values in addition to bind variables
162             my $ret = $sql->insert(\%row_data);
163              
164             # No parameters will force emtying of buffer (db write)
165             my $ret = $sql->insert();
166              
167             The main insert method. Returns the return value of the last C statement
168             if there was one called, 0 otherwise (buffer not full.
169              
170             It works in two main modes, by passing an array or a hashref:
171              
172             =over 4
173              
174             =item Array mode
175              
176             Pass the data for one or more rows in a flat array, buffering will work automatically
177             based on your C settings. Obviously your C<@column_data_array> has to contain
178             a multiple of the number of C defined on the constructor.
179              
180             This is the fastest mode, but it only allows simple bind values. Any undefined values
181             will be passed directly to DBI->execute, which may or may not be what you expect -
182             there will not be any explicit conversion to SQL C.
183              
184             =item Hash mode
185              
186             Pass a reference to a hash containing the column names & values for a single row
187             of data. If C was not defined on the constructor, the columns from the first
188             data row will be used instead. For subsequent rows any extra columns will be disregarded
189             and any missing columns will be considered to have an C (which can be
190             automatically converted to C if the C option was set).
191              
192             =item Flushing the buffer
193              
194             Calling C with no arguments forces a write to the db, flushing the buffer.
195             You don't have to call this manually as the buffer will be flushed when the object
196             is destroyed (e.g. your object falls out of scope).
197              
198             =item Mixing modes
199              
200             You can theoretically mix modes, but only when the buffer is empty e.g. you can start
201             with the array mode, flush the buffer and continue with hash mode (C will be
202             defined from the array mode). Or you can start with hash mode (so C will be defined
203             from the very first hash), and after flushing the buffer you can switch to array mode.
204              
205             =back
206              
207             =cut
208              
209             sub insert {
210 31     31 1 4036 my $self = shift;
211              
212 31 100 100     140 return $self->_hash_insert(@_) if $_[0] and ref($_[0]);
213              
214 18         34 my $ret = 0;
215 18 100       49 if (@_) {
    100          
216              
217             croak("Calling insert without a hash requires cols defined in constructor")
218 14 100       142 unless $self->{cols};
219            
220             croak("Insert arguments must be multiple of cols")
221 13 100       19 if scalar(@_) % scalar @{$self->{cols}};
  13         183  
222              
223             croak("Insert was previously called with hash argument (still in buffer)")
224 11 100       101 if $self->{hash_buffer};
225              
226 10         22 while (@_) {
227 14         23 my $rows = scalar(@_) / scalar @{$self->{cols}};
  14         29  
228 14         27 my $left = $self->{buffer} - $self->{buffer_counter}; # Space left in buffer
229              
230 14 100       32 if ($rows > $left) { # Can't fit buffer
231 4         8 my $max = $left * scalar @{$self->{cols}};
  4         7  
232 4         8 push @{$self->{bind}}, splice(@_,0,$max);
  4         30  
233 4         10 $self->{buffer_counter} = $self->{buffer};
234             } else {
235 10         14 push @{$self->{bind}}, splice(@_);
  10         28  
236 10         14 $self->{buffer_counter} += $rows;
237             }
238 14 100       45 $ret = $self->_write_full_buffer() if $self->{buffer_counter} == $self->{buffer};
239             }
240             } elsif ($self->{buffer_counter}) { # Empty the buffer
241 2         14 $ret = $self->_empty_buffer();
242             }
243 14         45 return $ret;
244             }
245              
246             =head1 ATTRIBUTES
247              
248             =head2 C
249              
250             my $val = $sql->{last_retval}
251              
252             The return value of the last DBI C is stored in this attribute. On a successful
253             insert it should contain the number of rows of that statement. Note that an C
254             call, depending on the buffering, may call C zero, one or more times.
255              
256             =head2 C
257              
258             my $total = $sql->{row_total}
259              
260             Basically a running total of the return values, for successful inserts it shows you
261             how many rows were inserted into the database. It will be undef if no C has
262             been called.
263              
264             =head2 C
265              
266             my $count = $sql->{buffer_counter}
267              
268             Check how many un-inserted data rows the buffer currently holds.
269              
270             =head1 FUNCTIONS
271              
272             =head2 simple_insert
273              
274             # Single row
275             my ($sql, @bind) = simple_insert($table, \%fieldvals, \%options);
276              
277             # Multi-row
278             my ($sql, @bind) = simple_insert($table, [\%fieldvals_row1,...], \%options);
279              
280             Returns the SQL statement and bind variable array for a hash containing the row
281             columns and values. Values are treated as bind variables unless they are references
282             to SQL code strings. E.g. :
283              
284             my ($sql, @bind) = simple_insert('table', {foo=>"bar",when=>\"NOW()"});
285             ### INSERT INTO table (foo, when) VALUES (?,NOW())
286              
287             The function also accepts an array of hashes to allow multi-row inserts:
288              
289             my ($sql, @bind) = simple_insert('table', [{foo=>"foo"},{foo=>"bar"}]);
290             ### INSERT INTO table (foo) VALUES (?),(?)
291              
292             The first row (element in array) needs to contain the superset of all the columns
293             that you want to insert, if some of your rows have undefined column data.
294              
295             Options:
296              
297             =over 4
298            
299             =item * C : If true, any undefined values will be converted to SQL's
300             C (similar to the C default). The default behaviour will leave
301             an undef as the bind variable, which may either create an empty string in the db or
302             give an error depending on your column type and db settings.
303              
304             =item * C : For MySQL, define as C<'ignore'> or C<'update'> to get an
305             C or C query respectively. See L
306             for details on the latter.
307              
308             =back
309              
310             =cut
311              
312             sub simple_insert {
313 10     10 1 12564 my $table = shift;
314 10         19 my $fields = shift;
315 10         11 my $opt = shift;
316              
317 10         18 my ($placeh, @bind, @cols);
318 10 100       27 if (ref($fields) eq 'ARRAY') {
319 3         6 @cols = keys %{$fields->[0]};
  3         11  
320 3         5 my @rows;
321 3         6 foreach my $f (@$fields) {
322 5         14 my ($row, @b) = _row_placeholders($f, \@cols, $opt->{null_undef});
323 5         11 push @rows, $row;
324 5         10 push @bind, @b;
325             }
326 3         9 $placeh = join(",\n", @rows);
327             } else {
328 7         22 @cols = keys %$fields;
329 7         23 ($placeh, @bind) = _row_placeholders($fields, \@cols, $opt->{null_undef});
330             }
331              
332             return _create_insert_sql(
333             $table, \@cols, $placeh, $opt->{duplicates}
334 10         39 ), @bind;
335             }
336              
337             =head2 multi_insert_sql
338              
339             my $sql = multi_insert_sql(
340             $table,
341             \@columns, # names of table columns
342             $num_of_rows?, # default = 1
343             $duplicates? # can be set as ignore/update in case of duplicate key (MySQL)
344             );
345              
346             Builds bulk insert query (single insert is possible too), with ability for
347             ignore/on duplicate key update variants for MySQL.
348              
349             Requires at least the name of the table C<$table> and an arrayref with the column
350             names C<\@columns>. See L if you want to quote table or column names.
351              
352             Optional parameters:
353              
354             =over 4
355            
356             =item * C<$num_of_rows> : By default it returns SQL with bind value placeholders
357             for a single row. You can define any number of rows to use with multi-row bind
358             variable arrays.
359              
360             =item * C<$duplicate> : For MySQL, passing C<'ignore'> as the 4th argument returns
361             an C query. Passing C<'update'> as the argument returns a query
362             containing an `ON DUPLICATE KEY UPDATE` clause (see L for further details).
363              
364             =back
365              
366             =cut
367              
368             sub multi_insert_sql {
369 18     18 1 6525 my $table = shift;
370 18         27 my $columns = shift;
371 18   100     65 my $num_rows = shift || 1;
372 18         28 my $dupe = shift;
373              
374 18 100 100     116 return unless $table && $columns && @$columns;
      100        
375              
376 15         76 my $placeholders =
377             join(",\n", ('(' . join(',', ('?') x @$columns) . ')') x $num_rows);
378              
379 15         36 return _create_insert_sql($table, $columns, $placeholders, $dupe);
380             }
381              
382             ## Private methods
383              
384             sub _hash_insert {
385 13     13   22 my $self = shift;
386 13         16 my $fields = shift;
387 13         18 my $ret = 0;
388              
389             croak("Insert was previously called with an array argument (still in buffer)")
390 13 100 100     134 if $self->{buffer_counter} && !$self->{hash_buffer};
391              
392 12         18 $self->{buffer_counter}++;
393 12 100       36 $self->{cols} = [keys %$fields] if !defined($self->{cols});
394 12         28 my ($row, @bind) = _row_placeholders($fields, $self->{cols}, $self->{null});
395 12         18 push @{$self->{hash_buffer}}, $row;
  12         26  
396 12         18 push @{$self->{bind}}, @bind;
  12         23  
397              
398 12 100       32 $ret = $self->_write_hash_buffer() if $self->{buffer_counter} == $self->{buffer};
399              
400 12         37 return $ret;
401             }
402              
403             sub _write_full_buffer {
404 9     9   14 my $self = shift;
405              
406             $self->{full_buffer_insert} = $self->_prepare_full_buffer_insert()
407 9 100       36 if !$self->{full_buffer_insert};
408              
409 9         53 $self->_execute($self->{full_buffer_insert});
410 9         25 $self->_cleanup();
411              
412 9         23 return $self->{last_retval};
413             }
414              
415             sub _prepare_full_buffer_insert {
416 6     6   9 my $self = shift;
417             $self->{full_buffer_insert} = $self->{dbh}->prepare(
418 6         15 multi_insert_sql(map {$self->{$_}} qw/table cols buffer dupes/)
  24         50  
419             );
420             }
421              
422             sub _empty_buffer {
423 5     5   7 my $self = shift;
424              
425 5 100       19 return $self->_write_hash_buffer() if $self->{hash_buffer};
426              
427 3         6 my $rows = scalar(@{$self->{bind}}) / scalar @{$self->{cols}};
  3         6  
  3         7  
428             my $sth = $self->{dbh}->prepare(
429             multi_insert_sql(
430             $self->{table},
431             $self->{cols},
432             $rows,
433             $self->{dupes}
434             )
435 3         11 );
436 3         25 $self->_execute($sth);
437 3         9 $self->_cleanup();
438              
439 3         8 return $self->{last_retval};
440             }
441              
442             sub _write_hash_buffer {
443 7     7   14 my $self = shift;
444              
445 7         11 my $placeh = join(",\n", @{$self->{hash_buffer}});
  7         27  
446             my $sth = $self->{dbh}->prepare(
447             _create_insert_sql(
448             $self->{table}, $self->{cols}, $placeh, $self->{dupe}
449             )
450 7         26 );
451 7         49 $self->_execute($sth);
452 7         16 $self->_cleanup();
453              
454 7         21 return $self->{last_retval};
455             }
456              
457             sub _execute {
458 19     19   40 my $self = shift;
459 19         28 my $sth = shift;
460              
461 19 100       47 $self->{row_total} = 0 if !defined($self->{row_total});
462 19         29 $self->{last_retval} = $sth->execute(@{$self->{bind}});
  19         49  
463 19 100       143 $self->{row_total} += $self->{last_retval} if $self->{last_retval};
464             }
465              
466             sub _cleanup {
467 30     30   47 my $self = shift;
468 30         54 $self->{bind} = undef;
469 30         73 $self->{hash_buffer} = undef;
470 30         49 $self->{buffer_counter} = 0;
471             }
472              
473             sub DESTROY {
474 13     13   18526 my $self = shift;
475             # Empty buffer
476 13 100       72 $self->_empty_buffer() if $self->{buffer_counter};
477             }
478              
479             ## Private functions
480              
481             sub _create_insert_sql {
482 36     36   3013 my $table = shift;
483 36         54 my $columns = shift;
484 36         64 my $placeh = shift;
485 36   100     133 my $dupe = shift || "";
486              
487 36 100       127 my $ignore = ($dupe eq 'ignore') ? ' IGNORE' : '';
488 36         71 my $cols = join(',', @$columns);
489 36         93 my $sql = "INSERT$ignore INTO $table ($cols)\nVALUES $placeh";
490              
491 36 100       84 $sql .= _on_duplicate_key_update($columns) if $dupe eq 'update';
492              
493 36         127 return $sql;
494             }
495              
496             sub _row_placeholders {
497 32     32   11374 my $fields = shift;
498 32         47 my $cols = shift;
499 32         47 my $null = shift;
500 32         50 my @bind = ();
501 32         47 my $sql = "(";
502              
503 32         84 my $val;
504              
505 32         63 foreach my $key (@$cols) {
506 52 100 100     190 $fields->{$key} = \"NULL" if $null && !defined($fields->{$key});
507              
508 52 100       104 if (ref($fields->{$key})) {
509 12         15 $val = ${$fields->{$key}};
  12         25  
510             } else {
511 40         59 $val = "?";
512 40         64 push @bind, $fields->{$key};
513             }
514 52         96 $sql .= "$val,";
515             }
516              
517 32 100       82 chop($sql) if @$cols;
518              
519 32         112 return "$sql)", @bind;
520             }
521              
522             sub _on_duplicate_key_update {
523 7     7   3721 my $columns = shift;
524             return "\nON DUPLICATE KEY UPDATE "
525 7         19 . join(',', map {"$_=VALUES($_)"} @$columns);
  9         42  
526             }
527              
528             =head1 NOTES
529              
530             =head2 Using reserved words as object names
531              
532             If you are using reserved words as table/column names (which is strongly discouraged),
533             just include the appropriate delimiter in the C or C parameter. E.g. for
534             MySQL with columns named C and C you can do:
535              
536             cols => [qw/`from` `to`/]
537              
538             For PostgreSQL or Oracle you'd do C<[qw/"from" "to"/]>, for SQL Server C<[qw/[from] [to]/]> etc.
539              
540             =head2 On duplicate key update
541              
542             The C 'update'> option creates an C clause
543             for the query. E.g.:
544              
545             my $sql = multi_insert_sql('table_name', [qw/col1 col2/], 2, 'update');
546              
547             will produce:
548              
549             ## INSERT INTO table_name (col1,col2) VALUES (?,?),(?,?) ON DUPLICATE KEY UPDATE col1=VALUES(col1),col2=VALUES(col2)
550              
551             Note that as of MySQL 8.0.20 the C in C is deprecated (row alias is
552             used instead), so this functionality might need to be updated some day if C is
553             removed completely.
554              
555             =head2 Output whitespace
556              
557             No spaces are added to the output string beyond the minimum. However, there is a new
558             line (C<\n>) added for each row of value placeholders - mainly to easily count the
559             number of rows from the string.
560             Also, the C clause is on a new line.
561              
562             =head2 Error handling
563              
564             The module does not do any error handling on C/C statements,
565             you should use L's C and C.
566              
567             =head2 Performance
568              
569             The OO interface has minimal overhead. The only consideration is that if your rows
570             do not contain particularly large amounts of data, you may want to increase the buffer
571             size which is at a modest 100 rows.
572              
573             Internally, to construct the prepared statements it uses similar logic to the public
574             functions. C is of particular interest as it is a minimalistic function
575             that may replace (similar interface / feature set) the C functions from
576             C or C while being over 40x faster than the former and
577             around 3x faster than the latter. The included C script gives
578             an idea (results on an M1 Pro Macbook):
579              
580             Compare SQL::Abstract, SQL::Maker, simple_insert:
581             Rate Abstract Abstract cached Maker Maker cached simple_insert
582             Abstract 4207/s -- -6% -90% -91% -98%
583             Abstract cached 4482/s 7% -- -90% -90% -98%
584             Maker 44245/s 952% 887% -- -4% -76%
585             Maker cached 46205/s 998% 931% 4% -- -75%
586             simple_insert 187398/s 4355% 4081% 324% 306% --
587            
588             Compare simple_insert, multi_insert_sql for single row:
589             Rate simple_insert multi_insert_sql
590             simple_insert 190037/s -- -76%
591             multi_insert_sql 797596/s 320% --
592              
593             =head1 AUTHOR
594              
595             Dimitrios Kechagias, C<< >>
596              
597             =head1 BUGS
598              
599             Please report any bugs or feature requests either on L (preferred), or on RT
600             (via the email , or L).
601              
602             I will be notified, and then you'll automatically be notified of progress on your bug as I make changes.
603              
604             =head1 GIT
605              
606             L
607              
608             =head1 CPAN
609              
610             L
611              
612             =head1 LICENSE AND COPYRIGHT
613              
614             Copyright (C) 2023, SpareRoom
615              
616             This is free software; you can redistribute it and/or modify it under
617             the same terms as the Perl 5 programming language system itself.
618              
619             =cut
620              
621             1; # End of SQL::Inserter