File Coverage

blib/lib/Library/CallNumber/LC.pm
Criterion Covered Total %
statement 125 131 95.4
branch 44 54 81.4
condition 8 13 61.5
subroutine 16 16 100.0
pod 9 9 100.0
total 202 223 90.5


line stmt bran cond sub pod time code
1             package Library::CallNumber::LC;
2              
3 3     3   61166 use warnings;
  3         7  
  3         108  
4 3     3   17 use strict;
  3         5  
  3         177  
5 3     3   6812 use Math::BigInt;
  3         116001  
  3         20  
6              
7             =head1 NAME
8              
9             Library::CallNumber::LC - Deal with Library-of-Congress call numbers
10              
11             =head1 VERSION
12              
13             Version 0.22;
14              
15             =cut
16              
17             our $VERSION = '0.22';
18              
19              
20             =head1 SYNOPSIS
21              
22             Utility functions to deal with Library of Congress Call Numbers
23              
24             use Library::CallNumber::LC;
25             my $a = Library::CallNumber::LC->new('A 123.4 .c11');
26             print $a->normalize; # normalizes for string comparisons.
27             # gives 'A01234 C11'
28             print $a->start_of_range; # same as "normalize"
29             my $b = Library::CallNumber::LC->new('B11 .c13 .d11');
30             print $b->normalize;
31             # gives 'B0011 C13 D11'
32             my @range = ($a->start_of_range, $b->end_of_range);
33             # end of range is 'B0011 C13 D11~'
34            
35             # Get components suitable for printing (e.g., number and decimal joined, leading dot on first cutter)
36             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components()
37            
38             # Same thing, but return empty strings for missing components (e.g., the cutters)
39             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components('true');
40              
41             =head1 ABSTRACT
42              
43             Library::CallNumber::LC is mostly designed to do call number normalization, with the following goals:
44              
45             =over 4
46              
47             =item * The normalized call numbers are comparable with each other, for proper sorting
48              
49             =item * The normalized call number is a short as possible, so left-anchored wildcard searches are possible (e.g., searching on "A11*" should give you all the A11 call numbers)
50              
51             =item * A range defined by start_of_range and end_of_range should be correct, assuming that the string given for the end of the range is, in fact, a left prefix.
52              
53             =back
54              
55             That last point needs some explanation. The idea is that if someone gives a range of, say, A-AZ, what they really mean is A - AZ9999.99. The end_of_range method generates a key which lies immediately beyond the last possible key for a given starting point. There is no attempt to make end_of_range normalization correspond to anything in real life.
56              
57             =cut
58              
59             # Set up the prefix mapping for longints
60             my %intmap;
61             my $i = 0;
62             foreach my $prefix (qw(a aa ab abc ac ae ag ah ai al am an anl ao ap aq arx as at aug aw awo ay az b bc bd bf bg bh bj bl bm bn bp bq br bs bt bu bv bx c cb cc cd ce cg cis cj cmh cmm cn cr cs ct cz d da daa daw db dc dd de df dff dg dh dj djk dk dkj dl doc dp dq dr ds dt dth du dx e ea eb ec ed ee ek ep epa ex f fb fc fem fg fj fnd fp fsd ft ful g ga gb gc gda ge gf gh gn gr gs gt gv h ha hb hc hcg hd he hf hfs hg hh hhg hj hjc hm hmk hn hq hs ht hv hx i ia ib iid ill ilm in ioe ip j ja jan jb jc jf jg jh jhe jj jk jkc jl jln jn jq js jv jx jz k kb kbm kbp kbq kbr kbu kc kd kdc kde kdg kdk kds kdz ke kea keb kem ken keo keq kes kf kfa kfc kfd kff kfg kfh kfi kfk kfl kfm kfn kfo kfp kfr kfs kft kfu kfv kfw kfx kfz kg kga kgb kgc kgd kge kgf kgg kgh kgj kgk kgl kgn kgq kgs kgt kgv kgx kh kha khc khd khf khh khk khp khq khu khw kit kj kja kjc kje kjg kjj kjk kjm kjn kjp kjq kjr kjs kjt kjv kjw kk kka kkb kkc kke kkf kkg kkh kki kkj kkm kkn kkp kkq kkr kks kkt kkv kkw kkx kky kkz kl kla klb kld kle klf klg klh klm kln klp klr kls klt klv klw km kmc kme kmf kmh kmj kmk kml kmm kmn kmo kmp kmq kmt kmu kmv kmx kn knc knd kne knf kng knh knk knl knm knn knp knq knr kns knt knu knw knx kny kp kpa kpc kpe kpf kpg kph kpj kpk kpl kpm kpp kps kpt kpv kpw kq kqc kqe kqg kqj kqk kqp kqw krb krc krg krm krn krp krr krs kru krv krx ks ksa ksc ksh ksj ksk ksl ksp kss kst ksv ksw ksx ksy kta ktd ktg ktj ktk ktl ktq ktr ktt ktu ktv ktw ktx kty ktz ku kuc kuq kvc kvf kvm kvn kvp kvq kvr kvs kvw kwc kwg kwh kwl kwp kwr kww kwx kz kza kzd l la law lb lc ld le lf lg lh lj ll ln lrm lt lv m may mb mc me mf mh mkl ml mpc mr ms mt my n na nat nax nb nc nd nda nds ne ner new ng nh nk nl nmb nn no nt nv nx ok onc p pa pb pc pcr pd pe pf pg ph phd pj pjc pk pl pm pn pnb pp pq pr ps pt pz q qa qb qc qd qe qh qk ql qm qp qr qry qu qv r ra rb rbw rc rcc rd re ref res rf rg rh rj rk rl rm rn rp rs rt rv rx rz s sb sd see sf sfk sgv sh sk sn sql sw t ta tc td tdd te tf tg tgg th tj tk tl tn tnj to tp tr ts tt tx tz u ua ub uc ud ue uf ug uh un use v va vb vc vd ve vf vg vk vla vm w wq x xp xx y yh yl yy z za zhn zz zzz)) {
63             $intmap{$prefix} = $i;
64             $i++;
65             }
66              
67             # Regexp constants to deal with matching LC and variants
68              
69             my $lcregex = qr/^
70             \s*
71             (?:VIDEO-D)? # for video stuff
72             (?:DVD-ROM)? # DVDs, obviously
73             (?:CD-ROM)? # CDs
74             (?:TAPE-C)? # Tapes
75             \s*
76             ([A-Z]{1,3}) # alpha
77             \s*
78             (?: # optional numbers with optional decimal point
79             (\d+)
80             (?:\s*?\.\s*?(\d+))?
81             )?
82             \s*
83             (\d+[stndrh]*)? # optional extra numbering including suffixes (1st, 2nd, etc.)
84             \s*
85             (?: # optional cutter
86             \.? \s*
87             ([A-Z]) # cutter letter
88             \s*
89             (\d+ | \Z)? # cutter numbers
90             )?
91             \s*
92             (?: # optional cutter
93             \.? \s*
94             ([A-Z]) # cutter letter
95             \s*
96             (\d+ | \Z)? # cutter numbers
97             )?
98             \s*
99             (?: # optional cutter
100             \.? \s*
101             ([A-Z]) # cutter letter
102             \s*
103             (\d+ | \Z)? # cutter numbers
104             )?
105             (\s+.+?)? # everthing else
106             \s*$
107             /x;
108              
109              
110              
111             my $weird = qr/
112             ^
113             \s*[A-Z]+\s*\d+\.\d+\.\d+
114             /x;
115              
116             # Class variables for top/bottom sort chars
117             my $Topper = ' '; # must sort before 'A'
118             my $Bottomer = '~'; # must sort after 'Z' and '9'
119              
120              
121             =head1 CONSTRUCTORS
122              
123             =head2 new([call_number_text, [topper_character, [bottomer_character]]]) -- create a new object, optionally passing in the initial string, a "topper", and a "bottomer" (explained below)
124              
125             =cut
126              
127             sub new {
128 6     6 1 6767 my $proto = shift;
129 6   33     41 my $class = ref($proto) || $proto;
130 6   100     27 my $lc = shift || '';
131 6         10 my $topper = shift;
132 6 100       26 $topper = $Topper if !defined($topper); # ZERO is false but might be a reasonable value, so we need this more specific check
133 6   66     32 my $bottomer = shift || $Bottomer;
134 6         36 my $self = {
135             callno => uc($lc),
136             topper => $topper,
137             bottomer => $bottomer
138             };
139 6         24 bless $self, $class;
140 6         24 return $self;
141             }
142              
143              
144             =head1 BASIC ACCESSORS
145              
146             =head2 call_number([call_number_text])
147              
148             The text of the call number we are dealing with.
149              
150             =cut
151              
152             sub call_number {
153 2     2 1 6 my $self = shift;
154 2 100       9 if (@_) { $self->{callno} = uc(shift) }
  1         4  
155 2         10 return $self->{callno};
156             }
157              
158             =head2 topper([character])
159              
160             Specify which character occupies the 'always-sort-to-the-top' slots in the sort keys. Defaults to the SPACE character, but can reasonably be anything with an ASCII value lower than 48 (i.e. the 'zero' character, '0'). This can function as either a class or instance method depending on need.
161              
162             =cut
163              
164             sub topper {
165 402     402 1 463 my $self = shift;
166 402         452 my $topper = shift;
167 402 100       721 if (ref $self) {
168 397 100       842 $self->{topper} = $topper if $topper; # just myself
169 397         1100 return $self->{topper};
170             } else {
171 5 100       12 $Topper = $topper if $topper; # whole class
172 5         12 return $Topper;
173             }
174             }
175              
176             =head2 bottomer([character])
177              
178             Specify which character occupies the 'always-sort-to-the-bottom' slots in the sort keys. Defaults to the TILDE character, but can reasonably be anything with an ASCII value higher than 90 (i.e. 'Z'). This can function as either a class or instance method depending on need.
179              
180             =cut
181              
182             sub bottomer {
183 153     153 1 204 my $self = shift;
184 153         169 my $bottomer = shift;
185 153 100       308 if (ref $self) {
186 150 100       277 $self->{bottomer} = $bottomer if $bottomer; # just myself
187 150         339 return $self->{bottomer};
188             } else {
189 3 100       9 $Bottomer = $bottomer if $bottomer; # whole class
190 3         9 return $Bottomer;
191             }
192             }
193              
194             =head1 OTHER METHODS
195              
196             =head2 components(boolean returnAll = false)
197              
198             @comps = Library::CallNumber::LC->new('A 123.4 .c11')->components($returnAll)
199              
200             Returns an array of the individual components of the call number (or undef if it doesn't look like a call number).
201             Components are:
202              
203             =over 4
204              
205             =item * alpha
206              
207             =item * number (numeric.decimal)
208              
209             =item * cutter1
210              
211             =item * cutter2
212              
213             =item * cutter3
214              
215             =item * "extra" (anything after the cutters)
216              
217             =back
218              
219             The optional argument (false by default) determines whether or not empty components (e.g.,
220             extra cutters) get a slot in the returned list.
221              
222             =cut
223              
224             sub components {
225 3     3 1 4 my $self = shift;
226 3         5 my $returnAll = shift;
227 3         9 my $lc = $self->{callno};
228              
229 3 50       28 return undef if ($lc =~ $weird);
230 3 50       76 return undef unless ($lc =~ $lcregex);
231              
232              
233 3         24 my ($alpha, $num, $dec, $othernum, $c1alpha, $c1num, $c2alpha, $c2num, $c3alpha, $c3num, $extra) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);
234              
235             #combine stuff if need be
236            
237 3 100       9 if ($dec) {
238 1         3 $num .= '.' . $dec;
239             }
240            
241 3     3   85616 no warnings;
  3         18  
  3         460  
242 3         8 my $c1 = join('', $c1alpha, $c1num);
243 3         5 my $c2 = join('', $c2alpha, $c2num);
244 3         6 my $c3 = join('', $c3alpha, $c3num);
245            
246 3 50       16 $c1 = '.' . $c1 if ($c1 =~ /\S/);
247 3     3   21 use warnings;
  3         7  
  3         994  
248            
249 3         71 my @return;
250 3         7 foreach my $comp ($alpha, $num, $othernum, $c1, $c2, $c3, $extra) {
251 21 100       40 $comp = '' unless (defined $comp);
252 21 100 66     63 next unless ($comp =~ /\S/ or $returnAll);
253 15         39 $comp =~ m/^\s*(.*?)\s*$/;
254 15         21 $comp = $1;
255 15         25 push @return, $comp;
256             }
257 3         37 return @return;
258             }
259              
260             =head2 _normalize(call_number_text)
261              
262             Base function to perform normalization.
263              
264             =cut
265              
266             sub _normalize {
267 255     255   310 my $self = shift;
268 255         402 my $lc = uc(shift);
269              
270 255         505 my $topper = $self->topper;
271              
272             # return undef if ($lc =~ $weird);
273 255 50       3851 return undef unless ($lc =~ $lcregex);
274            
275 255         1373 my ($alpha, $num, $dec, $othernum, $c1alpha, $c1num, $c2alpha, $c2num, $c3alpha, $c3num, $extra) = ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11);
276              
277 3     3   21 no warnings;
  3         14  
  3         3233  
278 255         377 my $class = $alpha;
279 255 100       1027 $class .= sprintf('%04s', $num) if $num;
280 255 100       550 $class .= $dec if $dec;
281 255         401 my $c1 = $c1alpha.$c1num;
282 255         390 my $c2 = $c2alpha.$c2num;
283 255         353 my $c3 = $c3alpha.$c3num;
284              
285             # normalize extra (most commonly years/numbering, benefits from padding)
286             # this could be reduced to a four digit pad, as very, very few numbers
287             # reach 10000, but we'll be conservative here (for now)
288 255         491 $extra =~ s/^\s+//g;
289 255         415 $extra =~ s/\.\s+/./g;
290 255         284 $extra =~ s/(\d)\s*-\s*(\d)/$1-$2/g;
291 255         396 $extra =~ s/(\d+)/sprintf("%05s", $1)/ge;
  48         181  
292 255 100       602 $extra = $topper . $extra if ($extra ne ''); # give the extra less 'weight' for falling down the list
293            
294             # pad out othernum (again, conservatively)
295 255         378 $othernum =~ s/(\d+)/sprintf("%05s", $1)/ge;
  25         77  
296              
297 255         451 return join($topper, grep {/\S/} ($class, $othernum, $c1, $c2, $c3, $extra));
  1530         4672  
298             }
299              
300             =head2 normalize([call_number_text])
301              
302             Normalize the stored or passed call number as a sortable string
303              
304             =cut
305              
306             sub normalize {
307 249     249 1 55187 my $self = shift;
308 249         321 my $lc = shift;
309 249 100       572 $lc = $lc? uc($lc) : $self->{callno};
310 249         530 return $self->_normalize($lc)
311             }
312              
313             =head2 start_of_range([call_number_text])
314              
315             Alias for normalize
316              
317             =cut
318              
319             sub start_of_range {
320 2     2 1 5 my $self = shift;
321 2         5 return $self->normalize(@_);
322             }
323              
324             =head2 end_of_range([call_number_text])
325              
326             Downshift an lc number so it represents the end of a range
327              
328             =cut
329              
330             sub end_of_range {
331 6     6 1 28 my $self = shift;
332 6         11 my $lc = shift;
333 6 100       25 $lc = $lc? uc($lc) : $self->{callno};
334 6         17 my $bottomer = $self->bottomer;
335 6         25 return $self->_normalize($lc) . $bottomer;
336             }
337              
338             =head2 toLongInt(call_number_text, num_digits)
339              
340             Attempt to turn a call number into an integer value. Possibly useful for fast range checks, although obviously not perfectly accurate. Optional argument I<$num_digits> can be used to control the number of digits used, and therefore the precision of the results.
341              
342             =cut
343              
344             my $minval = new Math::BigInt('0'); # set to zero until this code matures
345             my $minvalstring = $minval->bstr;
346              
347             # this is a work in progress, with room for improvement in both exception
348             # logic and overall economy of bits
349             sub toLongInt {
350 142     142 1 6378 my $self = shift;
351 142         224 my $lc = shift;
352 142   50     534 my $num_digits = shift || 19; # 19 is a max-fit for 64-bits within our scope
353              
354 142         299 my $topper = $self->topper;
355 142         396 my $bottomer = $self->bottomer;
356              
357             #print "$lc\n";
358 142         211 my $topper_ord = ord($topper);
359 142         254 my $long = $self->normalize($lc);
360              
361 142 50       307 return $minvalstring unless ($long);
362              
363 142         145 my ($alpha, $num, $rest);
364 142 100       580 if ($long =~ /^([A-Z]+)(\d{4})(.*)$/) { # we have a 'full' call number
    50          
365 131         419 ($alpha, $num, $rest) = (lc($1), $2, $3);
366             } elsif ($long =~ /^([A-Z]+)(.*)$/) { # numberless class; generally invalid, but let it slide for now
367 11         29 ($alpha, $rest) = (lc($1), $2);
368 11 50       45 if ($rest =~ /^$bottomer/) { # bottomed-out class
369 0         0 $num = '9999';
370             } else {
371 11         25 $num = '0000';
372             }
373             }
374 142         210 my $class_int_string = '';
375 142 50       324 if (defined($intmap{$alpha})) {
376 142         238 $class_int_string .= $intmap{$alpha} . $num;
377             } else {
378 0         0 warn "Unknown prefix '$alpha'\n";
379 0         0 return $minvalstring;
380             }
381 142         226 my $rest_int_string = '';
382 142         128 my $bottomout;
383 142         544 foreach my $char (split('', $rest)) {
384 1103 50       1943 if ($char eq $bottomer) {
385 0         0 $bottomout = 1;
386 0         0 last;
387             }
388 1103         2205 $rest_int_string .= sprintf('%02d', ord($char) - $topper_ord);
389             }
390              
391 142         417 $rest_int_string = substr($rest_int_string, 0, $num_digits - 7); # Reserve first seven digits for $alpha and $num
392 142 50       218 if ($bottomout) {
393 0         0 $rest_int_string .= '9' x (($num_digits - 7) - length($rest_int_string)); # pad it if need be
394             } else {
395 142         268 $rest_int_string .= '0' x (($num_digits - 7) - length($rest_int_string)); # pad it if need be
396             }
397              
398             # print " $long => ", join('', @rv), "\n";
399 142         1148 my $longint = Math::BigInt->new($class_int_string . $rest_int_string);
400 142         9186 $longint->badd($minval);
401             # warn "\n\n".$self->_normalize($lc)." = ".$longint->bstr." ( $class_int_string + $rest_int_string) \n\n";
402 142         7778 return $longint->bstr;
403            
404             }
405              
406              
407              
408             =head1 AUTHOR
409              
410             Current Maintainer: Dan Wells, C<< >>
411             Original Author: Bill Dueber, C<< >>
412              
413             =head1 BUGS
414              
415             Please report any bugs or feature requests through the web interface at
416             L. I will be
417             notified, and then you'll automatically be notified of progress on your bug as
418             I make changes.
419              
420              
421             =head1 SUPPORT
422              
423             You can find documentation for this module with the perldoc command.
424              
425             perldoc Library::CallNumber::LC
426              
427              
428             You can also look for information at the Google Code page:
429              
430             http://code.google.com/p/library-callnumber-lc/
431              
432              
433             =head1 COPYRIGHT & LICENSE
434              
435             Copyright 2009 Bill Dueber, all rights reserved.
436             Copyright 2011 Dan Wells, all rights reserved.
437              
438             This program is free software; you can redistribute it and/or modify it
439             under the same terms as Perl itself and also under the new BSD license
440             as described at http://www.opensource.org/licenses/bsd-license.php
441              
442              
443             =cut
444              
445             1; # End of Library::CallNumber::LC