| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package MARC::Lint; |
|
2
|
|
|
|
|
|
|
|
|
3
|
5
|
|
|
5
|
|
150009
|
use strict; |
|
|
5
|
|
|
|
|
10
|
|
|
|
5
|
|
|
|
|
175
|
|
|
4
|
5
|
|
|
5
|
|
20
|
use warnings; |
|
|
5
|
|
|
|
|
5
|
|
|
|
5
|
|
|
|
|
108
|
|
|
5
|
5
|
|
|
5
|
|
17
|
use integer; |
|
|
5
|
|
|
|
|
10
|
|
|
|
5
|
|
|
|
|
27
|
|
|
6
|
5
|
|
|
5
|
|
1655
|
use MARC::Record; |
|
|
5
|
|
|
|
|
6158
|
|
|
|
5
|
|
|
|
|
172
|
|
|
7
|
5
|
|
|
5
|
|
23
|
use MARC::Field; |
|
|
5
|
|
|
|
|
6
|
|
|
|
5
|
|
|
|
|
109
|
|
|
8
|
|
|
|
|
|
|
|
|
9
|
5
|
|
|
5
|
|
2081
|
use MARC::Lint::CodeData qw(%GeogAreaCodes %ObsoleteGeogAreaCodes %LanguageCodes %ObsoleteLanguageCodes); |
|
|
5
|
|
|
|
|
9
|
|
|
|
5
|
|
|
|
|
4029
|
|
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
our $VERSION = 1.49 ; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 NAME |
|
14
|
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
MARC::Lint - Perl extension for checking validity of MARC records |
|
16
|
|
|
|
|
|
|
|
|
17
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
18
|
|
|
|
|
|
|
|
|
19
|
|
|
|
|
|
|
use MARC::File::USMARC; |
|
20
|
|
|
|
|
|
|
use MARC::Lint; |
|
21
|
|
|
|
|
|
|
|
|
22
|
|
|
|
|
|
|
my $lint = new MARC::Lint; |
|
23
|
|
|
|
|
|
|
my $filename = shift; |
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $file = MARC::File::USMARC->in( $filename ); |
|
26
|
|
|
|
|
|
|
while ( my $marc = $file->next() ) { |
|
27
|
|
|
|
|
|
|
$lint->check_record( $marc ); |
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
# Print the title tag |
|
30
|
|
|
|
|
|
|
print $marc->title, "\n"; |
|
31
|
|
|
|
|
|
|
|
|
32
|
|
|
|
|
|
|
# Print the errors that were found |
|
33
|
|
|
|
|
|
|
print join( "\n", $lint->warnings ), "\n"; |
|
34
|
|
|
|
|
|
|
} # while |
|
35
|
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
Given the following MARC record: |
|
37
|
|
|
|
|
|
|
|
|
38
|
|
|
|
|
|
|
LDR 00000nam 22002538a 4500 |
|
39
|
|
|
|
|
|
|
040 _aMdSSJTT |
|
40
|
|
|
|
|
|
|
_cMdSSJTT |
|
41
|
|
|
|
|
|
|
040 _aMdSSJTT |
|
42
|
|
|
|
|
|
|
_beng |
|
43
|
|
|
|
|
|
|
_cMdSSJTT |
|
44
|
|
|
|
|
|
|
100 14 _aWall, Larry. |
|
45
|
|
|
|
|
|
|
110 1 _aO'Reilly & Associates. |
|
46
|
|
|
|
|
|
|
245 90 _aProgramming Perl / |
|
47
|
|
|
|
|
|
|
_aBig Book of Perl / |
|
48
|
|
|
|
|
|
|
_cLarry Wall, Tom Christiansen & Jon Orwant. |
|
49
|
|
|
|
|
|
|
250 _a3rd ed. |
|
50
|
|
|
|
|
|
|
250 _a3rd ed. |
|
51
|
|
|
|
|
|
|
260 _aCambridge, Mass. : |
|
52
|
|
|
|
|
|
|
_bO'Reilly, |
|
53
|
|
|
|
|
|
|
_r2000. |
|
54
|
|
|
|
|
|
|
590 4 _aPersonally signed by Larry. |
|
55
|
|
|
|
|
|
|
856 43 _uhttp://www.perl.com/ |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
the following errors are generated: |
|
58
|
|
|
|
|
|
|
|
|
59
|
|
|
|
|
|
|
1XX: Only one 1XX tag is allowed, but I found 2 of them. |
|
60
|
|
|
|
|
|
|
100: Indicator 2 must be blank but it's "4" |
|
61
|
|
|
|
|
|
|
245: Indicator 1 must be 0 or 1 but it's "9" |
|
62
|
|
|
|
|
|
|
245: Subfield _a is not repeatable. |
|
63
|
|
|
|
|
|
|
040: Field is not repeatable. |
|
64
|
|
|
|
|
|
|
260: Subfield _r is not allowed. |
|
65
|
|
|
|
|
|
|
856: Indicator 2 must be blank, 0, 1, 2 or 8 but it's "3" |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
68
|
|
|
|
|
|
|
|
|
69
|
|
|
|
|
|
|
Module for checking validity of MARC records. 99% of the users will want to do |
|
70
|
|
|
|
|
|
|
something like is shown in the synopsis. The other intrepid 1% will overload the |
|
71
|
|
|
|
|
|
|
C module's methods and provide their own special field-level checking. |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
What this means is that if you have certain requirements, such as making sure that |
|
74
|
|
|
|
|
|
|
all 952 tags have a certain call number in them, you can write a function that |
|
75
|
|
|
|
|
|
|
checks for that, and still get all the benefits of the MARC::Lint framework. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
=head1 EXPORT |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
None. Everything is done through objects. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 METHODS |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head2 new() |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
No parms needed. The C object is little more than a list of warnings |
|
86
|
|
|
|
|
|
|
and a bunch of rules. |
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
=cut |
|
89
|
|
|
|
|
|
|
|
|
90
|
|
|
|
|
|
|
sub new { |
|
91
|
6
|
|
|
6
|
1
|
4656
|
my $class = shift; |
|
92
|
|
|
|
|
|
|
|
|
93
|
6
|
|
|
|
|
23
|
my $self = { |
|
94
|
|
|
|
|
|
|
_warnings => [], |
|
95
|
|
|
|
|
|
|
}; |
|
96
|
6
|
|
|
|
|
13
|
bless $self, $class; |
|
97
|
|
|
|
|
|
|
|
|
98
|
6
|
|
|
|
|
58
|
$self->_read_rules(); |
|
99
|
|
|
|
|
|
|
|
|
100
|
6
|
|
|
|
|
34
|
return $self; |
|
101
|
|
|
|
|
|
|
} |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
=head2 warnings() |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Returns a list of warnings found by C and its brethren. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=cut |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
sub warnings { |
|
110
|
64
|
|
|
64
|
1
|
683
|
my $self = shift; |
|
111
|
|
|
|
|
|
|
|
|
112
|
64
|
50
|
|
|
|
95
|
return wantarray ? @{$self->{_warnings}} : scalar @{$self->{_warnings}}; |
|
|
64
|
|
|
|
|
159
|
|
|
|
0
|
|
|
|
|
0
|
|
|
113
|
|
|
|
|
|
|
} |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
=head2 clear_warnings() |
|
116
|
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
Clear the list of warnings for this linter object. It's automatically called |
|
118
|
|
|
|
|
|
|
when you call C. |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=cut |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
sub clear_warnings { |
|
123
|
64
|
|
|
64
|
1
|
131
|
my $self = shift; |
|
124
|
|
|
|
|
|
|
|
|
125
|
64
|
|
|
|
|
112
|
$self->{_warnings} = []; |
|
126
|
|
|
|
|
|
|
} |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
=head2 warn( $str [, $str...] ) |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
Create a warning message, built from strings passed, like a C |
|
131
|
|
|
|
|
|
|
statement. |
|
132
|
|
|
|
|
|
|
|
|
133
|
|
|
|
|
|
|
Typically, you'll leave this to C, but industrious |
|
134
|
|
|
|
|
|
|
programmers may want to do their own checking as well. |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
=cut |
|
137
|
|
|
|
|
|
|
|
|
138
|
|
|
|
|
|
|
sub warn { |
|
139
|
56
|
|
|
56
|
1
|
973
|
my $self = shift; |
|
140
|
|
|
|
|
|
|
|
|
141
|
56
|
|
|
|
|
54
|
push( @{$self->{_warnings}}, join( "", @_ ) ); |
|
|
56
|
|
|
|
|
144
|
|
|
142
|
|
|
|
|
|
|
|
|
143
|
56
|
|
|
|
|
161
|
return; |
|
144
|
|
|
|
|
|
|
} |
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
=head2 check_record( $marc ) |
|
147
|
|
|
|
|
|
|
|
|
148
|
|
|
|
|
|
|
Does all sorts of lint-like checks on the MARC record I<$marc>, |
|
149
|
|
|
|
|
|
|
both on the record as a whole, and on the individual fields & |
|
150
|
|
|
|
|
|
|
subfields. |
|
151
|
|
|
|
|
|
|
|
|
152
|
|
|
|
|
|
|
=cut |
|
153
|
|
|
|
|
|
|
|
|
154
|
|
|
|
|
|
|
sub check_record { |
|
155
|
13
|
|
|
13
|
1
|
24410
|
my $self = shift; |
|
156
|
13
|
|
|
|
|
19
|
my $marc = shift; |
|
157
|
|
|
|
|
|
|
|
|
158
|
13
|
|
|
|
|
32
|
$self->clear_warnings(); |
|
159
|
|
|
|
|
|
|
|
|
160
|
13
|
50
|
33
|
|
|
100
|
( (ref $marc) && $marc->isa('MARC::Record') ) |
|
161
|
|
|
|
|
|
|
or return $self->warn( "Must pass a MARC::Record object to check_record" ); |
|
162
|
|
|
|
|
|
|
|
|
163
|
13
|
|
|
|
|
33
|
my @_1xx = $marc->field( "1.." ); |
|
164
|
13
|
|
|
|
|
992
|
my $n1xx = scalar @_1xx; |
|
165
|
13
|
100
|
|
|
|
30
|
if ( $n1xx > 1 ) { |
|
166
|
1
|
|
|
|
|
7
|
$self->warn( "1XX: Only one 1XX tag is allowed, but I found $n1xx of them." ); |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
|
|
169
|
13
|
50
|
|
|
|
32
|
if ( not $marc->field( 245 ) ) { |
|
170
|
0
|
|
|
|
|
0
|
$self->warn( "245: No 245 tag." ); |
|
171
|
|
|
|
|
|
|
} |
|
172
|
|
|
|
|
|
|
|
|
173
|
|
|
|
|
|
|
|
|
174
|
13
|
|
|
|
|
630
|
my %field_seen; |
|
175
|
13
|
|
|
|
|
17
|
my $rules = $self->{_rules}; |
|
176
|
13
|
|
|
|
|
52
|
for my $field ( $marc->fields ) { |
|
177
|
206
|
|
|
|
|
411
|
my $tagno = $field->tag; |
|
178
|
|
|
|
|
|
|
|
|
179
|
206
|
|
|
|
|
559
|
my $tagrules = ''; |
|
180
|
|
|
|
|
|
|
#if 880 field, inherit rules from tagno in subfield _6 |
|
181
|
206
|
|
|
|
|
168
|
my $is_880 = 0; |
|
182
|
206
|
100
|
|
|
|
255
|
if ($tagno eq '880') { |
|
183
|
1
|
|
|
|
|
1
|
$is_880 = 1; |
|
184
|
1
|
50
|
|
|
|
4
|
if ($field->subfield('6')) { |
|
185
|
1
|
|
|
|
|
15
|
my $sub6 = $field->subfield('6'); |
|
186
|
1
|
|
|
|
|
14
|
$tagno = substr($sub6, 0, 3); |
|
187
|
|
|
|
|
|
|
|
|
188
|
1
|
50
|
|
|
|
3
|
$tagrules = $rules->{$tagno} or next; |
|
189
|
|
|
|
|
|
|
#880 is repeatable, but its linked field may not be |
|
190
|
1
|
50
|
33
|
|
|
10
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{'880.'.$tagno} ) { |
|
|
|
|
33
|
|
|
|
|
|
191
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
|
192
|
|
|
|
|
|
|
} #if repeatability |
|
193
|
|
|
|
|
|
|
} #if subfield 6 present |
|
194
|
|
|
|
|
|
|
else { |
|
195
|
0
|
|
|
|
|
0
|
$self->warn( "880: No subfield 6." ); |
|
196
|
|
|
|
|
|
|
} #else no subfield 6 in 880 field |
|
197
|
|
|
|
|
|
|
} #if this is 880 field |
|
198
|
|
|
|
|
|
|
else { |
|
199
|
205
|
100
|
|
|
|
436
|
$tagrules = $rules->{$tagno} or next; |
|
200
|
|
|
|
|
|
|
|
|
201
|
204
|
50
|
66
|
|
|
959
|
if ( ($tagrules->{'repeatable'} && ( $tagrules->{'repeatable'} eq 'NR' )) && $field_seen{$tagno} ) { |
|
|
|
|
66
|
|
|
|
|
|
202
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Field is not repeatable." ); |
|
203
|
|
|
|
|
|
|
} #if repeatability |
|
204
|
|
|
|
|
|
|
} #else not 880 |
|
205
|
|
|
|
|
|
|
|
|
206
|
205
|
100
|
|
|
|
353
|
if ( $tagno >= 10 ) { |
|
|
|
50
|
|
|
|
|
|
|
207
|
158
|
|
|
|
|
185
|
for my $ind ( 1..2 ) { |
|
208
|
316
|
|
|
|
|
515
|
my $indvalue = $field->indicator($ind); |
|
209
|
316
|
100
|
|
|
|
3233
|
if ( not ($indvalue =~ $tagrules->{"ind$ind" . "_regex"}) ) { |
|
210
|
4
|
|
|
|
|
20
|
$self->warn( |
|
211
|
|
|
|
|
|
|
"$tagno: Indicator $ind must be ", |
|
212
|
|
|
|
|
|
|
$tagrules->{"ind$ind" . "_desc"}, |
|
213
|
|
|
|
|
|
|
" but it's \"$indvalue\"" |
|
214
|
|
|
|
|
|
|
); |
|
215
|
|
|
|
|
|
|
} |
|
216
|
|
|
|
|
|
|
} # for |
|
217
|
|
|
|
|
|
|
|
|
218
|
158
|
|
|
|
|
160
|
my %sub_seen; |
|
219
|
158
|
|
|
|
|
266
|
for my $subfield ( $field->subfields ) { |
|
220
|
262
|
|
|
|
|
1887
|
my ($code,$data) = @$subfield; |
|
221
|
|
|
|
|
|
|
|
|
222
|
262
|
|
|
|
|
336
|
my $rule = $tagrules->{$code}; |
|
223
|
262
|
100
|
100
|
|
|
901
|
if ( not defined $rule ) { |
|
|
|
100
|
|
|
|
|
|
|
224
|
2
|
|
|
|
|
6
|
$self->warn( "$tagno: Subfield _$code is not allowed." ); |
|
225
|
|
|
|
|
|
|
} elsif ( ($rule eq "NR") && $sub_seen{$code} ) { |
|
226
|
1
|
|
|
|
|
4
|
$self->warn( "$tagno: Subfield _$code is not repeatable." ); |
|
227
|
|
|
|
|
|
|
} |
|
228
|
|
|
|
|
|
|
|
|
229
|
262
|
50
|
|
|
|
491
|
if ( $data =~ /[\t\r\n]/ ) { |
|
230
|
0
|
|
|
|
|
0
|
$self->warn( "$tagno: Subfield _$code has an invalid control character" ); |
|
231
|
|
|
|
|
|
|
} |
|
232
|
|
|
|
|
|
|
|
|
233
|
262
|
|
|
|
|
483
|
++$sub_seen{$code}; |
|
234
|
|
|
|
|
|
|
} # for $subfields |
|
235
|
|
|
|
|
|
|
} # if $tagno >= 10 |
|
236
|
|
|
|
|
|
|
|
|
237
|
|
|
|
|
|
|
elsif ($tagno < 10) { |
|
238
|
|
|
|
|
|
|
#check for subfield characters |
|
239
|
47
|
100
|
|
|
|
82
|
if ($field->data() =~ /\x1F/) { |
|
240
|
1
|
|
|
|
|
10
|
$self->warn( "$tagno: Subfields are not allowed in fields lower than 010" ); |
|
241
|
|
|
|
|
|
|
} #if control field has subfield delimiter |
|
242
|
|
|
|
|
|
|
} #elsif $tagno < 10 |
|
243
|
|
|
|
|
|
|
|
|
244
|
|
|
|
|
|
|
# Check to see if a check_xxx() function exists, and call it on the field if it does |
|
245
|
205
|
|
|
|
|
571
|
my $checker = "check_$tagno"; |
|
246
|
205
|
100
|
|
|
|
649
|
if ( $self->can( $checker ) ) { |
|
247
|
25
|
|
|
|
|
50
|
$self->$checker( $field ); |
|
248
|
|
|
|
|
|
|
} |
|
249
|
|
|
|
|
|
|
|
|
250
|
205
|
100
|
|
|
|
2041
|
if ($is_880) { |
|
251
|
1
|
|
|
|
|
5
|
++$field_seen{'880.'.$tagno}; |
|
252
|
|
|
|
|
|
|
} #if 880 field |
|
253
|
|
|
|
|
|
|
else { |
|
254
|
204
|
|
|
|
|
306
|
++$field_seen{$tagno}; |
|
255
|
|
|
|
|
|
|
} |
|
256
|
|
|
|
|
|
|
} # for my $fields |
|
257
|
|
|
|
|
|
|
|
|
258
|
13
|
|
|
|
|
52
|
return; |
|
259
|
|
|
|
|
|
|
} |
|
260
|
|
|
|
|
|
|
|
|
261
|
|
|
|
|
|
|
=head2 check_I( $field ) |
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
Various functions to check the different fields. If the function doesn't exist, |
|
264
|
|
|
|
|
|
|
then it doesn't get checked. |
|
265
|
|
|
|
|
|
|
|
|
266
|
|
|
|
|
|
|
=head2 check_020() |
|
267
|
|
|
|
|
|
|
|
|
268
|
|
|
|
|
|
|
Looks at 020$a and reports errors if the check digit is wrong. |
|
269
|
|
|
|
|
|
|
Looks at 020$z and validates number if hyphens are present. |
|
270
|
|
|
|
|
|
|
|
|
271
|
|
|
|
|
|
|
Uses Business::ISBN to do validation. Thirteen digit checking is currently done |
|
272
|
|
|
|
|
|
|
with the internal sub _isbn13_check_digit(), based on code from Business::ISBN. |
|
273
|
|
|
|
|
|
|
|
|
274
|
|
|
|
|
|
|
TO DO (check_020): |
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
Fix 13-digit ISBN checking. |
|
277
|
|
|
|
|
|
|
|
|
278
|
|
|
|
|
|
|
=cut |
|
279
|
|
|
|
|
|
|
|
|
280
|
|
|
|
|
|
|
sub check_020 { |
|
281
|
|
|
|
|
|
|
|
|
282
|
|
|
|
|
|
|
|
|
283
|
5
|
|
|
5
|
|
2866
|
use Business::ISBN; |
|
|
5
|
|
|
|
|
175542
|
|
|
|
5
|
|
|
|
|
14365
|
|
|
284
|
|
|
|
|
|
|
|
|
285
|
22
|
|
|
22
|
1
|
7742
|
my $self = shift; |
|
286
|
22
|
|
|
|
|
25
|
my $field = shift; |
|
287
|
|
|
|
|
|
|
|
|
288
|
|
|
|
|
|
|
################################################### |
|
289
|
|
|
|
|
|
|
|
|
290
|
|
|
|
|
|
|
# break subfields into code-data array and validate data |
|
291
|
|
|
|
|
|
|
|
|
292
|
22
|
|
|
|
|
42
|
my @subfields = $field->subfields(); |
|
293
|
|
|
|
|
|
|
|
|
294
|
22
|
|
|
|
|
259
|
while (my $subfield = pop(@subfields)) { |
|
295
|
22
|
|
|
|
|
33
|
my ($code, $data) = @$subfield; |
|
296
|
22
|
|
|
|
|
21
|
my $isbnno = $data; |
|
297
|
|
|
|
|
|
|
#remove any hyphens |
|
298
|
22
|
|
|
|
|
40
|
$isbnno =~ s/\-//g; |
|
299
|
|
|
|
|
|
|
#remove nondigits |
|
300
|
22
|
|
|
|
|
111
|
$isbnno =~ s/^\D*(\d{9,12}[X\d])\b.*$/$1/; |
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
#report error if this is subfield 'a' |
|
303
|
|
|
|
|
|
|
#and the first 10 or 13 characters are not a match for $isbnno |
|
304
|
22
|
100
|
|
|
|
53
|
if ($code eq 'a') { |
|
|
|
50
|
|
|
|
|
|
|
305
|
21
|
100
|
|
|
|
60
|
if ((substr($data,0,length($isbnno)) ne $isbnno)) { |
|
306
|
2
|
|
|
|
|
7
|
$self->warn( "020: Subfield a may have invalid characters."); |
|
307
|
|
|
|
|
|
|
} #if first characters don't match |
|
308
|
|
|
|
|
|
|
|
|
309
|
|
|
|
|
|
|
#report error if no space precedes a qualifier in subfield a |
|
310
|
21
|
100
|
|
|
|
52
|
if ($data =~ /\(/) { |
|
311
|
8
|
100
|
|
|
|
33
|
$self->warn( "020: Subfield a qualifier must be preceded by space, $data.") unless ($data =~ /[X0-9] \(/); |
|
312
|
|
|
|
|
|
|
} #if data has parenthetical qualifier |
|
313
|
|
|
|
|
|
|
|
|
314
|
|
|
|
|
|
|
#report error if unable to find 10-13 digit string of digits in subfield 'a' |
|
315
|
21
|
100
|
|
|
|
60
|
if (($isbnno !~ /(?:^\d{10}$)|(?:^\d{13}$)|(?:^\d{9}X$)/)) { |
|
316
|
3
|
|
|
|
|
15
|
$self->warn( "020: Subfield a has the wrong number of digits, $data."); |
|
317
|
|
|
|
|
|
|
} # if subfield 'a' but not 10 or 13 digit isbn |
|
318
|
|
|
|
|
|
|
#otherwise, check 10 and 13 digit checksums for validity |
|
319
|
|
|
|
|
|
|
else { |
|
320
|
18
|
100
|
|
|
|
35
|
if ((length ($isbnno) == 10)) { |
|
|
|
50
|
|
|
|
|
|
|
321
|
|
|
|
|
|
|
|
|
322
|
16
|
50
|
|
|
|
29
|
if ($Business::ISBN::VERSION gt '2.02_01') { |
|
|
|
0
|
|
|
|
|
|
|
323
|
16
|
100
|
|
|
|
49
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::valid_isbn_checksum($isbnno) != 1); |
|
324
|
|
|
|
|
|
|
} #if Business::ISBN version higher than 2.02_01 |
|
325
|
|
|
|
|
|
|
elsif ($Business::ISBN::VERSION lt '2') { |
|
326
|
0
|
0
|
|
|
|
0
|
$self->warn( "020: Subfield a has bad checksum, $data." ) if (Business::ISBN::is_valid_checksum($isbnno) != 1); |
|
327
|
|
|
|
|
|
|
} #elsif Business::ISBN version lower than 2 |
|
328
|
|
|
|
|
|
|
else { |
|
329
|
0
|
|
|
|
|
0
|
$self->warn( "Business::ISBN version must be below 2 or above 2.02_02." ); |
|
330
|
|
|
|
|
|
|
} #else Business::ISBN version between 2 and 2.02_02 |
|
331
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
|
332
|
|
|
|
|
|
|
# do validation check for 13 digit isbn |
|
333
|
|
|
|
|
|
|
######################################### |
|
334
|
|
|
|
|
|
|
### Not yet fully implemented ########### |
|
335
|
|
|
|
|
|
|
######################################### |
|
336
|
|
|
|
|
|
|
elsif (length($isbnno) == 13){ |
|
337
|
|
|
|
|
|
|
#change line below once Business::ISBN handles 13-digit ISBNs |
|
338
|
2
|
|
|
|
|
5
|
my $is_valid_13 = _isbn13_check_digit($isbnno); |
|
339
|
2
|
100
|
|
|
|
12
|
$self->warn( "020: Subfield a has bad checksum (13 digit), $data.") unless ($is_valid_13 == 1); |
|
340
|
|
|
|
|
|
|
} #elsif 13 digit ISBN has invalid check digit |
|
341
|
|
|
|
|
|
|
################################################### |
|
342
|
|
|
|
|
|
|
} #else subfield 'a' has 10 or 13 digits |
|
343
|
|
|
|
|
|
|
} #if subfield 'a' |
|
344
|
|
|
|
|
|
|
#look for valid isbn in 020$z |
|
345
|
|
|
|
|
|
|
elsif ($code eq 'z') { |
|
346
|
1
|
50
|
33
|
|
|
12
|
if (($data =~ /^ISBN/) || ($data =~ /^\d*\-\d+/)){ |
|
347
|
|
|
|
|
|
|
################################################## |
|
348
|
|
|
|
|
|
|
## Turned on for now--Comment to unimplement #### |
|
349
|
|
|
|
|
|
|
################################################## |
|
350
|
0
|
0
|
0
|
|
|
0
|
$self->warn( "020: Subfield z is numerically valid.") if ((length ($isbnno) == 10) && (Business::ISBN::is_valid_checksum($isbnno) == 1)); |
|
351
|
|
|
|
|
|
|
} #if 10 digit ISBN has invalid check digit |
|
352
|
|
|
|
|
|
|
} #elsif subfield 'z' |
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
} # while @subfields |
|
355
|
|
|
|
|
|
|
|
|
356
|
|
|
|
|
|
|
} #check_020 |
|
357
|
|
|
|
|
|
|
|
|
358
|
|
|
|
|
|
|
=head2 _isbn13_check_digit($ean) |
|
359
|
|
|
|
|
|
|
|
|
360
|
|
|
|
|
|
|
Internal sub to determine if 13-digit ISBN has a valid checksum. The code is |
|
361
|
|
|
|
|
|
|
taken from Business::ISBN::as_ean. It is expected to be temporary until |
|
362
|
|
|
|
|
|
|
Business::ISBN is updated to check 13-digit ISBNs itself. |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
=cut |
|
365
|
|
|
|
|
|
|
|
|
366
|
|
|
|
|
|
|
sub _isbn13_check_digit { |
|
367
|
|
|
|
|
|
|
|
|
368
|
2
|
|
|
2
|
|
3
|
my $ean = shift; |
|
369
|
|
|
|
|
|
|
#remove and store current check digit |
|
370
|
2
|
|
|
|
|
5
|
my $check_digit = chop($ean); |
|
371
|
|
|
|
|
|
|
|
|
372
|
|
|
|
|
|
|
#calculate valid checksum |
|
373
|
2
|
|
|
|
|
3
|
my $sum = 0; |
|
374
|
2
|
|
|
|
|
3
|
foreach my $index ( 0, 2, 4, 6, 8, 10 ) |
|
375
|
|
|
|
|
|
|
{ |
|
376
|
12
|
|
|
|
|
12
|
$sum += substr($ean, $index, 1); |
|
377
|
12
|
|
|
|
|
14
|
$sum += 3 * substr($ean, $index + 1, 1); |
|
378
|
|
|
|
|
|
|
} |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
#take the next higher multiple of 10 and subtract the sum. |
|
381
|
|
|
|
|
|
|
#if $sum is 37, the next highest multiple of ten is 40. the |
|
382
|
|
|
|
|
|
|
#check digit would be 40 - 37 => 3. |
|
383
|
2
|
|
|
|
|
7
|
my $valid_check_digit = ( 10 * ( int( $sum / 10 ) + 1 ) - $sum ) % 10; |
|
384
|
|
|
|
|
|
|
|
|
385
|
2
|
100
|
|
|
|
6
|
return $check_digit == $valid_check_digit ? 1 : 0; |
|
386
|
|
|
|
|
|
|
|
|
387
|
|
|
|
|
|
|
} # _isbn13_check_digit |
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
######################################### |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 check_041( $field ) |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
Warns if subfields are not evenly divisible by 3 unless second indicator is 7 |
|
394
|
|
|
|
|
|
|
(future implementation would ensure that each subfield is exactly 3 characters |
|
395
|
|
|
|
|
|
|
unless ind2 is 7--since subfields are now repeatable. This is not implemented |
|
396
|
|
|
|
|
|
|
here due to the large number of records needing to be corrected.). Validates |
|
397
|
|
|
|
|
|
|
against the MARC Code List for Languages (L) using the |
|
398
|
|
|
|
|
|
|
MARC::Lint::CodeData data pack to MARC::Lint (%LanguageCodes, |
|
399
|
|
|
|
|
|
|
%ObsoleteLanguageCodes). |
|
400
|
|
|
|
|
|
|
|
|
401
|
|
|
|
|
|
|
=cut |
|
402
|
|
|
|
|
|
|
|
|
403
|
|
|
|
|
|
|
sub check_041 { |
|
404
|
|
|
|
|
|
|
|
|
405
|
|
|
|
|
|
|
|
|
406
|
3
|
|
|
3
|
1
|
2964
|
my $self = shift; |
|
407
|
3
|
|
|
|
|
4
|
my $field = shift; |
|
408
|
|
|
|
|
|
|
|
|
409
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
|
410
|
|
|
|
|
|
|
|
|
411
|
3
|
|
|
|
|
13
|
my @subfields = $field->subfields(); |
|
412
|
3
|
|
|
|
|
49
|
my @newsubfields = (); |
|
413
|
|
|
|
|
|
|
|
|
414
|
3
|
|
|
|
|
11
|
while (my $subfield = pop(@subfields)) { |
|
415
|
7
|
|
|
|
|
10
|
my ($code, $data) = @$subfield; |
|
416
|
7
|
|
|
|
|
21
|
unshift (@newsubfields, $code, $data); |
|
417
|
|
|
|
|
|
|
} # while |
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
#warn if length of each subfield is not divisible by 3 unless ind2 is 7 |
|
420
|
3
|
50
|
|
|
|
10
|
unless ($field->indicator(2) eq '7') { |
|
421
|
3
|
|
|
|
|
38
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
|
422
|
7
|
100
|
|
|
|
32
|
if (length ($newsubfields[$index+1]) %3 != 0) { |
|
423
|
3
|
|
|
|
|
10
|
$self->warn( "041: Subfield _$newsubfields[$index] must be evenly divisible by 3 or exactly three characters if ind2 is not 7, ($newsubfields[$index+1])." ); |
|
424
|
|
|
|
|
|
|
} #if field length not divisible evenly by 3 |
|
425
|
|
|
|
|
|
|
############################################## |
|
426
|
|
|
|
|
|
|
# validation against code list data |
|
427
|
|
|
|
|
|
|
## each subfield has a multiple of 3 chars |
|
428
|
|
|
|
|
|
|
# need to look at each group of 3 characters |
|
429
|
|
|
|
|
|
|
else { |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
#break each character of the subfield into an array position |
|
432
|
4
|
|
|
|
|
15
|
my @codechars = split '', $newsubfields[$index+1]; |
|
433
|
|
|
|
|
|
|
|
|
434
|
4
|
|
|
|
|
6
|
my $pos = 0; |
|
435
|
|
|
|
|
|
|
#store each 3 char code in a slot of @codes041 |
|
436
|
4
|
|
|
|
|
4
|
my @codes041 = (); |
|
437
|
4
|
|
|
|
|
10
|
while ($pos <= $#codechars) { |
|
438
|
6
|
|
|
|
|
16
|
push @codes041, (join '', @codechars[$pos..$pos+2]); |
|
439
|
6
|
|
|
|
|
11
|
$pos += 3; |
|
440
|
|
|
|
|
|
|
} |
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
|
|
443
|
4
|
|
|
|
|
7
|
foreach my $code041 (@codes041) { |
|
444
|
|
|
|
|
|
|
#see if language code matches valid code |
|
445
|
6
|
50
|
|
|
|
14
|
my $validlang = $LanguageCodes{$code041} ? 1 : 0; |
|
446
|
|
|
|
|
|
|
#look for invalid code match if valid code was not matched |
|
447
|
6
|
100
|
|
|
|
13
|
my $obsoletelang = $ObsoleteLanguageCodes{$code041} ? 1 : 0; |
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
# skip valid subfields |
|
450
|
6
|
50
|
|
|
|
12
|
unless ($validlang) { |
|
451
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
|
452
|
6
|
100
|
|
|
|
9
|
if ($obsoletelang) { |
|
453
|
1
|
|
|
|
|
6
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1], may be obsolete."); |
|
454
|
|
|
|
|
|
|
} |
|
455
|
|
|
|
|
|
|
else { |
|
456
|
5
|
|
|
|
|
21
|
$self->warn( "041: Subfield _$newsubfields[$index], $newsubfields[$index+1] ($code041), is not valid."); |
|
457
|
|
|
|
|
|
|
} #else code not found |
|
458
|
|
|
|
|
|
|
} # unless found valid code |
|
459
|
|
|
|
|
|
|
} #foreach code in 041 |
|
460
|
|
|
|
|
|
|
} # else subfield has multiple of 3 chars |
|
461
|
|
|
|
|
|
|
############################################## |
|
462
|
|
|
|
|
|
|
} # foreach subfield |
|
463
|
|
|
|
|
|
|
} #unless ind2 is 7 |
|
464
|
|
|
|
|
|
|
} #check_041 |
|
465
|
|
|
|
|
|
|
|
|
466
|
|
|
|
|
|
|
=head2 check_043( $field ) |
|
467
|
|
|
|
|
|
|
|
|
468
|
|
|
|
|
|
|
Warns if each subfield a is not exactly 7 characters. Validates each code |
|
469
|
|
|
|
|
|
|
against the MARC code list for Geographic Areas (L) |
|
470
|
|
|
|
|
|
|
using the MARC::Lint::CodeData data pack to MARC::Lint (%GeogAreaCodes, |
|
471
|
|
|
|
|
|
|
%ObsoleteGeogAreaCodes). |
|
472
|
|
|
|
|
|
|
|
|
473
|
|
|
|
|
|
|
=cut |
|
474
|
|
|
|
|
|
|
|
|
475
|
|
|
|
|
|
|
sub check_043 { |
|
476
|
|
|
|
|
|
|
|
|
477
|
2
|
|
|
2
|
1
|
2111
|
my $self = shift; |
|
478
|
2
|
|
|
|
|
10
|
my $field = shift; |
|
479
|
|
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
|
481
|
|
|
|
|
|
|
|
|
482
|
2
|
|
|
|
|
7
|
my @subfields = $field->subfields(); |
|
483
|
2
|
|
|
|
|
29
|
my @newsubfields = (); |
|
484
|
|
|
|
|
|
|
|
|
485
|
2
|
|
|
|
|
8
|
while (my $subfield = pop(@subfields)) { |
|
486
|
5
|
|
|
|
|
8
|
my ($code, $data) = @$subfield; |
|
487
|
5
|
|
|
|
|
18
|
unshift (@newsubfields, $code, $data); |
|
488
|
|
|
|
|
|
|
} # while |
|
489
|
|
|
|
|
|
|
|
|
490
|
|
|
|
|
|
|
#warn if length of subfield a is not exactly 7 |
|
491
|
2
|
|
|
|
|
10
|
for (my $index = 0; $index <=$#newsubfields; $index+=2) { |
|
492
|
5
|
100
|
66
|
|
|
32
|
if (($newsubfields[$index] eq 'a') && (length ($newsubfields[$index+1]) != 7)) { |
|
|
|
50
|
|
|
|
|
|
|
493
|
2
|
|
|
|
|
7
|
$self->warn( "043: Subfield _a must be exactly 7 characters, $newsubfields[$index+1]" ); |
|
494
|
|
|
|
|
|
|
} # if suba and length is not 7 |
|
495
|
|
|
|
|
|
|
#check against code list for geographic areas. |
|
496
|
|
|
|
|
|
|
elsif ($newsubfields[$index] eq 'a') { |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
#see if geog area code matches valid code |
|
499
|
3
|
50
|
|
|
|
11
|
my $validgac = $GeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
|
500
|
|
|
|
|
|
|
#look for obsolete code match if valid code was not matched |
|
501
|
3
|
100
|
|
|
|
10
|
my $obsoletegac = $ObsoleteGeogAreaCodes{$newsubfields[$index+1]} ? 1 : 0; |
|
502
|
|
|
|
|
|
|
|
|
503
|
|
|
|
|
|
|
# skip valid subfields |
|
504
|
3
|
50
|
|
|
|
12
|
unless ($validgac) { |
|
505
|
|
|
|
|
|
|
#report invalid matches as possible obsolete codes |
|
506
|
3
|
100
|
|
|
|
6
|
if ($obsoletegac) { |
|
507
|
1
|
|
|
|
|
4
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], may be obsolete."); |
|
508
|
|
|
|
|
|
|
} |
|
509
|
|
|
|
|
|
|
else { |
|
510
|
2
|
|
|
|
|
12
|
$self->warn( "043: Subfield _a, $newsubfields[$index+1], is not valid."); |
|
511
|
|
|
|
|
|
|
} #else code not found |
|
512
|
|
|
|
|
|
|
} # unless found valid code |
|
513
|
|
|
|
|
|
|
|
|
514
|
|
|
|
|
|
|
} #elsif suba |
|
515
|
|
|
|
|
|
|
} #foreach subfield |
|
516
|
|
|
|
|
|
|
} #check_043 |
|
517
|
|
|
|
|
|
|
|
|
518
|
|
|
|
|
|
|
=head2 check_245( $field ) |
|
519
|
|
|
|
|
|
|
|
|
520
|
|
|
|
|
|
|
-Makes sure $a exists (and is first subfield). |
|
521
|
|
|
|
|
|
|
-Warns if last character of field is not a period |
|
522
|
|
|
|
|
|
|
--Follows LCRI 1.0C, Nov. 2003 rather than MARC21 rule |
|
523
|
|
|
|
|
|
|
-Verifies that $c is preceded by / (space-/) |
|
524
|
|
|
|
|
|
|
-Verifies that initials in $c are not spaced |
|
525
|
|
|
|
|
|
|
-Verifies that $b is preceded by :;= (space-colon, space-semicolon, space-equals) |
|
526
|
|
|
|
|
|
|
-Verifies that $h is not preceded by space unless it is dash-space |
|
527
|
|
|
|
|
|
|
-Verifies that data of $h is enclosed in square brackets |
|
528
|
|
|
|
|
|
|
-Verifies that $n is preceded by . (period) |
|
529
|
|
|
|
|
|
|
--As part of that, looks for no-space period, or dash-space-period (for replaced elipses) |
|
530
|
|
|
|
|
|
|
-Verifies that $p is preceded by , (no-space-comma) when following $n and . (period) when following other subfields. |
|
531
|
|
|
|
|
|
|
-Performs rudimentary article check of 245 2nd indicator vs. 1st word of 245$a (for manual verification). |
|
532
|
|
|
|
|
|
|
|
|
533
|
|
|
|
|
|
|
Article checking is done by internal _check_article method, which should work for 130, 240, 245, 440, 630, 730, and 830. |
|
534
|
|
|
|
|
|
|
|
|
535
|
|
|
|
|
|
|
=cut |
|
536
|
|
|
|
|
|
|
|
|
537
|
|
|
|
|
|
|
sub check_245 { |
|
538
|
|
|
|
|
|
|
|
|
539
|
49
|
|
|
49
|
1
|
24709
|
my $self = shift; |
|
540
|
49
|
|
|
|
|
55
|
my $field = shift; |
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
#set tagno for reporting |
|
543
|
49
|
|
|
|
|
52
|
my $tagno = '245'; |
|
544
|
|
|
|
|
|
|
|
|
545
|
49
|
100
|
|
|
|
107
|
if ( not $field->subfield( "a" ) ) { |
|
546
|
1
|
|
|
|
|
16
|
$self->warn( "245: Must have a subfield _a." ); |
|
547
|
|
|
|
|
|
|
} |
|
548
|
|
|
|
|
|
|
|
|
549
|
|
|
|
|
|
|
# break subfields into code-data array (so the entire field is in one array) |
|
550
|
|
|
|
|
|
|
|
|
551
|
49
|
|
|
|
|
801
|
my @subfields = $field->subfields(); |
|
552
|
49
|
|
|
|
|
546
|
my @newsubfields = (); |
|
553
|
49
|
|
|
|
|
47
|
my $has_sub_6 = 0; |
|
554
|
|
|
|
|
|
|
|
|
555
|
49
|
|
|
|
|
93
|
while (my $subfield = pop(@subfields)) { |
|
556
|
90
|
|
|
|
|
98
|
my ($code, $data) = @$subfield; |
|
557
|
|
|
|
|
|
|
#check for subfield 6 being present |
|
558
|
90
|
100
|
|
|
|
131
|
$has_sub_6 = 1 if ($code eq '6'); |
|
559
|
90
|
|
|
|
|
239
|
unshift (@newsubfields, $code, $data); |
|
560
|
|
|
|
|
|
|
} # while |
|
561
|
|
|
|
|
|
|
|
|
562
|
|
|
|
|
|
|
# 245 must end in period (may want to make this less restrictive by allowing trailing spaces) |
|
563
|
|
|
|
|
|
|
#do 2 checks--for final punctuation (MARC21 rule), and for period (LCRI 1.0C, Nov. 2003; LCPS 1.7.1) |
|
564
|
49
|
100
|
|
|
|
261
|
if ($newsubfields[$#newsubfields] !~ /[.?!]$/) { |
|
|
|
100
|
|
|
|
|
|
|
565
|
1
|
|
|
|
|
2
|
$self->warn ( "245: Must end with . (period)."); |
|
566
|
|
|
|
|
|
|
} |
|
567
|
|
|
|
|
|
|
elsif($newsubfields[$#newsubfields] =~ /[?!]$/) { |
|
568
|
2
|
|
|
|
|
4
|
$self->warn ( "245: MARC21 allows ? or ! as final punctuation but LCRI 1.0C, Nov. 2003 (LCPS 1.7.1 for RDA records), requires period."); |
|
569
|
|
|
|
|
|
|
} |
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
##Check for first subfield |
|
572
|
|
|
|
|
|
|
#subfield a should be first subfield (or 2nd if subfield '6' is present) |
|
573
|
49
|
100
|
|
|
|
64
|
if ($has_sub_6) { |
|
574
|
|
|
|
|
|
|
#make sure there are at least 2 subfields |
|
575
|
2
|
50
|
|
|
|
4
|
if ($#newsubfields < 3) { |
|
576
|
0
|
|
|
|
|
0
|
$self->warn ("$tagno: May have too few subfields."); |
|
577
|
|
|
|
|
|
|
} #if fewer than 2 subfields |
|
578
|
|
|
|
|
|
|
else { |
|
579
|
2
|
50
|
|
|
|
5
|
if ($newsubfields[0] ne '6') { |
|
580
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield must be _6, but it is $newsubfields[0]"); |
|
581
|
|
|
|
|
|
|
} #if 1st subfield not '6' |
|
582
|
2
|
50
|
|
|
|
4
|
if ($newsubfields[2] ne 'a') { |
|
583
|
0
|
|
|
|
|
0
|
$self->warn ( "$tagno: First subfield after subfield _6 must be _a, but it is _$newsubfields[2]"); |
|
584
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
|
585
|
|
|
|
|
|
|
} #else at least 2 subfields |
|
586
|
|
|
|
|
|
|
} #if has subfield 6 |
|
587
|
|
|
|
|
|
|
else { |
|
588
|
|
|
|
|
|
|
#1st subfield must be 'a' |
|
589
|
47
|
100
|
|
|
|
87
|
if ($newsubfields[0] ne 'a') { |
|
590
|
1
|
|
|
|
|
5
|
$self->warn ( "$tagno: First subfield must be _a, but it is _$newsubfields[0]"); |
|
591
|
|
|
|
|
|
|
} #if 2nd subfield not 'a' |
|
592
|
|
|
|
|
|
|
} #else no subfield _6 |
|
593
|
|
|
|
|
|
|
##End check for first subfield |
|
594
|
|
|
|
|
|
|
|
|
595
|
|
|
|
|
|
|
#subfield c, if present, must be preceded by / |
|
596
|
|
|
|
|
|
|
#also look for space between initials |
|
597
|
49
|
100
|
|
|
|
97
|
if ($field->subfield("c")) { |
|
598
|
|
|
|
|
|
|
|
|
599
|
14
|
|
|
|
|
216
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
|
600
|
|
|
|
|
|
|
# 245 subfield c must be preceded by / (space-/) |
|
601
|
17
|
100
|
|
|
|
36
|
if ($newsubfields[$index] eq 'c') { |
|
602
|
14
|
100
|
|
|
|
54
|
$self->warn ( "245: Subfield _c must be preceded by /") if ($newsubfields[$index-1] !~ /\s\/$/); |
|
603
|
|
|
|
|
|
|
# 245 subfield c initials should not have space |
|
604
|
14
|
100
|
66
|
|
|
48
|
$self->warn ( "245: Subfield _c initials should not have a space.") if (($newsubfields[$index+1] =~ /\b\w\. \b\w\./) && ($newsubfields[$index+1] !~ /\[\bi\.e\. \b\w\..*\]/)); |
|
605
|
14
|
|
|
|
|
20
|
last; |
|
606
|
|
|
|
|
|
|
} #if |
|
607
|
|
|
|
|
|
|
} #for |
|
608
|
|
|
|
|
|
|
} # subfield c exists |
|
609
|
|
|
|
|
|
|
|
|
610
|
|
|
|
|
|
|
#each subfield b, if present, should be preceded by :;= (colon, semicolon, or equals sign) |
|
611
|
|
|
|
|
|
|
### Are there others? ### |
|
612
|
49
|
100
|
|
|
|
470
|
if ($field->subfield("b")) { |
|
613
|
|
|
|
|
|
|
|
|
614
|
|
|
|
|
|
|
# 245 subfield b should be preceded by space-:;= (colon, semicolon, or equals sign) |
|
615
|
13
|
|
|
|
|
184
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
|
616
|
|
|
|
|
|
|
#report error if subfield 'b' is not preceded by space-:;= (colon, semicolon, or equals sign) |
|
617
|
16
|
100
|
100
|
|
|
106
|
if (($newsubfields[$index] eq 'b') && ($newsubfields[$index-1] !~ / [:;=]$/)) { |
|
618
|
4
|
|
|
|
|
7
|
$self->warn ( "245: Subfield _b should be preceded by space-colon, space-semicolon, or space-equals sign."); |
|
619
|
|
|
|
|
|
|
} #if |
|
620
|
|
|
|
|
|
|
} #for |
|
621
|
|
|
|
|
|
|
} # subfield b exists |
|
622
|
|
|
|
|
|
|
|
|
623
|
|
|
|
|
|
|
|
|
624
|
|
|
|
|
|
|
#each subfield h, if present, should be preceded by non-space |
|
625
|
49
|
100
|
|
|
|
444
|
if ($field->subfield("h")) { |
|
626
|
|
|
|
|
|
|
|
|
627
|
|
|
|
|
|
|
# 245 subfield h should not be preceded by space |
|
628
|
4
|
|
|
|
|
59
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
|
629
|
|
|
|
|
|
|
#report error if subfield 'h' is preceded by space (unless dash-space) |
|
630
|
6
|
100
|
100
|
|
|
42
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index-1] !~ /(\S$)|(\-\- $)/)) { |
|
631
|
1
|
|
|
|
|
2
|
$self->warn ( "245: Subfield _h should not be preceded by space."); |
|
632
|
|
|
|
|
|
|
} #if h and not preceded by no-space (unless dash) |
|
633
|
|
|
|
|
|
|
#report error if subfield 'h' does not start with open square bracket with a matching close bracket |
|
634
|
|
|
|
|
|
|
##could have check against list of valid values here |
|
635
|
6
|
100
|
100
|
|
|
38
|
if (($newsubfields[$index] eq 'h') && ($newsubfields[$index+1] !~ /^\[\w*\s*\w*\]/)) { |
|
636
|
1
|
|
|
|
|
5
|
$self->warn ( "245: Subfield _h must have matching square brackets, $newsubfields[$index]."); |
|
637
|
|
|
|
|
|
|
} |
|
638
|
|
|
|
|
|
|
} #for |
|
639
|
|
|
|
|
|
|
} # subfield h exists |
|
640
|
|
|
|
|
|
|
|
|
641
|
|
|
|
|
|
|
#each subfield n, if present, must be preceded by . (period) |
|
642
|
49
|
100
|
|
|
|
545
|
if ($field->subfield("n")) { |
|
643
|
|
|
|
|
|
|
|
|
644
|
|
|
|
|
|
|
# 245 subfield n must be preceded by . (period) |
|
645
|
4
|
|
|
|
|
54
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
|
646
|
|
|
|
|
|
|
#report error if subfield 'n' is not preceded by non-space-period or dash-space-period |
|
647
|
6
|
100
|
100
|
|
|
42
|
if (($newsubfields[$index] eq 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
|
648
|
1
|
|
|
|
|
3
|
$self->warn ( "245: Subfield _n must be preceded by . (period)."); |
|
649
|
|
|
|
|
|
|
} #if |
|
650
|
|
|
|
|
|
|
} #for |
|
651
|
|
|
|
|
|
|
} # subfield n exists |
|
652
|
|
|
|
|
|
|
|
|
653
|
|
|
|
|
|
|
#each subfield p, if present, must be preceded by a , (no-space-comma) if it follows subfield n, or by . (no-space-period or dash-space-period) following other subfields |
|
654
|
49
|
100
|
|
|
|
534
|
if ($field->subfield("p")) { |
|
655
|
|
|
|
|
|
|
|
|
656
|
|
|
|
|
|
|
# 245 subfield p must be preceded by . (period) or , (comma) |
|
657
|
4
|
|
|
|
|
54
|
for (my $index = 2; $index <=$#newsubfields; $index+=2) { |
|
658
|
|
|
|
|
|
|
#only looking for subfield p |
|
659
|
6
|
100
|
|
|
|
38
|
if ($newsubfields[$index] eq 'p') { |
|
660
|
|
|
|
|
|
|
# case for subfield 'n' being field before this one (allows dash-space-comma) |
|
661
|
4
|
100
|
100
|
|
|
38
|
if (($newsubfields[$index-2] eq 'n') && ($newsubfields[$index-1] !~ /(\S,$)|(\-\- ,$)/)) { |
|
|
|
100
|
100
|
|
|
|
|
|
662
|
1
|
|
|
|
|
2
|
$self->warn ( "245: Subfield _p must be preceded by , (comma) when it follows subfield _n."); |
|
663
|
|
|
|
|
|
|
} #if subfield n precedes this one |
|
664
|
|
|
|
|
|
|
# elsif case for subfield before this one is not n |
|
665
|
|
|
|
|
|
|
elsif (($newsubfields[$index-2] ne 'n') && ($newsubfields[$index-1] !~ /(\S\.$)|(\-\- \.$)/)) { |
|
666
|
1
|
|
|
|
|
3
|
$self->warn ( "245: Subfield _p must be preceded by . (period) when it follows a subfield other than _n."); |
|
667
|
|
|
|
|
|
|
} #elsif subfield p preceded by non-period when following a non-subfield 'n' |
|
668
|
|
|
|
|
|
|
} #if index is looking at subfield p |
|
669
|
|
|
|
|
|
|
} #for |
|
670
|
|
|
|
|
|
|
} # subfield p exists |
|
671
|
|
|
|
|
|
|
|
|
672
|
|
|
|
|
|
|
###################################### |
|
673
|
|
|
|
|
|
|
#check for invalid 2nd indicator |
|
674
|
49
|
|
|
|
|
539
|
$self->_check_article($field); |
|
675
|
|
|
|
|
|
|
|
|
676
|
|
|
|
|
|
|
} # check_245 |
|
677
|
|
|
|
|
|
|
|
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
|
|
680
|
|
|
|
|
|
|
|
|
681
|
|
|
|
|
|
|
############ |
|
682
|
|
|
|
|
|
|
# Internal # |
|
683
|
|
|
|
|
|
|
############ |
|
684
|
|
|
|
|
|
|
|
|
685
|
|
|
|
|
|
|
=head2 _check_article |
|
686
|
|
|
|
|
|
|
|
|
687
|
|
|
|
|
|
|
Check of articles is based on code from Ian Hamilton. This version is more |
|
688
|
|
|
|
|
|
|
limited in that it focuses on English, Spanish, French, Italian and German |
|
689
|
|
|
|
|
|
|
articles. Certain possible articles have been removed if they are valid English |
|
690
|
|
|
|
|
|
|
non-articles. This version also disregards 008_language/041 codes and just uses |
|
691
|
|
|
|
|
|
|
the list of articles to provide warnings/suggestions. |
|
692
|
|
|
|
|
|
|
|
|
693
|
|
|
|
|
|
|
source for articles = L |
|
694
|
|
|
|
|
|
|
|
|
695
|
|
|
|
|
|
|
Should work with fields 130, 240, 245, 440, 630, 730, and 830. Reports error if |
|
696
|
|
|
|
|
|
|
another field is passed in. |
|
697
|
|
|
|
|
|
|
|
|
698
|
|
|
|
|
|
|
=cut |
|
699
|
|
|
|
|
|
|
|
|
700
|
|
|
|
|
|
|
sub _check_article { |
|
701
|
|
|
|
|
|
|
|
|
702
|
49
|
|
|
49
|
|
51
|
my $self = shift; |
|
703
|
49
|
|
|
|
|
70
|
my $field = shift; |
|
704
|
|
|
|
|
|
|
|
|
705
|
|
|
|
|
|
|
#add articles here as needed |
|
706
|
|
|
|
|
|
|
##Some omitted due to similarity with valid words (e.g. the German 'die'). |
|
707
|
49
|
|
|
|
|
652
|
my %article = ( |
|
708
|
|
|
|
|
|
|
'a' => 'eng glg hun por', |
|
709
|
|
|
|
|
|
|
'an' => 'eng', |
|
710
|
|
|
|
|
|
|
'das' => 'ger', |
|
711
|
|
|
|
|
|
|
'dem' => 'ger', |
|
712
|
|
|
|
|
|
|
'der' => 'ger', |
|
713
|
|
|
|
|
|
|
'ein' => 'ger', |
|
714
|
|
|
|
|
|
|
'eine' => 'ger', |
|
715
|
|
|
|
|
|
|
'einem' => 'ger', |
|
716
|
|
|
|
|
|
|
'einen' => 'ger', |
|
717
|
|
|
|
|
|
|
'einer' => 'ger', |
|
718
|
|
|
|
|
|
|
'eines' => 'ger', |
|
719
|
|
|
|
|
|
|
'el' => 'spa', |
|
720
|
|
|
|
|
|
|
'en' => 'cat dan nor swe', |
|
721
|
|
|
|
|
|
|
'gl' => 'ita', |
|
722
|
|
|
|
|
|
|
'gli' => 'ita', |
|
723
|
|
|
|
|
|
|
'il' => 'ita mlt', |
|
724
|
|
|
|
|
|
|
'l' => 'cat fre ita mlt', |
|
725
|
|
|
|
|
|
|
'la' => 'cat fre ita spa', |
|
726
|
|
|
|
|
|
|
'las' => 'spa', |
|
727
|
|
|
|
|
|
|
'le' => 'fre ita', |
|
728
|
|
|
|
|
|
|
'les' => 'cat fre', |
|
729
|
|
|
|
|
|
|
'lo' => 'ita spa', |
|
730
|
|
|
|
|
|
|
'los' => 'spa', |
|
731
|
|
|
|
|
|
|
'os' => 'por', |
|
732
|
|
|
|
|
|
|
'the' => 'eng', |
|
733
|
|
|
|
|
|
|
'um' => 'por', |
|
734
|
|
|
|
|
|
|
'uma' => 'por', |
|
735
|
|
|
|
|
|
|
'un' => 'cat spa fre ita', |
|
736
|
|
|
|
|
|
|
'una' => 'cat spa ita', |
|
737
|
|
|
|
|
|
|
'une' => 'fre', |
|
738
|
|
|
|
|
|
|
'uno' => 'ita', |
|
739
|
|
|
|
|
|
|
); |
|
740
|
|
|
|
|
|
|
|
|
741
|
|
|
|
|
|
|
#add exceptions here as needed |
|
742
|
|
|
|
|
|
|
# may want to make keys lowercase |
|
743
|
49
|
|
|
|
|
393
|
my %exceptions = ( |
|
744
|
|
|
|
|
|
|
'A & E' => 1, |
|
745
|
|
|
|
|
|
|
'A & ' => 1, |
|
746
|
|
|
|
|
|
|
'A-' => 1, |
|
747
|
|
|
|
|
|
|
'A+' => 1, |
|
748
|
|
|
|
|
|
|
'A is ' => 1, |
|
749
|
|
|
|
|
|
|
'A isn\'t ' => 1, |
|
750
|
|
|
|
|
|
|
'A l\'' => 1, |
|
751
|
|
|
|
|
|
|
'A la ' => 1, |
|
752
|
|
|
|
|
|
|
'A posteriori' => 1, |
|
753
|
|
|
|
|
|
|
'A priori' => 1, |
|
754
|
|
|
|
|
|
|
'A to ' => 1, |
|
755
|
|
|
|
|
|
|
'El Nino' => 1, |
|
756
|
|
|
|
|
|
|
'El Salvador' => 1, |
|
757
|
|
|
|
|
|
|
'L is ' => 1, |
|
758
|
|
|
|
|
|
|
'L-' => 1, |
|
759
|
|
|
|
|
|
|
'La Salle' => 1, |
|
760
|
|
|
|
|
|
|
'Las Vegas' => 1, |
|
761
|
|
|
|
|
|
|
'Lo mein' => 1, |
|
762
|
|
|
|
|
|
|
'Los Alamos' => 1, |
|
763
|
|
|
|
|
|
|
'Los Angeles' => 1, |
|
764
|
|
|
|
|
|
|
); |
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
#get tagno to determine which indicator to check and for reporting |
|
767
|
49
|
|
|
|
|
100
|
my $tagno = $field->tag(); |
|
768
|
|
|
|
|
|
|
#retrieve tagno from subfield 6 if 880 field |
|
769
|
49
|
100
|
|
|
|
202
|
if ($tagno eq '880') { |
|
770
|
1
|
50
|
|
|
|
8
|
if ($field->subfield('6')) { |
|
771
|
1
|
|
|
|
|
15
|
my $sub6 = $field->subfield('6'); |
|
772
|
1
|
|
|
|
|
13
|
$tagno = substr($sub6, 0, 3); |
|
773
|
|
|
|
|
|
|
} #if subfield 6 |
|
774
|
|
|
|
|
|
|
} #if 880 field |
|
775
|
|
|
|
|
|
|
|
|
776
|
|
|
|
|
|
|
#$ind holds nonfiling character indicator value |
|
777
|
49
|
|
|
|
|
44
|
my $ind = ''; |
|
778
|
|
|
|
|
|
|
#$first_or_second holds which indicator is for nonfiling char value |
|
779
|
49
|
|
|
|
|
53
|
my $first_or_second = ''; |
|
780
|
49
|
50
|
|
|
|
288
|
if ($tagno !~ /^(?:130|240|245|440|630|730|830)$/) { |
|
|
|
50
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
781
|
0
|
|
|
|
|
0
|
print $tagno, " is not a valid field for article checking\n"; |
|
782
|
0
|
|
|
|
|
0
|
return; |
|
783
|
|
|
|
|
|
|
} #if field is not one of those checked for articles |
|
784
|
|
|
|
|
|
|
#130, 630, 730 => ind1 |
|
785
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:130|630|730)$/) { |
|
786
|
0
|
|
|
|
|
0
|
$ind = $field->indicator(1); |
|
787
|
0
|
|
|
|
|
0
|
$first_or_second = '1st'; |
|
788
|
|
|
|
|
|
|
} #if field is 130, 630, or 730 |
|
789
|
|
|
|
|
|
|
#240, 245, 440, 830 => ind2 |
|
790
|
|
|
|
|
|
|
elsif ($tagno =~ /^(?:240|245|440|830)$/) { |
|
791
|
49
|
|
|
|
|
94
|
$ind = $field->indicator(2); |
|
792
|
49
|
|
|
|
|
370
|
$first_or_second = '2nd'; |
|
793
|
|
|
|
|
|
|
} #if field is 240, 245, 440, or 830 |
|
794
|
|
|
|
|
|
|
|
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
#report non-numeric non-filing indicators as invalid |
|
797
|
49
|
50
|
|
|
|
115
|
$self->warn ( $tagno, ": Non-filing indicator is non-numeric" ) unless ($ind =~ /^[0-9]$/); |
|
798
|
|
|
|
|
|
|
#get subfield 'a' of the title field |
|
799
|
49
|
|
100
|
|
|
84
|
my $title = $field->subfield('a') || ''; |
|
800
|
|
|
|
|
|
|
|
|
801
|
|
|
|
|
|
|
|
|
802
|
49
|
|
|
|
|
615
|
my $char1_notalphanum = 0; |
|
803
|
|
|
|
|
|
|
#check for apostrophe, quote, bracket, or parenthesis, before first word |
|
804
|
|
|
|
|
|
|
#remove if found and add to non-word counter |
|
805
|
49
|
|
|
|
|
135
|
while ($title =~ /^["'\[\(*]/){ |
|
806
|
4
|
|
|
|
|
4
|
$char1_notalphanum++; |
|
807
|
4
|
|
|
|
|
16
|
$title =~ s/^["'\[\(*]//; |
|
808
|
|
|
|
|
|
|
} |
|
809
|
|
|
|
|
|
|
# split title into first word + rest on space, parens, bracket, apostrophe, quote, or hyphen |
|
810
|
49
|
|
|
|
|
183
|
my ($firstword, $separator, $etc) = $title =~ /^([^ \(\)\[\]'"\-]+)([ \(\)\[\]'"\-])?(.*)/i; |
|
811
|
49
|
100
|
|
|
|
85
|
$firstword = '' if ! defined( $firstword ); |
|
812
|
49
|
100
|
|
|
|
77
|
$separator = '' if ! defined( $separator ); |
|
813
|
49
|
100
|
|
|
|
68
|
$etc = '' if ! defined( $etc ); |
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
#get length of first word plus the number of chars removed above plus one for the separator |
|
816
|
49
|
|
|
|
|
61
|
my $nonfilingchars = length($firstword) + $char1_notalphanum + 1; |
|
817
|
|
|
|
|
|
|
|
|
818
|
|
|
|
|
|
|
#check to see if first word is an exception |
|
819
|
49
|
|
|
|
|
68
|
my $isan_exception = 0; |
|
820
|
49
|
|
|
|
|
164
|
$isan_exception = grep {$title =~ /^\Q$_\E/i} (keys %exceptions); |
|
|
980
|
|
|
|
|
4969
|
|
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
#lowercase chars of $firstword for comparison with article list |
|
823
|
49
|
|
|
|
|
101
|
$firstword = lc($firstword); |
|
824
|
|
|
|
|
|
|
|
|
825
|
49
|
|
|
|
|
45
|
my $isan_article = 0; |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
#see if first word is in the list of articles and not an exception |
|
828
|
49
|
100
|
100
|
|
|
132
|
$isan_article = 1 if (($article{$firstword}) && !($isan_exception)); |
|
829
|
|
|
|
|
|
|
|
|
830
|
|
|
|
|
|
|
#if article then $nonfilingchars should match $ind |
|
831
|
49
|
100
|
|
|
|
65
|
if ($isan_article) { |
|
832
|
|
|
|
|
|
|
#account for quotes, apostrophes, parens, or brackets before 2nd word |
|
833
|
|
|
|
|
|
|
# if (($separator eq ' ') && ($etc =~ /^['"]/)) { |
|
834
|
9
|
100
|
66
|
|
|
39
|
if (($separator) && ($etc =~ /^[ \(\)\[\]'"\-]+/)) { |
|
835
|
4
|
|
|
|
|
16
|
while ($etc =~ /^[ "'\[\]\(\)*]/){ |
|
836
|
6
|
|
|
|
|
6
|
$nonfilingchars++; |
|
837
|
6
|
|
|
|
|
20
|
$etc =~ s/^[ "'\[\]\(\)*]//; |
|
838
|
|
|
|
|
|
|
} #while etc starts with nonfiling chars |
|
839
|
|
|
|
|
|
|
} #if separator defined and etc starts with nonfiling chars |
|
840
|
|
|
|
|
|
|
#special case for 'en' (unsure why) |
|
841
|
9
|
50
|
|
|
|
65
|
if ($firstword eq 'en') { |
|
|
|
100
|
|
|
|
|
|
|
842
|
0
|
0
|
0
|
|
|
0
|
$self->warn ( $tagno, ": First word, , $firstword, may be an article, check $first_or_second indicator ($ind)." ) unless (($ind eq '3') || ($ind eq '0')); |
|
843
|
|
|
|
|
|
|
} |
|
844
|
|
|
|
|
|
|
elsif ($nonfilingchars ne $ind) { |
|
845
|
3
|
|
|
|
|
11
|
$self->warn ( $tagno, ": First word, $firstword, may be an article, check $first_or_second indicator ($ind)." ); |
|
846
|
|
|
|
|
|
|
} #unless ind is same as length of first word and nonfiling characters |
|
847
|
|
|
|
|
|
|
} #if first word is in article list |
|
848
|
|
|
|
|
|
|
#not an article so warn if $ind is not 0 |
|
849
|
|
|
|
|
|
|
else { |
|
850
|
40
|
100
|
|
|
|
355
|
unless ($ind eq '0') { |
|
851
|
1
|
|
|
|
|
7
|
$self->warn ( $tagno, ": First word, $firstword, does not appear to be an article, check $first_or_second indicator ($ind)." ); |
|
852
|
|
|
|
|
|
|
} #unless ind is 0 |
|
853
|
|
|
|
|
|
|
} #else not in article list |
|
854
|
|
|
|
|
|
|
|
|
855
|
|
|
|
|
|
|
####################################### |
|
856
|
|
|
|
|
|
|
|
|
857
|
|
|
|
|
|
|
} #_check_article |
|
858
|
|
|
|
|
|
|
|
|
859
|
|
|
|
|
|
|
|
|
860
|
|
|
|
|
|
|
############ |
|
861
|
|
|
|
|
|
|
|
|
862
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
863
|
|
|
|
|
|
|
|
|
864
|
|
|
|
|
|
|
Check the docs for L. All software links are there. |
|
865
|
|
|
|
|
|
|
|
|
866
|
|
|
|
|
|
|
=head1 TODO |
|
867
|
|
|
|
|
|
|
|
|
868
|
|
|
|
|
|
|
=over 4 |
|
869
|
|
|
|
|
|
|
|
|
870
|
|
|
|
|
|
|
=item * Subfield 6 |
|
871
|
|
|
|
|
|
|
|
|
872
|
|
|
|
|
|
|
For subfield 6, it should always be the 1st subfield according to MARC 21 specifications. Perhaps a generic check should be added that warns if subfield 6 is not the 1st subfield. |
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
=item * Subfield 8. |
|
875
|
|
|
|
|
|
|
|
|
876
|
|
|
|
|
|
|
This subfield could be the 1st or 2nd subfield, so the code that checks for the 1st few subfields (check_245, check_250) should take that into account. |
|
877
|
|
|
|
|
|
|
|
|
878
|
|
|
|
|
|
|
=item * Subfield 9 |
|
879
|
|
|
|
|
|
|
|
|
880
|
|
|
|
|
|
|
This subfield is not officially allowed in MARC, since it is locally defined. Some way needs to be made to allow messages/warnings about this subfield to be turned off (or otherwise deal with records using/allowing locally defined subfield 9). |
|
881
|
|
|
|
|
|
|
|
|
882
|
|
|
|
|
|
|
=item * 008 length and presence check |
|
883
|
|
|
|
|
|
|
|
|
884
|
|
|
|
|
|
|
Currently, 008 validation is not implemented in MARC::Lint, but is left to MARC::Errorchecks. It might be useful if MARC::Lint's basic validation checks included a verification that the 008 exists and is exactly 40 characters long. Additional 008-related checking and byte validation would remain in MARC::Errorchecks. |
|
885
|
|
|
|
|
|
|
|
|
886
|
|
|
|
|
|
|
=item * ISBN and ISSN checking |
|
887
|
|
|
|
|
|
|
|
|
888
|
|
|
|
|
|
|
020 and 022 fields are validated with the C and |
|
889
|
|
|
|
|
|
|
C modules, respectively. Business::ISBN versions between 2 and |
|
890
|
|
|
|
|
|
|
2.02_01 are incompatible with MARC::Lint. |
|
891
|
|
|
|
|
|
|
|
|
892
|
|
|
|
|
|
|
=item * check_041 cleanup |
|
893
|
|
|
|
|
|
|
|
|
894
|
|
|
|
|
|
|
Splitting subfield code strings every 3 chars could probably be written more efficiently. |
|
895
|
|
|
|
|
|
|
|
|
896
|
|
|
|
|
|
|
=item * check_245 cleanup |
|
897
|
|
|
|
|
|
|
|
|
898
|
|
|
|
|
|
|
The article checking in particular. |
|
899
|
|
|
|
|
|
|
|
|
900
|
|
|
|
|
|
|
=item * Method for turning off checks |
|
901
|
|
|
|
|
|
|
|
|
902
|
|
|
|
|
|
|
Provide a way for users to skip checks more easily when using check_record, or a |
|
903
|
|
|
|
|
|
|
specific check_xxx method (e.g. skip article checking). |
|
904
|
|
|
|
|
|
|
|
|
905
|
|
|
|
|
|
|
=back |
|
906
|
|
|
|
|
|
|
|
|
907
|
|
|
|
|
|
|
=head1 LICENSE |
|
908
|
|
|
|
|
|
|
|
|
909
|
|
|
|
|
|
|
This code may be distributed under the same terms as Perl itself. |
|
910
|
|
|
|
|
|
|
|
|
911
|
|
|
|
|
|
|
Please note that these modules are not products of or supported by the |
|
912
|
|
|
|
|
|
|
employers of the various contributors to the code. |
|
913
|
|
|
|
|
|
|
|
|
914
|
|
|
|
|
|
|
=cut |
|
915
|
|
|
|
|
|
|
|
|
916
|
|
|
|
|
|
|
# Used only to read the stuff from __DATA__ |
|
917
|
|
|
|
|
|
|
sub _read_rules { |
|
918
|
6
|
|
|
6
|
|
11
|
my $self = shift; |
|
919
|
|
|
|
|
|
|
|
|
920
|
6
|
|
|
|
|
19
|
my $tell = tell(DATA); # Stash the position so we can reset it for next time |
|
921
|
|
|
|
|
|
|
|
|
922
|
6
|
|
|
|
|
23
|
local $/ = ""; |
|
923
|
6
|
|
|
|
|
90
|
while ( my $tagblock = ) { |
|
924
|
1422
|
|
|
|
|
6345
|
my @lines = split( /\n/, $tagblock ); |
|
925
|
1422
|
|
|
|
|
25796
|
s/\s+$// for @lines; |
|
926
|
|
|
|
|
|
|
|
|
927
|
1422
|
100
|
|
|
|
2704
|
next unless @lines >= 4; # Some of our entries are tag-only |
|
928
|
|
|
|
|
|
|
|
|
929
|
1320
|
|
|
|
|
1378
|
my $tagline = shift @lines; |
|
930
|
1320
|
|
|
|
|
2512
|
my @keyvals = split( /\s+/, $tagline, 3 ); |
|
931
|
1320
|
|
|
|
|
1304
|
my $tagno = shift @keyvals; |
|
932
|
1320
|
|
|
|
|
1190
|
my $repeatable = shift @keyvals; |
|
933
|
|
|
|
|
|
|
|
|
934
|
1320
|
|
|
|
|
2410
|
$self->_parse_tag_rules( $tagno, $repeatable, @lines ); |
|
935
|
|
|
|
|
|
|
} # while |
|
936
|
|
|
|
|
|
|
|
|
937
|
|
|
|
|
|
|
# Set the pointer back to where it was, in case we do this again |
|
938
|
6
|
|
|
|
|
53
|
seek( DATA, $tell, 0 ); |
|
939
|
|
|
|
|
|
|
} |
|
940
|
|
|
|
|
|
|
|
|
941
|
|
|
|
|
|
|
sub _parse_tag_rules { |
|
942
|
1320
|
|
|
1320
|
|
1125
|
my $self = shift; |
|
943
|
1320
|
|
|
|
|
1113
|
my $tagno = shift; |
|
944
|
1320
|
|
|
|
|
1149
|
my $repeatable = shift; |
|
945
|
1320
|
|
|
|
|
2414
|
my @lines = @_; |
|
946
|
|
|
|
|
|
|
|
|
947
|
1320
|
|
50
|
|
|
5501
|
my $rules = ($self->{_rules}->{$tagno} ||= {}); |
|
948
|
1320
|
|
|
|
|
1765
|
$rules->{'repeatable'} = $repeatable; |
|
949
|
|
|
|
|
|
|
|
|
950
|
1320
|
|
|
|
|
1316
|
for my $line ( @lines ) { |
|
951
|
15744
|
|
|
|
|
25325
|
my @keyvals = split( /\s+/, $line, 3 ); |
|
952
|
15744
|
|
|
|
|
14977
|
my $key = shift @keyvals; |
|
953
|
15744
|
|
|
|
|
12798
|
my $val = shift @keyvals; |
|
954
|
|
|
|
|
|
|
|
|
955
|
|
|
|
|
|
|
# Do magic for indicators |
|
956
|
15744
|
100
|
|
|
|
20695
|
if ( $key =~ /^ind/ ) { |
|
957
|
2640
|
|
|
|
|
3418
|
$rules->{$key} = $val; |
|
958
|
|
|
|
|
|
|
|
|
959
|
2640
|
|
|
|
|
1919
|
my $desc; |
|
960
|
|
|
|
|
|
|
my $regex; |
|
961
|
|
|
|
|
|
|
|
|
962
|
2640
|
100
|
|
|
|
3196
|
if ( $val eq "blank" ) { |
|
963
|
1608
|
|
|
|
|
1290
|
$desc = "blank"; |
|
964
|
1608
|
|
|
|
|
3525
|
$regex = qr/^ $/; |
|
965
|
|
|
|
|
|
|
} else { |
|
966
|
1032
|
|
|
|
|
1397
|
$desc = _nice_list($val); |
|
967
|
1032
|
|
|
|
|
1495
|
$val =~ s/^b/ /; |
|
968
|
1032
|
|
|
|
|
9314
|
$regex = qr/^[$val]$/; |
|
969
|
|
|
|
|
|
|
} |
|
970
|
|
|
|
|
|
|
|
|
971
|
2640
|
|
|
|
|
4186
|
$rules->{$key."_desc"} = $desc; |
|
972
|
2640
|
|
|
|
|
4807
|
$rules->{$key."_regex"} = $regex; |
|
973
|
|
|
|
|
|
|
} # if indicator |
|
974
|
|
|
|
|
|
|
else { |
|
975
|
13104
|
100
|
|
|
|
13198
|
if ( $key =~ /(.)-(.)/ ) { |
|
976
|
18
|
|
|
|
|
38
|
my ($min,$max) = ($1,$2); |
|
977
|
18
|
|
|
|
|
316
|
$rules->{$_} = $val for ($min..$max); |
|
978
|
|
|
|
|
|
|
} else { |
|
979
|
13086
|
|
|
|
|
29435
|
$rules->{$key} = $val; |
|
980
|
|
|
|
|
|
|
} |
|
981
|
|
|
|
|
|
|
} # not an indicator |
|
982
|
|
|
|
|
|
|
} # for $line |
|
983
|
|
|
|
|
|
|
} |
|
984
|
|
|
|
|
|
|
|
|
985
|
|
|
|
|
|
|
|
|
986
|
|
|
|
|
|
|
sub _nice_list { |
|
987
|
1032
|
|
|
1032
|
|
906
|
my $str = shift; |
|
988
|
|
|
|
|
|
|
|
|
989
|
1032
|
100
|
|
|
|
2043
|
if ( $str =~ s/(\d)-(\d)/$1 thru $2/ ) { |
|
990
|
66
|
|
|
|
|
169
|
return $str; |
|
991
|
|
|
|
|
|
|
} |
|
992
|
|
|
|
|
|
|
|
|
993
|
966
|
|
|
|
|
1909
|
my @digits = split( //, $str ); |
|
994
|
966
|
100
|
|
|
|
1770
|
$digits[0] = "blank" if $digits[0] eq "b"; |
|
995
|
966
|
|
|
|
|
872
|
my $last = pop @digits; |
|
996
|
966
|
|
|
|
|
2286
|
return join( ", ", @digits ) . " or $last"; |
|
997
|
|
|
|
|
|
|
} |
|
998
|
|
|
|
|
|
|
|
|
999
|
|
|
|
|
|
|
sub _ind_regex { |
|
1000
|
0
|
|
|
0
|
|
|
my $str = shift; |
|
1001
|
|
|
|
|
|
|
|
|
1002
|
0
|
0
|
|
|
|
|
return qr/^ $/ if $str eq "blank"; |
|
1003
|
|
|
|
|
|
|
|
|
1004
|
0
|
|
|
|
|
|
return qr/^[$str]$/; |
|
1005
|
|
|
|
|
|
|
} |
|
1006
|
|
|
|
|
|
|
|
|
1007
|
|
|
|
|
|
|
|
|
1008
|
|
|
|
|
|
|
1; |
|
1009
|
|
|
|
|
|
|
|
|
1010
|
|
|
|
|
|
|
__DATA__ |