| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!perl |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
package MARC::File::MARCMaker; |
|
4
|
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 NAME |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
MARC::File::MARCMaker -- Work with MARCMaker/MARCBreaker records. |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
=cut |
|
10
|
|
|
|
|
|
|
|
|
11
|
2
|
|
|
2
|
|
119051
|
use strict; |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
67
|
|
|
12
|
2
|
|
|
2
|
|
9
|
use integer; |
|
|
2
|
|
|
|
|
3
|
|
|
|
2
|
|
|
|
|
12
|
|
|
13
|
|
|
|
|
|
|
|
|
14
|
2
|
|
|
2
|
|
46
|
use vars qw( $VERSION $ERROR ); |
|
|
2
|
|
|
|
|
8
|
|
|
|
2
|
|
|
|
|
120
|
|
|
15
|
|
|
|
|
|
|
|
|
16
|
|
|
|
|
|
|
$VERSION = 0.05; |
|
17
|
|
|
|
|
|
|
|
|
18
|
2
|
|
|
2
|
|
10
|
use MARC::File; |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
49
|
|
|
19
|
2
|
|
|
2
|
|
8
|
use vars qw( @ISA ); @ISA = qw( MARC::File ); |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
90
|
|
|
20
|
|
|
|
|
|
|
|
|
21
|
2
|
|
|
2
|
|
10
|
use MARC::Record qw( LEADER_LEN ); |
|
|
2
|
|
|
|
|
5
|
|
|
|
2
|
|
|
|
|
137
|
|
|
22
|
2
|
|
|
2
|
|
10
|
use constant SUBFIELD_INDICATOR => "\x24"; #dollar sign |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
479
|
|
|
23
|
2
|
|
|
2
|
|
13
|
use constant END_OF_FIELD => "\n\x3D"; #line break, equals sign |
|
|
2
|
|
|
|
|
4
|
|
|
|
2
|
|
|
|
|
326
|
|
|
24
|
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
use MARC::File::MARCMaker; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
my $file = MARC::File::MARCMaker->in( $filename ); |
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
while ( my $marc = $file->next() ) { |
|
34
|
|
|
|
|
|
|
# Do something |
|
35
|
|
|
|
|
|
|
} |
|
36
|
|
|
|
|
|
|
$file->close(); |
|
37
|
|
|
|
|
|
|
undef $file; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
#################################################### |
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
use MARC::File::MARCMaker; |
|
42
|
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
## reading with MARC::Batch |
|
44
|
|
|
|
|
|
|
my $batch = MARC::Batch->new( 'MARCMaker', $filename ); |
|
45
|
|
|
|
|
|
|
my $record = $batch->next(); |
|
46
|
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
## or reading with MARC::File::MARCMaker explicitly |
|
48
|
|
|
|
|
|
|
my $file = MARC::File::MARCMaker->in( $filename ); |
|
49
|
|
|
|
|
|
|
my $record = $file->next(); |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
## output a single MARC::Record object in MARCMaker format (formatted plain text) |
|
52
|
|
|
|
|
|
|
#print $record->as_marcmaker(); #goal syntax |
|
53
|
|
|
|
|
|
|
print MARC::File::MARCMaker->encode($record); #current syntax |
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
The MARC-File-MARCMaker distribution is an extension to the MARC-Record |
|
58
|
|
|
|
|
|
|
distribution for working with MARC21 data using the format used by the Library |
|
59
|
|
|
|
|
|
|
of Congress MARCMaker and MARCBreaker programs. |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
More information may be obtained here: L |
|
62
|
|
|
|
|
|
|
|
|
63
|
|
|
|
|
|
|
You must have MARC::Record installed to use MARC::File::MARCMaker. In fact |
|
64
|
|
|
|
|
|
|
once you install the MARC-File-MARCMaker distribution you will most likely not |
|
65
|
|
|
|
|
|
|
use it directly, but will have an additional file format available to you |
|
66
|
|
|
|
|
|
|
when you use MARC::Batch. |
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
This module is based on code from the original MARC.pm module, as well as the |
|
69
|
|
|
|
|
|
|
MARC::Record distribution's MARC::File::USMARC and MARC::File::MicroLIF modules. |
|
70
|
|
|
|
|
|
|
|
|
71
|
|
|
|
|
|
|
=head2 DEVIATIONS FROM LC'S DOCUMENTATION |
|
72
|
|
|
|
|
|
|
|
|
73
|
|
|
|
|
|
|
LC's MARCMaker/MARCBreaker programs require files to have DOS line endings. |
|
74
|
|
|
|
|
|
|
This module should be capable of reading any type of line ending. |
|
75
|
|
|
|
|
|
|
It converts existing endings to "\n", the endings of the platform. |
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
Initial version may or may not work well with line breaks in the middle of a field. |
|
78
|
|
|
|
|
|
|
|
|
79
|
|
|
|
|
|
|
MARCMaker version of the LDR (record size bytes) will not necessarily be dependable, and should not be relied upon. |
|
80
|
|
|
|
|
|
|
|
|
81
|
|
|
|
|
|
|
=head1 EXPORT |
|
82
|
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
None. |
|
84
|
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=head1 TODO |
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
Do limit tests in filling the buffer and getting chunks. Seems to work for first fill, but may fail on larger reads/multiple reads to fill the buffer. |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
Test special characters (those requiring escapes). Initial version may not fully support non-English characters. All MARC-8 may work, Unicode support is untested and unassured. |
|
90
|
|
|
|
|
|
|
|
|
91
|
|
|
|
|
|
|
Implement better character encoding and decoding, including Unicode support. |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
Work on character set internal subs for both input and output. Currently, the original subs from MARC.pm are being used essentially as-is. |
|
94
|
|
|
|
|
|
|
|
|
95
|
|
|
|
|
|
|
Error checking for line breaks vs. new fields? Probably not possible, since line breaks are allowed within fields, so checking for missing equals sign is not really possible. |
|
96
|
|
|
|
|
|
|
|
|
97
|
|
|
|
|
|
|
Account for multiple occurences of =LDR in a single record, usually caused by lack of blank line between records, so records get mushed together. Also check for multiple =001s. |
|
98
|
|
|
|
|
|
|
|
|
99
|
|
|
|
|
|
|
Determine why the constant SUBFIELD_INDICATOR can't be used in the split into subfields. |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
Work on encode(). |
|
102
|
|
|
|
|
|
|
|
|
103
|
|
|
|
|
|
|
Allow as_marcmaker() to be called with either MARC::Field or MARC::Record objects, returning the appropriate result. Desired behavior is as_usmarc() methods in MARC::Record and MARC::Field |
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
Decode should mostly be working. Test for correctness. |
|
106
|
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
Remove unnecessary code and documentation, remnants of the initial development of the module. Move internal subs to end of module? |
|
108
|
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=head1 VERSION HISTORY |
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
Version 0.05: First CPAN release, Oct. 30, 2005. |
|
112
|
|
|
|
|
|
|
|
|
113
|
|
|
|
|
|
|
Version 0.04: Updated Oct. 22, 2005. Released Oct. 23, 2005. |
|
114
|
|
|
|
|
|
|
|
|
115
|
|
|
|
|
|
|
-Initial commit to CVS on SourceForge |
|
116
|
|
|
|
|
|
|
-Misc. cleanup. |
|
117
|
|
|
|
|
|
|
|
|
118
|
|
|
|
|
|
|
Version 0.03: Updated Aug. 2, 2005. Released Aug. 14, 2005. |
|
119
|
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
-Revised decode() to fix problem with dollar sign conversion from mnemonics to characters. |
|
121
|
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
Version 0.02: Updated July 12-13, 2005. Released July 16, 2005. |
|
123
|
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
-Preliminary version of encode() for fields and records |
|
125
|
|
|
|
|
|
|
|
|
126
|
|
|
|
|
|
|
Version 0.01: Initial version, Nov. 21, 2004-Mar. 7, 2005. Released Mar. 7, 2005. |
|
127
|
|
|
|
|
|
|
|
|
128
|
|
|
|
|
|
|
-Basic version, translates .mrk format file into MARC::Record objects. |
|
129
|
|
|
|
|
|
|
|
|
130
|
|
|
|
|
|
|
=for internal |
|
131
|
|
|
|
|
|
|
|
|
132
|
|
|
|
|
|
|
############################################################ |
|
133
|
|
|
|
|
|
|
This section is copied from MARC::File::MicroLIF. |
|
134
|
|
|
|
|
|
|
############################################################ |
|
135
|
|
|
|
|
|
|
|
|
136
|
|
|
|
|
|
|
The buffer must be large enough to handle any valid record because |
|
137
|
|
|
|
|
|
|
we don't check for cases like a CR/LF pair or an end-of-record/CR/LF |
|
138
|
|
|
|
|
|
|
trio being only partially in the buffer. |
|
139
|
|
|
|
|
|
|
|
|
140
|
|
|
|
|
|
|
The max valid record is the max MARC record size (99999) plus one |
|
141
|
|
|
|
|
|
|
or two characters per tag (CR, LF, or CR/LF). It's hard to say |
|
142
|
|
|
|
|
|
|
what the max number of tags is, so here we use 6000. (6000 tags |
|
143
|
|
|
|
|
|
|
can be squeezed into a MARC record only if every tag has only one |
|
144
|
|
|
|
|
|
|
subfield containing a maximum of one character, or if data from |
|
145
|
|
|
|
|
|
|
multiple tags overlaps in the MARC record body. We're pretty safe.) |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
=cut |
|
148
|
|
|
|
|
|
|
|
|
149
|
2
|
|
|
2
|
|
12
|
use constant BUFFER_MIN => (99999 + 6000 * 2); |
|
|
2
|
|
|
|
|
2
|
|
|
|
2
|
|
|
|
|
14358
|
|
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
=head1 METHODS |
|
152
|
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=cut |
|
154
|
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
################################## |
|
156
|
|
|
|
|
|
|
### START OF MARCMAKER METHODS ### |
|
157
|
|
|
|
|
|
|
################################## |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head2 _next (merged from MicroLIF and USMARC) |
|
160
|
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
Called by MARC::File::next(). |
|
162
|
|
|
|
|
|
|
|
|
163
|
|
|
|
|
|
|
=cut |
|
164
|
|
|
|
|
|
|
|
|
165
|
|
|
|
|
|
|
sub _next { #done for MARCMaker? |
|
166
|
|
|
|
|
|
|
|
|
167
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
168
|
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
#_get_chunk will separate records from each other and should convert |
|
170
|
|
|
|
|
|
|
# line endings to those of the platform. |
|
171
|
0
|
|
|
|
|
0
|
my $makerrec = $self->_get_chunk(); |
|
172
|
|
|
|
|
|
|
# for ease, make sure the newlines match this platform |
|
173
|
0
|
0
|
|
|
|
0
|
$makerrec =~ s/[\x0d\x0a]+/\n/g if defined $makerrec; |
|
174
|
|
|
|
|
|
|
|
|
175
|
0
|
|
|
|
|
0
|
return $makerrec; |
|
176
|
|
|
|
|
|
|
} #_next |
|
177
|
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=head2 decode( $string [, \&filter_func ] ) |
|
179
|
|
|
|
|
|
|
|
|
180
|
|
|
|
|
|
|
(description based on MARC::File::USMARC::decode POD information) |
|
181
|
|
|
|
|
|
|
|
|
182
|
|
|
|
|
|
|
Constructor for handling data from a MARCMaker file. This function takes care |
|
183
|
|
|
|
|
|
|
of all the tag directory parsing & mangling. |
|
184
|
|
|
|
|
|
|
|
|
185
|
|
|
|
|
|
|
Any warnings or coercions can be checked in the C function. |
|
186
|
|
|
|
|
|
|
|
|
187
|
|
|
|
|
|
|
The C<$filter_func> is an optional reference to a user-supplied function |
|
188
|
|
|
|
|
|
|
that determines on a tag-by-tag basis if you want the tag passed to it |
|
189
|
|
|
|
|
|
|
to be put into the MARC record. The function is passed the tag number |
|
190
|
|
|
|
|
|
|
and the raw tag data, and must return a boolean. The return of a true |
|
191
|
|
|
|
|
|
|
value tells MARC::File::MARCMaker::decode that the tag should get put into |
|
192
|
|
|
|
|
|
|
the resulting MARC record. |
|
193
|
|
|
|
|
|
|
|
|
194
|
|
|
|
|
|
|
For example, if you only want title and subject tags in your MARC record, |
|
195
|
|
|
|
|
|
|
try this: |
|
196
|
|
|
|
|
|
|
|
|
197
|
|
|
|
|
|
|
sub filter { |
|
198
|
|
|
|
|
|
|
my ($tagno,$tagdata) = @_; |
|
199
|
|
|
|
|
|
|
|
|
200
|
|
|
|
|
|
|
return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699); |
|
201
|
|
|
|
|
|
|
} |
|
202
|
|
|
|
|
|
|
|
|
203
|
|
|
|
|
|
|
my $marc = MARC::File::MARCMaker->decode( $string, \&filter ); |
|
204
|
|
|
|
|
|
|
|
|
205
|
|
|
|
|
|
|
Why would you want to do such a thing? The big reason is that creating |
|
206
|
|
|
|
|
|
|
fields is processor-intensive, and if your program is doing read-only |
|
207
|
|
|
|
|
|
|
data analysis and needs to be as fast as possible, you can save time by |
|
208
|
|
|
|
|
|
|
not creating fields that you'll be ignoring anyway. |
|
209
|
|
|
|
|
|
|
|
|
210
|
|
|
|
|
|
|
Another possible use is if you're only interested in printing certain |
|
211
|
|
|
|
|
|
|
tags from the record, then you can filter them when you read from disc |
|
212
|
|
|
|
|
|
|
and not have to delete unwanted tags yourself. |
|
213
|
|
|
|
|
|
|
|
|
214
|
|
|
|
|
|
|
=cut |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
|
|
217
|
|
|
|
|
|
|
sub decode { #MARCMaker |
|
218
|
|
|
|
|
|
|
|
|
219
|
1
|
|
|
1
|
1
|
22
|
my $text; |
|
220
|
1
|
|
|
|
|
2
|
my $location = ''; |
|
221
|
|
|
|
|
|
|
|
|
222
|
|
|
|
|
|
|
## decode can be called in a variety of ways |
|
223
|
|
|
|
|
|
|
## $object->decode( $string ) |
|
224
|
|
|
|
|
|
|
## MARC::File::MARCMaker->decode( $string ) |
|
225
|
|
|
|
|
|
|
## MARC::File::MARCMaker::decode( $string ) |
|
226
|
|
|
|
|
|
|
## this bit of code covers all three |
|
227
|
|
|
|
|
|
|
|
|
228
|
1
|
|
|
|
|
2
|
my $self = shift; |
|
229
|
1
|
50
|
|
|
|
5
|
if ( ref($self) =~ /^MARC::File/ ) { |
|
230
|
0
|
|
|
|
|
0
|
$location = 'in record '.$self->{recnum}; |
|
231
|
0
|
|
|
|
|
0
|
$text = shift; |
|
232
|
|
|
|
|
|
|
} else { |
|
233
|
1
|
|
|
|
|
2
|
$location = 'in record 1'; |
|
234
|
1
|
50
|
|
|
|
5
|
$text = $self=~/MARC::File/ ? shift : $self; |
|
235
|
|
|
|
|
|
|
} |
|
236
|
|
|
|
|
|
|
|
|
237
|
1
|
|
|
|
|
3
|
my $filter_func = shift; |
|
238
|
|
|
|
|
|
|
|
|
239
|
|
|
|
|
|
|
# for ease, make the newlines match this platform |
|
240
|
|
|
|
|
|
|
# this has probably already been taken care of at least once, but just in case |
|
241
|
1
|
50
|
|
|
|
25
|
$text =~ s/[\x0d\x0a]+/\n/g if defined $text; |
|
242
|
|
|
|
|
|
|
|
|
243
|
1
|
|
|
|
|
6
|
my $marc = MARC::Record->new(); |
|
244
|
|
|
|
|
|
|
|
|
245
|
|
|
|
|
|
|
#report improperly passed $text (undefined $text) |
|
246
|
1
|
50
|
|
|
|
12
|
return $marc->_warn( "Unable to retrieve a record string $location" ) unless defined $text; |
|
247
|
|
|
|
|
|
|
|
|
248
|
|
|
|
|
|
|
############################# |
|
249
|
|
|
|
|
|
|
#### Charset work needed #### |
|
250
|
|
|
|
|
|
|
############################# |
|
251
|
|
|
|
|
|
|
#use default charset until that function is revised |
|
252
|
1
|
|
|
|
|
3
|
my $charset = usmarc_default(); |
|
253
|
|
|
|
|
|
|
############################# |
|
254
|
|
|
|
|
|
|
############################# |
|
255
|
|
|
|
|
|
|
|
|
256
|
|
|
|
|
|
|
|
|
257
|
|
|
|
|
|
|
#Split each record on the "\n=" into the @lines array |
|
258
|
1
|
|
|
|
|
10
|
my @lines=split END_OF_FIELD, $text; |
|
259
|
1
|
|
|
|
|
2
|
my $leader = shift @lines; |
|
260
|
1
|
50
|
|
|
|
5
|
unless ($leader =~ /^=LDR /) { |
|
261
|
0
|
|
|
|
|
0
|
$marc->_warn( "First line must begin with =LDR" ); |
|
262
|
|
|
|
|
|
|
} |
|
263
|
|
|
|
|
|
|
|
|
264
|
1
|
|
|
|
|
3
|
$leader=~s/^=LDR //; #Remove "=LDR " |
|
265
|
1
|
|
|
|
|
3
|
$leader=~s/[\n\r]//g; #remove line endings |
|
266
|
1
|
|
|
|
|
2
|
$leader=~s/\\/ /g; # substitute " " for \ |
|
267
|
|
|
|
|
|
|
#report error if result is not 24 bytes long |
|
268
|
1
|
50
|
|
|
|
4
|
unless (length($leader) == LEADER_LEN) { |
|
269
|
0
|
|
|
|
|
0
|
$marc->_warn( "Leader must be exactly 24 bytes long" ); |
|
270
|
|
|
|
|
|
|
} |
|
271
|
|
|
|
|
|
|
|
|
272
|
|
|
|
|
|
|
#add leader to the record |
|
273
|
1
|
|
|
|
|
6
|
$marc->leader( substr( $leader, 0, LEADER_LEN ) ); |
|
274
|
|
|
|
|
|
|
|
|
275
|
1
|
|
|
|
|
9
|
LINE: foreach my $line (@lines) { |
|
276
|
|
|
|
|
|
|
#Remove newlines from $line ; and also substitute " " for \ |
|
277
|
7
|
|
|
|
|
131
|
$line=~s/[\n\r]//g; |
|
278
|
7
|
|
|
|
|
23
|
$line=~s/\\/ /g; |
|
279
|
|
|
|
|
|
|
#get the tag name |
|
280
|
7
|
|
|
|
|
13
|
my $tagno = substr($line,0,3); |
|
281
|
|
|
|
|
|
|
# Check tag validity |
|
282
|
7
|
50
|
|
|
|
17
|
( $tagno =~ /^[0-9A-Za-z]{3}$/ ) or $marc->_warn( "Invalid tag in $location: \"$tagno\"" ); |
|
283
|
|
|
|
|
|
|
|
|
284
|
7
|
100
|
66
|
|
|
38
|
if ( ($tagno =~ /^\d+$/ ) && ( $tagno < 10 ) ) { |
|
285
|
|
|
|
|
|
|
#translate characters for tag data |
|
286
|
|
|
|
|
|
|
#revise line below as needed for _maker2char |
|
287
|
2
|
|
|
|
|
6
|
my $tagdata = _maker2char ( substr( $line, 5 ), $charset ); |
|
288
|
|
|
|
|
|
|
#filter_func implementation needs work |
|
289
|
2
|
50
|
|
|
|
5
|
if ( $filter_func ) { |
|
290
|
0
|
0
|
|
|
|
0
|
next LINE unless $filter_func->( $tagno, $tagdata ); |
|
291
|
|
|
|
|
|
|
} |
|
292
|
|
|
|
|
|
|
#add field to record |
|
293
|
2
|
|
|
|
|
6
|
$marc->append_fields( MARC::Field->new( $tagno, $tagdata ) ); |
|
294
|
|
|
|
|
|
|
} #if $tagno < 10 |
|
295
|
|
|
|
|
|
|
else { |
|
296
|
|
|
|
|
|
|
#translate characters for subfield data |
|
297
|
|
|
|
|
|
|
#get indicators |
|
298
|
5
|
|
|
|
|
8
|
my $ind1 = substr( $line, 5, 1 ); |
|
299
|
5
|
|
|
|
|
7
|
my $ind2 = substr( $line, 6, 1 ); |
|
300
|
5
|
|
|
|
|
8
|
my $tagdata = substr( $line, 7 ); |
|
301
|
|
|
|
|
|
|
#report error if first character of tagdata is not a subfield indicator ($) |
|
302
|
5
|
50
|
|
|
|
16
|
$marc->_warn( "First character of subfield data must be a subfield indicator (dollar sign), $tagdata, $location for tag $tagno" ) unless ($tagdata =~ /^\$/ ); |
|
303
|
5
|
50
|
|
|
|
9
|
if ( $filter_func ) { |
|
304
|
0
|
0
|
|
|
|
0
|
next LINE unless $filter_func->( $tagno, $tagdata ); |
|
305
|
|
|
|
|
|
|
} |
|
306
|
|
|
|
|
|
|
|
|
307
|
|
|
|
|
|
|
#why doesn't SUBFIELD_INDICATOR work in the split? |
|
308
|
5
|
|
|
|
|
17
|
my @subfields_mnemonic = split( /\x24/, $tagdata ); |
|
309
|
|
|
|
|
|
|
#convert characters from mnemonics to characters |
|
310
|
5
|
|
|
|
|
7
|
my @subfields = map {_maker2char($_, $charset)} @subfields_mnemonic; |
|
|
11
|
|
|
|
|
17
|
|
|
311
|
|
|
|
|
|
|
|
|
312
|
|
|
|
|
|
|
#is there a better way to deal with the empty first item? |
|
313
|
5
|
|
|
|
|
8
|
my $empty = shift @subfields; |
|
314
|
5
|
50
|
|
|
|
10
|
$marc->_warn( "Subfield data appears before first subfield? $location in $tagno" ) if $empty; |
|
315
|
|
|
|
|
|
|
|
|
316
|
|
|
|
|
|
|
# Split the subfield data into subfield name and data pairs |
|
317
|
5
|
|
|
|
|
6
|
my @subfield_data; |
|
318
|
5
|
|
|
|
|
5
|
for ( @subfields ) { |
|
319
|
6
|
50
|
|
|
|
11
|
if ( length > 0 ) { |
|
320
|
6
|
|
|
|
|
16
|
push( @subfield_data, substr($_,0,1),substr($_,1) ); |
|
321
|
|
|
|
|
|
|
} else { |
|
322
|
0
|
|
|
|
|
0
|
$marc->_warn( "Entirely empty subfield found in tag $tagno" ); |
|
323
|
|
|
|
|
|
|
} |
|
324
|
|
|
|
|
|
|
} #for @subfields |
|
325
|
|
|
|
|
|
|
|
|
326
|
5
|
50
|
|
|
|
14
|
if ( !@subfield_data ) { |
|
327
|
0
|
|
|
|
|
0
|
$marc->_warn( "no subfield data found $location for tag $tagno" ); |
|
328
|
0
|
|
|
|
|
0
|
next; |
|
329
|
|
|
|
|
|
|
} |
|
330
|
|
|
|
|
|
|
|
|
331
|
5
|
|
|
|
|
16
|
my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data ); |
|
332
|
5
|
50
|
|
|
|
182
|
if ( $field->warnings() ) { |
|
333
|
0
|
|
|
|
|
0
|
$marc->_warn( $field->warnings() ); |
|
334
|
|
|
|
|
|
|
} |
|
335
|
5
|
|
|
|
|
30
|
$marc->append_fields( $field ); |
|
336
|
|
|
|
|
|
|
} |
|
337
|
|
|
|
|
|
|
} # looping through all the fields |
|
338
|
|
|
|
|
|
|
|
|
339
|
|
|
|
|
|
|
|
|
340
|
1
|
|
|
|
|
33
|
return $marc; |
|
341
|
|
|
|
|
|
|
|
|
342
|
|
|
|
|
|
|
} #decode MARCMaker |
|
343
|
|
|
|
|
|
|
|
|
344
|
|
|
|
|
|
|
=head2 update_leader() #from USMARC |
|
345
|
|
|
|
|
|
|
|
|
346
|
|
|
|
|
|
|
This may be unnecessary code. Delete this section if that is the case. |
|
347
|
|
|
|
|
|
|
|
|
348
|
|
|
|
|
|
|
If any changes get made to the MARC record, the first 5 bytes of the |
|
349
|
|
|
|
|
|
|
leader (the length) will be invalid. This function updates the |
|
350
|
|
|
|
|
|
|
leader with the correct length of the record as it would be if |
|
351
|
|
|
|
|
|
|
written out to a file. |
|
352
|
|
|
|
|
|
|
|
|
353
|
|
|
|
|
|
|
|
|
354
|
|
|
|
|
|
|
sub update_leader() { #from USMARC |
|
355
|
|
|
|
|
|
|
my $self = shift; |
|
356
|
|
|
|
|
|
|
|
|
357
|
|
|
|
|
|
|
my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory(); |
|
358
|
|
|
|
|
|
|
|
|
359
|
|
|
|
|
|
|
$self->_set_leader_lengths( $reclen, $baseaddress ); |
|
360
|
|
|
|
|
|
|
} #updated_leader() from USMARC |
|
361
|
|
|
|
|
|
|
|
|
362
|
|
|
|
|
|
|
=head2 encode() #based on MARC::File::USMARC |
|
363
|
|
|
|
|
|
|
|
|
364
|
|
|
|
|
|
|
Returns a string of characters suitable for writing out to a MARCMaker file, |
|
365
|
|
|
|
|
|
|
including the leader, directory and all the fields. |
|
366
|
|
|
|
|
|
|
|
|
367
|
|
|
|
|
|
|
Uses as_marcmaker() below to build each field. |
|
368
|
|
|
|
|
|
|
|
|
369
|
|
|
|
|
|
|
=cut |
|
370
|
|
|
|
|
|
|
|
|
371
|
|
|
|
|
|
|
sub encode { #MARCMaker, based on USMARC's encode() |
|
372
|
1
|
|
|
1
|
1
|
623
|
my $marc = shift; |
|
373
|
1
|
50
|
33
|
|
|
8
|
$marc = shift if (ref($marc)||$marc) =~ /^MARC::File/; |
|
374
|
1
|
|
|
|
|
2
|
my $field_string = ''; |
|
375
|
|
|
|
|
|
|
#convert each field (after the leader) to MARCMaker format |
|
376
|
1
|
|
|
|
|
6
|
foreach my $field ($marc->fields()) { |
|
377
|
7
|
|
|
|
|
355
|
$field_string .= $field->MARC::File::MARCMaker::as_marcmaker(); |
|
378
|
|
|
|
|
|
|
} #foreach field in record |
|
379
|
|
|
|
|
|
|
|
|
380
|
|
|
|
|
|
|
# Glomp it all together |
|
381
|
1
|
|
|
|
|
38
|
return join("", "=LDR ", $marc->leader, "\n", $field_string, "\n"); |
|
382
|
|
|
|
|
|
|
|
|
383
|
|
|
|
|
|
|
} #encode from USMARC |
|
384
|
|
|
|
|
|
|
|
|
385
|
|
|
|
|
|
|
|
|
386
|
|
|
|
|
|
|
=head2 as_marcmaker() |
|
387
|
|
|
|
|
|
|
|
|
388
|
|
|
|
|
|
|
Based on MARC::Field::as_usmarc(). |
|
389
|
|
|
|
|
|
|
Turns a MARC::Field into a MARCMaker formatted field string. |
|
390
|
|
|
|
|
|
|
|
|
391
|
|
|
|
|
|
|
=head2 TODO (as_marcmaker()) |
|
392
|
|
|
|
|
|
|
|
|
393
|
|
|
|
|
|
|
-Change field encoding portion of as_marcmaker() to internal _as_marcmaker() |
|
394
|
|
|
|
|
|
|
-Implement as_marcmaker() as wrapper for MARC::Record object and MARC::Field object encoding into MARCMaker format. |
|
395
|
|
|
|
|
|
|
|
|
396
|
|
|
|
|
|
|
|
|
397
|
|
|
|
|
|
|
=cut |
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
sub as_marcmaker() { |
|
400
|
12
|
|
|
12
|
1
|
3645
|
my $self = shift; |
|
401
|
|
|
|
|
|
|
# $self = shift if (ref($self)||$self) =~ /^MARC::File/; |
|
402
|
|
|
|
|
|
|
|
|
403
|
12
|
50
|
|
|
|
36
|
die "Wanted a MARC::Field but got a ", ref($self) unless ref($self) eq "MARC::Field"; |
|
404
|
|
|
|
|
|
|
|
|
405
|
12
|
|
|
|
|
22
|
my $charset = ustext_default(); |
|
406
|
|
|
|
|
|
|
|
|
407
|
|
|
|
|
|
|
# Tags < 010 are pretty easy |
|
408
|
12
|
100
|
|
|
|
50
|
if ( $self->is_control_field ) { |
|
|
0
|
50
|
|
|
|
0
|
|
|
409
|
|
|
|
|
|
|
#convert characters to MARCMaker codes |
|
410
|
2
|
|
|
|
|
19
|
my $field_data = (_char2maker($self->data(), $charset)); |
|
411
|
|
|
|
|
|
|
#swap blank spaces for backslash ( \ ) |
|
412
|
2
|
|
|
|
|
11
|
$field_data =~ s/ /\\/g; |
|
413
|
|
|
|
|
|
|
#return formatted field |
|
414
|
2
|
|
|
|
|
9
|
return sprintf "=%s %s\n", $self->tag(), $field_data; |
|
415
|
|
|
|
|
|
|
} #if control field |
|
416
|
|
|
|
|
|
|
elsif ($self->tag() eq '000') {print "Leader?\n"} #leader? |
|
417
|
|
|
|
|
|
|
else { |
|
418
|
10
|
|
|
|
|
127
|
my @subs; |
|
419
|
10
|
|
|
|
|
14
|
my @subdata = @{$self->{_subfields}}; |
|
|
10
|
|
|
|
|
34
|
|
|
420
|
10
|
|
|
|
|
42
|
while ( @subdata ) { |
|
421
|
|
|
|
|
|
|
#convert characters to MARCMaker codes as each subfield goes by |
|
422
|
11
|
|
|
|
|
33
|
push( @subs, join( "", SUBFIELD_INDICATOR, shift @subdata, (_char2maker(shift @subdata, $charset))) ); |
|
423
|
|
|
|
|
|
|
} # while |
|
424
|
|
|
|
|
|
|
|
|
425
|
10
|
|
|
|
|
46
|
my $ind1 = $self->indicator(1); |
|
426
|
10
|
|
|
|
|
128
|
my $ind2 = $self->indicator(2); |
|
427
|
|
|
|
|
|
|
#swap blank for backslash ( \ ) |
|
428
|
10
|
|
|
|
|
105
|
$ind1 =~ s/ /\\/g; |
|
429
|
10
|
|
|
|
|
22
|
$ind2 =~ s/ /\\/g; |
|
430
|
|
|
|
|
|
|
|
|
431
|
|
|
|
|
|
|
return |
|
432
|
10
|
|
|
|
|
31
|
join ("", "=", $self->tag(), " ", |
|
433
|
|
|
|
|
|
|
$ind1, |
|
434
|
|
|
|
|
|
|
$ind2, |
|
435
|
|
|
|
|
|
|
@subs, |
|
436
|
|
|
|
|
|
|
"\n", |
|
437
|
|
|
|
|
|
|
); |
|
438
|
|
|
|
|
|
|
} |
|
439
|
|
|
|
|
|
|
} #as_usmarc() #MARC::Field |
|
440
|
|
|
|
|
|
|
|
|
441
|
|
|
|
|
|
|
|
|
442
|
|
|
|
|
|
|
#################################### |
|
443
|
|
|
|
|
|
|
###### END USMARC subs ############# |
|
444
|
|
|
|
|
|
|
#################################### |
|
445
|
|
|
|
|
|
|
|
|
446
|
|
|
|
|
|
|
|
|
447
|
|
|
|
|
|
|
|
|
448
|
|
|
|
|
|
|
|
|
449
|
|
|
|
|
|
|
######################################### |
|
450
|
|
|
|
|
|
|
### begin internal subs from MicroLIF ### |
|
451
|
|
|
|
|
|
|
######################################### |
|
452
|
|
|
|
|
|
|
|
|
453
|
|
|
|
|
|
|
################################# |
|
454
|
|
|
|
|
|
|
# fill the buffer if we need to # |
|
455
|
|
|
|
|
|
|
################################# |
|
456
|
|
|
|
|
|
|
|
|
457
|
|
|
|
|
|
|
sub _fill_buffer { #done for MARCMaker? |
|
458
|
|
|
|
|
|
|
|
|
459
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
460
|
0
|
|
|
|
|
0
|
my $ok = 1; |
|
461
|
|
|
|
|
|
|
|
|
462
|
0
|
0
|
0
|
|
|
0
|
if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) { |
|
463
|
|
|
|
|
|
|
# append the next chunk of bytes to the buffer |
|
464
|
0
|
|
|
|
|
0
|
my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf}); |
|
465
|
|
|
|
|
|
|
#convert line endings within the input buffer |
|
466
|
0
|
0
|
|
|
|
0
|
if ($self->{inputbuf} =~ /\x0d\x0a/s) { |
|
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
467
|
0
|
|
|
|
|
0
|
$self->{inputbuf} =~ s/\x0d\x0a/\n/sg; |
|
468
|
|
|
|
|
|
|
} #if DOS endings |
|
469
|
|
|
|
|
|
|
elsif ($self->{inputbuf} =~ /\x0a/) { |
|
470
|
0
|
|
|
|
|
0
|
$self->{inputbuf} =~ s/\x0a/\n/sg; |
|
471
|
|
|
|
|
|
|
} #elsif Unix endings |
|
472
|
|
|
|
|
|
|
elsif ($self->{inputbuf} =~ /\x0d/) { |
|
473
|
0
|
|
|
|
|
0
|
$self->{inputbuf} =~ s/\x0d/\n/sg; |
|
474
|
|
|
|
|
|
|
} #elsif Macintosh endings |
|
475
|
|
|
|
|
|
|
|
|
476
|
|
|
|
|
|
|
#remove extra blank lines between records |
|
477
|
0
|
|
|
|
|
0
|
$self->{inputbuf} =~ s/\n\s*\n+/\n\n/g; |
|
478
|
|
|
|
|
|
|
|
|
479
|
0
|
0
|
|
|
|
0
|
if ( !defined $read ) { |
|
|
|
0
|
|
|
|
|
|
|
480
|
|
|
|
|
|
|
# error! |
|
481
|
0
|
|
|
|
|
0
|
$ok = undef; |
|
482
|
0
|
|
|
|
|
0
|
$MARC::File::ERROR = "error reading from file " . $self->{filename}; |
|
483
|
|
|
|
|
|
|
} |
|
484
|
|
|
|
|
|
|
elsif ( $read < 1 ) { |
|
485
|
0
|
|
|
|
|
0
|
$self->{exhaustedfh} = 1; |
|
486
|
|
|
|
|
|
|
} |
|
487
|
|
|
|
|
|
|
} |
|
488
|
|
|
|
|
|
|
|
|
489
|
0
|
|
|
|
|
0
|
return $ok; |
|
490
|
|
|
|
|
|
|
} |
|
491
|
|
|
|
|
|
|
|
|
492
|
|
|
|
|
|
|
=for internal |
|
493
|
|
|
|
|
|
|
|
|
494
|
|
|
|
|
|
|
=head2 _get_chunk( ) #for MARCMaker |
|
495
|
|
|
|
|
|
|
|
|
496
|
|
|
|
|
|
|
Gets the next chunk of data (which should be a single complete record). |
|
497
|
|
|
|
|
|
|
|
|
498
|
|
|
|
|
|
|
All extra \r and \n are stripped and line endings are converted to those of the platform (\n). |
|
499
|
|
|
|
|
|
|
|
|
500
|
|
|
|
|
|
|
=cut |
|
501
|
|
|
|
|
|
|
|
|
502
|
|
|
|
|
|
|
sub _get_chunk { #done for MARCMaker? |
|
503
|
|
|
|
|
|
|
|
|
504
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
505
|
|
|
|
|
|
|
|
|
506
|
0
|
|
|
|
|
0
|
my $chunk = undef; |
|
507
|
|
|
|
|
|
|
|
|
508
|
|
|
|
|
|
|
#read from the file and fill the input buffer |
|
509
|
0
|
0
|
0
|
|
|
0
|
if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) { |
|
510
|
|
|
|
|
|
|
|
|
511
|
|
|
|
|
|
|
#retrieve the next record |
|
512
|
0
|
|
|
|
|
0
|
($chunk) = split /\n\n/, $self->{inputbuf}, 0; |
|
513
|
|
|
|
|
|
|
#remove the chunk and record separator from the input buffer |
|
514
|
0
|
|
|
|
|
0
|
$self->{inputbuf} = substr( $self->{inputbuf}, length($chunk)+length("\n\n") ); |
|
515
|
0
|
0
|
|
|
|
0
|
if ( !$chunk ) { |
|
516
|
0
|
|
|
|
|
0
|
$chunk = $self->{inputbuf}; |
|
517
|
0
|
|
|
|
|
0
|
$self->{inputbuf} = ''; |
|
518
|
0
|
|
|
|
|
0
|
$self->{exhaustedfh} = 1; |
|
519
|
|
|
|
|
|
|
} #if not chunk |
|
520
|
|
|
|
|
|
|
|
|
521
|
|
|
|
|
|
|
} #if buffer can be filled and has characters |
|
522
|
0
|
|
|
|
|
0
|
return $chunk; |
|
523
|
|
|
|
|
|
|
} #_get_chunk() |
|
524
|
|
|
|
|
|
|
|
|
525
|
|
|
|
|
|
|
=head2 _unget_chunk ( ) #done for MARCMaker? |
|
526
|
|
|
|
|
|
|
|
|
527
|
|
|
|
|
|
|
$chunk is put at the beginning of the buffer followed |
|
528
|
|
|
|
|
|
|
by two line endings ("\n\n") as a record separator. |
|
529
|
|
|
|
|
|
|
|
|
530
|
|
|
|
|
|
|
I don't know that this sub is necessary. |
|
531
|
|
|
|
|
|
|
|
|
532
|
|
|
|
|
|
|
=cut |
|
533
|
|
|
|
|
|
|
|
|
534
|
|
|
|
|
|
|
sub _unget_chunk { |
|
535
|
0
|
|
|
0
|
|
0
|
my $self = shift; |
|
536
|
0
|
|
|
|
|
0
|
my $chunk = shift; |
|
537
|
0
|
|
|
|
|
0
|
$self->{inputbuf} = $chunk . $self->{inputbuf}; |
|
538
|
0
|
|
|
|
|
0
|
return; |
|
539
|
|
|
|
|
|
|
} |
|
540
|
|
|
|
|
|
|
|
|
541
|
|
|
|
|
|
|
|
|
542
|
|
|
|
|
|
|
####################################### |
|
543
|
|
|
|
|
|
|
### End internal subs from MicroLIF ### |
|
544
|
|
|
|
|
|
|
####################################### |
|
545
|
|
|
|
|
|
|
|
|
546
|
|
|
|
|
|
|
####################################### |
|
547
|
|
|
|
|
|
|
### Character handling from MARC.pm ### |
|
548
|
|
|
|
|
|
|
####################################### |
|
549
|
|
|
|
|
|
|
|
|
550
|
|
|
|
|
|
|
=head2 _char2maker |
|
551
|
|
|
|
|
|
|
|
|
552
|
|
|
|
|
|
|
Pass in string of characters from a MARC record and a character map ($charset, or usmarc_default() by default). |
|
553
|
|
|
|
|
|
|
Returns string of characters encoded in MARCMaker format. |
|
554
|
|
|
|
|
|
|
(e.g. replaces '$' with {dollar}) |
|
555
|
|
|
|
|
|
|
|
|
556
|
|
|
|
|
|
|
=cut |
|
557
|
|
|
|
|
|
|
|
|
558
|
|
|
|
|
|
|
sub _char2maker { #deal with charmap default |
|
559
|
13
|
|
|
13
|
|
1430
|
my @marc_string = split (//, shift); |
|
560
|
13
|
|
|
|
|
171
|
my $charmap = shift; #|| $charset; #add default value |
|
561
|
13
|
|
|
|
|
28
|
my $maker_string = join ('', map {${$charmap}{$_} } @marc_string); |
|
|
4608
|
|
|
|
|
6169
|
|
|
|
4608
|
|
|
|
|
10063
|
|
|
562
|
|
|
|
|
|
|
#replace html-style entities (´) with code in curly braces ({acute}) |
|
563
|
13
|
|
|
|
|
345
|
while ($maker_string =~ s/(&)([^ ]{1,7}?)(;)/{$2}/o) {} |
|
564
|
|
|
|
|
|
|
|
|
565
|
13
|
|
|
|
|
424
|
return $maker_string; |
|
566
|
|
|
|
|
|
|
} #_char2maker |
|
567
|
|
|
|
|
|
|
|
|
568
|
|
|
|
|
|
|
###################### |
|
569
|
|
|
|
|
|
|
|
|
570
|
|
|
|
|
|
|
|
|
571
|
|
|
|
|
|
|
=head2 Default charset |
|
572
|
|
|
|
|
|
|
|
|
573
|
|
|
|
|
|
|
usmarc_default() -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding. |
|
574
|
|
|
|
|
|
|
|
|
575
|
|
|
|
|
|
|
Used by _maker2char. |
|
576
|
|
|
|
|
|
|
|
|
577
|
|
|
|
|
|
|
This perhaps should be an internal _usmarc_default(). |
|
578
|
|
|
|
|
|
|
|
|
579
|
|
|
|
|
|
|
=cut |
|
580
|
|
|
|
|
|
|
|
|
581
|
|
|
|
|
|
|
sub usmarc_default { # rec |
|
582
|
1
|
|
|
1
|
0
|
9
|
my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb, |
|
583
|
|
|
|
|
|
|
0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff); |
|
584
|
1
|
|
|
|
|
5
|
my %inchar = map {sprintf ("%2.2X",int $_), chr($_)} @hexchar; |
|
|
92
|
|
|
|
|
207
|
|
|
585
|
|
|
|
|
|
|
|
|
586
|
1
|
|
|
|
|
10
|
$inchar{esc} = chr(0x1b); # escape |
|
587
|
1
|
|
|
|
|
2
|
$inchar{dollar} = chr(0x24); # dollar sign |
|
588
|
1
|
|
|
|
|
3
|
$inchar{curren} = chr(0x24); # dollar sign - alternate |
|
589
|
1
|
|
|
|
|
1
|
$inchar{24} = chr(0x24); # dollar sign - alternate |
|
590
|
1
|
|
|
|
|
2
|
$inchar{bsol} = chr(0x5c); # back slash (reverse solidus) |
|
591
|
1
|
|
|
|
|
2
|
$inchar{lcub} = chr(0x7b); # opening curly brace |
|
592
|
1
|
|
|
|
|
2
|
$inchar{rcub} = "}"; # closing curly brace - part 1 |
|
593
|
1
|
|
|
|
|
2
|
$inchar{joiner} = chr(0x8d); # zero width joiner |
|
594
|
1
|
|
|
|
|
2
|
$inchar{nonjoin} = chr(0x8e); # zero width non-joiner |
|
595
|
1
|
|
|
|
|
2
|
$inchar{Lstrok} = chr(0xa1); # latin capital letter l with stroke |
|
596
|
1
|
|
|
|
|
2
|
$inchar{Ostrok} = chr(0xa2); # latin capital letter o with stroke |
|
597
|
1
|
|
|
|
|
3
|
$inchar{Dstrok} = chr(0xa3); # latin capital letter d with stroke |
|
598
|
1
|
|
|
|
|
1
|
$inchar{THORN} = chr(0xa4); # latin capital letter thorn (icelandic) |
|
599
|
1
|
|
|
|
|
2
|
$inchar{AElig} = chr(0xa5); # latin capital letter AE |
|
600
|
1
|
|
|
|
|
1
|
$inchar{OElig} = chr(0xa6); # latin capital letter OE |
|
601
|
1
|
|
|
|
|
3
|
$inchar{softsign} = chr(0xa7); # modifier letter soft sign |
|
602
|
1
|
|
|
|
|
2
|
$inchar{middot} = chr(0xa8); # middle dot |
|
603
|
1
|
|
|
|
|
2
|
$inchar{flat} = chr(0xa9); # musical flat sign |
|
604
|
1
|
|
|
|
|
2
|
$inchar{reg} = chr(0xaa); # registered sign |
|
605
|
1
|
|
|
|
|
2
|
$inchar{plusmn} = chr(0xab); # plus-minus sign |
|
606
|
1
|
|
|
|
|
1
|
$inchar{Ohorn} = chr(0xac); # latin capital letter o with horn |
|
607
|
1
|
|
|
|
|
2
|
$inchar{Uhorn} = chr(0xad); # latin capital letter u with horn |
|
608
|
1
|
|
|
|
|
2
|
$inchar{mlrhring} = chr(0xae); # modifier letter right half ring (alif) |
|
609
|
1
|
|
|
|
|
2
|
$inchar{mllhring} = chr(0xb0); # modifier letter left half ring (ayn) |
|
610
|
1
|
|
|
|
|
2
|
$inchar{lstrok} = chr(0xb1); # latin small letter l with stroke |
|
611
|
1
|
|
|
|
|
2
|
$inchar{ostrok} = chr(0xb2); # latin small letter o with stroke |
|
612
|
1
|
|
|
|
|
2
|
$inchar{dstrok} = chr(0xb3); # latin small letter d with stroke |
|
613
|
1
|
|
|
|
|
1
|
$inchar{thorn} = chr(0xb4); # latin small letter thorn (icelandic) |
|
614
|
1
|
|
|
|
|
2
|
$inchar{aelig} = chr(0xb5); # latin small letter ae |
|
615
|
1
|
|
|
|
|
2
|
$inchar{oelig} = chr(0xb6); # latin small letter oe |
|
616
|
1
|
|
|
|
|
2
|
$inchar{hardsign} = chr(0xb7); # modifier letter hard sign |
|
617
|
1
|
|
|
|
|
1
|
$inchar{inodot} = chr(0xb8); # latin small letter dotless i |
|
618
|
1
|
|
|
|
|
2
|
$inchar{pound} = chr(0xb9); # pound sign |
|
619
|
1
|
|
|
|
|
27
|
$inchar{eth} = chr(0xba); # latin small letter eth |
|
620
|
1
|
|
|
|
|
3
|
$inchar{ohorn} = chr(0xbc); # latin small letter o with horn |
|
621
|
1
|
|
|
|
|
7
|
$inchar{uhorn} = chr(0xbd); # latin small letter u with horn |
|
622
|
1
|
|
|
|
|
3
|
$inchar{deg} = chr(0xc0); # degree sign |
|
623
|
1
|
|
|
|
|
2
|
$inchar{scriptl} = chr(0xc1); # latin small letter script l |
|
624
|
1
|
|
|
|
|
3
|
$inchar{phono} = chr(0xc2); # sound recording copyright |
|
625
|
1
|
|
|
|
|
2
|
$inchar{copy} = chr(0xc3); # copyright sign |
|
626
|
1
|
|
|
|
|
3
|
$inchar{sharp} = chr(0xc4); # sharp |
|
627
|
1
|
|
|
|
|
2
|
$inchar{iquest} = chr(0xc5); # inverted question mark |
|
628
|
1
|
|
|
|
|
2
|
$inchar{iexcl} = chr(0xc6); # inverted exclamation mark |
|
629
|
1
|
|
|
|
|
2
|
$inchar{hooka} = chr(0xe0); # combining hook above |
|
630
|
1
|
|
|
|
|
3
|
$inchar{grave} = chr(0xe1); # combining grave |
|
631
|
1
|
|
|
|
|
1
|
$inchar{acute} = chr(0xe2); # combining acute |
|
632
|
1
|
|
|
|
|
3
|
$inchar{circ} = chr(0xe3); # combining circumflex |
|
633
|
1
|
|
|
|
|
2
|
$inchar{tilde} = chr(0xe4); # combining tilde |
|
634
|
1
|
|
|
|
|
2
|
$inchar{macr} = chr(0xe5); # combining macron |
|
635
|
1
|
|
|
|
|
2
|
$inchar{breve} = chr(0xe6); # combining breve |
|
636
|
1
|
|
|
|
|
2
|
$inchar{dot} = chr(0xe7); # combining dot above |
|
637
|
1
|
|
|
|
|
3
|
$inchar{diaer} = chr(0xe8); # combining diaeresis |
|
638
|
1
|
|
|
|
|
2
|
$inchar{uml} = chr(0xe8); # combining umlaut |
|
639
|
1
|
|
|
|
|
2
|
$inchar{caron} = chr(0xe9); # combining hacek |
|
640
|
1
|
|
|
|
|
2
|
$inchar{ring} = chr(0xea); # combining ring above |
|
641
|
1
|
|
|
|
|
1
|
$inchar{llig} = chr(0xeb); # combining ligature left half |
|
642
|
1
|
|
|
|
|
2
|
$inchar{rlig} = chr(0xec); # combining ligature right half |
|
643
|
1
|
|
|
|
|
2
|
$inchar{rcommaa} = chr(0xed); # combining comma above right |
|
644
|
1
|
|
|
|
|
2
|
$inchar{dblac} = chr(0xee); # combining double acute |
|
645
|
1
|
|
|
|
|
2
|
$inchar{candra} = chr(0xef); # combining candrabindu |
|
646
|
1
|
|
|
|
|
2
|
$inchar{cedil} = chr(0xf0); # combining cedilla |
|
647
|
1
|
|
|
|
|
2
|
$inchar{ogon} = chr(0xf1); # combining ogonek |
|
648
|
1
|
|
|
|
|
2
|
$inchar{dotb} = chr(0xf2); # combining dot below |
|
649
|
1
|
|
|
|
|
2
|
$inchar{dbldotb} = chr(0xf3); # combining double dot below |
|
650
|
1
|
|
|
|
|
2
|
$inchar{ringb} = chr(0xf4); # combining ring below |
|
651
|
1
|
|
|
|
|
1
|
$inchar{dblunder} = chr(0xf5); # combining double underscore |
|
652
|
1
|
|
|
|
|
2
|
$inchar{under} = chr(0xf6); # combining underscore |
|
653
|
1
|
|
|
|
|
2
|
$inchar{commab} = chr(0xf7); # combining comma below |
|
654
|
1
|
|
|
|
|
2
|
$inchar{rcedil} = chr(0xf8); # combining right cedilla |
|
655
|
1
|
|
|
|
|
2
|
$inchar{breveb} = chr(0xf9); # combining breve below |
|
656
|
1
|
|
|
|
|
1
|
$inchar{ldbltil} = chr(0xfa); # combining double tilde left half |
|
657
|
1
|
|
|
|
|
2
|
$inchar{rdbltil} = chr(0xfb); # combining double tilde right half |
|
658
|
1
|
|
|
|
|
2
|
$inchar{commaa} = chr(0xfe); # combining comma above |
|
659
|
1
|
50
|
|
|
|
3
|
if ($MARC::DEBUG) { |
|
660
|
0
|
|
|
|
|
0
|
foreach my $str (sort keys %inchar) { |
|
661
|
0
|
|
|
|
|
0
|
printf "%s = %x\n", $str, ord($inchar{$str}); |
|
662
|
|
|
|
|
|
|
} |
|
663
|
|
|
|
|
|
|
} |
|
664
|
1
|
|
|
|
|
5
|
return \%inchar; |
|
665
|
|
|
|
|
|
|
} #usmarc_default |
|
666
|
|
|
|
|
|
|
|
|
667
|
|
|
|
|
|
|
################################################### |
|
668
|
|
|
|
|
|
|
|
|
669
|
|
|
|
|
|
|
=head2 ustext_default |
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
ustext_default -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding. |
|
672
|
|
|
|
|
|
|
|
|
673
|
|
|
|
|
|
|
Used by _char2maker. |
|
674
|
|
|
|
|
|
|
|
|
675
|
|
|
|
|
|
|
This perhaps should be an internal _ustext_default(). |
|
676
|
|
|
|
|
|
|
|
|
677
|
|
|
|
|
|
|
=cut |
|
678
|
|
|
|
|
|
|
|
|
679
|
|
|
|
|
|
|
sub ustext_default { |
|
680
|
12
|
|
|
12
|
1
|
109
|
my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb, |
|
681
|
|
|
|
|
|
|
0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff); |
|
682
|
12
|
|
|
|
|
24
|
my %outchar = map {chr($_), sprintf ("{%2.2X}",int $_)} @hexchar; |
|
|
1104
|
|
|
|
|
3599
|
|
|
683
|
|
|
|
|
|
|
|
|
684
|
12
|
|
|
|
|
139
|
my @ascchar = map {chr($_)} (0x20..0x23,0x25..0x7a,0x7c,0x7e); |
|
|
1104
|
|
|
|
|
1829
|
|
|
685
|
12
|
|
|
|
|
64
|
foreach my $asc (@ascchar) { $outchar{$asc} = $asc;} |
|
|
1104
|
|
|
|
|
1901
|
|
|
686
|
|
|
|
|
|
|
|
|
687
|
12
|
|
|
|
|
28
|
$outchar{chr(0x1b)} = '{esc}'; # escape |
|
688
|
12
|
|
|
|
|
21
|
$outchar{chr(0x24)} = '{dollar}'; # dollar sign |
|
689
|
12
|
|
|
|
|
22
|
$outchar{chr(0x5c)} = '{bsol}'; # back slash (reverse solidus) |
|
690
|
12
|
|
|
|
|
51
|
$outchar{chr(0x7b)} = '{lcub}'; # opening curly brace |
|
691
|
12
|
|
|
|
|
21
|
$outchar{chr(0x7d)} = '{rcub}'; # closing curly brace |
|
692
|
12
|
|
|
|
|
20
|
$outchar{chr(0x8d)} = '{joiner}'; # zero width joiner |
|
693
|
12
|
|
|
|
|
16
|
$outchar{chr(0x8e)} = '{nonjoin}'; # zero width non-joiner |
|
694
|
12
|
|
|
|
|
18
|
$outchar{chr(0xa1)} = '{Lstrok}'; # latin capital letter l with stroke |
|
695
|
12
|
|
|
|
|
21
|
$outchar{chr(0xa2)} = '{Ostrok}'; # latin capital letter o with stroke |
|
696
|
12
|
|
|
|
|
15
|
$outchar{chr(0xa3)} = '{Dstrok}'; # latin capital letter d with stroke |
|
697
|
12
|
|
|
|
|
21
|
$outchar{chr(0xa4)} = '{THORN}'; # latin capital letter thorn (icelandic) |
|
698
|
12
|
|
|
|
|
16
|
$outchar{chr(0xa5)} = '{AElig}'; # latin capital letter AE |
|
699
|
12
|
|
|
|
|
15
|
$outchar{chr(0xa6)} = '{OElig}'; # latin capital letter OE |
|
700
|
12
|
|
|
|
|
21
|
$outchar{chr(0xa7)} = '{softsign}'; # modifier letter soft sign |
|
701
|
12
|
|
|
|
|
17
|
$outchar{chr(0xa8)} = '{middot}'; # middle dot |
|
702
|
12
|
|
|
|
|
17
|
$outchar{chr(0xa9)} = '{flat}'; # musical flat sign |
|
703
|
12
|
|
|
|
|
17
|
$outchar{chr(0xaa)} = '{reg}'; # registered sign |
|
704
|
12
|
|
|
|
|
19
|
$outchar{chr(0xab)} = '{plusmn}'; # plus-minus sign |
|
705
|
12
|
|
|
|
|
35
|
$outchar{chr(0xac)} = '{Ohorn}'; # latin capital letter o with horn |
|
706
|
12
|
|
|
|
|
16
|
$outchar{chr(0xad)} = '{Uhorn}'; # latin capital letter u with horn |
|
707
|
12
|
|
|
|
|
19
|
$outchar{chr(0xae)} = '{mlrhring}'; # modifier letter right half ring (alif) |
|
708
|
12
|
|
|
|
|
17
|
$outchar{chr(0xb0)} = '{mllhring}'; # modifier letter left half ring (ayn) |
|
709
|
12
|
|
|
|
|
14
|
$outchar{chr(0xb1)} = '{lstrok}'; # latin small letter l with stroke |
|
710
|
12
|
|
|
|
|
18
|
$outchar{chr(0xb2)} = '{ostrok}'; # latin small letter o with stroke |
|
711
|
12
|
|
|
|
|
17
|
$outchar{chr(0xb3)} = '{dstrok}'; # latin small letter d with stroke |
|
712
|
12
|
|
|
|
|
25
|
$outchar{chr(0xb4)} = '{thorn}'; # latin small letter thorn (icelandic) |
|
713
|
12
|
|
|
|
|
17
|
$outchar{chr(0xb5)} = '{aelig}'; # latin small letter ae |
|
714
|
12
|
|
|
|
|
18
|
$outchar{chr(0xb6)} = '{oelig}'; # latin small letter oe |
|
715
|
12
|
|
|
|
|
17
|
$outchar{chr(0xb7)} = '{hardsign}'; # modifier letter hard sign |
|
716
|
12
|
|
|
|
|
17
|
$outchar{chr(0xb8)} = '{inodot}'; # latin small letter dotless i |
|
717
|
12
|
|
|
|
|
36
|
$outchar{chr(0xb9)} = '{pound}'; # pound sign |
|
718
|
12
|
|
|
|
|
19
|
$outchar{chr(0xba)} = '{eth}'; # latin small letter eth |
|
719
|
12
|
|
|
|
|
21
|
$outchar{chr(0xbc)} = '{ohorn}'; # latin small letter o with horn |
|
720
|
12
|
|
|
|
|
16
|
$outchar{chr(0xbd)} = '{uhorn}'; # latin small letter u with horn |
|
721
|
12
|
|
|
|
|
20
|
$outchar{chr(0xc0)} = '{deg}'; # degree sign |
|
722
|
12
|
|
|
|
|
20
|
$outchar{chr(0xc1)} = '{scriptl}'; # latin small letter script l |
|
723
|
12
|
|
|
|
|
18
|
$outchar{chr(0xc2)} = '{phono}'; # sound recording copyright |
|
724
|
12
|
|
|
|
|
17
|
$outchar{chr(0xc3)} = '{copy}'; # copyright sign |
|
725
|
12
|
|
|
|
|
23
|
$outchar{chr(0xc4)} = '{sharp}'; # sharp |
|
726
|
12
|
|
|
|
|
19
|
$outchar{chr(0xc5)} = '{iquest}'; # inverted question mark |
|
727
|
12
|
|
|
|
|
21
|
$outchar{chr(0xc6)} = '{iexcl}'; # inverted exclamation mark |
|
728
|
12
|
|
|
|
|
16
|
$outchar{chr(0xe0)} = '{hooka}'; # combining hook above |
|
729
|
12
|
|
|
|
|
16
|
$outchar{chr(0xe1)} = '{grave}'; # combining grave |
|
730
|
12
|
|
|
|
|
15
|
$outchar{chr(0xe2)} = '{acute}'; # combining acute |
|
731
|
12
|
|
|
|
|
17
|
$outchar{chr(0xe3)} = '{circ}'; # combining circumflex |
|
732
|
12
|
|
|
|
|
16
|
$outchar{chr(0xe4)} = '{tilde}'; # combining tilde |
|
733
|
12
|
|
|
|
|
17
|
$outchar{chr(0xe5)} = '{macr}'; # combining macron |
|
734
|
12
|
|
|
|
|
15
|
$outchar{chr(0xe6)} = '{breve}'; # combining breve |
|
735
|
12
|
|
|
|
|
14
|
$outchar{chr(0xe7)} = '{dot}'; # combining dot above |
|
736
|
12
|
|
|
|
|
17
|
$outchar{chr(0xe8)} = '{uml}'; # combining diaeresis (umlaut) |
|
737
|
12
|
|
|
|
|
28
|
$outchar{chr(0xe9)} = '{caron}'; # combining hacek |
|
738
|
12
|
|
|
|
|
23
|
$outchar{chr(0xea)} = '{ring}'; # combining ring above |
|
739
|
12
|
|
|
|
|
25
|
$outchar{chr(0xeb)} = '{llig}'; # combining ligature left half |
|
740
|
12
|
|
|
|
|
15
|
$outchar{chr(0xec)} = '{rlig}'; # combining ligature right half |
|
741
|
12
|
|
|
|
|
16
|
$outchar{chr(0xed)} = '{rcommaa}'; # combining comma above right |
|
742
|
12
|
|
|
|
|
17
|
$outchar{chr(0xee)} = '{dblac}'; # combining double acute |
|
743
|
12
|
|
|
|
|
18
|
$outchar{chr(0xef)} = '{candra}'; # combining candrabindu |
|
744
|
12
|
|
|
|
|
15
|
$outchar{chr(0xf0)} = '{cedil}'; # combining cedilla |
|
745
|
12
|
|
|
|
|
18
|
$outchar{chr(0xf1)} = '{ogon}'; # combining ogonek |
|
746
|
12
|
|
|
|
|
17
|
$outchar{chr(0xf2)} = '{dotb}'; # combining dot below |
|
747
|
12
|
|
|
|
|
16
|
$outchar{chr(0xf3)} = '{dbldotb}'; # combining double dot below |
|
748
|
12
|
|
|
|
|
15
|
$outchar{chr(0xf4)} = '{ringb}'; # combining ring below |
|
749
|
12
|
|
|
|
|
20
|
$outchar{chr(0xf5)} = '{dblunder}'; # combining double underscore |
|
750
|
12
|
|
|
|
|
31
|
$outchar{chr(0xf6)} = '{under}'; # combining underscore |
|
751
|
12
|
|
|
|
|
17
|
$outchar{chr(0xf7)} = '{commab}'; # combining comma below |
|
752
|
12
|
|
|
|
|
17
|
$outchar{chr(0xf8)} = '{rcedil}'; # combining right cedilla |
|
753
|
12
|
|
|
|
|
15
|
$outchar{chr(0xf9)} = '{breveb}'; # combining breve below |
|
754
|
12
|
|
|
|
|
18
|
$outchar{chr(0xfa)} = '{ldbltil}'; # combining double tilde left half |
|
755
|
12
|
|
|
|
|
18
|
$outchar{chr(0xfb)} = '{rdbltil}'; # combining double tilde right half |
|
756
|
12
|
|
|
|
|
17
|
$outchar{chr(0xfe)} = '{commaa}'; # combining comma above |
|
757
|
12
|
50
|
|
|
|
31
|
if ($MARC::DEBUG) { |
|
758
|
0
|
|
|
|
|
0
|
foreach my $num (sort keys %outchar) { |
|
759
|
0
|
|
|
|
|
0
|
printf "%x = %s\n", ord($num), $outchar{$num}; |
|
760
|
|
|
|
|
|
|
} |
|
761
|
|
|
|
|
|
|
} |
|
762
|
12
|
|
|
|
|
134
|
return \%outchar; |
|
763
|
|
|
|
|
|
|
} #ustext_default |
|
764
|
|
|
|
|
|
|
|
|
765
|
|
|
|
|
|
|
|
|
766
|
|
|
|
|
|
|
#################################################################### |
|
767
|
|
|
|
|
|
|
|
|
768
|
|
|
|
|
|
|
=head2 _maker2char default |
|
769
|
|
|
|
|
|
|
|
|
770
|
|
|
|
|
|
|
_maker2char() -- Translates MARCMaker encoded character into MARC-8 character. |
|
771
|
|
|
|
|
|
|
|
|
772
|
|
|
|
|
|
|
=cut |
|
773
|
|
|
|
|
|
|
|
|
774
|
|
|
|
|
|
|
sub _maker2char { # rec |
|
775
|
13
|
|
|
13
|
|
18
|
my $marc_string = shift; |
|
776
|
13
|
|
|
|
|
14
|
my $charmap = shift; |
|
777
|
13
|
|
|
|
|
35
|
while ($marc_string =~ /{(\w{1,8}?)}/o) { |
|
778
|
75
|
50
|
|
|
|
70
|
if (exists ${$charmap}{$1}) { |
|
|
75
|
|
|
|
|
146
|
|
|
779
|
75
|
|
|
|
|
70
|
$marc_string = join ('', $`, ${$charmap}{$1}, $'); |
|
|
75
|
|
|
|
|
525
|
|
|
780
|
|
|
|
|
|
|
} |
|
781
|
|
|
|
|
|
|
else { |
|
782
|
0
|
|
|
|
|
0
|
$marc_string = join ('', $`, '&', $1, ';', $'); |
|
783
|
|
|
|
|
|
|
} |
|
784
|
|
|
|
|
|
|
} |
|
785
|
|
|
|
|
|
|
# closing curly brace - part 2, permits {lcub}text{rcub} in input |
|
786
|
13
|
|
|
|
|
19
|
$marc_string =~ s/\}/\x7d/go; |
|
787
|
13
|
|
|
|
|
27
|
return $marc_string; |
|
788
|
|
|
|
|
|
|
} |
|
789
|
|
|
|
|
|
|
|
|
790
|
|
|
|
|
|
|
################################ |
|
791
|
|
|
|
|
|
|
### END OF MARCMAKER METHODS ### |
|
792
|
|
|
|
|
|
|
################################ |
|
793
|
|
|
|
|
|
|
|
|
794
|
|
|
|
|
|
|
1; |
|
795
|
|
|
|
|
|
|
|
|
796
|
|
|
|
|
|
|
=head1 RELATED MODULES |
|
797
|
|
|
|
|
|
|
|
|
798
|
|
|
|
|
|
|
L |
|
799
|
|
|
|
|
|
|
|
|
800
|
|
|
|
|
|
|
L |
|
801
|
|
|
|
|
|
|
|
|
802
|
|
|
|
|
|
|
=head1 SEE ALSO |
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
L |
|
805
|
|
|
|
|
|
|
|
|
806
|
|
|
|
|
|
|
L for more information about the |
|
807
|
|
|
|
|
|
|
DOS-based MARCMaker and MARCBreaker programs. |
|
808
|
|
|
|
|
|
|
|
|
809
|
|
|
|
|
|
|
|
|
810
|
|
|
|
|
|
|
The methods in this MARCMaker module are based upon MARC::File::USMARC.pm and MARC::File::MicroLIF.pm. |
|
811
|
|
|
|
|
|
|
Those are distributed with MARC::Record. |
|
812
|
|
|
|
|
|
|
The underlying code is based on the MARCMaker-related methods in MARC.pm. |
|
813
|
|
|
|
|
|
|
|
|
814
|
|
|
|
|
|
|
|
|
815
|
|
|
|
|
|
|
=head1 LICENSE |
|
816
|
|
|
|
|
|
|
|
|
817
|
|
|
|
|
|
|
This code may be distributed under the same terms as Perl itself. |
|
818
|
|
|
|
|
|
|
|
|
819
|
|
|
|
|
|
|
Please note that this module is not a product of or supported by the |
|
820
|
|
|
|
|
|
|
employers of the various contributors to the code. |
|
821
|
|
|
|
|
|
|
|
|
822
|
|
|
|
|
|
|
=head1 AUTHOR |
|
823
|
|
|
|
|
|
|
|
|
824
|
|
|
|
|
|
|
Bryan Baldus |
|
825
|
|
|
|
|
|
|
eijabb@cpan.org |
|
826
|
|
|
|
|
|
|
|
|
827
|
|
|
|
|
|
|
Copyright (c) 2004-2005. |
|
828
|
|
|
|
|
|
|
|
|
829
|
|
|
|
|
|
|
=cut |