File Coverage

blib/lib/MARC/File/MARCMaker.pm
Criterion Covered Total %
statement 270 318 84.9
branch 24 66 36.3
condition 3 12 25.0
subroutine 16 20 80.0
pod 4 5 80.0
total 317 421 75.3


line stmt bran cond sub pod time code
1             #!perl
2              
3             package MARC::File::MARCMaker;
4              
5             =head1 NAME
6              
7             MARC::File::MARCMaker -- Work with MARCMaker/MARCBreaker records.
8              
9             =cut
10              
11 2     2   119051 use strict;
  2         5  
  2         67  
12 2     2   9 use integer;
  2         3  
  2         12  
13              
14 2     2   46 use vars qw( $VERSION $ERROR );
  2         8  
  2         120  
15              
16             $VERSION = 0.05;
17              
18 2     2   10 use MARC::File;
  2         4  
  2         49  
19 2     2   8 use vars qw( @ISA ); @ISA = qw( MARC::File );
  2         4  
  2         90  
20              
21 2     2   10 use MARC::Record qw( LEADER_LEN );
  2         5  
  2         137  
22 2     2   10 use constant SUBFIELD_INDICATOR => "\x24"; #dollar sign
  2         4  
  2         479  
23 2     2   13 use constant END_OF_FIELD => "\n\x3D"; #line break, equals sign
  2         4  
  2         326  
24              
25              
26             =head1 SYNOPSIS
27              
28              
29             use MARC::File::MARCMaker;
30              
31             my $file = MARC::File::MARCMaker->in( $filename );
32              
33             while ( my $marc = $file->next() ) {
34             # Do something
35             }
36             $file->close();
37             undef $file;
38              
39             ####################################################
40              
41             use MARC::File::MARCMaker;
42              
43             ## reading with MARC::Batch
44             my $batch = MARC::Batch->new( 'MARCMaker', $filename );
45             my $record = $batch->next();
46              
47             ## or reading with MARC::File::MARCMaker explicitly
48             my $file = MARC::File::MARCMaker->in( $filename );
49             my $record = $file->next();
50              
51             ## output a single MARC::Record object in MARCMaker format (formatted plain text)
52             #print $record->as_marcmaker(); #goal syntax
53             print MARC::File::MARCMaker->encode($record); #current syntax
54              
55             =head1 DESCRIPTION
56              
57             The MARC-File-MARCMaker distribution is an extension to the MARC-Record
58             distribution for working with MARC21 data using the format used by the Library
59             of Congress MARCMaker and MARCBreaker programs.
60              
61             More information may be obtained here: L
62              
63             You must have MARC::Record installed to use MARC::File::MARCMaker. In fact
64             once you install the MARC-File-MARCMaker distribution you will most likely not
65             use it directly, but will have an additional file format available to you
66             when you use MARC::Batch.
67              
68             This module is based on code from the original MARC.pm module, as well as the
69             MARC::Record distribution's MARC::File::USMARC and MARC::File::MicroLIF modules.
70              
71             =head2 DEVIATIONS FROM LC'S DOCUMENTATION
72              
73             LC's MARCMaker/MARCBreaker programs require files to have DOS line endings.
74             This module should be capable of reading any type of line ending.
75             It converts existing endings to "\n", the endings of the platform.
76              
77             Initial version may or may not work well with line breaks in the middle of a field.
78              
79             MARCMaker version of the LDR (record size bytes) will not necessarily be dependable, and should not be relied upon.
80              
81             =head1 EXPORT
82              
83             None.
84              
85             =head1 TODO
86              
87             Do limit tests in filling the buffer and getting chunks. Seems to work for first fill, but may fail on larger reads/multiple reads to fill the buffer.
88              
89             Test special characters (those requiring escapes). Initial version may not fully support non-English characters. All MARC-8 may work, Unicode support is untested and unassured.
90              
91             Implement better character encoding and decoding, including Unicode support.
92              
93             Work on character set internal subs for both input and output. Currently, the original subs from MARC.pm are being used essentially as-is.
94              
95             Error checking for line breaks vs. new fields? Probably not possible, since line breaks are allowed within fields, so checking for missing equals sign is not really possible.
96              
97             Account for multiple occurences of =LDR in a single record, usually caused by lack of blank line between records, so records get mushed together. Also check for multiple =001s.
98              
99             Determine why the constant SUBFIELD_INDICATOR can't be used in the split into subfields.
100              
101             Work on encode().
102              
103             Allow as_marcmaker() to be called with either MARC::Field or MARC::Record objects, returning the appropriate result. Desired behavior is as_usmarc() methods in MARC::Record and MARC::Field
104              
105             Decode should mostly be working. Test for correctness.
106              
107             Remove unnecessary code and documentation, remnants of the initial development of the module. Move internal subs to end of module?
108              
109             =head1 VERSION HISTORY
110              
111             Version 0.05: First CPAN release, Oct. 30, 2005.
112              
113             Version 0.04: Updated Oct. 22, 2005. Released Oct. 23, 2005.
114              
115             -Initial commit to CVS on SourceForge
116             -Misc. cleanup.
117              
118             Version 0.03: Updated Aug. 2, 2005. Released Aug. 14, 2005.
119              
120             -Revised decode() to fix problem with dollar sign conversion from mnemonics to characters.
121              
122             Version 0.02: Updated July 12-13, 2005. Released July 16, 2005.
123              
124             -Preliminary version of encode() for fields and records
125              
126             Version 0.01: Initial version, Nov. 21, 2004-Mar. 7, 2005. Released Mar. 7, 2005.
127              
128             -Basic version, translates .mrk format file into MARC::Record objects.
129              
130             =for internal
131              
132             ############################################################
133             This section is copied from MARC::File::MicroLIF.
134             ############################################################
135              
136             The buffer must be large enough to handle any valid record because
137             we don't check for cases like a CR/LF pair or an end-of-record/CR/LF
138             trio being only partially in the buffer.
139              
140             The max valid record is the max MARC record size (99999) plus one
141             or two characters per tag (CR, LF, or CR/LF). It's hard to say
142             what the max number of tags is, so here we use 6000. (6000 tags
143             can be squeezed into a MARC record only if every tag has only one
144             subfield containing a maximum of one character, or if data from
145             multiple tags overlaps in the MARC record body. We're pretty safe.)
146              
147             =cut
148              
149 2     2   12 use constant BUFFER_MIN => (99999 + 6000 * 2);
  2         2  
  2         14358  
150              
151             =head1 METHODS
152              
153             =cut
154              
155             ##################################
156             ### START OF MARCMAKER METHODS ###
157             ##################################
158              
159             =head2 _next (merged from MicroLIF and USMARC)
160              
161             Called by MARC::File::next().
162              
163             =cut
164              
165             sub _next { #done for MARCMaker?
166              
167 0     0   0 my $self = shift;
168              
169             #_get_chunk will separate records from each other and should convert
170             # line endings to those of the platform.
171 0         0 my $makerrec = $self->_get_chunk();
172             # for ease, make sure the newlines match this platform
173 0 0       0 $makerrec =~ s/[\x0d\x0a]+/\n/g if defined $makerrec;
174              
175 0         0 return $makerrec;
176             } #_next
177              
178             =head2 decode( $string [, \&filter_func ] )
179              
180             (description based on MARC::File::USMARC::decode POD information)
181              
182             Constructor for handling data from a MARCMaker file. This function takes care
183             of all the tag directory parsing & mangling.
184              
185             Any warnings or coercions can be checked in the C function.
186              
187             The C<$filter_func> is an optional reference to a user-supplied function
188             that determines on a tag-by-tag basis if you want the tag passed to it
189             to be put into the MARC record. The function is passed the tag number
190             and the raw tag data, and must return a boolean. The return of a true
191             value tells MARC::File::MARCMaker::decode that the tag should get put into
192             the resulting MARC record.
193              
194             For example, if you only want title and subject tags in your MARC record,
195             try this:
196              
197             sub filter {
198             my ($tagno,$tagdata) = @_;
199              
200             return ($tagno == 245) || ($tagno >= 600 && $tagno <= 699);
201             }
202              
203             my $marc = MARC::File::MARCMaker->decode( $string, \&filter );
204              
205             Why would you want to do such a thing? The big reason is that creating
206             fields is processor-intensive, and if your program is doing read-only
207             data analysis and needs to be as fast as possible, you can save time by
208             not creating fields that you'll be ignoring anyway.
209              
210             Another possible use is if you're only interested in printing certain
211             tags from the record, then you can filter them when you read from disc
212             and not have to delete unwanted tags yourself.
213              
214             =cut
215              
216              
217             sub decode { #MARCMaker
218              
219 1     1 1 22 my $text;
220 1         2 my $location = '';
221              
222             ## decode can be called in a variety of ways
223             ## $object->decode( $string )
224             ## MARC::File::MARCMaker->decode( $string )
225             ## MARC::File::MARCMaker::decode( $string )
226             ## this bit of code covers all three
227              
228 1         2 my $self = shift;
229 1 50       5 if ( ref($self) =~ /^MARC::File/ ) {
230 0         0 $location = 'in record '.$self->{recnum};
231 0         0 $text = shift;
232             } else {
233 1         2 $location = 'in record 1';
234 1 50       5 $text = $self=~/MARC::File/ ? shift : $self;
235             }
236              
237 1         3 my $filter_func = shift;
238              
239             # for ease, make the newlines match this platform
240             # this has probably already been taken care of at least once, but just in case
241 1 50       25 $text =~ s/[\x0d\x0a]+/\n/g if defined $text;
242              
243 1         6 my $marc = MARC::Record->new();
244              
245             #report improperly passed $text (undefined $text)
246 1 50       12 return $marc->_warn( "Unable to retrieve a record string $location" ) unless defined $text;
247              
248             #############################
249             #### Charset work needed ####
250             #############################
251             #use default charset until that function is revised
252 1         3 my $charset = usmarc_default();
253             #############################
254             #############################
255              
256              
257             #Split each record on the "\n=" into the @lines array
258 1         10 my @lines=split END_OF_FIELD, $text;
259 1         2 my $leader = shift @lines;
260 1 50       5 unless ($leader =~ /^=LDR /) {
261 0         0 $marc->_warn( "First line must begin with =LDR" );
262             }
263            
264 1         3 $leader=~s/^=LDR //; #Remove "=LDR "
265 1         3 $leader=~s/[\n\r]//g; #remove line endings
266 1         2 $leader=~s/\\/ /g; # substitute " " for \
267             #report error if result is not 24 bytes long
268 1 50       4 unless (length($leader) == LEADER_LEN) {
269 0         0 $marc->_warn( "Leader must be exactly 24 bytes long" );
270             }
271              
272             #add leader to the record
273 1         6 $marc->leader( substr( $leader, 0, LEADER_LEN ) );
274              
275 1         9 LINE: foreach my $line (@lines) {
276             #Remove newlines from $line ; and also substitute " " for \
277 7         131 $line=~s/[\n\r]//g;
278 7         23 $line=~s/\\/ /g;
279             #get the tag name
280 7         13 my $tagno = substr($line,0,3);
281             # Check tag validity
282 7 50       17 ( $tagno =~ /^[0-9A-Za-z]{3}$/ ) or $marc->_warn( "Invalid tag in $location: \"$tagno\"" );
283              
284 7 100 66     38 if ( ($tagno =~ /^\d+$/ ) && ( $tagno < 10 ) ) {
285             #translate characters for tag data
286             #revise line below as needed for _maker2char
287 2         6 my $tagdata = _maker2char ( substr( $line, 5 ), $charset );
288             #filter_func implementation needs work
289 2 50       5 if ( $filter_func ) {
290 0 0       0 next LINE unless $filter_func->( $tagno, $tagdata );
291             }
292             #add field to record
293 2         6 $marc->append_fields( MARC::Field->new( $tagno, $tagdata ) );
294             } #if $tagno < 10
295             else {
296             #translate characters for subfield data
297             #get indicators
298 5         8 my $ind1 = substr( $line, 5, 1 );
299 5         7 my $ind2 = substr( $line, 6, 1 );
300 5         8 my $tagdata = substr( $line, 7 );
301             #report error if first character of tagdata is not a subfield indicator ($)
302 5 50       16 $marc->_warn( "First character of subfield data must be a subfield indicator (dollar sign), $tagdata, $location for tag $tagno" ) unless ($tagdata =~ /^\$/ );
303 5 50       9 if ( $filter_func ) {
304 0 0       0 next LINE unless $filter_func->( $tagno, $tagdata );
305             }
306              
307             #why doesn't SUBFIELD_INDICATOR work in the split?
308 5         17 my @subfields_mnemonic = split( /\x24/, $tagdata );
309             #convert characters from mnemonics to characters
310 5         7 my @subfields = map {_maker2char($_, $charset)} @subfields_mnemonic;
  11         17  
311            
312             #is there a better way to deal with the empty first item?
313 5         8 my $empty = shift @subfields;
314 5 50       10 $marc->_warn( "Subfield data appears before first subfield? $location in $tagno" ) if $empty;
315              
316             # Split the subfield data into subfield name and data pairs
317 5         6 my @subfield_data;
318 5         5 for ( @subfields ) {
319 6 50       11 if ( length > 0 ) {
320 6         16 push( @subfield_data, substr($_,0,1),substr($_,1) );
321             } else {
322 0         0 $marc->_warn( "Entirely empty subfield found in tag $tagno" );
323             }
324             } #for @subfields
325              
326 5 50       14 if ( !@subfield_data ) {
327 0         0 $marc->_warn( "no subfield data found $location for tag $tagno" );
328 0         0 next;
329             }
330              
331 5         16 my $field = MARC::Field->new($tagno, $ind1, $ind2, @subfield_data );
332 5 50       182 if ( $field->warnings() ) {
333 0         0 $marc->_warn( $field->warnings() );
334             }
335 5         30 $marc->append_fields( $field );
336             }
337             } # looping through all the fields
338              
339              
340 1         33 return $marc;
341              
342             } #decode MARCMaker
343              
344             =head2 update_leader() #from USMARC
345              
346             This may be unnecessary code. Delete this section if that is the case.
347              
348             If any changes get made to the MARC record, the first 5 bytes of the
349             leader (the length) will be invalid. This function updates the
350             leader with the correct length of the record as it would be if
351             written out to a file.
352              
353              
354             sub update_leader() { #from USMARC
355             my $self = shift;
356              
357             my (undef,undef,$reclen,$baseaddress) = $self->_build_tag_directory();
358              
359             $self->_set_leader_lengths( $reclen, $baseaddress );
360             } #updated_leader() from USMARC
361              
362             =head2 encode() #based on MARC::File::USMARC
363              
364             Returns a string of characters suitable for writing out to a MARCMaker file,
365             including the leader, directory and all the fields.
366              
367             Uses as_marcmaker() below to build each field.
368              
369             =cut
370              
371             sub encode { #MARCMaker, based on USMARC's encode()
372 1     1 1 623 my $marc = shift;
373 1 50 33     8 $marc = shift if (ref($marc)||$marc) =~ /^MARC::File/;
374 1         2 my $field_string = '';
375             #convert each field (after the leader) to MARCMaker format
376 1         6 foreach my $field ($marc->fields()) {
377 7         355 $field_string .= $field->MARC::File::MARCMaker::as_marcmaker();
378             } #foreach field in record
379            
380             # Glomp it all together
381 1         38 return join("", "=LDR ", $marc->leader, "\n", $field_string, "\n");
382              
383             } #encode from USMARC
384              
385              
386             =head2 as_marcmaker()
387              
388             Based on MARC::Field::as_usmarc().
389             Turns a MARC::Field into a MARCMaker formatted field string.
390              
391             =head2 TODO (as_marcmaker())
392              
393             -Change field encoding portion of as_marcmaker() to internal _as_marcmaker()
394             -Implement as_marcmaker() as wrapper for MARC::Record object and MARC::Field object encoding into MARCMaker format.
395              
396              
397             =cut
398              
399             sub as_marcmaker() {
400 12     12 1 3645 my $self = shift;
401             # $self = shift if (ref($self)||$self) =~ /^MARC::File/;
402              
403 12 50       36 die "Wanted a MARC::Field but got a ", ref($self) unless ref($self) eq "MARC::Field";
404              
405 12         22 my $charset = ustext_default();
406              
407             # Tags < 010 are pretty easy
408 12 100       50 if ( $self->is_control_field ) {
  0 50       0  
409             #convert characters to MARCMaker codes
410 2         19 my $field_data = (_char2maker($self->data(), $charset));
411             #swap blank spaces for backslash ( \ )
412 2         11 $field_data =~ s/ /\\/g;
413             #return formatted field
414 2         9 return sprintf "=%s %s\n", $self->tag(), $field_data;
415             } #if control field
416             elsif ($self->tag() eq '000') {print "Leader?\n"} #leader?
417             else {
418 10         127 my @subs;
419 10         14 my @subdata = @{$self->{_subfields}};
  10         34  
420 10         42 while ( @subdata ) {
421             #convert characters to MARCMaker codes as each subfield goes by
422 11         33 push( @subs, join( "", SUBFIELD_INDICATOR, shift @subdata, (_char2maker(shift @subdata, $charset))) );
423             } # while
424              
425 10         46 my $ind1 = $self->indicator(1);
426 10         128 my $ind2 = $self->indicator(2);
427             #swap blank for backslash ( \ )
428 10         105 $ind1 =~ s/ /\\/g;
429 10         22 $ind2 =~ s/ /\\/g;
430              
431             return
432 10         31 join ("", "=", $self->tag(), " ",
433             $ind1,
434             $ind2,
435             @subs,
436             "\n",
437             );
438             }
439             } #as_usmarc() #MARC::Field
440              
441              
442             ####################################
443             ###### END USMARC subs #############
444             ####################################
445              
446              
447              
448              
449             #########################################
450             ### begin internal subs from MicroLIF ###
451             #########################################
452              
453             #################################
454             # fill the buffer if we need to #
455             #################################
456              
457             sub _fill_buffer { #done for MARCMaker?
458              
459 0     0   0 my $self = shift;
460 0         0 my $ok = 1;
461              
462 0 0 0     0 if ( !$self->{exhaustedfh} && length( $self->{inputbuf} ) < BUFFER_MIN ) {
463             # append the next chunk of bytes to the buffer
464 0         0 my $read = read $self->{fh}, $self->{inputbuf}, BUFFER_MIN, length($self->{inputbuf});
465             #convert line endings within the input buffer
466 0 0       0 if ($self->{inputbuf} =~ /\x0d\x0a/s) {
    0          
    0          
467 0         0 $self->{inputbuf} =~ s/\x0d\x0a/\n/sg;
468             } #if DOS endings
469             elsif ($self->{inputbuf} =~ /\x0a/) {
470 0         0 $self->{inputbuf} =~ s/\x0a/\n/sg;
471             } #elsif Unix endings
472             elsif ($self->{inputbuf} =~ /\x0d/) {
473 0         0 $self->{inputbuf} =~ s/\x0d/\n/sg;
474             } #elsif Macintosh endings
475              
476             #remove extra blank lines between records
477 0         0 $self->{inputbuf} =~ s/\n\s*\n+/\n\n/g;
478              
479 0 0       0 if ( !defined $read ) {
    0          
480             # error!
481 0         0 $ok = undef;
482 0         0 $MARC::File::ERROR = "error reading from file " . $self->{filename};
483             }
484             elsif ( $read < 1 ) {
485 0         0 $self->{exhaustedfh} = 1;
486             }
487             }
488              
489 0         0 return $ok;
490             }
491              
492             =for internal
493              
494             =head2 _get_chunk( ) #for MARCMaker
495              
496             Gets the next chunk of data (which should be a single complete record).
497              
498             All extra \r and \n are stripped and line endings are converted to those of the platform (\n).
499              
500             =cut
501              
502             sub _get_chunk { #done for MARCMaker?
503              
504 0     0   0 my $self = shift;
505              
506 0         0 my $chunk = undef;
507            
508             #read from the file and fill the input buffer
509 0 0 0     0 if ( $self->_fill_buffer() && length($self->{inputbuf}) > 0 ) {
510              
511             #retrieve the next record
512 0         0 ($chunk) = split /\n\n/, $self->{inputbuf}, 0;
513             #remove the chunk and record separator from the input buffer
514 0         0 $self->{inputbuf} = substr( $self->{inputbuf}, length($chunk)+length("\n\n") );
515 0 0       0 if ( !$chunk ) {
516 0         0 $chunk = $self->{inputbuf};
517 0         0 $self->{inputbuf} = '';
518 0         0 $self->{exhaustedfh} = 1;
519             } #if not chunk
520              
521             } #if buffer can be filled and has characters
522 0         0 return $chunk;
523             } #_get_chunk()
524              
525             =head2 _unget_chunk ( ) #done for MARCMaker?
526              
527             $chunk is put at the beginning of the buffer followed
528             by two line endings ("\n\n") as a record separator.
529              
530             I don't know that this sub is necessary.
531              
532             =cut
533              
534             sub _unget_chunk {
535 0     0   0 my $self = shift;
536 0         0 my $chunk = shift;
537 0         0 $self->{inputbuf} = $chunk . $self->{inputbuf};
538 0         0 return;
539             }
540              
541              
542             #######################################
543             ### End internal subs from MicroLIF ###
544             #######################################
545              
546             #######################################
547             ### Character handling from MARC.pm ###
548             #######################################
549              
550             =head2 _char2maker
551              
552             Pass in string of characters from a MARC record and a character map ($charset, or usmarc_default() by default).
553             Returns string of characters encoded in MARCMaker format.
554             (e.g. replaces '$' with {dollar})
555              
556             =cut
557              
558             sub _char2maker { #deal with charmap default
559 13     13   1430 my @marc_string = split (//, shift);
560 13         171 my $charmap = shift; #|| $charset; #add default value
561 13         28 my $maker_string = join ('', map {${$charmap}{$_} } @marc_string);
  4608         6169  
  4608         10063  
562             #replace html-style entities (´) with code in curly braces ({acute})
563 13         345 while ($maker_string =~ s/(&)([^ ]{1,7}?)(;)/{$2}/o) {}
564              
565 13         424 return $maker_string;
566             } #_char2maker
567              
568             ######################
569              
570              
571             =head2 Default charset
572              
573             usmarc_default() -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding.
574              
575             Used by _maker2char.
576              
577             This perhaps should be an internal _usmarc_default().
578              
579             =cut
580              
581             sub usmarc_default { # rec
582 1     1 0 9 my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb,
583             0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff);
584 1         5 my %inchar = map {sprintf ("%2.2X",int $_), chr($_)} @hexchar;
  92         207  
585              
586 1         10 $inchar{esc} = chr(0x1b); # escape
587 1         2 $inchar{dollar} = chr(0x24); # dollar sign
588 1         3 $inchar{curren} = chr(0x24); # dollar sign - alternate
589 1         1 $inchar{24} = chr(0x24); # dollar sign - alternate
590 1         2 $inchar{bsol} = chr(0x5c); # back slash (reverse solidus)
591 1         2 $inchar{lcub} = chr(0x7b); # opening curly brace
592 1         2 $inchar{rcub} = "}"; # closing curly brace - part 1
593 1         2 $inchar{joiner} = chr(0x8d); # zero width joiner
594 1         2 $inchar{nonjoin} = chr(0x8e); # zero width non-joiner
595 1         2 $inchar{Lstrok} = chr(0xa1); # latin capital letter l with stroke
596 1         2 $inchar{Ostrok} = chr(0xa2); # latin capital letter o with stroke
597 1         3 $inchar{Dstrok} = chr(0xa3); # latin capital letter d with stroke
598 1         1 $inchar{THORN} = chr(0xa4); # latin capital letter thorn (icelandic)
599 1         2 $inchar{AElig} = chr(0xa5); # latin capital letter AE
600 1         1 $inchar{OElig} = chr(0xa6); # latin capital letter OE
601 1         3 $inchar{softsign} = chr(0xa7); # modifier letter soft sign
602 1         2 $inchar{middot} = chr(0xa8); # middle dot
603 1         2 $inchar{flat} = chr(0xa9); # musical flat sign
604 1         2 $inchar{reg} = chr(0xaa); # registered sign
605 1         2 $inchar{plusmn} = chr(0xab); # plus-minus sign
606 1         1 $inchar{Ohorn} = chr(0xac); # latin capital letter o with horn
607 1         2 $inchar{Uhorn} = chr(0xad); # latin capital letter u with horn
608 1         2 $inchar{mlrhring} = chr(0xae); # modifier letter right half ring (alif)
609 1         2 $inchar{mllhring} = chr(0xb0); # modifier letter left half ring (ayn)
610 1         2 $inchar{lstrok} = chr(0xb1); # latin small letter l with stroke
611 1         2 $inchar{ostrok} = chr(0xb2); # latin small letter o with stroke
612 1         2 $inchar{dstrok} = chr(0xb3); # latin small letter d with stroke
613 1         1 $inchar{thorn} = chr(0xb4); # latin small letter thorn (icelandic)
614 1         2 $inchar{aelig} = chr(0xb5); # latin small letter ae
615 1         2 $inchar{oelig} = chr(0xb6); # latin small letter oe
616 1         2 $inchar{hardsign} = chr(0xb7); # modifier letter hard sign
617 1         1 $inchar{inodot} = chr(0xb8); # latin small letter dotless i
618 1         2 $inchar{pound} = chr(0xb9); # pound sign
619 1         27 $inchar{eth} = chr(0xba); # latin small letter eth
620 1         3 $inchar{ohorn} = chr(0xbc); # latin small letter o with horn
621 1         7 $inchar{uhorn} = chr(0xbd); # latin small letter u with horn
622 1         3 $inchar{deg} = chr(0xc0); # degree sign
623 1         2 $inchar{scriptl} = chr(0xc1); # latin small letter script l
624 1         3 $inchar{phono} = chr(0xc2); # sound recording copyright
625 1         2 $inchar{copy} = chr(0xc3); # copyright sign
626 1         3 $inchar{sharp} = chr(0xc4); # sharp
627 1         2 $inchar{iquest} = chr(0xc5); # inverted question mark
628 1         2 $inchar{iexcl} = chr(0xc6); # inverted exclamation mark
629 1         2 $inchar{hooka} = chr(0xe0); # combining hook above
630 1         3 $inchar{grave} = chr(0xe1); # combining grave
631 1         1 $inchar{acute} = chr(0xe2); # combining acute
632 1         3 $inchar{circ} = chr(0xe3); # combining circumflex
633 1         2 $inchar{tilde} = chr(0xe4); # combining tilde
634 1         2 $inchar{macr} = chr(0xe5); # combining macron
635 1         2 $inchar{breve} = chr(0xe6); # combining breve
636 1         2 $inchar{dot} = chr(0xe7); # combining dot above
637 1         3 $inchar{diaer} = chr(0xe8); # combining diaeresis
638 1         2 $inchar{uml} = chr(0xe8); # combining umlaut
639 1         2 $inchar{caron} = chr(0xe9); # combining hacek
640 1         2 $inchar{ring} = chr(0xea); # combining ring above
641 1         1 $inchar{llig} = chr(0xeb); # combining ligature left half
642 1         2 $inchar{rlig} = chr(0xec); # combining ligature right half
643 1         2 $inchar{rcommaa} = chr(0xed); # combining comma above right
644 1         2 $inchar{dblac} = chr(0xee); # combining double acute
645 1         2 $inchar{candra} = chr(0xef); # combining candrabindu
646 1         2 $inchar{cedil} = chr(0xf0); # combining cedilla
647 1         2 $inchar{ogon} = chr(0xf1); # combining ogonek
648 1         2 $inchar{dotb} = chr(0xf2); # combining dot below
649 1         2 $inchar{dbldotb} = chr(0xf3); # combining double dot below
650 1         2 $inchar{ringb} = chr(0xf4); # combining ring below
651 1         1 $inchar{dblunder} = chr(0xf5); # combining double underscore
652 1         2 $inchar{under} = chr(0xf6); # combining underscore
653 1         2 $inchar{commab} = chr(0xf7); # combining comma below
654 1         2 $inchar{rcedil} = chr(0xf8); # combining right cedilla
655 1         2 $inchar{breveb} = chr(0xf9); # combining breve below
656 1         1 $inchar{ldbltil} = chr(0xfa); # combining double tilde left half
657 1         2 $inchar{rdbltil} = chr(0xfb); # combining double tilde right half
658 1         2 $inchar{commaa} = chr(0xfe); # combining comma above
659 1 50       3 if ($MARC::DEBUG) {
660 0         0 foreach my $str (sort keys %inchar) {
661 0         0 printf "%s = %x\n", $str, ord($inchar{$str});
662             }
663             }
664 1         5 return \%inchar;
665             } #usmarc_default
666              
667             ###################################################
668              
669             =head2 ustext_default
670              
671             ustext_default -- Originally from MARC.pm. Offers default mnemonics for character encoding and decoding.
672              
673             Used by _char2maker.
674              
675             This perhaps should be an internal _ustext_default().
676              
677             =cut
678              
679             sub ustext_default {
680 12     12 1 109 my @hexchar = (0x00..0x1a,0x1c,0x7f..0x8c,0x8f..0xa0,0xaf,0xbb,
681             0xbe,0xbf,0xc7..0xdf,0xfc,0xfd,0xff);
682 12         24 my %outchar = map {chr($_), sprintf ("{%2.2X}",int $_)} @hexchar;
  1104         3599  
683              
684 12         139 my @ascchar = map {chr($_)} (0x20..0x23,0x25..0x7a,0x7c,0x7e);
  1104         1829  
685 12         64 foreach my $asc (@ascchar) { $outchar{$asc} = $asc;}
  1104         1901  
686              
687 12         28 $outchar{chr(0x1b)} = '{esc}'; # escape
688 12         21 $outchar{chr(0x24)} = '{dollar}'; # dollar sign
689 12         22 $outchar{chr(0x5c)} = '{bsol}'; # back slash (reverse solidus)
690 12         51 $outchar{chr(0x7b)} = '{lcub}'; # opening curly brace
691 12         21 $outchar{chr(0x7d)} = '{rcub}'; # closing curly brace
692 12         20 $outchar{chr(0x8d)} = '{joiner}'; # zero width joiner
693 12         16 $outchar{chr(0x8e)} = '{nonjoin}'; # zero width non-joiner
694 12         18 $outchar{chr(0xa1)} = '{Lstrok}'; # latin capital letter l with stroke
695 12         21 $outchar{chr(0xa2)} = '{Ostrok}'; # latin capital letter o with stroke
696 12         15 $outchar{chr(0xa3)} = '{Dstrok}'; # latin capital letter d with stroke
697 12         21 $outchar{chr(0xa4)} = '{THORN}'; # latin capital letter thorn (icelandic)
698 12         16 $outchar{chr(0xa5)} = '{AElig}'; # latin capital letter AE
699 12         15 $outchar{chr(0xa6)} = '{OElig}'; # latin capital letter OE
700 12         21 $outchar{chr(0xa7)} = '{softsign}'; # modifier letter soft sign
701 12         17 $outchar{chr(0xa8)} = '{middot}'; # middle dot
702 12         17 $outchar{chr(0xa9)} = '{flat}'; # musical flat sign
703 12         17 $outchar{chr(0xaa)} = '{reg}'; # registered sign
704 12         19 $outchar{chr(0xab)} = '{plusmn}'; # plus-minus sign
705 12         35 $outchar{chr(0xac)} = '{Ohorn}'; # latin capital letter o with horn
706 12         16 $outchar{chr(0xad)} = '{Uhorn}'; # latin capital letter u with horn
707 12         19 $outchar{chr(0xae)} = '{mlrhring}'; # modifier letter right half ring (alif)
708 12         17 $outchar{chr(0xb0)} = '{mllhring}'; # modifier letter left half ring (ayn)
709 12         14 $outchar{chr(0xb1)} = '{lstrok}'; # latin small letter l with stroke
710 12         18 $outchar{chr(0xb2)} = '{ostrok}'; # latin small letter o with stroke
711 12         17 $outchar{chr(0xb3)} = '{dstrok}'; # latin small letter d with stroke
712 12         25 $outchar{chr(0xb4)} = '{thorn}'; # latin small letter thorn (icelandic)
713 12         17 $outchar{chr(0xb5)} = '{aelig}'; # latin small letter ae
714 12         18 $outchar{chr(0xb6)} = '{oelig}'; # latin small letter oe
715 12         17 $outchar{chr(0xb7)} = '{hardsign}'; # modifier letter hard sign
716 12         17 $outchar{chr(0xb8)} = '{inodot}'; # latin small letter dotless i
717 12         36 $outchar{chr(0xb9)} = '{pound}'; # pound sign
718 12         19 $outchar{chr(0xba)} = '{eth}'; # latin small letter eth
719 12         21 $outchar{chr(0xbc)} = '{ohorn}'; # latin small letter o with horn
720 12         16 $outchar{chr(0xbd)} = '{uhorn}'; # latin small letter u with horn
721 12         20 $outchar{chr(0xc0)} = '{deg}'; # degree sign
722 12         20 $outchar{chr(0xc1)} = '{scriptl}'; # latin small letter script l
723 12         18 $outchar{chr(0xc2)} = '{phono}'; # sound recording copyright
724 12         17 $outchar{chr(0xc3)} = '{copy}'; # copyright sign
725 12         23 $outchar{chr(0xc4)} = '{sharp}'; # sharp
726 12         19 $outchar{chr(0xc5)} = '{iquest}'; # inverted question mark
727 12         21 $outchar{chr(0xc6)} = '{iexcl}'; # inverted exclamation mark
728 12         16 $outchar{chr(0xe0)} = '{hooka}'; # combining hook above
729 12         16 $outchar{chr(0xe1)} = '{grave}'; # combining grave
730 12         15 $outchar{chr(0xe2)} = '{acute}'; # combining acute
731 12         17 $outchar{chr(0xe3)} = '{circ}'; # combining circumflex
732 12         16 $outchar{chr(0xe4)} = '{tilde}'; # combining tilde
733 12         17 $outchar{chr(0xe5)} = '{macr}'; # combining macron
734 12         15 $outchar{chr(0xe6)} = '{breve}'; # combining breve
735 12         14 $outchar{chr(0xe7)} = '{dot}'; # combining dot above
736 12         17 $outchar{chr(0xe8)} = '{uml}'; # combining diaeresis (umlaut)
737 12         28 $outchar{chr(0xe9)} = '{caron}'; # combining hacek
738 12         23 $outchar{chr(0xea)} = '{ring}'; # combining ring above
739 12         25 $outchar{chr(0xeb)} = '{llig}'; # combining ligature left half
740 12         15 $outchar{chr(0xec)} = '{rlig}'; # combining ligature right half
741 12         16 $outchar{chr(0xed)} = '{rcommaa}'; # combining comma above right
742 12         17 $outchar{chr(0xee)} = '{dblac}'; # combining double acute
743 12         18 $outchar{chr(0xef)} = '{candra}'; # combining candrabindu
744 12         15 $outchar{chr(0xf0)} = '{cedil}'; # combining cedilla
745 12         18 $outchar{chr(0xf1)} = '{ogon}'; # combining ogonek
746 12         17 $outchar{chr(0xf2)} = '{dotb}'; # combining dot below
747 12         16 $outchar{chr(0xf3)} = '{dbldotb}'; # combining double dot below
748 12         15 $outchar{chr(0xf4)} = '{ringb}'; # combining ring below
749 12         20 $outchar{chr(0xf5)} = '{dblunder}'; # combining double underscore
750 12         31 $outchar{chr(0xf6)} = '{under}'; # combining underscore
751 12         17 $outchar{chr(0xf7)} = '{commab}'; # combining comma below
752 12         17 $outchar{chr(0xf8)} = '{rcedil}'; # combining right cedilla
753 12         15 $outchar{chr(0xf9)} = '{breveb}'; # combining breve below
754 12         18 $outchar{chr(0xfa)} = '{ldbltil}'; # combining double tilde left half
755 12         18 $outchar{chr(0xfb)} = '{rdbltil}'; # combining double tilde right half
756 12         17 $outchar{chr(0xfe)} = '{commaa}'; # combining comma above
757 12 50       31 if ($MARC::DEBUG) {
758 0         0 foreach my $num (sort keys %outchar) {
759 0         0 printf "%x = %s\n", ord($num), $outchar{$num};
760             }
761             }
762 12         134 return \%outchar;
763             } #ustext_default
764              
765              
766             ####################################################################
767              
768             =head2 _maker2char default
769              
770             _maker2char() -- Translates MARCMaker encoded character into MARC-8 character.
771              
772             =cut
773              
774             sub _maker2char { # rec
775 13     13   18 my $marc_string = shift;
776 13         14 my $charmap = shift;
777 13         35 while ($marc_string =~ /{(\w{1,8}?)}/o) {
778 75 50       70 if (exists ${$charmap}{$1}) {
  75         146  
779 75         70 $marc_string = join ('', $`, ${$charmap}{$1}, $');
  75         525  
780             }
781             else {
782 0         0 $marc_string = join ('', $`, '&', $1, ';', $');
783             }
784             }
785             # closing curly brace - part 2, permits {lcub}text{rcub} in input
786 13         19 $marc_string =~ s/\}/\x7d/go;
787 13         27 return $marc_string;
788             }
789              
790             ################################
791             ### END OF MARCMAKER METHODS ###
792             ################################
793              
794             1;
795              
796             =head1 RELATED MODULES
797              
798             L
799              
800             L
801              
802             =head1 SEE ALSO
803              
804             L
805              
806             L for more information about the
807             DOS-based MARCMaker and MARCBreaker programs.
808              
809              
810             The methods in this MARCMaker module are based upon MARC::File::USMARC.pm and MARC::File::MicroLIF.pm.
811             Those are distributed with MARC::Record.
812             The underlying code is based on the MARCMaker-related methods in MARC.pm.
813              
814              
815             =head1 LICENSE
816              
817             This code may be distributed under the same terms as Perl itself.
818              
819             Please note that this module is not a product of or supported by the
820             employers of the various contributors to the code.
821              
822             =head1 AUTHOR
823              
824             Bryan Baldus
825             eijabb@cpan.org
826              
827             Copyright (c) 2004-2005.
828              
829             =cut