File Coverage

blib/lib/Treex/Block/Read/BaseAlignedTextReader.pm
Criterion Covered Total %
statement 2 4 50.0
branch n/a
condition n/a
subroutine 2 2 100.0
pod n/a
total 4 6 66.6


line stmt bran cond sub pod time code
1             package Treex::Block::Read::BaseAlignedTextReader;
2             BEGIN {
3 1     1   912 $Treex::Block::Read::BaseAlignedTextReader::VERSION = '0.08170';
4             }
5 1     1   438 use Moose;
  0            
  0            
6             use Treex::Core::Common;
7             extends 'Treex::Block::Read::BaseAlignedReader';
8             use File::Slurp;
9              
10             #has lines_per_doc => ( isa => 'Int', is => 'ro', default => 0 );
11             #has merge_files => ( isa => 'Bool', is => 'ro', default => 0 );
12              
13             #sub BUILD {
14             # my ($self) = @_;
15             # if ( $self->lines_per_doc ) {
16             # $self->set_is_one_doc_per_file(0);
17             # }
18             # return;
19             #}
20              
# Load the contents of the next group of aligned files.
#
# Asks next_filenames() for the next label => filename pairs and returns
# early (bare return) when that list is empty. Otherwise returns a hashref
# with the same keys, whose values are the contents of the corresponding
# files, each slurped into a single string.
#
# The special filename '-' is read from STDIN; any other name is read
# from disk with the 'encoding(utf8)' binmode.
sub next_document_texts {
    my ($self) = @_;

    my %filename_for = $self->next_filenames()
        or return;

    # TODO: option lines_per_document is not implemented for aligned
    # readers yet, so every file is always loaded as a whole.
    my %text_for;
    for my $label ( keys %filename_for ) {
        my $source = $filename_for{$label};

        # Scalar assignment keeps read_file in scalar context, so the
        # whole content comes back as one string.
        $text_for{$label}
            = $source eq '-'
            ? read_file( \*STDIN )
            : read_file( $source, binmode => 'encoding(utf8)', err_mode => 'log_fatal' );
    }

    return \%text_for;
}
41              
42             1;
43              
44             __END__
45              
46             =for Pod::Coverage BUILD
47              
48             =head1 NAME
49              
50             Treex::Block::Read::BaseAlignedTextReader - abstract ancestor for parallel-corpora document readers
51              
52             =head1 VERSION
53              
54             version 0.08170
55              
56             =head1 DESCRIPTION
57              
58             This class serves as a common ancestor for document readers
59             that have a C<from> parameter with a space- or comma-separated list of filenames
60             to be loaded and that load the documents from plain text files.
61             It is designed to implement the L<Treex::Core::DocumentReader> interface.
62              
63             In derived classes you need to define the C<next_document> method,
64             and you can use C<next_document_texts> and C<new_document> methods.
65              
66             =head1 METHODS
67              
68             =over
69              
70             =item next_document_texts
71              
72             Returns a hashref, where keys are zone labels and values
73             are strings representing contents of the files.
74              
75             =back
76              
77             =head1 SEE ALSO
78              
79             L<Treex::Block::Read::BaseAlignedReader>
80             L<Treex::Block::Read::AlignedSentences>
81              
82             =head1 AUTHOR
83              
84             Martin Popel
85              
86             =head1 COPYRIGHT AND LICENSE
87              
88             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
89              
90             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.