File Coverage

blib/lib/Treex/Block/Read/BaseAlignedTextReader.pm
Criterion Covered Total %
statement 18 30 60.0
branch 0 4 0.0
condition n/a
subroutine 6 7 85.7
pod n/a
total 24 41 58.5


line stmt bran cond sub pod time code
1             package Treex::Block::Read::BaseAlignedTextReader;
2             $Treex::Block::Read::BaseAlignedTextReader::VERSION = '2.20151102';
3 2     2   1577 use strict;
  2         5  
  2         54  
4 2     2   9 use warnings;
  2         5  
  2         56  
5 2     2   905 use Moose;
  2         471156  
  2         16  
6 2     2   14395 use Treex::Core::Common;
  2         604538  
  2         13  
7             extends 'Treex::Block::Read::BaseAlignedReader';
8 2     2   11277 use File::Slurp;
  2         14  
  2         167  
9 2     2   10 use Data::Dumper;
  2         4  
  2         553  
10              
11             #has lines_per_doc => ( isa => 'Int', is => 'ro', default => 0 );
12             #has merge_files => ( isa => 'Bool', is => 'ro', default => 0 );
13              
14             #sub BUILD {
15             # my ($self) = @_;
16             # if ( $self->lines_per_doc ) {
17             # $self->set_is_one_doc_per_file(0);
18             # }
19             # return;
20             #}
21              
22             sub next_document_texts {
23 0     0     my ($self) = @_;
24             # Slurp every file named by the next call to next_filenames() and return the contents in a hashref.
25             #print STDERR __PACKAGE__ . ":" . __LINE__ . "\n";
26             # next_filenames() is not defined in this file; presumably inherited from BaseAlignedReader - TODO confirm.
27 0           my $filenames = $self->next_filenames();
28 0 0         return if ! $filenames;
29             # A false value above ends the call with a bare return; presumably it signals exhausted input - verify.
30 0           my %mapping = %{$filenames};
  0            
31             # %mapping is a shallow copy of the returned hashref; its keys are reused as the keys of %texts.
32 0           my %texts;
33             # if ( $self->lines_per_doc ) { # TODO: option lines_per_document not implemented
34             # log_fatal "option lines_per_document not implemented for aligned readers yet";
35             # }
36 0           foreach my $lang ( keys %mapping ) {
37 0           my $filename = $mapping{$lang};
38 0 0         if ( $filename eq '-' ) { # the filename '-' selects standard input
39 0           $texts{$lang} = read_file( \*STDIN ); # NOTE(review): no binmode is passed here, unlike the file branch
40             }
41             else { # any other name is read as a file with the encoding(utf8) layer requested
42 0           $texts{$lang} = read_file( $filename, binmode => 'encoding(utf8)', err_mode => 'log_fatal' ); # NOTE(review): 'log_fatal' is not a documented File::Slurp err_mode - confirm
43             }
44             }
45             # Result: hashref with the same keys as the filename mapping; each value is what read_file returned.
46 0           return \%texts;
47             }
48              
49             1;
50              
51             __END__
52              
53             =for Pod::Coverage BUILD
54              
55             =head1 NAME
56              
57             Treex::Block::Read::BaseAlignedTextReader - abstract ancestor for parallel-corpora document readers
58              
59             =head1 VERSION
60              
61             version 2.20151102
62              
63             =head1 DESCRIPTION
64              
65             This class serves as a common ancestor for document readers
66             that have a parameter C<from> with a space- or comma-separated list of filenames
67             to be loaded and that load the documents from plain text files.
68             It is designed to implement the L<Treex::Core::DocumentReader> interface.
69              
70             In derived classes you need to define the C<next_document> method,
71             and you can use C<next_document_texts> and C<new_document> methods.
72              
73             =head1 METHODS
74              
75             =over
76              
77             =item next_document_texts
78              
79             Returns a hashref, where keys are zone labels and values
80             are strings representing contents of the files.
81              
82             =back
83              
84             =head1 SEE
85              
86             L<Treex::Block::Read::BaseAlignedReader>
87             L<Treex::Block::Read::AlignedSentences>
88              
89             =head1 AUTHOR
90              
91             Martin Popel
92              
93             =head1 COPYRIGHT AND LICENSE
94              
95             Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
96              
97             This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.