| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package Text::Mining::Corpus::Document; |
|
2
|
3
|
|
|
3
|
|
83858
|
use base qw(Text::Mining::Base); |
|
|
3
|
|
|
|
|
6
|
|
|
|
3
|
|
|
|
|
1464
|
|
|
3
|
|
|
|
|
|
|
use Class::Std; |
|
4
|
|
|
|
|
|
|
use Class::Std::Utils; |
|
5
|
|
|
|
|
|
|
use Text::Mining::Parser; |
|
6
|
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use warnings; |
|
8
|
|
|
|
|
|
|
use strict; |
|
9
|
|
|
|
|
|
|
use Carp; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
use version; our $VERSION = qv('0.0.8'); |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
{ |
|
14
|
|
|
|
|
|
|
my %document_id_of : ATTR( :set :get ); |
|
15
|
|
|
|
|
|
|
my %document_type_of : ATTR( :set :get :default<> ); |
|
16
|
|
|
|
|
|
|
my %corpus_id_of : ATTR( :init_arg :set :get :default<> ); |
|
17
|
|
|
|
|
|
|
my %corpus_name_of : ATTR( :set :get :default<> ); |
|
18
|
|
|
|
|
|
|
my %submitted_by_user_id_of : ATTR( :set :get :default<> ); |
|
19
|
|
|
|
|
|
|
my %document_title_of : ATTR( :set :get :default<> ); |
|
20
|
|
|
|
|
|
|
my %document_url_of : ATTR( :set :get :default<> ); |
|
21
|
|
|
|
|
|
|
my %document_path_of : ATTR( :set :get :default<> ); |
|
22
|
|
|
|
|
|
|
my %file_name_of : ATTR( :init_arg :set :get :default<> ); |
|
23
|
|
|
|
|
|
|
my %file_type_of : ATTR( :init_arg :set :get :default<> ); |
|
24
|
|
|
|
|
|
|
my %bytes_of : ATTR( :set :get :default<> ); |
|
25
|
|
|
|
|
|
|
my %enter_date_of : ATTR( :set :get :default<> ); |
|
26
|
|
|
|
|
|
|
my %exit_date_of : ATTR( :set :get :default<> ); |
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
sub BUILD { |
|
29
|
|
|
|
|
|
|
my ($self, $ident, $arg_ref) = @_; |
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
return; |
|
32
|
|
|
|
|
|
|
} |
|
33
|
|
|
|
|
|
|
|
|
34
|
|
|
|
|
|
|
sub START { |
|
35
|
|
|
|
|
|
|
my ($self, $ident, $arg_ref) = @_; |
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
#print " FILE TYPE: ", $self->get_file_type(), "\n\n"; |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
if (defined $arg_ref->{document_id}) { $self->_get_document($arg_ref); } |
|
40
|
|
|
|
|
|
|
elsif (defined $arg_ref->{file_name}) { $self->insert( $arg_ref ); } |
|
41
|
|
|
|
|
|
|
|
|
42
|
|
|
|
|
|
|
return; |
|
43
|
|
|
|
|
|
|
} |
|
44
|
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
sub parse { |
|
46
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
47
|
|
|
|
|
|
|
$arg_ref->{file_type} = $self->get_document_type(); |
|
48
|
|
|
|
|
|
|
$arg_ref->{file_name} = $self->get_file_name(); |
|
49
|
|
|
|
|
|
|
$arg_ref->{document_id} = $self->get_document_id(); |
|
50
|
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
my $parser = Text::Mining::Parser->new( $arg_ref ); |
|
52
|
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
print STDERR $parser->version(), "\n"; |
|
54
|
|
|
|
|
|
|
print STDERR $parser->stats(), "\n"; |
|
55
|
|
|
|
|
|
|
print STDERR $parser->parse(), "\n"; |
|
56
|
|
|
|
|
|
|
|
|
57
|
|
|
|
|
|
|
my $document_id = $self->get_document_id(); |
|
58
|
|
|
|
|
|
|
return $document_id; |
|
59
|
|
|
|
|
|
|
} |
|
60
|
|
|
|
|
|
|
|
|
61
|
|
|
|
|
|
|
# sub get_id { my ($self) = @_; return $id_of{ident $self}; } |
|
62
|
|
|
|
|
|
|
# sub get_document_id { my ($self) = @_; return $id_of{ident $self}; } |
|
63
|
|
|
|
|
|
|
# sub get_submitted_document_id { my ($self) = @_; return $id_of{ident $self}; } |
|
64
|
|
|
|
|
|
|
# sub get_corpus_id { my ($self) = @_; return $corpus_id_of{ident $self}; } |
|
65
|
|
|
|
|
|
|
# sub get_submitted_by_user_id { my ($self) = @_; return $submitted_by_user_id_of{ident $self}; } |
|
66
|
|
|
|
|
|
|
# sub get_document_url { my ($self) = @_; return $document_url_of{ident $self}; } |
|
67
|
|
|
|
|
|
|
# sub get_document_path { my ($self) = @_; return $document_path_of{ident $self}; } |
|
68
|
|
|
|
|
|
|
# sub get_file_name { my ($self) = @_; return $file_name_of{ident $self}; } |
|
69
|
|
|
|
|
|
|
# sub get_enter_date { my ($self) = @_; return $enter_date_of{ident $self}; } |
|
70
|
|
|
|
|
|
|
# sub get_exit_date { my ($self) = @_; return $exit_date_of{ident $self}; } |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
sub _get_document { |
|
73
|
|
|
|
|
|
|
my ($self, $arg_ref) = @_; |
|
74
|
|
|
|
|
|
|
my $ident = ident $self; |
|
75
|
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
my $sql = "select document_id, document_type_id, corpus_id, document_path, document_file_name, bytes, enter_date "; |
|
77
|
|
|
|
|
|
|
$sql .= "from documents "; |
|
78
|
|
|
|
|
|
|
$sql .= "where document_id = '$arg_ref->{document_id}'"; |
|
79
|
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
($document_id_of{$ident}, |
|
81
|
|
|
|
|
|
|
$document_type_of{$ident}, |
|
82
|
|
|
|
|
|
|
$corpus_id_of{$ident}, |
|
83
|
|
|
|
|
|
|
$document_path_of{$ident}, |
|
84
|
|
|
|
|
|
|
$file_name_of{$ident}, |
|
85
|
|
|
|
|
|
|
$bytes_of{$ident}, |
|
86
|
|
|
|
|
|
|
$enter_date_of{$ident}) = $self->library()->sqlexec($sql, '@'); |
|
87
|
|
|
|
|
|
|
} |
|
88
|
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
sub all { |
|
90
|
|
|
|
|
|
|
my ($self) = @_; |
|
91
|
|
|
|
|
|
|
my (@documents); |
|
92
|
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
my $sql = "select document_id from documents order by document_id asc"; |
|
94
|
|
|
|
|
|
|
my $documents = $self->library()->sqlexec( $sql, '\@@' ); |
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
foreach my $document (@$documents) { push @documents, Text::Librarian::Document->new({ document_id => $document->[0] }); } |
|
97
|
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
return \@documents; |
|
99
|
|
|
|
|
|
|
} |
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
sub display_all { |
|
102
|
|
|
|
|
|
|
my ($self, $c, $root_url) = @_; |
|
103
|
|
|
|
|
|
|
my @switch = (1, 0); |
|
104
|
|
|
|
|
|
|
my @classes = ('rowB', 'rowA'); |
|
105
|
|
|
|
|
|
|
my $documents = Text::Librarian::Document->all(); |
|
106
|
|
|
|
|
|
|
my ($html, $switch, $class); |
|
107
|
|
|
|
|
|
|
|
|
108
|
|
|
|
|
|
|
$html .= " \n";
|
109
|
|
|
|
|
|
|
$html .= " | \n";
|
110
|
|
|
|
|
|
|
$html .= " | | \n";
|
111
|
|
|
|
|
|
|
$html .= " | Name | \n";
|
112
|
|
|
|
|
|
|
$html .= " | Description | \n";
|
113
|
|
|
|
|
|
|
$html .= " | Path | \n";
|
114
|
|
|
|
|
|
|
$html .= " | \n";
|
115
|
|
|
|
|
|
|
foreach my $document (@$documents) { |
|
116
|
|
|
|
|
|
|
$switch = $switch[$switch]; |
|
117
|
|
|
|
|
|
|
$class = $classes[$switch]; |
|
118
|
|
|
|
|
|
|
$html .= " | \n";
|
119
|
|
|
|
|
|
|
$html .= " | [X] | \n";
|
120
|
|
|
|
|
|
|
$html .= " | " . $document->get_name() . " | \n";
|
121
|
|
|
|
|
|
|
$html .= " | " . $document->get_desc() . " | \n";
|
122
|
|
|
|
|
|
|
$html .= " | " . $document->get_path() . " | \n";
|
123
|
|
|
|
|
|
|
$html .= " | \n";
|
124
|
|
|
|
|
|
|
} |
|
125
|
|
|
|
|
|
|
$html .= " | \n"; |
|
126
|
|
|
|
|
|
|
return $html; |
|
127
|
|
|
|
|
|
|
} |
|
128
|
|
|
|
|
|
|
|
|
129
|
|
|
|
|
|
|
sub update { |
|
130
|
|
|
|
|
|
|
my ( $self, $arg_ref ) = @_; |
|
131
|
|
|
|
|
|
|
my $ident = ident $self; |
|
132
|
|
|
|
|
|
|
my @updates = (); |
|
133
|
|
|
|
|
|
|
|
|
134
|
|
|
|
|
|
|
if ( defined $arg_ref->{corpus_id} ) { $self->set_desc( $arg_ref->{corpus_id} ); push @updates, "corpus_id = '" . $self->_html_to_sql( $arg_ref->{corpus_id} ) . "'"; } |
|
135
|
|
|
|
|
|
|
if ( defined $arg_ref->{document_path} ) { $self->set_path( $arg_ref->{document_path} ); push @updates, "document_path = '" . $self->_html_to_sql( $arg_ref->{document_path} ) . "'"; } |
|
136
|
|
|
|
|
|
|
if ( defined $arg_ref->{file_name} ) { $self->set_file_name( $arg_ref->{file_name} ); push @updates, "file_name = '" . $self->_html_to_sql( $arg_ref->{file_name} ) . "'"; } |
|
137
|
|
|
|
|
|
|
if ( defined $arg_ref->{bytes} ) { $self->set_desc( $arg_ref->{bytes} ); push @updates, "bytes = '" . $self->_html_to_sql( $arg_ref->{bytes} ) . "'"; } |
|
138
|
|
|
|
|
|
|
|
|
139
|
|
|
|
|
|
|
my $sql = "update documents set " . join( ', ', @updates ) . " where document_id = '$document_id_of{$ident}'"; |
|
140
|
|
|
|
|
|
|
$self->library()->sqlexec($sql); |
|
141
|
|
|
|
|
|
|
} |
|
142
|
|
|
|
|
|
|
|
|
143
|
|
|
|
|
|
|
sub insert { |
|
144
|
|
|
|
|
|
|
my ($self, $arg_ref) = @_; |
|
145
|
|
|
|
|
|
|
foreach ('corpus_id', 'document_title', 'document_path', 'file_name', 'bytes') { $arg_ref->{$_} = $self->_html_to_sql( $arg_ref->{$_} || '' ); } |
|
146
|
|
|
|
|
|
|
|
|
147
|
|
|
|
|
|
|
# Set doc_type_id : alpha - should be live or at least configured |
|
148
|
|
|
|
|
|
|
my %doc_types = ( txt => 1, xml => 2, pdf => 3 ); |
|
149
|
|
|
|
|
|
|
$arg_ref->{document_type_id} = $doc_types{ $arg_ref->{file_type} }; |
|
150
|
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
my $sql = "insert into documents (document_type_id, corpus_id, document_title, document_path, document_file_name, bytes) "; |
|
152
|
|
|
|
|
|
|
$sql .= "values ('$arg_ref->{document_type_id}', '$arg_ref->{corpus_id}', '$arg_ref->{document_title}', '$arg_ref->{document_path}', '$arg_ref->{file_name}', '$arg_ref->{bytes}') "; |
|
153
|
|
|
|
|
|
|
#print "\n", $sql, "\n\n"; |
|
154
|
|
|
|
|
|
|
$self->library()->sqlexec($sql); |
|
155
|
|
|
|
|
|
|
|
|
156
|
|
|
|
|
|
|
$sql = "select LAST_INSERT_ID()"; |
|
157
|
|
|
|
|
|
|
( $arg_ref->{document_id} ) = $self->library()->sqlexec($sql, '@'); |
|
158
|
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
$self->_get_document( $arg_ref ); |
|
160
|
|
|
|
|
|
|
} |
|
161
|
|
|
|
|
|
|
|
|
162
|
|
|
|
|
|
|
sub delete { |
|
163
|
|
|
|
|
|
|
my ( $self ) = @_; |
|
164
|
|
|
|
|
|
|
my $ident = ident $self; |
|
165
|
|
|
|
|
|
|
|
|
166
|
|
|
|
|
|
|
$self->library()->sqlexec("delete from documents where document_id = '" . $self->get_document_id() . "'"); |
|
167
|
|
|
|
|
|
|
} |
|
168
|
|
|
|
|
|
|
} |
|
169
|
|
|
|
|
|
|
|
|
170
|
|
|
|
|
|
|
1; # Magic true value required at end of module |
|
171
|
|
|
|
|
|
|
__END__ |