| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
#!/usr/bin/perl |
|
2
|
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
###################################################################################### |
|
4
|
|
|
|
|
|
|
# # |
|
5
|
|
|
|
|
|
|
# Author: Clint Cuffy # |
|
6
|
|
|
|
|
|
|
# Date: 06/16/2016 # |
|
7
|
|
|
|
|
|
|
# Revised: 04/06/2017 # |
|
8
|
|
|
|
|
|
|
# UMLS Similarity - Medline XML-To-Word2Vec Input Format Conversion Module # |
|
9
|
|
|
|
|
|
|
# # |
|
10
|
|
|
|
|
|
|
###################################################################################### |
|
11
|
|
|
|
|
|
|
# # |
|
12
|
|
|
|
|
|
|
# Description: # |
|
13
|
|
|
|
|
|
|
# ============ # |
|
14
|
|
|
|
|
|
|
# Perl Medline XML-To-Word2Vec Input Format Conversion Module # |
|
15
|
|
|
|
|
|
|
# for the "word2vec" package. # |
|
16
|
|
|
|
|
|
|
# Features: # |
|
17
|
|
|
|
|
|
|
# ========= # |
|
18
|
|
|
|
|
|
|
# Supports Parsing Individual Files or Directories # |
|
19
|
|
|
|
|
|
|
# Plain XML files or .gz XML files (extracts and processes in RAM) # |
|
20
|
|
|
|
|
|
|
# Include results by specified Date Ranges: 00/00/0000 Format # |
|
21
|
|
|
|
|
|
|
# Include results by title, abstract or both per article # |
|
22
|
|
|
|
|
|
|
# Multi-Threading Support - Divides work by number of threads # |
|
23
|
|
|
|
|
|
|
# Text Compoundify # |
|
24
|
|
|
|
|
|
|
# # |
|
25
|
|
|
|
|
|
|
###################################################################################### |
|
26
|
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
|
|
28
|
|
|
|
|
|
|
package Word2vec::Xmltow2v; |
|
29
|
|
|
|
|
|
|
|
|
30
|
4
|
|
|
4
|
|
57343
|
use strict; |
|
|
4
|
|
|
|
|
9
|
|
|
|
4
|
|
|
|
|
110
|
|
|
31
|
4
|
|
|
4
|
|
20
|
use warnings; |
|
|
4
|
|
|
|
|
8
|
|
|
|
4
|
|
|
|
|
88
|
|
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Standard Package(s) |
|
34
|
4
|
|
|
4
|
|
1389
|
use utf8; |
|
|
4
|
|
|
|
|
48
|
|
|
|
4
|
|
|
|
|
19
|
|
|
35
|
4
|
|
|
4
|
|
1416
|
use threads; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
36
|
|
|
|
|
|
|
use threads::shared; |
|
37
|
|
|
|
|
|
|
use IO::Uncompress::Gunzip qw(gunzip $GunzipError); |
|
38
|
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
# CPAN Package(s) |
|
40
|
|
|
|
|
|
|
use Cwd; |
|
41
|
|
|
|
|
|
|
use File::Type; |
|
42
|
|
|
|
|
|
|
use Text::Unidecode; |
|
43
|
|
|
|
|
|
|
use XML::Twig; |
|
44
|
|
|
|
|
|
|
use Sys::CpuAffinity; |
|
45
|
|
|
|
|
|
|
|
|
46
|
|
|
|
|
|
|
# Word2Vec Utility Package(s) |
|
47
|
|
|
|
|
|
|
use Word2vec::Bst; |
|
48
|
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
|
|
50
|
|
|
|
|
|
|
use vars qw($VERSION); |
|
51
|
|
|
|
|
|
|
|
|
52
|
|
|
|
|
|
|
$VERSION = '0.02'; |
|
53
|
|
|
|
|
|
|
|
|
54
|
|
|
|
|
|
|
|
|
55
|
|
|
|
|
|
|
# Global Variables |
|
56
|
|
|
|
|
|
|
my $debugLock :shared; |
|
57
|
|
|
|
|
|
|
my $writeLock :shared; |
|
58
|
|
|
|
|
|
|
my $queueLock :shared; |
|
59
|
|
|
|
|
|
|
my $appendLock :shared; |
|
60
|
|
|
|
|
|
|
my @xmlJobQueue :shared; |
|
61
|
|
|
|
|
|
|
my $totalJobCount :shared; |
|
62
|
|
|
|
|
|
|
my $finishedJobCount :shared; |
|
63
|
|
|
|
|
|
|
my $preCompWordCount :shared; |
|
64
|
|
|
|
|
|
|
my $postCompWordCount :shared; |
|
65
|
|
|
|
|
|
|
my $compoundWordCount :shared; |
|
66
|
|
|
|
|
|
|
|
|
67
|
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
###################################################################################### |
|
69
|
|
|
|
|
|
|
# Constructor |
|
70
|
|
|
|
|
|
|
###################################################################################### |
|
71
|
|
|
|
|
|
|
|
|
72
|
|
|
|
|
|
|
# Compile-time hook for this package. Currently a placeholder: it performs no work.
BEGIN
{
    # Intentionally empty - add package-level set-up here if it is ever needed.
}
|
76
|
|
|
|
|
|
|
|
|
77
|
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
###################################################################################### |
|
79
|
|
|
|
|
|
|
#    Destructor |
|
80
|
|
|
|
|
|
|
###################################################################################### |
|
81
|
|
|
|
|
|
|
|
|
82
|
|
|
|
|
|
|
# Interpreter-shutdown hook for this package. Currently a placeholder: it performs no work.
END
{
    # Intentionally empty - add package-level tear-down here if it is ever needed.
}
|
86
|
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
|
|
88
|
|
|
|
|
|
|
###################################################################################### |
|
89
|
|
|
|
|
|
|
# new Class Operator |
|
90
|
|
|
|
|
|
|
###################################################################################### |
|
91
|
|
|
|
|
|
|
|
|
92
|
|
|
|
|
|
|
# Constructs a Word2vec::Xmltow2v object.
#
# All arguments are positional and optional; any argument that is not defined
# receives the default assigned below. Argument order:
#   debugLog, writeLog, storeTitle, storeAbstract, quickParse, compoundifyText,
#   numOfThreads, workingDir, savePath, beginDate, endDate, xmlStringToParse,
#   textCorpusStr, fileHandle, twigHandler, parsedCount, tempDate, tempStr,
#   compoundWordAry, compoundWordBST, maxCompoundWordLength,
#   overwriteExistingFile, compoundWordCount
#
# Side effects:
#   - Resets the package-level shared job queue and word/job counters.
#   - When writeLog is true, opens 'Xmltow2vLog.txt' (relative path) for writing.
#   - Always replaces the twigHandler argument with a freshly built XML::Twig parser.
#   - When an XML string other than "(null)" is supplied, parses it immediately.
#
# Returns the blessed object.
sub new
{
    my $class = shift;
    my $self  = {
        # Private Member Variables (filled from the positional arguments, in this order)
        _debugLog              => shift,    # Boolean (Binary): 0 = False, 1 = True
        _writeLog              => shift,    # Boolean (Binary): 0 = False, 1 = True
        _storeTitle            => shift,    # Boolean (Binary): 0 = False, 1 = True
        _storeAbstract         => shift,    # Boolean (Binary): 0 = False, 1 = True
        _quickParse            => shift,    # Boolean (Binary): 0 = False, 1 = True
        _compoundifyText       => shift,    # Boolean (Binary): 0 = False, 1 = True
        _numOfThreads          => shift,    # Integer
        _workingDir            => shift,    # String
        _savePath              => shift,    # String
        _beginDate             => shift,    # String Format: Month/Day/Year
        _endDate               => shift,    # String Format: Month/Day/Year
        _xmlStringToParse      => shift,    # String
        _textCorpusStr         => shift,    # String
        _fileHandle            => shift,    # File Handle
        _twigHandler           => shift,    # XML::Twig Object (always rebuilt below)
        _parsedCount           => shift,    # Int
        _tempDate              => shift,    # String (Temporary Placeholder)
        _tempStr               => shift,    # String (Temporary Placeholder)
        _compoundWordAry       => shift,    # Array Of Compound Words
        _compoundWordBST       => shift,    # Binary Search Tree Reference
        _maxCompoundWordLength => shift,    # Integer
        _overwriteExistingFile => shift,    # Integer
        _compoundWordCount     => shift,    # Integer
    };

    # Apply defaults for every argument that was not supplied
    $self->{ _debugLog }              = 0                              if !defined ( $self->{ _debugLog } );
    $self->{ _writeLog }              = 0                              if !defined ( $self->{ _writeLog } );
    $self->{ _storeTitle }            = 1                              if !defined ( $self->{ _storeTitle } );
    $self->{ _storeAbstract }         = 1                              if !defined ( $self->{ _storeAbstract } );
    $self->{ _quickParse }            = 0                              if !defined ( $self->{ _quickParse } );
    $self->{ _compoundifyText }       = 0                              if !defined ( $self->{ _compoundifyText } );
    $self->{ _numOfThreads }          = Sys::CpuAffinity::getNumCpus() if !defined ( $self->{ _numOfThreads } );
    $self->{ _workingDir }            = Cwd::getcwd()                  if !defined ( $self->{ _workingDir } );
    $self->{ _savePath }              = Cwd::getcwd()                  if !defined ( $self->{ _savePath } );
    $self->{ _beginDate }             = "00/00/0000"                   if !defined ( $self->{ _beginDate } );
    $self->{ _endDate }               = "99/99/9999"                   if !defined ( $self->{ _endDate } );
    $self->{ _xmlStringToParse }      = "(null)"                       if !defined ( $self->{ _xmlStringToParse } );
    $self->{ _textCorpusStr }         = ""                             if !defined ( $self->{ _textCorpusStr } );
    $self->{ _twigHandler }           = 0                              if !defined ( $self->{ _twigHandler } );
    $self->{ _parsedCount }           = 0                              if !defined ( $self->{ _parsedCount } );
    $self->{ _tempDate }              = ""                             if !defined ( $self->{ _tempDate } );
    $self->{ _tempStr }               = ""                             if !defined ( $self->{ _tempStr } );
    $self->{ _outputFileName }        = "textcorpus.txt"               if !defined ( $self->{ _outputFileName } );
    $self->{ _compoundWordAry }       = []                             if !defined ( $self->{ _compoundWordAry } );
    $self->{ _compoundWordBST }       = Word2vec::Bst->new()           if !defined ( $self->{ _compoundWordBST } );
    $self->{ _maxCompoundWordLength } = 20                             if !defined ( $self->{ _maxCompoundWordLength } );
    $self->{ _overwriteExistingFile } = 0                              if !defined ( $self->{ _overwriteExistingFile } );

    # Initialize Thread Safe Counting Variables
    # (package-level and shared, so constructing a new object resets them for every instance)
    @xmlJobQueue       = ();
    $totalJobCount     = 0;
    $finishedJobCount  = 0;
    $compoundWordCount = 0;
    $preCompWordCount  = 0;
    $postCompWordCount = 0;

    # Open File Handler if checked variable is true
    if( $self->{ _writeLog } )
    {
        if( open( $self->{ _fileHandle }, '>:utf8', 'Xmltow2vLog.txt' ) )
        {
            $self->{ _fileHandle }->autoflush( 1 );    # Auto-flushes writes to log
        }
        else
        {
            # Report the failure instead of silently keeping an unopened handle.
            # NOTE(review): assumes WriteLog() consults _writeLog before writing to the file - confirm.
            warn( "Word2vec::Xmltow2v::new - Warning: Unable To Open 'Xmltow2vLog.txt' For Writing: $! - File Logging Disabled\n" );
            $self->{ _fileHandle } = undef;
            $self->{ _writeLog }   = 0;
        }
    }

    # Declare XML parser
    # NOTE(review): the handler closures below capture $self while $self stores the twig,
    # which forms a reference cycle - confirm whether DESTROY is expected to run before
    # global destruction.
    # Quick Parse Method(s): Much Faster With Less Hardware Requirements and Accuracy
    if( $self->{ _quickParse } == 1 )
    {
        $self->{ _twigHandler } = XML::Twig->new(
            twig_handlers =>
            {
                'DateCreated'   => sub { _QuickParseDateCreated( @_, $self ) },
                'Journal'       => sub { _QuickParseJournal( @_, $self ) },
                'Article'       => sub { _QuickParseArticle( @_, $self ) },
                'OtherAbstract' => sub { _QuickParseOtherAbstract( @_, $self ) },
            },
        );
    }
    # Default Parse Method: Much Slower With High RAM Requirements and Better Accuracy
    else
    {
        $self->{ _twigHandler } = XML::Twig->new(
            twig_handlers =>
            {
                'MedlineCitationSet' => sub { _ParseMedlineCitationSet( @_, $self ) },
            },
        );
    }

    bless $self, $class;

    $self->WriteLog( "New - Debug On" );
    $self->WriteLog( "New - QuickParse Enabled" ) if( $self->{ _quickParse } == 1 );

    # Parse the XML string straight away when one was passed to the constructor
    if( $self->{ _xmlStringToParse } ne "(null)" )
    {
        if( $self->_CheckForNullData ( $self->{ _xmlStringToParse } ) )
        {
            $self->WriteLog( "New - Error: XML String is null" );
        }
        else
        {
            $self->{ _twigHandler }->parse( $self->{ _xmlStringToParse } );
        }
    }
    else
    {
        $self->WriteLog( "New - No XML String Argument To Parse" );
    }

    return $self;
}
|
210
|
|
|
|
|
|
|
|
|
211
|
|
|
|
|
|
|
|
|
212
|
|
|
|
|
|
|
###################################################################################### |
|
213
|
|
|
|
|
|
|
# DESTROY |
|
214
|
|
|
|
|
|
|
###################################################################################### |
|
215
|
|
|
|
|
|
|
|
|
216
|
|
|
|
|
|
|
# Object destructor: closes the log file handle stored in _fileHandle, if there is one.
sub DESTROY
{
    my $self      = shift;
    my $logHandle = $self->{ _fileHandle };

    # Nothing to release when no handle was stored
    close( $logHandle ) if( $logHandle );
}
|
223
|
|
|
|
|
|
|
|
|
224
|
|
|
|
|
|
|
|
|
225
|
|
|
|
|
|
|
###################################################################################### |
|
226
|
|
|
|
|
|
|
# Module Functions |
|
227
|
|
|
|
|
|
|
###################################################################################### |
|
228
|
|
|
|
|
|
|
|
|
229
|
|
|
|
|
|
|
# Converts Medline XML into the text corpus used as word2vec input.
#
# Parameters:
#   $dir - Path of a single XML file or of a directory of ".xml" / ".gz" files.
#          Defaults to the object's working directory.
#
# A single file is read and parsed in the calling thread. For a directory, every
# matching file name is placed on the shared job queue and GetNumOfThreads()
# worker threads (running _ThreadedConvert) are started and joined.
#
# Returns -1 when the date check fails, when both StoreTitle and StoreAbstract
# are 0, when the single file yields "(null)" data, or when the directory cannot
# be opened. Returns 0 otherwise (including when $dir is neither file nor directory).
sub ConvertMedlineXMLToW2V
{
    my ( $self, $dir ) = @_;
    $dir = $self->GetWorkingDir() if !defined ( $dir );

    # Check(s)
    my $result = $self->_DateCheck();

    if( $result == -1 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2v - Error: Date Check Failed" );
        return -1;
    }

    if( $self->GetStoreTitle() == 0 && $self->GetStoreAbstract() == 0 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Error: StoreTitle and StoreAbstract Variables Set To 0 - No Data Will Be Extracted" );
        return -1;
    }

    # Check To See If Overwrite Existing File Option Is Enabled And Overwrite
    if( $self->GetOverwriteExistingFile() == 1 )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Overwrite Existing File Option Enabled" );

        if( -e $self->GetSavePath() )
        {
            $self->WriteLog( "ConvertMedlineXMLToW2V - Existing File Found - Removing Existing File" );
            unlink( $self->GetSavePath() );
        }
    }

    my $isFileOrDir = $self->IsFileOrDirectory( $dir );

    # Process File In Working Directory
    if( $isFileOrDir eq "file" )
    {
        $self->SetXMLStringToParse( $self->_ReadXMLDataFromFile( $dir ) );
        return -1 if ( $self->GetXMLStringToParse() ) eq "(null)";

        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing XML File: $dir" );
        $self->_ParseXMLString( $self->GetXMLStringToParse() );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing Complete" );
    }
    # Process All Files In Directory
    elsif( $isFileOrDir eq "dir" )
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - No File Specified/Using Directory Option" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Obtaining File(s) In Directory" );

        # Read File Name(s) From Specified Directory
        my $dirHandle;

        if( !opendir( $dirHandle, "$dir" ) )
        {
            $self->WriteLog( "ConvertMedlineXMLToW2V - Error: Can't open $dir: $!" );
            return -1;
        }

        for my $file ( readdir( $dirHandle ) )
        {
            # Plain XML files (".xml.gz" is excluded here because the ".gz" test below picks it up)
            push( @xmlJobQueue, $file ) if ( ( index( $file, ".xml" ) != -1 ) && ( index( $file, ".xml.gz") == -1 ) );

            # Gzip-compressed files
            push( @xmlJobQueue, $file ) if ( index( $file, ".gz" ) != -1 );
        }

        closedir $dirHandle;
        undef $dirHandle;

        # Set Total Job Count / Reset Finished Job Count So Repeated Runs Do Not Accumulate
        $totalJobCount    = @xmlJobQueue;
        $finishedJobCount = 0;

        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing $totalJobCount File(s)" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Starting Worker Thread(s) / Compiling Text Corpus" );

        # Start Thread(s) - keep the thread objects so only the workers created here are joined
        my @workerThreads = ();

        for( my $i = 0; $i < $self->GetNumOfThreads(); $i++ )
        {
            my $thread = threads->create( "_ThreadedConvert", $self, $dir );
            push( @workerThreads, $thread ) if defined( $thread );
        }

        # Join All Worker Threads Prior To Termination
        for my $thread ( @workerThreads )
        {
            $thread->join();
        }

        if( $self->GetDebugLog() == 0 )
        {
            print( "Parsed $finishedJobCount of $totalJobCount Files\n" );
            print( "Number Of Compound Words: $compoundWordCount\n" );
            print( "Number Of Words (Before Compounding): $preCompWordCount\n" );
            print( "Number Of Words (After Compounding): $postCompWordCount\n" );
        }

        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsed $finishedJobCount of $totalJobCount Files" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Number Of Compound Words: $compoundWordCount" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Number Of Words (Before Compounding): $preCompWordCount" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Number Of Words (After Compounding): $postCompWordCount" );
        $self->WriteLog( "ConvertMedlineXMLToW2V - Parsing Complete" );

        # Clean Up
        # (these are object methods; calling them without $self left the object's buffers untouched)
        $self->ClearTempStr();
        $self->ClearTextCorpusStr();
        $totalJobCount     = 0;
        $finishedJobCount  = 0;
        $preCompWordCount  = 0;
        $compoundWordCount = 0;
        $postCompWordCount = 0;
    }
    else
    {
        $self->WriteLog( "ConvertMedlineXMLToW2V - Unknown Parameter Type: Not File Or Directory" );
    }

    return 0;
}
|
326
|
|
|
|
|
|
|
|
|
327
|
|
|
|
|
|
|
# Worker thread body: repeatedly takes a file name off the shared job queue,
# reads and parses that file, appends the resulting text corpus to the save
# path, and clears the in-memory corpus.
#
# Parameters:
#   $dir - Directory that the queued file names are relative to.
#
# Returns 1 when the queue is already empty at start-up (thread not needed),
# 0 once the queue has been drained.
sub _ThreadedConvert
{
    my ( $self, $dir ) = @_;

    my $keepWorking = 1;
    my $tid         = threads->tid();

    if( @xmlJobQueue == 0 )
    {
        $self->WriteLog( "_ThreadedConvert - Warning: Requested Thread $tid Not Needed/Threads Exceed Work Load - Terminating Thread" );
        return 1;
    }

    $self->WriteLog( "_ThreadedConvert - Starting Thread: $tid" );
    $self->WriteLog( "_ThreadedConvert - Thread $tid Parsing File(s) In Job Queue" );

    while( $keepWorking == 1 )
    {
        my $file;

        # Prevent Other Threads From Reading Shared Job Queue (Array) At The Same Time
        {
            lock( $queueLock );

            # Fetch A File Name To Parse
            # (shift takes the head of the queue and shrinks it; no scan/delete of elements needed)
            $file = shift( @xmlJobQueue );

            # Increment Parsed File Counter (counted when the job is claimed)
            $finishedJobCount++ if defined( $file );

            # Exit The Main Loop If The Last Element Was Taken
            $keepWorking = 0 if ( @xmlJobQueue == 0 );
        }

        if( defined( $file ) )
        {
            print( "Thread $tid: Parsing $file\n" ) if ( !$self->GetDebugLog() );
            $self->WriteLog( "_ThreadedConvert - Thread $tid: Processing File: $file" );
            $self->SetXMLStringToParse( $self->_ReadXMLDataFromFile( "$dir/$file" ) );
            $self->WriteLog( "_ThreadedConvert - Thread $tid: Parsing XML Data" );
            $self->_ParseXMLString( $self->GetXMLStringToParse() );
            $self->WriteLog( "_ThreadedConvert - Thread $tid: Parsed $file" );
            print( "Thread $tid: Parsed $file\n" ) if ( !$self->GetDebugLog() );
            $self->_SaveTextCorpusToFile( $self->GetSavePath(), 1 );
            $self->ClearTextCorpusStr();
        }
    }

    $self->WriteLog( "_ThreadedConvert - Thread $tid Finished - Terminating" );

    return 0;
}
|
388
|
|
|
|
|
|
|
|
|
389
|
|
|
|
|
|
|
# Parses an XML string with the object's XML::Twig handler.
#
# Parameters:
#   $string - XML data to parse; an undefined value is treated as "".
#
# Returns -1 when the parse requirements are not met or when the data is flagged
# as null by _CheckForNullData(); returns 0 after a parse.
sub _ParseXMLString
{
    my ( $self, $string ) = @_;
    $string = "" if !defined ( $string );

    # _CheckParseRequirements() returns a numeric status code, so compare numerically
    return -1 if ( $self->_CheckParseRequirements( $string ) == -1 );

    # REMOVEME
    #$self->_RemoveXMLVersion( \$string );

    if( $self->_CheckForNullData( $string ) )
    {
        $self->WriteLog( "_ParseXMLString - Cannot Parse (null) string" );
        return -1;
    }

    $self->{ _twigHandler }->parse( $string );
    $self->WriteLog( "_ParseXMLString: Released PubmedArticle from memory" );

    # Print how many entries were parsed
    $self->WriteLog( "_ParseXMLString: Parsed " . $self->GetParsedCount() . " entries" );

    return 0;
}
|
418
|
|
|
|
|
|
|
|
|
419
|
|
|
|
|
|
|
# Verifies that there is something to parse and a twig handler to parse it with.
#
# Parameters:
#   $string - XML data about to be parsed; an undefined value is treated as "".
#
# Returns -1 (after logging the reason) when the string is empty or when
# GetTwigHandler() compares equal to 0; returns 0 otherwise.
sub _CheckParseRequirements
{
    my ( $self, $string ) = @_;

    my $xmlData = defined( $string ) ? $string : "";

    # Guard: empty input
    if( $xmlData eq "" )
    {
        $self->WriteLog( "_CheckParseRequirements - Error: Nothing To Parse" );
        return -1;
    }

    # Guard: no parser available
    if( $self->GetTwigHandler() == 0 )
    {
        $self->WriteLog( "_CheckParseRequirements - Error: Unable To Parse XML Data/TwigHandler = (null)" );
        return -1;
    }

    return 0;
}
|
437
|
|
|
|
|
|
|
|
|
438
|
|
|
|
|
|
|
# Checks to see if Medline XML data in memory is a null string |
|
439
|
|
|
|
|
|
|
# Reports whether the given data is the "(null)" placeholder.
#
# Parameters:
#   $temp - String to inspect.
#
# Returns 1 when $temp begins with "(null)", 0 otherwise (including when $temp
# is undefined).
sub _CheckForNullData
{
    my ( $self, $temp ) = @_;
    my $nullStr = "(null)";

    return 0 if !defined( $temp );

    # The marker must sit at position 0. The comparison is done on index()'s
    # position directly; storing the result of "index(...) != -1" would keep only
    # the boolean and make the position test impossible to satisfy.
    return 1 if ( index( $temp, $nullStr ) == 0 );

    # Return False
    return 0;
}
|
453
|
|
|
|
|
|
|
|
|
454
|
|
|
|
|
|
|
# Removes the XML Version string prior to parsing the XML string |
|
455
|
|
|
|
|
|
|
# Strips the XML declaration and DOCTYPE lines from an XML string, in place.
#
# Parameters:
#   $temp - Reference to the XML string. Every line containing the XML
#           declaration marker or '!DOCTYPE' is dropped; each remaining line is
#           re-joined with a trailing newline and written back through the reference.
#
# Does nothing when $temp is not a reference to a defined string.
sub _RemoveXMLVersion
{
    my ( $self, $temp ) = @_;

    return if ( !defined( $temp ) || ref( $temp ) ne "SCALAR" || !defined( ${$temp} ) );

    # Checking For XML Version
    # NOTE(review): this literal was truncated in the reviewed copy of the file;
    # '<?xml' is reconstructed from the sub's stated purpose - confirm against the original.
    my $xmlVersion = '<?xml';
    my $docType    = '!DOCTYPE';

    my $newXMLString = "";

    for my $line ( split( /\n/, ${$temp} ) )
    {
        # Skip declaration / doctype lines
        next if ( index( $line, $xmlVersion ) != -1 || index( $line, $docType ) != -1 );

        $newXMLString .= ( $line . "\n" );
    }

    ${$temp} = $newXMLString;
}
|
476
|
|
|
|
|
|
|
|
|
477
|
|
|
|
|
|
|
# XML::Twig handler for the 'MedlineCitationSet' element (default parse method).
#
# Parameters:
#   $twigSelf - Twig object supplied by XML::Twig (unused here).
#   $root     - The MedlineCitationSet element.
#   $self     - The Word2vec::Xmltow2v object (appended by the handler closure).
#
# For each child of $root: parses it, appends the collected text to the text
# corpus when the article date is inside the configured range (lower-cased and
# compoundified first when that option is on), clears the temporary string and
# date, counts the entry, and purges it from memory.
sub _ParseMedlineCitationSet
{
    my ( $twigSelf, $root, $self ) = @_;

    # Evaluated lazily so the date check only runs where a condition needs it
    my $dateIsInRange = sub
    {
        return $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1;
    };

    for my $citation ( $root->children() )
    {
        # Parse XML Data
        my $wasParsed = $self->_ParseMedlineArticle( $citation );

        # Compoundify String If Option Is Enabled
        if( $self->GetCompoundifyText() == 1 && $dateIsInRange->() )
        {
            my $compoundified = $self->CompoundifyString( lc( $self->GetTempStr() ) );

            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $compoundified );
        }
        elsif( $dateIsInRange->() )
        {
            # Append Article Data To Text Corpus
            $self->AppendStrToTextCorpus( $self->GetTempStr() );
        }

        # Clear string placeholders
        $self->ClearTempStr();
        $self->ClearTempDate();

        # Increment Parsed Counter
        $self->{ _parsedCount }++ if ( $wasParsed == 1 );

        # Release the stored XML section from memory (not fully tested)
        $citation->purge() if defined( $citation );
    }

    # Release the stored XML section from memory (not fully tested)
    $root->purge();

    # Kept as the final statement so the sub's (implicit) return value is unchanged
    $self->WriteLog( "_ParseMedlineCitationSet: Released PubmedArticleSet group from memory" );
}
|
521
|
|
|
|
|
|
|
|
|
522
|
|
|
|
|
|
|
# Dispatches each child element of one citation to the matching parser.
#
# Parameters:
#   $medlineArticle - Element whose children are inspected.
#
# 'Article' goes to _ParseArticle(), 'DateCreated' is converted by
# _ParseDateCreated() and stored as the temporary date, 'OtherAbstract' goes to
# _ParseOtherAbstract(); any other tag is only logged. Each child is purged
# after it has been handled.
#
# Returns 1.
sub _ParseMedlineArticle
{
    my ( $self, $medlineArticle ) = @_;

    foreach my $child ( $medlineArticle->children() )
    {
        my $tag = $child->tag();

        if( $tag eq "Article" )
        {
            $self->_ParseArticle( $child );
        }
        elsif( $tag eq "DateCreated" )
        {
            $self->SetTempDate( $self->_ParseDateCreated( $child ) );
        }
        elsif( $tag eq "OtherAbstract" )
        {
            $self->_ParseOtherAbstract( $child );
        }
        else
        {
            $self->WriteLog( "_ParseMedlineArticle - (New Data Found) - Tag: " . $child->tag() . ", Field: " . $child->field() );
        }

        # Release child element from memory
        $child->purge();
    }

    return 1;
}
|
554
|
|
|
|
|
|
|
|
|
555
|
|
|
|
|
|
|
# Builds a "Month/Day/Year" string from the children of a DateCreated element.
#
# Parameters:
#   $article - Element whose 'Month', 'Day' and 'Year' children hold the date parts.
#
# A component that is missing or empty falls back to "00" (month, day) or
# "0000" (year), matching the "00/00/0000" date format used by the constructor
# defaults.
#
# Returns the date string.
sub _ParseDateCreated
{
    my ( $self, $article ) = @_;

    # Left undefined so that a component which never appears can be detected below
    my ( $month, $day, $year );

    for my $date ( $article->children() )
    {
        my $tag = $date->tag();

        $day   = $date->field() if ( $tag eq "Day" );
        $month = $date->field() if ( $tag eq "Month" );
        $year  = $date->field() if ( $tag eq "Year" );
    }

    # Check(s)
    # An empty string counts as missing too; testing only for undef never applied the defaults.
    $day   = "00"   if ( !defined ( $day )   || $day   eq "" );
    $month = "00"   if ( !defined ( $month ) || $month eq "" );
    $year  = "0000" if ( !defined ( $year )  || $year  eq "" );

    $self->WriteLog( "_ParseDateCreated - Month: $month, Day: $day, Year: $year " );

    return "$month/$day/$year";
}
|
581
|
|
|
|
|
|
|
|
|
582
|
|
|
|
|
|
|
# Walks the children of an Article element and collects its text.
#
# Parameters:
#   $article - Element whose children are inspected.
#
# 'Journal' is handed to _ParseJournal(). The text of 'ArticleTitle' and
# 'Abstract' is transliterated with Text::Unidecode, chomped, appended to the
# temporary string when GetStoreTitle() / GetStoreAbstract() is 1, and logged.
# Any other tag is only logged.
sub _ParseArticle
{
    my ( $self, $article ) = @_;

    for my $child ( $article->children() )
    {
        my $tag = $child->tag();

        if( $tag eq "Journal" )
        {
            $self->_ParseJournal( $child );
            next;
        }

        if( $tag eq "ArticleTitle" || $tag eq "Abstract" )
        {
            my $text = Text::Unidecode::unidecode( $child->field() );
            chomp( $text );

            # Store String - titles and abstracts are governed by separate options
            my $storeOption = ( $tag eq "ArticleTitle" ) ? $self->GetStoreTitle() : $self->GetStoreAbstract();
            $self->AppendToTempStr( $text ) if ( $storeOption == 1 );

            $self->WriteLog( "_ParseArticle - Tag: " . $child->tag() . ", Field: " . $text );
            next;
        }

        $self->WriteLog( "_ParseArticle - (New Tag Found) - Tag: " . $child->tag() . ", Field: " . $child->field() );
    }
}
|
620
|
|
|
|
|
|
|
|
|
621
|
|
|
|
|
|
|
# Walks the children of a Journal element and collects the journal title.
#
# Parameters:
#   $journalRoot - Element whose children are inspected.
#
# The text of a 'Title' child is transliterated with Text::Unidecode, chomped,
# appended to the temporary string when GetStoreTitle() is 1, and logged. Any
# other tag is only logged.
sub _ParseJournal
{
    my ( $self, $journalRoot ) = @_;

    for my $child ( $journalRoot->children() )
    {
        if( $child->tag() ne "Title" )
        {
            $self->WriteLog( "_ParseJournal - (New Tag Found) - Tag: " . $child->tag() . ", Field: " . $child->field() );
            next;
        }

        my $title = Text::Unidecode::unidecode( $child->field() );
        chomp( $title );

        # Store String
        $self->AppendToTempStr( $title ) if ( $self->GetStoreTitle() == 1 );

        $self->WriteLog( "_ParseJournal - Tag: " . $child->tag() . ", Field: " . $title );
    }
}
|
645
|
|
|
|
|
|
|
|
|
646
|
|
|
|
|
|
|
# Walks the direct children of an abstract node.
#   - An "AbstractText" child has its field transliterated with
#     Text::Unidecode, chomped, appended to the temporary string (only when
#     GetStoreAbstract() == 1) and written to the log.
#   - Any other child is only written to the log as a newly found tag.
# Parameters:
#   $abstractRoot - node object providing children(); every child must
#                   provide tag() and field().
sub _ParseOtherAbstract
{
    my ( $self, $abstractRoot ) = @_;

    foreach my $node ( $abstractRoot->children() )
    {
        my $tagName = $node->tag();

        # Only the abstract text itself is of interest; report the rest
        if( $tagName ne "AbstractText" )
        {
            $self->WriteLog( "_ParseOtherAbstract - (New Tag Found) - Tag: " . $tagName . ", Field: " . $node->field() );
            next;
        }

        my $abstractText = Text::Unidecode::unidecode( $node->field() );
        chomp( $abstractText );

        # Keep the text only when abstract storage is switched on
        if( $self->GetStoreAbstract() == 1 )
        {
            $self->AppendToTempStr( $abstractText );
        }

        $self->WriteLog( "_ParseOtherAbstract - Tag: " . $tagName . ", Field: " . $abstractText );
    }
}
|
670
|
|
|
|
|
|
|
|
|
671
|
|
|
|
|
|
|
# Reads the "Day", "Month" and "Year" children of a date node and stores the
# result on $self as "Month/Day/Year" through SetTempDate(). The previously
# stored date is cleared first and the node is purged afterwards.
#
# A component that is absent, undefined or empty is replaced by "00"
# ("0000" for the year), so the stored date always has three populated
# parts. Previously the components were initialised to "" and the
# !defined fallbacks never applied to a missing element.
#
# Parameters:
#   $twigSelf - first positional argument; not used by this routine
#               (presumably the XML::Twig parser when used as a handler
#               - TODO confirm against the handler registration)
#   $article  - node object providing children() and purge()
#   $self     - the Xmltow2v object
sub _QuickParseDateCreated
{
    my ( $twigSelf, $article, $self ) = @_;

    my %valueOf = ( "Day" => undef, "Month" => undef, "Year" => undef );

    # Clear Old Date
    $self->ClearTempDate();

    for my $date ( $article->children() )
    {
        my $tagName = $date->tag();
        $valueOf{ $tagName } = $date->field() if exists( $valueOf{ $tagName } );
    }

    # Check(s) - Fall Back To Zero Padded Placeholders For Missing/Empty Parts
    my $day   = ( defined( $valueOf{ "Day" } )   && $valueOf{ "Day" }   ne "" ) ? $valueOf{ "Day" }   : "00";
    my $month = ( defined( $valueOf{ "Month" } ) && $valueOf{ "Month" } ne "" ) ? $valueOf{ "Month" } : "00";
    my $year  = ( defined( $valueOf{ "Year" } )  && $valueOf{ "Year" }  ne "" ) ? $valueOf{ "Year" }  : "0000";

    $self->WriteLog( "_QuickParseDateCreated - Month: $month, Day: $day, Year: $year " );

    $self->SetTempDate( "$month/$day/$year" );

    # Free Memory
    $article->purge();
}
|
703
|
|
|
|
|
|
|
|
|
704
|
|
|
|
|
|
|
# Collects the journal "Title" child into the temporary string (when
# GetStoreTitle() == 1), then - if the temporary date lies inside the
# configured begin/end range - appends the temporary string to the text
# corpus, compoundified and lower-cased first when GetCompoundifyText() == 1.
# The temporary string is always cleared and the node purged afterwards.
#
# The date range is evaluated exactly once per call. The earlier if/elsif
# form evaluated it a second time whenever compoundify was enabled and the
# date was out of range, repeating the work and any log lines it emits.
#
# Parameters:
#   $twigSelf    - first positional argument; not used by this routine
#                  (presumably the XML::Twig parser - TODO confirm)
#   $journalRoot - node object providing children() and purge()
#   $self        - the Xmltow2v object
sub _QuickParseJournal
{
    my ( $twigSelf, $journalRoot, $self ) = @_;

    for my $journalChild ( $journalRoot->children() )
    {
        if( $journalChild->tag() eq "Title" )
        {
            my $tempStr = Text::Unidecode::unidecode( $journalChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreTitle() == 1 );

            $self->WriteLog( "_QuickParseJournal - Tag: " . $journalChild->tag() . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseJournal - (New Tag Found) - Tag: " . $journalChild->tag() . ", Field: " . $journalChild->field() );
        }
    }

    # Append Article Data To Text Corpus Only When The Date Is In Range
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        my $corpusStr = $self->GetTempStr();

        # Compoundify String If Option Is Enabled
        $corpusStr = $self->CompoundifyString( lc( $corpusStr ) ) if ( $self->GetCompoundifyText() == 1 );

        $self->AppendStrToTextCorpus( $corpusStr );
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $journalRoot->purge();
}
|
748
|
|
|
|
|
|
|
|
|
749
|
|
|
|
|
|
|
# Collects the "ArticleTitle" and "Abstract" children of an article node
# into the temporary string (subject to GetStoreTitle() / GetStoreAbstract()),
# then - if the temporary date lies inside the configured begin/end range -
# appends the temporary string to the text corpus, compoundified and
# lower-cased first when GetCompoundifyText() == 1. The temporary string is
# always cleared and the node purged afterwards.
#
# The date range is evaluated exactly once per call. The earlier if/elsif
# form evaluated it a second time whenever compoundify was enabled and the
# date was out of range, repeating the work and any log lines it emits.
#
# Parameters:
#   $twigSelf - first positional argument; not used by this routine
#               (presumably the XML::Twig parser - TODO confirm)
#   $article  - node object providing children() and purge()
#   $self     - the Xmltow2v object
sub _QuickParseArticle
{
    my ( $twigSelf, $article, $self ) = @_;

    for my $articleChild ( $article->children() )
    {
        if( $articleChild->tag() eq "ArticleTitle" )
        {
            my $tempStr = Text::Unidecode::unidecode( $articleChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreTitle() == 1 );

            $self->WriteLog( "_QuickParseArticle - Tag: " . $articleChild->tag() . ", Field: " . $tempStr );
        }
        elsif( $articleChild->tag() eq "Abstract" )
        {
            my $tempStr = Text::Unidecode::unidecode( $articleChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreAbstract() == 1 );

            $self->WriteLog( "_QuickParseArticle - Tag: " . $articleChild->tag() . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseArticle - (New Tag Found) - Tag: " . $articleChild->tag() . ", Field: " . $articleChild->field() );
        }
    }

    # Append Article Data To Text Corpus Only When The Date Is In Range
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        my $corpusStr = $self->GetTempStr();

        # Compoundify String If Option Is Enabled
        $corpusStr = $self->CompoundifyString( lc( $corpusStr ) ) if ( $self->GetCompoundifyText() == 1 );

        $self->AppendStrToTextCorpus( $corpusStr );
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $article->purge();
}
|
803
|
|
|
|
|
|
|
|
|
804
|
|
|
|
|
|
|
# Collects the "AbstractText" children of an abstract node into the
# temporary string (when GetStoreAbstract() == 1), then - if the temporary
# date lies inside the configured begin/end range - appends the temporary
# string to the text corpus, compoundified and lower-cased first when
# GetCompoundifyText() == 1. The temporary string is always cleared and the
# node purged afterwards.
#
# The date range is evaluated exactly once per call. The earlier if/elsif
# form evaluated it a second time whenever compoundify was enabled and the
# date was out of range, repeating the work and any log lines it emits.
#
# Parameters:
#   $twigSelf     - first positional argument; not used by this routine
#                   (presumably the XML::Twig parser - TODO confirm)
#   $abstractRoot - node object providing children() and purge()
#   $self         - the Xmltow2v object
sub _QuickParseOtherAbstract
{
    my ( $twigSelf, $abstractRoot, $self ) = @_;

    for my $abstractChild ( $abstractRoot->children() )
    {
        if( $abstractChild->tag() eq "AbstractText" )
        {
            my $tempStr = Text::Unidecode::unidecode( $abstractChild->field() );
            chomp( $tempStr );

            # Store String
            $self->AppendToTempStr( $tempStr ) if ( $self->GetStoreAbstract() == 1 );

            $self->WriteLog( "_QuickParseOtherAbstract - Tag: " . $abstractChild->tag() . ", Field: " . $tempStr );
        }
        else
        {
            $self->WriteLog( "_QuickParseOtherAbstract - (New Tag Found) - Tag: " . $abstractChild->tag() . ", Field: " . $abstractChild->field() );
        }
    }

    # Append Article Data To Text Corpus Only When The Date Is In Range
    if( $self->IsDateInSpecifiedRange( $self->GetTempDate(), $self->GetBeginDate(), $self->GetEndDate() ) == 1 )
    {
        my $corpusStr = $self->GetTempStr();

        # Compoundify String If Option Is Enabled
        $corpusStr = $self->CompoundifyString( lc( $corpusStr ) ) if ( $self->GetCompoundifyText() == 1 );

        $self->AppendStrToTextCorpus( $corpusStr );
    }

    # Clear string placeholders
    $self->ClearTempStr();

    # Free Memory
    $abstractRoot->purge();
}
|
848
|
|
|
|
|
|
|
|
|
849
|
|
|
|
|
|
|
# Builds the compound word binary search tree from the compound word array
# held by this object, installs the resulting root node on the tree object
# and then clears the compound word array.
# Returns:
#   0  on success
#   -1 when the compound word array is empty (an error is logged)
sub CreateCompoundWordBST
{
    my ( $self ) = @_;

    $self->WriteLog( "CreateCompoundWordBST - Creating Binary Search Tree From Compound Word Array" );

    my $tree  = $self->GetCompoundWordBST();
    my @words = $self->GetCompoundWordAry();

    # Nothing to build a tree from
    if( scalar( @words ) == 0 )
    {
        $self->WriteLog( "CreateCompoundWordBST - Error: Cannot Create BST / Compound Word Array Is Empty - Have You Read The Compound Word File To Memory?" );
        return -1;
    }

    # Build over the whole index range [0, last index] and install the root
    $tree->SetRootNode( $tree->CreateBST( \@words, 0, $#words, undef ) );

    # Clean-Up
    $self->ClearCompoundWordAry();

    $self->WriteLog( "CreateCompoundWordBST - Compound Word Binary Search Tree Created" );

    return 0;
}
|
873
|
|
|
|
|
|
|
|
|
874
|
|
|
|
|
|
|
# Rewrites a whitespace separated string so that every compound word found
# through _CompoundifySearch() is joined with underscores; all other words
# are copied through unchanged. Every emitted token is followed by a single
# space (so a non-empty result ends with a space).
#
# Parameters:
#   $str - text to process; it is split on whitespace.
# Returns:
#   the rewritten string, or the literal "(null)" when $str is undefined.
# Side effects:
#   increments the file-level counters $compoundWordCount (once per compound
#   word emitted) and $postCompWordCount (once per emitted token).
sub CompoundifyString
{
    my ( $self, $str ) = @_;

    return "(null)" if !defined ( $str );

    $self->WriteLog( "CompoundifyString - Compoundifying String - $str" );

    my @strAry = split( ' ', $str );
    $str = "";

    my $finalIndex            = $#strAry;
    my $maxCompoundWordLength = $self->GetMaxCompoundWordLength();

    # C-style loop on purpose: $i is advanced inside the body to skip the
    # words consumed by a located compound word.
    for( my $i = 0; $i < @strAry; $i++ )
    {
        # Look-ahead window handed to the search: the current word plus up to
        # $maxCompoundWordLength following words, clamped to the last valid
        # index. (The old clamp only fired when $i + max exceeded the array
        # *size*, so the slice could read one element past the end and pass
        # an undefined word to the search.)
        my $lastIndex = $i + $maxCompoundWordLength;
        $lastIndex = $finalIndex if ( $lastIndex > $finalIndex );
        my @tempAry = @strAry[$i..$lastIndex];

        my $node = $self->_CompoundifySearch( \@tempAry, undef, $strAry[$i], 0 );

        # Compound Word(s) Found
        if( defined( $node ) )
        {
            # Split Compound Word Data And Set Next Index After Located Compound Word(s)
            my @nodeDataAry = split( ' ', $node->data );
            $i += @nodeDataAry - 1;

            # Add Compound Words To The Return String
            $str .= join( '_', @nodeDataAry ) . " ";

            # Increment Compound Word Counter
            $compoundWordCount++;
        }
        # No Compound Word(s) Found
        else
        {
            # Add Single Word At Array Index To Return String
            $str .= $strAry[$i] . " ";
        }

        # Increment Word Counter
        $postCompWordCount++;
    }

    $self->WriteLog( "CompoundifyString - Compounded String - $str" );

    return $str;
}
|
932
|
|
|
|
|
|
|
|
|
933
|
|
|
|
|
|
|
# Recursive helper that looks for the longest compound word starting at the
# first element of a word window.
#
# Each call searches the compound word tree for $searchStr. While the tree
# reports a "contains" hit and words remain in the window, the next word is
# appended to $searchStr and the search recurses, carrying along the best
# exact match seen so far as $oldNode.
#
# Parameters:
#   $strAryRef - array reference holding the word window
#   $oldNode   - best exact-match node found by earlier calls, or undef
#   $searchStr - space separated words searched for in this call
#   $index     - index of the last window word contained in $searchStr
# Returns:
#   a tree node whose data() is the located compound word, or undef when no
#   compound word starts at the window's first word (or a required argument
#   is undefined).
sub _CompoundifySearch
{
    my ( $self, $strAryRef, $oldNode, $searchStr, $index ) = @_;

    # Checks(s)
    for my $required ( $strAryRef, $searchStr, $index )
    {
        return undef if !defined( $required );
    }

    my $wordCount = scalar( @{ $strAryRef } );
    my $tree      = $self->GetCompoundWordBST();
    my $candidate = $tree->BSTContainsSearch( $tree->GetRootNode(), $searchStr );

    if( defined( $candidate ) && $index < $wordCount )
    {
        $index++;

        # Only an exact match counts as a result; otherwise fall back to the
        # best node carried over from the previous level
        $candidate = $tree->BSTExactSearch( $tree->GetRootNode(), $searchStr );
        $candidate = $oldNode if !defined( $candidate );

        # Extend the search string by the next word and look deeper
        if( $index < $wordCount )
        {
            $searchStr .= ( " " . $strAryRef->[$index] );
            return $self->_CompoundifySearch( $strAryRef, $candidate, $searchStr, $index );
        }
    }

    # Post Check(s)
    if( defined( $candidate ) && ( $candidate->data ne $searchStr ) )
    {
        $candidate = undef;
    }

    # The carried-over node is only valid when it is a word-wise prefix of
    # (or equal to) the current search string
    if( defined( $oldNode ) )
    {
        my @searchWords = split( ' ', $searchStr );
        my @nodeWords   = split( ' ', $oldNode->data );

        if( @searchWords > @nodeWords )
        {
            my $searchPrefix = join( ' ', @searchWords[ 0 .. $#nodeWords ] );
            $oldNode = undef if $searchPrefix ne join( ' ', @nodeWords );
        }
        elsif( @searchWords == @nodeWords )
        {
            $oldNode = undef if $oldNode->data ne $searchStr;
        }
        else
        {
            $oldNode = undef;
        }
    }

    return $candidate if defined( $candidate );

    # Bug Fix: If Search Word Found At First Array Index And Second Word Not Found.
    #          Prevent Invalid Data From Being Returned.
    return undef if $index == 1;

    return $oldNode;
}
|
997
|
|
|
|
|
|
|
|
|
998
|
|
|
|
|
|
|
# Reads a compound word list (one entry per line) into memory.
#
# Every line is chomped, normalised with RemoveSpecialCharactersFromString()
# and collected; the collected list is sorted and stored with
# SetCompoundWordAry(). SetCompoundifyText() is switched to 1 when at least
# one entry was read and to 0 otherwise.
#
# Parameters:
#   $fileDir                      - path of the compound word file
#   $autoSetMaxCompoundWordLength - when defined, the maximum compound word
#                                   length is reset to 0 and then raised to
#                                   the largest number of words seen on a line
# Returns:
#   0  on success
#   -1 when the path is undefined, does not exist, cannot be opened, or the
#      maximum compound word length exceeds 100 (each case is logged)
sub ReadCompoundWordDataFromFile
{
    my ( $self, $fileDir, $autoSetMaxCompoundWordLength ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Directory Not Defined" );
        return -1;
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Directory/File Does Not Exist" );
        return -1;
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Reading Compound Word File: \"$fileDir\"" );

    my @dataAry = ();

    # Read Compound Word Data From File To Memory. The open is checked so an
    # unreadable file is reported instead of silently yielding an empty list.
    my $fileHandle;
    if( !open( $fileHandle, '<:encoding(UTF-8)', "$fileDir" ) )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Unable To Open File \"$fileDir\" - $!" );
        return -1;
    }

    # Prepare Max Compound Word Length
    $self->SetMaxCompoundWordLength( 0 ) if defined ( $autoSetMaxCompoundWordLength );

    while( my $row = <$fileHandle> )
    {
        chomp( $row );
        $row = $self->RemoveSpecialCharactersFromString( $row );
        push( @dataAry, $row );

        # Find Max Compound Word Length
        my @words = split( ' ', $row );
        my $size  = @words;
        $self->SetMaxCompoundWordLength( $size ) if defined( $autoSetMaxCompoundWordLength ) && ( $self->GetMaxCompoundWordLength() < $size );
    }

    close( $fileHandle );

    if( $self->GetMaxCompoundWordLength() > 100 )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Error: Compound Word Length > 100" );
        return -1;
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Auto Set Max Compound Word Length To \"" . $self->GetMaxCompoundWordLength() . "\"") if defined ( $autoSetMaxCompoundWordLength );
    $self->WriteLog( "ReadCompoundWordDataFromFile - Reading Complete" );
    $self->WriteLog( "ReadCompoundWordDataFromFile - Sorting Compound Word List" );

    @dataAry = sort( @dataAry );
    $self->SetCompoundWordAry( \@dataAry );

    if( @dataAry > 0 )
    {
        $self->WriteLog( "ReadCompoundWordDataFromFile - Stored " . @dataAry . " Compound Words In Memory" );
        $self->WriteLog( "ReadCompoundWordDataFromFile - Detected Compound Word Array Data / Auto-Setting Compoundify Text = 1" );
        $self->SetCompoundifyText( 1 );
    }
    else
    {
        $self->WriteLog( "ReadCompoundwordDataFromFile - No Compound Word Array Data Detected / Auto-Setting Compoundify Text = 0" );
        $self->SetCompoundifyText( 0 );
    }

    $self->WriteLog( "ReadCompoundWordDataFromFile - Sorting Complete" );

    return 0;
}
|
1054
|
|
|
|
|
|
|
|
|
1055
|
|
|
|
|
|
|
# Writes the compound word array held by this object to a UTF-8 encoded
# file, one compound word per line. An existing file is overwritten.
#
# Parameters:
#   $savePath - destination file path
# Returns:
#   0  on success
#   -1 when no path was given, or the file could not be opened or closed
#      (each case is logged). Previously a failed open was ignored and 0 was
#      returned although nothing had been written.
sub SaveCompoundWordListToFile
{
    my ( $self, $savePath ) = @_;

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Save Path Not Specified" );
        return -1;
    }

    $self->WriteLog( "SaveCompoundWordListToFile - Saving Compound Word List To \"$savePath\"" );

    # Create File Handle
    my $fileHandle;
    if( !open( $fileHandle, '>:encoding(UTF-8)', "$savePath" ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Unable To Open File \"$savePath\" - $!" );
        return -1;
    }

    # Write Data To File
    for my $compoundWord ( $self->GetCompoundWordAry() )
    {
        print( $fileHandle "$compoundWord\n" );
    }

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "SaveCompoundWordListToFile - Error: Unable To Close File \"$savePath\" - $!" );
        return -1;
    }

    $self->WriteLog( "SaveCompoundWordListToFile - Compound Word List Saved To \"$savePath\"" );

    return 0;
}
|
1080
|
|
|
|
|
|
|
|
|
1081
|
|
|
|
|
|
|
# Reads a UTF-8 text file and returns its lines joined into one string.
# Every line is chomped and prefixed with a single space, so a non-empty
# result starts with a space.
#
# Parameters:
#   $fileDir - path of the file to read
# Returns:
#   the joined text, or the literal "(null)" when the path is undefined,
#   does not exist or cannot be opened (each case is logged).
sub ReadTextFromFile
{
    my ( $self, $fileDir ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Directory Not Defined" );
        return "(null)";
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Directory/File Does Not Exist" );
        return "(null)";
    }

    my $str = "";

    # Read Text Data From File To Memory. The open is checked so an
    # unreadable file is reported instead of silently yielding "".
    my $fileHandle;
    if( !open( $fileHandle, '<:encoding(UTF-8)', "$fileDir" ) )
    {
        $self->WriteLog( "ReadTextFromFile - Error: Unable To Open File \"$fileDir\" - $!" );
        return "(null)";
    }

    while( my $row = <$fileHandle> )
    {
        chomp $row;
        $str .= " $row";
    }

    close( $fileHandle );

    $self->WriteLog( "ReadTextFromFile - Reading Complete" );

    return $str;
}
|
1108
|
|
|
|
|
|
|
|
|
1109
|
|
|
|
|
|
|
# Writes a string to a UTF-8 encoded file. An existing file is always
# overwritten (this routine has no append mode).
#
# Parameters:
#   $savePath - destination file path
#   $str      - text to write; an undefined value is written as an empty
#               string
# Returns:
#   0  on success
#   -1 when no path was given, or the file could not be opened or closed
#      (each case is logged). Previously a failed open was ignored and 0 was
#      returned although nothing had been written.
sub SaveTextToFile
{
    my ( $self, $savePath, $str ) = @_;

    if( !defined( $savePath ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: No Save Path Specified" );
        return -1;
    }

    $self->WriteLog( "SaveTextToFile - Saving Data To \"$savePath\"" );

    # Nothing To Write Is Treated As An Empty String
    $str = "" if !defined( $str );

    # Create file handle (truncates an existing file)
    my $fileHandle;
    if( !open( $fileHandle, '>:encoding(UTF-8)', "$savePath" ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: Unable To Open File \"$savePath\" - $!" );
        return -1;
    }

    # Write Data To File
    print( $fileHandle "$str" );

    # Buffered write errors only surface when the handle is closed
    if( !close( $fileHandle ) )
    {
        $self->WriteLog( "SaveTextToFile - Error: Unable To Close File \"$savePath\" - $!" );
        return -1;
    }

    $self->WriteLog( "SaveTextToFile - File Saved To \"$savePath\"" );

    return 0;
}
|
1134
|
|
|
|
|
|
|
|
|
1135
|
|
|
|
|
|
|
# Loads the content of an XML file into memory and returns it as a string.
#
# A path containing ".gz" is decompressed in memory with
# IO::Uncompress::Gunzip. Any other path is read line by line as UTF-8;
# each line is chomped and re-terminated with "\n", so the result always
# ends with a newline when the file is not empty.
#
# Parameters:
#   $fileDir - path of the XML (or gzip compressed XML) file
# Returns:
#   the file content, or the literal "(null)" when the path is undefined,
#   does not exist or (plain files) cannot be opened; each case is logged.
# Dies:
#   with "gunzip failed\n" when decompression fails.
sub _ReadXMLDataFromFile
{
    my ( $self, $fileDir ) = @_;

    if( !defined ( $fileDir ) )
    {
        $self->WriteLog( "_ReadXMLDataFromFile - Error: Directory Not Defined" );
        return "(null)";
    }

    if( !( -e "$fileDir" ) )
    {
        $self->WriteLog( "_ReadXMLDataFromFile - Error: Directory/File Does Not Exist" );
        return "(null)";
    }

    my $data = "";

    # Extract XML File From GZip To Memory
    # NOTE(review): ".gz" is matched anywhere in the path, not only as the
    # file extension - confirm this is intended.
    if ( index( $fileDir, ".gz" ) != -1 )
    {
        IO::Uncompress::Gunzip::gunzip( "$fileDir" => \$data ) or die "gunzip failed\n";
    }
    # Read XML Data From File To Memory
    else
    {
        # The open is checked so an unreadable file is reported instead of
        # silently yielding an empty document.
        my $fileHandle;
        if( !open( $fileHandle, '<:encoding(UTF-8)', "$fileDir" ) )
        {
            $self->WriteLog( "_ReadXMLDataFromFile - Error: Unable To Open File \"$fileDir\" - $!" );
            return "(null)";
        }

        while( my $row = <$fileHandle> )
        {
            chomp $row;
            $data .= "$row\n";
        }

        close( $fileHandle );
    }

    $self->WriteLog( "_ReadXMLDataFromFile - Reading Data Complete/Data Stored" );

    return $data;
}
|
1170
|
|
|
|
|
|
|
|
|
1171
|
|
|
|
|
|
|
# Writes the text corpus string of this object to a UTF-8 encoded file while
# holding a lock on the file-level $writeLock variable.
#
# Parameters:
#   $savePath     - destination file path
#   $appendToFile - 0 overwrites the file, 1 appends to it; when undefined
#                   the value of GetOverwriteExitingFile() is used
#                   NOTE(review): an "overwrite" setting is used here as the
#                   "append" flag - confirm the intended polarity.
# Returns:
#   1  on success
#   -1 when no path was given, $appendToFile is neither 0 nor 1, or the file
#      could not be opened or closed (each case is logged). Previously a
#      failed open was ignored, and an unexpected $appendToFile value left
#      the file handle undefined so that print() died.
sub _SaveTextCorpusToFile
{
    my ( $self, $savePath, $appendToFile ) = @_;

    # Prevent Other Threads From Writing At The Same Time
    {
        lock( $writeLock );

        if( !defined( $savePath ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: No Save Path Specified" );
            return -1;
        }

        $appendToFile = $self->GetOverwriteExitingFile() if !defined ( $appendToFile );

        $self->WriteLog( "_SaveTextCorpusToFile - Saving Text Corpus To \"$savePath\"" );

        # Over write file if $appendToFile == 0, append to file if $appendToFile == 1
        my $openMode = undef;
        $openMode = '>:encoding(UTF-8)'  if $appendToFile == 0;
        $openMode = '>>:encoding(UTF-8)' if $appendToFile == 1;

        if( !defined( $openMode ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: Append Option Must Be 0 Or 1" );
            return -1;
        }

        # Create file handle
        my $fileHandle;
        if( !open( $fileHandle, $openMode, "$savePath" ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: Unable To Open File \"$savePath\" - $!" );
            return -1;
        }

        # Write Data To File
        print( $fileHandle $self->GetTextCorpusStr() );

        # Buffered write errors only surface when the handle is closed
        if( !close( $fileHandle ) )
        {
            $self->WriteLog( "_SaveTextCorpusToFile - Error: Unable To Close File \"$savePath\" - $!" );
            return -1;
        }

        $self->WriteLog( "_SaveTextCorpusToFile - Text Corpus Saved To \"$savePath\"" );
    }

    return 1;
}
|
1206
|
|
|
|
|
|
|
|
|
1207
|
|
|
|
|
|
|
# Tests whether a date lies inside an inclusive begin/end range.
# All three dates are strings in "Month/Day/Year" form and their parts are
# compared numerically.
#
# Parameters:
#   $date      - date to test
#   $beginDate - first date of the range; GetBeginDate() is used (and a
#                warning logged) when undefined
#   $endDate   - last date of the range; GetEndDate() is used (and a
#                warning logged) when undefined
# Returns:
#   1 when $date is inside the range (boundaries included)
#   0 when it is outside, when $date is undefined, when any of the dates
#     does not split into exactly three "/" separated parts, or when any
#     part is negative
sub IsDateInSpecifiedRange
{
    my ( $self, $date, $beginDate, $endDate ) = @_;

    unless( defined( $date ) )
    {
        $self->WriteLog( "Error: Date Not Specified To Check Against Date Range" );
        return 0;
    }

    # Report every missing boundary first, then fill in the defaults
    my $beginMissing = !defined( $beginDate );
    my $endMissing   = !defined( $endDate );

    $self->WriteLog( "Warning - BeginDate Parameter Not Specified - Using Default Value: " . $self->GetBeginDate() ) if $beginMissing;
    $self->WriteLog( "Warning - EndDate Parameter Not Specified - Using Default Value: " . $self->GetEndDate() ) if $endMissing;
    $beginDate = $self->GetBeginDate() if $beginMissing;
    $endDate   = $self->GetEndDate()   if $endMissing;

    my @dateParts  = split( '/', $date );
    my @beginParts = split( '/', $beginDate );
    my @endParts   = split( '/', $endDate );

    # Check(s) - every date must consist of exactly three parts
    for my $checked ( [ \@dateParts, $date ], [ \@beginParts, $beginDate ], [ \@endParts, $endDate ] )
    {
        my ( $parts, $text ) = @{ $checked };
        next if @{ $parts } == 3;

        $self->WriteLog( "Invalid Date Format - Requested Format: Month/Day/Year : Specified Format - $text" );
        return 0;
    }

    my ( $dateMonth,  $dateDay,  $dateYear )  = @dateParts;
    my ( $beginMonth, $beginDay, $beginYear ) = @beginParts;
    my ( $endMonth,   $endDay,   $endYear )   = @endParts;

    # Check(s) - negative components are never valid
    for my $component ( $dateYear,  $beginYear,  $endYear,
                        $dateMonth, $beginMonth, $endMonth,
                        $dateDay,   $beginDay,   $endDay )
    {
        return 0 if $component < 0;
    }

    # Year outside the range
    return 0 if $dateYear < $beginYear;
    return 0 if $dateYear > $endYear;

    # Same year as the lower boundary: month, then day, must not precede it
    if( $dateYear == $beginYear )
    {
        return 0 if $dateMonth < $beginMonth;
        return 0 if $dateMonth == $beginMonth && $dateDay < $beginDay;
    }

    # Same year as the upper boundary: month, then day, must not exceed it
    if( $dateYear == $endYear )
    {
        return 0 if $dateMonth > $endMonth;
        return 0 if $dateMonth == $endMonth && $dateDay > $endDay;
    }

    return 1;
}
|
1265
|
|
|
|
|
|
|
|
|
1266
|
|
|
|
|
|
|
# Classifies a file system path.
#
# Parameters:
#   $path - path to inspect
# Returns:
#   "file"    when the path is a plain file
#   "dir"     when the path is a directory
#   "unknown" when the path is undefined, does not exist, or exists but is
#             neither a plain file nor a directory (e.g. a device or
#             socket). The last case previously fell off the end of the
#             routine and returned the false result of the -d test.
sub IsFileOrDirectory
{
    my ( $self, $path ) = @_;

    # Check(s)
    return "unknown" if !defined( $path );
    return "unknown" if !( -e $path );

    return "file" if ( -f $path );
    return "dir"  if ( -d $path );

    return "unknown";
}
|
1277
|
|
|
|
|
|
|
|
|
1278
|
|
|
|
|
|
|
# Normalizes a string for text corpus output.
#
# Input : $str -> Text to normalize. (String)
# Output: Lowercased text in which every character other than 'a'-'z' and the
#         line-break characters has been turned into a single space, line
#         breaks are rewritten for the current OS and surrounding/duplicate
#         white space is removed. Returns "" for undefined input.
sub RemoveSpecialCharactersFromString
{
    my ( $self, $str ) = @_;

    # Check(s)
    return "" if !defined( $str );

    $str = lc( $str );                  # Convert all characters to lowercase
    $str =~ s/ +/ /g;                   # Remove duplicate white spaces between words
    $str =~ s/'s//g;                    # Remove "'s" characters (Apostrophe 's')
    $str =~ s/-/ /g;                    # Replace all hyphen characters with spaces
    $str =~ tr/a-z\015\012/ /cs;        # Replace runs of anything but 'a' to 'z' and line breaks with one space
    #$str =~ s/[\$#@~!&*()\[\];.,:?^\-'`\\\/]+//g;   # Does not include numeric characters

    # Convert String Line Ending Suitable To The Target
    # Default to "\012" so platforms that are not listed explicitly (darwin,
    # freebsd, ...) keep their line breaks. With an empty replacement the
    # breaks were deleted and the words on either side were merged.
    my $os         = $self->GetOSType();
    my $lineEnding = "\012";

    $lineEnding = "\015\012" if ( $os eq "MSWin32" );
    $lineEnding = "\015"     if ( $os eq "MacOS" );

    $str =~ s/(\015\012|\012|\015)/$lineEnding/g;

    # Removes Spaces At Both Ends Of String And More Than One Space In-Between Ends
    # NOTE(review): "\s(?=\s)" also removes the "\015" of a "\015\012" pair.
    $str =~ s/^\s+|\s(?=\s)|\s+$//g;

    return $str;
}
|
1303
|
|
|
|
|
|
|
|
|
1304
|
|
|
|
|
|
|
# Asks File::Type for the type of the file at $filePath and returns whatever
# its checktype_filename() call reports.
sub GetFileType
{
    my $self     = shift;
    my $filePath = shift;

    # The File::Type object is a temporary; it is released as soon as the
    # lookup has been made.
    my $detectedType = File::Type->new()->checktype_filename( $filePath );

    return $detectedType;
}
|
1314
|
|
|
|
|
|
|
|
|
1315
|
|
|
|
|
|
|
# Validates the configured begin/end dates.
#
# Dates are read through GetBeginDate()/GetEndDate() and may use either "/" or
# "-" as delimiter ("/" wins when both occur). A "-" delimited date is written
# back in "Month/Day/Year" form through SetBeginDate()/SetEndDate().
#
# Input : None
# Output: 0 when both dates are well formed and begin <= end, -1 otherwise
#         (the reason is reported through WriteLog()).
sub _DateCheck
{
    my ( $self ) = @_;

    my $beginDate = $self->GetBeginDate();
    my $endDate   = $self->GetEndDate();

    # Check(s)
    if( !defined( $beginDate ) )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Not Defined" );
        return -1;
    }

    if( !defined( $endDate ) )
    {
        # This call was previously misspelled "Writelog", which is not a
        # method of this module and aborted instead of logging.
        $self->WriteLog( "_DateCheck - Error: End Date Not Defined" );
        return -1;
    }

    # Parse Begin Date
    my $delimiter = "";
    $delimiter = "-" if index( $beginDate, "-" ) != -1;
    $delimiter = "/" if index( $beginDate, "/" ) != -1;

    if( $delimiter eq "" )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Improper Format" );
        return -1;
    }

    my @bDateAry = split( $delimiter, $beginDate );

    # Check For Default Begin Date And Adjust Accordingly
    # (only inspect the elements once we know they exist)
    if( @bDateAry >= 3 && $bDateAry[0] == 0 && $bDateAry[1] == 0 && $bDateAry[2] == 0 )
    {
        $bDateAry[0] = 1;
        $bDateAry[1] = 1;
        $bDateAry[2] = 0;
    }

    # Set Date In Proper Format
    if( $delimiter eq "-" )
    {
        $beginDate = join( '/', @bDateAry );
        $self->SetBeginDate( $beginDate );
    }

    # Parse End Date
    $delimiter = "";
    $delimiter = "-" if index( $endDate, "-" ) != -1;
    $delimiter = "/" if index( $endDate, "/" ) != -1;

    if( $delimiter eq "" )
    {
        $self->WriteLog( "_DateCheck - Error: End Date Improper Format" );
        return -1;
    }

    my @eDateAry = split( $delimiter, $endDate );

    # Check For Default End Date And Adjust Accordingly
    if( @eDateAry >= 3 && $eDateAry[0] == 99 && $eDateAry[1] == 99 && $eDateAry[2] == 9999 )
    {
        $eDateAry[0] = 12;
        $eDateAry[1] = 31;
        $eDateAry[2] = 9999;
    }

    # Set Date In Proper Format
    if( $delimiter eq "-" )
    {
        $endDate = join( '/', @eDateAry );
        $self->SetEndDate( $endDate );
    }

    # Basic Checks
    my $beginMalformed = ( @bDateAry != 3 );
    my $endMalformed   = ( @eDateAry != 3 );

    $self->WriteLog( "_DateCheck - Error: Begin Date Not Specified In \"Month/Day/Year\" or \"Month-Day-Year\" Format" ) if ( $beginMalformed );
    $self->WriteLog( "_DateCheck - Error: End Date Not Specified In \"Month/Day/Year\" or \"Month-Day-Year\" Format" ) if ( $endMalformed );
    return -1 if ( $beginMalformed || $endMalformed );

    # Range check per date element: [ array index, label, minimum, maximum ].
    # Both dates are reported before giving up on an element.
    my @rangeChecks = ( [ 0, "Month", 1, 12 ], [ 1, "Day", 1, 31 ], [ 2, "Year", 0, 9999 ] );

    for my $check ( @rangeChecks )
    {
        my ( $index, $label, $min, $max ) = @{ $check };
        my $outOfRange = 0;

        for my $date ( [ "Begin", \@bDateAry ], [ "End", \@eDateAry ] )
        {
            my ( $which, $parts ) = @{ $date };
            my $value = $parts->[$index];

            next if ( $value >= $min && $value <= $max );

            $self->WriteLog( "_DateCheck - Error: Incorrect $which Date $label Value - Expected Value: $min-$max / Specified Value: " . $value );
            $outOfRange = 1;
        }

        return -1 if ( $outOfRange );
    }

    # Advanced Checks
    if( $bDateAry[2] > $eDateAry[2] )
    {
        $self->WriteLog( "_DateCheck - Error: Begin Date Year > End Date Year" );
        return -1;
    }

    if( $bDateAry[2] == $eDateAry[2] && $bDateAry[0] > $eDateAry[0] )
    {
        $self->WriteLog( "_DateCheck - Error: Years Equal, Begin Date Month > End Date Month" );
        return -1;
    }

    if( $bDateAry[2] == $eDateAry[2] && $bDateAry[0] == $eDateAry[0] && $bDateAry[1] > $eDateAry[1] )
    {
        $self->WriteLog( "_DateCheck - Error: Years And Months Equal, Begin Date Day > End Date Day" );
        return -1;
    }

    return 0;
}
|
1409
|
|
|
|
|
|
|
|
|
1410
|
|
|
|
|
|
|
# Returns the operating system name perl reports through $^O.
sub GetOSType
{
    my $self   = shift;
    my $osName = $^O;

    return $osName;
}
|
1415
|
|
|
|
|
|
|
|
|
1416
|
|
|
|
|
|
|
|
|
1417
|
|
|
|
|
|
|
###################################################################################### |
|
1418
|
|
|
|
|
|
|
# Accessors |
|
1419
|
|
|
|
|
|
|
###################################################################################### |
|
1420
|
|
|
|
|
|
|
|
|
1421
|
|
|
|
|
|
|
# Returns the console debug flag, storing 0 first when it has not been set.
sub GetDebugLog
{
    my $self = shift;

    unless( defined( $self->{ _debugLog } ) )
    {
        $self->{ _debugLog } = 0;
    }

    return $self->{ _debugLog };
}
|
1427
|
|
|
|
|
|
|
|
|
1428
|
|
|
|
|
|
|
# Returns the log-file flag, storing 0 first when it has not been set.
sub GetWriteLog
{
    my $self = shift;

    unless( defined( $self->{ _writeLog } ) )
    {
        $self->{ _writeLog } = 0;
    }

    return $self->{ _writeLog };
}
|
1434
|
|
|
|
|
|
|
|
|
1435
|
|
|
|
|
|
|
# Returns the store-title flag, storing 1 first when it has not been set.
sub GetStoreTitle
{
    my $self = shift;

    unless( defined( $self->{ _storeTitle } ) )
    {
        $self->{ _storeTitle } = 1;
    }

    return $self->{ _storeTitle };
}
|
1441
|
|
|
|
|
|
|
|
|
1442
|
|
|
|
|
|
|
# Returns the store-abstract flag, storing 1 first when it has not been set.
sub GetStoreAbstract
{
    my $self = shift;

    unless( defined( $self->{ _storeAbstract } ) )
    {
        $self->{ _storeAbstract } = 1;
    }

    return $self->{ _storeAbstract };
}
|
1448
|
|
|
|
|
|
|
|
|
1449
|
|
|
|
|
|
|
# Returns the quick-parse flag, storing 0 first when it has not been set.
sub GetQuickParse
{
    my $self = shift;

    unless( defined( $self->{ _quickParse } ) )
    {
        $self->{ _quickParse } = 0;
    }

    return $self->{ _quickParse };
}
|
1455
|
|
|
|
|
|
|
|
|
1456
|
|
|
|
|
|
|
# Returns the compoundify-text flag, storing 0 first when it has not been set.
sub GetCompoundifyText
{
    my $self = shift;

    unless( defined( $self->{ _compoundifyText } ) )
    {
        $self->{ _compoundifyText } = 0;
    }

    return $self->{ _compoundifyText };
}
|
1462
|
|
|
|
|
|
|
|
|
1463
|
|
|
|
|
|
|
# Returns the worker thread count. When none has been set, the value reported
# by Sys::CpuAffinity::getNumCpus() is stored and returned.
sub GetNumOfThreads
{
    my $self = shift;

    unless( defined( $self->{ _numOfThreads } ) )
    {
        $self->{ _numOfThreads } = Sys::CpuAffinity::getNumCpus();
    }

    return $self->{ _numOfThreads };
}
|
1469
|
|
|
|
|
|
|
|
|
1470
|
|
|
|
|
|
|
# Returns the working directory, storing Cwd::getcwd() first when none has
# been set.
sub GetWorkingDir
{
    my $self = shift;

    unless( defined( $self->{ _workingDir } ) )
    {
        $self->{ _workingDir } = Cwd::getcwd();
    }

    return $self->{ _workingDir };
}
|
1476
|
|
|
|
|
|
|
|
|
1477
|
|
|
|
|
|
|
# Returns the save path, storing the placeholder "(null)" first when none has
# been set.
sub GetSavePath
{
    my $self = shift;

    unless( defined( $self->{ _savePath } ) )
    {
        $self->{ _savePath } = "(null)";
    }

    return $self->{ _savePath };
}
|
1483
|
|
|
|
|
|
|
|
|
1484
|
|
|
|
|
|
|
# Returns the begin date, storing the default "00/00/0000" first when none has
# been set.
sub GetBeginDate
{
    my $self = shift;

    unless( defined( $self->{ _beginDate } ) )
    {
        $self->{ _beginDate } = "00/00/0000";
    }

    return $self->{ _beginDate };
}
|
1490
|
|
|
|
|
|
|
|
|
1491
|
|
|
|
|
|
|
# Returns the end date, storing the default "99/99/9999" first when none has
# been set.
sub GetEndDate
{
    my $self = shift;

    unless( defined( $self->{ _endDate } ) )
    {
        $self->{ _endDate } = "99/99/9999";
    }

    return $self->{ _endDate };
}
|
1497
|
|
|
|
|
|
|
|
|
1498
|
|
|
|
|
|
|
# Returns the XML string to parse, storing the placeholder "(null)" first when
# none has been set.
sub GetXMLStringToParse
{
    my $self = shift;

    unless( defined( $self->{ _xmlStringToParse } ) )
    {
        $self->{ _xmlStringToParse } = "(null)";
    }

    return $self->{ _xmlStringToParse };
}
|
1504
|
|
|
|
|
|
|
|
|
1505
|
|
|
|
|
|
|
# Returns the text corpus string, storing "" first when none has been set.
sub GetTextCorpusStr
{
    my $self = shift;

    unless( defined( $self->{ _textCorpusStr } ) )
    {
        $self->{ _textCorpusStr } = "";
    }

    return $self->{ _textCorpusStr };
}
|
1511
|
|
|
|
|
|
|
|
|
1512
|
|
|
|
|
|
|
# Returns the stored log file handle, or undef when none has been set (the
# "_fileHandle" slot is created with undef in that case).
sub GetFileHandle
{
    my $self = shift;

    unless( defined( $self->{ _fileHandle } ) )
    {
        $self->{ _fileHandle } = undef;
    }

    return $self->{ _fileHandle };
}
|
1518
|
|
|
|
|
|
|
|
|
1519
|
|
|
|
|
|
|
# Returns the stored twig handler, storing the placeholder "(null)" first when
# none has been set.
sub GetTwigHandler
{
    my $self = shift;

    unless( defined( $self->{ _twigHandler } ) )
    {
        $self->{ _twigHandler } = "(null)";
    }

    return $self->{ _twigHandler };
}
|
1525
|
|
|
|
|
|
|
|
|
1526
|
|
|
|
|
|
|
# Returns the parsed counter, storing 0 first when it has not been set.
sub GetParsedCount
{
    my $self = shift;

    unless( defined( $self->{ _parsedCount } ) )
    {
        $self->{ _parsedCount } = 0;
    }

    return $self->{ _parsedCount };
}
|
1532
|
|
|
|
|
|
|
|
|
1533
|
|
|
|
|
|
|
# Returns the temporary string, storing "" first when none has been set.
sub GetTempStr
{
    my $self = shift;

    unless( defined( $self->{ _tempStr } ) )
    {
        $self->{ _tempStr } = "";
    }

    return $self->{ _tempStr };
}
|
1539
|
|
|
|
|
|
|
|
|
1540
|
|
|
|
|
|
|
# Returns the temporary date string, storing "" first when none has been set.
sub GetTempDate
{
    my $self = shift;

    unless( defined( $self->{ _tempDate } ) )
    {
        $self->{ _tempDate } = "";
    }

    return $self->{ _tempDate };
}
|
1546
|
|
|
|
|
|
|
|
|
1547
|
|
|
|
|
|
|
# Returns the stored compound words as a list (their count in scalar context).
#
# Input : None
# Output: Elements of the compound word array; an empty list when no array
#         has been stored yet.
sub GetCompoundWordAry
{
    my ( $self ) = @_;

    # Default to an empty array *reference*. Assigning "()" to a hash element
    # stores undef, and dereferencing that below is fatal under "use strict".
    $self->{ _compoundWordAry } = [] if !defined ( $self->{ _compoundWordAry } );

    return @{ $self->{ _compoundWordAry } };
}
|
1553
|
|
|
|
|
|
|
|
|
1554
|
|
|
|
|
|
|
# Returns the compound word binary search tree, creating and storing a new
# Word2vec::Bst object first when none has been set.
sub GetCompoundWordBST
{
    my $self = shift;

    unless( defined( $self->{ _compoundWordBST } ) )
    {
        $self->{ _compoundWordBST } = Word2vec::Bst->new();
    }

    return $self->{ _compoundWordBST };
}
|
1560
|
|
|
|
|
|
|
|
|
1561
|
|
|
|
|
|
|
# Returns the maximum compound word length, storing 20 first when it has not
# been set.
sub GetMaxCompoundWordLength
{
    my $self = shift;

    unless( defined( $self->{ _maxCompoundWordLength } ) )
    {
        $self->{ _maxCompoundWordLength } = 20;
    }

    return $self->{ _maxCompoundWordLength };
}
|
1567
|
|
|
|
|
|
|
|
|
1568
|
|
|
|
|
|
|
# Returns the overwrite-existing-file flag, storing 0 first when it has not
# been set.
sub GetOverwriteExistingFile
{
    my $self = shift;

    unless( defined( $self->{ _overwriteExistingFile } ) )
    {
        $self->{ _overwriteExistingFile } = 0;
    }

    return $self->{ _overwriteExistingFile };
}
|
1574
|
|
|
|
|
|
|
|
|
1575
|
|
|
|
|
|
|
|
|
1576
|
|
|
|
|
|
|
###################################################################################### |
|
1577
|
|
|
|
|
|
|
# Mutators |
|
1578
|
|
|
|
|
|
|
###################################################################################### |
|
1579
|
|
|
|
|
|
|
|
|
1580
|
|
|
|
|
|
|
# Stores $value as the store-title flag and returns the stored value.
sub SetStoreTitle
{
    my $self  = shift;
    my $value = shift;

    $self->{ _storeTitle } = $value;

    return $self->{ _storeTitle };
}
|
1585
|
|
|
|
|
|
|
|
|
1586
|
|
|
|
|
|
|
# Stores $value as the store-abstract flag and returns the stored value.
sub SetStoreAbstract
{
    my $self  = shift;
    my $value = shift;

    $self->{ _storeAbstract } = $value;

    return $self->{ _storeAbstract };
}
|
1591
|
|
|
|
|
|
|
|
|
1592
|
|
|
|
|
|
|
# Stores $dir as the working directory and returns the stored value.
sub SetWorkingDir
{
    my $self = shift;
    my $dir  = shift;

    $self->{ _workingDir } = $dir;

    return $self->{ _workingDir };
}
|
1597
|
|
|
|
|
|
|
|
|
1598
|
|
|
|
|
|
|
# Stores $dir as the save path and returns the stored value.
sub SetSavePath
{
    my $self = shift;
    my $dir  = shift;

    $self->{ _savePath } = $dir;

    return $self->{ _savePath };
}
|
1603
|
|
|
|
|
|
|
|
|
1604
|
|
|
|
|
|
|
# Stores $value as the quick-parse flag and returns the stored value.
sub SetQuickParse
{
    my $self  = shift;
    my $value = shift;

    $self->{ _quickParse } = $value;

    return $self->{ _quickParse };
}
|
1609
|
|
|
|
|
|
|
|
|
1610
|
|
|
|
|
|
|
# Stores $value as the compoundify-text flag and returns the stored value.
sub SetCompoundifyText
{
    my $self  = shift;
    my $value = shift;

    $self->{ _compoundifyText } = $value;

    return $self->{ _compoundifyText };
}
|
1615
|
|
|
|
|
|
|
|
|
1616
|
|
|
|
|
|
|
# Stores the worker thread count and returns the stored value. A negative
# $value is logged and replaced by Sys::CpuAffinity::getNumCpus().
sub SetNumOfThreads
{
    my $self  = shift;
    my $value = shift;

    # Check
    if( $value < 0 )
    {
        $self->WriteLog( "SetNumOfThreads - Warning: Number Of Threads Value < 0 / Setting Default Value" );
        $value = Sys::CpuAffinity::getNumCpus();
    }

    $self->{ _numOfThreads } = $value;

    return $self->{ _numOfThreads };
}
|
1626
|
|
|
|
|
|
|
|
|
1627
|
|
|
|
|
|
|
# Stores $str as the begin date and returns the stored value.
sub SetBeginDate
{
    my $self = shift;
    my $str  = shift;

    $self->{ _beginDate } = $str;

    return $self->{ _beginDate };
}
|
1632
|
|
|
|
|
|
|
|
|
1633
|
|
|
|
|
|
|
# Stores $str as the end date and returns the stored value.
sub SetEndDate
{
    my $self = shift;
    my $str  = shift;

    $self->{ _endDate } = $str;

    return $self->{ _endDate };
}
|
1638
|
|
|
|
|
|
|
|
|
1639
|
|
|
|
|
|
|
# Stores $str as the XML string to parse and returns the stored value.
sub SetXMLStringToParse
{
    my $self = shift;
    my $str  = shift;

    $self->{ _xmlStringToParse } = $str;

    return $self->{ _xmlStringToParse };
}
|
1644
|
|
|
|
|
|
|
|
|
1645
|
|
|
|
|
|
|
# Stores $str as the text corpus string and returns the stored value.
sub SetTextCorpusStr
{
    my $self = shift;
    my $str  = shift;

    $self->{ _textCorpusStr } = $str;

    return $self->{ _textCorpusStr };
}
|
1650
|
|
|
|
|
|
|
|
|
1651
|
|
|
|
|
|
|
# Appends a string, followed by one space, to the text corpus string.
#
# Input : $str -> Text to append. (String)
# Output: None of note; undefined or empty input is ignored.
sub AppendStrToTextCorpus
{
    my ( $self, $str ) = @_;

    # Test definedness first: comparing an undefined value with "eq" raises an
    # "uninitialized value" warning before the defined() guard is reached.
    return if ( !defined( $str ) || $str eq "" );

    # Prevent Other Threads From Appending Data At The Same Time
    {
        lock( $appendLock );

        # Removes Spaces At Both Ends Of String And More Than One Space In-Between Ends
        $str =~ s/^\s+|\s(?=\s)|\s+$//g;

        # Append string to text corpus
        $self->{ _textCorpusStr } .= "$str ";
    }
}
|
1668
|
|
|
|
|
|
|
|
|
1669
|
|
|
|
|
|
|
# Resets the text corpus string to "" and returns it.
sub ClearTextCorpusStr
{
    my $self = shift;

    $self->{ _textCorpusStr } = "";

    return $self->{ _textCorpusStr };
}
|
1674
|
|
|
|
|
|
|
|
|
1675
|
|
|
|
|
|
|
# Cleans $str and stores the result as the temporary string, which is returned.
#
# NOTE(review): the text is reduced to 'a'-'z' by
# RemoveSpecialCharactersFromString() before Text::Unidecode sees it, so
# accented characters look like they are dropped rather than transliterated -
# confirm the intended order.
sub SetTempStr
{
    my $self = shift;
    my $str  = shift;

    # Strip special characters first, then transliterate what is left.
    my $cleaned = $self->RemoveSpecialCharactersFromString( $str );
    $cleaned    = Text::Unidecode::unidecode( $cleaned );

    $self->{ _tempStr } = $cleaned;

    return $self->{ _tempStr };
}
|
1685
|
|
|
|
|
|
|
|
|
1686
|
|
|
|
|
|
|
# Cleans $str, adds its word count to $preCompWordCount and appends the text,
# followed by one space, to the temporary string. Returns the temporary string.
sub AppendToTempStr
{
    my $self = shift;
    my $str  = shift;

    # Strip special characters, then transliterate what is left.
    my $text = $self->RemoveSpecialCharactersFromString( $str );
    $text    = Text::Unidecode::unidecode( $text );

    # Drop leading/trailing white space and collapse repeated white space.
    $text =~ s/^\s+|\s(?=\s)|\s+$//g;

    # Increment Word Counter
    my @tokens = split( ' ', $text );
    $preCompWordCount += @tokens;

    # Append the cleaned text to the temporary string.
    $self->{ _tempStr } .= "$text ";

    return $self->{ _tempStr };
}
|
1705
|
|
|
|
|
|
|
|
|
1706
|
|
|
|
|
|
|
# Resets the temporary string to "" and returns it.
sub ClearTempStr
{
    my $self = shift;

    $self->{ _tempStr } = "";

    return $self->{ _tempStr };
}
|
1711
|
|
|
|
|
|
|
|
|
1712
|
|
|
|
|
|
|
# Stores $str as the temporary date and returns the stored value.
sub SetTempDate
{
    my $self = shift;
    my $str  = shift;

    $self->{ _tempDate } = $str;

    return $self->{ _tempDate };
}
|
1717
|
|
|
|
|
|
|
|
|
1718
|
|
|
|
|
|
|
# Resets the temporary date to "" and returns it.
sub ClearTempDate
{
    my $self = shift;

    $self->{ _tempDate } = "";

    return $self->{ _tempDate };
}
|
1723
|
|
|
|
|
|
|
|
|
1724
|
|
|
|
|
|
|
# Replaces the stored compound word array with a copy of @{ $aryRef }.
#
# Input : $aryRef -> Reference to the compound words to store. (Array Reference)
#                    An undefined reference is treated as an empty list.
# Output: The stored elements (their count in scalar context).
sub SetCompoundWordAry
{
    my ( $self, $aryRef ) = @_;

    # Only dereference the current array when one exists: counting the
    # elements of an undefined slot is fatal under "use strict".
    my $current = $self->{ _compoundWordAry };

    if( defined( $current ) && @{ $current } > 0 )
    {
        $self->WriteLog( "Warning: Setting CompoundWordArray when array is already defined - Clearing Previous Array" );
        undef( $self->{ _compoundWordAry } );
    }

    my @words = defined( $aryRef ) ? @{ $aryRef } : ();

    return @{ $self->{ _compoundWordAry } } = @words;
}
|
1731
|
|
|
|
|
|
|
|
|
1732
|
|
|
|
|
|
|
# Discards the stored compound word array and leaves a new, empty array in
# its place. Returns an empty list (0 in scalar context).
sub ClearCompoundWordAry
{
    my $self = shift;

    # Drop the old array first so the empty one below is a new array.
    $self->{ _compoundWordAry } = undef;

    return @{ $self->{ _compoundWordAry } } = ();
}
|
1738
|
|
|
|
|
|
|
|
|
1739
|
|
|
|
|
|
|
# Stores $bst as the compound word binary search tree and returns it. An
# existing tree is logged, has DESTROY() called on it and is cleared first.
sub SetCompoundWordBST
{
    my $self = shift;
    my $bst  = shift;

    if( defined( $self->{ _compoundWordBST } ) )
    {
        $self->WriteLog( "Warning: Setting CompoundWordBST when BST is already defined - Clearing Previous BST" );
        $self->{ _compoundWordBST }->DESTROY();
        undef( $self->{ _compoundWordBST } );
    }

    $self->{ _compoundWordBST } = $bst;

    return $self->{ _compoundWordBST };
}
|
1747
|
|
|
|
|
|
|
|
|
1748
|
|
|
|
|
|
|
# Clears the stored compound word binary search tree and returns the (now
# undefined) slot.
sub ClearCompoundWordBST
{
    my $self = shift;

    $self->{ _compoundWordBST } = undef;

    return $self->{ _compoundWordBST };
}
|
1754
|
|
|
|
|
|
|
|
|
1755
|
|
|
|
|
|
|
# Stores $value as the maximum compound word length and returns the stored value.
sub SetMaxCompoundWordLength
{
    my $self  = shift;
    my $value = shift;

    $self->{ _maxCompoundWordLength } = $value;

    return $self->{ _maxCompoundWordLength };
}
|
1760
|
|
|
|
|
|
|
|
|
1761
|
|
|
|
|
|
|
# Stores $value as the overwrite-existing-file flag and returns the stored value.
sub SetOverwriteExistingFile
{
    my $self  = shift;
    my $value = shift;

    $self->{ _overwriteExistingFile } = $value;

    return $self->{ _overwriteExistingFile };
}
|
1766
|
|
|
|
|
|
|
|
|
1767
|
|
|
|
|
|
|
|
|
1768
|
|
|
|
|
|
|
###################################################################################### |
|
1769
|
|
|
|
|
|
|
# Debug Functions |
|
1770
|
|
|
|
|
|
|
###################################################################################### |
|
1771
|
|
|
|
|
|
|
|
|
1772
|
|
|
|
|
|
|
# Returns the current local time as "HH:MM:SS", each field zero-padded to two
# digits.
sub GetTime
{
    my $self = shift;

    # localtime() yields ( sec, min, hour, ... ); pick hour, minute, second.
    my @clock = ( localtime() )[ 2, 1, 0 ];

    return join( ":", map { $_ < 10 ? "0$_" : "$_" } @clock );
}
|
1783
|
|
|
|
|
|
|
|
|
1784
|
|
|
|
|
|
|
# Returns the current local date as "Month/Day/Year" (no zero padding, four
# digit year).
sub GetDate
{
    my $self = shift;

    # localtime() reports the month as 0-11 and the year as an offset from 1900.
    my ( $day, $monthIndex, $yearOffset ) = ( localtime() )[ 3, 4, 5 ];

    return join( "/", $monthIndex + 1, $day, $yearOffset + 1900 );
}
|
1794
|
|
|
|
|
|
|
|
|
1795
|
|
|
|
|
|
|
# Writes a time-stamped message to the console and/or the log file.
#
# Input : $string       -> Message text; nothing happens when it is undefined. (String)
#         $printNewLine -> Append "\n" after the message unless 0. (Default: 1)
# Output: None
#
# The console is used when GetDebugLog() is true, the handle returned by
# GetFileHandle() when GetWriteLog() is true. Objects that are not exactly of
# class "Word2vec::Xmltow2v" only get a refusal message printed.
sub WriteLog
{
    my ( $self, $string, $printNewLine ) = @_;

    return if !defined ( $string );
    $printNewLine = 1 if !defined ( $printNewLine );

    # Prevent Other Threads From Writing At The Same Time
    lock( $debugLock );

    my $calledFromOutside = ( ref ( $self ) ne "Word2vec::Xmltow2v" );

    if( $self->GetDebugLog() )
    {
        if( $calledFromOutside )
        {
            print( GetDate() . " " . GetTime() . " - xmltow2v: Cannot Call WriteLog() From Outside Module!\n" );
            return;
        }

        print( GetDate() . " " . GetTime() . " - xmltow2v::$string" );
        print( "\n" ) if( $printNewLine != 0 );
    }

    if( $self->GetWriteLog() )
    {
        if( $calledFromOutside )
        {
            print( GetDate() . " " . GetTime() . " - xmltow2v: Cannot Call WriteLog() From Outside Module!\n" );
            return;
        }

        my $fileHandle = $self->GetFileHandle();

        # Without an open handle the message is dropped.
        if( defined( $fileHandle ) )
        {
            print { $fileHandle } GetDate() . " " . GetTime() . " - xmltow2v::$string";
            print { $fileHandle } "\n" if( $printNewLine != 0 );
        }
    }
}
|
1837
|
|
|
|
|
|
|
|
|
1838
|
|
|
|
|
|
|
#################### All Modules Are To Output "1"(True) at EOF ###################### |
|
1839
|
|
|
|
|
|
|
1; |
|
1840
|
|
|
|
|
|
|
|
|
1841
|
|
|
|
|
|
|
|
|
1842
|
|
|
|
|
|
|
=head1 NAME |
|
1843
|
|
|
|
|
|
|
|
|
1844
|
|
|
|
|
|
|
Word2vec::Xmltow2v - Medline XML-To-W2V Module. |
|
1845
|
|
|
|
|
|
|
|
|
1846
|
|
|
|
|
|
|
=head1 SYNOPSIS |
|
1847
|
|
|
|
|
|
|
|
|
1848
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
1849
|
|
|
|
|
|
|
|
|
1850
|
|
|
|
|
|
|
# Parameters: Debug Output = True, Write Log = False, StoreTitle = True, StoreAbstract = True, Quick Parse = True, CompoundifyText = True, Use Multi-Threading (2 Threads; Default = 1 Thread Per CPU Core) |
|
1851
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new( 1, 0, 1, 1, 1, 1, 2 ); # Note: Specifying no parameters implies default settings. |
|
1852
|
|
|
|
|
|
|
$xmlconv->SetWorkingDir( "Medline/XML/Directory/Here" ); |
|
1853
|
|
|
|
|
|
|
$xmlconv->SetSavePath( "textcorpus.txt" ); |
|
1854
|
|
|
|
|
|
|
$xmlconv->SetStoreTitle( 1 ); |
|
1855
|
|
|
|
|
|
|
$xmlconv->SetStoreAbstract( 1 ); |
|
1856
|
|
|
|
|
|
|
$xmlconv->SetBeginDate( "01/01/2004" ); |
|
1857
|
|
|
|
|
|
|
$xmlconv->SetEndDate( "08/13/2016" ); |
|
1858
|
|
|
|
|
|
|
$xmlconv->SetOverwriteExistingFile( 1 ); |
|
1859
|
|
|
|
|
|
|
|
|
1860
|
|
|
|
|
|
|
# If Compound Word File Exists, Store It In Memory And Create Compound Word Binary Search Tree |
|
1861
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "compoundword.txt", 1 ); |
|
1862
|
|
|
|
|
|
|
$xmlconv->CreateCompoundWordBST(); |
|
1863
|
|
|
|
|
|
|
|
|
1864
|
|
|
|
|
|
|
# Parse XML Files or Directory Of Files |
|
1865
|
|
|
|
|
|
|
$xmlconv->ConvertMedlineXMLToW2V( "/xmlDirectory/" ); |
|
1866
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
1867
|
|
|
|
|
|
|
|
|
1868
|
|
|
|
|
|
|
=head1 DESCRIPTION |
|
1869
|
|
|
|
|
|
|
|
|
1870
|
|
|
|
|
|
|
Word2vec::Xmltow2v is an XML-to-text module which converts Medline XML article title |
|
1871
|
|
|
|
|
|
|
and abstract data, given a date range, into a plain text corpus for use |
|
1872
|
|
|
|
|
|
|
with Word2vec::Interface. It also "compoundifies" during text corpus compilation |
|
1873
|
|
|
|
|
|
|
given a compound word file. |
|
1874
|
|
|
|
|
|
|
|
|
1875
|
|
|
|
|
|
|
=head2 Main Functions |
|
1876
|
|
|
|
|
|
|
|
|
1877
|
|
|
|
|
|
|
=head3 new |
|
1878
|
|
|
|
|
|
|
|
|
1879
|
|
|
|
|
|
|
Description: |
|
1880
|
|
|
|
|
|
|
|
|
1881
|
|
|
|
|
|
|
Returns a new 'Word2vec::Xmltow2v' module object. |
|
1882
|
|
|
|
|
|
|
|
|
1883
|
|
|
|
|
|
|
Note: Specifying no parameters implies default options. |
|
1884
|
|
|
|
|
|
|
|
|
1885
|
|
|
|
|
|
|
Default Parameters: |
|
1886
|
|
|
|
|
|
|
debugLog = 0 |
|
1887
|
|
|
|
|
|
|
writeLog = 0 |
|
1888
|
|
|
|
|
|
|
storeTitle = 1 |
|
1889
|
|
|
|
|
|
|
storeAbstract = 1 |
|
1890
|
|
|
|
|
|
|
quickParse = 0 |
|
1891
|
|
|
|
|
|
|
compoundifyText = 0 |
|
1892
|
|
|
|
|
|
|
numOfThreads = Number of CPUs/CPU cores (1 thread per core/CPU) |
|
1893
|
|
|
|
|
|
|
workingDir = Current Directory |
|
1894
|
|
|
|
|
|
|
savePath = Current Directory |
|
1895
|
|
|
|
|
|
|
beginDate = "00/00/0000" |
|
1896
|
|
|
|
|
|
|
endDate = "99/99/9999" |
|
1897
|
|
|
|
|
|
|
xmlStringToParse = "(null)" |
|
1898
|
|
|
|
|
|
|
textCorpusString = "" |
|
1899
|
|
|
|
|
|
|
twigHandler = 0 |
|
1900
|
|
|
|
|
|
|
parsedCount = 0 |
|
1901
|
|
|
|
|
|
|
tempDate = "" |
|
1902
|
|
|
|
|
|
|
tempStr = "" |
|
1903
|
|
|
|
|
|
|
outputFileName = "textcorpus.txt" |
|
1904
|
|
|
|
|
|
|
compoundWordAry = () |
|
1905
|
|
|
|
|
|
|
compoundWordBST = Word2vec::Bst->new() |
|
1906
|
|
|
|
|
|
|
maxCompoundWordLength = 0 |
|
1907
|
|
|
|
|
|
|
overwriteExistingFile = 0 |
|
1908
|
|
|
|
|
|
|
|
|
1909
|
|
|
|
|
|
|
Input: |
|
1910
|
|
|
|
|
|
|
|
|
1911
|
|
|
|
|
|
|
$debugLog -> Instructs module to print debug statements to the console. (1 = True / 0 = False) |
|
1912
|
|
|
|
|
|
|
$writeLog -> Instructs module to print debug statements to a log file. (1 = True / 0 = False) |
|
1913
|
|
|
|
|
|
|
$storeTitle -> Instructs module to store Medline article titles during text corpus compilation. (1 = True / 0 = False) |
|
1914
|
|
|
|
|
|
|
$storeAbstract -> Instructs module to store Medline article abstracts during text corpus compilation. (1 = True / 0 = False) |
|
1915
|
|
|
|
|
|
|
$quickParse -> Instructs module to utilize quick XML parsing Functions for known Medline article title and abstract tags. (1 = True / 0 = False) |
|
1916
|
|
|
|
|
|
|
$compoundifyText -> Instructs module to compoundify text on the fly given a compound word file. This is automatically set |
|
1917
|
|
|
|
|
|
|
when reading the compound word file to memory regardless of user setting. (1 = True / 0 = False) |
|
1918
|
|
|
|
|
|
|
$numOfThreads -> Specifies the number of worker threads which parse Medline XML files simultaneously to create the text corpus. |
|
1919
|
|
|
|
|
|
|
This speeds up text corpus generation by the number of physical cores present on a given machine. (Positive integer value) |
|
1920
|
|
|
|
|
|
|
i.e. Using four threads on an Intel i7 machine makes text corpus generation roughly four times faster than running single-threaded. |
|
1921
|
|
|
|
|
|
|
$workingDir -> Specifies the current working directory. (String) |
|
1922
|
|
|
|
|
|
|
$savePath -> Specifies the save path for text corpus generation. (String) |
|
1923
|
|
|
|
|
|
|
$beginDate -> Specifies the beginning date range for Medline article text corpus composition. (Format: XX/XX/XXXX) |
|
1924
|
|
|
|
|
|
|
$endDate -> Specifies the ending date range for Medline article text corpus composition. (Format: XX/XX/XXXX) |
|
1925
|
|
|
|
|
|
|
$xmlStringToParse -> Storage location for the current Medline XML file in memory. (String) |
|
1926
|
|
|
|
|
|
|
$textCorpusString -> Temporary storage location for text corpus generation in memory. (String) |
|
1927
|
|
|
|
|
|
|
$twigHandler -> XML::Twig object location. |
|
1928
|
|
|
|
|
|
|
$parsedCount -> Number of parsed Medline articles during text corpus generation. |
|
1929
|
|
|
|
|
|
|
$tempDate -> Temporary storage location for current Medline article date during text corpus compilation. |
|
1930
|
|
|
|
|
|
|
$tempStr -> Temporary storage location for current Medline article title/abstract during text corpus compilation. |
|
1931
|
|
|
|
|
|
|
$outputFileName -> Output file path/name. |
|
1932
|
|
|
|
|
|
|
$compoundWordAry -> Storage location for compound words, used to compoundify text. (Array) <- Deprecated |
|
1933
|
|
|
|
|
|
|
$compoundWordBST -> Storage location for compound words, used to compoundify text. (Binary Search Tree) <- Supersedes '$compoundWordAry' |
|
1934
|
|
|
|
|
|
|
$maxCompoundWordLength -> Maximum number of words able to be compoundified in one phrase. ie "six_sea_snakes_were_sailing" = 5 compoundified words. |
|
1935
|
|
|
|
|
|
|
The compounding algorithm will attempt to compoundify no more than this set value, even though the compound word list could |
|
1936
|
|
|
|
|
|
|
possibly contain larger compounded phrases. |
|
1937
|
|
|
|
|
|
|
$overwriteExistingFile -> Instructs the module to either overwrite any existing text corpus files or append to the existing file. |
|
1938
|
|
|
|
|
|
|
|
|
1939
|
|
|
|
|
|
|
Note: It is not recommended to specify all new() parameters, as it has not been thoroughly tested. Maximum recommended parameters to be specified include: |
|
1940
|
|
|
|
|
|
|
"debugLog, writeLog, storeTitle, storeAbstract, quickParse, compoundifyText, numOfThreads, workingDir, savePath, beginDate, endDate" |
|
1941
|
|
|
|
|
|
|
|
|
1942
|
|
|
|
|
|
|
Output: |
|
1943
|
|
|
|
|
|
|
|
|
1944
|
|
|
|
|
|
|
Word2vec::Xmltow2v object. |
|
1945
|
|
|
|
|
|
|
|
|
1946
|
|
|
|
|
|
|
Example: |
|
1947
|
|
|
|
|
|
|
|
|
1948
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
1949
|
|
|
|
|
|
|
|
|
1950
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); # Note: Specifying no parameters implies default settings as listed above. |
|
1951
|
|
|
|
|
|
|
|
|
1952
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
1953
|
|
|
|
|
|
|
|
|
1954
|
|
|
|
|
|
|
# Or |
|
1955
|
|
|
|
|
|
|
|
|
1956
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
1957
|
|
|
|
|
|
|
|
|
1958
|
|
|
|
|
|
|
# Parameters: Debug Output = True, Write Log = False, StoreTitle = True, StoreAbstract = True, Quick Parse = True, CompoundifyText = True, Use Multi-Threading (2 Threads) |
|
1959
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new( 1, 0, 1, 1, 1, 1, 2 ); |
|
1960
|
|
|
|
|
|
|
|
|
1961
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
1962
|
|
|
|
|
|
|
|
|
1963
|
|
|
|
|
|
|
=head3 DESTROY |
|
1964
|
|
|
|
|
|
|
|
|
1965
|
|
|
|
|
|
|
Description: |
|
1966
|
|
|
|
|
|
|
|
|
1967
|
|
|
|
|
|
|
Removes module objects and variables from memory. |
|
1968
|
|
|
|
|
|
|
|
|
1969
|
|
|
|
|
|
|
Input: |
|
1970
|
|
|
|
|
|
|
|
|
1971
|
|
|
|
|
|
|
None |
|
1972
|
|
|
|
|
|
|
|
|
1973
|
|
|
|
|
|
|
Output: |
|
1974
|
|
|
|
|
|
|
|
|
1975
|
|
|
|
|
|
|
None |
|
1976
|
|
|
|
|
|
|
|
|
1977
|
|
|
|
|
|
|
Example: |
|
1978
|
|
|
|
|
|
|
|
|
1979
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
1980
|
|
|
|
|
|
|
|
|
1981
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
1982
|
|
|
|
|
|
|
|
|
1983
|
|
|
|
|
|
|
$xmlconv->DESTROY(); |
|
1984
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
1985
|
|
|
|
|
|
|
|
|
1986
|
|
|
|
|
|
|
=head3 ConvertMedlineXMLToW2V |
|
1987
|
|
|
|
|
|
|
|
|
1988
|
|
|
|
|
|
|
Description: |
|
1989
|
|
|
|
|
|
|
|
|
1990
|
|
|
|
|
|
|
Parses specified parameter Medline XML file or directory of files, creating a text corpus. Returns 0 if successful or -1 if an error occurs. |
|
1991
|
|
|
|
|
|
|
|
|
1992
|
|
|
|
|
|
|
Note: Supports plain Medline XML or gzipped XML files. |
|
1993
|
|
|
|
|
|
|
|
|
1994
|
|
|
|
|
|
|
Input: |
|
1995
|
|
|
|
|
|
|
|
|
1996
|
|
|
|
|
|
|
$filePath -> XML file path to parse. (This can be a single file or directory of XML/XML.gz files). |
|
1997
|
|
|
|
|
|
|
|
|
1998
|
|
|
|
|
|
|
Output: |
|
1999
|
|
|
|
|
|
|
|
|
2000
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-Successful |
|
2001
|
|
|
|
|
|
|
|
|
2002
|
|
|
|
|
|
|
Example: |
|
2003
|
|
|
|
|
|
|
|
|
2004
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2005
|
|
|
|
|
|
|
|
|
2006
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); # Note: Specifying no parameters implies default settings |
|
2007
|
|
|
|
|
|
|
$xmlconv->SetSavePath( "testCorpus.txt" ); |
|
2008
|
|
|
|
|
|
|
$xmlconv->SetStoreTitle( 1 ); |
|
2009
|
|
|
|
|
|
|
$xmlconv->SetStoreAbstract( 1 ); |
|
2010
|
|
|
|
|
|
|
$xmlconv->SetBeginDate( "01/01/2004" ); |
|
2011
|
|
|
|
|
|
|
$xmlconv->SetEndDate( "08/13/2016" ); |
|
2012
|
|
|
|
|
|
|
$xmlconv->SetOverwriteExistingFile( 1 ); |
|
2013
|
|
|
|
|
|
|
$xmlconv->ConvertMedlineXMLToW2V( "/xmlDirectory/" ); |
|
2014
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2015
|
|
|
|
|
|
|
|
|
2016
|
|
|
|
|
|
|
|
|
2017
|
|
|
|
|
|
|
=head3 _ThreadedConvert |
|
2018
|
|
|
|
|
|
|
|
|
2019
|
|
|
|
|
|
|
Description: |
|
2020
|
|
|
|
|
|
|
|
|
2021
|
|
|
|
|
|
|
Multi-Threaded Medline XML to text corpus conversion function. |
|
2022
|
|
|
|
|
|
|
|
|
2023
|
|
|
|
|
|
|
Input: |
|
2024
|
|
|
|
|
|
|
|
|
2025
|
|
|
|
|
|
|
$directory -> File directory or directory of files to parse. |
|
2026
|
|
|
|
|
|
|
|
|
2027
|
|
|
|
|
|
|
Output: |
|
2028
|
|
|
|
|
|
|
|
|
2029
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2030
|
|
|
|
|
|
|
|
|
2031
|
|
|
|
|
|
|
Example: |
|
2032
|
|
|
|
|
|
|
|
|
2033
|
|
|
|
|
|
|
Warning: This is a private function called by 'ConvertMedlineXMLToW2V()'. It should not be called outside of xmltow2v module. |
|
2034
|
|
|
|
|
|
|
|
|
2035
|
|
|
|
|
|
|
=head3 _ParseXMLString |
|
2036
|
|
|
|
|
|
|
|
|
2037
|
|
|
|
|
|
|
Description: |
|
2038
|
|
|
|
|
|
|
|
|
2039
|
|
|
|
|
|
|
Parses passed string parameter for Medline XML article title and abstract data and appends found data to the text corpus. |
|
2040
|
|
|
|
|
|
|
|
|
2041
|
|
|
|
|
|
|
Input: |
|
2042
|
|
|
|
|
|
|
|
|
2043
|
|
|
|
|
|
|
$string -> Medline XML string data to parse. |
|
2044
|
|
|
|
|
|
|
|
|
2045
|
|
|
|
|
|
|
Output: |
|
2046
|
|
|
|
|
|
|
|
|
2047
|
|
|
|
|
|
|
None |
|
2048
|
|
|
|
|
|
|
|
|
2049
|
|
|
|
|
|
|
Example: |
|
2050
|
|
|
|
|
|
|
|
|
2051
|
|
|
|
|
|
|
Warning: This is a private function called by "ConvertMedlineXMLToW2V()" and "_ThreadedConvert()". It should not be called outside of xmltow2v module. |
|
2052
|
|
|
|
|
|
|
|
|
2053
|
|
|
|
|
|
|
=head3 _CheckParseRequirements |
|
2054
|
|
|
|
|
|
|
|
|
2055
|
|
|
|
|
|
|
Description: |
|
2056
|
|
|
|
|
|
|
|
|
2057
|
|
|
|
|
|
|
Checks passed string parameter to see if it contains relevant data and XML::Twig handler is initialized. |
|
2058
|
|
|
|
|
|
|
|
|
2059
|
|
|
|
|
|
|
Input: |
|
2060
|
|
|
|
|
|
|
|
|
2061
|
|
|
|
|
|
|
$string -> String data to check |
|
2062
|
|
|
|
|
|
|
|
|
2063
|
|
|
|
|
|
|
Output: |
|
2064
|
|
|
|
|
|
|
|
|
2065
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2066
|
|
|
|
|
|
|
|
|
2067
|
|
|
|
|
|
|
Example: |
|
2068
|
|
|
|
|
|
|
|
|
2069
|
|
|
|
|
|
|
Warning: This is a private function called by "_ParseXMLString()". It should not be called outside of xmltow2v module. |
|
2070
|
|
|
|
|
|
|
|
|
2071
|
|
|
|
|
|
|
=head3 _CheckForNullData |
|
2072
|
|
|
|
|
|
|
|
|
2073
|
|
|
|
|
|
|
Description: |
|
2074
|
|
|
|
|
|
|
|
|
2075
|
|
|
|
|
|
|
Checks passed string parameter for "(null)" string. |
|
2076
|
|
|
|
|
|
|
|
|
2077
|
|
|
|
|
|
|
Input: |
|
2078
|
|
|
|
|
|
|
|
|
2079
|
|
|
|
|
|
|
$string -> String data to be checked. |
|
2080
|
|
|
|
|
|
|
|
|
2081
|
|
|
|
|
|
|
Output: |
|
2082
|
|
|
|
|
|
|
|
|
2083
|
|
|
|
|
|
|
$value -> '1' = True/Null data or '0' = False/Valid data |
|
2084
|
|
|
|
|
|
|
|
|
2085
|
|
|
|
|
|
|
Example: |
|
2086
|
|
|
|
|
|
|
|
|
2087
|
|
|
|
|
|
|
Warning: This is a private function called by "new()" and "_ParseXMLString()". It should not be called outside of xmltow2v module. |
|
2088
|
|
|
|
|
|
|
|
|
2089
|
|
|
|
|
|
|
=head3 _RemoveXMLVersion |
|
2090
|
|
|
|
|
|
|
|
|
2091
|
|
|
|
|
|
|
Description: |
|
2092
|
|
|
|
|
|
|
|
|
2093
|
|
|
|
|
|
|
Removes the XML Version string prior to parsing the XML string data. (Deprecated) |
|
2094
|
|
|
|
|
|
|
|
|
2095
|
|
|
|
|
|
|
Input: |
|
2096
|
|
|
|
|
|
|
|
|
2097
|
|
|
|
|
|
|
$string -> Medline XML string data |
|
2098
|
|
|
|
|
|
|
|
|
2099
|
|
|
|
|
|
|
Output: |
|
2100
|
|
|
|
|
|
|
|
|
2101
|
|
|
|
|
|
|
None |
|
2102
|
|
|
|
|
|
|
|
|
2103
|
|
|
|
|
|
|
Example: |
|
2104
|
|
|
|
|
|
|
|
|
2105
|
|
|
|
|
|
|
Warning: This is a private function called by "new()" and "_ParseXMLString()". It should not be called outside of xmltow2v module. |
|
2106
|
|
|
|
|
|
|
|
|
2107
|
|
|
|
|
|
|
=head3 _ParseMedlineCitationSet |
|
2108
|
|
|
|
|
|
|
|
|
2109
|
|
|
|
|
|
|
Description: |
|
2110
|
|
|
|
|
|
|
|
|
2111
|
|
|
|
|
|
|
Parses 'MedlineCitationSet' tag data in Medline XML file. |
|
2112
|
|
|
|
|
|
|
|
|
2113
|
|
|
|
|
|
|
Input: |
|
2114
|
|
|
|
|
|
|
|
|
2115
|
|
|
|
|
|
|
$twigHandler -> XML::Twig handler |
|
2116
|
|
|
|
|
|
|
$root -> Beginning of XML directory to parse. ( Directory in Medline XML string data ) |
|
2117
|
|
|
|
|
|
|
|
|
2118
|
|
|
|
|
|
|
Output: |
|
2119
|
|
|
|
|
|
|
|
|
2120
|
|
|
|
|
|
|
None |
|
2121
|
|
|
|
|
|
|
|
|
2122
|
|
|
|
|
|
|
Example: |
|
2123
|
|
|
|
|
|
|
|
|
2124
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2125
|
|
|
|
|
|
|
|
|
2126
|
|
|
|
|
|
|
=head3 _ParseMedlineArticle |
|
2127
|
|
|
|
|
|
|
|
|
2128
|
|
|
|
|
|
|
Description: |
|
2129
|
|
|
|
|
|
|
|
|
2130
|
|
|
|
|
|
|
Parses 'MedlineArticle' tag data in Medline XML file. |
|
2131
|
|
|
|
|
|
|
|
|
2132
|
|
|
|
|
|
|
Input: |
|
2133
|
|
|
|
|
|
|
|
|
2134
|
|
|
|
|
|
|
$medlineArticle -> Current Medline article directory in XML data (XML::Twig directory) |
|
2135
|
|
|
|
|
|
|
|
|
2136
|
|
|
|
|
|
|
Output: |
|
2137
|
|
|
|
|
|
|
|
|
2138
|
|
|
|
|
|
|
$value -> '1' = Finished parsing Medline article. |
|
2139
|
|
|
|
|
|
|
|
|
2140
|
|
|
|
|
|
|
Example: |
|
2141
|
|
|
|
|
|
|
|
|
2142
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2143
|
|
|
|
|
|
|
|
|
2144
|
|
|
|
|
|
|
=head3 _ParseDateCreated |
|
2145
|
|
|
|
|
|
|
|
|
2146
|
|
|
|
|
|
|
Description: |
|
2147
|
|
|
|
|
|
|
|
|
2148
|
|
|
|
|
|
|
Parses 'DateCreated' tag data in Medline XML file. |
|
2149
|
|
|
|
|
|
|
|
|
2150
|
|
|
|
|
|
|
Input: |
|
2151
|
|
|
|
|
|
|
|
|
2152
|
|
|
|
|
|
|
$article -> Current Medline article in XML data (XML::Twig directory) |
|
2153
|
|
|
|
|
|
|
|
|
2154
|
|
|
|
|
|
|
Output: |
|
2155
|
|
|
|
|
|
|
|
|
2156
|
|
|
|
|
|
|
$date -> 'XX/XX/XXXX' (Month/Day/Year) |
|
2157
|
|
|
|
|
|
|
|
|
2158
|
|
|
|
|
|
|
Example: |
|
2159
|
|
|
|
|
|
|
|
|
2160
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2161
|
|
|
|
|
|
|
|
|
2162
|
|
|
|
|
|
|
=head3 _ParseArticle |
|
2163
|
|
|
|
|
|
|
|
|
2164
|
|
|
|
|
|
|
Description: |
|
2165
|
|
|
|
|
|
|
|
|
2166
|
|
|
|
|
|
|
Parses 'Article' tag data in Medline XML file. Fetches 'ArticleTitle', 'Journal' and 'Abstract' XML tags. |
|
2167
|
|
|
|
|
|
|
|
|
2168
|
|
|
|
|
|
|
Input: |
|
2169
|
|
|
|
|
|
|
|
|
2170
|
|
|
|
|
|
|
$article -> Current Medline article in XML data (XML::Twig directory) |
|
2171
|
|
|
|
|
|
|
|
|
2172
|
|
|
|
|
|
|
Output: |
|
2173
|
|
|
|
|
|
|
|
|
2174
|
|
|
|
|
|
|
None |
|
2175
|
|
|
|
|
|
|
|
|
2176
|
|
|
|
|
|
|
Example: |
|
2177
|
|
|
|
|
|
|
|
|
2178
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2179
|
|
|
|
|
|
|
|
|
2180
|
|
|
|
|
|
|
=head3 _ParseJournal |
|
2181
|
|
|
|
|
|
|
|
|
2182
|
|
|
|
|
|
|
Description: |
|
2183
|
|
|
|
|
|
|
|
|
2184
|
|
|
|
|
|
|
Parses 'Journal' tag data in Medline XML file. Fetches 'Title' XML tag. |
|
2185
|
|
|
|
|
|
|
|
|
2186
|
|
|
|
|
|
|
Input: |
|
2187
|
|
|
|
|
|
|
|
|
2188
|
|
|
|
|
|
|
$journalRoot -> Current Medline journal directory in XML data (XML::Twig directory) |
|
2189
|
|
|
|
|
|
|
|
|
2190
|
|
|
|
|
|
|
Output: |
|
2191
|
|
|
|
|
|
|
|
|
2192
|
|
|
|
|
|
|
None |
|
2193
|
|
|
|
|
|
|
|
|
2194
|
|
|
|
|
|
|
Example: |
|
2195
|
|
|
|
|
|
|
|
|
2196
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2197
|
|
|
|
|
|
|
|
|
2198
|
|
|
|
|
|
|
=head3 _ParseOtherAbstract |
|
2199
|
|
|
|
|
|
|
|
|
2200
|
|
|
|
|
|
|
Description: |
|
2201
|
|
|
|
|
|
|
|
|
2202
|
|
|
|
|
|
|
Parses 'Abstract' tag data in Medline XML file. Fetches 'AbstractText' XML tag. |
|
2203
|
|
|
|
|
|
|
|
|
2204
|
|
|
|
|
|
|
Input: |
|
2205
|
|
|
|
|
|
|
|
|
2206
|
|
|
|
|
|
|
$abstractRoot -> Current Medline abstract directory in XML data (XML::Twig directory) |
|
2207
|
|
|
|
|
|
|
|
|
2208
|
|
|
|
|
|
|
Output: |
|
2209
|
|
|
|
|
|
|
|
|
2210
|
|
|
|
|
|
|
None |
|
2211
|
|
|
|
|
|
|
|
|
2212
|
|
|
|
|
|
|
Example: |
|
2213
|
|
|
|
|
|
|
|
|
2214
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2215
|
|
|
|
|
|
|
|
|
2216
|
|
|
|
|
|
|
=head3 _QuickParseDateCreated |
|
2217
|
|
|
|
|
|
|
|
|
2218
|
|
|
|
|
|
|
Description: |
|
2219
|
|
|
|
|
|
|
|
|
2220
|
|
|
|
|
|
|
Parses 'DateCreated' tag data in Medline XML file. Used when 'QuickParse' member variable is enabled. Sets $tempDate member variable to parsed 'DateCreated' tag data. |
|
2221
|
|
|
|
|
|
|
|
|
2222
|
|
|
|
|
|
|
Input: |
|
2223
|
|
|
|
|
|
|
|
|
2224
|
|
|
|
|
|
|
$twigHandler -> 'XML::Twig' handler |
|
2225
|
|
|
|
|
|
|
$article -> Current Medline article directory in XML data (XML::Twig directory) |
|
2226
|
|
|
|
|
|
|
|
|
2227
|
|
|
|
|
|
|
Output: |
|
2228
|
|
|
|
|
|
|
|
|
2229
|
|
|
|
|
|
|
None |
|
2230
|
|
|
|
|
|
|
|
|
2231
|
|
|
|
|
|
|
Example: |
|
2232
|
|
|
|
|
|
|
|
|
2233
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2234
|
|
|
|
|
|
|
|
|
2235
|
|
|
|
|
|
|
=head3 _QuickParseJournal |
|
2236
|
|
|
|
|
|
|
|
|
2237
|
|
|
|
|
|
|
Description: |
|
2238
|
|
|
|
|
|
|
|
|
2239
|
|
|
|
|
|
|
Parses 'Journal' tag data in Medline XML file. Fetches 'Title' XML tag. Used when 'QuickParse' member variable is enabled. |
|
2240
|
|
|
|
|
|
|
Sets $tempStr to parsed data and stores in text corpus. |
|
2241
|
|
|
|
|
|
|
|
|
2242
|
|
|
|
|
|
|
Input: |
|
2243
|
|
|
|
|
|
|
|
|
2244
|
|
|
|
|
|
|
$twigHandler -> 'XML::Twig' handler. |
|
2245
|
|
|
|
|
|
|
$journalRoot -> Current Medline journal directory in XML data (XML::Twig directory) |
|
2246
|
|
|
|
|
|
|
|
|
2247
|
|
|
|
|
|
|
Output: |
|
2248
|
|
|
|
|
|
|
|
|
2249
|
|
|
|
|
|
|
None |
|
2250
|
|
|
|
|
|
|
|
|
2251
|
|
|
|
|
|
|
Example: |
|
2252
|
|
|
|
|
|
|
|
|
2253
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2254
|
|
|
|
|
|
|
|
|
2255
|
|
|
|
|
|
|
=head3 _QuickParseArticle |
|
2256
|
|
|
|
|
|
|
|
|
2257
|
|
|
|
|
|
|
Description: |
|
2258
|
|
|
|
|
|
|
|
|
2259
|
|
|
|
|
|
|
Parses 'Article' tag data in Medline XML file. Fetches 'ArticleTitle' and 'Abstract' XML tags. Used when 'QuickParse' member variable is enabled. |
|
2260
|
|
|
|
|
|
|
Sets $tempStr to parsed data and stores in text corpus. |
|
2261
|
|
|
|
|
|
|
|
|
2262
|
|
|
|
|
|
|
Input: |
|
2263
|
|
|
|
|
|
|
|
|
2264
|
|
|
|
|
|
|
$twigHandler -> 'XML::Twig' handler. |
|
2265
|
|
|
|
|
|
|
$article -> Current Medline article directory in XML data (XML::Twig directory) |
|
2266
|
|
|
|
|
|
|
|
|
2267
|
|
|
|
|
|
|
Output: |
|
2268
|
|
|
|
|
|
|
|
|
2269
|
|
|
|
|
|
|
None |
|
2270
|
|
|
|
|
|
|
|
|
2271
|
|
|
|
|
|
|
Example: |
|
2272
|
|
|
|
|
|
|
|
|
2273
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2274
|
|
|
|
|
|
|
|
|
2275
|
|
|
|
|
|
|
=head3 _QuickParseOtherAbstract |
|
2276
|
|
|
|
|
|
|
|
|
2277
|
|
|
|
|
|
|
Description: |
|
2278
|
|
|
|
|
|
|
|
|
2279
|
|
|
|
|
|
|
Parses 'Abstract' tag data in Medline XML file. Fetches 'AbstractText' XML tag. Used when 'QuickParse' member variable is enabled. |
|
2280
|
|
|
|
|
|
|
Sets $tempStr to parsed data and stores in text corpus. |
|
2281
|
|
|
|
|
|
|
|
|
2282
|
|
|
|
|
|
|
Input: |
|
2283
|
|
|
|
|
|
|
|
|
2284
|
|
|
|
|
|
|
$twigHandler -> 'XML::Twig' handler. |
|
2285
|
|
|
|
|
|
|
$abstractRoot -> Current Medline abstract directory in XML data (XML::Twig directory) |
|
2286
|
|
|
|
|
|
|
|
|
2287
|
|
|
|
|
|
|
Output: |
|
2288
|
|
|
|
|
|
|
|
|
2289
|
|
|
|
|
|
|
None |
|
2290
|
|
|
|
|
|
|
|
|
2291
|
|
|
|
|
|
|
Example: |
|
2292
|
|
|
|
|
|
|
|
|
2293
|
|
|
|
|
|
|
Warning: This is a private function and is called by xmltow2v's XML::Twig handler. It should not be called outside of xmltow2v module. |
|
2294
|
|
|
|
|
|
|
|
|
2295
|
|
|
|
|
|
|
=head3 CreateCompoundWordBST |
|
2296
|
|
|
|
|
|
|
|
|
2297
|
|
|
|
|
|
|
Description: |
|
2298
|
|
|
|
|
|
|
|
|
2299
|
|
|
|
|
|
|
Creates a binary search tree using compound word data in memory and stores root node. This also clears the compound word array afterwards. |
|
2300
|
|
|
|
|
|
|
|
|
2301
|
|
|
|
|
|
|
Warning: Compound word file must be loaded into memory using ReadCompoundWordDataFromFile() prior to calling this method. This function |
|
2302
|
|
|
|
|
|
|
will also delete the compound word array upon completion as it will no longer be necessary. |
|
2303
|
|
|
|
|
|
|
|
|
2304
|
|
|
|
|
|
|
Input: |
|
2305
|
|
|
|
|
|
|
|
|
2306
|
|
|
|
|
|
|
None |
|
2307
|
|
|
|
|
|
|
|
|
2308
|
|
|
|
|
|
|
Output: |
|
2309
|
|
|
|
|
|
|
|
|
2310
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2311
|
|
|
|
|
|
|
|
|
2312
|
|
|
|
|
|
|
Example: |
|
2313
|
|
|
|
|
|
|
|
|
2314
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2315
|
|
|
|
|
|
|
|
|
2316
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2317
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); |
|
2318
|
|
|
|
|
|
|
$xmlconv->CreateCompoundWordBST(); |
|
2319
|
|
|
|
|
|
|
|
|
2320
|
|
|
|
|
|
|
=head3 CompoundifyString |
|
2321
|
|
|
|
|
|
|
|
|
2322
|
|
|
|
|
|
|
Description: |
|
2323
|
|
|
|
|
|
|
|
|
2324
|
|
|
|
|
|
|
Compoundifies string parameter based on compound word data in memory using the compound word binary search tree. |
|
2325
|
|
|
|
|
|
|
|
|
2326
|
|
|
|
|
|
|
Warning: Compound word file must be loaded into memory using ReadCompoundWordDataFromFile() prior to calling this method. |
|
2327
|
|
|
|
|
|
|
|
|
2328
|
|
|
|
|
|
|
Input: |
|
2329
|
|
|
|
|
|
|
|
|
2330
|
|
|
|
|
|
|
$string -> String to compoundify |
|
2331
|
|
|
|
|
|
|
|
|
2332
|
|
|
|
|
|
|
Output: |
|
2333
|
|
|
|
|
|
|
|
|
2334
|
|
|
|
|
|
|
$string -> Compounded string or "(null)" if string parameter is not defined. |
|
2335
|
|
|
|
|
|
|
|
|
2336
|
|
|
|
|
|
|
Example: |
|
2337
|
|
|
|
|
|
|
|
|
2338
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2339
|
|
|
|
|
|
|
|
|
2340
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2341
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); |
|
2342
|
|
|
|
|
|
|
$xmlconv->CreateCompoundWordBST(); |
|
2343
|
|
|
|
|
|
|
my $compoundedString = $xmlconv->CompoundifyString( "String to compoundify" ); |
|
2344
|
|
|
|
|
|
|
print( "Compounded String: $compoundedString\n" ); |
|
2345
|
|
|
|
|
|
|
|
|
2346
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2347
|
|
|
|
|
|
|
|
|
2348
|
|
|
|
|
|
|
=head3 _CompoundifySearch |
|
2349
|
|
|
|
|
|
|
|
|
2350
|
|
|
|
|
|
|
Description: |
|
2351
|
|
|
|
|
|
|
|
|
2352
|
|
|
|
|
|
|
Recursive method used by CompoundifyString() to fetch compound word data in binary search tree. |
|
2353
|
|
|
|
|
|
|
|
|
2354
|
|
|
|
|
|
|
Warning: This function requires specific parameters and should not be called outside of CompoundifyString() method. |
|
2355
|
|
|
|
|
|
|
|
|
2356
|
|
|
|
|
|
|
Input: |
|
2357
|
|
|
|
|
|
|
|
|
2358
|
|
|
|
|
|
|
$stringArrayRef -> Array reference containing string data |
|
2359
|
|
|
|
|
|
|
$oldNode -> Last 'Word2vec::Node' data match was found |
|
2360
|
|
|
|
|
|
|
$searchStr -> Search phrase |
|
2361
|
|
|
|
|
|
|
$index -> Current string array index |
|
2362
|
|
|
|
|
|
|
|
|
2363
|
|
|
|
|
|
|
Output: |
|
2364
|
|
|
|
|
|
|
|
|
2365
|
|
|
|
|
|
|
Word2vec::Node -> Last node containing positive search phrase match |
|
2366
|
|
|
|
|
|
|
|
|
2367
|
|
|
|
|
|
|
Example: |
|
2368
|
|
|
|
|
|
|
|
|
2369
|
|
|
|
|
|
|
Warning: This is a private function and is called by 'CompoundifyString()'. It should not be called outside of xmltow2v module. |
|
2370
|
|
|
|
|
|
|
|
|
2371
|
|
|
|
|
|
|
=head3 ReadCompoundWordDataFromFile |
|
2372
|
|
|
|
|
|
|
|
|
2373
|
|
|
|
|
|
|
Description: |
|
2374
|
|
|
|
|
|
|
|
|
2375
|
|
|
|
|
|
|
Reads compound word file and stores in memory. $autoSetMaxCompWordLength parameter is not required to be set. This |
|
2376
|
|
|
|
|
|
|
parameter instructs the method to auto set the maximum compound word length dependent on the longest compound word found. |
|
2377
|
|
|
|
|
|
|
|
|
2378
|
|
|
|
|
|
|
Note: $autoSetMaxCompWordLength options: defined = True and Undefined = False. |
|
2379
|
|
|
|
|
|
|
|
|
2380
|
|
|
|
|
|
|
Input: |
|
2381
|
|
|
|
|
|
|
|
|
2382
|
|
|
|
|
|
|
$filePath -> Compound word file path |
|
2383
|
|
|
|
|
|
|
$autoSetMaxCompWordLength -> Optional flag. When defined, the maximum compound word length is automatically set to the length of the longest compound phrase found in the file. |
|
2384
|
|
|
|
|
|
|
|
|
2385
|
|
|
|
|
|
|
Note: Calling this method with $autoSetMaxCompWordLength defined will automatically set the maxCompoundWordLength variable to the longest compound phrase. |
|
2386
|
|
|
|
|
|
|
|
|
2387
|
|
|
|
|
|
|
Output: |
|
2388
|
|
|
|
|
|
|
|
|
2389
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2390
|
|
|
|
|
|
|
|
|
2391
|
|
|
|
|
|
|
Example: |
|
2392
|
|
|
|
|
|
|
|
|
2393
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2394
|
|
|
|
|
|
|
|
|
2395
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2396
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt", 1 ); |
|
2397
|
|
|
|
|
|
|
|
|
2398
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2399
|
|
|
|
|
|
|
|
|
2400
|
|
|
|
|
|
|
=head3 SaveCompoundWordListToFile |
|
2401
|
|
|
|
|
|
|
|
|
2402
|
|
|
|
|
|
|
Description: |
|
2403
|
|
|
|
|
|
|
|
|
2404
|
|
|
|
|
|
|
Saves compound word data in memory to a specified file location. |
|
2405
|
|
|
|
|
|
|
|
|
2406
|
|
|
|
|
|
|
Input: |
|
2407
|
|
|
|
|
|
|
|
|
2408
|
|
|
|
|
|
|
$savePath -> Path to save compound word list to file. |
|
2409
|
|
|
|
|
|
|
|
|
2410
|
|
|
|
|
|
|
Output: |
|
2411
|
|
|
|
|
|
|
|
|
2412
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2413
|
|
|
|
|
|
|
|
|
2414
|
|
|
|
|
|
|
Example: |
|
2415
|
|
|
|
|
|
|
|
|
2416
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2417
|
|
|
|
|
|
|
|
|
2418
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2419
|
|
|
|
|
|
|
$xmlconv->ReadCompoundWordDataFromFile( "samples/compoundword.txt" ); |
|
2420
|
|
|
|
|
|
|
$xmlconv->SaveCompoundWordListToFile( "samples/newcompoundword.txt" ); |
|
2421
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2422
|
|
|
|
|
|
|
|
|
2423
|
|
|
|
|
|
|
=head3 ReadTextFromFile |
|
2424
|
|
|
|
|
|
|
|
|
2425
|
|
|
|
|
|
|
Description: |
|
2426
|
|
|
|
|
|
|
|
|
2427
|
|
|
|
|
|
|
Reads a plain text file with utf8 encoding in memory. Returns string data if successful and "(null)" if unsuccessful. |
|
2428
|
|
|
|
|
|
|
|
|
2429
|
|
|
|
|
|
|
Input: |
|
2430
|
|
|
|
|
|
|
|
|
2431
|
|
|
|
|
|
|
$filePath -> Text file to read into memory |
|
2432
|
|
|
|
|
|
|
|
|
2433
|
|
|
|
|
|
|
Output: |
|
2434
|
|
|
|
|
|
|
|
|
2435
|
|
|
|
|
|
|
$string -> String data if successful or "(null)" if un-successful. |
|
2436
|
|
|
|
|
|
|
|
|
2437
|
|
|
|
|
|
|
Example: |
|
2438
|
|
|
|
|
|
|
|
|
2439
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2440
|
|
|
|
|
|
|
|
|
2441
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2442
|
|
|
|
|
|
|
my $textData = $xmlconv->ReadTextFromFile( "samples/textcorpus.txt" ); |
|
2443
|
|
|
|
|
|
|
print( "Text Data: $textData\n" ); |
|
2444
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2445
|
|
|
|
|
|
|
|
|
2446
|
|
|
|
|
|
|
=head3 SaveTextToFile |
|
2447
|
|
|
|
|
|
|
|
|
2448
|
|
|
|
|
|
|
Description: |
|
2449
|
|
|
|
|
|
|
|
|
2450
|
|
|
|
|
|
|
Saves a plain text file with utf8 encoding in a specified location. |
|
2451
|
|
|
|
|
|
|
|
|
2452
|
|
|
|
|
|
|
Input: |
|
2453
|
|
|
|
|
|
|
|
|
2454
|
|
|
|
|
|
|
$savePath -> Path to save string data. |
|
2455
|
|
|
|
|
|
|
$string -> String to save |
|
2456
|
|
|
|
|
|
|
|
|
2457
|
|
|
|
|
|
|
Output: |
|
2458
|
|
|
|
|
|
|
|
|
2459
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2460
|
|
|
|
|
|
|
|
|
2461
|
|
|
|
|
|
|
Example: |
|
2462
|
|
|
|
|
|
|
|
|
2463
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2464
|
|
|
|
|
|
|
|
|
2465
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2466
|
|
|
|
|
|
|
my $result = $xmlconv->SaveTextToFile( "text.txt", "Hello world!" ); |
|
2467
|
|
|
|
|
|
|
|
|
2468
|
|
|
|
|
|
|
print( "File saved\n" ) if $result == 0; |
|
2469
|
|
|
|
|
|
|
print( "File unable to save\n" ) if $result == -1; |
|
2470
|
|
|
|
|
|
|
|
|
2471
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2472
|
|
|
|
|
|
|
|
|
2473
|
|
|
|
|
|
|
=head3 _ReadXMLDataFromFile |
|
2474
|
|
|
|
|
|
|
|
|
2475
|
|
|
|
|
|
|
Description: |
|
2476
|
|
|
|
|
|
|
|
|
2477
|
|
|
|
|
|
|
Reads an XML file from a specified location. Returns string in memory if successful and "(null)" if unsuccessful. |
|
2478
|
|
|
|
|
|
|
|
|
2479
|
|
|
|
|
|
|
Input: |
|
2480
|
|
|
|
|
|
|
|
|
2481
|
|
|
|
|
|
|
$filePath -> File to read given path |
|
2482
|
|
|
|
|
|
|
|
|
2483
|
|
|
|
|
|
|
Output: |
|
2484
|
|
|
|
|
|
|
|
|
2485
|
|
|
|
|
|
|
$string -> XML string data if successful or "(null)" if un-successful. |
|
2486
|
|
|
|
|
|
|
|
|
2487
|
|
|
|
|
|
|
Example: |
|
2488
|
|
|
|
|
|
|
|
|
2489
|
|
|
|
|
|
|
Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module. |
|
2490
|
|
|
|
|
|
|
|
|
2491
|
|
|
|
|
|
|
=head3 _SaveTextCorpusToFile |
|
2492
|
|
|
|
|
|
|
|
|
2493
|
|
|
|
|
|
|
Description: |
|
2494
|
|
|
|
|
|
|
|
|
2495
|
|
|
|
|
|
|
Saves text corpus data to specified file path. This method will append to any existing file if $appendToFile parameter |
|
2496
|
|
|
|
|
|
|
is defined or "overwrite" option is disabled. Enabling "overwrite" option will overwrite any existing files. |
|
2497
|
|
|
|
|
|
|
|
|
2498
|
|
|
|
|
|
|
Input: |
|
2499
|
|
|
|
|
|
|
|
|
2500
|
|
|
|
|
|
|
$savePath -> Path to save the text corpus |
|
2501
|
|
|
|
|
|
|
$appendToFile -> Specifies whether the module will overwrite any existing data or append to existing text corpus data. |
|
2502
|
|
|
|
|
|
|
|
|
2503
|
|
|
|
|
|
|
Note: Leaving this variable undefined will fetch the "Overwrite" member variable and set the value to this parameter. |
|
2504
|
|
|
|
|
|
|
|
|
2505
|
|
|
|
|
|
|
Output: |
|
2506
|
|
|
|
|
|
|
|
|
2507
|
|
|
|
|
|
|
$value -> '0' = Successful / '-1' = Un-successful |
|
2508
|
|
|
|
|
|
|
|
|
2509
|
|
|
|
|
|
|
Example: |
|
2510
|
|
|
|
|
|
|
|
|
2511
|
|
|
|
|
|
|
Warning: This is a private function and is called by XML::Twig parsing functions. It should not be called outside of xmltow2v module. |
|
2512
|
|
|
|
|
|
|
|
|
2513
|
|
|
|
|
|
|
=head3 IsDateInSpecifiedRange |
|
2514
|
|
|
|
|
|
|
|
|
2515
|
|
|
|
|
|
|
Description: |
|
2516
|
|
|
|
|
|
|
|
|
2517
|
|
|
|
|
|
|
Checks to see if $date is within $beginDate and $endDate range. Returns 1 if true and 0 if false. |
|
2518
|
|
|
|
|
|
|
|
|
2519
|
|
|
|
|
|
|
Note: Date Format: XX/XX/XXXX (Month/Day/Year) |
|
2520
|
|
|
|
|
|
|
|
|
2521
|
|
|
|
|
|
|
Input: |
|
2522
|
|
|
|
|
|
|
|
|
2523
|
|
|
|
|
|
|
$date -> Date to check against minimum and maximum data range. (String) |
|
2524
|
|
|
|
|
|
|
$beginDate -> Minimum date range (String) |
|
2525
|
|
|
|
|
|
|
$endDate -> Maximum date range (String) |
|
2526
|
|
|
|
|
|
|
|
|
2527
|
|
|
|
|
|
|
Output: |
|
2528
|
|
|
|
|
|
|
|
|
2529
|
|
|
|
|
|
|
$value -> '1' = True/Date is within specified range Or '0' = False/Date is not within specified range. |
|
2530
|
|
|
|
|
|
|
|
|
2531
|
|
|
|
|
|
|
Example: |
|
2532
|
|
|
|
|
|
|
|
|
2533
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2534
|
|
|
|
|
|
|
|
|
2535
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2536
|
|
|
|
|
|
|
print( "Is \"01/01/2004\" within the date range: \"02/21/1985\" to \"08/13/2016\"?\n" ); |
|
2537
|
|
|
|
|
|
|
print( "Yes\n" ) if $xmlconv->IsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 1; |
|
2538
|
|
|
|
|
|
|
print( "No\n" ) if $xmlconv->IsDateInSpecifiedRange( "01/01/2004", "02/21/1985", "08/13/2016" ) == 0; |
|
2539
|
|
|
|
|
|
|
|
|
2540
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2541
|
|
|
|
|
|
|
|
|
2542
|
|
|
|
|
|
|
=head3 IsFileOrDirectory |
|
2543
|
|
|
|
|
|
|
|
|
2544
|
|
|
|
|
|
|
Description: |
|
2545
|
|
|
|
|
|
|
|
|
2546
|
|
|
|
|
|
|
Checks to see if specified path is a file or directory. |
|
2547
|
|
|
|
|
|
|
|
|
2548
|
|
|
|
|
|
|
Input: |
|
2549
|
|
|
|
|
|
|
|
|
2550
|
|
|
|
|
|
|
$path -> File or directory path. (String) |
|
2551
|
|
|
|
|
|
|
|
|
2552
|
|
|
|
|
|
|
Output: |
|
2553
|
|
|
|
|
|
|
|
|
2554
|
|
|
|
|
|
|
$string -> Returns: "file" = file, "dir" = directory and "unknown" if the path is not a file or directory (undefined). |
|
2555
|
|
|
|
|
|
|
|
|
2556
|
|
|
|
|
|
|
Example: |
|
2557
|
|
|
|
|
|
|
|
|
2558
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2559
|
|
|
|
|
|
|
|
|
2560
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2561
|
|
|
|
|
|
|
my $path = "path/to/a/directory"; |
|
2562
|
|
|
|
|
|
|
|
|
2563
|
|
|
|
|
|
|
print( "Is \"$path\" a file or directory? " . $xmlconv->IsFileOrDirectory( $path ) . "\n" ); |
|
2564
|
|
|
|
|
|
|
|
|
2565
|
|
|
|
|
|
|
$path = "path/to/a/file.file"; |
|
2566
|
|
|
|
|
|
|
|
|
2567
|
|
|
|
|
|
|
print( "Is \"$path\" a file or directory? " . $xmlconv->IsFileOrDirectory( $path ) . "\n" ); |
|
2568
|
|
|
|
|
|
|
|
|
2569
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2570
|
|
|
|
|
|
|
|
|
2571
|
|
|
|
|
|
|
=head3 RemoveSpecialCharactersFromString |
|
2572
|
|
|
|
|
|
|
|
|
2573
|
|
|
|
|
|
|
Description: |
|
2574
|
|
|
|
|
|
|
|
|
2575
|
|
|
|
|
|
|
Removes special characters from string parameter, removes extra spaces and converts text to lowercase. |
|
2576
|
|
|
|
|
|
|
|
|
2577
|
|
|
|
|
|
|
Note: This method is called when parsing and compiling Medline title/abstract data. |
|
2578
|
|
|
|
|
|
|
|
|
2579
|
|
|
|
|
|
|
Input: |
|
2580
|
|
|
|
|
|
|
|
|
2581
|
|
|
|
|
|
|
$string -> String passed to remove special characters from and convert to lowercase. |
|
2582
|
|
|
|
|
|
|
|
|
2583
|
|
|
|
|
|
|
Output: |
|
2584
|
|
|
|
|
|
|
|
|
2585
|
|
|
|
|
|
|
$string -> String with all special characters removed and converted to lowercase. |
|
2586
|
|
|
|
|
|
|
|
|
2587
|
|
|
|
|
|
|
Example: |
|
2588
|
|
|
|
|
|
|
|
|
2589
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2590
|
|
|
|
|
|
|
|
|
2591
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2592
|
|
|
|
|
|
|
|
|
2593
|
|
|
|
|
|
|
my $str = "Heart Attack is$ an!@ also KNOWN as an Acute MYOCARDIAL inFARCTion!"; |
|
2594
|
|
|
|
|
|
|
|
|
2595
|
|
|
|
|
|
|
print( "Original String: $str\n" ); |
|
2596
|
|
|
|
|
|
|
|
|
2597
|
|
|
|
|
|
|
$str = $xmlconv->RemoveSpecialCharactersFromString( $str ); |
|
2598
|
|
|
|
|
|
|
|
|
2599
|
|
|
|
|
|
|
print( "Modified String: $str\n" ); |
|
2600
|
|
|
|
|
|
|
|
|
2601
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2602
|
|
|
|
|
|
|
|
|
2603
|
|
|
|
|
|
|
=head3 GetFileType |
|
2604
|
|
|
|
|
|
|
|
|
2605
|
|
|
|
|
|
|
Description: |
|
2606
|
|
|
|
|
|
|
|
|
2607
|
|
|
|
|
|
|
Returns file data type (string). |
|
2608
|
|
|
|
|
|
|
|
|
2609
|
|
|
|
|
|
|
Input: |
|
2610
|
|
|
|
|
|
|
|
|
2611
|
|
|
|
|
|
|
$filePath -> File to check located at file path |
|
2612
|
|
|
|
|
|
|
|
|
2613
|
|
|
|
|
|
|
Output: |
|
2614
|
|
|
|
|
|
|
|
|
2615
|
|
|
|
|
|
|
$string -> File type |
|
2616
|
|
|
|
|
|
|
|
|
2617
|
|
|
|
|
|
|
Example: |
|
2618
|
|
|
|
|
|
|
|
|
2619
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2620
|
|
|
|
|
|
|
|
|
2621
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2622
|
|
|
|
|
|
|
my $fileType = $xmlconv->GetFileType( "samples/textcorpus.txt" ); |
|
2623
|
|
|
|
|
|
|
|
|
2624
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2625
|
|
|
|
|
|
|
|
|
2626
|
|
|
|
|
|
|
=head3 _DateCheck |
|
2627
|
|
|
|
|
|
|
|
|
2628
|
|
|
|
|
|
|
Description: |
|
2629
|
|
|
|
|
|
|
|
|
2630
|
|
|
|
|
|
|
Checks specified begin and end date strings for formatting and logic errors. |
|
2631
|
|
|
|
|
|
|
|
|
2632
|
|
|
|
|
|
|
Input: |
|
2633
|
|
|
|
|
|
|
|
|
2634
|
|
|
|
|
|
|
None |
|
2635
|
|
|
|
|
|
|
|
|
2636
|
|
|
|
|
|
|
Output: |
|
2637
|
|
|
|
|
|
|
|
|
2638
|
|
|
|
|
|
|
$value -> "0" = Passed Checks / "-1" = Failed Checks |
|
2639
|
|
|
|
|
|
|
|
|
2640
|
|
|
|
|
|
|
Example: |
|
2641
|
|
|
|
|
|
|
|
|
2642
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2643
|
|
|
|
|
|
|
|
|
2644
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2645
|
|
|
|
|
|
|
print "Passed Date Checks\n" if ( $xmlconv->_DateCheck() == 0 ); |
|
2646
|
|
|
|
|
|
|
print "Failed Date Checks\n" if ( $xmlconv->_DateCheck() == -1 ); |
|
2647
|
|
|
|
|
|
|
|
|
2648
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2649
|
|
|
|
|
|
|
|
|
2650
|
|
|
|
|
|
|
=head2 Accessor Functions |
|
2651
|
|
|
|
|
|
|
|
|
2652
|
|
|
|
|
|
|
=head3 GetDebugLog |
|
2653
|
|
|
|
|
|
|
|
|
2654
|
|
|
|
|
|
|
Description: |
|
2655
|
|
|
|
|
|
|
|
|
2656
|
|
|
|
|
|
|
Returns the _debugLog member variable set during Word2vec::Xmltow2v object initialization of new function. |
|
2657
|
|
|
|
|
|
|
|
|
2658
|
|
|
|
|
|
|
Input: |
|
2659
|
|
|
|
|
|
|
|
|
2660
|
|
|
|
|
|
|
None |
|
2661
|
|
|
|
|
|
|
|
|
2662
|
|
|
|
|
|
|
Output: |
|
2663
|
|
|
|
|
|
|
|
|
2664
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True |
|
2665
|
|
|
|
|
|
|
|
|
2666
|
|
|
|
|
|
|
Example: |
|
2667
|
|
|
|
|
|
|
|
|
2668
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2669
|
|
|
|
|
|
|
|
|
2670
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2671
|
|
|
|
|
|
|
my $debugLog = $xmlconv->GetDebugLog(); |
|
2672
|
|
|
|
|
|
|
|
|
2673
|
|
|
|
|
|
|
print( "Debug Logging Enabled\n" ) if $debugLog == 1; |
|
2674
|
|
|
|
|
|
|
print( "Debug Logging Disabled\n" ) if $debugLog == 0; |
|
2675
|
|
|
|
|
|
|
|
|
2676
|
|
|
|
|
|
|
|
|
2677
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2678
|
|
|
|
|
|
|
|
|
2679
|
|
|
|
|
|
|
=head3 GetWriteLog |
|
2680
|
|
|
|
|
|
|
|
|
2681
|
|
|
|
|
|
|
Description: |
|
2682
|
|
|
|
|
|
|
|
|
2683
|
|
|
|
|
|
|
Returns the _writeLog member variable set during Word2vec::Xmltow2v object initialization of new function. |
|
2684
|
|
|
|
|
|
|
|
|
2685
|
|
|
|
|
|
|
Input: |
|
2686
|
|
|
|
|
|
|
|
|
2687
|
|
|
|
|
|
|
None |
|
2688
|
|
|
|
|
|
|
|
|
2689
|
|
|
|
|
|
|
Output: |
|
2690
|
|
|
|
|
|
|
|
|
2691
|
|
|
|
|
|
|
$value -> '0' = False, '1' = True |
|
2692
|
|
|
|
|
|
|
|
|
2693
|
|
|
|
|
|
|
Example: |
|
2694
|
|
|
|
|
|
|
|
|
2695
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2696
|
|
|
|
|
|
|
|
|
2697
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2698
|
|
|
|
|
|
|
my $writeLog = $xmlconv->GetWriteLog(); |
|
2699
|
|
|
|
|
|
|
|
|
2700
|
|
|
|
|
|
|
print( "Write Logging Enabled\n" ) if $writeLog == 1; |
|
2701
|
|
|
|
|
|
|
print( "Write Logging Disabled\n" ) if $writeLog == 0; |
|
2702
|
|
|
|
|
|
|
|
|
2703
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2704
|
|
|
|
|
|
|
|
|
2705
|
|
|
|
|
|
|
=head3 GetStoreTitle |
|
2706
|
|
|
|
|
|
|
|
|
2707
|
|
|
|
|
|
|
Description: |
|
2708
|
|
|
|
|
|
|
|
|
2709
|
|
|
|
|
|
|
Returns the _storeTitle member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2710
|
|
|
|
|
|
|
|
|
2711
|
|
|
|
|
|
|
Input: |
|
2712
|
|
|
|
|
|
|
|
|
2713
|
|
|
|
|
|
|
None |
|
2714
|
|
|
|
|
|
|
|
|
2715
|
|
|
|
|
|
|
Output: |
|
2716
|
|
|
|
|
|
|
|
|
2717
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False |
|
2718
|
|
|
|
|
|
|
|
|
2719
|
|
|
|
|
|
|
Example: |
|
2720
|
|
|
|
|
|
|
|
|
2721
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2722
|
|
|
|
|
|
|
|
|
2723
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2724
|
|
|
|
|
|
|
my $storeTitle = $xmlconv->GetStoreTitle(); |
|
2725
|
|
|
|
|
|
|
|
|
2726
|
|
|
|
|
|
|
print( "Store Title Option: Enabled\n" ) if $storeTitle == 1; |
|
2727
|
|
|
|
|
|
|
print( "Store Title Option: Disabled\n" ) if $storeTitle == 0; |
|
2728
|
|
|
|
|
|
|
|
|
2729
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2730
|
|
|
|
|
|
|
|
|
2731
|
|
|
|
|
|
|
=head3 GetStoreAbstract |
|
2732
|
|
|
|
|
|
|
|
|
2733
|
|
|
|
|
|
|
Description: |
|
2734
|
|
|
|
|
|
|
|
|
2735
|
|
|
|
|
|
|
Returns the _storeAbstract member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2736
|
|
|
|
|
|
|
|
|
2737
|
|
|
|
|
|
|
Input: |
|
2738
|
|
|
|
|
|
|
|
|
2739
|
|
|
|
|
|
|
None |
|
2740
|
|
|
|
|
|
|
|
|
2741
|
|
|
|
|
|
|
Output: |
|
2742
|
|
|
|
|
|
|
|
|
2743
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False |
|
2744
|
|
|
|
|
|
|
|
|
2745
|
|
|
|
|
|
|
Example: |
|
2746
|
|
|
|
|
|
|
|
|
2747
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2748
|
|
|
|
|
|
|
|
|
2749
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2750
|
|
|
|
|
|
|
my $storeAbstract = $xmlconv->GetStoreAbstract(); |
|
2751
|
|
|
|
|
|
|
|
|
2752
|
|
|
|
|
|
|
print( "Store Abstract Option: Enabled\n" ) if $storeAbstract == 1; |
|
2753
|
|
|
|
|
|
|
print( "Store Abstract Option: Disabled\n" ) if $storeAbstract == 0; |
|
2754
|
|
|
|
|
|
|
|
|
2755
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2756
|
|
|
|
|
|
|
|
|
2757
|
|
|
|
|
|
|
=head3 GetQuickParse |
|
2758
|
|
|
|
|
|
|
|
|
2759
|
|
|
|
|
|
|
Description: |
|
2760
|
|
|
|
|
|
|
|
|
2761
|
|
|
|
|
|
|
Returns the _quickParse member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2762
|
|
|
|
|
|
|
|
|
2763
|
|
|
|
|
|
|
Input: |
|
2764
|
|
|
|
|
|
|
|
|
2765
|
|
|
|
|
|
|
None |
|
2766
|
|
|
|
|
|
|
|
|
2767
|
|
|
|
|
|
|
Output: |
|
2768
|
|
|
|
|
|
|
|
|
2769
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False |
|
2770
|
|
|
|
|
|
|
|
|
2771
|
|
|
|
|
|
|
Example: |
|
2772
|
|
|
|
|
|
|
|
|
2773
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2774
|
|
|
|
|
|
|
|
|
2775
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2776
|
|
|
|
|
|
|
my $quickParse = $xmlconv->GetQuickParse(); |
|
2777
|
|
|
|
|
|
|
|
|
2778
|
|
|
|
|
|
|
print( "Quick Parse Option: Enabled\n" ) if $quickParse == 1; |
|
2779
|
|
|
|
|
|
|
print( "Quick Parse Option: Disabled\n" ) if $quickParse == 0; |
|
2780
|
|
|
|
|
|
|
|
|
2781
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2782
|
|
|
|
|
|
|
|
|
2783
|
|
|
|
|
|
|
=head3 GetCompoundifyText |
|
2784
|
|
|
|
|
|
|
|
|
2785
|
|
|
|
|
|
|
Description: |
|
2786
|
|
|
|
|
|
|
|
|
2787
|
|
|
|
|
|
|
Returns the _compoundifyText member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2788
|
|
|
|
|
|
|
|
|
2789
|
|
|
|
|
|
|
Input: |
|
2790
|
|
|
|
|
|
|
|
|
2791
|
|
|
|
|
|
|
None |
|
2792
|
|
|
|
|
|
|
|
|
2793
|
|
|
|
|
|
|
Output: |
|
2794
|
|
|
|
|
|
|
|
|
2795
|
|
|
|
|
|
|
$value -> '1' = True / '0' = False |
|
2796
|
|
|
|
|
|
|
|
|
2797
|
|
|
|
|
|
|
Example: |
|
2798
|
|
|
|
|
|
|
|
|
2799
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2800
|
|
|
|
|
|
|
|
|
2801
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2802
|
|
|
|
|
|
|
my $compoundify = $xmlconv->GetCompoundifyText(); |
|
2803
|
|
|
|
|
|
|
|
|
2804
|
|
|
|
|
|
|
print( "Compoundify Text Option: Enabled\n" ) if $compoundify == 1; |
|
2805
|
|
|
|
|
|
|
print( "Compoundify Text Option: Disabled\n" ) if $compoundify == 0; |
|
2806
|
|
|
|
|
|
|
|
|
2807
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2808
|
|
|
|
|
|
|
|
|
2809
|
|
|
|
|
|
|
=head3 GetNumOfThreads |
|
2810
|
|
|
|
|
|
|
|
|
2811
|
|
|
|
|
|
|
Description: |
|
2812
|
|
|
|
|
|
|
|
|
2813
|
|
|
|
|
|
|
Returns the _numOfThreads member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2814
|
|
|
|
|
|
|
|
|
2815
|
|
|
|
|
|
|
Input: |
|
2816
|
|
|
|
|
|
|
|
|
2817
|
|
|
|
|
|
|
None |
|
2818
|
|
|
|
|
|
|
|
|
2819
|
|
|
|
|
|
|
Output: |
|
2820
|
|
|
|
|
|
|
|
|
2821
|
|
|
|
|
|
|
$value -> Number of threads |
|
2822
|
|
|
|
|
|
|
|
|
2823
|
|
|
|
|
|
|
Example: |
|
2824
|
|
|
|
|
|
|
|
|
2825
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2826
|
|
|
|
|
|
|
|
|
2827
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2828
|
|
|
|
|
|
|
my $numOfThreads = $xmlconv->GetNumOfThreads(); |
|
2829
|
|
|
|
|
|
|
|
|
2830
|
|
|
|
|
|
|
print( "Number of threads: $numOfThreads\n" ); |
|
2831
|
|
|
|
|
|
|
|
|
2832
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2833
|
|
|
|
|
|
|
|
|
2834
|
|
|
|
|
|
|
=head3 GetWorkingDir |
|
2835
|
|
|
|
|
|
|
|
|
2836
|
|
|
|
|
|
|
Description: |
|
2837
|
|
|
|
|
|
|
|
|
2838
|
|
|
|
|
|
|
Returns the _workingDir member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2839
|
|
|
|
|
|
|
|
|
2840
|
|
|
|
|
|
|
Input: |
|
2841
|
|
|
|
|
|
|
|
|
2842
|
|
|
|
|
|
|
None |
|
2843
|
|
|
|
|
|
|
|
|
2844
|
|
|
|
|
|
|
Output: |
|
2845
|
|
|
|
|
|
|
|
|
2846
|
|
|
|
|
|
|
$string -> Working directory string |
|
2847
|
|
|
|
|
|
|
|
|
2848
|
|
|
|
|
|
|
Example: |
|
2849
|
|
|
|
|
|
|
|
|
2850
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2851
|
|
|
|
|
|
|
|
|
2852
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2853
|
|
|
|
|
|
|
my $workingDirectory = $xmlconv->GetWorkingDir(); |
|
2854
|
|
|
|
|
|
|
|
|
2855
|
|
|
|
|
|
|
print( "Working Directory: $workingDirectory\n" ); |
|
2856
|
|
|
|
|
|
|
|
|
2857
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2858
|
|
|
|
|
|
|
|
|
2859
|
|
|
|
|
|
|
=head3 GetSavePath |
|
2860
|
|
|
|
|
|
|
|
|
2861
|
|
|
|
|
|
|
Description: |
|
2862
|
|
|
|
|
|
|
|
|
2863
|
|
|
|
|
|
|
Returns the _saveDir member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2864
|
|
|
|
|
|
|
|
|
2865
|
|
|
|
|
|
|
Input: |
|
2866
|
|
|
|
|
|
|
|
|
2867
|
|
|
|
|
|
|
None |
|
2868
|
|
|
|
|
|
|
|
|
2869
|
|
|
|
|
|
|
Output: |
|
2870
|
|
|
|
|
|
|
|
|
2871
|
|
|
|
|
|
|
$string -> Save directory string |
|
2872
|
|
|
|
|
|
|
|
|
2873
|
|
|
|
|
|
|
Example: |
|
2874
|
|
|
|
|
|
|
|
|
2875
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2876
|
|
|
|
|
|
|
|
|
2877
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2878
|
|
|
|
|
|
|
my $savePath = $xmlconv->GetSavePath(); |
|
2879
|
|
|
|
|
|
|
|
|
2880
|
|
|
|
|
|
|
print( "Save Directory: $savePath\n" ); |
|
2881
|
|
|
|
|
|
|
|
|
2882
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2883
|
|
|
|
|
|
|
|
|
2884
|
|
|
|
|
|
|
=head3 GetBeginDate |
|
2885
|
|
|
|
|
|
|
|
|
2886
|
|
|
|
|
|
|
Description: |
|
2887
|
|
|
|
|
|
|
|
|
2888
|
|
|
|
|
|
|
Returns the _beginDate member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2889
|
|
|
|
|
|
|
|
|
2890
|
|
|
|
|
|
|
Input: |
|
2891
|
|
|
|
|
|
|
|
|
2892
|
|
|
|
|
|
|
None |
|
2893
|
|
|
|
|
|
|
|
|
2894
|
|
|
|
|
|
|
Output: |
|
2895
|
|
|
|
|
|
|
|
|
2896
|
|
|
|
|
|
|
$date -> Beginning date range - Format: XX/XX/XXXX (Mon/Day/Year) |
|
2897
|
|
|
|
|
|
|
|
|
2898
|
|
|
|
|
|
|
Example: |
|
2899
|
|
|
|
|
|
|
|
|
2900
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2901
|
|
|
|
|
|
|
|
|
2902
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2903
|
|
|
|
|
|
|
my $date = $xmlconv->GetBeginDate(); |
|
2904
|
|
|
|
|
|
|
|
|
2905
|
|
|
|
|
|
|
print( "Date: $date\n" ); |
|
2906
|
|
|
|
|
|
|
|
|
2907
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2908
|
|
|
|
|
|
|
|
|
2909
|
|
|
|
|
|
|
=head3 GetEndDate |
|
2910
|
|
|
|
|
|
|
|
|
2911
|
|
|
|
|
|
|
Description: |
|
2912
|
|
|
|
|
|
|
|
|
2913
|
|
|
|
|
|
|
Returns the _endDate member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2914
|
|
|
|
|
|
|
|
|
2915
|
|
|
|
|
|
|
Input: |
|
2916
|
|
|
|
|
|
|
|
|
2917
|
|
|
|
|
|
|
None |
|
2918
|
|
|
|
|
|
|
|
|
2919
|
|
|
|
|
|
|
Output: |
|
2920
|
|
|
|
|
|
|
|
|
2921
|
|
|
|
|
|
|
$date -> End date range - Format: XX/XX/XXXX (Mon/Day/Year). |
|
2922
|
|
|
|
|
|
|
|
|
2923
|
|
|
|
|
|
|
Example: |
|
2924
|
|
|
|
|
|
|
|
|
2925
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2926
|
|
|
|
|
|
|
|
|
2927
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2928
|
|
|
|
|
|
|
my $date = $xmlconv->GetEndDate(); |
|
2929
|
|
|
|
|
|
|
|
|
2930
|
|
|
|
|
|
|
print( "Date: $date\n" ); |
|
2931
|
|
|
|
|
|
|
|
|
2932
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2933
|
|
|
|
|
|
|
|
|
2934
|
|
|
|
|
|
|
=head3 GetXMLStringToParse |
|
2935
|
|
|
|
|
|
|
|
|
2936
|
|
|
|
|
|
|
Returns the XML data (string) to be parsed. |
|
2937
|
|
|
|
|
|
|
|
|
2938
|
|
|
|
|
|
|
Description: |
|
2939
|
|
|
|
|
|
|
|
|
2940
|
|
|
|
|
|
|
Returns the _xmlStringToParse member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2941
|
|
|
|
|
|
|
|
|
2942
|
|
|
|
|
|
|
Input: |
|
2943
|
|
|
|
|
|
|
|
|
2944
|
|
|
|
|
|
|
None |
|
2945
|
|
|
|
|
|
|
|
|
2946
|
|
|
|
|
|
|
Output: |
|
2947
|
|
|
|
|
|
|
|
|
2948
|
|
|
|
|
|
|
$string -> Medline XML data string |
|
2949
|
|
|
|
|
|
|
|
|
2950
|
|
|
|
|
|
|
Example: |
|
2951
|
|
|
|
|
|
|
|
|
2952
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2953
|
|
|
|
|
|
|
|
|
2954
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2955
|
|
|
|
|
|
|
my $xmlStr = $xmlconv->GetXMLStringToParse(); |
|
2956
|
|
|
|
|
|
|
|
|
2957
|
|
|
|
|
|
|
print( "XML String: $xmlStr\n" ); |
|
2958
|
|
|
|
|
|
|
|
|
2959
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2960
|
|
|
|
|
|
|
|
|
2961
|
|
|
|
|
|
|
=head3 GetTextCorpusStr |
|
2962
|
|
|
|
|
|
|
|
|
2963
|
|
|
|
|
|
|
Description: |
|
2964
|
|
|
|
|
|
|
|
|
2965
|
|
|
|
|
|
|
Returns the _textCorpusStr member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2966
|
|
|
|
|
|
|
|
|
2967
|
|
|
|
|
|
|
Input: |
|
2968
|
|
|
|
|
|
|
|
|
2969
|
|
|
|
|
|
|
None |
|
2970
|
|
|
|
|
|
|
|
|
2971
|
|
|
|
|
|
|
Output: |
|
2972
|
|
|
|
|
|
|
|
|
2973
|
|
|
|
|
|
|
$string -> Text corpus string |
|
2974
|
|
|
|
|
|
|
|
|
2975
|
|
|
|
|
|
|
Example: |
|
2976
|
|
|
|
|
|
|
|
|
2977
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
2978
|
|
|
|
|
|
|
|
|
2979
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
2980
|
|
|
|
|
|
|
my $str = $xmlconv->GetTextCorpusStr(); |
|
2981
|
|
|
|
|
|
|
|
|
2982
|
|
|
|
|
|
|
print( "Text Corpus: $str\n" ); |
|
2983
|
|
|
|
|
|
|
|
|
2984
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
2985
|
|
|
|
|
|
|
|
|
2986
|
|
|
|
|
|
|
=head3 GetFileHandle |
|
2987
|
|
|
|
|
|
|
|
|
2988
|
|
|
|
|
|
|
Description: |
|
2989
|
|
|
|
|
|
|
|
|
2990
|
|
|
|
|
|
|
Returns the _fileHandle member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
2991
|
|
|
|
|
|
|
|
|
2992
|
|
|
|
|
|
|
Warning: This is a private function. File handle is used by WriteLog() method. Do not manipulate this file handle as errors can result. |
|
2993
|
|
|
|
|
|
|
|
|
2994
|
|
|
|
|
|
|
Input: |
|
2995
|
|
|
|
|
|
|
|
|
2996
|
|
|
|
|
|
|
None |
|
2997
|
|
|
|
|
|
|
|
|
2998
|
|
|
|
|
|
|
Output: |
|
2999
|
|
|
|
|
|
|
|
|
3000
|
|
|
|
|
|
|
$fileHandle -> Returns file handle for WriteLog() method. |
|
3001
|
|
|
|
|
|
|
|
|
3002
|
|
|
|
|
|
|
Example: |
|
3003
|
|
|
|
|
|
|
|
|
3004
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3005
|
|
|
|
|
|
|
|
|
3006
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3007
|
|
|
|
|
|
|
my $fileHandle = $xmlconv->GetFileHandle(); |
|
3008
|
|
|
|
|
|
|
|
|
3009
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3010
|
|
|
|
|
|
|
|
|
3011
|
|
|
|
|
|
|
=head3 GetTwigHandler |
|
3012
|
|
|
|
|
|
|
|
|
3013
|
|
|
|
|
|
|
Returns XML::Twig handler. |
|
3014
|
|
|
|
|
|
|
|
|
3015
|
|
|
|
|
|
|
Description: |
|
3016
|
|
|
|
|
|
|
|
|
3017
|
|
|
|
|
|
|
Returns the _twigHandler member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3018
|
|
|
|
|
|
|
|
|
3019
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3020
|
|
|
|
|
|
|
|
|
3021
|
|
|
|
|
|
|
Input: |
|
3022
|
|
|
|
|
|
|
|
|
3023
|
|
|
|
|
|
|
None |
|
3024
|
|
|
|
|
|
|
|
|
3025
|
|
|
|
|
|
|
Output: |
|
3026
|
|
|
|
|
|
|
|
|
3027
|
|
|
|
|
|
|
$twigHandler -> XML::Twig handler. |
|
3028
|
|
|
|
|
|
|
|
|
3029
|
|
|
|
|
|
|
Example: |
|
3030
|
|
|
|
|
|
|
|
|
3031
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3032
|
|
|
|
|
|
|
|
|
3033
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3034
|
|
|
|
|
|
|
my $xmlHandler = $xmlconv->GetTwigHandler(); |
|
3035
|
|
|
|
|
|
|
|
|
3036
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3037
|
|
|
|
|
|
|
|
|
3038
|
|
|
|
|
|
|
=head3 GetParsedCount |
|
3039
|
|
|
|
|
|
|
|
|
3040
|
|
|
|
|
|
|
Description: |
|
3041
|
|
|
|
|
|
|
|
|
3042
|
|
|
|
|
|
|
Returns the _parsedCount member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3043
|
|
|
|
|
|
|
|
|
3044
|
|
|
|
|
|
|
Input: |
|
3045
|
|
|
|
|
|
|
|
|
3046
|
|
|
|
|
|
|
None |
|
3047
|
|
|
|
|
|
|
|
|
3048
|
|
|
|
|
|
|
Output: |
|
3049
|
|
|
|
|
|
|
|
|
3050
|
|
|
|
|
|
|
$value -> Number of parsed Medline articles. |
|
3051
|
|
|
|
|
|
|
|
|
3052
|
|
|
|
|
|
|
Example: |
|
3053
|
|
|
|
|
|
|
|
|
3054
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3055
|
|
|
|
|
|
|
|
|
3056
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3057
|
|
|
|
|
|
|
my $numOfParsed = $xmlconv->GetParsedCount(); |
|
3058
|
|
|
|
|
|
|
|
|
3059
|
|
|
|
|
|
|
print( "Number of parsed Medline articles: $numOfParsed\n" ); |
|
3060
|
|
|
|
|
|
|
|
|
3061
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3062
|
|
|
|
|
|
|
|
|
3063
|
|
|
|
|
|
|
=head3 GetTempStr |
|
3064
|
|
|
|
|
|
|
|
|
3065
|
|
|
|
|
|
|
Description: |
|
3066
|
|
|
|
|
|
|
|
|
3067
|
|
|
|
|
|
|
Returns the _tempStr member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3068
|
|
|
|
|
|
|
|
|
3069
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. Used by module as a temporary storage |
|
3070
|
|
|
|
|
|
|
location for parsed Medline 'Title' and 'Abstract' flag string data. |
|
3071
|
|
|
|
|
|
|
|
|
3072
|
|
|
|
|
|
|
Input: |
|
3073
|
|
|
|
|
|
|
|
|
3074
|
|
|
|
|
|
|
None |
|
3075
|
|
|
|
|
|
|
|
|
3076
|
|
|
|
|
|
|
Output: |
|
3077
|
|
|
|
|
|
|
|
|
3078
|
|
|
|
|
|
|
$string -> Temporary string storage location. |
|
3079
|
|
|
|
|
|
|
|
|
3080
|
|
|
|
|
|
|
Example: |
|
3081
|
|
|
|
|
|
|
|
|
3082
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3083
|
|
|
|
|
|
|
|
|
3084
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3085
|
|
|
|
|
|
|
my $tempStr = $xmlconv->GetTempStr(); |
|
3086
|
|
|
|
|
|
|
|
|
3087
|
|
|
|
|
|
|
print( "Temp String: $tempStr\n" ); |
|
3088
|
|
|
|
|
|
|
|
|
3089
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3090
|
|
|
|
|
|
|
|
|
3091
|
|
|
|
|
|
|
=head3 GetTempDate |
|
3092
|
|
|
|
|
|
|
|
|
3093
|
|
|
|
|
|
|
Description: |
|
3094
|
|
|
|
|
|
|
|
|
3095
|
|
|
|
|
|
|
Returns the _tempDate member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3096
|
|
|
|
|
|
|
Used by module as a temporary storage location for parsed Medline 'DateCreated' flag string data. |
|
3097
|
|
|
|
|
|
|
|
|
3098
|
|
|
|
|
|
|
Input: |
|
3099
|
|
|
|
|
|
|
|
|
3100
|
|
|
|
|
|
|
None |
|
3101
|
|
|
|
|
|
|
|
|
3102
|
|
|
|
|
|
|
Output: |
|
3103
|
|
|
|
|
|
|
|
|
3104
|
|
|
|
|
|
|
$date -> Date string - Format: XX/XX/XXXX (Mon/Day/Year). |
|
3105
|
|
|
|
|
|
|
|
|
3106
|
|
|
|
|
|
|
Example: |
|
3107
|
|
|
|
|
|
|
|
|
3108
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3109
|
|
|
|
|
|
|
|
|
3110
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3111
|
|
|
|
|
|
|
my $date = $xmlconv->GetTempDate(); |
|
3112
|
|
|
|
|
|
|
|
|
3113
|
|
|
|
|
|
|
print( "Temp Date: $date\n" ); |
|
3114
|
|
|
|
|
|
|
|
|
3115
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3116
|
|
|
|
|
|
|
|
|
3117
|
|
|
|
|
|
|
=head3 GetCompoundWordAry |
|
3118
|
|
|
|
|
|
|
|
|
3119
|
|
|
|
|
|
|
Description: |
|
3120
|
|
|
|
|
|
|
|
|
3121
|
|
|
|
|
|
|
Returns the _compoundWordAry member array reference set during Word2vec::Xmltow2v object instantiation of new function. |
|
3122
|
|
|
|
|
|
|
|
|
3123
|
|
|
|
|
|
|
Warning: Compound word data must be loaded in memory first via ReadCompoundWordDataFromFile(). |
|
3124
|
|
|
|
|
|
|
|
|
3125
|
|
|
|
|
|
|
Input: |
|
3126
|
|
|
|
|
|
|
|
|
3127
|
|
|
|
|
|
|
None |
|
3128
|
|
|
|
|
|
|
|
|
3129
|
|
|
|
|
|
|
Output: |
|
3130
|
|
|
|
|
|
|
|
|
3131
|
|
|
|
|
|
|
$arrayReference -> Compound word array reference. |
|
3132
|
|
|
|
|
|
|
|
|
3133
|
|
|
|
|
|
|
Example: |
|
3134
|
|
|
|
|
|
|
|
|
3135
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3136
|
|
|
|
|
|
|
|
|
3137
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3138
|
|
|
|
|
|
|
my $arrayReference = $xmlconv->GetCompoundWordAry(); |
|
3139
|
|
|
|
|
|
|
my @compoundWord = @{ $arrayReference }; |
|
3140
|
|
|
|
|
|
|
|
|
3141
|
|
|
|
|
|
|
print( "Compound Word Array: @compoundWord\n" ); |
|
3142
|
|
|
|
|
|
|
|
|
3143
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3144
|
|
|
|
|
|
|
|
|
3145
|
|
|
|
|
|
|
=head3 GetCompoundWordBST |
|
3146
|
|
|
|
|
|
|
|
|
3147
|
|
|
|
|
|
|
Description: |
|
3148
|
|
|
|
|
|
|
|
|
3149
|
|
|
|
|
|
|
Returns the _compoundWordBST member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3150
|
|
|
|
|
|
|
|
|
3151
|
|
|
|
|
|
|
Input: |
|
3152
|
|
|
|
|
|
|
|
|
3153
|
|
|
|
|
|
|
None |
|
3154
|
|
|
|
|
|
|
|
|
3155
|
|
|
|
|
|
|
Output: |
|
3156
|
|
|
|
|
|
|
|
|
3157
|
|
|
|
|
|
|
$bst -> Compound word binary search tree. |
|
3158
|
|
|
|
|
|
|
|
|
3159
|
|
|
|
|
|
|
Example: |
|
3160
|
|
|
|
|
|
|
|
|
3161
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3162
|
|
|
|
|
|
|
|
|
3163
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3164
|
|
|
|
|
|
|
my $bst = $xmlconv->GetCompoundWordBST(); |
|
3165
|
|
|
|
|
|
|
|
|
3166
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3167
|
|
|
|
|
|
|
|
|
3168
|
|
|
|
|
|
|
=head3 GetMaxCompoundWordLength |
|
3169
|
|
|
|
|
|
|
|
|
3170
|
|
|
|
|
|
|
Description: |
|
3171
|
|
|
|
|
|
|
|
|
3172
|
|
|
|
|
|
|
Returns the _maxCompoundWordLength member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3173
|
|
|
|
|
|
|
|
|
3174
|
|
|
|
|
|
|
Note: If not defined, it is automatically set to and returns 20. |
|
3175
|
|
|
|
|
|
|
|
|
3176
|
|
|
|
|
|
|
Input: |
|
3177
|
|
|
|
|
|
|
|
|
3178
|
|
|
|
|
|
|
None |
|
3179
|
|
|
|
|
|
|
|
|
3180
|
|
|
|
|
|
|
Output: |
|
3181
|
|
|
|
|
|
|
|
|
3182
|
|
|
|
|
|
|
$value -> Maximum number of compound words in a given phrase. |
|
3183
|
|
|
|
|
|
|
|
|
3184
|
|
|
|
|
|
|
Example: |
|
3185
|
|
|
|
|
|
|
|
|
3186
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3187
|
|
|
|
|
|
|
|
|
3188
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3189
|
|
|
|
|
|
|
my $compoundWordLength = $xmlconv->GetMaxCompoundWordLength(); |
|
3190
|
|
|
|
|
|
|
|
|
3191
|
|
|
|
|
|
|
print( "Maximum Compound Word Length: $compoundWordLength\n" ); |
|
3192
|
|
|
|
|
|
|
|
|
3193
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3194
|
|
|
|
|
|
|
|
|
3195
|
|
|
|
|
|
|
=head3 GetOverwriteExistingFile |
|
3196
|
|
|
|
|
|
|
|
|
3197
|
|
|
|
|
|
|
Description: |
|
3198
|
|
|
|
|
|
|
|
|
3199
|
|
|
|
|
|
|
Returns the _overwriteExistingFile member variable set during Word2vec::Xmltow2v object instantiation of new function. |
|
3200
|
|
|
|
|
|
|
Enables overwriting of existing text corpus if set to '1' or appends to the existing text corpus if set to '0'. |
|
3201
|
|
|
|
|
|
|
|
|
3202
|
|
|
|
|
|
|
Input: |
|
3203
|
|
|
|
|
|
|
|
|
3204
|
|
|
|
|
|
|
None |
|
3205
|
|
|
|
|
|
|
|
|
3206
|
|
|
|
|
|
|
Output: |
|
3207
|
|
|
|
|
|
|
|
|
3208
|
|
|
|
|
|
|
$value -> '1' = Overwrite existing file / '0' = Append to existing file. |
|
3209
|
|
|
|
|
|
|
|
|
3210
|
|
|
|
|
|
|
Example: |
|
3211
|
|
|
|
|
|
|
|
|
3212
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3213
|
|
|
|
|
|
|
|
|
3214
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3215
|
|
|
|
|
|
|
my $overwriteExistingFile = $xmlconv->GetOverwriteExistingFile(); |
|
3216
|
|
|
|
|
|
|
|
|
3217
|
|
|
|
|
|
|
print( "Overwrite Existing File? YES\n" ) if ( $overwriteExistingFile == 1 ); |
|
3218
|
|
|
|
|
|
|
print( "Overwrite Existing File? NO\n" ) if ( $overwriteExistingFile == 0 ); |
|
3219
|
|
|
|
|
|
|
|
|
3220
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3221
|
|
|
|
|
|
|
|
|
3222
|
|
|
|
|
|
|
=head2 Mutator Functions |
|
3223
|
|
|
|
|
|
|
|
|
3224
|
|
|
|
|
|
|
=head3 SetStoreTitle |
|
3225
|
|
|
|
|
|
|
|
|
3226
|
|
|
|
|
|
|
Description: |
|
3227
|
|
|
|
|
|
|
|
|
3228
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to store article title if true or omit if false. |
|
3229
|
|
|
|
|
|
|
|
|
3230
|
|
|
|
|
|
|
Input: |
|
3231
|
|
|
|
|
|
|
|
|
3232
|
|
|
|
|
|
|
$value -> '1' = Store Titles / '0' = Omit Titles |
|
3233
|
|
|
|
|
|
|
|
|
3234
|
|
|
|
|
|
|
Output: |
|
3235
|
|
|
|
|
|
|
|
|
3236
|
|
|
|
|
|
|
None |
|
3237
|
|
|
|
|
|
|
|
|
3238
|
|
|
|
|
|
|
Example: |
|
3239
|
|
|
|
|
|
|
|
|
3240
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3241
|
|
|
|
|
|
|
|
|
3242
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3243
|
|
|
|
|
|
|
$xmlconv->SetStoreTitle( 1 ); |
|
3244
|
|
|
|
|
|
|
|
|
3245
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3246
|
|
|
|
|
|
|
|
|
3247
|
|
|
|
|
|
|
=head3 SetStoreAbstract |
|
3248
|
|
|
|
|
|
|
|
|
3249
|
|
|
|
|
|
|
Description: |
|
3250
|
|
|
|
|
|
|
|
|
3251
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to store article abstracts if true or omit if false. |
|
3252
|
|
|
|
|
|
|
|
|
3253
|
|
|
|
|
|
|
Input: |
|
3254
|
|
|
|
|
|
|
|
|
3255
|
|
|
|
|
|
|
$value -> '1' = Store Abstracts / '0' = Omit Abstracts |
|
3256
|
|
|
|
|
|
|
|
|
3257
|
|
|
|
|
|
|
Output: |
|
3258
|
|
|
|
|
|
|
|
|
3259
|
|
|
|
|
|
|
None |
|
3260
|
|
|
|
|
|
|
|
|
3261
|
|
|
|
|
|
|
Example: |
|
3262
|
|
|
|
|
|
|
|
|
3263
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3264
|
|
|
|
|
|
|
|
|
3265
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3266
|
|
|
|
|
|
|
$xmlconv->SetStoreAbstract( 1 ); |
|
3267
|
|
|
|
|
|
|
|
|
3268
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3269
|
|
|
|
|
|
|
|
|
3270
|
|
|
|
|
|
|
=head3 SetWorkingDir |
|
3271
|
|
|
|
|
|
|
|
|
3272
|
|
|
|
|
|
|
Description: |
|
3273
|
|
|
|
|
|
|
|
|
3274
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Represents the working directory. |
|
3275
|
|
|
|
|
|
|
|
|
3276
|
|
|
|
|
|
|
Input: |
|
3277
|
|
|
|
|
|
|
|
|
3278
|
|
|
|
|
|
|
$string -> Working directory string |
|
3279
|
|
|
|
|
|
|
|
|
3280
|
|
|
|
|
|
|
Output: |
|
3281
|
|
|
|
|
|
|
|
|
3282
|
|
|
|
|
|
|
None |
|
3283
|
|
|
|
|
|
|
|
|
3284
|
|
|
|
|
|
|
Example: |
|
3285
|
|
|
|
|
|
|
|
|
3286
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3287
|
|
|
|
|
|
|
|
|
3288
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3289
|
|
|
|
|
|
|
$xmlconv->SetWorkingDir( "/samples/" ); |
|
3290
|
|
|
|
|
|
|
|
|
3291
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3292
|
|
|
|
|
|
|
|
|
3293
|
|
|
|
|
|
|
=head3 SetSavePath |
|
3294
|
|
|
|
|
|
|
|
|
3295
|
|
|
|
|
|
|
Description: |
|
3296
|
|
|
|
|
|
|
|
|
3297
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Represents the text corpus save path. |
|
3298
|
|
|
|
|
|
|
|
|
3299
|
|
|
|
|
|
|
Input: |
|
3300
|
|
|
|
|
|
|
|
|
3301
|
|
|
|
|
|
|
$string -> Text corpus save path |
|
3302
|
|
|
|
|
|
|
|
|
3303
|
|
|
|
|
|
|
Output: |
|
3304
|
|
|
|
|
|
|
|
|
3305
|
|
|
|
|
|
|
None |
|
3306
|
|
|
|
|
|
|
|
|
3307
|
|
|
|
|
|
|
Example: |
|
3308
|
|
|
|
|
|
|
|
|
3309
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3310
|
|
|
|
|
|
|
|
|
3311
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3312
|
|
|
|
|
|
|
$xmlconv->SetSavePath( "samples/textcorpus.txt" ); |
|
3313
|
|
|
|
|
|
|
|
|
3314
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3315
|
|
|
|
|
|
|
|
|
3316
|
|
|
|
|
|
|
=head3 SetQuickParse |
|
3317
|
|
|
|
|
|
|
|
|
3318
|
|
|
|
|
|
|
Description: |
|
3319
|
|
|
|
|
|
|
|
|
3320
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to utilize quick parse |
|
3321
|
|
|
|
|
|
|
routines to speed up text corpus compilation. This method is somewhat less accurate due to its non-exhaustive nature. |
|
3322
|
|
|
|
|
|
|
|
|
3323
|
|
|
|
|
|
|
Input: |
|
3324
|
|
|
|
|
|
|
|
|
3325
|
|
|
|
|
|
|
$value -> '1' = Enable Quick Parse / '0' = Disable Quick Parse |
|
3326
|
|
|
|
|
|
|
|
|
3327
|
|
|
|
|
|
|
Output: |
|
3328
|
|
|
|
|
|
|
|
|
3329
|
|
|
|
|
|
|
None |
|
3330
|
|
|
|
|
|
|
|
|
3331
|
|
|
|
|
|
|
Example: |
|
3332
|
|
|
|
|
|
|
|
|
3333
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3334
|
|
|
|
|
|
|
|
|
3335
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3336
|
|
|
|
|
|
|
$xmlconv->SetQuickParse( 1 ); |
|
3337
|
|
|
|
|
|
|
|
|
3338
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3339
|
|
|
|
|
|
|
|
|
3340
|
|
|
|
|
|
|
=head3 SetCompoundifyText |
|
3341
|
|
|
|
|
|
|
|
|
3342
|
|
|
|
|
|
|
Description: |
|
3343
|
|
|
|
|
|
|
|
|
3344
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Instructs module to utilize 'compoundify' option if true. |
|
3345
|
|
|
|
|
|
|
|
|
3346
|
|
|
|
|
|
|
Warning: This requires compound word data to be loaded into memory with ReadCompoundWordDataFromFile() method prior |
|
3347
|
|
|
|
|
|
|
to executing text corpus compilation. |
|
3348
|
|
|
|
|
|
|
|
|
3349
|
|
|
|
|
|
|
Input: |
|
3350
|
|
|
|
|
|
|
|
|
3351
|
|
|
|
|
|
|
$value -> '1' = Compoundify text / '0' = Do not compoundify text |
|
3352
|
|
|
|
|
|
|
|
|
3353
|
|
|
|
|
|
|
Output: |
|
3354
|
|
|
|
|
|
|
|
|
3355
|
|
|
|
|
|
|
None |
|
3356
|
|
|
|
|
|
|
|
|
3357
|
|
|
|
|
|
|
Example: |
|
3358
|
|
|
|
|
|
|
|
|
3359
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3360
|
|
|
|
|
|
|
|
|
3361
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3362
|
|
|
|
|
|
|
$xmlconv->SetCompoundifyText( 1 ); |
|
3363
|
|
|
|
|
|
|
|
|
3364
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3365
|
|
|
|
|
|
|
|
|
3366
|
|
|
|
|
|
|
=head3 SetNumOfThreads |
|
3367
|
|
|
|
|
|
|
|
|
3368
|
|
|
|
|
|
|
Description: |
|
3369
|
|
|
|
|
|
|
|
|
3370
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Sets the requested number of threads to parse Medline XML files |
|
3371
|
|
|
|
|
|
|
and compile the text corpus. |
|
3372
|
|
|
|
|
|
|
|
|
3373
|
|
|
|
|
|
|
Input: |
|
3374
|
|
|
|
|
|
|
|
|
3375
|
|
|
|
|
|
|
$value -> Integer (Positive value) |
|
3376
|
|
|
|
|
|
|
|
|
3377
|
|
|
|
|
|
|
Output: |
|
3378
|
|
|
|
|
|
|
|
|
3379
|
|
|
|
|
|
|
None |
|
3380
|
|
|
|
|
|
|
|
|
3381
|
|
|
|
|
|
|
Example: |
|
3382
|
|
|
|
|
|
|
|
|
3383
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3384
|
|
|
|
|
|
|
|
|
3385
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3386
|
|
|
|
|
|
|
$xmlconv->SetNumOfThreads( 4 ); |
|
3387
|
|
|
|
|
|
|
|
|
3388
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3389
|
|
|
|
|
|
|
|
|
3390
|
|
|
|
|
|
|
=head3 SetBeginDate |
|
3391
|
|
|
|
|
|
|
|
|
3392
|
|
|
|
|
|
|
Description: |
|
3393
|
|
|
|
|
|
|
|
|
3394
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets beginning date range for earliest articles to store, by |
|
3395
|
|
|
|
|
|
|
'DateCreated' Medline tag, within the text corpus during compilation. |
|
3396
|
|
|
|
|
|
|
|
|
3397
|
|
|
|
|
|
|
Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year) |
|
3398
|
|
|
|
|
|
|
|
|
3399
|
|
|
|
|
|
|
Input: |
|
3400
|
|
|
|
|
|
|
|
|
3401
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX" |
|
3402
|
|
|
|
|
|
|
|
|
3403
|
|
|
|
|
|
|
Output: |
|
3404
|
|
|
|
|
|
|
|
|
3405
|
|
|
|
|
|
|
None |
|
3406
|
|
|
|
|
|
|
|
|
3407
|
|
|
|
|
|
|
Example: |
|
3408
|
|
|
|
|
|
|
|
|
3409
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3410
|
|
|
|
|
|
|
|
|
3411
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3412
|
|
|
|
|
|
|
$xmlconv->SetBeginDate( "01/01/2004" ); |
|
3413
|
|
|
|
|
|
|
|
|
3414
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3415
|
|
|
|
|
|
|
|
|
3416
|
|
|
|
|
|
|
=head3 SetEndDate |
|
3417
|
|
|
|
|
|
|
|
|
3418
|
|
|
|
|
|
|
Description: |
|
3419
|
|
|
|
|
|
|
|
|
3420
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets ending date range for latest article to store, by |
|
3421
|
|
|
|
|
|
|
'DateCreated' Medline tag, within the text corpus during compilation. |
|
3422
|
|
|
|
|
|
|
|
|
3423
|
|
|
|
|
|
|
Note: Expected format - "XX/XX/XXXX" (Mon/Day/Year) |
|
3424
|
|
|
|
|
|
|
|
|
3425
|
|
|
|
|
|
|
Input: |
|
3426
|
|
|
|
|
|
|
|
|
3427
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX" |
|
3428
|
|
|
|
|
|
|
|
|
3429
|
|
|
|
|
|
|
Output: |
|
3430
|
|
|
|
|
|
|
|
|
3431
|
|
|
|
|
|
|
None |
|
3432
|
|
|
|
|
|
|
|
|
3433
|
|
|
|
|
|
|
Example: |
|
3434
|
|
|
|
|
|
|
|
|
3435
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3436
|
|
|
|
|
|
|
|
|
3437
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3438
|
|
|
|
|
|
|
$xmlconv->SetEndDate( "08/13/2016" ); |
|
3439
|
|
|
|
|
|
|
|
|
3440
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3441
|
|
|
|
|
|
|
|
|
3442
|
|
|
|
|
|
|
=head3 SetXMLStringToParse |
|
3443
|
|
|
|
|
|
|
|
|
3444
|
|
|
|
|
|
|
Description: |
|
3445
|
|
|
|
|
|
|
|
|
3446
|
|
|
|
|
|
|
Sets member variable to passed string parameter. This string normally consists of Medline XML data to be |
|
3447
|
|
|
|
|
|
|
parsed for text corpus compilation. |
|
3448
|
|
|
|
|
|
|
|
|
3449
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3450
|
|
|
|
|
|
|
|
|
3451
|
|
|
|
|
|
|
Input: |
|
3452
|
|
|
|
|
|
|
|
|
3453
|
|
|
|
|
|
|
$string -> String |
|
3454
|
|
|
|
|
|
|
|
|
3455
|
|
|
|
|
|
|
Output: |
|
3456
|
|
|
|
|
|
|
|
|
3457
|
|
|
|
|
|
|
None |
|
3458
|
|
|
|
|
|
|
|
|
3459
|
|
|
|
|
|
|
Example: |
|
3460
|
|
|
|
|
|
|
|
|
3461
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3462
|
|
|
|
|
|
|
|
|
3463
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3464
|
|
|
|
|
|
|
$xmlconv->SetXMLStringToParse( "Hello World!" ); |
|
3465
|
|
|
|
|
|
|
|
|
3466
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3467
|
|
|
|
|
|
|
|
|
3468
|
|
|
|
|
|
|
=head3 SetTextCorpusStr |
|
3469
|
|
|
|
|
|
|
|
|
3470
|
|
|
|
|
|
|
Description: |
|
3471
|
|
|
|
|
|
|
|
|
3472
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Overwrites any stored text corpus data in memory to the string parameter. |
|
3473
|
|
|
|
|
|
|
|
|
3474
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3475
|
|
|
|
|
|
|
|
|
3476
|
|
|
|
|
|
|
Input: |
|
3477
|
|
|
|
|
|
|
|
|
3478
|
|
|
|
|
|
|
$string -> String |
|
3479
|
|
|
|
|
|
|
|
|
3480
|
|
|
|
|
|
|
Output: |
|
3481
|
|
|
|
|
|
|
|
|
3482
|
|
|
|
|
|
|
None |
|
3483
|
|
|
|
|
|
|
|
|
3484
|
|
|
|
|
|
|
Example: |
|
3485
|
|
|
|
|
|
|
|
|
3486
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3487
|
|
|
|
|
|
|
|
|
3488
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3489
|
|
|
|
|
|
|
$xmlconv->SetTextCorpusStr( "Hello World!" ); |
|
3490
|
|
|
|
|
|
|
|
|
3491
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3492
|
|
|
|
|
|
|
|
|
3493
|
|
|
|
|
|
|
=head3 AppendStrToTextCorpus |
|
3494
|
|
|
|
|
|
|
|
|
3495
|
|
|
|
|
|
|
Description: |
|
3496
|
|
|
|
|
|
|
|
|
3497
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Appends string parameter to text corpus string in memory. |
|
3498
|
|
|
|
|
|
|
|
|
3499
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3500
|
|
|
|
|
|
|
|
|
3501
|
|
|
|
|
|
|
Input: |
|
3502
|
|
|
|
|
|
|
|
|
3503
|
|
|
|
|
|
|
$string -> String |
|
3504
|
|
|
|
|
|
|
|
|
3505
|
|
|
|
|
|
|
Output: |
|
3506
|
|
|
|
|
|
|
|
|
3507
|
|
|
|
|
|
|
None |
|
3508
|
|
|
|
|
|
|
|
|
3509
|
|
|
|
|
|
|
Example: |
|
3510
|
|
|
|
|
|
|
|
|
3511
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3512
|
|
|
|
|
|
|
|
|
3513
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3514
|
|
|
|
|
|
|
$xmlconv->AppendStrToTextCorpus( "Hello World!" ); |
|
3515
|
|
|
|
|
|
|
|
|
3516
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3517
|
|
|
|
|
|
|
|
|
3518
|
|
|
|
|
|
|
=head3 ClearTextCorpus |
|
3519
|
|
|
|
|
|
|
|
|
3520
|
|
|
|
|
|
|
Description: |
|
3521
|
|
|
|
|
|
|
|
|
3522
|
|
|
|
|
|
|
Clears text corpus data in memory. |
|
3523
|
|
|
|
|
|
|
|
|
3524
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3525
|
|
|
|
|
|
|
|
|
3526
|
|
|
|
|
|
|
Input: |
|
3527
|
|
|
|
|
|
|
|
|
3528
|
|
|
|
|
|
|
None |
|
3529
|
|
|
|
|
|
|
|
|
3530
|
|
|
|
|
|
|
Output: |
|
3531
|
|
|
|
|
|
|
|
|
3532
|
|
|
|
|
|
|
None |
|
3533
|
|
|
|
|
|
|
|
|
3534
|
|
|
|
|
|
|
Example: |
|
3535
|
|
|
|
|
|
|
|
|
3536
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3537
|
|
|
|
|
|
|
|
|
3538
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3539
|
|
|
|
|
|
|
$xmlconv->ClearTextCorpus(); |
|
3540
|
|
|
|
|
|
|
|
|
3541
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3542
|
|
|
|
|
|
|
|
|
3543
|
|
|
|
|
|
|
=head3 SetTempStr |
|
3544
|
|
|
|
|
|
|
|
|
3545
|
|
|
|
|
|
|
Description: |
|
3546
|
|
|
|
|
|
|
|
|
3547
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets temporary member string to passed string parameter. |
|
3548
|
|
|
|
|
|
|
(Temporary placeholder for Medline Title and Abstract data). |
|
3549
|
|
|
|
|
|
|
|
|
3550
|
|
|
|
|
|
|
Note: This removes special characters and converts all characters to lowercase. |
|
3551
|
|
|
|
|
|
|
|
|
3552
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3553
|
|
|
|
|
|
|
|
|
3554
|
|
|
|
|
|
|
Input: |
|
3555
|
|
|
|
|
|
|
|
|
3556
|
|
|
|
|
|
|
$string -> String |
|
3557
|
|
|
|
|
|
|
|
|
3558
|
|
|
|
|
|
|
Output: |
|
3559
|
|
|
|
|
|
|
|
|
3560
|
|
|
|
|
|
|
None |
|
3561
|
|
|
|
|
|
|
|
|
3562
|
|
|
|
|
|
|
Example: |
|
3563
|
|
|
|
|
|
|
|
|
3564
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3565
|
|
|
|
|
|
|
|
|
3566
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3567
|
|
|
|
|
|
|
$xmlconv->SetTempStr( "Hello World!" ); |
|
3568
|
|
|
|
|
|
|
|
|
3569
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3570
|
|
|
|
|
|
|
|
|
3571
|
|
|
|
|
|
|
=head3 AppendToTempStr |
|
3572
|
|
|
|
|
|
|
|
|
3573
|
|
|
|
|
|
|
Description: |
|
3574
|
|
|
|
|
|
|
|
|
3575
|
|
|
|
|
|
|
Appends string parameter to temporary member string in memory. |
|
3576
|
|
|
|
|
|
|
|
|
3577
|
|
|
|
|
|
|
Note: This removes special characters and converts all characters to lowercase. |
|
3578
|
|
|
|
|
|
|
|
|
3579
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3580
|
|
|
|
|
|
|
|
|
3581
|
|
|
|
|
|
|
Input: |
|
3582
|
|
|
|
|
|
|
|
|
3583
|
|
|
|
|
|
|
$string -> String |
|
3584
|
|
|
|
|
|
|
|
|
3585
|
|
|
|
|
|
|
Output: |
|
3586
|
|
|
|
|
|
|
|
|
3587
|
|
|
|
|
|
|
None |
|
3588
|
|
|
|
|
|
|
|
|
3589
|
|
|
|
|
|
|
Example: |
|
3590
|
|
|
|
|
|
|
|
|
3591
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3592
|
|
|
|
|
|
|
|
|
3593
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3594
|
|
|
|
|
|
|
$xmlconv->AppendToTempStr( "Hello World!" ); |
|
3595
|
|
|
|
|
|
|
|
|
3596
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3597
|
|
|
|
|
|
|
|
|
3598
|
|
|
|
|
|
|
=head3 ClearTempStr |
|
3599
|
|
|
|
|
|
|
|
|
3600
|
|
|
|
|
|
|
Clears the temporary string storage in memory. |
|
3601
|
|
|
|
|
|
|
|
|
3602
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3603
|
|
|
|
|
|
|
|
|
3604
|
|
|
|
|
|
|
Input: |
|
3605
|
|
|
|
|
|
|
|
|
3606
|
|
|
|
|
|
|
None |
|
3607
|
|
|
|
|
|
|
|
|
3608
|
|
|
|
|
|
|
Output: |
|
3609
|
|
|
|
|
|
|
|
|
3610
|
|
|
|
|
|
|
None |
|
3611
|
|
|
|
|
|
|
|
|
3612
|
|
|
|
|
|
|
Example: |
|
3613
|
|
|
|
|
|
|
|
|
3614
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3615
|
|
|
|
|
|
|
|
|
3616
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3617
|
|
|
|
|
|
|
$xmlconv->ClearTempStr(); |
|
3618
|
|
|
|
|
|
|
|
|
3619
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3620
|
|
|
|
|
|
|
|
|
3621
|
|
|
|
|
|
|
=head3 SetTempDate |
|
3622
|
|
|
|
|
|
|
|
|
3623
|
|
|
|
|
|
|
Description: |
|
3624
|
|
|
|
|
|
|
|
|
3625
|
|
|
|
|
|
|
Sets member variable to passed string parameter. Sets temporary date string to passed string. |
|
3626
|
|
|
|
|
|
|
|
|
3627
|
|
|
|
|
|
|
Note: Date Format - "XX/XX/XXXX" (Mon/Day/Year) |
|
3628
|
|
|
|
|
|
|
|
|
3629
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3630
|
|
|
|
|
|
|
|
|
3631
|
|
|
|
|
|
|
Input: |
|
3632
|
|
|
|
|
|
|
|
|
3633
|
|
|
|
|
|
|
$string -> Date string - Format: "XX/XX/XXXX" |
|
3634
|
|
|
|
|
|
|
|
|
3635
|
|
|
|
|
|
|
Output: |
|
3636
|
|
|
|
|
|
|
|
|
3637
|
|
|
|
|
|
|
None |
|
3638
|
|
|
|
|
|
|
|
|
3639
|
|
|
|
|
|
|
Example: |
|
3640
|
|
|
|
|
|
|
|
|
3641
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3642
|
|
|
|
|
|
|
|
|
3643
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3644
|
|
|
|
|
|
|
$xmlconv->SetTempDate( "08/13/2016" ); |
|
3645
|
|
|
|
|
|
|
|
|
3646
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3647
|
|
|
|
|
|
|
|
|
3648
|
|
|
|
|
|
|
=head3 ClearTempDate |
|
3649
|
|
|
|
|
|
|
|
|
3650
|
|
|
|
|
|
|
Description: |
|
3651
|
|
|
|
|
|
|
|
|
3652
|
|
|
|
|
|
|
Clears the temporary date storage location in memory. |
|
3653
|
|
|
|
|
|
|
|
|
3654
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3655
|
|
|
|
|
|
|
|
|
3656
|
|
|
|
|
|
|
Input: |
|
3657
|
|
|
|
|
|
|
|
|
3658
|
|
|
|
|
|
|
None |
|
3659
|
|
|
|
|
|
|
|
|
3660
|
|
|
|
|
|
|
Output: |
|
3661
|
|
|
|
|
|
|
|
|
3662
|
|
|
|
|
|
|
None |
|
3663
|
|
|
|
|
|
|
|
|
3664
|
|
|
|
|
|
|
Example: |
|
3665
|
|
|
|
|
|
|
|
|
3666
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3667
|
|
|
|
|
|
|
|
|
3668
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3669
|
|
|
|
|
|
|
$xmlconv->ClearTempDate(); |
|
3670
|
|
|
|
|
|
|
|
|
3671
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3672
|
|
|
|
|
|
|
|
|
3673
|
|
|
|
|
|
|
=head3 SetCompoundWordAry |
|
3674
|
|
|
|
|
|
|
|
|
3675
|
|
|
|
|
|
|
Description: |
|
3676
|
|
|
|
|
|
|
|
|
3677
|
|
|
|
|
|
|
Sets member variable to de-referenced passed array reference parameter. Stores compound word array by |
|
3678
|
|
|
|
|
|
|
de-referencing array reference parameter. |
|
3679
|
|
|
|
|
|
|
|
|
3680
|
|
|
|
|
|
|
Note: Clears previous data if existing. |
|
3681
|
|
|
|
|
|
|
|
|
3682
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3683
|
|
|
|
|
|
|
|
|
3684
|
|
|
|
|
|
|
Input: |
|
3685
|
|
|
|
|
|
|
|
|
3686
|
|
|
|
|
|
|
$arrayReference -> Array reference of compound words |
|
3687
|
|
|
|
|
|
|
|
|
3688
|
|
|
|
|
|
|
Output: |
|
3689
|
|
|
|
|
|
|
|
|
3690
|
|
|
|
|
|
|
None |
|
3691
|
|
|
|
|
|
|
|
|
3692
|
|
|
|
|
|
|
Example: |
|
3693
|
|
|
|
|
|
|
|
|
3694
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3695
|
|
|
|
|
|
|
|
|
3696
|
|
|
|
|
|
|
my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" ); |
|
3697
|
|
|
|
|
|
|
|
|
3698
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3699
|
|
|
|
|
|
|
$xmlconv->SetCompoundWordAry( \@compoundWordAry ); |
|
3700
|
|
|
|
|
|
|
|
|
3701
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3702
|
|
|
|
|
|
|
|
|
3703
|
|
|
|
|
|
|
=head3 ClearCompoundWordAry |
|
3704
|
|
|
|
|
|
|
|
|
3705
|
|
|
|
|
|
|
Description: |
|
3706
|
|
|
|
|
|
|
|
|
3707
|
|
|
|
|
|
|
Clears compound word array in memory. |
|
3708
|
|
|
|
|
|
|
|
|
3709
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3710
|
|
|
|
|
|
|
|
|
3711
|
|
|
|
|
|
|
Input: |
|
3712
|
|
|
|
|
|
|
|
|
3713
|
|
|
|
|
|
|
None |
|
3714
|
|
|
|
|
|
|
|
|
3715
|
|
|
|
|
|
|
Output: |
|
3716
|
|
|
|
|
|
|
|
|
3717
|
|
|
|
|
|
|
None |
|
3718
|
|
|
|
|
|
|
|
|
3719
|
|
|
|
|
|
|
Example: |
|
3720
|
|
|
|
|
|
|
|
|
3721
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3722
|
|
|
|
|
|
|
|
|
3723
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3724
|
|
|
|
|
|
|
$xmlconv->ClearCompoundWordAry(); |
|
3725
|
|
|
|
|
|
|
|
|
3726
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3727
|
|
|
|
|
|
|
|
|
3728
|
|
|
|
|
|
|
=head3 SetCompoundWordBST |
|
3729
|
|
|
|
|
|
|
|
|
3730
|
|
|
|
|
|
|
Description: |
|
3731
|
|
|
|
|
|
|
|
|
3732
|
|
|
|
|
|
|
Sets member variable to passed Word2vec::Bst parameter. Sets compound word binary search tree to passed binary tree parameter. |
|
3733
|
|
|
|
|
|
|
|
|
3734
|
|
|
|
|
|
|
Note: Un-defines previous binary tree if existing. |
|
3735
|
|
|
|
|
|
|
|
|
3736
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3737
|
|
|
|
|
|
|
|
|
3738
|
|
|
|
|
|
|
Input: |
|
3739
|
|
|
|
|
|
|
|
|
3740
|
|
|
|
|
|
|
Word2vec::Bst -> Binary Search Tree |
|
3741
|
|
|
|
|
|
|
|
|
3742
|
|
|
|
|
|
|
Output: |
|
3743
|
|
|
|
|
|
|
|
|
3744
|
|
|
|
|
|
|
None |
|
3745
|
|
|
|
|
|
|
|
|
3746
|
|
|
|
|
|
|
Example: |
|
3747
|
|
|
|
|
|
|
|
|
3748
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3749
|
|
|
|
|
|
|
|
|
3750
|
|
|
|
|
|
|
my @compoundWordAry = ( "big dog", "respiratory failure", "seven large masses" ); |
|
3751
|
|
|
|
|
|
|
@compoundWordAry = sort( @compoundWordAry ); |
|
3752
|
|
|
|
|
|
|
|
|
3753
|
|
|
|
|
|
|
my $arySize = @compoundWordAry; |
|
3754
|
|
|
|
|
|
|
|
|
3755
|
|
|
|
|
|
|
my $bst = Word2vec::Bst->new(); |
|
3756
|
|
|
|
|
|
|
$bst->CreateTree( \@compoundWordAry, 0, $arySize, undef ); |
|
3757
|
|
|
|
|
|
|
|
|
3758
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3759
|
|
|
|
|
|
|
$xmlconv->SetCompoundWordBST( $bst ); |
|
3760
|
|
|
|
|
|
|
|
|
3761
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3762
|
|
|
|
|
|
|
|
|
3763
|
|
|
|
|
|
|
=head3 ClearCompoundWordBST |
|
3764
|
|
|
|
|
|
|
|
|
3765
|
|
|
|
|
|
|
Description: |
|
3766
|
|
|
|
|
|
|
|
|
3767
|
|
|
|
|
|
|
Clears/Un-defines existing compound word binary search tree from memory. |
|
3768
|
|
|
|
|
|
|
|
|
3769
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3770
|
|
|
|
|
|
|
|
|
3771
|
|
|
|
|
|
|
Input: |
|
3772
|
|
|
|
|
|
|
|
|
3773
|
|
|
|
|
|
|
None |
|
3774
|
|
|
|
|
|
|
|
|
3775
|
|
|
|
|
|
|
Output: |
|
3776
|
|
|
|
|
|
|
|
|
3777
|
|
|
|
|
|
|
None |
|
3778
|
|
|
|
|
|
|
|
|
3779
|
|
|
|
|
|
|
Example: |
|
3780
|
|
|
|
|
|
|
|
|
3781
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3782
|
|
|
|
|
|
|
|
|
3783
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3784
|
|
|
|
|
|
|
$xmlconv->ClearCompoundWordBST(); |
|
3785
|
|
|
|
|
|
|
|
|
3786
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3787
|
|
|
|
|
|
|
|
|
3788
|
|
|
|
|
|
|
=head3 SetMaxCompoundWordLength |
|
3789
|
|
|
|
|
|
|
|
|
3790
|
|
|
|
|
|
|
Description: |
|
3791
|
|
|
|
|
|
|
|
|
3792
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Sets maximum number of compound words in a phrase for comparison. |
|
3793
|
|
|
|
|
|
|
|
|
3794
|
|
|
|
|
|
|
ie. "medical campus of Virginia Commonwealth University" can be interpreted as a compound word of 6 words. |
|
3795
|
|
|
|
|
|
|
Setting this variable to 3 will only attempt compoundifying a maximum amount of three words. |
|
3796
|
|
|
|
|
|
|
The result would be "medical_campus_of Virginia commonwealth university" even though an exact representation |
|
3797
|
|
|
|
|
|
|
of this compounded string can exist. Setting this variable to 6 will result in compounding all six words if |
|
3798
|
|
|
|
|
|
|
they exist in the compound word array/bst. |
|
3799
|
|
|
|
|
|
|
|
|
3800
|
|
|
|
|
|
|
Warning: This is a private function and should not be called or manipulated. |
|
3801
|
|
|
|
|
|
|
|
|
3802
|
|
|
|
|
|
|
Input: |
|
3803
|
|
|
|
|
|
|
|
|
3804
|
|
|
|
|
|
|
$value -> Integer |
|
3805
|
|
|
|
|
|
|
|
|
3806
|
|
|
|
|
|
|
Output: |
|
3807
|
|
|
|
|
|
|
|
|
3808
|
|
|
|
|
|
|
None |
|
3809
|
|
|
|
|
|
|
|
|
3810
|
|
|
|
|
|
|
Example: |
|
3811
|
|
|
|
|
|
|
|
|
3812
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3813
|
|
|
|
|
|
|
|
|
3814
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3815
|
|
|
|
|
|
|
$xmlconv->SetMaxCompoundWordLength( 8 ); |
|
3816
|
|
|
|
|
|
|
|
|
3817
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3818
|
|
|
|
|
|
|
|
|
3819
|
|
|
|
|
|
|
=head3 SetOverwriteExistingFile |
|
3820
|
|
|
|
|
|
|
|
|
3821
|
|
|
|
|
|
|
Sets member variable to passed integer parameter. Requires 0 = False or 1 = True. Sets option to overwrite |
|
3822
|
|
|
|
|
|
|
existing text corpus during compilation if 1 or append to existing text corpus if 0. |
|
3823
|
|
|
|
|
|
|
|
|
3824
|
|
|
|
|
|
|
=head2 Debug Functions |
|
3825
|
|
|
|
|
|
|
|
|
3826
|
|
|
|
|
|
|
=head3 GetTime |
|
3827
|
|
|
|
|
|
|
|
|
3828
|
|
|
|
|
|
|
Description: |
|
3829
|
|
|
|
|
|
|
|
|
3830
|
|
|
|
|
|
|
Returns current time string in "Hour:Minute:Second" format. |
|
3831
|
|
|
|
|
|
|
|
|
3832
|
|
|
|
|
|
|
Input: |
|
3833
|
|
|
|
|
|
|
|
|
3834
|
|
|
|
|
|
|
None |
|
3835
|
|
|
|
|
|
|
|
|
3836
|
|
|
|
|
|
|
Output: |
|
3837
|
|
|
|
|
|
|
|
|
3838
|
|
|
|
|
|
|
$string -> XX:XX:XX ("Hour:Minute:Second") |
|
3839
|
|
|
|
|
|
|
|
|
3840
|
|
|
|
|
|
|
Example: |
|
3841
|
|
|
|
|
|
|
|
|
3842
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3843
|
|
|
|
|
|
|
|
|
3844
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3845
|
|
|
|
|
|
|
my $time = $xmlconv->GetTime(); |
|
3846
|
|
|
|
|
|
|
|
|
3847
|
|
|
|
|
|
|
print( "Current Time: $time\n" ) if defined( $time ); |
|
3848
|
|
|
|
|
|
|
|
|
3849
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3850
|
|
|
|
|
|
|
|
|
3851
|
|
|
|
|
|
|
=head3 GetDate |
|
3852
|
|
|
|
|
|
|
|
|
3853
|
|
|
|
|
|
|
Description: |
|
3854
|
|
|
|
|
|
|
|
|
3855
|
|
|
|
|
|
|
Returns current month, day and year string in "Month/Day/Year" format. |
|
3856
|
|
|
|
|
|
|
|
|
3857
|
|
|
|
|
|
|
Input: |
|
3858
|
|
|
|
|
|
|
|
|
3859
|
|
|
|
|
|
|
None |
|
3860
|
|
|
|
|
|
|
|
|
3861
|
|
|
|
|
|
|
Output: |
|
3862
|
|
|
|
|
|
|
|
|
3863
|
|
|
|
|
|
|
$string -> XX/XX/XXXX ("Month/Day/Year") |
|
3864
|
|
|
|
|
|
|
|
|
3865
|
|
|
|
|
|
|
Example: |
|
3866
|
|
|
|
|
|
|
|
|
3867
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3868
|
|
|
|
|
|
|
|
|
3869
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3870
|
|
|
|
|
|
|
my $date = $xmlconv->GetDate(); |
|
3871
|
|
|
|
|
|
|
|
|
3872
|
|
|
|
|
|
|
print( "Current Date: $date\n" ) if defined( $date ); |
|
3873
|
|
|
|
|
|
|
|
|
3874
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3875
|
|
|
|
|
|
|
|
|
3876
|
|
|
|
|
|
|
=head3 WriteLog |
|
3877
|
|
|
|
|
|
|
|
|
3878
|
|
|
|
|
|
|
Description: |
|
3879
|
|
|
|
|
|
|
|
|
3880
|
|
|
|
|
|
|
Prints passed string parameter to the console, log file or both depending on user options. |
|
3881
|
|
|
|
|
|
|
|
|
3882
|
|
|
|
|
|
|
Note: printNewLine parameter prints a new line character following the string if the parameter |
|
3883
|
|
|
|
|
|
|
is undefined and does not if parameter is 0. |
|
3884
|
|
|
|
|
|
|
|
|
3885
|
|
|
|
|
|
|
Input: |
|
3886
|
|
|
|
|
|
|
|
|
3887
|
|
|
|
|
|
|
$string -> String to print to the console/log file. |
|
3888
|
|
|
|
|
|
|
$value -> 0 = Do not print newline character after string, all else prints new line character including 'undef'. |
|
3889
|
|
|
|
|
|
|
|
|
3890
|
|
|
|
|
|
|
Output: |
|
3891
|
|
|
|
|
|
|
|
|
3892
|
|
|
|
|
|
|
None |
|
3893
|
|
|
|
|
|
|
|
|
3894
|
|
|
|
|
|
|
Example: |
|
3895
|
|
|
|
|
|
|
|
|
3896
|
|
|
|
|
|
|
use Word2vec::Xmltow2v; |
|
3897
|
|
|
|
|
|
|
|
|
3898
|
|
|
|
|
|
|
my $xmlconv = Word2vec::Xmltow2v->new(); |
|
3899
|
|
|
|
|
|
|
$xmlconv->WriteLog( "Hello World" ); |
|
3900
|
|
|
|
|
|
|
|
|
3901
|
|
|
|
|
|
|
undef( $xmlconv ); |
|
3902
|
|
|
|
|
|
|
|
|
3903
|
|
|
|
|
|
|
=head1 Author |
|
3904
|
|
|
|
|
|
|
|
|
3905
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University |
|
3906
|
|
|
|
|
|
|
|
|
3907
|
|
|
|
|
|
|
=head1 COPYRIGHT |
|
3908
|
|
|
|
|
|
|
|
|
3909
|
|
|
|
|
|
|
Copyright (c) 2016 |
|
3910
|
|
|
|
|
|
|
|
|
3911
|
|
|
|
|
|
|
Bridget T McInnes, Virginia Commonwealth University |
|
3912
|
|
|
|
|
|
|
btmcinnes at vcu dot edu |
|
3913
|
|
|
|
|
|
|
|
|
3914
|
|
|
|
|
|
|
Clint Cuffy, Virginia Commonwealth University |
|
3915
|
|
|
|
|
|
|
cuffyca at vcu dot edu |
|
3916
|
|
|
|
|
|
|
|
|
3917
|
|
|
|
|
|
|
This program is free software; you can redistribute it and/or modify it |
|
3918
|
|
|
|
|
|
|
under the terms of the GNU General Public License as published by the Free |
|
3919
|
|
|
|
|
|
|
Software Foundation; either version 2 of the License, or (at your option) |
|
3920
|
|
|
|
|
|
|
any later version. |
|
3921
|
|
|
|
|
|
|
|
|
3922
|
|
|
|
|
|
|
This program is distributed in the hope that it will be useful, but WITHOUT |
|
3923
|
|
|
|
|
|
|
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS |
|
3924
|
|
|
|
|
|
|
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. |
|
3925
|
|
|
|
|
|
|
|
|
3926
|
|
|
|
|
|
|
You should have received a copy of the GNU General Public License along with |
|
3927
|
|
|
|
|
|
|
this program; if not, write to: |
|
3928
|
|
|
|
|
|
|
|
|
3929
|
|
|
|
|
|
|
The Free Software Foundation, Inc., |
|
3930
|
|
|
|
|
|
|
59 Temple Place - Suite 330, |
|
3931
|
|
|
|
|
|
|
Boston, MA 02111-1307, USA. |
|
3932
|
|
|
|
|
|
|
|
|
3933
|
|
|
|
|
|
|
=cut |