File Coverage

blib/lib/Text/SenseClusters/LabelEvaluation/Wikipedia/GetWikiData.pm
Criterion Covered Total %
statement 30 34 88.2
branch 3 6 50.0
condition n/a
subroutine 5 5 100.0
pod 0 2 0.0
total 38 47 80.8


line stmt bran cond sub pod time code
1             #!/usr/bin/perl -w
2              
3             # Declaring the Package for the module.
4             package Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData;
5              
6 5     5   48531 use strict;
  5         11  
  5         291  
7 5     5   1129 use encoding "utf-8";
  5         21860  
  5         39  
8              
9             # The following two lines will make this module inherit from the Exporter Class.
10             require Exporter;
11             our @ISA = qw(Exporter);
12              
13             # Using WWW::Wikipedia Module.
14             # Reference: http://search.cpan.org/dist/WWW-Wikipedia/lib/WWW/Wikipedia.pm
15 5     5   8828 use WWW::Wikipedia;
  5         709152  
  5         336  
16              
17             # Defining the Variable for using the Wikipedia Module.
18             # Reference: http://search.cpan.org/~bricas/WWW-Wikipedia-2.00/
19             my $wiki = WWW::Wikipedia->new();
20              
21             #######################################################################################################################
22              
23             =head1 Name
24              
25             Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData - Module for getting the information about a topic from wikipedia.
26              
27             =head1 SYNOPSIS
28              
29             #The following code snippet will show how to use this module.
30              
31             # Including the LabelEvaluation Module.
32             use Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData;
33              
34             # Defining the topic name for which we will create the file containing their detail
35             # data from the wikipedia.
36             my $topicName ="BillClinton";
37              
38             # This flag tells the module not to create the temp file (which would hold the data
39             # fetched from Wikipedia). To keep this file, set it to 0.
40             my $isClean = 1;
41              
42             # The following code will call the getWikiDataForTopic() function from the
43             # GetWikiData module. It returns the Wikipedia text about the topic as a string;
44             # when $isClean is 0 it also writes that text to the file 'temp_BillClinton.txt'.
45             my $topicData =
46             Text::SenseClusters::LabelEvaluation::Wikipedia::GetWikiData::getWikiDataForTopic(
47             $topicName, $isClean);
48              
49             print "\nWikipedia data fetched for the topic \'$topicName\' is:\n$topicData\n";
50              
51              
52             =head1 DESCRIPTION
53            
54             Given a topic, this module is responsible for getting the Wikipedia
55             information about it and writing it to a file named
56             'temp_TOPIC.txt', where TOPIC is the name of the topic.
57            
58             =cut
59              
60             ##########################################################################################
61             =head1 function: getWikiDataFileForTopic
62              
63             This function fetches data about a topic from Wikipedia, then
64             writes the fetched data into a new file 'temp_TOPIC.txt'.
65              
66              
67             @argument1 : Name of the topic for which we need to fetch data from the
68             Wikipedia.
69              
70             @return : Name of the file into which this function has written the
71             data, 'temp_TOPIC.txt'.
72            
73             @description :
74             1). Read the topic name from the function arguments.
75             2). Use this topic name to create the name of the file in which we will
76             write the data about the topic.
77             3). Get the data about the topic from the Wikipedia module and write
78             it into the above mentioned file.
79             4). Return the file name.
80              
81             =cut
82             ##########################################################################################
83             sub getWikiDataFileForTopic{
84            
85             # Read the Topic name from the argument of the function.
86 1     1 0 13 my $topicToLook = shift;
87            
88             # Removing the white space from the front and end of the word.
89 1         8 $topicToLook =~ s/^\s+|\s+$//g;
90              
91             # Replacing each run of white space inside the topic name with an underscore.
92 1         4 $topicToLook =~ s/\s+/_/g;
93              
94             # Creating the fileName from the topic name.
95 1         15 my $fileName = "temp_$topicToLook.txt";
96            
97            
98             # Open the file handle in Write Mode.
99 1         152 open (MYFILE, ">$fileName");
100              
101             # Use Wikipedia Search to get the result about the topic.
102             # Reference: http://search.cpan.org/~bricas/WWW-Wikipedia-2.00/
103 1         12 my $result = $wiki->search($topicToLook);
104              
105             # If the entry has some text, write it out to file.
106 1 50       6708532 if ($result){
107             # Writing the content of the search result into the newly created file.
108 1         7 print MYFILE $result->text();
109            
110             # Also writing the list of any related items into the files.
111 1         83 print MYFILE join( "\n", $result->related() );
112             }
113              
114             # Close the file handle.
115 1         493 close (MYFILE);
116              
117             # Returning the name of the file in which we write the Wikipedia data
118             # about the given topic.
119 1         67 return $fileName;
120             }
121              
122              
123              
124             #########################################################################################
125              
126             =head1 function: getWikiDataForTopic() -
127              
128             This function fetches data about a topic from Wikipedia and returns it to
129             the caller.
130              
131             @argument1 : Name of the topic for which we need to fetch data from Wikipedia.
132             @argument2 : Clean-up flag; when it is 0, the data is also written to 'temp_TOPIC.txt'.
133             @return : String data about the topic.
134              
135             =cut
136              
137             #########################################################################################
138             sub getWikiDataForTopic{
139            
140             # Read the Topic name from the argument of the function.
141 6     6 0 10 my $topicToLook = shift;
142            
143             # Reading the parameter which says whether to delete data or not.
144 6         13 my $isClean = shift;
145            
146             # Removing the white space from the front and end of the word.
147 6         43 $topicToLook =~ s/^\s+|\s+$//g;
148              
149             # Variable that will hold all the string data for a given topic.
150 6         12 my $topicData = "";
151            
152             # Use Wikipedia Search to get the result about the topic.
153             # Reference: http://search.cpan.org/~bricas/WWW-Wikipedia-2.00/
154 6         36 my $result = $wiki->search($topicToLook);
155              
156             # If the search returned an entry, collect its text into $topicData.
157 6 50       29303522 if ($result){
158             # Adding all the text to $topicData variable.
159 6         42 $topicData = $topicData.$result->text();
160            
161             # Also appending the list of any related items to the data.
162 6         185 $topicData = $topicData.join("\n", $result->related());
163             }
164              
165             # If the user wants to keep the wiki file, they pass isClean == 0.
166 6 50       1414 if($isClean == 0){
167             # Creating the fileName from the topic name.
168 0         0 my $fileName = "temp_$topicToLook.txt";
169             # Open the file handle in Write Mode.
170 0         0 open (MYFILE, ">$fileName");
171             # Writing the content of the search result into the newly created file.
172 0         0 print MYFILE $topicData;
173             # Close the file handle.
174 0         0 close (MYFILE);
175             }
176            
177             # Returning the wikipedia about the topic
178 6         621 return $topicData;
179             }
180              
181              
182              
183             #######################################################################################################
184             =pod
185              
186             =head1 SEE ALSO
187              
188             http://senseclusters.cvs.sourceforge.net/viewvc/senseclusters/LabelEvaluation/
189            
190             Last modified by :
191             $Id: GetWikiData.pm,v 1.6 2013/03/18 02:17:16 jhaxx030 Exp $
192              
193            
194             =head1 AUTHORS
195              
196             Ted Pedersen, University of Minnesota, Duluth
197             tpederse at d.umn.edu
198              
199             Anand Jha, University of Minnesota, Duluth
200             jhaxx030 at d.umn.edu
201              
202              
203              
204             =head1 COPYRIGHT AND LICENSE
205              
206             Copyright (C) 2012 Ted Pedersen, Anand Jha
207              
208             See http://dev.perl.org/licenses/ for more information.
209              
210             This program is free software; you can redistribute it and/or modify
211             it under the terms of the GNU General Public License as published by
212             the Free Software Foundation; either version 2 of the License, or
213             (at your option) any later version.
214              
215             This program is distributed in the hope that it will be useful,
216             but WITHOUT ANY WARRANTY; without even the implied warranty of
217             MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
218             GNU General Public License for more details.
219              
220             You should have received a copy of the GNU General Public License
221             along with this program; if not, write to:
222            
223            
224             The Free Software Foundation, Inc., 59 Temple Place, Suite 330,
225             Boston, MA 02111-1307 USA
226            
227            
228             =cut
229             #######################################################################################################
230              
231              
232             # Making the default return statement as 1;
233             # Reference : http://lists.netisland.net/archives/phlpm/phlpm-2001/msg00426.html
234             1;