line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
1
|
|
|
|
|
|
|
=head1 NAME |
2
|
|
|
|
|
|
|
|
3
|
|
|
|
|
|
|
WebService::Yahoo::TermExtractor - Perl wrapper for the Yahoo! Term Extraction WebService |
4
|
|
|
|
|
|
|
|
5
|
|
|
|
|
|
|
=head1 SYNOPSIS |
6
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use WebService::Yahoo::TermExtractor; |
8
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
my $yte = WebService::Yahoo::TermExtractor->new( appid => 'your_app_id', context => $source_text); |
10
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
my $terms = $yte->get_terms; # returns an array ref |
12
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
=head1 DESCRIPTION |
14
|
|
|
|
|
|
|
|
15
|
|
|
|
|
|
|
WebService::Yahoo::TermExtractor provides a simple object-oriented |
16
|
|
|
|
|
|
|
wrapper around the Yahoo! Term Extraction WebService. |
17
|
|
|
|
|
|
|
|
18
|
|
|
|
|
|
|
The Yahoo! Term Extraction WebService attempts to extract a list of |
19
|
|
|
|
|
|
|
significant words or phrases from the content submitted. |
20
|
|
|
|
|
|
|
|
21
|
|
|
|
|
|
|
=head1 EXAMPLES |
22
|
|
|
|
|
|
|
|
23
|
|
|
|
|
|
|
use WebService::Yahoo::TermExtractor; |
24
|
|
|
|
|
|
|
|
25
|
|
|
|
|
|
|
my $source_text = 'A chunk of text, that mentions perl, to extract terms from...'; |
26
|
|
|
|
|
|
|
|
27
|
|
|
|
|
|
|
my $yte = WebService::Yahoo::TermExtractor->new( appid => 'your_app_id', context => $source_text); |
28
|
|
|
|
|
|
|
|
29
|
|
|
|
|
|
|
my $terms = $yte->get_terms; |
30
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
die "An error occurred while trying to extract terms..." unless $terms; |
32
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
print "This article is about:\n"; |
34
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
foreach my $term (@$terms) { |
36
|
|
|
|
|
|
|
print "\t$term\n"; |
37
|
|
|
|
|
|
|
} |
38
|
|
|
|
|
|
|
|
39
|
|
|
|
|
|
|
If you are making multiple calls, each with different source text, and have already initialised a |
40
|
|
|
|
|
|
|
WebService::Yahoo::TermExtractor object you can reuse the object and |
41
|
|
|
|
|
|
|
call C<get_terms> with the text to extract from. |
42
|
|
|
|
|
|
|
|
43
|
|
|
|
|
|
|
my $yte = WebService::Yahoo::TermExtractor->new( appid => 'textextract', context => $source_text); |
44
|
|
|
|
|
|
|
|
45
|
|
|
|
|
|
|
my $terms = $yte->get_terms; |
46
|
|
|
|
|
|
|
|
47
|
|
|
|
|
|
|
... do stuff and then later ... |
48
|
|
|
|
|
|
|
|
49
|
|
|
|
|
|
|
$terms = $yte->get_terms($new_source_text); |
50
|
|
|
|
|
|
|
|
51
|
|
|
|
|
|
|
The following example shows input text from the London PM home page: |
52
|
|
|
|
|
|
|
|
53
|
|
|
|
|
|
|
"We are a group of people dedicated to the encouragement of all things |
54
|
|
|
|
|
|
|
Perl-like in London. This involves helping each other, discussing topics, |
55
|
|
|
|
|
|
|
sharing information and the occasional drink and mention of Buffy the |
56
|
|
|
|
|
|
|
Vampire Slayer." |
57
|
|
|
|
|
|
|
|
58
|
|
|
|
|
|
|
Which returns the following terms: |
59
|
|
|
|
|
|
|
|
60
|
|
|
|
|
|
|
buffy the vampire slayer |
61
|
|
|
|
|
|
|
occasional drink |
62
|
|
|
|
|
|
|
encouragement |
63
|
|
|
|
|
|
|
perl |
64
|
|
|
|
|
|
|
london |
65
|
|
|
|
|
|
|
|
66
|
|
|
|
|
|
|
=cut |
67
|
|
|
|
|
|
|
|
68
|
|
|
|
|
|
|
####################################################################### |
69
|
|
|
|
|
|
|
|
70
|
|
|
|
|
|
|
package WebService::Yahoo::TermExtractor; |
71
|
1
|
|
|
1
|
|
40675
|
use strict; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
32
|
|
72
|
1
|
|
|
1
|
|
4
|
use warnings; |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
24
|
|
73
|
1
|
|
|
1
|
|
922
|
use LWP::UserAgent; |
|
1
|
|
|
|
|
50244
|
|
|
1
|
|
|
|
|
36
|
|
74
|
1
|
|
|
1
|
|
44
|
use vars qw($VERSION); |
|
1
|
|
|
|
|
2
|
|
|
1
|
|
|
|
|
234
|
|
75
|
|
|
|
|
|
|
|
76
|
|
|
|
|
|
|
$VERSION = "0.01"; |
77
|
|
|
|
|
|
|
|
78
|
|
|
|
|
|
|
=head1 FUNCTIONS |
79
|
|
|
|
|
|
|
|
80
|
|
|
|
|
|
|
As an object-oriented module WebService::Yahoo::TermExtractor exports no |
81
|
|
|
|
|
|
|
functions. |
82
|
|
|
|
|
|
|
|
83
|
|
|
|
|
|
|
=head1 CONSTRUCTOR |
84
|
|
|
|
|
|
|
|
85
|
|
|
|
|
|
|
=over 4 |
86
|
|
|
|
|
|
|
|
87
|
|
|
|
|
|
|
=item new ( appid => 'your_app_id', context => 'your_source_text' ) |
88
|
|
|
|
|
|
|
|
89
|
|
|
|
|
|
|
This is the constructor for a new WebService::Yahoo::TermExtractor |
90
|
|
|
|
|
|
|
object. The C<appid> is required for you to use the Yahoo webservice and |
91
|
|
|
|
|
|
|
must be requested from them. See L<http://api.search.yahoo.com/webservices/register_application> for more details. |
92
|
|
|
|
|
|
|
|
93
|
|
|
|
|
|
|
C<context> is the source text that terms should be extracted from. Both |
94
|
|
|
|
|
|
|
arguments are required. |
95
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
=back |
97
|
|
|
|
|
|
|
|
98
|
|
|
|
|
|
|
=cut |
99
|
|
|
|
|
|
|
|
100
|
|
|
|
|
|
|
# Constructor. The key/value pairs passed after the class name (appid and
# context, per the documentation above) become the attributes of the new
# object, which is blessed into $class.
sub new {
    my $class = shift;
    my %args  = @_;

    my $self = \%args;
    return bless $self, $class;
}
104
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
=head1 METHODS |
106
|
|
|
|
|
|
|
|
107
|
|
|
|
|
|
|
=over 4 |
108
|
|
|
|
|
|
|
|
109
|
|
|
|
|
|
|
=item get_terms ( [ $context ] ) |
110
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
This method sends the request and returns an array reference of any |
112
|
|
|
|
|
|
|
extracted terms. If invoked without an argument the C<context> provided |
113
|
|
|
|
|
|
|
in C<new> is used. If an argument is passed it is assumed to be source |
114
|
|
|
|
|
|
|
text that terms should be extracted from. This was added as a convenience |
115
|
|
|
|
|
|
|
for working with multiple pieces of text. |
116
|
|
|
|
|
|
|
|
117
|
|
|
|
|
|
|
C<get_terms> returns an array reference pointing to the list of terms on |
118
|
|
|
|
|
|
|
success and undef on failure. |
119
|
|
|
|
|
|
|
|
120
|
|
|
|
|
|
|
=back |
121
|
|
|
|
|
|
|
|
122
|
|
|
|
|
|
|
=cut |
123
|
|
|
|
|
|
|
|
124
|
|
|
|
|
|
|
# Submit text to the Yahoo! Term Extraction service and collect the terms
# found in the response.
#
# Arguments:
#   $context (optional) - text to extract terms from. When omitted (or
#                         false) the 'context' stored on the object is used.
#
# Returns an array reference of the extracted terms (possibly empty) when
# the HTTP request succeeds, or undef when it does not.
sub get_terms {
    my $self    = shift;
    my $context = shift || $self->{context};

    my $url = 'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction';

    my $ua = LWP::UserAgent->new;
    $ua->timeout(20);

    my $response = $ua->post(
        $url,
        {
            appid   => $self->{appid},
            context => $context,
        }
    );

    # Explicit undef (not a bare return): callers are documented to receive
    # a single scalar, so the result stays safe inside list/hash constructors.
    return undef unless $response->is_success;

    my $content = $response->content;

    # NOTE(review): assumes the service wraps each term in its own <Result>
    # element -- confirm against the API documentation. Without the
    # delimiters the pattern matches the empty string at every position and
    # captures no terms. /s lets a term that contains a newline match too.
    my @terms;
    while ( $content =~ m!<Result>(.*?)</Result>!gs ) {
        push @terms, $1;
    }

    return \@terms;
}
148
|
|
|
|
|
|
|
|
149
|
|
|
|
|
|
|
1; |
150
|
|
|
|
|
|
|
|
151
|
|
|
|
|
|
|
####################################################################### |
152
|
|
|
|
|
|
|
|
153
|
|
|
|
|
|
|
=head1 DEPENDENCIES |
154
|
|
|
|
|
|
|
|
155
|
|
|
|
|
|
|
WebService::Yahoo::TermExtractor requires the following module: |
156
|
|
|
|
|
|
|
|
157
|
|
|
|
|
|
|
L<LWP::UserAgent> |
158
|
|
|
|
|
|
|
|
159
|
|
|
|
|
|
|
=head1 SEE ALSO |
160
|
|
|
|
|
|
|
|
161
|
|
|
|
|
|
|
For information about the Yahoo! Term Extractor service - |
162
|
|
|
|
|
|
|
L<http://developer.yahoo.net/search/content/V1/termExtraction.html> |
163
|
|
|
|
|
|
|
|
164
|
|
|
|
|
|
|
To sign up for an application key - |
165
|
|
|
|
|
|
|
L<http://api.search.yahoo.com/webservices/register_application> |
166
|
|
|
|
|
|
|
|
167
|
|
|
|
|
|
|
=head1 LICENCE AND COPYRIGHT |
168
|
|
|
|
|
|
|
|
169
|
|
|
|
|
|
|
Copyright (C) 2006 Dean Wilson. All Rights Reserved. |
170
|
|
|
|
|
|
|
|
171
|
|
|
|
|
|
|
This module is free software; you can redistribute it and/or modify it |
172
|
|
|
|
|
|
|
under the same terms as Perl itself. |
173
|
|
|
|
|
|
|
|
174
|
|
|
|
|
|
|
=head1 AUTHOR |
175
|
|
|
|
|
|
|
|
176
|
|
|
|
|
|
|
Dean Wilson |
177
|
|
|
|
|
|
|
|
178
|
|
|
|
|
|
|
=cut |