File Coverage

blib/lib/WWW/Scraper/Gmail.pm
Criterion Covered Total %
statement 21 185 11.3
branch 0 56 0.0
condition 0 3 0.0
subroutine 7 15 46.6
pod 0 8 0.0
total 28 267 10.4


line stmt bran cond sub pod time code
1             package WWW::Scraper::Gmail;
2              
3 1     1   33157 use 5.005003;
  1         4  
  1         41  
4 1     1   6 use strict;
  1         2  
  1         35  
5 1     1   5 use warnings;
  1         6  
  1         73  
6              
7             require Exporter;
8             require LWP;
9             require Crypt::SSLeay;
10              
11 1     1   1146 use LWP::UserAgent;
  1         105862  
  1         44  
12 1     1   1508 use Env qw{HOME};
  1         4597  
  1         9  
13 1     1   365 use Carp;
  1         2  
  1         101  
14             #use Data::Dumper;
15 1     1   1114 use HTML::Entities;
  1         6421  
  1         3860  
16              
17             our @ISA = qw(Exporter);
18              
19             # Items to export into callers namespace by default. Note: do not export
20             # names by default without a very good reason. Use EXPORT_OK instead.
21             # Do not simply export all your public functions/methods/constants.
22              
23             # This allows declaration use WWW::Scraper::Gmail ':all';
24             # If you do not need this, moving things directly into @EXPORT or @EXPORT_OK
25             # will save memory.
26             our %EXPORT_TAGS = ( 'all' => [ qw(
27            
28             ) ] );
29              
30             our @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
31              
32             our @EXPORT = qw(
33            
34             );
35              
36             our $VERSION = '0.09';
37              
38              
39             # Preloaded methods go here.
40             #
41             my ($next_url);
42             #my ($js_ver);
43             my ($gm_l_cookie, $sid);
44              
45              
46             my ($url, $url2, $url3, $url_init, $urlx, $ua, $req, $res);
47             my ($cookie, $dump, $inbox, $head, $zx);
48             my ($gmail_at);
49             my $num = 0;
50             my ($username, $password);
51             my $logged_in = 0;
52             my $pid = "$ENV{HOME}/.gmailpid";
53             my $gmailrc = "$ENV{HOME}/.gmailrc";
54              
55             $url = "https://www.google.com/accounts/ServiceLoginBoxAuth";
56             $url2 = "https://www.google.com/accounts/CheckCookie?service=mail&chtml=LoginDoneHtml";
57             #$urlx = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0&init=1&zx=$zx";
58             $url3 = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0";
59             $url_init = "http://gmail.google.com/gmail?search=inbox&view=tl&start=0&init=1";
60              
61             sub setUP {
62 0     0 0   unlink "$gmailrc";
63 0           $username = shift @_;
64 0           $password = shift @_;
65             }
66              
67             sub getUP {
68 0 0   0 0   open(GMAILRC, "$gmailrc") or die("Can't Open $gmailrc \nFormat:\n[gmail]\nusername=\npassword=\n");
69 0           while () {
70 0 0         $username = $1 if (/username=(.*)/);
71 0 0         $password = $1 if (/password=(.*)/);;
72             }
73 0           close(GMAILRC);
74 0 0 0       return(0) if(!$username or !$password);
75 0           return(1);
76             }
77              
78             sub login {
79              
80 0     0 0   $ua = LWP::UserAgent->new();
81             #its a GOOSE
82 0           $ua->agent("User-Agent: Mozilla/5.0 (Macintosh; U; PPC Mac OS X Mach-O; en-US; rv:1.7) Gecko/20040608");
83 0           $head = HTTP::Headers->new(); #, Referer => $ref);
84              
85 0 0         if(open(GMAILPID, "$pid")) {
86 0           my $first = ;
87 0 0         if ($first - time() > 50000) {
88             #cookie is expired
89 0           unlink($pid);
90 0           last();
91             }
92 0           $cookie = ;
93 0           $gmail_at = ;
94 0           $zx = ;
95             #print "cookie = $cookie\ngmail_at = $gmail_at\nzx=$zx\n";
96 0           $head = HTTP::Headers->new(Cookie => $cookie);
97 0           close(GMAILPID);
98 0           chomp($cookie, $gmail_at, $zx);
99 0           return(0);
100             }
101 0 0         getUP() if (!$username);
102              
103             #---------------------------------------------------------
104 0           $req = HTTP::Request->new(GET => "http://gmail.google.com/");
105 0           $res = $ua->request($req);
106             #Ok, all the cookies are blanked out at this point
107             #---------------------------------------------------------
108 0           $req = HTTP::Request->new(GET => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail");
109 0           $res = $ua->request($req);
110             #---------------------------------------------------------
111 0           $req = HTTP::Request->new(GET => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail");
112 0           $res = $ua->request($req);
113             #---------------------------------------------------------
114 0           $head->push_header(Referer => "https://www.google.com/accounts/ServiceLoginBox?service=mail&continue=https%3A%2F%2Fgmail.google.com%2Fgmail");
115 0           $head->push_header(Cookie => "Session=en_US; en_US;");
116 0           $req = HTTP::Request->new(POST => "https://www.google.com/accounts/ServiceLoginBoxAuth", $head);
117 0           $req->content_type("application/x-www-form-urlencoded");
118 0           $req->content("continue=https://gmail.google.com/gmail&service=mail&Email=$username&Passwd=$password&PersistentCookie=yes&null=Sign+in");
119 0           $res = $ua->request($req);
120 0           my $dump = $res->as_string();
121 0           while ($dump =~ m!^Set-Cookie: (SID[^;]*).*!mgs) {
122             #just get the SID
123 0           $sid .= "$1";
124             }
125 0 0         if ($dump =~ m!(CheckCookie.+?)"!mgs) {
126             #have to get the url to go to next
127 0           $next_url = "https://www.google.com/accounts/$1";
128             }
129 0           $head->referer("https://www.google.com/accounts/ServiceLoginBoxAuth");
130 0           $head->remove_header("Cookie");
131             #Cookie: Session=en_US; en_US; SID=AdVxxBlym6yJx-cWNmCvV2EgDllBCk8R-B7MB_0fqeZ-2vYrVgFMwXiMJiJucsluXNY7SDuK4p7bGDGIcqqa0mg=
132             #Cookie: Session=en_US; en_US; SID=AZ9jS5SGlgV7gX_clo063pjTE1R6OSonFHI2iPJcJGvR2vYrVgFMwXiMJiJucsluXINZah0H-npo0on6buw3QuM=;
133 0           $head->push_header(Cookie => "Session=en_US; en_US; $sid");
134 0           $req = HTTP::Request->new(GET => $next_url, $head);
135 0           $next_url = "";
136 0           $res = $ua->simple_request($req);
137 0           $dump = $res->as_string(); #do the redirect ourselves!
138 0           while ($dump =~ m!Location: (.+?)Serv!gis) {
139 0           $next_url = $1;
140 0           chomp($next_url);
141             }
142             #---------------------------------------------------------
143 0           $head->remove_header("Cookie");
144 0           $head->remove_header("Referer");
145 0           $gm_l_cookie = "GMAIL_LOGIN=T" . (time() - 2) . "435/" . (time()-1) . "221/" . time() . "264";
146 0           $head->push_header(Cookie => "$gm_l_cookie; $sid");
147 0           $req = HTTP::Request->new(GET => $next_url, $head);
148 0           $next_url = "";
149 0           $res = $ua->simple_request($req);
150 0           $cookie = "";
151 0           $dump = $res->as_string();
152 0           while ($dump =~ m!^Set-Cookie: ([^;]*)!mgs) {
153 0           $cookie .= "$1; ";
154 0 0         if ($1 =~ /SID=(.*)/) {
155 0           $sid = $1;
156             }
157 0 0         if ($1 =~ /AT=(.*)/) {
158 0           $gmail_at = $1;
159             }
160             }
161 0           $cookie .= "jscookietest=valid";
162 0 0         if ($dump =~ m!src=(/gmail\?view=page&name=js&ver=(.+?)) f!mgis) {
163 0           $next_url = $1;
164             #$js_ver = $2;
165 0           $zx = $2;
166             }
167              
168             #print "got $cookie ---- $next_url - and $js_ver\n\n\n";
169             #print $res->as_string(), "\n"; exit();
170             #var fs_time=(new Date()).getTime();var testcookie = 'jscookietest=valid';
171             #document.cookie = testcookie;
172             #if (document.cookie.indexOf(testcookie) == -1) {top.location = '/gmail/html/nocookies.html';}
173             #document.cookie = testcookie + ';expires=' + new Date(0).toGMTString();
174             #var agt = navigator.userAgent.toLowerCase();
175             #if (agt.indexOf('msie')!= -1 && document.all) {var control = (agt.indexOf('msie 5') != -1) ? 'Microsoft.XMLHTTP' : 'Msxml2.XMLHTTP';try {new ActiveXObject(control);} catch (e) {top.location = '/gmail/html/noactivex.html';}}
176             #name=main src=/gmail/html/loading.html frameborder=0 noresize scrolling=no>
177             #&ver=8d26317a8120ce2c frameborder=0 noresize>
178              
179             #---------------------------------------------------------
180 0           $head->remove_header("Cookie");
181 0           $head->push_header(Cookie => "$gm_l_cookie; $cookie");
182 0           $req = HTTP::Request->new(GET => "https://gmail.google.com/$next_url", $head);
183 0           $res = $ua->simple_request($req);
184             #---------------------------------------------------------
185 0           my $url3 = "https://gmail.google.com/gmail?search=inbox&view=tl&start=0";
186             #$req = HTTP::Request->new(GET => "https://gmail.google.com/gmail", $head);
187 0           $req = HTTP::Request->new(GET=>$url3, $head);
188 0           $res = $ua->simple_request($req);
189             #---------------------------------------------------------
190             #
191             #
192             #
193              
194 0 0         if (open(GMAILPID, "> $pid")) {
195             #Save the cookie to a file so that we don't have to go through it all each time
196 0           print GMAILPID time(), "\n";
197 0           print GMAILPID $cookie, "\n";
198 0           print GMAILPID $gmail_at, "\n";
199 0           print GMAILPID $zx, "\n";
200             #print GMAILPID $zx, "\n";
201 0           close(GMAILPID);
202             }
203              
204 0           $logged_in = 1;
205              
206             }
207              
208             sub doGmailAt {
209             #must be logged in to do the gmail at..
210 0 0   0 0   login() unless $logged_in;
211              
212 0           $req = HTTP::Request->new(GET => $url_init, $head);
213 0           $res = $ua->request($req);
214 0           $dump = $res->as_string();
215              
216             #more cookies
217             #$zx = $1 if ($dump =~ m!ver=([A-Za-z0-9]*)!);
218 0           while ($dump =~ m!^Set-Cookie: (GMAIL([^;]*)).*!mgs) {
219 0           $cookie .= $1 . ";";
220 0 0         if ($1 =~ /GMAIL_AT=(.*)/) {
221 0           $gmail_at = $1;
222             }
223             }
224              
225 0           $head = HTTP::Headers->new(Cookie => $cookie);
226             #print "cookie = $cookie\ngmail_at = $gmail_at\nzx=$zx\n";
227             }
228              
229             sub countMail {
230              
231 0     0 0   login();
232              
233 0           $req = HTTP::Request->new(GET => $url3, $head);
234 0           $res = $ua->request($req);
235              
236 0           my $num = 0;
237 0 0         if ($res->is_success()) {
238 0           $inbox = $res->content();
239 0           $inbox =~ m!(D\(\[\"t\".*])!mgis;
240 0           $inbox = $1;
241 0 0         return(0) if (!$inbox);
242 0           $inbox =~ s!\\!!ig;
243 0           $inbox =~ s!!!ig;
244 0           while ($inbox =~ m!\[".+?",([01]),[01],"(.+?)",".+?",".+?","(.+?)","(.+?)".+?\]!mgis) {
245 0 0         $num++ if ($1);
246             #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : ""));
247            
248             }
249             }
250 0           return $num;
251             }
252              
253             sub outputMail {
254              
255 0     0 0   login();
256 0 0         my $delim = ($ARGV[0]) ? $ARGV[0] : ";;";
257 0           my $ret;
258              
259 0           $req = HTTP::Request->new(GET => $url3, $head);
260 0           $res = $ua->request($req);
261              
262 0 0         if ($res->is_success()) {
263 0           $inbox = $res->content();
264 0           $inbox =~ m!(D\(\[\"t\".*])!mgis;
265 0           $inbox = $1;
266 0 0         return("") if (!$inbox);
267 0           $inbox =~ s!\\!!ig;
268             #$inbox =~ s!!!ig;
269             # D(["t",["fe4f9c5b8c5bf74",1,0,"\1:49pm\","\John\, \me\, \\John\\ (36)","\»\ ","\nutritional intake\","On Wed, 11 Aug 2004 16:10:29 -0400, Erik Kastner <kastner@gmail.com> wrote: > Good …",[]
270             # ,"","fe4f9c5b8c5bf74",0]
271             # ,["fe4676e88eff196",0,0,"Aug 9","\David Boswell\","\»\ ","delicious on mozdev","erik, ok, you're all set up with http://delicious.mozdev.org/ i apologize for the delay in …",[]
272             # ,"","fe4676e88eff196",0]
273             # ]
274             # );
275             # D(["t",["fe4fc45c88db08f",1,0,"2:32pm","John, me, John (39)","» ","nutritional intake","this is the reply with more, "bitch" On Wed, 11 Aug 2004 17:15:15 -0400, Erik Kastner …",[]
276             # ,"","fe4fc45c88db08f",0]
277             # ,["fe4fc293a18f00a",1,0,"2:30pm","Erik F. Kastner","» ","Yoy","yoyo",[]
278             # ,"","fe4fc293a18f00a",0]
279             # ,["fe4676e88eff196",0,0,"Aug 9","David Boswell","» ","delicious on mozdev","erik, ok, you're all set up with http://delicious.mozdev.org/ i apologize for the delay in …",[]
280             # ,"","fe4676e88eff196",0]
281              
282 0           while ($inbox =~ m!\[".+?",([01]),[01],"(?:)?(.+?)(?:)?","(.+?)",".+?","(?:)?(.+?)(?:)?","(.+?)".+?\]!mgis) {
283 0           $num++;
284 0 0         my ($time, $from, $subject, $new, $blurb) = ($2, $3, $4, ($1 == 1) ? "new!" : "", $5);
285 0 0         if ($from =~ m!(.+?)!) {
286 0           $from = $1;
287             }
288             else {
289 0           $from =~ s!(.+?)
290             }
291             #John, me, John (39)
292             #print "Looking at $4\n\n";
293             #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : ""));
294 0           my $rec = {};
295 0 0         if ($1) {
296 0           $ret .= "$from$delim$subject$delim$time$delim$blurb\n";
297             }
298             }
299             #print "$num total messages in inbox\n";
300 0           return $ret;
301              
302             }
303             else {
304 0           warn $res->content();
305 0           warn $res->status_line, "\n";
306 0           return("");
307             }
308             }
309              
310             sub fetchMail {
311              
312 0     0 0   login();
313              
314 0           my @msgs;
315 0           $req = HTTP::Request->new(GET => $url3, $head);
316 0           $res = $ua->request($req);
317              
318 0 0         if ($res->is_success()) {
319 0           $inbox = $res->content();
320 0           $inbox =~ m!(D\(\[\"t\".*])!mgis;
321 0           $inbox = $1;
322 0 0         return(0) if (!$inbox);
323 0           $inbox =~ s!\\!!ig;
324 0           $inbox =~ s!!!ig;
325 0           while ($inbox =~ m!\[".+?",([01]),[01],"(.+?)",".+?",".+?","(.+?)","(.+?)".+?\]!mgis) {
326 0           $num++;
327             #my ($from, $subject, $new) = ($2, (($3 =~ /raquo/) ? $4 : $3), (($1 == 1) ? " NEW!!! " : ""));
328 0 0         my ($time, $from, $subject, $new, $blurb) = ($2, $3, $4, ($1 == 1) ? "new!" : "", $5);
329 0           my $rec = {};
330 0           $rec = {
331             from => $from,
332             subject => $subject,
333             date => $time,
334             blurb => $blurb,
335             new => $1
336             };
337 0           push @msgs, $rec;
338              
339             #print "Thread Started by $from, Subject $subject @ $time $new\n\t$blurb\n";
340             }
341             #print "$num total messages in inbox\n";
342 0           return @msgs;
343              
344             }
345             else {
346 0           warn $res->content();
347 0           warn $res->status_line, "\n";
348 0           return(0);
349             }
350             }
351              
352             sub setPrefs {
353 0     0 0   my ($arg) = @_;
354 0           login();
355 0           doGmailAt();
356 0 0         $arg->{"MaxPer"} = 100 unless defined $arg->{MaxPer};
357 0 0         $arg->{"Signature"} = "" unless defined $arg->{Signature};
358              
359 0           $arg->{"Signature"} = HTML::Entities::encode_entities_numeric($arg->{"Signature"});
360             #print Dumper $arg;
361              
362 0           my $url_pref=" http://gmail.google.com/gmail?search=inbox&view=tl&start=0&act=prefs&at=$gmail_at&p_bx_hs=1&p_ix_nt=$arg->{MaxPer}&p_bx_sc=1&p_sx_sg=$arg->{Signature}&zx=$zx";
363             #print "Going for $url_pref\n";
364             #$head = HTTP::Headers->new(Cookie => $cookie); #, Referer => $ref);
365 0           $req = HTTP::Request->new(GET=>$url_pref, $head);
366 0           $res = $ua->request($req);
367 0           return ($res->as_string() =~ /saved/);
368             }
369              
370             1;
371             __END__