File Coverage

blib/lib/WWW/Eksisozluk.pm
Criterion Covered Total %
statement 18 118 15.2
branch 0 34 0.0
condition 0 9 0.0
subroutine 6 11 54.5
pod 0 5 0.0
total 24 177 13.5


line stmt bran cond sub pod time code
1             package WWW::Eksisozluk;
2             # ABSTRACT: Perl interface for Eksisozluk.com
3             $WWW::Eksisozluk::VERSION = '0.11';
4 1     1   13229 use strict;
  1         2  
  1         25  
5 1     1   3 use warnings;
  1         1  
  1         17  
6 1     1   681 use DateTime;
  1         102515  
  1         34  
7 1     1   613 use LWP::UserAgent;
  1         27407  
  1         31  
8 1     1   388 use experimental 'smartmatch';
  1         575  
  1         4  
9 1     1   413 use utf8::all;
  1         37798  
  1         6  
10              
11             #Exporting stuff
12             require Exporter;
13             our @ISA = qw(Exporter);
14             our %EXPORT_TAGS = ( 'all' => [ qw(
15             new
16             ) ] );
17             our @EXPORT_OK = ( 'new' );
18             our @EXPORT = qw();
19              
20             sub new{
21 0     0 0   my $class = shift;
22 0           my $self = {};
23 0           bless $self, $class;
24 0           return $self;
25             }
26              
27              
28             #Global variables.
29             my $date_now = DateTime->now->set_time_zone('Europe/Istanbul');
30             my $date_search = DateTime->now->subtract(days=>1)->ymd; #2015-04-25
31             my %link = (
32             'debe' => "https://eksisozluk.com/istatistik/dunun-en-begenilen-entryleri",
33             'author' => "https://eksisozluk.com/biri/",
34             'entry' => "https://eksisozluk.com/entry/",
35             'topic' => "https://eksisozluk.com/",
36             'search' => "?a=search&searchform.when.from=$date_search",
37             'popular' => "https://eksisozluk.com/basliklar/populer?p=",
38             'today' => "https://eksisozluk.com/basliklar/bugun/"
39             );
40             my $sleeptime = 5; #sleep after each request. 0 would mean disabled.
41              
42              
43              
44             sub entry{
45              
46             #Get id from arguments.
47 0     0 0   my $class = shift;
48 0           my $id = shift;
49              
50             #Test if satisfies id number format.
51 0 0 0       if($id !~ /^\d{1,9}$/ || $id==0){
52 0           die "Argument passed to the entry subroutine is not correct. Did you create an object as described in synopsis?";
53             }
54              
55 0           my %entry = (
56             'id' => $id,
57             'id_link' => "$link{entry}$id",
58             'id_ref' => 0,
59              
60             'is_found' => 0,
61              
62             'topic' => "",
63             'topic_link' => "",
64            
65             'date' => 0,
66              
67             'author' => "",
68             'author_link' => "",
69             'body_raw' => "",
70             'body' => "",
71             'fav_count' => 0
72             );
73            
74             #Get the entry file.
75 0           my $ua = LWP::UserAgent->new;
76 0           $ua->timeout(10);
77 0           $ua->env_proxy;
78 0           my $response = $ua->get("$link{entry}$id");
79 0           sleep($sleeptime);
80 0           my $downloaded_entry_file;
81            
82 0 0         if($response->is_success){
83 0           $entry{'is_found'}=1;
84 0           $downloaded_entry_file = $response->decoded_content;
85             }else{
86             #return with is_found=0
87 0           return %entry;
88             #Another possible way of handling could have been:
89             #die "Error on downloading entry. Response: ".$response->status_line;
90             #TODO ask user which way he/she wishes, ie. take parameters to handle this issue.
91             }
92              
93             #topic & topic_link
94 0 0         if($downloaded_entry_file=~/
95 0           $entry{'topic_link'}=$link{topic}.$1;
96 0           $entry{'topic'}=$2;
97             }
98              
99             #date
100 0 0         if($downloaded_entry_file=~/$entry{'id'}\s([\d\s\.\:~]+)/){
101 0           $entry{'date'}=$1;
102             }
103            
104             #author
105 0 0         if($downloaded_entry_file=~/data-author="([\w\d\s]+)" data-author-id/){
106 0           $entry{'author'}=$1;
107 0           $entry{'author_link'}=$link{author}.$1;
108             }
109              
110             #body_raw, body
111 0 0         if($downloaded_entry_file=~/class=\"content\">(.*?)<\/div>/){
112 0           $entry{'body_raw'}=$1;
113 0           $entry{'body'}=$1; #handled below.
114             }
115              
116             #body: open goo.gl
117 0           while($entry{'body'}=~/href="(http:\/\/goo.gl[^"]*)"/){
118 0           my $temp=&longgoogl($1);
119 0           $entry{'body'}=~s/href="(http:\/\/goo.gl[^"]*)"/href="$temp"/;
120             }
121            
122             #body: open hidden references (akıllı bkz)
123 0           $entry{'body'}=~s/<([^<]*)(data-query=\")([^>]*)\">\*<\/a><\/sup>/<$1$2$3\">\(* $3\)<\/a>/g;
124            
125             #body: fix links so that they work outside eksisozluk.com + _blank
126 0           $entry{'body'}=~s/href="\//target="_blank" href="https:\/\/eksisozluk.com\//g;
127            
128             #body: gmail underline fix
129 0           $entry{'body'}=~s/href="/style="text-decoration:none;" href="/g;
130            
131             #body: fix imgur links ending without jpg
132 0           $entry{'body'}=~s/(href="https?:\/\/[^.]*\.?imgur.com\/\w{7})"/$1\.jpg"/g;
133              
134             #body: add img src to display images that are jpg jpeg png
135 0           $entry{'body'}=~s/(href="([^"]*\.(jpe?g|png)(:large)?)"[^<]*<\/a>)/$1



/g;
136            
137             #body: add a northwest arrow, and domain name in parantheses
138 0           $entry{'body'}=~s/(https?:\/\/(?!eksisozluk.com)([^\/<]*\.[^\/<]*)[^<]*<\/a>)/$1 \($2 ↗\)/g;
139              
140             #favcount
141 0 0         if($downloaded_entry_file=~/data-favorite-count="(\d+)"/){
142 0           $entry{'fav_count'}=$1;
143             }
144              
145             #id_ref (first entry of the day, used for debe)
146 0           $response = $ua->get("$entry{'topic_link'}$link{search}");
147 0           sleep($sleeptime);
148 0           my $downloaded_search_file;
149 0 0         if($response->is_success){
150 0           $downloaded_search_file=$response->decoded_content;
151 0 0         if($downloaded_search_file=~/
  • 152 0           $entry{'id_ref'}=$1;
    153             }
    154             }else{
    155             #Return with minus 1.
    156 0           $entry{'id_ref'}=-1;
    157             #Another possible way of handling.
    158             #die "Error on searching reference entry. Response: ".$response->status_line;
    159             }
    160              
    161              
    162 0           return %entry;
    163              
    164             }
    165              
    166              
    167              
    168             sub topiclist{
    169              
    170             #Get type from arguments.
    171 0     0 0   my $class = shift;
    172 0           my $type = shift;
    173              
    174             #Test if it's valid.
    175 0 0 0       if($type ne "popular" && $type ne "today"){
    176 0           die "Argument passed to topiclist subroutine has to be either \"popular\" or \"today\".";
    177             }
    178              
    179 0           my $currentpage = 0;
    180 0           my $pagecount = 2;
    181 0           my %topiclist_topics;
    182              
    183 0           while($currentpage<$pagecount){
    184              
    185             #update pagecount
    186 0           $currentpage++;
    187 0           print "looking for page $currentpage\n";
    188              
    189 0           my $ua = LWP::UserAgent->new;
    190 0           $ua->timeout(10);
    191 0           $ua->env_proxy;
    192 0           my $response = $ua->get("$link{$type}$currentpage");
    193 0           sleep($sleeptime);
    194 0           my $downloaded_topiclist_file;
    195              
    196 0 0         if($response->is_success){
    197 0           $downloaded_topiclist_file=$response->decoded_content;
    198              
    199             #Get the pagecount value only once.
    200             #First page doesn't have pagecount.. Check it at second page.
    201 0 0 0       if($currentpage == 2 && $downloaded_topiclist_file=~/data-pagecount="(\d+)"/){
    202 0           $pagecount = $1;
    203 0           print "pagecount becomes $pagecount\n";
    204             }
    205              
    206             #We might have removed left frame populars here, but it doesn't really matter.
    207              
    208             #Add topics to the hash, with the number of entries in it.
    209 0           while($downloaded_topiclist_file =~ />(.*)\s?(\d+)
    210              
    211             #Add if not added before
    212 0 0         if(!($1 ~~ %topiclist_topics)){
    213 0           $topiclist_topics{"$1"}=$2;
    214             }
    215             #Cross out the processed one
    216 0           $downloaded_topiclist_file=~s/>(.*)\s(\d+)
    217             }
    218              
    219             }else{
    220 0           die "Error on downloading topic list. Response: ".$response->status_line;
    221             }
    222              
    223             }
    224              
    225 0           return %topiclist_topics;
    226              
    227             }
    228              
    229              
    230              
    231              
    232             sub debe_ids{
    233              
    234 0     0 0   my @debe;
    235 0           my $ua = LWP::UserAgent->new;
    236 0           $ua->timeout(10);
    237 0           $ua->env_proxy;
    238 0           my $response = $ua->get("$link{debe}");
    239 0           sleep($sleeptime);
    240 0           my $downloaded_debe_file;
    241            
    242 0 0         if($response->is_success){
    243 0           $downloaded_debe_file=$response->decoded_content;
    244              
    245              
    246 0           while($downloaded_debe_file =~ /%23(\d+)">/){
    247              
    248             #If the matched entry id did not added before, then add.
    249 0 0         if(!($1 ~~ @debe)){
    250 0           push @debe,$1;
    251             }
    252             #Cross it to avoid duplicates.
    253 0           $downloaded_debe_file=~s/%23(\d+)">/%23XXXX">/;
    254             }
    255              
    256 0 0         if(scalar(@debe)!=50){
    257 0           my $miscount = scalar(@debe);
    258 0           warn "Debe list has $miscount entries";
    259             }
    260            
    261             }else{
    262 0           die "Error on downloading data. Response: ".$response->status_line;
    263             }
    264              
    265 0           return @debe;
    266             }
    267              
    268              
    269             sub longgoogl{
    270 0     0 0   my $googl = $_[0];
    271 0           my $long = `curl -s $1 |grep HREF`;
    272 0 0         if($long =~/"(http[^"]*)"/){
    273 0           $long = $1;
    274             }
    275 0           return $long;
    276             }
    277              
    278              
    279             1;
    280              
    281             =pod
    282              
    283             =encoding UTF-8
    284              
    285             =head1 NAME
    286              
    287             WWW::Eksisozluk - Perl interface for Eksisozluk.com
    288              
    289             =head1 VERSION
    290              
    291             version 0.11
    292              
    293             =head1 SYNOPSIS
    294              
    295             use WWW::Eksisozluk;
    296             #You should create an object as shown below.
    297             my $eksi = WWW::Eksisozluk->new();
    298              
    299             #IDs for today's debe list (element at index 0 is the top one)
    300             my @debe = $eksi->debe_ids();
    301              
    302             #Details (body, author, date etc) of an entry with given id.
    303             my %entry = $eksi->entry($debe[0]);
    304              
    305             #Popular topics with number of recent entries in it.
    306             my %popular = $eksi->topiclist(popular);
    307              
    308             #Today's topics with number of recent entries in it.
    309             my %today = $eksi->topiclist(today);
    310              
    311             =head1 DESCRIPTION
    312              
    313             This module provides a simple perl interface for eksisozluk, which is a user-based
    314             web dictionary written mostly in Turkish, active since 1999. You can get debe list
    315             (top entries of yesterday) by using this module. You can also reach topic list for
    316             today, and popular topic lists.
    317              
    318             As a friendly note, data you reach by using this module might be subject to copyright
    319             terms of Eksisozluk. See eksisozluk.com for details.
    320              
    321             =head1 AUTHOR
    322              
    323             Kivanc Yazan
    324              
    325             =head1 COPYRIGHT AND LICENSE
    326              
    327             This software is copyright (c) 2015 by Kivanc Yazan.
    328              
    329             This is free software; you can redistribute it and/or modify it under
    330             the same terms as the Perl 5 programming language system itself.
    331              
    332             =cut
    333              
    334             __END__