File Coverage

blib/lib/Net/RFC/Search.pm
Criterion Covered Total %
statement 67 69 97.1
branch 24 28 85.7
condition 7 8 87.5
subroutine 12 12 100.0
pod 3 3 100.0
total 113 120 94.1


line stmt bran cond sub pod time code
1             package Net::RFC::Search;
2             =head1 NAME
3              
4             Net::RFC::Search - search for RFC's and dump RFC's content either to a variable or to a file.
5              
6             =head1 SYNOPSIS
7              
8             Net::RFC::Search provides 2 methods:
9              
10             B is for searching for a RFC index number by given 'keyword' (through RFC index text file).
11              
12             B is for dumping RFC's content either to a variable or to a file.
13              
14             use Net::RFC::Search;
15              
16             my $rfc = Net::RFC::Search->new;
17              
18             # This will return array of RFC indices with "websocket" keyword in their headers.
19             my @found = $rfc->search_by_header('WebSocket');
20              
21             # This will dump content of RFC 6455 into $rfc_text variable.
22             my $rfc_text = $rfc->get_by_index(6455);
23              
24             # Dumps RFC 6455 into /tmp/6455.txt file
25             $rfc->get_by_index(6455, '/tmp/6455.txt');
26              
27             =head1 VERSION
28              
29             Version 0.02
30              
31             =head1 DESCRIPTION
32              
33             Net::RFC::Search is a module aimed to be a simple tool to search and dump RFC's.
34              
35             =head1 CONSTRUCTOR
36              
37             =over 4
38              
39             =item new(%options)
40              
41             Create instance of C.
42              
43             B<%options> are optional parameters:
44              
45             C - a file name to store RFC index file into. Defaults to ~/.rfcindex
46              
47             C - URL of the RFC site/mirror where index file and RFC's are going to be downloaded from.
48              
49             =back
50              
51             =head1 METHODS
52              
53             =over 4
54              
55             =item search_by_header("keyword")
56              
57             Returns array of RFC index numbers "keyword" has been found in.
58              
59             Search occurs in RFC header names (i.e. through RFC index file).
60              
61             =item get_by_index($index [, $filename ]);
62              
63             Downloads RFC of index number C<$index> and returns downloaded content.
64              
65             By providing optional C<$filename> content will be dumped into C<$filename>.
66              
67             =back
68              
69             =head1 TODO
70              
71             =over 4
72              
73             =item add caching facilities
74              
75             =item do not rely on LWP::UserAgent only, add lynx/curl as optional methods to retrieve RFC's
76              
77             =back
78              
79             =head1 ACKNOWLEDGEMENTS
80              
81             This module is heavily based on rfc.pl script written by **Derrick Daugherty** (http://www.dewn.com/rfc/)
82              
83             =head1 AUTHOR
84              
85             Nikolay Aviltsev, C<< navi@cpan.org >>
86              
87             =head1 LICENSE AND COPYRIGHT
88              
89             Copyright 2013 Nikolay Aviltsev.
90              
91             This program is free software; you can redistribute it and/or modify it
92             under the terms of either: the GNU General Public License as published
93             by the Free Software Foundation; or the Artistic License.
94              
95             See L for more information.
96              
97             =cut
98              
99 2     2   108841 use 5.006;
  2         9  
  2         122  
100 2     2   11 use strict;
  2         5  
  2         104  
101              
102 2     2   3007 use LWP::UserAgent;
  2         117085  
  2         73  
103 2     2   1999 use IO::File;
  2         12964  
  2         330  
104 2     2   15 use Carp;
  2         4  
  2         111  
105 2     2   4741 use File::HomeDir;
  2         14401  
  2         1594  
106              
107             our $VERSION = '0.02';
108             my $ua;
109              
110             sub new {
111 4     4 1 3318 my ($class, %params) = @_;
112              
113 4         13 my $self = {};
114 4   66     50 $self->{indexpath} = $params{indexpath} || File::HomeDir->my_home . "/.rfcindex";
115              
116 4   100     204 $self->{rfcbaseurl} = $params{rfcbaseurl} || 'http://www.ietf.org/rfc/';
117 4         19 $self->{rfcbaseurl} =~ s/\s//g;
118 4 100       26 $self->{rfcbaseurl} .= '/' unless substr($self->{rfcbaseurl}, -1) eq '/';
119              
120 4         12 bless $self, $class;
121 4         19 return $self;
122             }
123              
124             sub _ua {
125 5     5   467 my $self = shift;
126 5 100       39 return $ua if $ua;
127              
128 2         30 $ua = LWP::UserAgent->new(timeout => 10);
129             }
130              
131             sub _make_index {
132 1     1   19 my $self = shift;
133 1         4 my $indexpath = $self->{indexpath};
134              
135             # system ("lynx -dump www.ietf.org/download/rfc-index.txt > $indexpath");
136 1         7 my $response = $self->_ua->get('http://www.ietf.org/download/rfc-index.txt');
137 1 50       1206544 if ($response->is_success) {
138 1         25 my $fh = IO::File->new($indexpath, 'w');
139 1         247 print $fh $response->decoded_content;
140 1         44747 undef $fh;
141             }
142             else {
143 0         0 confess "Could not get rfc-index.txt, please try again later";
144             }
145             }
146              
147             sub search_by_header {
148 3     3 1 10687 my ($self, $string) = @_;
149 3 100       81 $self->_make_index unless -e $self->{indexpath};
150              
151 3         127 my $fh = IO::File->new($self->{indexpath}, "r");
152              
153 3         414 my ($thing, @found_indices);
154 3         6 my $found = 0;
155              
156 3         53376 for my $line(<$fh>) {
157 85578 100       226473 if ($line !~ /^\s*$/) {
158 63750         105548 $thing .= $line;
159 63750 100       200876 $found = 1 if ($line =~ /$string/i);
160             }
161             else {
162 21828 100       60136 $thing =~ /^(\d+)/ if $thing;
163 21828 100 100     96171 push @found_indices, $1 if ($1 && $found);
164              
165 21828         24533 $found = 0;
166 21828         33633 $thing = '';
167             }
168             }
169              
170 3         7213 undef $fh;
171 3         1520 return @found_indices;
172             }
173              
174             sub get_by_index {
175 3     3 1 7013 my ($self, $index, $dump_to) = @_;
176 3 50       127 $self->_make_index unless -e $self->{indexpath};
177              
178 3         7 my $rfc;
179 3 50       12 if ($index) {
180 3         15 my $response = $self->_download_rfc_by_index($index);
181 3 100       23923 $rfc = $response->{error} ? $response->{error_message} : $response->{content};
182             }
183              
184 3 100       22 if ($dump_to) {
185 1         15 my $fh = IO::File->new($dump_to, "w");
186 1         1330 print $fh $rfc;
187             }
188              
189 3         33 return $rfc;
190             }
191              
192             sub _download_rfc_by_index {
193 3     3   10 my ($self, $index) = @_;
194 3 50       16 if (length $index < 4) {
195 0         0 $index = '0' . $index;
196             }
197              
198 3         52 my $rfcbaseurl = $self->{rfcbaseurl};
199 3         12 my $url = $self->{rfcbaseurl} . "rfc" . $index . ".txt";
200              
201             # `lynx -dump ${rfcbaseurl}rfc$index.txt`;
202 3         16 my $response = $self->_ua->get($url);
203              
204 3 100       1116945 return $response->is_success ?
205             { error => 0, content => $response->decoded_content } :
206             { error => 1, error_code => $response->code, error_message => $response->status_line };
207             }
208              
209             1;