File Coverage

blib/lib/BGPmon/Fetch/Archive.pm
Criterion Covered Total %
statement 13 15 86.6
branch n/a
condition n/a
subroutine 5 5 100.0
pod n/a
total 18 20 90.0


line stmt bran cond sub pod time code
1             package BGPmon::Fetch::Archive;
2             our $VERSION = '2.0';
3              
4 1     1   81541 use strict;
  1         2  
  1         77  
5 1     1   7 use warnings;
  1         2  
  1         35  
6 1     1   3068 use POSIX qw/strftime/;
  1         21739  
  1         7  
7 1     1   1233 use File::Path qw/mkpath rmtree/;
  1         2  
  1         70  
8 1     1   877 use BGPmon::Translator::XFB2PerlHash;
  0            
  0            
9             use BGPmon::Fetch::File;
10             use LWP::Simple;
11             use Data::Dumper;
12              
BEGIN {
    require Exporter;
    our $AUTOLOAD;
    our @ISA = ('Exporter');

    # Every public entry point can be imported on request, either by name
    # or collectively through the ':all' tag.  Nothing is exported by
    # default.
    my @exportable = qw(
        init_bgpdata connect_archive read_xml_message close_connection
        is_connected messages_read files_read uptime connection_endtime
        connection_duration get_error_code get_error_message get_error_msg
    );
    our %EXPORT_TAGS = ( 'all' => [@exportable] );
    our @EXPORT_OK   = @{ $EXPORT_TAGS{'all'} };
}
23              
# ---- Connection bookkeeping ------------------------------------------------
my ( $msgs_read, $files_read ) = ( 0, 0 );
my ( $connection_start, $connection_stop );
my $connected = 0;

# ---- State carried between calls to read_xml_message ------------------------
my $upd_url;              # root URL update files are retrieved from
my $begin_time;           # start of the requested data interval
my $end_time;             # end of the requested data interval
my ( $year, $month );     # year/month used to build index URLs
my $append;               # detects /UPDATES/ dirs below the normal depth
my @index_page;           # list of download links still to be fetched
my $scratch_dir            = "/tmp/";
my $ignore_data_errors     = 0;
my $ignore_incomplete_data = 0;

# ---- Per-function error codes and messages ----------------------------------
my ( %error_code, %error_msg );
my @function_names = (
    'init_bgpdata',       'connect_archive',
    'read_xml_message',   'close_connection',
    'is_connected',       'uptime',
    'connection_endtime', 'connection_duration',
);

use constant {
    NO_ERROR_CODE                   => 0,
    NO_ERROR_MSG                    => 'No Error',
    UNDEFINED_ARGUMENT_CODE         => 401,
    UNDEFINED_ARGUMENT_MSG          => 'Undefined Argument(s)',
    UNCONNECTED_CODE                => 402,
    UNCONNECTED_MSG                 => 'Not connected to an archive',
    ALREADY_CONNECTED_CODE          => 403,
    ALREADY_CONNECTED_MSG           => 'Already connected to an archive',
    NO_INDEX_PAGE_CODE              => 404,
    NO_INDEX_PAGE_MSG               => 'Unable to find an index page',
    SYSCALL_FAIL_CODE               => 405,
    SYSCALL_FAIL_MSG                => 'System call failed',
    INVALID_ARGUMENT_CODE           => 406,
    INVALID_ARGUMENT_MSG            => 'Invalid value given for argument',
    INIT_FAIL_CODE                  => 407,
    INIT_FAIL_MSG                   => 'Failed to initialize connection to archive',
    FILE_OPERATION_FAIL_CODE        => 408,
    FILE_OPERATION_FAIL_MSG         => 'File operation failed',
    DOWNLOAD_FAIL_CODE              => 409,
    DOWNLOAD_FAIL_MSG               => 'Failed to download file',
    INVALID_MESSAGE_CODE            => 410,
    INVALID_MESSAGE_MSG             => 'Invalid message read',
    INVALID_FUNCTION_SPECIFIED_CODE => 411,
    INVALID_FUNCTION_SPECIFIED_MSG  => 'Invalid function specified',
    IGNORE_ERROR_CODE               => 412,
    IGNORE_ERROR_MSG                =>
        'Cannot have ignore_incomplete_data off with ignore_data_errors on',
};

# Every tracked function starts out in the "no error" state.
@error_code{@function_names} = (NO_ERROR_CODE) x @function_names;
@error_msg{@function_names}  = (NO_ERROR_MSG) x @function_names;
82              
END {
    # Clean up the per-process scratch directory at interpreter exit.
    #
    # Only ever delete a directory this module created.  If init_bgpdata()
    # was never called, $scratch_dir still holds the shared default
    # ("/tmp/"), and rmtree() with keep_root would empty that directory.
    if ( defined $scratch_dir && $scratch_dir =~ m{BGP\.Archive\.[0-9]+/*\z} ) {
        my $errs;    # collects rmtree diagnostics; cleanup is best-effort
        rmtree( $scratch_dir,
            { keep_root => 1, safe => 1, error => \$errs } );
    }
}
87              
88             =head1 NAME
89              
90             BGPmon::Fetch::Archive
91              
92             The BGPmon::Fetch::Archive module connects to an online archive of
93             BGP files, downloads XML files and reads XML messages one at a time.
94              
95             =head1 SYNOPSIS
96              
97             The BGPmon::Fetch::Archive module provides functionality to connect
98             to a BGP archive and read one XML message at a time.
99              
100             use BGPmon::Fetch::Archive;
101             my $ret = init_bgpdata('scratch_dir' => '/tmp/',
102             'ignore_incomplete_data' => 1, 'ignore_data_errors' => 0);
103             my $ret = connect_archive('archive.netsec.colostate.edu/collectors/bgpdata-netsec/',
104             1234567890,2345678901);
105             my $xml_msg = read_xml_message();
106             my $ret = is_connected();
107             my $num_read = messages_read();
108             my $num_files = files_read();
109             my $uptime = uptime();
110             my $ret = close_connection();
111             my $downtime = connection_endtime();
112             my $duration = connection_duration();
113              
114             =head1 EXPORT
115              
116             init_bgpdata
117             connect_archive
118             read_xml_message
119             close_connection
120             is_connected
121             messages_read
122             files_read
123             uptime
124             connection_endtime
125             connection_duration
126             get_error_code
127             get_error_message
128             get_error_msg
129              
130             =cut
131              
132             =head1 SUBROUTINES/METHODS
133              
134             =head2 init_bgpdata
135              
136             Initializes the scratch directory and error-checking flags for the next
137             archive connection.
138             Input: The location to create a scratch directory in (default is /tmp)
139             Whether to ignore potentially incomplete data (default is to check)
140             Whether to ignore all data errors (must also specify ignore
141             incomplete data flag as well) (default is to check)
142             Output: 0 if initialization fails
143             1 if initialization succeeds
144             Usage: my $ret = init_bgpdata('scratch_dir' => '/tmp',
145             'ignore_incomplete_data' => 1, 'ignore_data_errors' => 0);
146              
147             =cut
148              
sub init_bgpdata {
    my %args  = @_;
    my $fname = "init_bgpdata";

    # Work out where the per-process scratch directory lives.  It is always
    # a BGP.Archive.<pid> subdirectory of either the caller-supplied
    # directory or the current default.
    my $subdir = "BGP.Archive.$$";
    my $target;
    if ( defined $args{'scratch_dir'} ) {
        $target = "$args{'scratch_dir'}/$subdir";
    }
    elsif ( $scratch_dir =~ m{(?:\A|/)\Q$subdir\E/*\z} ) {
        # Already pointing at this process's scratch directory: reuse it
        # instead of nesting another BGP.Archive.<pid> level on each call.
        $target = $scratch_dir;
    }
    else {
        $target = "$scratch_dir/$subdir";
    }
    $target =~ s{/{2,}}{/}g;    # collapse every run of slashes

    # mkpath croaks on failure in this calling form, hence the eval.
    my $created = eval { mkpath($target); 1 };
    if ( !$created ) {
        $error_code{$fname} = SYSCALL_FAIL_CODE;
        $error_msg{$fname}  = SYSCALL_FAIL_MSG . ": $@";
        return 0;
    }
    $scratch_dir = $target;

    # Get whether the user wants to ignore incomplete data or all data
    # errors.  It is an error to ignore all data errors but not incomplete
    # data, so validate the combination before committing it.
    my $want_incomplete =
        defined $args{'ignore_incomplete_data'}
        ? $args{'ignore_incomplete_data'}
        : $ignore_incomplete_data;
    my $want_data_errors =
        defined $args{'ignore_data_errors'}
        ? $args{'ignore_data_errors'}
        : $ignore_data_errors;
    if ( $want_data_errors && !$want_incomplete ) {
        $error_code{$fname} = IGNORE_ERROR_CODE;
        $error_msg{$fname}  = IGNORE_ERROR_MSG;
        return 0;
    }
    $ignore_incomplete_data = $want_incomplete;
    $ignore_data_errors     = $want_data_errors;

    # Initialize the scratch directory and ignore-error flags for the
    # underlying File module.  This now happens on the default-directory
    # path as well, which previously returned before reaching this point.
    if ( !BGPmon::Fetch::File::init_bgpdata(%args) ) {
        $error_code{$fname} =
            BGPmon::Fetch::File::get_error_code('init_bgpdata');
        $error_msg{$fname} =
            BGPmon::Fetch::File::get_error_msg('init_bgpdata');
        return 0;
    }

    # Reset module state, including all recorded error codes.
    $msgs_read  = 0;
    $files_read = 0;
    for my $function_name (@function_names) {
        $error_code{$function_name} = NO_ERROR_CODE;
        $error_msg{$function_name}  = NO_ERROR_MSG;
    }
    $connection_stop = undef;
    return 1;
}
215              
216             =head2 connect_archive
217              
218             Connects to an online archive.
219              
220             Input: archive URL page for some collector
221             (i.e. "archive.netsec.colostate.edu/bgpdata-netsec/")
222             start/end (UNIX timestamps corresponding to the data interval
223             the user wants)
224              
225             Output: 0 on success, 1 on failure
226              
227             Usage: my $ret = connect_archive(
228             'archive.netsec.colostate.edu/collectors/bgpdata-netsec',
229             1234567890,
230             2345678901);
231              
232             =cut
233              
sub connect_archive {
    my ( $url, $begin, $end ) = @_;
    my $fname = "connect_archive";

    # Record an error for this function and yield the failure return (1).
    my $fail = sub {
        my ( $code, $msg ) = @_;
        $error_code{$fname} = $code;
        $error_msg{$fname}  = $msg;
        return 1;
    };

    # close_connection() does nothing while unconnected, so a failed
    # attempt has to clear the half-initialised state explicitly.
    my $reset_state = sub {
        ( $upd_url, $begin_time, $end_time, $year, $month ) = ();
        @index_page = ();
        return;
    };

    # All three arguments are mandatory.
    if ( !defined($url) || !defined($begin) || !defined($end) ) {
        return $fail->( UNDEFINED_ARGUMENT_CODE, UNDEFINED_ARGUMENT_MSG );
    }

    # Refuse to open a second connection on top of a live one.
    if ( is_connected() ) {
        return $fail->( ALREADY_CONNECTED_CODE, ALREADY_CONNECTED_MSG );
    }

    # Timestamps must be non-empty strings of digits (which also rules out
    # negatives) with begin <= end; the URL must be non-empty and contain
    # only printable, non-space characters.
    my $unix_time = qr/\A[0-9]+\z/;
    if (   $url eq ''
        || $url =~ m/[^[:graph:]]/
        || $begin !~ $unix_time
        || $end !~ $unix_time
        || $begin > $end )
    {
        return $fail->( INVALID_ARGUMENT_CODE, INVALID_ARGUMENT_MSG );
    }

    # Set year/month variables from the start time.
    my @c_time = gmtime($begin);
    $year  = scalar strftime( "%Y", @c_time );
    $month = scalar strftime( "%m", @c_time );

    # Create the scratch directory if it has not already been created,
    # with both ignore-error flags off.
    if ( $scratch_dir !~ m/BGP\.Archive/ ) {
        if ( !init_bgpdata( 'ignore_incomplete_data' => 0,
                            'ignore_data_errors'     => 0 ) )
        {
            close_connection();
            $reset_state->();
            return $fail->( INIT_FAIL_CODE, INIT_FAIL_MSG );
        }
    }

    $upd_url    = $url;
    $begin_time = $begin;
    $end_time   = $end;

    set_append();

    # Fetch the first update file (which in turn fetches the first index).
    if ( !defined( get_next_update_file() ) ) {
        close_connection();
        $reset_state->();
        return $fail->( INIT_FAIL_CODE, INIT_FAIL_MSG );
    }

    $connected        = 1;
    $connection_start = time;

    $error_code{$fname} = NO_ERROR_CODE;
    $error_msg{$fname}  = NO_ERROR_MSG;

    return 0;
}
305              
306             =head2 read_xml_message
307              
308             Reads the next XML message from the data source that is in the interval.
309              
310             Input: None, but assumes connect_archive has been called
311              
312             Output: The next XML message from the archive "stream" or undef
313              
314             Usage: my $msg = read_xml_message();
315              
316             =cut
317              
sub read_xml_message {
    my $fname = "read_xml_message";

    unless ( is_connected() ) {
        $error_code{$fname} = UNCONNECTED_CODE;
        $error_msg{$fname}  = UNCONNECTED_MSG;
        return undef;
    }

    # Keep pulling messages until one falls inside the requested interval,
    # a non-BGP message turns up, or the stream ends.
    MESSAGE:
    while (1) {
        my $xml = get_next_message();
        if ( !defined $xml ) {
            $error_code{$fname} = INVALID_MESSAGE_CODE;
            $error_msg{$fname}  = INVALID_MESSAGE_MSG;
            return undef;
        }

        my $parsed = BGPmon::Translator::XFB2PerlHash::translate_msg($xml);
        if ( scalar( keys %$parsed ) == 0 ) {
            $error_code{$fname} = INVALID_MESSAGE_CODE;
            $error_msg{$fname}  = INVALID_MESSAGE_MSG;
            return undef;
        }

        # Non-BGP messages can carry arbitrary timestamps, so anything that
        # is not a normal BGP_MONITOR_MESSAGE goes straight to the caller.
        my $bgp_root = BGPmon::Translator::XFB2PerlHash::get_content(
            "/BGP_MONITOR_MESSAGE");
        if ( !defined $bgp_root ) {
            $error_code{$fname} = NO_ERROR_CODE;
            $error_msg{$fname}  = NO_ERROR_MSG;
            return $xml;
        }

        # A BGP message without a timestamp is flagged as invalid but is
        # still handed back.
        my $stamp = BGPmon::Translator::XFB2PerlHash::get_content(
            "/BGP_MONITOR_MESSAGE/OBSERVED_TIME/TIMESTAMP/content");
        if ( !defined $stamp ) {
            $error_code{$fname} = INVALID_MESSAGE_CODE;
            $error_msg{$fname}  = INVALID_MESSAGE_MSG;
            return $xml;
        }

        # Too early: skip it and look at the next message.
        next MESSAGE if $stamp < $begin_time;

        # Inside the interval: count it and return it.
        if ( $stamp <= $end_time ) {
            $msgs_read++;
            $error_code{$fname} = NO_ERROR_CODE;
            $error_msg{$fname}  = NO_ERROR_MSG;
            return $xml;
        }

        # Past the end of the interval: we are done with this archive.
        close_connection();
        return undef;
    }
}
378              
379             =head2 close_connection
380              
381             Function to close and delete any files and reset the module's state variables
382              
383             Usage: close_connection();
384              
385             =cut
386              
sub close_connection {
    my $fname = "close_connection";

    # Returns 1 (with UNCONNECTED recorded) when there is nothing to close,
    # 0 once the connection has been torn down.
    if ( !is_connected() ) {
        $error_code{$fname} = UNCONNECTED_CODE;
        $error_msg{$fname}  = UNCONNECTED_MSG;
        return 1;
    }

    # With the 'error' option rmtree() reports problems through $errs
    # instead of dying, so both channels have to be inspected.
    my $errs;
    my $failure;
    my $ran = eval {
        rmtree( $scratch_dir,
            { keep_root => 1, safe => 1, error => \$errs } );
        1;
    };
    if ( !$ran ) {
        $failure = $@;
    }
    elsif ( $errs && @$errs ) {
        # Each entry is a single-pair hash of { path => message }; the path
        # is empty for general errors.
        $failure = join '; ', map {
            my ( $path, $message ) = %$_;
            $path eq '' ? $message : "$path: $message";
        } @$errs;
    }

    $connection_stop = time;
    BGPmon::Fetch::File::close_connection();
    ( $upd_url, $year, $month, $begin_time, $end_time ) = ();
    @index_page = ();
    $connected  = 0;

    # The connection is closed either way, but a cleanup failure is now
    # left visible to get_error_code() instead of being overwritten.
    if ( defined $failure ) {
        $error_code{$fname} = SYSCALL_FAIL_CODE;
        $error_msg{$fname}  = SYSCALL_FAIL_MSG . ": $failure";
    }
    else {
        $error_code{$fname} = NO_ERROR_CODE;
        $error_msg{$fname}  = NO_ERROR_MSG;
    }
    return 0;
}
410              
411             =head2 is_connected
412              
413             Function to report whether currently connected to an archive.
414              
415             =cut
416              
# Returns the module's connection flag: set to 1 by a successful
# connect_archive() and back to 0 by close_connection().
sub is_connected { return $connected }
420              
421             =head2 messages_read
422              
423             Get number of messages read.
424              
425             Usage: my $num_msgs = messages_read();
426              
427             =cut
428              
# Returns the running count of in-interval messages handed out by
# read_xml_message() since the last init_bgpdata().
sub messages_read { return $msgs_read }
432              
433             =head2 files_read
434              
435             Get the number of files read.
436              
437             Usage: my $num_files = files_read();
438              
439             =cut
# Returns the number of update files downloaded and opened so far.
sub files_read { return $files_read }
443              
444             =head2 uptime
445              
446             Returns number of seconds the connection has been up.
447             If the connection is down, return 0.
448              
449             Usage: my $time = uptime();
450              
451             =cut
452              
sub uptime {
    # Seconds elapsed since connect_archive() succeeded, or 0 when no
    # connection is open.
    return 0 unless $connected;

    # Measure from $connection_start (wall-clock time of the connect).
    # $begin_time is the start of the requested *data* interval, which may
    # lie years in the past and is unrelated to how long we have been up.
    return time() - $connection_start;
}
460              
461             =head2 connection_endtime
462              
463             Returns the time the connection ended.
464             If the connection is up, return 0.
465              
466             Usage: my $time = connection_endtime();
467              
468             =cut
469              
sub connection_endtime {
    # 0 while a connection is open; otherwise the time recorded by the
    # last close_connection() (undef if none has been recorded).
    return $connected ? 0 : $connection_stop;
}
477              
478             =head2 connection_duration
479              
480             Returns the total time the last connection was up for.
481             If the connection is up, returns 0.
482              
483             NOTE: If a connection is currently established, call uptime().
484              
485             Usage: my $dur = connection_duration();
486              
487             =cut
sub connection_duration {
    my $fname = "connection_duration";

    # A live connection has no final duration yet; callers should use
    # uptime() instead.
    if ($connected) {
        $error_code{$fname} = ALREADY_CONNECTED_CODE;
        $error_msg{$fname}  = ALREADY_CONNECTED_MSG;
        return 0;
    }
    $error_code{$fname} = NO_ERROR_CODE;
    $error_msg{$fname}  = NO_ERROR_MSG;

    # No connection has ever been made, or init_bgpdata() has cleared the
    # stop time since: there is no completed connection to measure.  This
    # avoids arithmetic on undef and a bogus negative result.
    return 0
        unless defined($connection_start) && defined($connection_stop);

    return $connection_stop - $connection_start;
}
499              
500             =head2 get_error_code
501              
502             Get the error code for a given function
503              
504             Input : the name of the function whose error code we should report
505              
506             Output: the function's error code
507             or UNDEFINED_ARGUMENT if the user did not supply a function
508             or INVALID_FUNCTION_SPECIFIED if the user provided an invalid function
509              
510             Usage: my $err_code = get_error_code("connect_archive");
511              
512             =cut
513              
sub get_error_code {
    my ($function) = @_;

    # No function name supplied at all.
    return UNDEFINED_ARGUMENT_CODE if !defined $function;

    # Known functions have an entry in the table; anything else is invalid.
    my $code = $error_code{$function};
    return defined($code) ? $code : INVALID_FUNCTION_SPECIFIED_CODE;
}
525              
526             =head2 get_error_message
527              
528             Get the error message of a given function
529              
530             Input : the name of the function whose error message we should report
531              
532             Output: the function's error message
533             or UNDEFINED_ARGUMENT if the user did not supply a function
534             or INVALID_FUNCTION_SPECIFIED if the user provided an invalid function
535              
536             Usage: my $err_msg = get_error_message("read_xml_message");
537              
538             =cut
539              
sub get_error_message {
    my $function = shift;

    # Check we got a function name.
    if ( !defined($function) ) {
        return UNDEFINED_ARGUMENT_MSG;
    }

    return $error_msg{$function} if defined( $error_msg{$function} );

    # Separate the fixed text from the offending name; previously the two
    # were run together ("Invalid function specifiedfoo").
    return INVALID_FUNCTION_SPECIFIED_MSG . ": $function";
}
551              
552             =head2 get_error_msg
553              
554             Shorthand call for get_error_message
555              
556             =cut
557              
sub get_error_msg {
    # Thin alias: forward the function name to get_error_message().
    my ($function) = @_;
    return get_error_message($function);
}
562              
563             ################################# END EXPORTED FUNCTIONS ######################
564             ###################### BEGIN UNEXPORTED FUNCTIONS #############################
565              
566             #get_next_message
567             #This function retrieves the next message from the currently-open file
568             #or fetches the next available update file and then returns the first
569             #message from that file.
570             #Input: None
571             #Output: The next XML message in the archive "stream" or
572             # undef if there are no more messages available.
573              
sub get_next_message {
    my $fname = "get_next_message";

    # An update file must already be open in the File module.
    unless ( BGPmon::Fetch::File::is_connected() ) {
        $error_code{$fname} = UNCONNECTED_CODE;
        $error_msg{$fname}  = UNCONNECTED_MSG;
        return undef;
    }

    # Read from the current file; whenever it yields nothing, move on to
    # the next update file and try again until a message appears or no
    # further file can be fetched.
    my $xml = BGPmon::Fetch::File::read_xml_message();
    until ( defined $xml ) {
        my $opened = get_next_update_file();
        if ( !defined $opened ) {
            $error_code{$fname} =
                BGPmon::Fetch::File::get_error_code("read_xml_message");
            $error_msg{$fname} =
                BGPmon::Fetch::File::get_error_message("read_xml_message");
            return undef;
        }
        $xml = BGPmon::Fetch::File::read_xml_message();
    }
    return $xml;
}
600              
601             # get_next_update_file
602             #
603             #Iterates through a list of files and downloads and opens the next one
604             #Input: None
605             #Output: 0 on success, undef on failure
606             # On success the downloaded file has been opened via BGPmon::Fetch::File::connect_file
607              
sub get_next_update_file {
    my $fname = "get_next_update_file";

    # Refuse to open another file while one is still open on a live
    # connection.
    if ( is_connected() && BGPmon::Fetch::File::is_connected() ) {
        $error_code{$fname} = FILE_OPERATION_FAIL_CODE;
        $error_msg{$fname}  = FILE_OPERATION_FAIL_MSG;
        return undef;
    }

    # A connected archive with no index page loaded is an error.
    if ( !@index_page && is_connected() ) {
        $error_code{$fname} = NO_INDEX_PAGE_CODE;
        $error_msg{$fname}  = NO_INDEX_PAGE_MSG;
        return undef;
    }

    # Take the next link; when there is none, step to the following
    # month's index (only once connected) and take its first link.
    my $link = shift @index_page;
    unless ( defined($link) && $link ne "" ) {
        advanceIndex() if is_connected();
        @index_page = get_next_index();
        if ( !@index_page ) {
            $error_code{$fname} = DOWNLOAD_FAIL_CODE;
            $error_msg{$fname}  = DOWNLOAD_FAIL_MSG;
            return undef;
        }
        $link = shift @index_page;
    }

    # The local copy is named after the last path component of the URL.
    my $upd_fn     = ( split( "/", $link ) )[-1];
    my $local_path = "$scratch_dir/" . $upd_fn;

    my $fetched = download_URL( $link, $local_path );
    if ( !defined $fetched ) {
        $error_code{$fname} = DOWNLOAD_FAIL_CODE;
        $error_msg{$fname}  = DOWNLOAD_FAIL_MSG . " $upd_fn";
        return undef;
    }

    # connect_file() returns a true value on failure.
    if ( BGPmon::Fetch::File::connect_file($local_path) ) {
        $error_code{$fname} = FILE_OPERATION_FAIL_CODE;
        $error_msg{$fname}  = FILE_OPERATION_FAIL_MSG;
        return undef;
    }

    $files_read++;
    return 0;
}
660              
661             #advanceIndex
662             #A helper function to advance the month
663             #and possibly year module state variables.
664             #Input: None
665             #Output: None (returns 0)
666              
sub advanceIndex {
    # Step the module-level month forward by one, wrapping December to
    # January and bumping the year when that happens.
    my $next = ( $month + 1 ) % 13;
    $next = 1 if $next == 0;

    $month = sprintf( "%02u", $next );
    if ( $next == 1 ) {
        $year = sprintf( "%04u", $year + 1 );
    }
    return 0;
}
673              
674             #get_next_index
675              
676             #Fetches an index HTML page and returns the contents as an array
677             #Input: None
678             #Output: A list of update-file URLs found on the index page, or an empty list on failure
679              
sub get_next_index {
    my $fname    = "get_next_index";
    my $index_fn = "$scratch_dir/index.html";

    # Every piece of the index URL must have been set by connect_archive()
    # and set_append().
    if (   !defined($upd_url)
        || !defined($month)
        || !defined($year)
        || !defined($append) )
    {
        $error_code{$fname} = UNDEFINED_ARGUMENT_CODE;
        $error_msg{$fname}  = UNDEFINED_ARGUMENT_MSG;
        return;
    }

    # Construct the current index URL from the current month and year.
    my $index_url = "$upd_url/$year.$month/$append";

    if ( !defined( download_URL( $index_url, $index_fn ) ) ) {
        $error_code{$fname} = DOWNLOAD_FAIL_CODE;
        $error_msg{$fname}  = DOWNLOAD_FAIL_MSG;
        return;
    }

    # open() reports its failure reason in $!, not $@.
    my $index_fh;
    unless ( open( $index_fh, "<", $index_fn ) ) {
        $error_code{$fname} = FILE_OPERATION_FAIL_CODE;
        $error_msg{$fname}  = FILE_OPERATION_FAIL_MSG . ": $!";
        return;
    }

    # Collect one absolute URL per line that names an update file.  A
    # lexical loop variable keeps the caller's $_ intact.
    my @file_urls;
    while ( my $line = <$index_fh> ) {
        chomp($line);
        my $filename = get_filename_from_line($line);
        next if !defined($filename) || $filename eq "";
        push( @file_urls, $index_url . $filename );
    }
    close($index_fh);

    # The saved index page is no longer needed.  unlink() signals failure
    # through its return value rather than by dying.
    if ( !unlink($index_fn) ) {
        $error_code{$fname} = SYSCALL_FAIL_CODE;
        $error_msg{$fname}  = SYSCALL_FAIL_MSG . ": $!";
    }

    # An index page without any update links is reported as such; the
    # caller still receives an empty list, as before.
    if ( !@file_urls ) {
        $error_code{$fname} = NO_INDEX_PAGE_CODE;
        $error_msg{$fname}  = NO_INDEX_PAGE_MSG;
        return;
    }

    return @file_urls;
}
739              
740             # get_filename_from_line
741              
742             #This helper function extracts an update filename from a line of HTML.
743             #Input: A single line of HTML code
744             #Output: The filename extracted from the line (of the form
745             # updates.YYYYMMDD.HHMM.*.xml.[compression type] )
746             # or the empty string if none is found.
747              
sub get_filename_from_line {
    my $line  = shift;
    my $fname = "get_filename_from_line";

    # Returns the first double-quoted update filename on the line, the
    # empty string when the line holds none, or undef (with an error
    # recorded) when no line was supplied.
    if ( !defined($line) ) {
        $error_code{$fname} = UNDEFINED_ARGUMENT_CODE;
        $error_msg{$fname}  = UNDEFINED_ARGUMENT_MSG;
        return undef;
    }

    # Filenames look like this: updates.YYYYMMDD.HHMM.*.xml, optionally
    # followed by an up-to-4-character extension for compressed files.
    # The free-form middle part may not contain a double quote, so a line
    # carrying several quoted links cannot be swallowed as one filename.
    if (
        $line =~ m{
            "                                   # opening quote
            (
                updates \.
                [0-9]{4} [01][0-9] [0-3][0-9]   # YYYYMMDD
                \.
                [0-2][0-9] [0-5][0-9]           # HHMM
                [^"]*                           # optional extra fields
                \.xml
                (?: \. \w{0,4} )?               # compression suffix
            )
            "                                   # closing quote
        }x
        )
    {
        return $1;
    }
    return "";
}
768              
769              
# set_append
#
# Probes the possible final subdirectories under the archive's
# $year.$month page and records the first one that yields a valid index
# page in $append: "UPDATES/", "updates/", or "" (the month directory
# itself).  $append is left undefined when none of them validates.
#
# The probe page is downloaded to a temporary file in the scratch
# directory, which is removed again before returning.
#
# Input:  None (uses $upd_url, $year, $month and $scratch_dir)
# Output: 0

sub set_append{
    my $fname = "set_append";
    my $url = $upd_url."/$year.$month/";
    my $output = "$scratch_dir/index-test.html";

    # Try each candidate in order of preference; stop at the first hit.
    $append = undef;
    for my $candidate ("UPDATES/", "updates/", ""){
        if( download_URL($url.$candidate,$output) == 0
                && validateIndex($output) ){
            $append = $candidate;
            last;
        }
    }

    # unlink() reports failure through its return value and $!, it never
    # dies, so it has to be checked directly.  If no download ever
    # produced the file there is nothing to remove and nothing to report.
    if( -e $output && !unlink($output) ){
        $error_code{$fname} = SYSCALL_FAIL_CODE;
        $error_msg{$fname} = SYSCALL_FAIL_MSG.": $!";
    }
    return 0;
}
803              
# validateIndex
#
# Scans a downloaded HTML page to decide whether it is a valid index page
# for the current month.  A page is valid when some line contains the
# phrase "Index of" followed by a path ending in the literal
# "/$year.$month/" subdirectory.
#
# Input:  The name of the file to check
# Output: 1 if the file matches the search, 0 otherwise (including when
#         the argument is missing or the file cannot be opened, in which
#         case the error code/message is recorded under this function's
#         name)

sub validateIndex{
    my $index_fn = shift;
    my $fname = "validateIndex";
    if( !defined($index_fn) ){
        $error_code{$fname} = UNDEFINED_ARGUMENT_CODE;
        $error_msg{$fname} = UNDEFINED_ARGUMENT_MSG;
        return 0;
    }
    my $index_fh;
    unless( open($index_fh,"<",$index_fn) ){
        $error_code{$fname} = FILE_OPERATION_FAIL_CODE;
        $error_msg{$fname} = FILE_OPERATION_FAIL_MSG;
        return 0;
    }

    # \Q...\E makes the year.month directory name match literally: the
    # separating dot (and anything else in the two variables) must not be
    # treated as a regex metacharacter.  A lexical line variable is used
    # so the caller's $_ is left untouched.
    my $is_index = 0;
    while( my $line = <$index_fh> ){
        if( $line =~ m{Index of .*/\Q$year.$month\E/} ){
            $is_index = 1;
            last;
        }
    }
    close($index_fh);
    return $is_index;
}
836              
# download_URL
#
# Downloads a target URL and saves it to a user-specified file.
# This function is primarily a wrapper around LWP::Simple::getstore and
# LWP::Simple::is_error.
#
# Input:  a target URL, either an HTML index or another file; if it does
#         not start with a "scheme://" prefix, "http://" is prepended
#         an output file name
# Output: 0 on success, 1 on failure (missing/empty argument or failed
#         download; the error code/message is recorded under this
#         function's name)

sub download_URL{
    my $fname = "download_URL";

    #Get argument(s) and check that they exist
    my $target_url = shift;
    my $output = shift;

    #Both the target and the output file must be given and non-empty
    if(!defined($target_url) || $target_url eq "" ||
       !defined($output) || $output eq "" ){
        $error_code{$fname} = UNDEFINED_ARGUMENT_CODE;
        $error_msg{$fname} = UNDEFINED_ARGUMENT_MSG;
        return 1;
    }

    #Default to http only when the URL carries no scheme of its own.
    #The test is anchored at the start of the string so that "http"
    #appearing in a host name or path is not mistaken for a scheme, and
    #so that other schemes are passed through unchanged.
    if( $target_url !~ m{\A[A-Za-z][A-Za-z0-9+.\-]*://} ){
        $target_url = "http://".$target_url;
    }

    ## Download the specified file into the given output file
    my $ret = LWP::Simple::getstore($target_url, $output);
    #If grabbing the url fails, log the reason why (the status code
    #returned by getstore is appended to the message)
    if(LWP::Simple::is_error($ret)){
        $error_code{$fname} = DOWNLOAD_FAIL_CODE;
        $error_msg{$fname} = DOWNLOAD_FAIL_MSG.$ret." ";
        return 1;
    }
    return 0;
}
875              
876             ########################### END UNEXPORTED FUNCTIONS ##########################
877              
878             =head1 ERROR CODES AND MESSAGES

879             The following error codes and messages are defined:
880              
881             0: No Error
882             'No Error'
883              
884             401: A subroutine was missing an expected argument
885             'Undefined Argument(s)'
886              
887             402: There is no active connection to an archive
888             'Not connected to an archive'
889              
890             403: There is a currently-active connection to an archive
891             'Already connected to an archive'
892              
893             404: The module was unable to find an HTML index page
894             or any download links on the index page
895             'Unable to find an index page'
896              
897             405: A system call failed
898             'System call failed'
899              
900             406: An invalid value was passed to a subroutine as an argument
901             'Invalid value given for argument'
902              
903             407: The connection could not be initialized, either by a failure
904             to set the scratch directory, ignore-error flags, or
905             the first update file could not be loaded.
906             'Failed to initialize connection to archive'
907              
908             408: A filesystem 'open' command failed
909             'File operation failed'
910              
911             409: There was a failure trying to download a file
912             'Failed to download file'
913              
914             410: An invalid XML message was read, or the end of the archive was read
915             'Invalid message read'
916              
917             411: An invalid function name was passed to get_error_[code/message/msg]
918             'Invalid function specified'
919              
920             412: User tried to ignore all data errors, but was checking for
921             incomplete data
922             'Cannot have ignore_incomplete_data off with ignore_data_errors on'
923              
924             =head1 AUTHOR
925              
926             Jason Bartlett
927              
928             =head1 BUGS
929              
930             Please report any bugs or feature requests to the BGPmon maintainers,
931             or through the BGPmon project web interface.
932              
933              
934             =head1 SUPPORT
935              
936             You can find documentation for this module with the perldoc command.
937              
938             perldoc BGPmon::Fetch::Archive
939              
940             =cut
941              
942             =head1 LICENSE AND COPYRIGHT
943              
944             Copyright (c) 2012 Colorado State University
945              
946             Permission is hereby granted, free of charge, to any person
947             obtaining a copy of this software and associated documentation
948             files (the "Software"), to deal in the Software without
949             restriction, including without limitation the rights to use,
950             copy, modify, merge, publish, distribute, sublicense, and/or
951             sell copies of the Software, and to permit persons to whom
952             the Software is furnished to do so, subject to the following
953             conditions:
954              
955             The above copyright notice and this permission notice shall be
956             included in all copies or substantial portions of the Software.
957              
958             THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
959             EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
960             OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
961             NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
962             HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
963             WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
964             FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
965             OTHER DEALINGS IN THE SOFTWARE.
966              
967             File: Archive.pm
968              
969             Authors: M. Lawrence Weikum, Jason Bartlett, Kaustubh Gadkari, Dan Massey, Cathie Olschanowsky
970             Date: 20 November 2013
971              
972             =cut
973              
974             1; # End of BGPmon::Fetch::Archive