File Coverage

blib/lib/News/Archive.pm
Criterion Covered Total %
statement 30 294 10.2
branch 0 128 0.0
condition 0 72 0.0
subroutine 10 70 14.2
pod 48 49 97.9
total 88 613 14.3


line stmt bran cond sub pod time code
1             $VERSION = "0.13";
2             package News::Archive;
3             our $VERSION = "0.13";
4              
5             # -*- Perl -*- Tue May 25 14:37:47 CDT 2004
6             ###############################################################################
7             # Written by Tim Skirvin . Copyright 2003-2004,
8             # Tim Skirvin. Redistribution terms are below.
9             ###############################################################################
10              
11             =head1 NAME
12              
13             News::Archive - archive news articles for later use
14              
15             =head1 SYNOPSIS
16              
17             use News::Archive;
18             my $archive = new News::Archive
19             ( 'basedir' => '/home/tskirvin/kiboze' );
20            
21             # Get a news article
22             my $article = News::Article->new(\*STDIN);
23             my $msgid = article->header('message-id');
24              
25             die "Already processed '$msgid'\n"
26             if ($archive->article( $messageid ));
27              
28             # Get the list of groups we're supposed to be saving the article into
29             my @groups = split('\s*,\s*', $article->header('newsgroups') );
30             map { s/\s+//g } @groups;
31              
32             # Make sure we're subscribed to these groups
33             foreach (@groups) { $archive->subscribe($_) }
34              
35             # Actually save the article.
36             my $ret = $archive->save_article(
37             [ @{$article->rawheaders}, '', @{$article->body} ], @groups );
38             $ret ? print "Accepted article $messageid\n"
39             : print "Couldn't save article $messageid\n";
40              
41             See below for more options.
42              
43             =head1 DESCRIPTION
44              
45             News::Archive is a package for storing news articles in an accessible
46             form. Articles are stored one-per-file, and are accessible by either
47             message-ID or overview information. The files are then accessible with a
48             Net::NNTP compatible interface, for easy access by other packages.
49              
50             News::Archive keeps several files to keep track of its archives:
51              
52             =over 4
53              
54             =item active file
55              
56             Keeps track of all newsgroups we are "subscribed" to and all of the
57             information that changes regularly - the number of articles we have
58             archived, the current first and last article numbers, etc.
59              
60             Watched over with News::Active.
61              
62             =item history database
63              
64             A simple database keeping track of articles by Message-ID. Makes access
65             by ID easy, and ensures that we don't save the same article twice. The
66             database chosen to maintain these is user-determined.
67              
68             =item newsgroup file
69              
70             Keeps track of more static information about the newsgroups we are
71             subscribed to - descriptions, creation dates, etc.
72              
73             Watched over with News::GroupInfo.
74              
75             =item archive directory
76              
77             Directory structure of all articles, with each article saved as a single
78             textfile within a directory structure laid out at one section of the group
79             name per directory, such as "rec/games/mecha". Crossposts are hardlinked
80             to other directory structures.
81              
82             Articles are actually divided into sub-directories containing up to 500
83             articles, to avoid Unix directory size performance limitations.
84             Individual files are thus stored in a file such as
85             "rec/games/mecha/1.500/1".
86              
87             Each newsgroup also contains overview information, watched over with
88             News::Overview. This overview file goes in the top of the structure, such
89             as "rec/games/mecha/.overview".
90              
91             =back
92              
93             You may note that these files are very similar to how INN does its work.
94             This is intentional - this package is meant to act in many ways like a
95             lighter-weight INN.
96              
97             =head1 USAGE
98              
99             =cut
100              
101             ###############################################################################
102             ### Variables #################################################################
103             ###############################################################################
104 1     1   900 use vars qw( $DEBUG $HOSTNAME $ERROR $HASH $READONLY );
  1         1  
  1         140  
105              
106             =head2 Global Variables
107              
108             The following variables are set within News::Archive, and are global
109             throughout all invocations.
110              
111             =over 4
112              
113             =item $News::Active::DEBUG
114              
115             Default value for C in new objects.
116              
117             =cut
118              
119             $DEBUG = 0;
120              
121             =item $News::Active::HOSTNAME
122              
123             Default value for C in new objects. Obtained using
124             C.
125              
126             =cut
127              
128             $HOSTNAME = hostname();
129              
130             =item $News::Active::HASH
131              
132             The number of articles to keep in each directory. Default is 500; change
133             this at your own peril, since things may get screwed up later if you
134             change it after archiving any articles!
135              
136             =cut
137              
138             $HASH = 500; # How many articles should we hash directories off at?
139              
140             ## Internal only - default error string. We're not currently using this
141             ## well, and may never be, so let's not talk about it in the docs much.
142              
143             $ERROR = "";
144              
145             ## Should we open things read-only by default?
146              
147             $READONLY = 0;
148              
149             =back
150              
151             =cut
152              
153             ###############################################################################
154             ### main() ####################################################################
155             ###############################################################################
156              
157 1     1   6 use strict;
  1         1  
  1         35  
158 1     1   5 use warnings;
  1         4  
  1         39  
159 1     1   1552 use News::Article;
  1         28177  
  1         52  
160 1     1   1179 use News::Overview;
  1         18028  
  1         33  
161 1     1   691 use News::Active;
  1         3  
  1         28  
162 1     1   623 use News::GroupInfo;
  1         2  
  1         36  
163 1     1   8 use Net::NNTP::Functions;
  1         2  
  1         64  
164 1     1   993 use Sys::Hostname;
  1         1508  
  1         52  
165 1     1   10 use Fcntl;
  1         2  
  1         4962  
166              
167             ###############################################################################
168             ### Basic Functions ###########################################################
169             ###############################################################################
170              
171             =head2 Basic Functions
172              
173             These functions create and deal with the object itself.
174              
175             =over 4
176              
177             =item new ( HASHREF )
178              
179             Creates the News::Archive object. C contains initialization
180             information for this object; currently supported options:
181              
182             basedir Base directory for this object to work with.
183             Required; we will fail without this.
184             archives Location of the post archives. Defaults to
185             $basedir/archives
186             historyfile Location of the history database. Defaults to
187             $basedir/historyfile
188             activefile Location of the active file. Defaults to
189             $basedir/active
190             overfilename File name for the overview database files in each
191             newsgroup hierarchy. Defaults to ".overview".
192             db_type The type of perl database we will use to store
193             files that need that level of service. Defaults
194             to 'DB_File'
195             groupinfofile Location of the groupinfo file. Defaults to
196             $basedir/newsgroups.
197             hostname String to use when a local hostname is required.
198             Defaults to $News::Archive::HOSTNAME.
199             debug Should we print debugging information? Defaults to
200             $News::Archive::DEBUG.
201             readonly Should we open this read-only?
202              
203             Returns the blessed object on success, or undef on failure.
204              
205             =cut
206              
207             sub new {
208 0     0 1   my ($proto, %hash) = @_;
209 0 0         unless ( $hash{'basedir'} ) {
210 0           _set_error("No 'basedir' value offered (is your config file set up?)");
211 0           return undef;
212             }
213 0 0         my $basedir = $hash{'basedir'} or return undef;
214 0   0       my $class = ref($proto) || $proto;
215 0 0 0       my $self = {
      0        
      0        
      0        
      0        
      0        
      0        
      0        
216             'group' => undef,
217             'pointer' => 0,
218             'readonly' => $hash{'readonly'} || $READONLY || 0,
219             'archives' => $hash{'archives'} || "$basedir/archives",
220             'historyfile' => $hash{'historyfile'} || "$basedir/history",
221             'activefile' => $hash{'activefile'} || "$basedir/active",
222             'overfilename' => $hash{'overfile'} || ".overview",
223             'db_type' => $hash{'db_type'} || "DB_File",
224             'groupinfofile' => $hash{'groupinfofile'} || "$basedir/newsgroups",
225             'hostname' => $hash{'hostname'} || $HOSTNAME || 'localhost',
226             'debug' => defined $hash{'debug'} ? $hash{'debug'} : $DEBUG,
227             };
228 0           bless $self, $class;
229 0   0       $$self{'history'} = $self->history || return undef;
230 0   0       $$self{'active'} = $self->activefile || return undef;
231 0   0       $$self{'groupinfo'} = $self->groupinfo || return undef;
232 0           $self;
233             }
234              
235             =item activefile ()
236              
237             Returns the News::Active object based on C, set in new(). If
238             this object has not already been opened and created, creates it;
239             otherwise, just returns the existing object. Passes on the 'readonly'
240             flag.
241              
242             =cut
243              
244             sub activefile {
245 0     0 1   my ($self) = @_;
246 0   0       $$self{'active'} ||= new News::Active($$self{activefile},
247             'readonly' => $$self{readonly});
248 0           $$self{'active'};
249             }
250              
251             =item activeclose ()
252              
253             Writes out and closes the News::GroupInfo object.
254              
255             =cut
256              
257             sub activeclose {
258 0     0 1   my ($self) = @_;
259 0 0         return 1 unless $$self{'active'};
260 0           $self->activefile->write;
261 0           delete $$self{'active'};
262 0           1;
263             }
264              
265             =item groupinfo ()
266              
267             Returns the News::GroupInfo object based on C, set in
268             new(). If this object has not already been opened and created, creates
269             it; otherwise, just returns the existing object. Passes on the 'readonly'
270             flag.
271              
272             =cut
273              
274             sub groupinfo {
275 0     0 1   my ($self) = @_;
276 0   0       $$self{'groupinfo'} ||= new News::GroupInfo($$self{groupinfofile},
277             'readonly' => $$self{readonly});
278 0           $$self{'groupinfo'};
279             }
280              
281             =item groupclose ()
282              
283             Writes out and closes the News::GroupInfo object.
284              
285             =cut
286              
287             sub groupclose {
288 0     0 1   my ($self) = @_;
289 0 0         return 1 unless $$self{'groupinfo'};
290 0           $self->groupinfo->write;
291 0           $$self{'groupinfo'} = undef;
292 0           1;
293             }
294              
295             =item history ()
296              
297             Returns a tied hashref based on C, set in new(). If this
298             object has not already been opened and created, creates it; otherwise,
299             just returns the existing object.
300              
301             =cut
302              
303             sub history {
304 0     0 1   my ($self) = @_;
305 0   0       $$self{'history'} ||= $self->_tie($$self{historyfile}, $$self{db_type});
306 0           $$self{'history'};
307             }
308              
309             =item debug ()
310              
311             Returns true if we want to print debugging information, false otherwise.
312             Used a lot internally, may also be used externally.
313              
314             =cut
315              
316 0     0 1   sub debug { shift->{debug} }
317              
318             =item activeentry ( GROUP )
319              
320             Returns the News::Active::Entry information for the given C.
321              
322             =cut
323              
324 0     0 1   sub activeentry { shift->activefile->entry(shift) }
325              
326             =item groupentry ( GROUP )
327              
328             Returns the News::GroupInfo::Entry information for the given C.
329              
330             =cut
331              
332 0     0 1   sub groupentry { shift->groupinfo->entry(shift) }
333              
334             =item close ()
335              
336             Close all open files.
337              
338             =cut
339              
340             sub close {
341 0     0 1   my $self = shift;
342 0           $self->groupclose;
343 0           $self->activeclose;
344 0           untie %{$self->{history}};
  0            
345             }
346              
347             =back
348              
349             =cut
350              
351             ###############################################################################
352             ### Internal Functions - Basic ################################################
353             ###############################################################################
354              
355             ## _tie ( FILE [, CLASS] )
356             # Ties a database file to a hash. CLASS defaults to the normal internal
357             # database type. Currently only works with DB_File and SDBM_File
358             sub _tie {
359 0     0     my ($self, $file, $class, @args) = @_;
360 0 0         return "" unless $file;
361 0   0       $class ||= $self->{db_type};
362 0           my %tie;
363 0 0 0       if ($class eq 'DB_File' || $class eq 'SDBM_File') {
364 0           require "$class.pm";
365 0 0         my $opentype = $$self{readonly} ? O_RDONLY : O_CREAT|O_RDWR;
366 0 0         tie %tie, $class, $file, $opentype, 0755
367             or ( warn "Couldn't tie $file: $!\n" & return ());
368 0           } else { %tie = () }
369 0           \%tie;
370             }
371              
372             ## _isnumeric ( STRING )
373             # Returns 1 if STRING is purely numeric (no negative numbers!), 0 otherwise.
374 0 0   0     sub _isnumeric { shift =~ m/^[\d\.]+$/ ? 1 : 0 }
375              
376             ## _mkdir_full ( DIR )
377             # Make a full directory structure. Not exactly what I want, but it'll do
378             # for now.
379             sub _mkdir_full {
380 0     0     my ($self, $dir) = @_;
381 0 0         return 1 if -d $dir;
382 0 0         warn "Making directory $dir\n" if $self->debug;
383 0           system("mkdir -p $dir");
384             }
385              
386             ## DESTROY ()
387             # Item destructor. Untie the active and history information.
388             sub DESTROY {
389 0     0     my $self = shift;
390 0           $self->close;
391             }
392              
393             ###############################################################################
394             ### Error Functions ###########################################################
395             ###############################################################################
396              
397             =head2 Error Functions
398              
399             These functions deal with the global error variable, which is currently
400             not being used very effectively.
401              
402             =over 4
403              
404             =item error ( [ERROR] )
405              
406             Returns the text (a scalar) describing the last error message. If
407             C is offered, then it sets the error message to this first.
408              
409             =cut
410              
411 0 0   0 1   sub error { $ERROR = $_[1] if defined $_[1]; $ERROR }
  0            
412              
413             =item clear_error ()
414              
415             Clears the error message.
416              
417             =cut
418              
419 0     0 1   sub clear_error { $ERROR = "" }
420              
421             =back
422              
423             =cut
424              
425             ###############################################################################
426             ### Internal Functions - Error ################################################
427             ###############################################################################
428              
429             ## _set_error ( ERROR )
430             # Set the $ERROR variable internally.
431 0     0     sub _set_error { $ERROR = shift; $ERROR }
  0            
432              
433             ###############################################################################
434             ### NNTP Functions ############################################################
435             ###############################################################################
436              
437             =head2 Net::NNTP Equivalents
438              
439             The following functions are the equivalent of the Net::NNTP commands; they
440             are provided for compatibility with News::Web and other news functions.
441             More information on their use is available in those manual pages.
442              
443             =over 4
444              
445             =item article ( [ MSGID|MSGNUM ], [FH] )
446              
447             Retrives the article indicated by C or C (B) as
448             the headers, a blank line, and then the body of the article. Either
449             prints it to C (if offered) or returns an array reference containing
450             the text.
451              
452             Returns undef if the article is not found.
453              
454             =cut
455              
456             sub article {
457 0     0 1   my ($self, $id, $fh) = @_;
458 0   0       my $article = $self->_article($id || 0);
459 0 0         return undef unless $article;
460 0 0         $fh ? print $fh join("\n", $article->rawheaders(), "", @{$article->body}, "")
  0            
461             : [ $article->rawheaders(), '', $article->body(), '' ];
462             }
463              
464             =item head ( [ MSGID|MSGNUM ], [FH] )
465              
466             As with C, but only returns the header of the article.
467              
468             =cut
469              
470             sub head {
471 0     0 1   my ($self, $id, $fh) = @_;
472 0   0       my $article = $self->_article($id || 0);
473 0 0         return undef unless $article;
474 0 0         $fh ? print $fh join("\n", $article->rawheaders())
475             : [ $article->rawheaders() ];
476             }
477              
478             =item body ( [ MSGID|MSGNUM ], [FH] )
479              
480             As with C, but only returns the body of the article.
481              
482             =cut
483              
484             sub body{
485 0     0 1   my ($self, $id, $fh) = @_;
486 0   0       my $article = $self->_article($id || 0);
487 0 0         return undef unless $article;
488 0 0         $fh ? print $fh join("\n", @{$article->body})
  0            
489             : [ $article->body() ];
490             }
491              
492             =item nntpstat ( [ MSGID|MSGNUM ] )
493              
494             As with C, but only returns the article's message-id. Returns
495             undef if not set or the article didn't exist.
496              
497             =cut
498              
499             sub nntpstat {
500 0     0 1   my ($self, $id) = @_;
501 0   0       my $article = $self->_article($id || 0);
502 0 0         return undef unless $article;
503 0 0         $article->header('message-id') || undef;
504             }
505              
506             =item group ( [GROUP] )
507              
508             Sets the current group pointer; necessary if we want to use C
509             or its ilk by message number and not message-ID. In array context,
510             returns the active information of the group as a list (number of articles,
511             first article number, last article number, group name). In scalar
512             context, just returns the group name.
513              
514             =cut
515              
516             sub group {
517 0     0 1   my ($self, $group) = @_;
518 0 0         defined $group ? $$self{'group'} = $group
519             : $group = $$self{'group'};
520 0 0         return ( wantarray ? [] : "" ) unless $$self{'group'};
    0          
521 0 0         return "" unless $$self{'group'};
522 0 0         wantarray ? @{$self->_groupinfo($group)} : $group;
  0            
523             }
524              
525             =item ihave ( MSGID, MESSAGE )
526              
527             Writes an article to the archive with Message-ID C. C is
528             the actual message. Invokes C.
529              
530             (Note that this is preferred to C, at least here, because it lets
531             us tell much earlier if we don't want the article.)
532              
533             =cut
534              
535             sub ihave {
536 0     0 1   my ($self, $msgid, @message) = @_;
537 0 0         return 0 if $self->history->{$msgid}; # We already have it
538 0           $self->save_article(\@message);
539             }
540              
541             =item last ()
542              
543             Unimplemented.
544              
545             =cut
546              
547 0     0 1   sub last { _unsupported() }
548              
549             =item date ()
550              
551             Returns the local time (in seconds since the epoch).
552              
553             =cut
554              
555 0     0 1   sub date { time }
556              
557             =item postok ()
558              
559             Returns 0; we don't want anything to get the idea that it can post.
560              
561             =cut
562              
563 0     0 1   sub postok { 0 }
564              
565             =item authinfo ()
566              
567             Unimplemented.
568              
569             =cut
570              
571 0     0 1   sub authinfo { _unsupported() }
572              
573             =item list ()
574              
575             Same as C, listing all active groups.
576              
577             =cut
578              
579 0     0 1   sub list { shift->active("*") }
580              
581             =item newgroups ()
582              
583             Unimplemented.
584              
585             =cut
586              
587 0     0 1   sub newgroups { _unsupported() }
588              
589             =item newnews ()
590              
591             Unimplemented.
592              
593             =cut
594              
595 0     0 1   sub newnews { _unsupported() }
596              
597             =item newnews ()
598              
599             Unimplemented.
600              
601             =cut
602              
603 0     0 0   sub next { _unsupported() }
604              
605             =item post ( MESSAGE )
606              
607             Writes an article to the archive. C is the actual message.
608             Invokes C.
609              
610             =cut
611              
612             sub post {
613 0     0 1   my ($self, @message) = @_;
614 0           $self->save_article(\@message);
615             }
616              
617             =item slave ()
618              
619             Unimplemented.
620              
621             =cut
622              
623 0     0 1   sub slave { _unsupported() }
624              
625             =item quit ()
626              
627             Close the current connection; clear the current group, and reset the
628             pointer. Returns 1.
629              
630             =cut
631              
632             sub quit {
633 0     0 1   my ($self) = @_;
634 0           $self->{group} = undef;
635 0           $self->{pointer} = 0;
636 0           1;
637             }
638              
639             =item newsgroups ( [PATTERN] )
640              
641             Returns a hashref where the keys are the newsgroups that match the pattern
642             C (uses C), and the values are descriptiion text for
643             the newsgroup.
644              
645             =cut
646              
647             sub newsgroups {
648 0     0 1   my ($self, $pattern) = @_;
649 0           my $hash = $self->active($pattern);
650 0           foreach (keys %{$hash}) {
  0            
651 0           my $group = $self->groupinfo->entry($_);
652 0 0         $$hash{$_} = $group ? $group->desc : $_ ;
653             }
654 0           $hash;
655             }
656              
657             =item distributions
658              
659             Not implemented.
660              
661             =cut
662              
663 0     0 1   sub distributions { _unsupported() }
664              
665             =item subscriptions ()
666              
667             Returns a listref to all groups that we are subscribed to. This is not
668             ideal; we may only want the ones that we have descriptions for, or a
669             specific flag set in News::GroupInfo, or something. It works for now,
670             though.
671              
672             =cut
673              
674             sub subscriptions { # [ keys %{active(@_)} ] }
675 0     0 1   my ($self, $pattern) = @_;
676 0   0       $pattern ||= '*';
677 0           my %return;
678 0           foreach my $item ($self->groupinfo->entries($pattern)) {
679 0 0         next unless wildmat($pattern, $item->name); # Is this necessary?
680 0           $return{$item->name} = $item->arrayref;
681             }
682 0           [ keys %return ];
683             # \%return;
684             }
685              
686             =item overview_fmt ()
687              
688             Returns the overview format information from News::Overview, since that's
689             what we're currently using.
690              
691             =cut
692              
693 0     0 1   sub overview_fmt { News::Overview::overview_fmt }
694              
695             =item active_times ( [PATTERN] )
696              
697             Returns a hashref where the keys are the group names, and the values are
698             the results from Carrayref()>.
699              
700             =cut
701              
702             sub active_times {
703 0     0 1   my ($self, $pattern) = @_;
704 0           my %return;
705 0           foreach my $item ($self->groupinfo->entries($pattern)) {
706 0           $return{$item->name} = $item->arrayref;
707             }
708 0           \%return;
709             }
710              
711             =item active ( [PATTERN] )
712              
713             Returns a hashref where the keys are the group names, and the values are
714             the results from Carrayref()>.
715              
716             =cut
717              
718             sub active {
719 0     0 1   my ($self, $pattern) = @_;
720 0   0       $pattern ||= '*';
721 0           my %return;
722 0           foreach my $item ($self->activefile->entries($pattern)) {
723 0 0         next unless wildmat($pattern, $item->name); # Is this necessary?
724 0           $return{$item->name} = $item->arrayref;
725             }
726 0           \%return;
727             }
728              
729             =item xgtitle ( [PATTERN] )
730              
731             Same as C
732              
733             =cut
734              
735 0     0 1   sub xgtitle { newsgroups(@_) }
736              
737             =item xhdr ( HEADER, SPEC [, PATTERN] )
738              
739             =cut
740              
741             sub xhdr {
742 0     0 1   my ($self, $hdr, $spec, $pattern) = @_;
743 0   0       $pattern ||= '*';
744 0           my $xover = $self->xover($spec, $hdr);
745 0           my %return;
746 0           foreach (keys %{$xover}) {
  0            
747 0           my $string = join(' ', @{$$xover{$_}});
  0            
748 0 0         next unless wildmat($pattern, $string);
749 0           $return{$_} = $string;
750             }
751 0           \%return;
752             }
753              
754             =item xover ( MATCH, HDR )
755              
756             Gets information from the stored overview database. See B
757             for more information on how this works.
758              
759             =cut
760              
761             sub xover {
762 0     0 1   my ($self, $match, $hdr) = @_;
763 0 0         my $group = $self->group; return [] unless $group;
  0            
764 0           $self->overview_read($group, $match, $hdr);
765             }
766              
767             =item xpath ( MID )
768              
769             Returns the full path name on the server of the location of the given
770             article.
771              
772             =cut
773              
774             sub xpath {
775 0     0 1   my ($self, $mid) = @_;
776 0 0         my $history = $self->history->{$mid}; return undef unless $history;
  0            
777 0           my ($group, $number) = split('/', $history);
778 0           my $dir = $self->_dirname($group);
779 0           my $file = $self->_filename($number);
780 0           "$dir/$file";
781             }
782              
783             =item xpat ( HEADER, SPEC [, PATTERN] )
784              
785             Same as C.
786              
787             =cut
788              
789 0     0 1   sub xpat { xhdr(@_) }
790              
791             =item xrover ( SPEC )
792              
793             Same as $self->xhdr('References', SPEC)
794              
795             =cut
796              
797             sub xrover {
798 0     0 1   my ($self, $spec) = @_;
799 0           $self->xhdr('References', $spec);
800             }
801              
802             =item listgroup
803              
804             Unimplemented.
805              
806             =cut
807              
808 0     0 1   sub listgroup { _unsupported() }
809              
810             =item reader ()
811              
812             Unimplemented.
813              
814             =cut
815              
816 0     0 1   sub reader { _unsupported() }
817              
818             =back
819              
820             =cut
821              
822             ###############################################################################
823             ### Internal Functions - NNTP #################################################
824             ###############################################################################
825              
826             ## _group ()
827             # Returns the current value of 'group' that we're working with.
828 0     0     sub _group { shift->{'group'} }
829              
830             ## _article ( ID )
831             # Loads the article indicated with ID. Returns a News::Article object
832             # if successful.
833             sub _article {
834 0     0     my ($self, $id) = @_;
835            
836             # If the given ID is numeric or not given, then try to find and
837             # load the appropriate message-ID. If that doesn't work, then it
838             # was a bad number.
839 0 0         return undef unless $id;
840              
841 0           my ($group, $number);
842 0 0         if ( _isnumeric($id) ) { # Just a number -> we need the group
843 0 0         return undef if ($id eq 0);
844 0   0       $group = $self->_group() || return undef;
845 0           $number = $id;
846             } else { # Get the group from the ID
847 0 0         my $history = $self->history->{$id}; return undef unless $history;
  0            
848 0           ($group, $number) = split('/', $history);
849             }
850              
851 0           my $dir = $self->_dirname($group);
852 0           my $file = $self->_filename($number);
853              
854 0 0         print "Looking for article $id at $dir/$file\n" if $self->debug;
855            
856 0           my $article = new News::Article; $article->read("$dir/$file");
  0            
857 0 0         $article || undef;
858             }
859              
860             ## _groupinfo ( GROUP )
861             # Pulls out activefile information on the given group - # of articles,
862             # # of first article, # of last article, group name. Returns an arrayref
863             # with this information.
864             sub _groupinfo {
865 0     0     my ($self, $group, @args) = @_;
866 0           my $active = $self->activefile->entry($group);
867 0 0 0       return [] unless ($active && ref $active);
868 0           [ $active->count, $active->first, $active->final, $active->name ];
869             }
870              
871              
872             ## _unsupported ()
873             # Used for unsupported NNTP functions. Not particularly interesting, but
874             # it might be better later.
875 0     0     sub _unsupported { undef }
876              
877             ###############################################################################
878             ### Archive Functions #########################################################
879             ###############################################################################
880              
881             =head2 Archive Functions
882              
883             The following functions actually deal with the archive itself.
884              
885             =over 4
886              
887             =item save_article ( LINES [, GROUPS] )
888              
889             Saves an article into the archive. C is an arrayref that is
890             passed to News::Article; C is an array of groups that we want to
891             save the article to, if not those listed in the Newsgroups: header.
892              
893             The article is modified by adding C onto the Path: header and
894             creating a new Xref: header to match where we will save the article. The
895             file is primarily linked to a single location, and hardlinks are made to
896             the other locations. Overview information is generated for each group,
897             history information is saved to ensure that we don't save the same article
898             twice, and directories are created as needed.
899              
900             Note that there are currently some race conditions possible with this
901             function, which should be partially solved be adding file and directory
902             locking.
903              
904             =cut
905              
906             sub save_article {
907 0     0 1   my ($self, $lines, @groups) = @_;
908 0           my $article = new News::Article(\@$lines);
909              
910 0           my $messageid = $article->header('message-id');
911 0           $messageid =~ s/\s+//g;
912              
913 0 0         unless ( scalar @groups ) {
914 0           @groups = split('\s*,\s*', $article->header('newsgroups'))
915             }
916            
917             # Create a new Path header to reflect the new server
918 0   0       my $newpath = join('!', $self->{'hostname'} || 'localhost',
919             $article->header('Path'));
920 0           $article->set_headers('Path', $newpath);
921              
922 0           my ($maingroup, %files);
923 0           foreach my $group (@groups) {
924 0 0         next unless $self->subscribed($group);
925 0           my $actentry = $self->activeentry($group);
926 0 0         next unless $actentry;
927 0           $files{$group} = $actentry->next_number;
928 0 0         $maingroup = $group unless $maingroup;
929             }
930              
931             # Make a new Xref header
932 0   0       my $newxref = $self->{'hostname'} || 'localhost';
933 0           foreach (@groups) {
934 0 0         next unless $files{$_};
935 0           $newxref = join(' ', $newxref, "$_:$files{$_}" );
936             }
937 0           $article->set_headers('Xref', $newxref);
938              
939             # Create the files
940 0           my $file;
941 0           foreach my $group (@groups) {
942 0 0         next unless $files{$group};
943 0           my $active = $self->activefile->entry($group);
944 0 0         next unless $active;
945 0           my $dir = $self->_dirname($group);
946 0           my $filename = $self->_filename($files{$group});
947              
948             # Making the directory if it's necessary
949 0           my $path = $filename; $path =~ s%^(.*)/[^/]+$%$1%;
  0            
950 0 0         unless (-d "$dir/$path") { $self->_mkdir_full( "$dir/$path" ); }
  0            
951              
952 0 0         if ($file) {
953 0           my $link = "$dir/$filename";
954 0 0         print "Linking $link to $file\n" if $self->debug;
955 0 0         link ( $file, $link ) && $active->add_article();
956             } else {
957 0           $file = "$dir/$filename";
958 0 0         print "Writing $messageid to $file\n" if $self->debug;
959 0 0         open(FILE, ">$file") or return undef;
960 0           $article->write(\*FILE);
961 0           CORE::close FILE;
962 0           $active->add_article;
963             }
964              
965             # Populate the overview files
966 0           $self->overview_add($files{$group}, $group, $article);
967             }
968              
969             # Populate the history file
970 0           my $history = "$maingroup/$files{$maingroup}";
971 0 0         print "$messageid is saved as '$history'\n" if $self->debug;
972 0           $self->history->{$messageid} = $history;
973              
974 0           1;
975             }
976              
977             =item subscribe ( GROUP )
978              
979             Subscribe to the given C, by adding information about the group to
980             the active and groupinfo files and starting the directory tree.
981              
982             =cut
983              
984             sub subscribe {
985 0     0 1   my ($self, $group) = @_;
986 0 0         return 1 if $self->subscribed($group);
987 0           $self->activefile->subscribe($group);
988 0           $self->groupinfo->subscribe($group, time, 'generic', 'No Description');
989 0           $self->_mkdir_full( $self->_dirname($group) );
990 0           1;
991             }
992              
993             =item unsubscribe ( GROUP )
994              
995             Unsubscribe from C, by removing information about it from the
996             active and groupinfo files.
997              
998             =cut
999              
1000             sub unsubscribe {
1001 0     0 1   my $self = shift;
1002 0 0         $self->activefile->unsubscribe(@_)
1003             &&
1004             $self->groupinfo->unsubscribe(@_);
1005             }
1006              
1007             =item subscribed ( GROUP )
1008              
1009             Returns 1 if we are subscribed to C, 0 otherwise.
1010              
1011             =cut
1012              
1013 0 0   0 1   sub subscribed { shift->activefile->subscribed(shift) ? 1 : 0 }
1014              
1015             =item overview_add ( NUMBER, GROUP, ARTICLE )
1016              
1017             Add information to C's overview information regarding article
1018             C, which is C
. Just appends the information to the
1019             overview database; we don't need to do anything more at this point.
1020              
1021             =cut
1022              
1023             sub overview_add {
1024 0     0 1   my ($self, $number, $group, $article) = @_;
1025              
1026             # Get the proper overfiew info - this is too convoluted
1027 0           my $over = new News::Overview;
1028 0           my $artinfo = $over->add_from_article($number, $article);
1029            
1030             # Write out the information
1031 0 0         my $dir = $self->_dirname($group); next unless -d $dir;
  0            
1032 0           my $filename = join('/', $dir, $$self{overfilename});
1033 0 0         open(OVER, ">>$filename") or next;
1034 0           print OVER $over->print, "\n";
1035 0           CORE::close OVER;
1036              
1037 0           1;
1038             }
1039              
1040             =item overview_read ( GROUP, MESSAGE-SPEC [, HDR ] )
1041              
1042             Get the overview information from C for the articles specified by
1043             C (see B). If C is offered, only return
1044             that header information. Mostly invokes C.
1045              
1046             =cut
1047              
1048             sub overview_read {
1049 0     0 1   my ($self, $group, $match, $hdr) = @_;
1050 0 0         return {} unless $group;
1051              
1052 0           my ($first, $last) = messagespec($match);
1053            
1054 0           my $dir = $self->_dirname($group);
1055 0           my $filename = join('/', $dir, $$self{overfilename});
1056 0           my $over = new News::Overview;
1057 0 0 0       open(OVER, $filename) or (warn "Couldn't open $filename: $!\n" && return {} );
1058 0           foreach () {
1059 0 0 0       next if $_ < $first; next if ($last > $first and $_ > $last);
  0 0          
1060 0           $over->add_xover($_)
1061             }
1062 0           CORE::close OVER;
1063              
1064 0 0         $hdr ? $over->xover($match, $hdr) : $over->xover($match);
1065             }
1066              
1067             =back
1068              
1069             =cut
1070              
1071             ###############################################################################
1072             ### Internal Functions - Archive ##############################################
1073             ###############################################################################
1074              
1075             ## _dirname( GROUPNAME )
1076             # Makes the base directory name out of the group name and the 'archives'
1077             # value (which is where the files are stored). '.'s are replaced with '/'.
1078 0     0     sub _dirname { join('/', shift->{archives}, split('\.', shift)) }
1079              
1080             ## _filename( NUMBER )
1081             # Returns the filename of the individual message based on the number of
1082             # the message. This is a longer directory name based on $HASH - we only
1083             # want so many messages per directory. This could be more complicated,
1084             # and probably will have to be some day, but it works for now.
1085             sub _filename {
1086 0     0     my ($self, $number) = @_;
1087 0           my $floor = int ( ( $number - 1 ) / $HASH ) * $HASH + 1;
1088 0           my $ceiling = $floor + $HASH - 1;
1089 0           sprintf("%d.%d/%d", $floor, $ceiling, $number);
1090             }
1091              
1092             1;
1093              
1094             =head1 NOTES
1095              
1096             This module has grown out of my original kiboze.pl scripts, which
1097             accomplished essentially the same writing functions but none of the
1098             reading ones. While a write-only interface has been somewhat beneficial,
1099             this should be much more helpful.
1100              
1101             =head1 TODO
1102              
1103             Start using the AutoLoader (or something like it)
1104              
1105             File locking across the board, along with read-only opens.
1106              
1107             Close and re-open the databases periodically, to write stuff out while in
1108             the middle of an operation.
1109              
1110             While we currently have basic hashing taking place on the newsgroups to
1111             prevent the directories from getting too large, it would be nice if this
1112             were instead done as a time-hash - that is, if the article was from 28 Apr
1113             2004, we could make directories that looked like 2004.01.01 (yearly
1114             hashing), 2004.04.01 (monthly), or 2004.04.28 (daily).
1115              
1116             More News::Web changes to better connect with News::Archive would be nice.
1117              
1118             Using a different Overview format may make sense.
1119              
1120             Offer some functions to rebuild overview information later.
1121              
1122             Offer something to make default ~/.kibozerc files.
1123              
1124             =cut
1125              
1126             =head1 REQUIREMENTS
1127              
1128             C, B, B,
1129             B, B, B
1130              
1131             =head1 SEE ALSO
1132              
1133             Modules: B, B, B,
1134             B, B, B
1135              
1136             Scripts: B, B, B
1137              
1138             =head1 AUTHOR
1139              
1140             Tim Skirvin
1141              
1142             =head1 HOMEPAGE
1143              
1144             B
1145              
1146             =head1 LICENSE
1147              
1148             This code may be redistributed under the same terms as Perl itself.
1149              
1150             =head1 COPYRIGHT
1151              
1152             Copyright 2003-2004, Tim Skirvin.
1153              
1154             =cut
1155              
1156             ###############################################################################
1157             ### Version History ###########################################################
1158             ###############################################################################
1159             # v0.10 Wed Apr 28 16:59:53 CDT 2004
1160             ### First documented version.
1161             # v0.11 Thu Apr 29 10:15:51 CDT 2004
1162             ### Using '1.500' instead of '1-500', to make sure there's no collisions
1163             ### with actual groupnames. groupclose() and activeclose().
1164             # v0.12 Tue May 25 11:03:36 CDT 2004
1165             ### Trying to add a 'read-only' aspect to this.
1166             # v0.13 Tue May 25 14:37:13 CDT 2004
1167             ### Some changes in how it writes stuff out. DESTROY isn't the default
1168             ### now, close() is. Also, added 'use warnings'.