File Coverage

blib/lib/Data/ID/Exim.pm
Criterion Covered Total %
statement 61 61 100.0
branch 5 6 83.3
condition 2 3 66.6
subroutine 15 15 100.0
pod 5 5 100.0
total 88 90 97.7


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::ID::Exim - generate Exim message IDs
4              
5             =head1 SYNOPSIS
6              
7             use Data::ID::Exim qw(exim_mid);
8              
9             $mid = exim_mid;
10              
11             use Data::ID::Exim qw(exim_mid_time read_exim_mid);
12              
13             $mid_time = exim_mid_time(Time::Unix::time());
14             ($sec, $usec, $pid) = read_exim_mid($mid);
15              
16             use Data::ID::Exim qw(base62 read_base62);
17              
18             $digits = base62(3, $value);
19             $value = read_base62($digits);
20              
21              
22             =head1 DESCRIPTION
23              
24             This module supplies a function which generates IDs using the algorithm
25             that the Exim MTA uses to generate message IDs. It also supplies
26             functions to manipulate such IDs, and the base 62 encoding in isolation.
27              
28             =cut
29              
30             package Data::ID::Exim;
31              
32 4     4   94593 { use 5.006; }
  4         14  
  4         152  
33 4     4   22 use warnings;
  4         7  
  4         129  
34 4     4   40 use strict;
  4         21  
  4         142  
35              
36 4     4   18 use Carp qw(croak);
  4         11  
  4         366  
37 4     4   4782 use Time::HiRes 1.00 qw(gettimeofday);
  4         7573  
  4         25  
38              
39             our $VERSION = "0.008";
40              
41 4     4   5652 use parent "Exporter";
  4         1360  
  4         23  
42             our @EXPORT_OK = qw(exim_mid exim_mid_time read_exim_mid base62 read_base62);
43              
44             {
45             my(%base62, %read_base62);
46             for(my $v = 10; $v--; ) {
47             my $d = chr(ord("0") + $v);
48             $base62{$v} = $d;
49             $read_base62{$d} = $v;
50             }
51             for(my $i = 26; $i--; ) {
52             {
53             my $v = 10 + $i;
54             my $d = chr(ord("A") + $i);
55             $base62{$v} = $d;
56             $read_base62{$d} = $v;
57             }
58             {
59             my $v = 36 + $i;
60             my $d = chr(ord("a") + $i);
61             $base62{$v} = $d;
62             $read_base62{$d} = $v;
63             }
64             }
65              
66             sub base62($$) {
67 22     22 1 58 my($ndigits, $value) = @_;
68 22         32 my $digits = "";
69 22         55 while($ndigits--) {
70 4     4   4878 use integer;
  4         50  
  4         19  
71 104         195 $digits .= $base62{$value % 62};
72 104         168 $value /= 62;
73             }
74 22         111 return scalar(reverse($digits));
75             }
76              
77             sub read_base62($) {
78 10     10 1 40 my($digits) = @_;
79 10         15 my $value = 0;
80 10         35 while($digits =~ /(.)/sg) {
81 44         149 $value = 62 * $value + $read_base62{$1};
82             }
83 10         33 return $value;
84             }
85             }
86              
87             =head1 FUNCTIONS
88              
89             =over
90              
91             =item exim_mid
92              
93             Generates an Exim message ID. (This ID may, of course, be used to label
94             things other than mail messages, but Exim refers to them as message IDs.)
95             The ID is based on the time and process ID, such that it is guaranteed
96             to be unique among IDs generated by this algorithm on this host.
97             This function is completely interoperable with Exim, in the sense that
98             it uses exactly the same algorithm so that the uniqueness guarantee
99             applies between IDs generated by this function and by Exim itself.
100              
101             The format of the message ID is three groups of base 62 digits, separated
102             by hyphens. The first group, of six digits, gives the integral number of
103             seconds since the epoch. The second group, also of six digits, gives the
104             process ID. The third group, of two digits, gives the fractional part
105             of the number of seconds since the epoch, in units of 1/2000 of a second
106             (500 us). The function does not return until the clock has advanced far
107             enough that another call would generate a different ID.
108              
109             The strange structure of the ID comes from compatibility with earlier
110             versions of Exim, in which the last two digits were a sequence number.
111              
112             =item exim_mid(HOST_NUMBER)
113              
114             Exim has limited support for making message IDs unique among a group
115             of hosts. Each host is assigned a number in the range 0 to 16 inclusive.
116             The last two digits of the message IDs give the host number multiplied by
117             200 plus the fractional part of the number of seconds since the epoch in
118             units of 1/200 of a second (5 ms). This makes message IDs unique across
119             the group of hosts, at the expense of generation rate.
120              
121             To generate this style of ID, pass the host number to C.
122             The host number must be configured by some out-of-band mechanism.
123              
124             =cut
125              
126             sub _make_fraction($$) {
127 4     4   676 use integer;
  4         7  
  4         19  
128 1050     1050   939 my($host_number, $usec) = @_;
129 1050 100       3947 defined($host_number) ?
130             200*$host_number + $usec/5000 :
131             $usec/500;
132             }
133              
134             sub exim_mid(;$) {
135 4     4 1 14 my($host_number) = @_;
136 4         17 my($sec, $usec) = gettimeofday;
137 4         7 my $frac = _make_fraction($host_number, $usec);
138 4         3 my($new_sec, $new_usec, $new_frac);
139 4   66     5 do {
140 1046         1919 ($new_sec, $new_usec) = gettimeofday;
141 1046         1351 $new_frac = _make_fraction($host_number, $new_usec);
142             } while($new_sec == $sec && $new_frac == $frac);
143 4         11 return base62(6, $sec)."-".base62(6, $$)."-".base62(2, $frac);
144             }
145              
146             =item exim_mid_time(TIME)
147              
148             Because the first section of an Exim message ID encodes the time to a
149             resolution of a second, these IDs sort in a useful way. For the purposes
150             of lexical comparison using this feature, it is sometimes useful to
151             construct a string encoding a specified time in Exim message ID format.
152             (This can also be used as a very concise clock display.)
153              
154             This function constructs the initial time portion of an Exim message
155             ID. TIME must be an integral Unix time number. The corresponding
156             six-base62-digit string is returned.
157              
158             =cut
159              
160             sub exim_mid_time($) {
161 2     2 1 9 my($t) = @_;
162 2         6 return base62(6, $t);
163             }
164              
165             =item read_exim_mid(MID)
166              
167             This function extracts the information encoded in an Exim message ID.
168             This is a slightly naughty thing to do: the ID should really only be
169             used as a unique identifier. Nevertheless, the time encoded in an ID
170             is sometimes useful.
171              
172             The function returns a three-element list. The first two elements encode
173             the time at which the ID was generated, as a (seconds, microseconds)
174             pair giving the time since the epoch. This is the same time format as
175             is returned by C. The message ID does not encode the time
176             with a resolution as great as a microsecond; the returned microseconds
177             value is rounded down appropriately. The third item in the result list
178             is the encoded PID.
179              
180             =item read_exim_mid(MID, HOST_NUMBER_P)
181              
182             The optional HOST_NUMBER_P argument is a truth value indicating whether the
183             message ID was encoded using the variant algorithm that includes a host
184             number in the ID. It is essential to decode the ID using the correct
185             algorithm. The host number, if present, is returned as a fourth item
186             in the result list.
187              
188             =cut
189              
190             sub read_exim_mid($;$) {
191 2     2 1 10 my($mid, $host_number_p) = @_;
192 2 50       14 croak "malformed message ID"
193             unless $mid =~ /\A([0-9A-Za-z]{6})-([0-9A-Za-z]{6})-
194             ([0-9A-Za-z]{2})\z/x;
195 2         11 my @b62 = ($1, $2, $3);
196 2         4 my($sec, $pid, $frac) = map { read_base62($_) } @b62;
  6         11  
197 2 100       25 if($host_number_p) {
198 4     4   1749 use integer;
  4         9  
  4         15  
199 1         3 my $host_number = $frac / 200;
200 1         12 my $usec = ($frac % 200) * 5000;
201 1         7 return ($sec, $usec, $pid, $host_number);
202             } else {
203 1         3 my $usec = $frac * 500;
204 1         9 return ($sec, $usec, $pid);
205             }
206             }
207              
208             =item base62(NDIGITS, VALUE)
209              
210             This performs base 62 encoding. VALUE and NDIGITS must both be
211             non-negative native integers. VALUE is expressed in base 62, and the
212             least significant NDIGITS digits are returned as a string.
213              
214             =item read_base62(DIGITS)
215              
216             This performs base 62 decoding. DIGITS must be a string of base 62
217             digits. It is interpreted and the value returned as a native integer.
218              
219             =back
220              
221             =head1 BUGS
222              
223             Can theoretically generate duplicate message IDs during a leap second.
224             Exim suffers the same problem.
225              
226             =head1 SEE ALSO
227              
228             L,
229             L,
230             L,
231             L
232              
233             =head1 AUTHOR
234              
235             Andrew Main (Zefram)
236              
237             =head1 COPYRIGHT
238              
239             Copyright (C) 2004, 2006, 2007, 2009, 2010, 2011
240             Andrew Main (Zefram)
241              
242             =head1 LICENSE
243              
244             This module is free software; you can redistribute it and/or modify it
245             under the same terms as Perl itself.
246              
247             =cut
248              
249             1;