File Coverage

blib/lib/Data/ID/Exim.pm
Criterion Covered Total %
statement 105 105 100.0
branch 11 14 78.5
condition 5 9 55.5
subroutine 24 24 100.0
pod 10 10 100.0
total 155 162 95.6


line stmt bran cond sub pod time code
1             =head1 NAME
2              
3             Data::ID::Exim - generate Exim message IDs
4              
5             =head1 SYNOPSIS
6              
7             use Data::ID::Exim qw(exim_mid exim_mid36);
8              
9             $mid = exim_mid;
10             $mid = exim_mid36;
11              
12             use Data::ID::Exim qw(
13             exim_mid_time exim_mid36_time read_exim_mid read_exim_mid36);
14              
15             $mid_time = exim_mid_time(Time::Unix::time());
16             $mid_time = exim_mid36_time(Time::Unix::time());
17             ($sec, $usec, $pid) = read_exim_mid($mid);
18             ($sec, $usec, $pid) = read_exim_mid36($mid);
19              
20             use Data::ID::Exim qw(base62 base36 read_base62 read_base36);
21              
22             $digits = base62(3, $value);
23             $digits = base36(3, $value);
24             $value = read_base62($digits);
25             $value = read_base36($digits);
26              
27             =head1 DESCRIPTION
28              
29             This module supplies functions which generate IDs using the algorithms
30             that the Exim MTA uses to generate message IDs, and functions to
31             manipulate such IDs. Exim has two schemes for message IDs, one using
32             base 62 to compactly represent numeric components and one using base
33             36. Base 62 is the preferred system, and is used where filenames are
34             case-sensitive. Base 36, which yields monocase (specifically uppercase)
35             message IDs, is used where filenames are case-insensitive. Apart from
36             the radix the two schemes are very similar. This module supplies separate
37             functions for the two schemes.
38              
39             =cut
40              
41             package Data::ID::Exim;
42              
43 4     4   192082 { use 5.006; }
  4         13  
44 4     4   22 use warnings;
  4         7  
  4         100  
45 4     4   18 use strict;
  4         10  
  4         90  
46              
47 4     4   17 use Carp qw(croak);
  4         5  
  4         197  
48 4     4   1214 use Time::HiRes 1.00 qw(gettimeofday);
  4         3796  
  4         21  
49              
50             our $VERSION = "0.009";
51              
52 4     4   1525 use parent "Exporter";
  4         899  
  4         18  
53             our @EXPORT_OK = qw(
54             exim_mid exim_mid36
55             exim_mid_time exim_mid36_time read_exim_mid read_exim_mid36
56             base62 base36 read_base62 read_base36
57             );
58              
59             {
60             my(%base62, %read_base62);
61             for(my $v = 10; $v--; ) {
62             my $d = chr(ord("0") + $v);
63             $base62{$v} = $d;
64             $read_base62{$d} = $v;
65             }
66             for(my $i = 26; $i--; ) {
67             {
68             my $v = 10 + $i;
69             my $d = chr(ord("A") + $i);
70             $base62{$v} = $d;
71             $read_base62{$d} = $v;
72             }
73             {
74             my $v = 36 + $i;
75             my $d = chr(ord("a") + $i);
76             $base62{$v} = $d;
77             $read_base62{$d} = $v;
78             }
79             }
80              
81             sub base62($$) {
82 22     22 1 102 my($ndigits, $value) = @_;
83 22         35 my $digits = "";
84 22         39 while($ndigits--) {
85 4     4   1710 use integer;
  4         44  
  4         16  
86 104         166 $digits .= $base62{$value % 62};
87 104         150 $value /= 62;
88             }
89 22         86 return scalar(reverse($digits));
90             }
91              
92             sub base36($$) {
93 22     22 1 43 my($ndigits, $value) = @_;
94 22         30 my $digits = "";
95 22         42 while($ndigits--) {
96 4     4   326 use integer;
  4         8  
  4         14  
97 104         140 $digits .= $base62{$value % 36};
98 104         146 $value /= 36;
99             }
100 22         85 return scalar(reverse($digits));
101             }
102              
103             sub read_base62($) {
104 10     10 1 30 my($digits) = @_;
105 10         17 my $value = 0;
106 10         34 while($digits =~ /(.)/sg) {
107 44         137 $value = 62 * $value + $read_base62{$1};
108             }
109 10         35 return $value;
110             }
111              
112             sub read_base36($) {
113 10     10 1 21 my($digits) = @_;
114 10         16 my $value = 0;
115 10         37 while($digits =~ /(.)/sg) {
116 44         84 my $v = $read_base62{$1};
117 44 50 33     130 $v = undef if defined($v) && $v >= 36;
118 44         110 $value = 36 * $value + $v;
119             }
120 10         31 return $value;
121             }
122             }
123              
124             =head1 FUNCTIONS
125              
126             All of these functions come in matched pairs, for the base-62 and the
127             base-36 message ID schemes. Each pair is described together, because
128             the functions are used identically.
129              
130             =over
131              
132             =item exim_mid
133              
134             =item exim_mid36
135              
136             Generates an Exim message ID. (This ID may, of course, be used to label
137             things other than mail messages, but Exim refers to them as message IDs.)
138             The ID is based on the time and process ID, such that it is guaranteed
139             to be unique among IDs generated by this algorithm on this host.
140             This function is completely interoperable with Exim, in the sense that
141             it uses exactly the same algorithm so that the uniqueness guarantee
142             applies between IDs generated by this function and by Exim itself.
143              
144             The format of the message ID is three groups of base 62 or base 36 digits
145             respectively, separated by hyphens. The first group, of six digits,
146             gives the integral number of seconds since the epoch. The second group,
147             also of six digits, gives the process ID. The third group, of two
148             digits, gives the fractional part of the number of seconds since the
149             epoch, in units of 1/2000 of a second (500 us) or 1/1000 of a second
150             (1000 us) respectively. The function does not return until the clock
151             has advanced far enough that another call would generate a different ID.
152              
153             The strange structure of the ID comes from compatibility with earlier
154             versions of Exim, in which the last two digits were a sequence number.
155              
156             =item exim_mid(HOST_NUMBER)
157              
158             =item exim_mid36(HOST_NUMBER)
159              
160             Exim has limited support for making message IDs unique among a group
161             of hosts. Each host is assigned a number in the range 0 to 16 or 11
162             respectively inclusive. The last two digits of the message IDs give the
163             host number multiplied by 200 or 100 respectively plus the fractional part
164             of the number of seconds since the epoch in units of 1/200 of a second
165             (5 ms) or 1/100 of a second (10 ms) respectively. This makes message
166             IDs unique across the group of hosts, at the expense of generation rate.
167              
168             To generate this style of ID, pass the host number to C or
169             C. The host number must be configured by some out-of-band
170             mechanism.
171              
172             =cut
173              
174             sub _make_fraction_62($$) {
175 4     4   774 use integer;
  4         7  
  4         11  
176 1627     1627   1792 my($host_number, $usec) = @_;
177 1627 100       4085 defined($host_number) ?
178             200*$host_number + $usec/5000 :
179             $usec/500;
180             }
181              
182             sub exim_mid(;$) {
183 4     4 1 70 my($host_number) = @_;
184 4         10 my($sec, $usec) = gettimeofday;
185 4         7 my $frac = _make_fraction_62($host_number, $usec);
186 4         15 my($new_sec, $new_usec, $new_frac);
187 4   66     5 do {
188 1623         2414 ($new_sec, $new_usec) = gettimeofday;
189 1623         1846 $new_frac = _make_fraction_62($host_number, $new_usec);
190             } while($new_sec == $sec && $new_frac == $frac);
191 4         8 return base62(6, $sec)."-".base62(6, $$)."-".base62(2, $frac);
192             }
193              
194             sub _make_fraction_36($$) {
195 4     4   645 use integer;
  4         8  
  4         19  
196 2881     2881   3158 my($host_number, $usec) = @_;
197 2881 100       7226 defined($host_number) ?
198             100*$host_number + $usec/10000 :
199             $usec/1000;
200             }
201              
202             sub exim_mid36(;$) {
203 4     4 1 12 my($host_number) = @_;
204 4         8 my($sec, $usec) = gettimeofday;
205 4         8 my $frac = _make_fraction_36($host_number, $usec);
206 4         6 my($new_sec, $new_usec, $new_frac);
207 4   66     5 do {
208 2877         4265 ($new_sec, $new_usec) = gettimeofday;
209 2877         3364 $new_frac = _make_fraction_36($host_number, $new_usec);
210             } while($new_sec == $sec && $new_frac == $frac);
211 4         10 return base36(6, $sec)."-".base36(6, $$)."-".base36(2, $frac);
212             }
213              
214             =item exim_mid_time(TIME)
215              
216             =item exim_mid36_time(TIME)
217              
218             Because the first section of an Exim message ID encodes the time to a
219             resolution of a second, these IDs sort in a useful way. For the purposes
220             of lexical comparison using this feature, it is sometimes useful to
221             construct a string encoding a specified time in Exim message ID format.
222             (This can also be used as a very concise clock display.)
223              
224             This function constructs the initial time portion of an Exim message
225             ID. TIME must be an integral Unix time number. The corresponding
226             six-digit string is returned.
227              
228             =cut
229              
230             sub exim_mid_time($) {
231 2     2 1 67 my($t) = @_;
232 2         5 return base62(6, $t);
233             }
234              
235             sub exim_mid36_time($) {
236 2     2 1 4 my($t) = @_;
237 2         4 return base36(6, $t);
238             }
239              
240             =item read_exim_mid(MID)
241              
242             =item read_exim_mid36(MID)
243              
244             This function extracts the information encoded in an Exim message ID.
245             This is a slightly naughty thing to do: the ID should really only be
246             used as a unique identifier. Nevertheless, the time encoded in an ID
247             is sometimes useful.
248              
249             The function returns a three-element list. The first two elements encode
250             the time at which the ID was generated, as a (seconds, microseconds)
251             pair giving the time since the epoch. This is the same time format as
252             is returned by C. The message ID does not encode the time
253             with a resolution as fine as a microsecond; the returned microseconds
254             value is rounded down appropriately. The third item in the result list
255             is the encoded PID.
256              
257             =item read_exim_mid(MID, HOST_NUMBER_P)
258              
259             =item read_exim_mid36(MID, HOST_NUMBER_P)
260              
261             The optional HOST_NUMBER_P argument is a truth value indicating whether the
262             message ID was encoded using the variant algorithm that includes a host
263             number in the ID. It is essential to decode the ID using the correct
264             algorithm. The host number, if present, is returned as a fourth item
265             in the result list.
266              
267             =cut
268              
269             sub read_exim_mid($;$) {
270 2     2 1 66 my($mid, $host_number_p) = @_;
271 2 50       13 croak "malformed message ID"
272             unless $mid =~ /\A([0-9A-Za-z]{6})-([0-9A-Za-z]{6})-
273             ([0-9A-Za-z]{2})\z/x;
274 2         10 my @b62 = ($1, $2, $3);
275 2         5 my($sec, $pid, $frac) = map { read_base62($_) } @b62;
  6         11  
276 2 100       6 if($host_number_p) {
277 4     4   1179 use integer;
  4         7  
  4         10  
278 1         4 my $host_number = $frac / 200;
279 1         3 my $usec = ($frac % 200) * 5000;
280 1         8 return ($sec, $usec, $pid, $host_number);
281             } else {
282 1         2 my $usec = $frac * 500;
283 1         5 return ($sec, $usec, $pid);
284             }
285             }
286              
287             sub read_exim_mid36($;$) {
288 2     2 1 8 my($mid, $host_number_p) = @_;
289 2 50       18 croak "malformed message ID"
290             unless $mid =~ /\A([0-9A-Z]{6})-([0-9A-Z]{6})-([0-9A-Z]{2})\z/x;
291 2         11 my @b36 = ($1, $2, $3);
292 2         6 my($sec, $pid, $frac) = map { read_base36($_) } @b36;
  6         12  
293 2 100       5 if($host_number_p) {
294 4     4   651 use integer;
  4         10  
  4         12  
295 1         2 my $host_number = $frac / 100;
296 1         2 my $usec = ($frac % 100) * 10000;
297 1         7 return ($sec, $usec, $pid, $host_number);
298             } else {
299 1         2 my $usec = $frac * 1000;
300 1         9 return ($sec, $usec, $pid);
301             }
302             }
303              
304             =item base62(NDIGITS, VALUE)
305              
306             =item base36(NDIGITS, VALUE)
307              
308             These perform base 62 and base 36 encoding respectively. VALUE and
309             NDIGITS must both be non-negative native integers. VALUE is expressed
310             in base 62 or base 36 respectively, and the least significant NDIGITS
311             digits are returned as a string.
312              
313             =item read_base62(DIGITS)
314              
315             =item read_base36(DIGITS)
316              
317             These perform base 62 and base 36 decoding. DIGITS must be a string
318             of base 62 or base 36 digits respectively. It is interpreted and the
319             value returned as a native integer.
320              
321             =back
322              
323             =head1 BUGS
324              
325             Can theoretically generate duplicate message IDs during a leap second.
326             Exim suffers the same problem.
327              
328             =head1 SEE ALSO
329              
330             L,
331             L,
332             L,
333             L
334              
335             =head1 AUTHOR
336              
337             Andrew Main (Zefram)
338              
339             =head1 COPYRIGHT
340              
341             Copyright (C) 2004, 2006, 2007, 2009, 2010, 2011, 2017
342             Andrew Main (Zefram)
343              
344             =head1 LICENSE
345              
346             This module is free software; you can redistribute it and/or modify it
347             under the same terms as Perl itself.
348              
349             =cut
350              
351             1;