File Coverage

blib/lib/Data/ID/Exim.pm

Criterion	Covered	Total	%
statement	105	105	100.0
branch	11	14	78.5
condition	5	9	55.5
subroutine	24	24	100.0
pod	10	10	100.0
total	155	162	95.6

line	stmt	bran	cond	sub	pod	time	code
1							=head1 NAME
2
3							Data::ID::Exim - generate Exim message IDs
4
5							=head1 SYNOPSIS
6
7							use Data::ID::Exim qw(exim_mid exim_mid36);
8
9							$mid = exim_mid;
10							$mid = exim_mid36;
11
12							use Data::ID::Exim qw(
13							exim_mid_time exim_mid36_time read_exim_mid read_exim_mid36);
14
15							$mid_time = exim_mid_time(Time::Unix::time());
16							$mid_time = exim_mid36_time(Time::Unix::time());
17							($sec, $usec, $pid) = read_exim_mid($mid);
18							($sec, $usec, $pid) = read_exim_mid36($mid);
19
20							use Data::ID::Exim qw(base62 base36 read_base62 read_base36);
21
22							$digits = base62(3, $value);
23							$digits = base36(3, $value);
24							$value = read_base62($digits);
25							$value = read_base36($digits);
26
27							=head1 DESCRIPTION
28
29							This module supplies functions which generate IDs using the algorithms
30							that the Exim MTA uses to generate message IDs, and functions to
31							manipulate such IDs. Exim has two schemes for message IDs, one using
32							base 62 to compactly represent numeric components and one using base
33							36. Base 62 is the preferred system, and is used where filenames are
34							case-sensitive. Base 36, which yields monocase (specifically uppercase)
35							message IDs, is used where filenames are case-insensitive. Apart from
36							the radix the two schemes are very similar. This module supplies separate
37							functions for the two schemes.
38
39							=cut
40
41							package Data::ID::Exim;
42
43	4			4		192082	{ use 5.006; }
	4					13
44	4			4		22	use warnings;
	4					7
	4					100
45	4			4		18	use strict;
	4					10
	4					90
46
47	4			4		17	use Carp qw(croak);
	4					5
	4					197
48	4			4		1214	use Time::HiRes 1.00 qw(gettimeofday);
	4					3796
	4					21
49
50							our $VERSION = "0.009";
51
52	4			4		1525	use parent "Exporter";
	4					899
	4					18
53							our @EXPORT_OK = qw(
54							exim_mid exim_mid36
55							exim_mid_time exim_mid36_time read_exim_mid read_exim_mid36
56							base62 base36 read_base62 read_base36
57							);
58
59							{
60							my(%base62, %read_base62);
61							for(my $v = 10; $v--; ) {
62							my $d = chr(ord("0") + $v);
63							$base62{$v} = $d;
64							$read_base62{$d} = $v;
65							}
66							for(my $i = 26; $i--; ) {
67							{
68							my $v = 10 + $i;
69							my $d = chr(ord("A") + $i);
70							$base62{$v} = $d;
71							$read_base62{$d} = $v;
72							}
73							{
74							my $v = 36 + $i;
75							my $d = chr(ord("a") + $i);
76							$base62{$v} = $d;
77							$read_base62{$d} = $v;
78							}
79							}
80
81							sub base62($$) {
82	22			22	1	102	my($ndigits, $value) = @_;
83	22					35	my $digits = "";
84	22					39	while($ndigits--) {
85	4			4		1710	use integer;
	4					44
	4					16
86	104					166	$digits .= $base62{$value % 62};
87	104					150	$value /= 62;
88							}
89	22					86	return scalar(reverse($digits));
90							}
91
92							sub base36($$) {
93	22			22	1	43	my($ndigits, $value) = @_;
94	22					30	my $digits = "";
95	22					42	while($ndigits--) {
96	4			4		326	use integer;
	4					8
	4					14
97	104					140	$digits .= $base62{$value % 36};
98	104					146	$value /= 36;
99							}
100	22					85	return scalar(reverse($digits));
101							}
102
103							sub read_base62($) {
104	10			10	1	30	my($digits) = @_;
105	10					17	my $value = 0;
106	10					34	while($digits =~ /(.)/sg) {
107	44					137	$value = 62 * $value + $read_base62{$1};
108							}
109	10					35	return $value;
110							}
111
112							sub read_base36($) {
113	10			10	1	21	my($digits) = @_;
114	10					16	my $value = 0;
115	10					37	while($digits =~ /(.)/sg) {
116	44					84	my $v = $read_base62{$1};
117	44	50	33			130	$v = undef if defined($v) && $v >= 36;
118	44					110	$value = 36 * $value + $v;
119							}
120	10					31	return $value;
121							}
122							}
123
124							=head1 FUNCTIONS
125
126							All of these functions come in matched pairs, for the base-62 and the
127							base-36 message ID schemes. Each pair is described together, because
128							the functions are used identically.
129
130							=over
131
132							=item exim_mid
133
134							=item exim_mid36
135
136							Generates an Exim message ID. (This ID may, of course, be used to label
137							things other than mail messages, but Exim refers to them as message IDs.)
138							The ID is based on the time and process ID, such that it is guaranteed
139							to be unique among IDs generated by this algorithm on this host.
140							This function is completely interoperable with Exim, in the sense that
141							it uses exactly the same algorithm so that the uniqueness guarantee
142							applies between IDs generated by this function and by Exim itself.
143
144							The format of the message ID is three groups of base 62 or base 36 digits
145							respectively, separated by hyphens. The first group, of six digits,
146							gives the integral number of seconds since the epoch. The second group,
147							also of six digits, gives the process ID. The third group, of two
148							digits, gives the fractional part of the number of seconds since the
149							epoch, in units of 1/2000 of a second (500 us) or 1/1000 of a second
150							(1000 us) respectively. The function does not return until the clock
151							has advanced far enough that another call would generate a different ID.
152
153							The strange structure of the ID comes from compatibility with earlier
154							versions of Exim, in which the last two digits were a sequence number.
155
156							=item exim_mid(HOST_NUMBER)
157
158							=item exim_mid36(HOST_NUMBER)
159
160							Exim has limited support for making message IDs unique among a group
161							of hosts. Each host is assigned a number in the range 0 to 16 or 11
162							respectively inclusive. The last two digits of the message IDs give the
163							host number multiplied by 200 or 100 respectively plus the fractional part
164							of the number of seconds since the epoch in units of 1/200 of a second
165							(5 ms) or 1/100 of a second (10 ms) respectively. This makes message
166							IDs unique across the group of hosts, at the expense of generation rate.
167
168							To generate this style of ID, pass the host number to C or
169							C. The host number must be configured by some out-of-band
170							mechanism.
171
172							=cut
173
174							sub _make_fraction_62($$) {
175	4			4		774	use integer;
	4					7
	4					11
176	1627			1627		1792	my($host_number, $usec) = @_;
177	1627	100				4085	defined($host_number) ?
178							200*$host_number + $usec/5000 :
179							$usec/500;
180							}
181
182							sub exim_mid(;$) {
183	4			4	1	70	my($host_number) = @_;
184	4					10	my($sec, $usec) = gettimeofday;
185	4					7	my $frac = _make_fraction_62($host_number, $usec);
186	4					15	my($new_sec, $new_usec, $new_frac);
187	4		66			5	do {
188	1623					2414	($new_sec, $new_usec) = gettimeofday;
189	1623					1846	$new_frac = _make_fraction_62($host_number, $new_usec);
190							} while($new_sec == $sec && $new_frac == $frac);
191	4					8	return base62(6, $sec)."-".base62(6, $$)."-".base62(2, $frac);
192							}
193
194							sub _make_fraction_36($$) {
195	4			4		645	use integer;
	4					8
	4					19
196	2881			2881		3158	my($host_number, $usec) = @_;
197	2881	100				7226	defined($host_number) ?
198							100*$host_number + $usec/10000 :
199							$usec/1000;
200							}
201
202							sub exim_mid36(;$) {
203	4			4	1	12	my($host_number) = @_;
204	4					8	my($sec, $usec) = gettimeofday;
205	4					8	my $frac = _make_fraction_36($host_number, $usec);
206	4					6	my($new_sec, $new_usec, $new_frac);
207	4		66			5	do {
208	2877					4265	($new_sec, $new_usec) = gettimeofday;
209	2877					3364	$new_frac = _make_fraction_36($host_number, $new_usec);
210							} while($new_sec == $sec && $new_frac == $frac);
211	4					10	return base36(6, $sec)."-".base36(6, $$)."-".base36(2, $frac);
212							}
213
214							=item exim_mid_time(TIME)
215
216							=item exim_mid36_time(TIME)
217
218							Because the first section of an Exim message ID encodes the time to a
219							resolution of a second, these IDs sort in a useful way. For the purposes
220							of lexical comparison using this feature, it is sometimes useful to
221							construct a string encoding a specified time in Exim message ID format.
222							(This can also be used as a very concise clock display.)
223
224							This function constructs the initial time portion of an Exim message
225							ID. TIME must be an integral Unix time number. The corresponding
226							six-digit string is returned.
227
228							=cut
229
230							sub exim_mid_time($) {
231	2			2	1	67	my($t) = @_;
232	2					5	return base62(6, $t);
233							}
234
235							sub exim_mid36_time($) {
236	2			2	1	4	my($t) = @_;
237	2					4	return base36(6, $t);
238							}
239
240							=item read_exim_mid(MID)
241
242							=item read_exim_mid36(MID)
243
244							This function extracts the information encoded in an Exim message ID.
245							This is a slightly naughty thing to do: the ID should really only be
246							used as a unique identifier. Nevertheless, the time encoded in an ID
247							is sometimes useful.
248
249							The function returns a three-element list. The first two elements encode
250							the time at which the ID was generated, as a (seconds, microseconds)
251							pair giving the time since the epoch. This is the same time format as
252							is returned by C. The message ID does not encode the time
253							with a resolution as fine as a microsecond; the returned microseconds
254							value is rounded down appropriately. The third item in the result list
255							is the encoded PID.
256
257							=item read_exim_mid(MID, HOST_NUMBER_P)
258
259							=item read_exim_mid36(MID, HOST_NUMBER_P)
260
261							The optional HOST_NUMBER_P argument is a truth value indicating whether the
262							message ID was encoded using the variant algorithm that includes a host
263							number in the ID. It is essential to decode the ID using the correct
264							algorithm. The host number, if present, is returned as a fourth item
265							in the result list.
266
267							=cut
268
269							sub read_exim_mid($;$) {
270	2			2	1	66	my($mid, $host_number_p) = @_;
271	2	50				13	croak "malformed message ID"
272							unless $mid =~ /\A([0-9A-Za-z]{6})-([0-9A-Za-z]{6})-
273							([0-9A-Za-z]{2})\z/x;
274	2					10	my @b62 = ($1, $2, $3);
275	2					5	my($sec, $pid, $frac) = map { read_base62($_) } @b62;
	6					11
276	2	100				6	if($host_number_p) {
277	4			4		1179	use integer;
	4					7
	4					10
278	1					4	my $host_number = $frac / 200;
279	1					3	my $usec = ($frac % 200) * 5000;
280	1					8	return ($sec, $usec, $pid, $host_number);
281							} else {
282	1					2	my $usec = $frac * 500;
283	1					5	return ($sec, $usec, $pid);
284							}
285							}
286
287							sub read_exim_mid36($;$) {
288	2			2	1	8	my($mid, $host_number_p) = @_;
289	2	50				18	croak "malformed message ID"
290							unless $mid =~ /\A([0-9A-Z]{6})-([0-9A-Z]{6})-([0-9A-Z]{2})\z/x;
291	2					11	my @b36 = ($1, $2, $3);
292	2					6	my($sec, $pid, $frac) = map { read_base36($_) } @b36;
	6					12
293	2	100				5	if($host_number_p) {
294	4			4		651	use integer;
	4					10
	4					12
295	1					2	my $host_number = $frac / 100;
296	1					2	my $usec = ($frac % 100) * 10000;
297	1					7	return ($sec, $usec, $pid, $host_number);
298							} else {
299	1					2	my $usec = $frac * 1000;
300	1					9	return ($sec, $usec, $pid);
301							}
302							}
303
304							=item base62(NDIGITS, VALUE)
305
306							=item base36(NDIGITS, VALUE)
307
308							These perform base 62 and base 36 encoding respectively. VALUE and
309							NDIGITS must both be non-negative native integers. VALUE is expressed
310							in base 62 or base 36 respectively, and the least significant NDIGITS
311							digits are returned as a string.
312
313							=item read_base62(DIGITS)
314
315							=item read_base36(DIGITS)
316
317							These perform base 62 and base 36 decoding. DIGITS must be a string
318							of base 62 or base 36 digits respectively. It is interpreted and the
319							value returned as a native integer.
320
321							=back
322
323							=head1 BUGS
324
325							Can theoretically generate duplicate message IDs during a leap second.
326							Exim suffers the same problem.
327
328							=head1 SEE ALSO
329
330							L,
331							L,
332							L,
333							L
334
335							=head1 AUTHOR
336
337							Andrew Main (Zefram)
338
339							=head1 COPYRIGHT
340
341							Copyright (C) 2004, 2006, 2007, 2009, 2010, 2011, 2017
342							Andrew Main (Zefram)
343
344							=head1 LICENSE
345
346							This module is free software; you can redistribute it and/or modify it
347							under the same terms as Perl itself.
348
349							=cut
350
351							1;