File Coverage

blib/lib/Net/Traces/TSH.pm
Criterion Covered Total %
statement 252 298 84.5
branch 94 144 65.2
condition 17 40 42.5
subroutine 27 27 100.0
pod 11 15 73.3
total 401 524 76.5


line stmt bran cond sub pod time code
1             package Net::Traces::TSH;
2              
3 9     9   80088 use 5.6.1;
  9         31  
  9         382  
4 9     9   46 use strict;
  9         19  
  9         357  
5 9     9   50 use warnings;
  9         22  
  9         347  
6 9     9   8088 use autouse 'Carp' => qw(carp croak confess);
  9         15324  
  9         50  
7              
8             our $VERSION = 0.16;
9              
10             =head1 NAME
11              
12             Net::Traces::TSH - Analyze IP traffic traces in TSH format
13              
14             =head1 SYNOPSIS
15              
16             use Net::Traces::TSH qw(:traffic_analysis);
17              
18             # Display progress indicators
19             #
20             verbose;
21              
22             # Process the trace in file some_trace.tsh
23             #
24             process_trace 'some_trace.tsh';
25              
26             # Then, write a summary of the trace contents to some_trace.csv, in
27             # comma-separated values (CSV) format
28             #
29             write_trace_summary 'some_trace.csv';
30              
31             =cut
32              
33             require Exporter;
34              
35             our @ISA = qw( Exporter );
36              
37             our @EXPORT = qw( );
38              
39             # Exportable subroutine definitions
40             #
41             sub configure( % );
42             sub date_of( $ );
43             sub get_IP_address ( $ );
44             sub get_interfaces_href();
45             sub get_interfaces_list();
46             sub get_trace_summary_href();
47             sub process_trace( $ );
48             sub records_in( $ );
49             sub verbose();
50             sub write_interface_summaries( ; $);
51             sub write_trace_summary( ; $ );
52              
53             our @EXPORT_OK = qw(
54             configure
55             date_of
56             get_IP_address
57             get_interfaces_href
58             get_interfaces_list
59             get_trace_summary_href
60             numerically
61             process_trace
62             records_in
63             verbose
64             write_interface_summaries
65             write_trace_summary
66             );
67              
68             our %EXPORT_TAGS = (
69             traffic_analysis => [ qw( verbose
70             process_trace
71             write_interface_summaries
72             write_trace_summary
73             )
74             ],
75              
76             trace_information => [ qw( date_of records_in ) ],
77              
78             all => [@EXPORT_OK],
79             );
80              
81             # Internal/utility subroutine definitions
82             #
83             sub progress( $ );
84             sub write_summary( *$ ; $ );
85             sub print_value( *$ );
86              
87             our %options;
88              
89             # Load the IANA protocol numbers from the __DATA__ section. If by any
90             # chance we end up having duplicate keywords, something must have
91             # corrupted the __DATA__ section, so abort.
92             #
93             my %iana_protocol_numbers;
94              
95             INIT {
96 9     9   67 while () {
97 1260         1358 chomp;
98 1260         2548 my ($k, $v) = split " ", $_, 2;
99              
100             # Sanity check
101             #
102 1260 50       2519 die "Duplicate IANA protocol keyword detected"
103             if defined $iana_protocol_numbers{$k};
104              
105 1260         6151 $iana_protocol_numbers{$k} = $v;
106             }
107              
108             # Default options, parameters and output
109             #
110             %options = (
111             # Do not display progress information
112 9         726 Verbosity => 0,
113              
114             'Link Capacity' => 0, # Bits per second
115              
116             # Filename to store TCP traffic in tcpdump format
117             tcpdump => 0,
118              
119             # Filename to store TCP traffic in ns2 format
120             ns2 => 0,
121             );
122             }
123              
124             # Used to sort the keys of a hash in numeric order instead of the
125             # default alphabetical order. Borrowed from "Programming Perl 3/e" by
126             # Wall, Christiansen and Orwant (p. 790).
127             #
128 1635     1635 0 2038 sub numerically { $a <=> $b; }
129              
130             =head1 INSTALLATION
131              
132             C<Net::Traces::TSH> can be installed like any CPAN module. In
133             particular, consider using Andreas Koenig's CPAN module for all your
134             CPAN module installation needs.
135              
136             To find out more about installing CPAN modules type
137              
138             perldoc perlmodinstall
139              
140             at the command prompt.
141              
142             If you have already downloaded the C<Net::Traces::TSH> tarball,
143             decompress and untar it, and proceed as follows:
144              
145             perl Makefile.PL
146             make
147             make test
148             make install
149              
150             =head1 DESCRIPTION
151              
152             C<Net::Traces::TSH> can assist you in analyzing Internet Protocol (IP)
153             packet traces in Time Sequenced Headers (TSH) format, a binary network
154             trace format. Daily TSH traces are available from the L<NLANR PMA web
155             site|"SEE ALSO">. Each 44-byte TSH record corresponds to an IP packet
156             passing by a monitoring point. Although there are no explicit
157             delimiters, each record is composed of three sections.
158              
159             =over
160              
161             =item Time and Interface
162              
163             The first section uses 8 bytes to store the time (with microsecond
164             granularity) and the interface number of the corresponding packet, as
165             recorded by the (passive) monitor.
166              
167             =item IP
168              
169             The next 20 bytes contain the standard IP packet header. IP options
170             are not recorded.
171              
172             =item TCP
173              
174             The third and last section contains the first 16 bytes of the standard
175             TCP segment header. The TCP checksum, urgent pointer, and TCP options
176             (if any) are not included in a TSH record.
177              
178             =back
179              
180             If a record does not correspond to a TCP segment, it is not clear how
181             to interpret the last section. As such, C<Net::Traces::TSH> makes no
182             assumptions, and does not analyze the last section of a TSH record
183             unless it corresponds to a TCP segment. In other words,
184             C<Net::Traces::TSH> reports on protocols other than TCP based solely
185             on the first two sections.
186              
187             The following diagram illustrates a TSH record.
188              
189             0 1 2 3
190             0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 Section
191             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
192             0 | Timestamp (seconds) | Time
193             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
194             1 | Interface No.| Timestamp (microseconds) |
195             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
196             2 |Version| IHL |Type of Service| Total Length | IP
197             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
198             3 | Identification |Flags| Fragment Offset |
199             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
200             4 | Time to Live | Protocol | Header Checksum |
201             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
202             5 | Source Address |
203             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
204             6 | Destination Address |
205             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
206             7 | Source Port | Destination Port | TCP
207             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
208             8 | Sequence Number |
209             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
210             9 | Acknowledgment Number |
211             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
212             | Data | |C|E|U|A|P|R|S|F| |
213             10 | Offset|RSRV-ed|W|C|R|C|S|S|Y|I| Window |
214             | | |R|E|G|K|H|T|N|N| |
215             +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
216              
217             This diagram is an adaptation of the original TSH diagram (found on
218             the L), which reflects the changes due
219             to the addition of Explicit Congestion Notification (ECN) in the TCP
220             header flags. Also, keep in mind that recent Internet Engineering
221             Task Force (IETF) Requests for Comments (RFCs) have deprecated the IP
222             header I field in favor of L
223             and Explicit Congestion Notification|"SEE ALSO">.
224              
225             You can use C to L
226             information|"process_trace"> from a TSH packet trace, perform
227             statistical analysis on Transport protocol, Differentiated Services
228             (DiffServ) and ECN usage, and obtain packet and segment size
229             distributions. The trace L
230             are stored in comma separated values (CSV), a platform independent
231             text format.
232              
233             =head2 Data Structures
234              
235             A single TSH trace may contain records for packets observed on several
236             different interfaces. For example, the daily TSH traces from the
237             NLANR PMA repository typically contain records from two different
238             interfaces. In such cases, incoming and outgoing traffic can be
239             differentiated based on the interface number (despite the scrambling of
240             IP addresses to protect privacy). C<Net::Traces::TSH> users may be
241             interested in collecting statistical information for each interface
242             separately or aggregating across the entire trace.
243             C<Net::Traces::TSH> uses two hashes to maintain these statistics:
244             %Interfaces and %Trace.
245              
246             B<%Interfaces> contains counts of several protocol particulars on a
247             per-interface basis. For example, %Interfaces can tell you how many
248             IP packets, TCP segments, and UDP datagrams were recorded in the trace
249             for each interface.
250              
251             B<%Trace> contains general information about the trace (date, number
252             of records, duration, number of interfaces, etc.) as well as the
253             aggregate data points across all interfaces. As such, %Trace will
254             report the I<total> number of UDP datagrams in the trace, the total
255             number of TCP SYNs, and so on.
256              
257             Both %Trace and %Interfaces are I by
258             L. The recommended way to get the
259             trace summary information, after processing a trace is to call
260             L, which stores the
261             contents of %Trace in a CSV-formatted text file, as shown in
262             L<SYNOPSIS>. Similarly, if you want summaries for the
263             traffic on each interface, use
264             L.
265              
266             Neither %Trace nor %Interfaces are exported by default and are not
267             meant to be accessed directly by user code. However, if you know what
268             you are doing, you can get a reference to %Trace by calling
269             L, and a reference to
270             %Interfaces by calling L.
271             If you choose to do so, the following subsections explain how you can
272             access some of the information stored in %Trace. The %Interfaces
273             structure is virtually the same only lacking the "general trace
274             information" part. See also L
275             hashes|"Using the Net::Trace::TSH trace summary hashes">.
276              
277             =head3 General Trace Information
278              
279             =over
280              
281             =item $Trace{filename}
282              
283             The L.
284              
285             =item $Trace{date}
286              
287             The estimated date of the trace (see L).
288              
289             =item $Trace{summary}
290              
291             The trace L.
292              
293             =item $Trace{starts}
294              
295             The first trace timestamp, in seconds, as it is recorded in the trace.
296              
297             =item $Trace{ends}
298              
299             The last trace timestamp, in seconds, after being "normalized".
300             Essentially, the number of seconds since $Trace{starts}.
301              
302             =item $Trace{records}
303              
304             L in the trace.
305              
306             Similarly, if I<$if> is the interface number,
307             C<$Interfaces{$if}{records}> contains the number of records
308             corresponding to packets observed on interface I<$if>.
309              
310             =item $Trace{interfaces}
311              
312             Number of interfaces recorded in the trace.
313              
314             =item $Trace{unidirectional}
315              
316             True, if each interface carries unidirectional traffic.
317              
318             False, if there is bidirectional traffic in at least one interface.
319              
320             C<undef> if traffic directionality was not examined.
321              
322             =item $Trace{'Link Capacity'}
323              
324             The L in bits per
325             second (b/s).
326              
327             =back
328              
329             =head3 Internet Protocol
330              
331             =over
332              
333             =item $Trace{IP}{Total}{Packets}
334              
335             =item $Trace{IP}{Total}{Bytes}
336              
337             Number of IP packets and bytes, respectively, in the trace. The
338             number of IP packets should equal the number of records in the trace.
339              
340             As mentioned earlier, %Trace has virtually the same structure as
341             %Interfaces. Therefore, if I<$if> is the interface number,
342             C<$Interfaces{$if}{IP}{Total}{Packets}> and
343             C<$Interfaces{$if}{IP}{Total}{Bytes}> contain the number of IP
344             packets and bytes, respectively, observed on interface I<$if>. The
345             same "rule" applies to all %Trace fields presented below.
346              
347             =back
348              
349             =head4 Fragmentation
350              
351             =over
352              
353             =item $Trace{IP}{DF}{Packets}
354              
355             =item $Trace{IP}{DF}{Bytes}
356              
357             Number of IP packets and bytes, respectively, requesting no
358             fragmentation ('Do not Fragment').
359              
360             =item $Trace{IP}{MF}{Packets}
361              
362             =item $Trace{IP}{MF}{Bytes}
363              
364             Number of IP packets and bytes, respectively, indicating that 'More
365             Fragments' follow.
366              
367             =back
368              
369             =head4 Differentiated Services
370              
371             =over
372              
373             =item $Trace{IP}{Normal}{Packets}
374              
375             =item $Trace{IP}{Normal}{Bytes}
376              
377             Number of IP packets and bytes, respectively, requesting no particular
378             treatment (best effort traffic). No DiffServ or ECN bits are set.
379              
380             =item $Trace{IP}{'Class Selector'}{Packets}
381              
382             =item $Trace{IP}{'Class Selector'}{Bytes}
383              
384             Number of IP packets and bytes, respectively, with Class Selector bits
385             set.
386              
387             =item $Trace{IP}{'AF PHB'}{Packets}
388              
389             =item $Trace{IP}{'AF PHB'}{Bytes}
390              
391             Number of IP packets and bytes, respectively, requesting Assured
392             Forwarding Per-Hop Behavior (PHB).
393              
394             =item $Trace{IP}{'EF PHB'}{Packets}
395              
396             =item $Trace{IP}{'EF PHB'}{Bytes}
397              
398             Number of IP packets and bytes, respectively, requesting Expedited
399             Forwarding Per-Hop Behavior (PHB).
400              
401             =back
402              
403             =head4 Explicit Congestion Notification
404              
405             =over
406              
407             =item $Trace{IP}{ECT}{Packets}
408              
409             =item $Trace{IP}{ECT}{Bytes}
410              
411             Number of IP packets and bytes, respectively, with either of the ECT
412             bits set. These packets should be carrying traffic from ECN-aware
413             hosts.
414              
415             =item $Trace{IP}{CE}{Packets}
416              
417             =item $Trace{IP}{CE}{Bytes}
418              
419             Number of IP packets and bytes, respectively, with the CE bit set.
420             These packets carry ECN-capable traffic that has been marked at an
421             ECN-aware router.
422              
423             =back
424              
425             =head4 IP Options
426              
427             =over
428              
429             =item $Trace{IP}{'No IP Options'}{Packets}
430              
431             =item $Trace{IP}{'No IP Options'}{Bytes}
432              
433             Number of IP packets and bytes, respectively, carrying no IP header options.
434              
435             =item $Trace{IP}{'IP Options'}{Packets}
436              
437             =item $Trace{IP}{'IP Options'}{Bytes}
438              
439             Number of IP packets and bytes, respectively, carrying IP header options.
440              
441             =back
442              
443             The following diagram summarizes the %Trace data structure up to here.
444              
445             Trace
446             - filename
447             - summary
448             - date
449             - starts
450             - ends
451             - records
452             - interfaces
453             - unidirectional
454             - 'Link Capacity'
455             - IP
456             - Total
457             - Packets
458             - Bytes
459             - DF
460             - Packets
461             - Bytes
462             - MF
463             - Packets
464             - Bytes
465             - Normal
466             - Packets
467             - Bytes
468             - 'Class Selector'
469             - Packets
470             - Bytes
471             - 'AF PHB'
472             - Packets
473             - Bytes
474             - 'EF PHB'
475             - Packets
476             - Bytes
477             - ECT
478             - Packets
479             - Bytes
480             - CE
481             - Packets
482             - Bytes
483             - 'No IP Options'
484             - Packets
485             - Bytes
486             - 'IP Options'
487             - Packets
488             - Bytes
489              
490             =head3 Transport Protocols
491              
492             Besides the summary information about the trace itself and statistics
493             about IP, %Trace maintains information about the transport protocols
494             present in the trace. Based on the IP header, %Trace maintains the
495             same statistics mentioned in the L
496             Protocol"> for all transport protocols with an IANA assigned number
497             (including, of course, TCP and UDP). For example,
498              
499             =over
500              
501             =item $Trace{Transport}{TCP}{Total}{Packets}
502              
503             =item $Trace{Transport}{TCP}{Total}{Bytes}
504              
505             Number of TCP segments and the corresponding bytes (including the IP
506             and TCP headers) in the trace.
507              
508             =item $Trace{Transport}{UDP}{Total}{Packets}
509              
510             =item $Trace{Transport}{UDP}{Total}{Bytes}
511              
512             Ditto, for UDP.
513              
514             =item $Trace{Transport}{ICMP}{DF}{Packets}
515              
516             =item $Trace{Transport}{ICMP}{DF}{Bytes}
517              
518             Number of ICMP packets and bytes, respectively, with the DF bit set.
519              
520             =back
521              
522             =head2 Using the Net::Trace::TSH trace summary hashes
523              
524             The following example creates the trace summary file only if TCP
525             accounts for more than 90% of the total IP traffic, in terms of bytes.
526              
527             # Explicitly import process_trace(), write_trace_summary(), and
528             # get_trace_summary_href():
529              
530             use Net::Traces::TSH qw( process_trace
531             write_trace_summary
532             get_trace_summary_href
533             );
534              
535             # Process a trace file...
536             #
537             process_trace "some.tsh";
538              
539             # Get a reference to %Trace
540             #
541             my $ts_href = get_trace_summary_href;
542              
543             # ...and generate a summary only if the condition is met.
544             #
545             write_trace_summary
546             if ( ( $ts_href->{Transport}{TCP}{Total}{Bytes}
547             / $ts_href->{IP}{Total}{Bytes}
548             ) > 0.9
549             );
550              
551             =cut
552              
553             # Hash containing aggregate (across all interfaces) information about
554             # the trace currently being processed. Daily TSH traces from NLANR
555             # PMA usually contain records from two interfaces (incoming and
556             # outgoing).
557             #
558             my %Trace;
559              
560             # Hash containing per-interface information about the trace currently
561             # being processed.
562             #
563             my %Interfaces;
564              
565             # Make sure that all data points are accounted for and are in correct
566             # order, thus saving some of hash key sorting operations. Moreover,
567             # this allows us to use more descriptive, i.e. self-documenting hash
568             # key names for the data hashes, %Trace and %Interfaces.
569             #
570             my @data_points = ( 'Total', 'DF', 'MF', 'ECT', 'CE',
571             'Normal', 'Class Selector', 'AF PHB', 'EF PHB',
572             'No IP Options', 'IP Options'
573             );
574              
575             =head1 FUNCTIONS
576              
577             C<Net::Traces::TSH> does not export any functions by default. The
578             following functions, listed in alphabetical order, are
579             L.
580              
581             =head2 configure
582              
583             configure %OPTIONS
584              
585             Used to specify verbosity, the link capacity, and the types of outputs
586             requested. For example,
587              
588             configure(
589             # Display progress information, equivalent to calling verbose()
590             #
591             Verbosity => 1, # default is 0, no progress information
592              
593             'Link Capacity' => 100_000_000, # bits per second
594              
595             # Convert the TCP records in the TSH trace to tcpdump
596             # format and store in 'trace.tcpdump'.
597             #
598             tcpdump => 'trace.tcpdump',
599              
600             # Convert the TCP data-carrying segment records to binary
601             # ns2 traffic trace format. Create one binary file per
602             # interface and use 'trace.ns2' as the file prefix.
603             #
604             ns2 => 'trace.ns2',
605              
606             );
607              
608             =cut
609              
610             sub configure ( % ) {
611 7     7 1 8118 while ( defined ($_ = shift) ) {
612 8 100       29 if ( defined $options{$_} ) {
613 7         28 $options{$_} = shift;
614             }
615             else {
616 1         3 $options{$_} = undef;
617 1         2 shift;
618 1         299 carp "Ignoring unknown configuration option '$_'...";
619             }
620             }
621             }
622              
623             =head2 date_of
624              
625             date_of FILENAME
626              
627             TSH traces downloaded from the L
628             ALSO"> typically contain a timestamp as part of their filename.
629             date_of() converts the timestamp to a human readable format. That is,
630             if FILENAME contains a valid timestamp, date_of() returns the
631             corresponding GMT date as a human readable string. For example,
632              
633             date_of 'ODU-1073132115.tsh'
634              
635             returns C<Sat Jan  3 12:15:15 2004 GMT>.
636              
637             If the FILENAME does not contain a timestamp, date_of() returns
638             I.
639              
640             Note that there is nothing special about FILENAME: It can be any
641             string. The goal here is to get an idea of the period the trace was
642             collected.
643              
644             =cut
645              
646             sub date_of( $ ) {
647 24 100 100 24 1 2085 $_ = shift and /(\d{10})/ and return join ' ', scalar gmtime $1, 'GMT';
648             }
649              
650             =head2 get_IP_address
651              
652             get_IP_address INTEGER
653              
654             Converts a 32-bit integer to an IP address in dotted decimal
655             notation. For example,
656              
657             get_IP_address(167772172)
658              
659             returns C<10.0.0.12>.
660              
661             =cut
662              
663             sub get_IP_address ( $ ) {
664 1690     1690 1 18787 return join '.', unpack('C4', pack('N', shift));
665             }
666              
667             =head2 get_interfaces_href
668              
669             get_interfaces_href
670              
671             Returns a hash I<reference> to L<%Interfaces|"Data Structures">.
672              
673             =cut
674              
675             sub get_interfaces_href() {
676              
677 4     4 1 32348 return \%Interfaces;
678              
679             }
680              
681              
682             =head2 get_interfaces_list
683              
684             get_interfaces_list
685              
686             In list context returns a sorted list of all interfaces recorded in
687             the trace. In scalar context returns the number of unique interfaces
688             in the trace.
689              
690             =cut
691              
692             sub get_interfaces_list() {
693              
694 9 100   9 1 1618 return wantarray ? sort numerically keys %Interfaces
695             : scalar keys %Interfaces;
696              
697             }
698              
699             =head2 get_trace_summary_href
700              
701             get_trace_summary_href
702              
703             Returns a hash I<reference> to L<%Trace|"Data Structures">.
704              
705             =cut
706              
707             sub get_trace_summary_href() {
708              
709 2     2 1 891 return \%Trace;
710              
711             }
712              
713             =head2 process_trace
714              
715             process_trace FILENAME
716              
717             In a void context, process_trace() examines the binary TSH trace stored
718             in FILENAME, and populates L<%Trace and %Interfaces|"Data
719             Structures">.
720              
721             In a list context process_trace() in addition to collecting summary
722             statistics, it extracts all TCP flows and TCP data-carrying segments
723             from the trace, returning two hash references. For example,
724              
725             my ($senders_href, $segments_href) = process_trace 'trace.tsh';
726              
727             will process C<trace.tsh> and return two hash references:
728             I<$senders_href> and I<$segments_href>.
729              
730             I<$senders_href> is a reference to a hash which contains an entry for
731             each TCP sender in the trace file. A TCP sender is identified by the
732             ordered 4-tuple
733              
734             (src, src port, dst, dst port)
735              
736             where I<src> and I<dst> are the L<32-bit integers|"get_IP_address">
737             corresponding to the IP addresses of the sending and receiving hosts,
738             respectively. Similarly, I<src port> and I<dst port> are the sending
739             and receiving processes' port numbers. Senders are categorized on a
740             per interface basis. For example, the following accesses the list of
741             segments sent from 10.0.0.12:80 to 10.0.0.14:1080 (on interface 1):
742              
743             $senders_href->{1}{167772172,80,167772174,1080}
744              
745             Each hash entry is a list of timestamps extracted from the trace
746             records and stored after being "normalized" (start of trace = 0.0
747             seconds, always).
748              
749             In theory, records corresponding to packets transmitted on the same
750             interface should have different timestamps. In practice, although it
751             is not very likely that two data segments have the same timestamp, I
752             encountered a few traces that did have duplicate timestamps.
753             process_trace() checks for such cases and implements a timestamp
754             "collision avoidance" algorithm. A timestamp collision threshold is
755             defined and is currently set to 3. Trace processing is aborted if the
756             number of records with the same timestamp exceeds this threshold. If
757             you encounter such traces, it is not a bad idea to investigate why
758             this is happening, as the trace may be corrupted.
759              
760             The second returned value, I<$segments_href>, is another hash
761             reference, which can be used to access any individual I<data-carrying
762             TCP segment> in the trace. Again, segments are categorized on a per
763             interface basis. Three values are stored per segment: the total
764             number of bytes (including IP and TCP headers, and application
765             payload), the segment sequence number, and whether the segment was
766             retransmitted or not.
767              
768             For example, assuming the first record corresponds to a TCP segment,
769             here is how you can print its packet size and the sequence number
770             carried in the TCP header:
771              
772             my $interface = 1;
773             my $timestamp = 0.0;
774              
775             print $segments_href->{$interface}{$timestamp}{bytes};
776             print $segments_href->{$interface}{$timestamp}{seq_num};
777              
778             You can also check whether a segment was retransmitted or not:
779              
780             if ( $segments_href->{$interface}{$timestamp}{retransmitted} ) {
781             print "Segment was retransmitted by the TCP sender.";
782             }
783             else {
784             print "Segment must have been acknowledged by the TCP receiver.";
785             }
786              
787             Note that process_trace() only initializes the "retransmitted" value
788             to false (undef). It is write_sojourn_times() that detects retransmitted
789             segments and updates the "retransmitted" entry to I<true>, if it is
790             determined that the segment was retransmitted.
791              
792             CAVEAT: write_sojourn_times() is not currently included in the stable,
793             CPAN version of the module. Contact the author if you want to get
794             a copy of the bleeding edge version.
795              
796             =head3 Using a TSH trace in ns2 simulations
797              
798             In addition to extracting %senders and %segments, C<process_trace()>
799             allows you to generate binary files suitable for driving L<ns2
800             simulations|"SEE ALSO">. For example,
801              
802             configure(ns2 => 'some.tsh');
803              
804             process_trace 'some.tsh';
805              
806             After the call to configure(), process_trace() will generate a binary
807             file for each interface found in the trace. For example, assume that
808             F<some.tsh> has recorded traffic from two interfaces, 1 and 2.
809             process_trace() will generate two binary files:
810              
811             some.tsh-if1.bin
812             some.tsh-if2.bin
813              
814             Each of these files can be used in ns2 simulations in
815             conjunction with Application/Traffic/Trace. For example, the following ns2
816             script fragment illustrates how to attach F<some.tsh-if2.bin> to a
817             traffic source:
818              
819             # ...
820              
821             # Initialize a trace file
822             #
823             set tfile [new Tracefile]
824             $tfile filename some.tsh-if2.bin
825              
826             # Attach the tracefile
827             #
828             set trace [new Application/Traffic/Trace]
829             $trace attach-tracefile $tfile
830              
831             # ...
832              
833             Note that both F<some.tsh-if1.bin> and F<some.tsh-if2.bin> include
834             only the I<TCP data-carrying segments> in the trace. If you want to
835             convert the I<entire> TSH trace to Traffic/Trace files, see
836             C.
837              
838              
839             =head3 Converting TSH to F<tcpdump>
840              
841             If you would like to extract the TCP traffic and store it in
842             F<tcpdump> format, use
843              
844             configure(tcpdump => 'tcpdump_filename');
845              
846             before calling process_trace(). process_trace() will generate a text
847             file based on the trace records in a format similar to the modified
848             output of F<tcpdump>, as presented in I<TCP/IP Illustrated, Volume 1>
849             by W. R. Stevens (see pp. 230-231).
850              
851             You can use such an output as input to other tools, present real
852             traffic scenarios in a classroom, or simply "eyeball" the trace. For
853             example, here are the first ten lines of the contents of such a file:
854              
855             0.000000000 10.0.0.1.6699 > 10.0.0.2.55309: . ack 225051666 win 65463
856             0.000014000 10.0.0.3.80 > 10.0.0.4.14401: S 457330477:457330477(0) ack 810547499 win 34932
857             0.000014000 10.0.0.1.6699 > 10.0.0.2.55309: . 3069529864:3069531324(1460) ack 225051666 win 65463
858             0.000024000 10.0.0.5.12119 > 10.0.0.6.80: F 2073668891:2073668891(0) ack 183269290 win 64240
859             0.000034000 10.0.0.7.4725 > 10.0.0.8.445: S 3152140131:3152140131(0) win 16384
860             0.000067000 10.0.0.1.6699 > 10.0.0.2.55309: P 3069531324:3069531944(620) ack 225051666 win 65463
861             0.000072000 10.0.0.11.3381 > 10.0.0.12.445: S 1378088462:1378088462(0) win 16384
862             0.000083000 10.0.0.13.1653 > 10.0.0.1.6699: P 3272208349:3272208357(8) ack 501563814 win 32767
863             0.000093000 10.0.0.14.1320 > 10.0.0.15.445: S 3127123478:3127123478(0) win 64170
864             0.000095000 10.0.0.4.14401 > 10.0.0.3.80: R 810547499:810547499(0) ack 457330478 win 34932
865              
866             Note that this output is similar to what F<tcpdump> with options C<-n>
867             and C<-S> would have produced. The only missing fields are related to
868             the TCP options negotiated during connection setup. Unfortunately,
869             TSH records include only the first 16 bytes of the
870             TCP header, making it impossible to record the options from the
871             segment header.
872              
873             =cut
874              
875             # A TSH record is 44 bytes long: 8 bytes of timestamp and interface
876             # data, the 20-byte fixed IP header, and the first 16 bytes of the TCP header.
877 9     9   19223 use constant TSH_RECORD_LENGTH => 44;
  9         18  
  9         853  
878              
879             # If more than this many records have the same timestamp, abort
880             # processing.
881             #
882 9     9   59 use constant TIMESTAMP_COLLISION_THRESHOLD => 3;
  9         10  
  9         21124  
883              
884             sub process_trace( $ ) {
885              
886             # Sanity checks
887             #
888 5     5 1 2275 my $trace = shift;
889 5 50       25 croak 'No trace filename provided' unless $trace;
890              
891 5         22 my $records = records_in $trace;
892 5 50       15 croak "Number of records in $trace not an integer. Is $trace corrupted?"
893             unless $records;
894              
895             # Open trace file
896             #
897 5 50       188 open(INPUT, '<', $trace)
898             or croak "Cannot open $trace for processing. $!";
899              
900 5         15 binmode INPUT; # Needed for non-UNIX OSes; no harm in UNIX
901              
902 5 100 33     118 $options{tcpdump} and
903             ( open(TCPDUMP, '>', $options{tcpdump})
904             or croak "Cannot open $options{tcpdump}. $!"
905             );
906              
907 5         91 my %ns2_fh;
908             my %ns2_previous_timestamp;
909              
910 5         19 progress "Initializing data structures... ";
911              
912 5         192 %Trace = %Interfaces = ();
913              
914 5         12 $Trace{filename} = $trace;
915 5         10 $Trace{records} = $records;
916 5         10 $Trace{'Link Capacity'} = $options{'Link Capacity'};
917              
918             # If process_trace() is called in a void context, we will not
919             # examine traffic direction, thus undef $Trace{unidirectional}.
920             # Otherwise, assume that traffic is unidirectional, until proven
921             # otherwise.
922             #
923 5 100       16 $Trace{unidirectional} = defined wantarray ? 1 : undef;
924              
925 5         7 my (%senders, %segments);
926              
927 5         22 progress "Processing $Trace{filename}...\n";
928              
929             # Read the trace file, record by record
930             #
931 5         9 my $record;
932              
933 5         112 while( read(INPUT, $record, TSH_RECORD_LENGTH) ) {
934             # Extract the fields from the TSH record in a platform-independent way
935             #
936 5000         44924 my ($t_sec,
937             $if, $t_usec,
938             $version_ihl, $tos, $ip_len,
939             $id, $flags_offset,
940             $ttl, $protocol, $chk_sum,
941             $src,
942             $dst,
943             $src_port, $dst_port,
944             $seq_num,
945             $ack_num,
946             $data_offset, $tcp_flags, $win) =
947             unpack( "# Time
948             N # timestamp (seconds)
949             C B24 # interface, timestamp (microseconds)
950              
951             # IP
952             C C n # Version & IHL, Type of Service, Total Length
953             n n # Identification, Flags & Fragment Offset
954             B8 B8 n # TTL, Protocol, Header Checksum
955             N # Source Address
956             N # Destination Address
957              
958             # TCP
959             n n # Source Port, Destination Port
960             N # Sequence Number
961             N # Acknowledgment Number
962             C C n # Data Offset & Reserved bits, Flags, Window
963             ", $record
964             );
965              
966             ##################################################################
967             # TIME
968             ##################################################################
969             # Sanity: make absolutely sure that $t_sec is considered an
970             # integer in the code below
971             #
972 5000         8507 $t_sec = int $t_sec;
973              
974             # Extract the microseconds part of the timestamp
975             #
976 5000         8681 $t_usec = oct("0b$t_usec") / 1_000_000;
977              
978             # Sanity check
979             #
980 5000 50       15137 croak 'Microseconds record field exceeds 1,000,000. Processing aborted'
981             unless $t_usec < 1;
982              
983 5000 100       9219 unless ( defined $Trace{starts} ) {
984             # Get the first timestamp in the trace, and use it to normalize
985             # the rest of the timestamps in the trace.
986             #
987 5         13 $Trace{starts} = $t_sec + $t_usec;
988              
989             # Identify the period the trace was collected
990             #
991 5   50     20 $Trace{date} = date_of $Trace{filename} || date_of $t_sec || 'Unknown';
992              
993             # Timestamp of the last processed record.
994             #
995 5         13 $Trace{ends} = 0.0;
996             }
997              
998 5000         7274 $Interfaces{$if}{records}++;
999              
1000             # Combine the two parts of the timestamp ($t_sec and $t_usec) in
1001             # one variable and normalize using the first timestamp in the trace
1002             #
1003 5000         6921 my $timestamp = $t_sec + $t_usec - $Trace{starts};
1004              
1005             # Convert the $protocol number to the corresponding protocol name
1006             #
1007 5000   100     12574 $protocol = $iana_protocol_numbers{oct "0b$protocol"} || 'Unknown';
1008              
1009             # Sanity check: Timestamps must increase monotonically in a TSH
1010             # trace.
1011             #
1012 5000 50       9868 if ( $Trace{ends} > $timestamp ) {
1013             # If this is a TCP segment then this can play a big role if we
1014             # are interested in extracting the segment time series, so it's
1015             # better that we abort processing.
1016             #
1017 0         0 print "Timestamps do not increase monotonically (Prot $protocol)\n";
1018              
1019 0 0       0 croak "Processing aborted for $Trace{filename}" if wantarray;
1020             }
1021              
1022             ##################################################################
1023             # IP
1024             ##################################################################
1025 5000         7536 $Interfaces{$if}{IP}{Total}{Packets}++;
1026 5000         7108 $Interfaces{$if}{IP}{Total}{Bytes} += $ip_len;
1027              
1028             # Packet size distribution
1029             #
1030 5000         8616 $Interfaces{$if}{IP}{'Packet Size'}{$ip_len}++;
1031              
1032             # Get the IP version
1033             #
1034 5000         6233 my $version = ($version_ihl & 0xf0) >> 4;
1035              
1036             # We shouldn't see anything other than IPv4. If we do, issue a
1037             # warning.
1038             #
1039 5000 50       8696 carp "IPv$version packet detected" unless $version == 4;
1040              
1041             # Get the IP header length (IHL)
1042             #
1043 5000         5463 my $ihl = ($version_ihl & 0xf) << 2;
1044              
1045             ##################################################################
1046             # Transport protocols
1047             ##################################################################
1048 5000         8864 $Interfaces{$if}{Transport}{$protocol}{Total}{Packets}++;
1049 5000         8457 $Interfaces{$if}{Transport}{$protocol}{Total}{Bytes} += $ip_len;
1050              
1051             # Packet size distribution
1052             #
1053 5000         8715 $Interfaces{$if}{Transport}{$protocol}{'Packet Size'}{$ip_len}++;
1054              
1055             ##################################################################
1056             # D(o not)F(ragment) bit
1057             ##################################################################
1058 5000 100       9087 if ($flags_offset & 0x4000) {
1059 4475         6205 $Interfaces{$if}{IP}{DF}{Packets}++;
1060 4475         6253 $Interfaces{$if}{IP}{DF}{Bytes} += $ip_len;
1061              
1062 4475         6754 $Interfaces{$if}{Transport}{$protocol}{DF}{Packets}++;
1063 4475         7716 $Interfaces{$if}{Transport}{$protocol}{DF}{Bytes} += $ip_len;
1064             }
1065              
1066             ##################################################################
1067             # M(ore)F(ragments) bit
1068             ##################################################################
1069 5000 50       8905 if ($flags_offset & 0x2000) {
1070 0         0 $Interfaces{$if}{IP}{MF}{Packets}++;
1071 0         0 $Interfaces{$if}{IP}{MF}{Bytes} += $ip_len;
1072              
1073 0         0 $Interfaces{$if}{Transport}{$protocol}{MF}{Packets}++;
1074 0         0 $Interfaces{$if}{Transport}{$protocol}{MF}{Bytes} += $ip_len;
1075             }
1076              
1077             ##################################################################
1078             # DiffServ
1079             ##################################################################
1080             #
1081             # Convert the ToS field and gather DiffServ statistics.
1082             #
1083             # Extract the Differentiated Services Code Point (DSCP) from ToS
1084             #
1085 5000         9226 my $dscp = $tos >> 2;
1086              
1087 5000 50       7376 if ( $dscp == 0 ) {
    0          
    0          
1088             # The usual suspect, the default value most of the time. This
1089             # is compatible with RFC 791 (original ToS definition), RFC 1349
1090             # (updated ToS definition), RFC 2474 (DiffServ defines DSCP),
1091             # RFC 2780: No DiffServ code point (DSCP) set
1092             #
1093 5000         13548 $Interfaces{$if}{IP}{Normal}{Packets}++;
1094 5000         11141 $Interfaces{$if}{IP}{Normal}{Bytes} += $ip_len;
1095              
1096 5000         8065 $Interfaces{$if}{Transport}{$protocol}{Normal}{Packets}++;
1097 5000         8670 $Interfaces{$if}{Transport}{$protocol}{Normal}{Bytes} += $ip_len;
1098             }
1099             elsif ( $dscp % 0b001000 == 0 ) {
1100             # Class Selector Code points -- RFC 2474
1101             #
1102 0         0 $Interfaces{$if}{IP}{'Class Selector'}{Packets}++;
1103 0         0 $Interfaces{$if}{IP}{'Class Selector'}{Bytes} += $ip_len;
1104              
1105 0         0 $Interfaces{$if}{Transport}{$protocol}{'Class Selector'}{Packets}++;
1106 0         0 $Interfaces{$if}{Transport}{$protocol}{'Class Selector'}{Bytes}
1107             +=$ip_len;
1108             }
1109             elsif ( $dscp % 2 == 0 ) {
1110 0         0 $dscp >>= 1;
1111 0 0 0     0 if ( 0b00100 < $dscp and $dscp < 0b10100 ) {
    0          
1112             # Assured Forwarding (AF) PHB -- RFC 2597
1113             #
1114 0         0 $Interfaces{$if}{IP}{'AF PHB'}{Packets}++;
1115 0         0 $Interfaces{$if}{IP}{'AF PHB'}{Bytes} += $ip_len;
1116              
1117 0         0 $Interfaces{$if}{Transport}{$protocol}{'AF PHB'}{Packets}++;
1118 0         0 $Interfaces{$if}{Transport}{$protocol}{'AF PHB'}{Bytes} += $ip_len;
1119             }
1120             elsif ( $dscp == 0b10111 ) {
1121             # Expedited Forwarding (EF) PHB -- RFC 2598
1122             #
1123 0         0 $Interfaces{$if}{IP}{'EF PHB'}{Packets}++;
1124 0         0 $Interfaces{$if}{IP}{'EF PHB'}{Bytes} += $ip_len;
1125              
1126 0         0 $Interfaces{$if}{Transport}{$protocol}{'EF PHB'}{Packets}++;
1127 0         0 $Interfaces{$if}{Transport}{$protocol}{'EF PHB'}{Bytes} += $ip_len;
1128             }
1129             }
1130              
1131             ##################################################################
1132             # ECN
1133             ##################################################################
1134             #
1135             # Extract ECN from ToS and gather ECN statistics
1136             #
1137 5000         5593 my $ecn = $tos & 0b11;
1138 5000 50       8120 if ( $ecn ) {
1139 0         0 $Interfaces{$if}{IP}{ECT}{Packets}++;
1140 0         0 $Interfaces{$if}{IP}{ECT}{Bytes} += $ip_len;
1141              
1142 0         0 $Interfaces{$if}{Transport}{$protocol}{ECT}{Packets}++;
1143 0         0 $Interfaces{$if}{Transport}{$protocol}{ECT}{Bytes} += $ip_len;
1144             }
1145              
1146 5000 50       8534 if ( $ecn == 0b11 ) {
1147 0         0 $Interfaces{$if}{IP}{CE}{Packets}++;
1148 0         0 $Interfaces{$if}{IP}{CE}{Bytes} += $ip_len;
1149              
1150 0         0 $Interfaces{$if}{Transport}{$protocol}{CE}{Packets}++;
1151 0         0 $Interfaces{$if}{Transport}{$protocol}{CE}{Bytes} += $ip_len;
1152             }
1153              
1154             ##################################################################
1155             # IP Options
1156             ##################################################################
1157 5000 50       6886 if ( $ihl == 20 ) {
    0          
1158 5000         7416 $Interfaces{$if}{IP}{'No IP Options'}{Packets}++;
1159 5000         7385 $Interfaces{$if}{IP}{'No IP Options'}{Bytes} += $ip_len;
1160              
1161 5000         8049 $Interfaces{$if}{Transport}{$protocol}{'No IP Options'}{Packets}++;
1162 5000         8451 $Interfaces{$if}{Transport}{$protocol}{'No IP Options'}{Bytes} += $ip_len;
1163             }
1164             elsif ( $ihl > 20 ) {
1165 0         0 $Interfaces{$if}{IP}{'IP Options'}{Packets}++;
1166 0         0 $Interfaces{$if}{IP}{'IP Options'}{Bytes} += $ip_len;
1167              
1168 0         0 $Interfaces{$if}{Transport}{$protocol}{'IP Options'}{Packets}++;
1169 0         0 $Interfaces{$if}{Transport}{$protocol}{'IP Options'}{Bytes} += $ip_len;
1170             }
1171             else {
1172             # This is an extremely unlikely event, but just in case...
1173             #
1174 0         0 carp "IP header with only $ihl bytes detected";
1175             }
1176              
1177             ##################################################################
1178             # TCP-related counts
1179             ##################################################################
1180 5000 100       15294 if ( $protocol eq 'TCP' ) {
1181             # Extract TCP header length from $data_offset, and right shift,
1182             # since the TCP header length is expressed in 4-byte words.
1183             #
1184 4210         4834 my $tcp_hl = ( $data_offset & 0xf0 ) >> 2;
1185 4210         4864 my $tcp_payload = $ip_len - $ihl - $tcp_hl;
1186              
1187             # TCP flags
1188             #
1189 4210         20160 my ($cwr, $ece, $urg, $ack, $psh, $rst, $syn, $fin) =
1190             split '', unpack('B8', pack('C', $tcp_flags));
1191              
1192 4210 100       9733 if ( $syn ) {
1193             # Count the number of SYNs, SYN/ACKs and SYNs carrying a
1194             # payload in the trace.
1195             #
1196 1365         3310 $Interfaces{$if}{Transport}{TCP}{SYN}{$tcp_hl}++;
1197 1365 100       2430 $Interfaces{$if}{Transport}{TCP}{'SYN/ACK'}{$tcp_hl}++
1198             if $ack;
1199 1365 50       2287 $Interfaces{$if}{Transport}{TCP}{'SYN/Payload'}++
1200             if $tcp_payload > 0;
1201              
1202             # Collect the receiver's advertised window (awnd), for all
1203             # SYNs that have the standard TCP header. We will refer to
1204             # that as the "hard count". For larger SYNs, we cannot say
1205             # for sure what is the receiver's advertised window, but we
1206             # can collect a count for comparison (rwnd). We will refer to
1207             # this as the "soft count".
1208             #
1209 1365         2647 $Interfaces{$if}{Transport}{TCP}{rwnd}{$win}++;
1210 1365 100       2698 $Interfaces{$if}{Transport}{TCP}{awnd}{$win}++
1211             if $tcp_hl == 20;
1212             }
1213              
1214             # Count the number of ACKs, pure ACKs, etc.
1215             #
1216 4210 100       7120 if ( $ack ) {
1217 2880 50       5701 if ( $tcp_hl < 20 ) {
1218             # Yet another extremely unlikely event, but just in case...
1219             #
1220 0         0 carp "TCP header with only $tcp_hl bytes detected and ignored";
1221             }
1222             else {
1223 2880         4742 $Interfaces{$if}{Transport}{TCP}{'Total ACKs'}++;
1224              
1225 2880 100       4316 if ( $tcp_hl == 20 ) {
1226 2530         3903 $Interfaces{$if}{Transport}{TCP}{'Cumulative ACKs'}++;
1227              
1228 2530 100       5348 $Interfaces{$if}{Transport}{TCP}{'Pure ACKs'}++
1229             if $tcp_payload == 0;
1230             }
1231             else {
1232 350         529 $Interfaces{$if}{Transport}{TCP}{'Options ACKs'}++;
1233 350         695 $Interfaces{$if}{Transport}{TCP}{'ACK Option Size'}{$tcp_hl}++;
1234             }
1235             }
1236             }
1237              
1238             ##################################################################
1239             # Export %senders and %segments
1240             ##################################################################
1241             # Determine if we should collect statistics about the %senders and
1242             # the %segments. If process_trace() was called in a void context
1243             # then we do not need to collect such data, which results in
1244             # tremendous memory usage savings.
1245             #
1246 4210 100       7339 if ( $tcp_payload > 0 ) {
1247 1860 50       3159 if ( wantarray ) {
1248             # Add elements to the hashes ONLY if the segment carries
1249             # some payload. This way, one can be more sure if a given
1250             # segment was retransmitted or not, since ACKs are not
1251             # guaranteed reliable delivery.
1252             #
1253             # Occasionally, we may get 2 or more TCP segments with the
1254             # same $timestamp. We would like to keep them in the
1255             # segments hash and be able to discriminate between the
1256             # different segments, so we use the following (hash)
1257             # collision avoidance mechanism.
1258             #
1259 0         0 my $collisions = 0;
1260              
1261 0         0 while ( exists $segments{$if}{$timestamp}{bytes} ) {
1262             # Sanity check: If more than TIMESTAMP_COLLISION_THRESHOLD
1263             # trace records have the same timestamp, it is better to
1264             # abort processing. Theoretically there shouldn't be two
1265             # segments with the same timestamp.
1266             #
1267 0 0       0 croak 'Too many duplicate timestamps: ', $collisions,
1268             ' trace records have the same timestamp. Processing aborted'
1269             if $collisions++ == TIMESTAMP_COLLISION_THRESHOLD;
1270              
1271 0         0 carp "Duplicate timestamp $timestamp detected & replaced with ",
1272             $timestamp .= "1";
1273              
1274 0         0 $Trace{Transport}{TCP}{'Concurrent Segments'}++;
1275             }
1276              
1277             # Store the total length of the segment (headers +
1278             # application payload), and the sequence number it carries
1279             #
1280 0         0 $segments{$if}{$timestamp}{bytes} = $ip_len;
1281 0         0 $segments{$if}{$timestamp}{seq_num} = $seq_num;
1282              
1283             # In addition, flag by default every segment as an original
1284             # transmission. Detection of retransmitted segments is not
1285             # done in process_trace(), but rather in
1286             # write_sojourn_times()
1287             #
1288 0         0 $segments{$if}{$timestamp}{retransmitted} = undef;
1289              
1290             # Add the packet timestamp to the respective sender list
1291             #
1292 0         0 push @{ $senders{$if}{"$src,$src_port,$dst,$dst_port"} },
  0         0  
1293             $timestamp;
1294              
1295             # Flag bidirectional traffic found in the *same* interface.
1296             # If bidirectional traffic is present in the same interface,
1297             # it is not clear (yet) how to isolate "incoming" from
1298             # "outgoing" traffic.
1299             #
1300 0 0 0     0 $Trace{unidirectional} = 0
1301             if ( $Trace{unidirectional} and
1302             exists $senders{$if}{"$dst,$dst_port,$src,$src_port"}
1303             );
1304             }
1305              
1306             ##################################################################
1307             # Export TSH to ns2 binary traffic trace format
1308             ##################################################################
1309             # Generate an ns2 binary traffic trace. (TCP data-carrying
1310             # segements only)
1311             #
1312 1860 100       3813 if ( $options{ns2} ) {
1313              
1314 372 100       809 unless ( defined $ns2_fh{$if}) {
1315 2 50       142 open($ns2_fh{$if}, '>', "$options{ns2}-if$if.bin")
1316             or croak "Cannot open $options{ns2}-if$if.bin. $!";
1317 2         6 binmode $ns2_fh{$if}; # Needed for non-UNIX OSes; no harm in UNIX
1318              
1319 2         6 $ns2_previous_timestamp{$if} = $timestamp;
1320             }
1321              
1322             print
1323 372         369 { $ns2_fh{$if} }
  372         1422  
1324             pack('NN', # two integers: interpacket time (usec), packet size (B)
1325             sprintf("%.0f", ( $timestamp
1326             - $ns2_previous_timestamp{$if} ) * 1_000_000
1327             ),
1328             $ip_len
1329             );
1330              
1331 372         669 $ns2_previous_timestamp{$if} = $timestamp;
1332             }
1333             }
1334              
1335             ##################################################################
1336             # Export TSH to tcpdump format
1337             ##################################################################
1338             # Print a tcpdump-like time line of the TSH trace (for TCP
1339             # segments only)
1340             #
1341 4210 100       9182 if ( $options{tcpdump}) {
1342 842         4139 printf TCPDUMP "%1.9f ", $timestamp;
1343 842 100 100     1821 print TCPDUMP
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    100          
    50          
1344             get_IP_address $src, ".$src_port > ",
1345             get_IP_address $dst, ".$dst_port: ",
1346              
1347             $syn ? 'S' : '', # SYN: Synchronize sequence numbers
1348             $fin ? 'F' : '', # FIN: Sender is finished sending data
1349             $psh ? 'P' : '', # PSH: Push data to receiving process ASAP
1350             $rst ? 'R' : '', # RST: Reset Connection
1351             $cwr ? 'C' : '', # ECN: Congestion Window Reduced bit
1352             $ece ? 'E' : '', # ECN: ECN-capable Transport
1353              
1354             ($syn + $fin + $psh + $rst + $cwr + $ece) ? ' ' : '. ',
1355              
1356             ($tcp_payload or $syn or $fin or $rst)
1357             ? join('', "$seq_num:", $seq_num + $tcp_payload, "($tcp_payload) ")
1358             : '',
1359              
1360             $ack ? "ack $ack_num " : '',
1361             "win $win ",
1362             $urg ? "urg 1\n": "\n",
1363             }
1364             }
1365              
1366             # The following is used both for sanity checks and to store the
1367             # the duration of the trace
1368             #
1369 5000         20791 $Trace{ends} = $Interfaces{$if}{ends} = $timestamp;
1370              
1371             } # end of while( read...)
1372              
1373 5         5761 close INPUT;
1374              
1375 5 100 33     129 close TCPDUMP and
1376             progress "TCP activity stored in text format in $options{tcpdump}\n"
1377             if $options{tcpdump};
1378              
1379 5 50 33     55 carp $Trace{Transport}{TCP}{'Concurrent Segments'},
1380             ' TCP segments had the same timestamp with another segment'
1381             if $Trace{Transport}{TCP}{'Concurrent Segments'} and wantarray;
1382              
1383             # Since we keep track of statistics on a per-interface basis, we
1384             # need to copy the data to %Trace for backwards compatibility.
1385             #
1386 5         27 my @interfaces = get_interfaces_list;
1387 5         23 $Trace{interfaces} = scalar @interfaces;
1388              
1389 5         10 my $total_records = 0;
1390              
1391 5         19 foreach my $if ( @interfaces ) {
1392              
1393 10         27 $total_records += $Interfaces{$if}{records};
1394              
1395 9     9   63 no warnings qw(uninitialized);
  9         21  
  9         12159  
1396              
1397 10         12 my @transports = sort keys %{$Interfaces{$if}{Transport}};
  10         74  
1398              
1399 10         25 foreach my $metric ('Packets', 'Bytes') {
1400 20         40 foreach ( @data_points ) {
1401 220         734 $Trace{IP}{$_}{$metric} += $Interfaces{$if}{IP}{$_}{$metric};
1402              
1403 220         278 foreach my $protocol ( @transports ) {
1404 770         2977 $Trace{Transport}{$protocol}{$_}{$metric}
1405             += $Interfaces{$if}{Transport}{$protocol}{$_}{$metric};
1406             }
1407             }
1408             }
1409              
1410             # ACKs
1411             #
1412 10         25 foreach ( 'Total ACKs', 'Cumulative ACKs', 'Pure ACKs', 'Options ACKs' ) {
1413 40         123 $Trace{Transport}{TCP}{$_} += $Interfaces{$if}{Transport}{TCP}{$_};
1414             }
1415              
1416             # Advertised window
1417             #
1418 10         20 foreach ( keys %{$Interfaces{$if}{Transport}{TCP}{rwnd}} ) {
  10         69  
1419 120         347 $Trace{Transport}{TCP}{rwnd}{$_}
1420             += $Interfaces{$if}{Transport}{TCP}{rwnd}{$_};
1421              
1422 120         437 $Trace{Transport}{TCP}{awnd}{$_}
1423             += $Interfaces{$if}{Transport}{TCP}{awnd}{$_};
1424             }
1425              
1426             # SYN and SYN/ACKs
1427             #
1428 10         25 foreach ( keys %{$Interfaces{$if}{Transport}{TCP}{SYN}} ) {
  10         219  
1429 40         116 $Trace{Transport}{TCP}{SYN}{$_}
1430             += $Interfaces{$if}{Transport}{TCP}{SYN}{$_};
1431              
1432 40         189 $Trace{Transport}{TCP}{'SYN/ACK'}{$_}
1433             += $Interfaces{$if}{Transport}{TCP}{'SYN/ACK'}{$_};
1434              
1435 40         171 $Trace{Transport}{TCP}{'SYN/Payload'}
1436             += $Interfaces{$if}{Transport}{TCP}{'SYN/Payload'};
1437             }
1438              
1439             # TCP Options ACKs
1440             #
1441 10         28 while ( my ($k, $v) = each
  40         148  
1442             %{$Interfaces{$if}{Transport}{TCP}{'ACK Option Size'}} ) {
1443 30         80 $Trace{Transport}{TCP}{'ACK Option Size'}{$k} += $v;
1444             }
1445              
1446             # Packet size distribution
1447             #
1448 10         19 while ( my ($k, $v) = each %{$Interfaces{$if}{IP}{'Packet Size'}} ) {
  730         2735  
1449 720         1538 $Trace{IP}{'Packet Size'}{$k} += $v;
1450              
1451 720         1169 foreach ( @transports ) {
1452 2575         8495 $Trace{Transport}{$_}{'Packet Size'}{$k}
1453             += $Interfaces{$if}{Transport}{$_}{'Packet Size'}{$k};
1454             }
1455             }
1456             }
1457              
1458             # Sanity checks
1459             #
1460 5         23 my $total_packets;
1461 5         22 while ( ($_) = each %{$Trace{Transport}} ) {
  25         93  
1462 20         50 $total_packets += $Trace{Transport}{$_}{Total}{Packets};
1463             }
1464              
1465 5 50       32 croak "Total number of packets is not equal to the number of trace records"
1466             unless $Trace{records} == $total_packets;
1467              
1468 5 50       40 croak "The estimated number of records based on the file size does not equal the number of records observed across all interfaces"
1469             unless $total_records == $Trace{records};
1470              
1471 5 100       149 return (\%senders, \%segments) if defined wantarray;
1472             }
1473              
1474             sub progress( $ ) {
1475 17 50   17 0 79 print STDERR shift if $options{Verbosity};
1476             }
1477              
1478             =head2 records_in
1479              
1480             records_in FILENAME
1481              
1482             Estimates the number of records in FILENAME based on its file size.
1483             It returns an integer corresponding to the "expected" number of
1484             records in the trace, or I<false> if the file size does not seem to
1485             correspond to a legitimate TSH trace.
1486              
1487             =cut
1488              
1489             sub records_in( $ ) {
1490 7     7 1 661 my $no_records = (-s shift) / TSH_RECORD_LENGTH;
1491              
1492 7 100       568 $no_records == int $no_records and return $no_records;
1493             }
1494              
1495              
1496             =head2 verbose
1497              
1498             verbose
1499              
1500             As you might expect, this function sets the verbosity level of the
1501             module. By default C<Net::Traces::TSH> remains "silent". Call
1502             verbose() to see trace processing progress indicators on standard
1503             error.
1504              
1505             As of version 0.13, verbose() is equivalent to
1506              
1507             configure(Verbosity => 1);
1508              
1509             =cut
1510              
1511             sub verbose () {
1512              
1513 1     1 1 177 $options{Verbosity} = 1;
1514              
1515             }
1516              
1517             # Utility function to export the information stored in %Trace and
1518             # %Interfaces in CSV format
1519             #
1520             sub write_summary( *$ ; $ ) {
1521 3     3 0 11 my ( $FH, $href, $if ) = @_;
1522              
1523 3 50 33     39 confess "usage: write_summary(FILEHANDLE, HASH_REFERENCE)"
1524             unless ref($FH) eq 'GLOB' and ref($href) eq 'HASH';
1525              
1526             # Prepare to print general trace file information
1527             #
1528 3         165 print $FH <<GENERAL_INFO;
1529             GENERAL TRACE INFORMATION
1530             Filename,$Trace{filename},$Trace{date}
1531             Duration,$Trace{ends}
1532             Records,$Trace{records}
1533             Interfaces,$Trace{interfaces}
1534             GENERAL_INFO
1535              
1536 3 50       54 print $FH "Link Capacity,$Trace{'Link Capacity'}\n"
1537             if $Trace{'Link Capacity'};
1538              
1539 3 50       22 print $FH 'Duplicate timestamps,',
1540             $Trace{Transport}{TCP}{'Concurrent Segments'}, "\n"
1541             if $Trace{Transport}{TCP}{'Concurrent Segments'};
1542              
1543 3 100       10 if ( defined $if ) {
1544 2         15 print $FH <<INTERFACE_INFO;
1545              
1546             INTERFACE INFORMATION
1547             Interface Number,$if
1548             Duration,$href->{ends}
1549             Records,$href->{records}
1550              
1551             INTERFACE TRAFFIC DENSITY
1552             ,Pkts/s,Bytes/Pkt,b/s
1553             INTERFACE_INFO
1554              
1555 2         34 printf $FH
1556             "IP Total,%.0f,%.0f,%.0f\n",
1557             $href->{IP}{Total}{Packets} / $href->{ends},
1558             $href->{IP}{Total}{Bytes} / $href->{IP}{Total}{Packets},
1559             $href->{IP}{Total}{Bytes} * 8 / $href->{ends};
1560              
1561 2 50       14 if ( $href->{Transport}{TCP}{Total}{Packets}) {
1562 2         18 printf $FH "TCP Total,%.0f,%.0f,%.0f",
1563             $href->{Transport}{TCP}{Total}{Packets} / $href->{ends},
1564             ( $href->{Transport}{TCP}{Total}{Bytes}
1565             / $href->{Transport}{TCP}{Total}{Packets}
1566             ),
1567             ( ( $href->{Transport}{TCP}{Total}{Bytes} * 8 )
1568             / $href->{ends}
1569             );
1570             }
1571             else {
1572 0         0 print $FH "TCP Total,0,0,0";
1573             }
1574             }
1575              
1576 3         14 my @transports = sort keys %{$href->{Transport}};
  3         30  
1577              
1578 3         10 foreach my $metric ('Packets', 'Bytes') {
1579 6         37 print $FH
1580             "\n\nIP STATISTICS (", uc($metric),
1581             ")\n,,Fragmentation,,Explicit Congestion Notification,,",
1582             "Differentiated Services,,,,IP Options\n,",
1583             join( ',', @data_points), "\nIP";
1584              
1585             # Some of the entries in the hashes below are naturally
1586             # uninitialized. For example, a given trace may not have any
1587             # packets with the MF bit set. We take advantage of Perl's automatic
1588             # conversion of uninitialized values to 0 (in a scalar/number
1589             # context). However, with warnings on, this may cause a
1590             # considerable number of warnings re: uninitialized values, possibly
1591             # leading a novice user to believe that something REALLY BAD
1592             # happened, which is not the case. So we disable these particular
1593             # warnings for the rest of the block. This practice is followed
1594             # in the rest of the code below, as necessary.
1595             #
1596 9     9   62 no warnings qw(uninitialized);
  9         16  
  9         1903  
1597              
1598 6         14 foreach ( @data_points ) {
1599 66         249 printf $FH ",%d", $href->{IP}{$_}{$metric};
1600             }
1601              
1602 6         13 foreach my $protocol ( @transports ) {
1603 22         38 print $FH "\n$protocol";
1604              
1605 22         33 foreach ( @data_points ) {
1606 242         1214 printf $FH ",%d", $href->{Transport}{$protocol}{$_}{$metric};
1607             }
1608             }
1609             }
1610              
1611             # Print distribution of ACKs
1612             #
1613 3 50       16 if ( $href->{Transport}{TCP}{'Total ACKs'} ) {
1614 3         7 print $FH "\n\nTCP ACKNOWLEDGEMENTS\n";
1615              
1616 3         7 foreach ( 'Total ACKs', 'Cumulative ACKs', 'Pure ACKs', 'Options ACKs' ) {
1617 12         54 printf $FH "$_,%d\n", $href->{Transport}{TCP}{$_};
1618             }
1619             }
1620              
1621             # Print the TCP Advertised window distribution
1622             #
1623 3 50       23 if ( $href->{Transport}{TCP}{rwnd} ) {
1624 3         6 print $FH
1625             "\nRECEIVER ADVERTISED WINDOW\nSize (Bytes),Soft Count,Hard Count\n";
1626              
1627 9     9   46 no warnings qw(uninitialized);
  9         16  
  9         1087  
1628              
1629 3         8 foreach ( sort numerically keys %{$href->{Transport}{TCP}{rwnd}} ) {
  3         58  
1630 47         242 printf $FH "%d,%d,%d\n", $_,
1631              
1632             $href->{Transport}{TCP}{rwnd}{$_}
1633             - $href->{Transport}{TCP}{awnd}{$_},
1634              
1635             $href->{Transport}{TCP}{awnd}{$_};
1636             }
1637             }
1638              
1639             # Print the TCP Options-carrying SYN size distribution
1640             #
1641 3 50       18 if ( $href->{Transport}{TCP}{SYN} ) {
1642 3         7 print $FH
1643             "\nTCP OPTIONS NEGOTIATION\n",
1644             'TCP Header Length (Bytes),SYN,SYN/ACK';
1645              
1646 9     9   127 no warnings qw(uninitialized);
  9         16  
  9         1423  
1647              
1648 3         5 foreach ( sort numerically keys %{$href->{Transport}{TCP}{SYN}} ) {
  3         20  
1649 14         81 print $FH "\n$_,",
1650             $href->{Transport}{TCP}{SYN}{$_}
1651             - $href->{Transport}{TCP}{'SYN/ACK'}{$_}, ',',
1652             $href->{Transport}{TCP}{'SYN/ACK'}{$_};
1653             }
1654              
1655 3         13 print $FH "\nSYN/Payload,", $href->{Transport}{TCP}{'SYN/Payload'};
1656             }
1657              
1658             # Print the distribution of ACKs carrying TCP options
1659             #
1660 3 50       13 if ( $href->{Transport}{TCP}{'Options ACKs'}) {
1661 3         90 print $FH "\n\nTCP OPTIONS ACK USAGE\nTCP Header Length (Bytes),Count";
1662              
1663 9     9   50 no warnings qw(uninitialized);
  9         15  
  9         6160  
1664              
1665 3         5 foreach ( sort numerically keys
  3         16  
1666             %{$href->{Transport}{TCP}{'ACK Option Size'}} )
1667             {
1668 10         37 print $FH "\n$_,", $href->{Transport}{TCP}{'ACK Option Size'}{$_};
1669             }
1670             }
1671              
1672             # Print the packet size distribution
1673             #
1674 3         13 print $FH join ',', "\n\nPACKET SIZE DISTRIBUTION\nBytes,IP", @transports;
1675              
1676 3         5 foreach ( sort numerically keys %{$href->{IP}{'Packet Size'}} ) {
  3         71  
1677 270         1102 print $FH "\n$_,$href->{IP}{'Packet Size'}{$_}";
1678              
1679 270         331 foreach my $prt ( @transports ) {
1680 1019         3717 print_value(\*$FH, $href->{Transport}{$prt}{'Packet Size'}{$_});
1681             }
1682             }
1683              
1684 3         31 print $FH "\n";
1685             }
1686              
1687             =head2 write_interface_summaries
1688              
1689             write_interface_summaries
1690             write_interface_summaries FILE_PREFIX
1691              
1692             Writes a CSV summary similar to what write_trace_summary() generates
1693             for each interface in the trace (see L<%Interfaces|"Data
1694             Structures">). Each summary file has a C<.if-X.csv> suffix, where X
1695             is the number of the interface. If FILE_PREFIX is provided,
1696             write_interface_summaries() will append to it this standard suffix
1697             (indicative of the interface).
1698              
1699             =cut
1700              
1701             sub write_interface_summaries( ; $ ) {
1702              
1703 1     1 1 454 foreach my $if ( get_interfaces_list ) {
1704             # Open the interface-specific summary
1705             #
1706 2   33     26 my $if_summary = shift || $Trace{filename};
1707 2         6 $if_summary .= ".if-$if.csv";
1708              
1709 2 50       91672 open(LOG, '>', $if_summary)
1710             or croak "Cannot open interface-specific summary. $!";
1711              
1712 2         36 progress 'Generating interface-specific summary... ';
1713              
1714 2         25 write_summary( \*LOG, $Interfaces{$if}, $if );
1715              
1716 2         182 close LOG;
1717              
1718 2         15 progress "see $if_summary\n";
1719             }
1720              
1721             }
1722              
1723             =head2 write_trace_summary
1724              
1725             write_trace_summary
1726             write_trace_summary FILENAME
1727              
1728             Writes the contents of L<%Trace|"Data Structures"> to FILENAME
1729             in comma separated values (CSV) format, a platform independent text
1730             format, excellent for storing tabular data. CSV is both
1731             human-readable and suitable for further analysis using Perl or direct
1732             import to a spreadsheet application. Although not required, it is
1733             recommended that FILENAME should have a I<.csv> suffix.
1734              
1735             If FILENAME is not specified, write_trace_summary() will create one
1736             for you by appending the suffix I<.csv> to the L<filename|"General
1737             Trace Information"> of the trace being processed.
1738              
1739             If you want FILENAME to contain meaningful data you should call
1740             write_trace_summary() I<after> calling process_trace().
1741              
1742             =cut
1743              
1744             sub write_trace_summary( ; $ ) {
1745              
1746 1 50 33 1 1 828 croak
      33        
1747             'Important trace information was not found. Call process_trace() before ',
1748             "calling write_trace_summary().\nTrace summary generation aborted"
1749             unless ( $Trace{IP}{Total}{Bytes}
1750             and $Trace{IP}{Total}{Packets}
1751             and $Trace{ends}
1752             );
1753            
1754             # Open the log file (expected to be .csv)
1755             #
1756 1   33     9 $Trace{summary} = shift || "$Trace{filename}.csv";
1757              
1758 1 50       158 open(LOG, '>', $Trace{summary})
1759             or croak "Cannot write trace summary to $Trace{summary}. $!";
1760              
1761 1         10 progress 'Generating trace summary... ';
1762              
1763 1         7 write_summary( \*LOG, \%Trace );
1764              
1765 1         363 close LOG;
1766              
1767 1         15 progress "see $Trace{summary}\n";
1768             }
1769              
1770             sub print_value(*$) {
1771 1019     1019 0 1247 my ($fh, $value) = @_;
1772 1019 100       1158 print {$fh} $value ? ",$value" : ',0';
  1019         4900  
1773             }
1774              
1775             # Mandatory: the module must return "true"
1776             #
1777              
1778             1;
1779              
1780             =head1 DEPENDENCIES
1781              
1782             Nothing non-standard: L<strict>, L<warnings> and L<Carp>.
1783              
1784             =head2 EXPORTS
1785              
1786             None by default.
1787              
1788             =head3 Exportable
1789              
1790             configure() date_of() get_IP_address() get_interfaces_href()
1791             get_interfaces_list() get_trace_summary_href() numerically()
1792             process_trace() records_in() verbose() write_trace_summary()
1793              
1794             In addition, the following export tags are defined:
1795              
1796             =over
1797              
1798             =item :traffic_analysis
1799              
1800             verbose() process_trace() write_interface_summaries()
1801             write_trace_summary()
1802              
1803             =item :trace_information
1804              
1805             date_of() records_in()
1806              
1807             =back
1808              
1809             Finally, all exportable functions can be imported with
1810              
1811             use Net::Traces::TSH qw(:all);
1812              
1813             =head1 VERSION
1814              
1815             This is C<Net::Traces::TSH> version 0.16.
1816              
1817             =head1 SEE ALSO
1818              
1819             The NLANR MOAT Passive Measurement and Analysis (PMA) web site at
1820             http://pma.nlanr.net/PMA provides more details on the process of
1821             collecting packet traces. The site features a set of Perl programs
1822             you can download, including several converters from other packet trace
1823             formats to TSH.
1824              
1825             TSH trace files can be downloaded from the NLANR/PMA trace repository
1826             at http://pma.nlanr.net/Traces . The site contains a variety of
1827             traces gathered from several monitoring points at university campuses
1828             and (Giga)PoPs connected to a variety of large and small networks.
1829              
1830             C<Net::Traces::TSH> version 0.11 was presented in YAPC::NA 2004. The
1831             presentation slides are available at
1832             http://www.cs.stonybrook.edu/~kostas/art/yapc .
1833              
1834             =head2 DiffServ
1835              
1836             If you are not familiar with Differentiated Services (DiffServ), good
1837             starting points are the following RFCs:
1838              
1839             K. Nichols I<et al.>, I<Definition of the Differentiated Services
1840             Field (DS Field) in the IPv4 and IPv6 Headers>, RFC 2474. Available at
1841             http://www.ietf.org/rfc/rfc2474.txt
1842              
1843             S. Blake I<et al.>, I<An Architecture for Differentiated Services>,
1844             RFC 2475. Available at http://www.ietf.org/rfc/rfc2475.txt
1845              
1846             See also RFC 2597 and RFC 2598.
1847              
1848             =head2 ECN
1849              
1850             If you are not familiar with Explicit Congestion Notification (ECN) make
1851             sure to read
1852              
1853             K. K. Ramakrishnan I<et al.>, I<The Addition of Explicit Congestion
1854             Notification (ECN) to IP>, RFC 3168. Available at
1855             http://www.ietf.org/rfc/rfc3168.txt
1856              
1857             =head2 The ns2 network simulator
1858              
1859             C<Net::Traces::TSH> can convert TSH traces to binary files suitable to
1860             drive simulations in ns2. More information about ns2 is available at
1861             http://www.isi.edu/nsnam/ns .
1862              
1863             =head1 AUTHOR
1864              
1865             Kostas Pentikousis, kostas AT cpan DOT org.
1866              
1867             =head1 ACKNOWLEDGMENTS
1868              
1869             Professor Hussein Badr provided invaluable guidance while crafting the
1870             main algorithms of this module.
1871              
1872             Many thanks to Wall, Christiansen and Orwant for writing I<Programming
1873             Perl 3/e>. It has been indispensable while developing this module.
1874              
1875             =head1 COPYRIGHT AND LICENSE
1876              
1877             Copyright 2003, 2004 by Kostas Pentikousis. All Rights Reserved.
1878              
1879             This library is free software with ABSOLUTELY NO WARRANTY. You can
1880             redistribute it and/or modify it under the same terms as Perl itself.
1881              
1882             =cut
1883              
1884             __DATA__