File Coverage

blib/lib/Bio/NEXUS/Functions.pm

Criterion	Covered	Total	%
statement	87	135	64.4
branch	39	68	57.3
condition	8	21	38.1
subroutine	15	22	68.1
pod			n/a
total	149	246	60.5

line	stmt	bran	cond	sub	time	code
1						#################################################################
2						# Functions.pm - internal functions for reading, parsing, arrays
3						#################################################################
4						# Original version thanks to Tom Hladish
5						#
6						# $Id: Functions.pm,v 1.16 2012/02/07 21:49:27 astoltzfus Exp $
7
8						#################### START POD DOCUMENTATION ##################
9
10						=head1 NAME
11
12						Bio::NEXUS::Functions - Provides private utiliy functions for the module
13
14						=head1 SYNOPSIS
15
16						=head1 DESCRIPTION
17
18						This package provides private functions that are not object-specific.
19
20						=head1 COMMENTS
21
22						=head1 FEEDBACK
23
24						All feedback (bugs, feature enhancements, etc.) is greatly appreciated.
25
26						=head1 AUTHORS
27
28						Original version by Thomas Hladish (tjhladish at yahoo)
29
30						=head1 VERSION
31
32						$Revision: 1.16 $
33
34						=head1 METHODS
35
36						=cut
37
38						package Bio::NEXUS::Functions;
39
40	34			34	181	use strict;
	34				69
	34				2348
41						#use Data::Dumper; # XXX this is not used, might as well not import it!
42						#use Carp; # XXX this is not used, might as well not import it!
43	34			34	22222	use Bio::NEXUS::Util::Exceptions;
	34				178
	34				2462
44	34			34	183	use vars qw(@EXPORT @EXPORT_OK @ISA $VERSION);
	34				54
	34				2297
45	34			34	184	use Bio::NEXUS; $VERSION = $Bio::NEXUS::VERSION;
	34				61
	34				1268
46	34			34	179	use Exporter ();
	34				62
	34				78076
47
48						@ISA = qw ( Exporter );
49						@EXPORT = qw(
50						&_slurp
51						&_parse_nexus_words
52						&_ntsa
53						&_stna
54						&_quote_if_needed
55						&_nexus_formatted
56						&_is_comment
57						&_is_number
58						&_is_dec_number
59						&_sci_to_dec
60						&_unique
61						&_nonunique
62						&_share_elements
63						&_fast_in_array
64						&_in_array
65						&_is_same_array
66						);
67
68						## READING & PARSING FUNCTIONS:
69
70						=begin comment
71
72						Name : _slurp
73						Usage : $file_content = _slurp($filename);
74						Function: reads an entire file into memory
75						Returns : none
76						Args : file name (string)
77
78						=end comment
79
80						=cut
81
82						sub _slurp {
83	69			69	163	my ($filename) = @_;
84	69		33		4169	open my $fh, '<', "$filename"
85						\|\| Bio::NEXUS::Util::Exceptions::FileError->throw(
86						'error' => "ERROR: Could not open filename <$filename> for input; $!"
87						);
88	69				166	my $file_content = do { local ($/); <$fh> };
	69				402
	69				3162
89	69				5537	return $file_content;
90						}
91
92						=begin comment
93
94						Title : _parse_nexus_words
95						Usage : $parsed_words = _parse_nexus_words($buffer);
96						Function: parse a string of text into "words" (as defined in the NEXUS standard)
97						Returns : an array ref of "words" and punctuation marks. Single-quoted expressions are single "words". Double quotes are not supported.
98						Args : text buffer
99						Notes : this method has replaced _parse_string_tokens(), which did not conform to the NEXUS standard in all its quirky splendor (particularly with regard to punctuation)
100
101						=end comment
102
103						=cut
104
105						sub _parse_nexus_words {
106	559			559	947	my $buffer = shift;
107	559	50			1430	if ( not defined $buffer ) {
108	0				0	Bio::NEXUS::Util::Exceptions::BadArgs->throw(
109						'error' => '_parse_nexus_words() requires a text string argument (the text to be parsed)'
110						);
111						}
112	559				802	my @words;
113	559				991	my ( $word, $in_quotes ) = ( q{}, 0 );
114
115	559				26235	my @chars = split( //, $buffer );
116	559				2827	my $comment_level = 0;
117
118						# iterate through the characters
119	559				1803	for ( my $i = 0; $i < @chars; $i++ ) {
120	66989				79804	my $char = $chars[$i];
121	66989				78140	my $next = $chars[ $i + 1 ];
122
123	66989	100			290271	if ($comment_level) { # if we are in a comment already
		100
		100
		100
		100
		100
124	1790	50			3193	$comment_level++ if ( $char eq '[' );
125	1790	100			3675	$comment_level-- if ( $char eq ']' );
126	1790				4103	$word .= $char;
127						}
128
129						# If we see a quote
130						elsif ( $char eq q{'} ) {
131
132						# and we're already inside quotes . . .
133	428	100			708	if ($in_quotes) {
134
135						# check to see if this is an escaped (doubled single) quote,
136						# (unless we're already at the end of the string to be parsed).
137	214	50	66		1172	if ( defined $next && $next eq q{'} ) {
138
139						# If it is, append it to the current word;
140	0				0	$word .= $char;
141						}
142						else {
143
144						# otherwise, close off the quoted string
145	214				344	$in_quotes--;
146
147						# Replace spaces with underscores (according to NEXUS, they're equivalent)
148						#
149						# This may not be correct. Certainly TreeBASE doesn't like it
150						# when we use both quoted strings and underscores in them
151	214				649	$word =~ s/ /_/g;
152
153						# Push it onto the word list, after
154						# dealing with funny apostrophe business
155	214				1200	push @words, _ntsa($word);
156
157						# And clean the slate
158	214				693	$word = q{};
159						}
160						}
161						else {
162
163						# If we weren't in quotes before, we are now
164	214				578	$in_quotes++;
165						}
166						}
167						elsif ($in_quotes) {
168
169						# We're in a quoted string, so anything can be part of the word
170	4669				11093	$word .= $char;
171						}
172						elsif ( $char eq '[' ) { # hit new comment, level 0 (bug if we just finished one)
173	90				120	$comment_level++;
174	90				213	$word .= $char;
175						}
176
177						# If we see NEXUS-style punctuation
178						elsif ( $char =~ /[\[\]\-(){}\/\\,;:=*"`+<>]/ ) {
179
180	5821	100			14275	push @words, &_ntsa($word)
181
182						# $word will be q{} if there was a preceding space;
183						# otherwise, it will contain some string
184						unless $word eq q{};
185
186						# then that counts as a word (we'll deal with pos/neg
187						# numbers later in _rebuild_numbers() if that gets called)
188	5821				9811	push @words, $char;
189	5821				20879	$word = q{};
190						}
191
192						# If we see whitespace
193						elsif ( $char =~ /\s/ ) {
194
195						# then we just finished a [probably] normal, space-delimited word
196	6775	100			14269	push @words, &_ntsa($word)
197
198						unless $word eq q{};
199
200						# although we don't want to keep pushing it
201						# if there are multiple spaces, so we empty $word
202	6775				18248	$word = q{};
203						}
204
205						# If $word isn't quoted, and $char is neither punctuation nor whitespace
206						else {
207	47416				114293	$word .= $char;
208						}
209						}
210
211	559	100			1707	push @words, $word unless $word eq q{};
212	559				9686	return \@words;
213						}
214
215						sub _rebuild_numbers {
216	0			0	0	my $words = shift;
217	0				0	my @new_words;
218
219						# Don't bother checking whether the last word is a '+' or '-'
220	0				0	for ( my $i = 0; $i < ( @$words - 1 ); $i++ ) {
221	0				0	my $word = $words->[$i];
222	0				0	my $next = $words->[ $i + 1 ]; # There will always be a next
223
224						# my $next_next = defined $words[$i +2] ? $words[$i+2] : q{};
225						# There might be a previous
226	0	0			0	my $last = $i == 0 ? undef: $words->[ $i - 1 ];
227
228	0	0	0		0	if ( $word eq '-' \|\| $word eq '+' ) {
229	0	0			0	if ( my ( $num, $exp ) = $next =~ /^([\d.]+)(e)?/i ) {
230	0	0			0	if ( _is_dec_number($num) ) {
231	0				0	$word .= $next;
232	0				0	$i++;
233	0	0			0	if ($exp) {
234
235						}
236						}
237						}
238						}
239						else {
240	0				0	push @new_words, $word;
241						}
242						}
243	0				0	return \@new_words;
244						}
245
246						=begin comment
247
248						Title : _ntsa (nexus to standard apostrophe)
249						Usage : $standard_word = $block->_ntsa($nexus_word);
250						Function: change doubled single quotes to single single quotes (apostrophes)
251						Returns : a standard english word (or phrase)
252						Args : a nexus "word"
253						Notes : See NEXUS definition of "word" for an explanation
254
255						=end comment
256
257						=cut
258
259						sub _ntsa {
260	4796			4796	6172	my $nexus_word = shift;
261	4796				6036	$nexus_word =~ s/[^']''[^']/'/g;
262	4796				10693	return $nexus_word;
263						}
264
265						=begin comment
266
267						Title : _stna (standard to nexus apostrophe)
268						Usage : $nexus_word = $block->_stna($standard_word);
269						Function: change single single quotes (apostrophes) to double single quotes
270						Returns : a nexus "word"
271						Args : a standard english word (or phrase)
272						Notes : See NEXUS definition of "word" for an explanation
273
274						=end comment
275
276						=cut
277
278						sub _stna {
279	1189			1189	1390	my $standard_word = shift;
280	1189				1487	$standard_word =~ s/[^']'[^']/''/g;
281	1189				2927	return $standard_word;
282						}
283
284						=begin comment
285
286						Title : _quote_if_needed
287						Usage : $string = Bio::NEXUS::Block::_quote_if_needed($string);
288						Function: put single quotes around string if it contains spaces or NEXUS punctuation
289						Returns : a string, in single quotes if necessary
290						Args : a string
291
292						=end comment
293
294						=cut
295
296						sub _quote_if_needed {
297	1189			1189	1445	my $nexus_word = shift;
298	1189	100			7440	if ( $nexus_word =~ /[-\s(){}\[\]\/\\,;:=+*<>`'"]/ ) {
299	4				13	return "'$nexus_word'";
300						}
301						else {
302	1185				2679	return $nexus_word;
303						}
304						}
305
306						=begin comment
307
308						Title : _nexus_formatted
309						Usage : $string = Bio::NEXUS::Block::_nexus_formatted($string);
310						Function: escape apostrophes and quote strings as needed for NEXUS output
311						Returns : a string
312						Args : a string
313
314						=end comment
315
316						=cut
317
318						sub _nexus_formatted {
319	1189			1189	1821	my $nexus_word = shift;
320	1189				2217	$nexus_word = _quote_if_needed( _stna($nexus_word) );
321	1189				5515	return $nexus_word;
322						}
323
324						=begin comment
325
326						Name : _is_comment
327						Usage : $boolean = _is_comment($string);
328						Function: tests whether something looks like a comment
329						Returns : boolean
330						Args : string to test
331
332						=end comment
333
334						=cut
335
336						sub _is_comment {
337	2139			2139	2770	my ($string) = @_;
338	2139	100			10034	if ( $string =~ /^\[.*\]$/s ) { return 1 }
	358				1700
339	1781				6020	else { return 0 }
340						}
341
342						=begin comment
343
344						Title : _is_dec_number
345						Usage : if ( _is_dec_number($num) ) { do_something() };
346						Function: verifies that a number is a normal decimal number (e.g. 3 or 9.41)
347						Returns : 1 if $num is a number, otherwise 0
348						Args : a number
349
350						=end comment
351
352						=cut
353
354						sub _is_dec_number {
355	861			861	1165	my ($number) = @_;
356
357	861	50	33		3848	return 0 unless defined $number && length $number;
358
359	861				2781	my $number_regex = qr/^[-+]? # positive or negative
360						(?: \d+ # e.g., 523
361						\| \d*[.]\d+ # 3.14 or .45
362						\| \d+[.]\d* # 212. or 212.0
363						)
364						$/x;
365
366	861	100	66		6475	return 0 unless defined $number && $number =~ $number_regex;
367
368	857				3511	return 1;
369						}
370
371						=begin comment
372
373						Title : _is_number
374						Usage : if ( _is_number($num) ) { do_something() };
375						Function: verifies that a number is of reasonable form (such as 0.4 or 6.1e2.1)
376						Returns : 1 if $num is a number, otherwise 0
377						Args : a number
378
379						=end comment
380
381						=cut
382
383						sub _is_number {
384	761			761	1317	my ($number) = @_;
385
386	761	50	33		3453	return 0 unless defined $number && length $number;
387
388	761				3377	my ( $num, $exp ) = $number =~ /^([^e]+)(?:e([^e]+))?$/i;
389
390	761	50			1616	return 0 unless _is_dec_number($num);
391
392	761	100			1617	return _is_dec_number($exp) if defined $exp;
393
394	757				2402	return 1;
395						}
396
397						=begin comment
398
399						Title : _sci_to_dec
400						Usage : $decimal = _sci_to_dec($scientic_notation);
401						Function: Changes scientific notation to decimal notation
402						Returns : scalar (a number)
403						Args : scalar (a number), possibly in scientific notation
404
405						=end comment
406
407						=cut
408
409						sub _sci_to_dec {
410	88			88	123	my ($sci_num) = @_;
411
412	88				152	$sci_num =~ s/\s//g;
413	88	100			151	return $sci_num if _is_dec_number($sci_num);
414
415	4				20	my ( $num, $exp ) = $sci_num =~ /^ ([^e]+) e ([^e]+) $/ix;
416
417	4	50	33		10	return 0 unless ( _is_dec_number($num) && _is_dec_number($exp) );
418
419	4				27	my $dec_num = $num * ( 10**$exp );
420	4				13	return $dec_num;
421						}
422
423						## ARRAY FUNCTIONS:
424
425						=begin comment
426
427						Name : _any
428						Usage : _any($filename);
429						Function: reads an entire file into memory
430						Returns : none
431						Args : file name (string)
432
433						=end comment
434
435						=cut
436
437						sub _unique {
438	0			0		my (@array) = @_;
439	0					my %seen = ();
440
441						# from perl cookbook. fast, and preserves order
442	0					my @unique = grep { !$seen{$_}++ } @array;
	0
443	0					return @unique;
444						}
445
446						sub _nonunique {
447	0			0		my (@array) = @_;
448	0					my %seen = ();
449	0					my @nonunique = grep { $seen{$_}++ } @array;
	0
450	0					return @nonunique;
451						}
452
453						sub _share_elements {
454	0			0		my ( $array1, $array2 ) = @_;
455	0					for my $element1 (@$array1) {
456	0	0				if ( &in_array( $array2, $element1 ) ) { return 1; }
	0
457						}
458	0					return 0;
459						}
460
461						sub _fast_in_array {
462	0			0		my ( $array, $element ) = @_;
463	0					for (@$array) {
464	0	0				if ( $element eq $_ ) {
465	0					return 1;
466						}
467						}
468	0					return 0;
469						}
470
471						sub _in_array {
472	0			0		my ( $array, $test ) = @_;
473	0					my $match = 0;
474	0					for (@$array) {
475	0	0				$match++ if $_ eq $test;
476						}
477	0					return $match;
478						}
479
480						sub _is_same_array {
481	0			0		my ( $array, $test ) = @_;
482	0	0				return 1 if $array eq $test;
483	0	0				return 0 unless scalar @$array == scalar @$test;
484
485	0					my $astr = join '', sort @$array;
486	0					my $tstr = join '', sort @$test;
487	0	0				return 1 if $astr eq $tstr;
488	0					return 0;
489						}
490
491						1;