File Coverage

GO/Parsers/obo_text_parser.pm

Criterion	Covered	Total	%
statement	284	411	69.1
branch	163	262	62.2
condition	28	53	52.8
subroutine	16	20	80.0
pod	0	13	0.0
total	491	759	64.6

line	stmt	bran	cond	sub	pod	time	code
1							# $Id: obo_text_parser.pm,v 1.52 2010/03/11 22:40:27 cmungall Exp $
2							#
3							#
4							# see also - http://www.geneontology.org
5							# - http://www.godatabase.org/dev
6							#
7							# You may distribute this module under the same terms as perl itself
8
9							package GO::Parsers::obo_text_parser;
10
11							=head1 NAME
12
13							GO::Parsers::obo_text_parser - OBO Flat file parser object
14
15							=head1 SYNOPSIS
16
17							do not use this class directly; use GO::Parser
18
19							=cut
20
21							=head1 DESCRIPTION
22
23
24							=cut
25
26	10			10		66	use Exporter;
	10					22
	10					520
27	10			10		12029	use Text::Balanced qw(extract_quotelike extract_bracketed);
	10					423477
	10					1229
28	10			10		121	use base qw(GO::Parsers::base_parser);
	10					20
	10					4351
29	10			10		5319	use GO::Parsers::ParserEventNames;
	10					27
	10					2815
30
31	10			10		62	use Carp;
	10					15
	10					586
32	10			10		57	use FileHandle;
	10					19
	10					90
33
34	10			10		5192	use strict qw(vars refs);
	10					18
	10					119395
35
36							sub dtd {
37	0			0	0	0	'obo-parser-events.dtd';
38							}
39
40							sub parse_fh {
41	10			10	0	2157	my ($self, $fh) = @_;
42
43	10					110	$self->start_event(OBO);
44	10					739	$self->parse_fh_inner($fh);
45
46	10					160	$self->pop_stack_to_depth(0);
47	10					674	$self->parsed_ontology(1);
48
49							}
50
51							sub parse_fh_inner {
52
53	10			10	0	28	my ($self, $fh) = @_;
54	10					40	my $file = $self->file;
55	10					121	my $litemode = $self->litemode;
56	10					25	my $is_go;
57	10					18	local($_); # latest perl is more strict about modification of $_
58
59	10					131	$self->fire_source_event($file);
60	10					153	$self->start_event(HEADER);
61	10					297	my $stanza_count;
62	10					26	my $in_hdr = 1;
63	10					25	my $is_root = 1; # default
64	10					20	my $namespace_set;
65							my $id;
66	10					82	my $namespace = $self->force_namespace; # default
67	10					58	my $force_namespace = $self->force_namespace;
68	10					79	my $usc = $self->replace_underscore;
69	10					29	my %id_remap_h = ();
70	10					22	my @imports = ();
71
72	10					19	my $is_utf8;
73
74							# temporary hack...
75	10	50				76	if ($ENV{OBO_IDMAP}) {
76	0					0	my @parts = split(/\;/,$ENV{OBO_IDMAP});
77	0					0	foreach (@parts) {
78	0	0				0	if (/(.)=(.)/) {
79	0					0	$id_remap_h{$1} = $2;
80							}
81							}
82							}
83
84	10					23	my $default_id_prefix;
85
86	10					4014	while(<$fh>) {
87	3715					5365	chomp;
88
89	3715	50				7450	if (/^encoding:\s*utf/) {
90	0					0	$is_utf8 = 1;
91							}
92
93	3715	50				7091	if (!$is_utf8) {
94	3715					4720	tr [\200-\377]
95							[\000-\177]; # see 'man perlop', section on tr/
96							# weird ascii characters should be excluded
97	3715					4618	tr/\0-\10//d; # remove weird characters; ascii 0-8
98							# preserve \11 (9 - tab) and \12 (10-linefeed)
99	3715					4267	tr/\13\14//d; # remove weird characters; 11,12
100							# preserve \15 (13 - carriage return)
101	3715					4399	tr/\16-\37//d; # remove 14-31 (all rest before space)
102	3715					4640	tr/\177//d; # remove DEL character
103							}
104
105	3715					4712	s/^\!.*//;
106	3715					4661	s/[^\\]\!.*//;
107							#s/[^\\]\#.*//;
108	3715					6684	s/^\s+//;
109	3715					7602	s/\s+$//;
110	3715	100				7641	next unless $_;
111	3213	0	33			6788	next if ($litemode && $_ !~ /^(\[\|id:\|name:\|is_a:\|relationship:\|namespace:\|is_obsolete:)/ && !$in_hdr);
			33
112	3213	100				23298	if (/^\[(\w+)\]\s(.)/) { # new stanza
		100
		50
113
114							# we are at the beginning of a new stanza
115							# reset everything and make sure everything from
116							# previous stanza is exported
117
118	494					1251	my $stanza = lc($1);
119	494					1091	my $rest = $2;
120	494	100				939	if ($in_hdr) {
121	10					23	$in_hdr = 0;
122	10					195	$self->end_event(HEADER);
123							}
124							else {
125	484	100				1225	if (!$namespace_set) {
126	149	50				302	if (!$namespace) {
127	0	0				0	if ($stanza ne 'instance') {
128							#$self->parse_err("missing namespace for ID: $id");
129							}
130							}
131							else {
132	149					519	$self->event(NAMESPACE, $namespace);
133							}
134							}
135	484	100				12533	$self->event(IS_ROOT,1) if $is_root;
136	484					1850	$is_root = 1; # assume root by default; override if parents found
137	484					702	$namespace_set = 0;
138	484					1674	$self->end_event;
139							}
140	494	100				18205	$is_root = 0 unless $stanza eq 'term';
141	494					1510	$self->start_event($stanza);
142	494					13823	$id = undef;
143	494					2876	$stanza_count++;
144							}
145							elsif ($in_hdr) {
146
147							# we are in the header section
148
149	94	50				424	if (/^([\w\-]+)\:\s(.)/) { # tag-val pair
150	94					472	my ($tag, $val) = ($1,$2);
151	94	100				227	if ($tag eq 'subsetdef') {
152	12	50				40	if ($val =~ /(\S+)\s+(.*)/) {
153	12					20	my $subset_id = $1;
154	12					22	$val = $2;
155	12					26	my ($subset_name, $parts) =
156							extract_qstr($val);
157	0					0	$val =
158							[[ID,$subset_id],
159							[NAME,$subset_name],
160	12					63	map {dbxref($_)} @$parts];
161							}
162							else {
163	0					0	$self->parse_err("subsetdef: expect ID \"NAME\", got: $val");
164							}
165							}
166	94	100				185	if ($tag eq 'synonymtypedef') {
167	18	50				150	if ($val =~ /(\S+)\s+\"(.)\"\s(.*)/) {
168	18					34	my $stname = $1;
169	18					64	my $stdef = $2;
170	18					35	my $scope = $3;
171	18	50				108	$val =
172							[[ID,$stname],
173							[NAME,$stdef],
174							($scope ? ['scope', $scope] : ())];
175
176							}
177							else {
178	0					0	$self->parse_err("synonymtypedef: expect ID \"NAME\", got: $val");
179							}
180							}
181	94	50				190	if ($tag eq 'idspace') {
182	0					0	my ($idspace,$global,@rest) = split(' ',$val);
183	0	0				0	if (!$global) {
184	0					0	$self->parse_err("idspace requires two columns");
185							}
186							$val =
187	0	0				0	[['local',$idspace],
188							['global',$global],
189							(@rest ? [COMMENT,join(' ',@rest)] : ()),
190							];
191							}
192	94	50				169	if ($tag eq 'local-id-mapping') {
193	0	0				0	if ($val =~ /(\S+)\s+(.*)/) {
194							# with a local ID mapping we delay binding
195	0					0	$val =
196							[['local',$1],
197							['to',$2]];
198							}
199							else {
200	0					0	$self->parse_err("id-mapping requires two columns");
201							}
202							}
203	94	50				153	if ($tag eq 'import') {
204	0	0				0	if ($ENV{OBO_FOLLOW_IMPORTS}) {
205	0					0	push(@imports, $val);
206							}
207							else {
208							# handled below
209							#$self->event(import=>$val);
210							}
211							}
212
213	94					296	$self->event($tag=>$val);
214
215							# post-processing
216	94	100				14606	if ($tag eq 'default-namespace') {
217	10	50				47	$namespace = $val
218							unless $namespace;
219							}
220	94	100				216	if ($tag eq 'id-mapping') {
221	13	50				62	if ($val =~ /(\S+)\s+(.*)/) {
222							# bind at parse time
223	13	50				46	if ($id_remap_h{$1}) {
224	0					0	$self->parse_err("remapping $1 to $2");
225							}
226	13					48	$id_remap_h{$1} = $2;
227							}
228							else {
229	0					0	$self->parse_err("id-mapping requires two columns");
230							}
231							}
232	94	50				600	if ($tag eq 'default-id-prefix') {
233	0					0	$default_id_prefix = $val;
234							}
235							}
236							else {
237	0					0	$self->parse_err("illegal header entry: $_");
238							}
239							} # END OF IN-HEADER
240							elsif (/^([\w\-]+)\:\s(.)/) { # tag-val pair
241	2625					7179	my ($tag, $val) = ($1,$2);
242	2625					2758	my $qh;
243	2625					4519	($val, $qh) = extract_quals($val);
244							#$val =~ s/\\//g;
245	2625					4659	my $val2 = $val;
246	2625					3983	$val2 =~ s/\\,/,/g;
247	2625					3433	$val2 =~ s/\\//g;
248	2625	100	66			24969	if ($tag eq ID) {
		100	100
		100	100
		100	33
		100	33
		100	33
		100
		100
		50
		100
		100
		100
		100
		50
		50
		50
		50
249	494	100				1172	if ($id_remap_h{$val}) {
250	6					14	$val = $id_remap_h{$val};
251							}
252	494	100				1446	if ($val !~ /:/) {
253	13	50				42	if ($default_id_prefix) {
254	0					0	$val = "$default_id_prefix:$val";
255							}
256							}
257							}
258							elsif ($tag eq NAME) {
259							# replace underscore in name
260	494					714	$val = $val2;
261	494	50				956	if ($usc) {
262	0					0	$val =~ s/_/$usc/g;
263							}
264							}
265							elsif ($tag eq RELATIONSHIP) {
266	97					413	my ($type, @ids) = split(' ', $val2);
267	97					158	my $id = shift @ids;
268	97	100				256	if ($id_remap_h{$type}) {
269	75					133	$type = $id_remap_h{$type};
270							}
271	97	100				267	if ($type !~ /:/) {
272	22	50				47	if ($default_id_prefix) {
273	0					0	$type = "$default_id_prefix:$type";
274							}
275							}
276	97					385	$val = [[TYPE,$type],[TO,$id]];
277	97					238	push(@$val,map {['additional_argument',$_]} @ids);
	0					0
278							}
279							elsif ($tag eq INTERSECTION_OF \|\| $tag eq UNION_OF) {
280	4					13	my ($type, $id) = split(' ', $val2);
281	4	100				15	if ($id_remap_h{$type}) {
282	2					5	$type = $id_remap_h{$type};
283							}
284	4	50				15	if ($type !~ /:/) {
285	0	0				0	if ($default_id_prefix) {
286	0					0	$type = "$default_id_prefix:$type";
287							}
288							}
289	4	100				10	if (defined $id) {
290	2					11	$val = [[TYPE,$type],[TO,$id]];
291							}
292							else {
293	2					5	$id = $type;
294	2					9	$val = [[TO,$id]];
295							}
296							}
297							elsif ($tag eq INVERSE_OF \|\| $tag eq TRANSITIVE_OVER \|\| $tag eq IS_A) {
298	476	100				1113	if ($id_remap_h{$val}) {
299	3					7	$val = $id_remap_h{$val};
300							}
301	476	100				1429	if ($val !~ /:/) {
302	23	50				62	if ($default_id_prefix) {
303	0					0	$val = "$default_id_prefix:$val";
304							}
305							}
306							}
307							elsif ($tag eq DISJOINT_FROM) {
308	2	50				6	if ($id_remap_h{$val}) {
309	0					0	$val = $id_remap_h{$val};
310							}
311	2	50				30	if ($val !~ /:/) {
312	0	0				0	if ($default_id_prefix) {
313	0					0	$val = "$default_id_prefix:$val";
314							}
315							}
316							}
317							elsif ($tag eq XREF) {
318	118					412	$tag = XREF_ANALOG;
319	118					323	my $dbxref = dbxref($val);
320	118					348	$val = $dbxref->[1];
321							}
322							elsif ($tag eq XREF_ANALOG) {
323	1					5	my $dbxref = dbxref($val);
324	1					3	$val = $dbxref->[1];
325							}
326							elsif ($tag eq XREF_UNKNOWN) {
327	0					0	my $dbxref = dbxref($val);
328	0					0	$val = $dbxref->[1];
329							}
330							elsif ($tag eq PROPERTY_VALUE) {
331	116	50				442	if ($val =~ /^(\S+)\s+(\".*)/) {
332							# first form
333							# property_value: relation "literal value" xsd:datatype
334	0					0	my $type = $1;
335	0					0	my $rest = $2;
336	0					0	my ($to, $datatype) = extract_quotelike($rest);
337	0					0	$to =~ s/^\"//;
338	0					0	$to =~ s/\"$//;
339	0					0	$datatype =~ s/^\s+//;
340	0					0	$val = [[TYPE,$type],
341							[VALUE,$to],
342							[DATATYPE,$datatype]];
343							}
344							else {
345							# second form
346							# property_value: relation ToID
347	116					336	my ($type,$to) = split(' ',$val);
348	116					498	$val = [[TYPE,$type],
349							[TO,$to]];
350							}
351							}
352							elsif ($tag eq NAMESPACE) {
353	338	50				898	if ($force_namespace) {
354							# override whatever namespace was provided
355	0					0	$val = $force_namespace;
356							}
357							else {
358							# do nothing - we will export later
359							}
360	338					436	$namespace_set = $val;
361							}
362							elsif ($tag eq DEF) {
363	43					148	my ($defstr, $parts) =
364							extract_qstr($val);
365	53					125	$val =
366							[[DEFSTR,$defstr],
367	43					177	map {dbxref($_)} @$parts];
368							}
369							elsif ($tag =~ /(\w*)synonym/) {
370	380		100			1652	my $scope = $1 \|\| '';
371	380	100				679	if ($scope) {
372	1					2	$tag = SYNONYM;
373	1	50				5	if ($scope =~ /(\w+)_$/) {
374	1					3	$scope = $1;
375							}
376							else {
377	0					0	$self->parse_err("bad synonym type: $scope");
378	0					0	$scope = '';
379							}
380							}
381	380					755	my ($syn, $parts, $extra_quals) =
382							extract_qstr($val2);
383	380	100				964	if (@$extra_quals) {
384	297					494	$scope = shift @$extra_quals;
385	297					519	$scope = lc($scope);
386	297	100				993	$qh->{synonym_type} = shift @$extra_quals if @$extra_quals;
387							}
388	380	50				795	if ($qh->{scope}) {
389	0	0				0	if ($scope) {
390	0	0				0	if ($scope ne $qh->{scope}) {
391	0					0	$self->parse_err("inconsistent scope: $scope/$qh->{scope}");
392							}
393							else {
394	0					0	$self->parse_err("redundant scope: $scope");
395							}
396							}
397							}
398							else {
399	380					753	$qh->{scope} = $scope;
400							}
401
402	56					121	$val =
403							[[SYNONYM_TEXT,$syn],
404	380					1425	(map {dbxref($_)} @$parts)];
405							}
406							elsif ($tag =~ /formula/) {
407	0					0	my ($formula, $parts, $extra_quals) =
408							extract_qstr($val2);
409	0					0	my $lang = 'CLIF';
410	0	0				0	if (@$extra_quals) {
411	0					0	$lang = shift @$extra_quals;
412							}
413	0					0	$qh->{format} = $lang;
414
415	0					0	$val =
416							[['formula_text',$formula],
417	0					0	(map {dbxref($_)} @$parts)];
418							}
419							elsif ($tag eq 'holds_temporally_between' \|\| # experimental support for obof1.3
420							$tag eq 'holds_atemporally_between' \|\|
421							$tag eq 'holds_on_class_level_between') {
422	0					0	my ($sub, $ob) = split(' ', $val2);
423	0	0				0	if ($id_remap_h{$sub}) {
424	0					0	$sub = $id_remap_h{$sub};
425							}
426	0	0				0	if ($id_remap_h{$ob}) {
427	0					0	$ob = $id_remap_h{$ob};
428							}
429	0					0	$val = [[subject=>$sub],[object=>$ob]];
430							}
431							elsif ($tag eq 'holds_over_chain' \|\| $tag eq 'equivalent_to_chain') { # obof1.3
432	0					0	my @rels = split(' ', $val2);
433	0					0	@rels = map {
434	0					0	my $rel = $_;
435	0	0				0	if ($id_remap_h{$_}) {
436	0					0	$rel = $id_remap_h{$_}
437							}
438	0	0				0	if ($rel !~ /:/) {
439	0	0				0	if ($default_id_prefix) {
440	0					0	$rel = "$default_id_prefix:$rel";
441							}
442							}
443	0					0	$rel;
444							} @rels;
445	0					0	$val = [map {[relation=>$_]} @rels];
	0					0
446							}
447							elsif ($tag =~ /^expand/) {
448	0					0	my ($template, $parts) =
449							extract_qstr($val);
450	0					0	$val = $template;
451							}
452							else {
453	62					106	$val = $val2;
454							# normal tag:val
455							}
456	2625	100	100			13862	if (!ref($val) && $val eq 'true') {
457	29					35	$val = 1;
458							}
459	2625	50	66			8914	if (!ref($val) && $val eq 'false') {
460	0					0	$val = 0;
461							}
462	2625	100				4657	if (%$qh) {
463							# note that if attributes are used for
464							# terminal nodes then we effectively have
465							# to 'push the node down' a level;
466							# eg
467							# x
468							# ==> [is_a=>'x']
469							# x
470							# ==> [is_a=>[[@=>[[t=>v]]],[.=>x]]]
471	390	100				767	my $data = ref $val ? $val : [['.'=>$val]];
472	390					1049	my @quals = map {[$_=>$qh->{$_}]} keys %$qh;
	668					2432
473	390					2200	$self->event($tag=>[['@'=>[@quals]],
474							@$data,
475							]);
476							}
477							else {
478	2235					6800	$self->event($tag=>$val);
479							}
480
481	2625	100	100			417414	if ($tag eq IS_A \|\| $tag eq RELATIONSHIP) {
482	553					797	$is_root = 0;
483							}
484	2625	50	33			6598	if ($tag eq IS_OBSOLETE && $val) {
485	0					0	$is_root = 0;
486							}
487	2625	100				4879	if ($tag eq ID) {
488	494					1512	$id = $val;
489							}
490	2625	100				14983	if ($tag eq NAME) {
491	494	50				1186	if (!$id) {
492	0					0	$self->parse_err("missing id!")
493							}
494							else {
495	494					2246	$self->acc2name_h->{$id} = $val;
496							}
497							}
498							}
499							else {
500	0					0	$self->throw("uh oh: $_");
501							}
502							}
503
504							# duplicated code! check final event
505	10	100				66	if (!$namespace_set) {
506	7	50	33			193	if (!$namespace && $stanza_count) {
507							#$self->parse_err("missing namespace for ID: $id");
508							}
509							else {
510	7					37	$self->event(NAMESPACE, $namespace);
511							}
512							}
513	10	50				864	$self->event(IS_ROOT,1) if $is_root;
514
515	10					33	foreach my $import_file (@imports) {
516	0					0	$import_file = $self->download_file_if_required($import_file);
517	0					0	$self->file($import_file);
518	0					0	$self->pop_stack_to_depth(1);
519							#$self->end_event(HEADER);
520	0					0	my $ifh = FileHandle->new($import_file);
521	0					0	$self->parse_fh_inner($ifh);
522							#$self->pop_stack_to_depth(1);
523	0					0	$ifh->close();
524							}
525
526	10					209	return;
527							}
528
529							# each tag line can have trailing qualifiers in {}s at the end
530							sub extract_quals {
531	2625			2625	0	3520	my $str = shift;
532
533	2625					9290	my %q = ();
534	2625	100				5850	if ($str =~ /(.[^\s])\s+(\{.)\}\s*$/) {
535	10					376	my $return_str = $1;
536	10					28	my $extr = $2;
537	10	50				26	if ($extr) {
538	10					28	my @qparts = split_on_comma($extr);
539	10					24	foreach (@qparts) {
540	10	50				147	if (/(\w+)=\"(.*)\"/) {
		50
		50
541	0					0	$q{$1} = $2;
542							}
543							elsif (/(\w+)=\'(.*)\'/) {
544	0					0	$q{$1} = $2;
545							}
546							elsif (/(\w+)=(\S+)/) { # current 1.2 standard; non-quoted
547	10					54	$q{$1} = $2;
548							}
549							else {
550	0					0	warn("$_ in $str");
551							}
552							}
553							}
554	10					118	return ($return_str, \%q);
555							}
556							else {
557	2615					9292	return ($str, {});
558							}
559							}
560
561							sub extract_qstr {
562	435			435	0	657	my $str = shift;
563
564	435					1426	my ($extr, $rem, $prefix) = extract_quotelike($str);
565	435					43084	my $txt = $extr;
566	435	100				2123	$txt =~ s/^\"// if $txt;
567	435	100				2857	$txt =~ s/\"$// if $txt;
568	435	50				1113	if ($prefix) {
569	0					0	warn("illegal prefix: $prefix in: $str");
570							}
571
572	435					606	my @extra = ();
573
574							# synonyms can have two words following quoted part
575							# before dbxref section
576							# - two
577	435	100				1852	if ($rem =~ /(\w+)\s+(\w+)\s+(\[.*)/) {
		100
578	278					617	$rem = $3;
579	278					762	push(@extra,$1,$2);
580							}
581							elsif ($rem =~ /(\w+)\s+(\[.*)/) {
582	19					36	$rem = $2;
583	19					45	push(@extra,$1);
584							}
585							else {
586							}
587
588	435					577	my @parts = ();
589	435					1315	while (($extr, $rem, $prefix) = extract_bracketed($rem, '[]')) {
590	788	100				73304	last unless $extr;
591	353					1148	$extr =~ s/^\[//;
592	353					1066	$extr =~ s/\]$//;
593	353	100				1545	push(@parts, $extr) if $extr;
594							}
595							@parts =
596	435					836	map {split_on_comma($_)} @parts;
	95					248
597
598	435	100				1100	$txt =~ s/\\//g if $txt;
599	435					1620	return ($txt, \@parts, \@extra);
600							}
601
602							sub split_on_comma {
603	105			105	0	167	my $str = shift;
604	105					192	my @parts = ();
605	105					422	while ($str =~ /(.[^\\],\s)(.*)/) {
606	14					39	$str = $1;
607	14					27	my $part = $2;
608	14					42	unshift(@parts, $part);
609	14					106	$str =~ s/,\s*$//;
610							}
611	105					243	unshift(@parts, $str);
612	105					257	return map {s/\\//g;$_} @parts;
	119					219
	119					479
613							}
614
615							# turns a DB:ACC string into an obo-xml dbxref element
616							sub dbxref {
617	228			228	0	314	my $str = shift;
618	228					388	$str =~ s/\\//g;
619	228					235	my $name;
620	228	50				574	if ($str =~ /(.)\s+\"(.)\"$/) {
621	0					0	$str = $1;
622	0					0	$name = $2;
623							}
624	228					877	my ($db, @rest) = split(/:/, $str);
625	228					524	my $acc = join(':',@rest);
626	228					677	$db =~ s/^\s+//;
627	228	50	33			751	if ($db eq 'http' && $acc =~ /^\/\//) {
628							# dbxref is actually a URI
629	0					0	$db = 'URL';
630	0					0	$acc = simple_escape($acc);
631	0					0	$acc =~ s/\s/\%20/g;
632	0					0	$acc = "http:$acc";
633							}
634							else {
635							# $db=escape($db);
636							# $acc=escape($acc);
637							}
638	228					422	$db =~ s/\s+/_/g; # HumanDO.obo has spaces in xref
639	228					371	$acc =~ s/\s+/_/g;
640	228	50				490	$db = 'NULL' unless $db;
641	228	50				487	$acc = 'NULL' unless $acc;
642	228	50				1846	[DBXREF,[[ACC,$acc],
643							[DBNAME,$db],
644							defined $name ? [NAME,$name] : ()
645							]];
646							}
647
648							sub parse_term_expression {
649	4			4	0	12735	my $self = shift;
650	4					8	my $expr = shift;
651	4					18	my ($te,$rest) = $self->parse_term_expression_with_rest($expr);
652	4	50				14	if ($rest) {
653	0					0	$self->parse_err("trailing: $rest");
654							}
655	4					44	return Data::Stag->nodify($te);
656							}
657
658							sub parse_term_expression_with_rest {
659	15			15	0	20	my $self = shift;
660	15					20	my $expr = shift;
661	15	100				99	if ($expr =~ /^\((.*)/) {
		100
		50
662	1					8	my $genus_expr = $1;
663	1					5	my ($genus,$diff_expr) = $self->parse_term_expression_with_rest($genus_expr);
664	1					5	my $next_c = substr($diff_expr,0,1,'');
665	1	50				5	if ($next_c eq ')') {
666	1					5	my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr);
667	1					5	my $stag = [intersection=>[
668							[link=>[[to=>[$genus]]]],
669							@$diffs]];
670	1					5	return ($stag,$rest);
671
672							}
673							else {
674	0					0	$self->parse_err("expected ) at end of genus. Got: $next_c followed by $diff_expr");
675							}
676							}
677							elsif ($expr =~ /^([\w\:\.\-]+)\^(.*)/) {
678	7					21	my $genus = $1;
679	7					19	my $diff_expr = $2;
680	7					22	my ($diffs,$rest) = $self->parse_differentia_with_rest($diff_expr);
681	7					30	my $stag = [intersection=>[
682							[link=>[[to=>$genus]]],
683							@$diffs]];
684	7					24	return ($stag,$rest);
685							}
686							elsif ($expr =~ /^([\w\:\.\-]+)(.*)/) {
687	7					28	return ($1,$2);
688							}
689							else {
690	0					0	$self->parse_err("could not parse: $expr");
691							}
692							}
693
694							sub parse_differentia {
695	0			0	0	0	my $self = shift;
696	0					0	my $expr = shift;
697	0					0	my ($diffs,$rest) = $self->parse_differentia_with_rest($expr);
698	0	0				0	if ($rest) {
699	0					0	$self->parse_err("trailing: $rest");
700							}
701
702	0					0	Data::Stag->nodify($_) foreach @$diffs;
703	0					0	return $diffs;
704							}
705
706							sub parse_differentia_with_rest {
707	10			10	0	16	my $self = shift;
708	10					20	my $expr = shift;
709	10	50				42	if ($expr =~ /^(.+?)\((.*)/) {
710	10					20	my $rel = $1;
711	10					17	my $term_expr = $2;
712	10					28	my ($term,$rest) = $self->parse_term_expression_with_rest($term_expr);
713	10	100				56	my $diff = [link=>[[type=>$rel],
714							[to=>(ref($term) ? [$term] : $term)]]];
715	10	50				24	if ($rest) {
716	10					25	my $next_c = substr($rest,0,1,'');
717	10	50				24	if ($next_c eq ')') {
718	10					19	$next_c = substr($rest,0,1);
719	10	100	66			64	if ($next_c eq '^' \|\| $next_c eq ',') {
		100
		50
720	2					12	my ($next_diffs,$next_rest) = $self->parse_differentia_with_rest(substr($rest,1));
721	2	50				8	if (!$next_diffs) {
722	0					0	$self->parse_err("problem parsing differentia: $rest. Expr: $term_expr");
723	0					0	return ([$diff],$rest);
724							}
725	2					12	return ([$diff,@$next_diffs],$next_rest);
726							}
727							elsif ($next_c eq '') {
728	4					16	return ([$diff],$rest);
729							}
730							elsif ($next_c eq ')') {
731	4					16	return ([$diff],$rest);
732							}
733							else {
734	0						$self->parse_err("expected ^ or ) in differentium. Got: $next_c followed_by: $rest. Expr: $term_expr");
735							}
736							}
737							else {
738	0						$self->parse_err("expected ) to close differentium. Got: $next_c followed by: $rest. Expr: $term_expr");
739							}
740							}
741							else {
742	0						$self->parse_err("expected ). Got: \"\". Expr: $term_expr");
743							}
744							}
745							else {
746	0						$self->parse_err("expect relation(...). Got: $expr. ");
747							}
748							}
749
750							# lifted from CGI::Util
751
752							our $EBCDIC = "\t" ne "\011";
753							# (ord('^') == 95) for codepage 1047 as on os390, vmesa
754							our @E2A = (
755							0, 1, 2, 3,156, 9,134,127,151,141,142, 11, 12, 13, 14, 15,
756							16, 17, 18, 19,157, 10, 8,135, 24, 25,146,143, 28, 29, 30, 31,
757							128,129,130,131,132,133, 23, 27,136,137,138,139,140, 5, 6, 7,
758							144,145, 22,147,148,149,150, 4,152,153,154,155, 20, 21,158, 26,
759							32,160,226,228,224,225,227,229,231,241,162, 46, 60, 40, 43,124,
760							38,233,234,235,232,237,238,239,236,223, 33, 36, 42, 41, 59, 94,
761							45, 47,194,196,192,193,195,197,199,209,166, 44, 37, 95, 62, 63,
762							248,201,202,203,200,205,206,207,204, 96, 58, 35, 64, 39, 61, 34,
763							216, 97, 98, 99,100,101,102,103,104,105,171,187,240,253,254,177,
764							176,106,107,108,109,110,111,112,113,114,170,186,230,184,198,164,
765							181,126,115,116,117,118,119,120,121,122,161,191,208, 91,222,174,
766							172,163,165,183,169,167,182,188,189,190,221,168,175, 93,180,215,
767							123, 65, 66, 67, 68, 69, 70, 71, 72, 73,173,244,246,242,243,245,
768							125, 74, 75, 76, 77, 78, 79, 80, 81, 82,185,251,252,249,250,255,
769							92,247, 83, 84, 85, 86, 87, 88, 89, 90,178,212,214,210,211,213,
770							48, 49, 50, 51, 52, 53, 54, 55, 56, 57,179,219,220,217,218,159
771							);
772
773							sub escape {
774	0	0	0	0	0		shift() if @_ > 1 and ( ref($_[0]) \|\| (defined $_[1] && $_[0] eq $CGI::DefaultClass));
			0
775	0						my $toencode = shift;
776	0	0					return undef unless defined($toencode);
777							# force bytes while preserving backward compatibility -- dankogai
778	0						$toencode = pack("C", unpack("C", $toencode));
779	0	0					if ($EBCDIC) {
780	0						$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",$E2A[ord($1)])/eg;
	0
781							} else {
782	0						$toencode=~s/([^a-zA-Z0-9_.-])/uc sprintf("%%%02x",ord($1))/eg;
	0
783							}
784	0						return $toencode;
785							}
786
787							sub simple_escape {
788	0	0		0	0		return unless defined(my $toencode = shift);
789	0						$toencode =~ s{&}{&}gso;
790	0						$toencode =~ s{<}{<}gso;
791	0						$toencode =~ s{>}{>}gso;
792	0						$toencode =~ s{\"}{"}gso;
793							# Doesn't work. Can't work. forget it.
794							# $toencode =~ s{\x8b}{‹}gso;
795							# $toencode =~ s{\x9b}{›}gso;
796	0						$toencode;
797							}
798
799
800
801							1;