File Coverage

blib/lib/OBO/Parser/OBOParser.pm
Criterion Covered Total %
statement 458 628 72.9
branch 268 432 62.0
condition 42 66 63.6
subroutine 18 18 100.0
pod 1 2 50.0
total 787 1146 68.6


line stmt bran cond sub pod time code
1             # $Id: OBOParser.pm 2014-11-14 erick.antezana $
2             #
3             # Module : OBOParser.pm
4             # Purpose : Parse OBO-formatted files.
5             # License : Copyright (c) 2006-2014 by Erick Antezana. All rights reserved.
6             # This program is free software; you can redistribute it and/or
7             # modify it under the same terms as Perl itself.
8             # Contact : Erick Antezana
9             #
10             package OBO::Parser::OBOParser;
11              
12 7     7   81284 use OBO::Core::Dbxref;
  7         12  
  7         217  
13 7     7   2514 use OBO::Core::Instance;
  7         14  
  7         230  
14 7     7   5063 use OBO::Core::Ontology;
  7         23  
  7         298  
15 7     7   122 use OBO::Core::Relationship;
  7         732  
  7         983  
16 7     7   3509 use OBO::Core::RelationshipType;
  7         19  
  7         227  
17 7     7   3032 use OBO::Core::SubsetDef;
  7         19  
  7         191  
18 7     7   2237 use OBO::Core::SynonymTypeDef;
  7         12  
  7         188  
19 7     7   1733 use OBO::Core::Term;
  7         15  
  7         175  
20 7     7   95 use OBO::Util::IDspaceSet;
  7         15  
  7         145  
21 7     7   37 use OBO::Util::Set;
  7         9  
  7         135  
22 7     7   2623 use OBO::XO::OBO_ID;
  7         16  
  7         193  
23              
24 7     7   38 use Carp;
  7         11  
  7         461  
25 7     7   3311 use Date::Manip qw(ParseDate UnixDate);
  7         844366  
  7         596  
26 7     7   71 use strict;
  7         10  
  7         221  
27 7     7   29 use warnings;
  7         12  
  7         249  
28              
29 7     7   28 use open qw(:std :utf8); # Make All I/O Default to UTF-8
  7         9  
  7         52  
30              
31             $Carp::Verbose = 1;
32              
33             sub new {
34 4     4 0 56 my $class = shift;
35 4         10 my $self = {};
36            
37 4         11 $self->{OBO_FILE} = undef; # required, (1)
38            
39 4         13 bless ($self, $class);
40 4         11 return $self;
41             }
42              
43             =head2 work
44              
45             Usage - $OBOParser->work($obo_file_path)
46             Returns - the parsed OBO ontology
47             Args - the OBO file to be parsed
48             Function - parses an OBO file
49            
50             =cut
51              
52             sub work {
53 9     9 1 1770 my $self = shift;
54 9 50       38 if (defined $_[0]) {
55 9         46 $self->{OBO_FILE} = shift;
56             } else {
57 0         0 croak 'You have to provide an OBO file as input';
58             }
59            
60 9 50       521 open (OBO_FILE, $self->{OBO_FILE}) || croak 'The OBO file (', $self->{OBO_FILE}, ') cannot be opened: ', $!;
61            
62 9         41 $/ = ""; # one paragraph at the time
63 9         3754 chomp(my @chunks = );
64 9         109 chomp(@chunks);
65 9         135 close OBO_FILE;
66              
67             #
68             # Treat OBO file header tags
69             #
70 9         20 my $file_line_number = 0;
71 9 50 33     143 if (defined $chunks[0] && $chunks[0] =~ /^format-version:\s*(.*)/) {
72              
73 9         109 my @header = split (/\n/, $chunks[0]);
74 9         23 $file_line_number = $#header + 2; # amount of lines in the header
75 9         45 $chunks[0] = join("\n", @header);
76              
77             #
78             # format-version
79             #
80 9         13 my $format_version;
81 9 50       113 if ($chunks[0] =~ /(^format-version:\s*(.*)\n?)/m) { # required tag
82 9         34 $format_version = $2;
83 9         312 $chunks[0] =~ s/$1//;
84             }
85              
86             #
87             # data_version
88             #
89 9         18 my $data_version;
90 9 100       70 if ($chunks[0] =~ /(^data-version:\s*(.*)\n?)/m) {
91 1         3 $data_version = $2;
92 1         20 $chunks[0] =~ s/$1//;
93             }
94            
95             #
96             # ontology
97             #
98 9         14 my $ontology_id_space;
99 9 100       46 if ($chunks[0] =~ /(^ontology:\s*(.*)\n?)/m) { # as of OBO spec 1.4
100 1         4 $ontology_id_space = $2;
101 1         29 $chunks[0] =~ s/$1//;
102             }
103            
104             #
105             # date
106             #
107 9         14 my $date;
108 9 50       57 if ($chunks[0] =~ /(^date:\s*(.*)\n?)/m) {
109 9         19 $date = $2;
110 9         127 $chunks[0] =~ s/$1//;
111             }
112            
113             #
114             # saved_by
115             #
116 9         19 my $saved_by;
117 9 100       168 if ($chunks[0] =~ /(^saved-by:\s*(.*)\n?)/m) {
118 7         21 $saved_by = $2;
119 7         86 $chunks[0] =~ s/$1//;
120             }
121              
122             #
123             # auto_generated_by
124             #
125 9         15 my $auto_generated_by;
126 9 100       51 if ($chunks[0] =~ /(^auto-generated-by:\s*(.*)\n?)/m) {
127 8         15 $auto_generated_by = $2;
128 8         118 $chunks[0] =~ s/$1//;
129             }
130              
131             #
132             # imports
133             #
134 9         63 my $imports = OBO::Util::Set->new();
135 9         48 while ($chunks[0] =~ /(^import:\s*(.*)\n?)/m) {
136 1         4 $imports->add($2);
137 1         24 $chunks[0] =~ s/$1//;
138             }
139              
140             #
141             # subsetdef
142             #
143 9         102 my $subset_def_map = OBO::Util::SubsetDefMap->new();
144 9         60 while ($chunks[0] =~ /(^subsetdef:\s*(\S+)\s+\"(.*)\"\n?)/m) {
145 19         52 my $line = quotemeta($1);
146 19         80 my $ssd = OBO::Core::SubsetDef->new();
147 19         48 $ssd->name($2);
148 19         121 $ssd->description($3);
149 19         68 $subset_def_map->put($2, $ssd);
150 19         423 $chunks[0] =~ s/${line}//;
151             }
152            
153             #
154             # synonymtypedef
155             #
156 9         60 my $synonym_type_def_set = OBO::Util::SynonymTypeDefSet->new();
157 9         65 while ($chunks[0] =~ /(^synonymtypedef:\s*(\S+)\s+\"(.*)\"(.*)?\n?)/m) {
158 5         14 my $line = quotemeta($1);
159 5         21 my $std = OBO::Core::SynonymTypeDef->new();
160 5         15 $std->name($2);
161 5         12 $std->description($3);
162 5         8 my $sc = $4;
163 5 100 66     42 $std->scope($sc) if (defined $sc && $sc =~s/\s//);
164 5         15 $synonym_type_def_set->add($std);
165 5         96 $chunks[0] =~ s/${line}//;
166             }
167            
168             #
169             # idspace
170             #
171 9         54 my $idspaces = OBO::Util::IDspaceSet->new();
172 9         48 while ($chunks[0] =~ /(^idspace:\s*(\S+)\s*(\S+)\s+(\"(.*)\")?\n?)/m) {
173 3         14 my $line = quotemeta($1);
174 3         14 my $new_idspace = OBO::Core::IDspace->new();
175 3         8 $new_idspace->local_idspace($2);
176 3         8 $new_idspace->uri($3);
177 3         5 my $dc = $5;
178 3 50       10 $new_idspace->description($dc) if (defined $dc);
179 3         10 $idspaces->add($new_idspace);
180 3         65 $chunks[0] =~ s/${line}//;
181             }
182            
183             #
184             # default-relationship-id-prefix
185             # e.g. default-relationship-id-prefix: OBO_REL
186             #
187 9         13 my $default_relationship_id_prefix;
188 9 100       48 if ($chunks[0] =~ /(^default_relationship_id_prefix:\s*(.*)\n?)/m) {
189 1         2 $default_relationship_id_prefix = $2;
190 1         20 $chunks[0] =~ s/$1//;
191             }
192            
193             #
194             # default-namespace
195             #
196 9         12 my $default_namespace;
197 9 50       55 if ($chunks[0] =~ /(^default-namespace:\s*(.*)\n?)/m) {
198 9         17 $default_namespace = $2;
199 9         108 $chunks[0] =~ s/$1//;
200             }
201            
202             #
203             # remark
204             #
205 9         32 my $remarks = OBO::Util::Set->new();
206 9         85 while ($chunks[0] =~ /(^remark:\s*(.*)\n?)/m) {
207 6         26 my $line = quotemeta($1);
208 6         17 $remarks->add($2);
209 6         127 $chunks[0] =~ s/${line}//;
210             }
211              
212 9 50       27 if (!defined $format_version) {
213 0         0 croak "The OBO file '", $self->{OBO_FILE},"' does not have a correct header, please verify it.";
214             }
215            
216             #
217             # treat-xrefs-as-equivalent
218             #
219 9         29 my $treat_xrefs_as_equivalent = OBO::Util::Set->new();
220 9         35 while ($chunks[0] =~ /(^treat-xrefs-as-equivalent:\s*(.*)\n?)/m) {
221 2         5 $treat_xrefs_as_equivalent->add($2);
222 2         27 $chunks[0] =~ s/$1//;
223             }
224            
225             #
226             # treat-xrefs-as-is_a
227             #
228 9         27 my $treat_xrefs_as_is_a = OBO::Util::Set->new();
229 9         30 while ($chunks[0] =~ /(^treat-xrefs-as-is_a:\s*(.*)\n?)/m) {
230 2         4 $treat_xrefs_as_is_a->add($2);
231 2         21 $chunks[0] =~ s/$1//;
232             }
233            
234             #
235             # store the values in header tags
236             #
237 9         65 my $result = OBO::Core::Ontology->new();
238            
239 9 100       30 $result->data_version($data_version) if ($data_version);
240 9 100       28 $result->id($ontology_id_space) if ($ontology_id_space);
241 9 50       47 $result->date($date) if ($date);
242 9 100       38 $result->saved_by($saved_by) if ($saved_by);
243             #$result->auto_generated_by($auto_generated_by) if ($auto_generated_by);
244 9         28 $result->subset_def_map($subset_def_map);
245 9         26 $result->imports($imports->get_set());
246 9         43 $result->synonym_type_def_set($synonym_type_def_set->get_set());
247 9         33 $result->idspaces($idspaces->get_set());
248 9 100       48 $result->default_relationship_id_prefix($default_relationship_id_prefix) if ($default_relationship_id_prefix);
249 9 50       50 $result->default_namespace($default_namespace) if ($default_namespace);
250 9         21 $result->remarks($remarks->get_set());
251 9         132 $result->treat_xrefs_as_equivalent($treat_xrefs_as_equivalent->get_set());
252 9         28 $result->treat_xrefs_as_is_a($treat_xrefs_as_is_a->get_set());
253              
254 9 50       24 if ($chunks[0]) {
255 0         0 print STDERR "The following line(s) has been ignored from the header:\n", $chunks[0], "\n";
256             }
257            
258             #
259             # Keep log's
260             #
261 9         14 my %used_subset; # of the used subsets to pin point nonused subsets defined in the header (subsetdef's)
262            
263             my %used_synonym_type_def; # of the used synonymtypedef to pin point nonused synonymtypedef's defined in the header (synonymtypedef's)
264            
265             #
266             # Regexps
267             #
268             #my $r_db_acc = qr/([ \*\.\w-]*):([ '\#~\w:\\\+\?\{\}\$\/\(\)\[\]\.=&!%_,-]*)/o;
269 9         43 my $r_db_acc = qr/\s+(\w+:\w+)/o;
270 9         29 my $r_dbxref = qr/\s+(\[.*\])/o;
271 9         22 my $syn_scope = qr/(\s+(EXACT|BROAD|NARROW|RELATED))?/o;
272 9         24 my $r_true_false = qr/\s*(true|false)/o;
273 9         23 my $r_comments = qr/\s*(\!\s*(.*))?/o;
274            
275 9         17 my $intersection_of_counter = 0;
276 9         11 my $union_of_counter = 0;
277            
278 9         88 my %allowed_data_types = ( 'xsd:simpleType' => 1, # Indicates any primitive type (abstract)
279             'xsd:string' => 1, # A string
280             'xsd:integer' => 1, # Any integer
281             'xsd:decimal' => 1, # Any real number
282             'xsd:negativeInteger' => 1, # Any negative integer
283             'xsd:positiveInteger' => 1, # Any integer > 0
284             'xsd:nonNegativeInteger' => 1, # Any integer >= 0
285             'xsd:nonPositiveInteger' => 1, # Any integer < 0
286             'xsd:boolean' => 1, # True or false
287             'xsd:date' => 1 # An XML-Schema date
288             );
289            
290 9         18 foreach my $chunk (@chunks) {
291 2151         13686 my @entry = split (/\n/, $chunk);
292 2151         2799 my $stanza = shift @entry;
293            
294 2151 100 100     8756 if ($stanza && $stanza =~ /\[Term\]/) { # treat [Term]'s
    100 100        
    100 66        
    50 33        
295            
296 2025         1605 my $term;
297             #
298             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
299             #
300 2025         1609 $intersection_of_counter = 0;
301 2025         1429 $union_of_counter = 0;
302            
303 2025         1513 $file_line_number++;
304            
305 2025         1516 my $only_one_id_tag_per_entry = 0;
306 2025         1622 my $only_one_name_tag_per_entry = 0;
307            
308 2025         2043 foreach my $line (@entry) {
309 12004         9413 $file_line_number++;
310 12004 100 100     167289 if ($line =~ /^id:\s*(\S+)/) { # get the term id
    100 100        
    100 100        
    100 100        
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    50          
    50          
    50          
    100          
    50          
311 2025 50       8646 if ($line =~ /^id:$r_db_acc/) { # Does it follow the "convention"?
312 2025 50       3005 croak "The term with id '", $1, "' has a duplicated 'id' tag in the file '", $self->{OBO_FILE} if ($only_one_id_tag_per_entry);
313 2025         4775 $term = $result->get_term_by_id($1); # does this term is already in the ontology?
314 2025 100 33     3847 if (!defined $term){
    50          
315 1621         3813 $term = OBO::Core::Term->new(); # if not, create a new term
316 1621         3032 $term->id($1);
317 1621         3493 $result->add_term($term); # add it to the ontology
318 1621         2643 $only_one_id_tag_per_entry = 1;
319             } elsif (defined $term->def()->text() && $term->def()->text() ne '') {
320             # The term is already in the ontology since it has a definition! (maybe empty?)
321 0         0 croak "The term with id '", $1, "' is duplicated in the OBO file.";
322             }
323             } else {
324 0         0 carp "The term with id '", $1, "' does NOT follow the ID convention: 'IDSPACE:UNIQUE_IDENTIFIER', e.g. GO:1234567";
325             }
326             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
327 2 50       9 $term->is_anonymous(($1 eq 'true')?1:0);
328             } elsif ($line =~ /^name:\s*(.*)/) {
329 2023 50       3169 carp "The term with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE} if ($only_one_name_tag_per_entry);
330 2023 50       3281 if (!defined $1) {
331 0         0 warn "The term with id '", $term->id(), "' has no name in file '", $self->{OBO_FILE}, "'";
332             } else {
333 2023         3858 $term->name($1);
334 2023         2708 $only_one_name_tag_per_entry = 1;
335             }
336             } elsif ($line =~ /^namespace:\s*(.*)/) {
337 13         55 $term->namespace($1); # it is a Set
338             } elsif ($line =~ /^alt_id:$r_db_acc/) {
339 9         32 $term->alt_id($1);
340             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill the definition
341 1337         3343 my $def = OBO::Core::Def->new();
342 1337         2502 $def->text($1);
343 1337         2554 $def->dbxref_set_as_string($2);
344 1337         3460 $term->def($def);
345             } elsif ($line =~ /^comment:\s*(.*)/) {
346 193         525 $term->comment($1);
347             } elsif ($line =~ /^subset:\s*(\S+)/) {
348 34         70 my $ss = $1;
349 34 50       99 if ($result->subset_def_map()->contains_key($ss)) {
350 34         93 $term->subset($ss); # it is a Set (i.e. added to a Set)
351            
352 34         104 $used_subset{$ss}++; # check subsets usage
353             } else {
354 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file line '", $file_line_number, "'";
355             }
356             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) { # OBO spec 1.1
357 573         2272 $term->synonym_as_string($2, $3, uc($1));
358             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+([-\w]+))?$r_dbxref/) {
359 1535 100       4058 my $scope = (defined $3)?$3:'RELATED';
360             # As of OBO flat file spec v1.2, we use:
361             # synonym: "endomitosis" EXACT []
362 1535 100       3006 if (defined $5) { # if a 'synonym type name' is given
363 5         7 my $found = 0; # check that the 'synonym type name' was defined in the header!
364 5         15 foreach my $st ($result->synonym_type_def_set()->get_set()) {
365 7 100       19 if ($st->name() eq $5) {
366 5 100       15 if (!defined $3) { # if no scope is given, use the one defined in the header!
367 1         3 my $default_scope = $st->scope();
368 1 50       4 $scope = $default_scope if (defined $default_scope);
369             }
370 5         7 $found = 1;
371 5         6 last;
372             }
373             }
374 5 50       12 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
375 5         14 $used_synonym_type_def{$5}++; # check synonymtypedef usage
376             }
377 1535         3822 $term->synonym_as_string($1, $6, $scope, $5);
378            
379             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unknown:\s*(.*)/) {
380 1161         2848 $term->xref_set_as_string($1);
381             } elsif ($line =~ /^is_a:$r_db_acc$r_comments/) { # The comment is ignored here but generated later internally
382 2149         4419 my $t_id = $term->id();
383 2149 50       5369 if ($t_id eq $1) {
384 0         0 warn "The term '", $t_id, "' has a reflexive is_a relationship, which was ignored!";
385 0         0 next;
386             }
387 2149         5420 my $rel = OBO::Core::Relationship->new();
388 2149         6930 $rel->id($t_id.'_is_a_'.$1);
389 2149         3876 $rel->type('is_a');
390 2149         5040 my $target = $result->get_term_by_id($1); # Is this term already in the ontology?
391 2149 100       3751 if (!defined $target) {
392 264         636 $target = OBO::Core::Term->new(); # if not, create a new term
393 264         559 $target->id($1);
394 264         565 $result->add_term($target);
395             }
396 2149         4461 $rel->link($term, $target);
397 2149         4311 $result->add_relationship($rel);
398             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
399             # TODO Improve the 'intersection_of' treatment
400 2         11 my $rel = OBO::Core::Relationship->new();
401 2   100     10 my $r = $1 || 'nil';
402 2         5 my $id = $term->id().'_'.$r.'_'.$2;
403 2         6 $id =~ s/\s+/_/g;
404 2         5 $rel->id($id);
405 2         3 $rel->type($r);
406 2         5 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
407 2 100       4 if (!defined $target) {
408 1         3 $target = OBO::Core::Term->new(); # if not, create a new term
409 1         3 $target->id($2);
410 1         25 $result->add_term($target);
411             }
412 2         7 $rel->head($target);
413 2         6 $term->intersection_of($rel);
414 2         5 $intersection_of_counter++;
415             } elsif ($line =~ /^union_of:$r_db_acc$r_comments/) {
416             # TODO wait until the OBO spec 1.4 be released
417 0         0 my $target = $result->get_term_by_id($1); # Is this term already in the ontology?
418 0 0       0 if (!defined $target) {
419 0         0 $target = OBO::Core::Term->new(); # if not, create a new term
420 0         0 $target->id($1);
421 0         0 $result->add_term($target);
422             }
423 0         0 $term->union_of($1);
424 0         0 $union_of_counter++;
425             } elsif ($line =~ /^disjoint_from:$r_db_acc$r_comments/) {
426 15         58 $term->disjoint_from($1); # We are assuming that the other term exists or will exist; otherwise , we have to create it like in the is_a section.
427             } elsif ($line =~ /^relationship:\s*([\w\/]+)$r_db_acc$r_comments/ || $line =~ /^relationship:\s*$r_db_acc$r_db_acc$r_comments/) {
428 898         2495 my $rel = OBO::Core::Relationship->new();
429 898         1754 my $id = $term->id().'_'.$1.'_'.$2; # TODO: I have to standarise the id's: term_id1_db:acc_term_id2
430 898         2351 $id =~ s/\s+/_/g;
431 898         1749 $rel->id($id);
432 898         1475 $rel->type($1);
433             #warn "TYPE : '", $id, "'";
434 898         2012 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
435 898 100       1628 if (!defined $target) {
436 144         360 $target = OBO::Core::Term->new(); # if not, create a new term
437 144         325 $target->id($2);
438 144         317 $result->add_term($target);
439             }
440 898         1866 $rel->link($term, $target);
441 898         1636 $result->add_relationship($rel);
442             } elsif ($line =~ /^created_by:\s*(.*)/) {
443 4         15 $term->created_by($1);
444             } elsif ($line =~ /^creation_date:\s*(.*)/) {
445 4         8 my $d = $1;
446 4         22 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
447 4 50       74810 if (!$pd) {
448 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
449             # } else {
450             # my ($year, $month, $day) = UnixDate($pd, "%Y", "%m", "%d");
451             # warn "Date was $month/$day/$year\n";
452             }
453 4         24 $term->creation_date($d);
454             } elsif ($line =~ /^modified_by:\s*(.*)/) {
455 0         0 $term->modified_by($1);
456             } elsif ($line =~ /^modification_date:\s*(.*)/) {
457 0         0 my $d = $1;
458 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
459 0 0       0 if (!$pd) {
460 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
461             }
462 0         0 $term->modification_date($d);
463             } elsif ($line =~ /^is_obsolete:$r_true_false/) {
464 20 50       104 $term->is_obsolete(($1 eq 'true')?1:0);
465             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
466 0         0 $term->replaced_by($1);
467             } elsif ($line =~ /^consider:\s*(.*)/) {
468 0         0 $term->consider($1);
469             } elsif ($line =~ /^builtin:$r_true_false/) {
470 0 0       0 $term->builtin(($1 eq 'true')?1:0);
471             } elsif ($line =~ /^property_value:\s*(\w+)$r_db_acc/ || $line =~ /^property_value:\s*(\w+)\s+"([ '\#~\w:\\\+\?\{\}\$\/\(\)\[\]\.=&!%_,-]+)"$r_db_acc/) { # TODO some parts in this block might change later on...
472              
473 6         21 my $r2_type = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
474 6 50       15 if (!defined $r2_type){
475 6         19 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
476 6         15 $r2_type->id($1);
477 6         19 $result->add_relationship_type($r2_type); # add it to the ontology
478             }
479              
480             #
481             # create the triplet
482             #
483 6         18 my $rel = OBO::Core::Relationship->new();
484 6         14 my $id = $term->id().'_'.$1.'_'.$2; # term --> rel --> [term|instance|datatype]
485 6         19 $id =~ s/\s+/_/g;
486 6         11 $rel->id($id);
487 6         12 $rel->type($r2_type->id());
488            
489 6 100       21 if (!defined $3) {
    50          
490             #
491             # property_value: lastest_modification_by erick
492             #
493 1         3 my $target = $result->get_term_by_id($2); # suggest to OBOF to define TERMs before they are used so that parsers could know they are dealing with terms!
494            
495 1 50       3 if (defined $target) { # term --> rel --> term
496            
497             } else {
498 1         3 $target = $result->get_instance_by_id($2);
499            
500 1 50       3 if (!defined $target) { # term --> rel --> instance
501 1         3 $target = OBO::Core::Instance->new();
502 1         3 $target->id($2);
503 1         4 $result->add_instance($target);
504             }
505             }
506            
507 1         3 $rel->link($term, $target); # triplet: term --> rel --> target
508 1         2 $term->property_value($rel);
509            
510             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
511             } elsif (defined $3) { # term --> rel --> datatype
512             #
513             # property_value: lastest_modification_by "erick" xsd:string or e.g. shoe_size "12" xsd:positiveInteger
514             #
515 5         13 my $target = $result->get_instance_by_id($2);
516            
517 5 50       13 if (!defined $target) { # term --> rel --> datatype
518 5         23 $target = OBO::Core::Instance->new();
519 5         13 $target->id($2);
520            
521             # data type check
522 5 50       16 warn "Unrecommended XML-schema pritive (data type) found: '", $3, "'" unless (exists $allowed_data_types{$3});
523            
524 5         15 my $data_type = OBO::Core::Term->new();
525 5         10 $data_type->id($3);
526             #$result->add_term($data_type); # TODO Think about it...
527 5         15 $target->instance_of($data_type);
528             #$result->add_instance($target); # TODO Think about it...
529             }
530              
531 5         11 $rel->link($term, $target);
532 5         11 $term->property_value($rel);
533            
534             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
535             }
536             } elsif ($line =~ /^!/) {
537             # skip line
538             } else {
539 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
540             }
541             }
542             # Check for required fields: id
543 2025 50 33     5971 if (defined $term && !defined $term->id()) {
544 0         0 croak "No ID found in term:\n", $chunk;
545             }
546 2025 50       3621 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
547 0         0 carp "Missing 'intersection_of' tag in term:\n\n", $chunk, "\n";
548             }
549 2025 50       2902 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
550 0         0 carp "Missing 'union_of' tag in term:\n\n", $chunk, "\n";
551             }
552 2025         4533 $file_line_number++;
553             } elsif ($stanza && $stanza =~ /\[Typedef\]/) { # treat [Typedef]
554 112         138 my $type;
555 112         124 my $only_one_name_tag_per_entry = 0;
556            
557             #
558             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
559             #
560 112         107 $intersection_of_counter = 0;
561 112         109 $union_of_counter = 0;
562            
563 112         89 $file_line_number++;
564 112         134 foreach my $line (@entry) {
565 602         511 $file_line_number++;
566 602 100 100     11100 if ($line =~ /^id:\s*(.*)/) { # get the type id
    50 66        
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    50          
    100          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    50          
    100          
    50          
567 112         302 $type = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
568 112 100 33     241 if (!defined $type){
    50          
569 75         202 $type = OBO::Core::RelationshipType->new(); # if not, create a new type
570 75         155 $type->id($1);
571 75         190 $result->add_relationship_type($type); # add it to the ontology
572             } elsif (defined $type->def()->text() && $type->def()->text() ne '') {
573             # the type is already in the ontology since it has a definition! (not empty)
574 0         0 croak "The relationship type with id '", $1, "' is duplicated in the OBO file. Check line: '", $file_line_number, "'";
575             } else {
576             # the type already in the ontology but with an empty def, which most probably will
577             # be defined later. This case is the result of adding a relationship while parsing
578             # the Term stanzas.
579             #warn "Line: '", $line, "', Def: '", $type->def_as_string(), "'\n";
580             }
581             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
582 0 0       0 $type->is_anonymous(($1 eq 'true')?1:0);
583             } elsif ($line =~ /^name:\s*(.*)/) {
584 112 50       185 croak "The typedef with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE}, "'. Check line: '", $file_line_number, "'" if ($only_one_name_tag_per_entry);
585 112         254 $type->name($1);
586 112         170 $only_one_name_tag_per_entry = 1;
587             } elsif ($line =~ /^namespace:\s*(.*)/) {
588 0         0 $type->namespace($1); # it is a Set
589             } elsif ($line =~ /^alt_id:\s*([:\w]+)/) {
590 0         0 $type->alt_id($1);
591             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill in the definition
592 59         172 my $def = OBO::Core::Def->new();
593 59         129 $def->text($1);
594 59         118 $def->dbxref_set_as_string($2);
595 59         180 $type->def($def);
596             } elsif ($line =~ /^comment:\s*(.*)/) {
597 36         121 $type->comment($1);
598             } elsif ($line =~ /^subset:\s*(\S+)/) {
599 1         3 my $ss = $1;
600 1 50       5 if ($result->subset_def_map()->contains_key($ss)) {
601 1         7 $type->subset($ss); # it is a Set (i.e. added to a Set)
602            
603 1         6 $used_subset{$ss}++; # check subsets usage
604             } else {
605 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file relationship type in line: '", $file_line_number, "'";
606             }
607             } elsif ($line =~ /^domain:\s*(.*)/) {
608 0         0 $type->domain($1);
609             } elsif ($line =~ /^range:\s*(.*)/) {
610 0         0 $type->range($1);
611             } elsif ($line =~ /^is_anti_symmetric:$r_true_false/) {
612 13 50       57 $type->is_anti_symmetric(($1 eq 'true')?1:0);
613             } elsif ($line =~ /^is_cyclic:$r_true_false/) {
614 0 0       0 $type->is_cyclic(($1 eq 'true')?1:0);
615             } elsif ($line =~ /^is_reflexive:$r_true_false/) {
616 24 50       113 $type->is_reflexive(($1 eq 'true')?1:0);
617             } elsif ($line =~ /^is_symmetric:$r_true_false/) {
618 2 50       12 $type->is_symmetric(($1 eq 'true')?1:0);
619             } elsif ($line =~ /^is_transitive:$r_true_false/) {
620 47 50       189 $type->is_transitive(($1 eq 'true')?1:0);
621             } elsif ($line =~ /^is_a:\s*([:\w]+)$r_comments/) { # intrinsic or not??? # The comment is ignored here but generated (and sometimes fixed) later internally
622 62         97 my $r = $1;
623 62         136 my $r_id = $type->id();
624 62 50       124 if ($r_id eq $r) {
625 0         0 warn "The term '", $r_id, "' has a reflexive is_a relationship, which was ignored!";
626 0         0 next;
627             }
628 62         164 my $rel = OBO::Core::Relationship->new();
629 62         185 $rel->id($r_id.'_is_a_'.$r);
630 62         123 $rel->type('is_a');
631 62         151 my $target = $result->get_relationship_type_by_id($r); # Is this relationship type already in the ontology?
632 62 100       105 if (!defined $target) {
633 7         26 $target = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
634 7         22 $target->id($r);
635 7         23 $result->add_relationship_type($target);
636             }
637 62         132 $rel->link($type, $target); # add a relationship between two relationship types
638 62         130 $result->add_relationship($rel);
639             } elsif ($line =~ /^is_metadata_tag:$r_true_false/) {
640 0 0       0 $type->is_metadata_tag(($1 eq 'true')?1:0);
641             } elsif ($line =~ /^is_class_level:$r_true_false/) {
642 1 50       7 $type->is_class_level(($1 eq 'true')?1:0);
643             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) {
644 3         20 $type->synonym_as_string($2, $3, uc($1));
645             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+(\w+))?$r_dbxref/) {
646 16 50       51 my $scope = (defined $3)?$3:'RELATED';
647             # From OBO flat file spec v1.2, we use:
648             # synonym: "endomitosis" EXACT []
649 16 50       46 if (defined $5) {
650 0         0 my $found = 0; # check that the 'synonym type name' was defined in the header!
651 0         0 foreach my $st ($result->synonym_type_def_set()->get_set()) {
652             # Adapt the scope if necessary to the one defined in the header!
653 0 0       0 if ($st->name() eq $5) {
654 0         0 $found = 1;
655 0         0 my $default_scope = $st->scope();
656 0 0       0 $scope = $default_scope if (defined $default_scope);
657 0         0 last;
658             }
659             }
660 0 0       0 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
661 0         0 $used_synonym_type_def{$5}++; # check synonymtypedef usage
662             }
663 16         57 $type->synonym_as_string($1, $6, $scope, $5);
664             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unk:\s*(.*)/) {
665 74         222 $type->xref_set_as_string($1);
666             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
667             # TODO Improve the 'intersection_of' treatment
668 0         0 my $rel = OBO::Core::Relationship->new();
669 0   0     0 my $r = $1 || 'nil';
670 0         0 my $id = $type->id().'_'.$r.'_'.$2;
671 0         0 $id =~ s/\s+/_/g;
672 0         0 $rel->id($id);
673 0         0 $rel->type($r);
674 0         0 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
675 0 0       0 if (!defined $target) {
676 0         0 $target = OBO::Core::Term->new(); # if not, create a new term
677 0         0 $target->id($2);
678 0         0 $result->add_term($target);
679             }
680 0         0 $rel->head($target);
681 0         0 $type->intersection_of($rel);
682 0         0 $intersection_of_counter++;
683             } elsif ($line =~ /^union_of:\s*(.*)/) {
684             # TODO wait until the OBO spec 1.4 be released
685 0         0 my $target = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
686 0 0       0 if (!defined $target) {
687 0         0 $target = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
688 0         0 $target->id($1);
689 0         0 $result->add_relationship_type($target);
690             }
691 0         0 $type->union_of($1);
692 0         0 $union_of_counter++;
693             } elsif ($line =~ /^disjoint_from:\s*([:\w]+)$r_comments/) {
694 1         7 $type->disjoint_from($1); # We are assuming that the other relation type exists or will exist; otherwise , we have to create it like in the is_a section.
695             } elsif ($line =~ /^inverse_of:\s*([:\w]+)$r_comments/) { # e.g. inverse_of: has_participant ! has participant
696 13         33 my $inv_id = $1;
697 13         45 my $inv_type = $result->get_relationship_type_by_id($inv_id); # Is this INVERSE relationship type already in the ontology?
698 13 100       36 if (!defined $inv_type){
699 9         30 $inv_type = OBO::Core::RelationshipType->new(); # if not, create a new type
700 9         25 $inv_type->id($inv_id);
701             #$inv_type->name($3) if ($3); # not necessary, this name could be wrong...
702 9         26 $result->add_relationship_type($inv_type); # add it to the ontology
703             }
704 13         41 $type->inverse_of($inv_type);
705             } elsif ($line =~ /^transitive_over:\s*(.*)/) {
706 0         0 $type->transitive_over($1);
707             } elsif ($line =~ /^holds_over_chain:\s*([:\w]+)\s*([:\w]+)$r_comments/) { # R <- R1.R2
708 14         28 my $r1_id = $1;
709 14         19 my $r2_id = $2;
710 14         37 my $r1_type = $result->get_relationship_type_by_id($r1_id); # Is this relationship type already in the ontology?
711 14 100       32 if (!defined $r1_type){
712 2         9 $r1_type = OBO::Core::RelationshipType->new(); # if not, create a new type
713 2         7 $r1_type->id($r1_id);
714 2         7 $result->add_relationship_type($r1_type); # add it to the ontology
715             }
716 14         25 my $r2_type = $result->get_relationship_type_by_id($r2_id); # Is this relationship type already in the ontology?
717 14 100       30 if (!defined $r2_type){
718 1         4 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new type
719 1         4 $r2_type->id($r2_id);
720 1         3 $result->add_relationship_type($r2_type); # add it to the ontology
721             }
722 14         32 $type->holds_over_chain($r1_type->id(), $r2_type->id());
723             } elsif ($line =~ /^equivalent_to_chain:\s*(.*)/) {
724             # TODO
725             } elsif ($line =~ /^disjoint_over:\s*(.*)/) {
726             # TODO
727             } elsif ($line =~ /^is_functional:$r_true_false/) {
728 0 0       0 $type->is_functional(($1 eq 'true')?1:0);
729             } elsif ($line =~ /^is_inverse_functional:$r_true_false/) {
730 0 0       0 $type->is_inverse_functional(($1 eq 'true')?1:0);
731             } elsif ($line =~ /^created_by:\s*(.*)/) {
732 2         11 $type->created_by($1);
733             } elsif ($line =~ /^creation_date:\s*(.*)/) {
734 2         6 my $d = $1;
735 2         9 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
736 2 50       2155 if (!$pd) {
737 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
738             }
739 2         9 $type->creation_date($d);
740             } elsif ($line =~ /^modified_by:\s*(.*)/) {
741 1         5 $type->modified_by($1);
742             } elsif ($line =~ /^modification_date:\s*(.*)/) {
743 1         2 my $d = $1;
744 1         4 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
745 1 50       718 if (!$pd) {
746 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
747             }
748 1         5 $type->modification_date($d);
749             } elsif ($line =~ /^is_obsolete:\s*(.*)/) {
750 0 0       0 $type->is_obsolete(($1 eq 'true')?1:0);
751             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
752 0         0 $type->replaced_by($1);
753             } elsif ($line =~ /^consider:\s*(.*)/) {
754 0         0 $type->consider($1);
755             } elsif ($line =~ /^builtin:$r_true_false/) {
756 5 50       33 $type->builtin(($1 eq 'true')?1:0);
757             } elsif ($line =~ /^!/) {
758             # skip line
759             } else {
760 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
761             }
762             }
763             # Check for required fields: id
764 112 50       265 if (!defined $type->id()) {
765 0         0 croak "No ID found in type:\n\n", $chunk, "\n\nfrom file '", $self->{OBO_FILE}, "'";
766             }
767 112 50       214 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
768 0         0 carp "Missing 'intersection_of' tag in relationship type:\n\n", $chunk, "\n";
769             }
770 112 50       178 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
771 0         0 carp "Missing 'union_of' tag in relationship type:\n\n", $chunk, "\n";
772             }
773 112         240 $file_line_number++;
774             } elsif ($stanza && $stanza =~ /\[Instance\]/) { # treat [Instance]
775 5         8 my $instance;
776            
777             #
778             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
779             #
780             # TODO do INSTANCES have these tags?
781 5         6 $intersection_of_counter = 0;
782 5         6 $union_of_counter = 0;
783            
784 5         6 $file_line_number++;
785            
786 5         7 my $only_one_id_tag_per_entry = 0;
787 5         4 my $only_one_name_tag_per_entry = 0;
788            
789 5         10 foreach my $line (@entry) {
790 26         30 $file_line_number++;
791 26 100 66     1444 if ($line =~ /^id:\s*(\S+)/) { # get the instance id
    50 66        
    100 33        
    100 66        
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    0          
792 5 50       83 if ($line =~ /^id:$r_db_acc/) { # Does it follow the "convention"?
793 5 50       13 croak "The instance with id '", $1, "' has a duplicated 'id' tag in the file '", $self->{OBO_FILE} if ($only_one_id_tag_per_entry);
794 5         21 $instance = $result->get_instance_by_id($1); # does this instance is already in the ontology?
795 5 100       13 if (!defined $instance){
796 3         18 $instance = OBO::Core::Instance->new(); # if not, create a new instance
797 3         10 $instance->id($1);
798 3         11 $result->add_instance($instance); # add it to the ontology
799 3         7 $only_one_id_tag_per_entry = 1;
800             #} elsif (defined $instance->def()->text() && $instance->def()->text() ne '') {
801             # TODO Do instances have a definition?
802             # # The instance is already in the ontology since it has a definition! (maybe empty?)
803             # croak "The instance with id '", $1, "' is duplicated in the OBO file.";
804             }
805             } else {
806 0         0 croak "The instance with id '", $1, "' does NOT follow the ID convention: 'IDSPACE:UNIQUE_IDENTIFIER', e.g. GO:1234567";
807             }
808             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
809 0 0       0 $instance->is_anonymous(($1 eq 'true')?1:0);
810             } elsif ($line =~ /^name:\s*(.*)/) {
811 5 50       11 croak "The instance with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE} if ($only_one_name_tag_per_entry);
812 5 50       13 if (!defined $1) {
813 0         0 warn "The instance with id '", $instance->id(), "' has no name in file '", $self->{OBO_FILE}, "'";
814             } else {
815 5         18 $instance->name($1);
816 5         10 $only_one_name_tag_per_entry = 1;
817             }
818             } elsif ($line =~ /^namespace:\s*(.*)/) {
819 1         5 $instance->namespace($1); # it is a Set
820             } elsif ($line =~ /^alt_id:$r_db_acc/) {
821             # TODO do INSTANCES have this tag?
822 0         0 $instance->alt_id($1);
823             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill in the definition
824 0         0 my $def = OBO::Core::Def->new();
825 0         0 $def->text($1);
826 0         0 $def->dbxref_set_as_string($2);
827 0         0 $instance->def($def);
828             } elsif ($line =~ /^comment:\s*(.*)/) {
829 0         0 $instance->comment($1);
830             } elsif ($line =~ /^subset:\s*(\S+)/) {
831 0         0 my $ss = $1;
832 0 0       0 if ($result->subset_def_map()->contains_key($ss)) {
833 0         0 $instance->subset($ss); # it is a Set (i.e. added to a Set)
834            
835 0         0 $used_subset{$ss}++; # check subsets usage
836             } else {
837 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file line '", $file_line_number, "'";
838             }
839             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) { # OBO spec 1.1
840 0         0 $instance->synonym_as_string($2, $3, uc($1));
841             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+([-\w]+))?$r_dbxref/) {
842 1 50       4 my $scope = (defined $3)?$3:'RELATED';
843             # As of OBO flat file spec v1.2, we use:
844             # synonym: "endomitosis" EXACT []
845 1 50       3 if (defined $5) {
846 0         0 my $found = 0; # check that the 'synonym type name' was defined in the header!
847 0         0 foreach my $st ($result->synonym_type_def_set()->get_set()) {
848             # Adapt the scope if necessary to the one defined in the header!
849 0 0       0 if ($st->name() eq $5) {
850 0         0 $found = 1;
851 0         0 my $default_scope = $st->scope();
852 0 0       0 $scope = $default_scope if (defined $default_scope);
853 0         0 last;
854             }
855             }
856 0 0       0 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
857 0         0 $used_synonym_type_def{$5}++; # check synonymtypedef usage
858             }
859 1         6 $instance->synonym_as_string($1, $6, $scope, $5);
860             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unknown:\s*(.*)/) {
861 1         5 $instance->xref_set_as_string($1);
862             } elsif ($line =~ /^instance_of:$r_db_acc$r_comments/) { # The comment is ignored here but retrieved later internally
863 5         17 my $t = $result->get_term_by_id($1); # Is this instance already in the ontology?
864 5 100       13 if (!defined $t) {
865 3         14 $t = OBO::Core::Term->new(); # if not, create a new Term
866 3         11 $t->id($1);
867 3         11 $result->add_term($t);
868             }
869 5         19 $instance->instance_of($t);
870             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
871             # TODO Improve the 'intersection_of' treatment
872             # TODO do INSTANCES have this tag?
873 0         0 my $rel = OBO::Core::Relationship->new();
874 0   0     0 my $r = $1 || 'nil';
875 0         0 my $id = $instance->id().'_'.$r.'_'.$2;
876 0         0 $id =~ s/\s+/_/g;
877 0         0 $rel->id($id);
878 0         0 $rel->type($r);
879 0         0 my $target = $result->get_instance_by_id($2); # Is this instance already in the ontology?
880 0 0       0 if (!defined $target) {
881 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
882 0         0 $target->id($2);
883 0         0 $result->add_instance($target);
884             }
885 0         0 $rel->head($target);
886 0         0 $instance->intersection_of($rel);
887 0         0 $intersection_of_counter++;
888             } elsif ($line =~ /^union_of:\s*(.*)/) {
889             # TODO wait until the OBO spec 1.4 be released
890             # TODO do INSTANCES have this tag?
891 0         0 my $target = $result->get_instance_by_id($1); # Is this instance already in the ontology?
892 0 0       0 if (!defined $target) {
893 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
894 0         0 $target->id($1);
895 0         0 $result->add_instance($target);
896             }
897 0         0 $instance->union_of($1);
898 0         0 $union_of_counter++;
899             } elsif ($line =~ /^disjoint_from:$r_db_acc$r_comments/) {
900             # TODO do INSTANCES have this tag?
901 0         0 $instance->disjoint_from($1); # We are assuming that the other instance exists or will exist; otherwise , we have to create it like in the is_a section.
902             } elsif ($line =~ /^relationship:\s*([\w\/]+)$r_db_acc$r_comments/ || $line =~ /^relationship:\s*$r_db_acc$r_db_acc$r_comments/) {
903             # TODO do INSTANCES have this tag?
904 0         0 my $rel = OBO::Core::Relationship->new();
905 0         0 my $id = $instance->id().'_'.$1.'_'.$2; # TODO see the line (TODO) of the 'term' section
906 0         0 $id =~ s/\s+/_/g;
907 0         0 $rel->id($id);
908 0         0 $rel->type($1);
909 0         0 my $target = $result->get_instance_by_id($2); # Is this instance already in the ontology?
910 0 0       0 if (!defined $target) {
911 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
912 0         0 $target->id($2);
913 0         0 $result->add_instance($target);
914             }
915 0         0 $rel->link($instance, $target);
916 0         0 $result->add_relationship($rel);
917             } elsif ($line =~ /^created_by:\s*(.*)/) {
918 0         0 $instance->created_by($1);
919             } elsif ($line =~ /^creation_date:\s*(.*)/) {
920 0         0 my $d = $1;
921 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
922 0 0       0 if (!$pd) {
923 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
924             }
925 0         0 $instance->creation_date($d);
926             } elsif ($line =~ /^modified_by:\s*(.*)/) {
927 0         0 $instance->modified_by($1);
928             } elsif ($line =~ /^modification_date:\s*(.*)/) {
929 0         0 my $d = $1;
930 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
931 0 0       0 if (!$pd) {
932 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
933             }
934 0         0 $instance->modification_date($d);
935             } elsif ($line =~ /^is_obsolete:$r_true_false/) {
936 0 0       0 $instance->is_obsolete(($1 eq 'true')?1:0);
937             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
938 0         0 $instance->replaced_by($1);
939             } elsif ($line =~ /^consider:\s*(.*)/) {
940 0         0 $instance->consider($1);
941             } elsif ($line =~ /^builtin:$r_true_false/) {
942             # TODO do INSTANCES have this tag?
943 0 0       0 $instance->builtin(($1 eq 'true')?1:0);
944             } elsif ($line =~ /^property_value:\s*(\w+)$r_db_acc/ || $line =~ /^property_value:\s*(\w+)\s+"(\w+)"$r_db_acc/) { # TODO re-implement this once the OBO spec is more mature...
945            
946 8         33 my $r2_type = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
947 8 100       21 if (!defined $r2_type){
948 5         20 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
949 5         18 $r2_type->id($1);
950 5         18 $result->add_relationship_type($r2_type); # add it to the ontology
951             }
952              
953             #
954             # create the triplet
955             #
956 8         28 my $rel = OBO::Core::Relationship->new();
957 8         28 my $id = $instance->id().'_'.$1.'_'.$2; # instance --> rel --> [term|instance|datatype]
958 8         32 $id =~ s/\s+/_/g;
959 8         22 $rel->id($id);
960 8         24 $rel->type($r2_type->id());
961            
962 8 100       34 if (!defined $3) {
    50          
963             #
964             # property_value: lastest_modification_by erick
965             #
966 4         15 my $target = $result->get_term_by_id($2); # suggest to OBOF to define TERMs before they are used so that parsers could know they are dealing with terms!
967            
968 4 50       11 if (defined $target) { # instance --> rel --> term
969            
970             } else {
971 4         10 $target = $result->get_instance_by_id($2);
972            
973 4 100       14 if (!defined $target) { # instance --> rel --> instance
974 3         11 $target = OBO::Core::Instance->new();
975 3         11 $target->id($2);
976 3         10 $result->add_instance($target);
977             }
978             }
979            
980 4         14 $rel->link($instance, $target); # triplet: instance --> rel --> target
981 4         14 $instance->property_value($rel);
982            
983             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
984             } elsif (defined $3) { # instance --> rel --> datatype
985             #
986             # property_value: lastest_modification_by "erick" xsd:string or e.g. shoe_size "12" xsd:positiveInteger
987             #
988 4         12 my $target = $result->get_instance_by_id($2);
989            
990 4 50       13 if (!defined $target) { # instance --> rel --> datatype
991 4         14 $target = OBO::Core::Instance->new();
992 4         12 $target->id($2);
993            
994             # data type check
995 4 50       18 warn "Unrecommended XML-schema pritive (data type) found: '", $3, "'" unless (exists $allowed_data_types{$3});
996            
997 4         15 my $data_type = OBO::Core::Term->new();
998 4         13 $data_type->id($3);
999             #$result->add_term($data_type); # TODO Think about it...
1000 4         13 $target->instance_of($data_type);
1001             #$result->add_instance($target); # TODO Think about it...
1002             }
1003              
1004 4         14 $rel->link($instance, $target);
1005 4         11 $instance->property_value($rel);
1006            
1007             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
1008             }
1009             } elsif ($line =~ /^!/) {
1010             # skip line
1011             } else {
1012 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
1013             }
1014             }
1015             # Check for required fields: id
1016 5 50 33     22 if (defined $instance && !defined $instance->id()) {
1017 0         0 croak "No ID found in instance:\n", $chunk;
1018             }
1019 5 50       15 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
1020             # TODO do INSTANCES have this tag?
1021 0         0 croak "Missing 'intersection_of' tag in instance:\n\n", $chunk, "\n";
1022             }
1023 5 50       15 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
1024 0         0 carp "Missing 'union_of' tag in instance:\n\n", $chunk, "\n";
1025             }
1026 5         12 $file_line_number++;
1027             } elsif ($stanza && $stanza =~ /\[Annotation\]/) { # treat [Annotation]
1028             # TODO "Annotations are ignored by ONTO-PERL (they might be supported in the future).";
1029             }
1030             }
1031            
1032             #
1033             # Warn (and delete) on non used subsets which were defined in the header (subsetdef's)
1034             #
1035 9         45 my @set_of_all_ss = $result->subset_def_map()->key_set()->get_set();
1036 9         45 foreach my $pss (sort @set_of_all_ss) {
1037 19 50       50 if (!$used_subset{$pss}) {
1038 0         0 $result->subset_def_map()->remove($pss);
1039 0         0 warn "Unused subset found (and removed): '", $pss, "' (in file '", $self->{OBO_FILE}, "')";
1040             }
1041             }
1042            
1043             #
1044             # Warn (and delete) on non used synonym type def's which were defined in the header (synonymtypedef's)
1045             #
1046 9         34 my @set_of_all_synonymtypedef = $result->synonym_type_def_set()->get_set();
1047 9         23 foreach my $st (@set_of_all_synonymtypedef) {
1048 5 50       22 if (!$used_synonym_type_def{$st->name()}) {
1049 0         0 $result->synonym_type_def_set()->remove($st);
1050 0         0 warn "Unused synonym type def found (and removed): '", $st->name(), "' (in file '", $self->{OBO_FILE}, "')";
1051             }
1052             }
1053            
1054             #
1055             # Work-around for some ontologies like GO: Explicitly add the implicit 'is_a' if missing
1056             #
1057 9 50       31 if (!$result->has_relationship_type_id('is_a')){
1058 0         0 my $type = OBO::Core::RelationshipType->new(); # if not, create a new type
1059 0         0 $type->id('is_a');
1060 0         0 $type->name('is_a');
1061 0         0 $result->add_relationship_type($type);
1062             }
1063            
1064 9         34 $/ = "\n";
1065              
1066 9         609 return $result;
1067             } else { # if no header (chunk[0])
1068 0           carp "The OBO file '", $self->{OBO_FILE},"' does not have a correct header, please verify it.";
1069             }
1070             }
1071              
1072             1;
1073              
1074             __END__