File Coverage

blib/lib/OBO/Parser/OBOParser.pm
Criterion Covered Total %
statement 464 634 73.1
branch 268 432 62.0
condition 42 66 63.6
subroutine 18 18 100.0
pod 1 2 50.0
total 793 1152 68.8


line stmt bran cond sub pod time code
1             # $Id: OBOParser.pm 2015-02-14 erick.antezana $
2             #
3             # Module : OBOParser.pm
4             # Purpose : Parse OBO-formatted files.
5             # License : Copyright (c) 2006-2015 by Erick Antezana. All rights reserved.
6             # This program is free software; you can redistribute it and/or
7             # modify it under the same terms as Perl itself.
8             # Contact : Erick Antezana
9             #
10             package OBO::Parser::OBOParser;
11              
12 7     7   106426 use OBO::Core::Dbxref;
  7         17  
  7         199  
13 7     7   3647 use OBO::Core::Instance;
  7         19  
  7         217  
14 7     7   7756 use OBO::Core::Ontology;
  7         25  
  7         311  
15 7     7   69 use OBO::Core::Relationship;
  7         14  
  7         164  
16 7     7   4147 use OBO::Core::RelationshipType;
  7         19  
  7         213  
17 7     7   3911 use OBO::Core::SubsetDef;
  7         22  
  7         190  
18 7     7   3024 use OBO::Core::SynonymTypeDef;
  7         19  
  7         199  
19 7     7   2470 use OBO::Core::Term;
  7         19  
  7         210  
20 7     7   45 use OBO::Util::IDspaceSet;
  7         14  
  7         153  
21 7     7   35 use OBO::Util::Set;
  7         12  
  7         160  
22 7     7   3725 use OBO::XO::OBO_ID;
  7         19  
  7         188  
23              
24 7     7   37 use Carp;
  7         14  
  7         494  
25 7     7   4806 use Date::Manip qw(ParseDate UnixDate);
  7         1185669  
  7         728  
26 7     7   71 use strict;
  7         16  
  7         169  
27 7     7   40 use warnings;
  7         15  
  7         250  
28              
29 7     7   39 use open qw(:std :utf8); # Make All I/O Default to UTF-8
  7         17  
  7         64  
30              
31             $Carp::Verbose = 1;
32              
33             sub new {
34 4     4 0 53 my $class = shift;
35 4         12 my $self = {};
36            
37 4         14 $self->{OBO_FILE} = undef; # required, (1)
38            
39 4         11 bless ($self, $class);
40 4         14 return $self;
41             }
42              
43             =head2 work
44              
45             Usage - $OBOParser->work($obo_file_path)
46             Returns - the parsed OBO ontology
47             Args - the OBO file to be parsed
48             Function - parses an OBO file
49            
50             =cut
51              
52             sub work {
53 9     9 1 1309 my $self = shift;
54 9 50       36 if (defined $_[0]) {
55 9         45 $self->{OBO_FILE} = shift;
56             } else {
57 0         0 croak 'You have to provide an OBO file as input';
58             }
59            
60 9 50       500 open (OBO_FILE, $self->{OBO_FILE}) || croak 'The OBO file (', $self->{OBO_FILE}, ') cannot be opened: ', $!;
61            
62 9         43 $/ = ""; # one paragraph at the time
63 9         5832 chomp(my @chunks = );
64 9         156 chomp(@chunks);
65 9         154 close OBO_FILE;
66              
67             #
68             # Treat OBO file header tags
69             #
70 9         22 my $file_line_number = 0;
71 9 50 33     142 if (defined $chunks[0] && $chunks[0] =~ /^format-version:\s*(.*)/) {
72              
73 9         125 my @header = split (/\n/, $chunks[0]);
74 9         23 $file_line_number = $#header + 2; # amount of lines in the header
75 9         54 $chunks[0] = join("\n", @header);
76              
77             #
78             # format-version
79             #
80 9         16 my $format_version;
81 9 50       71 if ($chunks[0] =~ /(^format-version:\s*(.*)\n?)/m) { # required tag
82 9         35 $format_version = $2;
83 9         352 $chunks[0] =~ s/$1//;
84             }
85              
86             #
87             # data_version
88             #
89 9         23 my $data_version;
90 9 100       50 if ($chunks[0] =~ /(^data-version:\s*(.*)\n?)/m) {
91 1         4 $data_version = $2;
92 1         16 $chunks[0] =~ s/$1//;
93             }
94            
95             #
96             # ontology
97             #
98 9         17 my $ontology_id_space;
99 9 100       45 if ($chunks[0] =~ /(^ontology:\s*(.*)\n?)/m) { # as of OBO spec 1.4
100 1         3 $ontology_id_space = $2;
101 1         14 $chunks[0] =~ s/$1//;
102             }
103            
104             #
105             # date
106             #
107 9         19 my $date;
108 9 50       55 if ($chunks[0] =~ /(^date:\s*(.*)\n?)/m) {
109 9         25 $date = $2;
110 9         140 $chunks[0] =~ s/$1//;
111             }
112            
113             #
114             # saved_by
115             #
116 9         23 my $saved_by;
117 9 100       144 if ($chunks[0] =~ /(^saved-by:\s*(.*)\n?)/m) {
118 7         17 $saved_by = $2;
119 7         133 $chunks[0] =~ s/$1//;
120             }
121              
122             #
123             # auto_generated_by
124             #
125 9         21 my $auto_generated_by;
126 9 100       54 if ($chunks[0] =~ /(^auto-generated-by:\s*(.*)\n?)/m) {
127 8         25 $auto_generated_by = $2;
128 8         125 $chunks[0] =~ s/$1//;
129             }
130              
131             #
132             # imports
133             #
134 9         70 my $imports = OBO::Util::Set->new();
135 9         53 while ($chunks[0] =~ /(^import:\s*(.*)\n?)/m) {
136 1         5 $imports->add($2);
137 1         19 $chunks[0] =~ s/$1//;
138             }
139              
140             #
141             # subsetdef
142             #
143 9         98 my $subset_def_map = OBO::Util::SubsetDefMap->new();
144 9         59 while ($chunks[0] =~ /(^subsetdef:\s*(\S+)\s+\"(.*)\"\n?)/m) {
145 19         59 my $line = quotemeta($1);
146 19         79 my $ssd = OBO::Core::SubsetDef->new();
147 19         54 $ssd->name($2);
148 19         53 $ssd->description($3);
149 19         71 $subset_def_map->put($2, $ssd);
150 19         350 $chunks[0] =~ s/${line}//;
151             }
152            
153             #
154             # synonymtypedef
155             #
156 9         63 my $synonym_type_def_set = OBO::Util::SynonymTypeDefSet->new();
157 9         75 while ($chunks[0] =~ /(^synonymtypedef:\s*(\S+)\s+\"(.*)\"(.*)?\n?)/m) {
158 5         18 my $line = quotemeta($1);
159 5         33 my $std = OBO::Core::SynonymTypeDef->new();
160 5         20 $std->name($2);
161 5         17 $std->description($3);
162 5         14 my $sc = $4;
163 5 100 66     60 $std->scope($sc) if (defined $sc && $sc =~s/\s//);
164 5         24 $synonym_type_def_set->add($std);
165 5         102 $chunks[0] =~ s/${line}//;
166             }
167            
168             #
169             # idspace
170             #
171 9         66 my $idspaces = OBO::Util::IDspaceSet->new();
172 9         53 while ($chunks[0] =~ /(^idspace:\s*(\S+)\s*(\S+)\s+(\"(.*)\")?\n?)/m) {
173 3         12 my $line = quotemeta($1);
174 3         19 my $new_idspace = OBO::Core::IDspace->new();
175 3         13 $new_idspace->local_idspace($2);
176 3         13 $new_idspace->uri($3);
177 3         8 my $dc = $5;
178 3 50       21 $new_idspace->description($dc) if (defined $dc);
179 3         14 $idspaces->add($new_idspace);
180 3         96 $chunks[0] =~ s/${line}//;
181             }
182            
183             #
184             # default-relationship-id-prefix
185             # e.g. default-relationship-id-prefix: OBO_REL
186             #
187 9         23 my $default_relationship_id_prefix;
188 9 100       47 if ($chunks[0] =~ /(^default_relationship_id_prefix:\s*(.*)\n?)/m) {
189 1         3 $default_relationship_id_prefix = $2;
190 1         13 $chunks[0] =~ s/$1//;
191             }
192            
193             #
194             # default-namespace
195             #
196 9         28 my $default_namespace;
197 9 50       61 if ($chunks[0] =~ /(^default-namespace:\s*(.*)\n?)/m) {
198 9         20 $default_namespace = $2;
199 9         124 $chunks[0] =~ s/$1//;
200             }
201            
202             #
203             # remark
204             #
205 9         38 my $remarks = OBO::Util::Set->new();
206 9         57 while ($chunks[0] =~ /(^remark:\s*(.*)\n?)/m) {
207 6         27 my $line = quotemeta($1);
208 6         25 $remarks->add($2);
209 6         130 $chunks[0] =~ s/${line}//;
210             }
211              
212 9 50       32 if (!defined $format_version) {
213 0         0 croak "The OBO file '", $self->{OBO_FILE},"' does not have a correct header, please verify it.";
214             }
215            
216             #
217             # treat-xrefs-as-equivalent
218             #
219 9         31 my $treat_xrefs_as_equivalent = OBO::Util::Set->new();
220 9         38 while ($chunks[0] =~ /(^treat-xrefs-as-equivalent:\s*(.*)\n?)/m) {
221 2         7 $treat_xrefs_as_equivalent->add($2);
222 2         33 $chunks[0] =~ s/$1//;
223             }
224            
225             #
226             # treat-xrefs-as-is_a
227             #
228 9         38 my $treat_xrefs_as_is_a = OBO::Util::Set->new();
229 9         42 while ($chunks[0] =~ /(^treat-xrefs-as-is_a:\s*(.*)\n?)/m) {
230 2         6 $treat_xrefs_as_is_a->add($2);
231 2         26 $chunks[0] =~ s/$1//;
232             }
233            
234             #
235             # store the values in header tags
236             #
237 9         64 my $result = OBO::Core::Ontology->new();
238            
239 9 100       37 $result->data_version($data_version) if ($data_version);
240 9 100       29 $result->id($ontology_id_space) if ($ontology_id_space);
241 9 50       55 $result->date($date) if ($date);
242 9 100       44 $result->saved_by($saved_by) if ($saved_by);
243             #$result->auto_generated_by($auto_generated_by) if ($auto_generated_by);
244 9         43 $result->subset_def_map($subset_def_map);
245 9         28 $result->imports($imports->get_set());
246 9         47 $result->synonym_type_def_set($synonym_type_def_set->get_set());
247 9         42 $result->idspaces($idspaces->get_set());
248 9 100       31 $result->default_relationship_id_prefix($default_relationship_id_prefix) if ($default_relationship_id_prefix);
249 9 50       51 $result->default_namespace($default_namespace) if ($default_namespace);
250 9         28 $result->remarks($remarks->get_set());
251 9         32 $result->treat_xrefs_as_equivalent($treat_xrefs_as_equivalent->get_set());
252 9         37 $result->treat_xrefs_as_is_a($treat_xrefs_as_is_a->get_set());
253              
254 9 50       27 if ($chunks[0]) {
255 0         0 print STDERR "The following line(s) has been ignored from the header:\n", $chunks[0], "\n";
256             }
257            
258             #
259             # Keep log's
260             #
261 9         16 my %used_subset; # of the used subsets to pin point nonused subsets defined in the header (subsetdef's)
262            
263             my %used_synonym_type_def; # of the used synonymtypedef to pin point nonused synonymtypedef's defined in the header (synonymtypedef's)
264            
265             #
266             # Regexps
267             #
268             #my $r_db_acc = qr/([ \*\.\w-]*):([ '\#~\w:\\\+\?\{\}\$\/\(\)\[\]\.=&!%_,-]*)/o;
269 9         41 my $r_db_acc = qr/\s+(\w+:\w+)/o; # TODO check if qr/\s+(\w+:\S+)/o; is better...
270 9         28 my $r_dbxref = qr/\s+(\[.*\])/o;
271 9         27 my $syn_scope = qr/(\s+(EXACT|BROAD|NARROW|RELATED))?/o;
272 9         26 my $r_true_false = qr/\s*(true|false)/o;
273 9         28 my $r_comments = qr/\s*(\!\s*(.*))?/o;
274            
275 9         14 my $intersection_of_counter = 0;
276 9         22 my $union_of_counter = 0;
277            
278 9         89 my %allowed_data_types = ( 'xsd:simpleType' => 1, # Indicates any primitive type (abstract)
279             'xsd:string' => 1, # A string
280             'xsd:integer' => 1, # Any integer
281             'xsd:decimal' => 1, # Any real number
282             'xsd:negativeInteger' => 1, # Any negative integer
283             'xsd:positiveInteger' => 1, # Any integer > 0
284             'xsd:nonNegativeInteger' => 1, # Any integer >= 0
285             'xsd:nonPositiveInteger' => 1, # Any integer < 0
286             'xsd:boolean' => 1, # True or false
287             'xsd:date' => 1 # An XML-Schema date
288             );
289            
290 9         21 foreach my $chunk (@chunks) {
291 2152         14598 my @entry = split (/\n/, $chunk);
292 2152         3468 my $stanza = shift @entry;
293            
294 2152 100 100     10266 if ($stanza && $stanza =~ /\[Term\]/) { # treat [Term]'s
    100 100        
    100 66        
    50 33        
295            
296 2026         2172 my $term;
297             #
298             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
299             #
300 2026         2363 $intersection_of_counter = 0;
301 2026         2117 $union_of_counter = 0;
302            
303 2026         2013 $file_line_number++;
304            
305 2026         2066 my $only_one_id_tag_per_entry = 0;
306 2026         2073 my $only_one_name_tag_per_entry = 0;
307            
308 2026         2974 foreach my $line (@entry) {
309 12023         14216 $file_line_number++;
310 12023 100 100     196128 if ($line =~ /^id:\s*(\S+)/) { # get the term id
    100 100        
    100 100        
    100 100        
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    50          
    50          
    50          
    100          
    50          
311 2026 50       9974 if ($line =~ /^id:$r_db_acc/) { # Does it follow the "convention"?
312 2026 50       3834 croak "The term with id '", $1, "' has a duplicated 'id' tag in the file '", $self->{OBO_FILE} if ($only_one_id_tag_per_entry);
313 2026         6053 $term = $result->get_term_by_id($1); # does this term is already in the ontology?
314 2026 100 33     4892 if (!defined $term){
    50          
315 1622         4858 $term = OBO::Core::Term->new(); # if not, create a new term
316 1622         4250 $term->id($1);
317 1622         4335 $result->add_term($term); # add it to the ontology
318 1622         3477 $only_one_id_tag_per_entry = 1;
319             } elsif (defined $term->def()->text() && $term->def()->text() ne '') {
320             # The term is already in the ontology since it has a definition! (maybe empty?)
321 0         0 croak "The term with id '", $1, "' is duplicated in the OBO file.";
322             }
323             } else {
324 0         0 carp "The term with id '", $1, "' does NOT follow the ID convention: 'IDSPACE:UNIQUE_IDENTIFIER', e.g. GO:1234567";
325             }
326             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
327 2 50       14 $term->is_anonymous(($1 eq 'true')?1:0);
328             } elsif ($line =~ /^name:\s*(.*)/) {
329 2024 50       3915 carp "The term with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE} if ($only_one_name_tag_per_entry);
330 2024 50       5235 if (!defined $1) {
331 0         0 warn "The term with id '", $term->id(), "' has no name in file '", $self->{OBO_FILE}, "'";
332             } else {
333 2024         5777 $term->name($1);
334 2024         4064 $only_one_name_tag_per_entry = 1;
335             }
336             } elsif ($line =~ /^namespace:\s*(.*)/) {
337 14         55 $term->namespace($1); # it is a Set
338             } elsif ($line =~ /^alt_id:$r_db_acc/) {
339 9         37 $term->alt_id($1);
340             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill the definition
341 1338         3880 my $def = OBO::Core::Def->new();
342 1338         3382 $def->text($1);
343 1338         3342 $def->dbxref_set_as_string($2);
344 1338         4230 $term->def($def);
345             } elsif ($line =~ /^comment:\s*(.*)/) {
346 194         662 $term->comment($1);
347             } elsif ($line =~ /^subset:\s*(\S+)/) {
348 34         71 my $ss = $1;
349 34 50       108 if ($result->subset_def_map()->contains_key($ss)) {
350 34         96 $term->subset($ss); # it is a Set (i.e. added to a Set)
351            
352 34         122 $used_subset{$ss}++; # check subsets usage
353             } else {
354 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file line '", $file_line_number, "'";
355             }
356             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) { # OBO spec 1.1
357 573         2702 $term->synonym_as_string($2, $3, uc($1));
358             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+([-\w]+))?$r_dbxref/) {
359 1541 100       4552 my $scope = (defined $3)?$3:'RELATED';
360             # As of OBO flat file spec v1.2, we use:
361             # synonym: "endomitosis" EXACT []
362 1541 100       3498 if (defined $5) { # if a 'synonym type name' is given
363 5         9 my $found = 0; # check that the 'synonym type name' was defined in the header!
364 5         22 foreach my $st ($result->synonym_type_def_set()->get_set()) {
365 7 100       27 if ($st->name() eq $5) {
366 5 100       17 if (!defined $3) { # if no scope is given, use the one defined in the header!
367 1         4 my $default_scope = $st->scope();
368 1 50       5 $scope = $default_scope if (defined $default_scope);
369             }
370 5         8 $found = 1;
371 5         11 last;
372             }
373             }
374 5 50       17 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
375 5         18 $used_synonym_type_def{$5}++; # check synonymtypedef usage
376             }
377 1541         4958 $term->synonym_as_string($1, $6, $scope, $5);
378            
379             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unknown:\s*(.*)/) {
380 1167         3801 $term->xref_set_as_string($1);
381             } elsif ($line =~ /^is_a:$r_db_acc$r_comments/) { # The comment is ignored here but generated later internally
382 2149         6345 my $t_id = $term->id();
383 2149 50       6419 if ($t_id eq $1) {
384 0         0 warn "The term '", $t_id, "' has a reflexive is_a relationship, which was ignored!";
385 0         0 next;
386             }
387 2149         6424 my $rel = OBO::Core::Relationship->new();
388 2149         10430 $rel->id($t_id.'_is_a_'.$1);
389 2149         6268 $rel->type('is_a');
390 2149         5796 my $target = $result->get_term_by_id($1); # Is this term already in the ontology?
391 2149 100       4886 if (!defined $target) {
392 264         798 $target = OBO::Core::Term->new(); # if not, create a new term
393 264         757 $target->id($1);
394 264         715 $result->add_term($target);
395             }
396 2149         5752 $rel->link($term, $target);
397 2149         5945 $result->add_relationship($rel);
398             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
399             # TODO Improve the 'intersection_of' treatment
400 2         17 my $rel = OBO::Core::Relationship->new();
401 2   100     13 my $r = $1 || 'nil';
402 2         8 my $id = $term->id().'_'.$r.'_'.$2;
403 2         8 $id =~ s/\s+/_/g;
404 2         8 $rel->id($id);
405 2         8 $rel->type($r);
406 2         10 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
407 2 100       8 if (!defined $target) {
408 1         7 $target = OBO::Core::Term->new(); # if not, create a new term
409 1         4 $target->id($2);
410 1         6 $result->add_term($target);
411             }
412 2         10 $rel->head($target);
413 2         9 $term->intersection_of($rel);
414 2         9 $intersection_of_counter++;
415             } elsif ($line =~ /^union_of:$r_db_acc$r_comments/) {
416             # TODO wait until the OBO spec 1.4 be released
417 0         0 my $target = $result->get_term_by_id($1); # Is this term already in the ontology?
418 0 0       0 if (!defined $target) {
419 0         0 $target = OBO::Core::Term->new(); # if not, create a new term
420 0         0 $target->id($1);
421 0         0 $result->add_term($target);
422             }
423 0         0 $term->union_of($1);
424 0         0 $union_of_counter++;
425             } elsif ($line =~ /^disjoint_from:$r_db_acc$r_comments/) {
426 15         65 $term->disjoint_from($1); # We are assuming that the other term exists or will exist; otherwise , we have to create it like in the is_a section.
427             } elsif ($line =~ /^relationship:\s*([\w\/]+)$r_db_acc$r_comments/ || $line =~ /^relationship:\s*$r_db_acc$r_db_acc$r_comments/) {
428 898         3015 my $rel = OBO::Core::Relationship->new();
429 898         2538 my $id = $term->id().'_'.$1.'_'.$2; # TODO: I have to standarise the id's: term_id1_db:acc_term_id2
430 898         2723 $id =~ s/\s+/_/g;
431 898         2504 $rel->id($id);
432 898         2327 $rel->type($1);
433             #warn "TYPE : '", $id, "'";
434 898         2591 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
435 898 100       2096 if (!defined $target) {
436 144         456 $target = OBO::Core::Term->new(); # if not, create a new term
437 144         440 $target->id($2);
438 144         454 $result->add_term($target);
439             }
440 898         2530 $rel->link($term, $target);
441 898         2367 $result->add_relationship($rel);
442             } elsif ($line =~ /^created_by:\s*(.*)/) {
443 5         27 $term->created_by($1);
444             } elsif ($line =~ /^creation_date:\s*(.*)/) {
445 5         13 my $d = $1;
446 5         35 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
447 5 50       132763 if (!$pd) {
448 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
449             # } else {
450             # my ($year, $month, $day) = UnixDate($pd, "%Y", "%m", "%d");
451             # warn "Date was $month/$day/$year\n";
452             }
453 5         36 $term->creation_date($d);
454             } elsif ($line =~ /^modified_by:\s*(.*)/) {
455 0         0 $term->modified_by($1);
456             } elsif ($line =~ /^modification_date:\s*(.*)/) {
457 0         0 my $d = $1;
458 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
459 0 0       0 if (!$pd) {
460 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
461             }
462 0         0 $term->modification_date($d);
463             } elsif ($line =~ /^is_obsolete:$r_true_false/) {
464 20 50       128 $term->is_obsolete(($1 eq 'true')?1:0);
465             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
466 0         0 $term->replaced_by($1);
467             } elsif ($line =~ /^consider:\s*(.*)/) {
468 0         0 $term->consider($1);
469             } elsif ($line =~ /^builtin:$r_true_false/) {
470 0 0       0 $term->builtin(($1 eq 'true')?1:0);
471             } elsif ($line =~ /^property_value:\s*(\w+)$r_db_acc/ || $line =~ /^property_value:\s*(\w+)\s+"([ \°'\#~\w:\\\+\?\{\}\$\/\(\)\[\]\.=&!%_,-]+)"$r_db_acc/) { # TODO some parts in this block might change later on...
472              
473 6         17 my $relationship_type_id = $1;
474 6         12 my $value_specifier = $2;
475 6         11 my $last_match = $3;
476              
477 6         26 my $r2_type = $result->get_relationship_type_by_id($relationship_type_id); # Is this relationship type already in the ontology?
478 6 50       17 if (!defined $r2_type){
479 6         32 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
480 6         21 $r2_type->id($relationship_type_id);
481 6         20 $result->add_relationship_type($r2_type); # add it to the ontology
482             }
483             #
484             # create the triplet
485             #
486 6         21 my $rel = OBO::Core::Relationship->new();
487 6         20 my $id = $term->id().'_'.$relationship_type_id.'_'.$value_specifier; # term --> rel --> [term|instance|datatype]
488 6         24 $id =~ s/\s+/_/g;
489 6         21 $rel->id($id);
490 6         48 $rel->type($r2_type->id());
491              
492 6 100       28 if (!defined $last_match) {
    50          
493             #
494             # property_value: lastest_modification_by erick
495             #
496 1         5 my $target = $result->get_term_by_id($value_specifier); # suggest to OBOF to define TERMs before they are used so that parsers could know they are dealing with terms!
497            
498 1 50       5 if (defined $target) { # term --> rel --> term
499            
500             } else {
501 1         5 $target = $result->get_instance_by_id($value_specifier);
502            
503 1 50       5 if (!defined $target) { # term --> rel --> instance
504 1         5 $target = OBO::Core::Instance->new();
505 1         4 $target->id($value_specifier);
506 1         4 $result->add_instance($target);
507             }
508             }
509            
510 1         3 $rel->link($term, $target); # triplet: term --> rel --> target
511 1         4 $term->property_value($rel);
512            
513             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
514             } elsif (defined $last_match) { # term --> rel --> datatype
515             #
516             # property_value: lastest_modification_by "erick" xsd:string or e.g. shoe_size "12" xsd:positiveInteger
517             #
518 5         21 my $target = $result->get_instance_by_id($value_specifier);
519            
520 5 50       15 if (!defined $target) { # term --> rel --> datatype
521 5         28 $target = OBO::Core::Instance->new();
522 5         17 $target->id($value_specifier);
523            
524             # data type check
525 5 50       20 warn "Unrecommended XML-schema pritive (data type) found: '", $last_match, "'" unless (exists $allowed_data_types{$last_match});
526            
527 5         19 my $data_type = OBO::Core::Term->new();
528 5         16 $data_type->id($last_match);
529             #$result->add_term($data_type); # TODO Think about it...
530 5         19 $target->instance_of($data_type);
531             #$result->add_instance($target); # TODO Think about it...
532             }
533              
534 5         15 $rel->link($term, $target);
535 5         15 $term->property_value($rel);
536            
537             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
538             }
539             } elsif ($line =~ /^!/) {
540             # skip line
541             } else {
542 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
543             }
544             }
545             # Check for required fields: id
546 2026 50 33     8149 if (defined $term && !defined $term->id()) {
547 0         0 croak "No ID found in term:\n", $chunk;
548             }
549 2026 50       4445 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
550 0         0 carp "Missing 'intersection_of' tag in term:\n\n", $chunk, "\n";
551             }
552 2026 50       4055 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
553 0         0 carp "Missing 'union_of' tag in term:\n\n", $chunk, "\n";
554             }
555 2026         5748 $file_line_number++;
556             } elsif ($stanza && $stanza =~ /\[Typedef\]/) { # treat [Typedef]
557 112         139 my $type;
558 112         147 my $only_one_name_tag_per_entry = 0;
559            
560             #
561             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
562             #
563 112         134 $intersection_of_counter = 0;
564 112         123 $union_of_counter = 0;
565            
566 112         124 $file_line_number++;
567 112         184 foreach my $line (@entry) {
568 602         779 $file_line_number++;
569 602 100 100     13335 if ($line =~ /^id:\s*(.*)/) { # get the type id
    50 66        
    100          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    100          
    100          
    50          
    100          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    100          
    50          
    50          
    50          
    100          
    50          
570 112         364 $type = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
571 112 100 33     325 if (!defined $type){
    50          
572 75         246 $type = OBO::Core::RelationshipType->new(); # if not, create a new type
573 75         221 $type->id($1);
574 75         228 $result->add_relationship_type($type); # add it to the ontology
575             } elsif (defined $type->def()->text() && $type->def()->text() ne '') {
576             # the type is already in the ontology since it has a definition! (not empty)
577 0         0 croak "The relationship type with id '", $1, "' is duplicated in the OBO file. Check line: '", $file_line_number, "'";
578             } else {
579             # the type already in the ontology but with an empty def, which most probably will
580             # be defined later. This case is the result of adding a relationship while parsing
581             # the Term stanzas.
582             #warn "Line: '", $line, "', Def: '", $type->def_as_string(), "'\n";
583             }
584             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
585 0 0       0 $type->is_anonymous(($1 eq 'true')?1:0);
586             } elsif ($line =~ /^name:\s*(.*)/) {
587 112 50       234 croak "The typedef with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE}, "'. Check line: '", $file_line_number, "'" if ($only_one_name_tag_per_entry);
588 112         334 $type->name($1);
589 112         235 $only_one_name_tag_per_entry = 1;
590             } elsif ($line =~ /^namespace:\s*(.*)/) {
591 0         0 $type->namespace($1); # it is a Set
592             } elsif ($line =~ /^alt_id:\s*([:\w]+)/) {
593 0         0 $type->alt_id($1);
594             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill in the definition
595 59         183 my $def = OBO::Core::Def->new();
596 59         174 $def->text($1);
597 59         166 $def->dbxref_set_as_string($2);
598 59         208 $type->def($def);
599             } elsif ($line =~ /^comment:\s*(.*)/) {
600 36         131 $type->comment($1);
601             } elsif ($line =~ /^subset:\s*(\S+)/) {
602 1         3 my $ss = $1;
603 1 50       7 if ($result->subset_def_map()->contains_key($ss)) {
604 1         7 $type->subset($ss); # it is a Set (i.e. added to a Set)
605            
606 1         5 $used_subset{$ss}++; # check subsets usage
607             } else {
608 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file relationship type in line: '", $file_line_number, "'";
609             }
610             } elsif ($line =~ /^domain:\s*(.*)/) {
611 0         0 $type->domain($1);
612             } elsif ($line =~ /^range:\s*(.*)/) {
613 0         0 $type->range($1);
614             } elsif ($line =~ /^is_anti_symmetric:$r_true_false/) {
615 13 50       69 $type->is_anti_symmetric(($1 eq 'true')?1:0);
616             } elsif ($line =~ /^is_cyclic:$r_true_false/) {
617 0 0       0 $type->is_cyclic(($1 eq 'true')?1:0);
618             } elsif ($line =~ /^is_reflexive:$r_true_false/) {
619 24 50       135 $type->is_reflexive(($1 eq 'true')?1:0);
620             } elsif ($line =~ /^is_symmetric:$r_true_false/) {
621 2 50       15 $type->is_symmetric(($1 eq 'true')?1:0);
622             } elsif ($line =~ /^is_transitive:$r_true_false/) {
623 47 50       260 $type->is_transitive(($1 eq 'true')?1:0);
624             } elsif ($line =~ /^is_a:\s*([:\w]+)$r_comments/) { # intrinsic or not??? # The comment is ignored here but generated (and sometimes fixed) later internally
625 62         139 my $r = $1;
626 62         196 my $r_id = $type->id();
627 62 50       162 if ($r_id eq $r) {
628 0         0 warn "The term '", $r_id, "' has a reflexive is_a relationship, which was ignored!";
629 0         0 next;
630             }
631 62         201 my $rel = OBO::Core::Relationship->new();
632 62         290 $rel->id($r_id.'_is_a_'.$r);
633 62         197 $rel->type('is_a');
634 62         202 my $target = $result->get_relationship_type_by_id($r); # Is this relationship type already in the ontology?
635 62 100       157 if (!defined $target) {
636 7         30 $target = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
637 7         27 $target->id($r);
638 7         25 $result->add_relationship_type($target);
639             }
640 62         176 $rel->link($type, $target); # add a relationship between two relationship types
641 62         181 $result->add_relationship($rel);
642             } elsif ($line =~ /^is_metadata_tag:$r_true_false/) {
643 0 0       0 $type->is_metadata_tag(($1 eq 'true')?1:0);
644             } elsif ($line =~ /^is_class_level:$r_true_false/) {
645 1 50       11 $type->is_class_level(($1 eq 'true')?1:0);
646             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) {
647 3         25 $type->synonym_as_string($2, $3, uc($1));
648             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+(\w+))?$r_dbxref/) {
649 16 50       55 my $scope = (defined $3)?$3:'RELATED';
650             # From OBO flat file spec v1.2, we use:
651             # synonym: "endomitosis" EXACT []
652 16 50       53 if (defined $5) {
653 0         0 my $found = 0; # check that the 'synonym type name' was defined in the header!
654 0         0 foreach my $st ($result->synonym_type_def_set()->get_set()) {
655             # Adapt the scope if necessary to the one defined in the header!
656 0 0       0 if ($st->name() eq $5) {
657 0         0 $found = 1;
658 0         0 my $default_scope = $st->scope();
659 0 0       0 $scope = $default_scope if (defined $default_scope);
660 0         0 last;
661             }
662             }
663 0 0       0 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
664 0         0 $used_synonym_type_def{$5}++; # check synonymtypedef usage
665             }
666 16         63 $type->synonym_as_string($1, $6, $scope, $5);
667             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unk:\s*(.*)/) {
668 74         248 $type->xref_set_as_string($1);
669             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
670             # TODO Improve the 'intersection_of' treatment
671 0         0 my $rel = OBO::Core::Relationship->new();
672 0   0     0 my $r = $1 || 'nil';
673 0         0 my $id = $type->id().'_'.$r.'_'.$2;
674 0         0 $id =~ s/\s+/_/g;
675 0         0 $rel->id($id);
676 0         0 $rel->type($r);
677 0         0 my $target = $result->get_term_by_id($2); # Is this term already in the ontology?
678 0 0       0 if (!defined $target) {
679 0         0 $target = OBO::Core::Term->new(); # if not, create a new term
680 0         0 $target->id($2);
681 0         0 $result->add_term($target);
682             }
683 0         0 $rel->head($target);
684 0         0 $type->intersection_of($rel);
685 0         0 $intersection_of_counter++;
686             } elsif ($line =~ /^union_of:\s*(.*)/) {
687             # TODO wait until the OBO spec 1.4 be released
688 0         0 my $target = $result->get_relationship_type_by_id($1); # Is this relationship type already in the ontology?
689 0 0       0 if (!defined $target) {
690 0         0 $target = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
691 0         0 $target->id($1);
692 0         0 $result->add_relationship_type($target);
693             }
694 0         0 $type->union_of($1);
695 0         0 $union_of_counter++;
696             } elsif ($line =~ /^disjoint_from:\s*([:\w]+)$r_comments/) {
697 1         11 $type->disjoint_from($1); # We are assuming that the other relation type exists or will exist; otherwise , we have to create it like in the is_a section.
698             } elsif ($line =~ /^inverse_of:\s*([:\w]+)$r_comments/) { # e.g. inverse_of: has_participant ! has participant
699 13         30 my $inv_id = $1;
700 13         47 my $inv_type = $result->get_relationship_type_by_id($inv_id); # Is this INVERSE relationship type already in the ontology?
701 13 100       44 if (!defined $inv_type){
702 9         35 $inv_type = OBO::Core::RelationshipType->new(); # if not, create a new type
703 9         29 $inv_type->id($inv_id);
704             #$inv_type->name($3) if ($3); # not necessary, this name could be wrong...
705 9         39 $result->add_relationship_type($inv_type); # add it to the ontology
706             }
707 13         46 $type->inverse_of($inv_type);
708             } elsif ($line =~ /^transitive_over:\s*(.*)/) {
709 0         0 $type->transitive_over($1);
710             } elsif ($line =~ /^holds_over_chain:\s*([:\w]+)\s*([:\w]+)$r_comments/) { # R <- R1.R2
711 14         34 my $r1_id = $1;
712 14         28 my $r2_id = $2;
713 14         49 my $r1_type = $result->get_relationship_type_by_id($r1_id); # Is this relationship type already in the ontology?
714 14 100       42 if (!defined $r1_type){
715 2         8 $r1_type = OBO::Core::RelationshipType->new(); # if not, create a new type
716 2         8 $r1_type->id($r1_id);
717 2         9 $result->add_relationship_type($r1_type); # add it to the ontology
718             }
719 14         43 my $r2_type = $result->get_relationship_type_by_id($r2_id); # Is this relationship type already in the ontology?
720 14 100       40 if (!defined $r2_type){
721 1         7 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new type
722 1         7 $r2_type->id($r2_id);
723 1         5 $result->add_relationship_type($r2_type); # add it to the ontology
724             }
725 14         45 $type->holds_over_chain($r1_type->id(), $r2_type->id());
726             } elsif ($line =~ /^equivalent_to_chain:\s*(.*)/) {
727             # TODO
728             } elsif ($line =~ /^disjoint_over:\s*(.*)/) {
729             # TODO
730             } elsif ($line =~ /^is_functional:$r_true_false/) {
731 0 0       0 $type->is_functional(($1 eq 'true')?1:0);
732             } elsif ($line =~ /^is_inverse_functional:$r_true_false/) {
733 0 0       0 $type->is_inverse_functional(($1 eq 'true')?1:0);
734             } elsif ($line =~ /^created_by:\s*(.*)/) {
735 2         14 $type->created_by($1);
736             } elsif ($line =~ /^creation_date:\s*(.*)/) {
737 2         13 my $d = $1;
738 2         18 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
739 2 50       3345 if (!$pd) {
740 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
741             }
742 2         14 $type->creation_date($d);
743             } elsif ($line =~ /^modified_by:\s*(.*)/) {
744 1         8 $type->modified_by($1);
745             } elsif ($line =~ /^modification_date:\s*(.*)/) {
746 1         5 my $d = $1;
747 1         14 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
748 1 50       1327 if (!$pd) {
749 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
750             }
751 1         8 $type->modification_date($d);
752             } elsif ($line =~ /^is_obsolete:\s*(.*)/) {
753 0 0       0 $type->is_obsolete(($1 eq 'true')?1:0);
754             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
755 0         0 $type->replaced_by($1);
756             } elsif ($line =~ /^consider:\s*(.*)/) {
757 0         0 $type->consider($1);
758             } elsif ($line =~ /^builtin:$r_true_false/) {
759 5 50       35 $type->builtin(($1 eq 'true')?1:0);
760             } elsif ($line =~ /^!/) {
761             # skip line
762             } else {
763 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
764             }
765             }
766             # Check for required fields: id
767 112 50       371 if (!defined $type->id()) {
768 0         0 croak "No ID found in type:\n\n", $chunk, "\n\nfrom file '", $self->{OBO_FILE}, "'";
769             }
770 112 50       262 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
771 0         0 carp "Missing 'intersection_of' tag in relationship type:\n\n", $chunk, "\n";
772             }
773 112 50       229 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
774 0         0 carp "Missing 'union_of' tag in relationship type:\n\n", $chunk, "\n";
775             }
776 112         308 $file_line_number++;
777             } elsif ($stanza && $stanza =~ /\[Instance\]/) { # treat [Instance]
778 5         9 my $instance;
779            
780             #
781             # to check we have zero or at least two intersection_of's and zero or at least two union_of's
782             #
783             # TODO do INSTANCES have these tags?
784 5         10 $intersection_of_counter = 0;
785 5         7 $union_of_counter = 0;
786            
787 5         8 $file_line_number++;
788            
789 5         8 my $only_one_id_tag_per_entry = 0;
790 5         9 my $only_one_name_tag_per_entry = 0;
791            
792 5         10 foreach my $line (@entry) {
793 26         33 $file_line_number++;
794 26 100 66     1580 if ($line =~ /^id:\s*(\S+)/) { # get the instance id
    50 66        
    100 33        
    100 66        
    50          
    50          
    50          
    50          
    50          
    100          
    100          
    100          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    50          
    0          
795 5 50       83 if ($line =~ /^id:$r_db_acc/) { # Does it follow the "convention"?
796 5 50       16 croak "The instance with id '", $1, "' has a duplicated 'id' tag in the file '", $self->{OBO_FILE} if ($only_one_id_tag_per_entry);
797 5         22 $instance = $result->get_instance_by_id($1); # does this instance is already in the ontology?
798 5 100       19 if (!defined $instance){
799 3         18 $instance = OBO::Core::Instance->new(); # if not, create a new instance
800 3         12 $instance->id($1);
801 3         13 $result->add_instance($instance); # add it to the ontology
802 3         10 $only_one_id_tag_per_entry = 1;
803             #} elsif (defined $instance->def()->text() && $instance->def()->text() ne '') {
804             # TODO Do instances have a definition?
805             # # The instance is already in the ontology since it has a definition! (maybe empty?)
806             # croak "The instance with id '", $1, "' is duplicated in the OBO file.";
807             }
808             } else {
809 0         0 croak "The instance with id '", $1, "' does NOT follow the ID convention: 'IDSPACE:UNIQUE_IDENTIFIER', e.g. GO:1234567";
810             }
811             } elsif ($line =~ /^is_anonymous:$r_true_false/) {
812 0 0       0 $instance->is_anonymous(($1 eq 'true')?1:0);
813             } elsif ($line =~ /^name:\s*(.*)/) {
814 5 50       14 croak "The instance with id '", $1, "' has a duplicated 'name' tag in the file '", $self->{OBO_FILE} if ($only_one_name_tag_per_entry);
815 5 50       15 if (!defined $1) {
816 0         0 warn "The instance with id '", $instance->id(), "' has no name in file '", $self->{OBO_FILE}, "'";
817             } else {
818 5         24 $instance->name($1);
819 5         13 $only_one_name_tag_per_entry = 1;
820             }
821             } elsif ($line =~ /^namespace:\s*(.*)/) {
822 1         7 $instance->namespace($1); # it is a Set
823             } elsif ($line =~ /^alt_id:$r_db_acc/) {
824             # TODO do INSTANCES have this tag?
825 0         0 $instance->alt_id($1);
826             } elsif ($line =~ /^def:\s*\"(.*)\"$r_dbxref/) { # fill in the definition
827 0         0 my $def = OBO::Core::Def->new();
828 0         0 $def->text($1);
829 0         0 $def->dbxref_set_as_string($2);
830 0         0 $instance->def($def);
831             } elsif ($line =~ /^comment:\s*(.*)/) {
832 0         0 $instance->comment($1);
833             } elsif ($line =~ /^subset:\s*(\S+)/) {
834 0         0 my $ss = $1;
835 0 0       0 if ($result->subset_def_map()->contains_key($ss)) {
836 0         0 $instance->subset($ss); # it is a Set (i.e. added to a Set)
837            
838 0         0 $used_subset{$ss}++; # check subsets usage
839             } else {
840 0         0 croak "The subset '", $ss, "' is not defined in the header! Check your OBO file line '", $file_line_number, "'";
841             }
842             } elsif ($line =~ /^(exact|narrow|broad|related)_synonym:\s*\"(.*)\"$r_dbxref/) { # OBO spec 1.1
843 0         0 $instance->synonym_as_string($2, $3, uc($1));
844             } elsif ($line =~ /^synonym:\s*\"(.*)\"$syn_scope(\s+([-\w]+))?$r_dbxref/) {
845 1 50       6 my $scope = (defined $3)?$3:'RELATED';
846             # As of OBO flat file spec v1.2, we use:
847             # synonym: "endomitosis" EXACT []
848 1 50       6 if (defined $5) {
849 0         0 my $found = 0; # check that the 'synonym type name' was defined in the header!
850 0         0 foreach my $st ($result->synonym_type_def_set()->get_set()) {
851             # Adapt the scope if necessary to the one defined in the header!
852 0 0       0 if ($st->name() eq $5) {
853 0         0 $found = 1;
854 0         0 my $default_scope = $st->scope();
855 0 0       0 $scope = $default_scope if (defined $default_scope);
856 0         0 last;
857             }
858             }
859 0 0       0 croak 'The synonym type name (', $5,') used in line ', $file_line_number, " in the file '", $self->{OBO_FILE}, "' was not defined" if (!$found);
860 0         0 $used_synonym_type_def{$5}++; # check synonymtypedef usage
861             }
862 1         7 $instance->synonym_as_string($1, $6, $scope, $5);
863             } elsif ($line =~ /^xref:\s*(.*)/ || $line =~ /^xref_analog:\s*(.*)/ || $line =~ /^xref_unknown:\s*(.*)/) {
864 1         6 $instance->xref_set_as_string($1);
865             } elsif ($line =~ /^instance_of:$r_db_acc$r_comments/) { # The comment is ignored here but retrieved later internally
866 5         20 my $t = $result->get_term_by_id($1); # Is this instance already in the ontology?
867 5 100       17 if (!defined $t) {
868 3         13 $t = OBO::Core::Term->new(); # if not, create a new Term
869 3         12 $t->id($1);
870 3         12 $result->add_term($t);
871             }
872 5         18 $instance->instance_of($t);
873             } elsif ($line =~ /^intersection_of:\s*([\w\/]+)?$r_db_acc$r_comments/) {
874             # TODO Improve the 'intersection_of' treatment
875             # TODO do INSTANCES have this tag?
876 0         0 my $rel = OBO::Core::Relationship->new();
877 0   0     0 my $r = $1 || 'nil';
878 0         0 my $id = $instance->id().'_'.$r.'_'.$2;
879 0         0 $id =~ s/\s+/_/g;
880 0         0 $rel->id($id);
881 0         0 $rel->type($r);
882 0         0 my $target = $result->get_instance_by_id($2); # Is this instance already in the ontology?
883 0 0       0 if (!defined $target) {
884 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
885 0         0 $target->id($2);
886 0         0 $result->add_instance($target);
887             }
888 0         0 $rel->head($target);
889 0         0 $instance->intersection_of($rel);
890 0         0 $intersection_of_counter++;
891             } elsif ($line =~ /^union_of:\s*(.*)/) {
892             # TODO wait until the OBO spec 1.4 be released
893             # TODO do INSTANCES have this tag?
894 0         0 my $target = $result->get_instance_by_id($1); # Is this instance already in the ontology?
895 0 0       0 if (!defined $target) {
896 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
897 0         0 $target->id($1);
898 0         0 $result->add_instance($target);
899             }
900 0         0 $instance->union_of($1);
901 0         0 $union_of_counter++;
902             } elsif ($line =~ /^disjoint_from:$r_db_acc$r_comments/) {
903             # TODO do INSTANCES have this tag?
904 0         0 $instance->disjoint_from($1); # We are assuming that the other instance exists or will exist; otherwise , we have to create it like in the is_a section.
905             } elsif ($line =~ /^relationship:\s*([\w\/]+)$r_db_acc$r_comments/ || $line =~ /^relationship:\s*$r_db_acc$r_db_acc$r_comments/) {
906             # TODO do INSTANCES have this tag?
907 0         0 my $rel = OBO::Core::Relationship->new();
908 0         0 my $id = $instance->id().'_'.$1.'_'.$2; # TODO see the line (TODO) of the 'term' section
909 0         0 $id =~ s/\s+/_/g;
910 0         0 $rel->id($id);
911 0         0 $rel->type($1);
912 0         0 my $target = $result->get_instance_by_id($2); # Is this instance already in the ontology?
913 0 0       0 if (!defined $target) {
914 0         0 $target = OBO::Core::Instance->new(); # if not, create a new instance
915 0         0 $target->id($2);
916 0         0 $result->add_instance($target);
917             }
918 0         0 $rel->link($instance, $target);
919 0         0 $result->add_relationship($rel);
920             } elsif ($line =~ /^created_by:\s*(.*)/) {
921 0         0 $instance->created_by($1);
922             } elsif ($line =~ /^creation_date:\s*(.*)/) {
923 0         0 my $d = $1;
924 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
925 0 0       0 if (!$pd) {
926 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
927             }
928 0         0 $instance->creation_date($d);
929             } elsif ($line =~ /^modified_by:\s*(.*)/) {
930 0         0 $instance->modified_by($1);
931             } elsif ($line =~ /^modification_date:\s*(.*)/) {
932 0         0 my $d = $1;
933 0         0 my $pd = ParseDate($d); # Check that the date follows the ISO 8601 format
934 0 0       0 if (!$pd) {
935 0         0 warn "Bad date string: '", $d, "' in line ", $file_line_number, "\n";
936             }
937 0         0 $instance->modification_date($d);
938             } elsif ($line =~ /^is_obsolete:$r_true_false/) {
939 0 0       0 $instance->is_obsolete(($1 eq 'true')?1:0);
940             } elsif ($line =~ /^replaced_by:\s*(.*)/) {
941 0         0 $instance->replaced_by($1);
942             } elsif ($line =~ /^consider:\s*(.*)/) {
943 0         0 $instance->consider($1);
944             } elsif ($line =~ /^builtin:$r_true_false/) {
945             # TODO do INSTANCES have this tag?
946 0 0       0 $instance->builtin(($1 eq 'true')?1:0);
947             } elsif ($line =~ /^property_value:\s*(\w+)$r_db_acc/ || $line =~ /^property_value:\s*(\w+)\s+"([ \°'\#~\w:\\\+\?\{\}\$\/\(\)\[\]\.=&!%_,-]+)"$r_db_acc/) { # TODO some parts in this block might change later on...
948            
949 8         18 my $relationship_type_id = $1;
950 8         18 my $value_specifier = $2;
951 8         14 my $last_match = $3;
952            
953 8         32 my $r2_type = $result->get_relationship_type_by_id($relationship_type_id); # Is this relationship type already in the ontology?
954 8 100       27 if (!defined $r2_type){
955 5         23 $r2_type = OBO::Core::RelationshipType->new(); # if not, create a new relationship type
956 5         19 $r2_type->id($relationship_type_id);
957 5         18 $result->add_relationship_type($r2_type); # add it to the ontology
958             }
959              
960             #
961             # create the triplet
962             #
963 8         27 my $rel = OBO::Core::Relationship->new();
964 8         23 my $id = $instance->id().'_'.$relationship_type_id.'_'.$value_specifier; # instance --> rel --> [term|instance|datatype]
965 8         25 $id =~ s/\s+/_/g;
966 8         25 $rel->id($id);
967 8         25 $rel->type($r2_type->id());
968            
969 8 100       29 if (!defined $last_match) {
    50          
970             #
971             # property_value: lastest_modification_by erick
972             #
973 4         16 my $target = $result->get_term_by_id($value_specifier); # suggest to OBOF to define TERMs before they are used so that parsers could know they are dealing with terms!
974            
975 4 50       12 if (defined $target) { # instance --> rel --> term
976            
977             } else {
978 4         19 $target = $result->get_instance_by_id($value_specifier);
979            
980 4 100       12 if (!defined $target) { # instance --> rel --> instance
981 3         13 $target = OBO::Core::Instance->new();
982 3         11 $target->id($value_specifier);
983 3         10 $result->add_instance($target);
984             }
985             }
986            
987 4         13 $rel->link($instance, $target); # triplet: instance --> rel --> target
988 4         15 $instance->property_value($rel);
989            
990             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
991             } elsif (defined $last_match) { # instance --> rel --> datatype
992             #
993             # property_value: lastest_modification_by "erick" xsd:string or e.g. shoe_size "12" xsd:positiveInteger
994             #
995 4         14 my $target = $result->get_instance_by_id($value_specifier);
996            
997 4 50       15 if (!defined $target) { # instance --> rel --> datatype
998 4         13 $target = OBO::Core::Instance->new();
999 4         14 $target->id($value_specifier);
1000            
1001             # data type check
1002 4 50       18 warn "Unrecommended XML-schema pritive (data type) found: '", $last_match, "'" unless (exists $allowed_data_types{$last_match});
1003            
1004 4         15 my $data_type = OBO::Core::Term->new();
1005 4         15 $data_type->id($last_match);
1006             #$result->add_term($data_type); # TODO Think about it...
1007 4         12 $target->instance_of($data_type);
1008             #$result->add_instance($target); # TODO Think about it...
1009             }
1010              
1011 4         13 $rel->link($instance, $target);
1012 4         13 $instance->property_value($rel);
1013            
1014             #$result->add_relationship($rel); # TODO Do we need this? or better add $ontology->{PROPERTY_VALUES}?
1015             }
1016             } elsif ($line =~ /^!/) {
1017             # skip line
1018             } else {
1019 0         0 warn 'Unknown syntax found (and ignored) in line: ', $file_line_number, " (in file '", $self->{OBO_FILE}, "'):\n\t", $line, "\n";
1020             }
1021             }
1022             # Check for required fields: id
1023 5 50 33     26 if (defined $instance && !defined $instance->id()) {
1024 0         0 croak "No ID found in instance:\n", $chunk;
1025             }
1026 5 50       15 if ($intersection_of_counter == 1) { # IDEM TEST: ($intersection_of_counter != 0 && $intersection_of_counter < 2)
1027             # TODO do INSTANCES have this tag?
1028 0         0 croak "Missing 'intersection_of' tag in instance:\n\n", $chunk, "\n";
1029             }
1030 5 50       13 if ($union_of_counter == 1) { # IDEM TEST: ($union_of_counter != 0 && $union_of_counter < 2)
1031 0         0 carp "Missing 'union_of' tag in instance:\n\n", $chunk, "\n";
1032             }
1033 5         16 $file_line_number++;
1034             } elsif ($stanza && $stanza =~ /\[Annotation\]/) { # treat [Annotation]
1035             # TODO "Annotations are ignored by ONTO-PERL (they might be supported in the future).";
1036             }
1037             }
1038            
1039             #
1040             # Warn (and delete) on non used subsets which were defined in the header (subsetdef's)
1041             #
1042 9         53 my @set_of_all_ss = $result->subset_def_map()->key_set()->get_set();
1043 9         49 foreach my $pss (sort @set_of_all_ss) {
1044 19 50       58 if (!$used_subset{$pss}) {
1045 0         0 $result->subset_def_map()->remove($pss);
1046 0         0 warn "Unused subset found (and removed): '", $pss, "' (in file '", $self->{OBO_FILE}, "')";
1047             }
1048             }
1049            
1050             #
1051             # Warn (and delete) on non used synonym type def's which were defined in the header (synonymtypedef's)
1052             #
1053 9         44 my @set_of_all_synonymtypedef = $result->synonym_type_def_set()->get_set();
1054 9         31 foreach my $st (@set_of_all_synonymtypedef) {
1055 5 50       32 if (!$used_synonym_type_def{$st->name()}) {
1056 0         0 $result->synonym_type_def_set()->remove($st);
1057 0         0 warn "Unused synonym type def found (and removed): '", $st->name(), "' (in file '", $self->{OBO_FILE}, "')";
1058             }
1059             }
1060            
1061             #
1062             # Work-around for some ontologies like GO: Explicitly add the implicit 'is_a' if missing
1063             #
1064 9 50       43 if (!$result->has_relationship_type_id('is_a')){
1065 0         0 my $type = OBO::Core::RelationshipType->new(); # if not, create a new type
1066 0         0 $type->id('is_a');
1067 0         0 $type->name('is_a');
1068 0         0 $result->add_relationship_type($type);
1069             }
1070            
1071 9         45 $/ = "\n";
1072              
1073 9         642 return $result;
1074             } else { # if no header (chunk[0])
1075 0           carp "The OBO file '", $self->{OBO_FILE},"' does not have a correct header, please verify it.";
1076             }
1077             }
1078              
1079             1;
1080              
1081             __END__