File Coverage

blib/lib/TM.pm
Criterion Covered Total %
statement 621 862 72.0
branch 232 364 63.7
condition 55 132 41.6
subroutine 67 80 83.7
pod 49 54 90.7
total 1024 1492 68.6


line stmt bran cond sub pod time code
1             package TM;
2              
3 36     36   743679 use strict;
  36         86  
  36         1581  
4 36     36   192 use warnings;
  36         75  
  36         1569  
5              
6             require Exporter;
7 36     36   200 use base qw(Exporter);
  36         93  
  36         9998  
8              
9             our $VERSION = '1.56';
10              
11 36     36   45425 use Data::Dumper;
  36         169976  
  36         4334  
12             # !!! HACK to suppress an annoying warning about Data::Dumper's VERSION not being numerical
13             $Data::Dumper::VERSION = '2.12108';
14             # !!! END of HACK
15              
16 36     36   52680 use Class::Struct;
  36         95378  
  36         281  
17 36     36   59177 use Time::HiRes;
  36         80133  
  36         272  
18 36     36   55073 use TM::PSI;
  36         98  
  36         1387  
19              
20 36     36   60834 use Log::Log4perl;
  36         6078367  
  36         303  
21             Log::Log4perl::init( \ q(
22              
23             log4perl.rootLogger=DEBUG, Screen
24              
25             log4perl.appender.Screen=Log::Log4perl::Appender::Screen
26             log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
27             log4perl.appender.Screen.layout.ConversionPattern=[%r] %F %L %c - %m%n
28              
29             #log4perl.rootLogger=DEBUG, LOGFILE
30              
31             #log4perl.appender.LOGFILE=Log::Log4perl::Appender::File
32             #log4perl.appender.LOGFILE.filename=/tmp/tm.log
33             #log4perl.appender.LOGFILE.mode=append
34              
35             #log4perl.appender.LOGFILE.layout=PatternLayout
36             #log4perl.appender.LOGFILE.layout.ConversionPattern=[%r] %F %L %c - %m%n
37             ) );
38              
39             our $log = Log::Log4perl->get_logger("TM");
40              
41             our $infrastructure; # default set = core + topicmaps_inc + astma_inc
42              
43             =pod
44              
45             =head1 NAME
46              
47             TM - Topic Maps, Base Class
48              
49             =head1 SYNOPSIS
50              
51             my $tm = new TM (baseuri => 'tm://whatever/'); # empty map
52              
53             # add a toplet (= minimal topic, only identification, no characteristics)
54             # by specifying an internal ID
55             $tm->internalize ('aaa'); # only internal identifier
56             $tm->internalize ('bbb' => 'http://bbb/'); # with a subject address
57             $tm->internalize ('ccc' => \ 'http://ccc/'); # with a subject indicator
58              
59             # without specifying an internal ID (will be auto-generated)
60             $tm->internalize (undef => 'http://ccc/'); # with a subject address
61             $tm->internalize (undef => \ 'http://ccc/'); # with a subject indicator
62              
63             # get rid of toplet(s)
64             $tm->externalize ('tm://whatever/aaa', ...);
65              
66             # find full URI of a toplet
67             my $tid = $tm->tids ('person'); # returns tm://whatever/person
68             my @tids = $tm->tids ('person', ...) # for a whole list
69              
70             my $tid = $tm->tids ( 'http://bbb/'); # with subject address
71             my $tid = $tm->tids (\ 'http://ccc/'); # with subject indicator
72              
73             my @ts = $tm->toplets; # get all toplets
74             my @ts = $tm->toplets (\ '+all -infrastructure'); # only those you added
75              
76             my @as = $tm->asserts (\ '+all -infrastructure'); # only those you added
77              
78             my @as = $tm->retrieve; # all assertions
79             my $a = $tm->retrieve ('23ac4637....345'); # returns only that one assertion
80             my @as = $tm->retrieve ('23ac4637....345', '...'); # returns all these assertions
81              
82             # create standalone assertion
83             my $a = Assertion->new (type => 'is-subclass-of',
84             roles => [ 'subclass', 'superclass' ],
85             players => [ 'rumsti', 'ramsti' ]);
86             $tm->assert ($a); # add that to map
87              
88             # create a name
89             my $n = Assertion->new (kind => TM->NAME,
90             type => 'name',
91             scope => 'us',
92             roles => [ 'thing', 'value' ],
93             players => [ 'rumsti', new TM::Literal ('AAA') ])
94             # create an occurrence
95             my $o = Assertion->new (kind => TM->OCC,
96             type => 'occurrence',
97             scope => 'us',
98             roles => [ 'thing', 'value' ],
99             players => [ 'rumsti', new TM::Literal ('http://whatever/') ])
100              
101             $tm->assert ($n, $o); # throw them in
102              
103             $tm->retract ($a->[TM->LID], ...); # get rid of assertion(s)
104              
105             my @as = $tm->retrieve ('id..of...assertion'); # extract particular assertions
106              
107             # find particular assertions
108             # generic search patterns
109             my @as = $tm->match_forall (scope => 'tm://whatever/sss');
110              
111             my @bs = $tm->match_forall (type => 'tm://whatever/ttt',
112             roles => [ 'tm://whatever/aaa', 'tm://whatever/bbb' ]);
113              
114             # specialized search patterns (see TM::Axes)
115             my @cs = $tm->match_forall (type => 'is-subclass-of',
116             arole => 'superclass',
117             aplayer => 'tm://whatever/rumsti',
118             brole => 'subclass');
119              
120             my @ds = $tm->match_forall (type => 'isa',
121             class => 'tm://whatever/person');
122              
123             # perform merging, cleanup, etc.
124             $tm->consolidate;
125              
126             # check internal consistency of the data structure
127             die "panic" if $tm->insane;
128              
129             # taxonomy stuff
130             warn "what a subtle joke" if $tm->is_a ($tm->tids ('gw_bush', 'moron'));
131              
132             die "what a subtle joke"
133             unless $tm->is_subclass ($tm->tids ('politician', 'moron'));
134              
135             # returns Mr. Spock if Volcans are subclassing Aliens
136             warn "my best friends: ". Dumper [ $tm->instancesT ($tm->tids ('alien')) ];
137              
138              
139             =head1 ABSTRACT
140              
141             This class provides read/write access to a data structure according to the Topic Maps paradigm. As
142             it stands, this class implements directly so-called I maps, i.e. those maps which
143             completely reside in memory. Implementations for non-materialized maps can be derived from it.
144              
145             =head1 DESCRIPTION
146              
147             This class implements directly so-called I topic maps, i.e. those maps which
148             completely reside in memory. Non-materialized and non-materializable maps can be implemented by
149             deriving from this class by overloading one or all of the sub-interfaces. If this is done cleverly,
150             then any application, even a TMQL query processor can operate on non-materialized (virtual) maps in
151             the same way as on materialized ones.
152              
153             =head2 Data Structures
154              
155             The Topic Maps paradigm knows two abstractions
156              
157             =over
158              
159             =item I, Topic Maps Data Model
160              
161             L
162              
163             =item I, Topic Maps Reference Model
164              
165             L
166              
167             =back
168              
169             For historical reasons, this package adopts an abstraction which is in between these
170             two. Accordingly, there are only following types of data structures
171              
172             =over
173              
174             =item Toplets:
175              
176             These are like TMDM topics, but only contain addressing information (subject identifiers and subject
177             addresses) along with an internal identifier.
178              
179             =item Assertions:
180              
181             These are like TMDM associations, but are generalized to host also occurrences and names. Also
182             associations using predefined association types, such as C (I) and C
183             (I) are represented as assertions.
184              
185             =item Variants:
186              
187             No idea what they are good for. They can be probably safely ignored.
188              
189             =back
190              
191             The data manipulation interface is very low-level and B exposes internal data structures.
192             As long as you do not mess with the information you get and you follow the API rules, this can
193             provide a convenient, fast, albeit not overly comfortable interface. If you prefer more a TMDM-like
194             style of accessing a map then have a look at L.
195              
196              
197             =head2 Identifiers
198              
199             Of course, L supports the subject locator and the subject indicator mechanism as mandated
200             by the Topic Maps standards.
201              
202             Additionally, this package also uses I identifiers to address everything which looks and
203             smells like a topic, also associations, names and occurrences. For topics the application (or
204             author) of the topic map will most likely provide these internal identifiers. For the others the
205             identifiers are generated.
206              
207             Since v1.31 this package distinguishes between 3 kinds of internal identifiers:
208              
209             =over
210              
211             =item I toplet identifiers
212              
213             These identifiers are always interpreted local to a map, in that the C of the map is used
214             as prefix. So, a local identifier
215              
216             chinese-working-conditions
217              
218             will become
219              
220             tm://nirvana/chinese-working-conditions
221              
222             if the base URI of the map were
223              
224             tm://nirvana/
225              
226             So if you want to use identifiers such as these, then you should either use the absolut version
227             (including the base URI) or use the method C to find the absolute version.
228              
229             =item I toplet identifiers
230              
231             All toplets from the infrastructure are declared I, i.e. untouchable. Examples are
232             C, C or C (universal scope).
233              
234             These identifiers are always the same in all maps this package system manages. That implies that if
235             you use such an identifier, then you cannot attach a local meaning to it. And it implies that at
236             merging time, toplets with these identifiers will merge. Even if there were no subject indicators or
237             addresses involved.
238              
239             It is probably a good idea to leave such toplets alone as the software is relying on the stability
240             of the sacrosanct identifiers.
241              
242             =item assertion identifiers
243              
244             Each assertion also has an (internal) identifier. It is a function from the content, so it
245             is characteristic for the assertion.
246              
247             =back
248              
249             =head2 Consistency
250              
251             An application using a map may expect that a map is I, i.e. that the following
252             consistency conditions are met:
253              
254             =over
255              
256             =item B (fixed on)
257              
258             Every identifier appearing in some assertion as type, scope, role or player is also registered as
259             toplet.
260              
261             =item B (default: on)
262              
263             Two (or more) toplets sharing the same I are treated as one toplet.
264              
265             =item B (default: on)
266              
267             Two (or more) toplets sharing the same I are treated as one toplet.
268              
269             =item B (default: off)
270              
271             Two (or more) toplet sharing the same name in the same scope are treated as one toplet.
272              
273             =back
274              
275             =cut
276              
277             use constant {
278 36         216725 Subject_based_Merging => 1,
279             Indicator_based_Merging => 2,
280             TNC_based_Merging => 3,
281 36     36   6968 };
  36         98  
282              
283             =pod
284              
285             While A1 is related with the internal consistency of the data structure (see C), the others
286             are a choice the application can make (see C).
287              
288             I is not automatically provided when a map is modified by the application. It is the
289             applications responsibility to trigger the process to consolidate the map. As that may be
290             potentially expensive, the control remains at the application.
291              
292             When an IO driver is consuming a map from a resource, say, loading from an XTM file, then that
293             driver will ensure that the map is consolidated according to the current settings before it hands it
294             to the application. The application is then in full control of the map as it can change, add and
295             delete toplets and assertions. The map can become unconsolidated in this process. The method
296             C reinstates consistency again.
297              
298             You can change these defaults by (a) providing an additional option to the constructor
299              
300             new TM (....,
301             consistency => [ TM->Subject_based_Merging,
302             TM->Indicator_based_Merging ]);
303              
304             or (b) by later using the accessor C (see below).
305              
306             =head1 MAP INTERFACE
307              
308             =head2 Constructor
309              
310             I<$tm> = new TM (...)
311              
312             The constructor will create an empty map, or, to be more exact, it will fill the map with the
313             taxonomy from L which covers basic Topic Maps concepts such as I or I.
314              
315             The constructor understands a number of key/value pair parameters:
316              
317             =over
318              
319             =item C (default: C)
320              
321             Every toplet in the map has an unique local identifier (e.g. C). The C parameter
322             controls how an absolute URI is built from this identifier.
323              
324             =item C (default: [ Subject_based_Merging, Indicator_based_Merging ])
325              
326             This controls the consistency settings. They can be changed later with the C method.
327              
328             =back
329              
330             =cut
331              
332             sub new {
333 751     751 0 369333 my $class = shift;
334 751         2774 my %self = @_;
335              
336 751   100     5136 $self{consistency} ||= [ Subject_based_Merging, Indicator_based_Merging ];
337 751   100     2379 $self{baseuri} ||= 'tm://nirvana/';
338 751 100       3941 $self{baseuri} .= '#' unless $self{baseuri} =~ m|[/\#:]$|;
339              
340 751         2695 my $self = bless \%self, $class;
341              
342 751 50       2835 unless ($self->{mid2iid}) { # we need to do fast cloning of basic vocabulary
343 751         1253 %{ $self->{mid2iid} } = %{ $infrastructure->{mid2iid} }; # shallow clone
  751         13521  
  751         10417  
344 751         2718 %{ $self->{assertions} } = %{ $infrastructure->{assertions} }; # shallow clone
  751         5245  
  751         9986  
345             }
346 751         2666 $self->{last_mod} = 0; # book keeping
347 751         3287 $self->{created} = Time::HiRes::time;
348              
349 751         3040 return $self;
350             }
351              
352 0     0   0 sub DESTROY {} # not much to do here
353              
354             =pod
355              
356             =head2 Methods
357              
358             =over
359              
360             =item B
361              
362             I<$bu> = I<$tm>->baseuri
363              
364             This methods retrieves the base URI component of the map. This is a read-only method. The base URI
365             is B defined.
366              
367             =cut
368              
369             sub baseuri {
370 572     572 1 10895 my $self = shift;
371 572         3360 return $self->{baseuri};
372             }
373              
374             =pod
375              
376             =item B
377              
378             I<@merging_constraints> = I<$tm>->consistency
379              
380             I<$tm>->consistency (I<@list_of_consistency_constants>)
381              
382             This method provides read/write access to the consistency settings.
383              
384             If no parameters are provided, then the current list of consistency settings is returned. If
385             parameters are provided, that list must consist of the constants defined under L.
386              
387             B: Changing the consistency does B automatically trigger C.
388              
389             =cut
390              
391             sub consistency {
392 4     4 1 559 my $self = shift;
393 4         6 my @params = @_;
394              
395 4 100       11 $self->{consistency} = [ @params ] if @params;
396 4         6 return @{$self->{consistency}};
  4         33  
397             }
398              
399             =pod
400              
401             =item B
402              
403             Returns the L date of last time the map has been modified (content-wise).
404              
405             =cut
406              
407             sub last_mod {
408 0     0 1 0 my $self = shift;
409 0         0 return $self->{last_mod};
410             }
411              
412             =pod
413              
414             =item B
415              
416             I<$tm>->consolidate
417              
418             I<$tm>->consolidate (I<@list_of_consistency_constants>)
419              
420             This method I a map by performing the following actions:
421              
422             =over
423              
424             =item *
425              
426             perform merging based on subject address (see TMDM section 5.3.2)
427              
428             =item *
429              
430             perform merging based on subject indicators (see TMDM section 5.3.2)
431              
432             =item *
433              
434             remove all superfluous toplets (those which do not take part in any assertion)
435              
436             B: Not implemented yet!
437              
438             =back
439              
440             This method will normally use the map's consistency settings. These settings can be overridden by
441             adding consistency settings as parameters (see L). In that case the map's settings are
442             B modified, so use this carefully.
443              
444             B: In all cases the map will be modified.
445              
446             B: After merging some of the I might not be reliably point to a topic.
447              
448             =cut
449              
450             # NOTE: Below there much is done regarding speed. First the toplets are swept detecting which have
451             # to be merged. This is not done immediately (as this is an expensive operation), instead a 'merger' hash
452             # is built. Note how merging information A -> B and A -> C is morphed into A -> B and B -> C using
453             # the _find_free function.
454              
455             # That merger hash is then consolidated by following edges until their end, so that there are no
456             # cycles.
457              
458             sub consolidate {
459 224     224 1 115491 my $self = shift;
460 224 50       1271 my $cons = @_ ? [ @_ ] : $self->{consistency}; # override
461 224         399 my $indi = grep ($_ == Indicator_based_Merging, @{$self->{consistency}});
  224         1009  
462 224         442 my $subj = grep ($_ == Subject_based_Merging, @{$self->{consistency}});
  224         1005  
463 224         350 my $tnc = grep ($_ == TNC_based_Merging, @{$self->{consistency}});
  224         1209  
464              
465             #warn "cond indi $indi subj $subj tnc $tnc";
466              
467 224         442 my %SIDs; # holds subject addresses found
468             my %SINs; # holds subject indicators found
469 0         0 my %BNs; # holds basename + scope found
470              
471             #warn Dumper $cons;
472              
473             #== find merging points and memorize this in mergers =======================================================================
474 0         0 my %mergers; # will contain the merging edges
475 224         523 my $mid2iid = $self->{mid2iid}; # shortcut
476 224         415 my $asserts = $self->{assertions}; # shortcut
477 224         454 my $baseuri = $self->{baseuri}; # shortcut
478              
479 224         3514 MERGE:
480 224         430 foreach my $this (keys %{$mid2iid}) {
481             #warn "looking at $this";
482 8862         15262 my $thism = $mid2iid->{$this};
483             #warn "SIDs: ". Dumper \%SIDs;
484             #warn "SINs: ". Dumper \%SINs;
485             #-- based on subject indication ------------------------------------------------------------------------------------------
486 8862 100       17952 if ($indi) {
487 2262         11179 foreach my $sin (@{$thism->[TM->INDICATORS]}) { # walk over the subject indicators
  2262         9725  
488 2447 100       7493 if (my $that = $SINs{$sin}) { # $that is now a key pointing to a merging partner
489             #warn "merging (IND) $this >> $that"; #. Dumper $thism, $thatm;
490 263         814 _add_merge (\%mergers, $baseuri, $this, $that);
491              
492             } else { # no merging, so enter the sins
493 2184         10365 $SINs{$sin} = $this;
494             }
495             }
496             }
497              
498             sub _add_merge {
499 557     557   933 my $mergers = shift;
500 557         797 my $bu = shift;
501 557         781 my $this = shift;
502 557         730 my $that = shift;
503              
504 557 100       4734 ($this, $that) = ($that, $this) if $this =~ /^$bu/; # we swap them to favor that which resembles the baseURI
505 557         1489 $mergers->{_find_free ($this, $mergers)} = $that; # find a free place to make that mapping
506             }
507              
508             sub _find_free {
509 557     557   797 my $this = shift;
510 557         1099 my $mergers = shift;
511            
512 557         823 my $this2 = $this;
513 557         999 my $this3;
514 557         2122 while ($this3 = $mergers->{$this2}) {
515 135 50 33     844 if ($this3 eq $this || $this3 eq $this2) { # loop, we do not need it
516 0         0 return $this3;
517             } else {
518 135         428 $this2 = $this3; # we follow the trail
519             }
520             }
521 557         3056 return $this2; # this2 was the end of the trail
522             }
523              
524             #-- based on subject address ---------------------------------------------------------------------------------------------
525 8862 100       29056 if ($subj) {
526 3198 100       11420 if (my $sid = $thism->[TM->ADDRESS]) {
527 110 100       311 if (my $that = $SIDs{$sid}) { # found partner => should be merged
528             #warn "merging (ADDR) $this >> $that";
529 30         120 _add_merge (\%mergers, $baseuri, $this, $that);
530             ###### old $mergers{_find_free ($this, \%mergers)} = $that;
531             # must obviously both have the same subject address, so, no reason to touch this
532             } else { # there is no partner, first one with this subject address
533 80         285 $SIDs{$sid} = $this;
534             }
535             }
536             }
537             #warn "after 1 on '$this' ";#.Dumper $mid2iid;
538             }
539             #-- based on TNC ---------------------------------------------------------------------------------------------
540 224 100       1634 if ($tnc) {
541 144         650 my ($THING, $VALUE) = ('thing', 'value');
542 144         788 foreach my $a (values %$asserts) {
543 3192 100       15935 next unless $a->[TM->KIND] == TM->NAME; # we are only interested in basenames
544             #warn "checking assertion ".Dumper $a;
545 864         2173 my ($v) = get_x_players ($self, $a, $VALUE); # if we get back a longer list, bad luck
546 864         4167 my $bn_plus_scope = $v->[0] . # the basename is a string reference
547             $a->[TM->SCOPE]; # relative to the scope
548 864         1894 my ($this) = get_x_players ($self, $a, $THING); # thing which plays 'topic'
549             #warn " --> player is $this";
550 864 100       2651 if (my $that = $BNs{$bn_plus_scope}) { # if we have seen it before
551             #warn " -> SEEN";
552 264         970 _add_merge (\%mergers, $baseuri, $this, $that);
553             #### old $mergers{_find_free ($this, \%mergers)} = $that;
554             } else { # it is new to use, we store it into %BNs
555             #warn " -> NOT SEEN";
556 600         3681 $BNs{$bn_plus_scope} = $this;
557             #warn "BNs ".Dumper \%BNs;
558             }
559             }
560             }
561             #== consolidate mergers: no cycles, trail followed through ======================================================
562             #warn "mergers ".Dumper \%mergers;
563              
564 224         1253 for (2..2) { # at most 2, theoretical only one should be sufficient
565 224         408 my $changes = 0;
566 224         834 foreach my $h (keys %mergers) {
567             #warn "working on $h";
568 557 100 66     2757 if ($mergers{$h} eq $h) { # micro loop
    50          
569 55         186 delete $mergers{$h};
570             } elsif (defined $mergers{$mergers{$h}} && $mergers{$mergers{$h}} eq $h) {
571 0         0 delete $mergers{$h};
572             } else {
573 502         951 my $h2 = $mergers{$h};
574 502         2351 my %seen = ($h => 1, $h2 => 1); # loop avoidance
575             #warn "seeen start".Dumper \%seen;
576 502   100     3151 while ($mergers{$h2} and !$seen{$mergers{$h2}}++) { $h2 = $mergers{$h} = $mergers{$h2}; $changes++;}
  98         219  
  98         549  
577             #warn "half consolidated (chagens $changes)" .Dumper $H;
578             }
579             }
580             # warn "consoli loop $_: changes: $changes";
581             # warn "early finish" if $_ == 1 and $changes == 0;
582 224 100       973 last if $changes == 0;
583             # die "not clean" if $_ == 2 and $changes > 0;
584             }
585              
586             #warn "consolidated mergers ".Dumper \%mergers;
587              
588              
589             #== actual merging ========================================================================================
590              
591             # recanonicalize affected assertions
592             {
593 224         353 my $changed = _relabel (\%mergers, $self->baseuri, values %$asserts );
  224         1764  
594 224         1574 while (my ($k, $a) = each %$changed) {
595 1374         2552 delete $asserts->{ $k };
596             # delete $mid2iid->{ $k };
597             # $mid2iid->{ $a->[TM->LID] } = [ $a->[TM->LID], undef, [] ];
598 1374         47356 $asserts->{ $a->[TM->LID] } = $a;
599             }
600             }
601              
602 224         721 foreach my $that (keys %mergers) {
603 502         1243 my $this = $mergers{$that};
604 502         985 my $thism = $mid2iid->{$this};
605 502         1055 my $thatm = $mid2iid->{$that}; # shorthand
606 502 50       1614 next if $thatm == $thism; # we already have merged
607              
608 502 50 100     4006 $log->logdie ("two different subject addresses for two topics to be merged ($this, $that)")
      66        
609             if $thism->[TM->ADDRESS] and $thatm->[TM->ADDRESS] and
610             $thism->[TM->ADDRESS] ne $thatm->[TM->ADDRESS];
611              
612             #warn "merge now $that > $this";
613 502   100     2895 $thism->[TM->ADDRESS] ||= $thatm->[TM->ADDRESS]; # first subject address
614             { # then indicators
615 502         851 my $Is = $thism->[TM->INDICATORS]; # reference to thism indicators
  502         1570  
616 502         663 push @$Is, @{$thatm->[TM->INDICATORS]}; # add the others to it
  502         2106  
617 502         664 { my %X; map { $X{$_}++ } @$Is; @$Is = keys %X; } # make that unique
  502         652  
  502         2983  
  921         2060  
  502         2968  
618             }
619 502         2336 $mid2iid->{$that} = $thism; # finally
620             }
621             #warn "after post-merger ". Dumper $mid2iid;
622              
623 224         707 $self->{mid2iid} = $mid2iid; # this makes tie happy, in the case the map is tied
624 224         4990 $self->{last_mod} = Time::HiRes::time;
625             }
626              
627             =pod
628              
629             =item B
630              
631             I<$tm>->clear
632              
633             This method removes all toplets and assertions (except the infrastructure). Everything else remains.
634              
635             =cut
636              
637             sub clear {
638 1     1 1 600 my $self = shift;
639              
640 1         3 my %mid2iid = %{ $infrastructure->{mid2iid} }; # shallow clone
  1         19  
641 1         4 my %assertions = %{ $infrastructure->{assertions} }; # shallow clone
  1         7  
642              
643 1         4 $self->{mid2iid} = \%mid2iid; # making it explicit keeps MLDBM happy
644 1         7 $self->{assertions} = \%assertions; # ditto
645 1         6 $self->{last_mod} = Time::HiRes::time; # book keeping
646 1         3 return $self; # convenience for chaining
647             }
648              
649             =pod
650              
651             =item B
652              
653             I<$tm>->add (I<$tm2>, ...)
654              
655             This method accepts a list of L objects and adds all content from these maps to the current
656             object.
657              
658             B: There is B merging done for user-supplied toplets. Use explicitly method C
659             for it. Merging is done for all sacrosanct toplets, i.e. those from the infrastructure.
660              
661             From v1.31 onwards this method tries to favour the I identifiers (LIDs) of B map
662             over LIDs of the added maps. This means, firstly, that internal identifiers of B map are
663             B touched (or re-generated) in any way and that any shorthands (without a baseuri prefix) will
664             remain valid when using C. Secondly, LIDs in the added map will be attempted to blend into
665             B map by changing simply their prefix. If that newly generated LID is already taken by
666             something in B map, then the original LID will be used. That allows many added LIDs be used
667             together with C without (much) change in code. Of course, the only reliable way to reach a
668             topic is a subject locator or an indicator. This is all about convenience.
669              
670             B: This procedure implies that some assertions are recomputed, so that also their LID will
671             change!
672              
673              
674             =cut
675              
676             sub add {
677 0     0 1 0 my $self = shift;
678 0         0 my $baseuri = $self->{baseuri};
679 0         0 my $mid2iid = $self->{mid2iid}; # shorthand
680 0         0 my $asserts = $self->{assertions};
681              
682 0         0 foreach (@_) { # deal with one store after the other
683 0         0 my $baseuri2 = $_->{baseuri};
684              
685 0         0 my %changes; # will contain old -> new internal identifier mappings
686 0         0 while (my ($k, $v) = each %{$_->{mid2iid}}) {
  0         0  
687              
688 0 0       0 if ($infrastructure->{mid2iid}->{$k}) { # infrastructure toplets are sacrosanct
689             } else {
690 0         0 (my $k2 = $k) =~ s/^$baseuri2/$baseuri/; # replace baseuri2 prefix
691              
692 0 0       0 $k2 = $k if $mid2iid->{$k2}; # if there is a collision, bounce back to original
693 0         0 $k2 .= '1' while $mid2iid->{$k2}; # while there is still a collision ... (this only in case of same baseuris)
694             # $k2 = $baseuri.sprintf ("uuid-%010d", $TM::toplet_ctr++)
695             # if $mid2iid->{$k2}; # if there is a collision, create generic one
696              
697 0         0 $changes{$k} = $k2;
698 0         0 $v->[TM->LID] = $k2; # use that key as canonical one
699 0         0 $mid2iid->{$k2} = $v; # ...add what the other has
700             }
701             }
702             #warn Dumper \%changes;
703 0         0 my $changed = _relabel (\%changes, $baseuri, values %{ $_->{assertions} } );
  0         0  
704             #warn Dumper $changed;
705 0         0 while (my ($k, $a) = each %$changed) {
706             # delete $mid2iid->{ $k };
707             # $mid2iid->{ $a->[TM->LID] } = [ $a->[TM->LID], undef, [] ]; # put the new one in here
708 0         0 $asserts->{ $a->[TM->LID] } = $a; # and also in the assertions part
709             }
710             }
711 0         0 $self->{mid2iid} = $mid2iid; # make MLDBM happy
712 0         0 $self->{assertions} = $asserts; # ditto
713 0         0 $self->{last_mod} = Time::HiRes::time;
714             }
715              
716              
717             sub _relabel {
718 224     224   428 my $changes = shift;
719 224         423 my $baseuri = shift;
720              
721 224         413 my %changed; # we record here old LID -> newly relabelled assertion
722 224         1008 foreach my $a (@_) {
723 5486         8480 my ($this, $that);
724             #warn "working on ".Dumper $a;
725 5486 50 33     21381 $a->[TM->SCOPE] = $that if $that = $changes->{ $a->[TM->SCOPE] }; $this ||= $that;
  5486         22415  
726 5486 50 33     19538 $a->[TM->TYPE] = $that if $that = $changes->{ $a->[TM->TYPE] }; $this ||= $that;
  5486         26902  
727            
728 5486 50       6408 map { $_ = $this = $that if $that = $changes->{ $_ } } @{ $a->[TM->ROLES] };
  10972         32242  
  5486         17541  
729 5486 100       7381 map { $_ = $this = $that if $that = $changes->{ $_ } } @{ $a->[TM->PLAYERS] };
  10972         36904  
  5486         16823  
730             #warn "$this for ".Dumper $a;
731 5486 100       30171 $changed{ $a->[TM->LID] } = $a if $this; # something has changed
732              
733 5486         13094 $a->[TM->CANON] = 0; canonicalize (undef, $a);
  5486         22056  
734 5486         18492 $a->[TM->LID] = mklabel ($a);
735            
736             }
737 224         1048 return \%changed;
738             }
739              
740             =pod
741              
742             =item B
743              
744             I<$diff> = I<$new_tm>->diff (I<$old_tm>)
745              
746             I<$diff> = TM::diff (I<$new_tm>, I<$old_tm>)
747              
748             I<$diff> = TM::diff (I<$new_tm>, I<$old_tm>,
749             {consistency => \ @list_of_consistency_consts,
750             include_changes => 1})
751              
752             C compares two topic maps and returns their differences as a hash reference. While it works on
753             any two maps, it is most useful after one map (the I) is modified into a I.
754              
755             If C is used in OO-style, the current map is interpreted as the I map and the map in the
756             arguments as I.
757              
758             By default, the toplet and assertion identifiers for any changes are returned; the option
759             C causes the return of the actual toplets and assertions themselves. This option
760             makes C's output more self-contained: enabled, one can fully (re)create the new map from the
761             old one using the diff (or vice versa).
762              
763             The C option uses the same format as the TM constructor (see L) and
764             describes how corresponding toplets in the two maps are to be identified. Toplets with the same
765             internal ids are always considered equal. If I is active, toplets with
766             the same I are considered equal (overriding the topic identities). If I
767             based consistency> is active, toplets with a matching I are considered equal
768             (overriding the previous identities).
769              
770             B: This overriding of previous conditions for identity is necessary to keep the equality
771             relationship unique and one-to-one. As an example, consider the following scenario: a toplet I
772             in the old map is split into multiple new toplets I and I in the new map. If I had a
773             locator or identifier that is moved to I (and if consistency options were active), then the
774             identity detector will consider I to be equal to I, and B I in the new map to
775             correspond to I in the old map. However, this will never lead to loss of information: I in
776             the new map is flagged as completely new toplet.
777              
778             The differences between old and new map are returned underneath the keys I, I,
779             I and I. If C is on, the extra keys I,
780             I and I are populated. The values of all these keys are hash references
781             themselves.
782              
783             =over
784              
785             =item I, I
786              
787             The C and C hashes list new or removed toplets, respectively (with their identifiers as
788             keys). For each toplet, the value of the hash is an array of associated assertion ids. The array is
789             empty but defined if there are no associated assertions.
790              
791             For toplets the attached assertions are the usual ones (names, occurrences) and class-instance
792             relationships (attached to the instance toplet).
793              
794             For associations, the assertions are attached to the I toplet.
795              
796             =item I
797              
798             This hash consists of the non-trivial toplet identities that were found. If neither Subject- nor
799             Indicator-based merging is active, then this hash is empty. Otherwise, the keys are toplet
800             identifiers in the old map, with the corresponding topic identifier in the new map as value. This
801             includes standalone topics as well as assertions and associations that were renamed due to
802             changed player or role identities.
803              
804             =item I
805              
806             The I hash contains the changes for matched toplets. The key is the toplet identifier in
807             the old map (which is potentially different from the one in the new map; see the note about
808             identities above). The value is a hash with three keys: I, I and I. The
809             value for the C key is defined if and only if the toplet associated with this toplet has
810             changed (i.e. Subject Locator or Indicators have changed). The values for the C and C
811             keys are arrays with the new or removed assertions that are attached to this toplet. These arrays are
812             defined but empty where no applicable information is present.
813              
814             =item I, I
815              
816             These hashes hold the actual new or removed toplets if the option C is active.
817             Keys are the toplet ids, values are references to the actual toplet data structures.
818              
819             =item I
820              
821             This hash holds the actual assertions where the maps differ; it exists only if the option
822             C is active. Keys are the assertion identifiers, values the references to the
823             actual assertion data structure. Note that assertion ids uniquely identify the assertion contents,
824             therefore this hash can hold assertions from both new and old map.
825              
826             =back
827              
828             =cut
829              
830             sub diff {
831 0     0 1 0 my ($newmap,$oldmap,$options)=@_;
832 0 0 0     0 return undef if (!$oldmap || !$newmap);
833              
834 0         0 my ($base)=$oldmap->baseuri;
835 0 0       0 $log->logdie ("comparison of maps with different bases not supported yet!")
836             if ($newmap->baseuri ne $base);
837              
838 0         0 my (%plus,%minus,%modified);
839             # a lot of comparison/translation can be skipped if tids are the only identity
840 0         0 my $xlatneeded= grep($_==TM->Subject_based_Merging ||
841 0   0     0 $_==TM->Indicator_based_Merging,@{$options->{consistency}});
842              
843             # first walk the maps to match old and new items
844 0         0 my (%seen,%locators,%indicators);
845 0         0 for my $map ($oldmap,$newmap) {
846 0 0       0 my $key = ($map eq $oldmap ? "old":"new");
847 0 0       0 my $value = ($map eq $oldmap ? 1:2);
848              
849 0         0 for my $m (map { $_->[TM->LID] } ($map->toplets(\ '+all'))) {
  0         0  
850             # get the topic-aspects (tid, locators and identifiers)
851             # for finding unchanged/new/old topics
852 0         0 my $midlet=$map->toplet($m);
853 0 0       0 $locators{$key}->{$midlet->[TM->ADDRESS]}=$m
854             if ($midlet->[TM->ADDRESS]);
855 0         0 map { $indicators{$key}->{$_}=$m } (@{$midlet->[TM->INDICATORS]});
  0         0  
  0         0  
856 0         0 $seen{$m}|=$value;
857             }
858 0         0 for my $a (map { $_->[TM->LID] } $map->asserts (\ '+all')) {
  0         0  
859 0         0 $seen{$a}|=$value;
860             }
861             }
862              
863             # identify same topics
864             # first identity: same topic ids
865 0         0 my %old2new = map { ($_,$_) } grep { $seen{$_} == 3 } keys %seen;
  0         0  
  0         0  
866 0         0 my $foundxlat;
867 0 0       0 if (grep($_==TM->Subject_based_Merging,@{$options->{consistency}}))
  0         0  
868             {
869             # second: same locators
870             # note that this overwrites topic identitites!
871             # scenario: old has topica/loc x; new has topica/no loc and topicb/loc x
872 0   0     0 map { $foundxlat||=($locators{old}->{$_} ne $locators{new}->{$_});
  0         0  
873 0         0 $old2new{$locators{old}->{$_}}=$locators{new}->{$_};
874             }
875 0         0 (grep(exists $locators{new}->{$_}, keys %{$locators{old}}));
876             }
877 0 0       0 if (grep($_==TM->Indicator_based_Merging,@{$options->{consistency}}))
  0         0  
878             {
879             # final: matching indicators
880             # note that this overwrites topic and locator identitites, similar scenario as above
881 0   0     0 map { $foundxlat||=($indicators{old}->{$_} ne $indicators{new}->{$_});
  0         0  
882 0         0 $old2new{$indicators{old}->{$_}}=$indicators{new}->{$_}; }
883 0         0 (grep(exists $indicators{new}->{$_}, keys %{$indicators{old}}));
884             }
885             # no need to bother with translating assertions if there are no changed-tid identities
886 0 0 0     0 $xlatneeded=0 if ($xlatneeded && !$foundxlat);
887              
888             # produce list of missing/new topics
889 0 0       0 my %new2old=($xlatneeded?(reverse %old2new):%old2new);
890 0         0 my (%checkmidlet,%plusass,%minusass);
891 0         0 for my $t (keys %seen)
892             {
893 0 0 0     0 if ($seen{$t}==2 && !$new2old{$t})
    0 0        
894             {
895             # identical assertions with new lids are not detected here
896             # but later (via minusass)
897             # new assertion-lids happen with identified renamed players (lid is computed over values!)
898 0 0       0 $newmap->retrieve($t)?$plusass{$t}=1:$plus{$t}=[];
899             }
900             elsif ($seen{$t}==1 && !$old2new{$t})
901             {
902 0 0       0 $oldmap->retrieve($t)?$minusass{$t}=1:$minus{$t}=[];
903             }
904             else
905             {
906             # we work along the old tids (when not the same)
907 0 0       0 $checkmidlet{$seen{$t}==2?$new2old{$t}:$t}=1;
908             }
909             }
910 0         0 undef %seen; undef %locators; undef %indicators;
  0         0  
  0         0  
911              
912             #warn "check midlets ".Dumper \ %checkmidlet;
913              
914             # weed out the topics/midlets that are unchanged
915             # and all the identical assertions
916 0         0 my @checkassertion;
917 0         0 for my $t (keys %checkmidlet) {
918              
919 0 0       0 if ($t =~ /^[A-F0-9]{32}$/i) {
920 0         0 my $oa=$oldmap->retrieve($t);
921 0         0 my $on=$newmap->retrieve($old2new{$t});
922            
923 0 0 0     0 if ($oa && $on && $oa->[TM->LID] ne $on->[TM->LID]) {
      0        
924 0         0 push @checkassertion,$t;
925             }
926             } else {
927 0         0 my $ot = $oldmap->toplet($t);
928 0         0 my $nt = $newmap->toplet($old2new{$t});
929              
930 0 0       0 unless (_toplets_eq ($ot, $nt)) {
931 0         0 $modified{$t}->{identities}=1;
932 0   0     0 $modified{$t}->{plus}||=[];
933 0   0     0 $modified{$t}->{minus}||=[];
934             }
935              
936             # note: new toplet() returns internal id as well, which we DON'T want to check on here!
937             sub _toplets_eq
938             {
939 0     0   0 my ($a,$b)=@_;
940            
941 0   0     0 my ($A, $B) = ($a->[TM->ADDRESS] ||'', $b->[TM->ADDRESS] ||''); # just convert undef into ''
      0        
942 0 0       0 return 0 unless $A eq $B; # different subject address?
943 0         0 my %SIDS;
944 0         0 map { ++$SIDS{$_} } @{$a->[TM->INDICATORS]}, @{$b->[TM->INDICATORS]}; # we KNOW that the lists are UNIQUE, do we?
  0         0  
  0         0  
  0         0  
945 0 0       0 return 0 if grep { $_ != 2 } values %SIDS; # if it is not exactly 2 (one from a, one from b), then not equal
  0         0  
946 0         0 return 1; # we're happy: different LIDs don't interest us here
947             }
948            
949             }
950             }
951              
952             #warn "modified ".Dumper \%modified;
953              
954 0         0 my %old2newid;
955             my %identities;
956 0 0       0 if ($xlatneeded)
957             {
958             # now do the translation for assertions: rebuild old assertions
959             # into new namespace and compute the id
960             # don't waste time: do this only on the assertions that may be required
961             # minusass (or plusass) must be checked to find assertions with renamed-but-identical players
962 0         0 for my $t (@checkassertion,keys %minusass)
963             {
964 0         0 my $m=$oldmap->retrieve($t);
965 0         0 my ($lid,$scope,$kind,$type,$roles,$players)=
966 0         0 @{$m}[TM->LID,TM->SCOPE,TM->KIND,TM->TYPE,TM->ROLES,TM->PLAYERS];
967              
968             # if any of the topics is untranslatable, then skip the remaining work
969             # as it can't successfully compare anyway...
970 0   0     0 $scope=$old2new{$scope} || next;
971 0   0     0 $type=$old2new{$type} || next;
972 0 0 0     0 my @newroles = map { ref($_)?$_:$old2new{$_} || next; } (@{$roles});
  0         0  
  0         0  
973 0 0 0     0 my @newplayers = map { ref($_)?$_:$old2new{$_} || next; } (@{$players});
  0         0  
  0         0  
974              
975 0         0 my $n=Assertion->new(scope=>$scope,
976             kind=>$kind,
977             type=>$type,
978             roles=>\@newroles,players=>\@newplayers);
979 0         0 $newmap->canonicalize($n);
980 0         0 my $newid=TM::mklabel($n);
981 0         0 $old2newid{$t}=$newid;
982              
983 0 0       0 if ($plusass{$newid}) # we found a matching assertion, wohee!
984             {
985 0         0 delete $plusass{$newid};
986 0         0 delete $minusass{$t};
987             # remember that this assertion was re-id'd (directly or indirectly via players)
988             # this is done for standalone assocs just the same as for bn/oc characteristics
989 0         0 $identities{$t}=$newid;
990             }
991             }
992             }
993              
994             # finally, find and attach the modified assertions to their topics
995             # attributes: to the topic
996             # associations: to the type-topic
997              
998 0         0 for my $key ("plus","minus")
999             {
1000 0         0 my ($unmodified,$map,$candidates);
1001 0 0       0 if ($key eq "plus")
1002             {
1003 0         0 $unmodified=\%plus; $map=$newmap; $candidates=\%plusass;
  0         0  
  0         0  
1004             }
1005             else
1006             {
1007 0         0 $unmodified=\%minus; $map=$oldmap; $candidates=\%minusass;
  0         0  
  0         0  
1008             }
1009            
1010 0         0 for my $t (keys %{$candidates})
  0         0  
1011             {
1012 0         0 my $m=$map->retrieve($t);
1013 0         0 my ($oldwho,$who,$what);
1014 0 0       0 if ($m->[TM->KIND] ne TM->ASSOC)
    0          
1015             {
1016             # bn or oc: attach to referenced topic
1017 0         0 $who=($map->get_x_players($m,"thing"))[0];
1018 0         0 $what=$t;
1019             }
1020             elsif ($m->[TM->TYPE] eq "isa")
1021             {
1022             # isa associations get attached to the instance topic
1023 0         0 $who=($map->get_x_players($m,"instance"))[0];
1024 0         0 $what=$t;
1025             }
1026             else
1027             {
1028             # general assoc: gets attached to type topic
1029 0         0 $who=$m->[TM->TYPE];
1030 0         0 $what=$t;
1031             }
1032              
1033             # if this assertion belongs to a topic that is marked gone/new, we save it with that topic
1034 0 0       0 if ($unmodified->{$who})
1035             {
1036 0         0 push @{$unmodified->{$who}},$what;
  0         0  
1037             }
1038             else # if this belongs to a modified topic: more details please (new/old ass)
1039             {
1040             # we access things along the old id axis...
1041 0 0       0 if ($key eq "plus")
1042             {
1043 0         0 $who=$new2old{$who};
1044             }
1045 0   0     0 $modified{$who}->{$key}||=[];
1046 0         0 push @{$modified{$who}->{$key}},$what;
  0         0  
1047             }
1048             }
1049             }
1050              
1051 0 0       0 map { $identities{$_}=$old2new{$_} if ($_ ne $old2new{$_}); } (keys %old2new);
  0         0  
1052              
1053 0         0 my $returnvalue={
1054             'identities'=>\%identities,
1055             'plus'=>\%plus,
1056             'minus'=>\%minus,
1057             'modified'=>\%modified,
1058             };
1059              
1060             # pull in the midlets and assertions that have been affected,
1061             # so that the resulting datastructure can be frozen and used together with oldmap
1062             # to (re)create newmap
1063 0 0       0 if ($options->{include_changes})
1064             {
1065             # one problem, though is naming: midlets can have changed but their name doesn't
1066             # reflect that: we need two midlet datastructures here.
1067             # (assertions are fine, their names always reflect their content uniquely)
1068              
1069 0         0 my (%plusm,%minusm,%ass,$a);
1070 0         0 map { $plusm{$_} = $newmap->toplet($_) } keys %plus;
  0         0  
1071 0         0 map { $ass{ $_->[TM->LID] } = $_ }
  0         0  
1072 0         0 map { $newmap->retrieve($_) }
1073 0         0 map { @$_ }
1074             values %plus;
1075 0         0 map { $minusm{$_} = $oldmap->toplet($_) } keys %minus;
  0         0  
1076 0         0 map { $ass{ $_->[TM->LID] } = $_ }
  0         0  
1077 0         0 map { $oldmap->retrieve($_) }
1078 0         0 map { @$_ }
1079             values %minus;
1080              
1081 0         0 for my $k (keys %modified)
1082             {
1083             # these are corresponding topics with differing midlet (contents)
1084 0 0       0 if ($modified{$k}->{identities})
1085             {
1086 0         0 $plusm{$k} = $newmap->toplet($old2new{$k});
1087 0         0 $minusm{$k} = $oldmap->toplet($k);
1088             }
1089 0 0       0 map { $plusm{$_} =$newmap->toplet($_); $a=$newmap->retrieve($_) and $ass{$_}=$a; } (@{$modified{$k}->{plus}});
  0         0  
  0         0  
  0         0  
1090 0 0       0 map { $minusm{$_}=$oldmap->toplet($_); $a=$oldmap->retrieve($_) and $ass{$_}=$a; } (@{$modified{$k}->{minus}});
  0         0  
  0         0  
  0         0  
1091             }
1092              
1093 0         0 $returnvalue->{plus_midlets} =\%plusm;
1094 0         0 $returnvalue->{minus_midlets} =\%minusm;
1095 0         0 $returnvalue->{assertions} =\%ass;
1096             }
1097              
1098 0         0 return $returnvalue;
1099             }
1100              
1101             =pod
1102              
1103             =item B (DEPRECATED)
1104              
1105             I<$tm>->melt (I<$tm2>)
1106              
1107             This - probably more auxiliary - function copies relevant aspect of a second map into the object.
1108              
1109             =cut
1110              
1111             our @ESSENTIALS = qw(mid2iid assertions baseuri variants);
1112              
1113             sub melt {
1114 0     0 1 0 my $self = shift;
1115 0         0 my $tm2 = shift;
1116              
1117 0         0 @{$self}{@ESSENTIALS} = @{$tm2}{@ESSENTIALS};
  0         0  
  0         0  
1118 0         0 $self->{last_mod} = Time::HiRes::time;
1119             }
1120              
1121             =pod
1122              
1123             =item B
1124              
1125             warn "topic map broken" if I<$tm>->insane
1126              
1127             This method tests invariant conditions inside the TM structure of that map. Specifically,
1128              
1129             =over
1130              
1131             =item *
1132              
1133             each toplet has a LID which points to a toplet with the same address
1134              
1135             =back
1136              
1137             It returns a string with a message or C if everything seems fine.
1138              
1139             TODO: add test whether all variant entries have a proper LID (and toplet)
1140              
1141              
1142             =cut
1143              
1144             sub insane {
1145 5     5 1 19 my $self = shift;
1146              
1147 5         10 my $mid2iid = $self->{mid2iid};
1148 5         7 my $asserts = $self->{assertions};
1149              
1150             # Test 1: all toplet LIDs point to something in mid2iid which refers to themselves
1151 5         30 foreach my $k (keys %$mid2iid) {
1152 112         145 my $t = $mid2iid->{$k};
1153 112 100       342 return "toplet LID $k not in mid2iid"
1154             unless $mid2iid->{ $t->[TM->LID] };
1155 111 50       329 return "LID $k inconsistent with toplet LID"
1156             unless $mid2iid->{ $t->[TM->LID] } == $t;
1157 111 100 66     319 return "key $k looks like assertion, but has not assertions entry"
1158             if $k =~ /[[:xdigit:]]{16}/ and !$asserts->{$k};
1159             }
1160             ## Test 2: all assertions are toplets
1161             # foreach my $k (keys %$asserts) {
1162             # return "assertion $k has no toplet entry"
1163             # unless $mid2iid->{ $asserts->{$k}->[TM->LID] };
1164             # return "assertion $k toplet entry has a different LID"
1165             # unless $mid2iid->{ $asserts->{$k}->[TM->LID] }->[TM->LID] eq $k;
1166             # }
1167 3         25 return undef; # pass all tests
1168             }
1169              
1170             =pod
1171              
1172             =back
1173              
1174             =head1 TOPLET INTERFACE
1175              
1176             I are light-weight versions of TMDM topics. They only carry addressing information and are
1177             represented by an array (struct) with the following fields:
1178              
1179             =cut
1180              
1181             struct 'Toplet' => [
1182             lid => '$',
1183             saddr => '$',
1184             sinds => '$',
1185             ];
1186              
1187             =pod
1188              
1189             =over
1190              
1191             =item C (index: C)
1192              
1193             The internal identifier. Mostly it repeats the key in the toplet hash, but also aliased identifiers
1194             may exist.
1195              
1196             =item C (index: C
)
1197              
1198             It contains the B (address) URI, if known. Otherwise C.
1199              
1200             =item C (index: C)
1201              
1202             This is a reference to a list containing B (indicators). The list can be empty,
1203             no duplicate removal is attempted at this stage.
1204              
1205             =back
1206              
1207             You can create this structure manually, but mostly you would leave it to C to do the
1208             work.
1209              
1210             Example:
1211              
1212             # dogmatic way to produce it
1213             my $to = Toplet->new (lid => $baseuri . 'my-lovely-cat',
1214             saddr => 'http://subject-address.com/',
1215             sinds => []);
1216              
1217             # also good and well
1218             my $to = [ $baseuri . 'my-lovely-cat',
1219             'http://subject-address.com/',
1220             [] ];
1221              
1222             # better
1223             my $to = $tm->internalize ('my-lovely-cat' => 'http://subject-address.com/');
1224              
1225             To access the individual fields, you can either use the struct accessors C and C, or
1226             use the constants defined above for indices into the array:
1227              
1228             =cut
1229              
1230             use constant {
1231             # LID => 0,
1232 36         65025 ADDRESS => 1,
1233             INDICATORS => 2
1234 36     36   465 };
  36         75  
1235              
1236             =pod
1237              
1238             Example:
1239              
1240             warn "indicators: ", join (", ", @{$to->sinds});
1241              
1242             warn "locator: ", $to->[TM->ADDRESS];
1243              
1244             =head2 Methods
1245              
1246             =over
1247              
1248             =item B
1249              
1250             I<$iid> = I<$tm>->internalize (I<$some_id>)
1251              
1252             I<$iid> = I<$tm>->internalize (I<$some_id> => I<$some_id>)
1253              
1254             I<@iids> = I<$tm>->internalize (I<$some_id> => I<$some_id>, ...)
1255              
1256             This method does some trickery when a new toplet should be added to the map, depending on how
1257             parameters are passed into it. The general scheme is that pairs of identifiers are passed in. The
1258             first is usually the internal identifier, the second a subject identifier or the subject
1259             locator. The convention is that subject identifier URIs are passed in as string references, whereas
1260             subject locator URIs are passed in as strings.
1261              
1262             The following cases are covered:
1263              
1264             =over
1265              
1266             =item C undef>
1267              
1268             If the ID is already an absolute URI and contains the C of the map as prefix, then this URI
1269             is used as internal toplet identifier. If the ID is some other URI, then a toplet with that URI as
1270             subject locator is searched in the map. If such a toplet already exists, then nothing special needs
1271             to happen. If no such toplet existed, a new URI, based on the C and a random number will
1272             be created for the internal identifier and the original URI is used as subject address.
1273              
1274             B: Using C URI> implies that you use two different URIs as subject addresses. This
1275             will result in an error.
1276              
1277             =item C URI>
1278              
1279             Like above, only that the URI is directly interpreted as subject address.
1280              
1281             =item C \ URI> (reference to string)
1282              
1283             Like above, only that the URI is interpreted as another subject identifier. If the toplet already existed,
1284             then this subject identifier is simply added. Duplicates are suppressed (since v1.31).
1285              
1286             =item C URI>
1287              
1288             Like above, only that the internal identifier is auto-created if there is no toplet with the URI
1289             as subject address.
1290              
1291             Attention: If you call internalize like this
1292              
1293             $tm->internalize(undef => $whatever)
1294              
1295             then perl will (un)helpfully replace the required undef with the string "undef" and wreck the operation.
1296             Using either a variable to hold the undef or replacing the (syntactic sugar) arrow with a comma works around this issue.
1297              
1298             =item C \ URI>
1299              
1300             Like above, only that the URI us used as subject identifier.
1301              
1302             =item C undef>
1303              
1304             A toplet with an auto-generated ID will be inserted.
1305              
1306             =back
1307              
1308             In any case, the internal identifier(s) of all inserted (or existing) toplets are returned for
1309             convenience.
1310              
1311             =cut
1312              
1313             our $toplet_ctr = 0;
1314              
1315             sub internalize {
1316 5309     5309 1 10433 my $self = shift;
1317 5309         15299 my $baseuri = $self->{baseuri};
1318              
1319             #warn "internalize base: $baseuri";
1320              
1321 5309         6698 my @mids;
1322 5309         13698 my $mid2iid = $self->{mid2iid};
1323 5309         13468 while (@_) {
1324 27051         55516 my ($k, $v) = (shift, shift); # assume to get here undef => URI or ID => URI or ID => \ URI or ID => undef
1325             #warn "internalize $k, $v"; # if ! defined $k;
1326             # make sure that $k contains a mid
1327              
1328 27051 50 66     125270 $k = undef if defined $k && $k eq 'undef'; # perl 5.10 will stringify undef => ....
1329              
1330 27051 100       44844 if (defined $k) {
    100          
    100          
1331 27005 100       134274 if ($mid2iid->{$k}) { # this identifier is already in the map
    100          
    100          
    100          
1332             # null
1333             } elsif ($k =~ /^$baseuri/) { # ha, perfect, another identifier already in form
1334             # null # keep it as it is
1335             } elsif ($k =~ /^\w+:/) { # some other absURL
1336 23 100       58 if (my $k2 = $self->tids ($k)) { # we already had it
1337 8         49 ($k, $v) = ($k2, $k);
1338             } else { # it is unknown so far
1339 15         85 ($k, $v) = ($baseuri.sprintf ("uuid-%010d", $toplet_ctr++), $k);
1340             }
1341             } elsif (my $k2 = $self->tids ($k)) {
1342 5276         13703 $k = $k2; # then we already have it, maybe under a different mid, take that
1343              
1344             } else { # this means we have a relURI and it is not from that map
1345 3188         7773 $k = $baseuri.$k; # but now it is
1346             }
1347              
1348             } elsif (ref ($v) eq 'Assertion') { # k is not defined, lets look at v, but if that is an assertion
1349 1         6 $k = $baseuri.sprintf ("uuid-%010d", $toplet_ctr++); # generate a new one
1350             } elsif (my $k2 = $self->tids ($v)) { # k is not defined, lets look at v; we already had it
1351 19         40 $k = $k2; # this will be k then
1352             } else { # it is unknown so far
1353 26         163 $k = $baseuri.sprintf ("uuid-%010d", $toplet_ctr++); # generate a new one
1354             }
1355              
1356             #warn "really internalizing '$k' '$v'";
1357 27051         66606 push @mids, $k;
1358              
1359 27051 100       60927 $v = $v->[TM->LID] if ref ($v) eq 'Assertion'; # for internal reification we use the assertion's LID
1360              
1361 27051   100     87163 $mid2iid->{$k} ||= [ $k, undef, [] ]; # now see that we have an entry in the mid2iid table
1362 27051         51711 my $kentry = $mid2iid->{$k}; # keep this as a shortcut
1363              
1364 27051 100       51356 if ($v) {
1365 1081 100       3653 if (ref($v)) { # being a reference means that we have a subject indication
    100          
1366 732         2694 push @{$kentry->[TM->INDICATORS]}, $$v # append it to the list
  138         541  
1367 751 100       1018 unless grep {$$v eq $_} @{$kentry->[TM->INDICATORS]}; # if not yet there
  751         4498  
1368             } elsif ($kentry->[TM->ADDRESS]) { # this is a subject address and, oh, there is already a subject address, not good
1369 10 100       50 $log->logdie ("duplicate subject address '$v' for '$k'") unless $v eq $kentry->[TM->ADDRESS];
1370             } else { # everything is fine, we can set it
1371 320         869 $kentry->[TM->ADDRESS] = $v;
1372             }
1373             }
1374 27050         81049 $mid2iid->{$k} = $kentry; # necessary if mid2iid is tied itself
1375             }
1376 5308         8228 $self->{mid2iid} = $mid2iid; #!! needed for Berkeley DBM recognize changes on deeper levels
1377 5308         18271 $self->{last_mod} = Time::HiRes::time;
1378 5308 100       38630 return wantarray ? @mids : $mids[0];
1379             }
1380              
1381             =pod
1382              
1383             =item B (old name B)
1384              
1385             I<$t> = I<$tm>->toplet (I<$mid>)
1386              
1387             I<@ts> = I<$tm>->toplet (I<$mid>, ....)
1388              
1389             This function returns a reference to a toplet structure. It can be used in scalar and list context.
1390              
1391             =cut
1392              
1393             sub midlet {
1394 7185     7185 0 18930 return toplet (@_);
1395             }
1396              
1397             sub toplet {
1398 7216     7216 1 14767 my $self = shift;
1399 7216         12298 my $mid2iid = $self->{mid2iid};
1400              
1401 7216 100       16040 if (wantarray) {
1402 7184 50       12151 return (map { defined $_ ? $mid2iid->{$_} : $_ } @_);
  14368         63647  
1403             } else {
1404 32         204 return $mid2iid->{$_[0]};
1405             }
1406             }
1407              
1408             =pod
1409              
1410             =item B (old name B)
1411              
1412             I<@mids> = I<$tm>->toplets
1413              
1414             I<@mids> = I<$tm>->toplets (I<@list_of_ids>)
1415              
1416             I<@mids> = I<$tm>->toplets (I<$selection_spec>)
1417              
1418             This function returns toplet structures from the map. B: This has changed from v 1.13. Before
1419             you got ids.
1420              
1421             If no parameter is provided, all toplets are returned. This includes really everything also
1422             infrastructure toplets. If an explicit list is provided as parameter, then all toplets with these
1423             identifiers are returned.
1424              
1425             If a search specification is used, it has to be passed in as string reference. That string contains
1426             the selection specification using the following simple language (curly brackets mean repetition,
1427             round bracket grouping, vertical bar alternatives):
1428              
1429             specification -> { ( '+' | '-' ) group }
1430              
1431             whereby I is one of the following:
1432              
1433             =over
1434              
1435             =item C
1436              
1437             refers to B toplets in the map. This includes those supplied by the application. The list also
1438             includes all infrastructure topics which the software maintains for completeness.
1439              
1440             =item C
1441              
1442             refers to all toplets the infrastructure has provided. This implies that
1443              
1444             all - infrastructure
1445              
1446             is everything the user (application) has supplied.
1447              
1448             =back
1449              
1450             Examples:
1451              
1452             # all toplets except those from TM::PSI
1453             $tm->toplets (\ '+all -infrastructure')
1454              
1455             B: No attempt is made to make this list unique.
1456              
1457             B: The specifications are not commutative, but are interpreted from left-to-right. So C
1458             -infrastructure +infrastructure> is not the same as C. In the
1459             latter case the infrastructure toplets have been added twice, and are then deducted completely with
1460             C<-infrastructure>.
1461              
1462             =cut
1463              
1464             sub midlets {
1465 0     0 0 0 return toplets (@_);
1466             }
1467              
1468             sub toplets {
1469 85     85 1 60777 my $self = shift;
1470 85         220 my $mid2iid = $self->{mid2iid};
1471              
1472 85 100       249 if ($_[0]) { # if there is some parameter
1473 15 100       35 if (ref ($_[0]) ) { # whoohie, a search spec
1474 14         18 my $spec = ${$_[0]};
  14         28  
1475 14         18 my $l = []; # will be list
1476 14         71 while ($spec =~ s/([+-])(\w+)//) {
1477 23 100       84 if ($2 eq 'all') {
    100          
1478 11         95 $l = _mod_list ($1 eq '+', $l, keys %$mid2iid);
1479             } elsif ($2 eq 'infrastructure') {
1480 11         22 $l = _mod_list ($1 eq '+', $l, keys %{$infrastructure->{mid2iid}});
  11         69  
1481             } else {
1482 1         10 $log->logdie (scalar __PACKAGE__ .": specification '$2' unknown");
1483             }
1484             }
1485 13 100       48 $log->logdie (scalar __PACKAGE__ .": unhandled specification '$spec' left") if $spec =~ /\S/;
1486 12         22 return map { $mid2iid->{$_} } @$l;
  198         325  
1487             } else {
1488 1         3 my $m = $mid2iid;
1489 1         4 return @$m{$self->tids (@_)}; # make all these fu**ing identifiers map-absolute
1490             }
1491             } else { # if the list was empty, we assume every thing in the map
1492 70         865 return values %$mid2iid;
1493             }
1494              
1495             sub _mod_list {
1496 59     59   94 my $pm = shift; # non-zero for +
1497 59         72 my $l = shift;
1498 59 100       97 if ($pm) {
1499 33         284 return [ @$l, @_ ];
1500             } else {
1501 26         31 my %minus;
1502 26         148 @minus{ @_ } = (1) x @_;
1503 26         52 return [ grep { !$minus{$_} } @$l ];
  378         835  
1504             }
1505             }
1506             sub _mk_uniq {
1507 0     0   0 my %uniq;
1508 0         0 @uniq {@_} = (1) x @_;
1509 0         0 return keys %uniq;
1510             }
1511              
1512             }
1513              
1514             =pod
1515              
1516             =item B (old name B)
1517              
1518             I<$mid> = I<$tm>->tids (I<$some_id>)
1519              
1520             I<@mids> = I<$tm>->tids (I<$some_id>, ...)
1521              
1522             This function tries to build absolute versions of the identifiers passed in. C will be
1523             returned if no such can be constructed. Can be used in scalar and list context.
1524              
1525             =over
1526              
1527             =item *
1528              
1529             If the passed-in identifier is a relative URI, so it is made absolute by prefixing it with the map
1530             C and then we look for a toplet with that internal identifier.
1531              
1532             =item *
1533              
1534             If the passed-in identifier is an absolute URI, where the C is a prefix, then that URI will
1535             be used as internal identifier to look for a toplet.
1536              
1537             =item *
1538              
1539             If the passed-in identifier is an absolute URI, where the C is B a prefix, then that
1540             URI will be used as subject locator and such a toplet will be looked for.
1541              
1542             =item *
1543              
1544             If the passed-in identifier is a reference to an absolute URI, then that URI will be used as subject
1545             identifier and such a toplet will be looked for.
1546              
1547             =back
1548              
1549             =cut
1550              
1551             sub mids {
1552 7217     7217 0 3391343 return tids (@_);
1553             }
1554              
1555             sub tids {
1556 17508     17508 1 63422 my $self = shift;
1557 17508         28516 my $mid2iid = $self->{mid2iid}; # shorthand
1558              
1559 17508         21230 my @ks;
1560             MID:
1561 17508         35985 foreach my $k (@_) {
1562 24730 100       187752 if (! defined $k) { # someone put in undef
    100          
    100          
    100          
1563 11         24 push @ks, undef;
1564              
1565             } elsif (ref ($k)) { # would be subject indicator ref
1566 219         374 my $kk = $$k;
1567 219         274 foreach my $k2 (keys %{$mid2iid}) {
  219         1907  
1568 3824 100       3700 if (grep ($_ eq $kk,
  3824         16988  
1569             @{$mid2iid->{$k2}->[TM->INDICATORS]}
1570             )) {
1571 193         731 push @ks, $mid2iid->{$k2}->[TM->LID]; # LID points to 'canonical' internal identifier
1572 193         1062 next MID;
1573             }
1574             }
1575 26         143 push @ks, undef;
1576              
1577             } elsif (my $kk = $mid2iid->{$k}) { # we already have something which looks like a tid
1578 1147         4508 push @ks, $kk->[TM->LID]; # give back the 'canonical' one
1579              
1580             } elsif ($k =~ /(^\w+:)|(^[A-F0-9]{32}$)/i) { # must be some other uri or assoc id, must be subject address
1581 36     36   295 no warnings;
  36         95  
  36         13744  
1582 62         510 my @k2 = grep ($mid2iid->{$_}->[TM->ADDRESS] eq $k, keys %{$mid2iid});
  62         2448  
1583 62 100       474 push @ks, @k2 ? $mid2iid->{$k2[0]}->[TM->LID] : undef; # we take the first we find
1584              
1585             } else { # only a string, like 'aaa'
1586 23291         67742 my $k2 = $self->{baseuri}.$k; # make it absolute, and...
1587 23291 100       135752 push @ks, $mid2iid->{$k2} # see whether there is something
1588             ? $mid2iid->{$k2}->[TM->LID] : undef; # and then take canonical LID
1589             }
1590             }
1591             #warn "mids ".Dumper (\@_)." returning ".Dumper (\@ks);
1592 17508 100       87222 return wantarray ? @ks : $ks[0];
1593             }
1594              
1595             =pod
1596              
1597             =item B
1598              
1599             I<$tm>->externalize (I<$some_id>, ...)
1600              
1601             This function simply deletes the toplet entry for the given internal identifier(s). The function
1602             returns all deleted toplet entries.
1603              
1604             B: Assertions in which this topic is involved will B be removed. Use C to
1605             clean up all assertion where non-existing toplets still exist.
1606              
1607             =cut
1608              
1609             sub externalize {
1610 68     68 1 582 my $self = shift;
1611              
1612 68         134 my $mid2iid = $self->{mid2iid};
1613 68         143 my @doomed = map { delete $mid2iid->{$_} } @_;
  10         54  
1614 68         125 $self->{mid2iid} = $mid2iid; ## !! needed for Berkeley DBM recognize changes on deeper levels
1615 68         278 $self->{last_mod} = Time::HiRes::time;
1616 68         683 return @doomed;
1617             }
1618              
1619             =pod
1620              
1621             =back
1622              
1623             =head1 ASSERTIONS INTERFACE
1624              
1625             One assertion is a record containing its own identifier, the scope, the type of the assocation, an
1626             field whether this is an association, an occurrence or a name and then all roles and all players,
1627             both in separate lists.
1628              
1629             =cut
1630              
1631             struct 'Assertion' => [
1632             lid => '$',
1633             scope => '$',
1634             type => '$',
1635             kind => '$', # redundant, but very useful
1636             roles => '$',
1637             players => '$',
1638             canon => '$',
1639             ];
1640              
1641             use constant {
1642 36         4554 LID => 0,
1643             SCOPE => 1,
1644             TYPE => 2,
1645             KIND => 3,
1646             ROLES => 4,
1647             PLAYERS => 5,
1648             CANON => 6
1649 36     36   245 };
  36         85  
1650              
1651             =pod
1652              
1653             Assertions consist of the following components:
1654              
1655             =over
1656              
1657             =item I (index C):
1658              
1659             Every assertion has an identifier. It is a unique identifier generated from a canonicalized form of
1660             the assertion itself.
1661              
1662             =item I (index: C)
1663              
1664             This component holds the scope of the assertion.
1665              
1666             =item I (index: C, redundant information):
1667              
1668             For technical reasons (read: it is faster) we distinguish between full associations (C),
1669             names (C) and occurrences (C).
1670              
1671             =cut
1672              
1673             # values for 'kind'
1674             use constant {
1675 36         62846 ASSOC => 0,
1676             NAME => 1,
1677             OCC => 2,
1678 36     36   221 };
  36         75  
1679              
1680             =pod
1681              
1682             =item I (index: C):
1683              
1684             The toplet id of the type of this assertion.
1685              
1686             =item I (index: C):
1687              
1688             A list reference which holds a list of toplet ids for the roles.
1689              
1690             =item I (index: C):
1691              
1692             A list reference which holds a list of toplet IDs for the players.
1693              
1694             =item I (index: C):
1695              
1696             Either C<1> or C to signal whether this assertion has been (already) canonicalized (see
1697             L). If an assertion is canonicalized, then the players and roles lists are sorted
1698             (somehow), so that assertions can be easily compared.
1699              
1700             =back
1701              
1702             Obviously the lists for roles and players B have the same length, so that every player
1703             corresponds to exactly one role. If one role is played by several players, the role appears multiple
1704             times.
1705              
1706             As a special case, names and occurrences are mapped into assertions, by
1707              
1708             =over
1709              
1710             =item *
1711              
1712             setting the I to C and C,
1713              
1714             =item *
1715              
1716             setting the I to the toplet id in question and using a L as the player for
1717             C,
1718              
1719             =item *
1720              
1721             using the I component to store the name/occurrence type,
1722              
1723             =item *
1724              
1725             using as I either C or C
1726              
1727             =back
1728              
1729             Example:
1730              
1731             # general association
1732             $a = Assertion->new (type => 'is-subclass-of',
1733             roles => [ 'subclass', 'superclass' ],
1734             players => [ 'rumsti', 'ramsti' ])
1735              
1736              
1737             warn $a->scope . " is the same as " . $a->[TM->SCOPE];
1738              
1739             # create a name
1740             use TM::Literal;
1741             $n = Assertion->new (kind => TM->NAME,
1742             type => 'name',
1743             scope => 'us',
1744             roles => [ 'thing', 'value' ],
1745             players => [ 'rumsti',
1746             new TM::Literal ('AAA') ]);
1747              
1748             # create an occurrence
1749             use TM::Literal;
1750             $n = Assertion->new (kind => TM->OCC,
1751             type => 'occurrence',
1752             scope => 'us',
1753             roles => [ 'thing', 'value' ],
1754             players => [ 'rumsti',
1755             new TM::Literal ('http://whatever/') ]);
1756              
1757             =head2 Special Assertions
1758              
1759             This package adopts the following conventions to store certain assertions:
1760              
1761             =over
1762              
1763             =item C
1764              
1765             Associations of this type should have one role C and another C. The scope
1766             should always be C.
1767              
1768             =item C
1769              
1770             Associations of this type should have one role C and another C. The scope should
1771             always be C.
1772              
1773             =item C
1774              
1775             Assertions for names should have the C component set to it and use the C component to
1776             store the name type. The two roles to use are C for the value and C for the toplet
1777             carrying the name.
1778              
1779             =item C
1780              
1781             Assertions for occurrences should have the C component set to it and use the C component
1782             to store the occurrence type. The two roles to use are C for the value and C for the
1783             toplet carrying the name.
1784              
1785             =back
1786              
1787             =head2 Methods
1788              
1789             =over
1790              
1791             =item B
1792              
1793             I<@as> = I<$tm>->assert (I<@list-of-assertions>)
1794              
1795             This method takes a list of assertions, canonicalizes them and then injects them into the map. If
1796             one of the newly added assertions already existed in the map, it will be ignored.
1797              
1798             In this process, all assertions will be completed (if fields are missing).
1799              
1800             =over
1801              
1802             =item If an assertion does not have a type, it will default to C<$TM::PSI::THING>.
1803              
1804             =item If an assertion does not have a scope, it defaults to C<$TM::PSI::US>.
1805              
1806             =back
1807              
1808             Then the assertion will be canonicalized (unless it already was). This implies that
1809             non-canonicalized assertions will be modified, in that the role/player lists change. Any assertion
1810             not having an LID will get one.
1811              
1812             The method returns a list of all asserted assertions.
1813              
1814             Example:
1815              
1816             my $a = Assertion->new (type => 'rumsti');
1817             $tm->assert ($a);
1818              
1819             B: Maybe the type will default to I in the future.
1820              
1821             =cut
1822              
1823             sub assert {
1824 2400     2400 1 9324 my $self = shift;
1825 2400         4614 my ($THING, $US) = ('thing', 'us');
1826              
1827             #warn "sub $THING assert $self".ref ($self);
1828              
1829 2400         3123 my @tids; # first collect all emerging tids from the assertions
1830 2400         5079 foreach (@_) {
1831 4163 50       11104 unless ($_->[CANON]) {
1832 4163   66     22052 push @tids, $_->[TYPE] || $THING;
1833 4163   66     23823 push @tids, $_->[SCOPE] || $US;
1834 4163         4834 push @tids, @{$_->[ROLES]};
  4163         8911  
1835 4163         6279 push @tids, grep { ! ref ($_) } @{$_->[PLAYERS]};
  8310         25431  
  4163         8037  
1836             }
1837             }
1838 2400         4375 @tids = $self->internalize ( map { $_ => undef } @tids); # then convert them into proper usable tids
  22855         51847  
1839              
1840 2400         9736 my $asserts = $self->{assertions}; # load (MLDBM kicker)
1841 2400         5174 foreach (@_) { # only now use all the information to complete the assertions
1842 4163 50       10527 unless ($_->[CANON]) {
1843 4163   100     13492 $_->[KIND] ||= ASSOC;
1844 4163         8030 $_->[TYPE] = shift @tids;
1845 4163         10014 $_->[SCOPE] = shift @tids;
1846 4163         5543 $_->[ROLES] = [ map { shift @tids } @{$_->[ROLES]} ];
  8310         42615  
  4163         7828  
1847 4163 100       8838 $_->[PLAYERS] = [ map { $_ = ref ($_) ? $_ : shift @tids } @{$_->[PLAYERS]} ];
  8310         41990  
  4163         9710  
1848              
1849 4163         12417 canonicalize (undef, $_);
1850              
1851 4163   66     17337 $_->[LID] ||= mklabel ($_);
1852             }
1853 4163         20386 $asserts->{$_->[LID]} = $_;
1854             }
1855 2400         5208 $self->{assertions} = $asserts; ### HACK ALERT: needed for Berkeley DBM recognize changes on deeper levels
1856 2400         19367 $self->{last_mod} = Time::HiRes::time;
1857 2400         9562 return @_;
1858             }
1859              
1860             =pod
1861              
1862             =item B
1863              
1864             I<$assertion> = I<$tm>->retrieve (I<$some_assertion_id>)
1865              
1866             I<@assertions> = I<$tm>->retrieve (I<$some_assertion_id>, ...)
1867              
1868             This method takes a list of assertion IDs and returns the assertion(s) with the given (subject)
1869             ID(s). If the assertion is not identifiable, C will be returned in its place. Called in list
1870             context, it will return a list of assertion references.
1871              
1872             =cut
1873              
1874             sub retrieve {
1875 228     228 1 3927 my $self = shift;
1876 228         378 my $asserts = $self->{assertions};
1877              
1878 228 100       407 if (wantarray()) {
1879 87         162 return map { $asserts->{$_} } @_;
  87         278  
1880             } else {
1881 141         408 return $asserts->{$_[0]};
1882             }
1883             }
1884              
1885             =pod
1886              
1887             =item B
1888              
1889             I<@assertions> = I<$tm>->asserts (I<$selection_spec>)
1890              
1891             If a search specification is used, it has to be passed in as string reference. That string contains
1892             the selection specification using the following simple language (curly brackets mean repetition,
1893             round bracket grouping, vertical bar alternatives):
1894              
1895             specification -> { ( '+' | '-' ) group }
1896              
1897             whereby I is one of the following:
1898              
1899             =over
1900              
1901             =item C
1902              
1903             refers to B assertions in the map. This includes those supplied by the application, but also
1904             all predefined associations, names and occurrences.
1905              
1906             =item C
1907              
1908             refers to all assertions which are actually associations
1909              
1910             =item C
1911              
1912             refers to all assertions which are actually name characteristics
1913              
1914             =item C
1915              
1916             refers to all assertions which are actually occurrences
1917              
1918             =item C
1919              
1920             refers to all assertions the infrastructure has provided. This implies that
1921              
1922             all - infrastructure
1923              
1924             is everything the user (application) has supplied.
1925              
1926             =back
1927              
1928             Examples:
1929              
1930             # all toplets except those from TM::PSI
1931             $tm->asserts (\ '+all -infrastructure')
1932              
1933             # like above, without assocs, so with names and occurrences
1934             $tm->asserts (\ '+all -associations')
1935              
1936             B: No attempt is made to make this list unique.
1937              
1938             B: The specifications are not commutative, but are interpreted from left-to-right. So C
1939             -associations +associations> is not the same as C.
1940             C<-infrastructure>.
1941              
1942             =cut
1943              
1944             sub asserts {
1945 45     45 1 9288 my $self = shift;
1946 45         126 my $asserts = $self->{assertions};
1947              
1948 45 100       145 if ($_[0]) {
1949 13 50       32 if (ref ($_[0])) {
1950 13         16 my $spec = ${$_[0]};
  13         22  
1951 13         23 my $l = []; # will be list
1952 13         65 while ($spec =~ s/([+-])(\w+)//) {
1953 37 100       159 if ($2 eq 'all') {
    100          
    100          
    100          
    50          
1954 11         58 $l = _mod_list ($1 eq '+', $l, keys %$asserts);
1955              
1956             } elsif ($2 eq 'associations') {
1957 30         76 $l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
  36         116  
1958 3         11 grep { $_->[TM->KIND] == TM->ASSOC } values %$asserts);
1959             } elsif ($2 eq 'names') {
1960 14         59 $l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
  168         571  
1961 14         46 grep { $_->[TM->KIND] == TM->NAME } values %$asserts);
1962             } elsif ($2 eq 'occurrences') {
1963 5         21 $l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
  60         197  
1964 5         17 grep { $_->[TM->KIND] == TM->OCC } values %$asserts);
1965             } elsif ($2 eq 'infrastructure') {
1966 4         44 $l = _mod_list ($1 eq '+', $l, keys %{$TM::infrastructure->{assertions}} );
  4         20  
1967             } else {
1968 0         0 $log->logdie (scalar __PACKAGE__ .": specification '$2' unknown");
1969             }
1970             }
1971 13 50       36 $log->logdie (scalar __PACKAGE__ .": unhandled specification '$spec' left") if $spec =~ /\S/;
1972 13         25 return map { $asserts->{$_} } @$l;
  92         192  
1973             } else {
1974 0         0 return $asserts->{@_};
1975             }
1976             } else {
1977 32         221 return values %$asserts;
1978             }
1979             }
1980              
1981             =pod
1982              
1983             =item B
1984              
1985             I<$bool> = I<$tm>->is_asserted (I<$a>)
1986              
1987             This method will return C<1> if the passed-in assertion exists in the store. The assertion will be
1988             canonicalized before checking, but no defaults will be added if parts are missing.
1989              
1990             =cut
1991              
1992             sub is_asserted {
1993 4     4 1 259 my $self = shift;
1994 4         7 my $a = shift;
1995              
1996 4 50       14 unless ($a->[CANON]) {
1997 4         13 absolutize ($self, $a);
1998 4         10 canonicalize (undef, $a);
1999 4         11 $a->[TM->LID] = mklabel ($a);
2000             }
2001 4         31 return $self->{assertions}->{ $a->[TM->LID] };
2002             }
2003              
2004             =pod
2005              
2006             =item B
2007              
2008             I<$tm>->retract (I<@list_of_assertion_ids>)
2009              
2010             This methods expects a list of assertion IDs and will remove the assertions from the map. If an ID
2011             is bogus, it will be ignored.
2012              
2013             B: Only these particular assertions will be deleted. Any toplets mentioned in these assertions
2014             will remain. Use C to remove unnecessary toplets.
2015              
2016             =cut
2017              
2018             sub retract {
2019 4     4 1 354 my $self = shift;
2020              
2021             # TODO: does delete $self->{assertions}->{@_} work?
2022 4         18 my $assertions = $self->{assertions};
2023 5         17 map {
2024 4         8 delete $assertions->{$_} # delete them from the primary store
2025             } @_;
2026 4         8 $self->{assertions} = $assertions; ##!! needed for Berkeley DBM recognize changes on deeper levels
2027 4         18 $self->{last_mod} = Time::HiRes::time;
2028             }
2029              
2030             =pod
2031              
2032             =item B, B, B
2033              
2034             I<@assertions> = I<$tm>->match (TM->FORALL [ , I ] );
2035              
2036             I<@assertions> = I<$tm>->match (TM->EXISTS [ , I ] );
2037              
2038             I<@assertions> = I<$tm>->match_forall ( [ I ] );
2039              
2040             I<@assertions> = I<$tm>->match_exists ( [ I ] );
2041              
2042             These methods take a search specification and return matching assertions. The result list contains
2043             references to the assertions themselves, not to copies. You can change the assertions themselves on
2044             your own risk (read: better not do it).
2045              
2046             For C, if the constant C is used as first parameter, this method returns a list of
2047             B assertions in the store following the search specification. If the constant C is
2048             used, the method will return a non-empty value if B can be found. Calling the more
2049             specific C is the same as calling C with C. Similar for
2050             C.
2051              
2052             B: C is not yet implemented.
2053              
2054             For I there are two alternatives:
2055              
2056             =over
2057              
2058             =item Generic Search
2059              
2060             Here the search specification is a hash with the same fields as for the constructor of an assertion:
2061              
2062             Example:
2063              
2064             $tm->match (TM->FORALL, type => '...',
2065             scope => '...,
2066             roles => [ ...., ....],
2067             players => [ ...., ....]);
2068              
2069             Any combination of assertion components can be used, all are optional, with the only constraint that
2070             the number of roles must match that for the players. All involved IDs should be absolutized before
2071             matching. If you use C for a role or a player, then this is interpreted as I
2072             (wildcard).
2073              
2074             =item Specialized Search
2075              
2076             The implementation also understands a number of specialized search specifications. These are
2077             listed in L.
2078              
2079             =back
2080              
2081             B: Some combinations will be very fast, while others quite slow. If you experience
2082             problems, then it might be time to think about indexing (see L).
2083              
2084             B: For the assertion type and the role subclassing is honored.
2085              
2086             =cut
2087              
2088             use constant {
2089 36         246050 EXISTS => 1,
2090             FORALL => 0
2091 36     36   281 };
  36         77  
2092              
2093             our %exists_handlers = (); # they should be written at some point
2094              
2095             our %forall_handlers = (
2096             '' => {
2097             code => sub { # no params => want all of them
2098             my $self = shift;
2099             return values %{$self->{assertions}};
2100             },
2101             desc => 'returns all assertions',
2102             params => {},
2103             },
2104              
2105             'nochar' => {
2106             code => sub {
2107             my $self = shift;
2108             return
2109             grep ($_->[KIND] <= ASSOC,
2110             values %{$self->{assertions}});
2111             },
2112             desc => 'returns all associations (so no names or occurrences)',
2113             params => { 'nochar' => '1'}
2114             },
2115             #-- taxos ---------------------------------------------------------------------------------------------
2116             'subclass.type' => {
2117             code => sub {
2118             my $self = shift;
2119             my $st = shift;
2120             my ($ISSC, $SUBCLASS) = ('is-subclass-of', 'subclass');
2121             return () unless shift eq $ISSC;
2122             return
2123             grep ( $self->is_x_player ($_, $st, $SUBCLASS),
2124             grep ( $_->[TYPE] eq $ISSC,
2125             values %{$self->{assertions}}));
2126             },
2127             desc => 'returns all assertions where there are subclasses of a given toplet',
2128             params => { 'type' => 'is-subclass-of', subclass => 'which toplet should be the superclass'},
2129             key => sub {
2130             my $self = shift;
2131             my $a = shift;
2132             my ($ISSC, $SUBCLASS) = ('is-subclass-of', 'subclass');
2133             return "subclass.type:". ($self->get_x_players ($a, $SUBCLASS))[0] . '.' . $ISSC;
2134             },
2135             enum => sub {
2136             my $self = shift;
2137             my ($ISSC) = ('is-subclass-of');
2138             return
2139             grep { $_->[TYPE] eq $ISSC }
2140             values %{$self->{assertions}};
2141             }
2142             },
2143            
2144             'superclass.type' => {
2145             code => sub {
2146             my $self = shift;
2147             my $st = shift;
2148             my ($ISSC, $SUPERCLASS) = ('is-subclass-of', 'superclass');
2149             return () unless shift eq $ISSC;
2150             return
2151             grep ( $self->is_x_player ($_, $st, $SUPERCLASS),
2152             grep ( $_->[TYPE] eq $ISSC,
2153             values %{$self->{assertions}}));
2154             },
2155             desc => 'returns all assertions where there are superclasses of a given toplet',
2156             params => { 'type' => 'is-subclass-of', superclass => 'which toplet should be the subclass'},
2157             key => sub {
2158             my $self = shift;
2159             my $a = shift;
2160             my ($ISSC, $SUPERCLASS) = ('is-subclass-of', 'superclass');
2161             return "superclass.type:". ($self->get_x_players ($a, $SUPERCLASS))[0] . '.' . $ISSC;
2162             },
2163             enum => sub {
2164             my $self = shift;
2165             my ($ISSC) = ('is-subclass-of');
2166             return
2167             grep { $_->[TYPE] eq $ISSC }
2168             values %{$self->{assertions}};
2169             }
2170             },
2171              
2172             'class.type' => {
2173             code => sub {
2174             my $self = shift;
2175             my $t = shift;
2176             my ($ISA, $CLASS) = ('isa', 'class');
2177             return () unless shift eq $ISA;
2178             return
2179             grep ( $self->is_x_player ($_, $t, $CLASS),
2180             grep ( $_->[TYPE] eq $ISA,
2181             values %{$self->{assertions}}));
2182             },
2183             desc => 'returns all assertions where there are instances of a given toplet',
2184             params => { type => 'isa', class => 'which toplet should be the class'},
2185             key => sub {
2186             my $self = shift;
2187             my $a = shift;
2188             my ($ISA, $CLASS) = ('isa', 'class');
2189             return "class.type:". ($self->get_x_players ($a, $CLASS))[0] . '.' . $ISA;
2190             },
2191             enum => sub {
2192             my $self = shift;
2193             my ($ISA) = ('isa');
2194             return
2195             grep { $_->[TYPE] eq $ISA }
2196             values %{$self->{assertions}};
2197             }
2198             },
2199              
2200             'instance.type' => {
2201             code => sub {
2202             my $self = shift;
2203             my $i = shift;
2204             my ($ISA, $INSTANCE) = ('isa', 'instance');
2205             return () unless shift eq $ISA;
2206             return
2207             grep ( $self->is_x_player ($_, $i, $INSTANCE),
2208             grep ( $_->[TYPE] eq $ISA,
2209             values %{$self->{assertions}}));
2210             },
2211             desc => 'returns all assertions where there are classes of a given toplet',
2212             params => { type => 'isa', instance => 'which toplet should be the instance'},
2213             key => sub {
2214             my $self = shift;
2215             my $a = shift;
2216             my ($ISA, $INSTANCE) = ('isa', 'instance');
2217             return "instance.type:". ($self->get_x_players ($a, $INSTANCE))[0] . '.' . $ISA;
2218             },
2219             enum => sub {
2220             my $self = shift;
2221             my ($ISA) = ('isa');
2222             return
2223             grep { $_->[TYPE] eq $ISA }
2224             values %{$self->{assertions}};
2225             }
2226             },
2227             #--
2228             'char.irole' => {
2229             code => sub {
2230             warn "char.irole is deprecated. use char.topic instead";
2231             my $self = shift;
2232             my $topic = $_[1];
2233             return undef unless $topic;
2234             return
2235             grep ($self->is_player ($_, $topic) && # TODO: optimize this grep away (getting chars is expensive)
2236             NAME <= $_->[KIND] && $_->[KIND] <= OCC,
2237             values %{$self->{assertions}});
2238             },
2239             desc => 'deprecated: return all assertions which are characteristics for a given toplet',
2240             params => { char => '1', irole => 'the toplet for which characteristics are sought'}
2241             },
2242              
2243             'char.topic' => {
2244             code => sub {
2245             my $self = shift;
2246             my $topic = $_[1];
2247             return
2248             grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2249             $_->[PLAYERS]->[0] eq $topic, # first role is always the 'thing'
2250             values %{$self->{assertions}});
2251             },
2252             desc => 'return all assertions which are characteristics for a given toplet',
2253             params => { char => '1', topic => 'the toplet for which characteristics are sought'},
2254             key => sub {
2255             my $self = shift;
2256             my $a = shift;
2257             return "char.topic:1.". $a->[PLAYERS]->[0];
2258             },
2259             enum => sub {
2260             my $self = shift;
2261             return
2262             grep { $_->[KIND] != ASSOC }
2263             values %{ $self->{assertions} };
2264             }
2265             },
2266              
2267             'char.value' => {
2268             code => sub {
2269             my $self = shift;
2270             my $value = $_[1];
2271             return
2272             grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2273             $_->[PLAYERS]->[1]->[0] eq $value->[0] && # second role is always the value
2274             $_->[PLAYERS]->[1]->[1] eq $value->[1], # test value AND type
2275             values %{$self->{assertions}});
2276             },
2277             desc => 'return all assertions which are characteristics for some topic of a given value',
2278             params => { char => '1', value => 'the value for which all characteristics are sought'},
2279             key => sub {
2280             my $self = shift;
2281             my $a = shift;
2282             return "char.value:1.". $a->[PLAYERS]->[1]->[0] . '.' . $a->[PLAYERS]->[1]->[1];
2283             },
2284             enum => sub {
2285             my $self = shift;
2286             return
2287             grep { $_->[KIND] != ASSOC }
2288             values %{ $self->{assertions} };
2289             }
2290             },
2291              
2292             'char.type' => {
2293             code => sub {
2294             my $self = shift;
2295             my $type = $_[1];
2296             return
2297             grep { $self->is_subclass ($_->[TYPE], $type ) }
2298             grep { $_->[KIND] != ASSOC }
2299             values %{$self->{assertions}};
2300             },
2301             desc => 'return all assertions which are characteristics for some given type',
2302             params => { char => '1', type => 'the characteristic type'},
2303             key => sub {
2304             my $self = shift;
2305             my $a = shift;
2306             return "char.type:1.". $a->[TYPE];
2307             },
2308             enum => sub {
2309             my $self = shift;
2310             return
2311             grep { $_->[KIND] != ASSOC }
2312             values %{ $self->{assertions} };
2313             }
2314             },
2315              
2316             'char.type.value' => {
2317             code => sub {
2318             my $self = shift;
2319             my $type = $_[1];
2320             my $value = $_[2];
2321             return
2322             grep { $self->is_subclass ($_->[TYPE], $type ) }
2323             grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2324             $_->[PLAYERS]->[1]->[0] eq $value->[0] && # second role is always the value
2325             $_->[PLAYERS]->[1]->[1] eq $value->[1], # test value AND type
2326             values %{$self->{assertions}});
2327             },
2328             desc => 'return all assertions which are characteristics for some topic of a given value for some given type',
2329             params => { char => '1', type => 'the characteristic type', value => 'the value for which all characteristics are sought'},
2330             key => sub {
2331             my $self = shift;
2332             my $a = shift;
2333             return "char.type.value:1.". $a->[TYPE] . '.' . $a->[PLAYERS]->[1]->[0] . '.' . $a->[PLAYERS]->[1]->[1];
2334             },
2335             enum => sub {
2336             my $self = shift;
2337             return
2338             grep { $_->[KIND] != ASSOC }
2339             values %{ $self->{assertions} };
2340             }
2341             },
2342              
2343             'char.topic.type' => {
2344             code => sub {
2345             my $self = shift;
2346             my $topic = $_[1];
2347             my $type = $_[2];
2348             return
2349             grep ($self->is_subclass ($_->[TYPE], $type),
2350             grep ($_->[PLAYERS]->[0] eq $topic && # first role is always the 'thing'
2351             NAME <= $_->[KIND] && $_->[KIND] <= OCC,
2352             values %{$self->{assertions}}));
2353             },
2354             desc => 'return all assertions which are a characteristic of a given type for a given topic',
2355             params => { char => '1', topic => 'the toplet for which these characteristics are sought', type => 'type of characteristic' },
2356             key => sub {
2357             my $self = shift;
2358             my $a = shift;
2359             return "char.topic.type:1.". $a->[PLAYERS]->[0] . '.' . $a->[TYPE] ;
2360             },
2361             enum => sub {
2362             my $self = shift;
2363             return
2364             grep { $_->[KIND] != ASSOC }
2365             values %{ $self->{assertions} };
2366             }
2367             },
2368              
2369             'lid' => {
2370             code => sub {
2371             my $self = shift;
2372             my $lid = $_[1];
2373             return
2374             $self->{assertions}->{$lid} || ();
2375             },
2376             desc => 'return one particular assertions with a given ID',
2377             params => { lid => 'the ID of the assertion' }
2378             },
2379              
2380             'type' => {
2381             code => sub {
2382             my $self = shift;
2383             my $type = $_[0];
2384             return
2385             grep ($self->is_subclass ($_->[TYPE], $type),
2386             values %{$self->{assertions}});
2387             },
2388             desc => 'return all assertions with a given type',
2389             params => { type => 'the type of the assertion' }
2390             },
2391            
2392             'iplayer' => {
2393             code => sub {
2394             my $self = shift;
2395             my $ip = $_[0];
2396             return
2397             grep ($self->is_player ($_, $ip),
2398             values %{$self->{assertions}});
2399             },
2400             desc => 'return all assertions where a given toplet is a player',
2401             params => { iplayer => 'the player toplet' }
2402             },
2403              
2404             'iplayer.type' => {
2405             code => sub {
2406             my $self = shift;
2407             my ($ip, $ty) = @_;
2408             return
2409             grep ($self->is_player ($_, $ip) &&
2410             $self->is_subclass ($_->[TYPE], $ty),
2411             values %{$self->{assertions}});
2412             },
2413             desc => 'return all assertions of a given type where a given toplet is a player',
2414             params => { iplayer => 'the player toplet', type => 'the type of the assertion' }
2415             },
2416              
2417             'iplayer.irole' => {
2418             code => sub {
2419             my $self = shift;
2420             my ($ip, $ir) = @_;
2421             return
2422             grep ($self->is_player ($_, $ip, $ir),
2423             values %{$self->{assertions}});
2424             },
2425             desc => 'return all assertions where a given toplet is a player of a given role',
2426             params => { iplayer => 'the player toplet', irole => 'the role toplet (incl subclasses)' },
2427             },
2428              
2429             'iplayer.irole.type' => {
2430             code => sub {
2431             my $self = shift;
2432             my ($ip, $ir, $ty) = @_;
2433             return
2434             grep ($self->is_subclass ($_->[TYPE], $ty) &&
2435             $self->is_player ($_, $ip, $ir),
2436             values %{$self->{assertions}});
2437             },
2438             desc => 'return all assertions of a given type where a given toplet is a player of a given role',
2439             params => { iplayer => 'the player toplet',
2440             irole => 'the role toplet (incl subclasses)',
2441             type => 'the type of the assertion' }
2442             },
2443              
2444             'irole.type' => {
2445             code => sub {
2446             my $self = shift;
2447             my ($ir, $ty) = @_;
2448             return
2449             grep ($self->is_role ($_, $ir) &&
2450             $self->is_subclass ($_->[TYPE], $ty),
2451             values %{$self->{assertions}});
2452             },
2453             desc => 'return all assertions of a given type where there is a given role',
2454             params => { irole => 'the role toplet (incl subclasses)', type => 'the type of the assertion' }
2455             },
2456              
2457             'irole' => {
2458             code => sub {
2459             my $self = shift;
2460             my ($ir) = @_;
2461             return
2462             grep ($self->is_role ($_, $ir),
2463             values %{$self->{assertions}});
2464             },
2465             desc => 'return all assertions where there is a given role',
2466             params => { irole => 'the role toplet (incl subclasses)' }
2467             },
2468              
2469             'aplayer.arole.brole.type' => {
2470             code => sub {
2471             my $self = shift;
2472             my ($ap, $ar, $br, $ty) = @_;
2473             return
2474             grep ( $self->is_role ($_, $br),
2475             grep ( $self->is_player ($_, $ap, $ar),
2476             grep ( $self->is_subclass ($_->[TYPE], $ty),
2477             values %{$self->{assertions}})));
2478             },
2479             desc => 'return all assertions of a given type where a given toplet plays a given role and there exist another given role',
2480             params => { aplayer => 'the player toplet for the arole',
2481             arole => 'the role toplet (incl subclasses) for the aplayer',
2482             brole => 'the other role toplet (incl subclasses)',
2483             type => 'the type of the assertion'
2484             }
2485             },
2486            
2487             'aplayer.arole.bplayer.brole.type' => {
2488             code => sub {
2489             my $self = shift;
2490             my ($ap, $ar, $bp, $br, $ty) = @_;
2491             return
2492             grep ( $self->is_player ($_, $bp, $br),
2493             grep ( $self->is_player ($_, $ap, $ar),
2494             grep ( $self->is_subclass ($_->[TYPE], $ty),
2495             values %{$self->{assertions}})));
2496             },
2497             desc => 'return all assertions of a given type where a given toplet plays a given role and there exist another given role with another given toplet as player',
2498             params => { aplayer => 'the player toplet for the arole',
2499             arole => 'the role toplet (incl subclasses) for the aplayer',
2500             brole => 'the other role toplet (incl subclasses)',
2501             bplayer => 'the player for the brole',
2502             type => 'the type of the assertion'
2503             }
2504             },
2505              
2506             'anyid' => {
2507             code => sub {
2508             my $self = shift;
2509             my $lid = shift;
2510             return
2511             grep (
2512             $self->is_subclass ($_->[TYPE], $lid) || # probably not a good idea
2513             $_->[TYPE] eq $lid || # this seems a bit safer
2514             $_->[SCOPE] eq $lid ||
2515             $self->is_player ($_, $lid) ||
2516             $self->is_role ($_, $lid) ,
2517             values %{$self->{assertions}});
2518             },
2519             desc => 'return all assertions where a given toplet appears somehow',
2520             params => { anyid => 'the toplet' }
2521             }
2522            
2523             );
2524              
2525             sub _allinone {
2526 61     61   89 my $self = shift;
2527 61         86 my $exists = shift;
2528 61         2477 my $template = Assertion->new (@_); # we create an assertion on the fly
2529             #warn "allinone ".Dumper $template;
2530 61         3381 $self->absolutize ($template);
2531             #warn "allinone2".Dumper $template;
2532 61         169 $self->canonicalize ($template); # of course, need to be canonicalized
2533             #warn "allinone3".Dumper $template;
2534              
2535             #warn "in store match template ".Dumper $template;
2536 61         80 my @mads;
2537 61         241 ASSERTION:
2538 61         86 foreach my $m (values %{$self->{assertions}}) { # arbitrary AsTMa! queries TBD, can be faster as well
2539            
2540 785 100 100     2236 next if defined $template->[KIND] && # is kind defined
2541             $m->[KIND] ne $template->[KIND]; # and does it match?
2542             #warn "after kind";
2543 770 100 100     2225 next if defined $template->[SCOPE] &&
2544             $m->[SCOPE] ne $self->tids ($template->[SCOPE]); # does scope match?
2545             #warn "after scope";
2546 632 100 100     2221 next if defined $template->[TYPE] &&
2547             !$self->is_subclass ($m->[TYPE], $self->tids ($template->[TYPE])); # does type match (including subclassing)?
2548             #warn "after type";
2549            
2550 271         634 my ($rm, $rc) = ($m->[ROLES], $template->[ROLES]);
2551 271 100 50     648 push @mads, $m and next ASSERTION if ! @$rc; # match ok, if we have no roles
2552             #warn "after push roles";
2553 242 50       490 next ASSERTION if @$rm != @$rc; # quick check: roles must be of equal length
2554             #warn "after roles";
2555 242         388 my ($pm, $pc) = ($m->[PLAYERS], $template->[PLAYERS]);
2556 242 50 0     445 push @mads, $m and next ASSERTION if ! @$pc; # match ok, if we have no players
2557 242 50       461 next if @$pm != @$pc; # quick check: roles and players must be of equal length
2558             #warn "after players equal length ".Dumper ($pm, $pc);
2559              
2560             ####### $pm = [ $self->tids (@$pm) ];
2561 242         319 for (my $i = 0; $i < @{$rm}; $i++) { # order is canonicalized, would not want to test all permutations
  339         868  
2562             #warn "before role tests : is $rm->[$i] subclass of $rc->[$i]?";
2563 298 50 33     1137 next ASSERTION if defined $rc->[$i] && !$self->is_subclass ($rm->[$i], $rc->[$i]); # go to next assertion if that does not match
2564             #warn "after role ok";
2565 298 100 100     1663 next ASSERTION if defined $pc->[$i] && $pm->[$i] ne $pc->[$i];
2566             }
2567             #warn "after players roles";
2568 41 50       90 return (1) if $exists; # with exists that's it
2569 41         84 push @mads, $m; # with forall we do continue to collect
2570             }
2571             #warn "we return ".Dumper \@mads;
2572 61         690 return @mads; # and return what we got
2573             }
2574              
2575             #sub _fat_mama {
2576             # use Proc::ProcessTable;
2577             # my $t = new Proc::ProcessTable;
2578             ##warn Dumper [ $t->fields ]; exit;
2579             # my ($me) = grep {$_->pid == $$ } @{ $t->table };
2580             ##warn "size: ". $me->size;
2581             # return $me->size / 1024.0 / 1024.0;
2582             #}
2583              
2584              
2585              
2586             sub match_forall {
2587 1792     1792 1 14764 my $self = shift;
2588 1792         6117 my %query = @_;
2589             #warn "forall ".Dumper \%query;
2590              
2591 1792         13098 my @skeys = sort keys %query; # all fields make up the key
2592 1792         4115 my $skeys = join ('.', @skeys);
2593 1792         2719 my @svals = map { $query{$_} } @skeys;
  3510         7926  
2594              
2595 1792 50       4249 if (my $idxs = $self->{indices}) { # there are indices to help me
2596 0         0 my $key = "$skeys:" . join ('.', @svals);
2597 0         0 foreach my $idx (@$idxs) {
2598 0 0       0 if (my $lids = $idx->is_cached ($key)) { # if result was cached, lets take the list of lids
2599             # warn "using cached for $key". Dumper $lids;
2600 0         0 return map { $self->{assertions}->{$_} } @$lids; # and return fully fledged
  0         0  
2601             }
2602             }
2603             # obviously we have not found it # not defined means not cache => recompute
2604 0         0 my @as = _dispatch_forall ($self, \%query, $skeys, @svals); # do it the hard way
2605 0         0 $idxs->[0]->do_cache ($key, [ map { $_->[LID] } @as ]); # save it for later, simply use the first [0]
  0         0  
2606 0         0 return @as;
2607             } else { # no cache, let's do the ochsentour
2608 1792         4265 return _dispatch_forall ($self, \%query, $skeys, @svals);
2609             }
2610              
2611             sub _dispatch_forall {
2612 1792     1792   2135 my $self = shift;
2613 1792         1952 my $query = shift;
2614 1792         2099 my $skeys = shift;
2615              
2616 1792 100       4619 if (my $handler = $forall_handlers{$skeys}) { # there is a constraint and we have a handler
2617 1731         2162 return &{$handler->{code}} ($self, @_);
  1731         4758  
2618             } else { # otherwise
2619 61         249 return _allinone ($self, 0, %$query); # we use a generic handler, slow but should do the trick
2620             }
2621             }
2622              
2623             }
2624              
2625             sub match_exists {
2626 0     0 1 0 my $self = shift;
2627 0         0 my %query = @_;
2628              
2629             #warn "exists ".Dumper $query;
2630              
2631 0         0 my @skeys = sort keys %query; # all fields make up the key
2632 0         0 my $skeys = join ('.', @skeys);
2633              
2634             #warn "keys for this $skeys";
2635 0 0       0 if (my $handler = $exists_handlers{$skeys}) { # there is a constraint and we have a handler
2636 0         0 return &{$handler->{code}} ($self, map { $query{$_} } @skeys);
  0         0  
  0         0  
2637             } else { # otherwise
2638 0         0 return _allinone ($self, 1, %query); # we use a generic handler, slow but should do the trick
2639             }
2640             }
2641              
2642             sub match {
2643 254     254 1 57088 my $self = shift;
2644 254         411 my $exists = shift; # FORALL or EXIST, DOES NOT work yet
2645              
2646 254 50       980 return $exists ? match_exists ($self, @_) : match_forall ($self, @_);
2647             }
2648              
2649              
2650             =pod
2651              
2652             =back
2653              
2654             =head2 Role Retrieval
2655              
2656             =over
2657              
2658             =item B, B
2659              
2660             I<$bool> = is_player (I<$tm>, I<$assertion>, I<$player_id>, [ I<$role_id> ])
2661              
2662             I<$bool> = is_x_player (I<$tm>, I<$assertion>, I<$player_id>, [ I<$role_id> ])
2663              
2664             This function returns C<1> if the identifier specified by the C parameter plays any role
2665             in the assertion provided as C parameter.
2666              
2667             If the C is provided as third parameter then it must be exactly this role (or any subclass
2668             thereof) that is played. The 'x'-version is using equality instead of 'subclassing' ('x' for
2669             "exact").
2670              
2671             =cut
2672              
2673             sub is_player {
2674 1380     1380 1 1730 my $self = shift;
2675 1380         1538 my $m = shift;
2676              
2677             # warn "is_player ".Dumper \@_;
2678             # warn "caller: ". Dumper [ caller ];
2679             # foreach (0..0) {
2680             # warn " ".join (' ---- ', caller($_));
2681             # }
2682              
2683 1380         1589 my $p = shift;# or die "must specify valid player: ".Dumper ([ $m ])." and role is ".shift;
2684             #
2685             # warn "after shifting player '$p'";
2686 1380         1520 my $r = shift; # may be undefined
2687              
2688 1380 50       2548 $log->logdie ("must specify a player '$p' for role '$r'") unless $p;
2689              
2690 1380 100       2169 if ($r) {
2691 303         552 my ($ps, $rs) = ($m->[PLAYERS], $m->[ROLES]);
2692              
2693 303         741 for (my $i = 0; $i < @$ps; $i++) {
2694 529 100       1537 next unless $ps->[$i] eq $p;
2695 113 50       305 next unless $self->is_subclass ($rs->[$i], $r);
2696 113         632 return 1;
2697             }
2698             } else {
2699 1077 100       1081 return 1 if grep ($_ eq $p, @{$m->[PLAYERS]});
  1077         4835  
2700             }
2701 1015         4199 return 0;
2702             }
2703              
2704             sub is_x_player {
2705 10405     10405 1 12334 my $self = shift;
2706 10405         10730 my $m = shift;
2707 10405 50       21437 my $p = shift or $log->logdie ("must specify x-player: ".Dumper ([ $m ]));
2708 10405         11046 my $r = shift; # may be undefined
2709              
2710 10405 50       16465 if ($r) {
2711 10405         15917 my ($ps, $rs) = ($m->[PLAYERS], $m->[ROLES]);
2712              
2713 10405         34659 for (my $i = 0; $i < @$ps; $i++) {
2714 19981 100       58342 next unless $ps->[$i] eq $p;
2715 2520 100       7310 next unless $rs->[$i] eq $r;
2716 912         3231 return 1;
2717             }
2718             } else {
2719 0 0       0 return 1 if grep ($_ eq $p, @{$m->[PLAYERS]});
  0         0  
2720             }
2721 9493         51273 return 0;
2722             }
2723              
2724             =pod
2725              
2726             =item B, B
2727              
2728             I<@player_ids> = get_players (I<$tm>, I<$assertion>, [ I<$role_id> ])
2729              
2730             I<@player_ids> = get_x_players (I<$tm>, I<$assertion>, I<$role_id>)
2731              
2732             This function returns the player(s) for the given role. If the role is not provided all players are
2733             returned.
2734              
2735             The "x" version does not honor subclassing.
2736              
2737             =cut
2738              
2739             sub get_players {
2740 52     52 1 75 my $self = shift;
2741 52         64 my $a = shift;
2742 52         64 my $r = shift;
2743            
2744 52 50       110 return @{ $a->[PLAYERS] } unless $r;
  0         0  
2745 52         97 my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2746            
2747 52         61 my @ps;
2748 52         154 for (my $i = 0; $i < @$ps; $i++) {
2749 104 100       603 next unless $self->is_subclass ($rs->[$i], $r);
2750 52         165 push @ps, $ps->[$i];
2751             }
2752 52         229 return @ps;
2753             }
2754              
2755             sub get_x_players {
2756 2512     2512 1 3493 my $self = shift;
2757 2512         2763 my $a = shift;
2758 2512         2914 my $r = shift;
2759              
2760 2512         4608 my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2761            
2762 2512         2907 my @ps;
2763 2512         6758 for (my $i = 0; $i < @$ps; $i++) {
2764 5024 100       14623 next unless $rs->[$i] eq $r;
2765 2512         7488 push @ps, $ps->[$i];
2766             }
2767 2512         7712 return @ps;
2768             }
2769              
2770             =pod
2771              
2772             =item B, B
2773              
2774             I<$bool> = is_role (I<$tm>, I<$assertion>, I<$role_id>)
2775              
2776             I<$bool> = is_x_role (I<$tm>, I<$assertion>, I<$role_id>)
2777              
2778             This function returns C<1> if the C is a role in the assertion provided. The "x" version of
2779             this function does not honor subclassing.
2780              
2781             =cut
2782              
2783             sub is_role {
2784 36     36 1 44 my $self = shift;
2785 36         43 my $m = shift;
2786 36 50       78 my $r = shift or $log->logdie ("must specify role: ".Dumper ([ $m ]));
2787              
2788 36 100       40 return 1 if grep ($self->is_subclass ($_, $r), @{$m->[ROLES]});
  36         101  
2789             }
2790              
2791             sub is_x_role {
2792 0     0 1 0 my $self = shift;
2793 0         0 my $m = shift;
2794 0 0       0 my $r = shift or $log->logdie ("must specify role: ".Dumper ([ $m ]));
2795              
2796 0 0       0 return 1 if grep ($_ eq $r, @{$m->[ROLES]});
  0         0  
2797             }
2798              
2799             =pod
2800              
2801             =item B
2802              
2803             I<@role_ids> = get_roles (I<$tm>, I<$assertion>, I<$player>)
2804              
2805             This function returns a list of roles a particular player plays in a given assertion.
2806              
2807             =cut
2808              
2809             sub get_roles {
2810 0     0 1 0 my $self = shift;
2811 0         0 my $a = shift;
2812 0         0 my $p = shift; # the player
2813              
2814 0         0 my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2815            
2816 0         0 my @rs;
2817 0         0 for (my $i = 0; $i < @$ps; $i++) {
2818 0 0       0 next unless $ps->[$i] eq $p;
2819 0         0 push @rs, $rs->[$i];
2820             }
2821 0         0 return @rs;
2822             }
2823              
2824             =pod
2825              
2826             =item B
2827              
2828             I<@role_ids> = @{ get_role_s (I<$tm>, I<$assertion>) }
2829              
2830             This function extracts a reference to the list of role identifiers.
2831              
2832             =cut
2833              
2834             sub get_role_s {
2835 0     0 1 0 my $self = shift;
2836 0         0 my $a = shift;
2837 0         0 return $a->[ROLES];
2838             }
2839              
2840             =pod
2841              
2842             =back
2843              
2844              
2845             =head2 Auxiliary Functions
2846              
2847             =over
2848              
2849             =item B
2850              
2851             I<$assertion> = absolutize (I<$tm>, I<$assertion>)
2852              
2853             This method takes one assertion and makes sure that all identifiers in it (for the type, the scope
2854             and all the role and players) are made absolute for the context map. It returns this very assertion.
2855             It will not touch canonicalized assertions.
2856              
2857             =cut
2858              
2859             sub absolutize {
2860 65     65 1 88 my $self = shift;
2861 65         98 my $a = shift;
2862              
2863 65 50       203 return $a if $a->[CANON]; # skip it if we are already canonicalized
2864             #warn "in abosl ".Dumper $a;
2865 65 100       262 $a->[TYPE] = tids ($self, $a->[TYPE]) if $a->[TYPE];
2866 65 100       273 $a->[SCOPE] = tids ($self, $a->[SCOPE]) if $a->[SCOPE];
2867              
2868 65 100       174 map { $_ = tids ($self, $_) } @{$a->[ROLES]} if $a->[ROLES]; # things which are references, we will keep
  97         314  
  47         142  
2869 65 50       196 map { $_ = ref ($_) ? $_ : tids ($self, $_) } @{$a->[PLAYERS]} if $a->[PLAYERS]; # the others are treated as ids (could be literal references!)
  93 100       218  
  45         87  
2870             #warn "after abosl ".Dumper $a;
2871 65         100 return $a;
2872             }
2873              
2874             =pod
2875              
2876             =item B
2877              
2878             I<$assertion> = canonicalize (I<$tm>, I<$assertion>)
2879              
2880             This method takes an assertion and reorders the roles (together with their respective players) in a
2881             consistent way. It also makes sure that the KIND is defined (defaults to C), that the type is
2882             defined (defaults to C) and that all references are made absolute LIDs. Finally, the field
2883             C is set to 1 to indicate that the assertion is canonicalized.
2884              
2885             The function will not do anything if the assertion is already canonicalized. The component C
2886             is set to C<1> if the assertion has been canonicalized.
2887              
2888             Conveniently, the function returns the same assertion, albeit a maybe modified one.
2889              
2890             TODO: remove map parameter, it is no longer necessary
2891              
2892             =cut
2893              
2894             sub canonicalize {
2895 10002     10002 1 15248 my $self = shift;
2896 10002         13242 my $s = shift;
2897             #warn "in canon ".Dumper $s;
2898             #warn "using LIDs ".Dumper $LIDs;
2899              
2900 10002 50       33064 return $s if $s->[CANON]; # skip it if we are already canonicalized
2901              
2902             # reorder role/players canonically
2903 10002         15405 my $rs = $s->[ROLES];
2904 10002         13611 my $ps = $s->[PLAYERS];
2905 10002         26608 my @reorder = (0..$#$ps); # create 0, 1, 2, ..., how many roles
2906             #warn @reorder;
2907             # sort according to roles (alphanum) and at ties according to players on position $a, $b
2908 10002 50       29496 @reorder = sort { $rs->[$a] cmp $rs->[$b] || $ps->[$a] cmp $ps->[$b] } @reorder;
  9984         36246  
2909             #warn @reorder;
2910 10002         16568 $s->[ROLES] = [ map { $rs->[$_] } @reorder ];
  19951         51332  
2911 10002         17901 $s->[PLAYERS] = [ map { $ps->[$_] } @reorder ];
  19951         62446  
2912             # we are done (almost)
2913 10002         18774 $s->[CANON] = 1;
2914              
2915             #warn "in canon return ".Dumper $s;
2916 10002         57673 return $s;
2917             }
2918              
2919             # =pod
2920              
2921             # =item B
2922              
2923             # I<$hash> = mklabel (I<$assertion>);
2924              
2925             # For internal optimization all characteristics have an additional HASH component which can be used to
2926             # maintain indices. This function takes a assertion and computes an MD5 hash and sets the C
2927             # component if that is not yet defined.
2928              
2929             # Such a hash only makes sense if the assertion is canonicalized, otherwise an exception is raised.
2930              
2931             # Example:
2932              
2933             # my $a = Assertion->new (lid => 'urn:x-rho:important');
2934             # print "this uniquely (well) identifies the assertion ". mklabel ($a);
2935              
2936             # =cut
2937              
2938             sub mklabel {
2939 9932     9932 0 14355 my $a = shift;
2940 9932 50       26774 $log->logdie ("refuse to hash non canonicalized assertion") unless $a->[CANON];
2941 36     36   484 use Digest::MD5 qw(md5_hex);
  36         110  
  36         100653  
2942 9932 100       15287 return md5_hex ($a->[SCOPE], $a->[TYPE], @{$a->[ROLES]}, map { ref ($_) ? join ("", @$_) : $_ } @{$a->[PLAYERS]}); # recompute the hash if necessary
  9932         17964  
  19855         118934  
  9932         25758  
2943             # ^^^^^^^^^^^^^^ # this is a literal value
2944             # ^^ # this is for a normal identifier
2945             }
2946              
2947             =pod
2948              
2949             =back
2950              
2951             =head1 TAXONOMICS AND SUBSUMPTION
2952              
2953             The following methods provide useful basic, ontological functionality around transitive subclassing
2954             between classes and instance/type relationships.
2955              
2956             B: Everything is a subclass of C (changed in v1.35).
2957              
2958             B: Everything is an instance of C.
2959              
2960             B: See L for predefined things.
2961              
2962             =head2 Boolean Methods
2963              
2964             =over
2965              
2966             =item B
2967              
2968             I<$bool> = I<$tm>->is_subclass (I<$superclass_id>, I<$subclass_id>)
2969              
2970             This function returns C<1> if the first parameter is a (transitive) superclass of the second,
2971             i.e. there is an assertion of type I in the context map. It also returns C<1> if the
2972             superclass is a $TM::PSI::THING or if subclass and superclass are the same (reflexive).
2973              
2974             =cut
2975              
2976             sub is_subclass {
2977 3923     3923 1 6007 my $self = shift;
2978 3923         5027 my $class = shift;
2979 3923         4145 my $super = shift;
2980              
2981 3923 100       11451 return 1 if $class eq $super; # we always assume that A subclasses A
2982              
2983 2890         5608 my ($ISA, $US, $THING, $SUBCLASSES, $SUBCLASS, $SUPERCLASS, $INSTANCE, $CLASS) =
2984             ('isa', 'us', 'thing', 'is-subclass-of', 'subclass', 'superclass', 'instance', 'class');
2985              
2986             #warn "is_subclass?: class $class super $super , thing $THING, $SUBCLASSES, $SUPERCLASS";
2987 2890 100       5227 return 1 if $super eq $THING; # everything subclasses thing
2988             # but not if the class is one of the predefined things, yes, there is a method to this madness
2989 2873 100       6921 return 0 if $class eq $ISA;
2990 2241 50       4068 return 0 if $class eq $US;
2991 2241 100       4195 return 0 if $class eq $THING; # thing would only subclass itself and that is covered above
2992 2239 100       7861 return 0 if $class eq $SUBCLASSES;
2993 1197 100       2260 return 0 if $class eq $SUBCLASS;
2994 1181 100       2392 return 0 if $class eq $SUPERCLASS;
2995 1161 100       2504 return 0 if $class eq $INSTANCE;
2996 1099 100       1939 return 0 if $class eq $CLASS;
2997             # # see whether there is an assertion that we have a direct subclasses relationship between the two
2998              
2999             # This would be an optimization, but this does not go through match
3000             # return 1 if $self->is_asserted (Assertion->new (scope => $US, # TODO OPTIMIZE
3001             # type => $SUBCLASSES,
3002             # roles => [ $SUBCLASS, $SUPERCLASS ],
3003             # players => [ $class, $super ])
3004             # );
3005             # if we still do not have a decision, we will check all super types of $class and see (recursively) whether we can establish is-subclass-of
3006 784         1893 return 1 if grep ($self->is_subclass ($_, $super), # check all of the intermediate type whether there is a transitive relation
3007 1080 100       2414 map { $self->get_x_players ($_, $SUPERCLASS) } # find the superclass player there => intermediate type
3008             $self->match_forall (type => $SUBCLASSES,
3009             subclass => $class)
3010             );
3011 1011         5672 return 0; # ok, we give up now
3012             }
3013              
3014             =pod
3015              
3016             =item B
3017              
3018             I<$bool> = I<$tm>->is_a (I<$something_lid>, I<$class_lid>)
3019              
3020             This method returns C<1> if the thing referenced by the first parameter is an instance of the class
3021             referenced by the second. The method honors transitive subclassing.
3022              
3023             =cut
3024              
3025             sub is_a {
3026 86     86 1 143 my $self = shift;
3027 86         121 my $thingie = shift;
3028 86         102 my $type = shift; # ok, what class are looking at?
3029              
3030 86         193 my ($ISA, $CLASS, $THING) = ('isa', 'class', 'thing');
3031              
3032             #warn "isa thingie $thingie class $type";
3033              
3034 86 100 100     258 return 1 if $type eq $THING and # is the class == 'thing' and
3035             $self->{mid2iid}->{$thingie}; # and does the thingie exist?
3036              
3037 85         218 my ($m) = $self->retrieve ($thingie);
3038 85 50 33     274 return 1 if $m and # is it an assertion ? and...
3039             $self->is_subclass ($m->[TYPE], $type); # is the assertion type a subclass?
3040              
3041 26         89 return 1 if grep ($self->is_subclass ($_, $type), # check all of the intermediate type whether there is a transitive relation
3042 85 100       268 map { $self->get_players ($_, $CLASS) } # find the class player there => intermediate type
3043             $self->match_forall (type => $ISA, instance => $thingie)
3044             );
3045 67         593 return 0;
3046             }
3047              
3048             =pod
3049              
3050             =back
3051              
3052             =head2 List Methods
3053              
3054             =over
3055              
3056             =item B, B
3057              
3058             I<@lids> = I<$tm>->subclasses (I<$lid>, ...)
3059              
3060             I<@lids> = I<$tm>->subclassesT (I<$lid>, ...)
3061              
3062             C returns all B subclasses of the toplet identified by C<$lid>. If the toplet does
3063             not exist, the list will be empty. C is a variant which honors the transitive
3064             subclassing (so if A is a subclass of B and B is a subclass of C, then A is also a subclass of C).
3065              
3066             Duplicates are suppressed.
3067              
3068             =cut
3069              
3070             sub subclasses {
3071 26     26 1 10002 my $self = shift;
3072              
3073 26         55 my ($SUBCLASSES) = ('is-subclass-of');
3074 23         98 my @sc = map { $_->[PLAYERS]->[0] }
  26         83  
3075 26         65 map { $self->match_forall (type => $SUBCLASSES, superclass => $_) }
3076             @_;
3077 26         53 my %dup;
3078 26 50       63 return map { $dup{$_}++ ? () : $_ } @sc;
  23         211  
3079             }
3080              
3081             sub subclassesT {
3082 10     10 1 1057 my $self = shift;
3083              
3084 10         14 my @sc = map { $self->subclasses ($_) } @_;
  10         20  
3085 10         19 push @sc, @_, map { $self->subclassesT ($_) } @sc; # laziness equals recursion
  6         21  
3086 10         14 my %dup;
3087 10 100       12 return map { $dup{$_}++ ? () : $_ } @sc;
  23         107  
3088             }
3089              
3090             =pod
3091              
3092             =item B, B
3093              
3094             I<@lids> = I<$tm>->superclasses (I<$lid>, ...)
3095              
3096             I<@lids> = I<$tm>->superclassesT (I<$lid>, ...)
3097              
3098             The method C returns all direct superclasses of the toplet identified by C<$lid>. If
3099             the toplet does not exist, the list will be empty. C is a variant which honors
3100             transitive subclassing.
3101              
3102             Duplicates are suppressed.
3103              
3104             =cut
3105              
3106             sub superclasses {
3107 13     13 1 1306 my $self = shift;
3108              
3109 13         18 my ($SUBCLASSES) = ('is-subclass-of');
3110 10         29 my @sc = map { $_->[PLAYERS]->[1] }
  13         25  
3111 13         22 map { $self->match_forall (type => $SUBCLASSES, subclass => $_) }
3112             @_;
3113 13         25 my %dup;
3114 13 50       26 return map { $dup{$_}++ ? () : $_ } @sc;
  10         56  
3115             }
3116              
3117             sub superclassesT {
3118 11     11 1 638 my $self = shift;
3119              
3120 11         33 my @sc = map { $self->superclasses ($_) } @_;
  11         22  
3121 11         20 push @sc, @_, map { $self->superclassesT ($_) } @sc; # laziness equals recursion
  7         20  
3122 11         12 my %dup;
3123 11 100       12 return map { $dup{$_}++ ? () : $_ } @sc;
  29         99  
3124             }
3125              
3126             =pod
3127              
3128             =item B, B
3129              
3130             I<@lids> = I<$tm>->types (I<$lid>, ...)
3131              
3132             I<@lids> = I<$tm>->typesT (I<$lid>, ...)
3133              
3134             The method C returns all direct classes of the toplet identified by C<$lid>. If the toplet does
3135             not exist, the list will be empty. C is a variant which honors transitive subclassing (so if
3136             I is an instance of type I and I is a subclass of I, then I is also an instance of
3137             I).
3138              
3139             Duplicates will be suppressed.
3140              
3141             =cut
3142              
3143             sub types {
3144 14     14 1 3829 my $self = shift;
3145 14         24 my $ISA = ('isa');
3146 14         20 my $a;
3147 14         22 my @types = map { ($a = $self->retrieve ($_))
  10         44  
3148             ? $a->[TYPE]
3149 16 100       45 : ( map { $_->[PLAYERS]->[0] } $self->match_forall (type => $ISA, instance => $_) )
3150             }
3151             @_;
3152 14         20 my %dup;
3153 14 100       29 return map { $dup{$_}++ ? () : $_ } @types;
  18         128  
3154             }
3155              
3156             sub typesT {
3157 2     2 1 1046 my $self = shift;
3158              
3159 2         8 my @types = map { $self->types ($_) } @_;
  2         5  
3160 2         5 push @types, map { $self->superclassesT ($_) } @types;
  3         9  
3161 2         4 my %dup;
3162 2 100       5 return map { $dup{$_}++ ? () : $_ } @types;
  10         37  
3163             }
3164              
3165              
3166             =pod
3167              
3168             =item B, B
3169              
3170             I<@lids> = I<$tm>->instances (I<$lid>, ...)
3171              
3172             I<@lids> = I<$tm>->instancesT (I<$lid>, ...)
3173              
3174             These methods return the direct (C) and also indirect (C) instances of the
3175             toplet identified by C<$lid>.
3176              
3177             Duplicates are suppressed.
3178              
3179             =cut
3180              
3181             sub instances {
3182 90     90 1 11124 my $self = shift;
3183              
3184             # warn Dumper [ caller ] unless @_;
3185              
3186 90         212 my ($ISA, $THING) = ('isa', 'thing');
3187              
3188 118         283 my @instances = map {
3189 90         456 $_ eq $THING
3190 13         33 ? map { $_->[TM->LID] } $self->toplets
3191             :
3192 35         282 (map { $_->[LID ] } $self->match_forall (type => $_)), # all assocs of this type
3193 90 100       518 (map { $_->[PLAYERS]->[1] } $self->match_forall (type => $ISA, class => $_)) # all direct instances
3194             } @_;
3195             }
3196              
3197             sub instancesT {
3198 2     2 1 1345 my $self = shift;
3199              
3200 3         7 my @instances = map { $self->instances ($_) }
  2         7  
3201 2         6 map { $self->subclassesT ($_) }
3202             @_;
3203 2         6 my %dup;
3204 2 50       5 return map { $dup{$_}++ ? () : $_ } @instances;
  31         98  
3205             }
3206              
3207             =pod
3208              
3209             =back
3210              
3211             =head2 Filters
3212              
3213             Quite often one needs to walk through a list of things to determine whether they are instances (or
3214             types, subtypes or supertypes) of some concept. This list of functions lets you do that: you pass in
3215             a list (reference) and the function behaves as filter, returning a list reference.
3216              
3217             =over
3218              
3219             =item B
3220              
3221             I<@id> = I<$tm>->are_instances (I<$class_id>, I<@list_of_ids>)
3222              
3223             Returns all those ids where the topic is an instance of the class provided.
3224              
3225             =cut
3226              
3227             sub are_instances {
3228 3     3 1 208 my $self = shift;
3229 3         7 my $class = shift; # ok, what class are we looking at?
3230              
3231 3         9 my ($THING, $ISA, $CLASS) = ('thing', 'isa', 'class');
3232              
3233 3         4 my @rs;
3234 3         8 foreach my $thing (@_) { # we work through all the things we got
3235             #warn "checking $thing";
3236 111 50 0     268 push @rs, $thing and next # we happily take one if
      33        
3237             if $class eq $THING and # is the class = 'thing' ? and
3238             $self->midlet ($thing); # then does the thing exist in the map ?
3239              
3240 111         248 my $m = $self->retrieve ($thing);
3241 111 0 0     229 push @rs, $thing and next # we happily take one if
      0        
      33        
3242             if $m and # it is an assertion ? and...
3243             ($class eq $THING # either it is the class a THING (we did not explicitly store _that_)
3244             or
3245             $self->is_subclass ($m->[TYPE], $class) # or is the assertion type a subclass?
3246             );
3247              
3248 24         60 push @rs, $thing and next # we happily take one if
3249             if grep ($self->is_subclass ($_, $class), # finall we check all of the intermediate type whether there is a transitive relation
3250 111 100 50     220 map { $self->get_players ($_, $CLASS) } # then we find the 'class' value
3251             $self->match_forall (type => $ISA, instance => $thing));
3252             # nothing # otherwise we do not push
3253             }
3254 3         40 return @rs;
3255             }
3256              
3257             =pod
3258              
3259             =item B (Warning: placeholder only)
3260              
3261             I<@ids> = I<$tm>->are_types (I<$instance_id>, I<@list_of_ids>)
3262              
3263             Returns all those ids where the topic is a type of the instance provided.
3264              
3265             =cut
3266              
3267             sub are_types {
3268 1     1 1 626 $log->logwarn ("# not implemented function");
3269 1         978 return 0;
3270             }
3271              
3272             =pod
3273              
3274             =item B (Warning: placeholder only)
3275              
3276             I<@ids> = I<$tm>->are_supertypes (I<$class_id>, I<@list_of_ids>)
3277              
3278             Returns all those ids where the topic is a supertype of the class provided.
3279              
3280             =cut
3281              
3282             sub are_supertypes {
3283 1     1 1 5 $log->logwarn ("# not implemented function");
3284 1         408 return 0;
3285             }
3286              
3287             =pod
3288              
3289             =item B (Warning: placeholder only)
3290              
3291             I<@ids> = I<$tm>->are_subtypes (I<$class_id>, I<@list_of_ids>)
3292              
3293             Returns all those ids where the topic is a subtype of the class provided.
3294              
3295             =cut
3296              
3297             sub are_subtypes {
3298 1     1 1 5 $log->logwarn ("# not implemented function");
3299 1         450 return 0;
3300             }
3301              
3302             =pod
3303              
3304             =back
3305              
3306             =head1 REIFICATION
3307              
3308             =over
3309              
3310             =item B
3311              
3312             (I<$tid>) = I<$tm>->is_reified (I<$assertion>)
3313              
3314             (I<$tid>) = I<$tm>->is_reified (I<$url>)
3315              
3316             In the case that the handed-in assertion is internally reified in the map, this method will return
3317             the internal identifier of the reifying toplet. Or C if there is none.
3318              
3319             In the case that the handed-in URL is used as subject address of a toplet, this method will return
3320             the internal identifier of the reifying toplet. Or C if there is none.
3321              
3322             =cut
3323              
3324             sub _is_reified {
3325 11     11   24 my $self = shift;
3326 11         19 my $a = shift;
3327              
3328 11         28 my $mid2iid = $self->{mid2iid}; # shortcut
3329 11 100       65 $a = $a->[TM->LID] if ref ($a) eq 'Assertion'; # for assertions we take the LID
3330              
3331 17         119 return grep { $mid2iid->{$_}->[TM->ADDRESS] eq $a } # brute force
  351         1028  
3332 11         96 grep { $mid2iid->{$_}->[TM->ADDRESS] }
3333 11         24 keys %{$mid2iid};
3334             }
3335              
3336             sub is_reified {
3337 11     11 1 2782 return _is_reified (@_);
3338             }
3339              
3340             =pod
3341              
3342             =item B
3343              
3344             I<$url> = I<$tm>->reifies (I<$tid>)
3345              
3346             I<$assertion> = I<$tm>->reifies (I<$tid>)
3347              
3348             Given a toplet identifier, this method returns either the internally reified assertion, an
3349             externally reified object via its URL, or C if that toplet does not reify at all.
3350              
3351             =cut
3352              
3353             sub reifies {
3354 8     8 1 686 my $self = shift;
3355 8         18 my $tid = shift;
3356              
3357 8 50       92 my $add = $self->{mid2iid}->{$tid}->[TM->ADDRESS] if $self->{mid2iid}->{$tid};
3358 8 50       28 return undef unless $add;
3359 8 100       73 return $add =~ /^[A-F0-9]{32}$/i ? $self->{assertions}->{$add} : $add;
3360             }
3361              
3362             =pod
3363              
3364             =back
3365              
3366             =head1 VARIANTS (aka "The Warts")
3367              
3368             No comment.
3369              
3370             =over
3371              
3372             =item B
3373              
3374             I<$tm>->variants (I<$id>, I<$variant>)
3375              
3376             I<$tm>->variants (I<$id>)
3377              
3378             With this method you can get/set a variant tree for B topic. According to the standard only
3379             basenames (aka topic names) can have variants, but, hey, this is such an ugly beast (I am
3380             digressing). According to this data model you can have variants for B toplets/maplets. You only
3381             need their id.
3382              
3383             The structure is like this:
3384              
3385             $VAR1 = {
3386             'tm:param1' => {
3387             'variants' => {
3388             'tm:param3' => {
3389             'variants' => undef,
3390             'value' => 'name for param3'
3391             }
3392             },
3393             'value' => 'name for param1'
3394             },
3395             'tm:param2' => {
3396             'variants' => undef,
3397             'value' => 'name for param2'
3398             }
3399             };
3400              
3401             The parameters are the keys (there can only be one, which is a useful, cough, restriction of the
3402             standard) and the data is the value. Obviously, one key value (i.e. parameter) can only exists once.
3403              
3404             Caveat: This is not very well tested (read: not tested at all).
3405              
3406             =cut
3407              
3408             sub variants {
3409 0     0 1 0 my $self = shift;
3410 0         0 my $id = shift;
3411 0         0 my $var = shift;
3412              
3413 0 0       0 $self->{last_mod} = Time::HiRes::time if $var;
3414 0 0       0 return $var ? $self->{variants}->{$id} = $var : $self->{variants}->{$id};
3415             }
3416              
3417              
3418             =pod
3419              
3420             =back
3421              
3422             =head1 LOGGING
3423              
3424             The L module hosts (since 1.29) the Log4Perl object C<$TM::log>. It is initialized with some
3425             reasonable defaults, but an using application can access it, tweak it, or overwrite it completely.
3426              
3427             =head1 SEE ALSO
3428              
3429             L, L
3430              
3431             =head1 COPYRIGHT AND LICENSE
3432              
3433             Copyright 200[1-8] by Robert Barta, Edrrho@cpan.orgE
3434              
3435             This library is free software; you can redistribute it and/or modify it under the same terms as Perl
3436             itself.
3437              
3438             =cut
3439              
3440             #-- this we do when all structures have been defined
3441             _prime_infrastructure(); # initialize
3442             # NOTE: BEGIN does not work, because we have to define all
3443              
3444             sub _prime_infrastructure { # generate a fragmentary TM structure for the infrastructure
3445 36     36   144 foreach my $h ($TM::PSI::core,
3446             $TM::PSI::topicmaps_inc,
3447             $TM::PSI::tmql_inc,
3448             $TM::PSI::astma_inc) {
3449 144         196 foreach my $k (keys %{ $h->{mid2iid} }) {
  144         855  
3450 1008         3544 $infrastructure->{mid2iid}->{$k} = [ $k, undef, $h->{mid2iid}->{$k} ]; # and manifest them as toplets
3451             }
3452              
3453 288         1277 map { $infrastructure->{assertions}->{ $_->[TM->LID] } = $_ } # manifest assertions
  288         574  
3454 288         732 map { $_->[TM->LID] = mklabel ($_); # after computing the hash LID
3455 288         587 $_ }
3456 288         4171 map { canonicalize ( undef, $_ ) } # after canonicalizing them
3457 288         15829 map { $_->[TM->KIND] = TM->ASSOC; # adding defaults
3458 288         820 $_->[TM->SCOPE] = TM::PSI::US;
3459 288         469 $_ }
3460 144         808 map { Assertion->new (type => $_->[0], # which is built here
3461             roles => $_->[1], # with the roles list
3462             players => $_->[2])} # with the players list
3463 144         345 @{ $h->{assertions} };
3464             }
3465             }
3466              
3467              
3468             1;
3469              
3470             __END__