File Coverage

blib/lib/TM.pm

Criterion	Covered	Total	%
statement	621	862	72.0
branch	232	364	63.7
condition	55	132	41.6
subroutine	67	80	83.7
pod	49	54	90.7
total	1024	1492	68.6

line	stmt	bran	cond	sub	pod	time	code
1							package TM;
2
3	36			36		743679	use strict;
	36					86
	36					1581
4	36			36		192	use warnings;
	36					75
	36					1569
5
6							require Exporter;
7	36			36		200	use base qw(Exporter);
	36					93
	36					9998
8
9							our $VERSION = '1.56';
10
11	36			36		45425	use Data::Dumper;
	36					169976
	36					4334
12							# !!! HACK to suppress an annoying warning about Data::Dumper's VERSION not being numerical
13							$Data::Dumper::VERSION = '2.12108';
14							# !!! END of HACK
15
16	36			36		52680	use Class::Struct;
	36					95378
	36					281
17	36			36		59177	use Time::HiRes;
	36					80133
	36					272
18	36			36		55073	use TM::PSI;
	36					98
	36					1387
19
20	36			36		60834	use Log::Log4perl;
	36					6078367
	36					303
21							Log::Log4perl::init( \ q(
22
23							log4perl.rootLogger=DEBUG, Screen
24
25							log4perl.appender.Screen=Log::Log4perl::Appender::Screen
26							log4perl.appender.Screen.layout=Log::Log4perl::Layout::PatternLayout
27							log4perl.appender.Screen.layout.ConversionPattern=[%r] %F %L %c - %m%n
28
29							#log4perl.rootLogger=DEBUG, LOGFILE
30
31							#log4perl.appender.LOGFILE=Log::Log4perl::Appender::File
32							#log4perl.appender.LOGFILE.filename=/tmp/tm.log
33							#log4perl.appender.LOGFILE.mode=append
34
35							#log4perl.appender.LOGFILE.layout=PatternLayout
36							#log4perl.appender.LOGFILE.layout.ConversionPattern=[%r] %F %L %c - %m%n
37							) );
38
39							our $log = Log::Log4perl->get_logger("TM");
40
41							our $infrastructure; # default set = core + topicmaps_inc + astma_inc
42
43							=pod
44
45							=head1 NAME
46
47							TM - Topic Maps, Base Class
48
49							=head1 SYNOPSIS
50
51							my $tm = new TM (baseuri => 'tm://whatever/'); # empty map
52
53							# add a toplet (= minimal topic, only identification, no characteristics)
54							# by specifying an internal ID
55							$tm->internalize ('aaa'); # only internal identifier
56							$tm->internalize ('bbb' => 'http://bbb/'); # with a subject address
57							$tm->internalize ('ccc' => \ 'http://ccc/'); # with a subject indicator
58
59							# without specifying an internal ID (will be auto-generated)
60							$tm->internalize (undef => 'http://ccc/'); # with a subject address
61							$tm->internalize (undef => \ 'http://ccc/'); # with a subject indicator
62
63							# get rid of toplet(s)
64							$tm->externalize ('tm://whatever/aaa', ...);
65
66							# find full URI of a toplet
67							my $tid = $tm->tids ('person'); # returns tm://whatever/person
68							my @tids = $tm->tids ('person', ...) # for a whole list
69
70							my $tid = $tm->tids ( 'http://bbb/'); # with subject address
71							my $tid = $tm->tids (\ 'http://ccc/'); # with subject indicator
72
73							my @ts = $tm->toplets; # get all toplets
74							my @ts = $tm->toplets (\ '+all -infrastructure'); # only those you added
75
76							my @as = $tm->asserts (\ '+all -infrastructure'); # only those you added
77
78							my @as = $tm->retrieve; # all assertions
79							my $a = $tm->retrieve ('23ac4637....345'); # returns only that one assertion
80							my @as = $tm->retrieve ('23ac4637....345', '...'); # returns all these assertions
81
82							# create standalone assertion
83							my $a = Assertion->new (type => 'is-subclass-of',
84							roles => [ 'subclass', 'superclass' ],
85							players => [ 'rumsti', 'ramsti' ]);
86							$tm->assert ($a); # add that to map
87
88							# create a name
89							my $n = Assertion->new (kind => TM->NAME,
90							type => 'name',
91							scope => 'us',
92							roles => [ 'thing', 'value' ],
93							players => [ 'rumsti', new TM::Literal ('AAA') ])
94							# create an occurrence
95							my $o = Assertion->new (kind => TM->OCC,
96							type => 'occurrence',
97							scope => 'us',
98							roles => [ 'thing', 'value' ],
99							players => [ 'rumsti', new TM::Literal ('http://whatever/') ])
100
101							$tm->assert ($n, $o); # throw them in
102
103							$tm->retract ($a->[TM->LID], ...); # get rid of assertion(s)
104
105							my @as = $tm->retrieve ('id..of...assertion'); # extract particular assertions
106
107							# find particular assertions
108							# generic search patterns
109							my @as = $tm->match_forall (scope => 'tm://whatever/sss');
110
111							my @bs = $tm->match_forall (type => 'tm://whatever/ttt',
112							roles => [ 'tm://whatever/aaa', 'tm://whatever/bbb' ]);
113
114							# specialized search patterns (see TM::Axes)
115							my @cs = $tm->match_forall (type => 'is-subclass-of',
116							arole => 'superclass',
117							aplayer => 'tm://whatever/rumsti',
118							brole => 'subclass');
119
120							my @ds = $tm->match_forall (type => 'isa',
121							class => 'tm://whatever/person');
122
123							# perform merging, cleanup, etc.
124							$tm->consolidate;
125
126							# check internal consistency of the data structure
127							die "panic" if $tm->insane;
128
129							# taxonomy stuff
130							warn "what a subtle joke" if $tm->is_a ($tm->tids ('gw_bush', 'moron'));
131
132							die "what a subtle joke"
133							unless $tm->is_subclass ($tm->tids ('politician', 'moron'));
134
135							# returns Mr. Spock if Volcans are subclassing Aliens
136							warn "my best friends: ". Dumper [ $tm->instancesT ($tm->tids ('alien')) ];
137
138
139							=head1 ABSTRACT
140
141							This class provides read/write access to a data structure according to the Topic Maps paradigm. As
142							it stands, this class implements directly so-called I maps, i.e. those maps which
143							completely reside in memory. Implementations for non-materialized maps can be derived from it.
144
145							=head1 DESCRIPTION
146
147							This class implements directly so-called I topic maps, i.e. those maps which
148							completely reside in memory. Non-materialized and non-materializable maps can be implemented by
149							deriving from this class by overloading one or all of the sub-interfaces. If this is done cleverly,
150							then any application, even a TMQL query processor can operate on non-materialized (virtual) maps in
151							the same way as on materialized ones.
152
153							=head2 Data Structures
154
155							The Topic Maps paradigm knows two abstractions
156
157							=over
158
159							=item I, Topic Maps Data Model
160
161							L
162
163							=item I, Topic Maps Reference Model
164
165							L
166
167							=back
168
169							For historical reasons, this package adopts an abstraction which is in between these
170							two. Accordingly, there are only following types of data structures
171
172							=over
173
174							=item Toplets:
175
176							These are like TMDM topics, but only contain addressing information (subject identifiers and subject
177							addresses) along with an internal identifier.
178
179							=item Assertions:
180
181							These are like TMDM associations, but are generalized to host also occurrences and names. Also
182							associations using predefined association types, such as C (I) and C
183							(I) are represented as assertions.
184
185							=item Variants:
186
187							No idea what they are good for. They can be probably safely ignored.
188
189							=back
190
191							The data manipulation interface is very low-level and B exposes internal data structures.
192							As long as you do not mess with the information you get and you follow the API rules, this can
193							provide a convenient, fast, albeit not overly comfortable interface. If you prefer more a TMDM-like
194							style of accessing a map then have a look at L.
195
196
197							=head2 Identifiers
198
199							Of course, L supports the subject locator and the subject indicator mechanism as mandated
200							by the Topic Maps standards.
201
202							Additionally, this package also uses I identifiers to address everything which looks and
203							smells like a topic, also associations, names and occurrences. For topics the application (or
204							author) of the topic map will most likely provide these internal identifiers. For the others the
205							identifiers are generated.
206
207							Since v1.31 this package distinguishes between 3 kinds of internal identifiers:
208
209							=over
210
211							=item I toplet identifiers
212
213							These identifiers are always interpreted local to a map, in that the C of the map is used
214							as prefix. So, a local identifier
215
216							chinese-working-conditions
217
218							will become
219
220							tm://nirvana/chinese-working-conditions
221
222							if the base URI of the map were
223
224							tm://nirvana/
225
226							So if you want to use identifiers such as these, then you should either use the absolut version
227							(including the base URI) or use the method C to find the absolute version.
228
229							=item I toplet identifiers
230
231							All toplets from the infrastructure are declared I, i.e. untouchable. Examples are
232							C, C or C (universal scope).
233
234							These identifiers are always the same in all maps this package system manages. That implies that if
235							you use such an identifier, then you cannot attach a local meaning to it. And it implies that at
236							merging time, toplets with these identifiers will merge. Even if there were no subject indicators or
237							addresses involved.
238
239							It is probably a good idea to leave such toplets alone as the software is relying on the stability
240							of the sacrosanct identifiers.
241
242							=item assertion identifiers
243
244							Each assertion also has an (internal) identifier. It is a function from the content, so it
245							is characteristic for the assertion.
246
247							=back
248
249							=head2 Consistency
250
251							An application using a map may expect that a map is I, i.e. that the following
252							consistency conditions are met:
253
254							=over
255
256							=item B (fixed on)
257
258							Every identifier appearing in some assertion as type, scope, role or player is also registered as
259							toplet.
260
261							=item B (default: on)
262
263							Two (or more) toplets sharing the same I are treated as one toplet.
264
265							=item B (default: on)
266
267							Two (or more) toplets sharing the same I are treated as one toplet.
268
269							=item B (default: off)
270
271							Two (or more) toplet sharing the same name in the same scope are treated as one toplet.
272
273							=back
274
275							=cut
276
277							use constant {
278	36					216725	Subject_based_Merging => 1,
279							Indicator_based_Merging => 2,
280							TNC_based_Merging => 3,
281	36			36		6968	};
	36					98
282
283							=pod
284
285							While A1 is related with the internal consistency of the data structure (see C), the others
286							are a choice the application can make (see C).
287
288							I is not automatically provided when a map is modified by the application. It is the
289							applications responsibility to trigger the process to consolidate the map. As that may be
290							potentially expensive, the control remains at the application.
291
292							When an IO driver is consuming a map from a resource, say, loading from an XTM file, then that
293							driver will ensure that the map is consolidated according to the current settings before it hands it
294							to the application. The application is then in full control of the map as it can change, add and
295							delete toplets and assertions. The map can become unconsolidated in this process. The method
296							C reinstates consistency again.
297
298							You can change these defaults by (a) providing an additional option to the constructor
299
300							new TM (....,
301							consistency => [ TM->Subject_based_Merging,
302							TM->Indicator_based_Merging ]);
303
304							or (b) by later using the accessor C (see below).
305
306							=head1 MAP INTERFACE
307
308							=head2 Constructor
309
310							I<$tm> = new TM (...)
311
312							The constructor will create an empty map, or, to be more exact, it will fill the map with the
313							taxonomy from L which covers basic Topic Maps concepts such as I or I.
314
315							The constructor understands a number of key/value pair parameters:
316
317							=over
318
319							=item C (default: C)
320
321							Every toplet in the map has an unique local identifier (e.g. C). The C parameter
322							controls how an absolute URI is built from this identifier.
323
324							=item C (default: [ Subject_based_Merging, Indicator_based_Merging ])
325
326							This controls the consistency settings. They can be changed later with the C method.
327
328							=back
329
330							=cut
331
332							sub new {
333	751			751	0	369333	my $class = shift;
334	751					2774	my %self = @_;
335
336	751		100			5136	$self{consistency} \|\|= [ Subject_based_Merging, Indicator_based_Merging ];
337	751		100			2379	$self{baseuri} \|\|= 'tm://nirvana/';
338	751	100				3941	$self{baseuri} .= '#' unless $self{baseuri} =~ m\|[/\#:]$\|;
339
340	751					2695	my $self = bless \%self, $class;
341
342	751	50				2835	unless ($self->{mid2iid}) { # we need to do fast cloning of basic vocabulary
343	751					1253	%{ $self->{mid2iid} } = %{ $infrastructure->{mid2iid} }; # shallow clone
	751					13521
	751					10417
344	751					2718	%{ $self->{assertions} } = %{ $infrastructure->{assertions} }; # shallow clone
	751					5245
	751					9986
345							}
346	751					2666	$self->{last_mod} = 0; # book keeping
347	751					3287	$self->{created} = Time::HiRes::time;
348
349	751					3040	return $self;
350							}
351
352	0			0		0	sub DESTROY {} # not much to do here
353
354							=pod
355
356							=head2 Methods
357
358							=over
359
360							=item B
361
362							I<$bu> = I<$tm>->baseuri
363
364							This methods retrieves the base URI component of the map. This is a read-only method. The base URI
365							is B defined.
366
367							=cut
368
369							sub baseuri {
370	572			572	1	10895	my $self = shift;
371	572					3360	return $self->{baseuri};
372							}
373
374							=pod
375
376							=item B
377
378							I<@merging_constraints> = I<$tm>->consistency
379
380							I<$tm>->consistency (I<@list_of_consistency_constants>)
381
382							This method provides read/write access to the consistency settings.
383
384							If no parameters are provided, then the current list of consistency settings is returned. If
385							parameters are provided, that list must consist of the constants defined under L.
386
387							B: Changing the consistency does B automatically trigger C.
388
389							=cut
390
391							sub consistency {
392	4			4	1	559	my $self = shift;
393	4					6	my @params = @_;
394
395	4	100				11	$self->{consistency} = [ @params ] if @params;
396	4					6	return @{$self->{consistency}};
	4					33
397							}
398
399							=pod
400
401							=item B
402
403							Returns the L date of last time the map has been modified (content-wise).
404
405							=cut
406
407							sub last_mod {
408	0			0	1	0	my $self = shift;
409	0					0	return $self->{last_mod};
410							}
411
412							=pod
413
414							=item B
415
416							I<$tm>->consolidate
417
418							I<$tm>->consolidate (I<@list_of_consistency_constants>)
419
420							This method I a map by performing the following actions:
421
422							=over
423
424							=item *
425
426							perform merging based on subject address (see TMDM section 5.3.2)
427
428							=item *
429
430							perform merging based on subject indicators (see TMDM section 5.3.2)
431
432							=item *
433
434							remove all superfluous toplets (those which do not take part in any assertion)
435
436							B: Not implemented yet!
437
438							=back
439
440							This method will normally use the map's consistency settings. These settings can be overridden by
441							adding consistency settings as parameters (see L). In that case the map's settings are
442							B modified, so use this carefully.
443
444							B: In all cases the map will be modified.
445
446							B: After merging some of the I might not be reliably point to a topic.
447
448							=cut
449
450							# NOTE: Below there much is done regarding speed. First the toplets are swept detecting which have
451							# to be merged. This is not done immediately (as this is an expensive operation), instead a 'merger' hash
452							# is built. Note how merging information A -> B and A -> C is morphed into A -> B and B -> C using
453							# the _find_free function.
454
455							# That merger hash is then consolidated by following edges until their end, so that there are no
456							# cycles.
457
458							sub consolidate {
459	224			224	1	115491	my $self = shift;
460	224	50				1271	my $cons = @_ ? [ @_ ] : $self->{consistency}; # override
461	224					399	my $indi = grep ($_ == Indicator_based_Merging, @{$self->{consistency}});
	224					1009
462	224					442	my $subj = grep ($_ == Subject_based_Merging, @{$self->{consistency}});
	224					1005
463	224					350	my $tnc = grep ($_ == TNC_based_Merging, @{$self->{consistency}});
	224					1209
464
465							#warn "cond indi $indi subj $subj tnc $tnc";
466
467	224					442	my %SIDs; # holds subject addresses found
468							my %SINs; # holds subject indicators found
469	0					0	my %BNs; # holds basename + scope found
470
471							#warn Dumper $cons;
472
473							#== find merging points and memorize this in mergers =======================================================================
474	0					0	my %mergers; # will contain the merging edges
475	224					523	my $mid2iid = $self->{mid2iid}; # shortcut
476	224					415	my $asserts = $self->{assertions}; # shortcut
477	224					454	my $baseuri = $self->{baseuri}; # shortcut
478
479	224					3514	MERGE:
480	224					430	foreach my $this (keys %{$mid2iid}) {
481							#warn "looking at $this";
482	8862					15262	my $thism = $mid2iid->{$this};
483							#warn "SIDs: ". Dumper \%SIDs;
484							#warn "SINs: ". Dumper \%SINs;
485							#-- based on subject indication ------------------------------------------------------------------------------------------
486	8862	100				17952	if ($indi) {
487	2262					11179	foreach my $sin (@{$thism->[TM->INDICATORS]}) { # walk over the subject indicators
	2262					9725
488	2447	100				7493	if (my $that = $SINs{$sin}) { # $that is now a key pointing to a merging partner
489							#warn "merging (IND) $this >> $that"; #. Dumper $thism, $thatm;
490	263					814	_add_merge (\%mergers, $baseuri, $this, $that);
491
492							} else { # no merging, so enter the sins
493	2184					10365	$SINs{$sin} = $this;
494							}
495							}
496							}
497
498							sub _add_merge {
499	557			557		933	my $mergers = shift;
500	557					797	my $bu = shift;
501	557					781	my $this = shift;
502	557					730	my $that = shift;
503
504	557	100				4734	($this, $that) = ($that, $this) if $this =~ /^$bu/; # we swap them to favor that which resembles the baseURI
505	557					1489	$mergers->{_find_free ($this, $mergers)} = $that; # find a free place to make that mapping
506							}
507
508							sub _find_free {
509	557			557		797	my $this = shift;
510	557					1099	my $mergers = shift;
511
512	557					823	my $this2 = $this;
513	557					999	my $this3;
514	557					2122	while ($this3 = $mergers->{$this2}) {
515	135	50	33			844	if ($this3 eq $this \|\| $this3 eq $this2) { # loop, we do not need it
516	0					0	return $this3;
517							} else {
518	135					428	$this2 = $this3; # we follow the trail
519							}
520							}
521	557					3056	return $this2; # this2 was the end of the trail
522							}
523
524							#-- based on subject address ---------------------------------------------------------------------------------------------
525	8862	100				29056	if ($subj) {
526	3198	100				11420	if (my $sid = $thism->[TM->ADDRESS]) {
527	110	100				311	if (my $that = $SIDs{$sid}) { # found partner => should be merged
528							#warn "merging (ADDR) $this >> $that";
529	30					120	_add_merge (\%mergers, $baseuri, $this, $that);
530							###### old $mergers{_find_free ($this, \%mergers)} = $that;
531							# must obviously both have the same subject address, so, no reason to touch this
532							} else { # there is no partner, first one with this subject address
533	80					285	$SIDs{$sid} = $this;
534							}
535							}
536							}
537							#warn "after 1 on '$this' ";#.Dumper $mid2iid;
538							}
539							#-- based on TNC ---------------------------------------------------------------------------------------------
540	224	100				1634	if ($tnc) {
541	144					650	my ($THING, $VALUE) = ('thing', 'value');
542	144					788	foreach my $a (values %$asserts) {
543	3192	100				15935	next unless $a->[TM->KIND] == TM->NAME; # we are only interested in basenames
544							#warn "checking assertion ".Dumper $a;
545	864					2173	my ($v) = get_x_players ($self, $a, $VALUE); # if we get back a longer list, bad luck
546	864					4167	my $bn_plus_scope = $v->[0] . # the basename is a string reference
547							$a->[TM->SCOPE]; # relative to the scope
548	864					1894	my ($this) = get_x_players ($self, $a, $THING); # thing which plays 'topic'
549							#warn " --> player is $this";
550	864	100				2651	if (my $that = $BNs{$bn_plus_scope}) { # if we have seen it before
551							#warn " -> SEEN";
552	264					970	_add_merge (\%mergers, $baseuri, $this, $that);
553							#### old $mergers{_find_free ($this, \%mergers)} = $that;
554							} else { # it is new to use, we store it into %BNs
555							#warn " -> NOT SEEN";
556	600					3681	$BNs{$bn_plus_scope} = $this;
557							#warn "BNs ".Dumper \%BNs;
558							}
559							}
560							}
561							#== consolidate mergers: no cycles, trail followed through ======================================================
562							#warn "mergers ".Dumper \%mergers;
563
564	224					1253	for (2..2) { # at most 2, theoretical only one should be sufficient
565	224					408	my $changes = 0;
566	224					834	foreach my $h (keys %mergers) {
567							#warn "working on $h";
568	557	100	66			2757	if ($mergers{$h} eq $h) { # micro loop
		50
569	55					186	delete $mergers{$h};
570							} elsif (defined $mergers{$mergers{$h}} && $mergers{$mergers{$h}} eq $h) {
571	0					0	delete $mergers{$h};
572							} else {
573	502					951	my $h2 = $mergers{$h};
574	502					2351	my %seen = ($h => 1, $h2 => 1); # loop avoidance
575							#warn "seeen start".Dumper \%seen;
576	502		100			3151	while ($mergers{$h2} and !$seen{$mergers{$h2}}++) { $h2 = $mergers{$h} = $mergers{$h2}; $changes++;}
	98					219
	98					549
577							#warn "half consolidated (chagens $changes)" .Dumper $H;
578							}
579							}
580							# warn "consoli loop $_: changes: $changes";
581							# warn "early finish" if $_ == 1 and $changes == 0;
582	224	100				973	last if $changes == 0;
583							# die "not clean" if $_ == 2 and $changes > 0;
584							}
585
586							#warn "consolidated mergers ".Dumper \%mergers;
587
588
589							#== actual merging ========================================================================================
590
591							# recanonicalize affected assertions
592							{
593	224					353	my $changed = _relabel (\%mergers, $self->baseuri, values %$asserts );
	224					1764
594	224					1574	while (my ($k, $a) = each %$changed) {
595	1374					2552	delete $asserts->{ $k };
596							# delete $mid2iid->{ $k };
597							# $mid2iid->{ $a->[TM->LID] } = [ $a->[TM->LID], undef, [] ];
598	1374					47356	$asserts->{ $a->[TM->LID] } = $a;
599							}
600							}
601
602	224					721	foreach my $that (keys %mergers) {
603	502					1243	my $this = $mergers{$that};
604	502					985	my $thism = $mid2iid->{$this};
605	502					1055	my $thatm = $mid2iid->{$that}; # shorthand
606	502	50				1614	next if $thatm == $thism; # we already have merged
607
608	502	50	100			4006	$log->logdie ("two different subject addresses for two topics to be merged ($this, $that)")
			66
609							if $thism->[TM->ADDRESS] and $thatm->[TM->ADDRESS] and
610							$thism->[TM->ADDRESS] ne $thatm->[TM->ADDRESS];
611
612							#warn "merge now $that > $this";
613	502		100			2895	$thism->[TM->ADDRESS] \|\|= $thatm->[TM->ADDRESS]; # first subject address
614							{ # then indicators
615	502					851	my $Is = $thism->[TM->INDICATORS]; # reference to thism indicators
	502					1570
616	502					663	push @$Is, @{$thatm->[TM->INDICATORS]}; # add the others to it
	502					2106
617	502					664	{ my %X; map { $X{$_}++ } @$Is; @$Is = keys %X; } # make that unique
	502					652
	502					2983
	921					2060
	502					2968
618							}
619	502					2336	$mid2iid->{$that} = $thism; # finally
620							}
621							#warn "after post-merger ". Dumper $mid2iid;
622
623	224					707	$self->{mid2iid} = $mid2iid; # this makes tie happy, in the case the map is tied
624	224					4990	$self->{last_mod} = Time::HiRes::time;
625							}
626
627							=pod
628
629							=item B
630
631							I<$tm>->clear
632
633							This method removes all toplets and assertions (except the infrastructure). Everything else remains.
634
635							=cut
636
637							sub clear {
638	1			1	1	600	my $self = shift;
639
640	1					3	my %mid2iid = %{ $infrastructure->{mid2iid} }; # shallow clone
	1					19
641	1					4	my %assertions = %{ $infrastructure->{assertions} }; # shallow clone
	1					7
642
643	1					4	$self->{mid2iid} = \%mid2iid; # making it explicit keeps MLDBM happy
644	1					7	$self->{assertions} = \%assertions; # ditto
645	1					6	$self->{last_mod} = Time::HiRes::time; # book keeping
646	1					3	return $self; # convenience for chaining
647							}
648
649							=pod
650
651							=item B
652
653							I<$tm>->add (I<$tm2>, ...)
654
655							This method accepts a list of L objects and adds all content from these maps to the current
656							object.
657
658							B: There is B merging done for user-supplied toplets. Use explicitly method C
659							for it. Merging is done for all sacrosanct toplets, i.e. those from the infrastructure.
660
661							From v1.31 onwards this method tries to favour the I identifiers (LIDs) of B map
662							over LIDs of the added maps. This means, firstly, that internal identifiers of B map are
663							B touched (or re-generated) in any way and that any shorthands (without a baseuri prefix) will
664							remain valid when using C. Secondly, LIDs in the added map will be attempted to blend into
665							B map by changing simply their prefix. If that newly generated LID is already taken by
666							something in B map, then the original LID will be used. That allows many added LIDs be used
667							together with C without (much) change in code. Of course, the only reliable way to reach a
668							topic is a subject locator or an indicator. This is all about convenience.
669
670							B: This procedure implies that some assertions are recomputed, so that also their LID will
671							change!
672
673
674							=cut
675
676							sub add {
677	0			0	1	0	my $self = shift;
678	0					0	my $baseuri = $self->{baseuri};
679	0					0	my $mid2iid = $self->{mid2iid}; # shorthand
680	0					0	my $asserts = $self->{assertions};
681
682	0					0	foreach (@_) { # deal with one store after the other
683	0					0	my $baseuri2 = $_->{baseuri};
684
685	0					0	my %changes; # will contain old -> new internal identifier mappings
686	0					0	while (my ($k, $v) = each %{$_->{mid2iid}}) {
	0					0
687
688	0	0				0	if ($infrastructure->{mid2iid}->{$k}) { # infrastructure toplets are sacrosanct
689							} else {
690	0					0	(my $k2 = $k) =~ s/^$baseuri2/$baseuri/; # replace baseuri2 prefix
691
692	0	0				0	$k2 = $k if $mid2iid->{$k2}; # if there is a collision, bounce back to original
693	0					0	$k2 .= '1' while $mid2iid->{$k2}; # while there is still a collision ... (this only in case of same baseuris)
694							# $k2 = $baseuri.sprintf ("uuid-%010d", $TM::toplet_ctr++)
695							# if $mid2iid->{$k2}; # if there is a collision, create generic one
696
697	0					0	$changes{$k} = $k2;
698	0					0	$v->[TM->LID] = $k2; # use that key as canonical one
699	0					0	$mid2iid->{$k2} = $v; # ...add what the other has
700							}
701							}
702							#warn Dumper \%changes;
703	0					0	my $changed = _relabel (\%changes, $baseuri, values %{ $_->{assertions} } );
	0					0
704							#warn Dumper $changed;
705	0					0	while (my ($k, $a) = each %$changed) {
706							# delete $mid2iid->{ $k };
707							# $mid2iid->{ $a->[TM->LID] } = [ $a->[TM->LID], undef, [] ]; # put the new one in here
708	0					0	$asserts->{ $a->[TM->LID] } = $a; # and also in the assertions part
709							}
710							}
711	0					0	$self->{mid2iid} = $mid2iid; # make MLDBM happy
712	0					0	$self->{assertions} = $asserts; # ditto
713	0					0	$self->{last_mod} = Time::HiRes::time;
714							}
715
716
717							sub _relabel {
718	224			224		428	my $changes = shift;
719	224					423	my $baseuri = shift;
720
721	224					413	my %changed; # we record here old LID -> newly relabelled assertion
722	224					1008	foreach my $a (@_) {
723	5486					8480	my ($this, $that);
724							#warn "working on ".Dumper $a;
725	5486	50	33			21381	$a->[TM->SCOPE] = $that if $that = $changes->{ $a->[TM->SCOPE] }; $this \|\|= $that;
	5486					22415
726	5486	50	33			19538	$a->[TM->TYPE] = $that if $that = $changes->{ $a->[TM->TYPE] }; $this \|\|= $that;
	5486					26902
727
728	5486	50				6408	map { $_ = $this = $that if $that = $changes->{ $_ } } @{ $a->[TM->ROLES] };
	10972					32242
	5486					17541
729	5486	100				7381	map { $_ = $this = $that if $that = $changes->{ $_ } } @{ $a->[TM->PLAYERS] };
	10972					36904
	5486					16823
730							#warn "$this for ".Dumper $a;
731	5486	100				30171	$changed{ $a->[TM->LID] } = $a if $this; # something has changed
732
733	5486					13094	$a->[TM->CANON] = 0; canonicalize (undef, $a);
	5486					22056
734	5486					18492	$a->[TM->LID] = mklabel ($a);
735
736							}
737	224					1048	return \%changed;
738							}
739
740							=pod
741
742							=item B
743
744							I<$diff> = I<$new_tm>->diff (I<$old_tm>)
745
746							I<$diff> = TM::diff (I<$new_tm>, I<$old_tm>)
747
748							I<$diff> = TM::diff (I<$new_tm>, I<$old_tm>,
749							{consistency => \ @list_of_consistency_consts,
750							include_changes => 1})
751
752							C compares two topic maps and returns their differences as a hash reference. While it works on
753							any two maps, it is most useful after one map (the I) is modified into a I.
754
755							If C is used in OO-style, the current map is interpreted as the I map and the map in the
756							arguments as I.
757
758							By default, the toplet and assertion identifiers for any changes are returned; the option
759							C causes the return of the actual toplets and assertions themselves. This option
760							makes C's output more self-contained: enabled, one can fully (re)create the new map from the
761							old one using the diff (or vice versa).
762
763							The C option uses the same format as the TM constructor (see L) and
764							describes how corresponding toplets in the two maps are to be identified. Toplets with the same
765							internal ids are always considered equal. If I is active, toplets with
766							the same I are considered equal (overriding the topic identities). If I
767							based consistency> is active, toplets with a matching I are considered equal
768							(overriding the previous identities).
769
770							B: This overriding of previous conditions for identity is necessary to keep the equality
771							relationship unique and one-to-one. As an example, consider the following scenario: a toplet I
772							in the old map is split into multiple new toplets I and I in the new map. If I had a
773							locator or identifier that is moved to I (and if consistency options were active), then the
774							identity detector will consider I to be equal to I, and B I in the new map to
775							correspond to I in the old map. However, this will never lead to loss of information: I in
776							the new map is flagged as completely new toplet.
777
778							The differences between old and new map are returned underneath the keys I, I,
779							I and I. If C is on, the extra keys I,
780							I and I are populated. The values of all these keys are hash references
781							themselves.
782
783							=over
784
785							=item I, I
786
787							The C and C hashes list new or removed toplets, respectively (with their identifiers as
788							keys). For each toplet, the value of the hash is an array of associated assertion ids. The array is
789							empty but defined if there are no associated assertions.
790
791							For toplets the attached assertions are the usual ones (names, occurrences) and class-instance
792							relationships (attached to the instance toplet).
793
794							For associations, the assertions are attached to the I toplet.
795
796							=item I
797
798							This hash consists of the non-trivial toplet identities that were found. If neither Subject- nor
799							Indicator-based merging is active, then this hash is empty. Otherwise, the keys are toplet
800							identifiers in the old map, with the corresponding topic identifier in the new map as value. This
801							includes standalone topics as well as assertions and associations that were renamed due to
802							changed player or role identities.
803
804							=item I
805
806							The I hash contains the changes for matched toplets. The key is the toplet identifier in
807							the old map (which is potentially different from the one in the new map; see the note about
808							identities above). The value is a hash with three keys: I, I and I. The
809							value for the C key is defined if and only if the toplet associated with this toplet has
810							changed (i.e. Subject Locator or Indicators have changed). The values for the C and C
811							keys are arrays with the new or removed assertions that are attached to this toplet. These arrays are
812							defined but empty where no applicable information is present.
813
814							=item I, I
815
816							These hashes hold the actual new or removed toplets if the option C is active.
817							Keys are the toplet ids, values are references to the actual toplet data structures.
818
819							=item I
820
821							This hash holds the actual assertions where the maps differ; it exists only if the option
822							C is active. Keys are the assertion identifiers, values the references to the
823							actual assertion data structure. Note that assertion ids uniquely identify the assertion contents,
824							therefore this hash can hold assertions from both new and old map.
825
826							=back
827
828							=cut
829
830							sub diff {
831	0			0	1	0	my ($newmap,$oldmap,$options)=@_;
832	0	0	0			0	return undef if (!$oldmap \|\| !$newmap);
833
834	0					0	my ($base)=$oldmap->baseuri;
835	0	0				0	$log->logdie ("comparison of maps with different bases not supported yet!")
836							if ($newmap->baseuri ne $base);
837
838	0					0	my (%plus,%minus,%modified);
839							# a lot of comparison/translation can be skipped if tids are the only identity
840	0					0	my $xlatneeded= grep($_==TM->Subject_based_Merging \|\|
841	0		0			0	$_==TM->Indicator_based_Merging,@{$options->{consistency}});
842
843							# first walk the maps to match old and new items
844	0					0	my (%seen,%locators,%indicators);
845	0					0	for my $map ($oldmap,$newmap) {
846	0	0				0	my $key = ($map eq $oldmap ? "old":"new");
847	0	0				0	my $value = ($map eq $oldmap ? 1:2);
848
849	0					0	for my $m (map { $_->[TM->LID] } ($map->toplets(\ '+all'))) {
	0					0
850							# get the topic-aspects (tid, locators and identifiers)
851							# for finding unchanged/new/old topics
852	0					0	my $midlet=$map->toplet($m);
853	0	0				0	$locators{$key}->{$midlet->[TM->ADDRESS]}=$m
854							if ($midlet->[TM->ADDRESS]);
855	0					0	map { $indicators{$key}->{$_}=$m } (@{$midlet->[TM->INDICATORS]});
	0					0
	0					0
856	0					0	$seen{$m}\|=$value;
857							}
858	0					0	for my $a (map { $_->[TM->LID] } $map->asserts (\ '+all')) {
	0					0
859	0					0	$seen{$a}\|=$value;
860							}
861							}
862
863							# identify same topics
864							# first identity: same topic ids
865	0					0	my %old2new = map { ($_,$_) } grep { $seen{$_} == 3 } keys %seen;
	0					0
	0					0
866	0					0	my $foundxlat;
867	0	0				0	if (grep($_==TM->Subject_based_Merging,@{$options->{consistency}}))
	0					0
868							{
869							# second: same locators
870							# note that this overwrites topic identitites!
871							# scenario: old has topica/loc x; new has topica/no loc and topicb/loc x
872	0		0			0	map { $foundxlat\|\|=($locators{old}->{$_} ne $locators{new}->{$_});
	0					0
873	0					0	$old2new{$locators{old}->{$_}}=$locators{new}->{$_};
874							}
875	0					0	(grep(exists $locators{new}->{$_}, keys %{$locators{old}}));
876							}
877	0	0				0	if (grep($_==TM->Indicator_based_Merging,@{$options->{consistency}}))
	0					0
878							{
879							# final: matching indicators
880							# note that this overwrites topic and locator identitites, similar scenario as above
881	0		0			0	map { $foundxlat\|\|=($indicators{old}->{$_} ne $indicators{new}->{$_});
	0					0
882	0					0	$old2new{$indicators{old}->{$_}}=$indicators{new}->{$_}; }
883	0					0	(grep(exists $indicators{new}->{$_}, keys %{$indicators{old}}));
884							}
885							# no need to bother with translating assertions if there are no changed-tid identities
886	0	0	0			0	$xlatneeded=0 if ($xlatneeded && !$foundxlat);
887
888							# produce list of missing/new topics
889	0	0				0	my %new2old=($xlatneeded?(reverse %old2new):%old2new);
890	0					0	my (%checkmidlet,%plusass,%minusass);
891	0					0	for my $t (keys %seen)
892							{
893	0	0	0			0	if ($seen{$t}==2 && !$new2old{$t})
		0	0
894							{
895							# identical assertions with new lids are not detected here
896							# but later (via minusass)
897							# new assertion-lids happen with identified renamed players (lid is computed over values!)
898	0	0				0	$newmap->retrieve($t)?$plusass{$t}=1:$plus{$t}=[];
899							}
900							elsif ($seen{$t}==1 && !$old2new{$t})
901							{
902	0	0				0	$oldmap->retrieve($t)?$minusass{$t}=1:$minus{$t}=[];
903							}
904							else
905							{
906							# we work along the old tids (when not the same)
907	0	0				0	$checkmidlet{$seen{$t}==2?$new2old{$t}:$t}=1;
908							}
909							}
910	0					0	undef %seen; undef %locators; undef %indicators;
	0					0
	0					0
911
912							#warn "check midlets ".Dumper \ %checkmidlet;
913
914							# weed out the topics/midlets that are unchanged
915							# and all the identical assertions
916	0					0	my @checkassertion;
917	0					0	for my $t (keys %checkmidlet) {
918
919	0	0				0	if ($t =~ /^[A-F0-9]{32}$/i) {
920	0					0	my $oa=$oldmap->retrieve($t);
921	0					0	my $on=$newmap->retrieve($old2new{$t});
922
923	0	0	0			0	if ($oa && $on && $oa->[TM->LID] ne $on->[TM->LID]) {
			0
924	0					0	push @checkassertion,$t;
925							}
926							} else {
927	0					0	my $ot = $oldmap->toplet($t);
928	0					0	my $nt = $newmap->toplet($old2new{$t});
929
930	0	0				0	unless (_toplets_eq ($ot, $nt)) {
931	0					0	$modified{$t}->{identities}=1;
932	0		0			0	$modified{$t}->{plus}\|\|=[];
933	0		0			0	$modified{$t}->{minus}\|\|=[];
934							}
935
936							# note: new toplet() returns internal id as well, which we DON'T want to check on here!
937							sub _toplets_eq
938							{
939	0			0		0	my ($a,$b)=@_;
940
941	0		0			0	my ($A, $B) = ($a->[TM->ADDRESS] \|\|'', $b->[TM->ADDRESS] \|\|''); # just convert undef into ''
			0
942	0	0				0	return 0 unless $A eq $B; # different subject address?
943	0					0	my %SIDS;
944	0					0	map { ++$SIDS{$_} } @{$a->[TM->INDICATORS]}, @{$b->[TM->INDICATORS]}; # we KNOW that the lists are UNIQUE, do we?
	0					0
	0					0
	0					0
945	0	0				0	return 0 if grep { $_ != 2 } values %SIDS; # if it is not exactly 2 (one from a, one from b), then not equal
	0					0
946	0					0	return 1; # we're happy: different LIDs don't interest us here
947							}
948
949							}
950							}
951
952							#warn "modified ".Dumper \%modified;
953
954	0					0	my %old2newid;
955							my %identities;
956	0	0				0	if ($xlatneeded)
957							{
958							# now do the translation for assertions: rebuild old assertions
959							# into new namespace and compute the id
960							# don't waste time: do this only on the assertions that may be required
961							# minusass (or plusass) must be checked to find assertions with renamed-but-identical players
962	0					0	for my $t (@checkassertion,keys %minusass)
963							{
964	0					0	my $m=$oldmap->retrieve($t);
965	0					0	my ($lid,$scope,$kind,$type,$roles,$players)=
966	0					0	@{$m}[TM->LID,TM->SCOPE,TM->KIND,TM->TYPE,TM->ROLES,TM->PLAYERS];
967
968							# if any of the topics is untranslatable, then skip the remaining work
969							# as it can't successfully compare anyway...
970	0		0			0	$scope=$old2new{$scope} \|\| next;
971	0		0			0	$type=$old2new{$type} \|\| next;
972	0	0	0			0	my @newroles = map { ref($_)?$_:$old2new{$_} \|\| next; } (@{$roles});
	0					0
	0					0
973	0	0	0			0	my @newplayers = map { ref($_)?$_:$old2new{$_} \|\| next; } (@{$players});
	0					0
	0					0
974
975	0					0	my $n=Assertion->new(scope=>$scope,
976							kind=>$kind,
977							type=>$type,
978							roles=>\@newroles,players=>\@newplayers);
979	0					0	$newmap->canonicalize($n);
980	0					0	my $newid=TM::mklabel($n);
981	0					0	$old2newid{$t}=$newid;
982
983	0	0				0	if ($plusass{$newid}) # we found a matching assertion, wohee!
984							{
985	0					0	delete $plusass{$newid};
986	0					0	delete $minusass{$t};
987							# remember that this assertion was re-id'd (directly or indirectly via players)
988							# this is done for standalone assocs just the same as for bn/oc characteristics
989	0					0	$identities{$t}=$newid;
990							}
991							}
992							}
993
994							# finally, find and attach the modified assertions to their topics
995							# attributes: to the topic
996							# associations: to the type-topic
997
998	0					0	for my $key ("plus","minus")
999							{
1000	0					0	my ($unmodified,$map,$candidates);
1001	0	0				0	if ($key eq "plus")
1002							{
1003	0					0	$unmodified=\%plus; $map=$newmap; $candidates=\%plusass;
	0					0
	0					0
1004							}
1005							else
1006							{
1007	0					0	$unmodified=\%minus; $map=$oldmap; $candidates=\%minusass;
	0					0
	0					0
1008							}
1009
1010	0					0	for my $t (keys %{$candidates})
	0					0
1011							{
1012	0					0	my $m=$map->retrieve($t);
1013	0					0	my ($oldwho,$who,$what);
1014	0	0				0	if ($m->[TM->KIND] ne TM->ASSOC)
		0
1015							{
1016							# bn or oc: attach to referenced topic
1017	0					0	$who=($map->get_x_players($m,"thing"))[0];
1018	0					0	$what=$t;
1019							}
1020							elsif ($m->[TM->TYPE] eq "isa")
1021							{
1022							# isa associations get attached to the instance topic
1023	0					0	$who=($map->get_x_players($m,"instance"))[0];
1024	0					0	$what=$t;
1025							}
1026							else
1027							{
1028							# general assoc: gets attached to type topic
1029	0					0	$who=$m->[TM->TYPE];
1030	0					0	$what=$t;
1031							}
1032
1033							# if this assertion belongs to a topic that is marked gone/new, we save it with that topic
1034	0	0				0	if ($unmodified->{$who})
1035							{
1036	0					0	push @{$unmodified->{$who}},$what;
	0					0
1037							}
1038							else # if this belongs to a modified topic: more details please (new/old ass)
1039							{
1040							# we access things along the old id axis...
1041	0	0				0	if ($key eq "plus")
1042							{
1043	0					0	$who=$new2old{$who};
1044							}
1045	0		0			0	$modified{$who}->{$key}\|\|=[];
1046	0					0	push @{$modified{$who}->{$key}},$what;
	0					0
1047							}
1048							}
1049							}
1050
1051	0	0				0	map { $identities{$_}=$old2new{$_} if ($_ ne $old2new{$_}); } (keys %old2new);
	0					0
1052
1053	0					0	my $returnvalue={
1054							'identities'=>\%identities,
1055							'plus'=>\%plus,
1056							'minus'=>\%minus,
1057							'modified'=>\%modified,
1058							};
1059
1060							# pull in the midlets and assertions that have been affected,
1061							# so that the resulting datastructure can be frozen and used together with oldmap
1062							# to (re)create newmap
1063	0	0				0	if ($options->{include_changes})
1064							{
1065							# one problem, though is naming: midlets can have changed but their name doesn't
1066							# reflect that: we need two midlet datastructures here.
1067							# (assertions are fine, their names always reflect their content uniquely)
1068
1069	0					0	my (%plusm,%minusm,%ass,$a);
1070	0					0	map { $plusm{$_} = $newmap->toplet($_) } keys %plus;
	0					0
1071	0					0	map { $ass{ $_->[TM->LID] } = $_ }
	0					0
1072	0					0	map { $newmap->retrieve($_) }
1073	0					0	map { @$_ }
1074							values %plus;
1075	0					0	map { $minusm{$_} = $oldmap->toplet($_) } keys %minus;
	0					0
1076	0					0	map { $ass{ $_->[TM->LID] } = $_ }
	0					0
1077	0					0	map { $oldmap->retrieve($_) }
1078	0					0	map { @$_ }
1079							values %minus;
1080
1081	0					0	for my $k (keys %modified)
1082							{
1083							# these are corresponding topics with differing midlet (contents)
1084	0	0				0	if ($modified{$k}->{identities})
1085							{
1086	0					0	$plusm{$k} = $newmap->toplet($old2new{$k});
1087	0					0	$minusm{$k} = $oldmap->toplet($k);
1088							}
1089	0	0				0	map { $plusm{$_} =$newmap->toplet($_); $a=$newmap->retrieve($_) and $ass{$_}=$a; } (@{$modified{$k}->{plus}});
	0					0
	0					0
	0					0
1090	0	0				0	map { $minusm{$_}=$oldmap->toplet($_); $a=$oldmap->retrieve($_) and $ass{$_}=$a; } (@{$modified{$k}->{minus}});
	0					0
	0					0
	0					0
1091							}
1092
1093	0					0	$returnvalue->{plus_midlets} =\%plusm;
1094	0					0	$returnvalue->{minus_midlets} =\%minusm;
1095	0					0	$returnvalue->{assertions} =\%ass;
1096							}
1097
1098	0					0	return $returnvalue;
1099							}
1100
1101							=pod
1102
1103							=item B (DEPRECATED)
1104
1105							I<$tm>->melt (I<$tm2>)
1106
1107							This - probably more auxiliary - function copies relevant aspect of a second map into the object.
1108
1109							=cut
1110
1111							our @ESSENTIALS = qw(mid2iid assertions baseuri variants);
1112
1113							sub melt {
1114	0			0	1	0	my $self = shift;
1115	0					0	my $tm2 = shift;
1116
1117	0					0	@{$self}{@ESSENTIALS} = @{$tm2}{@ESSENTIALS};
	0					0
	0					0
1118	0					0	$self->{last_mod} = Time::HiRes::time;
1119							}
1120
1121							=pod
1122
1123							=item B
1124
1125							warn "topic map broken" if I<$tm>->insane
1126
1127							This method tests invariant conditions inside the TM structure of that map. Specifically,
1128
1129							=over
1130
1131							=item *
1132
1133							each toplet has a LID which points to a toplet with the same address
1134
1135							=back
1136
1137							It returns a string with a message or C if everything seems fine.
1138
1139							TODO: add test whether all variant entries have a proper LID (and toplet)
1140
1141
1142							=cut
1143
1144							sub insane {
1145	5			5	1	19	my $self = shift;
1146
1147	5					10	my $mid2iid = $self->{mid2iid};
1148	5					7	my $asserts = $self->{assertions};
1149
1150							# Test 1: all toplet LIDs point to something in mid2iid which refers to themselves
1151	5					30	foreach my $k (keys %$mid2iid) {
1152	112					145	my $t = $mid2iid->{$k};
1153	112	100				342	return "toplet LID $k not in mid2iid"
1154							unless $mid2iid->{ $t->[TM->LID] };
1155	111	50				329	return "LID $k inconsistent with toplet LID"
1156							unless $mid2iid->{ $t->[TM->LID] } == $t;
1157	111	100	66			319	return "key $k looks like assertion, but has not assertions entry"
1158							if $k =~ /[[:xdigit:]]{16}/ and !$asserts->{$k};
1159							}
1160							## Test 2: all assertions are toplets
1161							# foreach my $k (keys %$asserts) {
1162							# return "assertion $k has no toplet entry"
1163							# unless $mid2iid->{ $asserts->{$k}->[TM->LID] };
1164							# return "assertion $k toplet entry has a different LID"
1165							# unless $mid2iid->{ $asserts->{$k}->[TM->LID] }->[TM->LID] eq $k;
1166							# }
1167	3					25	return undef; # pass all tests
1168							}
1169
1170							=pod
1171
1172							=back
1173
1174							=head1 TOPLET INTERFACE
1175
1176							I are light-weight versions of TMDM topics. They only carry addressing information and are
1177							represented by an array (struct) with the following fields:
1178
1179							=cut
1180
1181							struct 'Toplet' => [
1182							lid => '$',
1183							saddr => '$',
1184							sinds => '$',
1185							];
1186
1187							=pod
1188
1189							=over
1190
1191							=item C (index: C)
1192
1193							The internal identifier. Mostly it repeats the key in the toplet hash, but also aliased identifiers
1194							may exist.
1195
1196							=item C (index: C )
1197
1198							It contains the B (address) URI, if known. Otherwise C.
1199
1200							=item C (index: C)
1201
1202							This is a reference to a list containing B (indicators). The list can be empty,
1203							no duplicate removal is attempted at this stage.
1204
1205							=back
1206
1207							You can create this structure manually, but mostly you would leave it to C to do the
1208							work.
1209
1210							Example:
1211
1212							# dogmatic way to produce it
1213							my $to = Toplet->new (lid => $baseuri . 'my-lovely-cat',
1214							saddr => 'http://subject-address.com/',
1215							sinds => []);
1216
1217							# also good and well
1218							my $to = [ $baseuri . 'my-lovely-cat',
1219							'http://subject-address.com/',
1220							[] ];
1221
1222							# better
1223							my $to = $tm->internalize ('my-lovely-cat' => 'http://subject-address.com/');
1224
1225							To access the individual fields, you can either use the struct accessors C and C, or
1226							use the constants defined above for indices into the array:
1227
1228							=cut
1229
1230							use constant {
1231							# LID => 0,
1232	36					65025	ADDRESS => 1,
1233							INDICATORS => 2
1234	36			36		465	};
	36					75
1235
1236							=pod
1237
1238							Example:
1239
1240							warn "indicators: ", join (", ", @{$to->sinds});
1241
1242							warn "locator: ", $to->[TM->ADDRESS];
1243
1244							=head2 Methods
1245
1246							=over
1247
1248							=item B
1249
1250							I<$iid> = I<$tm>->internalize (I<$some_id>)
1251
1252							I<$iid> = I<$tm>->internalize (I<$some_id> => I<$some_id>)
1253
1254							I<@iids> = I<$tm>->internalize (I<$some_id> => I<$some_id>, ...)
1255
1256							This method does some trickery when a new toplet should be added to the map, depending on how
1257							parameters are passed into it. The general scheme is that pairs of identifiers are passed in. The
1258							first is usually the internal identifier, the second a subject identifier or the subject
1259							locator. The convention is that subject identifier URIs are passed in as string references, whereas
1260							subject locator URIs are passed in as strings.
1261
1262							The following cases are covered:
1263
1264							=over
1265
1266							=item C undef>
1267
1268							If the ID is already an absolute URI and contains the C of the map as prefix, then this URI
1269							is used as internal toplet identifier. If the ID is some other URI, then a toplet with that URI as
1270							subject locator is searched in the map. If such a toplet already exists, then nothing special needs
1271							to happen. If no such toplet existed, a new URI, based on the C and a random number will
1272							be created for the internal identifier and the original URI is used as subject address.
1273
1274							B: Using C URI> implies that you use two different URIs as subject addresses. This
1275							will result in an error.
1276
1277							=item C URI>
1278
1279							Like above, only that the URI is directly interpreted as subject address.
1280
1281							=item C \ URI> (reference to string)
1282
1283							Like above, only that the URI is interpreted as another subject identifier. If the toplet already existed,
1284							then this subject identifier is simply added. Duplicates are suppressed (since v1.31).
1285
1286							=item C URI>
1287
1288							Like above, only that the internal identifier is auto-created if there is no toplet with the URI
1289							as subject address.
1290
1291							Attention: If you call internalize like this
1292
1293							$tm->internalize(undef => $whatever)
1294
1295							then perl will (un)helpfully replace the required undef with the string "undef" and wreck the operation.
1296							Using either a variable to hold the undef or replacing the (syntactic sugar) arrow with a comma works around this issue.
1297
1298							=item C \ URI>
1299
1300							Like above, only that the URI us used as subject identifier.
1301
1302							=item C undef>
1303
1304							A toplet with an auto-generated ID will be inserted.
1305
1306							=back
1307
1308							In any case, the internal identifier(s) of all inserted (or existing) toplets are returned for
1309							convenience.
1310
1311							=cut
1312
1313							our $toplet_ctr = 0;
1314
1315							sub internalize {
1316	5309			5309	1	10433	my $self = shift;
1317	5309					15299	my $baseuri = $self->{baseuri};
1318
1319							#warn "internalize base: $baseuri";
1320
1321	5309					6698	my @mids;
1322	5309					13698	my $mid2iid = $self->{mid2iid};
1323	5309					13468	while (@_) {
1324	27051					55516	my ($k, $v) = (shift, shift); # assume to get here undef => URI or ID => URI or ID => \ URI or ID => undef
1325							#warn "internalize $k, $v"; # if ! defined $k;
1326							# make sure that $k contains a mid
1327
1328	27051	50	66			125270	$k = undef if defined $k && $k eq 'undef'; # perl 5.10 will stringify undef => ....
1329
1330	27051	100				44844	if (defined $k) {
		100
		100
1331	27005	100				134274	if ($mid2iid->{$k}) { # this identifier is already in the map
		100
		100
		100
1332							# null
1333							} elsif ($k =~ /^$baseuri/) { # ha, perfect, another identifier already in form
1334							# null # keep it as it is
1335							} elsif ($k =~ /^\w+:/) { # some other absURL
1336	23	100				58	if (my $k2 = $self->tids ($k)) { # we already had it
1337	8					49	($k, $v) = ($k2, $k);
1338							} else { # it is unknown so far
1339	15					85	($k, $v) = ($baseuri.sprintf ("uuid-%010d", $toplet_ctr++), $k);
1340							}
1341							} elsif (my $k2 = $self->tids ($k)) {
1342	5276					13703	$k = $k2; # then we already have it, maybe under a different mid, take that
1343
1344							} else { # this means we have a relURI and it is not from that map
1345	3188					7773	$k = $baseuri.$k; # but now it is
1346							}
1347
1348							} elsif (ref ($v) eq 'Assertion') { # k is not defined, lets look at v, but if that is an assertion
1349	1					6	$k = $baseuri.sprintf ("uuid-%010d", $toplet_ctr++); # generate a new one
1350							} elsif (my $k2 = $self->tids ($v)) { # k is not defined, lets look at v; we already had it
1351	19					40	$k = $k2; # this will be k then
1352							} else { # it is unknown so far
1353	26					163	$k = $baseuri.sprintf ("uuid-%010d", $toplet_ctr++); # generate a new one
1354							}
1355
1356							#warn "really internalizing '$k' '$v'";
1357	27051					66606	push @mids, $k;
1358
1359	27051	100				60927	$v = $v->[TM->LID] if ref ($v) eq 'Assertion'; # for internal reification we use the assertion's LID
1360
1361	27051		100			87163	$mid2iid->{$k} \|\|= [ $k, undef, [] ]; # now see that we have an entry in the mid2iid table
1362	27051					51711	my $kentry = $mid2iid->{$k}; # keep this as a shortcut
1363
1364	27051	100				51356	if ($v) {
1365	1081	100				3653	if (ref($v)) { # being a reference means that we have a subject indication
		100
1366	732					2694	push @{$kentry->[TM->INDICATORS]}, $$v # append it to the list
	138					541
1367	751	100				1018	unless grep {$$v eq $_} @{$kentry->[TM->INDICATORS]}; # if not yet there
	751					4498
1368							} elsif ($kentry->[TM->ADDRESS]) { # this is a subject address and, oh, there is already a subject address, not good
1369	10	100				50	$log->logdie ("duplicate subject address '$v' for '$k'") unless $v eq $kentry->[TM->ADDRESS];
1370							} else { # everything is fine, we can set it
1371	320					869	$kentry->[TM->ADDRESS] = $v;
1372							}
1373							}
1374	27050					81049	$mid2iid->{$k} = $kentry; # necessary if mid2iid is tied itself
1375							}
1376	5308					8228	$self->{mid2iid} = $mid2iid; #!! needed for Berkeley DBM recognize changes on deeper levels
1377	5308					18271	$self->{last_mod} = Time::HiRes::time;
1378	5308	100				38630	return wantarray ? @mids : $mids[0];
1379							}
1380
1381							=pod
1382
1383							=item B (old name B)
1384
1385							I<$t> = I<$tm>->toplet (I<$mid>)
1386
1387							I<@ts> = I<$tm>->toplet (I<$mid>, ....)
1388
1389							This function returns a reference to a toplet structure. It can be used in scalar and list context.
1390
1391							=cut
1392
1393							sub midlet {
1394	7185			7185	0	18930	return toplet (@_);
1395							}
1396
1397							sub toplet {
1398	7216			7216	1	14767	my $self = shift;
1399	7216					12298	my $mid2iid = $self->{mid2iid};
1400
1401	7216	100				16040	if (wantarray) {
1402	7184	50				12151	return (map { defined $_ ? $mid2iid->{$_} : $_ } @_);
	14368					63647
1403							} else {
1404	32					204	return $mid2iid->{$_[0]};
1405							}
1406							}
1407
1408							=pod
1409
1410							=item B (old name B)
1411
1412							I<@mids> = I<$tm>->toplets
1413
1414							I<@mids> = I<$tm>->toplets (I<@list_of_ids>)
1415
1416							I<@mids> = I<$tm>->toplets (I<$selection_spec>)
1417
1418							This function returns toplet structures from the map. B: This has changed from v 1.13. Before
1419							you got ids.
1420
1421							If no parameter is provided, all toplets are returned. This includes really everything also
1422							infrastructure toplets. If an explicit list is provided as parameter, then all toplets with these
1423							identifiers are returned.
1424
1425							If a search specification is used, it has to be passed in as string reference. That string contains
1426							the selection specification using the following simple language (curly brackets mean repetition,
1427							round bracket grouping, vertical bar alternatives):
1428
1429							specification -> { ( '+' \| '-' ) group }
1430
1431							whereby I is one of the following:
1432
1433							=over
1434
1435							=item C
1436
1437							refers to B toplets in the map. This includes those supplied by the application. The list also
1438							includes all infrastructure topics which the software maintains for completeness.
1439
1440							=item C
1441
1442							refers to all toplets the infrastructure has provided. This implies that
1443
1444							all - infrastructure
1445
1446							is everything the user (application) has supplied.
1447
1448							=back
1449
1450							Examples:
1451
1452							# all toplets except those from TM::PSI
1453							$tm->toplets (\ '+all -infrastructure')
1454
1455							B: No attempt is made to make this list unique.
1456
1457							B: The specifications are not commutative, but are interpreted from left-to-right. So C
1458							-infrastructure +infrastructure> is not the same as C. In the
1459							latter case the infrastructure toplets have been added twice, and are then deducted completely with
1460							C<-infrastructure>.
1461
1462							=cut
1463
1464							sub midlets {
1465	0			0	0	0	return toplets (@_);
1466							}
1467
1468							sub toplets {
1469	85			85	1	60777	my $self = shift;
1470	85					220	my $mid2iid = $self->{mid2iid};
1471
1472	85	100				249	if ($_[0]) { # if there is some parameter
1473	15	100				35	if (ref ($_[0]) ) { # whoohie, a search spec
1474	14					18	my $spec = ${$_[0]};
	14					28
1475	14					18	my $l = []; # will be list
1476	14					71	while ($spec =~ s/([+-])(\w+)//) {
1477	23	100				84	if ($2 eq 'all') {
		100
1478	11					95	$l = _mod_list ($1 eq '+', $l, keys %$mid2iid);
1479							} elsif ($2 eq 'infrastructure') {
1480	11					22	$l = _mod_list ($1 eq '+', $l, keys %{$infrastructure->{mid2iid}});
	11					69
1481							} else {
1482	1					10	$log->logdie (scalar __PACKAGE__ .": specification '$2' unknown");
1483							}
1484							}
1485	13	100				48	$log->logdie (scalar __PACKAGE__ .": unhandled specification '$spec' left") if $spec =~ /\S/;
1486	12					22	return map { $mid2iid->{$_} } @$l;
	198					325
1487							} else {
1488	1					3	my $m = $mid2iid;
1489	1					4	return @$m{$self->tids (@_)}; # make all these fu**ing identifiers map-absolute
1490							}
1491							} else { # if the list was empty, we assume every thing in the map
1492	70					865	return values %$mid2iid;
1493							}
1494
1495							sub _mod_list {
1496	59			59		94	my $pm = shift; # non-zero for +
1497	59					72	my $l = shift;
1498	59	100				97	if ($pm) {
1499	33					284	return [ @$l, @_ ];
1500							} else {
1501	26					31	my %minus;
1502	26					148	@minus{ @_ } = (1) x @_;
1503	26					52	return [ grep { !$minus{$_} } @$l ];
	378					835
1504							}
1505							}
1506							sub _mk_uniq {
1507	0			0		0	my %uniq;
1508	0					0	@uniq {@_} = (1) x @_;
1509	0					0	return keys %uniq;
1510							}
1511
1512							}
1513
1514							=pod
1515
1516							=item B (old name B)
1517
1518							I<$mid> = I<$tm>->tids (I<$some_id>)
1519
1520							I<@mids> = I<$tm>->tids (I<$some_id>, ...)
1521
1522							This function tries to build absolute versions of the identifiers passed in. C will be
1523							returned if no such can be constructed. Can be used in scalar and list context.
1524
1525							=over
1526
1527							=item *
1528
1529							If the passed-in identifier is a relative URI, so it is made absolute by prefixing it with the map
1530							C and then we look for a toplet with that internal identifier.
1531
1532							=item *
1533
1534							If the passed-in identifier is an absolute URI, where the C is a prefix, then that URI will
1535							be used as internal identifier to look for a toplet.
1536
1537							=item *
1538
1539							If the passed-in identifier is an absolute URI, where the C is B a prefix, then that
1540							URI will be used as subject locator and such a toplet will be looked for.
1541
1542							=item *
1543
1544							If the passed-in identifier is a reference to an absolute URI, then that URI will be used as subject
1545							identifier and such a toplet will be looked for.
1546
1547							=back
1548
1549							=cut
1550
1551							sub mids {
1552	7217			7217	0	3391343	return tids (@_);
1553							}
1554
1555							sub tids {
1556	17508			17508	1	63422	my $self = shift;
1557	17508					28516	my $mid2iid = $self->{mid2iid}; # shorthand
1558
1559	17508					21230	my @ks;
1560							MID:
1561	17508					35985	foreach my $k (@_) {
1562	24730	100				187752	if (! defined $k) { # someone put in undef
		100
		100
		100
1563	11					24	push @ks, undef;
1564
1565							} elsif (ref ($k)) { # would be subject indicator ref
1566	219					374	my $kk = $$k;
1567	219					274	foreach my $k2 (keys %{$mid2iid}) {
	219					1907
1568	3824	100				3700	if (grep ($_ eq $kk,
	3824					16988
1569							@{$mid2iid->{$k2}->[TM->INDICATORS]}
1570							)) {
1571	193					731	push @ks, $mid2iid->{$k2}->[TM->LID]; # LID points to 'canonical' internal identifier
1572	193					1062	next MID;
1573							}
1574							}
1575	26					143	push @ks, undef;
1576
1577							} elsif (my $kk = $mid2iid->{$k}) { # we already have something which looks like a tid
1578	1147					4508	push @ks, $kk->[TM->LID]; # give back the 'canonical' one
1579
1580							} elsif ($k =~ /(^\w+:)\|(^[A-F0-9]{32}$)/i) { # must be some other uri or assoc id, must be subject address
1581	36			36		295	no warnings;
	36					95
	36					13744
1582	62					510	my @k2 = grep ($mid2iid->{$_}->[TM->ADDRESS] eq $k, keys %{$mid2iid});
	62					2448
1583	62	100				474	push @ks, @k2 ? $mid2iid->{$k2[0]}->[TM->LID] : undef; # we take the first we find
1584
1585							} else { # only a string, like 'aaa'
1586	23291					67742	my $k2 = $self->{baseuri}.$k; # make it absolute, and...
1587	23291	100				135752	push @ks, $mid2iid->{$k2} # see whether there is something
1588							? $mid2iid->{$k2}->[TM->LID] : undef; # and then take canonical LID
1589							}
1590							}
1591							#warn "mids ".Dumper (\@_)." returning ".Dumper (\@ks);
1592	17508	100				87222	return wantarray ? @ks : $ks[0];
1593							}
1594
1595							=pod
1596
1597							=item B
1598
1599							I<$tm>->externalize (I<$some_id>, ...)
1600
1601							This function simply deletes the toplet entry for the given internal identifier(s). The function
1602							returns all deleted toplet entries.
1603
1604							B: Assertions in which this topic is involved will B be removed. Use C to
1605							clean up all assertion where non-existing toplets still exist.
1606
1607							=cut
1608
1609							sub externalize {
1610	68			68	1	582	my $self = shift;
1611
1612	68					134	my $mid2iid = $self->{mid2iid};
1613	68					143	my @doomed = map { delete $mid2iid->{$_} } @_;
	10					54
1614	68					125	$self->{mid2iid} = $mid2iid; ## !! needed for Berkeley DBM recognize changes on deeper levels
1615	68					278	$self->{last_mod} = Time::HiRes::time;
1616	68					683	return @doomed;
1617							}
1618
1619							=pod
1620
1621							=back
1622
1623							=head1 ASSERTIONS INTERFACE
1624
1625							One assertion is a record containing its own identifier, the scope, the type of the assocation, an
1626							field whether this is an association, an occurrence or a name and then all roles and all players,
1627							both in separate lists.
1628
1629							=cut
1630
1631							struct 'Assertion' => [
1632							lid => '$',
1633							scope => '$',
1634							type => '$',
1635							kind => '$', # redundant, but very useful
1636							roles => '$',
1637							players => '$',
1638							canon => '$',
1639							];
1640
1641							use constant {
1642	36					4554	LID => 0,
1643							SCOPE => 1,
1644							TYPE => 2,
1645							KIND => 3,
1646							ROLES => 4,
1647							PLAYERS => 5,
1648							CANON => 6
1649	36			36		245	};
	36					85
1650
1651							=pod
1652
1653							Assertions consist of the following components:
1654
1655							=over
1656
1657							=item I (index C):
1658
1659							Every assertion has an identifier. It is a unique identifier generated from a canonicalized form of
1660							the assertion itself.
1661
1662							=item I (index: C)
1663
1664							This component holds the scope of the assertion.
1665
1666							=item I (index: C, redundant information):
1667
1668							For technical reasons (read: it is faster) we distinguish between full associations (C),
1669							names (C) and occurrences (C).
1670
1671							=cut
1672
1673							# values for 'kind'
1674							use constant {
1675	36					62846	ASSOC => 0,
1676							NAME => 1,
1677							OCC => 2,
1678	36			36		221	};
	36					75
1679
1680							=pod
1681
1682							=item I (index: C):
1683
1684							The toplet id of the type of this assertion.
1685
1686							=item I (index: C):
1687
1688							A list reference which holds a list of toplet ids for the roles.
1689
1690							=item I (index: C):
1691
1692							A list reference which holds a list of toplet IDs for the players.
1693
1694							=item I (index: C):
1695
1696							Either C<1> or C to signal whether this assertion has been (already) canonicalized (see
1697							L). If an assertion is canonicalized, then the players and roles lists are sorted
1698							(somehow), so that assertions can be easily compared.
1699
1700							=back
1701
1702							Obviously the lists for roles and players B have the same length, so that every player
1703							corresponds to exactly one role. If one role is played by several players, the role appears multiple
1704							times.
1705
1706							As a special case, names and occurrences are mapped into assertions, by
1707
1708							=over
1709
1710							=item *
1711
1712							setting the I to C and C,
1713
1714							=item *
1715
1716							setting the I to the toplet id in question and using a L as the player for
1717							C,
1718
1719							=item *
1720
1721							using the I component to store the name/occurrence type,
1722
1723							=item *
1724
1725							using as I either C or C
1726
1727							=back
1728
1729							Example:
1730
1731							# general association
1732							$a = Assertion->new (type => 'is-subclass-of',
1733							roles => [ 'subclass', 'superclass' ],
1734							players => [ 'rumsti', 'ramsti' ])
1735
1736
1737							warn $a->scope . " is the same as " . $a->[TM->SCOPE];
1738
1739							# create a name
1740							use TM::Literal;
1741							$n = Assertion->new (kind => TM->NAME,
1742							type => 'name',
1743							scope => 'us',
1744							roles => [ 'thing', 'value' ],
1745							players => [ 'rumsti',
1746							new TM::Literal ('AAA') ]);
1747
1748							# create an occurrence
1749							use TM::Literal;
1750							$n = Assertion->new (kind => TM->OCC,
1751							type => 'occurrence',
1752							scope => 'us',
1753							roles => [ 'thing', 'value' ],
1754							players => [ 'rumsti',
1755							new TM::Literal ('http://whatever/') ]);
1756
1757							=head2 Special Assertions
1758
1759							This package adopts the following conventions to store certain assertions:
1760
1761							=over
1762
1763							=item C
1764
1765							Associations of this type should have one role C and another C. The scope
1766							should always be C.
1767
1768							=item C
1769
1770							Associations of this type should have one role C and another C. The scope should
1771							always be C.
1772
1773							=item C
1774
1775							Assertions for names should have the C component set to it and use the C component to
1776							store the name type. The two roles to use are C for the value and C for the toplet
1777							carrying the name.
1778
1779							=item C
1780
1781							Assertions for occurrences should have the C component set to it and use the C component
1782							to store the occurrence type. The two roles to use are C for the value and C for the
1783							toplet carrying the name.
1784
1785							=back
1786
1787							=head2 Methods
1788
1789							=over
1790
1791							=item B
1792
1793							I<@as> = I<$tm>->assert (I<@list-of-assertions>)
1794
1795							This method takes a list of assertions, canonicalizes them and then injects them into the map. If
1796							one of the newly added assertions already existed in the map, it will be ignored.
1797
1798							In this process, all assertions will be completed (if fields are missing).
1799
1800							=over
1801
1802							=item If an assertion does not have a type, it will default to C<$TM::PSI::THING>.
1803
1804							=item If an assertion does not have a scope, it defaults to C<$TM::PSI::US>.
1805
1806							=back
1807
1808							Then the assertion will be canonicalized (unless it already was). This implies that
1809							non-canonicalized assertions will be modified, in that the role/player lists change. Any assertion
1810							not having an LID will get one.
1811
1812							The method returns a list of all asserted assertions.
1813
1814							Example:
1815
1816							my $a = Assertion->new (type => 'rumsti');
1817							$tm->assert ($a);
1818
1819							B: Maybe the type will default to I in the future.
1820
1821							=cut
1822
1823							sub assert {
1824	2400			2400	1	9324	my $self = shift;
1825	2400					4614	my ($THING, $US) = ('thing', 'us');
1826
1827							#warn "sub $THING assert $self".ref ($self);
1828
1829	2400					3123	my @tids; # first collect all emerging tids from the assertions
1830	2400					5079	foreach (@_) {
1831	4163	50				11104	unless ($_->[CANON]) {
1832	4163		66			22052	push @tids, $_->[TYPE] \|\| $THING;
1833	4163		66			23823	push @tids, $_->[SCOPE] \|\| $US;
1834	4163					4834	push @tids, @{$_->[ROLES]};
	4163					8911
1835	4163					6279	push @tids, grep { ! ref ($_) } @{$_->[PLAYERS]};
	8310					25431
	4163					8037
1836							}
1837							}
1838	2400					4375	@tids = $self->internalize ( map { $_ => undef } @tids); # then convert them into proper usable tids
	22855					51847
1839
1840	2400					9736	my $asserts = $self->{assertions}; # load (MLDBM kicker)
1841	2400					5174	foreach (@_) { # only now use all the information to complete the assertions
1842	4163	50				10527	unless ($_->[CANON]) {
1843	4163		100			13492	$_->[KIND] \|\|= ASSOC;
1844	4163					8030	$_->[TYPE] = shift @tids;
1845	4163					10014	$_->[SCOPE] = shift @tids;
1846	4163					5543	$_->[ROLES] = [ map { shift @tids } @{$_->[ROLES]} ];
	8310					42615
	4163					7828
1847	4163	100				8838	$_->[PLAYERS] = [ map { $_ = ref ($_) ? $_ : shift @tids } @{$_->[PLAYERS]} ];
	8310					41990
	4163					9710
1848
1849	4163					12417	canonicalize (undef, $_);
1850
1851	4163		66			17337	$_->[LID] \|\|= mklabel ($_);
1852							}
1853	4163					20386	$asserts->{$_->[LID]} = $_;
1854							}
1855	2400					5208	$self->{assertions} = $asserts; ### HACK ALERT: needed for Berkeley DBM recognize changes on deeper levels
1856	2400					19367	$self->{last_mod} = Time::HiRes::time;
1857	2400					9562	return @_;
1858							}
1859
1860							=pod
1861
1862							=item B
1863
1864							I<$assertion> = I<$tm>->retrieve (I<$some_assertion_id>)
1865
1866							I<@assertions> = I<$tm>->retrieve (I<$some_assertion_id>, ...)
1867
1868							This method takes a list of assertion IDs and returns the assertion(s) with the given (subject)
1869							ID(s). If the assertion is not identifiable, C will be returned in its place. Called in list
1870							context, it will return a list of assertion references.
1871
1872							=cut
1873
1874							sub retrieve {
1875	228			228	1	3927	my $self = shift;
1876	228					378	my $asserts = $self->{assertions};
1877
1878	228	100				407	if (wantarray()) {
1879	87					162	return map { $asserts->{$_} } @_;
	87					278
1880							} else {
1881	141					408	return $asserts->{$_[0]};
1882							}
1883							}
1884
1885							=pod
1886
1887							=item B
1888
1889							I<@assertions> = I<$tm>->asserts (I<$selection_spec>)
1890
1891							If a search specification is used, it has to be passed in as string reference. That string contains
1892							the selection specification using the following simple language (curly brackets mean repetition,
1893							round bracket grouping, vertical bar alternatives):
1894
1895							specification -> { ( '+' \| '-' ) group }
1896
1897							whereby I is one of the following:
1898
1899							=over
1900
1901							=item C
1902
1903							refers to B assertions in the map. This includes those supplied by the application, but also
1904							all predefined associations, names and occurrences.
1905
1906							=item C
1907
1908							refers to all assertions which are actually associations
1909
1910							=item C
1911
1912							refers to all assertions which are actually name characteristics
1913
1914							=item C
1915
1916							refers to all assertions which are actually occurrences
1917
1918							=item C
1919
1920							refers to all assertions the infrastructure has provided. This implies that
1921
1922							all - infrastructure
1923
1924							is everything the user (application) has supplied.
1925
1926							=back
1927
1928							Examples:
1929
1930							# all toplets except those from TM::PSI
1931							$tm->asserts (\ '+all -infrastructure')
1932
1933							# like above, without assocs, so with names and occurrences
1934							$tm->asserts (\ '+all -associations')
1935
1936							B: No attempt is made to make this list unique.
1937
1938							B: The specifications are not commutative, but are interpreted from left-to-right. So C
1939							-associations +associations> is not the same as C.
1940							C<-infrastructure>.
1941
1942							=cut
1943
1944							sub asserts {
1945	45			45	1	9288	my $self = shift;
1946	45					126	my $asserts = $self->{assertions};
1947
1948	45	100				145	if ($_[0]) {
1949	13	50				32	if (ref ($_[0])) {
1950	13					16	my $spec = ${$_[0]};
	13					22
1951	13					23	my $l = []; # will be list
1952	13					65	while ($spec =~ s/([+-])(\w+)//) {
1953	37	100				159	if ($2 eq 'all') {
		100
		100
		100
		50
1954	11					58	$l = _mod_list ($1 eq '+', $l, keys %$asserts);
1955
1956							} elsif ($2 eq 'associations') {
1957	30					76	$l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
	36					116
1958	3					11	grep { $_->[TM->KIND] == TM->ASSOC } values %$asserts);
1959							} elsif ($2 eq 'names') {
1960	14					59	$l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
	168					571
1961	14					46	grep { $_->[TM->KIND] == TM->NAME } values %$asserts);
1962							} elsif ($2 eq 'occurrences') {
1963	5					21	$l = _mod_list ($1 eq '+', $l, map { $_->[TM->LID] }
	60					197
1964	5					17	grep { $_->[TM->KIND] == TM->OCC } values %$asserts);
1965							} elsif ($2 eq 'infrastructure') {
1966	4					44	$l = _mod_list ($1 eq '+', $l, keys %{$TM::infrastructure->{assertions}} );
	4					20
1967							} else {
1968	0					0	$log->logdie (scalar __PACKAGE__ .": specification '$2' unknown");
1969							}
1970							}
1971	13	50				36	$log->logdie (scalar __PACKAGE__ .": unhandled specification '$spec' left") if $spec =~ /\S/;
1972	13					25	return map { $asserts->{$_} } @$l;
	92					192
1973							} else {
1974	0					0	return $asserts->{@_};
1975							}
1976							} else {
1977	32					221	return values %$asserts;
1978							}
1979							}
1980
1981							=pod
1982
1983							=item B
1984
1985							I<$bool> = I<$tm>->is_asserted (I<$a>)
1986
1987							This method will return C<1> if the passed-in assertion exists in the store. The assertion will be
1988							canonicalized before checking, but no defaults will be added if parts are missing.
1989
1990							=cut
1991
1992							sub is_asserted {
1993	4			4	1	259	my $self = shift;
1994	4					7	my $a = shift;
1995
1996	4	50				14	unless ($a->[CANON]) {
1997	4					13	absolutize ($self, $a);
1998	4					10	canonicalize (undef, $a);
1999	4					11	$a->[TM->LID] = mklabel ($a);
2000							}
2001	4					31	return $self->{assertions}->{ $a->[TM->LID] };
2002							}
2003
2004							=pod
2005
2006							=item B
2007
2008							I<$tm>->retract (I<@list_of_assertion_ids>)
2009
2010							This methods expects a list of assertion IDs and will remove the assertions from the map. If an ID
2011							is bogus, it will be ignored.
2012
2013							B: Only these particular assertions will be deleted. Any toplets mentioned in these assertions
2014							will remain. Use C to remove unnecessary toplets.
2015
2016							=cut
2017
2018							sub retract {
2019	4			4	1	354	my $self = shift;
2020
2021							# TODO: does delete $self->{assertions}->{@_} work?
2022	4					18	my $assertions = $self->{assertions};
2023	5					17	map {
2024	4					8	delete $assertions->{$_} # delete them from the primary store
2025							} @_;
2026	4					8	$self->{assertions} = $assertions; ##!! needed for Berkeley DBM recognize changes on deeper levels
2027	4					18	$self->{last_mod} = Time::HiRes::time;
2028							}
2029
2030							=pod
2031
2032							=item B, B, B
2033
2034							I<@assertions> = I<$tm>->match (TM->FORALL [ , I ] );
2035
2036							I<@assertions> = I<$tm>->match (TM->EXISTS [ , I ] );
2037
2038							I<@assertions> = I<$tm>->match_forall ( [ I ] );
2039
2040							I<@assertions> = I<$tm>->match_exists ( [ I ] );
2041
2042							These methods take a search specification and return matching assertions. The result list contains
2043							references to the assertions themselves, not to copies. You can change the assertions themselves on
2044							your own risk (read: better not do it).
2045
2046							For C, if the constant C is used as first parameter, this method returns a list of
2047							B assertions in the store following the search specification. If the constant C is
2048							used, the method will return a non-empty value if B can be found. Calling the more
2049							specific C is the same as calling C with C. Similar for
2050							C.
2051
2052							B: C is not yet implemented.
2053
2054							For I there are two alternatives:
2055
2056							=over
2057
2058							=item Generic Search
2059
2060							Here the search specification is a hash with the same fields as for the constructor of an assertion:
2061
2062							Example:
2063
2064							$tm->match (TM->FORALL, type => '...',
2065							scope => '...,
2066							roles => [ ...., ....],
2067							players => [ ...., ....]);
2068
2069							Any combination of assertion components can be used, all are optional, with the only constraint that
2070							the number of roles must match that for the players. All involved IDs should be absolutized before
2071							matching. If you use C for a role or a player, then this is interpreted as I
2072							(wildcard).
2073
2074							=item Specialized Search
2075
2076							The implementation also understands a number of specialized search specifications. These are
2077							listed in L.
2078
2079							=back
2080
2081							B: Some combinations will be very fast, while others quite slow. If you experience
2082							problems, then it might be time to think about indexing (see L).
2083
2084							B: For the assertion type and the role subclassing is honored.
2085
2086							=cut
2087
2088							use constant {
2089	36					246050	EXISTS => 1,
2090							FORALL => 0
2091	36			36		281	};
	36					77
2092
2093							our %exists_handlers = (); # they should be written at some point
2094
2095							our %forall_handlers = (
2096							'' => {
2097							code => sub { # no params => want all of them
2098							my $self = shift;
2099							return values %{$self->{assertions}};
2100							},
2101							desc => 'returns all assertions',
2102							params => {},
2103							},
2104
2105							'nochar' => {
2106							code => sub {
2107							my $self = shift;
2108							return
2109							grep ($_->[KIND] <= ASSOC,
2110							values %{$self->{assertions}});
2111							},
2112							desc => 'returns all associations (so no names or occurrences)',
2113							params => { 'nochar' => '1'}
2114							},
2115							#-- taxos ---------------------------------------------------------------------------------------------
2116							'subclass.type' => {
2117							code => sub {
2118							my $self = shift;
2119							my $st = shift;
2120							my ($ISSC, $SUBCLASS) = ('is-subclass-of', 'subclass');
2121							return () unless shift eq $ISSC;
2122							return
2123							grep ( $self->is_x_player ($_, $st, $SUBCLASS),
2124							grep ( $_->[TYPE] eq $ISSC,
2125							values %{$self->{assertions}}));
2126							},
2127							desc => 'returns all assertions where there are subclasses of a given toplet',
2128							params => { 'type' => 'is-subclass-of', subclass => 'which toplet should be the superclass'},
2129							key => sub {
2130							my $self = shift;
2131							my $a = shift;
2132							my ($ISSC, $SUBCLASS) = ('is-subclass-of', 'subclass');
2133							return "subclass.type:". ($self->get_x_players ($a, $SUBCLASS))[0] . '.' . $ISSC;
2134							},
2135							enum => sub {
2136							my $self = shift;
2137							my ($ISSC) = ('is-subclass-of');
2138							return
2139							grep { $_->[TYPE] eq $ISSC }
2140							values %{$self->{assertions}};
2141							}
2142							},
2143
2144							'superclass.type' => {
2145							code => sub {
2146							my $self = shift;
2147							my $st = shift;
2148							my ($ISSC, $SUPERCLASS) = ('is-subclass-of', 'superclass');
2149							return () unless shift eq $ISSC;
2150							return
2151							grep ( $self->is_x_player ($_, $st, $SUPERCLASS),
2152							grep ( $_->[TYPE] eq $ISSC,
2153							values %{$self->{assertions}}));
2154							},
2155							desc => 'returns all assertions where there are superclasses of a given toplet',
2156							params => { 'type' => 'is-subclass-of', superclass => 'which toplet should be the subclass'},
2157							key => sub {
2158							my $self = shift;
2159							my $a = shift;
2160							my ($ISSC, $SUPERCLASS) = ('is-subclass-of', 'superclass');
2161							return "superclass.type:". ($self->get_x_players ($a, $SUPERCLASS))[0] . '.' . $ISSC;
2162							},
2163							enum => sub {
2164							my $self = shift;
2165							my ($ISSC) = ('is-subclass-of');
2166							return
2167							grep { $_->[TYPE] eq $ISSC }
2168							values %{$self->{assertions}};
2169							}
2170							},
2171
2172							'class.type' => {
2173							code => sub {
2174							my $self = shift;
2175							my $t = shift;
2176							my ($ISA, $CLASS) = ('isa', 'class');
2177							return () unless shift eq $ISA;
2178							return
2179							grep ( $self->is_x_player ($_, $t, $CLASS),
2180							grep ( $_->[TYPE] eq $ISA,
2181							values %{$self->{assertions}}));
2182							},
2183							desc => 'returns all assertions where there are instances of a given toplet',
2184							params => { type => 'isa', class => 'which toplet should be the class'},
2185							key => sub {
2186							my $self = shift;
2187							my $a = shift;
2188							my ($ISA, $CLASS) = ('isa', 'class');
2189							return "class.type:". ($self->get_x_players ($a, $CLASS))[0] . '.' . $ISA;
2190							},
2191							enum => sub {
2192							my $self = shift;
2193							my ($ISA) = ('isa');
2194							return
2195							grep { $_->[TYPE] eq $ISA }
2196							values %{$self->{assertions}};
2197							}
2198							},
2199
2200							'instance.type' => {
2201							code => sub {
2202							my $self = shift;
2203							my $i = shift;
2204							my ($ISA, $INSTANCE) = ('isa', 'instance');
2205							return () unless shift eq $ISA;
2206							return
2207							grep ( $self->is_x_player ($_, $i, $INSTANCE),
2208							grep ( $_->[TYPE] eq $ISA,
2209							values %{$self->{assertions}}));
2210							},
2211							desc => 'returns all assertions where there are classes of a given toplet',
2212							params => { type => 'isa', instance => 'which toplet should be the instance'},
2213							key => sub {
2214							my $self = shift;
2215							my $a = shift;
2216							my ($ISA, $INSTANCE) = ('isa', 'instance');
2217							return "instance.type:". ($self->get_x_players ($a, $INSTANCE))[0] . '.' . $ISA;
2218							},
2219							enum => sub {
2220							my $self = shift;
2221							my ($ISA) = ('isa');
2222							return
2223							grep { $_->[TYPE] eq $ISA }
2224							values %{$self->{assertions}};
2225							}
2226							},
2227							#--
2228							'char.irole' => {
2229							code => sub {
2230							warn "char.irole is deprecated. use char.topic instead";
2231							my $self = shift;
2232							my $topic = $_[1];
2233							return undef unless $topic;
2234							return
2235							grep ($self->is_player ($_, $topic) && # TODO: optimize this grep away (getting chars is expensive)
2236							NAME <= $_->[KIND] && $_->[KIND] <= OCC,
2237							values %{$self->{assertions}});
2238							},
2239							desc => 'deprecated: return all assertions which are characteristics for a given toplet',
2240							params => { char => '1', irole => 'the toplet for which characteristics are sought'}
2241							},
2242
2243							'char.topic' => {
2244							code => sub {
2245							my $self = shift;
2246							my $topic = $_[1];
2247							return
2248							grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2249							$_->[PLAYERS]->[0] eq $topic, # first role is always the 'thing'
2250							values %{$self->{assertions}});
2251							},
2252							desc => 'return all assertions which are characteristics for a given toplet',
2253							params => { char => '1', topic => 'the toplet for which characteristics are sought'},
2254							key => sub {
2255							my $self = shift;
2256							my $a = shift;
2257							return "char.topic:1.". $a->[PLAYERS]->[0];
2258							},
2259							enum => sub {
2260							my $self = shift;
2261							return
2262							grep { $_->[KIND] != ASSOC }
2263							values %{ $self->{assertions} };
2264							}
2265							},
2266
2267							'char.value' => {
2268							code => sub {
2269							my $self = shift;
2270							my $value = $_[1];
2271							return
2272							grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2273							$_->[PLAYERS]->[1]->[0] eq $value->[0] && # second role is always the value
2274							$_->[PLAYERS]->[1]->[1] eq $value->[1], # test value AND type
2275							values %{$self->{assertions}});
2276							},
2277							desc => 'return all assertions which are characteristics for some topic of a given value',
2278							params => { char => '1', value => 'the value for which all characteristics are sought'},
2279							key => sub {
2280							my $self = shift;
2281							my $a = shift;
2282							return "char.value:1.". $a->[PLAYERS]->[1]->[0] . '.' . $a->[PLAYERS]->[1]->[1];
2283							},
2284							enum => sub {
2285							my $self = shift;
2286							return
2287							grep { $_->[KIND] != ASSOC }
2288							values %{ $self->{assertions} };
2289							}
2290							},
2291
2292							'char.type' => {
2293							code => sub {
2294							my $self = shift;
2295							my $type = $_[1];
2296							return
2297							grep { $self->is_subclass ($_->[TYPE], $type ) }
2298							grep { $_->[KIND] != ASSOC }
2299							values %{$self->{assertions}};
2300							},
2301							desc => 'return all assertions which are characteristics for some given type',
2302							params => { char => '1', type => 'the characteristic type'},
2303							key => sub {
2304							my $self = shift;
2305							my $a = shift;
2306							return "char.type:1.". $a->[TYPE];
2307							},
2308							enum => sub {
2309							my $self = shift;
2310							return
2311							grep { $_->[KIND] != ASSOC }
2312							values %{ $self->{assertions} };
2313							}
2314							},
2315
2316							'char.type.value' => {
2317							code => sub {
2318							my $self = shift;
2319							my $type = $_[1];
2320							my $value = $_[2];
2321							return
2322							grep { $self->is_subclass ($_->[TYPE], $type ) }
2323							grep (NAME <= $_->[KIND] && $_->[KIND] <= OCC &&
2324							$_->[PLAYERS]->[1]->[0] eq $value->[0] && # second role is always the value
2325							$_->[PLAYERS]->[1]->[1] eq $value->[1], # test value AND type
2326							values %{$self->{assertions}});
2327							},
2328							desc => 'return all assertions which are characteristics for some topic of a given value for some given type',
2329							params => { char => '1', type => 'the characteristic type', value => 'the value for which all characteristics are sought'},
2330							key => sub {
2331							my $self = shift;
2332							my $a = shift;
2333							return "char.type.value:1.". $a->[TYPE] . '.' . $a->[PLAYERS]->[1]->[0] . '.' . $a->[PLAYERS]->[1]->[1];
2334							},
2335							enum => sub {
2336							my $self = shift;
2337							return
2338							grep { $_->[KIND] != ASSOC }
2339							values %{ $self->{assertions} };
2340							}
2341							},
2342
2343							'char.topic.type' => {
2344							code => sub {
2345							my $self = shift;
2346							my $topic = $_[1];
2347							my $type = $_[2];
2348							return
2349							grep ($self->is_subclass ($_->[TYPE], $type),
2350							grep ($_->[PLAYERS]->[0] eq $topic && # first role is always the 'thing'
2351							NAME <= $_->[KIND] && $_->[KIND] <= OCC,
2352							values %{$self->{assertions}}));
2353							},
2354							desc => 'return all assertions which are a characteristic of a given type for a given topic',
2355							params => { char => '1', topic => 'the toplet for which these characteristics are sought', type => 'type of characteristic' },
2356							key => sub {
2357							my $self = shift;
2358							my $a = shift;
2359							return "char.topic.type:1.". $a->[PLAYERS]->[0] . '.' . $a->[TYPE] ;
2360							},
2361							enum => sub {
2362							my $self = shift;
2363							return
2364							grep { $_->[KIND] != ASSOC }
2365							values %{ $self->{assertions} };
2366							}
2367							},
2368
2369							'lid' => {
2370							code => sub {
2371							my $self = shift;
2372							my $lid = $_[1];
2373							return
2374							$self->{assertions}->{$lid} \|\| ();
2375							},
2376							desc => 'return one particular assertions with a given ID',
2377							params => { lid => 'the ID of the assertion' }
2378							},
2379
2380							'type' => {
2381							code => sub {
2382							my $self = shift;
2383							my $type = $_[0];
2384							return
2385							grep ($self->is_subclass ($_->[TYPE], $type),
2386							values %{$self->{assertions}});
2387							},
2388							desc => 'return all assertions with a given type',
2389							params => { type => 'the type of the assertion' }
2390							},
2391
2392							'iplayer' => {
2393							code => sub {
2394							my $self = shift;
2395							my $ip = $_[0];
2396							return
2397							grep ($self->is_player ($_, $ip),
2398							values %{$self->{assertions}});
2399							},
2400							desc => 'return all assertions where a given toplet is a player',
2401							params => { iplayer => 'the player toplet' }
2402							},
2403
2404							'iplayer.type' => {
2405							code => sub {
2406							my $self = shift;
2407							my ($ip, $ty) = @_;
2408							return
2409							grep ($self->is_player ($_, $ip) &&
2410							$self->is_subclass ($_->[TYPE], $ty),
2411							values %{$self->{assertions}});
2412							},
2413							desc => 'return all assertions of a given type where a given toplet is a player',
2414							params => { iplayer => 'the player toplet', type => 'the type of the assertion' }
2415							},
2416
2417							'iplayer.irole' => {
2418							code => sub {
2419							my $self = shift;
2420							my ($ip, $ir) = @_;
2421							return
2422							grep ($self->is_player ($_, $ip, $ir),
2423							values %{$self->{assertions}});
2424							},
2425							desc => 'return all assertions where a given toplet is a player of a given role',
2426							params => { iplayer => 'the player toplet', irole => 'the role toplet (incl subclasses)' },
2427							},
2428
2429							'iplayer.irole.type' => {
2430							code => sub {
2431							my $self = shift;
2432							my ($ip, $ir, $ty) = @_;
2433							return
2434							grep ($self->is_subclass ($_->[TYPE], $ty) &&
2435							$self->is_player ($_, $ip, $ir),
2436							values %{$self->{assertions}});
2437							},
2438							desc => 'return all assertions of a given type where a given toplet is a player of a given role',
2439							params => { iplayer => 'the player toplet',
2440							irole => 'the role toplet (incl subclasses)',
2441							type => 'the type of the assertion' }
2442							},
2443
2444							'irole.type' => {
2445							code => sub {
2446							my $self = shift;
2447							my ($ir, $ty) = @_;
2448							return
2449							grep ($self->is_role ($_, $ir) &&
2450							$self->is_subclass ($_->[TYPE], $ty),
2451							values %{$self->{assertions}});
2452							},
2453							desc => 'return all assertions of a given type where there is a given role',
2454							params => { irole => 'the role toplet (incl subclasses)', type => 'the type of the assertion' }
2455							},
2456
2457							'irole' => {
2458							code => sub {
2459							my $self = shift;
2460							my ($ir) = @_;
2461							return
2462							grep ($self->is_role ($_, $ir),
2463							values %{$self->{assertions}});
2464							},
2465							desc => 'return all assertions where there is a given role',
2466							params => { irole => 'the role toplet (incl subclasses)' }
2467							},
2468
2469							'aplayer.arole.brole.type' => {
2470							code => sub {
2471							my $self = shift;
2472							my ($ap, $ar, $br, $ty) = @_;
2473							return
2474							grep ( $self->is_role ($_, $br),
2475							grep ( $self->is_player ($_, $ap, $ar),
2476							grep ( $self->is_subclass ($_->[TYPE], $ty),
2477							values %{$self->{assertions}})));
2478							},
2479							desc => 'return all assertions of a given type where a given toplet plays a given role and there exist another given role',
2480							params => { aplayer => 'the player toplet for the arole',
2481							arole => 'the role toplet (incl subclasses) for the aplayer',
2482							brole => 'the other role toplet (incl subclasses)',
2483							type => 'the type of the assertion'
2484							}
2485							},
2486
2487							'aplayer.arole.bplayer.brole.type' => {
2488							code => sub {
2489							my $self = shift;
2490							my ($ap, $ar, $bp, $br, $ty) = @_;
2491							return
2492							grep ( $self->is_player ($_, $bp, $br),
2493							grep ( $self->is_player ($_, $ap, $ar),
2494							grep ( $self->is_subclass ($_->[TYPE], $ty),
2495							values %{$self->{assertions}})));
2496							},
2497							desc => 'return all assertions of a given type where a given toplet plays a given role and there exist another given role with another given toplet as player',
2498							params => { aplayer => 'the player toplet for the arole',
2499							arole => 'the role toplet (incl subclasses) for the aplayer',
2500							brole => 'the other role toplet (incl subclasses)',
2501							bplayer => 'the player for the brole',
2502							type => 'the type of the assertion'
2503							}
2504							},
2505
2506							'anyid' => {
2507							code => sub {
2508							my $self = shift;
2509							my $lid = shift;
2510							return
2511							grep (
2512							$self->is_subclass ($_->[TYPE], $lid) \|\| # probably not a good idea
2513							$_->[TYPE] eq $lid \|\| # this seems a bit safer
2514							$_->[SCOPE] eq $lid \|\|
2515							$self->is_player ($_, $lid) \|\|
2516							$self->is_role ($_, $lid) ,
2517							values %{$self->{assertions}});
2518							},
2519							desc => 'return all assertions where a given toplet appears somehow',
2520							params => { anyid => 'the toplet' }
2521							}
2522
2523							);
2524
2525							sub _allinone {
2526	61			61		89	my $self = shift;
2527	61					86	my $exists = shift;
2528	61					2477	my $template = Assertion->new (@_); # we create an assertion on the fly
2529							#warn "allinone ".Dumper $template;
2530	61					3381	$self->absolutize ($template);
2531							#warn "allinone2".Dumper $template;
2532	61					169	$self->canonicalize ($template); # of course, need to be canonicalized
2533							#warn "allinone3".Dumper $template;
2534
2535							#warn "in store match template ".Dumper $template;
2536	61					80	my @mads;
2537	61					241	ASSERTION:
2538	61					86	foreach my $m (values %{$self->{assertions}}) { # arbitrary AsTMa! queries TBD, can be faster as well
2539
2540	785	100	100			2236	next if defined $template->[KIND] && # is kind defined
2541							$m->[KIND] ne $template->[KIND]; # and does it match?
2542							#warn "after kind";
2543	770	100	100			2225	next if defined $template->[SCOPE] &&
2544							$m->[SCOPE] ne $self->tids ($template->[SCOPE]); # does scope match?
2545							#warn "after scope";
2546	632	100	100			2221	next if defined $template->[TYPE] &&
2547							!$self->is_subclass ($m->[TYPE], $self->tids ($template->[TYPE])); # does type match (including subclassing)?
2548							#warn "after type";
2549
2550	271					634	my ($rm, $rc) = ($m->[ROLES], $template->[ROLES]);
2551	271	100	50			648	push @mads, $m and next ASSERTION if ! @$rc; # match ok, if we have no roles
2552							#warn "after push roles";
2553	242	50				490	next ASSERTION if @$rm != @$rc; # quick check: roles must be of equal length
2554							#warn "after roles";
2555	242					388	my ($pm, $pc) = ($m->[PLAYERS], $template->[PLAYERS]);
2556	242	50	0			445	push @mads, $m and next ASSERTION if ! @$pc; # match ok, if we have no players
2557	242	50				461	next if @$pm != @$pc; # quick check: roles and players must be of equal length
2558							#warn "after players equal length ".Dumper ($pm, $pc);
2559
2560							####### $pm = [ $self->tids (@$pm) ];
2561	242					319	for (my $i = 0; $i < @{$rm}; $i++) { # order is canonicalized, would not want to test all permutations
	339					868
2562							#warn "before role tests : is $rm->[$i] subclass of $rc->[$i]?";
2563	298	50	33			1137	next ASSERTION if defined $rc->[$i] && !$self->is_subclass ($rm->[$i], $rc->[$i]); # go to next assertion if that does not match
2564							#warn "after role ok";
2565	298	100	100			1663	next ASSERTION if defined $pc->[$i] && $pm->[$i] ne $pc->[$i];
2566							}
2567							#warn "after players roles";
2568	41	50				90	return (1) if $exists; # with exists that's it
2569	41					84	push @mads, $m; # with forall we do continue to collect
2570							}
2571							#warn "we return ".Dumper \@mads;
2572	61					690	return @mads; # and return what we got
2573							}
2574
2575							#sub _fat_mama {
2576							# use Proc::ProcessTable;
2577							# my $t = new Proc::ProcessTable;
2578							##warn Dumper [ $t->fields ]; exit;
2579							# my ($me) = grep {$_->pid == $$ } @{ $t->table };
2580							##warn "size: ". $me->size;
2581							# return $me->size / 1024.0 / 1024.0;
2582							#}
2583
2584
2585
2586							sub match_forall {
2587	1792			1792	1	14764	my $self = shift;
2588	1792					6117	my %query = @_;
2589							#warn "forall ".Dumper \%query;
2590
2591	1792					13098	my @skeys = sort keys %query; # all fields make up the key
2592	1792					4115	my $skeys = join ('.', @skeys);
2593	1792					2719	my @svals = map { $query{$_} } @skeys;
	3510					7926
2594
2595	1792	50				4249	if (my $idxs = $self->{indices}) { # there are indices to help me
2596	0					0	my $key = "$skeys:" . join ('.', @svals);
2597	0					0	foreach my $idx (@$idxs) {
2598	0	0				0	if (my $lids = $idx->is_cached ($key)) { # if result was cached, lets take the list of lids
2599							# warn "using cached for $key". Dumper $lids;
2600	0					0	return map { $self->{assertions}->{$_} } @$lids; # and return fully fledged
	0					0
2601							}
2602							}
2603							# obviously we have not found it # not defined means not cache => recompute
2604	0					0	my @as = _dispatch_forall ($self, \%query, $skeys, @svals); # do it the hard way
2605	0					0	$idxs->[0]->do_cache ($key, [ map { $_->[LID] } @as ]); # save it for later, simply use the first [0]
	0					0
2606	0					0	return @as;
2607							} else { # no cache, let's do the ochsentour
2608	1792					4265	return _dispatch_forall ($self, \%query, $skeys, @svals);
2609							}
2610
2611							sub _dispatch_forall {
2612	1792			1792		2135	my $self = shift;
2613	1792					1952	my $query = shift;
2614	1792					2099	my $skeys = shift;
2615
2616	1792	100				4619	if (my $handler = $forall_handlers{$skeys}) { # there is a constraint and we have a handler
2617	1731					2162	return &{$handler->{code}} ($self, @_);
	1731					4758
2618							} else { # otherwise
2619	61					249	return _allinone ($self, 0, %$query); # we use a generic handler, slow but should do the trick
2620							}
2621							}
2622
2623							}
2624
2625							sub match_exists {
2626	0			0	1	0	my $self = shift;
2627	0					0	my %query = @_;
2628
2629							#warn "exists ".Dumper $query;
2630
2631	0					0	my @skeys = sort keys %query; # all fields make up the key
2632	0					0	my $skeys = join ('.', @skeys);
2633
2634							#warn "keys for this $skeys";
2635	0	0				0	if (my $handler = $exists_handlers{$skeys}) { # there is a constraint and we have a handler
2636	0					0	return &{$handler->{code}} ($self, map { $query{$_} } @skeys);
	0					0
	0					0
2637							} else { # otherwise
2638	0					0	return _allinone ($self, 1, %query); # we use a generic handler, slow but should do the trick
2639							}
2640							}
2641
2642							sub match {
2643	254			254	1	57088	my $self = shift;
2644	254					411	my $exists = shift; # FORALL or EXIST, DOES NOT work yet
2645
2646	254	50				980	return $exists ? match_exists ($self, @_) : match_forall ($self, @_);
2647							}
2648
2649
2650							=pod
2651
2652							=back
2653
2654							=head2 Role Retrieval
2655
2656							=over
2657
2658							=item B, B
2659
2660							I<$bool> = is_player (I<$tm>, I<$assertion>, I<$player_id>, [ I<$role_id> ])
2661
2662							I<$bool> = is_x_player (I<$tm>, I<$assertion>, I<$player_id>, [ I<$role_id> ])
2663
2664							This function returns C<1> if the identifier specified by the C parameter plays any role
2665							in the assertion provided as C parameter.
2666
2667							If the C is provided as third parameter then it must be exactly this role (or any subclass
2668							thereof) that is played. The 'x'-version is using equality instead of 'subclassing' ('x' for
2669							"exact").
2670
2671							=cut
2672
2673							sub is_player {
2674	1380			1380	1	1730	my $self = shift;
2675	1380					1538	my $m = shift;
2676
2677							# warn "is_player ".Dumper \@_;
2678							# warn "caller: ". Dumper [ caller ];
2679							# foreach (0..0) {
2680							# warn " ".join (' ---- ', caller($_));
2681							# }
2682
2683	1380					1589	my $p = shift;# or die "must specify valid player: ".Dumper ([ $m ])." and role is ".shift;
2684							#
2685							# warn "after shifting player '$p'";
2686	1380					1520	my $r = shift; # may be undefined
2687
2688	1380	50				2548	$log->logdie ("must specify a player '$p' for role '$r'") unless $p;
2689
2690	1380	100				2169	if ($r) {
2691	303					552	my ($ps, $rs) = ($m->[PLAYERS], $m->[ROLES]);
2692
2693	303					741	for (my $i = 0; $i < @$ps; $i++) {
2694	529	100				1537	next unless $ps->[$i] eq $p;
2695	113	50				305	next unless $self->is_subclass ($rs->[$i], $r);
2696	113					632	return 1;
2697							}
2698							} else {
2699	1077	100				1081	return 1 if grep ($_ eq $p, @{$m->[PLAYERS]});
	1077					4835
2700							}
2701	1015					4199	return 0;
2702							}
2703
2704							sub is_x_player {
2705	10405			10405	1	12334	my $self = shift;
2706	10405					10730	my $m = shift;
2707	10405	50				21437	my $p = shift or $log->logdie ("must specify x-player: ".Dumper ([ $m ]));
2708	10405					11046	my $r = shift; # may be undefined
2709
2710	10405	50				16465	if ($r) {
2711	10405					15917	my ($ps, $rs) = ($m->[PLAYERS], $m->[ROLES]);
2712
2713	10405					34659	for (my $i = 0; $i < @$ps; $i++) {
2714	19981	100				58342	next unless $ps->[$i] eq $p;
2715	2520	100				7310	next unless $rs->[$i] eq $r;
2716	912					3231	return 1;
2717							}
2718							} else {
2719	0	0				0	return 1 if grep ($_ eq $p, @{$m->[PLAYERS]});
	0					0
2720							}
2721	9493					51273	return 0;
2722							}
2723
2724							=pod
2725
2726							=item B, B
2727
2728							I<@player_ids> = get_players (I<$tm>, I<$assertion>, [ I<$role_id> ])
2729
2730							I<@player_ids> = get_x_players (I<$tm>, I<$assertion>, I<$role_id>)
2731
2732							This function returns the player(s) for the given role. If the role is not provided all players are
2733							returned.
2734
2735							The "x" version does not honor subclassing.
2736
2737							=cut
2738
2739							sub get_players {
2740	52			52	1	75	my $self = shift;
2741	52					64	my $a = shift;
2742	52					64	my $r = shift;
2743
2744	52	50				110	return @{ $a->[PLAYERS] } unless $r;
	0					0
2745	52					97	my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2746
2747	52					61	my @ps;
2748	52					154	for (my $i = 0; $i < @$ps; $i++) {
2749	104	100				603	next unless $self->is_subclass ($rs->[$i], $r);
2750	52					165	push @ps, $ps->[$i];
2751							}
2752	52					229	return @ps;
2753							}
2754
2755							sub get_x_players {
2756	2512			2512	1	3493	my $self = shift;
2757	2512					2763	my $a = shift;
2758	2512					2914	my $r = shift;
2759
2760	2512					4608	my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2761
2762	2512					2907	my @ps;
2763	2512					6758	for (my $i = 0; $i < @$ps; $i++) {
2764	5024	100				14623	next unless $rs->[$i] eq $r;
2765	2512					7488	push @ps, $ps->[$i];
2766							}
2767	2512					7712	return @ps;
2768							}
2769
2770							=pod
2771
2772							=item B, B
2773
2774							I<$bool> = is_role (I<$tm>, I<$assertion>, I<$role_id>)
2775
2776							I<$bool> = is_x_role (I<$tm>, I<$assertion>, I<$role_id>)
2777
2778							This function returns C<1> if the C is a role in the assertion provided. The "x" version of
2779							this function does not honor subclassing.
2780
2781							=cut
2782
2783							sub is_role {
2784	36			36	1	44	my $self = shift;
2785	36					43	my $m = shift;
2786	36	50				78	my $r = shift or $log->logdie ("must specify role: ".Dumper ([ $m ]));
2787
2788	36	100				40	return 1 if grep ($self->is_subclass ($_, $r), @{$m->[ROLES]});
	36					101
2789							}
2790
2791							sub is_x_role {
2792	0			0	1	0	my $self = shift;
2793	0					0	my $m = shift;
2794	0	0				0	my $r = shift or $log->logdie ("must specify role: ".Dumper ([ $m ]));
2795
2796	0	0				0	return 1 if grep ($_ eq $r, @{$m->[ROLES]});
	0					0
2797							}
2798
2799							=pod
2800
2801							=item B
2802
2803							I<@role_ids> = get_roles (I<$tm>, I<$assertion>, I<$player>)
2804
2805							This function returns a list of roles a particular player plays in a given assertion.
2806
2807							=cut
2808
2809							sub get_roles {
2810	0			0	1	0	my $self = shift;
2811	0					0	my $a = shift;
2812	0					0	my $p = shift; # the player
2813
2814	0					0	my ($ps, $rs) = ($a->[PLAYERS], $a->[ROLES]);
2815
2816	0					0	my @rs;
2817	0					0	for (my $i = 0; $i < @$ps; $i++) {
2818	0	0				0	next unless $ps->[$i] eq $p;
2819	0					0	push @rs, $rs->[$i];
2820							}
2821	0					0	return @rs;
2822							}
2823
2824							=pod
2825
2826							=item B
2827
2828							I<@role_ids> = @{ get_role_s (I<$tm>, I<$assertion>) }
2829
2830							This function extracts a reference to the list of role identifiers.
2831
2832							=cut
2833
2834							sub get_role_s {
2835	0			0	1	0	my $self = shift;
2836	0					0	my $a = shift;
2837	0					0	return $a->[ROLES];
2838							}
2839
2840							=pod
2841
2842							=back
2843
2844
2845							=head2 Auxiliary Functions
2846
2847							=over
2848
2849							=item B
2850
2851							I<$assertion> = absolutize (I<$tm>, I<$assertion>)
2852
2853							This method takes one assertion and makes sure that all identifiers in it (for the type, the scope
2854							and all the role and players) are made absolute for the context map. It returns this very assertion.
2855							It will not touch canonicalized assertions.
2856
2857							=cut
2858
2859							sub absolutize {
2860	65			65	1	88	my $self = shift;
2861	65					98	my $a = shift;
2862
2863	65	50				203	return $a if $a->[CANON]; # skip it if we are already canonicalized
2864							#warn "in abosl ".Dumper $a;
2865	65	100				262	$a->[TYPE] = tids ($self, $a->[TYPE]) if $a->[TYPE];
2866	65	100				273	$a->[SCOPE] = tids ($self, $a->[SCOPE]) if $a->[SCOPE];
2867
2868	65	100				174	map { $_ = tids ($self, $_) } @{$a->[ROLES]} if $a->[ROLES]; # things which are references, we will keep
	97					314
	47					142
2869	65	50				196	map { $_ = ref ($_) ? $_ : tids ($self, $_) } @{$a->[PLAYERS]} if $a->[PLAYERS]; # the others are treated as ids (could be literal references!)
	93	100				218
	45					87
2870							#warn "after abosl ".Dumper $a;
2871	65					100	return $a;
2872							}
2873
2874							=pod
2875
2876							=item B
2877
2878							I<$assertion> = canonicalize (I<$tm>, I<$assertion>)
2879
2880							This method takes an assertion and reorders the roles (together with their respective players) in a
2881							consistent way. It also makes sure that the KIND is defined (defaults to C), that the type is
2882							defined (defaults to C) and that all references are made absolute LIDs. Finally, the field
2883							C is set to 1 to indicate that the assertion is canonicalized.
2884
2885							The function will not do anything if the assertion is already canonicalized. The component C
2886							is set to C<1> if the assertion has been canonicalized.
2887
2888							Conveniently, the function returns the same assertion, albeit a maybe modified one.
2889
2890							TODO: remove map parameter, it is no longer necessary
2891
2892							=cut
2893
2894							sub canonicalize {
2895	10002			10002	1	15248	my $self = shift;
2896	10002					13242	my $s = shift;
2897							#warn "in canon ".Dumper $s;
2898							#warn "using LIDs ".Dumper $LIDs;
2899
2900	10002	50				33064	return $s if $s->[CANON]; # skip it if we are already canonicalized
2901
2902							# reorder role/players canonically
2903	10002					15405	my $rs = $s->[ROLES];
2904	10002					13611	my $ps = $s->[PLAYERS];
2905	10002					26608	my @reorder = (0..$#$ps); # create 0, 1, 2, ..., how many roles
2906							#warn @reorder;
2907							# sort according to roles (alphanum) and at ties according to players on position $a, $b
2908	10002	50				29496	@reorder = sort { $rs->[$a] cmp $rs->[$b] \|\| $ps->[$a] cmp $ps->[$b] } @reorder;
	9984					36246
2909							#warn @reorder;
2910	10002					16568	$s->[ROLES] = [ map { $rs->[$_] } @reorder ];
	19951					51332
2911	10002					17901	$s->[PLAYERS] = [ map { $ps->[$_] } @reorder ];
	19951					62446
2912							# we are done (almost)
2913	10002					18774	$s->[CANON] = 1;
2914
2915							#warn "in canon return ".Dumper $s;
2916	10002					57673	return $s;
2917							}
2918
2919							# =pod
2920
2921							# =item B
2922
2923							# I<$hash> = mklabel (I<$assertion>);
2924
2925							# For internal optimization all characteristics have an additional HASH component which can be used to
2926							# maintain indices. This function takes a assertion and computes an MD5 hash and sets the C
2927							# component if that is not yet defined.
2928
2929							# Such a hash only makes sense if the assertion is canonicalized, otherwise an exception is raised.
2930
2931							# Example:
2932
2933							# my $a = Assertion->new (lid => 'urn:x-rho:important');
2934							# print "this uniquely (well) identifies the assertion ". mklabel ($a);
2935
2936							# =cut
2937
2938							sub mklabel {
2939	9932			9932	0	14355	my $a = shift;
2940	9932	50				26774	$log->logdie ("refuse to hash non canonicalized assertion") unless $a->[CANON];
2941	36			36		484	use Digest::MD5 qw(md5_hex);
	36					110
	36					100653
2942	9932	100				15287	return md5_hex ($a->[SCOPE], $a->[TYPE], @{$a->[ROLES]}, map { ref ($_) ? join ("", @$_) : $_ } @{$a->[PLAYERS]}); # recompute the hash if necessary
	9932					17964
	19855					118934
	9932					25758
2943							# ^^^^^^^^^^^^^^ # this is a literal value
2944							# ^^ # this is for a normal identifier
2945							}
2946
2947							=pod
2948
2949							=back
2950
2951							=head1 TAXONOMICS AND SUBSUMPTION
2952
2953							The following methods provide useful basic, ontological functionality around transitive subclassing
2954							between classes and instance/type relationships.
2955
2956							B: Everything is a subclass of C (changed in v1.35).
2957
2958							B: Everything is an instance of C.
2959
2960							B: See L for predefined things.
2961
2962							=head2 Boolean Methods
2963
2964							=over
2965
2966							=item B
2967
2968							I<$bool> = I<$tm>->is_subclass (I<$superclass_id>, I<$subclass_id>)
2969
2970							This function returns C<1> if the first parameter is a (transitive) superclass of the second,
2971							i.e. there is an assertion of type I in the context map. It also returns C<1> if the
2972							superclass is a $TM::PSI::THING or if subclass and superclass are the same (reflexive).
2973
2974							=cut
2975
2976							sub is_subclass {
2977	3923			3923	1	6007	my $self = shift;
2978	3923					5027	my $class = shift;
2979	3923					4145	my $super = shift;
2980
2981	3923	100				11451	return 1 if $class eq $super; # we always assume that A subclasses A
2982
2983	2890					5608	my ($ISA, $US, $THING, $SUBCLASSES, $SUBCLASS, $SUPERCLASS, $INSTANCE, $CLASS) =
2984							('isa', 'us', 'thing', 'is-subclass-of', 'subclass', 'superclass', 'instance', 'class');
2985
2986							#warn "is_subclass?: class $class super $super , thing $THING, $SUBCLASSES, $SUPERCLASS";
2987	2890	100				5227	return 1 if $super eq $THING; # everything subclasses thing
2988							# but not if the class is one of the predefined things, yes, there is a method to this madness
2989	2873	100				6921	return 0 if $class eq $ISA;
2990	2241	50				4068	return 0 if $class eq $US;
2991	2241	100				4195	return 0 if $class eq $THING; # thing would only subclass itself and that is covered above
2992	2239	100				7861	return 0 if $class eq $SUBCLASSES;
2993	1197	100				2260	return 0 if $class eq $SUBCLASS;
2994	1181	100				2392	return 0 if $class eq $SUPERCLASS;
2995	1161	100				2504	return 0 if $class eq $INSTANCE;
2996	1099	100				1939	return 0 if $class eq $CLASS;
2997							# # see whether there is an assertion that we have a direct subclasses relationship between the two
2998
2999							# This would be an optimization, but this does not go through match
3000							# return 1 if $self->is_asserted (Assertion->new (scope => $US, # TODO OPTIMIZE
3001							# type => $SUBCLASSES,
3002							# roles => [ $SUBCLASS, $SUPERCLASS ],
3003							# players => [ $class, $super ])
3004							# );
3005							# if we still do not have a decision, we will check all super types of $class and see (recursively) whether we can establish is-subclass-of
3006	784					1893	return 1 if grep ($self->is_subclass ($_, $super), # check all of the intermediate type whether there is a transitive relation
3007	1080	100				2414	map { $self->get_x_players ($_, $SUPERCLASS) } # find the superclass player there => intermediate type
3008							$self->match_forall (type => $SUBCLASSES,
3009							subclass => $class)
3010							);
3011	1011					5672	return 0; # ok, we give up now
3012							}
3013
3014							=pod
3015
3016							=item B
3017
3018							I<$bool> = I<$tm>->is_a (I<$something_lid>, I<$class_lid>)
3019
3020							This method returns C<1> if the thing referenced by the first parameter is an instance of the class
3021							referenced by the second. The method honors transitive subclassing.
3022
3023							=cut
3024
3025							sub is_a {
3026	86			86	1	143	my $self = shift;
3027	86					121	my $thingie = shift;
3028	86					102	my $type = shift; # ok, what class are looking at?
3029
3030	86					193	my ($ISA, $CLASS, $THING) = ('isa', 'class', 'thing');
3031
3032							#warn "isa thingie $thingie class $type";
3033
3034	86	100	100			258	return 1 if $type eq $THING and # is the class == 'thing' and
3035							$self->{mid2iid}->{$thingie}; # and does the thingie exist?
3036
3037	85					218	my ($m) = $self->retrieve ($thingie);
3038	85	50	33			274	return 1 if $m and # is it an assertion ? and...
3039							$self->is_subclass ($m->[TYPE], $type); # is the assertion type a subclass?
3040
3041	26					89	return 1 if grep ($self->is_subclass ($_, $type), # check all of the intermediate type whether there is a transitive relation
3042	85	100				268	map { $self->get_players ($_, $CLASS) } # find the class player there => intermediate type
3043							$self->match_forall (type => $ISA, instance => $thingie)
3044							);
3045	67					593	return 0;
3046							}
3047
3048							=pod
3049
3050							=back
3051
3052							=head2 List Methods
3053
3054							=over
3055
3056							=item B, B
3057
3058							I<@lids> = I<$tm>->subclasses (I<$lid>, ...)
3059
3060							I<@lids> = I<$tm>->subclassesT (I<$lid>, ...)
3061
3062							C returns all B subclasses of the toplet identified by C<$lid>. If the toplet does
3063							not exist, the list will be empty. C is a variant which honors the transitive
3064							subclassing (so if A is a subclass of B and B is a subclass of C, then A is also a subclass of C).
3065
3066							Duplicates are suppressed.
3067
3068							=cut
3069
3070							sub subclasses {
3071	26			26	1	10002	my $self = shift;
3072
3073	26					55	my ($SUBCLASSES) = ('is-subclass-of');
3074	23					98	my @sc = map { $_->[PLAYERS]->[0] }
	26					83
3075	26					65	map { $self->match_forall (type => $SUBCLASSES, superclass => $_) }
3076							@_;
3077	26					53	my %dup;
3078	26	50				63	return map { $dup{$_}++ ? () : $_ } @sc;
	23					211
3079							}
3080
3081							sub subclassesT {
3082	10			10	1	1057	my $self = shift;
3083
3084	10					14	my @sc = map { $self->subclasses ($_) } @_;
	10					20
3085	10					19	push @sc, @_, map { $self->subclassesT ($_) } @sc; # laziness equals recursion
	6					21
3086	10					14	my %dup;
3087	10	100				12	return map { $dup{$_}++ ? () : $_ } @sc;
	23					107
3088							}
3089
3090							=pod
3091
3092							=item B, B
3093
3094							I<@lids> = I<$tm>->superclasses (I<$lid>, ...)
3095
3096							I<@lids> = I<$tm>->superclassesT (I<$lid>, ...)
3097
3098							The method C returns all direct superclasses of the toplet identified by C<$lid>. If
3099							the toplet does not exist, the list will be empty. C is a variant which honors
3100							transitive subclassing.
3101
3102							Duplicates are suppressed.
3103
3104							=cut
3105
3106							sub superclasses {
3107	13			13	1	1306	my $self = shift;
3108
3109	13					18	my ($SUBCLASSES) = ('is-subclass-of');
3110	10					29	my @sc = map { $_->[PLAYERS]->[1] }
	13					25
3111	13					22	map { $self->match_forall (type => $SUBCLASSES, subclass => $_) }
3112							@_;
3113	13					25	my %dup;
3114	13	50				26	return map { $dup{$_}++ ? () : $_ } @sc;
	10					56
3115							}
3116
3117							sub superclassesT {
3118	11			11	1	638	my $self = shift;
3119
3120	11					33	my @sc = map { $self->superclasses ($_) } @_;
	11					22
3121	11					20	push @sc, @_, map { $self->superclassesT ($_) } @sc; # laziness equals recursion
	7					20
3122	11					12	my %dup;
3123	11	100				12	return map { $dup{$_}++ ? () : $_ } @sc;
	29					99
3124							}
3125
3126							=pod
3127
3128							=item B, B
3129
3130							I<@lids> = I<$tm>->types (I<$lid>, ...)
3131
3132							I<@lids> = I<$tm>->typesT (I<$lid>, ...)
3133
3134							The method C returns all direct classes of the toplet identified by C<$lid>. If the toplet does
3135							not exist, the list will be empty. C is a variant which honors transitive subclassing (so if
3136							I is an instance of type I and I is a subclass of I, then I is also an instance of
3137							I).
3138
3139							Duplicates will be suppressed.
3140
3141							=cut
3142
3143							sub types {
3144	14			14	1	3829	my $self = shift;
3145	14					24	my $ISA = ('isa');
3146	14					20	my $a;
3147	14					22	my @types = map { ($a = $self->retrieve ($_))
	10					44
3148							? $a->[TYPE]
3149	16	100				45	: ( map { $_->[PLAYERS]->[0] } $self->match_forall (type => $ISA, instance => $_) )
3150							}
3151							@_;
3152	14					20	my %dup;
3153	14	100				29	return map { $dup{$_}++ ? () : $_ } @types;
	18					128
3154							}
3155
3156							sub typesT {
3157	2			2	1	1046	my $self = shift;
3158
3159	2					8	my @types = map { $self->types ($_) } @_;
	2					5
3160	2					5	push @types, map { $self->superclassesT ($_) } @types;
	3					9
3161	2					4	my %dup;
3162	2	100				5	return map { $dup{$_}++ ? () : $_ } @types;
	10					37
3163							}
3164
3165
3166							=pod
3167
3168							=item B, B
3169
3170							I<@lids> = I<$tm>->instances (I<$lid>, ...)
3171
3172							I<@lids> = I<$tm>->instancesT (I<$lid>, ...)
3173
3174							These methods return the direct (C) and also indirect (C) instances of the
3175							toplet identified by C<$lid>.
3176
3177							Duplicates are suppressed.
3178
3179							=cut
3180
3181							sub instances {
3182	90			90	1	11124	my $self = shift;
3183
3184							# warn Dumper [ caller ] unless @_;
3185
3186	90					212	my ($ISA, $THING) = ('isa', 'thing');
3187
3188	118					283	my @instances = map {
3189	90					456	$_ eq $THING
3190	13					33	? map { $_->[TM->LID] } $self->toplets
3191							:
3192	35					282	(map { $_->[LID ] } $self->match_forall (type => $_)), # all assocs of this type
3193	90	100				518	(map { $_->[PLAYERS]->[1] } $self->match_forall (type => $ISA, class => $_)) # all direct instances
3194							} @_;
3195							}
3196
3197							sub instancesT {
3198	2			2	1	1345	my $self = shift;
3199
3200	3					7	my @instances = map { $self->instances ($_) }
	2					7
3201	2					6	map { $self->subclassesT ($_) }
3202							@_;
3203	2					6	my %dup;
3204	2	50				5	return map { $dup{$_}++ ? () : $_ } @instances;
	31					98
3205							}
3206
3207							=pod
3208
3209							=back
3210
3211							=head2 Filters
3212
3213							Quite often one needs to walk through a list of things to determine whether they are instances (or
3214							types, subtypes or supertypes) of some concept. This list of functions lets you do that: you pass in
3215							a list (reference) and the function behaves as filter, returning a list reference.
3216
3217							=over
3218
3219							=item B
3220
3221							I<@id> = I<$tm>->are_instances (I<$class_id>, I<@list_of_ids>)
3222
3223							Returns all those ids where the topic is an instance of the class provided.
3224
3225							=cut
3226
3227							sub are_instances {
3228	3			3	1	208	my $self = shift;
3229	3					7	my $class = shift; # ok, what class are we looking at?
3230
3231	3					9	my ($THING, $ISA, $CLASS) = ('thing', 'isa', 'class');
3232
3233	3					4	my @rs;
3234	3					8	foreach my $thing (@_) { # we work through all the things we got
3235							#warn "checking $thing";
3236	111	50	0			268	push @rs, $thing and next # we happily take one if
			33
3237							if $class eq $THING and # is the class = 'thing' ? and
3238							$self->midlet ($thing); # then does the thing exist in the map ?
3239
3240	111					248	my $m = $self->retrieve ($thing);
3241	111	0	0			229	push @rs, $thing and next # we happily take one if
			0
			33
3242							if $m and # it is an assertion ? and...
3243							($class eq $THING # either it is the class a THING (we did not explicitly store _that_)
3244							or
3245							$self->is_subclass ($m->[TYPE], $class) # or is the assertion type a subclass?
3246							);
3247
3248	24					60	push @rs, $thing and next # we happily take one if
3249							if grep ($self->is_subclass ($_, $class), # finall we check all of the intermediate type whether there is a transitive relation
3250	111	100	50			220	map { $self->get_players ($_, $CLASS) } # then we find the 'class' value
3251							$self->match_forall (type => $ISA, instance => $thing));
3252							# nothing # otherwise we do not push
3253							}
3254	3					40	return @rs;
3255							}
3256
3257							=pod
3258
3259							=item B (Warning: placeholder only)
3260
3261							I<@ids> = I<$tm>->are_types (I<$instance_id>, I<@list_of_ids>)
3262
3263							Returns all those ids where the topic is a type of the instance provided.
3264
3265							=cut
3266
3267							sub are_types {
3268	1			1	1	626	$log->logwarn ("# not implemented function");
3269	1					978	return 0;
3270							}
3271
3272							=pod
3273
3274							=item B (Warning: placeholder only)
3275
3276							I<@ids> = I<$tm>->are_supertypes (I<$class_id>, I<@list_of_ids>)
3277
3278							Returns all those ids where the topic is a supertype of the class provided.
3279
3280							=cut
3281
3282							sub are_supertypes {
3283	1			1	1	5	$log->logwarn ("# not implemented function");
3284	1					408	return 0;
3285							}
3286
3287							=pod
3288
3289							=item B (Warning: placeholder only)
3290
3291							I<@ids> = I<$tm>->are_subtypes (I<$class_id>, I<@list_of_ids>)
3292
3293							Returns all those ids where the topic is a subtype of the class provided.
3294
3295							=cut
3296
3297							sub are_subtypes {
3298	1			1	1	5	$log->logwarn ("# not implemented function");
3299	1					450	return 0;
3300							}
3301
3302							=pod
3303
3304							=back
3305
3306							=head1 REIFICATION
3307
3308							=over
3309
3310							=item B
3311
3312							(I<$tid>) = I<$tm>->is_reified (I<$assertion>)
3313
3314							(I<$tid>) = I<$tm>->is_reified (I<$url>)
3315
3316							In the case that the handed-in assertion is internally reified in the map, this method will return
3317							the internal identifier of the reifying toplet. Or C if there is none.
3318
3319							In the case that the handed-in URL is used as subject address of a toplet, this method will return
3320							the internal identifier of the reifying toplet. Or C if there is none.
3321
3322							=cut
3323
3324							sub _is_reified {
3325	11			11		24	my $self = shift;
3326	11					19	my $a = shift;
3327
3328	11					28	my $mid2iid = $self->{mid2iid}; # shortcut
3329	11	100				65	$a = $a->[TM->LID] if ref ($a) eq 'Assertion'; # for assertions we take the LID
3330
3331	17					119	return grep { $mid2iid->{$_}->[TM->ADDRESS] eq $a } # brute force
	351					1028
3332	11					96	grep { $mid2iid->{$_}->[TM->ADDRESS] }
3333	11					24	keys %{$mid2iid};
3334							}
3335
3336							sub is_reified {
3337	11			11	1	2782	return _is_reified (@_);
3338							}
3339
3340							=pod
3341
3342							=item B
3343
3344							I<$url> = I<$tm>->reifies (I<$tid>)
3345
3346							I<$assertion> = I<$tm>->reifies (I<$tid>)
3347
3348							Given a toplet identifier, this method returns either the internally reified assertion, an
3349							externally reified object via its URL, or C if that toplet does not reify at all.
3350
3351							=cut
3352
3353							sub reifies {
3354	8			8	1	686	my $self = shift;
3355	8					18	my $tid = shift;
3356
3357	8	50				92	my $add = $self->{mid2iid}->{$tid}->[TM->ADDRESS] if $self->{mid2iid}->{$tid};
3358	8	50				28	return undef unless $add;
3359	8	100				73	return $add =~ /^[A-F0-9]{32}$/i ? $self->{assertions}->{$add} : $add;
3360							}
3361
3362							=pod
3363
3364							=back
3365
3366							=head1 VARIANTS (aka "The Warts")
3367
3368							No comment.
3369
3370							=over
3371
3372							=item B
3373
3374							I<$tm>->variants (I<$id>, I<$variant>)
3375
3376							I<$tm>->variants (I<$id>)
3377
3378							With this method you can get/set a variant tree for B topic. According to the standard only
3379							basenames (aka topic names) can have variants, but, hey, this is such an ugly beast (I am
3380							digressing). According to this data model you can have variants for B toplets/maplets. You only
3381							need their id.
3382
3383							The structure is like this:
3384
3385							$VAR1 = {
3386							'tm:param1' => {
3387							'variants' => {
3388							'tm:param3' => {
3389							'variants' => undef,
3390							'value' => 'name for param3'
3391							}
3392							},
3393							'value' => 'name for param1'
3394							},
3395							'tm:param2' => {
3396							'variants' => undef,
3397							'value' => 'name for param2'
3398							}
3399							};
3400
3401							The parameters are the keys (there can only be one, which is a useful, cough, restriction of the
3402							standard) and the data is the value. Obviously, one key value (i.e. parameter) can only exists once.
3403
3404							Caveat: This is not very well tested (read: not tested at all).
3405
3406							=cut
3407
3408							sub variants {
3409	0			0	1	0	my $self = shift;
3410	0					0	my $id = shift;
3411	0					0	my $var = shift;
3412
3413	0	0				0	$self->{last_mod} = Time::HiRes::time if $var;
3414	0	0				0	return $var ? $self->{variants}->{$id} = $var : $self->{variants}->{$id};
3415							}
3416
3417
3418							=pod
3419
3420							=back
3421
3422							=head1 LOGGING
3423
3424							The L module hosts (since 1.29) the Log4Perl object C<$TM::log>. It is initialized with some
3425							reasonable defaults, but an using application can access it, tweak it, or overwrite it completely.
3426
3427							=head1 SEE ALSO
3428
3429							L, L
3430
3431							=head1 COPYRIGHT AND LICENSE
3432
3433							Copyright 200[1-8] by Robert Barta, Edrrho@cpan.orgE
3434
3435							This library is free software; you can redistribute it and/or modify it under the same terms as Perl
3436							itself.
3437
3438							=cut
3439
3440							#-- this we do when all structures have been defined
3441							_prime_infrastructure(); # initialize
3442							# NOTE: BEGIN does not work, because we have to define all
3443
3444							sub _prime_infrastructure { # generate a fragmentary TM structure for the infrastructure
3445	36			36		144	foreach my $h ($TM::PSI::core,
3446							$TM::PSI::topicmaps_inc,
3447							$TM::PSI::tmql_inc,
3448							$TM::PSI::astma_inc) {
3449	144					196	foreach my $k (keys %{ $h->{mid2iid} }) {
	144					855
3450	1008					3544	$infrastructure->{mid2iid}->{$k} = [ $k, undef, $h->{mid2iid}->{$k} ]; # and manifest them as toplets
3451							}
3452
3453	288					1277	map { $infrastructure->{assertions}->{ $_->[TM->LID] } = $_ } # manifest assertions
	288					574
3454	288					732	map { $_->[TM->LID] = mklabel ($_); # after computing the hash LID
3455	288					587	$_ }
3456	288					4171	map { canonicalize ( undef, $_ ) } # after canonicalizing them
3457	288					15829	map { $_->[TM->KIND] = TM->ASSOC; # adding defaults
3458	288					820	$_->[TM->SCOPE] = TM::PSI::US;
3459	288					469	$_ }
3460	144					808	map { Assertion->new (type => $_->[0], # which is built here
3461							roles => $_->[1], # with the roles list
3462							players => $_->[2])} # with the players list
3463	144					345	@{ $h->{assertions} };
3464							}
3465							}
3466
3467
3468							1;
3469
3470							__END__