File Coverage

blib/lib/License/Syntax.pm
Criterion Covered Total %
statement 282 326 86.5
branch 160 218 73.3
condition 55 78 70.5
subroutine 25 27 92.5
pod 12 12 100.0
total 534 661 80.7


line stmt bran cond sub pod time code
1             package License::Syntax;
2              
3 5     5   242254 use warnings;
  5         12  
  5         189  
4 5     5   31 use strict;
  5         11  
  5         179  
5 5     5   32 use Carp;
  5         15  
  5         407  
6 5     5   19891 use DBI;
  5         158154  
  5         460  
7 5     5   9367 use Text::CSV;
  5         84009  
  5         41  
8 5     5   6324 use POSIX;
  5         47598  
  5         52  
9 5     5   24016 use Data::Dumper;
  5         9613  
  5         19445  
10              
11             =head1 NAME
12              
13             License::Syntax - Coding and Decoding of License strings using SPDX and SUSE syntax.
14              
15             =head1 VERSION
16              
17             Version 0.13
18              
19             =cut
20              
21             our $VERSION = '0.13';
22              
23              
24             =head1 SYNOPSIS
25              
26             This implements the SUSE License Syntax.
27              
28             use License::Syntax;
29              
30             my $obj = new License::Syntax licensemap => 'licenselist.csv;as';
31             my $obj = new License::Syntax map => { 'GNU General Public License V2.0' => 'GPLv2' };
32             my $obj = new License::Syntax 'pathname.sqlite;table(alias,name)';
33             $obj->loadmap_csv($filename_csv);
34             $obj->loadmap_sqlite($filename_sqlite, $table_name, $alias_col, $name_col);
35             $obj->savemap_csv($filename_csv, scalar(localtime));
36             $obj->savemap_sqlite($filename_sqlite, $table_name, $alias_col, $name_col, 'TRUNCATE');
37             $obj->add_alias($alias, $canonical_name);
38             $name = $obj->canonical_name($alias, $disambiguate);
39             $tree = $obj->tokenize('GPLv2 & Apache 1.1; LGPLv2.1 | BSD4c<
40             $name = $obj->format_tokens($tree);
41              
42              
43             =head1 FUNCTIONS
44              
45             =head2 new
46              
47             License::Syntax is an object oriented module.
48             When constructing new License::Syntax objects, you can provide a mapping table for
49             license names. The table is used for recognizing alternate alias names for the
50             licenses (left hand side) and also defines the canonical short names of the licenses
51             (right hand side).
52             The mapping table is consulted twice, before and after decoding the syntax.
53             (Thus non-terminal mappings may actually be followed.)
54              
55             The mapping table can be provided either
56              
57             =over 2
58              
59             =item * as a CSV files of two columns. Column seperator is a comma (,)
60              
61             =item * as a hash, or
62              
63             =item * as table in an sqlite database using the given columns as left hand side and right hand side respectivly.
64              
65             =back
66              
67             As an alternative to specifying a mapping with new(), or additionally, mappings
68             can also be provided via loadmap_sqlite(), loadmap_csv(), or add_alias()
69             methods. Earlier mappings take precedence over later mappings.
70              
71              
72             =cut
73              
74             sub new
75             {
76 6     6 1 2181 my $self = shift;
77 6   33     51 my $class = ref($self) || $self;
78 6 100 100     47 if (1 == scalar @_ and !ref $_[0])
79             {
80 2         12 $self = { new => { licensemap => $_[0] } };
81             }
82             else
83             {
84 4 100       28 $self = { new => { (ref $_[0] eq 'HASH') ? %{$_[0]} : @_ } };
  2         11  
85             }
86 6         22 $self = bless $self, $class;
87              
88 6         33 $self->set_rejects('REJECT');
89              
90             # use Data::Dumper;
91             # carp Dumper $self, $class, \@_;
92 6 50       35 if ($self->{new}{map})
93             {
94 0         0 for my $k (%{$self->{new}{map}})
  0         0  
95             {
96 0         0 $self->add_alias($k, $self->{new}{map}{$k});
97             }
98 0         0 delete $self->{new}{map};
99             }
100 6 100       137 $self->_loadmap($self->{new}{licensemap})
101             if $self->{new}{licensemap};
102 6         41 return $self;
103             }
104              
105             # dispatch into either loadmap_sqlite or loadmap_csv.
106             sub _loadmap
107             {
108 3     3   10 my ($s,$f) = @_;
109              
110             # "filename.csv"
111             # "filename.csv;garbage"
112 3 100       35 my $suf = $1 if $f =~ s{;([\w,;\#\(\)]+)$}{};
113 3 100       24 return $s->loadmap_csv($f, $suf) if $f =~ m{\.csv$}i;
114              
115 2 50       19 croak "$f: needs either .csv or .sqlite suffix\n" unless $f =~ m{\.sql(ite)?$}i;
116              
117             # "filename.sqlite;table"
118             # "filename.sqlite;table(alias,name)"
119 2 50       19 my ($table,$left,$right) = ($1,'alias','name') if $suf =~ m{^(\w+)};
120 2 50       19 ($left,$right) = ($1,$2) if $suf =~ m{\((\w+)\W(\w+)};
121 2         13 return $s->loadmap_sqlite($f, $table, $left, $right);
122             }
123              
124             ## returns a two column array with the minimum representation of a license map.
125             sub _saveable_map
126             {
127 4     4   443 my ($s) = @_;
128 4         11 my %identity;
129             my %done;
130              
131 0         0 my @r;
132 4         7 for my $k (sort keys %{$s->{licensemap}{ex}})
  4         2030  
133             {
134 2380         5034 my $v = $s->{licensemap}{ex}{$k};
135 2380 100       4021 if ($v eq $k)
136             {
137 772         1560 $identity{$v}++;
138             }
139             else
140             {
141 1608         3733 push @r, [$k,$v];
142 1608         3239 $done{$v}++;
143             }
144             }
145              
146 4         250 for my $k (keys %identity)
147             {
148 772 100       1601 next if $done{$k};
149 40         93 push @r, ['',$k];
150             }
151              
152 4         174 return \@r;
153             }
154              
155             =head2 canonical_name
156              
157             $name = $obj->canonical_name($alias);
158             is equivalent to
159             $name = $obj->format_tokens($obj->tokenize($alias));
160            
161             =cut
162              
163             sub canonical_name
164             {
165 2     2 1 11 my ($s, $name) = @_;
166 2         8 return $s->format_tokens($s->tokenize($name));
167             }
168              
169             =head2 savemap_csv
170              
171             $obj->savemap_csv('filename.csv', scalar(localtime));
172              
173             Writes the current mapping table as a comma seperated file.
174              
175             =cut
176              
177             sub savemap_csv
178             {
179 2     2 1 1521 my ($s, $f, $header_suffix) = @_;
180 2 50       243 open O, ">", $f or croak "$f: write failed: $!";
181 2         96 print O qq{# "Alias name","Canonical Name" -- saved by License::Syntax $VERSION};
182 2 50       10 $header_suffix = '' unless defined $header_suffix;
183 2 50       10 $header_suffix .= "\n" unless $header_suffix =~ m{\n$}s;
184 2         4 print O $header_suffix;
185              
186 2         9 my $list = $s->_saveable_map();
187 2         9 for my $r (@$list)
188             {
189 824         2046 print O qq{"$r->[0]","$r->[1]"\n};
190             }
191 2 50       263 close O or croak "$f: write failed: $!";
192             }
193              
194             =head2 set_rejects
195              
196             $obj->set_rejects('REJECT', ...);
197              
198             define the license names to be rejected. Per default,
199             exactly one name 'REJECT' is rejected.
200              
201             =cut
202              
203             sub set_rejects
204             {
205 6     6 1 19 my ($s, @r) = @_;
206              
207             # store as a hash for faster test.
208 6         16 $s->{REJECT} = { map { $_ => 1 } @r };
  6         47  
209 6         19 return $s;
210             }
211              
212             =head2 add_alias
213              
214             $obj->add_alias($alias,$name);
215             $obj->add_alias(undef,$name);
216             $obj->add_alias('',$name);
217              
218             adds $name (and optionally $alias) to the objects licensemap.
219             Both, lower case and exact mappings are maintained.
220             (add_url is used in loadmap_csv)
221              
222             add_alias() takes care to extend to the right. That is, if it's right hand side
223             parameter is already known to be an alias, the new alias is added pointing to the old alias's canonical name (rahter than to the old alias that the caller provided).
224              
225             CAVEAT:
226             add_alias() does not maintain full tranitivity, as it does not extend to the left.
227             If its left hand side is already known to be a canonical name, a warning is
228             issued, but the situation cannot be corrected, as this would require rewriting
229             existing entries. This is non-obvious, as mappings are applied more than once
230             during format_tokens(), so indirect mappings involving non-terminal names, may
231             or may not work. A two step mapping currently works reliably, though.
232              
233             add_alias() does nothing, if it would directly redo an existing mapping.
234              
235             See also new() for more details about mappings.
236              
237             =cut
238              
239             =head2 add_url
240              
241             $obj->add_url($urls, $name);
242              
243             Add one or multiple URLs to the canonical license name. URLs can be seperated by comma or whitespace.
244             May be called multiple times for the same name, and fills an array of urls.
245             (add_url is used in loadmap_csv)
246              
247             =cut
248              
249             =head2 set_compat_class
250             $obj->set_compat_class($cc, $name);
251              
252             Specify the compatibility class, for a canonical license name.
253             compatibility classes are numerical. These classes allow to derive certain compatibility issues
254             amongst liceses. Some classes are always incompatible (even amongst themselves), other
255             classes are always comaptible, and for some other classses, compatibility is uncertain.
256             The exact semantics are to be defined. (set_compat_class is used in loadmap_csv).
257              
258             =cut
259              
260             sub set_compat_class
261             {
262 0     0 1 0 my ($s, $cc, $canonical_name) = @_;
263 0         0 $cc += 0;
264 0 0       0 croak "compatibility class should be numeric and > 0\n" unless $cc;
265 0         0 $s->{licensemap}{cc}{$canonical_name} = $cc;
266             }
267              
268             sub add_url
269             {
270 149     149 1 267 my ($s, $url, $canonical_name) = @_;
271 149         715 my @url = split(/[,\s]+/, $url);
272 149         189 push @{$s->{licensemap}{url}{$canonical_name}}, @url;
  149         858  
273             }
274              
275             sub add_alias
276             {
277 1483     1483 1 2913 my ($s, $from, $to) = @_;
278              
279 1483 100 100     6484 $from = '' if defined($from) and $from eq $to; # not an alias.
280              
281             # normalize whitespace:
282 1483         3466 $to =~ s{\s+}{ }g;
283 1483 100       7154 $from =~ s{\s+}{ }g if defined $from;
284              
285 1483 100       5510 if (defined(my $nn = $s->{licensemap}{ex}{$to}))
286             {
287             # do right extend
288             # simple loopdetection first:
289 856 50 33     4131 croak "cyclic alias '$from' -> '$to' with already known canonical name '$nn'\n"
290             if defined($from) && $from eq $nn;
291              
292             # now extend:
293 856 50       2273 carp "add_alias: '$from' -> '$to' extended to '$nn'\n" if $s->{debug};
294 856         1808 $to = $nn;
295             }
296              
297 1483 100 100     7120 if (defined $from and $from ne '')
298             {
299 1322         3153 my $aa;
300 1322 100       4517 if (defined($aa = $s->{licensemap}{ex}{$from}))
301             {
302 44 100       97 if ($aa eq $from)
303             {
304             # this alias is a right hand side.
305             # We recognize this, because all right hand sides map to itself.
306 1         6 my $msg = "mapping error: '$from' is now both alias and canonical name. Try to load '$from' -> '$to' earlier.";
307 1 50       5 carp "$msg\n" if $s->{debug};
308 1         3 push @{$s->{diagnostics}}, $msg;
  1         4  
309             }
310             else
311             {
312             # this is a chane attempt to an existing mapping.
313             # silently ignored.
314 43 100       289 carp "mapping ignored: '$from' => '$to', it already maps to '$aa'\n" if $s->{debug};
315 43         271 return $s;
316             }
317             }
318 1279         4583 $s->{licensemap}{ex}{$from} = $to;
319 1279         4165 $s->{licensemap}{lc}{lc $from} = $to;
320 1279 100       2362 if (scalar(my @a = _tokenize_linear($from)) > 1)
321             {
322 138         1358 $s->{licensemap}{tok}{lc $a[0]}{$from} = [ @a ];
323             }
324             }
325 1440         4569 $s->{licensemap}{ex}{$to} = $to;
326 1440         3950 $s->{licensemap}{lc}{lc $to} = $to;
327 1440 100       2792 if (scalar(my @a = _tokenize_linear($to)) > 1)
328             {
329 156         847 $s->{licensemap}{tok}{lc $a[0]}{$to} = [ @a ];
330             }
331 1440         4080 return $s;
332             }
333              
334             =head2 savemap_sqlite
335              
336             $obj->savemap_sqlite('filename.sqlite', 'lic_map', 'alias', 'shortname', $trunc_flag);
337              
338              
339             # sqlite3 filename.sqlite
340             sqlite> select * from lic_map
341             alias | shortname
342             ------|----------
343             ...
344              
345             If $trunc_flag is true and the table previously exists, the table is truncated before it is written to;
346             otherwise new contents merges over old contents, if any.
347              
348             =cut
349              
350             sub savemap_sqlite
351             {
352 1     1 1 2312 my ($s, $f, $t, $a, $n, $trunc_flag) = @_;
353              
354 1 50       15 my $dbh = DBI->connect("dbi:SQLite:dbname=$f","","") or carp "DBI-connect($f) failed: $!";
355              
356 1 50       37932 $dbh->do("PRAGMA default_synchronous = OFF") if $s->{new}{nofsync};
357 1         222 $dbh->do("CREATE TABLE IF NOT EXISTS $t ( $a TEXT, $n TEXT )");
358 1 50       499440 $dbh->do("DELETE FROM $t") if $trunc_flag;
359              
360 1         40002 my $list = $s->_saveable_map();
361 1         4 for my $r (@$list)
362             {
363 412         114046412 $dbh->do("INSERT OR REPLACE INTO $t ($a,$n) VALUES(?,?)", {}, $r->[0], $r->[1]);
364             }
365 1         779793 $dbh->disconnect();
366 1         310 return $s;
367             }
368              
369             =head2 loadmap_sqlite
370              
371             See also new() for more details about mappings.
372              
373             =cut
374              
375             sub loadmap_sqlite
376             {
377 2     2 1 6 my ($s, $f, $t, $a, $n) = @_;
378              
379 2 50       25 my $dbh = DBI->connect("dbi:SQLite:dbname=$f","","") or carp "DBI-connect($f) failed: $!";
380 2         38464 my $list = $dbh->selectall_arrayref("SELECT $a,$n FROM $t");
381 2         2684 for my $r (@$list)
382             {
383 824         2725 $s->add_alias($r->[0], $r->[1]);
384             }
385 2         5132 $dbh->disconnect();
386 2         519 return $s;
387             }
388              
389             =head2 tokenize
390              
391             $tree_arr = $obj->tokenize($complex_license_expr);
392             $tree_arr = $obj->tokenize($complex_license_expr, 1);
393              
394             Returns an array reference containing tokens and sub-arrays,
395             describing how the $complex_license_expr is parsed.
396             If a second parameter disambiguate is provided and is true,
397             extra parenthesis are inserted to unambiguiusly show how the
398             complex expression is interpreted.
399             If names have been loaded with add_alias, before calling tokenize,
400             all names and aliases are recognized as one token. E.g. "GPL 2.0 or later"
401             would be split as ["GPL 2.0", "or", "later"] otherwise.
402             No name mapping is performed here.
403              
404              
405             =cut
406              
407             sub _tokenize_linear
408             {
409 2725     2725   3905 my ($text) = @_;
410 2725         8302 $text =~ s{\s+}{ }g; # normalize whitespace
411 2725         1011728 my @a = ($text =~ m{\s*(.*?)?\s*(;|\||&|\bor\b|\band\b|<<|\(|\)|$)}gi);
412              
413             ## the above regexp often returns ['somthing', '', '', '']
414             ## remove the empty trailers.
415 2725   66     13931 while ((scalar @a) and ($a[-1] eq ''))
416             {
417 7957         40906 pop @a;
418             }
419 2725         12087 return @a;
420             }
421              
422             sub tokenize
423             {
424 6     6 1 604 my ($s, $text, $disambiguate) = @_;
425              
426 6 50       35 $text = "REJECT(?undefined($text)?)" unless $text =~ m{\w\w};
427              
428             #### accept a comma instead of a semicolon, unless there are semicolons.
429             ## Not done, we have to digest this: "The PHP License, version 3.01"
430             ## $text =~ s{,}{;} unless $text =~ m{;};
431              
432             ## tokenize the expression by cutting at all operators and parenthesis.
433             ## we cut before and after such operators and parenthesis, so that we
434             ## do not lose anything by cutting.
435              
436 6         20 my @a = _tokenize_linear($text);
437 6         16 my $i = 0;
438 6         24 for (; $i <= $#a; $i++) # this may shorten while we walk along.
439             {
440 92 100       343 if (my $m = $s->{licensemap}{tok}{lc $a[$i]})
441             {
442 14         42 for my $k (keys %$m)
443             {
444 62         77 my $match = 1;
445 62         64 for my $j (1..$#{$m->{$k}})
  62         144  
446             {
447 84 100       219 if ($a[$i+$j] ne $m->{$k}[$j])
448             {
449 62         73 $match = 0;
450 62         86 last;
451             }
452             }
453              
454 62 50       191 if ($match)
455             {
456             # Undo tokenization:
457             # Replace tokenized version with original license name
458 0         0 splice @a, $i, (scalar @{$m->{$k}}), $k;
  0         0  
459 0         0 last;
460             }
461             }
462             }
463 92 100       202 $a[$i] = 'and' if $a[$i] eq '&';
464 92 100       319 $a[$i] = 'or' if $a[$i] eq '|';
465             }
466              
467             ## before we group tokens, we pull back license names that contain or.
468            
469 6 100       23 $s->{disambiguate}++ if $disambiguate;
470 6         30 my $r = [ $s->_group_tokens(0, @a) ];
471 6 100       21 $s->{disambiguate}-- if $disambiguate;
472 6         33 return $r;
473             }
474              
475             sub _group_tokens
476             {
477 10     10   128 my ($s, $l, @a) = @_;
478 10   50     50 $s->{debug} ||= 0; # manually enable debugging here, in new().
479              
480 10         18 push @a, ''; # helps flushing $arr
481              
482 10         17 my @r;
483 10         18 my $arr = [];
484 10         24 my $in_word = 0;
485 10         13 my $in_parens = 0;
486              
487 10         24 for my $a (@a)
488             {
489 132 100       347 $in_parens++ if ($a =~ m{\(});
490 132 100 66     327 $in_parens-- if ($a =~ m{\)}) and $in_parens;
491 132 100 100     499 $in_word++ if ($a =~ m{\w}) and !$in_parens;
492              
493 132 50       271 carp "$l: a='$a' in_parens=$in_parens in_word=$in_word\n" if $s->{debug};
494              
495             ## operators, but not parenthesis
496             ## must include the empty string here!
497 132 100 100     640 if ($a =~ m{^(;|\||&|\bor\b|\band\b|<<|)$}i and !$in_parens)
498             {
499 36 50       79 carp "$l: emit [@$arr]\n" if $s->{debug};
500 36 100       74 if (scalar @$arr)
501             {
502 30 100       61 if ($in_word)
503             {
504             # put whitespace around some operators, so that it looks nicer.
505             # ; only has a trailing whitespace, << has no whitespaces.
506             # KEEP IN SYNC with format_tokens()
507 26 100       41 map { $_ = " $1 " if /^(;|\||&|\bor\b|\band\b)$/; s{^ ; $}{; } } @$arr;
  52         172  
  52         124  
508 26         72 push @r, join '', @$arr;
509             }
510             else
511             {
512             ## must be an expression in parenthesis
513 4 50 33     22 unless ($arr->[0] eq '(' and $arr->[-1] eq ')')
514             {
515 0         0 my $msg = "parse error: not in_word, and not in parens: a='$a' [@$arr]";
516 0         0 push @{$s->{diagnostics}}, $msg;
  0         0  
517 0 0       0 carp "$msg\n" if $s->{debug};
518             }
519 4 50       11 shift @$arr if $arr->[0] eq '(';
520 4 50       11 pop @$arr if $arr->[-1] eq ')';
521 4 50       9 carp "$l: recursion into [@$arr]\n" if $s->{debug};
522 4         23 push @r, [ $s->_group_tokens($l+1, @$arr) ];
523             }
524             }
525 36         60 $arr = [];
526 36         70 $in_word = 0
527             }
528 132 100 100     563 if ($in_word or $in_parens or $a eq ')')
      100        
529             {
530 96 50       196 carp "$l: add '$a' to [@$arr]\n" if $s->{debug};
531 96 50 100     254 if ($a eq ')' and !$in_parens and !@$arr)
      66        
532             {
533 0         0 my $msg = "parse error: bogus '$a'";
534 0         0 push @{$s->{diagnostics}}, $msg;
  0         0  
535 0 0       0 carp "$msg\n" if $s->{debug};
536             }
537 96 100       296 push @$arr, $a if length $a;
538             }
539             else
540             {
541 36 50       89 carp "$l: emit '$a'\n" if $s->{debug};
542 36 100       117 push @r, $a if length $a;
543             }
544             }
545              
546 10 50       27 if ($in_parens)
547             {
548 0         0 my $msg = "parse error: missing closing ')'";
549 0         0 push @{$s->{diagnostics}}, $msg;
  0         0  
550 0 0       0 carp "$msg\n" if $s->{debug};
551 0         0 return $s->_group_tokens($l, @a, ')')
552             }
553              
554 10 100       25 if ($s->{disambiguate})
555             {
556             ## the ordering here defines operator precedence.
557 4         9 for my $op ('<<', '&', 'and', '|', 'or', ';')
558             {
559 24         50 @r = _disambiguate($op, @r);
560             }
561             }
562              
563             ## we remove extra parens unconditionally.
564 10   100     54 while (scalar(@r) == 1 and ref $r[0] eq 'ARRAY')
565             {
566 4 50       13 if ($s->{debug})
567             {
568 5     5   65 use Data::Dumper;
  5         13  
  5         23294  
569 0         0 warn "removing extra parens from" . Dumper(\@r). Dumper $r[0];
570             }
571 4         4 @r = @{$r[0]};
  4         23  
572             }
573 10         58 return @r;
574             }
575              
576             ## find stretches of indentical operators $tok
577             ## and replace them by one sub-array each, containing the same.
578             sub _disambiguate
579             {
580 24     24   53 my ($tok, @a) = @_;
581            
582 24         27 my $i;
583 24         58 for ($i = 1; $i <= $#a; $i+= 2)
584             {
585 36 100 66     225 if (defined($a[$i]) && $a[$i] eq $tok)
586             {
587 8         10 my $e = $i;
588 8   100     39 while (defined($a[$e+2]) && $a[$e+2] eq $tok) { $e += 2; }
  2         7  
589 8         59 splice @a, $i-1, $e-$i+3, [ @a[$i-1 .. $e+1] ];
590             # assert: $a[$i+2] cannot be $tok now.
591             }
592             }
593 24         126 return @a;
594             }
595              
596             =head2 format_tokens
597              
598             reverse operation of tokenize()
599              
600             =cut
601              
602             sub _map_license_name
603             {
604 26     26   60 my ($s, $name, $prev_op) = @_;
605 26         29 my $parens;
606              
607 26         32 my $origname = $name;
608              
609             # used as a flag, so that we know if we mapped the name at least once.
610 26         29 my $mapped;
611             # we try the mapping three times:
612             # first including any parenthesis
613             # second: after splitting parethetical description
614             # third: after shaping into possibly conforming syntax
615              
616             {
617 26         30 my $new = $s->{licensemap}{ex}{$name};
  26         81  
618 26 100       121 $new = $s->{licensemap}{lc}{lc $name} unless defined $new;
619 26 100       76 $mapped = $name = $new if defined $new;
620             }
621              
622 26 50       256 ($name,$parens) = ($1,$3) if $name =~ m{^\s*(.*?)\s*(\((.*)\))?\s*$};
623 26 50       69 $origname = $name unless $name eq 'REJECT';
624              
625             ## allow for underscores in these name, to make vim users happy.
626 26         85 $name =~ s{(PERMISSIVE|NON|COPYLEFT)[-_]OSI[-_]COMPLIANT}{$1-OSI-COMPLIANT}g;
627              
628              
629             {
630 26         33 my $new = $s->{licensemap}{ex}{$name};
  26         73  
631 26 100       69 $new = $s->{licensemap}{lc}{lc $name} unless defined $new;
632 26 100       76 $mapped = $name = $new if defined $new;
633             }
634              
635 26         29 if (1)
636             {
637             ## policy: version numbers are appended with '-', not with 'v', ' V'
638             ## version numbers do not end in '.0'
639             # LGPLv2.1 -> LGPL-2.1
640             # Apache V2.0 -> Apache-2.0
641             ###
642 26 100       98 $name =~ s{(\w+)(-v|-V| V| v|v| )(\d[\.\d]*\+?|\d+\.\d[-~\w]*\+?)\s*$}{$1-$3} unless $s->{licensemap}{lc}{lc $name};
643 26 100       113 $name =~ s{(\d)\.0$}{$1} unless $s->{licensemap}{lc}{lc $name};
644             }
645             else # this is old policy, we do it the other way round now.
646             {
647             ## policy: version numbers are appended with a lower case v,
648             ## if the name is all caps and without white space.
649             ## otherwise append with space and capital V.
650              
651             ## LGPL-2.1 -> LGPL v2.1 -> LGPLv2.1
652             ## LGPL-V2.1 -> LGPL v2.1 -> LGPLv2.1
653             ## GPL-2+ -> GPL v2+ -> GPLv2+
654             ## Apache-2.0 -> Apache v2.0 -> Apache V2.0
655             ## do this only, if it really looks like a version number.
656             ## e.g. XXX-3 XXX-3.0~alpha BUT NOT vision-3d or BSD-4clause
657             $name =~ s{(\w+)-[vV]?(\d[\.\d]*\+?|\d+\.\d[-~\w]*\+?)\s*$}{$1 v$2};
658              
659              
660             ## LGPL v2.1 -> LGPLv2.1
661             ## LGPL 2.1+ -> LGPLv2.1+
662             ## PERMISSIVE -> PERMISSIVE
663             unless ($name =~ s{^([A-Z_\d\.-]+)\s*[vV ](\d\S*?)$}{$1v$2})
664             {
665             ## CC BY-SA v3.5 -> CC BY-SA V3.5
666             ## Apache v2.0 -> Apache V2.0
667             $name =~ s{^(.*\S)\s*[vV ](\d\S*?)$}{$1 V$2};
668             }
669             }
670              
671             ##
672             ## policy: modifiers are all lower case, licenses start upper case.
673 26 100 100     111 if (($prev_op||'') eq '<<')
674             {
675 4         9 $name = lc $name;
676             }
677             else
678             {
679              
680             {
681 22         25 my $new = $s->{licensemap}{ex}{$name};
  22         60  
682 22 100       53 $new = $s->{licensemap}{lc}{lc $name} unless defined $new;
683 22 100       67 $mapped = $name = $new if defined $new;
684             }
685            
686 22 100       44 unless (defined $mapped)
687             {
688             # names not in the mapping table get an questionmark!
689 4         9 $name = ucfirst $origname;
690 4 50       18 $name = '?' . $name unless $name =~ m{^\?};
691 4 50       13 $name = $name . '?' unless $name =~ m{\?$};
692 4         7 push @{$s->{diagnostics}}, "unknown name: '$origname'";
  4         14  
693             }
694             else
695             {
696 18         45 $name = ucfirst $name;
697             }
698             }
699 26 50 33     68 if ($name eq 'REJECT' and $origname !~ m{REJECT})
700             {
701 0         0 $parens = "?$origname?";
702 0         0 push @{$s->{diagnostics}}, "rejected name: '$origname'";
  0         0  
703             }
704 26 100       61 $name .= "($parens)" if $parens;
705 26         81 return $name;
706             }
707              
708             sub format_tokens
709             {
710 14     14 1 9104 my ($s, $aa) = @_;
711 14         19 my @a;
712 14         29 for my $a (@$aa)
713             {
714 54 100       118 if (ref $a)
715             {
716 8         24 push @a, '(', $s->format_tokens($a), ')';
717             }
718             else
719             {
720 46 100 100     296 if ($a =~ m{\w\w} and $a !~ m{^(and|or)$}i)
721             {
722 26 100       101 $a = $s->_map_license_name($a, @a?$a[-1]:undef);
723             }
724             else
725             {
726             # put whitespace around some operators, so that it looks nicer.
727             # ; only has a trailing whitespace, << has no whitespaces.
728             # KEEP IN SYNC with _group_tokens() if in_word
729 20         114 $a =~ s{^(;|\||&|or|and)$}{ $1 }i; $a =~ s{^ ; $}{; };
  20         59  
730             }
731 46         117 push @a, $a;
732             }
733             }
734 14         80 return join '', @a;
735             }
736              
737             =head2 loadmap_csv
738              
739             $obj->loadmap_csv('license_map.csv', 'as');
740             $obj->loadmap_csv('synopsis.csv', 'lauaas#c');
741              
742             Object method to
743             load (or merge) contents of a CVS table into the object.
744             This uses a trivial csv parser. Field seperator must be ;
745             linebreaks are record seperators, and the first line is ignored,
746             if it starts with '#'.
747             Fields can be surrounded by doublequotes, if a comma may be embedded.
748              
749             The second parameter is a field template, defining what the meaning of the fields is.
750             l Long name (none or once). This is a speaking name of the License: Example "Creative Commons Attribution 1.0"
751             a Alias name (any number). Any other name by which the license is known: Example: "CC-BY 1.0"
752             s Short name (once). The canonical (unique) short license identifiner: Example: "CC-BY-1"
753             u URL (any). Multiple URLs can also be written in one filed, seperated by whitespace.
754             # License classification number (none or once). (1..5)
755             c Comment (none or once)
756             The default template is "as", an alias, followed by the canonical short name.
757             Empty fields are ignored, as well as fields that contian only one '?'. Thus you
758             can use records like
759             "","Name"
760             to pass in a valid name without an alias.
761              
762             See also new() for more details about mappings.
763              
764             =cut
765             sub loadmap_csv
766             {
767 3     3 1 20 my ($self,$file,$template) = @_;
768 3   100     14 $template ||= "as";
769              
770 3         15 my @colum_types = split(//, $template);
771 3         8 my $canon_idx = undef;
772 3         12 for my $i (0..$#colum_types)
773             {
774 12 100       34 if ($colum_types[$i] eq 's')
775             {
776 3 50       11 die "multiple canonical short name columns in template '$template'\n" if defined $canon_idx;
777 3         7 $canon_idx = $i;
778             }
779             }
780 3 50       10 die "no canonical short name columns in template '$template'\n" unless defined $canon_idx;
781              
782 3 50       156 open my $in, "<", $file or croak "open($file) failed: $!\n";
783 3         23 my %opts = ( binary => 1, sep_char => ',', empty_is_undef => 1, eol => $/ );
784 3         31 my $csv = Text::CSV->new(\%opts);
785 3         393 my $line_no = 0;
786 3         6 for (;;)
787             {
788 325         3359 my $row = $csv->getline ($in);
789 325 100       221086 last if $csv->eof();
790 322 100       2025 if (!$line_no++)
791             {
792             # be forgiving, if we have errors while parsing the first line.
793             # it may be a comment, or the seperator chacracter may be wrong.
794 4 50       18 if (my @err = $csv->error_diag())
795             {
796 4         178 my $line = $csv->error_input();
797 4 100 66     75 if ($err[0] == 2025)
    100          
    50          
798             {
799             # no field seperator seen in the line
800 1 50       4 if ($opts{sep_char} eq ',')
801             {
802 1         3 $opts{sep_char} = ';';
803 1         9 POSIX::rewind $in;
804 1         240 $csv = Text::CSV->new(\%opts);
805 1         130 $line_no = 0;
806 1         4 next;
807             }
808             else
809             {
810 0         0 die "neither comma nor semicolon work as a field seperator.\n";
811             }
812             }
813             elsif (defined($line) and $line =~ s{^#\s*}{})
814             {
815             # can we do something with this header line now?
816 2         14 next;
817             }
818             elsif ($err[0])
819             {
820             # sometimes we come here, although there is no error.
821             # e.g. after restarting with changed sep_char,
822 0         0 print Dumper @err, $line;
823 0         0 next;
824             }
825             else
826             {
827             # "# heading","fields","...."
828             # "\160SPDX Full name","..."
829             # " SPDX Full name","..."
830 1 50       10 next if $row->[0] =~ m{^(\#|\W*SPDX)}; # heading
831             }
832             }
833             }
834              
835 318 100       760 next unless defined $row->[$canon_idx]; # a dummy entry?
836              
837 309         410 my $alias_count = 0;
838 309         3780 for my $i (0..$#colum_types)
839             {
840 1632 100       4530 next unless defined $row->[$i];
841 1431 100       5578 next if $row->[$i] =~ m{^[\?\s-]+$};
842 1269 100 100     7144 if ($colum_types[$i] eq 'l' or
    100          
    50          
843             $colum_types[$i] eq 'a')
844             {
845 644         2415 $self->add_alias($row->[$i], $row->[$canon_idx]);
846 644         1317 $alias_count++;
847             }
848             elsif($colum_types[$i] eq 'u')
849             {
850 149         518 $self->add_url($row->[$i], $row->[$canon_idx]);
851             }
852             elsif($colum_types[$i] eq 'c')
853             {
854 0         0 $self->set_compat_class($row->[$i], $row->[$canon_idx]);
855             }
856             }
857 309 100       905 unless ($alias_count)
858             {
859 11         37 $self->add_alias(undef, $row->[$canon_idx]);
860             }
861 309 50       1021 last if $csv->eof();
862             }
863 3         207 return $self;
864             }
865              
866             sub _loadmap_csv_old
867             {
868 0     0     my ($self,$file) = @_;
869              
870 0 0         open IN, "<", $file or croak "open($file) failed: $!\n";
871 0           my $linecount = 0;
872 0           while (defined (my $line = ))
873             {
874 0           chomp $line;
875 0 0 0       next if $line =~ m{^#} and !$linecount++;
876 0 0 0       if (($line =~ m{^"([^"]*)",\s*"([^"]*)"}) or
    0 0        
877             ($line =~ m{^([^,"]*),\s*([^,]*)}))
878             {
879             # actual mapping from old name to new name
880 0           $self->add_alias($1,$2);
881              
882             }
883             elsif ($line =~ m{^("",\s*)?"([^"]*)"\s*$} or
884             $line =~ m{^(,)?([^,"]*)\s*$})
885             {
886             # simple mentioning of good ones, needs no mapping
887 0           $self->add_alias(undef,$2);
888             }
889             else
890             {
891 0           die "$file:$linecount:\n\t$line\n not my csv syntax.";
892             }
893             }
894 0           return $self;
895             }
896              
897             =head1 AUTHOR
898              
899             Juergen Weigert, C<< >>
900              
901             =head1 BUGS
902              
903             This module defines a different syntax than
904             http://rpmlint.zarb.org/cgi-bin/trac.cgi/browser/trunk/TagsCheck.py
905              
906             Please report any bugs or feature requests to C, or through
907             the web interface at L. I will be notified, and then you'll
908             automatically be notified of progress on your bug as I make changes.
909              
910              
911              
912              
913             =head1 SUPPORT
914              
915             You can find documentation for this module with the perldoc command.
916              
917             perldoc License::Syntax
918              
919              
920             You can also look for information at:
921              
922             =over 4
923              
924             =item * RT: CPAN's request tracker
925              
926             L
927              
928             =item * AnnoCPAN: Annotated CPAN documentation
929              
930             L
931              
932             =item * CPAN Ratings
933              
934             L
935              
936             =item * Search CPAN
937              
938             L
939              
940             =back
941              
942              
943             =head1 ACKNOWLEDGEMENTS
944              
945              
946             =head1 COPYRIGHT & LICENSE
947              
948             Copyright 2009 Juergen Weigert.
949              
950             This program is free software; you can redistribute it and/or modify it
951             under the terms of either: the GNU General Public License as published
952             by the Free Software Foundation; or the Artistic License.
953              
954             See http://dev.perl.org/licenses/ for more information.
955              
956              
957             =cut
958              
959             1; # End of License::Syntax