File Coverage

blib/lib/Business/CompanyDesignator.pm
Criterion Covered Total %
statement 176 178 98.8
branch 83 92 90.2
condition 46 58 79.3
subroutine 26 26 100.0
pod 7 7 100.0
total 338 361 93.6


line stmt bran cond sub pod time code
1             package Business::CompanyDesignator;
2              
3             # Require perl 5.010 because the 'track' functionality of Regexp::Assemble
4             # is unsafe for earlier versions.
5 9     9   711708 use 5.010001;
  9         117  
6 9     9   4951 use Moose;
  9         4092652  
  9         69  
7 9     9   65865 use utf8;
  9         65  
  9         66  
8 9     9   323 use warnings qw(FATAL utf8);
  9         22  
  9         472  
9 9     9   2863 use FindBin qw($Bin);
  9         5416  
  9         1209  
10 9     9   2282 use YAML;
  9         33410  
  9         585  
11 9     9   5869 use File::ShareDir qw(dist_file);
  9         186548  
  9         719  
12 9     9   86 use List::MoreUtils qw(uniq);
  9         29  
  9         44  
13 9     9   13354 use Regexp::Assemble;
  9         173264  
  9         428  
14 9     9   5826 use Unicode::Normalize;
  9         19119  
  9         765  
15 9     9   90 use Carp;
  9         20  
  9         587  
16              
17 9     9   4640 use Business::CompanyDesignator::Record;
  9         41  
  9         704  
18 9     9   5671 use Business::CompanyDesignator::SplitResult;
  9         37  
  9         10730  
19              
20             our $VERSION = '0.15';
21              
22             has 'datafile' => ( is => 'ro', default => sub {
23             # Development/test version
24             my $local_datafile = "$Bin/../share/company_designator_dev.yml";
25             return $local_datafile if -f $local_datafile;
26             $local_datafile = "$Bin/../share/company_designator.yml";
27             return $local_datafile if -f $local_datafile;
28             # Installed version
29             return dist_file('Business-CompanyDesignator', 'company_designator.yml');
30             });
31              
32             # data is the raw dataset as loaded from datafile, keyed by long designator
33             has data => ( is => 'ro', lazy_build => 1 );
34              
35             # regex_cache is a cache of regexes by language and type, since they're expensive to build
36             has 'regex_cache' => ( is => 'ro', isa => 'HashRef', default => sub { {} } );
37              
38             # abbr_long_map is a hash mapping abbreviations (strings) back to an arrayref of
39             # long designators (since abbreviations are not necessarily unique)
40             has 'abbr_long_map' => ( is => 'ro', isa => 'HashRef', lazy_build => 1 );
41              
42             # pattern_string_map is a hash mapping patterns back to their source string,
43             # since we do things like add additional patterns without diacritics
44             has 'pattern_string_map' => ( is => 'ro', isa => 'HashRef', default => sub { {} } );
45             # pattern_string_map_lang is a hash of hashes, mapping language codes to hashes
46             # of patterns back to their source string
47             has 'pattern_string_map_lang' => ( is => 'ro', isa => 'HashRef', default => sub { {} } );
48              
49             sub _build_data {
50 8     8   146 my $self = shift;
51 8         296 YAML::LoadFile($self->datafile);
52             }
53              
54             sub _build_abbr_long_map {
55 5     5   15 my $self = shift;
56 5         14 my $map = {};
57 5         31 while (my ($long, $entry) = each %{ $self->data }) {
  865         22228  
58 860 100       1690 if (my $abbr = $entry->{abbr_std}) {
59 5   50     35 $map->{$abbr} ||= [];
60 5         9 push @{ $map->{$abbr} }, $long;
  5         26  
61             }
62 860 100       1622 my $abbr_list = $entry->{abbr} or next;
63 830 100       1723 $abbr_list = [ $abbr_list ] if ! ref $abbr_list;
64 830         1291 for my $abbr (@$abbr_list) {
65 1270   100     5393 $map->{$abbr} ||= [];
66 1270         1576 push @{ $map->{$abbr} }, $long;
  1270         3355  
67             }
68             }
69 5         143 return $map;
70             }
71              
72             sub long_designators {
73 4     4 1 1176 my $self = shift;
74 4         11 sort keys %{ $self->data };
  4         140  
75             }
76              
77             sub abbreviations {
78 3     3 1 1057 my $self = shift;
79 3         7 sort keys %{ $self->abbr_long_map };
  3         136  
80             }
81              
82             sub designators {
83 1     1 1 3 my $self = shift;
84 1         5 sort $self->long_designators, $self->abbreviations;
85             }
86              
87             # Return the B::CD::Record for $long designator
88             sub record {
89 1094     1094 1 43821 my ($self, $long) = @_;
90 1094 100       30048 my $entry = $self->data->{$long}
91             or croak "Invalid long designator '$long'";
92 1093         33565 return Business::CompanyDesignator::Record->new( long => $long, record => $entry );
93             }
94              
95             # Return a list of B::CD::Records for $designator
96             sub records {
97 862     862 1 325503 my ($self, $designator) = @_;
98 862 50       2247 croak "Missing designator" if ! $designator;
99 862 100       28616 if (exists $self->data->{$designator}) {
    100          
100 241         707 return ( $self->record($designator) );
101             }
102             elsif (my $long_set = $self->abbr_long_map->{$designator}) {
103 620         1595 return map { $self->record($_) } @$long_set
  680         1800  
104             }
105             else {
106 1         22 croak "Invalid designator '$designator'";
107             }
108             }
109              
110             # Add $string to regex assembler
111             sub _add_to_assembler {
112 1590     1590   3378 my ($self, $assembler, $lang, $string, $reference_string) = @_;
113 1590   66     3842 $reference_string ||= $string;
114             # printf "+ add_to_assembler (%s): '%s' => '%s'\n", join(',', @{ $lang || []}), $string, $reference_string;
115              
116             # FIXME: RA->add() doesn't work here because of known quantifier-escaping bugs:
117             # https://rt.cpan.org/Public/Bug/Display.html?id=50228
118             # https://rt.cpan.org/Public/Bug/Display.html?id=74449
119             # $assembler->add($string)
120             # Workaround by lexing and using insert()
121 1590         2190 my $optional1 = '\\.?,?\\s*';
122             my @pattern = map {
123             # Periods are treated as optional literals, with optional trailing commas and/or whitespace
124 1590 100       5961 /\./ ? $optional1 :
  23461 100       61846  
    100          
125             # Embedded spaces can be multiple, and include leading commas
126             / / ? ',?\s+' :
127             # Escape other regex metacharacters
128             /[()]/ ? "\\$_" : $_
129             } split //, $string;
130 1590         6319 $assembler->insert(@pattern);
131              
132             # Also add pattern => $string mapping to pattern_string_map and pattern_string_map_lang
133 1590         199302 my $pattern_string = join '', @pattern;
134              
135             # Special case - optional match characters can cause clashes between
136             # distinct pattern_strings e.g. /A\.?,?\s*S\.?,?\s*/ clashes with /AS/
137             # We need to handle such cases as ambiguous with extra checks
138 1590         2752 my $optional1e = "\Q$optional1\E";
139 1590         2040 my $alt_pattern_string1;
140 1590 100       8932 if ($pattern_string =~ /^(\w)(\w)$/) {
    100          
141 69         276 $alt_pattern_string1 = "$1$optional1$2$optional1";
142             } elsif ($pattern_string =~ /^(\w)$optional1e(\w)$optional1e$/) {
143 98         279 $alt_pattern_string1 = "$1$2";
144             }
145              
146             # If $pattern_string already exists in pattern_string_map then the pattern is ambiguous
147             # across entries, and we can't unambiguously map back to a standard designator
148 1590 100 66     52967 if (exists $self->pattern_string_map->{ $pattern_string }) {
    100          
149 245         7146 my $current = $self->pattern_string_map->{ $pattern_string };
150 245 100 100     1131 if ($current && $current ne $reference_string) {
151             # Reset to undef to mark ambiguity
152 3         95 $self->pattern_string_map->{ $pattern_string } = undef;
153             }
154             }
155             # Also check for the existence of $alt_pattern_string1, since this is also an ambiguity
156             elsif ($alt_pattern_string1 && exists $self->pattern_string_map->{ $alt_pattern_string1 }) {
157 5         143 my $current = $self->pattern_string_map->{ $alt_pattern_string1 };
158 5 50 33     58 if ($current && $current ne $reference_string) {
159             # Reset both pairs to undef to mark ambiguity
160 5         169 $self->pattern_string_map->{ $pattern_string } = undef;
161 5         144 $self->pattern_string_map->{ $alt_pattern_string1 } = undef;
162             }
163             }
164             else {
165 1340         36901 $self->pattern_string_map->{ $pattern_string } = $reference_string;
166             }
167 1590 100       3343 if ($lang) {
168 524         922 for my $l (@$lang) {
169 633 100       19739 if (exists $self->pattern_string_map_lang->{$l}->{ $pattern_string }) {
170 154         4774 my $current = $self->pattern_string_map_lang->{$l}->{ $pattern_string };
171 154 50 33     628 if ($current && $current ne $reference_string) {
172             # Reset to undef to mark ambiguity
173 0         0 $self->pattern_string_map_lang->{$l}->{ $pattern_string } = undef;
174             }
175             }
176             else {
177 479         13654 $self->pattern_string_map_lang->{$l}->{ $pattern_string } = $reference_string;
178             }
179             }
180             }
181              
182             # If $string contains unicode diacritics, also add a version without them for misspellings
183 9 100   9   92 if ($string =~ m/\pM/) {
  9         22  
  9         148  
  1590         7456  
184 211         421 my $stripped = $string;
185 211         1211 $stripped =~ s/\pM//g;
186 211         765 $self->_add_to_assembler($assembler, $lang, $stripped, $reference_string);
187             }
188             }
189              
190             # Assemble designator regex
191             sub _build_regex {
192 32     32   81 my $self = shift;
193 32         80 my ($type, $lang) = @_;
194              
195             # RA constructor - case insensitive, with match tracking
196 32         215 my $assembler = Regexp::Assemble->new->flags('i')->track(1);
197              
198             # Construct language regex if $lang is set
199 32         2704 my $lang_re;
200 32 100       93 if ($lang) {
201 28 100       104 $lang = [ $lang ] if ! ref $lang;
202 28         120 my $lang_str = join '|', sort @$lang;
203 28         367 $lang_re = qr/^($lang_str)$/;
204             }
205              
206 32         74 my $count = 0;
207 32         75 while (my ($long, $entry) = each %{ $self->data }) {
  5536         144215  
208             # If $type is begin, restrict to 'lead' entries
209 5504 100 100     14884 next if $type eq 'begin' && ! $entry->{lead};
210             # If $lang is set, restrict to entries that include $lang
211 2976 100 100     13107 next if $lang_re && $entry->{lang} !~ $lang_re;
212              
213 547         810 $count++;
214 547         2835 my $long_nfd = NFD($long);
215 547         1730 $self->_add_to_assembler($assembler, $lang, $long_nfd);
216              
217             # Add all abbreviations
218 547 100       1712 if (my $abbr_list = $entry->{abbr}) {
219 531 100       1732 $abbr_list = [ $abbr_list ] if ! ref $abbr_list;
220 531         1022 for my $abbr (@$abbr_list) {
221 832         3395 my $abbr_nfd = NFD($abbr);
222 832   66     3785 my $abbr_std = NFD($entry->{abbr_std} || $abbr);
223 832         1921 $self->_add_to_assembler($assembler, $lang, $abbr_nfd, $abbr_std);
224             }
225             }
226             }
227              
228             # If no entries found (a strange/bogus language?), return undef
229 32 100       240 return if $count == 0;
230              
231 20 50       120 return wantarray ? ( $assembler->re, $assembler ) : $assembler->re;
232             }
233              
234             # Regex accessor, returning regexes by type (begin/end) and language (en, es, etc.)
235             # $type defaults to 'end', $lang defaults to undef (for all)
236             sub regex {
237 1240     1240 1 1905 my $self = shift;
238 1240         2413 my ($type, $lang) = @_;
239 1240   50     2506 $type ||= 'end';
240              
241             # $lang might be an arrayref containing multiple language codes
242 1240         1759 my $lang_key;
243 1240 100       2422 if ($lang) {
244 560         948 $lang_key = $lang;
245 560 50 66     1316 if (ref $lang && ref $lang eq 'ARRAY' && @$lang) {
      66        
246 8 50       28 if (@$lang == 1) {
247 0         0 $lang_key = $lang->[0];
248             }
249             else {
250 8         22 $lang_key = join '_', sort map { lc $_ } @$lang;
  16         111  
251             }
252             }
253             }
254              
255 1240         1999 my $cache_key = $type;
256 1240 100       2607 $cache_key .= "_$lang_key" if $lang_key;
257              
258 1240 100       39045 if (my $entry = $self->regex_cache->{ $cache_key }) {
259 1208 50       4949 return wantarray ? @$entry : $entry->[0];
260             }
261              
262 32         127 my ($re, $assembler) = $self->_build_regex($type, $lang);
263 32         210095 $self->regex_cache->{ $cache_key } = [ $re, $assembler ];
264 32 50       206 return wantarray ? ( $re, $assembler ) : $re;
265             }
266              
267             # Helper to return split_designator results
268             sub _split_designator_result {
269 620     620   5388 my $self = shift;
270 620         2912 my ($lang, $before, $des, $after, $matched_pattern) = @_;
271              
272 620         1199 my $des_std;
273 620 100       1442 if ($matched_pattern) {
274 462 100       5158 $des_std = $self->pattern_string_map_lang->{$lang}->{$matched_pattern} if $lang;
275 462   100     12716 $des_std ||= $self->pattern_string_map->{$matched_pattern};
276 462 100       1137 if ($des_std) {
277             # Always coalesce spaces and delete commas from $des_std
278 442         1081 $des_std =~ s/,+/ /g;
279 442         1199 $des_std =~ s/\s\s+/ /g;
280             }
281             }
282              
283             # Legacy interface - return a simple before / des / after tuple, plus $des_std
284 620 100 66     1622 return map { defined $_ && ! ref $_ ? NFC($_) : '' } ($before, $des, $after, $des_std)
  840 100       6065  
285             if wantarray;
286              
287             # New scalar-context interface - return SplitResult object
288 410 100 100     12895 Business::CompanyDesignator::SplitResult->new(
      100        
      100        
      100        
289             before => NFC($before // ''),
290             designator => NFC($des // ''),
291             designator_std => NFC($des_std // ''),
292             after => NFC($after // ''),
293             records => [ $des_std ? $self->records(NFC $des_std) : () ],
294             );
295             }
296              
297             # Split $company_name on (the first) company designator, returning a triplet of strings:
298             # ($before, $designator, $after), plus the normalised form of the designator. If no
299             # designator is found, just returns ($company_name).
300             # e.g. matching "ABC Pty Ltd" would return "Pty Ltd" for $designator, but "Pty. Ltd." for
301             # the normalised form, and "Accessoires XYZ Ltee" would return "Ltee" for $designator,
302             # but "Ltée" for the normalised form
303             sub split_designator {
304 620     620 1 170989 my $self = shift;
305 620         2118 my ($company_name, %arg) = @_;
306 620         1281 my $lang = $arg{lang};
307 620         1327 my $allow_embedded = $arg{allow_embedded};
308 620   100     2754 $allow_embedded //= 1; # backwards-compatibility, unfortunately
309 620         4185 my $company_name_match = NFD($company_name);
310              
311             # Handle older perls without XPosixPunct
312 620 50       1238 state $punct_class = eval { '.' =~ m/\p{XPosixPunct}/ } ? '[\s\p{XPosixPunct}]' : '[\s[:punct:]]';
  4         37  
313              
314 620         1908 my ($re, $assembler) = $self->regex('end', $lang);
315 620         1727 my ($lead_re, $lead_assembler) = $self->regex('begin', $lang);
316              
317 620 100       1595 if ($re) {
318             # Designators are usually final, so try that first
319 480 100 100     174081 if ($company_name_match =~ m/^\s*(.*?)${punct_class}\s*($re)\s*$/) {
    100 100        
    100          
320 371         670673 return $self->_split_designator_result($lang, $1, $2, undef, $assembler->source($^R));
321             }
322             # Not final - check for a lead designator instead (e.g. RU, NL, etc.)
323             elsif ($lead_re && $company_name_match =~ m/^\s*($lead_re)${punct_class}\s*(.*?)\s*$/) {
324 49         107393 return $self->_split_designator_result($lang, undef, $1, $2, $lead_assembler->source($^R));
325             }
326             # Not final - check for an embedded designator with trailing content
327             elsif ($allow_embedded && $company_name_match =~ m/(.*?)${punct_class}\s*($re)(?:\s+(.*?))?$/) {
328 42         167660 return $self->_split_designator_result($lang, $1, $2, $3, $assembler->source($^R));
329             }
330             }
331              
332             # No match - return $company_name unchanged
333 158         52835 return $self->_split_designator_result($lang, $company_name);
334             }
335              
336             1;
337              
338             __END__
339              
340             =encoding utf-8
341              
342             =head1 NAME
343              
344             Business::CompanyDesignator - module for matching and stripping/manipulating the
345             company designators appended to company names
346              
347             =head1 VERSION
348              
349             Version: 0.15.
350              
351             This module is considered a B<BETA> release. Interfaces may change and/or break
352             without notice until the module reaches version 1.0.
353              
354             =head1 SYNOPSIS
355              
356             Business::CompanyDesignator is a perl module for matching and stripping/manipulating
357             the typical company designators appended (or sometimes, prepended) to company names.
358             It supports both long forms (e.g. Corporation, Incorporated, Limited etc.) and
359             abbreviations (e.g. Corp., Inc., Ltd., GmbH etc).
360              
361             use Business::CompanyDesignator;
362              
363             # Constructor
364             $bcd = Business::CompanyDesignator->new;
365             # Optionally, you can provide your own company_designator.yml file, instead of the bundled one
366             $bcd = Business::CompanyDesignator->new(datafile => '/path/to/company_designator.yml');
367              
368             # Get lists of designators, which may be long (e.g. Limited) or abbreviations (e.g. Ltd.)
369             @des = $bcd->designators;
370             @long = $bcd->long_designators;
371             @abbrev = $bcd->abbreviations;
372              
373             # Lookup individual designator records (returns B::CD::Record objects)
374             # Lookup record by long designator (unique)
375             $record = $bcd->record($long_designator);
376             # Lookup records by abbreviation or long designator (may not be unique)
377             @records = $bcd->records($designator);
378              
379             # Get a regex for matching designators by type ('end'/'begin') and lang
380             # By default, returns 'end' regexes for all languages
381             $re = $bcd->regex;
382             $company_name =~ $re and say 'designator found!';
383             $company_name =~ /$re\s*$/ and say 'final designator found!';
384             my $re_begin_en = $bcd->regex('begin', 'en');
385              
386             # Split $company_name on designator, returning a ($before, $designator, $after) triplet,
387             # plus the normalised form of the designator matched (can pass to records(), for example)
388             ($before, $des, $after, $normalised_des) = $bcd->split_designator($company_name);
389              
390             # Or in scalar context, return a L<Business::CompanyDesignator::SplitResult> object
391             $res = $bcd->split_designator($company_name, lang => 'en');
392             print join ' / ', $res->designator_std, $res->short_name, $res->extra;
393              
394              
395             =head1 DATASET
396              
397             Business::CompanyDesignator uses the company designator dataset from here:
398              
399             L<https://github.com/ProfoundNetworks/company_designator>
400              
401             which is bundled with the module. You can use your own (updated or custom)
402             version, if you prefer, by passing a 'datafile' parameter to the constructor.
403              
404             The dataset defines multiple long form designators (like "Company", "Limited",
405             or "Incorporée"), each of which has zero or more abbreviations (e.g. 'Co.',
406             'Ltd.', 'Inc.' etc.), and one or more language codes. The 'Company' entry,
407             for instance, looks like this:
408              
409             Company:
410             abbr:
411             - Co.
412             - '& Co.'
413             - and Co.
414             lang: en
415              
416             Long designators are unique across the dataset, but abbreviations are not
417             e.g. 'Inc.' is used for both "Incorporated" and French "Incorporée".
418              
419             =head1 METHODS
420              
421             =head2 new()
422              
423             Creates a Business::CompanyDesignator object.
424              
425             $bcd = Business::CompanyDesignator->new;
426              
427             By default this uses the bundled company_designator dataset. You may
428             provide your own (updated or custom) version by passing it via a 'datafile'
429             parameter to the constructor.
430              
431             $bcd = Business::CompanyDesignator->new(datafile => '/path/to/company_designator.yml');
432              
433             =head2 designators()
434              
435             Returns the full list of company designator strings from the dataset
436             (both long form and abbreviations).
437              
438             @designators = $bcd->designators;
439              
440             =head2 long_designators()
441              
442             Returns the full list of long form designators from the dataset.
443              
444             @long = $bcd->long_designators;
445              
446             =head2 abbreviations()
447              
448             Returns the full list of abbreviation designators from the dataset.
449              
450             @abbrev = $bcd->abbreviations;
451              
452             =head2 record($long_designator)
453              
454             Returns the Business::CompanyDesignator::Record object for the given
455             long designator (and dies if not found).
456              
457             =head2 records($designator)
458              
459             Returns a list of Business::CompanyDesignator::Record objects for the
460             given abbreviation or long designator (for long designators there will
461             only be a single record returned, but abbreviations may map to multiple
462             records).
463              
464             Use this method for abbreviations, or if you aren't sure of a
465             designator's type.
466              
467             =head2 regex([$type], [$lang])
468              
469             Returns a regex for all matching designators for $type ('begin'/'end') and
470             $lang (ISO 639-1 language code e.g. 'en', 'es', 'de', etc.) from the dataset.
471             $lang may be either a single language code scalar, or an arrayref of language
472             codes, for multiple alternative languages. The returned regex is case-insensitive
473             and non-anchored.
474              
475             $type defaults to 'end', so without parameters regex() returns a regex
476             matching all designators for all languages.
477              
478             =head2 split_designator($company_name, [lang => $lang], [allow_embedded => $bool])
479              
480             Attempts to split $company_name on (the first) company designator found.
481              
482             In array context split_designator returns a list of four items - a triplet of
483             strings from $company_name ( $before, $designator, $after ), plus the
484             standardised version of the designator as a fourth element.
485              
486             ($short_name, $des, $after_text, $des_std) = $bcd->split_designator($company_name);
487              
488             In scalar context split_designator returns a L<Business::CompanyDesignator::SplitResult>
489             object.
490              
491             $res = $bcd->split_designator($company_name, lang => $lang);
492              
493             The $des designator in array context, and the SplitResult $res->designator
494             is the designator text as it matched in $company_name, while the array context
495             $des_std, and the SplitResult $res->designator_std is the standardised version
496             as found in the dataset.
497              
498             For instance, "ABC Pty Ltd" would return "Pty Ltd" as the $designator, but
499             "Pty. Ltd." as the standardised form, and the latter would be what you
500             would find in designators() or would lookup with records(). Similarly,
501             "Accessoires XYZ Ltee" (without the french acute) would match, returning
502             "Ltee" (as found) for the $designator, but "Ltée" (with the acute) as the
503             standardised form.
504              
505             split_designator accepts the following optional (named) parameters:
506              
507             =over 4
508              
509             =item lang => $lang
510              
511             $lang can be a scalar ISO 639-1 language code ('en', 'fr', 'zh', etc.), or an
512             arrayref containing multiple language codes. If $lang is defined, split_designator
513             will only match designators for the specified set of languages, which can improve
514             the accuracy of the split by reducing false positive matches.
515              
516             =item allow_embedded => $boolean
517              
518             allow_embedded is a boolean indicating whether or not designators can occur in
519             the middle of strings, instead of only at the beginning or end. Defaults to true,
520             for backwards compatibility, which yields more matches, but also more false
521             positives. Setting to false is safer, but yields fewer matches (and embedded
522             designators do occur surprisingly often in the wild.)
523              
524             For more discussion, see L</AMBIGUITIES> below.
525              
526             =back
527              
528             =head2 AMBIGUITIES
529              
530             Note that split_designator does not always get the split right. It checks for
531             final designators first, then leading ones, and then finally looks for embedded
532             designators (if allow_embedded is set to true).
533              
534             Leading and trailing designators are usually reasonably accurate, but embedded
535             designators are problematic. For instance, embedded designators allow names like
536             these to split correctly:
537              
538             Amerihealth Insurance Company of NJ
539             Trenkwalder Personal AG Schweiz
540             Vicente Campano S L (COMERCIAL VICAM)
541             Gvozdika, gostinitsa OOO ""Eko-Treyd""
542              
543             but it will also wrongly split names like the following:
544              
545             XYZ PC Repairs ('PC' is a designator meaning 'Professional Corporation')
546             Dr S L Ledingham ('S L' is a Spanish designator for 'Sociedad Limitada')
547              
548             If you do want to allow splitting on embedded designators, you might want to pass
549             a 'lang' parameter to split_designator if you know the language(s) used for your
550             company names, as this will reduce the number of false positives by restricting the
551             set of designators matched against. It won't eliminate the issue altogether though,
552             so some post-processing might be required. (And I'd love to hear of ideas on how
553             to improve this.)
554              
555             =head1 SEE ALSO
556              
557             Finance::CompanyNames
558              
559             =head1 AUTHOR
560              
561             Gavin Carr <gavin@profound.net>
562              
563             =head1 COPYRIGHT AND LICENCE
564              
565             Copyright (C) 2013-2016 Gavin Carr
566              
567             This library is free software; you can redistribute it and/or modify it
568             under the same terms as Perl itself.
569              
570             =cut