File Coverage

lib/Table/BoxFormat/Unicode/CharClasses.pm
Criterion Covered Total %
statement 32 32 100.0
branch n/a
condition n/a
subroutine 10 10 100.0
pod 3 3 100.0
total 45 45 100.0


line stmt bran cond sub pod time code
1             package Table::BoxFormat::Unicode::CharClasses;
2             # doom@kzsu.stanford.edu
3             # 22 Dec 2016
4              
5              
6             =head1 NAME
7              
8             Table::BoxFormat::Unicode::CharClasses - character classes to work with db SELECT result formats
9              
10             =head1 SYNOPSIS
11              
12             use Table::BoxFormat::Unicode::CharClasses ':all';
13              
14             $horizontal_dashes_plus_crosses_or_whitespace =
15             qr{ ^
16             [ \p{IsHor} \s ] +
17             $ }x;
18              
19             $cross_character =
20             qr{
21             \p{IsCross}
22             {1,1} # just one
23             }xms;
24              
25             $column_separator =
26             qr{
27             \s+ # require leading whitespace
28             \p{IsDelim}
29             {1,1} # just one
30             \s+ # require trailing whitespace
31             }xms;
32              
33              
34             =head1 DESCRIPTION
35              
36             Table::BoxFormat::Unicode::CharClasses contains a number of
37             pre-defined character classes to assist in writing regular
38             expressions to match elements of typical database SELECT result
39             formats (see: L<Table::BoxFormat>).
40              
41             =head2 EXPORT
42              
43             None by default, ':all' for all.
44              
45             =cut
46              
47 8     8   73880 use 5.10.0;
  8         39  
48 8     8   56 use strict;
  8         17  
  8         160  
49 8     8   39 use warnings;
  8         16  
  8         176  
50 8     8   562 use utf8;
  8         30  
  8         36  
51             my $DEBUG = 1;
52 8     8   230 use Carp;
  8         16  
  8         381  
53 8     8   43 use Data::Dumper;
  8         16  
  8         1126  
54              
55             our (@ISA, @EXPORT_OK, %EXPORT_TAGS, @EXPORT);
56             BEGIN {
57 8     8   50 require Exporter;
58 8         134 @ISA = qw(Exporter);
59 8         41 %EXPORT_TAGS = ( 'all' => [
60             qw(
61             IsHor
62             IsCross
63             IsDelim
64             ) ] );
65 8         16 @EXPORT_OK = ( @{ $EXPORT_TAGS{'all'} } );
  8         26  
66 8         1511 @EXPORT = qw( ); # items to export into callers namespace by default (avoid this!)
67             }
68              
69             our $VERSION = '0.01';
70              
71              
72             =head2 regexp properties
73              
74             Definitions of some custom regexp character properties that might
75             be useful for projects such as L<Table::BoxFormat>, that work with
76             the tabular text formats used by database monitors to display
77             select results.
78              
79             =over
80              
81             =cut
82              
83             =item IsHor
84              
85             Matches characters found in a "horizontal rule" row.
86              
87             =cut
88              
89             # defining character properties for regexp defaults
90             sub IsHor {
91 8     8 1 6997 my @codepoints =
92             ('002D', # - \N{HYPHEN-MINUS}
93             '002B', # + \N{PLUS SIGN}
94             '2500', # ─ \N{BOX DRAWINGS LIGHT HORIZONTAL}
95             '253C', # ┼ \N{BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL}
96             );
97 8         36 my $list = join "\n", @codepoints;
98 8         27 return $list;
99             }
100              
101             =item IsCross
102              
103             Matches the "cross" characters used at line intersections.
104              
105             =cut
106              
107             sub IsCross {
108 10     10 1 1503 my @codepoints =
109             (
110             '002B', # + \N{PLUS SIGN}
111             '253C', # ┼ \N{BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL}
112             );
113 10         34 my $list = join "\n", @codepoints;
114 10         32 return $list;
115             }
116              
117             =item IsDelim
118              
119             Matches the delimiter/separator characters used on column boundaries.
120              
121             =cut
122              
123             sub IsDelim {
124 2     2 1 395 my @codepoints =
125             (
126             '007C', # | \N{VERTICAL LINE}
127             '2502', # │ \N{BOX DRAWINGS LIGHT VERTICAL}
128             );
129 2         6 my $list = join "\n", @codepoints;
130 2         7 return $list;
131             }
132              
133              
134              
135             1;
136              
137             =head1 NOTES
138              
139             =head2 about characters used in the above classes
140              
141             =head3 unicode characters
142              
143             the unicode psql format uses these three characters:
144              
145             uniprops U+2502
146             U+2502 ‹│› \N{BOX DRAWINGS LIGHT VERTICAL}
147             \pS \p{So}
148             All Any Assigned InBoxDrawing Box_Drawing Common Zyyy So S Gr_Base
149             Grapheme_Base Graph GrBase Other_Symbol Pat_Syn Pattern_Syntax PatSyn
150             Print Symbol Unicode X_POSIX_Graph X_POSIX_Print
151              
152             uniprops U+2500
153             U+2500 ‹─› \N{BOX DRAWINGS LIGHT HORIZONTAL}
154             \pS \p{So}
155             All Any Assigned InBoxDrawing Box_Drawing Common Zyyy So S Gr_Base
156             Grapheme_Base Graph GrBase Other_Symbol Pat_Syn Pattern_Syntax PatSyn
157             Print Symbol Unicode X_POSIX_Graph X_POSIX_Print
158              
159             uniprops U+253c
160             U+253C ‹┼› \N{BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL}
161             \pS \p{So}
162             All Any Assigned InBoxDrawing Box_Drawing Common Zyyy So S Gr_Base
163             Grapheme_Base Graph GrBase Other_Symbol Pat_Syn Pattern_Syntax PatSyn
164             Print Symbol Unicode X_POSIX_Graph X_POSIX_Print
165              
166              
167             =head3 delimiter characters
168              
169             Either of these two characters may be data delimiters,
170             the ascii vertical bar or the unicode "BOX DRAWINGS LIGHT VERTICAL":
171              
172             |│
173              
174              
175             =head1 SEE ALSO
176              
177             L
178             L
179              
180             =head1 AUTHOR
181              
182             Joseph Brenner, E<lt>doom@kzsu.stanford.eduE<gt>
183              
184             =head1 COPYRIGHT AND LICENSE
185              
186             Copyright (C) 2016 by Joseph Brenner
187              
188             This program is free software; you can redistribute it and/or modify it
189             under the terms of either: the GNU General Public License as published
190             by the Free Software Foundation; or the Artistic License.
191              
192             See http://dev.perl.org/licenses/ for more information.
193              
194             =cut