File Coverage

blib/lib/LaTeX/ToUnicode.pm
Criterion Covered Total %
statement 68 68 100.0
branch 7 10 70.0
condition n/a
subroutine 13 13 100.0
pod 1 1 100.0
total 89 92 96.7


line stmt bran cond sub pod time code
1 1     1   72220 use strict;
  1         3  
  1         28  
2 1     1   5 use warnings;
  1         2  
  1         41  
3             package LaTeX::ToUnicode;
4             BEGIN {
5 1     1   75 $LaTeX::ToUnicode::VERSION = '0.11';
6             }
7             #ABSTRACT: Convert LaTeX commands to Unicode (simplistically)
8              
9              
10             require Exporter;
11             our @ISA = qw(Exporter);
12             our @EXPORT_OK = qw( convert );
13              
14 1     1   7 use utf8;
  1         2  
  1         5  
15 1     1   520 use LaTeX::ToUnicode::Tables;
  1         4  
  1         842  
16              
17              
18             sub convert {
19 102     102 1 55942 my ( $string, %options ) = @_;
20 102         224 $string = _convert_commands( $string );
21 102         219 $string = _convert_accents( $string );
22 102 100       250 $string = _convert_german( $string ) if $options{german};
23 102         200 $string = _convert_symbols( $string );
24 102         202 $string = _convert_specials( $string );
25 102         197 $string = _convert_ligatures( $string );
26 102         213 $string = _convert_markups( $string );
27 102         220 $string =~ s/{(\w*)}/$1/g;
28 102         415 $string;
29             }
30              
31             sub _convert_commands {
32 102     102   169 my $string = shift;
33              
34 102         409 foreach my $command ( keys %LaTeX::ToUnicode::Tables::COMMANDS ) {
35 1632         11175 $string =~ s/\{\\$command\}/$LaTeX::ToUnicode::Tables::COMMANDS{$command}/g;
36 1632         14167 $string =~ s/\\$command(?=\s|\b)/$LaTeX::ToUnicode::Tables::COMMANDS{$command}/g;
37             }
38              
39 102         323 $string;
40             }
41              
42             sub _convert_accents {
43 102     102   177 my $string = shift;
44 102 50       322 $string =~ s/(\{\\(.)\{(\\?\w{1,2})\}\})/$LaTeX::ToUnicode::Tables::ACCENTS{$2}{$3} || $1/eg; # {\"{a}}
  29         223  
45 102 100       441 $string =~ s/(\{\\(.)(\\?\w{1,2})\})/$LaTeX::ToUnicode::Tables::ACCENTS{$2}{$3} || $1/eg; # {\"a}
  47         337  
46 102 50       266 $string =~ s/(\\(.)(\\?\w{1,2}))/$LaTeX::ToUnicode::Tables::ACCENTS{$2}{$3} || $1/eg; # \"a
  17         136  
47 102 50       177 $string =~ s/(\\(.)\{(\\?\w{1,2})\})/$LaTeX::ToUnicode::Tables::ACCENTS{$2}{$3} || $1/eg; # \"{a}
  1         8  
48 102         256 $string;
49             }
50              
51             sub _convert_german {
52 3     3   5 my $string = shift;
53              
54 3         19 foreach my $symbol ( keys %LaTeX::ToUnicode::Tables::GERMAN ) {
55 87         526 $string =~ s/\Q$symbol\E/$LaTeX::ToUnicode::Tables::GERMAN{$symbol}/g;
56             }
57 3         11 $string;
58             }
59              
60             sub _convert_symbols {
61 102     102   149 my $string = shift;
62              
63 102         410 foreach my $symbol ( keys %LaTeX::ToUnicode::Tables::SYMBOLS ) {
64 2244         13486 $string =~ s/{\\$symbol}/$LaTeX::ToUnicode::Tables::SYMBOLS{$symbol}/g;
65 2244         14416 $string =~ s/\\$symbol\b/$LaTeX::ToUnicode::Tables::SYMBOLS{$symbol}/g;
66             }
67 102         307 $string;
68             }
69              
70             # Replace \X with X for each special character X (e.g. \$ becomes $).
71             sub _convert_specials {
72 102     102   165 my $string = shift;
73 102         294 my $specials = join( '|', @LaTeX::ToUnicode::Tables::SPECIALS );
74 102         312 my $pattern = qr/\\($specials)/o;
75 102         415 $string =~ s/$pattern/$1/g;
76 102         185 $string =~ s/\\\$/\$/g;
77 102         257 $string;
78             }
79              
80             sub _convert_ligatures {
81 102     102   148 my $string = shift;
82              
83             # have to convert these in order specified.
84 102         355 my @ligs = @LaTeX::ToUnicode::Tables::LIGATURES;
85 102         254 for (my $i = 0; $i < @ligs; $i+=2) {
86 816         1404 my $in = $ligs[$i];
87 816         1095 my $out = $ligs[$i+1];
88 816         5592 $string =~ s/\Q$in\E/$out/g;
89             }
90 102         330 $string;
91             }
92              
93             # Strip markup commands (\textMARKUP and {\MARKUP ...} forms), keeping the text.
94             sub _convert_markups {
95 102     102   159 my $string = shift;
96 102         132 my $orig_string = $string;
97            
98 102         253 my $markups = join( '|', @LaTeX::ToUnicode::Tables::MARKUPS );
99            
100             # Remove \textMARKUP{...}, leaving just the {...}
101 102         329 $string =~ s/\\text($markups)\b\s*//g;
102              
103             # Remove \command (the braces are kept) in: {... \command ...}
104 102         341 $string =~ s/(\{[^{}]+)\\(?:$markups)\s+([^{}]+\})/$1$2/g;
105             #
106             # Remove braces and \command in: {\command ...}
107 102         335 $string =~ s/\{\\(?:$markups)\s+([^{}]*)\}/$1/g;
108             #
109             # Remove: {\command
110             # Although this will leave unmatched } chars behind, there's no
111             # alternative without full parsing, since the bib entry will often
112             # look like: {\em {The TeX{}book}}. Also might, in principle, be
113             # at the end of a line.
114 102         233 $string =~ s/\{\\(?:$markups)\b\s*//g;
115              
116             # Ultimately we remove all braces in ltx2crossrefxml SanitizeText fns,
117             # so the unmatched braces don't matter ... that code should be moved here.
118              
119 102         234 $string;
120             }
121              
122             1;
123              
124             __END__