| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package DMS::XS::Parser;
|
|
2
|
|
|
|
|
|
|
# XS parser — thin Perl shim over the C DMS parser.
|
|
3
|
|
|
|
|
|
|
#
|
|
4
|
|
|
|
|
|
|
# Public API mirrors DMS::Parser (pure Perl) so the two backends are drop-in
|
|
5
|
|
|
|
|
|
|
# interchangeable. SPEC v0.14 names:
|
|
6
|
|
|
|
|
|
|
#
|
|
7
|
|
|
|
|
|
|
# decode($src) -> body
|
|
8
|
|
|
|
|
|
|
# decode_document($src) -> { meta, body, comments, original_forms }
|
|
9
|
|
|
|
|
|
|
# encode($doc) -> DMS source
|
|
10
|
|
|
|
|
|
|
# encode_lite($doc) -> canonical DMS source
|
|
11
|
|
|
|
|
|
|
#
|
|
12
|
|
|
|
|
|
|
# Old names (parse, parse_document, to_dms, to_dms_lite, ...) remain as
|
|
13
|
|
|
|
|
|
|
# deprecated aliases for one release; they emit a one-time Carp warning
|
|
14
|
|
|
|
|
|
|
# and forward to the new canonical sub.
|
|
15
|
|
|
|
|
|
|
#
|
|
16
|
|
|
|
|
|
|
# Value types returned follow the same conventions as the pure-Perl parser:
|
|
17
|
|
|
|
|
|
|
# strings are unblessed Perl scalars; booleans, integers, floats, and
|
|
18
|
|
|
|
|
|
|
# date/time values are blessed into the DMS::* sentinel classes defined by
|
|
19
|
|
|
|
|
|
|
# DMS::Parser. Maps are Tie::IxHash-tied hashrefs; lists are arrayrefs.
|
|
20
|
|
|
|
|
|
|
|
|
21
|
3
|
|
|
3
|
|
355087
|
use strict;
|
|
|
3
|
|
|
|
|
8
|
|
|
|
3
|
|
|
|
|
106
|
|
|
22
|
3
|
|
|
3
|
|
16
|
use warnings;
|
|
|
3
|
|
|
|
|
18
|
|
|
|
3
|
|
|
|
|
186
|
|
|
23
|
3
|
|
|
3
|
|
17
|
use Carp ();
|
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
323
|
|
|
24
|
|
|
|
|
|
|
# Tie::IxHash is loaded lazily by Parser.xs on first full-mode parse
|
|
25
|
|
|
|
|
|
|
# (via load_module inside new_ixhash_fast). Lite-mode-only callers
|
|
26
|
|
|
|
|
|
|
# never trigger that load and skip the ~7 ms Tie::IxHash.pm parse.
|
|
27
|
|
|
|
|
|
|
# Full-mode users get the same Document shape as before — Tie::IxHash
|
|
28
|
|
|
|
|
|
|
# methods are defined by the time `parse_document` returns the tied
|
|
29
|
|
|
|
|
|
|
# hash, so `keys %$h` etc. work normally.
|
|
30
|
|
|
|
|
|
|
|
|
31
|
|
|
|
|
|
|
our $VERSION = '0.3.0';
|
|
32
|
|
|
|
|
|
|
|
|
33
|
|
|
|
|
|
|
# Capability flag — this port ships lite-mode decode + lite-mode encode_lite.
|
|
34
|
|
|
|
|
|
|
# See SPEC §Parsing modes — full and lite.
|
|
35
|
|
|
|
|
|
|
our $SUPPORTS_LITE_MODE = 1;
|
|
36
|
|
|
|
|
|
|
|
|
37
|
|
|
|
|
|
|
# Capability flag — this port ships unordered-table parse mode.
|
|
38
|
|
|
|
|
|
|
# See SPEC §Unordered tables.
|
|
39
|
|
|
|
|
|
|
our $SUPPORTS_IGNORE_ORDER = 1;
|
|
40
|
|
|
|
|
|
|
|
|
41
|
|
|
|
|
|
|
require XSLoader;
|
|
42
|
|
|
|
|
|
|
XSLoader::load('DMS::XS::Parser', $VERSION);
|
|
43
|
|
|
|
|
|
|
|
|
44
|
|
|
|
|
|
|
# Capture the XSUBs under private aliases so we can redefine the public
|
|
45
|
|
|
|
|
|
|
# `parse_*` names as deprecated Carp::carp wrappers below. The XSUBs
|
|
46
|
|
|
|
|
|
|
# themselves are bound under their original names by the XS module
|
|
47
|
|
|
|
|
|
|
# (renaming them at the C level would require a recompile and break
|
|
48
|
|
|
|
|
|
|
# any old DLL on disk); aliasing into `_xsub_*` lets us reach them
|
|
49
|
|
|
|
|
|
|
# without recursion. SPEC v0.14 rename: parse_* → decode_*.
|
|
50
|
|
|
|
|
|
|
{
|
|
51
|
3
|
|
|
3
|
|
21
|
no strict 'refs';
|
|
|
3
|
|
|
|
|
11
|
|
|
|
3
|
|
|
|
|
384
|
|
|
52
|
|
|
|
|
|
|
*_xsub_parse_document = \&parse_document;
|
|
53
|
|
|
|
|
|
|
*_xsub_parse_document_lite = \&parse_document_lite;
|
|
54
|
|
|
|
|
|
|
}
|
|
55
|
|
|
|
|
|
|
|
|
56
|
|
|
|
|
|
|
# Sentinel classes. These mirror DMS::Parser's (pure-Perl) classes so that
|
|
57
|
|
|
|
|
|
|
# encoders and tests work against both backends unchanged. We define them
|
|
58
|
|
|
|
|
|
|
# only if pure-Perl DMS::Parser hasn't already been loaded — otherwise we
|
|
59
|
|
|
|
|
|
|
# inherit its definitions and stay compatible.
|
|
60
|
|
|
|
|
|
|
sub _ensure_classes {
|
|
61
|
3
|
50
|
|
3
|
|
17
|
return if defined &DMS::LocalDate::new;
|
|
62
|
3
|
|
|
3
|
|
24
|
no strict 'refs';
|
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
6688
|
|
|
63
|
|
|
|
|
|
|
# Typed sentinels are blessed scalar refs (one alloc per value instead
|
|
64
|
|
|
|
|
|
|
# of the three in a blessed-hash shape). Matches the pure-Perl parser.
|
|
65
|
3
|
|
|
|
|
9
|
for my $cls (qw(DMS::LocalDate DMS::LocalTime DMS::LocalDateTime
|
|
66
|
|
|
|
|
|
|
DMS::OffsetDateTime)) {
|
|
67
|
12
|
|
|
0
|
|
65
|
*{"${cls}::new"} = sub { my $v = "$_[1]"; bless \$v, $_[0] };
|
|
|
12
|
|
|
|
|
158
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
68
|
12
|
|
|
0
|
|
39
|
*{"${cls}::value"} = sub { ${ $_[0] } };
|
|
|
12
|
|
|
|
|
47
|
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
69
|
|
|
|
|
|
|
}
|
|
70
|
3
|
|
|
0
|
|
27
|
*DMS::Float::new = sub { my $v = 0 + $_[1]; bless \$v, $_[0] };
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
71
|
3
|
|
|
0
|
|
13
|
*DMS::Float::value = sub { ${ $_[0] } };
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
72
|
3
|
|
|
3
|
|
14
|
*DMS::Integer::new = sub { my $v = 0 + $_[1]; bless \$v, $_[0] };
|
|
|
3
|
|
|
|
|
95
|
|
|
|
3
|
|
|
|
|
27
|
|
|
73
|
3
|
|
|
0
|
|
11
|
*DMS::Integer::value = sub { $_[0] };
|
|
|
0
|
|
|
|
|
0
|
|
|
74
|
3
|
|
|
33
|
|
10
|
*DMS::Integer::bstr = sub { "${ $_[0] }" }; # force stringification
|
|
|
33
|
|
|
|
|
39
|
|
|
|
33
|
|
|
|
|
96
|
|
|
75
|
3
|
|
|
0
|
|
10
|
*DMS::Integer::is_neg = sub { ${ $_[0] } < 0 };
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
76
|
3
|
0
|
|
0
|
|
17
|
*DMS::Bool::new = sub { my $v = $_[1]?1:0; bless \$v, $_[0] };
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
77
|
3
|
|
|
0
|
|
24
|
*DMS::Bool::value = sub { ${ $_[0] } };
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
78
|
|
|
|
|
|
|
# Path-segment marker for list-index breadcrumb steps in the
|
|
79
|
|
|
|
|
|
|
# attached-comment AST. String keys remain plain scalars.
|
|
80
|
3
|
|
|
6
|
|
29
|
*DMS::Index::new = sub { my $v = 0 + $_[1]; bless \$v, $_[0] };
|
|
|
6
|
|
|
|
|
7
|
|
|
|
6
|
|
|
|
|
12
|
|
|
81
|
3
|
|
|
1
|
|
25
|
*DMS::Index::value = sub { ${ $_[0] } };
|
|
|
1
|
|
|
|
|
3271
|
|
|
|
1
|
|
|
|
|
9
|
|
|
82
|
|
|
|
|
|
|
# SPEC §"Unordered tables": marker class for body tables produced by
|
|
83
|
|
|
|
|
|
|
# the *_unordered entry points. Underlying storage is a plain Perl
|
|
84
|
|
|
|
|
|
|
# hashref (no Tie::IxHash). `to_dms` (full mode) refuses to round-trip
|
|
85
|
|
|
|
|
|
|
# a Document containing this variant; `to_dms_lite` accepts it.
|
|
86
|
|
|
|
|
|
|
*DMS::UnorderedTable::new = sub {
|
|
87
|
0
|
|
|
0
|
|
0
|
my ($class, $h) = @_;
|
|
88
|
0
|
0
|
|
|
|
0
|
$h = {} unless defined $h;
|
|
89
|
0
|
|
|
|
|
0
|
return bless $h, $class;
|
|
90
|
3
|
|
|
|
|
23
|
};
|
|
91
|
|
|
|
|
|
|
}
|
|
92
|
|
|
|
|
|
|
_ensure_classes();
|
|
93
|
|
|
|
|
|
|
|
|
94
|
|
|
|
|
|
|
package DMS::XS::Parser;
|
|
95
|
|
|
|
|
|
|
|
|
96
|
|
|
|
|
|
|
# SPEC §Decode/Encode (v0.14): canonical entry point. Returns the body
|
|
97
|
|
|
|
|
|
|
# only — meta and comments are dropped. Use decode_document() to keep
|
|
98
|
|
|
|
|
|
|
# them.
|
|
99
|
|
|
|
|
|
|
sub decode {
|
|
100
|
2
|
|
|
2
|
0
|
5442
|
my ($src) = @_;
|
|
101
|
2
|
|
|
|
|
7
|
my $doc = decode_document($src);
|
|
102
|
2
|
|
|
|
|
10
|
return $doc->{body};
|
|
103
|
|
|
|
|
|
|
}
|
|
104
|
|
|
|
|
|
|
|
|
105
|
|
|
|
|
|
|
# SPEC §Parsing modes — full and lite. Body-only lite decode.
|
|
106
|
|
|
|
|
|
|
sub decode_lite {
|
|
107
|
0
|
|
|
0
|
0
|
0
|
my ($src) = @_;
|
|
108
|
0
|
|
|
|
|
0
|
return decode_document_lite($src)->{body};
|
|
109
|
|
|
|
|
|
|
}
|
|
110
|
|
|
|
|
|
|
|
|
111
|
|
|
|
|
|
|
# SPEC §Front-matter-only decode. Returns the FM table as a hashref
|
|
112
|
|
|
|
|
|
|
# (lite-mode shape — sidecar order list at "\0__dms_keys"), or undef
|
|
113
|
|
|
|
|
|
|
# when the document has no front matter at all. Body bytes after the
|
|
114
|
|
|
|
|
|
|
# closing `+++` are NOT tokenized; bad-body documents with valid FM
|
|
115
|
|
|
|
|
|
|
# succeed.
|
|
116
|
|
|
|
|
|
|
#
|
|
117
|
|
|
|
|
|
|
# Implementation: pre-scan the source in pure Perl to locate the FM
|
|
118
|
|
|
|
|
|
|
# block (or determine its absence), truncate the input to bytes 0..end-
|
|
119
|
|
|
|
|
|
|
# of-closing-`+++`-line, and hand the truncated buffer to the C parser
|
|
120
|
|
|
|
|
|
|
# (`parse_document_lite`). Diagnostics inside the FM block are byte-
|
|
121
|
|
|
|
|
|
|
# identical to a full decode because the leading bytes (and therefore
|
|
122
|
|
|
|
|
|
|
# every line / column inside the block) are unchanged.
|
|
123
|
|
|
|
|
|
|
sub decode_front_matter {
|
|
124
|
11
|
|
|
11
|
0
|
207719
|
my ($src) = @_;
|
|
125
|
11
|
|
|
|
|
35
|
my ($state, $close_end) = _scan_front_matter_bounds($src);
|
|
126
|
11
|
100
|
|
|
|
33
|
if ($state eq 'no_fm') {
|
|
127
|
3
|
|
|
|
|
9
|
return undef;
|
|
128
|
|
|
|
|
|
|
}
|
|
129
|
8
|
|
|
|
|
14
|
my $sub;
|
|
130
|
8
|
100
|
|
|
|
19
|
if ($state eq 'unterminated') {
|
|
131
|
|
|
|
|
|
|
# Hand the original source straight to the C parser; it will
|
|
132
|
|
|
|
|
|
|
# reach EOF inside the FM scan and raise the canonical
|
|
133
|
|
|
|
|
|
|
# "unterminated front matter" error.
|
|
134
|
1
|
|
|
|
|
2
|
$sub = $src;
|
|
135
|
|
|
|
|
|
|
} else { # 'fm'
|
|
136
|
|
|
|
|
|
|
# Truncate to end-of-closing-`+++`-line. The C parser then has
|
|
137
|
|
|
|
|
|
|
# the complete FM block and an empty body; no body bytes get
|
|
138
|
|
|
|
|
|
|
# tokenized, so body errors can't surface. Line/column numbers
|
|
139
|
|
|
|
|
|
|
# inside the FM are byte-identical to a full decode.
|
|
140
|
7
|
|
|
|
|
15
|
$sub = substr($src, 0, $close_end);
|
|
141
|
|
|
|
|
|
|
}
|
|
142
|
8
|
|
|
|
|
358
|
my $doc = _xsub_parse_document_lite($sub);
|
|
143
|
4
|
|
|
|
|
24
|
return $doc->{meta};
|
|
144
|
|
|
|
|
|
|
}
|
|
145
|
|
|
|
|
|
|
|
|
146
|
|
|
|
|
|
|
# Pre-scan to find the front matter delimiters. Returns one of:
|
|
147
|
|
|
|
|
|
|
# ('no_fm', undef) — no opening `+++` after trivia
|
|
148
|
|
|
|
|
|
|
# ('fm', $end_offset) — open + close found; $end_offset is
|
|
149
|
|
|
|
|
|
|
# the byte offset just past the EOL
|
|
150
|
|
|
|
|
|
|
# that ends the closing `+++` line
|
|
151
|
|
|
|
|
|
|
# ('unterminated',undef) — open found, no close
|
|
152
|
|
|
|
|
|
|
#
|
|
153
|
|
|
|
|
|
|
# Trivia recognized: blank lines (incl. CRLF), `# ...` line comments,
|
|
154
|
|
|
|
|
|
|
# `// ...` line comments, `### ... ###` block comments, `/* ... */`
|
|
155
|
|
|
|
|
|
|
# block comments. The scan only needs to be precise enough to locate
|
|
156
|
|
|
|
|
|
|
# `+++` reliably; it doesn't validate trivia content (the C parser
|
|
157
|
|
|
|
|
|
|
# will catch any malformed trivia when it re-scans the same prefix).
|
|
158
|
|
|
|
|
|
|
sub _scan_front_matter_bounds {
|
|
159
|
11
|
|
|
11
|
|
25
|
my ($src) = @_;
|
|
160
|
11
|
|
|
|
|
24
|
my $len = length($src);
|
|
161
|
11
|
|
|
|
|
17
|
my $i = 0;
|
|
162
|
11
|
|
|
|
|
37
|
while ($i < $len) {
|
|
163
|
11
|
|
|
|
|
31
|
my $c = substr($src, $i, 1);
|
|
164
|
|
|
|
|
|
|
# Inline whitespace.
|
|
165
|
11
|
50
|
33
|
|
|
67
|
if ($c eq ' ' || $c eq "\t") { $i++; next; }
|
|
|
0
|
|
|
|
|
0
|
|
|
|
0
|
|
|
|
|
0
|
|
|
166
|
|
|
|
|
|
|
# EOL.
|
|
167
|
11
|
100
|
|
|
|
30
|
if ($c eq "\n") { $i++; next; }
|
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
3
|
|
|
168
|
10
|
50
|
|
|
|
28
|
if ($c eq "\r") {
|
|
169
|
0
|
0
|
|
|
|
0
|
$i += (substr($src, $i, 2) eq "\r\n") ? 2 : 1;
|
|
170
|
0
|
|
|
|
|
0
|
next;
|
|
171
|
|
|
|
|
|
|
}
|
|
172
|
|
|
|
|
|
|
# `### ... ###` block comment.
|
|
173
|
10
|
50
|
|
|
|
29
|
if (substr($src, $i, 3) eq '###') {
|
|
174
|
0
|
|
|
|
|
0
|
my $end = index($src, "###", $i + 3);
|
|
175
|
0
|
0
|
|
|
|
0
|
return ('no_fm', undef) if $end < 0;
|
|
176
|
0
|
|
|
|
|
0
|
$i = $end + 3;
|
|
177
|
0
|
|
|
|
|
0
|
next;
|
|
178
|
|
|
|
|
|
|
}
|
|
179
|
|
|
|
|
|
|
# `# ...` line comment.
|
|
180
|
10
|
100
|
|
|
|
25
|
if ($c eq '#') {
|
|
181
|
1
|
|
|
|
|
40
|
my $nl = index($src, "\n", $i);
|
|
182
|
1
|
50
|
|
|
|
7
|
$i = $nl < 0 ? $len : $nl + 1;
|
|
183
|
1
|
|
|
|
|
4
|
next;
|
|
184
|
|
|
|
|
|
|
}
|
|
185
|
|
|
|
|
|
|
# `// ...` line comment.
|
|
186
|
9
|
50
|
33
|
|
|
27
|
if ($c eq '/' && substr($src, $i, 2) eq '//') {
|
|
187
|
0
|
|
|
|
|
0
|
my $nl = index($src, "\n", $i);
|
|
188
|
0
|
0
|
|
|
|
0
|
$i = $nl < 0 ? $len : $nl + 1;
|
|
189
|
0
|
|
|
|
|
0
|
next;
|
|
190
|
|
|
|
|
|
|
}
|
|
191
|
|
|
|
|
|
|
# `/* ... */` block comment.
|
|
192
|
9
|
50
|
33
|
|
|
23
|
if ($c eq '/' && substr($src, $i, 2) eq '/*') {
|
|
193
|
0
|
|
|
|
|
0
|
my $end = index($src, '*/', $i + 2);
|
|
194
|
0
|
0
|
|
|
|
0
|
return ('no_fm', undef) if $end < 0;
|
|
195
|
0
|
|
|
|
|
0
|
$i = $end + 2;
|
|
196
|
0
|
|
|
|
|
0
|
next;
|
|
197
|
|
|
|
|
|
|
}
|
|
198
|
9
|
|
|
|
|
19
|
last;
|
|
199
|
|
|
|
|
|
|
}
|
|
200
|
|
|
|
|
|
|
# Now check for `+++` opener on its own line.
|
|
201
|
11
|
100
|
|
|
|
37
|
return ('no_fm', undef) if $i + 3 > $len;
|
|
202
|
9
|
100
|
|
|
|
24
|
return ('no_fm', undef) if substr($src, $i, 3) ne '+++';
|
|
203
|
8
|
|
|
|
|
16
|
my $j = $i + 3;
|
|
204
|
|
|
|
|
|
|
# Optional trailing inline whitespace, then EOL or EOF.
|
|
205
|
8
|
|
|
|
|
20
|
while ($j < $len) {
|
|
206
|
8
|
|
|
|
|
17
|
my $c = substr($src, $j, 1);
|
|
207
|
8
|
50
|
33
|
|
|
35
|
last if $c ne ' ' && $c ne "\t";
|
|
208
|
0
|
|
|
|
|
0
|
$j++;
|
|
209
|
|
|
|
|
|
|
}
|
|
210
|
8
|
50
|
|
|
|
19
|
if ($j < $len) {
|
|
211
|
8
|
|
|
|
|
15
|
my $c = substr($src, $j, 1);
|
|
212
|
8
|
50
|
33
|
|
|
24
|
if ($c ne "\n" && $c ne "\r") {
|
|
213
|
|
|
|
|
|
|
# `+++` followed by other content on same line is not an
|
|
214
|
|
|
|
|
|
|
# opener (per SPEC §Front matter).
|
|
215
|
0
|
|
|
|
|
0
|
return ('no_fm', undef);
|
|
216
|
|
|
|
|
|
|
}
|
|
217
|
|
|
|
|
|
|
}
|
|
218
|
|
|
|
|
|
|
# Search for the closing `+++` line. Each candidate must be `+++`
|
|
219
|
|
|
|
|
|
|
# on its own line, optionally surrounded by inline whitespace.
|
|
220
|
|
|
|
|
|
|
# We walk line-by-line starting from `$j` (the EOL after the open).
|
|
221
|
8
|
|
|
|
|
13
|
my $p = $j;
|
|
222
|
8
|
|
|
|
|
19
|
while ($p < $len) {
|
|
223
|
|
|
|
|
|
|
# Skip the EOL we're sitting on.
|
|
224
|
17
|
|
|
|
|
54
|
my $c = substr($src, $p, 1);
|
|
225
|
17
|
50
|
|
|
|
39
|
if ($c eq "\n") { $p++; }
|
|
|
17
|
0
|
|
|
|
25
|
|
|
226
|
|
|
|
|
|
|
elsif ($c eq "\r") {
|
|
227
|
0
|
0
|
|
|
|
0
|
$p += (substr($src, $p, 2) eq "\r\n") ? 2 : 1;
|
|
228
|
|
|
|
|
|
|
}
|
|
229
|
17
|
100
|
|
|
|
40
|
last if $p >= $len;
|
|
230
|
|
|
|
|
|
|
# Find end of this line.
|
|
231
|
16
|
|
|
|
|
26
|
my $line_start = $p;
|
|
232
|
16
|
|
|
|
|
31
|
my $nl = index($src, "\n", $p);
|
|
233
|
16
|
50
|
|
|
|
34
|
my $line_end = $nl < 0 ? $len : $nl;
|
|
234
|
|
|
|
|
|
|
# If line_end - 1 is `\r`, the line proper ends one before.
|
|
235
|
16
|
|
|
|
|
39
|
my $line_end_no_cr = $line_end;
|
|
236
|
16
|
50
|
33
|
|
|
63
|
if ($line_end > $line_start
|
|
237
|
|
|
|
|
|
|
&& substr($src, $line_end - 1, 1) eq "\r") {
|
|
238
|
0
|
|
|
|
|
0
|
$line_end_no_cr = $line_end - 1;
|
|
239
|
|
|
|
|
|
|
}
|
|
240
|
16
|
|
|
|
|
36
|
my $line = substr($src, $line_start, $line_end_no_cr - $line_start);
|
|
241
|
16
|
|
|
|
|
27
|
my $trimmed = $line;
|
|
242
|
16
|
|
|
|
|
50
|
$trimmed =~ s/^[ \t]+//;
|
|
243
|
16
|
|
|
|
|
46
|
$trimmed =~ s/[ \t]+$//;
|
|
244
|
16
|
100
|
|
|
|
40
|
if ($trimmed eq '+++') {
|
|
245
|
|
|
|
|
|
|
# Include the EOL after the closing `+++` in the truncated
|
|
246
|
|
|
|
|
|
|
# range, so the C parser sees a complete line.
|
|
247
|
7
|
50
|
|
|
|
16
|
my $close_end = $nl < 0 ? $len : $nl + 1;
|
|
248
|
7
|
|
|
|
|
27
|
return ('fm', $close_end);
|
|
249
|
|
|
|
|
|
|
}
|
|
250
|
9
|
|
|
|
|
24
|
$p = $line_end; # advance to the EOL position
|
|
251
|
|
|
|
|
|
|
}
|
|
252
|
1
|
|
|
|
|
4
|
return ('unterminated', undef);
|
|
253
|
|
|
|
|
|
|
}
|
|
254
|
|
|
|
|
|
|
|
|
255
|
|
|
|
|
|
|
# decode_document and decode_document_lite forward to the XS-defined
|
|
256
|
|
|
|
|
|
|
# subs (still bound under their original names in Parser.xs, captured
|
|
257
|
|
|
|
|
|
|
# above as _xsub_parse_document / _xsub_parse_document_lite). The
|
|
258
|
|
|
|
|
|
|
# rename happens Perl-side so DMS-XS keeps loading any existing .dll
|
|
259
|
|
|
|
|
|
|
# without a rebuild.
|
|
260
|
39
|
|
|
39
|
0
|
433322
|
sub decode_document { goto &_xsub_parse_document }
|
|
261
|
0
|
|
|
0
|
0
|
0
|
sub decode_document_lite { goto &_xsub_parse_document_lite }
|
|
262
|
|
|
|
|
|
|
|
|
263
|
|
|
|
|
|
|
# Re-emit a parsed Document as DMS source. See SPEC §encode.
|
|
264
|
|
|
|
|
|
|
#
|
|
265
|
|
|
|
|
|
|
# Note: the underlying C parser does not yet record `original_forms`
|
|
266
|
|
|
|
|
|
|
# (integer-base / string-form preservation lives in the pure-Perl port).
|
|
267
|
|
|
|
|
|
|
# When `original_forms` is missing, the emitter falls back to defaults:
|
|
268
|
|
|
|
|
|
|
# integers render as canonical decimal, strings as basic-quoted. Comments
|
|
269
|
|
|
|
|
|
|
# and data structure are preserved via the C parser's existing comment AST.
|
|
270
|
|
|
|
|
|
|
sub encode {
|
|
271
|
19
|
|
|
19
|
0
|
7084
|
my ($doc) = @_;
|
|
272
|
19
|
|
|
|
|
1110
|
require DMS::Emitter;
|
|
273
|
19
|
|
|
|
|
62
|
return DMS::Emitter::encode($doc);
|
|
274
|
|
|
|
|
|
|
}
|
|
275
|
|
|
|
|
|
|
|
|
276
|
|
|
|
|
|
|
# Lite-mode emit: canonical DMS source — no comments, decimal integers,
|
|
277
|
|
|
|
|
|
|
# basic-quoted strings — ignoring any comments / original_forms in $doc.
|
|
278
|
|
|
|
|
|
|
# `decode(encode_lite($doc))` is data-equivalent to $doc; round-trip of
|
|
279
|
|
|
|
|
|
|
# comment + literal-form is *not* preserved. SPEC §encode.
|
|
280
|
|
|
|
|
|
|
#
|
|
281
|
|
|
|
|
|
|
# Implemented entirely in Perl (DMS::Emitter is shared between the
|
|
282
|
|
|
|
|
|
|
# pure-Perl and XS backends — same Document shape, same walk).
|
|
283
|
|
|
|
|
|
|
sub encode_lite {
|
|
284
|
2
|
|
|
2
|
0
|
1001
|
my ($doc) = @_;
|
|
285
|
|
|
|
|
|
|
# Fast path: C-side lite emitter walks the Perl tree and writes DMS
|
|
286
|
|
|
|
|
|
|
# bytes directly. Skips the per-kvpair Perl-VM trips of the pure-Perl
|
|
287
|
|
|
|
|
|
|
# Emitter. Falls back to the pure-Perl path when the XS function
|
|
288
|
|
|
|
|
|
|
# isn't available (older builds) or when the document contains a
|
|
289
|
|
|
|
|
|
|
# DMS::UnorderedTable that the C path doesn't yet handle specially
|
|
290
|
|
|
|
|
|
|
# — for the bench's normalized fixture, this is the path.
|
|
291
|
2
|
50
|
|
|
|
10
|
if (defined &encode_lite_xs) {
|
|
292
|
0
|
|
|
|
|
0
|
return encode_lite_xs($doc);
|
|
293
|
|
|
|
|
|
|
}
|
|
294
|
2
|
50
|
|
|
|
8
|
if (defined &to_dms_lite_xs) {
|
|
295
|
|
|
|
|
|
|
# Backward compat — pre-rebuild XS exposes the old XSUB name.
|
|
296
|
2
|
|
|
|
|
65
|
return to_dms_lite_xs($doc);
|
|
297
|
|
|
|
|
|
|
}
|
|
298
|
0
|
|
|
|
|
0
|
require DMS::Emitter;
|
|
299
|
0
|
|
|
|
|
0
|
return DMS::Emitter::encode_lite($doc);
|
|
300
|
|
|
|
|
|
|
}
|
|
301
|
|
|
|
|
|
|
|
|
302
|
|
|
|
|
|
|
# Deprecated aliases (SPEC v0.14: parse → decode, to_dms → encode).
|
|
303
|
|
|
|
|
|
|
# Removed in the next release. Each warns once per process via Carp.
|
|
304
|
|
|
|
|
|
|
{ my $warned;
|
|
305
|
|
|
|
|
|
|
sub parse {
|
|
306
|
1
|
50
|
|
1
|
0
|
10
|
unless ($warned++) {
|
|
307
|
1
|
|
|
|
|
247
|
Carp::carp(
|
|
308
|
|
|
|
|
|
|
'DMS::XS::Parser::parse() is deprecated; use decode() instead. '
|
|
309
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse() to decode().');
|
|
310
|
|
|
|
|
|
|
}
|
|
311
|
1
|
|
|
|
|
11
|
goto &decode;
|
|
312
|
|
|
|
|
|
|
}
|
|
313
|
|
|
|
|
|
|
}
|
|
314
|
|
|
|
|
|
|
{ my $warned;
|
|
315
|
|
|
|
|
|
|
sub parse_lite {
|
|
316
|
0
|
0
|
|
0
|
0
|
0
|
unless ($warned++) {
|
|
317
|
0
|
|
|
|
|
0
|
Carp::carp(
|
|
318
|
|
|
|
|
|
|
'DMS::XS::Parser::parse_lite() is deprecated; use decode_lite() instead. '
|
|
319
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_lite() to decode_lite().');
|
|
320
|
|
|
|
|
|
|
}
|
|
321
|
0
|
|
|
|
|
0
|
goto &decode_lite;
|
|
322
|
|
|
|
|
|
|
}
|
|
323
|
|
|
|
|
|
|
}
|
|
324
|
|
|
|
|
|
|
{ my $warned;
|
|
325
|
|
|
|
|
|
|
sub to_dms {
|
|
326
|
1
|
50
|
|
1
|
0
|
82
|
unless ($warned++) {
|
|
327
|
1
|
|
|
|
|
143
|
Carp::carp(
|
|
328
|
|
|
|
|
|
|
'DMS::XS::Parser::to_dms() is deprecated; use encode() instead. '
|
|
329
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed to_dms() to encode().');
|
|
330
|
|
|
|
|
|
|
}
|
|
331
|
1
|
|
|
|
|
13
|
goto &encode;
|
|
332
|
|
|
|
|
|
|
}
|
|
333
|
|
|
|
|
|
|
}
|
|
334
|
|
|
|
|
|
|
{ my $warned;
|
|
335
|
|
|
|
|
|
|
sub to_dms_lite {
|
|
336
|
1
|
50
|
|
1
|
0
|
9
|
unless ($warned++) {
|
|
337
|
1
|
|
|
|
|
167
|
Carp::carp(
|
|
338
|
|
|
|
|
|
|
'DMS::XS::Parser::to_dms_lite() is deprecated; use encode_lite() instead. '
|
|
339
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed to_dms_lite() to encode_lite().');
|
|
340
|
|
|
|
|
|
|
}
|
|
341
|
1
|
|
|
|
|
12
|
goto &encode_lite;
|
|
342
|
|
|
|
|
|
|
}
|
|
343
|
|
|
|
|
|
|
}
|
|
344
|
|
|
|
|
|
|
|
|
345
|
|
|
|
|
|
|
# Helper matching DMS::Parser::new_table — returns an IxHash-tied hashref.
|
|
346
|
|
|
|
|
|
|
sub new_table {
|
|
347
|
0
|
|
|
0
|
0
|
0
|
tie my %h, 'Tie::IxHash';
|
|
348
|
0
|
|
|
|
|
0
|
return \%h;
|
|
349
|
|
|
|
|
|
|
}
|
|
350
|
|
|
|
|
|
|
|
|
351
|
|
|
|
|
|
|
# SPEC §"Unordered tables" — opt-in. The underlying C parser builds
|
|
352
|
|
|
|
|
|
|
# Tie::IxHash tied tables; rather than fork the C code, we walk the
|
|
353
|
|
|
|
|
|
|
# returned tree post-parse and replace every body table with a plain
|
|
354
|
|
|
|
|
|
|
# DMS::UnorderedTable hashref (insertion-order tracking dropped). Front
|
|
355
|
|
|
|
|
|
|
# matter is intentionally left alone — meta stays ordered per spec.
|
|
356
|
|
|
|
|
|
|
sub decode_document_unordered {
|
|
357
|
0
|
|
|
0
|
0
|
0
|
my ($src) = @_;
|
|
358
|
0
|
|
|
|
|
0
|
my $doc = _xsub_parse_document($src);
|
|
359
|
0
|
0
|
|
|
|
0
|
$doc->{body} = _to_unordered($doc->{body}) if defined $doc->{body};
|
|
360
|
0
|
|
|
|
|
0
|
return $doc;
|
|
361
|
|
|
|
|
|
|
}
|
|
362
|
|
|
|
|
|
|
|
|
363
|
|
|
|
|
|
|
sub decode_lite_document_unordered {
|
|
364
|
0
|
|
|
0
|
0
|
0
|
my ($src) = @_;
|
|
365
|
0
|
|
|
|
|
0
|
my $doc = _xsub_parse_document_lite($src);
|
|
366
|
0
|
0
|
|
|
|
0
|
$doc->{body} = _to_unordered($doc->{body}) if defined $doc->{body};
|
|
367
|
0
|
|
|
|
|
0
|
return $doc;
|
|
368
|
|
|
|
|
|
|
}
|
|
369
|
|
|
|
|
|
|
|
|
370
|
|
|
|
|
|
|
# Deprecated aliases (SPEC v0.14). Removed in the next release.
|
|
371
|
|
|
|
|
|
|
# Deprecated wrappers redefining the XSUB-bound `parse_document` /
|
|
372
|
|
|
|
|
|
|
# `parse_document_lite` in the symbol table. They warn once per process
|
|
373
|
|
|
|
|
|
|
# and forward to the captured XSUB (no recursion).
|
|
374
|
|
|
|
|
|
|
{ my $warned;
|
|
375
|
3
|
|
|
3
|
|
33
|
no warnings 'redefine';
|
|
|
3
|
|
|
|
|
5
|
|
|
|
3
|
|
|
|
|
662
|
|
|
376
|
|
|
|
|
|
|
*parse_document = sub {
|
|
377
|
1
|
50
|
|
1
|
|
17
|
unless ($warned++) {
|
|
378
|
1
|
|
|
|
|
164
|
Carp::carp(
|
|
379
|
|
|
|
|
|
|
'DMS::XS::Parser::parse_document() is deprecated; '
|
|
380
|
|
|
|
|
|
|
. 'use decode_document() instead. '
|
|
381
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_document() to decode_document().');
|
|
382
|
|
|
|
|
|
|
}
|
|
383
|
1
|
|
|
|
|
28
|
goto &_xsub_parse_document;
|
|
384
|
|
|
|
|
|
|
};
|
|
385
|
|
|
|
|
|
|
}
|
|
386
|
|
|
|
|
|
|
{ my $warned;
|
|
387
|
3
|
|
|
3
|
|
26
|
no warnings 'redefine';
|
|
|
3
|
|
|
|
|
7
|
|
|
|
3
|
|
|
|
|
1977
|
|
|
388
|
|
|
|
|
|
|
*parse_document_lite = sub {
|
|
389
|
0
|
0
|
|
0
|
|
|
unless ($warned++) {
|
|
390
|
0
|
|
|
|
|
|
Carp::carp(
|
|
391
|
|
|
|
|
|
|
'DMS::XS::Parser::parse_document_lite() is deprecated; '
|
|
392
|
|
|
|
|
|
|
. 'use decode_document_lite() instead. '
|
|
393
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_document_lite() to decode_document_lite().');
|
|
394
|
|
|
|
|
|
|
}
|
|
395
|
0
|
|
|
|
|
|
goto &_xsub_parse_document_lite;
|
|
396
|
|
|
|
|
|
|
};
|
|
397
|
|
|
|
|
|
|
}
|
|
398
|
|
|
|
|
|
|
|
|
399
|
|
|
|
|
|
|
{ my $warned;
|
|
400
|
|
|
|
|
|
|
sub parse_document_unordered {
|
|
401
|
0
|
0
|
|
0
|
0
|
|
unless ($warned++) {
|
|
402
|
0
|
|
|
|
|
|
Carp::carp(
|
|
403
|
|
|
|
|
|
|
'DMS::XS::Parser::parse_document_unordered() is deprecated; '
|
|
404
|
|
|
|
|
|
|
. 'use decode_document_unordered() instead. '
|
|
405
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_*() to decode_*().');
|
|
406
|
|
|
|
|
|
|
}
|
|
407
|
0
|
|
|
|
|
|
goto &decode_document_unordered;
|
|
408
|
|
|
|
|
|
|
}
|
|
409
|
|
|
|
|
|
|
}
|
|
410
|
|
|
|
|
|
|
{ my $warned;
|
|
411
|
|
|
|
|
|
|
sub parse_lite_document_unordered {
|
|
412
|
0
|
0
|
|
0
|
0
|
|
unless ($warned++) {
|
|
413
|
0
|
|
|
|
|
|
Carp::carp(
|
|
414
|
|
|
|
|
|
|
'DMS::XS::Parser::parse_lite_document_unordered() is deprecated; '
|
|
415
|
|
|
|
|
|
|
. 'use decode_lite_document_unordered() instead. '
|
|
416
|
|
|
|
|
|
|
. 'SPEC v0.14 renamed parse_*() to decode_*().');
|
|
417
|
|
|
|
|
|
|
}
|
|
418
|
0
|
|
|
|
|
|
goto &decode_lite_document_unordered;
|
|
419
|
|
|
|
|
|
|
}
|
|
420
|
|
|
|
|
|
|
}
|
|
421
|
|
|
|
|
|
|
|
|
422
|
|
|
|
|
|
|
# Recursive walk: convert each table (plain or Tie::IxHash-tied hash) to
|
|
423
|
|
|
|
|
|
|
# a DMS::UnorderedTable plain hashref. Lists are descended into; blessed
|
|
424
|
|
|
|
|
|
|
# leaves (DMS::Integer / Float / Bool / dates) are preserved as-is.
|
|
425
|
|
|
|
|
|
|
sub _to_unordered {
|
|
426
|
0
|
|
|
0
|
|
|
my ($v) = @_;
|
|
427
|
0
|
0
|
|
|
|
|
return $v if !defined $v;
|
|
428
|
0
|
|
|
|
|
|
my $r = ref($v);
|
|
429
|
0
|
0
|
|
|
|
|
return $v if $r eq '';
|
|
430
|
|
|
|
|
|
|
# Blessed sentinels: leaves. (DMS::UnorderedTable shouldn't appear
|
|
431
|
|
|
|
|
|
|
# here at all — the XS parser doesn't produce it — but if it does we
|
|
432
|
|
|
|
|
|
|
# leave it.)
|
|
433
|
0
|
|
|
|
|
|
require Scalar::Util;
|
|
434
|
0
|
0
|
|
|
|
|
if (Scalar::Util::blessed($v)) {
|
|
435
|
0
|
0
|
|
|
|
|
return $v if $r ne 'DMS::UnorderedTable';
|
|
436
|
|
|
|
|
|
|
# Already unordered — recurse into children for safety.
|
|
437
|
0
|
|
|
|
|
|
my %h;
|
|
438
|
0
|
|
|
|
|
|
for my $k (keys %$v) {
|
|
439
|
0
|
|
|
|
|
|
$h{$k} = _to_unordered($v->{$k});
|
|
440
|
|
|
|
|
|
|
}
|
|
441
|
0
|
|
|
|
|
|
return bless \%h, 'DMS::UnorderedTable';
|
|
442
|
|
|
|
|
|
|
}
|
|
443
|
0
|
0
|
|
|
|
|
if ($r eq 'ARRAY') {
|
|
444
|
0
|
|
|
|
|
|
return [ map { _to_unordered($_) } @$v ];
|
|
|
0
|
|
|
|
|
|
|
|
445
|
|
|
|
|
|
|
}
|
|
446
|
0
|
0
|
|
|
|
|
if ($r eq 'HASH') {
|
|
447
|
|
|
|
|
|
|
# Tie::IxHash-tied hash or plain hash — either way, walk via
|
|
448
|
|
|
|
|
|
|
# `keys` (tied yields insertion order; plain yields hash order)
|
|
449
|
|
|
|
|
|
|
# and rebuild as a plain blessed UnorderedTable. We drop the
|
|
450
|
|
|
|
|
|
|
# Tie::IxHash magic by copying into a fresh `%h`.
|
|
451
|
0
|
|
|
|
|
|
my %h;
|
|
452
|
0
|
|
|
|
|
|
for my $k (keys %$v) {
|
|
453
|
0
|
|
|
|
|
|
$h{$k} = _to_unordered($v->{$k});
|
|
454
|
|
|
|
|
|
|
}
|
|
455
|
0
|
|
|
|
|
|
return bless \%h, 'DMS::UnorderedTable';
|
|
456
|
|
|
|
|
|
|
}
|
|
457
|
0
|
|
|
|
|
|
return $v;
|
|
458
|
|
|
|
|
|
|
}
|
|
459
|
|
|
|
|
|
|
|
|
460
|
|
|
|
|
|
|
1;
|