File Coverage

blib/lib/DMS/Emitter.pm
Criterion Covered Total %
statement 247 507 48.7
branch 89 288 30.9
condition 34 99 34.3
subroutine 27 34 79.4
pod 0 4 0.0
total 397 932 42.6


line stmt bran cond sub pod time code
1             package DMS::Emitter;
2             # DMS encode emitter — re-emit a parsed Document as DMS source.
3             #
4             # Mirrors the Rust reference (language/rust/crates/dms/src/lib.rs::encode).
5             # Pure Perl; no parser dependency at parse time. Used by both
6             # DMS::Parser::encode and DMS::XS::Parser::encode — the document shape
7             # is identical between the two backends, so the emitter walks them the
8             # same way.
9             #
10             # Contract (SPEC §encode):
11             # decode(encode(decode(source))) is data-equivalent to decode(source),
12             # has the same comments at the same attached paths, and uses the same
13             # literal forms for values where preserved (integer base, string form).
14             #
15             # Round-trip stability:
16             # encode(decode(encode(decode(source)))) is byte-equal to encode(decode(source)).
17             #
18             # SPEC v0.14 renamed to_dms/to_dms_lite to encode/encode_lite. The old
19             # names remain as deprecated aliases for one release.
20             #
21             # A Document is a hashref:
22             # { meta => undef|tied-hash, body => value, comments => [...],
23             # original_forms => [ [path_aref, lit_href], ... ] }
24            
25 1     1   8 use strict;
  1         2  
  1         50  
26 1     1   6 use warnings;
  1         3  
  1         75  
27 1     1   861 use utf8;
  1         336  
  1         9  
28 1     1   51 use Scalar::Util qw(blessed);
  1         2  
  1         77  
29 1     1   678 use POSIX qw(isnan isinf);
  1         5573  
  1         5  
30 1     1   1425 use Carp ();
  1         10  
  1         6050  
31            
32             our $VERSION = '0.3.0';
33            
34             my $INDENT_STR = ' ';
35            
36             # Public entry point. $doc is the decode_document return value.
37             # SPEC §encode (v0.14): renamed from to_dms() to encode().
38             sub encode {
39 19     19 0 34 my ($doc) = @_;
40 19         37 return _emit($doc, 0);
41             }
42            
43             # Lite-mode entry point — emits canonical DMS source with no comments
44             # and no original-form preservation (decimal integers, basic-quoted
45             # strings). Mirrors the Rust reference's `encode_lite`. SPEC §encode.
46             #
47             # `decode(encode_lite(doc))` is data-equivalent to `doc`; it is
48             # *not* required to round-trip comments or literal forms.
49             sub encode_lite {
50 0     0 0 0 my ($doc) = @_;
51 0         0 return _emit($doc, 1);
52             }
53            
54             # Deprecated aliases (SPEC v0.14 renamed to_dms/to_dms_lite to
55             # encode/encode_lite). Kept for one release. Carp once per process to
56             # avoid flooding callers that loop.
57             { my $warned;
58             sub to_dms {
59 0 0   0 0 0 unless ($warned++) {
60 0         0 Carp::carp(
61             'DMS::Emitter::to_dms() is deprecated; use encode() instead. '
62             . 'SPEC v0.14 renamed to_dms() to encode().');
63             }
64 0         0 goto &encode;
65             }
66             }
67             { my $warned;
68             sub to_dms_lite {
69 0 0   0 0 0 unless ($warned++) {
70 0         0 Carp::carp(
71             'DMS::Emitter::to_dms_lite() is deprecated; use encode_lite() instead. '
72             . 'SPEC v0.14 renamed to_dms_lite() to encode_lite().');
73             }
74 0         0 goto &encode_lite;
75             }
76             }
77            
78             sub _emit {
79 19     19   33 my ($doc, $lite) = @_;
80             # SPEC §"Unordered tables": full-mode `encode` refuses to round-trip
81             # a Document that contains a DMS::UnorderedTable. The Document tree
82             # has no stable iteration order, so re-emission cannot be byte-stable.
83             # Lite mode is allowed (canonical emit, no order promise).
84 19 50 33     75 if (!$lite && _contains_unordered($doc->{body})) {
85 0         0 die "encode (full-mode round-trip) refuses Document with DMS::UnorderedTable; "
86             . "use decode_document (ordered) or encode_lite. SPEC §Unordered tables.\n";
87             }
88 19 50 66     75 if (!$lite && defined($doc->{meta}) && _contains_unordered($doc->{meta})) {
      66        
89 0         0 die "encode (full-mode round-trip) refuses Document with DMS::UnorderedTable; "
90             . "use decode_document (ordered) or encode_lite. SPEC §Unordered tables.\n";
91             }
92 19 50       137 my $self = bless {
93             out => '',
94             comments_by_path => {}, # path-key -> { leading=>[], trailing=>scalar/undef, floating=>[] }
95             forms_by_path => {}, # path-key -> $lit
96             lite => $lite ? 1 : 0,
97             doc => $doc,
98             }, __PACKAGE__;
99             # In lite mode, the per-path comment + original-form maps stay
100             # empty: the walk then emits canonical form even when `doc.comments`
101             # / `doc.original_forms` are populated. Mirrors the Rust emitter's
102             # `new_lite` constructor.
103 19 50       57 if (!$self->{lite}) {
104             # Bucket comments + original_forms by path-key (joined string).
105 19 50       25 for my $ac (@{ $doc->{comments} || [] }) {
  19         51  
106 21         42 my $pk = _path_key($ac->{path});
107 21   100     136 $self->{comments_by_path}{$pk} ||= { leading => [], inner => [], trailing => [], floating => [] };
108 21         34 my $entry = $self->{comments_by_path}{$pk};
109 21         47 my $pos = $ac->{position};
110 21 100       50 if ($pos eq 'leading') {
    50          
    100          
111 10         15 push @{ $entry->{leading} }, $ac->{comment};
  10         34  
112             } elsif ($pos eq 'inner') {
113 0         0 push @{ $entry->{inner} }, $ac->{comment};
  0         0  
114             } elsif ($pos eq 'trailing') {
115 10         14 push @{ $entry->{trailing} }, $ac->{comment};
  10         29  
116             } else {
117 1         2 push @{ $entry->{floating} }, $ac->{comment};
  1         3  
118             }
119             }
120 19 50       29 for my $pair (@{ $doc->{original_forms} || [] }) {
  19         46  
121 7         13 my ($p, $lit) = @$pair;
122 7         14 my $pk = _path_key($p);
123 7 50       24 $self->{forms_by_path}{$pk} = $lit if !exists $self->{forms_by_path}{$pk};
124             }
125             }
126            
127 19         61 $self->_emit_document;
128 19         206 return $self->{out};
129             }
130            
131             # --- path-key encoding ---
132             # A path is an arrayref of plain string segments (table keys) and
133             # DMS::Index objects (list indices). We encode each segment with a
134             # distinct prefix so the join is unambiguous (a string key "0" doesn't
135             # collide with index 0).
136             sub _path_key {
137 295     295   353 my ($p) = @_;
138             return join("\0", map {
139 295 100       401 ref($_) eq 'DMS::Index' ? "I:" . $$_ : "K:$_"
  346         877  
140             } @$p);
141             }
142            
143             sub _emit_document {
144 19     19   27 my $self = shift;
145 19         26 my $doc = $self->{doc};
146            
147             # Front matter: emit the `+++` block when meta is defined OR when
148             # any `__fm__`-prefixed comment exists. Spec §to_dms allows omitting
149             # an empty-meta block when no FM comments either, but emitting `+++\n+++\n`
150             # for an empty-meta doc with no FM comments would be wrong; the
151             # has_meta test below guards that.
152             # Lite mode emits no comments, so FM comments don't force a `+++`
153             # block — only an explicit `meta = Some(...)` does. Mirrors the
154             # Rust reference (lib.rs::emit_document `!self.lite && ...`).
155 19         25 my $has_fm_comments = 0;
156 19 50       35 if (!$self->{lite}) {
157 19 50       27 for my $ac (@{ $doc->{comments} || [] }) {
  19         56  
158 21         33 my $first = $ac->{path}[0];
159 21 50 33     92 if (defined($first) && !ref($first) && $first eq '__fm__') {
      33        
160 0         0 $has_fm_comments = 1; last;
  0         0  
161             }
162             }
163             }
164 19         33 my $fm_present = defined $doc->{meta};
165 19 100 66     54 if ($fm_present || $has_fm_comments) {
166 3         7 $self->{out} .= "+++\n";
167 3         7 my $fm_path = ['__fm__'];
168 3 50       40 if (defined $doc->{meta}) {
169 3         8 $self->_emit_table_block($doc->{meta}, $fm_path, 0);
170             } else {
171 0         0 $self->_emit_floating($fm_path, 0);
172             }
173 3         5 $self->{out} .= "+++\n\n";
174             }
175            
176 19         25 my $body_path = [];
177 19         50 my $body = $doc->{body};
178 19 50       32 if (_is_table($body)) {
    0          
179 19         45 $self->_emit_table_block($body, $body_path, 0);
180             } elsif (_is_list($body)) {
181 0         0 $self->_emit_list_block($body, $body_path, 0);
182             } else {
183             # Scalar root.
184 0         0 my $nc = $self->{comments_by_path}{ _path_key($body_path) };
185 0 0       0 if ($nc) {
186 0         0 for my $c (@{ $nc->{leading} }) {
  0         0  
187 0         0 $self->_emit_comment_line($c, 0);
188             }
189             }
190 0         0 $self->_emit_value_inline($body, $body_path);
191 0         0 $self->_emit_trailing_for($body_path);
192 0         0 $self->{out} .= "\n";
193 0 0       0 if ($nc) {
194 0         0 for my $c (@{ $nc->{floating} }) {
  0         0  
195 0         0 $self->_emit_comment_line($c, 0);
196             }
197             }
198             }
199             }
200            
201             sub _is_table {
202 83     83   131 my ($v) = @_;
203 83 50       128 return 0 if !defined $v;
204             # SPEC §"Unordered tables": DMS::UnorderedTable is a blessed hashref
205             # marker. Treat it as a table for emission purposes; the full-mode
206             # to_dms guard above prevents it from reaching here in round-trip.
207 83 100       135 if (blessed($v)) {
208 38         124 return ref($v) eq 'DMS::UnorderedTable';
209             }
210 45         122 return ref($v) eq 'HASH';
211             }
212            
213             sub _is_list {
214 56     56   77 my ($v) = @_;
215 56 50       83 return 0 if !defined $v;
216 56 100       151 return 0 if blessed($v);
217 18         41 return ref($v) eq 'ARRAY';
218             }
219            
220             sub _table_keys {
221 28     28   41 my ($t) = @_;
222             # Fast path: when the table is a Tie::IxHash, bypass tie magic and
223             # read the keys AV directly. The tied object is a blessed arrayref
224             # `[idx_hv_rv, keys_av_rv, vals_av_rv, iter]` per Tie::IxHash's
225             # documented internal shape. `keys %$t` works too but goes through
226             # every entry's mg_find — measurably slower on a 5000-pair tree.
227 28         41 my $tied = tied(%$t);
228 28 50 33     107 if ($tied && ref($tied) eq 'Tie::IxHash') {
229 28         31 return @{ $tied->[1] };
  28         88  
230             }
231             # DMS::UnorderedTable (plain blessed hash) or plain HV: arbitrary
232             # iteration order is the documented contract for lite mode.
233 0         0 return keys %$t;
234             }
235            
236             # Recursive walk: returns true if any nested table (or the value itself)
237             # is a DMS::UnorderedTable. Used by `to_dms` (full mode) to refuse
238             # round-trip on unordered Documents per SPEC §"Unordered tables".
239             sub _contains_unordered {
240 73     73   132 my ($v) = @_;
241 73 50       330 return 0 if !defined $v;
242 73 100       138 if (blessed($v)) {
243             # DMS::UnorderedTable is itself the marker — we don't need to
244             # walk into it because finding the outer one is enough; any
245             # nested UnorderedTable would still be detected via the body.
246 38 50       89 return 1 if ref($v) eq 'DMS::UnorderedTable';
247             # Other blessed sentinels (DMS::Integer, DMS::Bool, dates,
248             # DMS::Float) are leaves.
249 38         100 return 0;
250             }
251 35 100       72 if (ref($v) eq 'HASH') {
252 25         93 for my $k (keys %$v) {
253 45 50       481 next if $k eq "\0_keys";
254 45 50       114 return 1 if _contains_unordered($v->{$k});
255             }
256 25         91 return 0;
257             }
258 10 100       25 if (ref($v) eq 'ARRAY') {
259 2         3 for my $item (@$v) {
260 6 50       8 return 1 if _contains_unordered($item);
261             }
262 2         4 return 0;
263             }
264 8         24 return 0;
265             }
266            
267             sub _emit_table_block {
268 25     25   53 my ($self, $t, $path, $indent) = @_;
269             # Lite-mode hot path: no comments, no original_forms, so the per-kvpair
270             # path/path-key construction and comment-map lookups are pure overhead.
271             # Skipping them saves ~35% of emit wall time on a 5000-kvpair Helm
272             # chart values.yaml fixture (the path-building inner-array copy plus
273             # _path_key string-join was the dominant cost).
274 25 50       44 if ($self->{lite}) {
275 0         0 my $pad = $INDENT_STR x $indent;
276 0         0 for my $k (_table_keys($t)) {
277 0         0 my $v = $t->{$k};
278 0         0 my $r = ref($v);
279             # Inline the common scalar cases. The realistic fixture is
280             # >90% plain-string + a few bool/int values; dispatching to
281             # _emit_value_inline + _emit_string per kvpair was the
282             # dominant cost after the path-key skip.
283 0 0       0 if (!$r) {
284 0 0       0 my $key_fmt = ($k =~ /\A[A-Za-z_][A-Za-z0-9_-]*\z/)
285             ? $k : _format_key($k);
286 0 0       0 if ($v !~ /[\\"\x00-\x1F]/) {
287 0         0 $self->{out} .= "${pad}${key_fmt}: \"${v}\"\n";
288             } else {
289 0         0 $self->{out} .= "${pad}${key_fmt}: \"" . _escape_basic($v) . "\"\n";
290             }
291 0         0 next;
292             }
293 0 0       0 if ($r eq 'DMS::Bool') {
294 0 0       0 my $key_fmt = ($k =~ /\A[A-Za-z_][A-Za-z0-9_-]*\z/)
295             ? $k : _format_key($k);
296 0 0       0 $self->{out} .= $pad . $key_fmt . ': '
297             . ($$v ? "true\n" : "false\n");
298 0         0 next;
299             }
300 0 0       0 if ($r eq 'DMS::Integer') {
301 0 0       0 my $key_fmt = ($k =~ /\A[A-Za-z_][A-Za-z0-9_-]*\z/)
302             ? $k : _format_key($k);
303 0         0 $self->{out} .= "${pad}${key_fmt}: ${$v}\n";
  0         0  
304 0         0 next;
305             }
306 0   0     0 my $can_block =
307             (_is_table($v) && scalar(_table_keys($v))) ||
308             (_is_list($v) && scalar(@$v));
309 0         0 $self->{out} .= $pad;
310 0         0 $self->{out} .= _format_key($k);
311 0         0 $self->{out} .= ':';
312 0 0       0 if ($can_block) {
313 0         0 $self->{out} .= "\n";
314 0 0       0 if (_is_table($v)) {
315 0         0 $self->_emit_table_block($v, undef, $indent + 1);
316             } else {
317 0         0 $self->_emit_list_block($v, undef, $indent + 1);
318             }
319             } else {
320 0         0 $self->{out} .= ' ';
321 0         0 $self->_emit_value_inline($v, undef);
322 0         0 $self->{out} .= "\n";
323             }
324             }
325 0         0 return;
326             }
327 25         40 for my $k (_table_keys($t)) {
328 45         188 my $v = $t->{$k};
329 45         270 my $child_path = [ @$path, $k ];
330 45         67 my $child_pk = _path_key($child_path);
331 45         71 my $nc = $self->{comments_by_path}{$child_pk};
332 45 100       74 if ($nc) {
333 9         12 for my $c (@{ $nc->{leading} }) {
  9         17  
334 8         23 $self->_emit_comment_line($c, $indent);
335             }
336             }
337 45   100     66 my $has_trailing = $nc && @{ $nc->{trailing} };
338 45         85 my $has_inner = $self->_has_inner($child_path);
339 45   100     80 my $can_block =
340             (_is_table($v) && scalar(_table_keys($v))) ||
341             (_is_list($v) && scalar(@$v));
342 45   66     106 my $needs_block = $can_block && !($has_trailing && $self->_is_flow_safe($v, $child_path));
343 45         92 $self->{out} .= $INDENT_STR x $indent;
344 45         95 $self->{out} .= _format_key($k);
345 45         73 $self->{out} .= ':';
346 45 100       77 if ($needs_block) {
347 5 50       10 if ($has_inner) {
348 0         0 $self->{out} .= ' ';
349 0         0 $self->_emit_inner_for($child_path);
350             # Trim trailing space left by _emit_inner_for.
351 0         0 $self->{out} =~ s/ \z//;
352             }
353 5         9 $self->{out} .= "\n";
354 5 100       23 if (_is_table($v)) { $self->_emit_table_block($v, $child_path, $indent + 1); }
  3         12  
355 2         6 else { $self->_emit_list_block($v, $child_path, $indent + 1); }
356             } else {
357 40         57 $self->{out} .= ' ';
358 40         98 $self->_emit_inner_for($child_path);
359 40         108 $self->_emit_value_inline($v, $child_path);
360 40         91 $self->_emit_trailing_for($child_path);
361 40         97 $self->{out} .= "\n";
362             }
363             }
364 25         60 $self->_emit_floating($path, $indent);
365             }
366            
367             sub _emit_list_block {
368 2     2   3 my ($self, $items, $path, $indent) = @_;
369 2 50       5 if ($self->{lite}) {
370 0         0 my $pad = $INDENT_STR x $indent;
371 0         0 for (my $i = 0; $i < @$items; $i++) {
372 0         0 my $v = $items->[$i];
373 0         0 $self->{out} .= $pad;
374 0         0 $self->{out} .= '+';
375 0 0 0     0 if (_is_table($v) && scalar(_table_keys($v))) {
    0 0        
376 0         0 $self->{out} .= "\n";
377 0         0 $self->_emit_table_block($v, undef, $indent + 1);
378             } elsif (_is_list($v) && scalar(@$v)) {
379 0         0 $self->{out} .= "\n";
380 0         0 $self->_emit_list_block($v, undef, $indent + 1);
381             } else {
382 0         0 $self->{out} .= ' ';
383 0         0 $self->_emit_value_inline($v, undef);
384 0         0 $self->{out} .= "\n";
385             }
386             }
387 0         0 return;
388             }
389 2         5 for (my $i = 0; $i < @$items; $i++) {
390 6         7 my $v = $items->[$i];
391 6         17 my $child_path = [ @$path, DMS::Index->new($i) ];
392 6         8 my $child_pk = _path_key($child_path);
393 6         7 my $nc = $self->{comments_by_path}{$child_pk};
394 6 50       8 if ($nc) {
395 0         0 for my $c (@{ $nc->{leading} }) {
  0         0  
396 0         0 $self->_emit_comment_line($c, $indent);
397             }
398             }
399 6         11 $self->{out} .= $INDENT_STR x $indent;
400 6         7 $self->{out} .= '+';
401 6         9 my $has_inner = $self->_has_inner($child_path);
402 6 50 50     8 if (_is_table($v) && scalar(_table_keys($v))) {
    50 50        
403 0 0       0 if ($has_inner) {
404 0         0 $self->{out} .= ' ';
405 0         0 $self->_emit_inner_for($child_path);
406 0         0 $self->{out} =~ s/ \z//;
407             }
408 0         0 $self->_emit_trailing_for($child_path);
409 0         0 $self->{out} .= "\n";
410 0         0 $self->_emit_table_block($v, $child_path, $indent + 1);
411             } elsif (_is_list($v) && scalar(@$v)) {
412 0 0       0 if ($has_inner) {
413 0         0 $self->{out} .= ' ';
414 0         0 $self->_emit_inner_for($child_path);
415 0         0 $self->{out} =~ s/ \z//;
416             }
417 0         0 $self->_emit_trailing_for($child_path);
418 0         0 $self->{out} .= "\n";
419 0         0 $self->_emit_list_block($v, $child_path, $indent + 1);
420             } else {
421 6         8 $self->{out} .= ' ';
422 6         11 $self->_emit_inner_for($child_path);
423 6         8 $self->_emit_value_inline($v, $child_path);
424 6         8 $self->_emit_trailing_for($child_path);
425 6         18 $self->{out} .= "\n";
426             }
427             }
428 2         3 $self->_emit_floating($path, $indent);
429             }
430            
431             sub _emit_value_inline {
432 46     46   73 my ($self, $v, $path) = @_;
433 46 100       69 if (blessed($v)) {
434 38         48 my $cls = ref($v);
435 38 0 0     72 if ($cls eq 'DMS::Bool') { $self->{out} .= $v->value ? 'true' : 'false'; }
  0 50 0     0  
    50 0        
    0          
    0          
436 38         74 elsif ($cls eq 'DMS::Integer') { $self->_emit_integer($v, $path); }
437 0         0 elsif ($cls eq 'DMS::Float') { $self->_emit_float($v->value); }
438             elsif ($cls eq 'DMS::OffsetDateTime'
439             || $cls eq 'DMS::LocalDateTime'
440             || $cls eq 'DMS::LocalDate'
441 0         0 || $cls eq 'DMS::LocalTime') { $self->{out} .= $v->value; }
442 0         0 else { die "to_dms: unknown blessed class $cls"; }
443 38         60 return;
444             }
445 8 50       13 if (_is_list($v)) {
446 0 0       0 if (!@$v) { $self->{out} .= '[]'; return; }
  0         0  
  0         0  
447 0         0 $self->{out} .= '[';
448 0         0 my $lite = $self->{lite};
449 0         0 for (my $i = 0; $i < @$v; $i++) {
450 0 0       0 $self->{out} .= ', ' if $i > 0;
451 0 0       0 my $sub = $lite ? undef : [ @$path, DMS::Index->new($i) ];
452 0         0 $self->_emit_value_inline($v->[$i], $sub);
453             }
454 0         0 $self->{out} .= ']';
455 0         0 return;
456             }
457 8 50       14 if (_is_table($v)) {
458 0         0 my @keys = _table_keys($v);
459 0 0       0 if (!@keys) { $self->{out} .= '{}'; return; }
  0         0  
  0         0  
460 0         0 $self->{out} .= '{';
461 0         0 my $first = 1;
462 0         0 my $lite = $self->{lite};
463 0         0 for my $k (@keys) {
464 0 0       0 $self->{out} .= ', ' unless $first;
465 0         0 $first = 0;
466 0         0 $self->{out} .= _format_key($k);
467 0         0 $self->{out} .= ': ';
468 0 0       0 my $sub = $lite ? undef : [ @$path, $k ];
469 0         0 $self->_emit_value_inline($v->{$k}, $sub);
470             }
471 0         0 $self->{out} .= '}';
472 0         0 return;
473             }
474             # Plain scalar = string.
475 8 50       15 if (!defined $v) { die "to_dms: got undef value"; }
  0         0  
476 8         22 $self->_emit_string("$v", $path);
477             }
478            
479             sub _emit_integer {
480 38     38   54 my ($self, $iv, $path) = @_;
481 38 50       75 if ($self->{lite}) {
482             # DMS::Integer is `bless \$v, 'DMS::Integer'` where $v is an IV.
483             # Direct deref + string concat skips the bstr() method dispatch.
484 0         0 $self->{out} .= "${$iv}";
  0         0  
485 0         0 return;
486             }
487 38         53 my $lit_ref = $self->{forms_by_path}{ _path_key($path) };
488 38 50 66     68 if ($lit_ref && exists $lit_ref->{integer_lit}) {
489 5         7 $self->{out} .= $lit_ref->{integer_lit};
490 5         7 return;
491             }
492             # Default: canonical decimal. DMS::Integer's bstr stringifies the IV.
493 33         85 $self->{out} .= $iv->bstr;
494             }
495            
496             sub _emit_float {
497 0     0   0 my ($self, $f) = @_;
498 0 0       0 if (isnan($f)) { $self->{out} .= 'nan'; return; }
  0         0  
  0         0  
499 0 0       0 if (isinf($f)) { $self->{out} .= ($f > 0 ? 'inf' : '-inf'); return; }
  0 0       0  
  0         0  
500             # ryu-shortest equivalent: try increasing %.Ng until round-trip works.
501 0         0 for my $p (1..17) {
502 0         0 my $s = sprintf("%.${p}g", $f);
503 0 0       0 if (0 + $s == $f) {
504 0         0 $s =~ s/e\+/e/;
505 0         0 $s =~ s/e-0+(\d)/e-$1/;
506 0         0 $s =~ s/e0+(\d)/e$1/;
507 0 0       0 if ($s !~ /[.eE]/) { $s .= '.0'; }
  0         0  
508 0         0 $self->{out} .= $s;
509 0         0 return;
510             }
511             }
512 0         0 $self->{out} .= sprintf("%.17g", $f);
513             }
514            
515             sub _emit_string {
516 8     8   16 my ($self, $s, $path) = @_;
517 8 50       30 if ($self->{lite}) {
518 0         0 $self->{out} .= '"';
519 0         0 $self->{out} .= _escape_basic($s);
520 0         0 $self->{out} .= '"';
521 0         0 return;
522             }
523 8         14 my $lit_ref = $self->{forms_by_path}{ _path_key($path) };
524 8         9 my $form;
525 8 50 66     18 if ($lit_ref && exists $lit_ref->{string_form}) {
526 2         3 $form = $lit_ref->{string_form};
527             }
528 8 100 66     20 if (!$form || $form->{kind} eq 'basic') {
529 6         9 $self->{out} .= '"';
530 6         13 $self->{out} .= _escape_basic($s);
531 6         13 $self->{out} .= '"';
532 6         8 return;
533             }
534 2 100       6 if ($form->{kind} eq 'literal') {
535 1         19 $self->{out} .= "'";
536 1         3 $self->{out} .= $s;
537 1         2 $self->{out} .= "'";
538 1         2 return;
539             }
540 1 50       3 if ($form->{kind} eq 'heredoc') {
541             # The stored body is post-modifier. For idempotent modifiers this
542             # is fine, but `_fold_paragraphs` joins lines within a paragraph
543             # with spaces — so we pre-expand each `\n` to `\n\n` so the
544             # re-applied modifier preserves line boundaries on round-trip.
545 1         2 my $body = $s;
546 1         2 my $has_fold = 0;
547 1 50       2 for my $m (@{ $form->{modifiers} || [] }) {
  1         4  
548 0 0       0 if ($m->{name} eq '_fold_paragraphs') { $has_fold = 1; last; }
  0         0  
  0         0  
549             }
550 1 50       3 if ($has_fold) {
551 0         0 $body =~ s/\n/\n\n/g;
552             }
553 1   50     9 $self->_emit_heredoc($body, $form->{flavor}, $form->{label}, $form->{modifiers} || []);
554 1         2 return;
555             }
556 0         0 die "to_dms: unknown string form: $form->{kind}";
557             }
558            
559             sub _emit_heredoc {
560 1     1   3 my ($self, $body, $flavor, $label, $modifiers) = @_;
561             # Compute the kvpair's indent from the most recent newline in $self->{out}.
562 1         2 my $bytes = $self->{out};
563 1         2 my $last_nl = rindex($bytes, "\n");
564 1 50       4 my $line_start = $last_nl < 0 ? 0 : $last_nl + 1;
565 1         3 my $kv_indent_spaces = 0;
566 1   33     14 while ($line_start + $kv_indent_spaces < length($bytes)
567             && substr($bytes, $line_start + $kv_indent_spaces, 1) eq ' ') {
568 0         0 $kv_indent_spaces++;
569             }
570 1         4 my $body_indent_str = ' ' x ($kv_indent_spaces + length($INDENT_STR));
571 1         2 my $term_indent_str = $body_indent_str;
572 1 50       4 my $opener = ($flavor eq 'basic_triple') ? '"""' : "'''";
573 1         2 $self->{out} .= $opener;
574 1 50       3 $self->{out} .= $label if defined $label;
575 1         3 for my $m (@$modifiers) {
576 0         0 $self->{out} .= ' ';
577 0         0 $self->{out} .= $m->{name};
578 0         0 $self->{out} .= '(';
579 0         0 my $first = 1;
580 0 0       0 for my $a (@{ $m->{args} || [] }) {
  0         0  
581 0 0       0 $self->{out} .= ', ' unless $first;
582 0         0 $first = 0;
583 0         0 $self->_emit_modifier_arg($a);
584             }
585 0         0 $self->{out} .= ')';
586             }
587 1         6 $self->{out} .= "\n";
588 1 50       3 if (length($body) == 0) {
589             # nothing — terminator on its own line follows
590             } else {
591 1         9 for my $line (split /\n/, $body, -1) {
592 1 50       3 if (length($line) == 0) {
593 0         0 $self->{out} .= "\n";
594             } else {
595 1         2 $self->{out} .= $body_indent_str;
596 1         2 $self->{out} .= $line;
597 1         2 $self->{out} .= "\n";
598             }
599             }
600             }
601 1         2 $self->{out} .= $term_indent_str;
602 1 50       3 if (defined $label) {
603 1         3 $self->{out} .= $label;
604             } else {
605 0 0       0 $self->{out} .= ($flavor eq 'basic_triple') ? '"""' : "'''";
606             }
607             }
608            
609             sub _emit_modifier_arg {
610 0     0   0 my ($self, $v) = @_;
611 0 0       0 if (blessed($v)) {
612 0         0 my $cls = ref($v);
613 0 0 0     0 if ($cls eq 'DMS::Bool') { $self->{out} .= $v->value ? 'true' : 'false'; }
  0 0 0     0  
    0 0        
    0          
    0          
614 0         0 elsif ($cls eq 'DMS::Integer') { $self->{out} .= $v->bstr; }
615 0         0 elsif ($cls eq 'DMS::Float') { $self->_emit_float($v->value); }
616             elsif ($cls eq 'DMS::OffsetDateTime'
617             || $cls eq 'DMS::LocalDateTime'
618             || $cls eq 'DMS::LocalDate'
619 0         0 || $cls eq 'DMS::LocalTime') { $self->{out} .= $v->value; }
620 0         0 else { die "modifier arg: unknown blessed class $cls"; }
621 0         0 return;
622             }
623 0 0       0 if (_is_list($v)) { $self->{out} .= '[]'; return; }
  0         0  
  0         0  
624 0 0       0 if (_is_table($v)) { $self->{out} .= '{}'; return; }
  0         0  
  0         0  
625             # Plain scalar = string. Modifier args use basic-quoted always.
626 0         0 $self->{out} .= '"';
627 0         0 $self->{out} .= _escape_basic("$v");
628 0         0 $self->{out} .= '"';
629             }
630            
631             sub _emit_comment_line {
632 9     9   19 my ($self, $c, $indent) = @_;
633 9         15 my $text = $c->{content};
634 9         17 my $prefix = $INDENT_STR x $indent;
635 9 50       23 if (index($text, "\n") < 0) {
636 9         18 $self->{out} .= $prefix;
637 9         13 $self->{out} .= $text;
638 9         14 $self->{out} .= "\n";
639 9         21 return;
640             }
641             # Multi-line: only the first line gets re-indented; subsequent body
642             # lines keep their original whitespace verbatim.
643 0         0 my @lines = split /\n/, $text, -1;
644 0         0 for (my $i = 0; $i < @lines; $i++) {
645 0 0       0 if ($i == 0) {
646 0         0 $self->{out} .= $prefix;
647 0         0 $self->{out} .= $lines[$i];
648             } else {
649 0         0 $self->{out} .= "\n";
650 0         0 $self->{out} .= $lines[$i];
651             }
652             }
653 0         0 $self->{out} .= "\n";
654             }
655            
656             sub _emit_trailing_for {
657 46     46   65 my ($self, $path) = @_;
658 46 50       75 return if $self->{lite};
659 46         63 my $nc = $self->{comments_by_path}{ _path_key($path) };
660 46 100 66     106 return if !$nc || !@{ $nc->{trailing} };
  8         25  
661 8         11 my $first = 1;
662 8         11 for my $c (@{ $nc->{trailing} }) {
  8         16  
663 8 50       20 $self->{out} .= ($first ? ' ' : ' ');
664 8         11 $first = 0;
665 8         19 $self->{out} .= $c->{content};
666             }
667             }
668            
669             sub _emit_inner_for {
670 46     46   63 my ($self, $path) = @_;
671 46 50       76 return if $self->{lite};
672 46         70 my $nc = $self->{comments_by_path}{ _path_key($path) };
673 46 100       117 return if !$nc;
674 8         12 for my $c (@{ $nc->{inner} }) {
  8         35  
675 0         0 $self->{out} .= $c->{content};
676 0         0 $self->{out} .= ' ';
677             }
678             }
679            
680             sub _has_inner {
681 51     51   72 my ($self, $path) = @_;
682 51 50       88 return 0 if $self->{lite};
683 51         80 my $nc = $self->{comments_by_path}{ _path_key($path) };
684 51   66     103 return $nc && @{ $nc->{inner} };
685             }
686            
687             sub _emit_floating {
688 27     27   46 my ($self, $path, $indent) = @_;
689 27 50       46 return if $self->{lite};
690 27         37 my $nc = $self->{comments_by_path}{ _path_key($path) };
691 27 100       74 return if !$nc;
692 1         1 for my $c (@{ $nc->{floating} }) {
  1         2  
693 1         2 $self->_emit_comment_line($c, $indent);
694             }
695             }
696            
697             # Returns true if $v rooted at $path is safe to emit as a flow form: no
698             # heredoc strings (heredocs need their own line) and no descendant has
699             # an attached comment (flow has nowhere to put it). Used to decide
700             # flow-vs-block when a trailing comment forces flow form.
701             sub _is_flow_safe {
702 0     0   0 my ($self, $v, $path) = @_;
703 0         0 my $pk_prefix = _path_key($path);
704             # Any descendant comment ⇒ unsafe. Skipped in lite mode (no
705             # comments are emitted anyway). Mirrors Rust lib.rs::is_flow_safe.
706 0 0       0 if (!$self->{lite}) {
707 0 0       0 for my $ac (@{ $self->{doc}{comments} || [] }) {
  0         0  
708 0 0       0 next if scalar(@{ $ac->{path} }) <= scalar(@$path);
  0         0  
709 0         0 my $apk = _path_key($ac->{path});
710 0 0       0 my $prefix = $pk_prefix eq '' ? '' : "$pk_prefix\0";
711 0 0       0 if ($pk_prefix eq '') {
    0          
712             # any non-empty path is descendant of root
713 0         0 return 0;
714             } elsif (substr($apk, 0, length($prefix)) eq $prefix) {
715 0         0 return 0;
716             }
717             }
718             }
719 0 0 0     0 if (!ref($v) && !blessed($v)) {
720             # plain string: check heredoc form
721 0         0 my $lit = $self->{forms_by_path}{ _path_key($path) };
722 0 0 0     0 if ($lit && exists $lit->{string_form} && $lit->{string_form}{kind} eq 'heredoc') {
      0        
723 0         0 return 0;
724             }
725 0         0 return 1;
726             }
727 0 0       0 if (blessed($v)) { return 1; }
  0         0  
728 0 0       0 if (_is_list($v)) {
729 0         0 for (my $i = 0; $i < @$v; $i++) {
730 0         0 my $sub = [ @$path, DMS::Index->new($i) ];
731 0 0       0 return 0 if !$self->_is_flow_safe($v->[$i], $sub);
732             }
733 0         0 return 1;
734             }
735 0 0       0 if (_is_table($v)) {
736 0         0 for my $k (_table_keys($v)) {
737 0         0 my $sub = [ @$path, $k ];
738 0 0       0 return 0 if !$self->_is_flow_safe($v->{$k}, $sub);
739             }
740 0         0 return 1;
741             }
742 0         0 return 1;
743             }
744            
745             sub _escape_basic {
746 6     6   10 my ($s) = @_;
747             # Fast path: most strings need no escaping. Skip the per-char split
748             # if the string contains no `\`, no `"`, and no control chars.
749 6 50       20 return $s if $s !~ /[\\"\x00-\x1F]/;
750 0         0 my $out = '';
751 0         0 for my $ch (split //, $s) {
752 0         0 my $code = ord($ch);
753 0 0       0 if ($ch eq '\\') { $out .= '\\\\'; }
  0 0       0  
    0          
    0          
    0          
    0          
    0          
    0          
754 0         0 elsif ($ch eq '"') { $out .= '\\"'; }
755 0         0 elsif ($ch eq "\n") { $out .= '\\n'; }
756 0         0 elsif ($ch eq "\r") { $out .= '\\r'; }
757 0         0 elsif ($ch eq "\t") { $out .= '\\t'; }
758 0         0 elsif ($ch eq "\b") { $out .= '\\b'; }
759 0         0 elsif ($ch eq "\f") { $out .= '\\f'; }
760 0         0 elsif ($code < 0x20) { $out .= sprintf('\\u%04X', $code); }
761 0         0 else { $out .= $ch; }
762             }
763 0         0 return $out;
764             }
765            
766             sub _is_bare_key_char_emit {
767 0     0   0 my ($c) = @_;
768 0 0 0     0 return 1 if $c eq '_' || $c eq '-';
769 0         0 my $o = ord($c);
770 0 0       0 if ($o < 128) {
771 0         0 return $c =~ /[A-Za-z0-9]/;
772             }
773             # Match the parser's frozen Unicode 15.1 XID_Continue snapshot so that
774             # to_dms emits a key bare iff the parser would accept it bare. Avoids
775             # producing surface forms that drift with the host's Unicode tables.
776 0         0 require DMS::Parser;
777 0         0 return DMS::Parser::_is_xid_continue($o);
778             }
779            
780             sub _format_key {
781 45     45   97 my ($k) = @_;
782             # ASCII fast path. Real-world keys are >99% plain ASCII identifiers;
783             # the regex bails out at the first non-bare char without splitting
784             # the string into single-char SVs. Saves per-key allocations.
785 45 50       222 return $k if $k =~ /\A[A-Za-z_][A-Za-z0-9_-]*\z/;
786 0 0         if (length($k) > 0) {
787 0           my $bare = 1;
788 0           for my $ch (split //, $k) {
789 0 0         if (!_is_bare_key_char_emit($ch)) { $bare = 0; last; }
  0            
  0            
790             }
791 0 0         return $k if $bare;
792             }
793             # Quoted: prefer literal if no single quote, no LF/CR.
794 0 0 0       if (index($k, "'") < 0 && index($k, "\n") < 0 && index($k, "\r") < 0) {
      0        
795 0           return "'$k'";
796             }
797 0           return '"' . _escape_basic($k) . '"';
798             }
799            
800             1;