File Coverage

blib/lib/Org/Document.pm
Criterion Covered Total %
statement 371 377 98.4
branch 161 174 92.5
condition 116 137 84.6
subroutine 27 27 100.0
pod 2 3 66.6
total 677 718 94.2


line stmt bran cond sub pod time code
1              
2             use 5.010001;
3 24     24   4216 use locale;
  24         76  
4 24     24   8848 use Log::ger;
  24         11713  
  24         106  
5 24     24   34818 use Moo;
  24         1118  
  24         115  
6 24     24   5254 use experimental 'smartmatch';
  24         11028  
  24         156  
7 24     24   16064 no if $] >= 5.021_006, warnings => "locale";
  24         62846  
  24         115  
8 24     24   14485 extends 'Org::Element';
  24         263  
  24         120  
9              
10             use List::MoreUtils qw(firstidx);
11 24     24   11491 use Time::HiRes qw(gettimeofday tv_interval);
  24         251794  
  24         137  
12 24     24   31712  
  24         26365  
  24         90  
13             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
14             our $DATE = '2022-06-23'; # DATE
15             our $DIST = 'Org-Parser'; # DIST
16             our $VERSION = '0.558'; # VERSION
17              
18             has tags => (is => 'rw');
19             has todo_states => (is => 'rw');
20             has done_states => (is => 'rw');
21             has priorities => (is => 'rw');
22             has drawer_names => (is => 'rw');
23             has properties => (is => 'rw');
24             has radio_targets => (is => 'rw');
25              
26             has time_zone => (is => 'rw');
27              
28             has ignore_unknown_settings => (is => 'rw');
29              
30             our $tags_re = qr/:(?:[\w@]+:)+/u;
31             my $ls_re = qr/(?:(?<=[\015\012])|\A)/; # line start
32             my $le_re = qr/(?:\R|\z)/; # line end
33             our $arg_re = qr/(?: '(?<squote> [^']*)' |
34             "(?<dquote> [^"]*)" |
35             (?<bare> \S+) )
36             /x;
37             our $args_re = qr/(?: $arg_re (?:[ \t]+ $arg_re)*)/x;
38             my $tstamp_re = qr/(?:\[\d{4}-\d{2}-\d{2} [^\n\]]*\])/x;
39             my $act_tstamp_re = qr/(?: <\d{4}-\d{2}-\d{2} [^\n>]* >)/x;
40             my $fn_name_re = qr/(?:[^ \t\n:\]]+)/x;
41             my $text_re =
42             qr{
43             (?<link> \[\[(?<link_link> [^\]\n]+)\]
44             (?:\[(?<link_desc> (?:[^\]]|\R)+)\])?\]) |
45             (?<radio_target> <<<(?<rt_target> [^>\n]+)>>>) |
46             (?<target> <<(?<t_target> [^>\n]+)>>) |
47              
48             # timestamp & time range
49             (?<trange> (?<trange_ts1> $tstamp_re)--
50             (?<trange_ts2> $tstamp_re)) |
51             (?<tstamp> $tstamp_re) |
52             (?<act_trange> (?<act_trange_ts1> $act_tstamp_re)--
53             (?<act_trange_ts2> $act_tstamp_re)) |
54             (?<act_tstamp> $act_tstamp_re) |
55              
56             # footnote (num, name + def, name + inline definition)
57             (?<fn_num> \[(?<fn_num_num>\d+)\]) |
58             (?<fn_namedef> $ls_re \[fn:(?<fn_namedef_name> $fn_name_re)\]
59             [ \t]* (?<fn_namedef_def> [^ \t\n]+)) |
60             (?<fn_nameidef> \[fn:(?<fn_nameidef_name> $fn_name_re?):?
61             (?<fn_nameidef_def> ([^\n\]]+)?)\]) |
62              
63             (?<markup_start> (?:(?<=\s|\(|\{)|\A) # whitespace, open paren, open curly paren
64             [*/+=~_]
65             (?=\S)) |
66             (?<markup_end> (?<=\S)
67             [*/+=~_]
68             # actually emacs doesn't allow ! after markup
69             (?:(?=[ \t\n:;"',.!?\)*-])|\z)) |
70              
71             (?<plain_text> (?:[^\[<*/+=~_\n]+|.+?))
72             #(?<plain_text> .+?) # too dispersy
73             }sxi;
74              
75             # XXX parser must be fixed: block elements have different precedence instead of
76             # flat like this. a headline has the highest precedence and a block or a drawer
77             # cannot contain a headline (e.g. "#+BEGIN_SRC foo\n* header\n#+END_SRC" should
78             # not contain a literal "* header" text but that is a headline. currently, a
79             # block or a drawer swallows a headline.
80              
81             my $block_elems_re = # top level elements
82             qr/(?<block> $ls_re (?<block_begin_indent>[ \t]*)
83             \#\+BEGIN_(?<block_name>\w+)
84             (?:[ \t]+(?<block_raw_arg>[^\n]*))?\R
85             (?<block_content>(?:.|\R)*?)
86             \R(?<block_end_indent>[ \t]*)
87             \#\+END_\k<block_name> $le_re) |
88             (?<setting> $ls_re (?<setting_indent>[ \t]*) \#\+
89             (?<setting_name> \w+): (?: [ \t]+
90             (?<setting_raw_arg> [^\n]*))? $le_re) |
91             (?<fixedw> (?: $ls_re [ \t]* (?::[ ][^\n]* | :$) $le_re )+ ) |
92             (?<comment> $ls_re [ \t]*\#[^\n]*(?:\R\#[^\n]*)* (?:\R|\z)) |
93             (?<headline> $ls_re (?<h_bullet>\*+) [ \t]
94             (?<h_title>[^\n]*?)
95             (?:[ \t]+(?<h_tags> $tags_re))?[ \t]* $le_re) |
96             (?<li_header> $ls_re (?<li_indent>[ \t]*)
97             (?<li_bullet>[+*-]|\d+\.) [ \t]+
98             (?<li_checkbox> \[(?<li_cbstate> [ X-])\])?
99             (?: (?<li_dt> [^\n]+?) [ ]::)?) |
100             (?<table> (?: $ls_re [ \t]* \| [ \t]* \S[^\n]* $le_re)+) |
101             (?<drawer> $ls_re [ \t]* :(?<drawer_name> \w+): [ \t]*\R
102             (?<drawer_content>(?:.|\R)*?)
103             $ls_re [ \t]* :END:) |
104             (?<text> (?:[^#|:+*0-9\n-]+|\n+|.)+?)
105             #(?<text> .+?) # too dispersy
106             /msxi;
107              
108             my ($self) = @_;
109             $self->tags([]);
110 95     95   177 $self->todo_states([]);
111 95         1491 $self->done_states([]);
112 95         229 $self->priorities([]);
113 95         223 $self->properties({});
114 95         1939 $self->drawer_names([qw/CLOCK LOGBOOK PROPERTIES/]);
115 95         277 # FEEDSTATUS
116 95         1672 $self->radio_targets([]);
117             }
118 95         242  
119             my ($self) = @_;
120             if (!@{ $self->todo_states } && !@{ $self->done_states }) {
121             $self->todo_states(['TODO']);
122 93     93   180 $self->done_states(['DONE']);
123 93 50 66     133 }
  93         321  
  81         304  
124 81         577 if (!@{ $self->priorities }) {
125 81         183 $self->priorities([qw/A B C/]);
126             }
127 93 100       160 $self->children([]);
  93         314  
128 89         253 }
129              
130 93         370 my $args = shift;
131             return [] unless defined($args) && length($args);
132             #$log->tracef("args = %s", $args);
133             my @args;
134 57     57   106 while ($args =~ /$arg_re (?:\s+|\z)/xg) {
135 57 100 66     230 if (defined $+{squote}) {
136             push @args, $+{squote};
137 55         75 } elsif (defined $+{dquote}) {
138 55         781 push @args, $+{dquote};
139 24 50   24   28917 } else {
  24 50       7413  
  24         72182  
  165         786  
140 0         0 push @args, $+{bare};
141             }
142 0         0 }
143             #$log->tracef("\\\@args = %s", \@args);
144 165         1003 \@args;
145             }
146              
147             my ($args) = @_;
148 55         951 my @s;
149             for (@$args) {
150             if (/\A(?:[A-Za-z0-9_:-]+|\|)\z/) {
151             push @s, $_;
152 2     2   5 } elsif (/"/) {
153 2         3 push @s, qq('$_');
154 2         4 } else {
155 4 50       17 push @s, qq("$_");
    0          
156 4         8 }
157             }
158 0         0 join " ", @s;
159             }
160 0         0  
161             my ($self, $args) = @_;
162             $self->document($self) unless $self->document;
163 2         14  
164             if (defined $args->{from_string}) {
165              
166             # NOTE: parsing is done twice. first pass will set settings (e.g. custom
167 96     96 0 82871 # todo keywords set by #+TODO), scan for radio targets. after that we
168 96 50       1237 # scan again to build the elements tree.
169              
170 96 100       1015 $self->_init_pass1();
171             $self->_parse($args->{from_string}, 1);
172             $self->_init_pass2();
173             $self->_parse($args->{from_string}, 2);
174             }
175             }
176 95         1026  
177 95         345 # parse blocky elements: setting, blocks, headline, drawer
178 93         1638 my ($self, $str, $pass) = @_;
179 93         267 log_trace('-> _parse(%s, pass=%d)', $str, $pass);
180             my $t0 = [gettimeofday];
181              
182             my $last_el;
183              
184             my $last_headline;
185 188     188   1275 my $last_headlines = [$self]; # [$doc, $last_hl_level1, $last_hl_lvl2, ...]
186 188         1213 my $last_listitem;
187 188         2447 my $last_lists = []; # [last_List_obj_for_indent_level0, ...]
188             my $parent;
189 188         304  
190             my @text;
191             while ($str =~ /$block_elems_re/og) {
192 188         1009 $parent = $last_listitem // $last_headline // $self;
193 188         221 #$log->tracef("TMP: parent=%s (%s)", ref($parent), $parent->_str);
194 188         987 my %m = %+;
195 188         257 next unless keys %m; # perlre bug?
196             #if ($log->is_trace) {
197             # # profiler shows that this is very heavy, so commenting this out
198 188         3338 # $log->tracef("TMP: match block element: %s", \%+) if $pass==2;
199 4729   100     12456 #}
      66        
200              
201 4729         27649 if (defined $m{text}) {
202 4729 50       10437 push @text, $m{text};
203             next;
204             } else {
205             if (@text) {
206             my $text = join("", @text);
207             if ($last_el && $last_el->isa('Org::Element::ListItem')) {
208 4729 100       6854 # a list is broken by either: a) another list (where the
209 4049         5442 # bullet type or indent is different; handled in the
210 4049         21494 # handling of $m{li_header}) or b) by two blank lines, or c)
211             # by non-blank text that is indented less than or equal to
212 680 100       1336 # the last list item's indent.
213 270         871  
214 270 100 100     1074 # a single blank line does not break a list. a text that is
215             # more indented than the last list item's indent will become
216             # the child of that list item.
217              
218             my ($firstline, $restlines) = $text =~ /(.*?\r?\n)(.+)/s;
219             if ($restlines) {
220             $restlines =~ /\A([ \t]*)/;
221             my $restlineslevel = length($1);
222             my $listlevel = length($last_el->parent->indent);
223             if ($restlineslevel <= $listlevel) {
224             my $origparent = $parent;
225 85         290 # find lesser-indented list
226 85 100       180 $parent = $last_headline // $self;
227 17         47 for (my $i=$restlineslevel-1; $i>=0; $i--) {
228 17         37 if ($last_lists->[$i]) {
229 17         64 $parent = $last_lists->[$i];
230 17 100       110 last;
231 11         23 }
232             }
233 11   33     25 splice @$last_lists, $restlineslevel;
234 11         35 $self->_add_text($firstline, $origparent, $pass);
235 2 50       12 $self->_add_text($restlines, $parent, $pass);
236 2         5 goto SKIP1;
237 2         4 }
238             }
239             }
240 11         21 $self->_add_text($text, $parent, $pass);
241 11         30 SKIP1:
242 11         28 @text = ();
243 11         82 $last_el = undef;
244             }
245             }
246              
247 259         638 my $el;
248 270         626 if ($m{block} && $pass == 2) {
249              
250 270         382 require Org::Element::Block;
251             $el = Org::Element::Block->new(
252             _str=>$m{block},
253             document=>$self, parent=>$parent,
254 680         861 begin_indent=>$m{block_begin_indent},
255 680 100 100     5324 end_indent=>$m{block_end_indent},
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100          
256             name=>$m{block_name}, args=>__parse_args($m{block_raw_arg}),
257 3         591 raw_content=>$m{block_content},
258             );
259              
260             } elsif ($m{setting}) {
261              
262             require Org::Element::Setting;
263             if ($m{setting_indent} &&
264             !(uc($m{setting_name}) ~~
265 3         26 @{Org::Element::Setting->indentable_settings})) {
266             push @text, $m{setting};
267             next;
268             } else {
269 60         3575 $el = Org::Element::Setting->new(
270 60 100 100     206 pass => $pass,
271             _str=>$m{setting},
272 8         24 document=>$self, parent=>$parent,
273 6         11 indent => $m{setting_indent},
274 6         22 name=>$m{setting_name},
275             raw_arg => $m{setting_raw_arg},
276             args=>__parse_args($m{setting_raw_arg}),
277             );
278             }
279              
280             } elsif ($m{fixedw} && $pass == 2) {
281              
282             require Org::Element::FixedWidthSection;
283 54         166 $el = Org::Element::FixedWidthSection->new(
284             pass => $pass,
285             _str=>$m{fixedw},
286             document=>$self, parent=>$parent,
287             );
288              
289 7         1052 } elsif ($m{comment} && $pass == 2) {
290              
291             require Org::Element::Comment;
292             $el = Org::Element::Comment->new(
293 7         135 _str=>$m{comment},
294             document=>$self, parent=>$parent,
295             );
296              
297             } elsif ($m{table} && $pass == 2) {
298 18         4126  
299             require Org::Element::Table;
300             $el = Org::Element::Table->new(
301 18         294 pass=>$pass,
302             _str=>$m{table},
303             document=>$self, parent=>$parent,
304             );
305              
306 6         1723 } elsif ($m{drawer} && $pass == 2) {
307              
308             require Org::Element::Drawer;
309             my $raw_content = $m{drawer_content};
310 6         98 $el = Org::Element::Drawer->new(
311             document=>$self, parent=>$parent,
312             name => uc($m{drawer_name}), pass => $pass,
313             );
314             $self->_add_text($raw_content, $el, $pass);
315 15         2675  
316 15         41 # for properties, we also parse property lines from raw drawer
317             # content. this is currently separate from normal Org text parsing,
318             # i'm not clear yet on how to do this canonically.
319 15         226 $el->_parse_properties($raw_content);
320              
321 14         49 } elsif ($m{li_header} && $pass == 2) {
322              
323             require Org::Element::List;
324             require Org::Element::ListItem;
325              
326 14         41 my $level = length($m{li_indent});
327             my $bullet = $m{li_bullet};
328             my $indent = $m{li_indent};
329             my $dt = $m{li_dt};
330 90         2284 my $cbstate = $m{li_cbstate};
331 90         2023 my $type = defined($dt) ? 'D' :
332             $bullet =~ /^\d+\./ ? 'O' : 'U';
333 90         232 my $bstyle = $type eq 'O' ? '<N>.' : $bullet;
334 90         137  
335 90         113 # parent for list is the last listitem of a lesser-indented list (or
336 90         156 # last headline, or document)
337 90         119 $parent = $last_headline // $self;
338 90 100       241 for (my $i=$level-1; $i>=0; $i--) {
    100          
339             if ($last_lists->[$i]) {
340 90 100       167 $parent = $last_lists->[$i]->children->[-1];
341             last;
342             }
343             }
344 90   66     193  
345 90         195 my $list = $last_lists->[$level];
346 107 100       201 if (!$list || $list->type ne $type ||
347 27         55 $list->bullet_style ne $bstyle) {
348 27         40 $list = Org::Element::List->new(
349             document => $self, parent => $parent,
350             indent=>$indent, type=>$type, bullet_style=>$bstyle,
351             );
352 90         128 $last_lists->[$level] = $list;
353 90 100 100     444 $parent->children([]) if !$parent->children;
      100        
354             push @{ $parent->children }, $list;
355 51         835 }
356             $last_lists->[$level] = $list;
357              
358             # parent for list item is list
359 51         6973 $parent = $list;
360 51 100       181  
361 51         68 $el = Org::Element::ListItem->new(
  51         103  
362             document=>$self, parent=>$list,
363 90         134 indent=>$indent, bullet=>$bullet);
364             $el->check_state($cbstate) if $cbstate;
365             $el->desc_term($self->_add_text_container($dt, $list, $pass))
366 90         105 if defined($dt);
367              
368 90         1385 splice @$last_lists, $level+1;
369             $last_listitem = $el;
370              
371 90 100       7046 } elsif ($m{headline} && $pass == 2) {
372 90 100       181  
373             require Org::Element::Headline;
374             my $level = length $m{h_bullet};
375 90         157  
376 90         140 # parent is upper-level headline
377             $parent = undef;
378             for (my $i=$level-1; $i>=0; $i--) {
379             $parent = $last_headlines->[$i] and last;
380 171         7443 }
381 171         372 $parent //= $self;
382              
383             $el = Org::Element::Headline->new(
384 171         275 _str=>$m{headline},
385 171         414 document=>$self, parent=>$parent,
386 181 100       412 level=>$level,
387             );
388 171   33     304 $el->tags(__split_tags($m{h_tags})) if ($m{h_tags});
389             my $title = $m{h_title};
390              
391             # recognize todo keyword
392 171         2863 my $todo_kw_re = "(?:".
393             join("|", map {quotemeta}
394             "COMMENT",
395 171 100       29751 @{$self->todo_states}, @{$self->done_states}) . ")";
396 171         253 if ($title =~ s/^($todo_kw_re)(\s+|\W)/$2/) {
397             my $state = $1;
398             $title =~ s/^\s+//;
399             $el->is_todo(1);
400 595         1224 $el->todo_state($state);
401             $el->is_done($state ~~ @{ $self->done_states } ? 1:0);
402 171         236 }
  171         385  
  171         351  
403 171 100       1516  
404 38         85 # recognize priority cookie
405 38         121 my $prio_re = "(?:".
406 38         100 join("|", map {quotemeta} @{$self->priorities}) . ")";
407 38         75 if ($title =~ s/\[#($prio_re)\]\s*//) {
408 38 100       43 $el->priority($1);
  38         183  
409             }
410              
411             # recognize statistics cookie
412             if ($title =~ s!\[(\d+%|\d+/\d+)\]\s*!!o) {
413 171         274 $el->statistics_cookie($1);
  517         936  
  171         363  
414 171 100       801 }
415 4         14  
416             $el->title($self->_add_text_container($title, $parent, $pass));
417              
418             $last_headlines->[$el->level] = $el;
419 171 100       467 splice @$last_headlines, $el->level+1;
420 6         17 $last_headline = $el;
421             $last_listitem = undef;
422             $last_lists = [];
423 171         380 }
424              
425 171         401 # we haven't caught other matches to become element
426 171         351 die "BUG1: no element" unless $el || $pass != 2;
427 171         217  
428 171         208 $parent->children([]) if !$parent->children;
429 171         368 push @{ $parent->children }, $el;
430             $last_el = $el;
431             }
432              
433 670 50 66     30363 # remaining text
434             if (@text) {
435 670 100       1647 $self->_add_text(join("", @text), $parent, $pass);
436 670         1098 }
  670         1165  
437 670         6077 @text = ();
438              
439             log_trace('<- _parse(), elapsed time=%.3fs',
440             tv_interval($t0, [gettimeofday]));
441 184 100       452 }
442 79         321  
443             require Org::Element::Text;
444 181         359 my ($self, $str, $parent, $pass) = @_;
445             my $container = Org::Element::Text->new(
446 181         956 document=>$self, parent=>$parent,
447             text=>'', style=>'',
448             );
449             $self->_add_text($str, $container, $pass);
450             $container = $container->children->[0] if
451 185     185   1806 $container->children && @{$container->children} == 1 &&
452 185         405 $container->children->[0]->isa('Org::Element::Text');
453 185         2773 $container;
454             }
455              
456             require Org::Element::Text;
457 185         14699 my ($self, $str, $parent, $pass) = @_;
458             $parent //= $self;
459 185 100 100     505 #$log->tracef("-> _add_text(%s, pass=%d)", $str, $pass);
  183   66     1084  
460              
461 185         537 my @plain_text;
462             while ($str =~ /$text_re/og) {
463             my %m = %+;
464             #if ($log->is_trace) {
465 591     591   10466 # # profiler shows that this is very heavy, so commenting this out
466 591         1163 # $log->tracef("TMP: match text: %s", \%+);
467 591   33     1150 #}
468             my $el;
469              
470 591         686 if (defined $m{plain_text} && $pass == 2) {
471 591         3241 push @plain_text, $m{plain_text};
472 1976         12358 next;
473             } else {
474             if (@plain_text) {
475             $self->_add_plain_text(join("", @plain_text), $parent, $pass);
476             @plain_text = ();
477 1976         3453 }
478             }
479 1976 100 100     5189  
480 1012         1617 if ($m{link} && $pass == 2) {
481 1012         3780 require Org::Element::Link;
482             $el = Org::Element::Link->new(
483 964 100       1597 document => $self, parent => $parent,
484 117         450 link=>$m{link_link},
485 117         206 );
486             if (defined($m{link_desc}) && length($m{link_desc})) {
487             $el->description(
488             $self->_add_text_container($m{link_desc},
489 964 100 100     8272 $el, $pass));
    100 33        
    50 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100 100        
    100          
490 6         1120 }
491             } elsif ($m{radio_target}) {
492             require Org::Element::RadioTarget;
493             $el = Org::Element::RadioTarget->new(
494 6         85 pass => $pass,
495 6 100 66     4234 document => $self, parent => $parent,
496             target=>$m{rt_target},
497             );
498 3         9 } elsif ($m{target} && $pass == 2) {
499             require Org::Element::Target;
500             $el = Org::Element::Target->new(
501 4         392 document => $self, parent => $parent,
502             target=>$m{t_target},
503             );
504             } elsif ($m{fn_num} && $pass == 2) {
505             require Org::Element::Footnote;
506 4         59 $el = Org::Element::Footnote->new(
507             document => $self, parent => $parent,
508 0         0 name=>$m{fn_num_num}, is_ref=>1,
509             );
510             } elsif ($m{fn_namedef} && $pass == 2) {
511             require Org::Element::Footnote;
512 0         0 $el = Org::Element::Footnote->new(
513             document => $self, parent => $parent,
514 1         372 name=>$m{fn_namedef_name},
515             is_ref=>$m{fn_namedef_def} ? 0:1,
516             );
517 1         6 $el->def($self->_add_text_container($m{fn_namedef_def},
518             $parent, $pass));
519             } elsif ($m{fn_nameidef} && $pass == 2) {
520 1         4 require Org::Element::Footnote;
521             $el = Org::Element::Footnote->new(
522             document => $self, parent => $parent,
523             name=>$m{fn_nameidef_name},
524 1 50       17 is_ref=>($m{fn_nameidef_def} ? 0:1) ||
525             !length($m{fn_nameidef_name}),
526             );
527 1         6 $el->def(length($m{fn_nameidef_def}) ?
528             $self->_add_text_container($m{fn_nameidef_def},
529 3         11 $parent, $pass) : undef);
530             } elsif ($m{trange} && $pass == 2) {
531             require Org::Element::TimeRange;
532             require Org::Element::Timestamp;
533             $el = Org::Element::TimeRange->new(
534 3   100     52 document => $self, parent => $parent,
535             );
536             my $opts = {allow_event_duration=>0, allow_repeater=>0};
537             $el->ts1(Org::Element::Timestamp->new(
538 3 100       21 _str=>$m{trange_ts1}, document=>$self, parent=>$parent));
539             $el->ts1->_parse_timestamp($m{trange_ts1}, $opts);
540 3         9 $el->ts2(Org::Element::Timestamp->new(
541 3         7 _str=>$m{trange_ts2}, document=>$self, parent=>$parent));
542 3         49 $el->ts2->_parse_timestamp($m{trange_ts2}, $opts);
543             $el->children([$el->ts1, $el->ts2]);
544             } elsif ($m{tstamp} && $pass == 2) {
545 3         58 require Org::Element::Timestamp;
546             $el = Org::Element::Timestamp->new(
547 3         48 _str => $m{tstamp}, document => $self, parent => $parent,
548 3         70 );
549             $el->_parse_timestamp($m{tstamp});
550 3         58 } elsif ($m{act_trange} && $pass == 2) {
551 3         68 require Org::Element::TimeRange;
552 3         28 require Org::Element::Timestamp;
553             $el = Org::Element::TimeRange->new(
554 26         906 document => $self, parent => $parent,
555             );
556 26         425 my $opts = {allow_event_duration=>0, allow_repeater=>0};
557             $el->ts1(Org::Element::Timestamp->new(
558 26         5988 _str=>$m{act_trange_ts1}, document=>$self, parent=>$parent));
559             $el->ts1->_parse_timestamp($m{act_trange_ts1}, $opts);
560 5         393 $el->ts2(Org::Element::Timestamp->new(
561 5         402 _str=>$m{act_trange_ts2}, document=>$self, parent=>$parent));
562 5         81 $el->ts2->_parse_timestamp($m{act_trange_ts2}, $opts);
563             $el->children([$el->ts1, $el->ts2]);
564             } elsif ($m{act_tstamp} && $pass == 2) {
565 5         1355 require Org::Element::Timestamp;
566             $el = Org::Element::Timestamp->new(
567 5         77 _str => $m{act_tstamp}, document => $self, parent => $parent,
568 5         1819 );
569             $el->_parse_timestamp($m{act_tstamp});
570 3         75 } elsif ($m{markup_start} && $pass == 2) {
571 3         84 require Org::Element::Text;
572 3         34 $el = Org::Element::Text->new(
573             document => $self, parent => $parent,
574 29         3411 style=>'', text=>$m{markup_start},
575             );
576 29         473 # temporary mark, we need to apply markup later
577             $el->{_mu_start}++;
578 29         13164 } elsif ($m{markup_end} && $pass == 2) {
579             require Org::Element::Text;
580 24         77 $el = Org::Element::Text->new(
581             document => $self, parent => $parent,
582             style=>'', text=>$m{markup_end},
583             );
584 24         347 # temporary mark, we need to apply markup later
585             $el->{_mu_end}++;
586 24         1650 }
587             die "BUG2: no element" unless $el || $pass != 2;
588 48         166 $parent->children([]) if !$parent->children;
589             push @{ $parent->children }, $el;
590             }
591              
592 48         745 # remaining text
593             if (@plain_text && $pass == 2) {
594 48         775 $parent->children([]) if !$parent->children;
595             push @{$parent->children}, Org::Element::Text->new(
596 961 50 66     2654 text => join("", @plain_text), style=>'',
597 961 100       1995 document=>$self, parent=>$parent);
598 961         1027 @plain_text = ();
  961         5534  
599             }
600              
601             if ($pass == 2) {
602 588 100 66     1589 $self->_apply_markup($parent);
603 394 100       1295 if (@{$self->radio_targets}) {
604 394         496 my $re = join "|", map {quotemeta} @{$self->radio_targets};
  394         6848  
605             $re = qr/(?:$re)/i;
606             $self->_linkify_rt_recursive($re, $parent);
607 394         16022 }
608             my $c = $parent->children // [];
609             }
610 588 100       1371  
611 413         1051 #$log->tracef('<- _add_text()');
612 413 100       477 }
  413         1035  
613 1         2  
  2         7  
  1         3  
614 1         20 # to keep parser's regexes simple and fast, we detect markup in regex rather
615 1         4 # simplistically (as text element) and then apply some more filtering & applying
616             # logic here
617 413   100     1145  
618             #$log->trace("-> _apply_markup()");
619             my ($self, $parent) = @_;
620             my $last_index = 0;
621             my $c = $parent->children or return;
622              
623             while (1) {
624             #$log->tracef("text cluster = %s", [map {$_->as_string} @$c]);
625             # find a new mu_start
626             my $mu_start_index = -1;
627             my $mu;
628             for (my $i = $last_index; $i < @$c; $i++) {
629 413     413   646 next unless $c->[$i]->{_mu_start};
630 413         497 $mu_start_index = $i; $mu = $c->[$i]->text;
631 413 100       1004 #$log->tracef("found mu_start at %d (%s)", $i, $mu);
632             last;
633 407         490 }
634             unless ($mu_start_index >= 0) {
635             #$log->trace("no more mu_start found");
636 429         476 last;
637 429         472 }
638 429         858  
639 831 100       1903 # check whether this is a valid markup (has text, has markup end, not
640 22         27 # interspersed with non-text, no more > 1 newlines)
  22         40  
641             my $mu_end_index = 0;
642 22         29 my $newlines = 0;
643             my $has_text;
644 429 100       769 my $has_unmarkable;
645             for (my $i=$mu_start_index+1; $i < @$c; $i++) {
646 407         515 if ($c->[$i]->isa('Org::Element::Text')) {
647             $has_text++;
648             } elsif (1) {
649             } else {
650             $has_unmarkable++; last;
651 22         23 }
652 22         23 if ($c->[$i]->{_mu_end} && $c->[$i]->text eq $mu) {
653 22         25 #$log->tracef("found mu_end at %d", $i);
654             $mu_end_index = $i; last;
655 22         46 }
656 60 100       125 my $text = $c->[$i]->as_string;
657 59         73 $newlines++ while $text =~ /\R/g;
658             last if $newlines > 1;
659             }
660             my $valid = $has_text && !$has_unmarkable
661             && $mu_end_index && $newlines <= 1;
662 60 100 100     138 #$log->tracef("mu candidate: start=%d, end=%s, ".
663             # "has_text=%s, has_unmarkable=%s, newlines=%d, valid=%s",
664 13         18 # $mu_start_index, $mu_end_index,
  13         18  
665             # $has_text, $has_unmarkable, $newlines, $valid
666 47         79 # );
667 47         121 if ($valid) {
668 47 100       95 no warnings 'once';
669             my $mu_el = Org::Element::Text->new(
670 22   66     100 document => $self, parent => $parent,
671             style=>$Org::Element::Text::mu2style{$mu}, text=>'',
672             );
673             my @c2 = splice @$c, $mu_start_index,
674             $mu_end_index-$mu_start_index+1, $mu_el;
675             #$log->tracef("grouping %s", [map {$_->text} @c2]);
676             $mu_el->children(\@c2);
677 22 100       41 shift @c2;
678 24     24   234 pop @c2;
  24         47  
  24         22361  
679             for (@c2) {
680             $_->{parent} = $mu_el;
681 13         216 }
682             $self->_merge_text_elements(\@c2);
683 13         216 # squish if only one child
684             if (@c2 == 1) {
685             $mu_el->text($c2[0]->text);
686 13         32 $mu_el->children(undef);
687 13         16 }
688 13         37 } else {
689 13         28 undef $c->[$mu_start_index]->{_mu_start};
690 22         33 $last_index++;
691             }
692 13         27 }
693             $self->_merge_text_elements($c);
694 13 100       34 #$log->trace("<- _apply_markup()");
695 12         25 }
696 12         32  
697             my ($self, $els) = @_;
698             #$log->tracef("-> _merge_text_elements(%s)", [map {$_->as_string} @$els]);
699 9         15 return unless @$els >= 2;
700 9         11 my $i=-1;
701             while (1) {
702             $i++;
703 407         769 last if $i >= @$els;
704             next if $els->[$i]->children || !$els->[$i]->isa('Org::Element::Text');
705             my $istyle = $els->[$i]->style // "";
706             while (1) {
707             last if $i+1 >= @$els || $els->[$i+1]->children ||
708 420     420   595 !$els->[$i+1]->isa('Org::Element::Text');
709             last if ($els->[$i+1]->style // "") ne $istyle;
710 420 100       852 #$log->tracef("merging text[%d] '%s' with '%s'",
711 110         157 # $i, $els->[$i]->text, $els->[$i+1]->text);
712 110         132 $els->[$i]->{text} .= $els->[$i+1]->{text} // "";
713 462         507 splice @$els, $i+1, 1;
714 462 100       790 }
715 352 100 100     1489 }
716 215   50     525 #$log->tracef("merge result = %s", [map {$_->as_string} @$els]);
717 215         232 #$log->trace("<- _merge_text_elements()");
718 298 100 100     1441 }
      100        
719              
720 104 100 50     255 require Org::Element::Text;
721             require Org::Element::Link;
722             my ($self, $re, $parent) = @_;
723 83   50     199 my $c = $parent->children;
724 83         206 return unless $c;
725             for (my $i=0; $i<@$c; $i++) {
726             my $el = $c->[$i];
727             if ($el->isa('Org::Element::Text')) {
728             my @split0 = split /\b($re)\b/, $el->text;
729             next unless @split0 > 1;
730             my @split;
731             for my $s (@split0) {
732 8     8   25 if ($s =~ /^$re$/) {
733 8         19 push @split, Org::Element::Link->new(
734 8         15 document=>$self, parent=>$parent,
735 8         13 link=>$s, description=>undef,
736 8 100       26 from_radio_target=>1,
737 1         14 );
738 12         17 } elsif (length $s) {
739 12 100       26 push @split, Org::Element::Text->new(
740 7         61 document=>$self, parent=>$parent,
741 7 100       24 text=>$s, style=>$el->style,
742 2         3 );
743 2         5 }
744 8 100       130 }
    100          
745 3         50 splice @$c, $i, 1, @split;
746             }
747             $self->_linkify_rt_recursive($re, $el);
748             }
749             }
750              
751 4         67 require Org::Element::Text;
752             my ($self, $str, $parent, $pass) = @_;
753             my $el = Org::Element::Text->new(
754             document=>$self, parent=>$parent, style=>'', text=>$str);
755             $parent->children([]) if !$parent->children;
756             push @{ $parent->children }, $el;
757 2         35 }
758              
759 7         34 [$_[0] =~ /:([^:]+)/g];
760             }
761              
762             require Module::List;
763             require Module::Load;
764 117     117   407  
765 117         247 my $mm = Module::List::list_modules("Org::Element::", {list_modules=>1});
766 117         1905 for (keys %$mm) {
767             Module::Load::load($_);
768 117 100       10534 }
769 117         191 }
  117         268  
770              
771             my ($self, $p1, $p2) = @_;
772              
773 7     7   56 my $pp = $self->priorities;
774             my $pos1 = firstidx {$_ eq $p1} @$pp;
775             return undef unless $pos1 >= 0; ## no critic: Subroutines::ProhibitExplicitReturnUndef
776             my $pos2 = firstidx {$_ eq $p2} @$pp;
777 2     2 1 902 return undef unless $pos2 >= 0; ## no critic: Subroutines::ProhibitExplicitReturnUndef
778 2         27578 $pos1 <=> $pos2;
779             }
780 2         1899  
781 2         2503 1;
782 38         765 # ABSTRACT: Represent an Org document
783              
784              
785             =pod
786              
787 11     11 1 58 =encoding UTF-8
788              
789 11         24 =head1 NAME
790 11     20   49  
  20         34  
791 11 100       37 Org::Document - Represent an Org document
792 9     17   24  
  17         22  
793 9 100       25 =head1 VERSION
794 7         27  
795             This document describes version 0.558 of Org::Document (from Perl distribution Org-Parser), released on 2022-06-23.
796              
797             =head1 SYNOPSIS
798              
799             use Org::Document;
800              
801             # create a new Org document tree from string
802             my $org = Org::Document->new(from_string => <<EOF);
803             * heading 1a
804             some text
805             ** heading 2
806             * heading 1b
807             EOF
808              
809             =head1 DESCRIPTION
810              
811             Derived from L<Org::Element>.
812              
813             =head1 ATTRIBUTES
814              
815             =head2 tags => ARRAY
816              
817             List of tags for this file, usually set via #+FILETAGS.
818              
819             =head2 todo_states => ARRAY
820              
821             List of known (action-requiring) todo states. Default is ['TODO'].
822              
823             =head2 done_states => ARRAY
824              
825             List of known done (non-action-requiring) states. Default is ['DONE'].
826              
827             =head2 priorities => ARRAY
828              
829             List of known priorities. Default is ['A', 'B', 'C'].
830              
831             =head2 drawer_names => ARRAY
832              
833             List of known drawer names. Default is [qw/CLOCK LOGBOOK PROPERTIES/].
834              
835             =head2 properties => ARRAY
836              
837             File-wide properties.
838              
839             =head2 radio_targets => ARRAY
840              
841             List of radio target text.
842              
843             =head2 time_zone => ARRAY
844              
845             If set, will be passed to DateTime->new() (e.g. by L<Org::Element::Timestamp>).
846              
847             =head2 ignore_unknown_settings => bool
848              
849             If set to true, unknown settings will not cause a parse failure.
850              
851             =head1 METHODS
852              
853             =for Pod::Coverage BUILD
854              
855             =head2 new
856              
857             Usage:
858              
859             $doc = Org::Document->new(%args);
860              
861             Create document object. If C<from_string> argument is specified, will parse
862             the string. Otherwise, will create an empty document object. Arguments:
863              
864             =over
865              
866             =item * from_string
867              
868             String. String to parse into document object tree content.
869              
870             =back
871              
872             =head2 load_element_modules()
873              
874             Load all Org::Element::* modules. This is useful when wanting to work with
875             element objects retrieved from serialization, where the element modules have not
876             been loaded.
877              
878             =head2 cmp_priorities($p1, $p2) => -1|0|-1
879              
880             Compare two priorities C<$p1> and C<$p2>. Return result like Perl's C<cmp>: 0 if
881             the two are the same, -1 if C<$p1> is of I<higher> priority (since it's more to
882             the left position in priority list, which is sorted highest-first) than C<$p2>,
883             and 1 if C<$p2> is of I<lower> priority than C<$p1>.
884              
885             If either C<$p1> or C<$p2> has unknown priority, will return undef.
886              
887             Examples:
888              
889             $doc->cmp_priorities('A', 'A') # -> 0
890             $doc->cmp_priorities('A', 'B') # -> -1 (A is higher than B)
891             $doc->cmp_priorities('C', 'B') # -> 1 (C is lower than B)
892             $doc->cmp_priorities('X', 'A') # -> undef (X is unknown)
893              
894             Note that X could be known if there is a C<#+PRIORITIES> setting which defines
895             it.
896              
897             =head1 HOMEPAGE
898              
899             Please visit the project's homepage at L<https://metacpan.org/release/Org-Parser>.
900              
901             =head1 SOURCE
902              
903             Source repository is at L<https://github.com/perlancar/perl-Org-Parser>.
904              
905             =head1 AUTHOR
906              
907             perlancar <perlancar@cpan.org>
908              
909             =head1 CONTRIBUTING
910              
911              
912             To contribute, you can send patches by email/via RT, or send pull requests on
913             GitHub.
914              
915             Most of the time, you don't need to build the distribution yourself. You can
916             simply modify the code, then test via:
917              
918             % prove -l
919              
920             If you want to build the distribution (e.g. to try to install it locally on your
921             system), you can install L<Dist::Zilla>,
922             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
923             Dist::Zilla plugin and/or Pod::Weaver::Plugin. Any additional steps required
924             beyond that are considered a bug and can be reported to me.
925              
926             =head1 COPYRIGHT AND LICENSE
927              
928             This software is copyright (c) 2022, 2021, 2020, 2019, 2017, 2016, 2015, 2014, 2013, 2012, 2011 by perlancar <perlancar@cpan.org>.
929              
930             This is free software; you can redistribute it and/or modify it under
931             the same terms as the Perl 5 programming language system itself.
932              
933             =head1 BUGS
934              
935             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Org-Parser>
936              
937             When submitting a bug or request, please include a test-file or a
938             patch to an existing test-file that illustrates the bug or desired
939             feature.
940              
941             =cut