File Coverage

blib/lib/Org/Parser.pm
Criterion Covered Total %
statement 56 74 75.6
branch 23 34 67.6
condition 20 30 66.6
subroutine 6 6 100.0
pod 2 2 100.0
total 107 146 73.2


line stmt bran cond sub pod time code
1             package Org::Parser;
2              
3 23     23   1357386 use 5.014; # compilation failure in older perls, RT#141560
  23         248  
4 23     23   13339 use Moo;
  23         315742  
  23         130  
5              
6 23     23   47147 use Org::Document;
  23         91  
  23         1128  
7 23     23   172 use Scalar::Util qw(blessed);
  23         50  
  23         18240  
8              
9             our $AUTHORITY = 'cpan:PERLANCAR'; # AUTHORITY
10             our $DATE = '2023-08-05'; # DATE
11             our $DIST = 'Org-Parser'; # DIST
12             our $VERSION = '0.560'; # VERSION
13              
14             sub parse {
15 96     96 1 508493 my ($self, $arg, $opts) = @_;
16 96 100       399 die "Please specify a defined argument to parse()\n" unless defined($arg);
17              
18 94   100     582 $opts //= {};
19              
20 94         155 my $str;
21 94         230 my $srclabel = $opts->{_srclabel};
22 94         179 my $r = ref($arg);
23 94 100 33     287 if (!$r) {
    100 66        
    100          
    100          
24 90         214 $str = $arg;
25 90   100     366 $srclabel //= "string";
26             } elsif ($r eq 'ARRAY') {
27 1         6 $str = join "", @$arg;
28 1   50     6 $srclabel //= "arrayref";
29             } elsif ($r eq 'GLOB' || blessed($arg) && $arg->isa('IO::Handle')) {
30 1         30 $str = join "", <$arg>;
31 1   50     34 $srclabel //= "filehandle";
32             } elsif ($r eq 'CODE') {
33 1         2 my @chunks;
34 1         5 while (defined(my $chunk = $arg->())) {
35 4         23 push @chunks, $chunk;
36             }
37 1         7 $str = join "", @chunks;
38 1   50     5 $srclabel //= "code";
39             } else {
40 1         10 die "Invalid argument, please supply a ".
41             "string|arrayref|coderef|filehandle\n";
42             }
43             Org::Document->new(
44             _srclabel=>$srclabel,
45             from_string=>$str,
46             time_zone=>$opts->{time_zone},
47             ignore_unknown_settings=>$opts->{ignore_unknown_settings},
48 93         2387 );
49             }
50              
51             sub parse_file {
52 13     13 1 4054962 require File::Slurper;
53 13         57469 my ($self, $filename, $opts) = @_;
54 13 100       76 $opts = {%$opts} if $opts; # shallow copy
55 13   100     80 $opts //= {};
56              
57 13         23 state $loaded;
58              
59 13         56 my $content = File::Slurper::read_text($filename);
60 13         2049 $opts->{_srclabel} = "file:$filename";
61              
62 13         41 my $cf = $opts->{cache_file}; # old option, new option is 'cache' (automatic setting of cache file)
63 13         44 my $doc;
64             my $cache; # undef = no caching; 0 = not cached, should cache; 1 = cached
65 13 50 33     124 if (!$cf && ($opts->{cache} // $ENV{PERL_ORG_PARSER_CACHE})) {
      66        
66 0         0 require Cwd;
67 0         0 require Digest::MD5;
68 0         0 my @dirs = ("$ENV{HOME}/.cache/perl-org-parser", $ENV{HOME});
69 0         0 my $dir;
70 0         0 for (@dirs) {
71 0 0       0 if (-d $_) { $dir = $_; last }
  0 0       0  
  0         0  
72 0         0 elsif (mkdir $_) { $dir = $_; last }
  0         0  
73             }
74 0 0       0 die "Can't find a suitable cache directory" unless $dir;
75 0 0       0 my $abs = Cwd::abs_path($filename) or die "Can't find $filename";
76 0         0 my $base = $abs; $base =~ s!.+/!!;
  0         0  
77 0         0 $cf = "$dir/$base.".Digest::MD5::md5_hex($abs).".storable";
78             }
79 13 100       45 if ($cf) {
80 3         891 require Storable;
81 3   66     4646 $cache = !!((-e $cf) && (-M $cf) <= (-M $filename));
82 3 100       26 if ($cache) {
83 1         3 eval {
84 1         6 $doc = Storable::retrieve($cf);
85 1 50       146 $doc->load_element_modules unless $loaded++;
86 1         22 $doc->{_srclabel} = " (from cached file:$cf)";
87             };
88 1 50       6 if ($@) {
89 0         0 warn "Failed retrieving document from cache: $@, reparsing ...";
90 0         0 $cache = 0;
91             }
92             }
93             }
94              
95 13 100       100 $doc = $self->parse($content, $opts) unless $cache;
96 12 100 100     351 if (defined($cache) && !$cache) {
97 1         6 require Storable;
98 1         12 for ($doc->find('Timestamp')) {
99 0         0 $_->clear_parse_result;
100             }
101 1         6 Storable::store($doc, $cf);
102             }
103              
104 12         374 $doc;
105             }
106              
107             1;
108             # ABSTRACT: Parse Org documents
109              
110             __END__
111              
112             =pod
113              
114             =encoding UTF-8
115              
116             =head1 NAME
117              
118             Org::Parser - Parse Org documents
119              
120             =head1 VERSION
121              
122             This document describes version 0.560 of Org::Parser (from Perl distribution Org-Parser), released on 2023-08-05.
123              
124             =head1 SYNOPSIS
125              
126             use 5.010;
127             use Org::Parser;
128             my $orgp = Org::Parser->new();
129              
130             # parse a file
131             my $doc = $orgp->parse_file("$ENV{HOME}/todo.org");
132              
133             # parse a string
134             $doc = $orgp->parse(<<EOF);
135             #+TODO: TODO | DONE CANCELLED
136             <<<radio target>>>
137             * heading1a
138             ** TODO heading2a
139             SCHEDULED: <2011-03-31 Thu>
140             [[some][link]]
141             ** DONE heading2b
142             [2011-03-18 ]
143             this will become a link: radio target
144             * TODO heading1b *bold*
145             - some
146             - plain
147             - list
148             - [ ] with /checkbox/
149             * and
150             * sublist
151             * CANCELLED heading1c
152             + definition :: list
153             + another :: def
154             EOF
155              
156             # walk the document tree
157             $doc->walk(sub {
158             my ($el) = @_;
159             return unless $el->isa('Org::Element::Headline');
160             say "heading level ", $el->level, ": ", $el->title->as_string;
161             });
162              
163             will print something like:
164              
165             heading level 1: heading1a
166             heading level 2: heading2a
167             heading level 2: heading2b *bold*
168             heading level 1: heading1b
169             heading level 1: heading1c
170              
171             A command-line utility (in a separate distribution: L<App::OrgUtils>) is
172             available for debugging:
173              
174             % dump-org-structure ~/todo.org
175             Document:
176             Setting: "#+TODO: TODO | DONE CANCELLED\n"
177             RadioTarget: "<<<radio target>>>"
178             Text: "\n"
179             Headline: l=1
180             (title)
181             Text: "heading1a"
182             (children)
183             Headline: l=2 todo=TODO
184             (title)
185             Text: "heading2a"
186             (children)
187             Text: "SCHEDULED: "
188             ...
189              
190             =head1 DESCRIPTION
191              
192             This module parses Org documents. See http://orgmode.org/ for more details on
193             Org documents.
194              
195             See C<todo.org> in the distribution for the list of already- and not yet
196             implemented stuffs.
197              
198             =head1 ATTRIBUTES
199              
200             =head1 METHODS
201              
202             =head2 new()
203              
204             Create a new parser instance.
205              
206             =head2 $orgp->parse($str | $arrayref | $coderef | $filehandle, \%opts) => $doc
207              
208             Parse document (which can be contained in a scalar $str, an arrayref of lines
209             $arrayref, a subroutine which will be called for chunks until it returns undef,
210             or a filehandle).
211              
212             Returns L<Org::Document> object.
213              
214             If 'handler' attribute is specified, will call handler repeatedly during
215             parsing. See the 'handler' attribute for more details.
216              
217             Will die if there are syntax errors in documents.
218              
219             Known options:
220              
221             =over 4
222              
223             =item * time_zone => STR
224              
225             Will be passed to Org::Document's constructor.
226              
227             =back
228              
229             =head2 $orgp->parse_file($filename, \%opts) => $doc
230              
231             Just like parse(), but will load document from file instead.
232              
233             Known options (aside from those known by parse()):
234              
235             =over 4
236              
237             =item * cache => bool (default: from PERL_ORG_PARSER_CACHE, or 0)
238              
239             Since Org::Parser can spend some time to parse largish Org files, this is an
240             option to store the parse result (using L<Storable>). If caching is turned on,
241             then after the first parse, the result will be stored in:
242              
243             ~/.cache/perl-org-parser/<filename>.<md5-digest-of-file-absolute-path>.storable
244              
245             and subsequent calls to this function can directly use this cache, as long as
246             the cache is not stale.
247              
248             =back
249              
250             =head1 FAQ
251              
252             =head2 Why? Just as only perl can parse Perl, only org-mode can parse Org anyway!
253              
254             True. I'm only targetting good enough. As long as I can parse/process all my Org
255             notes and todo files, I have no complaints.
256              
257             =head2 It's too slow!
258              
259             Parser is completely regex-based at the moment (I plan to use L<Marpa> someday).
260             Performance is quite lousy but I'm not annoyed enough at the moment to overhaul
261             it.
262              
263             =head1 ENVIRONMENT
264              
265             =head2 PERL_ORG_PARSER_CACHE => bool
266              
267             Set default for C<cache> option in C<parse_file()>.
268              
269             =head1 HOMEPAGE
270              
271             Please visit the project's homepage at L<https://metacpan.org/release/Org-Parser>.
272              
273             =head1 SOURCE
274              
275             Source repository is at L<https://github.com/perlancar/perl-Org-Parser>.
276              
277             =head1 SEE ALSO
278              
279             L<Org::Document>
280              
281             =head1 AUTHOR
282              
283             perlancar <perlancar@cpan.org>
284              
285             =head1 CONTRIBUTORS
286              
287             =for stopwords Alex White Karl Williamson Steven Haryanto Tekki Trent Fisher Wong Meng Weng
288              
289             =over 4
290              
291             =item *
292              
293             Alex White <VVu@geekfarm.org>
294              
295             =item *
296              
297             Karl Williamson <khw@cpan.org>
298              
299             =item *
300              
301             Steven Haryanto <stevenharyanto@gmail.com>
302              
303             =item *
304              
305             Tekki <tekki@tekki.ch>
306              
307             =item *
308              
309             Trent Fisher <trent@cs.pdx.edu>
310              
311             =item *
312              
313             Wong Meng Weng <mengwong@pobox.com>
314              
315             =back
316              
317             =head1 CONTRIBUTING
318              
319              
320             To contribute, you can send patches by email/via RT, or send pull requests on
321             GitHub.
322              
323             Most of the time, you don't need to build the distribution yourself. You can
324             simply modify the code, then test via:
325              
326             % prove -l
327              
328             If you want to build the distribution (e.g. to try to install it locally on your
329             system), you can install L<Dist::Zilla>,
330             L<Dist::Zilla::PluginBundle::Author::PERLANCAR>,
331             L<Pod::Weaver::PluginBundle::Author::PERLANCAR>, and sometimes one or two other
332             Dist::Zilla- and/or Pod::Weaver plugins. Any additional steps required beyond
333             that are considered a bug and can be reported to me.
334              
335             =head1 COPYRIGHT AND LICENSE
336              
337             This software is copyright (c) 2023, 2022, 2021, 2020, 2019, 2017, 2016, 2015, 2014, 2013, 2012, 2011 by perlancar <perlancar@cpan.org>.
338              
339             This is free software; you can redistribute it and/or modify it under
340             the same terms as the Perl 5 programming language system itself.
341              
342             =head1 BUGS
343              
344             Please report any bugs or feature requests on the bugtracker website L<https://rt.cpan.org/Public/Dist/Display.html?Name=Org-Parser>
345              
346             When submitting a bug or request, please include a test-file or a
347             patch to an existing test-file that illustrates the bug or desired
348             feature.
349              
350             =cut