File Coverage

blib/lib/Mojito/Page/Parse.pm
Criterion Covered Total %
statement 56 62 90.3
branch 9 12 75.0
condition n/a
subroutine 11 11 100.0
pod 5 5 100.0
total 81 90 90.0


line stmt bran cond sub pod time code
1 5     5   519602 use strictures 1;
  5         41  
  5         137  
2             package Mojito::Page::Parse;
3             {
4             $Mojito::Page::Parse::VERSION = '0.24';
5             }
6 5     5   578 use 5.010;
  5         15  
  5         285  
7 5     5   3232 use Moo;
  5         54119  
  5         34  
8 5     5   11135 use MooX::Types::MooseLike::Base qw(:all);
  5         54977  
  5         2422  
9              
10 5     5   4804 use Data::Dumper::Concise;
  5         60611  
  5         6368  
11              
12             =head1 Name
13              
14             Mojito::Page::Parse - turn page source into a page structure
15              
16             =cut
17              
18             # This is the page source
19             has 'page' => (
20             is => 'rw',
21             isa => Value,
22             );
23             has 'sections' => (
24             is => 'ro',
25             isa => ArrayRef[HashRef],
26             lazy => 1,
27             builder => 'build_sections',
28             );
29             has 'page_structure' => (
30             is => 'rw',
31             isa => HashRef,
32             lazy => 1,
33             builder => 'build_page_structure',
34             );
35             has 'default_format' => (
36             is => 'rw',
37             isa => Value,
38             lazy => 1,
39             default => sub { 'HTML' },
40             );
41             has 'created' => (
42             is => 'ro',
43             isa => Int,
44             );
45             has 'last_modified' => (
46             is => 'ro',
47             isa => Int,
48             default => sub { time() },
49             );
50             has 'section_open_regex' => (
51             is => 'ro',
52             isa => RegexpRef,
53             default => sub { qr/<sx\.[^>]+>/ },
54             );
55             has 'section_close_regex' => (
56             is => 'ro',
57             isa => RegexpRef,
58             default => sub { qr(</sx>) },
59             );
60             has 'debug' => (
61             is => 'rw',
62             isa => Bool,
63             default => sub { 0 },
64             );
65             has 'messages' => (
66             is => 'rw',
67             isa => ArrayRef,
68             default => sub { [] },
69             );
70             has 'message_string' => (
71             is => 'ro',
72             isa => Value,
73             lazy => 1,
74             builder => '_build_message_string',
75             );
76             sub _build_message_string {
77 2     2   2570 my ($self) = (shift);
78 2 50       33 return join ', ', @{$self->messages} if $self->messages;
  2         667  
79 0         0 return;
80             }
81              
82             =head2 has_nested_section
83              
84             Test if we have nested sections.
85              
86             =cut
87              
88             sub has_nested_section {
89 5     5 1 2089 my ($self) = @_;
90              
91 5         31 my $section_open_regex = $self->section_open_regex;
92              
93             #die "Got no page" if !$self->page;
94 5         74 my @stuff_between_section_opens =
95             $self->page =~ m/${section_open_regex}(.*?)${section_open_regex}/si;
96              
97             # If when find a section ending tag in the middle of the two consecutive
98             # opening section tags then we know first section has been closed and thus
99             # does NOT contain a nested section.
100 5         7034 foreach my $tweener (@stuff_between_section_opens) {
101 4 100       24 if ( $tweener =~ m/<\/sx>/ ) {
102              
103             # The tweener section could cause us to think we're not nested
104             # due to an nested section of the general type (not the class=mc_ type)
105             # In this case we need to count the number of open and closed sections
106             # If they are the same then we dont' have </sec> left over to close the first
107             # and thus we have a nest.
108 2         10 my @opens = $tweener =~ m/(<sx[^>]*>)/sg;
109 2         29 my @closes = $tweener =~ m/(<\/sx>)/sg;
110 2 50       15 if ( scalar @opens == scalar @closes ) {
111 0         0 return 1;
112             }
113             }
114             else {
115 2         13 return 1;
116             }
117             }
118              
119 3         19 return 0;
120             }
121              
122             =head2 add_implicit_sections
123              
124             Add implicit sections to assist the building of the page_struct.
125              
126             =cut
127              
128             sub add_implicit_sections {
129 8     8 1 2899 my ($self) = @_;
130              
131 8         209 my $page = $self->page;
132              
133             # Add implicit sections in between explicit sections (if needed)
134 8 100       99 if ( $page =~ m/<\/sx>(?!\s*<sx\.).*?<sx\./si ) {
135 5         61 $page =~ s/<\/sx>(?!\s*<sx\.)(.*?)<sx\./<\/sx>\n<sx.Implicit>$1<\/sx>\n<sx./sig;
136             }
137              
138             # Add implicit section at the beginning (if needed)
139 8         58 $page =~ s/(?<!<sx\.\w)(<sx\.\w)/<\/sx>\n$1/si;
140 8         27 $page = "\n<sx.Implicit>\n${page}";
141              
142             # Add implicit section at the end (if needed)
143 8         68 $page =~ s/(<\/sx>)(?!.*<\/sx>)/$1\n<sx.Implicit>/si;
144 8         17 $page .= '</sx>';
145              
146             # cut empty implicits
147 8         43 $page =~ s/<sx\.Implicit>\s*<\/sx>//sig;
148              
149 8 50       174 if ( $self->debug ) {
150 0         0 say "PREMATCH: ", ${^PREMATCH};
151 0         0 say "MATCH: ${^MATCH}";
152 0         0 say "POSTMATCH: ", ${^POSTMATCH};
153 0         0 say "page: $page";
154             }
155              
156 8         25596 return $page;
157             }
158              
159             =head2 parse_sections
160              
161             Extract section class and content from the page.
162              
163             =cut
164              
165             sub parse_sections {
166 3     3 1 36 my ( $self, $page ) = @_;
167              
168 3         6 my $sections;
169 3         46 my @sections = $page =~ m/(<sx\.[^>]+>.*?<\/sx>)/sig;
170 3         10 foreach my $sx (@sections) {
171              
172             # Extract class and content
173 11         64 my ( $class, $content ) = $sx =~ m/<sx\.([^>]+)>(.*)?<\/sx>/si;
174 11         20 push @{$sections}, { class => $class, content => $content };
  11         54  
175             }
176              
177 3         86 return $sections;
178             }
179              
180             =head2 build_sections
181              
182             Wrap up the getting of sections process.
183              
184             =cut
185              
186             sub build_sections {
187 3     3 1 1975 my $self = shift;
188              
189             # Deal with nested sections gracefully by adding a message
190             # to bubble up to the view and display in the #message_area.
191 3 100       17 if ( $self->has_nested_section ) {
192 1         3 $self->messages( [ @{$self->messages}, 'haz nested sexes'] );
  1         5  
193             }
194 3         734 my $page = $self->add_implicit_sections;
195              
196 3         40 return $self->parse_sections($page);
197             }
198              
199             =head2 build_page_structure
200              
201             It's just an href that we'll persist as a Mongo document.
202              
203             =cut
204              
205             sub build_page_structure {
206 1     1 1 769 my $self = shift;
207              
208 1         6 my $return = {
209             sections => $self->sections,
210             default_format => $self->default_format,
211              
212             # created => '1234567890',
213             # last_modified => time(),
214             page_source => $self->page,
215              
216             # Set the message last to pick any builder message above
217             # e.g. ->sections can set a 'nested sections' message.
218             message => $self->message_string,
219             };
220 1         89 return $return;
221             }
222              
223             1