| blib/lib/Bible/OBML/Gateway.pm | |||
|---|---|---|---|
| Criterion | Covered | Total | % |
| statement | 171 | 232 | 73.7 |
| branch | 18 | 48 | 37.5 |
| condition | 21 | 62 | 33.8 |
| subroutine | 28 | 49 | 57.1 |
| pod | 5 | 5 | 100.0 |
| total | 243 | 396 | 61.3 |
| line | stmt | bran | cond | sub | pod | time | code |
|---|---|---|---|---|---|---|---|
| 1 | package Bible::OBML::Gateway; | ||||||
| 2 | # ABSTRACT: Bible Gateway content conversion to Open Bible Markup Language | ||||||
| 3 | |||||||
| 4 | 1 | 1 | 226774 | use 5.020; | |||
| 1 | 8 | ||||||
| 5 | |||||||
| 6 | 1 | 1 | 463 | use exact; | |||
| 1 | 36623 | ||||||
| 1 | 5 | ||||||
| 7 | 1 | 1 | 2856 | use exact::class; | |||
| 1 | 11328 | ||||||
| 1 | 4 | ||||||
| 8 | 1 | 1 | 761 | use Bible::OBML; | |||
| 1 | 626516 | ||||||
| 1 | 14 | ||||||
| 9 | 1 | 1 | 429 | use Bible::Reference; | |||
| 1 | 2 | ||||||
| 1 | 5 | ||||||
| 10 | 1 | 1 | 227 | use Mojo::ByteStream; | |||
| 1 | 2 | ||||||
| 1 | 37 | ||||||
| 11 | 1 | 1 | 8 | use Mojo::DOM; | |||
| 1 | 1 | ||||||
| 1 | 21 | ||||||
| 12 | 1 | 1 | 698 | use Mojo::UserAgent; | |||
| 1 | 234325 | ||||||
| 1 | 10 | ||||||
| 13 | 1 | 1 | 54 | use Mojo::URL; | |||
| 1 | 3 | ||||||
| 1 | 4 | ||||||
| 14 | 1 | 1 | 31 | use Mojo::Util 'html_unescape'; | |||
| 1 | 3 | ||||||
| 1 | 5004 | ||||||
| 15 | |||||||
| 16 | our $VERSION = '2.07'; # VERSION | ||||||
| 17 | |||||||
| 18 | has translation => 'NIV'; | ||||||
| 19 | has reference => Bible::Reference->new( bible => 'Protestant' ); | ||||||
| 20 | has url => Mojo::URL->new('https://www.biblegateway.com/passage/'); | ||||||
| 21 | has ua => sub { | ||||||
| 22 | my $ua = Mojo::UserAgent->new( max_redirects => 3 ); | ||||||
| 23 | $ua->transactor->name( __PACKAGE__ . '/' . ( __PACKAGE__->VERSION // '2.0' ) ); | ||||||
| 24 | return $ua; | ||||||
| 25 | }; | ||||||
| 26 | |||||||
| 27 | 1 | 1 | 1 | 1244 | sub translations ($self) { | ||
| 1 | 3 | ||||||
| 1 | 2 | ||||||
| 28 | 1 | 2 | my $translations; | ||||
| 29 | |||||||
| 30 | $self->ua->get( $self->url )->result->dom->find('select.search-dropdown option')->each( sub { | ||||||
| 31 | 2 | 100 | 2 | 3242 | my $class = $_->attr('class') || ''; | ||
| 32 | |||||||
| 33 | 2 | 100 | 19 | if ( $class eq 'lang' ) { | |||
| 50 | |||||||
| 34 | 1 | 5 | my @language = $_->text =~ /\-{3}(.+)\s\(([^\)]+)\)\-{3}/; | ||||
| 35 | 1 | 19 | push( @$translations, { | ||||
| 36 | language => $language[0], | ||||||
| 37 | acronym => $language[1], | ||||||
| 38 | } ); | ||||||
| 39 | } | ||||||
| 40 | elsif ( not $class ) { | ||||||
| 41 | 1 | 3 | my @translation = $_->text =~ /\s*(.+)\s\(([^\)]+)\)/; | ||||
| 42 | 1 | 11 | push( @{ $translations->[-1]{translations} }, { | ||||
| 1 | 8 | ||||||
| 43 | translation => $translation[0], | ||||||
| 44 | acronym => $translation[1], | ||||||
| 45 | } ); | ||||||
| 46 | } | ||||||
| 47 | 1 | 5 | } ); | ||||
| 48 | |||||||
| 49 | 1 | 20 | return $translations; | ||||
| 50 | } | ||||||
| 51 | |||||||
| 52 | 1 | 1 | 1 | 2305 | sub structure ( $self, $translation = $self->translation ) { | ||
| 1 | 3 | ||||||
| 1 | 4 | ||||||
| 1 | 11 | ||||||
| 53 | return $self->ua->get( | ||||||
| 54 | $self->url->clone->path( $self->url->path . 'bcv/' )->query( { version => $translation } ) | ||||||
| 55 | 1 | 5 | )->result->json->{data}[0]; | ||||
| 56 | } | ||||||
| 57 | |||||||
| 58 | 21 | 21 | 126 | sub _retag ( $tag, $retag ) { | |||
| 21 | 30 | ||||||
| 21 | 72 | ||||||
| 21 | 27 | ||||||
| 59 | 21 | 60 | $tag->tag($retag); | ||||
| 60 | 21 | 279 | delete $tag->attr->{$_} for ( keys %{ $tag->attr } ); | ||||
| 21 | 46 | ||||||
| 61 | } | ||||||
| 62 | |||||||
| 63 | 1 | 1 | 1 | 7 | sub fetch ( $self, $reference, $translation = $self->translation ) { | ||
| 1 | 3 | ||||||
| 1 | 2 | ||||||
| 1 | 2 | ||||||
| 1 | 4 | ||||||
| 64 | 1 | 3 | my $runs = $self->reference | ||||
| 65 | ->acronyms(0)->require_chapter_match(0)->require_book_ucfirst(0) | ||||||
| 66 | ->clear->in($reference)->as_runs; | ||||||
| 67 | 1 | 50 | 33 | 3957 | $reference = $runs->[0] unless ( @$runs != 1 or $runs->[0] !~ /\w\s*\d/ ); | ||
| 68 | |||||||
| 69 | 1 | 6 | my $result = $self->ua->get( | ||||
| 70 | $self->url->query( { | ||||||
| 71 | version => $translation, | ||||||
| 72 | search => $reference, | ||||||
| 73 | } ) | ||||||
| 74 | )->result; | ||||||
| 75 | |||||||
| 76 | 1 | 50 | 0 | 159 | croak( $translation . ' "' . ( $reference // '(undef)' ) . '" did not match a chapter or run of verses' ) | ||
| 77 | if ( $result->dom->at('div.content-section') ); | ||||||
| 78 | |||||||
| 79 | 1 | 13 | return Mojo::ByteStream->new( $result->body )->decode->to_string; | ||||
| 80 | } | ||||||
| 81 | |||||||
| 82 | 1 | 1 | 1 | 190 | sub parse ( $self, $html ) { | ||
| 1 | 2 | ||||||
| 1 | 3 | ||||||
| 1 | 2 | ||||||
| 83 | 1 | 50 | 6 | return unless ($html); | |||
| 84 | |||||||
| 85 | 1 | 10 | my $dom = Mojo::DOM->new($html); | ||||
| 86 | |||||||
| 87 | 1 | 11610 | my $ref_display = $dom->at('div.bcv div.dropdown-display-text'); | ||||
| 88 | 1 | 50 | 33 | 1117 | croak('source appears to be invalid; check your inputs') unless ( $ref_display and $ref_display->text ); | ||
| 89 | |||||||
| 90 | 1 | 132 | my $reference = $ref_display->text; | ||||
| 91 | 1 | 32 | my $translation = $dom->at('div.passage-col')->attr('data-translation'); | ||||
| 92 | |||||||
| 93 | 1 | 50 | 333 | croak('EXB (Extended Bible) translation not supported') if ( $translation eq 'EXB' ); | |||
| 94 | |||||||
| 95 | 1 | 6 | my $block = $dom->at('div.passage-text div.passage-content div:first-child'); | ||||
| 96 | 1 | 8 | 1856 | $block->find('*[data-link]')->each( sub { delete $_->attr->{'data-link'} } ); | |||
| 8 | 3028 | ||||||
| 97 | |||||||
| 98 | 1 | 27 | $html = $block->to_string; | ||||
| 99 | |||||||
| 100 | 1 | 3538 | $html =~ s`(\d+).(\d+)`$1/$2`g; | ||||
| 101 | 1 | 45 | $html =~ s`(?:<){2,}(.*?)(?:\x{2019}>|(?:>){2,})`\x{201c}$1\x{201d}`g; | ||||
| 102 | 1 | 35 | $html =~ s`(?:<)(.*?)(?:>|\x{2019})`\x{2018}$1\x{2019}`g; | ||||
| 103 | 1 | 30 | $html =~ s`\\\w+``g; | ||||
| 104 | 1 | 34 | $html =~ s/(?:\.\s*){2,}\./\x{2026}/; | ||||
| 105 | 1 | 30 | $html =~ s/\x{200a}//g; | ||||
| 106 | |||||||
| 107 | 1 | 6 | $block = Mojo::DOM->new($html)->at('div'); | ||||
| 108 | |||||||
| 109 | 1 | 50 | 9193 | $_->parent->strip if ( $_ = $block->find('div.poetry > h2')->first ); | |||
| 110 | |||||||
| 111 | 1 | 142 | 1872 | $block->descendant_nodes->grep( sub { $_->type eq 'comment' } )->each('remove'); | |||
| 142 | 5689 | ||||||
| 112 | 1 | 230 | $block->find( | ||||
| 113 | '.il-text, hidden, hr, .translation-note, span.inline-note, a.full-chap-link, b.inline-h3, top1' | ||||||
| 114 | )->each('remove'); | ||||||
| 115 | 1 | 11375 | $block->find('.std-text, hgroup, b, em, versenum, char')->each('strip'); | ||||
| 116 | $block | ||||||
| 117 | ->find('i, .italic, .trans-change, .idiom, .catch-word, selah, span.selah') | ||||||
| 118 | 1 | 0 | 7299 | ->each( sub { _retag( $_, 'i' ) } ); | |||
| 0 | 0 | ||||||
| 119 | 1 | 0 | 10536 | $block->find('.woj, u.jesus-speech')->each( sub { _retag( $_, 'woj' ) } ); | |||
| 0 | 0 | ||||||
| 120 | 1 | 0 | 3492 | $block->find('.divine-name, .small-caps')->each( sub { _retag( $_, 'small_caps' ) } ); | |||
| 0 | 0 | ||||||
| 121 | |||||||
| 122 | 10 | 10 | 2259 | $block->find('sup')->grep( sub { length $_->text == 1 } )->each( sub { | |||
| 123 | 0 | 0 | 0 | $_->content( '-' . $_->content ); | |||
| 124 | 0 | 0 | $_->strip; | ||||
| 125 | 1 | 4127 | } ); | ||||
| 126 | |||||||
| 127 | 1 | 64 | $self->reference->acronyms(1)->require_chapter_match(1)->require_book_ucfirst(1); | ||||
| 128 | |||||||
| 129 | 1 | 37 | my $footnotes = $block->at('div.footnotes'); | ||||
| 130 | 1 | 50 | 1844 | if ($footnotes) { | |||
| 131 | $footnotes->find('a.bibleref')->each( sub { | ||||||
| 132 | 0 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | ||
| 133 | 0 | 0 | $_->replace($ref); | ||||
| 134 | 0 | 0 | } ); | ||||
| 135 | 0 | 0 | $footnotes->remove; | ||||
| 136 | $footnotes = { | ||||||
| 137 | map { | ||||||
| 138 | 0 | 0 | '#' . $_->attr('id') => $self->reference->clear->in( | ||||
| 0 | 0 | ||||||
| 139 | $_->at('span')->all_text | ||||||
| 140 | )->as_text | ||||||
| 141 | } $footnotes->find('ol li')->each | ||||||
| 142 | }; | ||||||
| 143 | } | ||||||
| 144 | |||||||
| 145 | 1 | 7 | my $crossrefs = $block->at('div.crossrefs'); | ||||
| 146 | 1 | 50 | 1090 | if ($crossrefs) { | |||
| 147 | $crossrefs->find('a.bibleref')->each( sub { | ||||||
| 148 | 0 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | ||
| 149 | 0 | 0 | $_->replace($ref); | ||||
| 150 | 1 | 11 | } ); | ||||
| 151 | 1 | 1328 | $crossrefs->remove; | ||||
| 152 | $crossrefs = { | ||||||
| 153 | map { | ||||||
| 154 | 1 | 152 | '#' . $_->attr('id') => $self->reference->clear->in( | ||||
| 8 | 31223 | ||||||
| 155 | $_->at('a:last-child')->attr('data-bibleref') | ||||||
| 156 | )->refs | ||||||
| 157 | } $crossrefs->find('ol li')->each | ||||||
| 158 | }; | ||||||
| 159 | } | ||||||
| 160 | |||||||
| 161 | $block | ||||||
| 162 | ->find('span.text > a.bibleref') | ||||||
| 163 | ->map('parent') | ||||||
| 164 | 0 | 0 | 0 | ->grep( sub { $_->content =~ /^\[ | |||
| 165 | ->each( sub { | ||||||
| 166 | $_->find('a')->each( sub { | ||||||
| 167 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | |||
| 168 | 0 | 0 | $_->replace($ref); | ||||
| 169 | 0 | 0 | 0 | } ); | |||
| 170 | |||||||
| 171 | 0 | 0 | my $content = $_->content; | ||||
| 172 | 0 | 0 | $content =~ s|\s+\[([^\]]+)\]| | ||||
| 173 | 0 | 0 | ' |
||||
| 174 | |ge; | ||||||
| 175 | |||||||
| 176 | 0 | 0 | $_->content($content); | ||||
| 177 | 1 | 13567 | } ); | ||||
| 178 | |||||||
| 179 | $block | ||||||
| 180 | ->find('i > a.bibleref, crossref > a.bibleref') | ||||||
| 181 | ->map('parent') | ||||||
| 182 | 0 | 0 | 0 | ->grep( sub { $_->children->size == 1 } ) | |||
| 183 | ->each( sub { | ||||||
| 184 | 0 | 0 | 0 | my $a = $_->at('a:last-child'); | |||
| 185 | 0 | 0 | 0 | ( my $ref = $_->attr('data-bibleref') // '' ) =~ s/\.(\d+)\.(\d+)/ $1:$2/g; | |||
| 186 | |||||||
| 187 | 0 | 0 | $_->tag('sup'); | ||||
| 188 | 0 | 0 | $_->attr({ | ||||
| 189 | 'class' => 'crossreference', | ||||||
| 190 | 'data-cr' => $a->attr('data-bibleref'), | ||||||
| 191 | }); | ||||||
| 192 | |||||||
| 193 | 0 | 0 | $crossrefs = { | ||||
| 194 | $a->attr('data-bibleref') => $self->reference->clear->in($ref)->refs | ||||||
| 195 | }; | ||||||
| 196 | 1 | 1477 | } ); | ||||
| 197 | |||||||
| 198 | 1 | 2027 | $block->find('a.bibleref')->each('strip'); | ||||
| 199 | |||||||
| 200 | $block->find('sup.crossreference, sup.footnote')->each( sub { | ||||||
| 201 | 8 | 50 | 8 | 3986 | if ( $_->attr('class') eq 'footnote' ) { | ||
| 50 | |||||||
| 202 | $_->replace( | ||||||
| 203 | ( $footnotes->{ $_->attr('data-fn') } ) | ||||||
| 204 | 0 | 0 | 0 | ? ' |
|||
| 205 | : '' | ||||||
| 206 | ); | ||||||
| 207 | } | ||||||
| 208 | elsif ( $_->attr('class') eq 'crossreference' ) { | ||||||
| 209 | $_->replace( | ||||||
| 210 | ( $crossrefs->{ $_->attr('data-cr') } ) | ||||||
| 211 | 8 | 50 | 236 | ? ' |
|||
| 212 | : '' | ||||||
| 213 | ); | ||||||
| 214 | } | ||||||
| 215 | 1 | 1180 | } ); | ||||
| 216 | |||||||
| 217 | $block->find('footnote, crossref')->each( sub { | ||||||
| 218 | 8 | 8 | 1371 | _retag( $_, $_->tag ); | |||
| 219 | |||||||
| 220 | # patch for if there's an error in the source HTML where some footnotes | ||||||
| 221 | # or crossrefs should have a space but don't | ||||||
| 222 | |||||||
| 223 | 8 | 107 | my $previous = $_; | ||||
| 224 | 8 | 66 | 23 | $previous = $previous->previous_node while ( | |||
| 33 | |||||||
| 66 | |||||||
| 225 | $previous and $previous->tag and | ||||||
| 226 | ( $previous->tag eq 'crossref' or $previous->tag eq 'footnote' ) | ||||||
| 227 | ); | ||||||
| 228 | 8 | 50 | 33 | 1012 | my $previous_char = substr( ( ($previous) ? $previous->all_text || $previous->content : '' ), -1 ); | ||
| 229 | |||||||
| 230 | 8 | 442 | my $next = $_; | ||||
| 231 | 8 | 66 | 19 | $next = $next->next_node while ( | |||
| 33 | |||||||
| 66 | |||||||
| 232 | $next and $next->tag and | ||||||
| 233 | ( $next->tag eq 'crossref' or $next->tag eq 'footnote' ) | ||||||
| 234 | ); | ||||||
| 235 | 8 | 50 | 33 | 864 | my $next_char = substr( ( ($next) ? $next->all_text || $next->content : '' ), 0, 1 ); | ||
| 236 | |||||||
| 237 | 8 | 50 | 33 | 454 | $_->append(' ') if ( | ||
| 33 | |||||||
| 33 | |||||||
| 238 | length $previous_char and | ||||||
| 239 | length $next_char and | ||||||
| 240 | $previous_char =~ /[:;,\w\!\.\?]/ and | ||||||
| 241 | $next_char =~ /\w/ | ||||||
| 242 | ); | ||||||
| 243 | 1 | 333 | } ); | ||||
| 244 | |||||||
| 245 | 1 | 23 | _retag( $block, 'obml' ); | ||||
| 246 | 1 | 44 | $block->child_nodes->first->prepend( $block->new_tag( 'reference', $reference ) ); | ||||
| 247 | |||||||
| 248 | 1 | 532 | $block->find('h3.chapter')->each('remove'); | ||||
| 249 | 1 | 0 | 895 | $block->find('h2 + h3')->each( sub { $_->tag('h4') } ); | |||
| 0 | 0 | ||||||
| 250 | 1 | 2 | 948 | $block->find('h2, h3')->each( sub { _retag( $_, 'header' ) } ); | |||
| 2 | 1235 | ||||||
| 251 | 1 | 0 | 29 | $block->find('h4')->each( sub { _retag( $_, 'sub_header' ) } ); | |||
| 0 | 0 | ||||||
| 252 | |||||||
| 253 | 1 | 2 | 718 | $block->find('.versenum')->grep( sub { $_->text =~ /^\s*\(/ } )->each('remove'); | |||
| 2 | 1035 | ||||||
| 254 | 1 | 0 | 58 | $block->find('.chapternum + .versenum')->each( sub { $_->previous->remove } ); | |||
| 0 | 0 | ||||||
| 255 | 1 | 0 | 1065 | $block->find('.chapternum + i > .versenum')->each( sub { $_->parent->previous->remove } ); | |||
| 0 | 0 | ||||||
| 256 | |||||||
| 257 | $block->find('.chapternum')->each( sub { | ||||||
| 258 | 1 | 1 | 938 | _retag( $_, 'verse_number' ); | |||
| 259 | 1 | 29 | $_->content(1); | ||||
| 260 | 1 | 1194 | } ); | ||||
| 261 | $block->find('.versenum')->each( sub { | ||||||
| 262 | 2 | 2 | 1092 | _retag( $_, 'verse_number' ); | |||
| 263 | |||||||
| 264 | 2 | 55 | my $verse_number = $_->content; | ||||
| 265 | 2 | 163 | $verse_number =~ s/^.*://g; | ||||
| 266 | 2 | 11 | ($verse_number) = $verse_number =~ /(\d+)/; | ||||
| 267 | |||||||
| 268 | 2 | 6 | $_->content($verse_number); | ||||
| 269 | 1 | 148 | } ); | ||||
| 270 | |||||||
| 271 | 1 | 5 | 121 | $block->find('span.text')->each( sub { _retag( $_, 'text' ) } ); | |||
| 5 | 1204 | ||||||
| 272 | |||||||
| 273 | $block->find('table')->each( sub { | ||||||
| 274 | $_->find('tr')->each( sub { | ||||||
| 275 | 0 | 0 | $_->find('th')->each('remove'); | ||||
| 276 | 0 | 0 | 0 | unless ( $_->child_nodes->size ) { | |||
| 277 | 0 | 0 | $_->strip; | ||||
| 278 | } | ||||||
| 279 | else { | ||||||
| 280 | 0 | 0 | 0 | $_->replace( join( '', | |||
| 0 | |||||||
| 0 | |||||||
| 281 | ' |
||||||
| 282 | $_->find('td text')->map('content')->join(', '), | ||||||
| 283 | ( | ||||||
| 284 | ( $_->find('td text')->map('text')->last =~ /\W$/ ) ? '' : | ||||||
| 285 | ( $_->following_nodes->size ) ? '; ' : '.' | ||||||
| 286 | ), | ||||||
| 287 | ( ( $_->following_nodes->size ) ? ' ' : '' ), | ||||||
| 288 | ) ); | ||||||
| 289 | } | ||||||
| 290 | 0 | 0 | 0 | } ); | |||
| 291 | |||||||
| 292 | 0 | 0 | $_->tag('div'); | ||||
| 293 | 0 | 0 | $_->content( ' ' . $_->content . ' ' ); |
||||
| 294 | 1 | 35 | } ); | ||||
| 295 | |||||||
| 296 | $block->find('ul, ol')->each( sub { | ||||||
| 297 | $_->find('li')->each( sub { | ||||||
| 298 | 0 | 0 | $_->tag('text'); | ||||
| 299 | 0 | 0 | $_->find('text > text')->each('strip'); | ||||
| 300 | 0 | 0 | 0 | 0 | $_->append_content(' ') if ( $_->next and $_->next->tag eq 'li' ); |
||
| 301 | 0 | 0 | 0 | } ); | |||
| 302 | |||||||
| 303 | 0 | 0 | $_->tag('div'); | ||||
| 304 | 0 | 0 | $_->attr( class => 'left-1' ); | ||||
| 305 | 0 | 0 | $_->content( ' ' . $_->content . ' ' ); |
||||
| 306 | 1 | 776 | } ); | ||||
| 307 | |||||||
| 308 | 9 | 26 | $block->find( join( ', ', map { 'div.left-' . $_ } 1 .. 9 ) )->each( sub { | ||||
| 309 | 0 | 0 | 0 | my ($left) = $_->attr('class') =~ /\bleft\-(\d+)/; | |||
| 310 | 0 | 0 | $_->find('text')->each( sub { $_->attr( indent => $left ) } ); | ||||
| 0 | 0 | ||||||
| 311 | 0 | 0 | $_->strip; | ||||
| 312 | 1 | 1089 | } ); | ||||
| 313 | |||||||
| 314 | 1 | 0 | 4314 | $block->find('div.poetry')->each( sub { $_->attr( class => 'indent-1' ) } ); | |||
| 0 | 0 | ||||||
| 315 | 9 | 26 | $block->find( join( ', ', map { '.indent-' . $_ } 1 .. 9 ) )->each( sub { | ||||
| 316 | 0 | 0 | 0 | my ($indent) = $_->attr('class') =~ /\bindent\-(\d+)/; | |||
| 317 | $_->find('text')->each( sub { | ||||||
| 318 | 0 | 0 | 0 | $_->attr( indent => $indent + ( $_->attr('indent') || 0 ) ); | |||
| 319 | 0 | 0 | } ); | ||||
| 320 | 0 | 0 | $_->strip; | ||||
| 321 | 1 | 780 | } ); | ||||
| 322 | |||||||
| 323 | 1 | 4600 | $block->find( join( ', ', map { '.indent-' . $_ . '-breaks' } 1 .. 5 ) )->each('remove'); | ||||
| 5 | 20 | ||||||
| 324 | |||||||
| 325 | $block->find('text[indent]')->each( sub { | ||||||
| 326 | 0 | 0 | 0 | my $level = $_->attr('indent'); | |||
| 327 | 0 | 0 | _retag( $_, 'indent' ); | ||||
| 328 | 0 | 0 | $_->attr( level => $level ); | ||||
| 329 | 1 | 2709 | } ); | ||||
| 330 | 1 | 816 | $block->find('text')->each('strip'); | ||||
| 331 | |||||||
| 332 | $block->find('indent + indent')->each( sub { | ||||||
| 333 | 0 | 0 | 0 | 0 | if ( $_->previous->attr('level') eq $_->attr('level') ) { | ||
| 334 | 0 | 0 | $_->previous->append_content( ' ' . $_->content ); | ||||
| 335 | 0 | 0 | $_->remove; | ||||
| 336 | } | ||||||
| 337 | 1 | 1387 | } ); | ||||
| 338 | |||||||
| 339 | 1 | 2 | 728 | $block->find('p')->each( sub { _retag( $_, 'p' ) } ); | |||
| 2 | 680 | ||||||
| 340 | |||||||
| 341 | 1 | 50 | 33 | 27 | $block->at('p')->prepend_content(' |
||
| 342 | if ( $block->at('p') and not $block->at('p')->at('verse_number') ); | ||||||
| 343 | |||||||
| 344 | 1 | 631 | $block->find('div, span, u, sup, bk, verse, start-chapter')->each('strip'); | ||||
| 345 | |||||||
| 346 | 1 | 33 | 2522 | $_->each('strip') while ( $_ = $block->find('i > i') and $_->size ); | |||
| 347 | |||||||
| 348 | 1 | 674 | $html = html_unescape( $block->to_string ); | ||||
| 349 | |||||||
| 350 | 1 | 757 | $html =~ s/ [ ]+/ /g; # remove spaces immediately after a " " |
||||
| 351 | 1 | 14 | $html =~ s/,([A-z]|)/, $1/g; # fix missing spaces after commas (error in source HTML) | ||||
| 352 | 1 | 11 | $html =~ s/([A-z])()/$1 $2/g; # fix missing spaces before (error in source HTML) | ||||
| 353 | 1 | 85 | $html =~ s/([a-z])([A-Z])/$1 $2/g; # fix missing spaces from collapsed words (error in source HTML) | ||||
| 354 | |||||||
| 355 | 1 | 124 | return $html; | ||||
| 356 | } | ||||||
| 357 | |||||||
| 358 | 1 | 1 | 1 | 5449 | sub get ( $self, $reference, $translation = $self->translation ) { | ||
| 1 | 3 | ||||||
| 1 | 3 | ||||||
| 1 | 5 | ||||||
| 1 | 11 | ||||||
| 359 | 1 | 8 | return Bible::OBML->new->html( $self->parse( $self->fetch( $reference, $translation ) ) ); | ||||
| 360 | } | ||||||
| 361 | |||||||
| 362 | 1; | ||||||
| 363 | |||||||
| 364 | __END__ |