| line |
stmt |
bran |
cond |
sub |
pod |
time |
code |
|
1
|
|
|
|
|
|
|
package CCCP::HTML::Truncate; |
|
2
|
|
|
|
|
|
|
|
|
3
|
1
|
|
|
1
|
|
30102
|
use strict; |
|
|
1
|
|
|
|
|
2
|
|
|
|
1
|
|
|
|
|
47
|
|
|
4
|
1
|
|
|
1
|
|
6
|
use warnings; |
|
|
1
|
|
|
|
|
1
|
|
|
|
1
|
|
|
|
|
60
|
|
|
5
|
|
|
|
|
|
|
|
|
6
|
1
|
|
|
1
|
|
441
|
use XML::LibXML; |
|
|
0
|
|
|
|
|
|
|
|
|
0
|
|
|
|
|
|
|
|
7
|
|
|
|
|
|
|
use Encode qw(); |
|
8
|
|
|
|
|
|
|
|
|
9
|
|
|
|
|
|
|
our $VERSION = '0.04'; |
|
10
|
|
|
|
|
|
|
|
|
11
|
|
|
|
|
|
|
$CCCP::HTML::Truncate::enc = 'utf-8'; |
|
12
|
|
|
|
|
|
|
|
|
13
|
|
|
|
|
|
|
# ------------------------ EXTEND XML::LibXML::Element ----------------- |
|
14
|
|
|
|
|
|
|
# Serialize this element to markup and return it encoded for output.
#
# The target encoding is $CCCP::HTML::Truncate::enc when that is set,
# otherwise the owner document's declared encoding, otherwise the
# document's actual encoding. Encoding is done with Encode::FB_XMLCREF,
# so characters the target encoding cannot represent come out as XML
# numeric character references.
#
# The second argument ($actualEncoding) is accepted but not used.
sub XML::LibXML::Element::html {
    my ($node, $actualEncoding) = @_;

    # Pick the first usable encoding name, most specific setting first.
    my $enc_name = $CCCP::HTML::Truncate::enc;
    $enc_name ||= $node->ownerDocument->encoding();
    $enc_name ||= $node->ownerDocument->actualEncoding();

    my $encoder = Encode::find_encoding($enc_name);
    my $markup  = $node->toString;

    return $encoder->encode($markup, Encode::FB_XMLCREF);
}
|
23
|
|
|
|
|
|
|
|
|
24
|
|
|
|
|
|
|
# ---------------------------------------- MAIN -------------------------------------------- |
|
25
|
|
|
|
|
|
|
|
|
26
|
|
|
|
|
|
|
# Parser object shared by every call in this file; created on first use.
my $lx;

# Create the shared XML::LibXML parser if it does not exist yet and
# enable recover_silently on it. Later calls return immediately and
# leave the existing parser untouched.
sub _init_parser {
    if (!$lx) {
        $lx = XML::LibXML->new();
        return $lx->recover_silently(1);
    }
    return;
}
|
34
|
|
|
|
|
|
|
|
|
35
|
|
|
|
|
|
|
# Truncate an HTML fragment so that it keeps at most $length characters
# of text content, leaving the markup around the kept text intact.
#
# Called as a class method:
#   $class    - invocant; only used to call _init_parser()
#   $html_str - HTML to shorten; it is decoded with the encoding named in
#               $CCCP::HTML::Truncate::enc before parsing
#   $length   - text budget; the first run of digits in the value is used
#   $elips    - marker appended where the text is cut (default "...")
#
# Returns:
#   - nothing (empty list / undef) when $html_str is false
#   - ''       when $length contains no positive number, or when the
#              parsed document has no <body>
#   - the input string when it is already shorter than $length
#   - otherwise the serialized body after truncation
sub truncate {
    my ($class, $html_str, $length, $elips) = @_;

    return unless $html_str;

    $elips ||= "...";

    # Read $1 only when the match succeeded: after a failed match $1 still
    # holds the capture of an earlier successful match (possibly the
    # caller's). Adding 0 makes all-zero strings such as "00" behave like 0.
    $length = (defined $length && $length =~ /(\d+)/) ? $1 + 0 : 0;
    return '' unless $length;

    # NOTE(review): as written this substitution replaces "&" with "&" and
    # changes nothing; one side presumably carried an entity that was lost
    # before this text reached review. Confirm against the released module.
    $html_str =~ s/&/&/gm;
    return $html_str if length($html_str) < $length;

    my $f = Encode::find_encoding($CCCP::HTML::Truncate::enc);
    $html_str = $f->decode($html_str);
    $elips    = $f->decode($elips);

    $class->_init_parser();
    my $root = $lx->parse_html_string($html_str);
    my ($body) = $root->documentElement()->findnodes('//body');
    return '' unless $body;

    # Walk the text nodes in the order findnodes returns them. $length is
    # the remaining budget: it is at least 1 on entry and drops below 1 in
    # the node where the cut happens.
    foreach my $text_node ($body->ownerDocument->findnodes('//child::text()')) {
        if ($length > 0) {
            my $str  = $text_node->to_literal;
            my $kept = substr($str, 0, $length);
            $length -= length $str;

            # The budget ran out in this node: mark the cut here. Text
            # nodes that follow are removed by the branch below, so no
            # separate blanking pass is needed.
            $kept .= $elips if $length < 1;

            $text_node->setData($kept);
        } else {
            my $parent = $text_node->parentNode;

            if ($parent->isSameNode($body)) {
                # Never remove <body> itself, only the text node.
                $text_node->unbindNode();
            } else {
                # Remove just this text node while the parent still has
                # more than one text descendant; otherwise remove the
                # whole parent element.
                my @childs = $parent->findnodes($parent->nodePath.'//child::text()');
                if ($#childs > 0) {
                    $text_node->unbindNode();
                } else {
                    $parent->unbindNode();
                }
            }
        }
    }

    my $ret = $body->html();
    # NOTE(review): the first alternative only matches an optional space at
    # the start of a line; it looks like opening-tag text was lost from the
    # pattern (the second alternative strips closing tags). Confirm against
    # the released module before changing it.
    $ret =~ s/^( )?|(<\/p>)?<\/body>$//igm;
    return $ret;
}
|
100
|
|
|
|
|
|
|
|
|
101
|
|
|
|
|
|
|
1; |
|
102
|
|
|
|
|
|
|
__END__ |