File Coverage

blib/lib/Text/ParseAHD.pm
Criterion Covered Total %
statement 1 3 33.3
branch n/a
condition n/a
subroutine 1 1 100.0
pod n/a
total 2 4 50.0


line stmt bran cond sub pod time code
1             package Text::ParseAHD;
2             #use base qw(BASE);
3 1     1   33020 use Text::ParseAHD::Word;
  0            
  0            
4             use Text::ParseAHD::Definition;
5             use Class::Std;
6             use Class::Std::Utils;
7              
8             use warnings;
9             use strict;
10             use Carp;
11              
12             use version; our $VERSION = qv('0.0.2');
13              
14             {
15             my %html_of :ATTR( :get :set :default<''> :init_arg );
16             my %word_of :ATTR( :get :set :default<''> :init_arg );
17             my %pos_of :ATTR( :get :set :default<''> :init_arg );
18             my %syllables_of :ATTR( :get :set :default<''> :init_arg );
19             my %defs_of :ATTR( :get :set :default<''> :init_arg );
20             my %text_of :ATTR( :get :set :default<''> :init_arg );
21             my %word_obj_obj_of :ATTR( :get :set :default<''> :init_arg );
22            
23            
24             sub START {
25             my ($self, $ident, $arg_ref) = @_;
26             #$html_of{$ident} = $arg_ref->{html};
27             #$word_of{$ident} = $arg_ref->{word};
28             #$Word_of{$ident} = Text::ParseAHD::Word->new({'word',$word_of{$ident}});
29             $self->set_word_obj( Text::ParseAHD::Word->new({word => $self->get_word() }) );
30             return;
31             }
32            
33             sub parse_html {
34             my ( $self ) = @_;
35             my $ident = ident $self;
36             my $html = $html_of{$ident};
37             while ( $html =~ m/(.*?)/gsix ) {
38             my $definition_text = $1;
39             $definition_text =~ s/(.*?)<\/b>//i;
40             my $word = $self->clean_word( $1 );
41              
42             # print " STATUS: AHD4 definition " . ++$count . " found\n";
43              
44             if ($word eq $word_of{$ident}) {
45             $definition_text =~ s/\n//g; # Remove newlines
46             $definition_text =~ s/\ //g; # Remove nbsp
47             $definition_text =~ s/
//g; # Remove br
48             $definition_text =~ s/^(.*?)//six; # Remove leading
49             $definition_text =~ s/<\/td>.*$//six; # Remove trailing
50              
51             my @defs = $self->split_defs( $definition_text );
52             my $i=1;
53             foreach my $def (@defs) {
54             print $i . "\n\n" . $def . "\n\n\n\n";
55             $self->parse_definition( $def );
56             $i++;
57             }
58             }
59             #print $definition_text."\n\n\n";
60             }
61             $self->report_word();
62             return;
63             }
64              
65             sub clean_word {
66             my ($self, $word) = @_;
67             my $A = chr(194);
68             my $bullet = chr(183);
69             $word =~ s/://g;
70             $word =~ s/$A//g;
71             $word =~ s/$bullet//g;
72             $word =~ s/.*?<\/sup>//; # RH 080719
73             $word =~ s/^\s+//; # RH 080719
74             $word =~ s/\s+$//; # RH 080719
75             return $word;
76             }
77            
78             sub split_defs {
79             my ($self, $text) = @_;
80             my @defs = split(//, $text ); shift @defs;
81             return @defs;
82             }
83              
84             sub parse_definition {
85             my ($self, $text) = @_;
86             my $ident = ident $self;
87             my $pos = '';
88             my @word_forms = ();
89             my @definitions = ();
90             $text =~ s/(.*?)//six;
91             $pos=$1;
92             $text =~ s/(.*?)//six;
93             my $subhead = $1;
94             if (defined $subhead) {
95             #while ($subhead =~ m/(.*?)<\/b>/gisx) { push @word_forms, $self->syllables( $1 ); }
96             while ($subhead =~ m/(.*?)<\/b>/gisx) { push @word_forms, $1; }
97            
98             if ($subhead =~ m/(.*?)<\/i>/i) { $pos = $1; $pos =~ s/\.//g; } # Subtype
99             }
100             #print "WORD FORMS: " . join(' ', @word_forms) ."\n\n\n";
101             while ($text =~ m/(.*?)/gsix) {
102             my $def_text = $1;
103             my $new_defs;
104             if ($def_text =~ m/
    parse_list( $def_text, $pos ); }
105             else { $new_defs = [$self->parse_single( $def_text )]; }
106              
107             foreach my $definition (@{ $new_defs }) {push @definitions, $definition; }
108             }
109             foreach my $definition (@definitions){
110             $self->get_word_obj()->add_definition($definition->{text}, $definition->{example}, $pos);
111             }
112              
113             }
114              
115             sub parse_list {
116             my ($self, $text, $pos ) = @_;
117             $text =~ s//\[/ig;
118             $text =~ s/<\/ol>/]/ig;
119             $text =~ s/
  • /{ text => '/ig;
  • 120             $text =~ s/<\/li>/'}, /ig;
    121             $text =~ s/'\[/\[/ig;
    122             $text =~ s/,\s]/]/ig;
    123             $text =~ s/]'/]/ig;
    124             $text =~ s/Informal<\/i>//ig;#added by Nathan
    125             $text =~ s/Slang<\/i>//ig;#added by Nathan
    126             $text =~ s/text => '? ?\[{ text => '/text => '/ig;
    127             $text =~ s/{ text => '.*<\/i> \[{ text/{ text/ig;
    128             $text =~ s/}\]}/}/ig;
    129             $text =~ s/({ text => '[-\.\w\s]*)'([-\.\w\s]*'},)/$1$2/ig;
    130             #$text =~ s///ig;
    131             #$text =~ s/<\/ol>/]/ig;
    132             #$text =~ s/
  • /{ text => '/ig;
  • 133             #$text =~ s/<\/li>/'}, /ig;
    134             #$text =~ s/'\[/\[/ig;
    135             #$text =~ s/,\s]/]/ig;
    136             #$text =~ s/]'/]/ig;
    137              
    138             print "LIST TEXT: $text\n";
    139             my $definitions = eval "return $text;";
    140             #my $definitions = [{ text => 'A domesticated carnivorous mammal (Canis familiaris) related to the foxes and wolves and raised in a wide variety of breeds.'}, { text => 'Any of various carnivorous mammals of the family Canidae, such as the dingo.'}, { text => 'A male animal of the family Canidae, especially of the fox or a domesticated breed.'}, { text => 'Any of various other animals, such as the prairie dog.'}, { text => 'Informal [{ text => 'A person: You won, you lucky dog.'}, { text => 'A person regarded as contemptible: You stole my watch, you dog.'}, { text => 'A person regarded as unattractive or uninteresting.'}, { text => 'Something of inferior or low quality: "The President had read the speech to some of his friends and they told him it was a dog" (John P. Roche).'}, { text => 'An investment that produces a low return or a loss.'}]}, { text => 'Slang [{ text => 'A person regarded as unattractive or uninteresting.'}, { text => 'Something of inferior or low quality: "The President had read the speech to some of his friends and they told him it was a dog" (John P. Roche).'}, { text => 'An investment that produces a low return or a loss.'}]}, { text => 'dogs Slang The feet.'}, { text => 'See andiron.'}, { text => 'Slang A hot dog; a wiener.'}, { text => 'Any of various hooked or U-shaped metallic devices used for gripping or holding heavy objects.'}, { text => 'Astronomy A sun dog.'}];
    141             #while($text=~m/[
    142            
    143             my @definitions;
    144             foreach my $definition ( @$definitions ) {
    145             #foreach my $definition ( @definitions ) {
    146             #print "HELLO: $definition->{text}\n\n";
    147             if ( $definition->{text} =~ m/^ARRAY/ ) {
    148             foreach my $sub_list ( @{ $definition->{text} } ) {
    149             push @definitions, $self->parse_single( $sub_list->{text} );
    150             }
    151             } else {
    152             push @definitions, $self->parse_single( $definition->{text} );
    153             #$self->get_word_obj()->add_definition($definitions[-1]{text}, $definitions[-1]{example}, $pos);
    154             #my $list= $self->get_word_obj()->get_defs();
    155             #my @list2 = @$list;
    156             #$self->report_word(-1);
    157             }
    158             }
    159            
    160             return \@definitions;
    161             }
    162            
    163             sub report_word{
    164             my ($self, $i)=@_;
    165             my $word = $self->get_word_obj();
    166            
    167             print "WORD: " . $word->get_word() . "\n";
    168             my $list = $word->get_defs();
    169             my @list2 = @$list;
    170             if($i eq ''){
    171             $i=1;
    172             foreach my $def (@list2){
    173             print "DEF#$i:\n text: " . $def->get_text() . "\n example: " . $def->get_example() . "\n pos: " . $def->get_pos() . "\n\n";
    174             $i++;
    175             }
    176             }else{
    177             my $def = $list2[$i];
    178             print "DEF#$i:\n text: " . $def->get_text() . "\n example: " . $def->get_example() . "\n pos: " . $def->get_pos() . "\n\n";
    179             }
    180             }
    181            
    182             sub parse_single {
    183             my ($self, $text ) = @_;
    184             my $ident = ident $self;
    185             while ( $text =~ m/(.*?)<\/b>(.*?)<\/i>/gi ) {
    186             my ($word_form, $pos) = ($self->syllables( $1 ), $2); $pos =~ s/\.//g; $pos =~ s/ //g;
    187             #$self->_insert_word_form( $word_form, $pos );
    188             }
    189             my (@keys) = ();
    190             my (@roots) = ();
    191             my (%word_forms) = ();
    192             $text =~ s/(.*?)<\/tt>//i;
    193             my $root='';
    194             #$root = $1; if ($root eq $text) { $root = ''; }
    195              
    196             # Check for additional word forms
    197             while ( $text =~ s/(.*?)<\/b>(.*?)<\/i>//gi ) {
    198             my ($word_form, $pos) = ($self->syllables($1), $2); $pos =~ s/\.//g; $pos =~ s/ //g;
    199             $word_forms{$word_form} = $pos;
    200             }
    201             if (keys %word_forms) {
    202             foreach my $key (sort keys %word_forms) { $self->_insert_word_form( $key, $word_forms{$key} ); }
    203             } else {
    204             # No word forms, parse definition, root, example
    205             #print "SINGLE TEXT: $text \n";
    206             my $example = '';
    207             $text =~ m/()/ig; #resetting $1
    208             $text =~ s/(.*?)<\/i>//i;
    209             $example = $1;
    210            
    211             #print "EXAMPLE: $example\n";
    212            
    213             if ($text eq $example)
    214             { $example = ''; }
    215             $example =~ s/["']//i;
    216             $example =~ s/\.//i;
    217             $text =~ s/\[.*?]//i;
    218            
    219             if ($root) {
    220             if ($root =~ m/(.*?)<\/font>/g) { $root = $1; }
    221             while ($root =~ m/(.*?)<\/b>/g) { push @roots, $1; }
    222             if (@roots) { $root = join(',', @roots); }
    223             $root =~ s/'/:/g; }
    224             $text =~ s/<\/font>//i;
    225             $text =~ s/(.*?)<\/i>//i;
    226             $text =~ s/.*?<\/sup>//i;
    227             $text =~ s/://i;
    228             $text =~ s/["']//i;
    229             $text =~ s/See Synonyms at .*?<\/a>\.//i;
    230             $text =~ s/See .*?<\/a>//i;
    231             $text =~ s/.*?<\/b>//i;
    232             $text =~ s/.*?<\/font>//i;
    233             $text =~ s/.*?<\/i>//i;
    234             $text =~ s/\.//i;
    235            
    236             # push @keys, "word_id => '$word_id_of{$ident}'";
    237             push @keys, "word => '$word_of{$ident}'";
    238             if ($pos_of{$ident} && !$root) { push @keys, "pos => '$pos_of{$ident}'"; }
    239             if ( $root && $example) { push @keys, "root => '$root'"; push @keys, "root_def => '" . $self->_sql_escape($example) . "'"; $example = ''; }
    240             elsif ( $root ) { push @keys, "root => '$root'"; }
    241             #if ($example) { push @keys, "example => '" . $self->_sql_escape($example) . "'"; }
    242             if ($example) { push @keys, "example => '$example'"; }
    243             #if ($text) { push @keys, "text => '" . $self->_sql_escape($text) . "'"; }
    244             if ($text) { push @keys, "text => '$text'"; }
    245             }
    246            
    247             return eval "return { " . join(', ', @keys) . " };";
    248             }
    249             }
    250             1; # Magic true value required at end of module
    251             __END__