| line | stmt | bran | cond | sub | pod | time | code | 
| 1 |  |  |  |  |  |  | package Catmandu::Importer; | 
| 2 |  |  |  |  |  |  |  | 
| 3 | 35 |  |  | 35 |  | 204497 | use Catmandu::Sane; | 
|  | 35 |  |  |  |  | 100 |  | 
|  | 35 |  |  |  |  | 236 |  | 
| 4 |  |  |  |  |  |  |  | 
| 5 |  |  |  |  |  |  | our $VERSION = '1.2020'; | 
| 6 |  |  |  |  |  |  |  | 
| 7 | 35 |  |  | 35 |  | 285 | use Catmandu::Util qw(io is_value is_string is_array_ref is_hash_ref); | 
|  | 35 |  |  |  |  | 428 |  | 
|  | 35 |  |  |  |  | 3249 |  | 
| 8 | 35 |  |  | 35 |  | 11883 | use Catmandu::Util::Path qw(as_path); | 
|  | 35 |  |  |  |  | 82 |  | 
|  | 35 |  |  |  |  | 2247 |  | 
| 9 | 35 |  |  | 35 |  | 25346 | use LWP::UserAgent; | 
|  | 35 |  |  |  |  | 1550817 |  | 
|  | 35 |  |  |  |  | 1758 |  | 
| 10 | 35 |  |  | 35 |  | 338 | use HTTP::Request (); | 
|  | 35 |  |  |  |  | 97 |  | 
|  | 35 |  |  |  |  | 542 |  | 
| 11 | 35 |  |  | 35 |  | 196 | use URI           (); | 
|  | 35 |  |  |  |  | 92 |  | 
|  | 35 |  |  |  |  | 666 |  | 
| 12 | 35 |  |  | 35 |  | 17184 | use URI::Template (); | 
|  | 35 |  |  |  |  | 196929 |  | 
|  | 35 |  |  |  |  | 1222 |  | 
| 13 | 35 |  |  | 35 |  | 301 | use Moo::Role; | 
|  | 35 |  |  |  |  | 325 |  | 
|  | 35 |  |  |  |  | 447 |  | 
| 14 | 35 |  |  | 35 |  | 18136 | use namespace::clean; | 
|  | 35 |  |  |  |  | 97 |  | 
|  | 35 |  |  |  |  | 535 |  | 
| 15 |  |  |  |  |  |  |  | 
| 16 |  |  |  |  |  |  | with 'Catmandu::Logger'; | 
| 17 |  |  |  |  |  |  | with 'Catmandu::Iterable'; | 
| 18 |  |  |  |  |  |  | with 'Catmandu::IterableOnce'; | 
| 19 |  |  |  |  |  |  | with 'Catmandu::Fixable'; | 
| 20 |  |  |  |  |  |  | with 'Catmandu::Serializer'; | 
| 21 |  |  |  |  |  |  |  | 
| 22 |  |  |  |  |  |  | around generator => sub { | 
| 23 |  |  |  |  |  |  | my ($orig, $self) = @_; | 
| 24 |  |  |  |  |  |  |  | 
| 25 |  |  |  |  |  |  | my $generator = $orig->($self); | 
| 26 |  |  |  |  |  |  |  | 
| 27 |  |  |  |  |  |  | if (my $fixer = $self->_fixer) { | 
| 28 |  |  |  |  |  |  | $generator = $fixer->fix($generator); | 
| 29 |  |  |  |  |  |  | } | 
| 30 |  |  |  |  |  |  |  | 
| 31 |  |  |  |  |  |  | if (defined(my $path = $self->data_path)) { | 
| 32 |  |  |  |  |  |  | my $getter = as_path($path)->getter; | 
| 33 |  |  |  |  |  |  | return sub { | 
| 34 |  |  |  |  |  |  | state @buf; | 
| 35 |  |  |  |  |  |  | while (1) { | 
| 36 |  |  |  |  |  |  | return shift @buf if @buf; | 
| 37 |  |  |  |  |  |  | @buf = @{$getter->($generator->() // return)}; | 
| 38 |  |  |  |  |  |  | next; | 
| 39 |  |  |  |  |  |  | } | 
| 40 |  |  |  |  |  |  | }; | 
| 41 |  |  |  |  |  |  | } | 
| 42 |  |  |  |  |  |  |  | 
| 43 |  |  |  |  |  |  | $generator; | 
| 44 |  |  |  |  |  |  | }; | 
| 45 |  |  |  |  |  |  |  | 
| 46 |  |  |  |  |  |  | has file => (is => 'lazy', init_arg => undef); | 
| 47 |  |  |  |  |  |  | has _file_template => | 
| 48 |  |  |  |  |  |  | (is => 'ro', predicate => 'has_file', init_arg => 'file'); | 
| 49 |  |  |  |  |  |  | has variables => (is => 'ro', predicate => 1); | 
| 50 |  |  |  |  |  |  | has fh        => (is => 'ro', lazy      => 1, builder => 1); | 
| 51 |  |  |  |  |  |  | has encoding  => (is => 'ro', builder   => 1); | 
| 52 |  |  |  |  |  |  | has data_path            => (is => 'ro'); | 
| 53 |  |  |  |  |  |  | has user_agent           => (is => 'ro'); | 
| 54 |  |  |  |  |  |  | has http_method          => (is => 'lazy'); | 
| 55 |  |  |  |  |  |  | has http_headers         => (is => 'lazy'); | 
| 56 |  |  |  |  |  |  | has http_agent           => (is => 'ro', predicate => 1); | 
| 57 |  |  |  |  |  |  | has http_max_redirect    => (is => 'ro', predicate => 1); | 
| 58 |  |  |  |  |  |  | has http_timeout         => (is => 'ro', default => sub {180});  # LWP default | 
| 59 |  |  |  |  |  |  | has http_verify_hostname => (is => 'ro', default => sub {1}); | 
| 60 |  |  |  |  |  |  | has http_retry           => (is => 'ro', predicate => 1); | 
| 61 |  |  |  |  |  |  | has http_timing          => (is => 'ro', predicate => 1); | 
| 62 |  |  |  |  |  |  | has http_body            => (is => 'ro', predicate => 1); | 
| 63 |  |  |  |  |  |  | has _http_client => ( | 
| 64 |  |  |  |  |  |  | is       => 'ro', | 
| 65 |  |  |  |  |  |  | lazy     => 1, | 
| 66 |  |  |  |  |  |  | builder  => '_build_http_client', | 
| 67 |  |  |  |  |  |  | init_arg => 'user_agent' | 
| 68 |  |  |  |  |  |  | ); | 
| 69 |  |  |  |  |  |  | has _http_timing_tries => (is => 'lazy'); | 
| 70 |  |  |  |  |  |  | has ignore_404         => (is => 'ro'); | 
| 71 |  |  |  |  |  |  |  | 
| 72 |  |  |  |  |  |  | sub _build_encoding { | 
| 73 | 98 |  |  | 98 |  | 148427 | ':utf8'; | 
| 74 |  |  |  |  |  |  | } | 
| 75 |  |  |  |  |  |  |  | 
| 76 |  |  |  |  |  |  | sub _build_file { | 
| 77 | 65 |  |  | 65 |  | 826 | my ($self) = @_; | 
| 78 | 65 | 50 |  |  |  | 279 | return \*STDIN unless $self->has_file; | 
| 79 | 65 |  |  |  |  | 183 | my $file = $self->_file_template; | 
| 80 | 65 | 100 | 100 |  |  | 438 | if (is_string($file) && $self->has_variables) { | 
| 81 | 6 |  |  |  |  | 42 | my $template = URI::Template->new($file); | 
| 82 | 6 |  |  |  |  | 588 | my $vars     = $self->variables; | 
| 83 | 6 | 100 |  |  |  | 29 | if (is_value($vars)) { | 
| 84 | 3 |  |  |  |  | 16 | $vars = [split ',', $vars]; | 
| 85 |  |  |  |  |  |  | } | 
| 86 | 6 | 100 |  |  |  | 22 | if (is_array_ref($vars)) { | 
| 87 | 4 |  |  |  |  | 14 | my @keys = $template->variables; | 
| 88 | 4 |  |  |  |  | 64 | my @vals = @$vars; | 
| 89 | 4 |  |  |  |  | 9 | $vars = {}; | 
| 90 | 4 |  | 66 |  |  | 33 | $vars->{shift @keys} = shift @vals while @keys && @vals; | 
| 91 |  |  |  |  |  |  | } | 
| 92 | 6 |  |  |  |  | 26 | $file = $template->process_to_string(%$vars); | 
| 93 |  |  |  |  |  |  | } | 
| 94 | 65 | 100 | 100 |  |  | 2047 | if (is_string($file) && $file !~ m!^https?://! && !-r $file) { | 
|  |  |  | 100 |  |  |  |  | 
| 95 | 1 |  |  |  |  | 27 | Catmandu::BadArg->throw("file '$file' doesn't exist"); | 
| 96 |  |  |  |  |  |  | } | 
| 97 | 64 |  |  |  |  | 347 | $file; | 
| 98 |  |  |  |  |  |  | } | 
| 99 |  |  |  |  |  |  |  | 
| 100 |  |  |  |  |  |  | sub _build_fh { | 
| 101 | 62 |  |  | 62 |  | 1605 | my ($self) = @_; | 
| 102 |  |  |  |  |  |  |  | 
| 103 | 62 |  |  |  |  | 1059 | my $file = $self->file; | 
| 104 |  |  |  |  |  |  |  | 
| 105 |  |  |  |  |  |  | # get remote content | 
| 106 | 61 | 100 | 100 |  |  | 378 | if (is_string($file) && $file =~ m!^https?://!) { | 
| 107 | 13 |  |  |  |  | 26 | my $body; | 
| 108 | 13 | 100 |  |  |  | 57 | if ($self->has_http_body) { | 
| 109 | 3 |  |  |  |  | 11 | $body = $self->http_body; | 
| 110 |  |  |  |  |  |  |  | 
| 111 | 3 | 50 |  |  |  | 10 | if (ref $body) { | 
| 112 | 0 |  |  |  |  | 0 | $body = $self->serialize($body); | 
| 113 |  |  |  |  |  |  | } | 
| 114 |  |  |  |  |  |  |  | 
| 115 | 3 | 50 |  |  |  | 10 | if ($self->has_variables) { | 
| 116 | 3 |  |  |  |  | 7 | my $vars = $self->variables; | 
| 117 | 3 | 100 |  |  |  | 14 | if (is_hash_ref($vars)) {    # named variables | 
| 118 | 1 |  |  |  |  | 5 | for my $key (keys %$vars) { | 
| 119 | 1 |  |  |  |  | 3 | my $var = $vars->{$key}; | 
| 120 | 1 |  |  |  |  | 29 | $body =~ s/{$key}/$var/; | 
| 121 |  |  |  |  |  |  | } | 
| 122 |  |  |  |  |  |  | } | 
| 123 |  |  |  |  |  |  | else {                       # positional variables | 
| 124 | 2 | 50 |  |  |  | 8 | if (is_value($vars)) { | 
| 125 | 2 |  |  |  |  | 8 | $vars = [split ',', $vars]; | 
| 126 |  |  |  |  |  |  | } | 
| 127 | 2 |  |  |  |  | 7 | for my $var (@$vars) { | 
| 128 | 6 |  |  |  |  | 19 | $body =~ s/{\w+}/$var/; | 
| 129 |  |  |  |  |  |  | } | 
| 130 |  |  |  |  |  |  | } | 
| 131 |  |  |  |  |  |  | } | 
| 132 |  |  |  |  |  |  | } | 
| 133 |  |  |  |  |  |  |  | 
| 134 | 13 |  |  |  |  | 270 | my $content = $self->_http_request( | 
| 135 |  |  |  |  |  |  | $self->http_method, $file, $self->http_headers, | 
| 136 |  |  |  |  |  |  | $body,              $self->_http_timing_tries, | 
| 137 |  |  |  |  |  |  | ); | 
| 138 |  |  |  |  |  |  |  | 
| 139 | 8 |  |  |  |  | 77 | return io(\$content, mode => 'r', binmode => $_[0]->encoding); | 
| 140 |  |  |  |  |  |  | } | 
| 141 |  |  |  |  |  |  |  | 
| 142 | 48 |  |  |  |  | 316 | io($file, mode => 'r', binmode => $_[0]->encoding); | 
| 143 |  |  |  |  |  |  | } | 
| 144 |  |  |  |  |  |  |  | 
| 145 |  |  |  |  |  |  | sub _build_http_headers { | 
| 146 | 13 |  |  | 13 |  | 483 | []; | 
| 147 |  |  |  |  |  |  | } | 
| 148 |  |  |  |  |  |  |  | 
| 149 |  |  |  |  |  |  | sub _build_http_method { | 
| 150 | 10 |  |  | 10 |  | 289 | 'GET'; | 
| 151 |  |  |  |  |  |  | } | 
| 152 |  |  |  |  |  |  |  | 
| 153 |  |  |  |  |  |  | sub _build__http_timing_tries { | 
| 154 | 13 |  |  | 13 |  | 153 | my ($self) = @_; | 
| 155 |  |  |  |  |  |  |  | 
| 156 | 13 | 100 |  |  |  | 76 | if ($self->has_http_timing) { | 
|  |  | 100 |  |  |  |  |  | 
| 157 | 2 |  |  |  |  | 26 | my @timing_tries = $self->http_timing =~ /(\d+(?:\.\d+)*)/g; | 
| 158 | 2 |  |  |  |  | 19 | return \@timing_tries; | 
| 159 |  |  |  |  |  |  | } | 
| 160 |  |  |  |  |  |  | elsif ($self->has_http_retry) { | 
| 161 | 2 |  |  |  |  | 13 | my @timing_tries = (1) x $self->http_retry; | 
| 162 | 2 |  |  |  |  | 11 | return \@timing_tries; | 
| 163 |  |  |  |  |  |  | } | 
| 164 | 9 |  |  |  |  | 42 | return; | 
| 165 |  |  |  |  |  |  | } | 
| 166 |  |  |  |  |  |  |  | 
| 167 |  |  |  |  |  |  | sub _build_http_client { | 
| 168 | 2 |  |  | 2 |  | 48 | my ($self) = @_; | 
| 169 | 2 |  |  |  |  | 19 | my $ua = LWP::UserAgent->new; | 
| 170 | 2 |  |  |  |  | 4244 | $ua->timeout($self->http_timeout); | 
| 171 | 2 | 50 |  |  |  | 46 | $ua->agent($self->http_agent) if $self->has_http_agent; | 
| 172 | 2 | 50 |  |  |  | 35 | $ua->max_redirect($self->http_max_redirect) | 
| 173 |  |  |  |  |  |  | if $self->has_http_max_redirect; | 
| 174 | 2 |  |  |  |  | 19 | $ua->ssl_opts(verify_hostname => $self->http_verify_hostname); | 
| 175 | 2 |  |  |  |  | 71 | $ua->protocols_allowed([qw(http https)]); | 
| 176 | 2 |  |  |  |  | 31 | $ua->env_proxy; | 
| 177 | 2 |  |  |  |  | 18031 | $ua; | 
| 178 |  |  |  |  |  |  | } | 
| 179 |  |  |  |  |  |  |  | 
| 180 |  |  |  |  |  |  | sub _http_request { | 
| 181 | 13 |  |  | 13 |  | 46 | my ($self, $method, $url, $headers, $body, $timing_tries) = @_; | 
| 182 |  |  |  |  |  |  |  | 
| 183 | 13 |  |  |  |  | 253 | my $client = $self->_http_client; | 
| 184 |  |  |  |  |  |  |  | 
| 185 | 13 |  | 50 |  |  | 205 | my $req = HTTP::Request->new($method, $url, $headers || []); | 
| 186 | 13 | 100 |  |  |  | 16917 | $req->content($body) if defined $body; | 
| 187 |  |  |  |  |  |  |  | 
| 188 | 13 |  |  |  |  | 124 | my $res = $client->request($req); | 
| 189 |  |  |  |  |  |  |  | 
| 190 | 13 | 100 | 100 |  |  | 242916 | if ($res->code =~ /^408|500|502|503|504$/ && $timing_tries) { | 
| 191 | 4 |  |  |  |  | 95 | my @tries = @$timing_tries; | 
| 192 | 4 |  |  |  |  | 18 | while (my $sleep = shift @tries) { | 
| 193 | 6 |  |  |  |  | 6001059 | sleep $sleep; | 
| 194 | 6 |  |  |  |  | 183 | $res = $client->request($req->clone); | 
| 195 | 6 | 100 |  |  |  | 16391 | last if $res->code !~ /^408|500|502|503|504$/; | 
| 196 |  |  |  |  |  |  | } | 
| 197 |  |  |  |  |  |  | } | 
| 198 |  |  |  |  |  |  |  | 
| 199 | 13 |  |  |  |  | 284 | my $res_body = $res->decoded_content; | 
| 200 |  |  |  |  |  |  |  | 
| 201 | 13 | 100 |  |  |  | 10467 | unless ($res->is_success) { | 
| 202 | 5 |  |  |  |  | 71 | my $res_headers = []; | 
| 203 | 5 |  |  |  |  | 41 | for my $header ($res->header_field_names) { | 
| 204 | 18 |  |  |  |  | 359 | my $val = $res->header($header); | 
| 205 | 18 |  |  |  |  | 747 | push @$res_headers, $header, $val; | 
| 206 |  |  |  |  |  |  | } | 
| 207 |  |  |  |  |  |  | Catmandu::HTTPError->throw( | 
| 208 |  |  |  |  |  |  | { | 
| 209 | 5 |  |  |  |  | 38 | code             => $res->code, | 
| 210 |  |  |  |  |  |  | message          => $res->status_line, | 
| 211 |  |  |  |  |  |  | url              => $url, | 
| 212 |  |  |  |  |  |  | method           => $method, | 
| 213 |  |  |  |  |  |  | request_headers  => $headers, | 
| 214 |  |  |  |  |  |  | request_body     => $body, | 
| 215 |  |  |  |  |  |  | response_headers => $res_headers, | 
| 216 |  |  |  |  |  |  | response_body    => $res_body, | 
| 217 |  |  |  |  |  |  | } | 
| 218 |  |  |  |  |  |  | ); | 
| 219 |  |  |  |  |  |  | } | 
| 220 |  |  |  |  |  |  |  | 
| 221 | 8 |  |  |  |  | 121 | $res_body; | 
| 222 |  |  |  |  |  |  | } | 
| 223 |  |  |  |  |  |  |  | 
| 224 |  |  |  |  |  |  | sub readline { | 
| 225 | 0 |  |  | 0 | 0 |  | warnings::warnif("deprecated", | 
| 226 |  |  |  |  |  |  | "readline is deprecated, fh->getline instead"); | 
| 227 | 0 |  |  |  |  |  | $_[0]->fh->getline; | 
| 228 |  |  |  |  |  |  | } | 
| 229 |  |  |  |  |  |  |  | 
| 230 |  |  |  |  |  |  | sub readall { | 
| 231 | 0 |  |  | 0 | 0 |  | warnings::warnif("deprecated", | 
| 232 |  |  |  |  |  |  | "readall is deprecated, join('',fh->getlines) instead"); | 
| 233 | 0 |  |  |  |  |  | join '', $_[0]->fh->getlines; | 
| 234 |  |  |  |  |  |  | } | 
| 235 |  |  |  |  |  |  |  | 
| 236 |  |  |  |  |  |  | 1; | 
| 237 |  |  |  |  |  |  |  | 
| 238 |  |  |  |  |  |  | __END__ | 
| 239 |  |  |  |  |  |  |  | 
| 240 |  |  |  |  |  |  | =pod | 
| 241 |  |  |  |  |  |  |  | 
| 242 |  |  |  |  |  |  | =head1 NAME | 
| 243 |  |  |  |  |  |  |  | 
| 244 |  |  |  |  |  |  | Catmandu::Importer - Namespace for packages that can import | 
| 245 |  |  |  |  |  |  |  | 
| 246 |  |  |  |  |  |  | =head1 SYNOPSIS | 
| 247 |  |  |  |  |  |  |  | 
| 248 |  |  |  |  |  |  | # From the command line | 
| 249 |  |  |  |  |  |  |  | 
| 250 |  |  |  |  |  |  | # JSON is an importer and YAML an exporter | 
| 251 |  |  |  |  |  |  | $ catmandu convert JSON to YAML < data.json | 
| 252 |  |  |  |  |  |  |  | 
| 253 |  |  |  |  |  |  | # OAI is an importer and JSON an exporter | 
| 254 |  |  |  |  |  |  | $ catmandu convert OAI --url http://biblio.ugent.be/oai to JSON | 
| 255 |  |  |  |  |  |  |  | 
| 256 |  |  |  |  |  |  | # Fetch remote content | 
| 257 |  |  |  |  |  |  | $ catmandu convert JSON --file http://example.com/data.json to YAML | 
| 258 |  |  |  |  |  |  |  | 
| 259 |  |  |  |  |  |  | # From Perl | 
| 260 |  |  |  |  |  |  |  | 
| 261 |  |  |  |  |  |  | use Catmandu; | 
| 262 |  |  |  |  |  |  | use Data::Dumper; | 
| 263 |  |  |  |  |  |  |  | 
| 264 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON', file => 'data.json'); | 
| 265 |  |  |  |  |  |  |  | 
| 266 |  |  |  |  |  |  | $importer->each(sub { | 
| 267 |  |  |  |  |  |  | my $item = shift; | 
| 268 |  |  |  |  |  |  | print Dumper($item); | 
| 269 |  |  |  |  |  |  | }); | 
| 270 |  |  |  |  |  |  |  | 
| 271 |  |  |  |  |  |  | my $num = $importer->count; | 
| 272 |  |  |  |  |  |  |  | 
| 273 |  |  |  |  |  |  | my $first_item = $importer->first; | 
| 274 |  |  |  |  |  |  |  | 
| 275 |  |  |  |  |  |  | # Convert OAI to JSON in Perl | 
| 276 |  |  |  |  |  |  | my $importer = Catmandu->importer('OAI', url => 'http://biblio.ugent.be/oai'); | 
| 277 |  |  |  |  |  |  | my $exporter = Catmandu->exporter('JSON'); | 
| 278 |  |  |  |  |  |  |  | 
| 279 |  |  |  |  |  |  | $exporter->add_many($importer); | 
| 280 |  |  |  |  |  |  |  | 
| 281 |  |  |  |  |  |  | =head1 DESCRIPTION | 
| 282 |  |  |  |  |  |  |  | 
| 283 |  |  |  |  |  |  | A Catmandu::Importer is a Perl package that can generate structured data from | 
| 284 |  |  |  |  |  |  | sources such as JSON, YAML, XML, RDF or network protocols such as Atom, OAI-PMH, | 
| 285 |  |  |  |  |  |  | SRU and even DBI databases. Given an Catmandu::Importer a programmer can read | 
| 286 |  |  |  |  |  |  | data from using one of the many L<Catmandu::Iterable> methods: | 
| 287 |  |  |  |  |  |  |  | 
| 288 |  |  |  |  |  |  |  | 
| 289 |  |  |  |  |  |  | $importer->to_array; | 
| 290 |  |  |  |  |  |  | $importer->count; | 
| 291 |  |  |  |  |  |  | $importer->each(\&callback); | 
| 292 |  |  |  |  |  |  | $importer->first; | 
| 293 |  |  |  |  |  |  | $importer->rest; | 
| 294 |  |  |  |  |  |  | ...etc... | 
| 295 |  |  |  |  |  |  |  | 
| 296 |  |  |  |  |  |  | Every Catmandu::Importer is also L<Catmandu::Fixable> and thus inherits a 'fix' | 
| 297 |  |  |  |  |  |  | parameter that can be set in the constructor. When given a 'fix' parameter, then each | 
| 298 |  |  |  |  |  |  | item returned by the generator will be automatically Fixed using one or | 
| 299 |  |  |  |  |  |  | more L<Catmandu::Fix>es. | 
| 300 |  |  |  |  |  |  | E.g. | 
| 301 |  |  |  |  |  |  |  | 
| 302 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON',fix => ['upcase(title)']); | 
| 303 |  |  |  |  |  |  | $importer->each( sub { | 
| 304 |  |  |  |  |  |  | my $item = shift ; # Every $item->{title} is now upcased... | 
| 305 |  |  |  |  |  |  |  | 
| 306 |  |  |  |  |  |  | }); | 
| 307 |  |  |  |  |  |  |  | 
| 308 |  |  |  |  |  |  | # or via a Fix file | 
| 309 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON',fix => ['/my/fixes.txt']); | 
| 310 |  |  |  |  |  |  | $importer->each( sub { | 
| 311 |  |  |  |  |  |  | my $item = shift ; # Every $item->{title} is now upcased... | 
| 312 |  |  |  |  |  |  |  | 
| 313 |  |  |  |  |  |  | }); | 
| 314 |  |  |  |  |  |  |  | 
| 315 |  |  |  |  |  |  | =head1 CONFIGURATION | 
| 316 |  |  |  |  |  |  |  | 
| 317 |  |  |  |  |  |  | =over | 
| 318 |  |  |  |  |  |  |  | 
| 319 |  |  |  |  |  |  | =item file | 
| 320 |  |  |  |  |  |  |  | 
| 321 |  |  |  |  |  |  | Read input from a local file given by its path. If the path looks like a | 
| 322 |  |  |  |  |  |  | url, the content will be fetched first and then passed to the importer. | 
| 323 |  |  |  |  |  |  | Alternatively a scalar reference can be passed to read from a string. | 
| 324 |  |  |  |  |  |  |  | 
| 325 |  |  |  |  |  |  | =item fh | 
| 326 |  |  |  |  |  |  |  | 
| 327 |  |  |  |  |  |  | Read input from an L<IO::Handle>. If not specified, L<Catmandu::Util::io> is used to | 
| 328 |  |  |  |  |  |  | create the input stream from the C<file> argument or by using STDIN. | 
| 329 |  |  |  |  |  |  |  | 
| 330 |  |  |  |  |  |  | =item encoding | 
| 331 |  |  |  |  |  |  |  | 
| 332 |  |  |  |  |  |  | Binmode of the input stream C<fh>. Set to C<:utf8> by default. | 
| 333 |  |  |  |  |  |  |  | 
| 334 |  |  |  |  |  |  | =item fix | 
| 335 |  |  |  |  |  |  |  | 
| 336 |  |  |  |  |  |  | An ARRAY of one or more Fix-es or Fix scripts to be applied to imported items. | 
| 337 |  |  |  |  |  |  |  | 
| 338 |  |  |  |  |  |  | =item data_path | 
| 339 |  |  |  |  |  |  |  | 
| 340 |  |  |  |  |  |  | The data at C<data_path> is imported instead of the original data. | 
| 341 |  |  |  |  |  |  |  | 
| 342 |  |  |  |  |  |  | # given this imported item: | 
| 343 |  |  |  |  |  |  | {abc => [{a=>1},{b=>2},{c=>3}]} | 
| 344 |  |  |  |  |  |  | # with data_path 'abc', this item gets imported instead: | 
| 345 |  |  |  |  |  |  | [{a=>1},{b=>2},{c=>3}] | 
| 346 |  |  |  |  |  |  | # with data_path 'abc.*', 3 items get imported: | 
| 347 |  |  |  |  |  |  | {a=>1} | 
| 348 |  |  |  |  |  |  | {b=>2} | 
| 349 |  |  |  |  |  |  | {c=>3} | 
| 350 |  |  |  |  |  |  |  | 
| 351 |  |  |  |  |  |  | =item variables | 
| 352 |  |  |  |  |  |  |  | 
| 353 |  |  |  |  |  |  | Variables given here will interpolate the C<file> and C<http_body> options. The | 
| 354 |  |  |  |  |  |  | syntax is the same as L<URI::Template>. | 
| 355 |  |  |  |  |  |  |  | 
| 356 |  |  |  |  |  |  | # named arguments | 
| 357 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON', | 
| 358 |  |  |  |  |  |  | file => 'http://{server}/{path}', | 
| 359 |  |  |  |  |  |  | variables => {server => 'biblio.ugent.be', path => 'file.json'}, | 
| 360 |  |  |  |  |  |  | ); | 
| 361 |  |  |  |  |  |  |  | 
| 362 |  |  |  |  |  |  | # positional arguments | 
| 363 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON', | 
| 364 |  |  |  |  |  |  | file => 'http://{server}/{path}', | 
| 365 |  |  |  |  |  |  | variables => 'biblio.ugent.be,file.json', | 
| 366 |  |  |  |  |  |  | ); | 
| 367 |  |  |  |  |  |  |  | 
| 368 |  |  |  |  |  |  | # or | 
| 369 |  |  |  |  |  |  | my $importer = Catmandu->importer('JSON', | 
| 370 |  |  |  |  |  |  | url => 'http://{server}/{path}', | 
| 371 |  |  |  |  |  |  | variables => ['biblio.ugent.be','file.json'], | 
| 372 |  |  |  |  |  |  | ); | 
| 373 |  |  |  |  |  |  |  | 
| 374 |  |  |  |  |  |  | # or via the command line | 
| 375 |  |  |  |  |  |  | $ catmandu convert JSON --file 'http://{server}/{path}' --variables 'biblio.ugent.be,file.json' | 
| 376 |  |  |  |  |  |  |  | 
| 377 |  |  |  |  |  |  | =back | 
| 378 |  |  |  |  |  |  |  | 
| 379 |  |  |  |  |  |  | =head1 HTTP CONFIGURATION | 
| 380 |  |  |  |  |  |  |  | 
| 381 |  |  |  |  |  |  | These options are only relevant if C<file> is a url. See L<LWP::UserAgent> for details about these options. | 
| 382 |  |  |  |  |  |  |  | 
| 383 |  |  |  |  |  |  | =over | 
| 384 |  |  |  |  |  |  |  | 
| 385 |  |  |  |  |  |  | =item http_body | 
| 386 |  |  |  |  |  |  |  | 
| 387 |  |  |  |  |  |  | Set the GET/POST message body. | 
| 388 |  |  |  |  |  |  |  | 
| 389 |  |  |  |  |  |  | =item http_method | 
| 390 |  |  |  |  |  |  |  | 
| 391 |  |  |  |  |  |  | Set the type of HTTP request 'GET', 'POST' , ... | 
| 392 |  |  |  |  |  |  |  | 
| 393 |  |  |  |  |  |  | =item http_headers | 
| 394 |  |  |  |  |  |  |  | 
| 395 |  |  |  |  |  |  | A reference to a HTTP::Headers objects. | 
| 396 |  |  |  |  |  |  |  | 
| 397 |  |  |  |  |  |  | =back | 
| 398 |  |  |  |  |  |  |  | 
| 399 |  |  |  |  |  |  | =head2 Set an own HTTP client | 
| 400 |  |  |  |  |  |  |  | 
| 401 |  |  |  |  |  |  | =over | 
| 402 |  |  |  |  |  |  |  | 
| 403 |  |  |  |  |  |  | =item user_agent(LWP::UserAgent->new(...)) | 
| 404 |  |  |  |  |  |  |  | 
| 405 |  |  |  |  |  |  | Set an own HTTP client | 
| 406 |  |  |  |  |  |  |  | 
| 407 |  |  |  |  |  |  | =back | 
| 408 |  |  |  |  |  |  |  | 
| 409 |  |  |  |  |  |  | =head2 Alternative set the parameters of the default client | 
| 410 |  |  |  |  |  |  |  | 
| 411 |  |  |  |  |  |  | =over | 
| 412 |  |  |  |  |  |  |  | 
| 413 |  |  |  |  |  |  | =item http_agent | 
| 414 |  |  |  |  |  |  |  | 
| 415 |  |  |  |  |  |  | A string containing the name of the HTTP client. | 
| 416 |  |  |  |  |  |  |  | 
| 417 |  |  |  |  |  |  | =item http_max_redirect | 
| 418 |  |  |  |  |  |  |  | 
| 419 |  |  |  |  |  |  | Maximum number of HTTP redirects allowed. | 
| 420 |  |  |  |  |  |  |  | 
| 421 |  |  |  |  |  |  | =item http_timeout | 
| 422 |  |  |  |  |  |  |  | 
| 423 |  |  |  |  |  |  | Maximum execution time. | 
| 424 |  |  |  |  |  |  |  | 
| 425 |  |  |  |  |  |  | =item http_verify_hostname | 
| 426 |  |  |  |  |  |  |  | 
| 427 |  |  |  |  |  |  | Verify the SSL certificate. | 
| 428 |  |  |  |  |  |  |  | 
| 429 |  |  |  |  |  |  | =item http_retry | 
| 430 |  |  |  |  |  |  |  | 
| 431 |  |  |  |  |  |  | Maximum times to retry the HTTP request if it temporarily fails. Default is not | 
| 432 |  |  |  |  |  |  | to retry.  See L<LWP::UserAgent::Determined> for the HTTP status codes | 
| 433 |  |  |  |  |  |  | that initiate a retry. | 
| 434 |  |  |  |  |  |  |  | 
| 435 |  |  |  |  |  |  | =item http_timing | 
| 436 |  |  |  |  |  |  |  | 
| 437 |  |  |  |  |  |  | Maximum times and timeouts to retry the HTTP request if it temporarily fails. Default is not | 
| 438 |  |  |  |  |  |  | to retry.  See L<LWP::UserAgent::Determined> for the HTTP status codes | 
| 439 |  |  |  |  |  |  | that initiate a retry and the format of the timing value. | 
| 440 |  |  |  |  |  |  |  | 
| 441 |  |  |  |  |  |  | =back | 
| 442 |  |  |  |  |  |  |  | 
| 443 |  |  |  |  |  |  | =head1 METHODS | 
| 444 |  |  |  |  |  |  |  | 
| 445 |  |  |  |  |  |  | =head2 first, each, rest , ... | 
| 446 |  |  |  |  |  |  |  | 
| 447 |  |  |  |  |  |  | See L<Catmandu::Iterable> for all inherited methods. | 
| 448 |  |  |  |  |  |  |  | 
| 449 |  |  |  |  |  |  | =head1 CODING | 
| 450 |  |  |  |  |  |  |  | 
| 451 |  |  |  |  |  |  | Create your own importer by creating a Perl package in the Catmandu::Importer namespace that | 
| 452 |  |  |  |  |  |  | implements C<Catmandu::Importer>. Basically, you need to create a method 'generate' which | 
| 453 |  |  |  |  |  |  | returns a callback that creates one Perl hash for each call: | 
| 454 |  |  |  |  |  |  |  | 
| 455 |  |  |  |  |  |  | my $importer = Catmandu::Importer::Hello->new; | 
| 456 |  |  |  |  |  |  |  | 
| 457 |  |  |  |  |  |  | $importer->generate(); # record | 
| 458 |  |  |  |  |  |  | $importer->generate(); # next record | 
| 459 |  |  |  |  |  |  | $importer->generate(); # undef = end of stream | 
| 460 |  |  |  |  |  |  |  | 
| 461 |  |  |  |  |  |  | Here is an example of a simple C<Hello> importer: | 
| 462 |  |  |  |  |  |  |  | 
| 463 |  |  |  |  |  |  | package Catmandu::Importer::Hello; | 
| 464 |  |  |  |  |  |  |  | 
| 465 |  |  |  |  |  |  | use Catmandu::Sane; | 
| 466 |  |  |  |  |  |  | use Moo; | 
| 467 |  |  |  |  |  |  |  | 
| 468 |  |  |  |  |  |  | with 'Catmandu::Importer'; | 
| 469 |  |  |  |  |  |  |  | 
| 470 |  |  |  |  |  |  | sub generator { | 
| 471 |  |  |  |  |  |  | my ($self) = @_; | 
| 472 |  |  |  |  |  |  | state $fh = $self->fh; | 
| 473 |  |  |  |  |  |  | my $n = 0; | 
| 474 |  |  |  |  |  |  | return sub { | 
| 475 |  |  |  |  |  |  | $self->log->debug("generating record " . ++$n); | 
| 476 |  |  |  |  |  |  | my $name = $self->fh->readline; | 
| 477 |  |  |  |  |  |  | return defined $name ? { "hello" => $name } : undef; | 
| 478 |  |  |  |  |  |  | }; | 
| 479 |  |  |  |  |  |  | } | 
| 480 |  |  |  |  |  |  |  | 
| 481 |  |  |  |  |  |  | 1; | 
| 482 |  |  |  |  |  |  |  | 
| 483 |  |  |  |  |  |  | This importer can be called via the command line as: | 
| 484 |  |  |  |  |  |  |  | 
| 485 |  |  |  |  |  |  | $ catmandu convert Hello to JSON < /tmp/names.txt | 
| 486 |  |  |  |  |  |  | $ catmandu convert Hello to YAML < /tmp/names.txt | 
| 487 |  |  |  |  |  |  | $ catmandu import Hello to MongoDB --database_name test < /tmp/names.txt | 
| 488 |  |  |  |  |  |  |  | 
| 489 |  |  |  |  |  |  | Or, via Perl | 
| 490 |  |  |  |  |  |  |  | 
| 491 |  |  |  |  |  |  | use Catmandu; | 
| 492 |  |  |  |  |  |  |  | 
| 493 |  |  |  |  |  |  | my $importer = Catmandu->importer('Hello', file => '/tmp/names.txt'); | 
| 494 |  |  |  |  |  |  | $importer->each(sub { | 
| 495 |  |  |  |  |  |  | my $items = shift; | 
| 496 |  |  |  |  |  |  | }); | 
| 497 |  |  |  |  |  |  |  | 
| 498 |  |  |  |  |  |  | =head1 SEE ALSO | 
| 499 |  |  |  |  |  |  |  | 
| 500 |  |  |  |  |  |  | L<Catmandu::Iterable> , L<Catmandu::Fix> , | 
| 501 |  |  |  |  |  |  | L<Catmandu::Importer::CSV>, L<Catmandu::Importer::JSON> , L<Catmandu::Importer::YAML> | 
| 502 |  |  |  |  |  |  |  | 
| 503 |  |  |  |  |  |  | =cut |