File Coverage

blib/lib/DDG/Rewrite.pm
Criterion Covered Total %
statement 90 100 90.0
branch 45 60 75.0
condition 17 21 80.9
subroutine 7 7 100.0
pod 0 3 0.0
total 159 191 83.2


line stmt bran cond sub pod time code
1             package DDG::Rewrite;
2             our $AUTHORITY = 'cpan:DDG';
3             # ABSTRACT: A (mostly spice related) Rewrite definition in our system
4             $DDG::Rewrite::VERSION = '1016';
5 13     13   15167 use Moo;
  13         9544  
  13         66  
6 13     13   3048 use Carp qw( croak );
  13         14  
  13         519  
7 13     13   1274 use URI;
  13         10832  
  13         14888  
8              
9             sub BUILD {
10 19     19 0 66 my ( $self ) = @_;
11 19         34 my $to = $self->to;
12 19 100       43 my $callback = $self->has_callback ? $self->callback : "";
13 19 100 100     255 croak "Missing callback attribute for {{callback}} in to" if ($to =~ s/\Q{{callback}}/$callback/g && !$self->has_callback);
14             # Make sure we replace "{{dollar}}"" with "{dollar}".
15 18         24 $to =~ s/\Q{{dollar}}/\$\{dollar\}/g;
16 18         35 my @missing_envs;
17 18         41 for ($to =~ m/\Q{{ENV{\E(\w+)}}}/g) {
18 3 100       8 if (defined $ENV{$_}) {
19 2         3 my $val = $ENV{$_};
20 2         24 $to =~ s/\Q{{ENV{$_}}}/$val/g;
21             } else {
22 1         3 push @missing_envs, $_;
23 1         15 $to =~ s/\Q{{ENV{$_}}}//g;
24             }
25             }
26 18 100       31 $self->_missing_envs(\@missing_envs) if @missing_envs;
27 18         102 $self->_parsed_to($to);
28             }
29              
30              
31             has path => (
32             is => 'ro',
33             required => 1,
34             );
35              
36             has to => (
37             is => 'ro',
38             required => 1,
39             );
40              
41             has from => (
42             is => 'ro',
43             predicate => 'has_from',
44             );
45              
46             has callback => (
47             is => 'ro',
48             predicate => 'has_callback',
49             );
50              
51             has wrap_jsonp_callback => (
52             is => 'ro',
53             default => sub { 0 },
54             );
55              
56             has wrap_string_callback => (
57             is => 'ro',
58             default => sub { 0 },
59             );
60              
61             has headers => (
62             is => 'ro',
63             predicate => 'has_headers',
64             );
65              
66             has proxy_cache_valid => (
67             is => 'ro',
68             predicate => 'has_proxy_cache_valid',
69             );
70              
71             has proxy_ssl_session_reuse => (
72             is => 'ro',
73             predicate => 'has_proxy_ssl_session_reuse',
74             );
75              
76             has proxy_x_forwarded_for => (
77             is => 'ro',
78             default => sub { 'X-Forwarded-For $proxy_add_x_forwarded_for' }
79             );
80              
81             has post_body => (
82             is => 'ro',
83             predicate => 'has_post_body',
84             );
85              
86             has nginx_conf => (
87             is => 'ro',
88             lazy => 1,
89             builder => '_build_nginx_conf',
90             );
91              
92             has error_fallback => (
93             is => 'rw',
94             default => sub { 0 },
95             );
96              
97             has upstream_timeouts => (
98             is => 'lazy',
99             predicate => 'has_upstream_timeouts',
100             default => sub { +{} },
101             );
102              
103             sub _build_nginx_conf {
104 15     15   2598 my ( $self ) = @_;
105              
106 15         25 my $uri = URI->new($self->parsed_to);
107 15         12280 my $host = $uri->host;
108 15         429 my $port = $uri->port;
109 15         212 my $scheme = $uri->scheme;
110 15         173 my $uri_path = $self->parsed_to;
111 15         116 $uri_path =~ s!$scheme://$host:$port!!;
112 15         84 $uri_path =~ s!$scheme://$host!!;
113 15         24 my $is_duckduckgo = $host =~ /(?:127\.0\.0\.1|duckduckgo\.com)/;
114              
115             # wrap various other things into jsonp
116 15 50 66     60 croak "Cannot use wrap_jsonp_callback and wrap_string callback at the same time!" if $self->wrap_jsonp_callback && $self->wrap_string_callback;
117 15   100     35 my $wrap_jsonp_callback = $self->has_callback && $self->wrap_jsonp_callback;
118 15   66     33 my $wrap_string_callback = $self->has_callback && $self->wrap_string_callback;
119 15   66     45 my $uses_echo_module = $wrap_jsonp_callback || $wrap_string_callback;
120 15         19 my $callback = $self->callback;
121 15         48 my ($spice_name) = $self->path =~ m{^/js/spice/(.+)/$};
122 15 50       29 $spice_name =~ s|/|_|og if $spice_name;
123              
124 15         29 my $cfg = "location ^~ ".$self->path." {\n";
125              
126 15   66     61 my $timeouts = $self->has_upstream_timeouts && $self->upstream_timeouts;
127 15 100 100     53 if (ref $timeouts eq 'HASH' && keys %$timeouts) {
128 1 50       4 $cfg .= "\tproxy_connect_timeout $timeouts->{connect};\n" if $timeouts->{connect};
129 1 50       5 $cfg .= "\tproxy_send_timeout $timeouts->{send};\n" if $timeouts->{send};
130 1 50       5 $cfg .= "\tproxy_read_timeout $timeouts->{read};\n" if $timeouts->{read};
131             }
132              
133 15 100       26 if ( $self->headers ) {
134 4 100       15 if ( ref $self->headers eq 'HASH' ) {
    100          
135 2         2 for my $header ( sort keys %{$self->headers} ) {
  2         8  
136 3         9 $cfg .= "\tproxy_set_header $header \"" . $self->headers->{$header} . "\";\n";
137             }
138             }
139             elsif ( ref $self->headers eq 'ARRAY' ) {
140 1         1 for my $header ( @{ $self->headers } ) {
  1         3  
141 1         3 $cfg .= "\tproxy_set_header $header;\n";
142             }
143             }
144             else {
145 1         4 $cfg .= "\tproxy_set_header " . $self->headers . ";\n";
146             }
147             }
148              
149 15 100       29 if ( $self->has_post_body ) {
150 1         3 $cfg .= "\tproxy_method POST;\n";
151 1         5 $cfg .= "\tproxy_set_body '" . $self->post_body . "';\n";
152              
153             # This block sets the proxy cache key from the spice name and the combined
154             # set of captured GET parameters. The 'map' builds a hash of these capture
155             # parameters as keys to ensure each one occurs only once. We can then pull these
156             # out consistently by calling 'sort keys' on the returned hash and 'join' turns
157             # the sorted keys into a single string.
158             # e.g. post_body '{"method":"$2","query":"$1","cleaned_query":"$1"}'
159             # Would give a $cache_keys value of '$1$2'
160 1         2 my $cache_keys = join '', sort keys %{ {
161 1         6 map { $_ => 1 } ( $self->post_body =~ m/\$[0-9]+/g )
  2         8  
162             } };
163 1         5 $cfg .= "\tproxy_cache_key spice_${spice_name}_$cache_keys;\n"
164             }
165              
166 15 100       22 if($uses_echo_module) {
167             # we need to make sure we have plain text coming back until we have a way
168             # to unilaterally gunzip responses from the upstream since the echo module
169             # will intersperse plaintext with gzip which results in encoding errors.
170             # https://github.com/agentzh/echo-nginx-module/issues/30
171 1         2 $cfg .= "\tproxy_set_header Accept-Encoding '';\n";
172              
173             # This is a workaround that deals with endpoints that don't support callback functions.
174             # So endpoints that don't support callback functions return a content-type of 'application/json'
175             # because what they're returning is not meant to be executed in the first place.
176             # Setting content-type to application/javascript for those endpoints solves blocking due to
177             # mime type mismatches.
178 1         2 $cfg .= "\tmore_set_headers 'Content-Type: application/javascript; charset=utf-8';\n";
179             }
180              
181 15 100       23 $cfg .= "\techo_before_body '$callback(';\n" if $wrap_jsonp_callback;
182 15 50       18 $cfg .= "\techo_before_body '$callback".qq|("';\n| if $wrap_string_callback;
183              
184 15         13 my $upstream;
185 15 50       17 if( $spice_name ) {
186 15         19 $upstream = '$'.$spice_name.'_upstream';
187 15         28 $cfg .= "\tset $upstream $scheme://$host:$port;\n";
188             } else {
189 0         0 warn "Error: Problem finding spice name in ".$self->path; return
190 0         0 }
191              
192 15 100       49 $cfg .= "\trewrite ^".$self->path.($self->has_from ? $self->from : "(.*)")." ".$uri_path." break;\n";
193 15         20 $cfg .= "\tproxy_pass $upstream;\n";
194 15 100       28 $cfg .= "\tproxy_set_header ".$self->proxy_x_forwarded_for.";\n" if $is_duckduckgo;
195 15 100       28 $cfg .= "\tproxy_ssl_server_name on;\n" if $scheme =~ /https/;
196              
197 15 100       25 if($self->has_proxy_cache_valid) {
198             # This tells Nginx how long the response should be kept.
199 1         3 $cfg .= "\tproxy_cache_valid " . $self->proxy_cache_valid . ";\n";
200             # Some response headers from the endpoint can affect `proxy_cache_valid` so we ignore them.
201             # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_ignore_headers
202 1         2 $cfg .= "\tproxy_ignore_headers X-Accel-Expires Expires Cache-Control Set-Cookie;\n";
203             }
204              
205 15 50       25 $cfg .= "\tproxy_ssl_session_reuse ".$self->proxy_ssl_session_reuse.";\n" if $self->has_proxy_ssl_session_reuse;
206 15 100       18 $cfg .= "\techo_after_body ');';\n" if $wrap_jsonp_callback;
207 15 50       19 $cfg .= "\techo_after_body '\");';\n" if $wrap_string_callback;
208              
209             # proxy_intercept_errors is used to handle endpoints that don't return 200 OK
210             # When we get errors from the endpoint, instead of replying a blank page, it should reply the function instead with no parameters,
211             # e.g., ddg_spice_dictionary_definition();. The benefit of doing that is that we know for sure that the Spice failed, and we can do
212             # something about it (we know that the Spice failed because it should return Spice.failed('...') when the parameters are not valid).
213 15 100       19 if($callback) {
214 3         3 $cfg .= "\tproxy_intercept_errors on;\n";
215 3 50       7 if ($self->error_fallback) {
216 0         0 $cfg .= "\terror_page 301 302 303 403 500 502 503 504 =200 /js/failed/$callback;\n";
217 0         0 $cfg .= "\terror_page 404 =200 \@404_$callback;\n";
218             } else {
219 3         7 $cfg .= "\terror_page 301 302 303 403 404 500 502 503 504 =200 /js/failed/$callback;\n";
220             }
221             }
222              
223 15         15 $cfg .= "\texpires 1s;\n";
224 15         15 $cfg .= "}\n";
225 15 50       22 if ($self->error_fallback) {
226 0         0 my $fallback = $self->error_fallback;
227 0         0 $cfg .= "location \@404_$callback".qq( {\n);
228 0 0       0 $cfg .= "\techo_before_body '$callback(';\n" if $wrap_jsonp_callback;
229 0         0 $cfg .= qq(\techo '{"fallback": "$fallback"}';\n);
230 0 0       0 $cfg .= "\techo_after_body ');';\n" if $wrap_jsonp_callback;
231 0         0 $cfg .= qq( }\n);
232             }
233 15         125 return $cfg;
234             }
235              
236             has _missing_envs => (
237             is => 'rw',
238             predicate => 'has_missing_envs',
239             );
240 6     6 0 1832 sub missing_envs { shift->_missing_envs }
241              
242             has _parsed_to => (
243             is => 'rw',
244             );
245 30     30 0 79 sub parsed_to { shift->_parsed_to }
246              
247             1;
248              
249             __END__