File Coverage

blib/lib/DDG/Rewrite.pm
Criterion Covered Total %
statement 92 102 90.2
branch 47 62 75.8
condition 20 24 83.3
subroutine 7 7 100.0
pod 0 3 0.0
total 166 198 83.8


line stmt bran cond sub pod time code
1             package DDG::Rewrite;
2             our $AUTHORITY = 'cpan:DDG';
3             # ABSTRACT: A (mostly spice related) Rewrite definition in our system
4             $DDG::Rewrite::VERSION = '1018';
5 13     13   65544 use Moo;
  13         7875  
  13         70  
6 13     13   5373 use Carp qw( croak );
  13         35  
  13         842  
7 13     13   1195 use URI;
  13         15649  
  13         23054  
8              
9             sub BUILD {
10 20     20 0 105 my ( $self ) = @_;
11 20         56 my $to = $self->to;
12 20 100       82 my $callback = $self->has_callback ? $self->callback : "";
13 20 100 100     283 croak "Missing callback attribute for {{callback}} in to" if ($to =~ s/\Q{{callback}}/$callback/g && !$self->has_callback);
14             # Make sure we replace "{{dollar}}" with "${dollar}".
15 19         38 $to =~ s/\Q{{dollar}}/\$\{dollar\}/g;
16 19         27 my @missing_envs;
17 19         67 for ($to =~ m/\Q{{ENV{\E(\w+)}}}/g) {
18 4 100       14 if (defined $ENV{$_}) {
19 3         7 my $val = $ENV{$_};
20 3         29 $to =~ s/\Q{{ENV{$_}}}/$val/g;
21             } else {
22 1         3 push @missing_envs, $_;
23 1         15 $to =~ s/\Q{{ENV{$_}}}//g;
24             }
25             }
26 19 100       51 $self->_missing_envs(\@missing_envs) if @missing_envs;
27 19         134 $self->_parsed_to($to);
28             }
29              
30              
31             has path => (
32             is => 'ro',
33             required => 1,
34             );
35              
36             has to => (
37             is => 'ro',
38             required => 1,
39             );
40              
41             has from => (
42             is => 'ro',
43             predicate => 'has_from',
44             );
45              
46             has callback => (
47             is => 'ro',
48             predicate => 'has_callback',
49             );
50              
51             has wrap_jsonp_callback => (
52             is => 'ro',
53             default => sub { 0 },
54             );
55              
56             has wrap_string_callback => (
57             is => 'ro',
58             default => sub { 0 },
59             );
60              
61             has headers => (
62             is => 'ro',
63             predicate => 'has_headers',
64             );
65              
66             has proxy_cache_valid => (
67             is => 'ro',
68             predicate => 'has_proxy_cache_valid',
69             );
70              
71             has proxy_ssl_session_reuse => (
72             is => 'ro',
73             predicate => 'has_proxy_ssl_session_reuse',
74             );
75              
76             has proxy_x_forwarded_for => (
77             is => 'ro',
78             default => sub { 'X-Forwarded-For $proxy_add_x_forwarded_for' }
79             );
80              
81             has post_body => (
82             is => 'ro',
83             predicate => 'has_post_body',
84             );
85              
86             has nginx_conf => (
87             is => 'ro',
88             lazy => 1,
89             builder => '_build_nginx_conf',
90             );
91              
92             has error_fallback => (
93             is => 'rw',
94             default => sub { 0 },
95             );
96              
97             has upstream_timeouts => (
98             is => 'lazy',
99             predicate => 'has_upstream_timeouts',
100             default => sub { +{} },
101             );
102              
103             has content_type_javascript => (
104             is => 'ro',
105             default => sub { 0 },
106             );
107              
108             sub _build_nginx_conf {
109 16     16   4735 my ( $self ) = @_;
110              
111 16         44 my $uri = URI->new($self->parsed_to);
112 16         12828 my $host = $uri->host;
113 16         669 my $port = $uri->port;
114 16         398 my $scheme = $uri->scheme;
115 16         275 my $uri_path = $self->parsed_to;
116 16         150 $uri_path =~ s!$scheme://$host:$port!!;
117 16         114 $uri_path =~ s!$scheme://$host!!;
118 16         44 my $is_duckduckgo = $host =~ /(?:127\.0\.0\.1|duckduckgo\.com)/;
119              
120             # wrap various other things into jsonp
121 16 50 66     69 croak "Cannot use wrap_jsonp_callback and wrap_string callback at the same time!" if $self->wrap_jsonp_callback && $self->wrap_string_callback;
122 16   100     68 my $wrap_jsonp_callback = $self->has_callback && $self->wrap_jsonp_callback;
123 16   66     50 my $wrap_string_callback = $self->has_callback && $self->wrap_string_callback;
124 16   66     56 my $uses_echo_module = $wrap_jsonp_callback || $wrap_string_callback;
125 16         36 my $content_type_javascript = $self->content_type_javascript;
126              
127 16         33 my $callback = $self->callback;
128 16         73 my ($spice_name) = $self->path =~ m{^/js/spice/(.+)/$};
129 16 50       49 $spice_name =~ s|/|_|og if $spice_name;
130              
131 16         48 my $cfg = "location ^~ ".$self->path." {\n";
132              
133 16   66     84 my $timeouts = $self->has_upstream_timeouts && $self->upstream_timeouts;
134 16 100 100     74 if (ref $timeouts eq 'HASH' && keys %$timeouts) {
135 1 50       6 $cfg .= "\tproxy_connect_timeout $timeouts->{connect};\n" if $timeouts->{connect};
136 1 50       5 $cfg .= "\tproxy_send_timeout $timeouts->{send};\n" if $timeouts->{send};
137 1 50       5 $cfg .= "\tproxy_read_timeout $timeouts->{read};\n" if $timeouts->{read};
138             }
139              
140 16 100       42 if ( $self->headers ) {
141 4 100       17 if ( ref $self->headers eq 'HASH' ) {
    100          
142 2         3 for my $header ( sort keys %{$self->headers} ) {
  2         10  
143 3         9 $cfg .= "\tproxy_set_header $header \"" . $self->headers->{$header} . "\";\n";
144             }
145             }
146             elsif ( ref $self->headers eq 'ARRAY' ) {
147 1         3 for my $header ( @{ $self->headers } ) {
  1         3  
148 1         2 $cfg .= "\tproxy_set_header $header;\n";
149             }
150             }
151             else {
152 1         4 $cfg .= "\tproxy_set_header " . $self->headers . ";\n";
153             }
154             }
155              
156 16 100       41 if ( $self->has_post_body ) {
157 1         3 $cfg .= "\tproxy_method POST;\n";
158 1         4 $cfg .= "\tproxy_set_body '" . $self->post_body . "';\n";
159              
160             # This block sets the proxy cache key from the spice name and the combined
161             # set of captured GET parameters. The 'map' builds a hash of these capture
162             # parameters as keys to ensure each one occurs only once. We can then pull these
163             # out consistently by calling 'sort keys' on the returned hash and 'join' turns
164             # the sorted keys into a single string.
165             # e.g. post_body '{"method":"$2","query":"$1","cleaned_query":"$1"}'
166             # Would give a $cache_keys value of '$1$2'
167 1         2 my $cache_keys = join '', sort keys %{ {
168 1         5 map { $_ => 1 } ( $self->post_body =~ m/\$[0-9]+/g )
  2         13  
169             } };
170 1         5 $cfg .= "\tproxy_cache_key spice_${spice_name}_$cache_keys;\n"
171             }
172              
173 16 100       38 if($uses_echo_module) {
174             # we need to make sure we have plain text coming back until we have a way
175             # to unilaterally gunzip responses from the upstream since the echo module
176             # will intersperse plaintext with gzip which results in encoding errors.
177             # https://github.com/agentzh/echo-nginx-module/issues/30
178 1         2 $cfg .= "\tproxy_set_header Accept-Encoding '';\n";
179             }
180              
181 16 100 100     54 if($uses_echo_module || $content_type_javascript) {
182             # This is a workaround that deals with endpoints that don't support callback functions.
183             # So endpoints that don't support callback functions return a content-type of 'application/json'
184             # because what they're returning is not meant to be executed in the first place.
185             # Setting content-type to application/javascript for those endpoints solves blocking due to
186             # mime type mismatches.
187 2         4 $cfg .= "\tmore_set_headers 'Content-Type: application/javascript; charset=utf-8';\n";
188             }
189              
190 16 100       31 $cfg .= "\techo_before_body '$callback(';\n" if $wrap_jsonp_callback;
191 16 50       28 $cfg .= "\techo_before_body '$callback".qq|("';\n| if $wrap_string_callback;
192              
193 16         20 my $upstream;
194 16 50       30 if( $spice_name ) {
195 16         27 $upstream = '$'.$spice_name.'_upstream';
196 16         39 $cfg .= "\tset $upstream $scheme://$host:$port;\n";
197             } else {
198 0         0 warn "Error: Problem finding spice name in ".$self->path; return
199 0         0 }
200              
201 16 100       86 $cfg .= "\trewrite ^".$self->path.($self->has_from ? $self->from : "(.*)")." ".$uri_path." break;\n";
202 16         29 $cfg .= "\tproxy_pass $upstream;\n";
203 16 100       42 $cfg .= "\tproxy_set_header ".$self->proxy_x_forwarded_for.";\n" if $is_duckduckgo;
204 16 100       40 $cfg .= "\tproxy_ssl_server_name on;\n" if $scheme =~ /https/;
205              
206 16 100       46 if($self->has_proxy_cache_valid) {
207             # This tells Nginx how long the response should be kept.
208 2         8 $cfg .= "\tproxy_cache_valid " . $self->proxy_cache_valid . ";\n";
209             # Some response headers from the endpoint can affect `proxy_cache_valid` so we ignore them.
210             # http://nginx.org/en/docs/http/ngx_http_proxy_module.html#proxy_ignore_headers
211 2         4 $cfg .= "\tproxy_ignore_headers X-Accel-Expires Expires Cache-Control Set-Cookie;\n";
212             }
213              
214 16 50       41 $cfg .= "\tproxy_ssl_session_reuse ".$self->proxy_ssl_session_reuse.";\n" if $self->has_proxy_ssl_session_reuse;
215 16 100       29 $cfg .= "\techo_after_body ');';\n" if $wrap_jsonp_callback;
216 16 50       30 $cfg .= "\techo_after_body '\");';\n" if $wrap_string_callback;
217              
218             # proxy_intercept_errors is used to handle endpoints that don't return 200 OK
219             # When the endpoint returns an error, instead of replying with a blank page we reply with the callback function called with no
220             # parameters, e.g. ddg_spice_dictionary_definition(); The benefit is that we then know for sure that the Spice failed and can do
221             # something about it (we know that the Spice failed because it should return Spice.failed('...') when the parameters are not valid).
222 16 100       28 if($callback) {
223 4         6 $cfg .= "\tproxy_intercept_errors on;\n";
224 4 50       14 if ($self->error_fallback) {
225 0         0 $cfg .= "\terror_page 301 302 303 403 500 502 503 504 =200 /js/failed/$callback;\n";
226 0         0 $cfg .= "\terror_page 404 =200 \@404_$callback;\n";
227             } else {
228 4         10 $cfg .= "\terror_page 301 302 303 403 404 500 502 503 504 =200 /js/failed/$callback;\n";
229             }
230             }
231              
232 16         23 $cfg .= "\texpires 1s;\n";
233 16         20 $cfg .= "}\n";
234 16 50       40 if ($self->error_fallback) {
235 0         0 my $fallback = $self->error_fallback;
236 0         0 $cfg .= "location \@404_$callback".qq( {\n);
237 0 0       0 $cfg .= "\techo_before_body '$callback(';\n" if $wrap_jsonp_callback;
238 0         0 $cfg .= qq(\techo '{"fallback": "$fallback"}';\n);
239 0 0       0 $cfg .= "\techo_after_body ');';\n" if $wrap_jsonp_callback;
240 0         0 $cfg .= qq( }\n);
241             }
242 16         137 return $cfg;
243             }
244              
245             has _missing_envs => (
246             is => 'rw',
247             predicate => 'has_missing_envs',
248             );
249 7     7 0 2848 sub missing_envs { shift->_missing_envs }
250              
251             has _parsed_to => (
252             is => 'rw',
253             );
254 32     32 0 125 sub parsed_to { shift->_parsed_to }
255              
256             1;
257              
258             __END__