Skip to content
This repository was archived by the owner on Jan 24, 2025. It is now read-only.

Commit 32b0c0b

Browse files
authored
Merge pull request #718 from alphagov/fix-queue-clogging
Limit concurrency and retries of sidekiq workers
2 parents 26dafb9 + e0a2aed commit 32b0c0b

10 files changed

+617
-3
lines changed

Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ gem 'rest-client', '~> 2.0.2'
2626
gem 'rubyzip', '~> 1.2'
2727
gem 'sass-rails', '~> 5.0'
2828
gem 'sentry-raven'
29+
gem 'sidekiq-limit_fetch'
2930
gem 'sidekiq-scheduler'
3031
gem 'turbolinks', '~> 5'
3132
gem 'uglifier', '>= 1.3.0'

Gemfile.lock

+4-1
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,8 @@ GEM
350350
connection_pool (~> 2.2, >= 2.2.0)
351351
rack-protection (>= 1.5.0)
352352
redis (>= 3.3.5, < 5)
353+
sidekiq-limit_fetch (3.4.0)
354+
sidekiq (>= 4)
353355
sidekiq-logging-json (0.0.18)
354356
sidekiq (>= 3)
355357
sidekiq-scheduler (3.0.0)
@@ -459,6 +461,7 @@ DEPENDENCIES
459461
rubyzip (~> 1.2)
460462
sass-rails (~> 5.0)
461463
sentry-raven
464+
sidekiq-limit_fetch
462465
sidekiq-scheduler
463466
simplecov
464467
spring (~> 2.0)
@@ -473,4 +476,4 @@ RUBY VERSION
473476
ruby 2.5.0p0
474477

475478
BUNDLED WITH
476-
1.16.1
479+
1.16.3

app/workers/ckan/v26/ckan_org_import_worker.rb

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module CKAN
44
module V26
55
class CKANOrgImportWorker
66
include Sidekiq::Worker
7+
sidekiq_options queue: :import, retry: 3 # Discarded after ~2 minutes
78

89
def perform(organisation_id, *_args)
910
ckan_org = get_organization_from_ckan(organisation_id)

app/workers/ckan/v26/ckan_org_sync_worker.rb

+1
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ module CKAN
22
module V26
33
class CKANOrgSyncWorker
44
include Sidekiq::Worker
5+
sidekiq_options queue: :sync, retry: 13 # Discarded after ~17 hours
56

67
def perform
78
actions = CKANOrgDiffer.new.call

app/workers/ckan/v26/package_import_worker.rb

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ module CKAN
44
module V26
55
class PackageImportWorker
66
include Sidekiq::Worker
7+
sidekiq_options queue: :import, retry: 3 # Discarded after ~2 minutes
78

89
def perform(package_id, *_args)
910
package = get_package_from_ckan(package_id)

app/workers/ckan/v26/package_sync_worker.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ module CKAN
22
module V26
33
class PackageSyncWorker
44
include Sidekiq::Worker
5-
sidekiq_options retry: false
5+
sidekiq_options queue: :sync, retry: false
66

77
def perform
88
actions = PackageDiffer.new.call

config/environments/staging.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,5 @@
22

33
Rails.application.configure do
44
config.assets.compile = true
5-
config.ckan_v26_base_url = "https://test.data.gov.uk"
5+
config.ckan_v26_base_url = "https://staging.data.gov.uk"
66
end

config/production/nginx.conf

+294
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,294 @@
1+
limit_req_zone $limited_bot zone=botzone:10m rate=30r/m;
2+
limit_req_zone $limited_geo zone=one:10m rate=3r/s;
3+
4+
geo $ckan_internal {
5+
ranges;
6+
default 1;
7+
46.43.41.10-46.43.41.30 0;
8+
127.0.0.1-127.0.0.1 0;
9+
}
10+
11+
map $ckan_internal $limited_geo {
12+
1 $binary_remote_addr;
13+
0 "";
14+
}
15+
16+
map $http_user_agent $limited_bot {
17+
"~Baiduspider" 1;
18+
"~OtherSpider" 2;
19+
default "";
20+
}
21+
22+
upstream contractsfinder {
23+
server 34.249.103.20;
24+
}
25+
26+
upstream varnish {
27+
server 127.0.0.1:6081;
28+
}
29+
30+
server {
31+
listen 80;
32+
33+
server_name www.data.gov.uk;
34+
35+
return 301 $scheme://data.gov.uk$request_uri;
36+
}
37+
38+
server {
39+
listen 443 ssl;
40+
41+
server_name www.data.gov.uk;
42+
43+
ssl_certificate /etc/nginx/ssl/server.crt;
44+
ssl_certificate_key /etc/nginx/ssl/server.key;
45+
46+
return 301 $scheme://data.gov.uk$request_uri;
47+
}
48+
49+
server {
50+
listen 80;
51+
52+
server_name data.gov.uk;
53+
server_name co-prod3.dh.bytemark.co.uk;
54+
server_name localhost;
55+
56+
location /csw {
57+
try_files $uri @ckan;
58+
}
59+
60+
location / {
61+
rewrite ^/(.*)$ https://data.gov.uk/$1 permanent;
62+
}
63+
}
64+
65+
server {
66+
listen 443 ssl default_server;
67+
68+
server_name data.gov.uk;
69+
server_name localhost;
70+
71+
ssl_certificate /etc/nginx/ssl/server.crt;
72+
ssl_certificate_key /etc/nginx/ssl/server.key;
73+
ssl_ciphers "EECDH+ECDSA+AESGCM EECDH+aRSA+AESGCM EECDH+ECDSA+SHA384 EECDH+ECDSA+SHA256 EECDH+aRSA+SHA384 EECDH+aRSA+SHA256 EECDH+aRSA+RC4 EECDH EDH+aRSA !RC4 !aNULL !eNULL !LOW !3DES !MD5 !EXP !PSK !SRP !DSS";
74+
ssl_dhparam /etc/ssl/certs/dhparam.pem;
75+
ssl_prefer_server_ciphers on;
76+
ssl_protocols TLSv1 TLSv1.1 TLSv1.2;
77+
ssl_session_timeout 5m;
78+
79+
add_header Strict-Transport-Security "max-age=31536000";
80+
add_header X-Frame-Options SAMEORIGIN;
81+
82+
charset utf-8;
83+
client_max_body_size 25M;
84+
limit_req_status 429;
85+
port_in_redirect off;
86+
87+
gzip on;
88+
gzip_proxied any;
89+
gzip_buffers 16 8k;
90+
gzip_types *;
91+
92+
# Set the resolver so Nginx knows how to look up domain names.
93+
resolver 8.8.8.8;
94+
95+
# Set the eventual URL as a variable so that Nginx will resolve the name
96+
# each time, rather than just at startup.
97+
set $find_backend "https://find-data-beta.cloudapps.digital";
98+
set $geoserver_backend "http://osinspiremappingprod.ordnancesurvey.co.uk/geoserver/";
99+
100+
if ($http_transfer_encoding ~* chunked) {
101+
return 444;
102+
}
103+
104+
location @404 {
105+
proxy_set_header Host $http_host;
106+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
107+
proxy_set_header X-Real-IP $remote_addr;
108+
109+
proxy_pass $find_backend/404;
110+
}
111+
112+
location @500 {
113+
proxy_set_header Host $http_host;
114+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
115+
proxy_set_header X-Real-IP $remote_addr;
116+
117+
proxy_pass $find_backend/500;
118+
}
119+
120+
location @ckan {
121+
proxy_set_header Host $http_host;
122+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
123+
proxy_set_header X-Real-IP $remote_addr;
124+
125+
proxy_intercept_errors on;
126+
proxy_pass http://varnish;
127+
proxy_redirect off;
128+
129+
error_page 404 = @404;
130+
error_page 500 502 503 504 = @500;
131+
132+
rewrite ^/accessibility-statement/?$ /accessibility permanent;
133+
rewrite ^/contact/?$ /support permanent;
134+
rewrite ^/cookies-policy/?$ /cookies permanent;
135+
rewrite ^/dataset/?$ /search permanent;
136+
rewrite ^/faq/?$ /about permanent;
137+
rewrite ^/technical-details/?$ /about permanent;
138+
rewrite ^/terms-and-conditions/?$ /terms permanent;
139+
140+
rewrite ^/apps(.*)$ /site-changes permanent;
141+
rewrite ^/comments(.*)$ /site-changes permanent;
142+
rewrite ^/dataset/(.*)_slug/issues/(.*)_issue$ /site-changes permanent;
143+
rewrite ^/forum(.*)$ /site-changes permanent;
144+
rewrite ^/node(.*)$ /site-changes permanent;
145+
rewrite ^/reply(.*)$ /site-changes permanent;
146+
rewrite ^/glossary(.*)$ /site-changes permanent;
147+
rewrite ^/search/everything/?(.*)$ /search?q=$1 permanent;
148+
149+
rewrite ^/blog(.*)$ https://data.blog.gov.uk permanent;
150+
rewrite ^/guidance(.*)$ http://guidance.data.gov.uk$1 permanent;
151+
rewrite ^/sib_knowledge_box(.*)$ https://www.gov.uk/guidance/social-impact-bonds permanent;
152+
rewrite ^/social_investment(.*)$ http://webarchive.nationalarchives.gov.uk/https://data.gov.uk/social_investment$1 permanent;
153+
}
154+
155+
location @contracts_archive {
156+
proxy_set_header Host contractsfinder;
157+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
158+
proxy_set_header X-Real-IP $remote_addr;
159+
proxy_set_header X-Script-Name /contracts-archive; # Must be the same as the location
160+
161+
proxy_pass http://contractsfinder;
162+
}
163+
164+
location @find {
165+
proxy_set_header Host $http_host;
166+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
167+
proxy_set_header X-Forwarded-Proto $scheme;
168+
169+
proxy_intercept_errors on;
170+
proxy_pass $find_backend;
171+
proxy_redirect off;
172+
173+
recursive_error_pages on;
174+
175+
error_page 404 = @ckan;
176+
error_page 500 502 503 504 = @ckan;
177+
178+
if ($arg_legacy) {
179+
return 404;
180+
}
181+
}
182+
183+
location / {
184+
deny 78.238.200.47;
185+
186+
limit_req zone=botzone burst=20;
187+
188+
try_files $uri @find;
189+
}
190+
191+
location /api {
192+
193+
location /api/1/util/snippet/api_info.html {
194+
return 404;
195+
}
196+
197+
location /api/action/user_list {
198+
return 404;
199+
}
200+
201+
location /api/action/user_show {
202+
return 404;
203+
}
204+
205+
location /api/util/markdown {
206+
return 404;
207+
}
208+
209+
try_files $uri @ckan;
210+
}
211+
212+
location /assets {
213+
alias /vagrant/src/shared_dguk_assets/assets;
214+
gzip on;
215+
gzip_types *;
216+
}
217+
218+
location /data/preview_proxy {
219+
try_files $uri @ckan;
220+
}
221+
222+
location /contracts-archive {
223+
try_files $uri @contracts_archive;
224+
}
225+
226+
location /data/dumps {
227+
autoindex on;
228+
autoindex_exact_size off;
229+
230+
alias /mnt/shared/ckan_dumps/;
231+
}
232+
233+
location /data/dump_analysis {
234+
autoindex on;
235+
autoindex_exact_size off;
236+
237+
alias /mnt/shared/ckan_dump_analysis/;
238+
}
239+
240+
location /data/reports/mi {
241+
autoindex on;
242+
autoindex_exact_size off;
243+
244+
alias /mnt/shared/mi_reports/;
245+
}
246+
247+
location /data/resource {
248+
autoindex on;
249+
autoindex_exact_size off;
250+
251+
alias /mnt/shared/ckan_resource_store/;
252+
}
253+
254+
location /education-standards {
255+
proxy_set_header Host isb;
256+
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
257+
proxy_set_header X-Real-IP $remote_addr;
258+
259+
proxy_pass https://dfe-app1.codeenigma.net/;
260+
proxy_redirect http://isb/ http://$host/;
261+
}
262+
263+
location ~ "^/find-assets/.+-([0-9a-f]{32}|[0-9a-f]{64})\..+" {
264+
gzip_static on;
265+
expires max;
266+
add_header Cache-Control public;
267+
add_header ETag "";
268+
269+
try_files $uri @find;
270+
}
271+
272+
location /geoserver {
273+
proxy_pass $geoserver_backend;
274+
}
275+
276+
location /nginx_status {
277+
stub_status on; # activate stub_status module
278+
access_log off;
279+
allow 127.0.0.1; # restrict access to local only
280+
deny all;
281+
}
282+
283+
location /services {
284+
try_files $uri @ckan;
285+
}
286+
287+
location /user {
288+
try_files $uri @ckan;
289+
}
290+
291+
location /xmlrpc.php {
292+
return 403;
293+
}
294+
}

config/sidekiq.yml

+6
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
---
22
:concurrency: <%= ENV.fetch('RAILS_MAX_THREADS') { 5 } %>
3+
:queues:
4+
- sync
5+
- import
6+
:limits:
7+
sync: 1
8+
import: <%= ENV.fetch('RAILS_MAX_THREADS') { 5 } - 1 %>
39

410
staging:
511
:schedule:

0 commit comments

Comments
 (0)