Skip to content

Commit 52c9c0d

Browse files
author
Douwe Maan
committed
Merge branch 'fix-stuck-import-jobs-query-performance-issue' into 'master'
StuckImportJobsWorker query performance optimization See merge request gitlab-org/gitlab-ce!22879
2 parents bef19a2 + 46b2884 commit 52c9c0d

File tree

4 files changed

+51
-35
lines changed

4 files changed

+51
-35
lines changed

app/models/project_import_state.rb

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,4 +56,17 @@ class ProjectImportState < ActiveRecord::Base
5656
end
5757
end
5858
end
59+
60+
def mark_as_failed(error_message)
61+
original_errors = errors.dup
62+
sanitized_message = Gitlab::UrlSanitizer.sanitize(error_message)
63+
64+
fail_op
65+
66+
update_column(:last_error, sanitized_message)
67+
rescue ActiveRecord::ActiveRecordError => e
68+
Rails.logger.error("Error setting import status to failed: #{e.message}. Original error: #{sanitized_message}")
69+
ensure
70+
@errors = original_errors
71+
end
5972
end

app/workers/stuck_import_jobs_worker.rb

Lines changed: 22 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -7,60 +7,58 @@ class StuckImportJobsWorker
77
IMPORT_JOBS_EXPIRATION = 15.hours.to_i
88

99
def perform
10-
projects_without_jid_count = mark_projects_without_jid_as_failed!
11-
projects_with_jid_count = mark_projects_with_jid_as_failed!
10+
import_state_without_jid_count = mark_import_states_without_jid_as_failed!
11+
import_state_with_jid_count = mark_import_states_with_jid_as_failed!
1212

1313
Gitlab::Metrics.add_event(:stuck_import_jobs,
14-
projects_without_jid_count: projects_without_jid_count,
15-
projects_with_jid_count: projects_with_jid_count)
14+
projects_without_jid_count: import_state_without_jid_count,
15+
projects_with_jid_count: import_state_with_jid_count)
1616
end
1717

1818
private
1919

20-
def mark_projects_without_jid_as_failed!
21-
enqueued_projects_without_jid.each do |project|
22-
project.mark_import_as_failed(error_message)
20+
def mark_import_states_without_jid_as_failed!
21+
enqueued_import_states_without_jid.each do |import_state|
22+
import_state.mark_as_failed(error_message)
2323
end.count
2424
end
2525

2626
# rubocop: disable CodeReuse/ActiveRecord
27-
def mark_projects_with_jid_as_failed!
28-
# TODO: Rollback this change to use SQL through #pluck
29-
jids_and_ids = enqueued_projects_with_jid.map { |project| [project.import_jid, project.id] }.to_h
27+
def mark_import_states_with_jid_as_failed!
28+
jids_and_ids = enqueued_import_states_with_jid.pluck(:jid, :id).to_h
3029

3130
# Find the jobs that aren't currently running or that exceeded the threshold.
3231
completed_jids = Gitlab::SidekiqStatus.completed_jids(jids_and_ids.keys)
3332
return unless completed_jids.any?
3433

35-
completed_project_ids = jids_and_ids.values_at(*completed_jids)
34+
completed_import_state_ids = jids_and_ids.values_at(*completed_jids)
3635

37-
# We select the projects again, because they may have transitioned from
36+
# We select the import states again, because they may have transitioned from
3837
# scheduled/started to finished/failed while we were looking up their Sidekiq status.
39-
completed_projects = enqueued_projects_with_jid.where(id: completed_project_ids)
38+
completed_import_states = enqueued_import_states_with_jid.where(id: completed_import_state_ids)
4039

41-
Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_projects.map(&:import_jid).join(', ')}")
40+
completed_import_state_jids = completed_import_states.map { |import_state| import_state.jid }.join(', ')
41+
Rails.logger.info("Marked stuck import jobs as failed. JIDs: #{completed_import_state_jids}")
4242

43-
completed_projects.each do |project|
44-
project.mark_import_as_failed(error_message)
43+
completed_import_states.each do |import_state|
44+
import_state.mark_as_failed(error_message)
4545
end.count
4646
end
4747
# rubocop: enable CodeReuse/ActiveRecord
4848

49-
# rubocop: disable CodeReuse/ActiveRecord
50-
def enqueued_projects
51-
Project.joins_import_state.where("(import_state.status = 'scheduled' OR import_state.status = 'started') OR (projects.import_status = 'scheduled' OR projects.import_status = 'started')")
49+
def enqueued_import_states
50+
ProjectImportState.with_status([:scheduled, :started])
5251
end
53-
# rubocop: enable CodeReuse/ActiveRecord
5452

5553
# rubocop: disable CodeReuse/ActiveRecord
56-
def enqueued_projects_with_jid
57-
enqueued_projects.where.not("import_state.jid IS NULL AND projects.import_jid IS NULL")
54+
def enqueued_import_states_with_jid
55+
enqueued_import_states.where.not(jid: nil)
5856
end
5957
# rubocop: enable CodeReuse/ActiveRecord
6058

6159
# rubocop: disable CodeReuse/ActiveRecord
62-
def enqueued_projects_without_jid
63-
enqueued_projects.where("import_state.jid IS NULL AND projects.import_jid IS NULL")
60+
def enqueued_import_states_without_jid
61+
enqueued_import_states.where(jid: nil)
6462
end
6563
# rubocop: enable CodeReuse/ActiveRecord
6664

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
title: Improves performance of stuck import jobs detection
3+
merge_request: 22879
4+
author:
5+
type: performance

spec/workers/stuck_import_jobs_worker_spec.rb

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,29 +8,29 @@
88
context 'when the import status was already updated' do
99
before do
1010
allow(Gitlab::SidekiqStatus).to receive(:completed_jids) do
11-
project.import_start
12-
project.import_finish
11+
import_state.start
12+
import_state.finish
1313

14-
[project.import_jid]
14+
[import_state.jid]
1515
end
1616
end
1717

1818
it 'does not mark the project as failed' do
1919
worker.perform
2020

21-
expect(project.reload.import_status).to eq('finished')
21+
expect(import_state.reload.status).to eq('finished')
2222
end
2323
end
2424

2525
context 'when the import status was not updated' do
2626
before do
27-
allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([project.import_jid])
27+
allow(Gitlab::SidekiqStatus).to receive(:completed_jids).and_return([import_state.jid])
2828
end
2929

3030
it 'marks the project as failed' do
3131
worker.perform
3232

33-
expect(project.reload.import_status).to eq('failed')
33+
expect(import_state.reload.status).to eq('failed')
3434
end
3535
end
3636
end
@@ -41,27 +41,27 @@
4141
end
4242

4343
it 'does not mark the project as failed' do
44-
expect { worker.perform }.not_to change { project.reload.import_status }
44+
expect { worker.perform }.not_to change { import_state.reload.status }
4545
end
4646
end
4747
end
4848

4949
describe 'with scheduled import_status' do
5050
it_behaves_like 'project import job detection' do
51-
let(:project) { create(:project, :import_scheduled) }
51+
let(:import_state) { create(:project, :import_scheduled).import_state }
5252

5353
before do
54-
project.import_state.update(jid: '123')
54+
import_state.update(jid: '123')
5555
end
5656
end
5757
end
5858

5959
describe 'with started import_status' do
6060
it_behaves_like 'project import job detection' do
61-
let(:project) { create(:project, :import_started) }
61+
let(:import_state) { create(:project, :import_started).import_state }
6262

6363
before do
64-
project.import_state.update(jid: '123')
64+
import_state.update(jid: '123')
6565
end
6666
end
6767
end

0 commit comments

Comments
 (0)