Skip to content
This repository was archived by the owner on Jan 24, 2025. It is now read-only.

Commit 3d65fe0

Browse files
authored
Use aliases to enable zero down time when re-indexing (#433)
* Improve error messages in the log
* Use different datafile payloads for update and create
* Use aliases to enable zero-downtime re-indexing; refactoring; namespace the legacy syncing services
* Split the Reindex Service into separate concerns; add tests; update logging; add a test for IndexDeleter
* Fix Code Climate issues and disable two checks: "Missing top-level class documentation comment" and "Missing frozen string literal comment"
* Improve error handling and readability for AliasUpdater and IndexDeleter
* Use the 'drop' method instead of 'slice'
1 parent 79d16db commit 3d65fe0

14 files changed

+346
-95
lines changed

Diff for: .codeclimate.yml

+5
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,11 @@
11
engines:
22
rubocop:
33
enabled: true
4+
checks:
5+
Rubocop/Style/Documentation:
6+
enabled: false
7+
Rubocop/Style/FrozenStringLiteralComment:
8+
enabled: false
49
eslint:
510
enabled: true
611
csslint:

Diff for: app/models/dataset.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -158,7 +158,7 @@ def timeseries?
158158
private
159159

160160
def sync_with_legacy
161-
LegacySyncService.new(self).sync
161+
Legacy::LegacyToBetaSyncService.new(self).sync
162162
end
163163

164164
def send_to_search_index

Diff for: app/models/legacy/datafile.rb

+22-19
Original file line numberDiff line numberDiff line change
@@ -1,37 +1,40 @@
11
class Legacy::Datafile < SimpleDelegator
22

33
def update_payload
4-
{ "id" => uuid,
5-
"description" => name,
6-
"format" => format,
7-
"date" => build_date,
8-
"resource_type" => build_datafile_type,
9-
"url" => url,
10-
"created" => created_at
4+
{
5+
'id' => uuid,
6+
'description' => name,
7+
'format' => format,
8+
'date' => build_date,
9+
'resource_type' => build_datafile_type,
10+
'url' => url,
11+
'created' => created_at
1112
}.compact.to_json
1213
end
1314

1415
def create_payload
15-
{ "package_id" => dataset.ckan_uuid,
16-
"url" => url,
17-
"description" => name,
18-
"format" => format,
19-
"name" => name,
20-
"resource_type" => build_datafile_type,
21-
"size" => size,
22-
"created" => created_at
16+
{
17+
'package_id' => dataset.ckan_uuid,
18+
'url' => url,
19+
'description' => name,
20+
'format' => format,
21+
'name' => name,
22+
'resource_type' => build_datafile_type,
23+
'size' => size,
24+
'created' => created_at
25+
2326
}.compact.to_json
2427
end
2528

2629
private
2730

2831
def build_date
29-
return "" unless dataset.timeseries?
30-
end_date.presence.strftime("%d/%m/%Y")
32+
return '' unless dataset.timeseries?
33+
end_date.presence.strftime('%d/%m/%Y')
3134
end
3235

3336
def build_datafile_type
34-
return "" if type.blank?
35-
type == "Doc" ? "documentation" : "file"
37+
return '' if type.blank?
38+
type == 'Doc' ? 'documentation' : 'file'
3639
end
3740
end

Diff for: app/services/alias_updater_service.rb

+52
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
class AliasUpdaterService
2+
def initialize(args)
3+
@new_index_name = args[:new_index_name]
4+
@index_alias = args[:index_alias]
5+
@client = args[:client]
6+
@logger = args[:logger]
7+
end
8+
9+
def run
10+
remove_alias_from_old_index
11+
assign_alias_to_new_index
12+
logger.info "Alias '#{index_alias}' now pointing to '#{new_index_name}'"
13+
end
14+
15+
private
16+
17+
attr_reader :logger, :client, :index_alias, :new_index_name
18+
19+
def remove_alias_from_old_index
20+
client.indices.update_aliases body: {
21+
actions: [
22+
{
23+
remove: {
24+
index: Dataset.index_name,
25+
alias: index_alias
26+
}
27+
}
28+
]
29+
}
30+
rescue => e
31+
msg = "Could not remove alias.\n #{e.message}"
32+
logger.error msg
33+
Raven.capture_error msg
34+
end
35+
36+
def assign_alias_to_new_index
37+
client.indices.update_aliases body: {
38+
actions: [
39+
{
40+
add: {
41+
index: new_index_name,
42+
alias: index_alias
43+
}
44+
}
45+
]
46+
}
47+
rescue => e
48+
msg = "Could not update alias.\n #{e.message}"
49+
logger.error msg
50+
Raven.capture_error msg
51+
end
52+
end

Diff for: app/services/datasets_indexer_service.rb

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
class DatasetsIndexerService
2+
def initialize(args)
3+
@batch_size = args[:batch_size]
4+
@date = args[:date]
5+
@new_index_name = args[:new_index_name]
6+
@client = args[:client]
7+
@logger = args[:logger]
8+
end
9+
10+
def run
11+
number_datasets_processed = 0
12+
13+
create_new_index
14+
15+
Dataset.published.find_in_batches(batch_size: batch_size) do |datasets|
16+
logger.info "Batching #{datasets.length} datasets"
17+
bulk_index(datasets)
18+
number_datasets_processed += batch_size
19+
end
20+
21+
logger.info "Datasets indexed to #{new_index_name}"
22+
end
23+
24+
private
25+
26+
attr_reader :date, :new_index_name, :batch_size, :client, :logger
27+
28+
def create_new_index
29+
client.indices.create(
30+
index: new_index_name,
31+
body: { mappings: index_mapping }
32+
)
33+
end
34+
35+
def bulk_index(datasets)
36+
client.bulk(
37+
index: new_index_name,
38+
type: ::Dataset.__elasticsearch__.document_type,
39+
body: prepare_records(datasets)
40+
)
41+
rescue => e
42+
msg = "There was an error indexing datasets:\n#{e.message}"
43+
logger.error msg
44+
Raven.capture msg
45+
end
46+
47+
def index_mapping
48+
{
49+
dataset: {
50+
properties: {
51+
name: {
52+
type: 'string',
53+
index: 'not_analyzed'
54+
},
55+
uuid: {
56+
type: 'string',
57+
index: 'not_analyzed'
58+
},
59+
location1: {
60+
type: 'string',
61+
fields: {
62+
raw: {
63+
type: 'string',
64+
index: 'not_analyzed'
65+
}
66+
}
67+
},
68+
organisation: {
69+
type: 'nested',
70+
properties: {
71+
title: {
72+
type: 'string',
73+
fields: {
74+
raw: {
75+
type: 'string',
76+
index: 'not_analyzed'
77+
}
78+
}
79+
}
80+
}
81+
}
82+
}
83+
}
84+
}
85+
end
86+
87+
def prepare_records(datasets)
88+
datasets.map do |dataset|
89+
{ index: { _id: dataset.id, data: dataset.as_indexed_json } }
90+
end
91+
end
92+
end

Diff for: app/services/index_deletion_service.rb

+36
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
class IndexDeletionService
2+
def initialize(args)
3+
@index_alias = args[:index_alias]
4+
@client = args[:client]
5+
@logger = args[:logger]
6+
end
7+
8+
def run
9+
indexes = client.indices.get_aliases.keys
10+
indexes_to_be_deleted = select_indexes_for_deletion(indexes)
11+
delete(indexes_to_be_deleted)
12+
rescue => e
13+
msg = "Failed to delete old indexes.\n#{e.message}"
14+
logger.error msg
15+
Raven.capture_error msg
16+
end
17+
18+
private
19+
20+
attr_reader :client, :index_alias, :logger
21+
22+
def select_indexes_for_deletion(indexes)
23+
# Ensure that the three most recent indexes are not deleted
24+
indexes
25+
.select { |index_name| index_name.include? index_alias }
26+
.sort_by { |index_name| Time.parse(index_name.gsub(/"#{index_alias}_"/, '')) }
27+
.drop(3)
28+
end
29+
30+
def delete(indexes)
31+
indexes.each do |index|
32+
client.indices.delete index: index
33+
logger.info "Deleted #{index}"
34+
end
35+
end
36+
end

Diff for: app/services/beta_sync_service.rb renamed to app/services/legacy/beta_to_legacy_sync_service.rb

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
require 'util/metadata_tools'
22

3-
class BetaSyncService
3+
class Legacy::BetaToLegacySyncService
44
ENDPOINTS = {
55
modified_datasets: 'api/3/action/package_search?q=metadata_modified:[NOW-1DAY%20TO%20NOW]&rows=5000'.freeze,
66
new_datasets: 'api/3/action/package_search?q=metadata_created:[NOW-1DAY%20TO%20NOW]&rows=5000'.freeze
@@ -36,10 +36,10 @@ def new_datasets
3636

3737
def import(dataset)
3838
begin
39-
@logger.info "Attempting to save legacy dataset to postgres and elasticsearch - legacy_id: #{dataset["id"]}"
39+
@logger.info "Attempting to save legacy dataset to postgres and elasticsearch - legacy_id: #{dataset['id']}"
4040
MetadataTools.persist(dataset, @orgs_cache, @theme_cache)
4141
MetadataTools.index(dataset)
42-
@logger.info "Legacy dataset saved - legacy_id: #{dataset["id"]}"
42+
@logger.info "Legacy dataset saved - legacy_id: #{dataset['id']}"
4343
rescue => e
4444
Raven.capture_exception e.message
4545
end

Diff for: app/services/legacy_sync_service.rb renamed to app/services/legacy/legacy_to_beta_sync_service.rb

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
class LegacySyncService
1+
class Legacy::LegacyToBetaSyncService
22
attr_reader :dataset
33

44
def initialize(dataset)

Diff for: app/services/reindex_service.rb

+24
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
class ReindexService
2+
attr_reader :logger, :index_alias, :indexer, :alias_updater, :index_deleter
3+
4+
def initialize(args)
5+
@indexer = args[:indexer]
6+
@alias_updater = args[:alias_updater]
7+
@index_deleter = args[:index_deleter]
8+
@logger = args[:logger]
9+
end
10+
11+
def run
12+
logger.info "Indexing #{published_datasets_count} datasets"
13+
indexer.run
14+
alias_updater.run
15+
index_deleter.run
16+
logger.info 'Reindexing complete!'
17+
end
18+
19+
private
20+
21+
def published_datasets_count
22+
Dataset.published.count
23+
end
24+
end

0 commit comments

Comments
 (0)