Skip to content

Commit 95487b0

Browse files
committed
Start Scraping HCB
1 parent 2d42062 commit 95487b0

16 files changed

+840
-11
lines changed

Gemfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ gem "rack-attack"
103103

104104
gem "active_storage_validations"
105105

106+
gem "nokogiri"
107+
106108
group :development, :test do
107109
# See https://guides.rubyonrails.org/debugging_rails_applications.html#debugging-with-the-debug-gem
108110
gem "debug", platforms: %i[ mri windows ], require: "debug/prelude"

Gemfile.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,7 @@ DEPENDENCIES
590590
lz_string
591591
marksmith
592592
mission_control-jobs
593+
nokogiri
593594
pagy
594595
paper_trail
595596
pg (~> 1.6)

app/jobs/hcb_sync_job.rb

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
# Pulls the grants (and their transactions) that HcbScraperService returns for
# one organisation and bulk-writes them into HcbGrant / HcbTransaction.
#
# Rows that already exist are refreshed via upsert_all; unseen rows are added
# via insert_all. Grants whose last_seen_at is older than seven days are
# flagged as soft-deleted at the end of every run.
class HcbSyncJob < ApplicationJob
  queue_as :default

  # Grant columns copied from the scraped hash. On update, a nil/missing value
  # keeps whatever is already stored.
  GRANT_SYNCED_FIELDS = %i[
    status
    initial_amount_cents
    balance_cents
    to_user_name
    to_user_avatar
    for_reason
    issued_at
    source_url
  ].freeze

  # Transaction columns copied from the scraped hash, with the same
  # "nil keeps the stored value" rule on update.
  TRANSACTION_SYNCED_FIELDS = %i[
    status
    amount_cents
    receipt_count
    memo
    hcb_created_at
  ].freeze

  private_constant :GRANT_SYNCED_FIELDS, :TRANSACTION_SYNCED_FIELDS

  # @param org_id [String, nil] organisation to sync; falls back to
  #   HcbScraperService.default_org_id when nil
  # @param fast [Boolean] forwarded unchanged to HcbScraperService.fetch_grants
  def perform(org_id = nil, fast: false)
    org_id ||= HcbScraperService.default_org_id

    sync_grants(org_id, fast: fast)
  end

  private

  # Fetches the grants for +org_id+, writes them in bulk, syncs their
  # transactions and finally soft-deletes grants that have gone stale.
  def sync_grants(org_id, fast:)
    now = Time.current

    Rails.logger.info "Starting HCB sync for org #{org_id} (fast=#{fast})"

    grant_hashes = HcbScraperService.fetch_grants(org_id, fast: fast)

    Rails.logger.info "Fetched #{grant_hashes.size} grants from HCB"

    # A blank grant_id would violate the NOT NULL column and abort the whole
    # batch; a repeated grant_id would make upsert_all touch one row twice,
    # which PostgreSQL rejects. Keep the first occurrence of each id.
    unique_grants = grant_hashes
      .reject { |g| g[:grant_id].blank? }
      .uniq { |g| g[:grant_id] }

    skipped = grant_hashes.size - unique_grants.size
    Rails.logger.warn "Skipping #{skipped} HCB grants with blank or duplicate grant_id" if skipped.positive?

    existing_grants = HcbGrant.where(
      org_id: org_id,
      grant_id: unique_grants.map { |g| g[:grant_id] }
    ).index_by(&:grant_id)

    grants_to_insert = []
    grants_to_update = []
    # Maps HcbGrant primary key => array of scraped transaction hashes.
    grant_transaction_map = {}

    unique_grants.each do |grant_hash|
      existing_grant = existing_grants[grant_hash[:grant_id]]

      if existing_grant
        grants_to_update << grant_update_attrs(grant_hash, existing_grant, org_id, now)
        grant_transaction_map[existing_grant.id] = grant_hash[:transactions] || []
      else
        grants_to_insert << grant_insert_attrs(grant_hash, org_id, now)
      end
    rescue StandardError => e
      handle_grant_sync_error(grant_hash, e, org_id)
    end

    if grants_to_insert.any?
      result = HcbGrant.insert_all(
        grants_to_insert,
        returning: [ :id, :grant_id ]
      )

      # New rows only get a primary key once inserted, so their transactions
      # are attached here. A single index avoids rescanning the scraped list
      # for every inserted row.
      scraped_by_grant_id = unique_grants.index_by { |g| g[:grant_id] }
      result.rows.each do |id, grant_id|
        grant_hash = scraped_by_grant_id[grant_id]
        grant_transaction_map[id] = grant_hash[:transactions] || [] if grant_hash
      end
    end

    if grants_to_update.any?
      HcbGrant.upsert_all(
        grants_to_update,
        unique_by: [ :org_id, :grant_id ]
      )
    end

    sync_all_transactions(grant_transaction_map, org_id, now)

    soft_delete_stale_grants(org_id, now)

    Rails.logger.info "Completed HCB sync for org #{org_id}"
  end

  # Attributes for refreshing an existing grant: scraped values win, stored
  # values fill any gaps, and identity/first-seen columns are carried over.
  def grant_update_attrs(grant_hash, existing_grant, org_id, now)
    synced = GRANT_SYNCED_FIELDS.to_h do |field|
      [ field, grant_hash[field] || existing_grant.public_send(field) ]
    end

    synced.merge(
      # Fall back to the org being synced when the scraper omits :org_id.
      org_id: grant_hash[:org_id] || org_id,
      grant_id: grant_hash[:grant_id],
      last_seen_at: now,
      last_synced_at: now,
      soft_deleted_at: nil, # seeing the grant again clears a soft delete
      id: existing_grant.id,
      first_seen_at: existing_grant.first_seen_at,
      created_at: existing_grant.created_at,
      updated_at: now
    )
  end

  # Attributes for a grant seen for the first time. Every synced field is
  # present (possibly nil) so all rows passed to insert_all share one key set.
  def grant_insert_attrs(grant_hash, org_id, now)
    synced = GRANT_SYNCED_FIELDS.to_h { |field| [ field, grant_hash[field] ] }

    synced.merge(
      org_id: grant_hash[:org_id] || org_id,
      grant_id: grant_hash[:grant_id],
      last_seen_at: now,
      last_synced_at: now,
      soft_deleted_at: nil,
      first_seen_at: now,
      created_at: now,
      updated_at: now,
      sync_failures_count: 0
    )
  end

  # Bulk-writes the transactions of every grant in +grant_transaction_map+.
  #
  # @param grant_transaction_map [Hash{Integer => Array<Hash>}] HcbGrant
  #   primary key => scraped transaction hashes
  # @param org_id [String] organisation the transactions are stored under
  # @param now [Time] timestamp stamped on every written row
  def sync_all_transactions(grant_transaction_map, org_id, now)
    return if grant_transaction_map.empty?

    # Transactions without an id cannot be stored; a repeated id within one
    # grant would make upsert_all touch the same row twice.
    tx_by_grant = grant_transaction_map.transform_values do |tx_hashes|
      tx_hashes
        .reject { |tx| tx[:transaction_id].blank? }
        .uniq { |tx| tx[:transaction_id] }
    end

    transaction_ids = tx_by_grant.values.flat_map { |txs| txs.map { |tx| tx[:transaction_id] } }.uniq
    return if transaction_ids.empty?

    # NOTE(review): existing rows are matched per grant, while the unique
    # index is (org_id, transaction_id). A transaction already stored under a
    # different grant of the same org goes to insert_all and is skipped there
    # rather than updated - confirm that is intended.
    existing_transactions = HcbTransaction.where(
      hcb_grant_id: tx_by_grant.keys,
      transaction_id: transaction_ids
    ).index_by { |tx| [ tx.hcb_grant_id, tx.transaction_id ] }

    transactions_to_insert = []
    transactions_to_update = []

    tx_by_grant.each do |grant_id, tx_hashes|
      tx_hashes.each do |tx_hash|
        existing_tx = existing_transactions[[ grant_id, tx_hash[:transaction_id] ]]

        if existing_tx
          transactions_to_update << transaction_update_attrs(tx_hash, existing_tx, grant_id, org_id, now)
        else
          transactions_to_insert << transaction_insert_attrs(tx_hash, grant_id, org_id, now)
        end
      end
    end

    HcbTransaction.insert_all(transactions_to_insert) if transactions_to_insert.any?
    HcbTransaction.upsert_all(transactions_to_update, unique_by: [ :org_id, :transaction_id ]) if transactions_to_update.any?
  end

  # Attributes for refreshing an existing transaction; stored values fill in
  # for anything the scraper did not return.
  def transaction_update_attrs(tx_hash, existing_tx, grant_id, org_id, now)
    synced = TRANSACTION_SYNCED_FIELDS.to_h do |field|
      [ field, tx_hash[field] || existing_tx.public_send(field) ]
    end

    synced.merge(
      hcb_grant_id: grant_id,
      org_id: org_id,
      transaction_id: tx_hash[:transaction_id],
      last_seen_at: now,
      last_synced_at: now,
      id: existing_tx.id,
      first_seen_at: existing_tx.first_seen_at,
      created_at: existing_tx.created_at,
      updated_at: now
    )
  end

  # Attributes for a transaction seen for the first time.
  def transaction_insert_attrs(tx_hash, grant_id, org_id, now)
    synced = TRANSACTION_SYNCED_FIELDS.to_h { |field| [ field, tx_hash[field] ] }

    synced.merge(
      hcb_grant_id: grant_id,
      org_id: org_id,
      transaction_id: tx_hash[:transaction_id],
      last_seen_at: now,
      last_synced_at: now,
      first_seen_at: now,
      created_at: now,
      updated_at: now
    )
  end

  # Logs and reports a per-grant failure, then records it on the stored grant
  # (failure counter + truncated error text) when such a grant exists.
  #
  # @param grant_hash [Hash] the scraped grant that failed
  # @param error [StandardError] the exception raised while preparing it
  # @param org_id [String, nil] used to find the grant when the hash carries
  #   no :org_id of its own
  def handle_grant_sync_error(grant_hash, error, org_id = nil)
    grant_id = grant_hash[:grant_id]

    Rails.logger.error "Failed to sync grant #{grant_id}: #{error.message}"
    Sentry.capture_exception(error, extra: { grant_id: grant_id })

    grant = HcbGrant.find_by(
      org_id: grant_hash[:org_id] || org_id,
      grant_id: grant_id
    )

    return unless grant

    grant.increment!(:sync_failures_count)
    grant.update_column(:last_sync_error, "#{error.class}: #{error.message}".truncate(1000))
  end

  # Flags every not-yet-deleted grant of +org_id+ whose last_seen_at is more
  # than seven days before +now+ as soft-deleted.
  def soft_delete_stale_grants(org_id, now)
    stale_threshold = now - 7.days

    HcbGrant.where(org_id: org_id)
      .where("last_seen_at < ?", stale_threshold)
      .where(soft_deleted_at: nil)
      .update_all(soft_deleted_at: now)
  end
end

app/models/hcb_grant.rb

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
# == Schema Information
2+
#
3+
# Table name: hcb_grants
4+
#
5+
# id :bigint not null, primary key
6+
# balance_cents :integer
7+
# first_seen_at :datetime not null
8+
# for_reason :text
9+
# initial_amount_cents :integer
10+
# issued_at :datetime
11+
# last_seen_at :datetime not null
12+
# last_sync_error :text
13+
# last_synced_at :datetime
14+
# soft_deleted_at :datetime
15+
# source_url :string
16+
# status :string
17+
# sync_failures_count :integer default(0), not null
18+
# to_user_avatar :text
19+
# to_user_name :string
20+
# created_at :datetime not null
21+
# updated_at :datetime not null
22+
# grant_id :string not null
23+
# org_id :string not null
24+
#
25+
# Indexes
26+
#
27+
# index_hcb_grants_on_last_seen_at (last_seen_at)
28+
# index_hcb_grants_on_org_id_and_grant_id (org_id,grant_id) UNIQUE
29+
# index_hcb_grants_on_soft_deleted_at (soft_deleted_at)
30+
#
31+
# Stored record of an HCB grant, together with the transactions attached to it.
class HcbGrant < ApplicationRecord
  # Destroying a grant also destroys its transactions.
  has_many :hcb_transactions, dependent: :destroy

  # Identity and seen-at timestamps must always be present.
  validates :org_id, :grant_id, :first_seen_at, :last_seen_at, presence: true

  # Grants with no soft_deleted_at timestamp.
  scope :active, -> { where(soft_deleted_at: nil) }

  # Grants that carry a soft_deleted_at timestamp.
  scope :soft_deleted, -> { where.not(soft_deleted_at: nil) }
end

app/models/hcb_transaction.rb

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# == Schema Information
2+
#
3+
# Table name: hcb_transactions
4+
#
5+
# id :bigint not null, primary key
6+
# amount_cents :integer
7+
# first_seen_at :datetime not null
8+
# hcb_created_at :datetime
9+
# last_seen_at :datetime not null
10+
# last_synced_at :datetime
11+
# memo :text
12+
# receipt_count :integer
13+
# source_url :string
14+
# status :string
15+
# created_at :datetime not null
16+
# updated_at :datetime not null
17+
# hcb_grant_id :bigint not null
18+
# org_id :string not null
19+
# transaction_id :string not null
20+
#
21+
# Indexes
22+
#
23+
# index_hcb_transactions_on_hcb_grant_id (hcb_grant_id)
24+
# index_hcb_transactions_on_last_seen_at (last_seen_at)
25+
# index_hcb_transactions_on_org_id_and_transaction_id (org_id,transaction_id) UNIQUE
26+
#
27+
# Foreign Keys
28+
#
29+
# fk_rails_... (hcb_grant_id => hcb_grants.id)
30+
#
31+
# Stored record of a single transaction belonging to an HcbGrant.
class HcbTransaction < ApplicationRecord
  belongs_to :hcb_grant

  # Identity and seen-at timestamps must always be present.
  validates :org_id, :transaction_id, :first_seen_at, :last_seen_at, presence: true
end

0 commit comments

Comments
 (0)