Skip to content

Commit 7f7dc4d

Browse files
authored
Import files for all active runs (#2292)
1 parent 6910344 commit 7f7dc4d

File tree

7 files changed

+27
-60
lines changed

7 files changed

+27
-60
lines changed

learning_resources/etl/edx_shared.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
transform_content_files,
1717
)
1818
from learning_resources.models import LearningResourceRun
19-
from learning_resources.utils import content_files_loaded_actions
2019

2120
log = logging.getLogger(__name__)
2221

@@ -147,17 +146,7 @@ def sync_edx_course_files(
147146

148147
if not run:
149148
continue
150-
resource = run.learning_resource
151-
if run != (
152-
resource.next_run
153-
or resource.runs.filter(
154-
Q(published=True) | Q(learning_resource__test_mode=True)
155-
)
156-
.order_by("-start_date")
157-
.first()
158-
):
159-
log.info("Skipping %s, not the next / most recent run", run.run_id)
160-
continue
149+
161150
with TemporaryDirectory() as export_tempdir:
162151
course_tarpath = Path(export_tempdir, key.split("/")[-1])
163152
log.info("course tarpath for run %s is %s", run.run_id, course_tarpath)
@@ -169,8 +158,6 @@ def sync_edx_course_files(
169158
continue
170159
if run.checksum == checksum and not overwrite:
171160
log.info("Checksums match for %s, skipping load", key)
172-
# Ensure any content files for other runs in the course are deindexed
173-
content_files_loaded_actions(run=run, deindex_only=True)
174161
continue
175162
try:
176163
load_content_files(

learning_resources/etl/edx_shared_test.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ def test_sync_edx_course_files( # noqa: PLR0913
9898
sync_edx_course_files(
9999
source, [course.id for course in courses], keys, s3_prefix=s3_prefix
100100
)
101-
assert mock_transform.call_count == (2 if published else 0)
102-
assert mock_load_content_files.call_count == (2 if published else 0)
101+
assert mock_transform.call_count == (4 if published else 0)
102+
assert mock_load_content_files.call_count == (4 if published else 0)
103103
if published:
104104
for course in courses:
105105
mock_load_content_files.assert_any_call(course.next_run, fake_data)
@@ -114,7 +114,7 @@ def test_sync_edx_course_files_matching_checksum(
114114
run = LearningResourceFactory.create(
115115
is_course=True, create_runs=True, etl_source=ETLSource.mitxonline.name
116116
).next_run
117-
other_run = run.learning_resource.runs.exclude(id=run.id).first()
117+
run.learning_resource.runs.exclude(id=run.id).first()
118118
run.checksum = "123"
119119
run.save()
120120
mocker.patch(
@@ -123,9 +123,6 @@ def test_sync_edx_course_files_matching_checksum(
123123
mock_index = mocker.patch(
124124
"learning_resources_search.plugins.tasks.index_run_content_files"
125125
)
126-
mock_deindex = mocker.patch(
127-
"learning_resources_search.plugins.tasks.deindex_run_content_files.si"
128-
)
129126
mock_log = mocker.patch("learning_resources.etl.edx_shared.log.info")
130127
mock_load = mocker.patch("learning_resources.etl.edx_shared.load_content_files")
131128

@@ -142,7 +139,6 @@ def test_sync_edx_course_files_matching_checksum(
142139
)
143140
sync_edx_course_files("mitxonline", [run.learning_resource.id], [key])
144141
mock_log.assert_any_call("Checksums match for %s, skipping load", key)
145-
mock_deindex.assert_called_once_with(other_run.id, unpublished_only=False)
146142
mock_load.assert_not_called()
147143
mock_index.assert_not_called()
148144

learning_resources/etl/loaders.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -801,7 +801,7 @@ def load_content_files(
801801

802802
if calc_completeness:
803803
calculate_completeness(course_run, content_tags=content_tags)
804-
content_files_loaded_actions(run=course_run, deindex_only=False)
804+
content_files_loaded_actions(run=course_run)
805805

806806
return content_files_ids
807807
return None

learning_resources/etl/loaders_test.py

Lines changed: 8 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -967,22 +967,19 @@ def test_load_programs(mocker, mock_blocklist, mock_duplicates):
967967

968968

969969
@pytest.mark.parametrize("is_published", [True, False])
970-
@pytest.mark.parametrize("extra_run", [True, False])
971970
@pytest.mark.parametrize("calc_score", [True, False])
972-
def test_load_content_files(mocker, is_published, extra_run, calc_score):
971+
def test_load_content_files(mocker, is_published, calc_score):
973972
"""Test that load_content_files calls the expected functions"""
974973
course = LearningResourceFactory.create(is_course=True, create_runs=False)
975974
course_run = LearningResourceRunFactory.create(
976975
published=is_published, learning_resource=course
977976
)
978-
if extra_run:
979-
LearningResourceRunFactory.create(
980-
published=is_published,
981-
learning_resource=course,
982-
start_date=now_in_utc() - timedelta(days=365),
983-
)
984-
run_count = 2 if extra_run else 1
985-
assert course.runs.count() == run_count
977+
LearningResourceRunFactory.create(
978+
published=is_published,
979+
learning_resource=course,
980+
start_date=now_in_utc() - timedelta(days=365),
981+
)
982+
assert course.runs.count() == 2
986983

987984
deleted_content_file = ContentFileFactory.create(run=course_run)
988985
returned_content_file_id = deleted_content_file.id + 1
@@ -1011,9 +1008,7 @@ def test_load_content_files(mocker, is_published, extra_run, calc_score):
10111008
load_content_files(course_run, content_data, calc_completeness=calc_score)
10121009
assert mock_load_content_file.call_count == len(content_data)
10131010
assert mock_bulk_index.call_count == (1 if is_published else 0)
1014-
assert mock_bulk_delete.call_count == (
1015-
run_count if not is_published else run_count - 1
1016-
)
1011+
assert mock_bulk_delete.call_count == 0 if is_published else 1
10171012
assert mock_calc_score.call_count == (1 if calc_score else 0)
10181013
deleted_content_file.refresh_from_db()
10191014
assert not deleted_content_file.published
@@ -1045,10 +1040,6 @@ def test_load_test_mode_resource_content_files(
10451040
autospec=True,
10461041
return_value=[],
10471042
)
1048-
1049-
mocker.patch(
1050-
"learning_resources.etl.edx_shared.content_files_loaded_actions",
1051-
)
10521043
mocker.patch(
10531044
"learning_resources_search.plugins.tasks.deindex_run_content_files",
10541045
autospec=True,

learning_resources/hooks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def offeror_delete(self, offeror):
8181
"""Trigger actions to delete a learning resource offeror"""
8282

8383
@hookspec
84-
def content_files_loaded(self, run, deindex_only):
84+
def content_files_loaded(self, run):
8585
"""Trigger actions after content files are loaded for a run"""
8686

8787

learning_resources/utils.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -322,13 +322,13 @@ def bulk_resources_unpublished_actions(resource_ids: list[int], resource_type: s
322322
)
323323

324324

325-
def content_files_loaded_actions(run: LearningResourceRun, deindex_only):
325+
def content_files_loaded_actions(run: LearningResourceRun):
326326
"""
327327
Trigger plugins when content files are loaded for a LearningResourceRun
328328
"""
329329
pm = get_plugin_manager()
330330
hook = pm.hook
331-
hook.content_files_loaded(run=run, deindex_only=deindex_only)
331+
hook.content_files_loaded(run=run)
332332

333333

334334
def resource_run_unpublished_actions(run: LearningResourceRun):

learning_resources_search/plugins.py

Lines changed: 11 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -187,36 +187,29 @@ def resource_run_delete(self, run):
187187
run.delete()
188188

189189
@hookimpl
190-
def content_files_loaded(self, run, deindex_only):
190+
def content_files_loaded(self, run):
191191
"""
192192
Upsert a created/modified run's content files
193193
194194
Args:
195195
run(LearningResourceRun): The LearningResourceRun that was upserted
196-
deindex_only(bool): Only deindex the other runs' content files
197196
"""
198-
course_runs_to_deindex = run.learning_resource.runs.all()
199197
if run.published:
200-
# Only one run per course should have contentfiles at any given time
201-
course_runs_to_deindex = course_runs_to_deindex.exclude(id=run.id)
202-
if not deindex_only:
203-
index_tasks = []
204-
if django_settings.QDRANT_ENABLE_INDEXING_PLUGIN_HOOKS:
205-
index_tasks.append(
206-
vector_tasks.embed_run_content_files.si(run.id),
207-
)
198+
index_tasks = []
199+
if django_settings.QDRANT_ENABLE_INDEXING_PLUGIN_HOOKS:
208200
index_tasks.append(
209-
tasks.index_run_content_files.si(run.id),
201+
vector_tasks.embed_run_content_files.si(run.id),
210202
)
211-
try_with_retry_as_task(chain(*index_tasks))
212-
for course_run in course_runs_to_deindex:
203+
index_tasks.append(
204+
tasks.index_run_content_files.si(run.id),
205+
)
206+
try_with_retry_as_task(chain(*index_tasks))
207+
else:
213208
deindex_tasks = [
214-
tasks.deindex_run_content_files.si(
215-
course_run.id, unpublished_only=False
216-
),
209+
tasks.deindex_run_content_files.si(run.id, unpublished_only=False),
217210
]
218211
if django_settings.QDRANT_ENABLE_INDEXING_PLUGIN_HOOKS:
219212
deindex_tasks.append(
220-
vector_tasks.remove_run_content_files.si(course_run.id),
213+
vector_tasks.remove_run_content_files.si(run.id),
221214
)
222215
try_with_retry_as_task(chain(*deindex_tasks))

0 commit comments

Comments
 (0)