Skip to content

Commit b68a48d

Browse files
authored
Filter canvas etl to single course (#2368)
1 parent b267c66 commit b68a48d

File tree

3 files changed

+44
-17
lines changed

3 files changed

+44
-17
lines changed

learning_resources/management/commands/backpopulate_canvas_courses.py

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,10 +20,25 @@ def add_arguments(self, parser):
2020
help="Force regenerate existing summaries/flashcards",
2121
)
2222

23+
parser.add_argument(
24+
"--canvas-ids",
25+
dest="canvas_ids",
26+
required=False,
27+
help="""
28+
If set, backpopulate only the canvas courses with these ids.
29+
The canvas id is the number in the url for the course on canvas
30+
or the numerical part at the start of the readable_id.
31+
Example: https://canvas.mit.edu/courses/1234567 -> 1234567
32+
Example: readable_id 1234567-foobar -> 1234567
33+
""",
34+
)
35+
2336
def handle(self, *args, **options):  # noqa: ARG002
    """
    Queue the task that populates Canvas courses from S3.

    ``--canvas-ids`` is read as a comma-separated list. Whitespace around
    each id is trimmed and blank entries are dropped, so ``"1, 2,"`` is
    sent to the task as ``["1", "2"]``. When the option is not given,
    ``None`` is sent and the task processes every course.

    If the option is given but contains no usable id (for example ``","``),
    the command reports the problem and does not start the task.
    """
    raw_ids = options.get("canvas_ids")
    canvas_ids = None
    if raw_ids:
        # Ids are compared verbatim against S3 folder names, so stray
        # whitespace or empty pieces would never match anything.
        canvas_ids = [
            piece.strip() for piece in raw_ids.split(",") if piece.strip()
        ]
        if not canvas_ids:
            # An empty list is falsy, and the task treats a falsy id list as
            # "sync all courses and remove stale ones". Bail out rather than
            # turn a malformed filter into a full sync.
            self.stderr.write(
                "No canvas ids found in --canvas-ids; nothing to backpopulate"
            )
            return

    task = sync_canvas_courses.delay(
        canvas_course_ids=canvas_ids,
        overwrite=options["force_overwrite"],
    )
    self.stdout.write(f"Started task {task} to get courses from Canvas")

learning_resources/tasks.py

Lines changed: 14 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -481,7 +481,7 @@ def ingest_canvas_course(archive_path, overwrite):
481481

482482

483483
@app.task(acks_late=True)
484-
def sync_canvas_courses(overwrite):
484+
def sync_canvas_courses(canvas_course_ids, overwrite):
485485
"""
486486
Sync all canvas course files
487487
@@ -494,12 +494,15 @@ def sync_canvas_courses(overwrite):
494494
exports = bucket.objects.filter(Prefix=s3_prefix)
495495
log.info("syncing all canvas courses")
496496
latest_archives = {}
497+
497498
for archive in exports:
498499
key = archive.key
499500
course_folder = key.lstrip(settings.CANVAS_COURSE_BUCKET_PREFIX).split("/")[0]
500501
log.info("processing course folder %s", course_folder)
501-
if course_folder not in latest_archives or (
502-
max(archive.last_modified, latest_archives[course_folder].last_modified)
502+
503+
if (not canvas_course_ids or course_folder in canvas_course_ids) and (
504+
(course_folder not in latest_archives)
505+
or max(archive.last_modified, latest_archives[course_folder].last_modified)
503506
== archive.last_modified
504507
):
505508
latest_archives[course_folder] = archive
@@ -513,12 +516,14 @@ def sync_canvas_courses(overwrite):
513516
overwrite=overwrite,
514517
)
515518
canvas_readable_ids.append(resource_readable_id)
516-
stale_courses = LearningResource.objects.filter(
517-
etl_source=ETLSource.canvas.name
518-
).exclude(readable_id__in=canvas_readable_ids)
519-
stale_courses.update(test_mode=False, published=False)
520-
[resource_unpublished_actions(resource) for resource in stale_courses]
521-
stale_courses.delete()
519+
520+
if not canvas_course_ids:
521+
stale_courses = LearningResource.objects.filter(
522+
etl_source=ETLSource.canvas.name
523+
).exclude(readable_id__in=canvas_readable_ids)
524+
stale_courses.update(test_mode=False, published=False)
525+
[resource_unpublished_actions(resource) for resource in stale_courses]
526+
stale_courses.delete()
522527

523528

524529
@app.task(bind=True)

learning_resources/tasks_test.py

Lines changed: 15 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -611,9 +611,8 @@ def test_scrape_marketing_pages(mocker, settings, mocked_celery):
611611
mock_group.assert_called_once()
612612

613613

614-
def test_sync_canvas_courses_removes_stale_resources(
615-
settings, mocker, django_assert_num_queries
616-
):
614+
@pytest.mark.parametrize("canvas_ids", [["1"], None])
615+
def test_sync_canvas_courses(settings, mocker, django_assert_num_queries, canvas_ids):
617616
"""
618617
sync_canvas_courses should unpublish and delete stale canvas LearningResources
619618
"""
@@ -622,10 +621,10 @@ def test_sync_canvas_courses_removes_stale_resources(
622621
mocker.patch("learning_resources.tasks.get_learning_course_bucket")
623622
mock_bucket = mocker.Mock()
624623
mock_archive1 = mocker.Mock()
625-
mock_archive1.key = "canvas/course1/archive1.zip"
624+
mock_archive1.key = "canvas/1/archive1.zip"
626625
mock_archive1.last_modified = now_in_utc()
627626
mock_archive2 = mocker.Mock()
628-
mock_archive2.key = "canvas/course2/archive2.zip"
627+
mock_archive2.key = "canvas/2/archive2.zip"
629628
mock_archive2.last_modified = now_in_utc() - timedelta(days=1)
630629
mock_bucket.objects.filter.return_value = [mock_archive1, mock_archive2]
631630
mocker.patch(
@@ -657,14 +656,22 @@ def test_sync_canvas_courses_removes_stale_resources(
657656
)
658657

659658
# Patch ingest_canvas_course to return the readable_ids for the two non-stale courses
660-
mocker.patch(
659+
mock_ingest_course = mocker.patch(
661660
"learning_resources.tasks.ingest_canvas_course",
662661
side_effect=[("course1", lr1.runs.first()), ("course2", lr2.runs.first())],
663662
)
664-
sync_canvas_courses(overwrite=False)
663+
sync_canvas_courses(canvas_course_ids=canvas_ids, overwrite=False)
665664

666665
# The stale course should be unpublished and deleted
667-
assert not LearningResource.objects.filter(id=lr_stale.id).exists()
666+
if canvas_ids:
667+
assert LearningResource.objects.filter(id=lr_stale.id).exists()
668+
else:
669+
assert not LearningResource.objects.filter(id=lr_stale.id).exists()
668670
# The non-stale courses should still exist
669671
assert LearningResource.objects.filter(id=lr1.id).exists()
670672
assert LearningResource.objects.filter(id=lr2.id).exists()
673+
674+
if canvas_ids:
675+
assert mock_ingest_course.call_count == 1
676+
else:
677+
assert mock_ingest_course.call_count == 2

0 commit comments

Comments
 (0)