Skip to content

Commit

Permalink
perf: database improvements for Course.list
Browse files Browse the repository at this point in the history
  • Loading branch information
zawan-ila committed Jan 17, 2024
1 parent 5e867ce commit 79f5e90
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 15 deletions.
29 changes: 22 additions & 7 deletions course_discovery/apps/api/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import pytz
import waffle # lint-amnesty, pylint: disable=invalid-django-waffle-import
from django.contrib.auth import get_user_model
from django.db.models import Count
from django.db.models.query import Prefetch
from django.utils.text import slugify
from django.utils.translation import gettext_lazy as _
Expand Down Expand Up @@ -39,11 +40,12 @@
from course_discovery.apps.course_metadata.models import (
FAQ, AbstractLocationRestrictionModel, AdditionalMetadata, AdditionalPromoArea, CertificateInfo, Collaborator,
CorporateEndorsement, Course, CourseEditor, CourseEntitlement, CourseLocationRestriction, CourseReview, CourseRun,
CourseRunType, CourseType, Curriculum, CurriculumCourseMembership, CurriculumProgramMembership, Degree,
DegreeAdditionalMetadata, DegreeCost, DegreeDeadline, Endorsement, Fact, GeoLocation, IconTextPairing, Image,
LevelType, Mode, Organization, Pathway, Person, PersonAreaOfExpertise, PersonSocialNetwork, Position, Prerequisite,
ProductMeta, ProductValue, Program, ProgramLocationRestriction, ProgramSubscription, ProgramSubscriptionPrice,
ProgramType, Ranking, Seat, SeatType, Source, Specialization, Subject, TaxiForm, Topic, Track, Video
CourseRunType, CourseType, CourseUrlSlug, Curriculum, CurriculumCourseMembership, CurriculumProgramMembership,
Degree, DegreeAdditionalMetadata, DegreeCost, DegreeDeadline, Endorsement, Fact, GeoLocation, IconTextPairing,
Image, LevelType, Mode, Organization, Pathway, Person, PersonAreaOfExpertise, PersonSocialNetwork, Position,
Prerequisite, ProductMeta, ProductValue, Program, ProgramLocationRestriction, ProgramSubscription,
ProgramSubscriptionPrice, ProgramType, Ranking, Seat, SeatType, Source, Specialization, Subject, TaxiForm, Topic,
Track, Video
)
from course_discovery.apps.course_metadata.toggles import IS_COURSE_RUN_VARIANT_ID_EDITABLE
from course_discovery.apps.course_metadata.utils import (
Expand Down Expand Up @@ -888,14 +890,20 @@ def prefetch_queryset(cls, queryset=None):
# Explicitly check for None to avoid returning all Programs when the
# queryset passed in happens to be empty.
queryset = queryset if queryset is not None else Program.objects.all()
return queryset.select_related('type').prefetch_related('type__translations')
return queryset.select_related('type', 'partner').prefetch_related(
'type__translations'
).annotate(
Count('courses', distinct=True)
)

class Meta:
model = Program
fields = ('uuid', 'title', 'type', 'type_attrs', 'marketing_slug', 'marketing_url', 'number_of_courses',)
read_only_fields = ('uuid', 'marketing_url', 'number_of_courses', 'type_attrs')

def get_number_of_courses(self, obj):
if hasattr(obj, 'courses__count'):
return obj.courses__count
return obj.courses.count()


Expand Down Expand Up @@ -1330,7 +1338,6 @@ def prefetch_queryset(cls, partner, queryset=None, course_runs=None): # lint-am
queryset = queryset if queryset is not None else Course.objects.filter(partner=partner)

return queryset.select_related(
'level_type',
'video',
'video__image',
'partner',
Expand All @@ -1344,12 +1351,20 @@ def prefetch_queryset(cls, partner, queryset=None, course_runs=None): # lint-am
'in_year_value',
'location_restriction'
).prefetch_related(
# The level_type is here instead of inside select_related to
# prevent the optimizer from choosing a poor query plan
'level_type',
'expected_learning_items',
'level_type__translations',
'prerequisites',
'collaborators',
'topics',
'url_slug_history',
Prefetch(
'url_slug_history',
queryset=CourseUrlSlug.objects.filter(is_active=True),
to_attr='_prefetched_active_slug'
),
'url_redirects',
'editors',
cls.prefetch_course_runs(CourseRunSerializer, course_runs),
Expand Down
8 changes: 4 additions & 4 deletions course_discovery/apps/api/v1/tests/test_views/test_courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ def test_get_exclude_deleted_programs(self):
""" Verify the endpoint returns no deleted associated programs """
ProgramFactory(courses=[self.course], status=ProgramStatus.Deleted)
url = reverse('api:v1:course-detail', kwargs={'key': self.course.key})
with self.assertNumQueries(26):
with self.assertNumQueries(29):
response = self.client.get(url)
assert response.status_code == 200
assert response.data.get('programs') == []
Expand Down Expand Up @@ -300,7 +300,7 @@ def test_list_query(self):

# Known to be flaky prior to the addition of tearDown()
# and logout() code which is the same number of additional queries
with self.assertNumQueries(40, threshold=3):
with self.assertNumQueries(36, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

Expand All @@ -310,7 +310,7 @@ def test_list_key_filter(self):
keys = ','.join([course.key for course in courses])
url = '{root}?{params}'.format(root=reverse('api:v1:course-list'), params=urlencode({'keys': keys}))

with self.assertNumQueries(39, threshold=3):
with self.assertNumQueries(35, threshold=3):
response = self.client.get(url)
self.assertListEqual(response.data['results'], self.serialize_course(courses, many=True))

Expand Down Expand Up @@ -2336,7 +2336,7 @@ def test_options(self):
CourseEntitlementFactory(course=self.course, mode=SeatTypeFactory.verified())

url = reverse('api:v1:course-detail', kwargs={'key': self.course.uuid})
with self.assertNumQueries(44, threshold=0):
with self.assertNumQueries(46, threshold=0):
response = self.client.options(url)
assert response.status_code == 200

Expand Down
5 changes: 2 additions & 3 deletions course_discovery/apps/api/v1/views/courses.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from django.core.exceptions import ValidationError
from django.db import models, transaction
from django.db.models import Q
from django.db.models.functions import Lower
from django.http.response import Http404
from django.shortcuts import get_object_or_404
from django.utils.translation import gettext as _
Expand Down Expand Up @@ -156,9 +155,9 @@ def get_queryset(self):
programs=programs,
)
if pub_q and edit_mode:
return queryset.filter(Q(key__icontains=pub_q) | Q(title__icontains=pub_q)).order_by(Lower('key'))
return queryset.filter(Q(key__icontains=pub_q) | Q(title__icontains=pub_q)).order_by('key')

return queryset.order_by(Lower('key'))
return queryset.order_by('key')

def get_serializer_context(self):
context = super().get_serializer_context()
Expand Down
8 changes: 7 additions & 1 deletion course_discovery/apps/course_metadata/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1595,7 +1595,11 @@ def active_url_slug(self):
if cached_active_url.is_found:
return cached_active_url.value

active_url = CourseUrlSlug.objects.filter(course=self, is_active=True).first()
# The if clause is simply to prevent N+1 issues
if hasattr(self, '_prefetched_active_slug') and self._prefetched_active_slug:
active_url = self._prefetched_active_slug[0]
else:
active_url = CourseUrlSlug.objects.filter(course=self, is_active=True).first()

if not active_url and self.draft and self.official_version:
# current draft url slug has already been published at least once, so get it from the official course
Expand Down Expand Up @@ -1786,6 +1790,8 @@ def set_active_url_slug(self, slug):

# There are too many branches in this method, and it is ok to clear cache than to serve stale data.
clear_slug_request_cache_for_course(self.uuid)
if hasattr(self, '_prefetched_active_slug'):
del self._prefetched_active_slug

if slug:
# case 0: if slug is already in use with another, raise an IntegrityError
Expand Down

0 comments on commit 79f5e90

Please sign in to comment.