performance: Cache user mentions for multiple PMs.

showell · timabbott · commit c4bd4496dd7d · 2021-12-30T11:28:15.000-08:00
It's slightly annoying to plumb Optional[MentionBackend]
down the stack, but it's a one-time change.

I tried to make the cache code relatively unobtrusive
for the single-message use case.

We should be able to eliminate redundant stream queries
using similar techniques.

I considered caching at the level of rendering the message
itself, but this involves nearly as much plumbing, and
you have to account for the fact that several users on
your realm may have distinct default languages (French,
Spanish, Russian, etc.), so you would not eliminate as
many query hops. Also, if multiple streams were involved,
users would get slightly different messages based on
their prior subscriptions.
diff --git a/zerver/lib/actions.py b/zerver/lib/actions.py
@@ -1855,6 +1855,7 @@ def build_message_send_dict(
     realm: Optional[Realm] = None,
     widget_content_dict: Optional[Dict[str, Any]] = None,
     email_gateway: bool = False,
+    mention_backend: Optional[MentionBackend] = None,
 ) -> SendMessageRequest:
     """Returns a dictionary that can be passed into do_send_messages.  In
     production, this is always called by check_message, but some
@@ -1863,7 +1864,9 @@ def build_message_send_dict(
     if realm is None:
         realm = message.sender.realm
 
-    mention_backend = MentionBackend(realm.id)
+    if mention_backend is None:
+        mention_backend = MentionBackend(realm.id)
+
     mention_data = MentionData(
         mention_backend=mention_backend,
         content=message.content,
@@ -3303,6 +3306,7 @@ def check_message(
     email_gateway: bool = False,
     *,
     skip_stream_access_check: bool = False,
+    mention_backend: Optional[MentionBackend] = None,
 ) -> SendMessageRequest:
     """See
     https://zulip.readthedocs.io/en/latest/subsystems/sending-messages.html
@@ -3428,6 +3432,7 @@ def check_message(
         realm=realm,
         widget_content_dict=widget_content_dict,
         email_gateway=email_gateway,
+        mention_backend=mention_backend,
     )
 
     if stream is not None and message_send_dict.rendering_result.mentions_wildcard:
@@ -3444,6 +3449,7 @@ def _internal_prep_message(
     addressee: Addressee,
     content: str,
     email_gateway: bool = False,
+    mention_backend: Optional[MentionBackend] = None,
 ) -> Optional[SendMessageRequest]:
     """
     Create a message object and checks it, but doesn't send it or save it to the database.
@@ -3473,6 +3479,7 @@ def _internal_prep_message(
             content,
             realm=realm,
             email_gateway=email_gateway,
+            mention_backend=mention_backend,
         )
     except JsonableError as e:
         logging.exception(
@@ -3528,7 +3535,11 @@ def internal_prep_stream_message_by_name(
 
 
 def internal_prep_private_message(
-    realm: Realm, sender: UserProfile, recipient_user: UserProfile, content: str
+    realm: Realm,
+    sender: UserProfile,
+    recipient_user: UserProfile,
+    content: str,
+    mention_backend: Optional[MentionBackend] = None,
 ) -> Optional[SendMessageRequest]:
     """
     See _internal_prep_message for details of how this works.
@@ -3540,6 +3551,7 @@ def internal_prep_private_message(
         sender=sender,
         addressee=addressee,
         content=content,
+        mention_backend=mention_backend,
     )
 
 
diff --git a/zerver/lib/mention.py b/zerver/lib/mention.py
@@ -37,27 +37,61 @@ def Q(self) -> Q:
             raise AssertionError("totally empty filter makes no sense")
 
 
-@dataclass
 class MentionBackend:
-    realm_id: int
+    def __init__(self, realm_id: int) -> None:
+        self.realm_id = realm_id
+        self.user_cache: Dict[Tuple[int, str], FullNameInfo] = {}
 
     def get_full_name_info_list(self, user_filters: List[UserFilter]) -> List[FullNameInfo]:
-        q_list = [user_filter.Q() for user_filter in user_filters]
-
-        rows = (
-            UserProfile.objects.filter(
-                realm_id=self.realm_id,
-                is_active=True,
-            )
-            .filter(
-                functools.reduce(lambda a, b: a | b, q_list),
+        result: List[FullNameInfo] = []
+        unseen_user_filters: List[UserFilter] = []
+
+        # Try to get users from the user_cache first.
+        # This loop populates two lists:
+        #  - result holds the objects we pull from cache
+        #  - unseen_user_filters are filters where we need to hit the DB
+        for user_filter in user_filters:
+            # We expect callers who take advantage of our user_cache to supply both
+            # id and full_name in the user mentions in their messages.
+            if user_filter.id is not None and user_filter.full_name is not None:
+                user = self.user_cache.get((user_filter.id, user_filter.full_name), None)
+                if user is not None:
+                    result.append(user)
+                    continue
+
+            # BOO! We have to go to the database.
+            unseen_user_filters.append(user_filter)
+
+        # Most of the time, we have to go to the database to get user info,
+        # unless our last loop found everything in the cache.
+        if unseen_user_filters:
+            q_list = [user_filter.Q() for user_filter in unseen_user_filters]
+
+            rows = (
+                UserProfile.objects.filter(
+                    realm_id=self.realm_id,
+                    is_active=True,
+                )
+                .filter(
+                    functools.reduce(lambda a, b: a | b, q_list),
+                )
+                .only(
+                    "id",
+                    "full_name",
+                )
             )
-            .only(
-                "id",
-                "full_name",
-            )
-        )
-        return [FullNameInfo(id=row.id, full_name=row.full_name) for row in rows]
+
+            user_list = [FullNameInfo(id=row.id, full_name=row.full_name) for row in rows]
+
+            # We expect callers who take advantage of our cache to supply both
+            # id and full_name in the user mentions in their messages.
+            for user in user_list:
+                if user.id is not None and user.full_name is not None:
+                    self.user_cache[(user.id, user.full_name)] = user
+
+            result += user_list
+
+        return result
 
 
 def user_mention_matches_wildcard(mention: str) -> bool:
diff --git a/zerver/tests/test_subs.py b/zerver/tests/test_subs.py
@@ -4971,7 +4971,7 @@ def test_gather_subscriptions(self) -> None:
                 streams,
                 dict(principals=orjson.dumps(users_to_subscribe).decode()),
             )
-        self.assert_length(queries, 50)
+        self.assert_length(queries, 48)
 
         msg = f"""
             @**King Hamlet|{hamlet.id}** subscribed you to the following streams:
diff --git a/zerver/views/streams.py b/zerver/views/streams.py
@@ -50,7 +50,7 @@
     OrganizationOwnerRequired,
     ResourceNotFoundError,
 )
-from zerver.lib.mention import silent_mention_syntax_for_user
+from zerver.lib.mention import MentionBackend, silent_mention_syntax_for_user
 from zerver.lib.request import REQ, has_request_variables
 from zerver.lib.response import json_success
 from zerver.lib.retention import parse_message_retention_days
@@ -603,6 +603,9 @@ def send_messages_for_new_subscribers(
 
     newly_created_stream_names = {s.name for s in created_streams}
 
+    realm = user_profile.realm
+    mention_backend = MentionBackend(realm.id)
+
     # Inform the user if someone else subscribed them to stuff,
     # or if a new stream was created with the "announce" option.
     notifications = []
@@ -633,10 +636,11 @@ def send_messages_for_new_subscribers(
 
             notifications.append(
                 internal_prep_private_message(
-                    realm=user_profile.realm,
+                    realm=realm,
                     sender=sender,
                     recipient_user=recipient_user,
                     content=msg,
+                    mention_backend=mention_backend,
                 )
             )
 

Original file line number	Diff line number	Diff line change
`@@ -4971,7 +4971,7 @@ def test_gather_subscriptions(self) -> None:`
`4971`	`4971`	`streams,`
`4972`	`4972`	`dict(principals=orjson.dumps(users_to_subscribe).decode()),`
`4973`	`4973`	`)`
`4974`		`- self.assert_length(queries, 50)`
	`4974`	`+ self.assert_length(queries, 48)`
`4975`	`4975`
`4976`	`4976`	`msg = f"""`
`4977`	`4977`	`@**King Hamlet\|{hamlet.id}** subscribed you to the following streams:`