From ff13f225832296f7b85185884da6bce69cea115d Mon Sep 17 00:00:00 2001 From: Michael Barrett Date: Thu, 19 Sep 2024 18:08:11 +0100 Subject: [PATCH] Updated activities endpoint to better support client-side pagination no refs In order for client-side pagination to work effectively, the activities endpoint has been updated to: - Handle filtering required on the client-side (was previously done client-side) - Return nested replies in returned activities - Expand `inReplyTo` to full object --- src/db.ts | 76 +++++++++++++++++++++--- src/handlers.ts | 154 ++++++++++++++++++++++++++++++++++++++++++++---- 2 files changed, 211 insertions(+), 19 deletions(-) diff --git a/src/db.ts b/src/db.ts index f266feac..4b707005 100644 --- a/src/db.ts +++ b/src/db.ts @@ -18,25 +18,83 @@ await client.schema.createTableIfNotExists('key_value', function (table) { table.datetime('expires').nullable(); }); -// Helper function to get the meta data for an array of activity URIs // from the database. This allows us to fetch information about the activities // without having to fetch the full activity object. 
This is a bit of a hack to // support sorting / filtering of the activities and should be replaced when we // have a proper db schema -export async function getActivityMeta(uris: string[]): Promise> { + +type ActivityMeta = { + id: number; // Used for sorting + activity_type: string; // Used for filtering by activity type + object_type: string; // Used for filtering by object type + reply_object_url: string; // Used for filtering by isReplyToOwn criteria + reply_object_name: string; // Used for filtering by isReplyToOwn criteria +}; + +type getActivityMetaQueryResult = { + key: string, + left_id: number, + activity_type: string, + object_type: string, + reply_object_url: string, + reply_object_name: string +} + +export async function getActivityMeta(uris: string[]): Promise> { const results = await client - .select('key', 'id', client.raw('JSON_EXTRACT(value, "$.type") as type')) - .from('key_value') - .whereIn('key', uris.map(uri => `["${uri}"]`)); + .select( + 'left.key', + 'left.id as left_id', + // Extract the fields used for filtering from the stored JSON values 
+ client.raw('JSON_EXTRACT(left.value, "$.type") as activity_type'), + client.raw('JSON_EXTRACT(left.value, "$.object.type") as object_type'), + client.raw('JSON_EXTRACT(right.value, "$.object.url") as reply_object_url'), + client.raw('JSON_EXTRACT(right.value, "$.object.name") as reply_object_name') + ) + .from({ left: 'key_value' }) + // @ts-ignore: This works as expected but the type definitions complain 🤔 + .leftJoin( + { right: 'key_value' }, + client.raw('JSON_UNQUOTE(JSON_EXTRACT(right.value, "$.object.id"))'), + '=', + client.raw('JSON_UNQUOTE(JSON_EXTRACT(left.value, "$.object.inReplyTo"))') + ) + .whereIn('left.key', uris.map(uri => `["${uri}"]`)); - const map = new Map(); + const map = new Map(); - for (const result of results) { + for (const result of results as getActivityMetaQueryResult[]) { map.set(result.key.substring(2, result.key.length - 2), { - id: result.id, - type: result.type, + id: result.left_id, + activity_type: result.activity_type, + object_type: result.object_type, + reply_object_url: result.reply_object_url, + reply_object_name: result.reply_object_name, }); } return map; } + +// Helper function to retrieve a map of replies (keyed by the inReplyTo value +// of each reply) found among an array of activity URIs in the database +export async function getRepliesMap (uris: string[]): Promise> { + const map = new Map(); + + const results = await client + .select('value') + .from('key_value') + .where(client.raw('JSON_EXTRACT(value, "$.object.inReplyTo") IS NOT NULL')) + .whereIn('key', uris.map(uri => `["${uri}"]`)); + + for (const {value: result} of results) { + const replies = map.get(result.object.inReplyTo) ?? 
[]; + + replies.push(result); + + map.set(result.object.inReplyTo, replies); + } + + return map; +} diff --git a/src/handlers.ts b/src/handlers.ts index 6ed8cd7c..d02259e9 100644 --- a/src/handlers.ts +++ b/src/handlers.ts @@ -18,7 +18,7 @@ import { Buffer } from 'node:buffer'; import { Context, Next } from 'hono'; import sanitizeHtml from 'sanitize-html'; import { v4 as uuidv4 } from 'uuid'; -import { getActivityMeta } from './db'; +import { getActivityMeta, getRepliesMap } from './db'; import { addToList, removeFromList } from './kv-helpers'; import { toURL } from './toURL'; import { ContextData, HonoContextVariables, fedify } from './app'; @@ -489,6 +489,8 @@ async function buildActivity( db: KvStore, apCtx: APContext, liked: string[] = [], + repliesMap: Map | null = null, + expandInReplyTo: boolean = false, ): Promise { const item = await db.get([uri]); @@ -559,6 +561,38 @@ async function buildActivity( } } + // If a replies map has been provided, the item's object is not a string, + // and the object has an id, we nest any replies recursively (by calling + // this function again for each reply, without expanding its inReplyTo) + if (repliesMap && typeof item.object !== 'string' && item.object.id) { + item.object.replies = []; + + const replies = repliesMap.get(item.object.id); + + if (replies) { + const builtReplies = []; + + for (const reply of replies) { + const builtReply = await buildActivity(reply.id, db, apCtx, liked, repliesMap); + + if (builtReply) { + builtReplies.push(builtReply); + } + } + + item.object.replies = builtReplies; + } + } + + // If requested, replace the object's inReplyTo reference with the full object from the db (left as-is when it is not found) + if (expandInReplyTo && typeof item.object !== 'string' && item.object.inReplyTo) { + const replyObject = await db.get([item.object.inReplyTo]); + + if (replyObject) { + item.object.inReplyTo = replyObject; + } + } + // Return the built item return item; } @@ -622,15 +656,56 @@ export async function getActivities( const globaldb = ctx.get('globaldb'); const 
apCtx = fedify.createContext(ctx.req.raw as Request, {db, globaldb}); - // Parse cursor and limit from query parameters + // ------------------------------------------------------------------------- + // Process query parameters + // ------------------------------------------------------------------------- + + // Parse "cursor" and "limit" from query parameters + // These are used to paginate the results + // ?cursor=<base64url-encoded activity ref> + // ?limit=<integer> const queryCursor = ctx.req.query('cursor') const cursor = queryCursor ? Buffer.from(queryCursor, 'base64url').toString('utf-8') : null; const limit = Number.parseInt(ctx.req.query('limit') || DEFAULT_LIMIT.toString(), 10); - // Parse includeOwn from query parameters + // Parse "includeOwn" from query parameters // This is used to include the user's own activities in the results + // ?includeOwn=<true|false> const includeOwn = ctx.req.query('includeOwn') === 'true'; + // Parse "includeReplies" from query parameters + // This is used to include nested replies in the results + // ?includeReplies=<true|false> + const includeReplies = ctx.req.query('includeReplies') === 'true'; + + // Parse "filter" from query parameters + // This is used to filter the activities by various criteria + // ?filter={"type": ["<activityType>", "<activityType>:<objectType>", "<activityType>:<objectType>:<criteria>"]} (URI-encoded JSON) + const queryFilters = ctx.req.query('filter') || '[]'; + const filters = JSON.parse(decodeURI(queryFilters)) + + const typeFilters = (filters.type || []).map((filter: string) => { + const [activityType, objectType = null, criteria = null] = filter.split(':'); + + return { + activity: activityType, + object: objectType, + criteria, + } + }); + + console.log('Request query =', ctx.req.query()); + console.log('Processed query params =', JSON.stringify({ + cursor, + limit, + includeOwn, + typeFilters, + }, null, 2)); + + // ------------------------------------------------------------------------- + // Fetch required data from the database + // ------------------------------------------------------------------------- + // Fetch the liked object refs from 
the database: // - Data is structured as an array of strings // - Each string is a URI to an object in the database @@ -654,19 +729,64 @@ export async function getActivities( outboxRefs = await db.get(['outbox']) || []; } - // To be able to return a sorted / filtered "feed" of activities, we need to + // To be able to return a sorted / filtered list of activities, we need to // fetch some additional meta data about the referenced activities. Doing this // upfront allows us to sort, filter and paginate the activities before // building them for the response which saves us from having to perform // unnecessary database lookups for referenced activities that will not be - // included in the response. If we can't find the meta data in the database - // for an activity, we skip it as this is unexpected + // included in the response let activityRefs = [...inboxRefs, ...outboxRefs]; const activityMeta = await getActivityMeta(activityRefs); + // If we can't find the meta data in the database for an activity, we skip + // it as this is unexpected activityRefs = activityRefs.filter(ref => activityMeta.has(ref)); - // Sort the activity refs by the id of the activity (newest first) + // ------------------------------------------------------------------------- + // Apply filtering and sorting + // ------------------------------------------------------------------------- + + // Filter the activity refs by any provided type filters + if (typeFilters.length > 0) { + activityRefs = activityRefs.filter(ref => { + const activity = activityMeta.get(ref)!; + + return typeFilters.some((filter: { activity: string; object: string | null, criteria: string | null }) => { + // ?filter={"type": ["<activityType>"]} + if (filter.activity && activity.activity_type !== filter.activity) { + return false; + } + + // ?filter={"type": ["<activityType>:<objectType>"]} + if (filter.object && activity.object_type !== filter.object) { + return false; + } + + // ?filter={"type": ["<activityType>:<objectType>:isReplyToOwn,<siteHost>"]} + if (filter.criteria && 
filter.criteria.startsWith('isReplyToOwn,')) { + // If the activity does not have a reply object url or name, + // we can't determine if it's a reply to one of the user's own + // objects so we skip it + if (!activity.reply_object_url || !activity.reply_object_name) { + return false; + } + + // Verify that the reply is to an object created by the user by + // checking that the hostname associated with the reply object + // is the same as the hostname of the site. This is not a + // bulletproof check, but it's good enough for now (I think 😅) + const [_, siteHost] = filter.criteria.split(','); + const { hostname: replyHost } = new URL(activity.reply_object_url); + + return siteHost === replyHost; + } + + return true; + }); + }); + } + + // Sort the activity refs by the id of the activity (newest first). // We are using the id to sort because currently not all activity types have // a timestamp. The id property is a unique auto incremented number at the // database level @@ -674,6 +794,10 @@ export async function getActivities( return activityMeta.get(b)!.id - activityMeta.get(a)!.id; }); + // ------------------------------------------------------------------------- + // Paginate + // ------------------------------------------------------------------------- + // Find the starting index based on the cursor const startIndex = cursor ? activityRefs.findIndex(ref => ref === cursor) + 1 : 0; @@ -685,12 +809,22 @@ export async function getActivities( ? 
Buffer.from(paginatedRefs[paginatedRefs.length - 1]).toString('base64url') : null; - // Build the activities for the response + // ------------------------------------------------------------------------- + // Build the activities and return the response + // ------------------------------------------------------------------------- + const activities = []; + // If we need to include replies, fetch the replies map based on the paginated + // activity refs, which will be utilised when building the activities + const repliesMap = includeReplies + ? await getRepliesMap(paginatedRefs) + : null; + + // Build the activities (always expanding inReplyTo) for (const ref of paginatedRefs) { try { - const builtActivity = await buildActivity(ref, globaldb, apCtx, likedRefs); + const builtActivity = await buildActivity(ref, globaldb, apCtx, likedRefs, repliesMap, true); if (builtActivity) { activities.push(builtActivity); @@ -700,7 +834,7 @@ export async function getActivities( } } - // Return the built activities and the next cursor + // Return the response return new Response(JSON.stringify({ items: activities, nextCursor,