From 5ae89a69bca2585c84868a32dd75fdcb8432ed8a Mon Sep 17 00:00:00 2001 From: PrivateGER Date: Thu, 25 May 2023 23:49:52 +0200 Subject: [PATCH] Add high performance batch imports --- packages/backend/src/db/meilisearch.ts | 66 +++++++++++-------- .../processors/background/index-all-notes.ts | 12 +++- packages/backend/src/services/note/create.ts | 4 +- 3 files changed, 50 insertions(+), 32 deletions(-) diff --git a/packages/backend/src/db/meilisearch.ts b/packages/backend/src/db/meilisearch.ts index eecbf93e62..abd248e46e 100644 --- a/packages/backend/src/db/meilisearch.ts +++ b/packages/backend/src/db/meilisearch.ts @@ -97,36 +97,48 @@ export default hasConfig ? { filter: constructedFilters }); }, - ingestNote: (note : Note) => { - logger.info("Indexing note in MeiliSearch: " + note.id); - - let attachmentType = ""; - if (note.attachedFileTypes.length > 0) { - attachmentType = note.attachedFileTypes[0].split("/")[0]; - switch (attachmentType) { - case "image": - case "video": - case "audio": - case "text": - break; - default: - attachmentType = "file" - break - } + ingestNote: (note: Note | Note[]) => { + if (note instanceof Note) { + note = [note]; } - return posts.addDocuments([ - { - id: note.id.toString(), - text: note.text, - userId: note.userId, - userHost: note.userHost, - channelId: note.channelId, - mediaAttachment: attachmentType, - userName: note.user?.username, - createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy + let indexingBatch: MeilisearchNote[] = []; + + note.forEach(note => { + + let attachmentType = ""; + if (note.attachedFileTypes.length > 0) { + attachmentType = note.attachedFileTypes[0].split("/")[0]; + switch (attachmentType) { + case "image": + case "video": + case "audio": + case "text": + break; + default: + attachmentType = "file" + break + } } - ]); + + indexingBatch.push({ + id: note.id.toString(), + text: note.text ? note.text : "", + userId: note.userId, + userHost: note.userHost ? note.userHost : "", + channelId: note.channelId ? note.channelId : "", + mediaAttachment: attachmentType, + userName: note.user?.username ? note.user.username : "", + createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy + } + ) + }); + + let indexingIDs = indexingBatch.map(note => note.id); + + logger.info("Indexing notes in MeiliSearch: " + indexingIDs.join(",")); + + return posts.addDocuments(indexingBatch); }, serverStats: async () => { let health : Health = await client.health(); diff --git a/packages/backend/src/queue/processors/background/index-all-notes.ts b/packages/backend/src/queue/processors/background/index-all-notes.ts index 03219199d9..9bed4eb731 100644 --- a/packages/backend/src/queue/processors/background/index-all-notes.ts +++ b/packages/backend/src/queue/processors/background/index-all-notes.ts @@ -4,7 +4,8 @@ import { queueLogger } from "../../logger.js"; import { Notes } from "@/models/index.js"; import { MoreThan } from "typeorm"; import { index } from "@/services/note/create.js"; -import { Note } from "@/models/entities/note.js"; +import {Note} from "@/models/entities/note.js"; +import meilisearch from "../../../db/meilisearch.js"; const logger = queueLogger.createSubLogger("index-all-notes"); @@ -58,11 +59,16 @@ export default async function indexAllNotes( for (let i = 0; i < notes.length; i += batch) { const chunk = notes.slice(i, i + batch); - await Promise.all(chunk.map((note) => index(note))); + + if (meilisearch) { + await meilisearch.ingestNote(chunk) + } + + await Promise.all(chunk.map((note) => index(note, true))); indexedCount += chunk.length; const pct = (indexedCount / total) * 100; - job.update({ indexedCount, cursor, total }); + job.update({indexedCount, cursor, total}); job.progress(+pct.toFixed(1)); logger.info(`Indexed notes ${indexedCount}/${total ? total : "?"}`); } diff --git a/packages/backend/src/services/note/create.ts b/packages/backend/src/services/note/create.ts index f6285a61d2..bd54db7e24 100644 --- a/packages/backend/src/services/note/create.ts +++ b/packages/backend/src/services/note/create.ts @@ -749,7 +749,7 @@ async function insertNote( } } -export async function index(note: Note): Promise { +export async function index(note: Note, reindexing: boolean): Promise { if (!note.text) return; if (config.elasticsearch && es) { @@ -778,7 +778,7 @@ export async function index(note: Note): Promise { ); } - if (meilisearch) { + if (meilisearch && !reindexing) { await meilisearch.ingestNote(note); } }