Add high performance batch imports
This commit is contained in:
parent
8d08bf3ee4
commit
5ae89a69bc
|
@ -97,36 +97,48 @@ export default hasConfig ? {
|
||||||
filter: constructedFilters
|
filter: constructedFilters
|
||||||
});
|
});
|
||||||
},
|
},
|
||||||
ingestNote: (note : Note) => {
|
ingestNote: (note: Note | Note[]) => {
|
||||||
logger.info("Indexing note in MeiliSearch: " + note.id);
|
if (note instanceof Note) {
|
||||||
|
note = [note];
|
||||||
let attachmentType = "";
|
|
||||||
if (note.attachedFileTypes.length > 0) {
|
|
||||||
attachmentType = note.attachedFileTypes[0].split("/")[0];
|
|
||||||
switch (attachmentType) {
|
|
||||||
case "image":
|
|
||||||
case "video":
|
|
||||||
case "audio":
|
|
||||||
case "text":
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
attachmentType = "file"
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return posts.addDocuments([
|
let indexingBatch: MeilisearchNote[] = [];
|
||||||
{
|
|
||||||
id: note.id.toString(),
|
note.forEach(note => {
|
||||||
text: note.text,
|
|
||||||
userId: note.userId,
|
let attachmentType = "";
|
||||||
userHost: note.userHost,
|
if (note.attachedFileTypes.length > 0) {
|
||||||
channelId: note.channelId,
|
attachmentType = note.attachedFileTypes[0].split("/")[0];
|
||||||
mediaAttachment: attachmentType,
|
switch (attachmentType) {
|
||||||
userName: note.user?.username,
|
case "image":
|
||||||
createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy
|
case "video":
|
||||||
|
case "audio":
|
||||||
|
case "text":
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
attachmentType = "file"
|
||||||
|
break
|
||||||
|
}
|
||||||
}
|
}
|
||||||
]);
|
|
||||||
|
indexingBatch.push({
|
||||||
|
id: note.id.toString(),
|
||||||
|
text: note.text ? note.text : "",
|
||||||
|
userId: note.userId,
|
||||||
|
userHost: note.userHost ? note.userHost : "",
|
||||||
|
channelId: note.channelId ? note.channelId : "",
|
||||||
|
mediaAttachment: attachmentType,
|
||||||
|
userName: note.user?.username ? note.user.username : "",
|
||||||
|
createdAt: note.createdAt.getTime() / 1000 // division by 1000 is necessary because Node returns in ms-accuracy
|
||||||
|
}
|
||||||
|
)
|
||||||
|
});
|
||||||
|
|
||||||
|
let indexingIDs = indexingBatch.map(note => note.id);
|
||||||
|
|
||||||
|
logger.info("Indexing notes in MeiliSearch: " + indexingIDs.join(","));
|
||||||
|
|
||||||
|
return posts.addDocuments(indexingBatch);
|
||||||
},
|
},
|
||||||
serverStats: async () => {
|
serverStats: async () => {
|
||||||
let health : Health = await client.health();
|
let health : Health = await client.health();
|
||||||
|
|
|
@ -4,7 +4,8 @@ import { queueLogger } from "../../logger.js";
|
||||||
import { Notes } from "@/models/index.js";
|
import { Notes } from "@/models/index.js";
|
||||||
import { MoreThan } from "typeorm";
|
import { MoreThan } from "typeorm";
|
||||||
import { index } from "@/services/note/create.js";
|
import { index } from "@/services/note/create.js";
|
||||||
import { Note } from "@/models/entities/note.js";
|
import {Note} from "@/models/entities/note.js";
|
||||||
|
import meilisearch from "../../../db/meilisearch.js";
|
||||||
|
|
||||||
const logger = queueLogger.createSubLogger("index-all-notes");
|
const logger = queueLogger.createSubLogger("index-all-notes");
|
||||||
|
|
||||||
|
@ -58,11 +59,16 @@ export default async function indexAllNotes(
|
||||||
|
|
||||||
for (let i = 0; i < notes.length; i += batch) {
|
for (let i = 0; i < notes.length; i += batch) {
|
||||||
const chunk = notes.slice(i, i + batch);
|
const chunk = notes.slice(i, i + batch);
|
||||||
await Promise.all(chunk.map((note) => index(note)));
|
|
||||||
|
if (meilisearch) {
|
||||||
|
await meilisearch.ingestNote(chunk)
|
||||||
|
}
|
||||||
|
|
||||||
|
await Promise.all(chunk.map((note) => index(note, true)));
|
||||||
|
|
||||||
indexedCount += chunk.length;
|
indexedCount += chunk.length;
|
||||||
const pct = (indexedCount / total) * 100;
|
const pct = (indexedCount / total) * 100;
|
||||||
job.update({ indexedCount, cursor, total });
|
job.update({indexedCount, cursor, total});
|
||||||
job.progress(+pct.toFixed(1));
|
job.progress(+pct.toFixed(1));
|
||||||
logger.info(`Indexed notes ${indexedCount}/${total ? total : "?"}`);
|
logger.info(`Indexed notes ${indexedCount}/${total ? total : "?"}`);
|
||||||
}
|
}
|
||||||
|
|
|
@ -749,7 +749,7 @@ async function insertNote(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
export async function index(note: Note): Promise<void> {
|
export async function index(note: Note, reindexing: boolean): Promise<void> {
|
||||||
if (!note.text) return;
|
if (!note.text) return;
|
||||||
|
|
||||||
if (config.elasticsearch && es) {
|
if (config.elasticsearch && es) {
|
||||||
|
@ -778,7 +778,7 @@ export async function index(note: Note): Promise<void> {
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (meilisearch) {
|
if (meilisearch && !reindexing) {
|
||||||
await meilisearch.ingestNote(note);
|
await meilisearch.ingestNote(note);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue