2023-05-29 17:45:55 +00:00
import { Health , Index , MeiliSearch , Stats } from "meilisearch" ;
2023-05-29 03:34:18 +00:00
import { dbLogger } from "./logger.js" ;
2023-05-24 22:55:33 +00:00
import config from "@/config/index.js" ;
2023-05-29 03:34:18 +00:00
import { Note } from "@/models/entities/note.js" ;
2023-05-25 22:33:02 +00:00
import * as url from "url" ;
2023-05-29 17:45:55 +00:00
import { ILocalUser } from "@/models/entities/user.js" ;
2023-05-29 03:34:18 +00:00
import { Followings , Users } from "@/models/index.js" ;
2023-05-26 00:07:34 +00:00
2023-05-24 22:55:33 +00:00
// Sub-logger used for every message emitted by this module.
const logger = dbLogger.createSubLogger("meilisearch", "gray", false);

// Client and index handles; they are only assigned by the `if (hasConfig)`
// initialisation further down in this file.
let client: MeiliSearch;
let posts: Index;

// Truthy when a `meilisearch` config section exists and sets at least one of
// host, port or apiKey.
const meiliConfig = config.meilisearch;
const hasConfig =
	meiliConfig && (meiliConfig.host || meiliConfig.port || meiliConfig.apiKey);
2023-05-24 22:55:33 +00:00
2023-05-29 18:09:52 +00:00
// Create the Meilisearch client and configure the `posts` index.
// Runs once at module load, and only when Meilisearch is configured.
if (hasConfig) {
	// `hasConfig` is always truthy inside this guard, so only the `??`
	// defaults are needed here.
	const host = config.meilisearch.host ?? "localhost";
	const port = config.meilisearch.port ?? 7700;
	const auth = config.meilisearch.apiKey ?? "";
	const ssl = config.meilisearch.ssl ?? false;

	logger.info("Connecting to MeiliSearch");

	client = new MeiliSearch({
		host: `${ssl ? "https" : "http"}://${host}:${port}`,
		apiKey: auth,
	});

	posts = client.index("posts");

	// The settings updates below are not awaited; failures are only logged.

	// Only the note text is searched.
	posts
		.updateSearchableAttributes(["text"])
		.catch((e) =>
			logger.error(`Setting searchable attr failed, searches won't work: ${e}`),
		);

	// Attributes the search operators filter on.
	posts
		.updateFilterableAttributes([
			"userName",
			"userHost",
			"mediaAttachment",
			"createdAt",
			"userId",
		])
		.catch((e) =>
			logger.error(
				`Setting filterable attr failed, advanced searches won't work: ${e}`,
			),
		);

	// Placeholder searches are sorted by creation time.
	posts
		.updateSortableAttributes(["createdAt"])
		.catch((e) =>
			logger.error(
				`Setting sortable attr failed, placeholder searches won't sort properly: ${e}`,
			),
		);

	// English and Japanese stop words.
	posts
		.updateStopWords([
			"the",
			"a",
			"as",
			"be",
			"of",
			"they",
			"these",
			"これ",
			"それ",
			"あれ",
			"この",
			"その",
			"あの",
			"ここ",
			"そこ",
			"あそこ",
			"こちら",
			"どこ",
			"だれ",
			"なに",
			"なん",
			"何",
			"私",
			"貴方",
			"貴方方",
			"我々",
			"私達",
			"あの人",
			"あのか",
			"彼女",
			"彼",
			"です",
			"ありま",
			"おりま",
			"います",
			"は",
			"が",
			"の",
			"に",
			"を",
			"で",
			"え",
			"から",
			"まで",
			"より",
			"も",
			"どの",
			"と",
			"し",
			"それで",
			"しかし",
		])
		.catch((e) =>
			logger.error(
				`Failed to set Meilisearch stop words, database size will be larger: ${e}`,
			),
		);

	// Logged as soon as the requests above have been issued; none of them
	// has been awaited at this point.
	logger.info("Connected to MeiliSearch");
}
2023-05-24 22:55:33 +00:00
/**
 * Shape of a document in the Meilisearch `posts` index, as built by
 * `ingestNote` in this file.
 */
export type MeilisearchNote = {
// Note ID; passed as the primary key when documents are added.
id : string ;
// Note text; `ingestNote` stores "" when the note has no text.
text : string ;
// Copied from `note.userId`.
userId : string ;
// Host the note's author belongs to.
userHost : string ;
2023-05-25 20:29:47 +00:00
// Author's username; `ingestNote` falls back to "UNKNOWN".
userName : string ;
2023-05-24 22:55:33 +00:00
// Channel ID, or "" when the note has none.
channelId : string ;
2023-05-25 12:15:13 +00:00
// Attachment category: "", "image", "video", "audio", "text" or "file".
mediaAttachment : string ;
2023-05-26 01:06:41 +00:00
// Creation time in seconds (milliseconds divided by 1000).
createdAt : number ;
} ;
2023-06-12 14:35:04 +00:00
/**
 * Convert a user-supplied timestamp into UNIX seconds.
 *
 * A digits-only string is taken verbatim as UNIX seconds; anything else is
 * handed to `Date.parse`.
 *
 * @param timestamp Raw timestamp text.
 * @returns UNIX time in seconds (fractional when the parsed date carries
 *   milliseconds), or 0 when the input cannot be interpreted.
 */
function timestampToUnix(timestamp: string): number {
	// Only contains numbers => UNIX timestamp. Return immediately so that an
	// all-zero input stays 0 instead of being re-interpreted by Date.parse.
	if (/^\d+$/.test(timestamp)) {
		return Number.parseInt(timestamp, 10);
	}

	// Otherwise try to parse the timestamp as a JavaScript Date.
	const millis = Date.parse(timestamp);
	return Number.isNaN(millis) ? 0 : millis / 1000;
}
2023-05-26 01:06:41 +00:00
export default hasConfig
? {
2023-05-29 03:34:18 +00:00
/**
 * Search the `posts` index.
 *
 * `query` is split on spaces. Terms using one of the operators below are
 * removed from the free-text query and turned into Meilisearch filters:
 *
 * - `from:user` / `from:@user@domain.com` => filter by user + optional domain
 * - `has:image/video/audio/text/file` => filter by attachment type
 * - `domain:domain.com` => filter by domain
 * - `before:Date` => show posts made before Date
 * - `after:Date` => show posts made after Date
 * - `filter:following` => show results only from users you follow
 * - `filter:followers` => show results only from followers
 *
 * All remaining terms (including `"quoted text"`) are passed to Meilisearch
 * unchanged.
 *
 * @param query Raw search string.
 * @param limit Forwarded to Meilisearch as `limit`.
 * @param offset Forwarded to Meilisearch as `offset`.
 * @param userCtx User the follow filters are resolved for; when null those
 *   operators only log a warning.
 * @returns The result of `posts.search`.
 */
search: async (
	query: string,
	limit: number,
	offset: number,
	userCtx: ILocalUser | null,
) => {
	const constructedFilters: string[] = [];

	// Detect search operators and remove them from the actual query
	const parsedTerms = await Promise.all(
		query.split(" ").map(async (term): Promise<string | null> => {
			if (term.startsWith("has:")) {
				const fileType = term.slice(4);
				constructedFilters.push(`mediaAttachment = "${fileType}"`);
				return null;
			}

			if (term.startsWith("from:")) {
				let user = term.slice(5);
				// Cut off leading @, those aren't saved in the DB
				if (user.startsWith("@")) {
					user = user.slice(1);
				}
				// A webfinger address has at least one "@" left; a bare
				// username has none. split() always yields >= 1 element, so
				// the domain is only taken when there is more than one part.
				const parts = user.split("@");
				const domain = parts.length > 1 ? parts.pop() : undefined;
				const userName = parts.join("@");
				// An empty username or domain would produce an invalid filter.
				if (userName !== "") {
					constructedFilters.push(
						domain
							? `userName = ${userName} AND userHost = ${domain}`
							: `userName = ${userName}`,
					);
				}
				return null;
			}

			if (term.startsWith("domain:")) {
				const domain = term.slice(7);
				if (domain !== "") {
					constructedFilters.push(`userHost = ${domain}`);
				}
				return null;
			}

			if (term.startsWith("after:")) {
				// 0 means the timestamp could not be interpreted
				const unix = timestampToUnix(term.slice(6));
				if (unix !== 0) constructedFilters.push(`createdAt > ${unix}`);
				return null;
			}

			if (term.startsWith("before:")) {
				const unix = timestampToUnix(term.slice(7));
				if (unix !== 0) constructedFilters.push(`createdAt < ${unix}`);
				return null;
			}

			if (term.startsWith("filter:following")) {
				// Check if we got a context user
				if (!userCtx) {
					logger.warn(
						"search filtered to follows called without user context",
					);
					return null;
				}
				// Fetch the users the context user follows from the DB
				const followedUsers = await Followings.find({
					where: {
						followerId: userCtx.id,
					},
					select: {
						followeeId: true,
					},
				});
				const followIDs = followedUsers.map((user) => user.followeeId);
				// No filter is added when the list is empty
				if (followIDs.length > 0) {
					constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
				}
				return null;
			}

			if (term.startsWith("filter:followers")) {
				// Check if we got a context user
				if (!userCtx) {
					logger.warn(
						"search filtered to followers called without user context",
					);
					return null;
				}
				// Fetch the users following the context user from the DB
				const followingUsers = await Followings.find({
					where: {
						followeeId: userCtx.id,
					},
					select: {
						followerId: true,
					},
				});
				const followIDs = followingUsers.map((user) => user.followerId);
				if (followIDs.length > 0) {
					constructedFilters.push(`userId IN [${followIDs.join(",")}]`);
				}
				return null;
			}

			return term;
		}),
	);

	// Drop consumed operators, and the empty terms produced by repeated or
	// trailing spaces, so they do not hide a placeholder search below.
	const filteredSearchTerms = parsedTerms.filter(
		(term): term is string => term !== null && term !== "",
	);
	const searchQuery = filteredSearchTerms.join(" ");

	const sortRules: string[] = [];

	// An empty search term with defined filters means we have a placeholder search => https://www.meilisearch.com/docs/reference/api/search#placeholder-search
	// These have to be ordered manually, otherwise the *oldest* posts are returned first, which we don't want
	if (filteredSearchTerms.length === 0 && constructedFilters.length > 0) {
		sortRules.push("createdAt:desc");
	}

	logger.info(`Searching for ${searchQuery}`);
	logger.info(`Limit: ${limit}`);
	logger.info(`Offset: ${offset}`);
	logger.info(`Filters: ${constructedFilters}`);
	logger.info(`Ordering: ${sortRules}`);

	return posts.search(searchQuery, {
		limit: limit,
		offset: offset,
		filter: constructedFilters,
		sort: sortRules,
	});
},
/**
 * Convert one or more notes into index documents and submit them to the
 * `posts` index.
 *
 * Notes whose `user` is undefined get it looked up by `userId` first (the
 * result is stored back on the note).
 *
 * @param ingestNotes A single note or an array of notes.
 * @returns Resolves once the batch has been handed to Meilisearch; resolves
 *   immediately, without a request, when there is nothing to index.
 */
ingestNote: async (ingestNotes: Note | Note[]) => {
	// Array.isArray avoids depending on the class identity of a single note
	// and leaves the parameter untouched.
	const notes = Array.isArray(ingestNotes) ? ingestNotes : [ingestNotes];
	if (notes.length === 0) return;

	const knownAttachmentTypes = ["image", "video", "audio", "text"];
	const indexingBatch: MeilisearchNote[] = [];

	for (const note of notes) {
		if (note.user === undefined) {
			note.user = await Users.findOne({
				where: {
					id: note.userId,
				},
			});
		}

		// Categorise by the major MIME type of the first attachment; anything
		// that is not image/video/audio/text is indexed as "file".
		let attachmentType = "";
		if (note.attachedFileTypes.length > 0) {
			const majorType = note.attachedFileTypes[0].split("/")[0];
			attachmentType = knownAttachmentTypes.includes(majorType)
				? majorType
				: "file";
		}

		indexingBatch.push({
			id: note.id.toString(),
			text: note.text ? note.text : "",
			userId: note.userId,
			// Fall back to the configured host for an empty *or missing* host.
			// NOTE(review): assumes a missing host marks a local note — confirm.
			userHost: note.userHost ? note.userHost : url.parse(config.host).host,
			channelId: note.channelId ? note.channelId : "",
			mediaAttachment: attachmentType,
			userName: note.user?.username ?? "UNKNOWN",
			createdAt: note.createdAt.getTime() / 1000, // division by 1000 is necessary because Node returns in ms-accuracy
		} as MeilisearchNote);
	}

	await posts.addDocuments(indexingBatch, {
		primaryKey: "id",
	});
	logger.info(`sent ${indexingBatch.length} posts for indexing`);
},
/**
 * Report the health and size of the Meilisearch server.
 *
 * @returns `health` (status string from the health endpoint), `size`
 *   (`databaseSize` from the stats) and `indexed_count` (number of documents
 *   in the `posts` index, or 0 when the stats contain no `posts` entry).
 */
serverStats: async () => {
	// The two requests are independent, so issue them together.
	const [health, stats]: [Health, Stats] = await Promise.all([
		client.health(),
		client.getStats(),
	]);

	return {
		health: health.status,
		size: stats.databaseSize,
		// The `posts` entry may be missing from the stats; report 0 then
		// instead of throwing.
		indexed_count: stats.indexes["posts"]?.numberOfDocuments ?? 0,
	};
},
2023-05-29 16:33:57 +00:00
/**
 * Remove notes from the `posts` index.
 *
 * @param note A note, a note ID, or an array of either.
 * @throws Error when an entry is neither a `Note` instance nor a non-empty
 *   string; nothing is deleted in that case.
 */
deleteNotes: async (note: Note | Note[] | string | string[]) => {
	const targets: Array<Note | string> = Array.isArray(note) ? note : [note];
	if (targets.length === 0) return;

	// Resolve every entry to its ID. Invalid entries abort the whole batch.
	const deletionBatch = targets.map((n) => {
		if (n instanceof Note) {
			return n.id;
		}

		if (n.length > 0) return n;

		logger.error(
			`Failed to delete note from Meilisearch, invalid post ID: ${JSON.stringify(
				n,
			)}`,
		);

		throw new Error(
			`Invalid note ID passed to meilisearch deleteNote: ${JSON.stringify(
				n,
			)}`,
		);
	});

	await posts.deleteDocuments(deletionBatch as string[]);
	logger.info(`submitted ${deletionBatch.length} large batch for deletion`);
},
2023-05-29 03:34:18 +00:00
}
2023-05-26 01:06:41 +00:00
: null ;