Optimizations and treating paragraphs as separate notes
This commit is contained in:
parent
e4e8908788
commit
4723385751
21
src/main.rs
21
src/main.rs
|
@ -5,9 +5,9 @@ use futures::TryStreamExt;
|
|||
use indicatif::ProgressBar;
|
||||
use rand::distributions::{Distribution, WeightedIndex};
|
||||
use regex::Regex;
|
||||
use reqwest::{header, ClientBuilder, RequestBuilder, Response};
|
||||
use reqwest::{header, ClientBuilder};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::{json, Value};
|
||||
use serde_json::Value;
|
||||
use sprs::{CsMat, TriMat};
|
||||
use sqlx::postgres::PgPoolOptions;
|
||||
|
||||
|
@ -154,6 +154,14 @@ impl State {
|
|||
.replace_all(input, "")
|
||||
.to_string();
|
||||
|
||||
if input.contains("\n\n") {
|
||||
let paragraph_split = Regex::new("\n\n+").unwrap();
|
||||
paragraph_split
|
||||
.split(&input)
|
||||
.for_each(|p| self.insert_tokens(p));
|
||||
return;
|
||||
}
|
||||
|
||||
let regex = Regex::new(r"\s+").unwrap();
|
||||
|
||||
let nasty_words = vec![
|
||||
|
@ -201,7 +209,6 @@ async fn main() {
|
|||
} else {
|
||||
State::new()
|
||||
};
|
||||
progress.disable_steady_tick();
|
||||
|
||||
let pool = PgPoolOptions::new()
|
||||
.connect(std::env::var("DATABASE_URL").as_deref().unwrap())
|
||||
|
@ -217,7 +224,8 @@ async fn main() {
|
|||
r#"SELECT text, "createdAt"
|
||||
FROM note
|
||||
WHERE "note"."userId" = $1
|
||||
AND "note"."createdAt" > $2
|
||||
AND "note"."createdAt" > $2
|
||||
AND "note"."createdAt" < NOW()
|
||||
AND "note"."visibility" IN ('public', 'home')
|
||||
AND ("note"."cw" IS NULL OR LOWER("note"."cw") IN ('', 'gay', 'cursed', 'what', 'shitpost', 'no', 'natty what', 'natty what the fuck'))"#,
|
||||
"9awy7u3l76",
|
||||
|
@ -239,6 +247,11 @@ async fn main() {
|
|||
progress.set_length(cnt);
|
||||
}
|
||||
|
||||
drop(stream);
|
||||
drop(pool);
|
||||
|
||||
println!("Shape: {:?}", state.matrix.shape());
|
||||
|
||||
let file = File::create(path).unwrap();
|
||||
progress.set_message("Saving data...");
|
||||
rmp_serde::encode::write(&mut progress.wrap_write(file), &state).unwrap();
|
||||
|
|
Loading…
Reference in New Issue