Make embed module resolve multiple URLs asynchronously #20

Manually merged
froge merged 1 commit from embed-optimization into master 2024-07-14 09:17:41 +00:00
Showing only changes of commit ff9deefb8a - Show all commits

View file

@ -14,6 +14,7 @@ use matrix_sdk::{
}; };
use regex::Regex; use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use tokio::task::JoinSet;
use std::time::Instant; use std::time::Instant;
@ -120,15 +121,28 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
return; return;
}; };
let urls = get_urls_from_message(&text_content.body);
let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap(); let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap();
// Create and populate the JoinSet with async requests
// This allows us to await all the requests at once later
let mut requests = JoinSet::new();
let urls = get_urls_from_message(&text_content.body);
for url in urls { for url in urls {
match reqwest_client.get(url).send().await { requests.spawn(reqwest_client.get(url).send());
Err(e) => warn!("Failed to fetch metadata for URL '{}' with error: '{:?}'", &url, e), }
while let Some(req) = requests.join_next().await {
match req.unwrap() {
Err(e) => warn!(
"Failed to fetch metadata for eventID '{}' and error: '{:?}'",
full_reply_event.event_id, e
),
Ok(req) => { Ok(req) => {
let url = req.url().clone();
match req.text().await { match req.text().await {
Err(e) => warn!("Failed to parse HTML for URL '{}' with error: '{:?}'", &url, e), Err(e) => warn!(
"Failed to parse HTML for URL '{}' in eventID '{}' with error: '{:?}'",
url, full_reply_event.event_id, e
),
Ok(resp) => { Ok(resp) => {
// beware, dirty HTML parsing code // beware, dirty HTML parsing code
let metadata = parse_metadata(&resp); let metadata = parse_metadata(&resp);
@ -154,9 +168,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
); );
// Finally send the reply to the room // Finally send the reply to the room
warn!("Sending embed for URL: '{}'", &url); warn!(
"Sending embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
if room.send(bot_reply).await.is_err() { if room.send(bot_reply).await.is_err() {
warn!("Failed to send embed for URL: '{}'", &url); warn!(
"Failed to send embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
} }
warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed());
// If we didn't get any metadata send a generic "No metadata" response // If we didn't get any metadata send a generic "No metadata" response
@ -167,9 +187,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
) )
.make_reply_to(&full_reply_event, ForwardThread::Yes, AddMentions::Yes); .make_reply_to(&full_reply_event, ForwardThread::Yes, AddMentions::Yes);
// Send the reply to the room // Send the reply to the room
warn!("Sending 'No metadata' embed for URL: '{}'", &url); warn!(
"Sending 'No metadata' embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
if room.send(bot_reply).await.is_err() { if room.send(bot_reply).await.is_err() {
warn!("Failed to send embed for URL: '{}'", &url); warn!(
"Failed to send embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
} }
warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed());
} }