Make embed module resolve multiple URLs concurrently

This fixes issue #9, but it probably still needs some refactoring.

Also added logging of the EventID of the message that the embed
module is processing, and cleaned up the logging code slightly.
This commit is contained in:
froge 2024-07-14 18:57:47 +10:00
parent fce7dbf9be
commit ff9deefb8a
Signed by: froge
GPG key ID: A825E09930271BFA

View file

@ -14,6 +14,7 @@ use matrix_sdk::{
}; };
use regex::Regex; use regex::Regex;
use scraper::{Html, Selector}; use scraper::{Html, Selector};
use tokio::task::JoinSet;
use std::time::Instant; use std::time::Instant;
@ -120,15 +121,28 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
return; return;
}; };
let urls = get_urls_from_message(&text_content.body);
let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap(); let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap();
// Create and populate the JoinSet with async requests
// This allows us to await all the requests at once later
let mut requests = JoinSet::new();
let urls = get_urls_from_message(&text_content.body);
for url in urls { for url in urls {
match reqwest_client.get(url).send().await { requests.spawn(reqwest_client.get(url).send());
Err(e) => warn!("Failed to fetch metadata for URL '{}' with error: '{:?}'", &url, e), }
while let Some(req) = requests.join_next().await {
match req.unwrap() {
Err(e) => warn!(
"Failed to fetch metadata for eventID '{}' and error: '{:?}'",
full_reply_event.event_id, e
),
Ok(req) => { Ok(req) => {
let url = req.url().clone();
match req.text().await { match req.text().await {
Err(e) => warn!("Failed to parse HTML for URL '{}' with error: '{:?}'", &url, e), Err(e) => warn!(
"Failed to parse HTML for URL '{}' in eventID '{}' with error: '{:?}'",
url, full_reply_event.event_id, e
),
Ok(resp) => { Ok(resp) => {
// beware, dirty HTML parsing code // beware, dirty HTML parsing code
let metadata = parse_metadata(&resp); let metadata = parse_metadata(&resp);
@ -154,9 +168,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
); );
// Finally send the reply to the room // Finally send the reply to the room
warn!("Sending embed for URL: '{}'", &url); warn!(
"Sending embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
if room.send(bot_reply).await.is_err() { if room.send(bot_reply).await.is_err() {
warn!("Failed to send embed for URL: '{}'", &url); warn!(
"Failed to send embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
} }
warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed());
// If we didn't get any metadata send a generic "No metadata" response // If we didn't get any metadata send a generic "No metadata" response
@ -167,9 +187,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie
) )
.make_reply_to(&full_reply_event, ForwardThread::Yes, AddMentions::Yes); .make_reply_to(&full_reply_event, ForwardThread::Yes, AddMentions::Yes);
// Send the reply to the room // Send the reply to the room
warn!("Sending 'No metadata' embed for URL: '{}'", &url); warn!(
"Sending 'No metadata' embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
if room.send(bot_reply).await.is_err() { if room.send(bot_reply).await.is_err() {
warn!("Failed to send embed for URL: '{}'", &url); warn!(
"Failed to send embed for eventID '{}' with URL: '{}'",
full_reply_event.event_id, url
);
} }
warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed());
} }