From ff9deefb8a05083543fb26450a8c64a06de11368 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 14 Jul 2024 18:57:47 +1000 Subject: [PATCH] Make embed module resolve multiple URLs async This fixes issue #9 but probably needs some refactoring still Also added logging for the current EventID of the message that the embed module is processing, as well as cleaning up the logging code slightly. --- src/embeds.rs | 44 +++++++++++++++++++++++++++++++++++--------- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/src/embeds.rs b/src/embeds.rs index 7ae7cac..9df79e6 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -14,6 +14,7 @@ use matrix_sdk::{ }; use regex::Regex; use scraper::{Html, Selector}; +use tokio::task::JoinSet; use std::time::Instant; @@ -120,15 +121,28 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie return; }; - let urls = get_urls_from_message(&text_content.body); let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap(); - + // Create and populate the JoinSet with async requests + // This allows us to await all the requests at once later + let mut requests = JoinSet::new(); + let urls = get_urls_from_message(&text_content.body); for url in urls { - match reqwest_client.get(url).send().await { - Err(e) => warn!("Failed to fetch metadata for URL '{}' with error: '{:?}'", &url, e), + requests.spawn(reqwest_client.get(url).send()); + } + + while let Some(req) = requests.join_next().await { + match req.unwrap() { + Err(e) => warn!( + "Failed to fetch metadata for eventID '{}' and error: '{:?}'", + full_reply_event.event_id, e + ), Ok(req) => { + let url = req.url().clone(); match req.text().await { - Err(e) => warn!("Failed to parse HTML for URL '{}' with error: '{:?}'", &url, e), + Err(e) => warn!( + "Failed to parse HTML for URL '{}' in eventID '{}' with error: '{:?}'", + url, full_reply_event.event_id, e + ), Ok(resp) => { // beware, dirty HTML parsing code let metadata = parse_metadata(&resp); @@ -154,9 +168,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie ); // Finally send the reply to the room - warn!("Sending embed for URL: '{}'", &url); + warn!( + "Sending embed for eventID '{}' with URL: '{}'", + full_reply_event.event_id, url + ); if room.send(bot_reply).await.is_err() { - warn!("Failed to send embed for URL: '{}'", &url); + warn!( + "Failed to send embed for eventID '{}' with URL: '{}'", + full_reply_event.event_id, url + ); } warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); // If we didn't get any metadata send a generic "No metadata" response @@ -167,9 +187,15 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie ) .make_reply_to(&full_reply_event, ForwardThread::Yes, AddMentions::Yes); // Send the reply to the room - warn!("Sending 'No metadata' embed for URL: '{}'", &url); + warn!( + "Sending 'No metadata' embed for eventID '{}' with URL: '{}'", + full_reply_event.event_id, url + ); if room.send(bot_reply).await.is_err() { - warn!("Failed to send embed for URL: '{}'", &url); + warn!( + "Failed to send embed for eventID '{}' with URL: '{}'", + full_reply_event.event_id, url + ); } warn!("Ran fn room.send after: '{:#?}'", fn_start.elapsed()); }