From df7b01b5c18d7027c711a20ae88621591278b698 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 8 Oct 2023 03:51:36 +1000 Subject: [PATCH 1/5] Fix issues #12 and #13 --- Cargo.toml | 2 +- src/embeds.rs | 42 +++++++++++++++++++++++++++--------------- src/lib.rs | 3 +++ 3 files changed, 31 insertions(+), 16 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index d95666d..d7bfc9e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "frogbot" -version = "0.1.0" +version = "0.1.1" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/src/embeds.rs b/src/embeds.rs index 9f76a51..e85df46 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -15,6 +15,7 @@ use regex::Regex; use scraper::{Html, Selector}; /// Represents an Embed in the chat +#[derive(Default)] pub struct Embed { /// The title of the embed pub title: String, @@ -30,7 +31,7 @@ impl Embed { } /// Scrapes the HTML of a webpage and generates an [`Embed`] with the scraped information. -pub fn parse_metadata(page: &str) -> Embed { +pub fn parse_metadata(page: &str) -> Option { let doc_body = Html::parse_document(page); // Selectors used to get metadata are defined here @@ -44,19 +45,23 @@ pub fn parse_metadata(page: &str) -> Embed { let mut meta_title = String::default(); let mut meta_description = String::default(); - if let Some(title) = title { - meta_title = title.text().collect(); - } else { - warn!("Failed to parse title HTML"); + match (title, desc) { + // If both title and description aren't found return None + (None, None) => { + warn!("Couldn't parse any metadata for URL"); + return None; + }, + // Otherwise set the title/description to whatever we find + (Some(title), Some(desc)) => { + meta_title = title.text().collect(); + meta_description = desc.value().attr("content").unwrap().to_string(); + } + // Handle logging of parse failures + (Some(_), None) => warn!("Failed to parse description HTML"), + (None, Some(_)) => warn!("Failed to parse title HTML"), } - if let Some(desc) = desc { - meta_description = desc.value().attr("content").unwrap().to_string(); - } else { - warn!("Failed to parse description HTML"); - } - - Embed::new(meta_title, meta_description) + Some(Embed::new(meta_title, meta_description)) } /// Check if the message has any urls in it and get them if it does @@ -119,15 +124,22 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie let urls = get_urls_from_message(&text_content.body); - let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36").build().unwrap(); + let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap(); - for url in urls { + for mut url in urls { if let Ok(req) = reqwest_client.get(url).send().await { if let Ok(res) = req.text().await { // beware, dirty HTML parsing code - let embed = parse_metadata(&res); + let metadata = parse_metadata(&res); + + // If we didn't get any metadata set URL to nothing so it won't get repeated + // With no other embed data in the bot's embed message + if metadata.is_none() { + url = ""; + } // Build our message reply + let embed = metadata.unwrap_or(Embed::new("No metadata found".to_string(), "".to_string())); let bot_reply = RoomMessageEventContent::text_html( &embed.title, format!( diff --git a/src/lib.rs b/src/lib.rs index 829cec6..ae93518 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -141,6 +141,9 @@ pub async fn run(config: Config) -> anyhow::Result<()> { .await .expect("frogbot couldn't log into it's account."); + // Set the bot account's display name according to config + client.account().set_display_name(Some(&config.display_name)).await?; + warn!("Logged in successfully!"); warn!( "server: '{}', username: '{}', display name: '{}'", From 7d379448ff463dd21ca864624f48b9cf80eae953 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 8 Oct 2023 03:52:21 +1000 Subject: [PATCH 2/5] Run rustfmt --- src/embeds.rs | 5 +++-- src/lib.rs | 5 ++++- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/embeds.rs b/src/embeds.rs index e85df46..63aed72 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -50,7 +50,7 @@ pub fn parse_metadata(page: &str) -> Option { (None, None) => { warn!("Couldn't parse any metadata for URL"); return None; - }, + } // Otherwise set the title/description to whatever we find (Some(title), Some(desc)) => { meta_title = title.text().collect(); @@ -139,7 +139,8 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie } // Build our message reply - let embed = metadata.unwrap_or(Embed::new("No metadata found".to_string(), "".to_string())); + let embed = metadata + .unwrap_or(Embed::new("No metadata found".to_string(), "".to_string())); let bot_reply = RoomMessageEventContent::text_html( &embed.title, format!( diff --git a/src/lib.rs b/src/lib.rs index ae93518..2cf40ee 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -142,7 +142,10 @@ pub async fn run(config: Config) -> anyhow::Result<()> { .expect("frogbot couldn't log into it's account."); // Set the bot account's display name according to config - client.account().set_display_name(Some(&config.display_name)).await?; + client + .account() + .set_display_name(Some(&config.display_name)) + .await?; warn!("Logged in successfully!"); warn!( From 4f5e01802fbfaf46e34ba3fea9a4faca639f6ec4 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 8 Oct 2023 04:06:25 +1000 Subject: [PATCH 3/5] Fix bug that broke partial metadata parsing --- src/embeds.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/embeds.rs b/src/embeds.rs index 63aed72..1de2d15 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -57,8 +57,15 @@ pub fn parse_metadata(page: &str) -> Option { meta_description = desc.value().attr("content").unwrap().to_string(); } // Handle logging of parse failures - (Some(_), None) => warn!("Failed to parse description HTML"), - (None, Some(_)) => warn!("Failed to parse title HTML"), + // and set values to whatever we *did* manage to scrape + (Some(title), None) => { + warn!("Failed to parse description HTML"); + meta_title = title.text().collect(); + } + (None, Some(desc)) => { + warn!("Failed to parse title HTML"); + meta_description = desc.value().attr("content").unwrap().to_string(); + } } Some(Embed::new(meta_title, meta_description)) From da62c7c2c664ba9564b16995c62d456644dfdf63 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 8 Oct 2023 12:43:03 +1000 Subject: [PATCH 4/5] Refactor metadata parsing to avoid repetition --- src/embeds.rs | 36 +++++++++++++++--------------------- 1 file changed, 15 insertions(+), 21 deletions(-) diff --git a/src/embeds.rs b/src/embeds.rs index 1de2d15..aab20db 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -45,27 +45,21 @@ pub fn parse_metadata(page: &str) -> Option { let mut meta_title = String::default(); let mut meta_description = String::default(); - match (title, desc) { - // If both title and description aren't found return None - (None, None) => { - warn!("Couldn't parse any metadata for URL"); - return None; - } - // Otherwise set the title/description to whatever we find - (Some(title), Some(desc)) => { - meta_title = title.text().collect(); - meta_description = desc.value().attr("content").unwrap().to_string(); - } - // Handle logging of parse failures - // and set values to whatever we *did* manage to scrape - (Some(title), None) => { - warn!("Failed to parse description HTML"); - meta_title = title.text().collect(); - } - (None, Some(desc)) => { - warn!("Failed to parse title HTML"); - meta_description = desc.value().attr("content").unwrap().to_string(); - } + if let (None, None) = (title, desc) { + warn!("Couldn't parse any metadata for URL"); + return None; + } + + if let Some(title) = title { + meta_title = title.text().collect(); + } else { + warn!("Failed to parse title HTML"); + } + + if let Some(desc) = desc { + meta_description = desc.value().attr("content").unwrap().to_string(); + } else { + warn!("Failed to parse description HTML"); } Some(Embed::new(meta_title, meta_description)) From 1783ed00e6171f39e17fa2a3e3f8359f9f07b302 Mon Sep 17 00:00:00 2001 From: froge Date: Sun, 8 Oct 2023 14:04:23 +1000 Subject: [PATCH 5/5] Clean up & refactor embed output code --- src/embeds.rs | 62 +++++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 29 deletions(-) diff --git a/src/embeds.rs b/src/embeds.rs index aab20db..15741b9 100644 --- a/src/embeds.rs +++ b/src/embeds.rs @@ -127,46 +127,50 @@ pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, clie let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0.0.0 Safari/537.36").build().unwrap(); - for mut url in urls { + for url in urls { if let Ok(req) = reqwest_client.get(url).send().await { if let Ok(res) = req.text().await { // beware, dirty HTML parsing code let metadata = parse_metadata(&res); - // If we didn't get any metadata set URL to nothing so it won't get repeated - // With no other embed data in the bot's embed message - if metadata.is_none() { - url = ""; - } + // Build and send our message reply + if metadata.is_some() { + let embed = metadata.unwrap(); + let bot_reply = RoomMessageEventContent::text_html( + &embed.title, + format!( + "
+

{}

+

{}

+
", + &embed.title, &embed.description + ), + ) + .make_reply_to(&full_reply_event); - // Build our message reply - let embed = metadata - .unwrap_or(Embed::new("No metadata found".to_string(), "".to_string())); - let bot_reply = RoomMessageEventContent::text_html( - &embed.title, - format!( - r#" -
-
{}
-

{}

-

{}

-
- "#, - &url, &url, &embed.title, &embed.description - ), - ) - .make_reply_to(&full_reply_event); - - // Finally send the reply to the room - warn!("Sending embed for URL: '{}'", &url); - if room.send(bot_reply, None).await.is_err() { - warn!("Failed to send embed for URL: '{}'", &url); + // Finally send the reply to the room + warn!("Sending embed for URL: '{}'", &url); + if room.send(bot_reply, None).await.is_err() { + warn!("Failed to send embed for URL: '{}'", &url); + } + // If we didn't get any metadata send a generic "No metadata" response + } else { + let bot_reply = RoomMessageEventContent::text_html( + "Couldn't parse metadata for URL", + "
Couldn't parse metadata for URL
", + ) + .make_reply_to(&full_reply_event); + // Send the reply to the room + warn!("Sending 'No metadata' embed for URL: '{}'", &url); + if room.send(bot_reply, None).await.is_err() { + warn!("Failed to send embed for URL: '{}'", &url); + } } } else { warn!("Failed to parse HTML for URL: '{}'", &url); } } else { - warn!("Failed to get metadata for '{}'", &url); + warn!("Failed to fetch metadata for '{}'", &url); } } };