From bf4380c4f90e7900754ebaa2b5661a46caacedf6 Mon Sep 17 00:00:00 2001
From: AxelSilverdew <4kuchibh@gmail.com>
Date: Mon, 19 Jun 2023 21:58:13 +0530
Subject: [PATCH] refactor: moved all the core logic into lib.rs and embed related logic into embed.rs

---
 src/embeds.rs | 150 ++++++++++++++++++++++++++++++++++++++
 src/lib.rs    | 176 +++++++++++++++++++++++++++++++++++++++++++++
 src/main.rs   | 194 +-------------------------------------------------
 3 files changed, 329 insertions(+), 191 deletions(-)
 create mode 100644 src/embeds.rs
 create mode 100644 src/lib.rs

diff --git a/src/embeds.rs b/src/embeds.rs
new file mode 100644
index 0000000..9336afa
--- /dev/null
+++ b/src/embeds.rs
@@ -0,0 +1,150 @@
+//! # The Embed Module
+//!
+//! This module controls the embed functionality of frogbot.
+
+use lazy_static::lazy_static;
+use log::warn;
+use matrix_sdk::{
+    room::Room,
+    ruma::events::room::message::{
+        MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent,
+    },
+    Client,
+};
+use regex::Regex;
+use scraper::{Html, Selector};
+
+/// Represents an Embed in the chat
+///
+/// Holds the two pieces of page metadata that are rendered in a reply.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Embed {
+    /// The title of the embed
+    pub title: String,
+    /// The description of the embed
+    pub description: String,
+}
+
+impl Embed {
+    /// Creates a new [`Embed`] from an already extracted title and description.
+    pub fn new(title: String, description: String) -> Self {
+        Self { title, description }
+    }
+}
+
+/// Scrapes the HTML of a webpage and generates an [`Embed`] with the scraped information.
+///
+/// The title is taken from the `<title>` element and the description from the
+/// `content` attribute of `<meta name="description">`. Whenever either piece is
+/// missing a warning is logged and the literal string `"None"` is used instead.
+pub fn parse_metadata(page: &str) -> Embed {
+    let doc_body = Html::parse_document(page);
+
+    // Selectors used to get metadata are defined here
+    let title_selector = Selector::parse("title").expect("static title selector is valid");
+    let description_selector = Selector::parse("meta[name=\"description\"]")
+        .expect("static description selector is valid");
+
+    let meta_title = match doc_body.select(&title_selector).next() {
+        Some(title) => title.text().collect::<String>(),
+        None => {
+            warn!("Failed to parse title HTML");
+            String::from("None")
+        }
+    };
+
+    // The page is untrusted input: a description tag without a `content`
+    // attribute is handled like a missing tag instead of panicking.
+    let meta_description = match doc_body
+        .select(&description_selector)
+        .next()
+        .and_then(|desc| desc.value().attr("content"))
+    {
+        Some(content) => content.to_string(),
+        None => {
+            warn!("Failed to parse description HTML");
+            String::from("None")
+        }
+    };
+
+    Embed::new(meta_title, meta_description)
+}
+
+/// Check if the message has any urls in it and get them if it does
+///
+/// Matching is case-insensitive. The returned slices borrow from `message` and keep
+/// its original casing, so case-sensitive URL paths survive. URLs that mention
+/// `localhost` or `127.0.0.1` are skipped.
+fn get_urls_from_message(message: &str) -> Vec<&str> {
+    // Using lazy static magic here, so this means the regex is compiled exactly once
+    // After initial compile it gets reused instead of recompiling on every message event
+    lazy_static! {
+        // Adapted from https://urlregex.com/ with the `(?i)` flag added so the
+        // caller no longer has to lower-case the whole message first.
+        static ref RE: Regex = Regex::new(r"(?i)(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
+    }
+
+    RE.find_iter(message)
+        .map(|regex_match| regex_match.as_str())
+        .filter(|url| {
+            // If the url points to localhost, we don't want to embed it, so we ignore it
+            let lowered = url.to_ascii_lowercase();
+            if lowered.contains("localhost") || lowered.contains("127.0.0.1") {
+                warn!("This is probably a malicious URL, ignoring!");
+                false
+            } else {
+                warn!("Found {}", url);
+                true
+            }
+        })
+        .collect()
+}
+
+/// Checks messages for valid links and generates embeds if found
+pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
+    if let Room::Joined(room) = room {
+        let full_reply_event = event.clone().into_full_event(room.room_id().to_owned());
+        let MessageType::Text(text_content) = event.content.msgtype else {
+            warn!("Ignoring message, content is not plaintext!");
+            return;
+        };
+
+        // If the sender ID matches our client, ignore the message
+        // We don't want to reply to ourselves
+        let client_user_id = client.user_id().unwrap();
+        if event.sender == client_user_id {
+            return;
+        }
+
+        // Extract URLs from the untouched body: lower-casing it would corrupt
+        // case-sensitive paths and query strings before they are requested.
+        let urls = get_urls_from_message(&text_content.body);
+        if urls.is_empty() {
+            // Nothing to embed, so don't bother building an HTTP client.
+            return;
+        }
+
+        let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36").build().unwrap();
+
+        for url in
urls { + if let Ok(req) = reqwest_client.get(url).send().await { + if let Ok(res) = req.text().await { + // beware, dirty HTML parsing code + let embed = parse_metadata(&res); + + // Build our message reply + let bot_reply = RoomMessageEventContent::text_html( + &embed.title, + format!( + r#" +
+
{}
+

{}

+

{}

+
+                            "#,
+                            // Page metadata and the URL are untrusted input, so they
+                            // are escaped before being placed into the HTML reply.
+                            escape_html(url),
+                            escape_html(url),
+                            escape_html(&embed.title),
+                            escape_html(&embed.description)
+                        ),
+                    )
+                    .make_reply_to(&full_reply_event);
+
+                    // Finally send the reply to the room
+                    warn!("Sending embed for URL: '{}'", &url);
+                    if room.send(bot_reply, None).await.is_err() {
+                        warn!("Failed to send embed for URL: '{}'", &url);
+                    }
+                } else {
+                    warn!("Failed to parse HTML for URL: '{}'", &url);
+                }
+            } else {
+                warn!("Failed to get metadata for '{}'", &url);
+            }
+        }
+    };
+}
+
+/// Escapes the characters that are significant in HTML text and attribute values.
+fn escape_html(input: &str) -> String {
+    let mut escaped = String::with_capacity(input.len());
+    for ch in input.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&#39;"),
+            _ => escaped.push(ch),
+        }
+    }
+    escaped
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..eb52e76
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,176 @@
+//! A multi-purpose bot for Matrix
+#![deny(missing_docs)]
+pub mod embeds;
+
+use log::{error, warn};
+use matrix_sdk::{
+    config::SyncSettings,
+    room::Room,
+    ruma::{
+        api::client::uiaa, events::room::member::StrippedRoomMemberEvent, OwnedDeviceId,
+        OwnedRoomId,
+    },
+    Client, ClientBuildError,
+};
+use serde::{Deserialize, Serialize};
+
+/// Represents the entries in the configuration file.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Config {
+    /// Your Homeserver URL (e.g. "matrix.yourdomain.com")
+    pub homeserver: String,
+    /// The Bot User's Username (e.g. "frogbot")
+    pub username: String,
+    /// The Display Name of the Bot (e.g. "Frogbot 🐸")
+    pub display_name: String,
+    /// The Password to the Bot User (e.g. "hunter2")
+    pub password: String,
+    /// A List of All the Rooms to Join (e.g. ["!myid:matrix.yourdomain.com"] )
+    pub room_ids: Vec<OwnedRoomId>,
+}
+
+impl Config {
+    /// Loads a config file for frogbot to use.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `config_file` cannot be read or if its contents are not valid
+    /// TOML for a [`Config`]. The panic message names the path and the cause.
+    pub fn load(config_file: &str) -> Config {
+        let contents = std::fs::read_to_string(config_file)
+            .unwrap_or_else(|e| panic!("Failed to read config file '{}': {}", config_file, e));
+        toml::from_str(&contents)
+            .unwrap_or_else(|e| panic!("Failed to parse TOML config '{}': {}", config_file, e))
+    }
+
+    /// Returns a new frogbot client using the [`Config`].
+    ///
+    /// # Errors
+    ///
+    /// Returns the [`ClientBuildError`] reported by the client builder.
+    pub async fn create_client(&self) -> Result<Client, ClientBuildError> {
+        Client::builder()
+            .homeserver_url(&self.homeserver)
+            .handle_refresh_tokens()
+            .build()
+            .await
+    }
+}
+
+/// Deletes all old encryption devices.
+///
+/// We don't want to end up with a ton of encryption devices that aren't active.
+/// This function removes all the old ones while preserving the current device.
+///
+/// # Errors
+///
+/// Returns an error if the device list cannot be fetched or if the
+/// password-authenticated retry of the deletion fails. A first deletion attempt that
+/// fails without a UIAA challenge is logged and otherwise ignored.
+///
+/// # Panics
+///
+/// This function will panic if it cannot get a device ID from the current client.
+pub async fn delete_old_encryption_devices(client: &Client, config: &Config) -> anyhow::Result<()> {
+    warn!("Deleting old encryption devices");
+    let current_device_id = client.device_id().expect("Failed to get device ID");
+    let old_devices: Vec<OwnedDeviceId> = client
+        .devices()
+        .await?
+        .devices
+        .iter()
+        .filter(|d| d.device_id != current_device_id)
+        .map(|d| d.device_id.to_owned())
+        .collect();
+
+    // Nothing to clean up, so skip the request (and the password re-send) entirely.
+    if old_devices.is_empty() {
+        warn!("Finished deleting old encryption devices");
+        return Ok(());
+    }
+
+    // Deleting these devices needs "user interaction" or something, so we just send password again
+    // and it works :D
+    if let Err(e) = client.delete_devices(&old_devices, None).await {
+        if let Some(info) = e.uiaa_response() {
+            let mut password = uiaa::Password::new(
+                uiaa::UserIdentifier::UserIdOrLocalpart(&config.username),
+                &config.password,
+            );
+            password.session = info.session.as_deref();
+            client
+                .delete_devices(&old_devices, Some(uiaa::AuthData::Password(password)))
+                .await?;
+        } else {
+            // Not an authentication challenge: report it instead of dropping it silently.
+            error!("Failed to delete old encryption devices: {}", e);
+        }
+    }
+    warn!("Finished deleting old encryption devices");
+    Ok(())
+}
+
+/// Rejects invites that aren't valid anymore or have timed out.
+///
+/// Pending invites to rooms that are listed in `config.room_ids` (and that are
+/// neither spaces nor direct chats) are accepted; every other pending invite is
+/// rejected. Failures are logged and the remaining invites are still processed.
+pub async fn reject_stale_invites(client: &Client, config: &Config) {
+    warn!("Rejecting stale invites");
+    for room in client.invited_rooms() {
+        let room_name = room.name().unwrap_or_default();
+        let is_configured_room = !room.is_space()
+            && !room.is_direct()
+            && config.room_ids.iter().any(|r| *r == room.room_id());
+
+        if is_configured_room {
+            warn!("Got invite to room: '{}'", room_name);
+            // A single failed invite must not take the whole bot down.
+            if let Err(e) = room.accept_invitation().await {
+                error!("Failed to accept invite to room '{}': {}", room_name, e);
+                continue;
+            }
+            warn!("Joining room!");
+            if let Err(e) = client.join_room_by_id(room.room_id()).await {
+                error!(
+                    "Failed to join room with id: {} and error: {}",
+                    room.room_id(),
+                    e
+                );
+            }
+        } else {
+            warn!("Rejecting invite to room: '{}'", room_name);
+            if let Err(e) = room.reject_invitation().await {
+                error!("Failed to reject invite to room '{}': {}", room_name, e);
+            }
+        }
+    }
+    warn!("Finished rejecting stale invites");
+}
+
+/// Run frogbot
+///
+/// Starts the bot and starts listening for events
+///
+/// # Panics
+///
+/// This function will panic in the following scenarios:
+/// - If it cannot create a client using the current [`Config`].
+/// - If the bot can't log into its account.
+/// - If the initial event sync fails.
+///
+/// # Errors
+///
+/// Returns an error if deleting old encryption devices fails or if the
+/// long-running sync loop stops with an error.
+pub async fn run(config: Config) -> anyhow::Result<()> {
+    let client = &config
+        .create_client()
+        .await
+        .expect("There was a problem creating frogbot's client.");
+
+    // Attempt to log into the server
+    client
+        .login_username(&config.username, &config.password)
+        .initial_device_display_name(&config.display_name)
+        .send()
+        .await
+        .expect("frogbot couldn't log into it's account.");
+
+    warn!("Logged in successfully!");
+    warn!(
+        "server: '{}', username: '{}', display name: '{}'",
+        &config.homeserver, &config.username, &config.display_name
+    );
+
+    // sync client once so we get latest events to work on before we continue
+    client
+        .sync_once(SyncSettings::default())
+        .await
+        .expect("Failed the initial event sync.");
+
+    delete_old_encryption_devices(client, &config).await?;
+
+    reject_stale_invites(client, &config).await;
+
+    // Add handler to log new room invites as they're received
+    client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move {
+        if let Room::Invited(invited_room) = room {
+            warn!(
+                "Got invite to room: '{}' sent by '{}'",
+                invited_room.name().unwrap_or_default(),
+                ev.sender
+            );
+        }
+    });
+
+    // Add handler to detect and create embeds for HTTP links in chat
+    client.add_event_handler(embeds::embed_handler);
+
+    // Now keep on syncing forever. `sync()` will use the latest sync token automatically.
+    warn!("Starting sync loop");
+    client.sync(SyncSettings::default()).await?;
+
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index ef33c43..85ce006 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,197 +1,9 @@
-use anyhow;
-use toml;
-use tokio;
-use scraper::{Html, Selector};
-use lazy_static::lazy_static;
-use regex::Regex;
-use log::*;
-use serde::{Serialize, Deserialize};
-use matrix_sdk::{
-    Client,
-    config::SyncSettings,
-    room::Room,
-
-    ruma::OwnedDeviceId,
-    ruma::OwnedRoomId,
-    ruma::api::client::uiaa,
-    ruma::events::room::member::StrippedRoomMemberEvent,
-    ruma::events::room::message::{MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent},
-};
-
-#[derive(Serialize, Deserialize, Debug)]
-struct TomlConfig {
-    homeserver: String,
-    username: String,
-    display_name: String,
-    password: String,
-    room_ids: Vec<OwnedRoomId>,
-}
+use frogbot::{run, Config};
 
 #[tokio::main]
 async fn main() -> anyhow::Result<()> {
     // init logging
     tracing_subscriber::fmt::init();
-    let config = load_config();
-    let client = Client::builder()
-        .homeserver_url(&config.homeserver)
-        .handle_refresh_tokens()
-        .build()
-        .await?;
-
-    // try login
-    client
-        .login_username(&config.username, &config.password)
-        .initial_device_display_name(&config.display_name)
-        .send()
-        .await?;
-
-    warn!("Logged in successfully!");
-    warn!("server: '{}', username: '{}', display name: '{}'", &config.homeserver, &config.username, &config.display_name);
-
-    // sync client once so we get latest events to work on before we continue
-    client.sync_once(SyncSettings::default()).await?;
-
-    warn!("Deleting old encryption devices");
-    let current_device_id = client.device_id().expect("Failed to get device ID");
-    let old_devices: Vec<OwnedDeviceId> = client.devices().await?.devices.iter().filter(|d| d.device_id != current_device_id).map(|d| d.device_id.to_owned()).collect();
-
-    // Deleting these devices needs "user interaction" or something, so we just send password again
-    // and it works :D
-    if let Err(e) =
client.delete_devices(&old_devices, None).await { - if let Some(info) = e.uiaa_response() { - let mut password = uiaa::Password::new( - uiaa::UserIdentifier::UserIdOrLocalpart(&config.username), - &config.password, - ); - password.session = info.session.as_deref(); - client - .delete_devices(&old_devices, Some(uiaa::AuthData::Password(password))) - .await?; - } - } - warn!("Finished deleting old encryption devices"); - warn!("Rejecting stale invites"); - for room in client.invited_rooms() { - let room_name = room.name().unwrap_or_default(); - if !room.is_space() && !room.is_direct() && config.room_ids.iter().any(|r| *r == room.room_id()) { - warn!("Got invite to room: '{}'", room_name); - room.accept_invitation().await.expect("Failed to accept invite"); - warn!("Joining room!"); - if let Err(e) = client.join_room_by_id(room.room_id()).await { - error!("Failed to join room with id: {} and error: {}", room.room_id(), e); - } - } else { - warn!("Rejecting invite to room: '{}'", room_name); - room.reject_invitation().await.unwrap_or_default(); - } - } - warn!("Finished rejecting stale invites"); - - // Add handler to log new room invites as they're recieved - client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move { - if let Room::Invited(invited_room) = room { - warn!("Got invite to room: '{}' sent by '{}'", invited_room.name().unwrap_or_default(), ev.sender); - } - }); - - // Add handler to detect and create embeds for HTTP links in chat - client.add_event_handler(handle_message_events); - - async fn handle_message_events(ev: OriginalSyncRoomMessageEvent, room: Room, client: Client) { - // Using lazy static magic here, so this means the regex is compiled exactly once - // After initial compile it gets reused instead of recompiling on every message event - lazy_static! 
{ - // shamelessly stolen and modified from some garbage blog online - // I have no fucking idea how this works - https://urlregex.com/ - static ref RE: Regex = Regex::new(r"(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap(); - } - if let Room::Joined(room) = room { - let full_reply_event = ev.clone().into_full_event(room.room_id().to_owned()); - let MessageType::Text(text_content) = ev.content.msgtype else { - warn!("Ignoring message, content is not plaintext!"); - return; - }; - // If the sender ID matches our client, ignore message - // We don't want to reply to ourselves - let client_user_id = client.user_id().unwrap(); - if ev.sender == client_user_id { - return; - } - - let msg = text_content.body.to_lowercase(); - // Make a HTTP request and parse out the metadata info - if let Some(url) = RE.find(&msg) { - if url.as_str().contains("localhost") || url.as_str().contains("127.0.0.1") { - warn!("This is probably a malicious URL, ignoring!"); - return; - } - warn!("Got message with URL: '{}', requesting metadata!", url.as_str()); - if let Ok(req) = reqwest::get(url.as_str()).await { - if let Ok(resp) = req.text().await { - // beware dirty HTML parsing code - let (title, desc) = parse_metadata(&resp); - - // Build our message reply - let msg_reply = RoomMessageEventContent::text_plain( - format!("Title: {}\nDescription: {}", title, desc)) - .make_reply_to(&full_reply_event); - - // Finally send the reply to the room - warn!("Sending metadata for URL: '{}'", url.as_str()); - if room.send(msg_reply, None).await.is_err() { - warn!("Failed to send metadata reply for URL: '{}'", url.as_str()); - } - } else { - warn!("Failed to parse HTML response into text for URL: '{}'", url.as_str()); - } - } else { - warn!("Failed to get metadata for URL: '{}'", url.as_str()); - } - } 
else { - info!("Got message but found no URLs, ignoring"); - } - } - } - - fn parse_metadata(page: &String) -> (String, String) { - let doc_body = Html::parse_document(page); - - // Selectors used to get metadata are defined here - let title_selector = Selector::parse("title").unwrap(); - let description_selector = Selector::parse("meta[name=\"description\"]").unwrap(); - - // Grab the actual data - let title = doc_body.select(&title_selector).next(); - let desc = doc_body.select(&description_selector).next(); - // Clean up meta info and store it as a string - let mut meta_title = String::from("None"); - let mut meta_description = String::from("None"); - - if title.is_some() { - meta_title = title.unwrap().text().collect(); - } else { - warn!("Failed to parse title HTML"); - } - - if desc.is_some() { - meta_description = desc.unwrap().value().attr("content").unwrap().to_string(); - } else { - warn!("Failed to parse description HTML"); - } - - return (meta_title, meta_description); - - } - - // Now keep on syncing forever. `sync()` will use the latest sync token automatically. - warn!("Starting sync loop"); - client.sync(SyncSettings::default()).await?; - Ok(()) -} - -fn load_config() -> TomlConfig { - let config_file = std::fs::read_to_string("./config.toml").expect("Failed to read config file"); - let config: TomlConfig = toml::from_str(&config_file).expect("Failed to parse TOML config"); - return config; - + let config = Config::load("./config.toml"); + run(config).await }