From bf4380c4f90e7900754ebaa2b5661a46caacedf6 Mon Sep 17 00:00:00 2001
From: AxelSilverdew <4kuchibh@gmail.com>
Date: Mon, 19 Jun 2023 21:58:13 +0530
Subject: [PATCH] refactor: moved all the core logic into lib.rs and
 embed-related logic into embeds.rs
---
src/embeds.rs | 150 ++++++++++++++++++++++++++++++++++++++
src/lib.rs | 176 +++++++++++++++++++++++++++++++++++++++++++++
src/main.rs | 194 +-------------------------------------------------
3 files changed, 329 insertions(+), 191 deletions(-)
create mode 100644 src/embeds.rs
create mode 100644 src/lib.rs
diff --git a/src/embeds.rs b/src/embeds.rs
new file mode 100644
index 0000000..9336afa
--- /dev/null
+++ b/src/embeds.rs
@@ -0,0 +1,150 @@
+//! # The Embed Module
+//!
+//! This module controls the embed functionality of frogbot.
+
+use lazy_static::lazy_static;
+use log::warn;
+use matrix_sdk::{
+ room::Room,
+ ruma::events::room::message::{
+ MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent,
+ },
+ Client,
+};
+use regex::Regex;
+use scraper::{Html, Selector};
+
+/// A link preview ("embed") that the bot posts in reply to a message containing a URL.
+///
+/// Both fields hold plain text. [`parse_metadata`] stores the literal string `"None"` in a
+/// field when the scraped page does not provide the corresponding value.
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct Embed {
+    /// The title of the embed
+    pub title: String,
+    /// The description
+    pub description: String,
+}
+
+impl Embed {
+    /// Builds an [`Embed`] from an already extracted title and description.
+    pub fn new(title: String, description: String) -> Self {
+        Self { title, description }
+    }
+}
+
+/// Scrapes the HTML of a webpage and generates an [`Embed`] with the scraped information.
+///
+/// The title is the text content of the first `title` element and the description is the
+/// `content` attribute of the first `meta` element whose `name` is `description`. A value
+/// that cannot be found is replaced by the literal string `"None"` and a warning is logged.
+///
+/// A description tag without a `content` attribute is treated like a missing description
+/// instead of causing a panic, because `page` comes from an arbitrary remote server.
+pub fn parse_metadata(page: &str) -> Embed {
+    let doc_body = Html::parse_document(page);
+
+    // Both selectors are constants, so failing to parse them is a programming error.
+    let title_selector = Selector::parse("title").expect("`title` is a valid selector");
+    let description_selector = Selector::parse("meta[name=\"description\"]")
+        .expect("the description selector is valid");
+
+    let meta_title: String = match doc_body.select(&title_selector).next() {
+        Some(title) => title.text().collect(),
+        None => {
+            warn!("Failed to parse title HTML");
+            String::from("None")
+        }
+    };
+
+    // `attr` yields `None` when the tag carries no `content` attribute.
+    let meta_description = match doc_body
+        .select(&description_selector)
+        .next()
+        .and_then(|desc| desc.value().attr("content"))
+    {
+        Some(content) => content.to_string(),
+        None => {
+            warn!("Failed to parse description HTML");
+            String::from("None")
+        }
+    };
+
+    Embed::new(meta_title, meta_description)
+}
+
+/// Escapes the characters that are significant in HTML text and attribute values.
+fn escape_html(raw: &str) -> String {
+    let mut escaped = String::with_capacity(raw.len());
+    for ch in raw.chars() {
+        match ch {
+            '&' => escaped.push_str("&amp;"),
+            '<' => escaped.push_str("&lt;"),
+            '>' => escaped.push_str("&gt;"),
+            '"' => escaped.push_str("&quot;"),
+            '\'' => escaped.push_str("&#39;"),
+            other => escaped.push(other),
+        }
+    }
+    escaped
+}
+
+/// Returns every URL found in `message`, in order of appearance.
+///
+/// Matching is case-insensitive and each returned slice borrows from `message` with its
+/// original casing intact, because URL paths and query strings are often case-sensitive.
+/// URLs that mention `localhost` or `127.0.0.1` are dropped so the bot does not fetch
+/// anything from the machine it runs on.
+fn get_urls_from_message(message: &str) -> Vec<&str> {
+    // Using lazy static magic here, so this means the regex is compiled exactly once
+    // After initial compile it gets reused instead of recompiling on every message event
+    lazy_static! {
+        // Pattern adapted from https://urlregex.com/; `(?i)` makes it case-insensitive so the
+        // message does not have to be lowercased before matching.
+        static ref RE: Regex = Regex::new(r"(?i)(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
+    }
+
+    RE.find_iter(message)
+        .map(|regex_match| regex_match.as_str())
+        .filter(|url| {
+            // Compare on a lowercased copy so `LOCALHOST` cannot slip past the check.
+            let lowered = url.to_ascii_lowercase();
+            if lowered.contains("localhost") || lowered.contains("127.0.0.1") {
+                warn!("This is probably a malicious URL, ignoring!");
+                false
+            } else {
+                warn!("Found {}", url);
+                true
+            }
+        })
+        .collect()
+}
+
+/// Checks messages for valid links and generates embeds if found
+///
+/// For every URL in a plain-text message posted to a joined room by someone other than the
+/// bot itself, the page is downloaded, its metadata is scraped with [`parse_metadata`] and
+/// an HTML reply quoting the original message is sent to the room. Failures while fetching,
+/// reading or sending are logged and never abort the handler.
+///
+/// The scraped title and description come from an untrusted page, so they (and the URL) are
+/// HTML-escaped before being placed in the reply.
+pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
+    // Only rooms the bot has joined can be replied to.
+    let Room::Joined(room) = room else {
+        return;
+    };
+
+    let full_reply_event = event.clone().into_full_event(room.room_id().to_owned());
+    let MessageType::Text(text_content) = event.content.msgtype else {
+        warn!("Ignoring message, content is not plaintext!");
+        return;
+    };
+
+    // If the sender ID matches our client, ignore the message
+    // We don't want to reply to ourselves
+    let Some(client_user_id) = client.user_id() else {
+        warn!("Ignoring message, client has no user ID!");
+        return;
+    };
+    if event.sender == client_user_id {
+        return;
+    }
+
+    // Extract from the original body: lowercasing it would corrupt case-sensitive URLs.
+    let urls = get_urls_from_message(&text_content.body);
+    if urls.is_empty() {
+        // Nothing to embed, so skip building an HTTP client for this message.
+        return;
+    }
+
+    let reqwest_client = match reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36").build() {
+        Ok(http_client) => http_client,
+        Err(e) => {
+            warn!("Failed to build HTTP client: {}", e);
+            return;
+        }
+    };
+
+    for url in urls {
+        let Ok(req) = reqwest_client.get(url).send().await else {
+            warn!("Failed to get metadata for '{}'", &url);
+            continue;
+        };
+        let Ok(res) = req.text().await else {
+            warn!("Failed to parse HTML for URL: '{}'", &url);
+            continue;
+        };
+
+        // beware, dirty HTML parsing code
+        let embed = parse_metadata(&res);
+        let safe_url = escape_html(url);
+
+        // Build our message reply
+        // NOTE(review): this markup is reconstructed. The template in this patch had lost
+        // its tags and no longer had one placeholder per argument; confirm the layout.
+        let bot_reply = RoomMessageEventContent::text_html(
+            &embed.title,
+            format!(
+                r#"
+                <blockquote>
+                    <h6><a href="{}">{}</a></h6>
+                    <h3><strong>{}</strong></h3>
+                    <p>{}</p>
+                </blockquote>
+                "#,
+                safe_url,
+                safe_url,
+                escape_html(&embed.title),
+                escape_html(&embed.description)
+            ),
+        )
+        .make_reply_to(&full_reply_event);
+
+        // Finally send the reply to the room
+        warn!("Sending embed for URL: '{}'", &url);
+        if room.send(bot_reply, None).await.is_err() {
+            warn!("Failed to send embed for URL: '{}'", &url);
+        }
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..eb52e76
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,176 @@
+//! A multi-purpose bot for Matrix
+#![deny(missing_docs)]
+pub mod embeds;
+
+use log::{error, warn};
+use matrix_sdk::{
+ config::SyncSettings,
+ room::Room,
+ ruma::{
+ api::client::uiaa, events::room::member::StrippedRoomMemberEvent, OwnedDeviceId,
+ OwnedRoomId,
+ },
+ Client, ClientBuildError,
+};
+use serde::{Deserialize, Serialize};
+
+/// Represents the entries in the configuration file.
+///
+/// Note that the derived `Debug` output includes every field, [`Config::password`] among
+/// them, so avoid logging a whole `Config`.
+#[derive(Serialize, Deserialize, Debug)]
+pub struct Config {
+    /// Your Homeserver URL (e.g. "matrix.yourdomain.com")
+    pub homeserver: String,
+    /// The Bot User's Username (e.g. "frogbot")
+    pub username: String,
+    /// The Display Name of the Bot (e.g. "Frogbot 🐸")
+    pub display_name: String,
+    /// The Password to the Bot User (e.g. "hunter2")
+    pub password: String,
+    /// A List of All the Rooms to Join (e.g. ["!myid:matrix.yourdomain.com"] )
+    ///
+    /// Invites to rooms that are not in this list are rejected at startup.
+    pub room_ids: Vec<OwnedRoomId>,
+}
+
+impl Config {
+    /// Loads a config file for frogbot to use.
+    ///
+    /// `config_file` is the path of a TOML file whose keys match the fields of [`Config`].
+    ///
+    /// # Panics
+    ///
+    /// Panics if the file cannot be read or if its contents cannot be parsed as a
+    /// [`Config`].
+    pub fn load(config_file: &str) -> Config {
+        let contents =
+            std::fs::read_to_string(config_file).expect("Failed to read config file.");
+        toml::from_str(&contents).expect("Failed to parse TOML config.")
+    }
+
+    /// Returns a new frogbot client using the [`Config`].
+    ///
+    /// The client targets [`Config::homeserver`] and is set up to handle refresh tokens.
+    /// Logging in is left to the caller.
+    ///
+    /// # Errors
+    ///
+    /// Returns a [`ClientBuildError`] if the client cannot be built.
+    pub async fn create_client(&self) -> Result<Client, ClientBuildError> {
+        Client::builder()
+            .homeserver_url(&self.homeserver)
+            .handle_refresh_tokens()
+            .build()
+            .await
+    }
+}
+
+/// Deletes all old encryption devices.
+///
+/// We don't want to end up with a ton of encryption devices that aren't active.
+/// This function removes all the old ones while preserving the current device.
+///
+/// The first delete request is sent without authentication. If the server answers with a
+/// user-interactive authentication challenge, the request is repeated with the password
+/// from `config`. Any other failure of the first request is logged and otherwise ignored,
+/// so a leftover device never prevents the bot from starting.
+///
+/// # Errors
+///
+/// Returns an error if the device list cannot be fetched or if the authenticated retry
+/// fails.
+///
+/// # Panics
+///
+/// This function will panic if it cannot get a device ID from the current client.
+pub async fn delete_old_encryption_devices(client: &Client, config: &Config) -> anyhow::Result<()> {
+    warn!("Deleting old encryption devices");
+    let current_device_id = client.device_id().expect("Failed to get device ID");
+    let old_devices: Vec<OwnedDeviceId> = client
+        .devices()
+        .await?
+        .devices
+        .iter()
+        .filter(|d| d.device_id != current_device_id)
+        .map(|d| d.device_id.to_owned())
+        .collect();
+
+    // Nothing to clean up, so don't bother the server with an empty delete request.
+    if old_devices.is_empty() {
+        warn!("Finished deleting old encryption devices");
+        return Ok(());
+    }
+
+    // Deleting these devices needs "user interaction" or something, so we just send password again
+    // and it works :D
+    if let Err(e) = client.delete_devices(&old_devices, None).await {
+        if let Some(info) = e.uiaa_response() {
+            let mut password = uiaa::Password::new(
+                uiaa::UserIdentifier::UserIdOrLocalpart(&config.username),
+                &config.password,
+            );
+            password.session = info.session.as_deref();
+            client
+                .delete_devices(&old_devices, Some(uiaa::AuthData::Password(password)))
+                .await?;
+        } else {
+            // Not an authentication challenge: report it instead of dropping it silently.
+            error!("Failed to delete old encryption devices: {}", e);
+        }
+    }
+    warn!("Finished deleting old encryption devices");
+    Ok(())
+}
+
+/// Accepts pending invites to configured rooms and rejects every other pending invite.
+///
+/// An invite is accepted when the room is neither a space nor a direct chat and its ID is
+/// listed in [`Config::room_ids`]. All other invites are rejected.
+///
+/// Failures to accept, join or reject are logged and the remaining invites are still
+/// processed; this function never panics on a failed request.
+pub async fn reject_stale_invites(client: &Client, config: &Config) {
+    warn!("Rejecting stale invites");
+    for room in client.invited_rooms() {
+        let room_name = room.name().unwrap_or_default();
+        let is_wanted = !room.is_space()
+            && !room.is_direct()
+            && config.room_ids.iter().any(|r| *r == room.room_id());
+
+        if !is_wanted {
+            warn!("Rejecting invite to room: '{}'", room_name);
+            if let Err(e) = room.reject_invitation().await {
+                error!(
+                    "Failed to reject invite to room with id: {} and error: {}",
+                    room.room_id(),
+                    e
+                );
+            }
+            continue;
+        }
+
+        warn!("Got invite to room: '{}'", room_name);
+        if let Err(e) = room.accept_invitation().await {
+            // A failed accept must not take the whole bot down; move on to the next invite.
+            error!(
+                "Failed to accept invite to room with id: {} and error: {}",
+                room.room_id(),
+                e
+            );
+            continue;
+        }
+        warn!("Joining room!");
+        if let Err(e) = client.join_room_by_id(room.room_id()).await {
+            error!(
+                "Failed to join room with id: {} and error: {}",
+                room.room_id(),
+                e
+            );
+        }
+    }
+    warn!("Finished rejecting stale invites");
+}
+
+/// Run frogbot
+///
+/// Builds a client from `config`, logs in, performs one initial sync, cleans up old
+/// encryption devices and pending invites, registers the event handlers and then keeps
+/// syncing.
+///
+/// # Errors
+///
+/// Returns an error if deleting old encryption devices fails or if the sync loop stops
+/// with an error.
+///
+/// # Panics
+///
+/// This function will panic in the following scenarios:
+/// - If it cannot create a client using the current [`Config`].
+/// - If the bot can't log into its account.
+/// - If the initial event sync fails.
+pub async fn run(config: Config) -> anyhow::Result<()> {
+    /// Logs every room invite the bot receives while it is running.
+    async fn log_invite(ev: StrippedRoomMemberEvent, room: Room) {
+        let Room::Invited(invited_room) = room else {
+            return;
+        };
+        warn!(
+            "Got invite to room: '{}' sent by '{}'",
+            invited_room.name().unwrap_or_default(),
+            ev.sender
+        );
+    }
+
+    let client = config
+        .create_client()
+        .await
+        .expect("There was a problem creating frogbot's client.");
+
+    // Log in before anything else; every later step needs an authenticated session.
+    let login = client
+        .login_username(&config.username, &config.password)
+        .initial_device_display_name(&config.display_name);
+    login
+        .send()
+        .await
+        .expect("frogbot couldn't log into it's account.");
+
+    warn!("Logged in successfully!");
+    warn!(
+        "server: '{}', username: '{}', display name: '{}'",
+        config.homeserver, config.username, config.display_name
+    );
+
+    // One sync up front so the startup clean-up works on current server state.
+    let initial_sync = client.sync_once(SyncSettings::default()).await;
+    initial_sync.expect("Failed the initial event sync.");
+
+    delete_old_encryption_devices(&client, &config).await?;
+    reject_stale_invites(&client, &config).await;
+
+    // Log new room invites as they are received.
+    client.add_event_handler(log_invite);
+
+    // Detect HTTP links in chat and reply with embeds.
+    client.add_event_handler(embeds::embed_handler);
+
+    // Keep syncing from here on; `sync()` picks up the latest sync token by itself.
+    warn!("Starting sync loop");
+    client.sync(SyncSettings::default()).await?;
+
+    Ok(())
+}
diff --git a/src/main.rs b/src/main.rs
index ef33c43..85ce006 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,197 +1,9 @@
-use anyhow;
-use toml;
-use tokio;
-use scraper::{Html, Selector};
-use lazy_static::lazy_static;
-use regex::Regex;
-use log::*;
-use serde::{Serialize, Deserialize};
-use matrix_sdk::{
- Client,
- config::SyncSettings,
- room::Room,
-
- ruma::OwnedDeviceId,
- ruma::OwnedRoomId,
- ruma::api::client::uiaa,
- ruma::events::room::member::StrippedRoomMemberEvent,
- ruma::events::room::message::{MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent},
-};
-
-#[derive(Serialize, Deserialize, Debug)]
-struct TomlConfig {
- homeserver: String,
- username: String,
- display_name: String,
- password: String,
- room_ids: Vec,
-}
+use frogbot::{run, Config};
#[tokio::main]
async fn main() -> anyhow::Result<()> {
// init logging
tracing_subscriber::fmt::init();
- let config = load_config();
- let client = Client::builder()
- .homeserver_url(&config.homeserver)
- .handle_refresh_tokens()
- .build()
- .await?;
-
- // try login
- client
- .login_username(&config.username, &config.password)
- .initial_device_display_name(&config.display_name)
- .send()
- .await?;
-
- warn!("Logged in successfully!");
- warn!("server: '{}', username: '{}', display name: '{}'", &config.homeserver, &config.username, &config.display_name);
-
- // sync client once so we get latest events to work on before we continue
- client.sync_once(SyncSettings::default()).await?;
-
- warn!("Deleting old encryption devices");
- let current_device_id = client.device_id().expect("Failed to get device ID");
- let old_devices: Vec = client.devices().await?.devices.iter().filter(|d| d.device_id != current_device_id).map(|d| d.device_id.to_owned()).collect();
-
- // Deleting these devices needs "user interaction" or something, so we just send password again
- // and it works :D
- if let Err(e) = client.delete_devices(&old_devices, None).await {
- if let Some(info) = e.uiaa_response() {
- let mut password = uiaa::Password::new(
- uiaa::UserIdentifier::UserIdOrLocalpart(&config.username),
- &config.password,
- );
- password.session = info.session.as_deref();
- client
- .delete_devices(&old_devices, Some(uiaa::AuthData::Password(password)))
- .await?;
- }
- }
- warn!("Finished deleting old encryption devices");
- warn!("Rejecting stale invites");
- for room in client.invited_rooms() {
- let room_name = room.name().unwrap_or_default();
- if !room.is_space() && !room.is_direct() && config.room_ids.iter().any(|r| *r == room.room_id()) {
- warn!("Got invite to room: '{}'", room_name);
- room.accept_invitation().await.expect("Failed to accept invite");
- warn!("Joining room!");
- if let Err(e) = client.join_room_by_id(room.room_id()).await {
- error!("Failed to join room with id: {} and error: {}", room.room_id(), e);
- }
- } else {
- warn!("Rejecting invite to room: '{}'", room_name);
- room.reject_invitation().await.unwrap_or_default();
- }
- }
- warn!("Finished rejecting stale invites");
-
- // Add handler to log new room invites as they're recieved
- client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move {
- if let Room::Invited(invited_room) = room {
- warn!("Got invite to room: '{}' sent by '{}'", invited_room.name().unwrap_or_default(), ev.sender);
- }
- });
-
- // Add handler to detect and create embeds for HTTP links in chat
- client.add_event_handler(handle_message_events);
-
- async fn handle_message_events(ev: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
- // Using lazy static magic here, so this means the regex is compiled exactly once
- // After initial compile it gets reused instead of recompiling on every message event
- lazy_static! {
- // shamelessly stolen and modified from some garbage blog online
- // I have no fucking idea how this works - https://urlregex.com/
- static ref RE: Regex = Regex::new(r"(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
- }
- if let Room::Joined(room) = room {
- let full_reply_event = ev.clone().into_full_event(room.room_id().to_owned());
- let MessageType::Text(text_content) = ev.content.msgtype else {
- warn!("Ignoring message, content is not plaintext!");
- return;
- };
- // If the sender ID matches our client, ignore message
- // We don't want to reply to ourselves
- let client_user_id = client.user_id().unwrap();
- if ev.sender == client_user_id {
- return;
- }
-
- let msg = text_content.body.to_lowercase();
- // Make a HTTP request and parse out the metadata info
- if let Some(url) = RE.find(&msg) {
- if url.as_str().contains("localhost") || url.as_str().contains("127.0.0.1") {
- warn!("This is probably a malicious URL, ignoring!");
- return;
- }
- warn!("Got message with URL: '{}', requesting metadata!", url.as_str());
- if let Ok(req) = reqwest::get(url.as_str()).await {
- if let Ok(resp) = req.text().await {
- // beware dirty HTML parsing code
- let (title, desc) = parse_metadata(&resp);
-
- // Build our message reply
- let msg_reply = RoomMessageEventContent::text_plain(
- format!("Title: {}\nDescription: {}", title, desc))
- .make_reply_to(&full_reply_event);
-
- // Finally send the reply to the room
- warn!("Sending metadata for URL: '{}'", url.as_str());
- if room.send(msg_reply, None).await.is_err() {
- warn!("Failed to send metadata reply for URL: '{}'", url.as_str());
- }
- } else {
- warn!("Failed to parse HTML response into text for URL: '{}'", url.as_str());
- }
- } else {
- warn!("Failed to get metadata for URL: '{}'", url.as_str());
- }
- } else {
- info!("Got message but found no URLs, ignoring");
- }
- }
- }
-
- fn parse_metadata(page: &String) -> (String, String) {
- let doc_body = Html::parse_document(page);
-
- // Selectors used to get metadata are defined here
- let title_selector = Selector::parse("title").unwrap();
- let description_selector = Selector::parse("meta[name=\"description\"]").unwrap();
-
- // Grab the actual data
- let title = doc_body.select(&title_selector).next();
- let desc = doc_body.select(&description_selector).next();
- // Clean up meta info and store it as a string
- let mut meta_title = String::from("None");
- let mut meta_description = String::from("None");
-
- if title.is_some() {
- meta_title = title.unwrap().text().collect();
- } else {
- warn!("Failed to parse title HTML");
- }
-
- if desc.is_some() {
- meta_description = desc.unwrap().value().attr("content").unwrap().to_string();
- } else {
- warn!("Failed to parse description HTML");
- }
-
- return (meta_title, meta_description);
-
- }
-
- // Now keep on syncing forever. `sync()` will use the latest sync token automatically.
- warn!("Starting sync loop");
- client.sync(SyncSettings::default()).await?;
- Ok(())
-}
-
-fn load_config() -> TomlConfig {
- let config_file = std::fs::read_to_string("./config.toml").expect("Failed to read config file");
- let config: TomlConfig = toml::from_str(&config_file).expect("Failed to parse TOML config");
- return config;
-
+ let config = Config::load("./config.toml");
+ run(config).await
}