refactor: moved all the core logic into lib.rs and embed related logic into embed.rs

This commit is contained in:
AxelSilverdew 2023-06-19 21:58:13 +05:30
parent cb7fd78610
commit bf4380c4f9
Signed by: AxelSilverdew
GPG key ID: F094C487E3EA1B79
3 changed files with 329 additions and 191 deletions

150
src/embeds.rs Normal file
View file

@ -0,0 +1,150 @@
//! # The Embed Module
//!
//! This module controls the embed functionality of frogbot.
use lazy_static::lazy_static;
use log::warn;
use matrix_sdk::{
room::Room,
ruma::events::room::message::{
MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent,
},
Client,
};
use regex::Regex;
use scraper::{Html, Selector};
/// Represents an Embed in the chat
pub struct Embed {
/// The title of the embed
pub title: String,
/// The description
pub description: String,
}
impl Embed {
/// Creates a new [`Embed`].
pub fn new(title: String, description: String) -> Embed {
Embed { title, description }
}
}
/// Scrapes the HTML of a webpage and generates an [`Embed`] with the scraped information.
pub fn parse_metadata(page: &str) -> Embed {
let doc_body = Html::parse_document(page);
// Selectors used to get metadata are defined here
let title_selector = Selector::parse("title").unwrap();
let description_selector = Selector::parse("meta[name=\"description\"]").unwrap();
// Grab the actual data
let title = doc_body.select(&title_selector).next();
let desc = doc_body.select(&description_selector).next();
// Clean up meta info and store it as a string
let mut meta_title = String::from("None");
let mut meta_description = String::from("None");
if let Some(title) = title {
meta_title = title.text().collect();
} else {
warn!("Failed to parse title HTML");
}
if let Some(desc) = desc {
meta_description = desc.value().attr("content").unwrap().to_string();
} else {
warn!("Failed to parse description HTML");
}
Embed::new(meta_title, meta_description)
}
/// Check if the message has any urls in it and get them if it does
fn get_urls_from_message(message: &str) -> Vec<&str> {
// Using lazy static magic here, so this means the regex is compiled exactly once
// After initial compile it gets reused instead of recompiling on every message event
lazy_static! {
// shamelessly stolen and modified from some garbage blog online
// I have no fucking idea how this works - https://urlregex.com/
static ref RE: Regex = Regex::new(r"(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
}
// This will hold all the urls in the message if any are found
let mut urls: Vec<&str> = vec![];
if RE.is_match(message) {
// If we find any urls, push them into the urls vec
for regex_match in RE.find_iter(message) {
// If the url points to localhost, we don't want to embed it, so we ignore it
if regex_match.as_str().contains("localhost")
|| regex_match.as_str().contains("127.0.0.1")
{
warn!("This is probably a malicious URL, ignoring!");
} else {
warn!("Found {}", &regex_match.as_str());
urls.push(regex_match.as_str());
}
}
} else {
// If we don't find any urls, do nothing
};
urls
}
/// Checks messages for valid links and generates embeds if found
pub async fn embed_handler(event: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
if let Room::Joined(room) = room {
let full_reply_event = event.clone().into_full_event(room.room_id().to_owned());
let MessageType::Text(text_content) = event.content.msgtype else {
warn!("Ignoring message, content is not plaintext!");
return;
};
// If the sender ID matches our client, ignore the message
// We don't want to reply to ourselves
let client_user_id = client.user_id().unwrap();
if event.sender == client_user_id {
return;
}
let message = text_content.body.to_lowercase();
let urls = get_urls_from_message(&message);
let reqwest_client = reqwest::Client::builder().user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36").build().unwrap();
for url in urls {
if let Ok(req) = reqwest_client.get(url).send().await {
if let Ok(res) = req.text().await {
// beware, dirty HTML parsing code
let embed = parse_metadata(&res);
// Build our message reply
let bot_reply = RoomMessageEventContent::text_html(
&embed.title,
format!(
r#"
<blockquote>
<h6><a href="{}">{}</a></h6>
<h3><strong>{}</strong></h3>
<p>{}</p>
</blockquote>
"#,
&url, &url, &embed.title, &embed.description
),
)
.make_reply_to(&full_reply_event);
// Finally send the reply to the room
warn!("Sending embed for URL: '{}'", &url);
if room.send(bot_reply, None).await.is_err() {
warn!("Failed to send embed for URL: '{}'", &url);
}
} else {
warn!("Failed to parse HTML for URL: '{}'", &url);
}
} else {
warn!("Failed to get metadata for '{}'", &url);
}
}
};
}

176
src/lib.rs Normal file
View file

@ -0,0 +1,176 @@
//! A multi-purpose bot for Matrix
#![deny(missing_docs)]
pub mod embeds;
use log::{error, warn};
use matrix_sdk::{
config::SyncSettings,
room::Room,
ruma::{
api::client::uiaa, events::room::member::StrippedRoomMemberEvent, OwnedDeviceId,
OwnedRoomId,
},
Client, ClientBuildError,
};
use serde::{Deserialize, Serialize};
/// Represents the entries in the configuration file.
#[derive(Serialize, Deserialize, Debug)]
pub struct Config {
/// Your Homeserver URL (e.g. "matrix.yourdomain.com")
pub homeserver: String,
/// The Bot User's Username (e.g. "frogbot")
pub username: String,
/// The Display Name of the Bot (e.g. "Frogbot 🐸")
pub display_name: String,
/// The Password to the Bot User (e.g. "hunter2")
pub password: String,
/// A List of All the Rooms to Join (e.g. ["!myid:matrix.yourdomain.com"] )
pub room_ids: Vec<OwnedRoomId>,
}
impl Config {
/// Loads a config file for frogbot to use.
pub fn load(config_file: &str) -> Config {
let config_file =
std::fs::read_to_string(config_file).expect("Failed to read config file.");
toml::from_str(&config_file).expect("Failed to parse TOML config.")
}
/// Returns a new frogbot client using the [`Config`].
pub async fn create_client(&self) -> Result<Client, ClientBuildError> {
Client::builder()
.homeserver_url(&self.homeserver)
.handle_refresh_tokens()
.build()
.await
}
}
/// Deletes all old encryption devices.
///
/// We don't want to end up with a ton of encryption devices that aren't active.
/// This function removes all the old ones while preserving the current device.
///
/// # Panics
///
/// This function will panic if it cannot get a device ID from the current client.
pub async fn delete_old_encryption_devices(client: &Client, config: &Config) -> anyhow::Result<()> {
warn!("Deleting old encryption devices");
let current_device_id = client.device_id().expect("Failed to get device ID");
let old_devices: Vec<OwnedDeviceId> = client
.devices()
.await?
.devices
.iter()
.filter(|d| d.device_id != current_device_id)
.map(|d| d.device_id.to_owned())
.collect();
// Deleting these devices needs "user interaction" or something, so we just send password again
// and it works :D
if let Err(e) = client.delete_devices(&old_devices, None).await {
if let Some(info) = e.uiaa_response() {
let mut password = uiaa::Password::new(
uiaa::UserIdentifier::UserIdOrLocalpart(&config.username),
&config.password,
);
password.session = info.session.as_deref();
client
.delete_devices(&old_devices, Some(uiaa::AuthData::Password(password)))
.await?;
}
}
warn!("Finished deleting old encryption devices");
Ok(())
}
/// Rejects invites that aren't valid anymore or have timed out.
pub async fn reject_stale_invites(client: &Client, config: &Config) {
warn!("Rejecting stale invites");
for room in client.invited_rooms() {
let room_name = room.name().unwrap_or_default();
if !room.is_space()
&& !room.is_direct()
&& config.room_ids.iter().any(|r| *r == room.room_id())
{
warn!("Got invite to room: '{}'", room_name);
room.accept_invitation()
.await
.expect("Failed to accept invite");
warn!("Joining room!");
if let Err(e) = client.join_room_by_id(room.room_id()).await {
error!(
"Failed to join room with id: {} and error: {}",
room.room_id(),
e
);
}
} else {
warn!("Rejecting invite to room: '{}'", room_name);
room.reject_invitation().await.unwrap_or_default();
}
}
warn!("Finished rejecting stale invites");
}
/// Run frogbot
///
/// Starts the bot and starts listening for events
///
/// # Panics
///
/// This function will panic in the following scenarios:
/// - If it cannot create a client using the current [`Config`].
/// - If the bot can't log into it's account.
/// - If the initial event sync fails.
pub async fn run(config: Config) -> anyhow::Result<()> {
let client = &config
.create_client()
.await
.expect("There was a problem creating frogbot's client.");
// Attempt to log into the server
client
.login_username(&config.username, &config.password)
.initial_device_display_name(&config.display_name)
.send()
.await
.expect("frogbot couldn't log into it's account.");
warn!("Logged in successfully!");
warn!(
"server: '{}', username: '{}', display name: '{}'",
&config.homeserver, &config.username, &config.display_name
);
// sync client once so we get latest events to work on before we continue
client
.sync_once(SyncSettings::default())
.await
.expect("Failed the initial event sync.");
delete_old_encryption_devices(client, &config).await?;
reject_stale_invites(client, &config).await;
// Add handler to log new room invites as they're recieved
client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move {
if let Room::Invited(invited_room) = room {
warn!(
"Got invite to room: '{}' sent by '{}'",
invited_room.name().unwrap_or_default(),
ev.sender
);
}
});
// Add handler to detect and create embeds for HTTP links in chat
client.add_event_handler(embeds::embed_handler);
// Now keep on syncing forever. `sync()` will use the latest sync token automatically.
warn!("Starting sync loop");
client.sync(SyncSettings::default()).await?;
Ok(())
}

View file

@ -1,197 +1,9 @@
use anyhow; use frogbot::{run, Config};
use toml;
use tokio;
use scraper::{Html, Selector};
use lazy_static::lazy_static;
use regex::Regex;
use log::*;
use serde::{Serialize, Deserialize};
use matrix_sdk::{
Client,
config::SyncSettings,
room::Room,
ruma::OwnedDeviceId,
ruma::OwnedRoomId,
ruma::api::client::uiaa,
ruma::events::room::member::StrippedRoomMemberEvent,
ruma::events::room::message::{MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent},
};
#[derive(Serialize, Deserialize, Debug)]
struct TomlConfig {
homeserver: String,
username: String,
display_name: String,
password: String,
room_ids: Vec<OwnedRoomId>,
}
#[tokio::main] #[tokio::main]
async fn main() -> anyhow::Result<()> { async fn main() -> anyhow::Result<()> {
// init logging // init logging
tracing_subscriber::fmt::init(); tracing_subscriber::fmt::init();
let config = load_config(); let config = Config::load("./config.toml");
let client = Client::builder() run(config).await
.homeserver_url(&config.homeserver)
.handle_refresh_tokens()
.build()
.await?;
// try login
client
.login_username(&config.username, &config.password)
.initial_device_display_name(&config.display_name)
.send()
.await?;
warn!("Logged in successfully!");
warn!("server: '{}', username: '{}', display name: '{}'", &config.homeserver, &config.username, &config.display_name);
// sync client once so we get latest events to work on before we continue
client.sync_once(SyncSettings::default()).await?;
warn!("Deleting old encryption devices");
let current_device_id = client.device_id().expect("Failed to get device ID");
let old_devices: Vec<OwnedDeviceId> = client.devices().await?.devices.iter().filter(|d| d.device_id != current_device_id).map(|d| d.device_id.to_owned()).collect();
// Deleting these devices needs "user interaction" or something, so we just send password again
// and it works :D
if let Err(e) = client.delete_devices(&old_devices, None).await {
if let Some(info) = e.uiaa_response() {
let mut password = uiaa::Password::new(
uiaa::UserIdentifier::UserIdOrLocalpart(&config.username),
&config.password,
);
password.session = info.session.as_deref();
client
.delete_devices(&old_devices, Some(uiaa::AuthData::Password(password)))
.await?;
}
}
warn!("Finished deleting old encryption devices");
warn!("Rejecting stale invites");
for room in client.invited_rooms() {
let room_name = room.name().unwrap_or_default();
if !room.is_space() && !room.is_direct() && config.room_ids.iter().any(|r| *r == room.room_id()) {
warn!("Got invite to room: '{}'", room_name);
room.accept_invitation().await.expect("Failed to accept invite");
warn!("Joining room!");
if let Err(e) = client.join_room_by_id(room.room_id()).await {
error!("Failed to join room with id: {} and error: {}", room.room_id(), e);
}
} else {
warn!("Rejecting invite to room: '{}'", room_name);
room.reject_invitation().await.unwrap_or_default();
}
}
warn!("Finished rejecting stale invites");
// Add handler to log new room invites as they're recieved
client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move {
if let Room::Invited(invited_room) = room {
warn!("Got invite to room: '{}' sent by '{}'", invited_room.name().unwrap_or_default(), ev.sender);
}
});
// Add handler to detect and create embeds for HTTP links in chat
client.add_event_handler(handle_message_events);
async fn handle_message_events(ev: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
// Using lazy static magic here, so this means the regex is compiled exactly once
// After initial compile it gets reused instead of recompiling on every message event
lazy_static! {
// shamelessly stolen and modified from some garbage blog online
// I have no fucking idea how this works - https://urlregex.com/
static ref RE: Regex = Regex::new(r"(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
}
if let Room::Joined(room) = room {
let full_reply_event = ev.clone().into_full_event(room.room_id().to_owned());
let MessageType::Text(text_content) = ev.content.msgtype else {
warn!("Ignoring message, content is not plaintext!");
return;
};
// If the sender ID matches our client, ignore message
// We don't want to reply to ourselves
let client_user_id = client.user_id().unwrap();
if ev.sender == client_user_id {
return;
}
let msg = text_content.body.to_lowercase();
// Make a HTTP request and parse out the metadata info
if let Some(url) = RE.find(&msg) {
if url.as_str().contains("localhost") || url.as_str().contains("127.0.0.1") {
warn!("This is probably a malicious URL, ignoring!");
return;
}
warn!("Got message with URL: '{}', requesting metadata!", url.as_str());
if let Ok(req) = reqwest::get(url.as_str()).await {
if let Ok(resp) = req.text().await {
// beware dirty HTML parsing code
let (title, desc) = parse_metadata(&resp);
// Build our message reply
let msg_reply = RoomMessageEventContent::text_plain(
format!("Title: {}\nDescription: {}", title, desc))
.make_reply_to(&full_reply_event);
// Finally send the reply to the room
warn!("Sending metadata for URL: '{}'", url.as_str());
if room.send(msg_reply, None).await.is_err() {
warn!("Failed to send metadata reply for URL: '{}'", url.as_str());
}
} else {
warn!("Failed to parse HTML response into text for URL: '{}'", url.as_str());
}
} else {
warn!("Failed to get metadata for URL: '{}'", url.as_str());
}
} else {
info!("Got message but found no URLs, ignoring");
}
}
}
fn parse_metadata(page: &String) -> (String, String) {
let doc_body = Html::parse_document(page);
// Selectors used to get metadata are defined here
let title_selector = Selector::parse("title").unwrap();
let description_selector = Selector::parse("meta[name=\"description\"]").unwrap();
// Grab the actual data
let title = doc_body.select(&title_selector).next();
let desc = doc_body.select(&description_selector).next();
// Clean up meta info and store it as a string
let mut meta_title = String::from("None");
let mut meta_description = String::from("None");
if title.is_some() {
meta_title = title.unwrap().text().collect();
} else {
warn!("Failed to parse title HTML");
}
if desc.is_some() {
meta_description = desc.unwrap().value().attr("content").unwrap().to_string();
} else {
warn!("Failed to parse description HTML");
}
return (meta_title, meta_description);
}
// Now keep on syncing forever. `sync()` will use the latest sync token automatically.
warn!("Starting sync loop");
client.sync(SyncSettings::default()).await?;
Ok(())
}
fn load_config() -> TomlConfig {
let config_file = std::fs::read_to_string("./config.toml").expect("Failed to read config file");
let config: TomlConfig = toml::from_str(&config_file).expect("Failed to parse TOML config");
return config;
} }