Initial version with link metadata embed support
This commit is contained in:
parent
c2fb6ccf5f
commit
373ff9ca20
2 changed files with 127 additions and 37 deletions
|
@ -15,3 +15,7 @@ env_logger = "0.10.0"
|
||||||
tokio = {version = "1.28.1", features = ["parking_lot", "rt-multi-thread", "macros"]}
|
tokio = {version = "1.28.1", features = ["parking_lot", "rt-multi-thread", "macros"]}
|
||||||
serde = {version = "1.0.163", features = ["derive"]}
|
serde = {version = "1.0.163", features = ["derive"]}
|
||||||
tracing-subscriber = "0.3.17"
|
tracing-subscriber = "0.3.17"
|
||||||
|
scraper = "0.16.0"
|
||||||
|
reqwest = "0.11.18"
|
||||||
|
regex = "1"
|
||||||
|
lazy_static = "1.4.0"
|
||||||
|
|
160
src/main.rs
160
src/main.rs
|
@ -1,6 +1,9 @@
|
||||||
use anyhow;
|
use anyhow;
|
||||||
use toml;
|
use toml;
|
||||||
use tokio;
|
use tokio;
|
||||||
|
use scraper::{Html, Selector};
|
||||||
|
use lazy_static::lazy_static;
|
||||||
|
use regex::Regex;
|
||||||
use log::*;
|
use log::*;
|
||||||
use serde::{Serialize, Deserialize};
|
use serde::{Serialize, Deserialize};
|
||||||
use matrix_sdk::{
|
use matrix_sdk::{
|
||||||
|
@ -9,8 +12,10 @@ use matrix_sdk::{
|
||||||
room::Room,
|
room::Room,
|
||||||
|
|
||||||
ruma::OwnedDeviceId,
|
ruma::OwnedDeviceId,
|
||||||
|
ruma::OwnedRoomId,
|
||||||
ruma::api::client::uiaa,
|
ruma::api::client::uiaa,
|
||||||
ruma::events::room::member::StrippedRoomMemberEvent,
|
ruma::events::room::member::StrippedRoomMemberEvent,
|
||||||
|
ruma::events::room::message::{MessageType, OriginalSyncRoomMessageEvent, RoomMessageEventContent},
|
||||||
};
|
};
|
||||||
|
|
||||||
#[derive(Serialize, Deserialize, Debug)]
|
#[derive(Serialize, Deserialize, Debug)]
|
||||||
|
@ -19,6 +24,7 @@ struct TomlConfig {
|
||||||
username: String,
|
username: String,
|
||||||
display_name: String,
|
display_name: String,
|
||||||
password: String,
|
password: String,
|
||||||
|
room_ids: Vec<OwnedRoomId>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
|
@ -38,14 +44,14 @@ async fn main() -> anyhow::Result<()> {
|
||||||
.initial_device_display_name(&config.display_name)
|
.initial_device_display_name(&config.display_name)
|
||||||
.send()
|
.send()
|
||||||
.await?;
|
.await?;
|
||||||
|
|
||||||
info!("Logged in successfully!");
|
warn!("Logged in successfully!");
|
||||||
info!("server: '{}', username: '{}', display name: '{}'", &config.homeserver, &config.username, &config.display_name);
|
warn!("server: '{}', username: '{}', display name: '{}'", &config.homeserver, &config.username, &config.display_name);
|
||||||
|
|
||||||
// sync client once so we get latest events to work on before we continue
|
// sync client once so we get latest events to work on before we continue
|
||||||
client.sync_once(SyncSettings::default()).await?;
|
client.sync_once(SyncSettings::default()).await?;
|
||||||
|
|
||||||
info!("Deleting old encryption devices");
|
warn!("Deleting old encryption devices");
|
||||||
let current_device_id = client.device_id().expect("Failed to get device ID");
|
let current_device_id = client.device_id().expect("Failed to get device ID");
|
||||||
let old_devices: Vec<OwnedDeviceId> = client.devices().await?.devices.iter().filter(|d| d.device_id != current_device_id).map(|d| d.device_id.to_owned()).collect();
|
let old_devices: Vec<OwnedDeviceId> = client.devices().await?.devices.iter().filter(|d| d.device_id != current_device_id).map(|d| d.device_id.to_owned()).collect();
|
||||||
|
|
||||||
|
@ -63,49 +69,129 @@ async fn main() -> anyhow::Result<()> {
|
||||||
.await?;
|
.await?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
info!("Finished deleting old encryption devices");
|
warn!("Finished deleting old encryption devices");
|
||||||
info!("Rejecting stale invites");
|
warn!("Rejecting stale invites");
|
||||||
for room in client.invited_rooms() {
|
for room in client.invited_rooms() {
|
||||||
room.reject_invitation().await.unwrap_or_default();
|
let room_name = room.name().unwrap_or_default();
|
||||||
}
|
if !room.is_space() && !room.is_direct() && config.room_ids.iter().any(|r| *r == room.room_id()) {
|
||||||
info!("Finished rejecting stale invites");
|
warn!("Got invite to room: '{}'", room_name);
|
||||||
|
room.accept_invitation().await.expect("Failed to accept invite");
|
||||||
// Add handler to deal with new room invites
|
warn!("Joining room!");
|
||||||
// TODO: Add code to filter rooms and only accept invites for rooms in config file
|
if let Err(e) = client.join_room_by_id(room.room_id()).await {
|
||||||
client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room, client: Client| async move {
|
error!("Failed to join room with id: {} and error: {}", room.room_id(), e);
|
||||||
info!("Processing room member event, room type: {:?}", room.room_type());
|
|
||||||
if let Room::Invited(invited_room) = room {
|
|
||||||
let room_name = ev.content.displayname.unwrap_or(String::from(""));
|
|
||||||
let is_dm = ev.content.is_direct.unwrap_or(true);
|
|
||||||
let is_bad_room = is_dm || invited_room.is_space() || room_name.is_empty();
|
|
||||||
info!("Got invite to room: '{}' sent by '{}'", room_name, ev.sender);
|
|
||||||
if is_bad_room {
|
|
||||||
info!("This room is probably a DM, ignoring!");
|
|
||||||
if let Err(e) = invited_room.reject_invitation().await {
|
|
||||||
warn!("Failed to reject invite with error: {}", e);
|
|
||||||
}
|
|
||||||
return ();
|
|
||||||
} else {
|
|
||||||
if let Err(e) = invited_room.accept_invitation().await {
|
|
||||||
warn!("Failed to accept room invite with error: {}", e);
|
|
||||||
}
|
|
||||||
info!("Joining room!");
|
|
||||||
if let Err(e) = client.join_room_by_id(invited_room.room_id()).await {
|
|
||||||
warn!("Failed to join room with id: {} and error: {}", invited_room.room_id(), e);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
warn!("Rejecting invite to room: '{}'", room_name);
|
||||||
|
room.reject_invitation().await.unwrap_or_default();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
warn!("Finished rejecting stale invites");
|
||||||
|
|
||||||
|
// Add handler to log new room invites as they're received
|
||||||
|
client.add_event_handler(|ev: StrippedRoomMemberEvent, room: Room| async move {
|
||||||
|
if let Room::Invited(invited_room) = room {
|
||||||
|
warn!("Got invite to room: '{}' sent by '{}'", invited_room.name().unwrap_or_default(), ev.sender);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Add handler to detect and create embeds for HTTP links in chat
|
||||||
|
client.add_event_handler(handle_message_events);
|
||||||
|
|
||||||
|
async fn handle_message_events(ev: OriginalSyncRoomMessageEvent, room: Room, client: Client) {
|
||||||
|
// Using lazy static magic here, so this means the regex is compiled exactly once
|
||||||
|
// After initial compile it gets reused instead of recompiling on every message event
|
||||||
|
lazy_static! {
|
||||||
|
// shamelessly stolen and modified from some garbage blog online
|
||||||
|
// I have no fucking idea how this works - https://urlregex.com/
|
||||||
|
static ref RE: Regex = Regex::new(r"(?:(?:https?)://)(?:\S+(?::\S*)?@|\d{1,3}(?:\.\d{1,3}){3}|(?:(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)(?:\.(?:[a-z\d\x{00a1}-\x{ffff}]+-?)*[a-z\d\x{00a1}-\x{ffff}]+)*(?:\.[a-z\x{00a1}-\x{ffff}]{2,6}))(?::\d+)?(?:[^\s]*)?").unwrap();
|
||||||
|
}
|
||||||
|
if let Room::Joined(room) = room {
|
||||||
|
let full_reply_event = ev.clone().into_full_event(room.room_id().to_owned());
|
||||||
|
let MessageType::Text(text_content) = ev.content.msgtype else {
|
||||||
|
warn!("Ignoring message, content is not plaintext!");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
// If the sender ID matches our client, ignore message
|
||||||
|
// We don't want to reply to ourselves
|
||||||
|
let client_user_id = client.user_id().unwrap();
|
||||||
|
if ev.sender == client_user_id {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let msg = text_content.body.to_lowercase();
|
||||||
|
// Make a HTTP request and parse out the metadata info
|
||||||
|
if let Some(url) = RE.find(&msg) {
|
||||||
|
if url.as_str().contains("localhost") || url.as_str().contains("127.0.0.1") {
|
||||||
|
warn!("This is probably a malicious URL, ignoring!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
warn!("Got message with URL: '{}', requesting metadata!", url.as_str());
|
||||||
|
if let Ok(req) = reqwest::get(url.as_str()).await {
|
||||||
|
if let Ok(resp) = req.text().await {
|
||||||
|
// beware dirty HTML parsing code
|
||||||
|
let (title, desc) = parse_metadata(&resp);
|
||||||
|
|
||||||
|
// Build our message reply
|
||||||
|
let msg_reply = RoomMessageEventContent::text_plain(
|
||||||
|
format!("Title: {}\nDescription: {}", title, desc))
|
||||||
|
.make_reply_to(&full_reply_event);
|
||||||
|
|
||||||
|
// Finally send the reply to the room
|
||||||
|
warn!("Sending metadata for URL: '{}'", url.as_str());
|
||||||
|
if room.send(msg_reply, None).await.is_err() {
|
||||||
|
warn!("Failed to send metadata reply for URL: '{}'", url.as_str());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
warn!("Failed to parse HTML response into text for URL: '{}'", url.as_str());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
warn!("Failed to get metadata for URL: '{}'", url.as_str());
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
info!("Got message but found no URLs, ignoring");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn parse_metadata(page: &String) -> (String, String) {
|
||||||
|
let doc_body = Html::parse_document(page);
|
||||||
|
|
||||||
|
// Selectors used to get metadata are defined here
|
||||||
|
let title_selector = Selector::parse("title").unwrap();
|
||||||
|
let description_selector = Selector::parse("meta[name=\"description\"]").unwrap();
|
||||||
|
|
||||||
|
// Grab the actual data
|
||||||
|
let title = doc_body.select(&title_selector).next();
|
||||||
|
let desc = doc_body.select(&description_selector).next();
|
||||||
|
// Clean up meta info and store it as a string
|
||||||
|
let mut meta_title = String::from("None");
|
||||||
|
let mut meta_description = String::from("None");
|
||||||
|
|
||||||
|
if title.is_some() {
|
||||||
|
meta_title = title.unwrap().text().collect();
|
||||||
|
} else {
|
||||||
|
warn!("Failed to parse title HTML");
|
||||||
|
}
|
||||||
|
|
||||||
|
if desc.is_some() {
|
||||||
|
meta_description = desc.unwrap().value().attr("content").unwrap().to_string();
|
||||||
|
} else {
|
||||||
|
warn!("Failed to parse description HTML");
|
||||||
|
}
|
||||||
|
|
||||||
|
return (meta_title, meta_description);
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
// Now keep on syncing forever. `sync()` will use the latest sync token automatically.
|
// Now keep on syncing forever. `sync()` will use the latest sync token automatically.
|
||||||
info!("Starting sync loop");
|
warn!("Starting sync loop");
|
||||||
client.sync(SyncSettings::default()).await?;
|
client.sync(SyncSettings::default()).await?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn load_config() -> TomlConfig {
|
fn load_config() -> TomlConfig {
|
||||||
// fuck error handling, it's too early in the program execution for that shit
|
let config_file = std::fs::read_to_string("./config.toml").expect("Failed to read config file");
|
||||||
let config: TomlConfig = toml::from_str(&std::fs::read_to_string("./config.toml").unwrap()).unwrap();
|
let config: TomlConfig = toml::from_str(&config_file).expect("Failed to parse TOML config");
|
||||||
return config; // see, so clean!
|
return config;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue