feat: add basic chatbot #14

Merged
koalp merged 3 commits from feature/minimal-chatbot into development 2021-04-29 02:24:20 +02:00
16 changed files with 1361 additions and 41 deletions
Showing only changes of commit a16dbbc790 - Show all commits

View File

@ -17,6 +17,3 @@ labels:
**Expected behavior** **Expected behavior**
*describe what you expected to happen* *describe what you expected to happen*
**Configuration**
*paste the result of `stage --version`

View File

@ -3,13 +3,12 @@ name: "Feature request"
about: "This template is for requesting a new feature" about: "This template is for requesting a new feature"
title: "" title: ""
labels: labels:
- "type::feature" - "type::enhancement"
- "status::review_needed" - "status::review_needed"
--- ---
*(if applicable) describe what problem or frustration you have currently* *(if applicable) describe what problem or frustration you have currently*
*describe what you would like to be able to do, or what solution you would like (you can propose several)* *describe what you would like to be able to do, or what solution you would like*
*(optional) additional context, comments or implementation propositions* *(optional) additional context, comments or implementation propositions*

1079
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
members = [ members = [
"crieur-retrieve", "crieur-retrieve",
"crieur-chatbot",
] ]
@ -17,6 +18,7 @@ publish = false
[dependencies] [dependencies]
anyhow = "1.0.40" anyhow = "1.0.40"
crieur-retrieve = {version = "0.1", path="crieur-retrieve"} crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
dotenv = "0.15.0" dotenv = "0.15.0"
env_logger = "0.8.3" env_logger = "0.8.3"
log = "0.4.14" log = "0.4.14"

View File

@ -7,7 +7,7 @@ Tools to retrieve articles from multiple newspaper you subscribed to.
First retrieve login cookies for websites and put it in a `.env` First retrieve login cookies for websites and put it in a `.env`
``` ```
cargo run --example=retrive_html_articles cargo run --example=cli_downloader
``` ```
# Documentation # Documentation

19
crieur-chatbot/Cargo.toml Normal file
View File

@ -0,0 +1,19 @@
[package]
name = "crieur-chatbot"
version = "0.1.0"
authors = ["koalp <koalp@alpaga.dev>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.40"
dotenv = "0.15.0"
crieur-retrieve = {version = "0.1.0", path = "../crieur-retrieve"}
mime = "0.3.16"
log = "0.4.14"
[dependencies.matrix-sdk]
git = "https://github.com/matrix-org/matrix-rust-sdk"
rev = "ab180362c931606385dd53b73620d82ef2c3166d"
version = "0.2.0"

View File

@ -0,0 +1,85 @@
//! Chatbot
use std::convert::TryInto;
use anyhow::Result;
use matrix_sdk::{
self, async_trait,
events::{
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
AnyMessageEventContent, SyncMessageEvent,
},
room::Room,
Client, ClientConfig, EventHandler, SyncSettings,
};
use crate::Html;
/// Collects the connection settings needed to create a [`Chatbot`].
///
/// Every field starts out as an empty string (through the derived `Default`)
/// and is overwritten by the matching setter method of this type.
#[derive(Debug, Clone, Default)]
pub(crate) struct Builder {
/// User name passed to the client's login call.
user: String,
/// Password passed to the client's login call.
password: String,
/// Homeserver address handed to `Client::new`.
homeserver: String,
//TODO: rooms
/// Identifier of the single room that is joined when connecting.
room: String,
}
impl Builder {
fn new() -> Self {
Default::default()
}
pub(crate) async fn connect(&self) -> Result<Chatbot> {
let client = Client::new(self.homeserver.as_str())?;
client
.login(self.user.as_str(), self.password.as_str(), None, None)
.await?;
assert!(client.logged_in().await);
client
.join_room_by_id(&self.room.as_str().try_into()?)
.await?;
Ok(Chatbot { client })
}
pub(crate) fn login(
&mut self,
user: &impl AsRef<str>,
password: &impl AsRef<str>,
) -> &mut Self {
self.user = String::from(user.as_ref());
self.password = String::from(password.as_ref());
self
}
pub(crate) fn homeserver(&mut self, homeserver: &impl AsRef<str>) -> &mut Self {
self.homeserver = String::from(homeserver.as_ref());
self
}
pub(crate) fn room(&mut self, room: &impl AsRef<str>) -> &mut Self {
self.room = String::from(room.as_ref());
self
}
}
/// Handle on a matrix [`Client`], as returned by `Builder::connect` once the
/// client has logged in and joined the configured room.
#[derive(Debug, Clone)]
pub(crate) struct Chatbot {
/// Client used by `run` to register the event handler and to sync.
client: Client,
}
impl Chatbot {
    /// Returns an empty [`Builder`] used to configure and connect a chatbot.
    pub(crate) fn builder() -> Builder {
        Builder::new()
    }

    /// Registers the [`Html`] event handler on the client, then syncs.
    ///
    /// The sync resumes from the client's sync token when it has one and
    /// uses the default settings otherwise.
    pub(crate) async fn run(&self) -> Result<()> {
        let client = &self.client;
        client.set_event_handler(Box::new(Html::new())).await;

        let settings = match client.sync_token().await {
            Some(token) => SyncSettings::default().token(token),
            None => SyncSettings::default(),
        };

        client.sync(settings).await;
        Ok(())
    }
}

32
crieur-chatbot/src/cli.rs Normal file
View File

@ -0,0 +1,32 @@
use std::env;
use anyhow::{bail, Result};
use dotenv::dotenv;
use crate::Chatbot;
/// Runs the chatbot.
///
/// Loads a `.env` file when one can be read (a failure to load it is
/// ignored), then reads the `CRIEUR_MATRIX_USER`, `CRIEUR_MATRIX_PASSWORD`,
/// `CRIEUR_MATRIX_HOMESERVER` and `CRIEUR_MATRIX_ROOM` environment
/// variables, connects the chatbot with them and runs it.
///
/// # Errors
///
/// Fails when any of the four variables cannot be read, or when connecting
/// or running the chatbot fails.
pub async fn run() -> Result<()> {
    dotenv().ok();

    let variables = (
        env::var("CRIEUR_MATRIX_USER"),
        env::var("CRIEUR_MATRIX_PASSWORD"),
        env::var("CRIEUR_MATRIX_HOMESERVER"),
        env::var("CRIEUR_MATRIX_ROOM"),
    );
    let (user, password, homeserver, room) =
        if let (Ok(user), Ok(password), Ok(homeserver), Ok(room)) = variables {
            (user, password, homeserver, room)
        } else {
            bail!("Configuration incomplete, please set all required environment variables")
        };

    let mut builder = Chatbot::builder();
    builder
        .login(&user, &password)
        .homeserver(&homeserver)
        .room(&room);

    let chatbot = builder.connect().await?;
    chatbot.run().await?;

    Ok(())
}

View File

@ -0,0 +1,94 @@
use std::convert::TryInto;
use std::env;
use anyhow::Result;
use log::info;
use matrix_sdk::{
self, async_trait,
events::{
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
AnyMessageEventContent, SyncMessageEvent,
},
room::Room,
Client, ClientConfig, EventHandler, SyncSettings,
};
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
/// Event handler that answers `!html <url>` messages with the requested
/// article; it carries no state of its own.
pub(crate) struct Html {}

impl Html {
    /// Creates the handler.
    pub fn new() -> Self {
        Html {}
    }
}
async fn send_article<U, E>(url: U, room: matrix_sdk::room::Joined)
where
U: TryInto<Url, Error = E> + Send,
E: std::error::Error + Sync + Send + 'static,
{
//TODO: replace by async block when async block is stable
async fn article_html<U, E>(url: U) -> Result<String>
where
U: TryInto<Url, Error = E> + Send,
E: std::error::Error + Sync + Send + 'static,
{
let article_str = ArticleLocation::builder()
.url(url)?
.build()?
.retrieve_html()
.await?;
Ok(article_str)
}
let text_message =
|message| AnyMessageEventContent::RoomMessage(MessageEventContent::text_plain(message));
//TODO: replace occurences ok() by async and logging block when async block is stable
let article_html = match article_html(url).await {
Ok(url) => url,
Err(_) => {
room.send(text_message("Can't download the file"), None)
.await
.ok();
return;
}
};
room.send_attachment(
"test.html",
&mime::TEXT_HTML_UTF_8,
&mut article_html.as_bytes(),
None,
)
.await
.ok();
}
#[async_trait]
impl EventHandler for Html {
    /// Handles `!html <url>` commands posted as text messages in joined rooms.
    ///
    /// Messages from rooms that are not joined, non-text messages and text
    /// that does not start with `!html` followed by an argument are ignored.
    /// Words are separated by any amount of whitespace, so extra spaces
    /// around the command or the URL do not produce an empty URL. Anything
    /// after the URL is ignored.
    async fn on_room_message(&self, room: Room, event: &SyncMessageEvent<MessageEventContent>) {
        // Only a joined room can be answered.
        let room = match room {
            Room::Joined(room) => room,
            _ => return,
        };

        let msg_body = match &event.content.msgtype {
            MessageType::Text(TextMessageEventContent { body, .. }) => body,
            _ => return,
        };

        let mut words = msg_body.split_whitespace();
        if let (Some("!html"), Some(url)) = (words.next(), words.next()) {
            // Logged only once the command is recognised, so that ordinary
            // chat messages do not claim a file is being sent.
            info!("sending file");
            send_article(url, room).await;
        }
    }
}

View File

@ -0,0 +1,2 @@
mod html;
pub(crate) use html::Html;

10
crieur-chatbot/src/lib.rs Normal file
View File

@ -0,0 +1,10 @@
//! Provides a matrix chatbot to download newspaper articles
mod cli;
pub use cli::run;
mod chatbot;
use chatbot::Chatbot;
mod handlers;
use handlers::Html;

View File

@ -1,21 +1,34 @@
use std::boxed::Box; use std::boxed::Box;
use std::convert::TryInto; use std::convert::TryInto;
use std::env;
use anyhow::{anyhow, Result}; use anyhow::{anyhow, Result};
use log::info; use log::info;
use url::{Host, Url}; use url::{Host, Url};
use crate::newspaper::Newspaper; use crate::newspaper::Newspaper;
use crate::newspapers::Mediapart;
type Newspapers<'a> = Vec<Box<&'a dyn Newspaper>>; type Newspapers = Vec<Box<dyn Newspaper>>;
#[derive(Default)] fn default_newpapers() -> Newspapers {
pub struct ArticleLocationBuilder<'a> { let mut mediapart = Mediapart::new();
url: Option<Url>,
newspapers: Option<Newspapers<'a>>, mediapart.login_cookie = Some((
"MPRUUID".into(),
env::var("MEDIAPART_COOKIE").unwrap().into(),
));
vec![Box::new(mediapart)]
} }
impl<'a> ArticleLocationBuilder<'a> { #[derive(Default)]
pub struct Builder {
url: Option<Url>,
newspapers: Option<Newspapers>,
}
impl Builder {
pub fn new() -> Self { pub fn new() -> Self {
Self::default() Self::default()
} }
@ -37,9 +50,9 @@ impl<'a> ArticleLocationBuilder<'a> {
} }
/// Adds a newspaper to the list /// Adds a newspaper to the list
pub fn newspaper<T>(&mut self, newspaper: &'a T) -> &mut Self pub fn newspaper<T>(mut self, newspaper: T) -> Self
where where
T: 'a + Newspaper, T: 'static + Newspaper,
{ {
match &mut self.newspapers { match &mut self.newspapers {
Some(newspapers) => newspapers.push(Box::new(newspaper)), Some(newspapers) => newspapers.push(Box::new(newspaper)),
@ -72,7 +85,7 @@ impl<'a> ArticleLocationBuilder<'a> {
/// - the url is not set /// - the url is not set
/// - the given url has no host /// - the given url has no host
// TODO: move this to a defined error, remove anyhow ! // TODO: move this to a defined error, remove anyhow !
pub fn build(&self) -> Result<ArticleLocation<'a>> { pub fn build(self) -> Result<ArticleLocation> {
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!( let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
"No url set. You can set it with the url() function" "No url set. You can set it with the url() function"
))?); ))?);
@ -80,28 +93,22 @@ impl<'a> ArticleLocationBuilder<'a> {
let host = Host::parse(host)?; let host = Host::parse(host)?;
let newspaper = self let newspaper = self
.newspapers .newspapers
.as_ref() .unwrap_or(default_newpapers())
.ok_or(anyhow!( .into_iter()
"A list of NewsPaper must be set. It can be set with newspapers() function"
))?
.iter()
.find(|c| c.metadata().hosts.contains(&host)) .find(|c| c.metadata().hosts.contains(&host))
.ok_or(anyhow!("Newspaper couldn't be found"))?; .ok_or(anyhow!("Newspaper couldn't be found"))?;
Ok(ArticleLocation { Ok(ArticleLocation { newspaper, url })
newspaper: newspaper.clone(),
url,
})
} }
} }
pub struct ArticleLocation<'a> { pub struct ArticleLocation {
newspaper: Box<&'a dyn Newspaper>, newspaper: Box<dyn Newspaper>,
pub url: Url, pub url: Url,
} }
impl<'a> ArticleLocation<'a> { impl ArticleLocation {
pub fn builder() -> ArticleLocationBuilder<'a> { pub fn builder() -> Builder {
ArticleLocationBuilder::new() Builder::new()
} }
pub async fn retrieve_html(&self) -> Result<String> { pub async fn retrieve_html(&self) -> Result<String> {

View File

@ -34,7 +34,7 @@ impl Metadata {
} }
#[async_trait] #[async_trait]
pub trait Newspaper { pub trait Newspaper: Send + Sync {
/// Returns a list of hosts that corresponds to the newspapers /// Returns a list of hosts that corresponds to the newspapers
fn metadata(&self) -> Metadata; fn metadata(&self) -> Metadata;
@ -49,7 +49,7 @@ pub trait Newspaper {
} }
/// Returns a newspaper structure /// Returns a newspaper structure
async fn new() -> Self fn new() -> Self
where where
Self: Sized; Self: Sized;

View File

@ -80,7 +80,7 @@ impl Newspaper for Mediapart {
Ok(single_page_html) Ok(single_page_html)
} }
async fn new() -> Self { fn new() -> Self {
Self { Self {
..Default::default() ..Default::default()
} }

View File

@ -17,7 +17,7 @@ async fn main() -> Result<()> {
}; };
// TODO: remove this in favor of default newspapers // TODO: remove this in favor of default newspapers
let mut mediapart = Mediapart::new().await let mut mediapart = Mediapart::new()
//.login(USERNAME, PASSWORD) //.login(USERNAME, PASSWORD)
// //
; ;
@ -28,7 +28,7 @@ async fn main() -> Result<()> {
// TODO: shorten this, maybe an helper function ? // TODO: shorten this, maybe an helper function ?
let article_location = ArticleLocation::builder() let article_location = ArticleLocation::builder()
.url(url)? .url(url)?
.newspaper(&mediapart) .newspaper(mediapart)
.build()?; .build()?;
let article_str = article_location.retrieve_html().await?; let article_str = article_location.retrieve_html().await?;

View File

@ -1,4 +1,5 @@
use anyhow::Result; use anyhow::Result;
use crieur_chatbot::run;
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url}; use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
use dotenv::dotenv; use dotenv::dotenv;
use std::env; use std::env;
@ -7,13 +8,12 @@ use std::env;
async fn main() -> Result<()> { async fn main() -> Result<()> {
dotenv().ok(); dotenv().ok();
let mut mediapart = Mediapart::new().await let mut mediapart = Mediapart::new()
//.login(USERNAME, PASSWORD) //.login(USERNAME, PASSWORD)
// //
; ;
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?)); mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
let url = Url::parse("https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long")?; run().await?;
println!("{}", mediapart.retrieve_html(&url).await?);
Ok(()) Ok(())
} }