feat: add basic chatbot
Some checks failed
continuous-integration/drone/pr Build is running
continuous-integration/drone/push Build is failing

A basic chatbot application that downloads articles from one newspaper
has been added.

It can download HTML pages and is called with !html

ArticleLocation has been refactored to own its internal data.
This commit is contained in:
koalp 2021-04-27 04:32:27 +02:00
parent 9aa2b5f07b
commit a16dbbc790
Signed by: koalp
GPG Key ID: 35B21047DEB09A81
16 changed files with 1361 additions and 41 deletions

View File

@ -17,6 +17,3 @@ labels:
**Expected behavior**
*describe what you expected to happen*
**Configuration**
*paste the result of `stage --version`

View File

@ -3,13 +3,12 @@ name: "Feature request"
about: "This template is for requesting a new feature"
title: ""
labels:
- "type::feature"
- "type::enhancement"
- "status::review_needed"
---
*(if applicable) describe what problem or frustration you have currently*
*describe what you would like to be able to do, or what solution you would like (you can propose several)*
*describe what you would like to be able to do, or what solution you would like*
*(optional) additional context, comments or implementation propositions*

1079
Cargo.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -2,6 +2,7 @@
members = [
"crieur-retrieve",
"crieur-chatbot",
]
@ -17,6 +18,7 @@ publish = false
[dependencies]
anyhow = "1.0.40"
crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
dotenv = "0.15.0"
env_logger = "0.8.3"
log = "0.4.14"

View File

@ -7,7 +7,7 @@ Tools to retrieve articles from multiple newspaper you subscribed to.
First retrieve login cookies for websites and put them in a `.env` file
```
cargo run --example=retrive_html_articles
cargo run --example=cli_downloader
```
# Documentation

19
crieur-chatbot/Cargo.toml Normal file
View File

@ -0,0 +1,19 @@
[package]
name = "crieur-chatbot"
version = "0.1.0"
authors = ["koalp <koalp@alpaga.dev>"]
edition = "2018"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
anyhow = "1.0.40"
dotenv = "0.15.0"
crieur-retrieve = {version = "0.1.0", path = "../crieur-retrieve"}
mime = "0.3.16"
log = "0.4.14"
[dependencies.matrix-sdk]
git = "https://github.com/matrix-org/matrix-rust-sdk"
rev = "ab180362c931606385dd53b73620d82ef2c3166d"
version = "0.2.0"

View File

@ -0,0 +1,85 @@
//! Chatbot
use std::convert::TryInto;
use anyhow::Result;
use matrix_sdk::{
self, async_trait,
events::{
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
AnyMessageEventContent, SyncMessageEvent,
},
room::Room,
Client, ClientConfig, EventHandler, SyncSettings,
};
use crate::Html;
#[derive(Debug, Clone, Default)]
pub(crate) struct Builder {
user: String,
password: String,
homeserver: String,
//TODO: rooms
room: String,
}
impl Builder {
fn new() -> Self {
Default::default()
}
pub(crate) async fn connect(&self) -> Result<Chatbot> {
let client = Client::new(self.homeserver.as_str())?;
client
.login(self.user.as_str(), self.password.as_str(), None, None)
.await?;
assert!(client.logged_in().await);
client
.join_room_by_id(&self.room.as_str().try_into()?)
.await?;
Ok(Chatbot { client })
}
pub(crate) fn login(
&mut self,
user: &impl AsRef<str>,
password: &impl AsRef<str>,
) -> &mut Self {
self.user = String::from(user.as_ref());
self.password = String::from(password.as_ref());
self
}
pub(crate) fn homeserver(&mut self, homeserver: &impl AsRef<str>) -> &mut Self {
self.homeserver = String::from(homeserver.as_ref());
self
}
pub(crate) fn room(&mut self, room: &impl AsRef<str>) -> &mut Self {
self.room = String::from(room.as_ref());
self
}
}
/// Chatbot wrapping the client produced by [`Builder::connect`].
#[derive(Debug, Clone)]
pub(crate) struct Chatbot {
    /// Client used to register the event handler and to sync.
    client: Client,
}

impl Chatbot {
    /// Returns an empty [`Builder`] used to configure and connect a chatbot.
    pub(crate) fn builder() -> Builder {
        Builder::new()
    }

    /// Registers the [`Html`] event handler, then syncs with the homeserver.
    ///
    /// The sync starts from the client's stored sync token when there is one,
    /// and from the default settings otherwise.
    pub(crate) async fn run(&self) -> Result<()> {
        let handler = Box::new(Html::new());
        self.client.set_event_handler(handler).await;

        let settings = match self.client.sync_token().await {
            Some(token) => SyncSettings::default().token(token),
            None => SyncSettings::default(),
        };

        self.client.sync(settings).await;
        Ok(())
    }
}

32
crieur-chatbot/src/cli.rs Normal file
View File

@ -0,0 +1,32 @@
use std::env;
use anyhow::{bail, Result};
use dotenv::dotenv;
use crate::Chatbot;
/// Runs the chatbot
///
/// A `.env` file is loaded first when one can be found (a failure to load it
/// is ignored). The configuration is then read from the environment
/// variables `CRIEUR_MATRIX_USER`, `CRIEUR_MATRIX_PASSWORD`,
/// `CRIEUR_MATRIX_HOMESERVER` and `CRIEUR_MATRIX_ROOM`.
///
/// # Errors
///
/// Returns an error naming the first required variable that is missing or
/// unreadable, and forwards any error raised while connecting or running the
/// chatbot.
pub async fn run() -> Result<()> {
    /// Reads a required environment variable, naming it in the error when it
    /// cannot be used.
    fn required_var(name: &str) -> Result<String> {
        match env::var(name) {
            Ok(value) => Ok(value),
            Err(err) => bail!(
                "Configuration incomplete, please set the `{}` environment variable: {}",
                name,
                err
            ),
        }
    }

    dotenv().ok();

    let user = required_var("CRIEUR_MATRIX_USER")?;
    let password = required_var("CRIEUR_MATRIX_PASSWORD")?;
    let homeserver = required_var("CRIEUR_MATRIX_HOMESERVER")?;
    let room = required_var("CRIEUR_MATRIX_ROOM")?;

    let chatbot = Chatbot::builder()
        .login(&user, &password)
        .homeserver(&homeserver)
        .room(&room)
        .connect()
        .await?;

    chatbot.run().await
}

View File

@ -0,0 +1,94 @@
use std::convert::TryInto;
use std::env;
use anyhow::Result;
use log::info;
use matrix_sdk::{
self, async_trait,
events::{
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
AnyMessageEventContent, SyncMessageEvent,
},
room::Room,
Client, ClientConfig, EventHandler, SyncSettings,
};
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
/// Event handler answering the `!html` command; it carries no state.
pub(crate) struct Html {}

impl Html {
    /// Creates the handler.
    pub fn new() -> Self {
        Html {}
    }
}
async fn send_article<U, E>(url: U, room: matrix_sdk::room::Joined)
where
U: TryInto<Url, Error = E> + Send,
E: std::error::Error + Sync + Send + 'static,
{
//TODO: replace by async block when async block is stable
async fn article_html<U, E>(url: U) -> Result<String>
where
U: TryInto<Url, Error = E> + Send,
E: std::error::Error + Sync + Send + 'static,
{
let article_str = ArticleLocation::builder()
.url(url)?
.build()?
.retrieve_html()
.await?;
Ok(article_str)
}
let text_message =
|message| AnyMessageEventContent::RoomMessage(MessageEventContent::text_plain(message));
//TODO: replace occurences ok() by async and logging block when async block is stable
let article_html = match article_html(url).await {
Ok(url) => url,
Err(_) => {
room.send(text_message("Can't download the file"), None)
.await
.ok();
return;
}
};
room.send_attachment(
"test.html",
&mime::TEXT_HTML_UTF_8,
&mut article_html.as_bytes(),
None,
)
.await
.ok();
}
#[async_trait]
impl EventHandler for Html {
    /// Answers `!html <url>` commands posted as text messages in joined rooms.
    ///
    /// The message body is split on whitespace: the first word must be
    /// `!html` and the second one is used as the article URL; any further
    /// word is ignored. Every other message is ignored.
    async fn on_room_message(&self, room: Room, event: &SyncMessageEvent<MessageEventContent>) {
        // Only rooms matching `Room::Joined` are answered.
        let room = match room {
            Room::Joined(room) => room,
            _ => return,
        };

        // Only plain text messages can carry a command.
        let msg_body = if let SyncMessageEvent {
            content:
                MessageEventContent {
                    msgtype: MessageType::Text(TextMessageEventContent { body: msg_body, .. }),
                    ..
                },
            ..
        } = event
        {
            msg_body
        } else {
            return;
        };

        // `split_whitespace` never yields empty words, so repeated spaces
        // between the command and the URL do not produce an empty URL.
        let mut words = msg_body.split_whitespace();
        if let (Some("!html"), Some(url)) = (words.next(), words.next()) {
            // Logged only once a command has actually been recognised.
            info!("sending file");
            send_article(url, room).await;
        }
    }
}

View File

@ -0,0 +1,2 @@
mod html;
pub(crate) use html::Html;

10
crieur-chatbot/src/lib.rs Normal file
View File

@ -0,0 +1,10 @@
//! Provides a matrix chatbot to download newspaper articles
mod cli;
pub use cli::run;
mod chatbot;
use chatbot::Chatbot;
mod handlers;
use handlers::Html;

View File

@ -1,21 +1,34 @@
use std::boxed::Box;
use std::convert::TryInto;
use std::env;
use anyhow::{anyhow, Result};
use log::info;
use url::{Host, Url};
use crate::newspaper::Newspaper;
use crate::newspapers::Mediapart;
type Newspapers<'a> = Vec<Box<&'a dyn Newspaper>>;
type Newspapers = Vec<Box<dyn Newspaper>>;
#[derive(Default)]
pub struct ArticleLocationBuilder<'a> {
url: Option<Url>,
newspapers: Option<Newspapers<'a>>,
/// Builds the list of newspapers used when none has been given to the builder.
///
/// The list holds a single `Mediapart` whose `MPRUUID` login cookie is read
/// from the `MEDIAPART_COOKIE` environment variable.
///
/// # Panics
///
/// Panics when `MEDIAPART_COOKIE` is not set or is not valid unicode.
fn default_newpapers() -> Newspapers {
    let mut mediapart = Mediapart::new();
    let cookie = env::var("MEDIAPART_COOKIE")
        .expect("the MEDIAPART_COOKIE environment variable must be set to use the default newspapers");
    mediapart.login_cookie = Some(("MPRUUID".into(), cookie));
    vec![Box::new(mediapart)]
}
impl<'a> ArticleLocationBuilder<'a> {
/// Builder for `ArticleLocation`.
#[derive(Default)]
pub struct Builder {
/// URL of the article; `build` returns an error when it has not been set.
url: Option<Url>,
/// Candidate newspapers; when `None`, `build` falls back to `default_newpapers()`.
newspapers: Option<Newspapers>,
}
impl Builder {
pub fn new() -> Self {
Self::default()
}
@ -37,9 +50,9 @@ impl<'a> ArticleLocationBuilder<'a> {
}
/// Adds a newspaper to the list
pub fn newspaper<T>(&mut self, newspaper: &'a T) -> &mut Self
pub fn newspaper<T>(mut self, newspaper: T) -> Self
where
T: 'a + Newspaper,
T: 'static + Newspaper,
{
match &mut self.newspapers {
Some(newspapers) => newspapers.push(Box::new(newspaper)),
@ -72,7 +85,7 @@ impl<'a> ArticleLocationBuilder<'a> {
/// - the url is not set
/// - the given url has no host
// TODO: move this to a defined error, remove anyhow !
pub fn build(&self) -> Result<ArticleLocation<'a>> {
pub fn build(self) -> Result<ArticleLocation> {
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
"No url set. You can set it with the url() function"
))?);
@ -80,28 +93,22 @@ impl<'a> ArticleLocationBuilder<'a> {
let host = Host::parse(host)?;
let newspaper = self
.newspapers
.as_ref()
.ok_or(anyhow!(
"A list of NewsPaper must be set. It can be set with newspapers() function"
))?
.iter()
.unwrap_or(default_newpapers())
.into_iter()
.find(|c| c.metadata().hosts.contains(&host))
.ok_or(anyhow!("Newspaper couldn't be found"))?;
Ok(ArticleLocation {
newspaper: newspaper.clone(),
url,
})
Ok(ArticleLocation { newspaper, url })
}
}
pub struct ArticleLocation<'a> {
newspaper: Box<&'a dyn Newspaper>,
/// An article URL paired with the newspaper used to retrieve it.
pub struct ArticleLocation {
/// Newspaper whose hosts contain the URL's host, picked when the location is built.
newspaper: Box<dyn Newspaper>,
/// URL of the article.
pub url: Url,
}
impl<'a> ArticleLocation<'a> {
pub fn builder() -> ArticleLocationBuilder<'a> {
ArticleLocationBuilder::new()
impl ArticleLocation {
pub fn builder() -> Builder {
Builder::new()
}
pub async fn retrieve_html(&self) -> Result<String> {

View File

@ -34,7 +34,7 @@ impl Metadata {
}
#[async_trait]
pub trait Newspaper {
pub trait Newspaper: Send + Sync {
/// Returns a list of hosts that corresponds to the newspapers
fn metadata(&self) -> Metadata;
@ -49,7 +49,7 @@ pub trait Newspaper {
}
/// Returns a newspaper structure
async fn new() -> Self
fn new() -> Self
where
Self: Sized;

View File

@ -80,7 +80,7 @@ impl Newspaper for Mediapart {
Ok(single_page_html)
}
async fn new() -> Self {
fn new() -> Self {
Self {
..Default::default()
}

View File

@ -17,7 +17,7 @@ async fn main() -> Result<()> {
};
// TODO: remove this in favor of default newspapers
let mut mediapart = Mediapart::new().await
let mut mediapart = Mediapart::new()
//.login(USERNAME, PASSWORD)
//
;
@ -28,7 +28,7 @@ async fn main() -> Result<()> {
// TODO: shorten this, maybe an helper function ?
let article_location = ArticleLocation::builder()
.url(url)?
.newspaper(&mediapart)
.newspaper(mediapart)
.build()?;
let article_str = article_location.retrieve_html().await?;

View File

@ -1,4 +1,5 @@
use anyhow::Result;
use crieur_chatbot::run;
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
use dotenv::dotenv;
use std::env;
@ -7,13 +8,12 @@ use std::env;
async fn main() -> Result<()> {
dotenv().ok();
let mut mediapart = Mediapart::new().await
let mut mediapart = Mediapart::new()
//.login(USERNAME, PASSWORD)
//
;
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
let url = Url::parse("https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long")?;
println!("{}", mediapart.retrieve_html(&url).await?);
run().await?;
Ok(())
}