feat: add basic chatbot #14
@ -17,6 +17,3 @@ labels:
|
|||||||
|
|
||||||
**Expected behavior**
|
**Expected behavior**
|
||||||
*describe what you expected to happen*
|
*describe what you expected to happen*
|
||||||
|
|
||||||
**Configuration**
|
|
||||||
*paste the result of `stage --version`*
|
|
||||||
|
@ -3,13 +3,12 @@ name: "Feature request"
|
|||||||
about: "This template is for requesting a new feature"
|
about: "This template is for requesting a new feature"
|
||||||
title: ""
|
title: ""
|
||||||
labels:
|
labels:
|
||||||
- "type::feature"
|
- "type::enhancement"
|
||||||
- "status::review_needed"
|
- "status::review_needed"
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
*(if applicable) describe what problem or frustration you have currently*
|
*(if applicable) describe what problem or frustration you have currently*
|
||||||
|
|
||||||
*describe what you would like to be able to do, or what solution you would like (you can propose several)*
|
*describe what you would like to be able to do, or what solution you would like*
|
||||||
|
|
||||||
*(optional) additional context, comments or implementation propositions*
|
*(optional) additional context, comments or implementation propositions*
|
||||||
|
1079
Cargo.lock
generated
1079
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
members = [
|
members = [
|
||||||
"crieur-retrieve",
|
"crieur-retrieve",
|
||||||
|
"crieur-chatbot",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
@ -17,6 +18,7 @@ publish = false
|
|||||||
[dependencies]
|
[dependencies]
|
||||||
anyhow = "1.0.40"
|
anyhow = "1.0.40"
|
||||||
crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
|
crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
|
||||||
|
crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
|
||||||
dotenv = "0.15.0"
|
dotenv = "0.15.0"
|
||||||
env_logger = "0.8.3"
|
env_logger = "0.8.3"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
|
@ -7,7 +7,7 @@ Tools to retrieve articles from multiple newspaper you subscribed to.
|
|||||||
First retrieve login cookies for websites and put it in a `.env`
|
First retrieve login cookies for websites and put it in a `.env`
|
||||||
|
|
||||||
```
|
```
|
||||||
cargo run --example=retrive_html_articles
|
cargo run --example=cli_downloader
|
||||||
```
|
```
|
||||||
|
|
||||||
# Documentation
|
# Documentation
|
||||||
|
19
crieur-chatbot/Cargo.toml
Normal file
19
crieur-chatbot/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
[package]
|
||||||
|
name = "crieur-chatbot"
|
||||||
|
version = "0.1.0"
|
||||||
|
authors = ["koalp <koalp@alpaga.dev>"]
|
||||||
|
edition = "2018"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
anyhow = "1.0.40"
|
||||||
|
dotenv = "0.15.0"
|
||||||
|
crieur-retrieve = {version = "0.1.0", path = "../crieur-retrieve"}
|
||||||
|
mime = "0.3.16"
|
||||||
|
log = "0.4.14"
|
||||||
|
|
||||||
|
[dependencies.matrix-sdk]
|
||||||
|
git = "https://github.com/matrix-org/matrix-rust-sdk"
|
||||||
|
rev = "ab180362c931606385dd53b73620d82ef2c3166d"
|
||||||
|
version = "0.2.0"
|
85
crieur-chatbot/src/chatbot.rs
Normal file
85
crieur-chatbot/src/chatbot.rs
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
//! Chatbot
|
||||||
|
use std::convert::TryInto;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use matrix_sdk::{
|
||||||
|
self, async_trait,
|
||||||
|
events::{
|
||||||
|
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||||
|
AnyMessageEventContent, SyncMessageEvent,
|
||||||
|
},
|
||||||
|
room::Room,
|
||||||
|
Client, ClientConfig, EventHandler, SyncSettings,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crate::Html;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Default)]
|
||||||
|
pub(crate) struct Builder {
|
||||||
|
user: String,
|
||||||
|
password: String,
|
||||||
|
homeserver: String,
|
||||||
|
//TODO: rooms
|
||||||
|
room: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
|
fn new() -> Self {
|
||||||
|
Default::default()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn connect(&self) -> Result<Chatbot> {
|
||||||
|
let client = Client::new(self.homeserver.as_str())?;
|
||||||
|
client
|
||||||
|
.login(self.user.as_str(), self.password.as_str(), None, None)
|
||||||
|
.await?;
|
||||||
|
assert!(client.logged_in().await);
|
||||||
|
client
|
||||||
|
.join_room_by_id(&self.room.as_str().try_into()?)
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
Ok(Chatbot { client })
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn login(
|
||||||
|
&mut self,
|
||||||
|
user: &impl AsRef<str>,
|
||||||
|
password: &impl AsRef<str>,
|
||||||
|
) -> &mut Self {
|
||||||
|
self.user = String::from(user.as_ref());
|
||||||
|
self.password = String::from(password.as_ref());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn homeserver(&mut self, homeserver: &impl AsRef<str>) -> &mut Self {
|
||||||
|
self.homeserver = String::from(homeserver.as_ref());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) fn room(&mut self, room: &impl AsRef<str>) -> &mut Self {
|
||||||
|
self.room = String::from(room.as_ref());
|
||||||
|
self
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub(crate) struct Chatbot {
|
||||||
|
client: Client,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Chatbot {
|
||||||
|
pub(crate) fn builder() -> Builder {
|
||||||
|
Builder::new()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub(crate) async fn run(&self) -> Result<()> {
|
||||||
|
self.client.set_event_handler(Box::new(Html::new())).await;
|
||||||
|
|
||||||
|
let mut settings = SyncSettings::default();
|
||||||
|
if let Some(token) = self.client.sync_token().await {
|
||||||
|
settings = settings.token(token);
|
||||||
|
}
|
||||||
|
self.client.sync(settings).await;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
}
|
32
crieur-chatbot/src/cli.rs
Normal file
32
crieur-chatbot/src/cli.rs
Normal file
@ -0,0 +1,32 @@
|
|||||||
|
use std::env;
|
||||||
|
|
||||||
|
use anyhow::{bail, Result};
|
||||||
|
use dotenv::dotenv;
|
||||||
|
|
||||||
|
use crate::Chatbot;
|
||||||
|
|
||||||
|
/// Runs the chatbot
|
||||||
|
pub async fn run() -> Result<()> {
|
||||||
|
dotenv().ok();
|
||||||
|
|
||||||
|
let (user, password, homeserver, room) = match (
|
||||||
|
env::var("CRIEUR_MATRIX_USER"),
|
||||||
|
env::var("CRIEUR_MATRIX_PASSWORD"),
|
||||||
|
env::var("CRIEUR_MATRIX_HOMESERVER"),
|
||||||
|
env::var("CRIEUR_MATRIX_ROOM"),
|
||||||
|
) {
|
||||||
|
(Ok(user), Ok(password), Ok(homeserver), Ok(room)) => (user, password, homeserver, room),
|
||||||
|
_ => bail!("Configuration incomplete, please set all required environment variables"),
|
||||||
|
};
|
||||||
|
|
||||||
|
let chatbot = Chatbot::builder()
|
||||||
|
.login(&user, &password)
|
||||||
|
.homeserver(&homeserver)
|
||||||
|
.room(&room)
|
||||||
|
.connect()
|
||||||
|
.await?;
|
||||||
|
|
||||||
|
chatbot.run().await?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
94
crieur-chatbot/src/handlers/html.rs
Normal file
94
crieur-chatbot/src/handlers/html.rs
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
use std::convert::TryInto;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
|
use anyhow::Result;
|
||||||
|
use log::info;
|
||||||
|
use matrix_sdk::{
|
||||||
|
self, async_trait,
|
||||||
|
events::{
|
||||||
|
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||||
|
AnyMessageEventContent, SyncMessageEvent,
|
||||||
|
},
|
||||||
|
room::Room,
|
||||||
|
Client, ClientConfig, EventHandler, SyncSettings,
|
||||||
|
};
|
||||||
|
|
||||||
|
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
|
||||||
|
|
||||||
|
/// Event handler type for the chatbot; it carries no state of its own.
pub(crate) struct Html {}

impl Html {
    /// Creates the (stateless) handler.
    pub fn new() -> Self {
        Html {}
    }
}
|
||||||
|
|
||||||
|
async fn send_article<U, E>(url: U, room: matrix_sdk::room::Joined)
|
||||||
|
where
|
||||||
|
U: TryInto<Url, Error = E> + Send,
|
||||||
|
E: std::error::Error + Sync + Send + 'static,
|
||||||
|
{
|
||||||
|
//TODO: replace by async block when async block is stable
|
||||||
|
async fn article_html<U, E>(url: U) -> Result<String>
|
||||||
|
where
|
||||||
|
U: TryInto<Url, Error = E> + Send,
|
||||||
|
E: std::error::Error + Sync + Send + 'static,
|
||||||
|
{
|
||||||
|
let article_str = ArticleLocation::builder()
|
||||||
|
.url(url)?
|
||||||
|
.build()?
|
||||||
|
.retrieve_html()
|
||||||
|
.await?;
|
||||||
|
Ok(article_str)
|
||||||
|
}
|
||||||
|
|
||||||
|
let text_message =
|
||||||
|
|message| AnyMessageEventContent::RoomMessage(MessageEventContent::text_plain(message));
|
||||||
|
|
||||||
|
//TODO: replace occurences ok() by async and logging block when async block is stable
|
||||||
|
let article_html = match article_html(url).await {
|
||||||
|
Ok(url) => url,
|
||||||
|
Err(_) => {
|
||||||
|
room.send(text_message("Can't download the file"), None)
|
||||||
|
.await
|
||||||
|
.ok();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
room.send_attachment(
|
||||||
|
"test.html",
|
||||||
|
&mime::TEXT_HTML_UTF_8,
|
||||||
|
&mut article_html.as_bytes(),
|
||||||
|
None,
|
||||||
|
)
|
||||||
|
.await
|
||||||
|
.ok();
|
||||||
|
}
|
||||||
|
|
||||||
|
#[async_trait]
|
||||||
|
impl EventHandler for Html {
|
||||||
|
async fn on_room_message(&self, room: Room, event: &SyncMessageEvent<MessageEventContent>) {
|
||||||
|
if let Room::Joined(room) = room {
|
||||||
|
let msg_body = if let SyncMessageEvent {
|
||||||
|
content:
|
||||||
|
MessageEventContent {
|
||||||
|
msgtype: MessageType::Text(TextMessageEventContent { body: msg_body, .. }),
|
||||||
|
..
|
||||||
|
},
|
||||||
|
..
|
||||||
|
} = event
|
||||||
|
{
|
||||||
|
msg_body
|
||||||
|
} else {
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
info!("sending file");
|
||||||
|
|
||||||
|
match msg_body.split(' ').collect::<Vec<_>>().as_slice() {
|
||||||
|
["!html", url, ..] => send_article(*url, room).await,
|
||||||
|
_ => return,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
2
crieur-chatbot/src/handlers/mod.rs
Normal file
2
crieur-chatbot/src/handlers/mod.rs
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
mod html;
|
||||||
|
pub(crate) use html::Html;
|
10
crieur-chatbot/src/lib.rs
Normal file
10
crieur-chatbot/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
//! Provides a matrix chatbot to download newspaper articles
|
||||||
|
|
||||||
|
mod cli;
|
||||||
|
pub use cli::run;
|
||||||
|
|
||||||
|
mod chatbot;
|
||||||
|
use chatbot::Chatbot;
|
||||||
|
|
||||||
|
mod handlers;
|
||||||
|
use handlers::Html;
|
@ -1,21 +1,34 @@
|
|||||||
use std::boxed::Box;
|
use std::boxed::Box;
|
||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
use std::env;
|
||||||
|
|
||||||
use anyhow::{anyhow, Result};
|
use anyhow::{anyhow, Result};
|
||||||
use log::info;
|
use log::info;
|
||||||
use url::{Host, Url};
|
use url::{Host, Url};
|
||||||
|
|
||||||
use crate::newspaper::Newspaper;
|
use crate::newspaper::Newspaper;
|
||||||
|
use crate::newspapers::Mediapart;
|
||||||
|
|
||||||
type Newspapers<'a> = Vec<Box<&'a dyn Newspaper>>;
|
type Newspapers = Vec<Box<dyn Newspaper>>;
|
||||||
|
|
||||||
#[derive(Default)]
|
fn default_newpapers() -> Newspapers {
|
||||||
pub struct ArticleLocationBuilder<'a> {
|
let mut mediapart = Mediapart::new();
|
||||||
url: Option<Url>,
|
|
||||||
newspapers: Option<Newspapers<'a>>,
|
mediapart.login_cookie = Some((
|
||||||
|
"MPRUUID".into(),
|
||||||
|
env::var("MEDIAPART_COOKIE").unwrap().into(),
|
||||||
|
));
|
||||||
|
|
||||||
|
vec![Box::new(mediapart)]
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ArticleLocationBuilder<'a> {
|
#[derive(Default)]
|
||||||
|
pub struct Builder {
|
||||||
|
url: Option<Url>,
|
||||||
|
newspapers: Option<Newspapers>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Builder {
|
||||||
pub fn new() -> Self {
|
pub fn new() -> Self {
|
||||||
Self::default()
|
Self::default()
|
||||||
}
|
}
|
||||||
@ -37,9 +50,9 @@ impl<'a> ArticleLocationBuilder<'a> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Adds a newspaper to the list
|
/// Adds a newspaper to the list
|
||||||
pub fn newspaper<T>(&mut self, newspaper: &'a T) -> &mut Self
|
pub fn newspaper<T>(mut self, newspaper: T) -> Self
|
||||||
where
|
where
|
||||||
T: 'a + Newspaper,
|
T: 'static + Newspaper,
|
||||||
{
|
{
|
||||||
match &mut self.newspapers {
|
match &mut self.newspapers {
|
||||||
Some(newspapers) => newspapers.push(Box::new(newspaper)),
|
Some(newspapers) => newspapers.push(Box::new(newspaper)),
|
||||||
@ -72,7 +85,7 @@ impl<'a> ArticleLocationBuilder<'a> {
|
|||||||
/// - the url is not set
|
/// - the url is not set
|
||||||
/// - the given url has no host
|
/// - the given url has no host
|
||||||
// TODO: move this to a defined error, remove anyhow !
|
// TODO: move this to a defined error, remove anyhow !
|
||||||
pub fn build(&self) -> Result<ArticleLocation<'a>> {
|
pub fn build(self) -> Result<ArticleLocation> {
|
||||||
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
|
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
|
||||||
"No url set. You can set it with the url() function"
|
"No url set. You can set it with the url() function"
|
||||||
))?);
|
))?);
|
||||||
@ -80,28 +93,22 @@ impl<'a> ArticleLocationBuilder<'a> {
|
|||||||
let host = Host::parse(host)?;
|
let host = Host::parse(host)?;
|
||||||
let newspaper = self
|
let newspaper = self
|
||||||
.newspapers
|
.newspapers
|
||||||
.as_ref()
|
.unwrap_or(default_newpapers())
|
||||||
.ok_or(anyhow!(
|
.into_iter()
|
||||||
"A list of NewsPaper must be set. It can be set with newspapers() function"
|
|
||||||
))?
|
|
||||||
.iter()
|
|
||||||
.find(|c| c.metadata().hosts.contains(&host))
|
.find(|c| c.metadata().hosts.contains(&host))
|
||||||
.ok_or(anyhow!("Newspaper couldn't be found"))?;
|
.ok_or(anyhow!("Newspaper couldn't be found"))?;
|
||||||
Ok(ArticleLocation {
|
Ok(ArticleLocation { newspaper, url })
|
||||||
newspaper: newspaper.clone(),
|
|
||||||
url,
|
|
||||||
})
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct ArticleLocation<'a> {
|
pub struct ArticleLocation {
|
||||||
newspaper: Box<&'a dyn Newspaper>,
|
newspaper: Box<dyn Newspaper>,
|
||||||
pub url: Url,
|
pub url: Url,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a> ArticleLocation<'a> {
|
impl ArticleLocation {
|
||||||
pub fn builder() -> ArticleLocationBuilder<'a> {
|
pub fn builder() -> Builder {
|
||||||
ArticleLocationBuilder::new()
|
Builder::new()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub async fn retrieve_html(&self) -> Result<String> {
|
pub async fn retrieve_html(&self) -> Result<String> {
|
||||||
|
@ -34,7 +34,7 @@ impl Metadata {
|
|||||||
}
|
}
|
||||||
|
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
pub trait Newspaper {
|
pub trait Newspaper: Send + Sync {
|
||||||
/// Returns a list of hosts that corresponds to the newspapers
|
/// Returns a list of hosts that corresponds to the newspapers
|
||||||
fn metadata(&self) -> Metadata;
|
fn metadata(&self) -> Metadata;
|
||||||
|
|
||||||
@ -49,7 +49,7 @@ pub trait Newspaper {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a newspaper structure
|
/// Returns a newspaper structure
|
||||||
async fn new() -> Self
|
fn new() -> Self
|
||||||
where
|
where
|
||||||
Self: Sized;
|
Self: Sized;
|
||||||
|
|
||||||
|
@ -80,7 +80,7 @@ impl Newspaper for Mediapart {
|
|||||||
Ok(single_page_html)
|
Ok(single_page_html)
|
||||||
}
|
}
|
||||||
|
|
||||||
async fn new() -> Self {
|
fn new() -> Self {
|
||||||
Self {
|
Self {
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ async fn main() -> Result<()> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: remove this in favor of default newspapers
|
// TODO: remove this in favor of default newspapers
|
||||||
let mut mediapart = Mediapart::new().await
|
let mut mediapart = Mediapart::new()
|
||||||
//.login(USERNAME, PASSWORD)
|
//.login(USERNAME, PASSWORD)
|
||||||
//
|
//
|
||||||
;
|
;
|
||||||
@ -28,7 +28,7 @@ async fn main() -> Result<()> {
|
|||||||
// TODO: shorten this, maybe an helper function ?
|
// TODO: shorten this, maybe an helper function ?
|
||||||
let article_location = ArticleLocation::builder()
|
let article_location = ArticleLocation::builder()
|
||||||
.url(url)?
|
.url(url)?
|
||||||
.newspaper(&mediapart)
|
.newspaper(mediapart)
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
let article_str = article_location.retrieve_html().await?;
|
let article_str = article_location.retrieve_html().await?;
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
|
use crieur_chatbot::run;
|
||||||
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
|
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
|
||||||
use dotenv::dotenv;
|
use dotenv::dotenv;
|
||||||
use std::env;
|
use std::env;
|
||||||
@ -7,13 +8,12 @@ use std::env;
|
|||||||
async fn main() -> Result<()> {
|
async fn main() -> Result<()> {
|
||||||
dotenv().ok();
|
dotenv().ok();
|
||||||
|
|
||||||
let mut mediapart = Mediapart::new().await
|
let mut mediapart = Mediapart::new()
|
||||||
//.login(USERNAME, PASSWORD)
|
//.login(USERNAME, PASSWORD)
|
||||||
//
|
//
|
||||||
;
|
;
|
||||||
|
|
||||||
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
|
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
|
||||||
let url = Url::parse("https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long")?;
|
run().await?;
|
||||||
println!("{}", mediapart.retrieve_html(&url).await?);
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user