feat: add basic chatbot
A basic chatbot application that downloads articles from one newspaper has been added. It can download html pages and is called with !html. ArticleLocation has been refactored to own its internal data.
This commit is contained in:
parent
9aa2b5f07b
commit
a16dbbc790
@ -17,6 +17,3 @@ labels:
|
||||
|
||||
**Expected behavior**
|
||||
*describe what you expected to happen*
|
||||
|
||||
**Configuration**
|
||||
*paste the result of `stage --version`
|
||||
|
@ -3,13 +3,12 @@ name: "Feature request"
|
||||
about: "This template is for requesting a new feature"
|
||||
title: ""
|
||||
labels:
|
||||
- "type::feature"
|
||||
- "type::enhancement"
|
||||
- "status::review_needed"
|
||||
|
||||
---
|
||||
|
||||
*(if applicable) describe what problem or frustration you have currently*
|
||||
|
||||
*describe what you would like to be able to do, or what solution you would like (you can propose several)*
|
||||
*describe what you would like to be able to do, or what solution you would like*
|
||||
|
||||
*(optional) additional context, comments or implementation propositions*
|
||||
|
1079
Cargo.lock
generated
1079
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
||||
|
||||
members = [
|
||||
"crieur-retrieve",
|
||||
"crieur-chatbot",
|
||||
]
|
||||
|
||||
|
||||
@ -17,6 +18,7 @@ publish = false
|
||||
[dependencies]
|
||||
anyhow = "1.0.40"
|
||||
crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
|
||||
crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
|
||||
dotenv = "0.15.0"
|
||||
env_logger = "0.8.3"
|
||||
log = "0.4.14"
|
||||
|
@ -7,7 +7,7 @@ Tools to retrieve articles from multiple newspaper you subscribed to.
|
||||
First retrieve login cookies for websites and put it in a `.env`
|
||||
|
||||
```
|
||||
cargo run --example=retrive_html_articles
|
||||
cargo run --example=cli_downloader
|
||||
```
|
||||
|
||||
# Documentation
|
||||
|
19
crieur-chatbot/Cargo.toml
Normal file
19
crieur-chatbot/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "crieur-chatbot"
|
||||
version = "0.1.0"
|
||||
authors = ["koalp <koalp@alpaga.dev>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.40"
|
||||
dotenv = "0.15.0"
|
||||
crieur-retrieve = {version = "0.1.0", path = "../crieur-retrieve"}
|
||||
mime = "0.3.16"
|
||||
log = "0.4.14"
|
||||
|
||||
[dependencies.matrix-sdk]
|
||||
git = "https://github.com/matrix-org/matrix-rust-sdk"
|
||||
rev = "ab180362c931606385dd53b73620d82ef2c3166d"
|
||||
version = "0.2.0"
|
85
crieur-chatbot/src/chatbot.rs
Normal file
85
crieur-chatbot/src/chatbot.rs
Normal file
@ -0,0 +1,85 @@
|
||||
//! Chatbot
|
||||
use std::convert::TryInto;
|
||||
|
||||
use anyhow::Result;
|
||||
use matrix_sdk::{
|
||||
self, async_trait,
|
||||
events::{
|
||||
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||
AnyMessageEventContent, SyncMessageEvent,
|
||||
},
|
||||
room::Room,
|
||||
Client, ClientConfig, EventHandler, SyncSettings,
|
||||
};
|
||||
|
||||
use crate::Html;
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct Builder {
|
||||
user: String,
|
||||
password: String,
|
||||
homeserver: String,
|
||||
//TODO: rooms
|
||||
room: String,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub(crate) async fn connect(&self) -> Result<Chatbot> {
|
||||
let client = Client::new(self.homeserver.as_str())?;
|
||||
client
|
||||
.login(self.user.as_str(), self.password.as_str(), None, None)
|
||||
.await?;
|
||||
assert!(client.logged_in().await);
|
||||
client
|
||||
.join_room_by_id(&self.room.as_str().try_into()?)
|
||||
.await?;
|
||||
|
||||
Ok(Chatbot { client })
|
||||
}
|
||||
|
||||
pub(crate) fn login(
|
||||
&mut self,
|
||||
user: &impl AsRef<str>,
|
||||
password: &impl AsRef<str>,
|
||||
) -> &mut Self {
|
||||
self.user = String::from(user.as_ref());
|
||||
self.password = String::from(password.as_ref());
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn homeserver(&mut self, homeserver: &impl AsRef<str>) -> &mut Self {
|
||||
self.homeserver = String::from(homeserver.as_ref());
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn room(&mut self, room: &impl AsRef<str>) -> &mut Self {
|
||||
self.room = String::from(room.as_ref());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Chatbot {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl Chatbot {
|
||||
pub(crate) fn builder() -> Builder {
|
||||
Builder::new()
|
||||
}
|
||||
|
||||
pub(crate) async fn run(&self) -> Result<()> {
|
||||
self.client.set_event_handler(Box::new(Html::new())).await;
|
||||
|
||||
let mut settings = SyncSettings::default();
|
||||
if let Some(token) = self.client.sync_token().await {
|
||||
settings = settings.token(token);
|
||||
}
|
||||
self.client.sync(settings).await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
32
crieur-chatbot/src/cli.rs
Normal file
32
crieur-chatbot/src/cli.rs
Normal file
@ -0,0 +1,32 @@
|
||||
use std::env;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use dotenv::dotenv;
|
||||
|
||||
use crate::Chatbot;
|
||||
|
||||
/// Runs the chatbot
|
||||
pub async fn run() -> Result<()> {
|
||||
dotenv().ok();
|
||||
|
||||
let (user, password, homeserver, room) = match (
|
||||
env::var("CRIEUR_MATRIX_USER"),
|
||||
env::var("CRIEUR_MATRIX_PASSWORD"),
|
||||
env::var("CRIEUR_MATRIX_HOMESERVER"),
|
||||
env::var("CRIEUR_MATRIX_ROOM"),
|
||||
) {
|
||||
(Ok(user), Ok(password), Ok(homeserver), Ok(room)) => (user, password, homeserver, room),
|
||||
_ => bail!("Configuration incomplete, please set all required environment variables"),
|
||||
};
|
||||
|
||||
let chatbot = Chatbot::builder()
|
||||
.login(&user, &password)
|
||||
.homeserver(&homeserver)
|
||||
.room(&room)
|
||||
.connect()
|
||||
.await?;
|
||||
|
||||
chatbot.run().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
94
crieur-chatbot/src/handlers/html.rs
Normal file
94
crieur-chatbot/src/handlers/html.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use std::convert::TryInto;
|
||||
use std::env;
|
||||
|
||||
use anyhow::Result;
|
||||
use log::info;
|
||||
use matrix_sdk::{
|
||||
self, async_trait,
|
||||
events::{
|
||||
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||
AnyMessageEventContent, SyncMessageEvent,
|
||||
},
|
||||
room::Room,
|
||||
Client, ClientConfig, EventHandler, SyncSettings,
|
||||
};
|
||||
|
||||
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
|
||||
|
||||
/// Event handler answering the `!html` command; it holds no state.
pub(crate) struct Html {}

impl Html {
    /// Creates the handler.
    pub fn new() -> Self {
        Html {}
    }
}
|
||||
|
||||
async fn send_article<U, E>(url: U, room: matrix_sdk::room::Joined)
|
||||
where
|
||||
U: TryInto<Url, Error = E> + Send,
|
||||
E: std::error::Error + Sync + Send + 'static,
|
||||
{
|
||||
//TODO: replace by async block when async block is stable
|
||||
async fn article_html<U, E>(url: U) -> Result<String>
|
||||
where
|
||||
U: TryInto<Url, Error = E> + Send,
|
||||
E: std::error::Error + Sync + Send + 'static,
|
||||
{
|
||||
let article_str = ArticleLocation::builder()
|
||||
.url(url)?
|
||||
.build()?
|
||||
.retrieve_html()
|
||||
.await?;
|
||||
Ok(article_str)
|
||||
}
|
||||
|
||||
let text_message =
|
||||
|message| AnyMessageEventContent::RoomMessage(MessageEventContent::text_plain(message));
|
||||
|
||||
//TODO: replace occurences ok() by async and logging block when async block is stable
|
||||
let article_html = match article_html(url).await {
|
||||
Ok(url) => url,
|
||||
Err(_) => {
|
||||
room.send(text_message("Can't download the file"), None)
|
||||
.await
|
||||
.ok();
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
room.send_attachment(
|
||||
"test.html",
|
||||
&mime::TEXT_HTML_UTF_8,
|
||||
&mut article_html.as_bytes(),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EventHandler for Html {
|
||||
async fn on_room_message(&self, room: Room, event: &SyncMessageEvent<MessageEventContent>) {
|
||||
if let Room::Joined(room) = room {
|
||||
let msg_body = if let SyncMessageEvent {
|
||||
content:
|
||||
MessageEventContent {
|
||||
msgtype: MessageType::Text(TextMessageEventContent { body: msg_body, .. }),
|
||||
..
|
||||
},
|
||||
..
|
||||
} = event
|
||||
{
|
||||
msg_body
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
info!("sending file");
|
||||
|
||||
match msg_body.split(' ').collect::<Vec<_>>().as_slice() {
|
||||
["!html", url, ..] => send_article(*url, room).await,
|
||||
_ => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
2
crieur-chatbot/src/handlers/mod.rs
Normal file
2
crieur-chatbot/src/handlers/mod.rs
Normal file
@ -0,0 +1,2 @@
|
||||
mod html;
|
||||
pub(crate) use html::Html;
|
10
crieur-chatbot/src/lib.rs
Normal file
10
crieur-chatbot/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
||||
//! Provides a matrix chatbot to download newspaper articles
|
||||
|
||||
mod cli;
|
||||
pub use cli::run;
|
||||
|
||||
mod chatbot;
|
||||
use chatbot::Chatbot;
|
||||
|
||||
mod handlers;
|
||||
use handlers::Html;
|
@ -1,21 +1,34 @@
|
||||
use std::boxed::Box;
|
||||
use std::convert::TryInto;
|
||||
use std::env;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use log::info;
|
||||
use url::{Host, Url};
|
||||
|
||||
use crate::newspaper::Newspaper;
|
||||
use crate::newspapers::Mediapart;
|
||||
|
||||
type Newspapers<'a> = Vec<Box<&'a dyn Newspaper>>;
|
||||
type Newspapers = Vec<Box<dyn Newspaper>>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ArticleLocationBuilder<'a> {
|
||||
url: Option<Url>,
|
||||
newspapers: Option<Newspapers<'a>>,
|
||||
fn default_newpapers() -> Newspapers {
|
||||
let mut mediapart = Mediapart::new();
|
||||
|
||||
mediapart.login_cookie = Some((
|
||||
"MPRUUID".into(),
|
||||
env::var("MEDIAPART_COOKIE").unwrap().into(),
|
||||
));
|
||||
|
||||
vec![Box::new(mediapart)]
|
||||
}
|
||||
|
||||
impl<'a> ArticleLocationBuilder<'a> {
|
||||
#[derive(Default)]
|
||||
pub struct Builder {
|
||||
url: Option<Url>,
|
||||
newspapers: Option<Newspapers>,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
@ -37,9 +50,9 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
}
|
||||
|
||||
/// Adds a newspaper to the list
|
||||
pub fn newspaper<T>(&mut self, newspaper: &'a T) -> &mut Self
|
||||
pub fn newspaper<T>(mut self, newspaper: T) -> Self
|
||||
where
|
||||
T: 'a + Newspaper,
|
||||
T: 'static + Newspaper,
|
||||
{
|
||||
match &mut self.newspapers {
|
||||
Some(newspapers) => newspapers.push(Box::new(newspaper)),
|
||||
@ -72,7 +85,7 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
/// - the url is not set
|
||||
/// - the given url has no host
|
||||
// TODO: move this to a defined error, remove anyhow !
|
||||
pub fn build(&self) -> Result<ArticleLocation<'a>> {
|
||||
pub fn build(self) -> Result<ArticleLocation> {
|
||||
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
|
||||
"No url set. You can set it with the url() function"
|
||||
))?);
|
||||
@ -80,28 +93,22 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
let host = Host::parse(host)?;
|
||||
let newspaper = self
|
||||
.newspapers
|
||||
.as_ref()
|
||||
.ok_or(anyhow!(
|
||||
"A list of NewsPaper must be set. It can be set with newspapers() function"
|
||||
))?
|
||||
.iter()
|
||||
.unwrap_or(default_newpapers())
|
||||
.into_iter()
|
||||
.find(|c| c.metadata().hosts.contains(&host))
|
||||
.ok_or(anyhow!("Newspaper couldn't be found"))?;
|
||||
Ok(ArticleLocation {
|
||||
newspaper: newspaper.clone(),
|
||||
url,
|
||||
})
|
||||
Ok(ArticleLocation { newspaper, url })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ArticleLocation<'a> {
|
||||
newspaper: Box<&'a dyn Newspaper>,
|
||||
pub struct ArticleLocation {
|
||||
newspaper: Box<dyn Newspaper>,
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
impl<'a> ArticleLocation<'a> {
|
||||
pub fn builder() -> ArticleLocationBuilder<'a> {
|
||||
ArticleLocationBuilder::new()
|
||||
impl ArticleLocation {
|
||||
pub fn builder() -> Builder {
|
||||
Builder::new()
|
||||
}
|
||||
|
||||
pub async fn retrieve_html(&self) -> Result<String> {
|
||||
|
@ -34,7 +34,7 @@ impl Metadata {
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Newspaper {
|
||||
pub trait Newspaper: Send + Sync {
|
||||
/// Returns a list of hosts that corresponds to the newspapers
|
||||
fn metadata(&self) -> Metadata;
|
||||
|
||||
@ -49,7 +49,7 @@ pub trait Newspaper {
|
||||
}
|
||||
|
||||
/// Returns a newspaper structure
|
||||
async fn new() -> Self
|
||||
fn new() -> Self
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
|
@ -80,7 +80,7 @@ impl Newspaper for Mediapart {
|
||||
Ok(single_page_html)
|
||||
}
|
||||
|
||||
async fn new() -> Self {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
..Default::default()
|
||||
}
|
||||
|
@ -17,7 +17,7 @@ async fn main() -> Result<()> {
|
||||
};
|
||||
|
||||
// TODO: remove this in favor of default newspapers
|
||||
let mut mediapart = Mediapart::new().await
|
||||
let mut mediapart = Mediapart::new()
|
||||
//.login(USERNAME, PASSWORD)
|
||||
//
|
||||
;
|
||||
@ -28,7 +28,7 @@ async fn main() -> Result<()> {
|
||||
// TODO: shorten this, maybe an helper function ?
|
||||
let article_location = ArticleLocation::builder()
|
||||
.url(url)?
|
||||
.newspaper(&mediapart)
|
||||
.newspaper(mediapart)
|
||||
.build()?;
|
||||
|
||||
let article_str = article_location.retrieve_html().await?;
|
||||
|
@ -1,4 +1,5 @@
|
||||
use anyhow::Result;
|
||||
use crieur_chatbot::run;
|
||||
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
|
||||
use dotenv::dotenv;
|
||||
use std::env;
|
||||
@ -7,13 +8,12 @@ use std::env;
|
||||
async fn main() -> Result<()> {
|
||||
dotenv().ok();
|
||||
|
||||
let mut mediapart = Mediapart::new().await
|
||||
let mut mediapart = Mediapart::new()
|
||||
//.login(USERNAME, PASSWORD)
|
||||
//
|
||||
;
|
||||
|
||||
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
|
||||
let url = Url::parse("https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long")?;
|
||||
println!("{}", mediapart.retrieve_html(&url).await?);
|
||||
run().await?;
|
||||
Ok(())
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user