Merge pull request 'feat: add basic chatbot' (#14) from feature/minimal-chatbot into development
Reviewed-on: #14
This commit is contained in:
commit
9655b086f0
@ -8,6 +8,7 @@ steps:
|
||||
pull: true
|
||||
errignore: true
|
||||
commands:
|
||||
- apt-get update && apt-get install -y cmake
|
||||
- rustup component add rustfmt
|
||||
- rustup component add clippy
|
||||
- cargo clippy
|
||||
@ -17,5 +18,6 @@ steps:
|
||||
pull: true
|
||||
errignore: true
|
||||
commands:
|
||||
- apt-get update && apt-get install -y cmake
|
||||
- cargo test --all
|
||||
- cargo build
|
||||
|
@ -17,6 +17,3 @@ labels:
|
||||
|
||||
**Expected behavior**
|
||||
*describe what you expected to happen*
|
||||
|
||||
**Configuration**
|
||||
*paste the result of `stage --version`*
|
||||
|
@ -3,13 +3,12 @@ name: "Feature request"
|
||||
about: "This template is for requesting a new feature"
|
||||
title: ""
|
||||
labels:
|
||||
- "type::feature"
|
||||
- "type::enhancement"
|
||||
- "status::review_needed"
|
||||
|
||||
---
|
||||
|
||||
*(if applicable) describe what problem or frustration you have currently*
|
||||
|
||||
*describe what you would like to be able to do, or what solution you would like (you can propose several)*
|
||||
*describe what you would like to be able to do, or what solution you would like*
|
||||
|
||||
*(optional) additional context, comments or implementation propositions*
|
||||
|
1079
Cargo.lock
generated
1079
Cargo.lock
generated
File diff suppressed because it is too large
Load Diff
@ -2,6 +2,7 @@
|
||||
|
||||
members = [
|
||||
"crieur-retrieve",
|
||||
"crieur-chatbot",
|
||||
]
|
||||
|
||||
|
||||
@ -17,6 +18,7 @@ publish = false
|
||||
[dependencies]
|
||||
anyhow = "1.0.40"
|
||||
crieur-retrieve = {version = "0.1", path="crieur-retrieve"}
|
||||
crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
|
||||
dotenv = "0.15.0"
|
||||
env_logger = "0.8.3"
|
||||
log = "0.4.14"
|
||||
|
28
README.md
28
README.md
@ -1,17 +1,33 @@
|
||||
Tools to retrieve articles from multiple newspapers you subscribed to.
|
||||
|
||||
**This is a prototype, it isn't stable at all and you may not want to use it if you expect it to just work !**
|
||||
**This is a prototype, it isn't stable at all and you may not want to use it if
|
||||
you expect it to just work !**
|
||||
|
||||
# How to use it
|
||||
|
||||
First retrieve login cookies for websites and put it in a `.env`
|
||||
First retrieve login cookies for websites and put them in a `.env` file, as
|
||||
explained in the [newspaper source configuration
|
||||
documentation](./documentation/reference/newspaper_configuration.md)
|
||||
|
||||
Then you can run
|
||||
|
||||
```
|
||||
cargo run --example=retrive_html_articles
|
||||
cargo run --example=cli_downloader
|
||||
```
|
||||
|
||||
To know how to run the chatbot, please read the [chatbot
|
||||
guide](./documentation/guides/run_chatbot.md)
|
||||
|
||||
# Documentation
|
||||
|
||||
- 1. [Design](documentation/design/index.md)
|
||||
- a. [Scope of the project](documentation/design/scope.md)
|
||||
- b. [Retrieve](documentation/design/retrieve.md)
|
||||
- 1. Design
|
||||
- a. [Scope of the project and roadmap](./documentation/design/scope.md)
|
||||
- b. [Retrieve](./documentation/design/retrieve.md)
|
||||
- 2. Guides
|
||||
- a. [Add a newspaper
|
||||
source](./documentation/guides/add_a_newspaper_source.md)
|
||||
- 3. Reference
|
||||
- a. [Newspaper source
|
||||
configuration](./documentation/reference/newspaper_configuration.md)
|
||||
- b. [Chatbot
|
||||
configuration](./documentation/reference/chatbot_configuration.md)
|
||||
|
19
crieur-chatbot/Cargo.toml
Normal file
19
crieur-chatbot/Cargo.toml
Normal file
@ -0,0 +1,19 @@
|
||||
[package]
|
||||
name = "crieur-chatbot"
|
||||
version = "0.1.0"
|
||||
authors = ["koalp <koalp@alpaga.dev>"]
|
||||
edition = "2018"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
[dependencies]
|
||||
anyhow = "1.0.40"
|
||||
dotenv = "0.15.0"
|
||||
crieur-retrieve = {version = "0.1.0", path = "../crieur-retrieve"}
|
||||
mime = "0.3.16"
|
||||
log = "0.4.14"
|
||||
|
||||
[dependencies.matrix-sdk]
|
||||
git = "https://github.com/matrix-org/matrix-rust-sdk"
|
||||
rev = "ab180362c931606385dd53b73620d82ef2c3166d"
|
||||
version = "0.2.0"
|
85
crieur-chatbot/src/chatbot.rs
Normal file
85
crieur-chatbot/src/chatbot.rs
Normal file
@ -0,0 +1,85 @@
|
||||
//! Chatbot
|
||||
use std::convert::TryInto;
|
||||
|
||||
use anyhow::Result;
|
||||
use matrix_sdk::{
|
||||
self, async_trait,
|
||||
events::{
|
||||
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||
AnyMessageEventContent, SyncMessageEvent,
|
||||
},
|
||||
room::Room,
|
||||
Client, ClientConfig, EventHandler, SyncSettings,
|
||||
};
|
||||
|
||||
use crate::Html;
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub(crate) struct Builder {
|
||||
user: String,
|
||||
password: String,
|
||||
homeserver: String,
|
||||
//TODO: rooms
|
||||
room: String,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
fn new() -> Self {
|
||||
Default::default()
|
||||
}
|
||||
|
||||
pub(crate) async fn connect(&self) -> Result<Chatbot> {
|
||||
let client = Client::new(self.homeserver.as_str())?;
|
||||
client
|
||||
.login(self.user.as_str(), self.password.as_str(), None, None)
|
||||
.await?;
|
||||
assert!(client.logged_in().await);
|
||||
client
|
||||
.join_room_by_id(&self.room.as_str().try_into()?)
|
||||
.await?;
|
||||
|
||||
Ok(Chatbot { client })
|
||||
}
|
||||
|
||||
pub(crate) fn login(
|
||||
&mut self,
|
||||
user: &impl AsRef<str>,
|
||||
password: &impl AsRef<str>,
|
||||
) -> &mut Self {
|
||||
self.user = String::from(user.as_ref());
|
||||
self.password = String::from(password.as_ref());
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn homeserver(&mut self, homeserver: &impl AsRef<str>) -> &mut Self {
|
||||
self.homeserver = String::from(homeserver.as_ref());
|
||||
self
|
||||
}
|
||||
|
||||
pub(crate) fn room(&mut self, room: &impl AsRef<str>) -> &mut Self {
|
||||
self.room = String::from(room.as_ref());
|
||||
self
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub(crate) struct Chatbot {
|
||||
client: Client,
|
||||
}
|
||||
|
||||
impl Chatbot {
|
||||
pub(crate) fn builder() -> Builder {
|
||||
Builder::new()
|
||||
}
|
||||
|
||||
pub(crate) async fn run(&self) -> Result<()> {
|
||||
self.client.set_event_handler(Box::new(Html::new())).await;
|
||||
|
||||
let mut settings = SyncSettings::default();
|
||||
if let Some(token) = self.client.sync_token().await {
|
||||
settings = settings.token(token);
|
||||
}
|
||||
self.client.sync(settings).await;
|
||||
Ok(())
|
||||
}
|
||||
}
|
32
crieur-chatbot/src/cli.rs
Normal file
32
crieur-chatbot/src/cli.rs
Normal file
@ -0,0 +1,32 @@
|
||||
use std::env;
|
||||
|
||||
use anyhow::{bail, Result};
|
||||
use dotenv::dotenv;
|
||||
|
||||
use crate::Chatbot;
|
||||
|
||||
/// Runs the chatbot
|
||||
pub async fn run() -> Result<()> {
|
||||
dotenv().ok();
|
||||
|
||||
let (user, password, homeserver, room) = match (
|
||||
env::var("CRIEUR_MATRIX_USER"),
|
||||
env::var("CRIEUR_MATRIX_PASSWORD"),
|
||||
env::var("CRIEUR_MATRIX_HOMESERVER"),
|
||||
env::var("CRIEUR_MATRIX_ROOM"),
|
||||
) {
|
||||
(Ok(user), Ok(password), Ok(homeserver), Ok(room)) => (user, password, homeserver, room),
|
||||
_ => bail!("Configuration incomplete, please set all required environment variables"),
|
||||
};
|
||||
|
||||
let chatbot = Chatbot::builder()
|
||||
.login(&user, &password)
|
||||
.homeserver(&homeserver)
|
||||
.room(&room)
|
||||
.connect()
|
||||
.await?;
|
||||
|
||||
chatbot.run().await?;
|
||||
|
||||
Ok(())
|
||||
}
|
94
crieur-chatbot/src/handlers/html.rs
Normal file
94
crieur-chatbot/src/handlers/html.rs
Normal file
@ -0,0 +1,94 @@
|
||||
use std::convert::TryInto;
|
||||
use std::env;
|
||||
|
||||
use anyhow::Result;
|
||||
use log::info;
|
||||
use matrix_sdk::{
|
||||
self, async_trait,
|
||||
events::{
|
||||
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
||||
AnyMessageEventContent, SyncMessageEvent,
|
||||
},
|
||||
room::Room,
|
||||
Client, ClientConfig, EventHandler, SyncSettings,
|
||||
};
|
||||
|
||||
use crieur_retrieve::{ArticleLocation, Url};
|
||||
|
||||
/// Stateless Matrix event handler; it carries no data of its own.
pub(crate) struct Html {}

impl Html {
    /// Creates the handler.
    pub fn new() -> Self {
        Html {}
    }
}
|
||||
|
||||
async fn send_article<U, E>(url: U, room: matrix_sdk::room::Joined)
|
||||
where
|
||||
U: TryInto<Url, Error = E> + Send,
|
||||
E: std::error::Error + Sync + Send + 'static,
|
||||
{
|
||||
//TODO: replace by async block when async block is stable
|
||||
async fn article_html<U, E>(url: U) -> Result<String>
|
||||
where
|
||||
U: TryInto<Url, Error = E> + Send,
|
||||
E: std::error::Error + Sync + Send + 'static,
|
||||
{
|
||||
let article_str = ArticleLocation::builder()
|
||||
.url(url)?
|
||||
.build()?
|
||||
.retrieve_html()
|
||||
.await?;
|
||||
Ok(article_str)
|
||||
}
|
||||
|
||||
let text_message =
|
||||
|message| AnyMessageEventContent::RoomMessage(MessageEventContent::text_plain(message));
|
||||
|
||||
//TODO: replace occurences ok() by async and logging block when async block is stable
|
||||
let article_html = match article_html(url).await {
|
||||
Ok(url) => url,
|
||||
Err(_) => {
|
||||
room.send(text_message("Can't download the file"), None)
|
||||
.await
|
||||
.ok();
|
||||
return;
|
||||
}
|
||||
};
|
||||
|
||||
room.send_attachment(
|
||||
"test.html",
|
||||
&mime::TEXT_HTML_UTF_8,
|
||||
&mut article_html.as_bytes(),
|
||||
None,
|
||||
)
|
||||
.await
|
||||
.ok();
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl EventHandler for Html {
|
||||
async fn on_room_message(&self, room: Room, event: &SyncMessageEvent<MessageEventContent>) {
|
||||
if let Room::Joined(room) = room {
|
||||
let msg_body = if let SyncMessageEvent {
|
||||
content:
|
||||
MessageEventContent {
|
||||
msgtype: MessageType::Text(TextMessageEventContent { body: msg_body, .. }),
|
||||
..
|
||||
},
|
||||
..
|
||||
} = event
|
||||
{
|
||||
msg_body
|
||||
} else {
|
||||
return;
|
||||
};
|
||||
info!("sending file");
|
||||
|
||||
match msg_body.split(' ').collect::<Vec<_>>().as_slice() {
|
||||
["!html", url, ..] => send_article(*url, room).await,
|
||||
_ => return,
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
2
crieur-chatbot/src/handlers/mod.rs
Normal file
2
crieur-chatbot/src/handlers/mod.rs
Normal file
@ -0,0 +1,2 @@
|
||||
mod html;
|
||||
pub(crate) use html::Html;
|
10
crieur-chatbot/src/lib.rs
Normal file
10
crieur-chatbot/src/lib.rs
Normal file
@ -0,0 +1,10 @@
|
||||
//! Provides a matrix chatbot to download newspaper articles
|
||||
|
||||
mod cli;
|
||||
pub use cli::run;
|
||||
|
||||
mod chatbot;
|
||||
use chatbot::Chatbot;
|
||||
|
||||
mod handlers;
|
||||
use handlers::Html;
|
@ -1,21 +1,32 @@
|
||||
use std::boxed::Box;
|
||||
use std::convert::TryInto;
|
||||
use std::env;
|
||||
|
||||
use anyhow::{anyhow, Result};
|
||||
use log::info;
|
||||
use url::{Host, Url};
|
||||
|
||||
use crate::newspaper::Newspaper;
|
||||
use crate::newspapers::mediapart::{self, Mediapart};
|
||||
|
||||
type Newspapers<'a> = Vec<Box<&'a dyn Newspaper>>;
|
||||
type Newspapers = Vec<Box<dyn Newspaper>>;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ArticleLocationBuilder<'a> {
|
||||
url: Option<Url>,
|
||||
newspapers: Option<Newspapers<'a>>,
|
||||
fn default_newpapers() -> Result<Newspapers> {
|
||||
let mpruiid = env::var("MEDIAPART_COOKIE")?.into();
|
||||
let mediapart = Mediapart::builder()
|
||||
.login(mediapart::Login::MPRUUID(mpruiid))
|
||||
.build()?;
|
||||
|
||||
Ok(vec![Box::new(mediapart)])
|
||||
}
|
||||
|
||||
impl<'a> ArticleLocationBuilder<'a> {
|
||||
#[derive(Default)]
|
||||
pub struct Builder {
|
||||
url: Option<Url>,
|
||||
newspapers: Option<Newspapers>,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
@ -37,9 +48,9 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
}
|
||||
|
||||
/// Adds a newspaper to the list
|
||||
pub fn newspaper<T>(&mut self, newspaper: &'a T) -> &mut Self
|
||||
pub fn newspaper<T>(mut self, newspaper: T) -> Self
|
||||
where
|
||||
T: 'a + Newspaper,
|
||||
T: 'static + Newspaper,
|
||||
{
|
||||
match &mut self.newspapers {
|
||||
Some(newspapers) => newspapers.push(Box::new(newspaper)),
|
||||
@ -72,7 +83,7 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
/// - the url is not set
|
||||
/// - the given url has no host
|
||||
// TODO: move this to a defined error, remove anyhow !
|
||||
pub fn build(&self) -> Result<ArticleLocation<'a>> {
|
||||
pub fn build(self) -> Result<ArticleLocation> {
|
||||
let url = Clone::clone(self.url.as_ref().ok_or(anyhow!(
|
||||
"No url set. You can set it with the url() function"
|
||||
))?);
|
||||
@ -80,28 +91,22 @@ impl<'a> ArticleLocationBuilder<'a> {
|
||||
let host = Host::parse(host)?;
|
||||
let newspaper = self
|
||||
.newspapers
|
||||
.as_ref()
|
||||
.ok_or(anyhow!(
|
||||
"A list of NewsPaper must be set. It can be set with newspapers() function"
|
||||
))?
|
||||
.iter()
|
||||
.unwrap_or(default_newpapers()?)
|
||||
.into_iter()
|
||||
.find(|c| c.metadata().hosts.contains(&host))
|
||||
.ok_or(anyhow!("Newspaper couldn't be found"))?;
|
||||
Ok(ArticleLocation {
|
||||
newspaper: newspaper.clone(),
|
||||
url,
|
||||
})
|
||||
Ok(ArticleLocation { newspaper, url })
|
||||
}
|
||||
}
|
||||
|
||||
pub struct ArticleLocation<'a> {
|
||||
newspaper: Box<&'a dyn Newspaper>,
|
||||
pub struct ArticleLocation {
|
||||
newspaper: Box<dyn Newspaper>,
|
||||
pub url: Url,
|
||||
}
|
||||
|
||||
impl<'a> ArticleLocation<'a> {
|
||||
pub fn builder() -> ArticleLocationBuilder<'a> {
|
||||
ArticleLocationBuilder::new()
|
||||
impl ArticleLocation {
|
||||
pub fn builder() -> Builder {
|
||||
Builder::new()
|
||||
}
|
||||
|
||||
pub async fn retrieve_html(&self) -> Result<String> {
|
||||
|
@ -8,8 +8,7 @@ pub use tools::{Download, Downloader};
|
||||
pub mod newspaper;
|
||||
|
||||
// TODO: move to another crate
|
||||
mod newspapers;
|
||||
pub use newspapers::Mediapart;
|
||||
pub mod newspapers;
|
||||
|
||||
mod article_location;
|
||||
pub use article_location::ArticleLocation;
|
||||
|
@ -1,17 +1,10 @@
|
||||
use anyhow::Result;
|
||||
use async_trait::async_trait;
|
||||
use derive_builder::Builder;
|
||||
use url::Host;
|
||||
pub use url::Url;
|
||||
|
||||
enum Login {
|
||||
Username(String, String),
|
||||
Cookie(String),
|
||||
}
|
||||
|
||||
/// Contains metadata about a newspaper
|
||||
// TODO: provide builder
|
||||
#[derive(Debug, PartialEq, Default, Builder)]
|
||||
#[derive(Debug, PartialEq, Default, derive_builder::Builder)]
|
||||
#[builder(default)]
|
||||
pub struct Metadata {
|
||||
/// The hosts that correspond to this newspaper
|
||||
@ -28,13 +21,14 @@ pub struct Metadata {
|
||||
}
|
||||
|
||||
impl Metadata {
|
||||
/// Get metadata builder
|
||||
pub fn builder() -> MetadataBuilder {
|
||||
MetadataBuilder::default()
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
pub trait Newspaper {
|
||||
pub trait Newspaper: Send + Sync {
|
||||
/// Returns the metadata of the newspaper, including the hosts that correspond to it
|
||||
fn metadata(&self) -> Metadata;
|
||||
|
||||
@ -49,7 +43,7 @@ pub trait Newspaper {
|
||||
}
|
||||
|
||||
/// Returns a newspaper structure
|
||||
async fn new() -> Self
|
||||
fn new() -> Self
|
||||
where
|
||||
Self: Sized;
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
use anyhow::Result;
|
||||
use anyhow::{anyhow, Result};
|
||||
use async_trait::async_trait;
|
||||
use cookie::Cookie;
|
||||
use url::Host;
|
||||
@ -8,16 +8,46 @@ use crate::tools;
|
||||
use crate::Url;
|
||||
use crate::{Download, Downloader};
|
||||
|
||||
/// Ways of authenticating against Mediapart.
pub enum Login {
    /// Two strings; the builder in this file binds them as username and password.
    Username(String, String),
    /// Value of the `MPRUUID` cookie.
    MPRUUID(String),
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct Mediapart {
|
||||
// TODO: remove this pub !!
|
||||
pub login_cookie: Option<(String, String)>,
|
||||
login_cookie: (String, String),
|
||||
}
|
||||
|
||||
fn str_to_host<S: Into<String>>(host: S) -> Host {
|
||||
Host::Domain(host.into())
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Default)]
|
||||
pub struct Builder {
|
||||
login_cookie: Option<(String, String)>,
|
||||
}
|
||||
|
||||
impl Builder {
|
||||
pub fn login(&mut self, login: Login) -> &mut Self {
|
||||
self.login_cookie = match login {
|
||||
Login::Username(_username, _password) => {
|
||||
unimplemented!("login using username and passwond not implemented")
|
||||
}
|
||||
Login::MPRUUID(cookie_value) => Some(("MPRUUID".into(), cookie_value)),
|
||||
};
|
||||
self
|
||||
}
|
||||
|
||||
pub fn build(&self) -> Result<Mediapart> {
|
||||
match &self.login_cookie {
|
||||
Some(login_cookie) => Ok(Mediapart {
|
||||
login_cookie: login_cookie.clone(),
|
||||
}),
|
||||
None => Err(anyhow!("You have to log in to access this newspaper")),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[async_trait]
|
||||
impl Newspaper for Mediapart {
|
||||
fn metadata(&self) -> Metadata {
|
||||
@ -41,13 +71,10 @@ impl Newspaper for Mediapart {
|
||||
let mut url = url.clone();
|
||||
url.set_query(Some(&query));
|
||||
|
||||
// TODO: add "?onglet=full" to the url if not
|
||||
let cookies = if let Some((name, value)) = &self.login_cookie {
|
||||
let cookie = Cookie::build(name, value).secure(true).finish();
|
||||
vec![cookie]
|
||||
} else {
|
||||
vec![]
|
||||
};
|
||||
let cookie = Cookie::build(&self.login_cookie.0, &self.login_cookie.1)
|
||||
.secure(true)
|
||||
.finish();
|
||||
let cookies = vec![cookie];
|
||||
|
||||
// TODO: replace by builder
|
||||
let downloader = Downloader { cookies };
|
||||
@ -74,13 +101,12 @@ impl Newspaper for Mediapart {
|
||||
"aside.cc-modal",
|
||||
];
|
||||
|
||||
// TODO: correction of usage of relative urls, and replace "" by the url
|
||||
let single_page_html =
|
||||
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
||||
Ok(single_page_html)
|
||||
}
|
||||
|
||||
async fn new() -> Self {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
..Default::default()
|
||||
}
|
||||
@ -91,3 +117,9 @@ impl Newspaper for Mediapart {
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
impl Mediapart {
|
||||
pub fn builder() -> Builder {
|
||||
Builder::default()
|
||||
}
|
||||
}
|
||||
|
@ -1,3 +1 @@
|
||||
mod mediapart;
|
||||
|
||||
pub use mediapart::Mediapart;
|
||||
pub mod mediapart;
|
||||
|
@ -1,48 +1,97 @@
|
||||
This project mainly aims at providing an unified interface for several newspapers. Side
|
||||
objectives are to provide web API and different clients like a webUI or chatbots.
|
||||
---
|
||||
title: Scope of the project
|
||||
---
|
||||
|
||||
Several big components are planned for this project
|
||||
This project mainly aims at providing a unified interface for several
|
||||
newspapers. Side objectives are to provide web API and different clients like a
|
||||
webUI or chatbots.
|
||||
|
||||
```dot
|
||||
digraph G {
|
||||
rankdir=TB
|
||||
node [shape=rectangle, style=filled, color="#779988"]
|
||||
Several big components are planned for this project (it is an initial draft and
|
||||
may change later) :
|
||||
|
||||
subgraph cluster_frontend {
|
||||
color = transparent
|
||||
webui
|
||||
chatbot
|
||||
}
|
||||
```plantuml
|
||||
@startuml
|
||||
|
||||
frame "backend" {
|
||||
[Retrieval tools] as retrieval_tools
|
||||
[Article representation] as article_repr
|
||||
[Automatic retrieval] as auto_retrieve
|
||||
[Atom/RSS adapters] as rss
|
||||
[Cache DB] as cache
|
||||
|
||||
[Newspaper\n(Mediapart, …)] as newspaper
|
||||
() "Newspaper" as np_i
|
||||
newspaper -up- np_i
|
||||
|
||||
|
||||
webui -> api [color = red]
|
||||
chatbot -> api [color = red]
|
||||
[Article location] as article_location
|
||||
|
||||
subgraph cluster_backend {
|
||||
label = "Backend\ncrieur binary"
|
||||
labelloc = b
|
||||
style=filled
|
||||
[API] as api
|
||||
() "API" as api_i
|
||||
api -up- api_i
|
||||
|
||||
retrieve_tools [label="retrieve-tools"]
|
||||
retrieve_adapters [label="retrieve-adapters"]
|
||||
retrieve [label="retrieve-interface"]
|
||||
auto_retrieve [label="automatic-retrieve"]
|
||||
article_repr [label="article-representation\nRepresentation for articles"]
|
||||
api
|
||||
cache [label="Cache database"]
|
||||
rss [label="Atom/RSS adapters"]
|
||||
article_location ..> np_i
|
||||
|
||||
retrieve_tools -> retrieve_adapters
|
||||
retrieve_adapters -> retrieve
|
||||
retrieve_tools -> retrieve
|
||||
rss -> auto_retrieve
|
||||
article_repr -> retrieve_adapters
|
||||
api -> article_location
|
||||
api -> rss
|
||||
|
||||
retrieve -> api
|
||||
auto_retrieve -> api
|
||||
cache -> api
|
||||
newspaper -> retrieval_tools: uses to implement
|
||||
|
||||
}
|
||||
article_location --> article_repr :uses
|
||||
|
||||
auto_retrieve --> rss: watches
|
||||
auto_retrieve --> article_location
|
||||
auto_retrieve --> cache: stores in
|
||||
|
||||
}
|
||||
|
||||
frame "Web ui" {
|
||||
[Web UI] as webui
|
||||
[HTML renderer] as html_rend
|
||||
[Pdf exporter] as pdf_rend
|
||||
[Articles] as articles
|
||||
webui --> html_rend
|
||||
webui --> pdf_rend
|
||||
webui -> articles
|
||||
articles ..> api_i
|
||||
}
|
||||
|
||||
[Chatbot] as chatbot
|
||||
|
||||
chatbot ..> api_i
|
||||
|
||||
actor User
|
||||
User ..> webui
|
||||
User ..> chatbot
|
||||
|
||||
actor "Newspaper programmer" as newspaper_programmer
|
||||
newspaper_programmer ..> newspaper: implements
|
||||
@enduml
|
||||
```
|
||||
|
||||
A task queue could be added later to space requests.
|
||||
|
||||
# Implementation plan
|
||||
|
||||
## Phase I
|
||||
- [x] `Newspaper` interface : use to retrieve from newspaper websites
|
||||
- [ ] minimal chatbot (uses libraries directly)
|
||||
- [x] `ArticleLocation` : library for using several `Newspaper` and retrieving from
|
||||
a given url.
|
||||
|
||||
## Phase II
|
||||
- [ ] Article Representation : having a (beta) unified representation for downloaded
|
||||
articles
|
||||
- [ ] adding this representation to `Newspaper`
|
||||
|
||||
## Phase III
|
||||
- [ ] Cache
|
||||
- [ ] Atom/rss adapters
|
||||
- [ ] automatic retrieve
|
||||
|
||||
## Phase IV
|
||||
- [ ] API
|
||||
- [ ] chatbot (uses api)
|
||||
|
||||
## Phase V
|
||||
- [ ] web ui
|
||||
|
19
documentation/guides/run_chatbot.md
Normal file
19
documentation/guides/run_chatbot.md
Normal file
@ -0,0 +1,19 @@
|
||||
---
|
||||
title: run the chatbot
|
||||
---
|
||||
|
||||
1. You must first configure the Matrix login; every variable in [the reference](../reference/chatbot_configuration.md) is mandatory.
|
||||
|
||||
```env
|
||||
CRIEUR_MATRIX_USER=user
|
||||
CRIEUR_MATRIX_PASSWORD=password
|
||||
CRIEUR_MATRIX_HOMESERVER=https://homeserv.er
|
||||
CRIEUR_MATRIX_ROOM=roomid
|
||||
```
|
||||
|
||||
You can put it in a `.env` file.
|
||||
|
||||
2. run the chatbot
|
||||
```
|
||||
cargo run --release --bin crieur-chatbot
|
||||
```
|
17
documentation/reference/chatbot_configuration.md
Normal file
17
documentation/reference/chatbot_configuration.md
Normal file
@ -0,0 +1,17 @@
|
||||
---
|
||||
title: Chatbot configuration reference
|
||||
---
|
||||
|
||||
The chatbot is configured using environment variables
|
||||
|
||||
CRIEUR_MATRIX_USER
|
||||
: username of the matrix bot account
|
||||
|
||||
CRIEUR_MATRIX_PASSWORD
|
||||
: password of the matrix bot account
|
||||
|
||||
CRIEUR_MATRIX_HOMESERVER
|
||||
: homeserver of the matrix bot account
|
||||
|
||||
CRIEUR_MATRIX_ROOM
|
||||
: the room in which to listen to events
|
10
documentation/reference/newspaper_configuration.md
Normal file
10
documentation/reference/newspaper_configuration.md
Normal file
@ -0,0 +1,10 @@
|
||||
---
|
||||
title: Newspapers configuration
|
||||
---
|
||||
|
||||
The newspapers are configured using environment variables
|
||||
|
||||
# Mediapart
|
||||
|
||||
MEDIAPART_COOKIE
|
||||
: sets the `MPRUUID` cookie, used to log in
|
@ -2,7 +2,11 @@ use std::convert::TryInto;
|
||||
use std::env;
|
||||
|
||||
use anyhow::Result;
|
||||
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
|
||||
use crieur_retrieve::{
|
||||
newspaper::Newspaper,
|
||||
newspapers::mediapart::{self, Mediapart},
|
||||
ArticleLocation, Url,
|
||||
};
|
||||
use dotenv::dotenv;
|
||||
use log::info;
|
||||
|
||||
@ -17,18 +21,18 @@ async fn main() -> Result<()> {
|
||||
};
|
||||
|
||||
// TODO: remove this in favor of default newspapers
|
||||
let mut mediapart = Mediapart::new().await
|
||||
//.login(USERNAME, PASSWORD)
|
||||
//
|
||||
;
|
||||
|
||||
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?.into()));
|
||||
let mpruiid = env::var("MEDIAPART_COOKIE")?.into();
|
||||
let mediapart = Mediapart::builder()
|
||||
.login(mediapart::Login::MPRUUID(mpruiid))
|
||||
.build()?;
|
||||
|
||||
info!("Trying to download article from {}", url);
|
||||
|
||||
// TODO: shorten this, maybe an helper function ?
|
||||
let article_location = ArticleLocation::builder()
|
||||
.url(url)?
|
||||
.newspaper(&mediapart)
|
||||
.newspaper(mediapart)
|
||||
.build()?;
|
||||
|
||||
let article_str = article_location.retrieve_html().await?;
|
||||
|
10
src/bin/crieur-chatbot.rs
Normal file
10
src/bin/crieur-chatbot.rs
Normal file
@ -0,0 +1,10 @@
|
||||
use anyhow::Result;
|
||||
use crieur_chatbot::run;
|
||||
use dotenv::dotenv;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv().ok();
|
||||
run().await?;
|
||||
Ok(())
|
||||
}
|
19
src/main.rs
19
src/main.rs
@ -1,19 +0,0 @@
|
||||
use anyhow::Result;
|
||||
use crieur_retrieve::{newspaper::Newspaper, Mediapart, Url};
|
||||
use dotenv::dotenv;
|
||||
use std::env;
|
||||
|
||||
#[tokio::main]
|
||||
async fn main() -> Result<()> {
|
||||
dotenv().ok();
|
||||
|
||||
let mut mediapart = Mediapart::new().await
|
||||
//.login(USERNAME, PASSWORD)
|
||||
//
|
||||
;
|
||||
|
||||
mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?));
|
||||
let url = Url::parse("https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long")?;
|
||||
println!("{}", mediapart.retrieve_html(&url).await?);
|
||||
Ok(())
|
||||
}
|
Loading…
Reference in New Issue
Block a user