All checks were successful
continuous-integration/drone/pr Build is passing
A builder for Mediapart has been added. No generic builder has been created as there is no use case yet. Some documentation has been added, the roadmap and scope have been clarified, and the chatbot has been lightly documented.
126 lines
3.2 KiB
Rust
126 lines
3.2 KiB
Rust
use anyhow::{anyhow, Result};
|
|
use async_trait::async_trait;
|
|
use cookie::Cookie;
|
|
use url::Host;
|
|
|
|
use crate::newspaper::{Metadata, Newspaper};
|
|
use crate::tools;
|
|
use crate::Url;
|
|
use crate::{Download, Downloader};
|
|
|
|
/// Credentials that can be handed to [`Builder::login`].
pub enum Login {
    /// A username and a password, in that order.
    ///
    /// [`Builder::login`] does not support this variant yet and panics on it.
    Username(String, String),
    /// Value of the `MPRUUID` cookie, used as-is as the login cookie.
    MPRUUID(String),
}
|
|
|
|
/// Mediapart newspaper handle.
///
/// Holds the cookie that is attached to every article download.
#[derive(Debug, Clone, Default)]
pub struct Mediapart {
    /// Login cookie stored as a `(name, value)` pair.
    login_cookie: (String, String),
}
|
|
|
|
fn str_to_host<S: Into<String>>(host: S) -> Host {
|
|
Host::Domain(host.into())
|
|
}
|
|
|
|
/// Step-by-step constructor for [`Mediapart`].
///
/// Starts without any credentials (see the `Default` derive).
#[derive(Debug, Clone, Default)]
pub struct Builder {
    /// Login cookie as a `(name, value)` pair; `None` until a login is recorded.
    login_cookie: Option<(String, String)>,
}
|
|
|
|
impl Builder {
|
|
pub fn login(&mut self, login: Login) -> &mut Self {
|
|
self.login_cookie = match login {
|
|
Login::Username(_username, _password) => {
|
|
unimplemented!("login using username and passwond not implemented")
|
|
}
|
|
Login::MPRUUID(cookie_value) => Some(("MPRUUID".into(), cookie_value)),
|
|
};
|
|
self
|
|
}
|
|
|
|
pub fn build(&self) -> Result<Mediapart> {
|
|
match &self.login_cookie {
|
|
Some(login_cookie) => Ok(Mediapart {
|
|
login_cookie: login_cookie.clone(),
|
|
}),
|
|
None => Err(anyhow!("You have to log in to access this newspaper")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Newspaper for Mediapart {
|
|
fn metadata(&self) -> Metadata {
|
|
Metadata::builder()
|
|
.hosts(vec![
|
|
str_to_host("mediapart.fr"),
|
|
str_to_host("www.mediapart.fr"),
|
|
])
|
|
.lower_case_name("mediapart")
|
|
.name("Médiapart")
|
|
.build()
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
async fn retrieve_html(&self, url: &Url) -> Result<String> {
|
|
let initial_query = url.query();
|
|
let query = match initial_query {
|
|
Some(q) => format!("{}&onglet=full", q),
|
|
None => "onglet=full".into(),
|
|
};
|
|
let mut url = url.clone();
|
|
url.set_query(Some(&query));
|
|
|
|
let cookie = Cookie::build(&self.login_cookie.0, &self.login_cookie.1)
|
|
.secure(true)
|
|
.finish();
|
|
let cookies = vec![cookie];
|
|
|
|
// TODO: replace by builder
|
|
let downloader = Downloader { cookies };
|
|
|
|
let body = downloader.download(&url).await?;
|
|
let html = String::from_utf8(body.to_vec())?;
|
|
|
|
// TODO: Move to const
|
|
let element_to_remove = [
|
|
// header
|
|
".fb-root",
|
|
".skipLinks",
|
|
".js-flash-message",
|
|
".header-sticky.sticky-links",
|
|
"nav.main-menu",
|
|
// menus inside and social media buttons
|
|
"ul.sub-menu-journal",
|
|
".tools-social",
|
|
".simple-list.universe-journal",
|
|
".simple-list.universe-club",
|
|
// Footer
|
|
"footer",
|
|
// Misc
|
|
"aside.cc-modal",
|
|
];
|
|
|
|
let single_page_html =
|
|
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
|
Ok(single_page_html)
|
|
}
|
|
|
|
fn new() -> Self {
|
|
Self {
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
async fn has_complete_access(&self) -> bool {
|
|
// TODO: check if we are logged using the cookie
|
|
true
|
|
}
|
|
}
|
|
|
|
impl Mediapart {
|
|
pub fn builder() -> Builder {
|
|
Builder::default()
|
|
}
|
|
}
|