132 lines
3.4 KiB
Rust
132 lines
3.4 KiB
Rust
use anyhow::{anyhow, bail, Result};
|
|
use async_trait::async_trait;
|
|
use cookie::Cookie;
|
|
use url::Host;
|
|
|
|
use crate::newspaper::{Metadata, Newspaper};
|
|
use crate::tools;
|
|
use crate::Url;
|
|
use crate::{Download, Downloader};
|
|
|
|
/// Credentials that can be handed to [`Builder::login`].
///
/// Note: the derived `Debug` output contains the secret values verbatim, so
/// avoid writing it to logs.
#[derive(Debug, Clone)]
pub enum Login {
    /// Username followed by password.
    Username(String, String),
    /// Cookie values to reuse for authentication.
    Cookies {
        /// Value of the `lmd_a_m` cookie.
        lmd_a_m: String,
        /// Value of the `PHPSESSID` cookie.
        phpsessid: String,
        /// Value of the `spip_session` cookie.
        spip_session: String,
    },
}
|
|
|
|
/// Handle on the "Le Monde Diplomatique" newspaper.
///
/// Instances are normally obtained through [`MondeDiplo::builder`].
#[derive(Clone, Debug, Default)]
pub struct MondeDiplo {
    /// `(cookie name, cookie value)` pairs; `retrieve_html` turns each pair
    /// into a `Cookie` and hands them to the `Downloader`.
    login_cookies: Vec<(String, String)>,
}
|
|
|
|
fn str_to_host<S: Into<String>>(host: S) -> Host {
|
|
Host::Domain(host.into())
|
|
}
|
|
|
|
/// Step-by-step constructor for [`MondeDiplo`].
#[derive(Clone, Debug, Default)]
pub struct Builder {
    /// Cookies recorded by `login`; stays `None` until `login` has been
    /// called, in which case `build` reports an error.
    login_cookies: Option<Vec<(String, String)>>,
}
|
|
|
|
impl Builder {
|
|
pub fn login(&mut self, login: Login) -> &mut Self {
|
|
self.login_cookies = match login {
|
|
Login::Username(_username, _password) => {
|
|
unimplemented!("login using username and passwond not implemented")
|
|
}
|
|
Login::Cookies {
|
|
lmd_a_m,
|
|
phpsessid,
|
|
spip_session,
|
|
} => Some(vec![
|
|
("lmd_a_m".into(), lmd_a_m),
|
|
("PHPSESSID".into(), phpsessid),
|
|
("spip_session".into(), spip_session),
|
|
]),
|
|
};
|
|
self
|
|
}
|
|
|
|
pub fn build(&self) -> Result<MondeDiplo> {
|
|
match &self.login_cookies {
|
|
Some(login_cookies) => Ok(MondeDiplo {
|
|
login_cookies: login_cookies.clone(),
|
|
}),
|
|
None => Err(anyhow!("You have to log in to access this newspaper")),
|
|
}
|
|
}
|
|
}
|
|
|
|
#[async_trait]
|
|
impl Newspaper for MondeDiplo {
|
|
fn metadata(&self) -> Metadata {
|
|
Metadata::builder()
|
|
.hosts(vec![
|
|
str_to_host("monde-diplomatique.fr"),
|
|
str_to_host("www.monde-diplomatique.fr"),
|
|
])
|
|
.lower_case_name("monde-diplomatique")
|
|
.name("Le Monde Diplomatique")
|
|
.build()
|
|
.unwrap_or_default()
|
|
}
|
|
|
|
async fn retrieve_html(&self, url: &Url) -> Result<String> {
|
|
let cookies = self
|
|
.login_cookies
|
|
.iter()
|
|
.map(|cookie| Cookie::build(&cookie.0, &cookie.1).finish())
|
|
.collect::<Vec<_>>();
|
|
|
|
// TODO: replace by builder
|
|
let downloader = Downloader { cookies };
|
|
|
|
let body = downloader.download(&url).await?;
|
|
let html = match body {
|
|
Some(body) => String::from_utf8(body.to_vec())?,
|
|
None => bail!("404 not found"),
|
|
};
|
|
|
|
// TODO: Move to const
|
|
let element_to_remove = [
|
|
// navigation elements
|
|
"#tout-en-haut.preentete",
|
|
"#entete.connecte",
|
|
"#navigation",
|
|
"#pied",
|
|
".bloc-connexion",
|
|
// unused features
|
|
"#ecouter",
|
|
// Social buttons
|
|
".actions-article",
|
|
"#partage",
|
|
// misc
|
|
"noscript",
|
|
];
|
|
|
|
let single_page_html =
|
|
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
|
Ok(single_page_html)
|
|
}
|
|
|
|
fn new() -> Self {
|
|
Self {
|
|
..Default::default()
|
|
}
|
|
}
|
|
|
|
async fn has_complete_access(&self) -> bool {
|
|
// TODO: check if we are logged using the cookie
|
|
true
|
|
}
|
|
}
|
|
|
|
impl MondeDiplo {
|
|
pub fn builder() -> Builder {
|
|
Builder::default()
|
|
}
|
|
}
|