use anyhow::{anyhow, Result}; use async_trait::async_trait; use cookie::Cookie; use url::Host; use crate::newspaper::{Metadata, Newspaper}; use crate::tools; use crate::Url; use crate::{Download, Downloader}; pub enum Login { Username(String, String), MPRUUID(String), } #[derive(Debug, Clone, Default)] pub struct Mediapart { login_cookie: (String, String), } fn str_to_host>(host: S) -> Host { Host::Domain(host.into()) } #[derive(Debug, Clone, Default)] pub struct Builder { login_cookie: Option<(String, String)>, } impl Builder { pub fn login(&mut self, login: Login) -> &mut Self { self.login_cookie = match login { Login::Username(_username, _password) => { unimplemented!("login using username and passwond not implemented") } Login::MPRUUID(cookie_value) => Some(("MPRUUID".into(), cookie_value)), }; self } pub fn build(&self) -> Result { match &self.login_cookie { Some(login_cookie) => Ok(Mediapart { login_cookie: login_cookie.clone(), }), None => Err(anyhow!("You have to log in to access this newspaper")), } } } #[async_trait] impl Newspaper for Mediapart { fn metadata(&self) -> Metadata { Metadata::builder() .hosts(vec![ str_to_host("mediapart.fr"), str_to_host("www.mediapart.fr"), ]) .lower_case_name("mediapart") .name("Médiapart") .build() .unwrap_or_default() } async fn retrieve_html(&self, url: &Url) -> Result { let initial_query = url.query(); let query = match initial_query { Some(q) => format!("{}&onglet=full", q), None => "onglet=full".into(), }; let mut url = url.clone(); url.set_query(Some(&query)); let cookie = Cookie::build(&self.login_cookie.0, &self.login_cookie.1) .secure(true) .finish(); let cookies = vec![cookie]; // TODO: replace by builder let downloader = Downloader { cookies }; let body = downloader.download(&url).await?; let html = String::from_utf8(body.to_vec())?; // TODO: Move to const let element_to_remove = [ // header ".fb-root", ".skipLinks", ".js-flash-message", ".header-sticky.sticky-links", "nav.main-menu", // menus inside and social media buttons "ul.sub-menu-journal", ".tools-social", ".simple-list.universe-journal", ".simple-list.universe-club", // Footer "footer", // Misc "aside.cc-modal", ]; let single_page_html = tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await; Ok(single_page_html) } fn new() -> Self { Self { ..Default::default() } } async fn has_complete_access(&self) -> bool { // TODO: check if we are logged using the cookie true } } impl Mediapart { pub fn builder() -> Builder { Builder::default() } }