use anyhow::{anyhow, bail, Result}; use async_trait::async_trait; use cookie::Cookie; use indoc::indoc; use url::Host; use crate::newspaper::{Metadata, Newspaper}; use crate::tools; use crate::Url; use crate::{Download, Downloader}; pub enum Login { Username(String, String), Cookies { lmd_a_m: String, ssess: String }, } #[derive(Debug, Clone, Default)] pub struct CourrierInternational { login_cookies: Vec<(String, String)>, } fn str_to_host>(host: S) -> Host { Host::Domain(host.into()) } #[derive(Debug, Clone, Default)] pub struct Builder { login_cookies: Option>, } impl Builder { pub fn login(&mut self, login: Login) -> &mut Self { self.login_cookies = match login { Login::Username(_username, _password) => { unimplemented!("login using username and passwond not implemented") } Login::Cookies { lmd_a_m, ssess } => Some(vec![ ("lmd_a_m".into(), lmd_a_m), ("SSESS862c7003d721c672d39f161b1456b890".into(), ssess), ]), }; self } pub fn build(&self) -> Result { match &self.login_cookies { Some(login_cookies) => Ok(CourrierInternational { login_cookies: login_cookies.clone(), }), None => Err(anyhow!("You have to log in to access this newspaper")), } } } #[async_trait] impl Newspaper for CourrierInternational { fn metadata(&self) -> Metadata { Metadata::builder() .hosts(vec![ str_to_host("courrierinternational.com"), str_to_host("www.courrierinternational.com"), ]) .lower_case_name("courrier-international") .name("Courrier international") .build() .unwrap_or_default() } async fn retrieve_html(&self, url: &Url) -> Result { let cookies = self .login_cookies .iter() .map(|cookie| Cookie::build(&cookie.0, &cookie.1).finish()) .collect::>(); // TODO: replace by builder let downloader = Downloader { cookies }; let body = downloader.download(&url).await?; let html = match body { Some(body) => String::from_utf8(body.to_vec())?, None => bail!("404 not found"), }; let elements_to_remove = &[ // navigation elements "header.site-header", "footer.site-footer", // Social buttons "#toolbox-share", ".toolbox-share", ".toolbox-print", ".toolbox-respond", ".toolbox-zen", ".toolbox-newsletter", ".toolbox-offer", ".box-article-offer-friend-abo", // unused services ".article-aside", ".article-secondary", ".article-subject-readmore", // misc ".element-invisible", ".gptcontainer", ]; // FIXME: it doesn't work because the aside is in the article body // let toolbox_style = indoc! {" aside.article-toolbox { position: sticky; top: 1em; } "}; let single_page_html = tools::self_contained_html::Config { downloader: Some(&downloader), base_url: Some(&url), elements_to_remove, styles_to_add: &[toolbox_style], ..Default::default() } .run(&html) .await; Ok(single_page_html) } fn new() -> Self { Self { ..Default::default() } } async fn has_complete_access(&self) -> bool { // TODO: check if we are logged using the cookie true } } impl CourrierInternational { pub fn builder() -> Builder { Builder::default() } }