use anyhow::Result; use async_trait::async_trait; use cookie::Cookie; use url::Host; use crate::newspaper::{Metadata, Newspaper}; use crate::tools; use crate::Url; use crate::{Download, Downloader}; #[derive(Debug, Clone, Default)] pub struct Mediapart { // TODO: remove this pub !! pub login_cookie: Option<(String, String)>, } fn str_to_host>(host: S) -> Host { Host::Domain(host.into()) } #[async_trait] impl Newspaper for Mediapart { fn metadata(&self) -> Metadata { Metadata::builder() .hosts(vec![ str_to_host("mediapart.fr"), str_to_host("www.mediapart.fr"), ]) .lower_case_name("mediapart") .name("Médiapart") .build() .unwrap_or_default() } async fn retrieve_html(&self, url: &Url) -> Result { let initial_query = url.query(); let query = match initial_query { Some(q) => format!("{}&onglet=full", q), None => "onglet=full".into(), }; let mut url = url.clone(); url.set_query(Some(&query)); // TODO: add "?onglet=full" to the url if not let cookies = if let Some((name, value)) = &self.login_cookie { let cookie = Cookie::build(name, value).secure(true).finish(); vec![cookie] } else { vec![] }; // TODO: replace by builder let downloader = Downloader { cookies }; let body = downloader.download(&url).await?; let html = String::from_utf8(body.to_vec())?; // TODO: Move to const let element_to_remove = [ // header ".fb-root", ".skipLinks", ".js-flash-message", ".header-sticky.sticky-links", "nav.main-menu", // menus inside and social media buttons "ul.sub-menu-journal", ".tools-social", ".simple-list.universe-journal", ".simple-list.universe-club", // Footer "footer", // Misc "aside.cc-modal", ]; // TODO: correction of usage of relative urls, and replace "" by the url let single_page_html = tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await; Ok(single_page_html) } fn new() -> Self { Self { ..Default::default() } } async fn has_complete_access(&self) -> bool { // TODO: check if we are logged using the cookie true } }