add several newspapers #36
@ -2,15 +2,7 @@
|
|||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
|
|
||||||
use anyhow::Result;
|
use anyhow::Result;
|
||||||
use matrix_sdk::{
|
use matrix_sdk::{self, Client, SyncSettings};
|
||||||
self, async_trait,
|
|
||||||
events::{
|
|
||||||
room::message::{MessageEventContent, MessageType, TextMessageEventContent},
|
|
||||||
AnyMessageEventContent, SyncMessageEvent,
|
|
||||||
},
|
|
||||||
room::Room,
|
|
||||||
Client, ClientConfig, EventHandler, SyncSettings,
|
|
||||||
};
|
|
||||||
|
|
||||||
use crate::Html;
|
use crate::Html;
|
||||||
|
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::env;
|
|
||||||
|
|
||||||
use log::{error, info};
|
use log::error;
|
||||||
use matrix_sdk::{
|
use matrix_sdk::{
|
||||||
self, async_trait,
|
self, async_trait,
|
||||||
events::{
|
events::{
|
||||||
@ -9,7 +8,7 @@ use matrix_sdk::{
|
|||||||
AnyMessageEventContent, SyncMessageEvent,
|
AnyMessageEventContent, SyncMessageEvent,
|
||||||
},
|
},
|
||||||
room::Room,
|
room::Room,
|
||||||
Client, ClientConfig, EventHandler, SyncSettings,
|
EventHandler,
|
||||||
};
|
};
|
||||||
|
|
||||||
use crieur_retrieve::{article_location::Error, article_location::Result, ArticleLocation, Url};
|
use crieur_retrieve::{article_location::Error, article_location::Result, ArticleLocation, Url};
|
||||||
|
@ -2,7 +2,6 @@ use std::boxed::Box;
|
|||||||
use std::convert::TryInto;
|
use std::convert::TryInto;
|
||||||
use std::env;
|
use std::env;
|
||||||
|
|
||||||
use log::info;
|
|
||||||
use url::{Host, Url};
|
use url::{Host, Url};
|
||||||
|
|
||||||
use crate::newspaper::Newspaper;
|
use crate::newspaper::Newspaper;
|
||||||
@ -36,27 +35,20 @@ pub type Result<T, E = Error> = core::result::Result<T, E>;
|
|||||||
fn default_newpapers() -> Result<Newspapers> {
|
fn default_newpapers() -> Result<Newspapers> {
|
||||||
// TODO: the same thing is written too many times: how to DRY?
|
// TODO: the same thing is written too many times: how to DRY?
|
||||||
let config_key = "MEDIAPART_COOKIE".to_string();
|
let config_key = "MEDIAPART_COOKIE".to_string();
|
||||||
let mpruiid = env::var(&config_key)
|
let mpruiid = env::var(&config_key).map_err(|_| Error::Misconfiguration(config_key))?;
|
||||||
.map_err(|_| Error::Misconfiguration(config_key))?
|
|
||||||
.into();
|
|
||||||
|
|
||||||
let mediapart = Mediapart::builder()
|
let mediapart = Mediapart::builder()
|
||||||
.login(mediapart::Login::MPRUUID(mpruiid))
|
.login(mediapart::Login::Mpruuid(mpruiid))
|
||||||
.build()?;
|
.build()?;
|
||||||
|
|
||||||
let lmd_a_m = "MONDE_DIPLO_LMD_A_M".to_string();
|
let lmd_a_m = "MONDE_DIPLO_LMD_A_M".to_string();
|
||||||
let phpsessid = "MONDE_DIPLO_PHPSESSID".to_string();
|
let phpsessid = "MONDE_DIPLO_PHPSESSID".to_string();
|
||||||
let spip_session = "MONDE_DIPLO_SPIP_SESSION".to_string();
|
let spip_session = "MONDE_DIPLO_SPIP_SESSION".to_string();
|
||||||
|
|
||||||
let lmd_a_m = env::var(&lmd_a_m)
|
let lmd_a_m = env::var(&lmd_a_m).map_err(|_| Error::Misconfiguration(lmd_a_m))?;
|
||||||
.map_err(|_| Error::Misconfiguration(lmd_a_m))?
|
let phpsessid = env::var(&phpsessid).map_err(|_| Error::Misconfiguration(phpsessid))?;
|
||||||
.into();
|
let spip_session =
|
||||||
let phpsessid = env::var(&phpsessid)
|
env::var(&spip_session).map_err(|_| Error::Misconfiguration(spip_session))?;
|
||||||
.map_err(|_| Error::Misconfiguration(phpsessid))?
|
|
||||||
.into();
|
|
||||||
let spip_session = env::var(&spip_session)
|
|
||||||
.map_err(|_| Error::Misconfiguration(spip_session))?
|
|
||||||
.into();
|
|
||||||
|
|
||||||
let monde_diplo = MondeDiplo::builder()
|
let monde_diplo = MondeDiplo::builder()
|
||||||
.login(monde_diplomatique::Login::Cookies {
|
.login(monde_diplomatique::Login::Cookies {
|
||||||
@ -69,12 +61,8 @@ fn default_newpapers() -> Result<Newspapers> {
|
|||||||
let lmd_a_m = "COURRIER_INTERNATIONAL_LMD_A_M".to_string();
|
let lmd_a_m = "COURRIER_INTERNATIONAL_LMD_A_M".to_string();
|
||||||
let ssess = "COURRIER_INTERNATIONAL_SSESS".to_string();
|
let ssess = "COURRIER_INTERNATIONAL_SSESS".to_string();
|
||||||
|
|
||||||
let lmd_a_m = env::var(&lmd_a_m)
|
let lmd_a_m = env::var(&lmd_a_m).map_err(|_| Error::Misconfiguration(lmd_a_m))?;
|
||||||
.map_err(|_| Error::Misconfiguration(lmd_a_m))?
|
let ssess = env::var(&ssess).map_err(|_| Error::Misconfiguration(ssess))?;
|
||||||
.into();
|
|
||||||
let ssess = env::var(&ssess)
|
|
||||||
.map_err(|_| Error::Misconfiguration(ssess))?
|
|
||||||
.into();
|
|
||||||
|
|
||||||
let courrier_international = CourrierInternational::builder()
|
let courrier_international = CourrierInternational::builder()
|
||||||
.login(courrier_international::Login::Cookies { lmd_a_m, ssess })
|
.login(courrier_international::Login::Cookies { lmd_a_m, ssess })
|
||||||
|
@ -82,13 +82,19 @@ impl Newspaper for CourrierInternational {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Move to const
|
// TODO: Move to const
|
||||||
let element_to_remove = [
|
let elements_to_remove = [
|
||||||
// navigation elements
|
// navigation elements
|
||||||
"#entete.connecte",
|
"#entete.connecte",
|
||||||
];
|
];
|
||||||
|
|
||||||
let single_page_html =
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&url),
|
||||||
|
elements_to_remove: &elements_to_remove,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(&html)
|
||||||
|
.await;
|
||||||
Ok(single_page_html)
|
Ok(single_page_html)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -10,7 +10,7 @@ use crate::{Download, Downloader};
|
|||||||
|
|
||||||
pub enum Login {
|
pub enum Login {
|
||||||
Username(String, String),
|
Username(String, String),
|
||||||
MPRUUID(String),
|
Mpruuid(String),
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Default)]
|
#[derive(Debug, Clone, Default)]
|
||||||
@ -33,7 +33,7 @@ impl Builder {
|
|||||||
Login::Username(_username, _password) => {
|
Login::Username(_username, _password) => {
|
||||||
unimplemented!("login using username and passwond not implemented")
|
unimplemented!("login using username and passwond not implemented")
|
||||||
}
|
}
|
||||||
Login::MPRUUID(cookie_value) => Some(("MPRUUID".into(), cookie_value)),
|
Login::Mpruuid(cookie_value) => Some(("MPRUUID".into(), cookie_value)),
|
||||||
};
|
};
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
@ -86,7 +86,7 @@ impl Newspaper for Mediapart {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Move to const
|
// TODO: Move to const
|
||||||
let element_to_remove = [
|
let elements_to_remove = [
|
||||||
// header
|
// header
|
||||||
".fb-root",
|
".fb-root",
|
||||||
".skipLinks",
|
".skipLinks",
|
||||||
@ -104,8 +104,14 @@ impl Newspaper for Mediapart {
|
|||||||
"aside.cc-modal",
|
"aside.cc-modal",
|
||||||
];
|
];
|
||||||
|
|
||||||
let single_page_html =
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&url),
|
||||||
|
elements_to_remove: &elements_to_remove,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(&html)
|
||||||
|
.await;
|
||||||
Ok(single_page_html)
|
Ok(single_page_html)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -91,7 +91,7 @@ impl Newspaper for MondeDiplo {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Move to const
|
// TODO: Move to const
|
||||||
let element_to_remove = [
|
let elements_to_remove = [
|
||||||
// navigation elements
|
// navigation elements
|
||||||
"#tout-en-haut.preentete",
|
"#tout-en-haut.preentete",
|
||||||
"#entete.connecte",
|
"#entete.connecte",
|
||||||
@ -107,8 +107,14 @@ impl Newspaper for MondeDiplo {
|
|||||||
"noscript",
|
"noscript",
|
||||||
];
|
];
|
||||||
|
|
||||||
let single_page_html =
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
tools::self_contained_html(&html, &downloader, &url, &element_to_remove).await;
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&url),
|
||||||
|
elements_to_remove: &elements_to_remove,
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(&html)
|
||||||
|
.await;
|
||||||
Ok(single_page_html)
|
Ok(single_page_html)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,5 +1,4 @@
|
|||||||
mod download;
|
mod download;
|
||||||
mod self_contained_html;
|
pub mod self_contained_html;
|
||||||
|
|
||||||
pub use download::{Download, DownloadError, Downloader};
|
pub use download::{Download, DownloadError, Downloader};
|
||||||
pub use self_contained_html::self_contained_html;
|
|
||||||
|
@ -8,140 +8,177 @@ use url::Url;
|
|||||||
use crate::consts::{EVENT_HANDLERS, LINK_REL_EXTERNAL_RESOURCES};
|
use crate::consts::{EVENT_HANDLERS, LINK_REL_EXTERNAL_RESOURCES};
|
||||||
use crate::Download;
|
use crate::Download;
|
||||||
|
|
||||||
/// Makes an html page self-contained
|
/// Stores configuration for the self_contained_html function
|
||||||
///
|
// TODO: write a builder
|
||||||
/// The `downloader` must implement `Download` and is used to download resources that are
|
pub struct Config<'t, E, D, S1 = &'static str, S2 = &'static str>
|
||||||
/// needed to make this page self-contained such as stylesheets or images.
|
where
|
||||||
///
|
E: std::error::Error,
|
||||||
/// The function also removes all scripts on the page
|
D: Download<Error = E> + Send,
|
||||||
pub async fn self_contained_html<E, D>(
|
S1: AsRef<str>,
|
||||||
html: impl AsRef<str>,
|
S2: AsRef<str>,
|
||||||
downloader: &D,
|
{
|
||||||
base_url: &Url,
|
/// The downloader that will be used to retrieve resources on the page
|
||||||
elements_to_remove: &[impl AsRef<str>],
|
pub downloader: Option<&'t D>,
|
||||||
) -> String
|
/// Base url that resource links are joined to before downloading; it is probably the url of the page itself
|
||||||
|
pub base_url: Option<&'t Url>,
|
||||||
|
pub elements_to_remove: &'t [S1],
|
||||||
|
pub styles_to_add: &'t [S2],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t, E, D> Default for Config<'t, E, D>
|
||||||
where
|
where
|
||||||
E: std::error::Error,
|
E: std::error::Error,
|
||||||
D: Download<Error = E> + Send,
|
D: Download<Error = E> + Send,
|
||||||
{
|
{
|
||||||
// TODO: split/refactor this function :
|
fn default() -> Self {
|
||||||
// - ¿ find a way to ease the |get urls| -> |async download| -> |replace| workflow ?
|
Self {
|
||||||
// - `element_to_remove` , `base_url` and `downloader` should be in a configuration structure
|
downloader: None,
|
||||||
// - ¿ should be function of a trait ? or only of the configuration struct ?
|
base_url: None,
|
||||||
let (style_urls, html) = {
|
elements_to_remove: &[],
|
||||||
let document = Document::from(html.as_ref());
|
styles_to_add: &[],
|
||||||
|
|
||||||
// ---- Remove scripts ----
|
|
||||||
//
|
|
||||||
document.select("script").remove();
|
|
||||||
|
|
||||||
for event in EVENT_HANDLERS {
|
|
||||||
document
|
|
||||||
.select(format!("[{}]", event).as_str())
|
|
||||||
.remove_attr(event);
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for rel in LINK_REL_EXTERNAL_RESOURCES {
|
impl<'t, E, D, S1, S2> Config<'t, E, D, S1, S2>
|
||||||
document
|
where
|
||||||
.select(format!("link[rel=\"{}\"]", rel).as_str())
|
E: std::error::Error,
|
||||||
.remove();
|
D: Download<Error = E> + Send,
|
||||||
}
|
S1: AsRef<str>,
|
||||||
|
S2: AsRef<str>,
|
||||||
|
{
|
||||||
|
/// Makes an html page self-contained
|
||||||
|
///
|
||||||
|
/// The `downloader` must implement `Download` and is used to download resources that are
|
||||||
|
/// needed to make this page self-contained such as stylesheets or images.
|
||||||
|
///
|
||||||
|
/// The function also removes all scripts on the page
|
||||||
|
pub async fn run(&self, html: impl AsRef<str>) -> String {
|
||||||
|
//TODO: don't panic
|
||||||
|
let base_url = self.base_url.expect("Base url not defined");
|
||||||
|
let downloader = self.downloader.expect("Downloader not defined");
|
||||||
|
// TODO: split/refactor this function :
|
||||||
|
// - ¿ find a way to ease the |get urls| -> |async download| -> |replace| workflow ?
|
||||||
|
// - `element_to_remove` , `base_url` and `downloader` should be in a configuration structure
|
||||||
|
let (style_urls, html) = {
|
||||||
|
let document = Document::from(html.as_ref());
|
||||||
|
|
||||||
// ---- Replace stylesheets ----
|
// ---- Remove scripts ----
|
||||||
|
//
|
||||||
|
document.select("script").remove();
|
||||||
|
|
||||||
|
for event in EVENT_HANDLERS {
|
||||||
|
document
|
||||||
|
.select(format!("[{}]", event).as_str())
|
||||||
|
.remove_attr(event);
|
||||||
|
}
|
||||||
|
|
||||||
|
for rel in LINK_REL_EXTERNAL_RESOURCES {
|
||||||
|
document
|
||||||
|
.select(format!("link[rel=\"{}\"]", rel).as_str())
|
||||||
|
.remove();
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---- Replace stylesheets ----
|
||||||
|
//
|
||||||
|
let stylesheets = document.select("link[href][rel=\"stylesheet\"]");
|
||||||
|
let styles_url = stylesheets
|
||||||
|
.iter()
|
||||||
|
.map(|stylesheet| {
|
||||||
|
if let Some(src) = stylesheet.attr("href") {
|
||||||
|
//TODO: does it work with absolute urls ?
|
||||||
|
base_url.join(src.as_ref()).ok()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
(styles_url, String::from(document.html()))
|
||||||
|
};
|
||||||
|
|
||||||
|
let style_urls = style_urls.into_iter().map(|style_url| {
|
||||||
|
OptionFuture::from(
|
||||||
|
style_url.map(|s| async move { downloader.download(&s).await.unwrap() }),
|
||||||
|
)
|
||||||
|
});
|
||||||
|
let downloaded_styles = futures::future::join_all(style_urls).await;
|
||||||
|
|
||||||
|
let html = {
|
||||||
|
let document = Document::from(&html);
|
||||||
|
let styles = document.select("link[href][rel=\"stylesheet\"]");
|
||||||
|
|
||||||
|
styles
|
||||||
|
.iter()
|
||||||
|
.zip(downloaded_styles.iter())
|
||||||
|
.for_each(|(mut stylesheet, inner_css)| {
|
||||||
|
if let Some(Some(inner_css)) = inner_css {
|
||||||
|
let css = String::from_utf8(inner_css.to_vec()).unwrap();
|
||||||
|
let css = format!("<style>{}</style>", css);
|
||||||
|
stylesheet.replace_with_html(css);
|
||||||
|
} else {
|
||||||
|
stylesheet.remove();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
String::from(document.html())
|
||||||
|
};
|
||||||
|
|
||||||
|
// ---- Replace imgs ----
|
||||||
//
|
//
|
||||||
let stylesheets = document.select("link[href][rel=\"stylesheet\"]");
|
let image_urls = {
|
||||||
let styles_url = stylesheets
|
let document = Document::from(&html);
|
||||||
.iter()
|
let imgs = document.select("img:not([src^=\"data:\"])");
|
||||||
.map(|stylesheet| {
|
|
||||||
if let Some(src) = stylesheet.attr("href") {
|
|
||||||
//TODO: does it work with absolute urls ?
|
|
||||||
base_url.join(src.as_ref()).ok()
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
(styles_url, String::from(document.html()))
|
|
||||||
};
|
|
||||||
|
|
||||||
let style_urls = style_urls.into_iter().map(|style_url| {
|
imgs.iter()
|
||||||
OptionFuture::from(style_url.map(|s| async move { downloader.download(&s).await.unwrap() }))
|
.map(|image| {
|
||||||
});
|
if let Some(src) = image.attr("src") {
|
||||||
let downloaded_styles = futures::future::join_all(style_urls).await;
|
base_url.join(src.as_ref()).ok()
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>()
|
||||||
|
};
|
||||||
|
|
||||||
let html = {
|
let downloaded_images = image_urls.into_iter().map(|image_url| {
|
||||||
let document = Document::from(&html);
|
OptionFuture::from(image_url.map(|url| async move {
|
||||||
let styles = document.select("link[href][rel=\"stylesheet\"]");
|
let data = downloader.download(&url).await.unwrap();
|
||||||
|
(url, data)
|
||||||
|
}))
|
||||||
|
});
|
||||||
|
let downloaded_images = futures::future::join_all(downloaded_images).await;
|
||||||
|
|
||||||
styles
|
let html = {
|
||||||
.iter()
|
let document = Document::from(&html);
|
||||||
.zip(downloaded_styles.iter())
|
let imgs = document.select("img:not([src^=\"data:\"])");
|
||||||
.for_each(|(mut stylesheet, inner_css)| {
|
|
||||||
if let Some(Some(inner_css)) = inner_css {
|
|
||||||
let css = String::from_utf8(inner_css.to_vec()).unwrap();
|
|
||||||
let css = format!("<style>{}</style>", css);
|
|
||||||
stylesheet.replace_with_html(css);
|
|
||||||
} else {
|
|
||||||
stylesheet.remove();
|
|
||||||
}
|
|
||||||
});
|
|
||||||
String::from(document.html())
|
|
||||||
};
|
|
||||||
|
|
||||||
// ---- Replace imgs ----
|
imgs.iter()
|
||||||
//
|
.zip(downloaded_images.iter())
|
||||||
let image_urls = {
|
.for_each(|(mut img, data)| {
|
||||||
let document = Document::from(&html);
|
if let Some((url, Some(data))) = data {
|
||||||
let imgs = document.select("img:not([src^=\"data:\"])");
|
let data = base64::encode(data);
|
||||||
|
//TODO: use an extension hashmap
|
||||||
|
let extension =
|
||||||
|
Path::new(url.path()).extension().unwrap().to_str().unwrap();
|
||||||
|
img.set_attr("src", &format!("data:image/{};base64,{}", extension, data));
|
||||||
|
} else {
|
||||||
|
img.remove()
|
||||||
|
}
|
||||||
|
});
|
||||||
|
// ---- Remove unwanted html elements -----
|
||||||
|
//
|
||||||
|
for element in self.elements_to_remove {
|
||||||
|
document.select(element.as_ref()).remove();
|
||||||
|
}
|
||||||
|
String::from(document.html())
|
||||||
|
};
|
||||||
|
|
||||||
imgs.iter()
|
// ---- output ----
|
||||||
.map(|image| {
|
|
||||||
if let Some(src) = image.attr("src") {
|
|
||||||
base_url.join(src.as_ref()).ok()
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>()
|
|
||||||
};
|
|
||||||
|
|
||||||
let downloaded_images = image_urls.into_iter().map(|image_url| {
|
|
||||||
OptionFuture::from(image_url.map(|url| async move {
|
|
||||||
let data = downloader.download(&url).await.unwrap();
|
|
||||||
(url, data)
|
|
||||||
}))
|
|
||||||
});
|
|
||||||
let downloaded_images = futures::future::join_all(downloaded_images).await;
|
|
||||||
|
|
||||||
let html = {
|
|
||||||
let document = Document::from(&html);
|
|
||||||
let imgs = document.select("img:not([src^=\"data:\"])");
|
|
||||||
|
|
||||||
imgs.iter()
|
|
||||||
.zip(downloaded_images.iter())
|
|
||||||
.for_each(|(mut img, data)| {
|
|
||||||
if let Some((url, Some(data))) = data {
|
|
||||||
let data = base64::encode(data);
|
|
||||||
let extension = Path::new(url.path()).extension().unwrap().to_str().unwrap();
|
|
||||||
img.set_attr("src", &format!("data:image/{};base64,{}", extension, data));
|
|
||||||
} else {
|
|
||||||
img.remove()
|
|
||||||
}
|
|
||||||
});
|
|
||||||
// ---- Remove unwanted html elements -----
|
|
||||||
//
|
//
|
||||||
for element in elements_to_remove {
|
let mut minifier = HTMLMinifier::new();
|
||||||
document.select(element.as_ref()).remove();
|
minifier.digest(html.as_str()).unwrap();
|
||||||
}
|
|
||||||
String::from(document.html())
|
|
||||||
};
|
|
||||||
|
|
||||||
// ---- output ----
|
String::from_utf8(minifier.get_html().into()).unwrap()
|
||||||
//
|
}
|
||||||
let mut minifier = HTMLMinifier::new();
|
|
||||||
minifier.digest(html.as_str()).unwrap();
|
|
||||||
|
|
||||||
String::from_utf8(minifier.get_html().into()).unwrap()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
@ -180,9 +217,14 @@ mod tests {
|
|||||||
let html = "<html><head><script>let id = id => id</script></head><body></body></html>";
|
let html = "<html><head><script>let id = id => id</script></head><body></body></html>";
|
||||||
let base_url = Url::parse("http://example.com")?;
|
let base_url = Url::parse("http://example.com")?;
|
||||||
let downloader = DummyDownloader {};
|
let downloader = DummyDownloader {};
|
||||||
let to_remove: &[&str] = &[];
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(html, &downloader, &base_url, to_remove).await,
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(html)
|
||||||
|
.await,
|
||||||
"<html><head></head><body></body></html>"
|
"<html><head></head><body></body></html>"
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -206,10 +248,13 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let base_url = Url::parse("http://example.com")?;
|
let base_url = Url::parse("http://example.com")?;
|
||||||
let to_remove: &[&str] = &[];
|
|
||||||
for s in EVENT_HANDLERS {
|
for s in EVENT_HANDLERS {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(html(s), &downloader, &base_url, to_remove).await,
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}.run(html(s)).await,
|
||||||
"<html><head>\n</head>\n<body>\n<button class=\"activate\">button</button>\n</body></html>"
|
"<html><head>\n</head>\n<body>\n<button class=\"activate\">button</button>\n</body></html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -234,10 +279,15 @@ mod tests {
|
|||||||
};
|
};
|
||||||
|
|
||||||
let base_url = Url::parse("http://example.com")?;
|
let base_url = Url::parse("http://example.com")?;
|
||||||
let to_remove: &[&str] = &[];
|
|
||||||
for s in LINK_REL_EXTERNAL_RESOURCES {
|
for s in LINK_REL_EXTERNAL_RESOURCES {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(html(s), &downloader, &base_url, to_remove).await,
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(html(s))
|
||||||
|
.await,
|
||||||
"<html><head>\n</head>\n<body>\n</body></html>"
|
"<html><head>\n</head>\n<body>\n</body></html>"
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -290,9 +340,14 @@ mod tests {
|
|||||||
let minified = String::from_utf8(minifier.get_html().into())?;
|
let minified = String::from_utf8(minifier.get_html().into())?;
|
||||||
|
|
||||||
let base_url = Url::parse("http://example.com")?;
|
let base_url = Url::parse("http://example.com")?;
|
||||||
let to_remove: &[&str] = &[];
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(html, &downloader, &base_url, to_remove).await,
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(html)
|
||||||
|
.await,
|
||||||
minified
|
minified
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -337,9 +392,14 @@ mod tests {
|
|||||||
let minified = String::from_utf8(minifier.get_html().into())?;
|
let minified = String::from_utf8(minifier.get_html().into())?;
|
||||||
|
|
||||||
let base_url = Url::parse("http://example.com")?;
|
let base_url = Url::parse("http://example.com")?;
|
||||||
let to_remove: &[&str] = &[];
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(html, &downloader, &base_url, to_remove).await,
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(html)
|
||||||
|
.await,
|
||||||
minified
|
minified
|
||||||
);
|
);
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -372,12 +432,13 @@ mod tests {
|
|||||||
let minified = String::from_utf8(minifier.get_html().into())?;
|
let minified = String::from_utf8(minifier.get_html().into())?;
|
||||||
|
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
self_contained_html(
|
Config {
|
||||||
html,
|
downloader: Some(&downloader),
|
||||||
&downloader,
|
base_url: Some(&base_url),
|
||||||
&base_url,
|
elements_to_remove: &["header", ".placeholder", "article > span.huge"],
|
||||||
&["header", ".placeholder", "article > span.huge"]
|
..Default::default()
|
||||||
)
|
}
|
||||||
|
.run(html)
|
||||||
.await,
|
.await,
|
||||||
minified
|
minified
|
||||||
);
|
);
|
||||||
|
@ -37,7 +37,8 @@ frame "backend" {
|
|||||||
|
|
||||||
newspaper -> retrieval_tools: uses to implement
|
newspaper -> retrieval_tools: uses to implement
|
||||||
|
|
||||||
article_location --> article_repr :uses
|
article_location --> article_repr: uses
|
||||||
|
retrieval_tools -up-> article_repr: uses
|
||||||
|
|
||||||
auto_retrieve --> rss: watches
|
auto_retrieve --> rss: watches
|
||||||
auto_retrieve --> article_location
|
auto_retrieve --> article_location
|
||||||
|
Loading…
Reference in New Issue
Block a user