fix: keep media queries in ref styles
All checks were successful
continuous-integration/drone/push Build is passing
All checks were successful
continuous-integration/drone/push Build is passing
Previously, media queries weren't kept when downloading styles from ref tags. This has been fixed so that the media attribute is kept when creating style tags from ref tags.
This commit is contained in:
parent
40ebc1ddea
commit
e34edf0b21
50
Cargo.lock
generated
50
Cargo.lock
generated
@ -662,9 +662,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures"
|
name = "futures"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "a9d5813545e459ad3ca1bff9915e9ad7f1a47dc6a91b627ce321d5863b7dd253"
|
checksum = "0e7e43a803dae2fa37c1f6a8fe121e1f7bf9548b4dfc0522a42f34145dadfc27"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
@ -677,9 +677,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-channel"
|
name = "futures-channel"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ce79c6a52a299137a6013061e0cf0e688fce5d7f1bc60125f520912fdb29ec25"
|
checksum = "e682a68b29a882df0545c143dc3646daefe80ba479bcdede94d5a703de2871e2"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-sink",
|
"futures-sink",
|
||||||
@ -687,15 +687,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-core"
|
name = "futures-core"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "098cd1c6dda6ca01650f1a37a794245eb73181d0d4d4e955e2f3c37db7af1815"
|
checksum = "0402f765d8a89a26043b889b26ce3c4679d268fa6bb22cd7c6aad98340e179d1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-executor"
|
name = "futures-executor"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "10f6cb7042eda00f0049b1d2080aa4b93442997ee507eb3828e8bd7577f94c9d"
|
checksum = "badaa6a909fac9e7236d0620a2f57f7664640c56575b71a7552fbd68deafab79"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-task",
|
"futures-task",
|
||||||
@ -704,9 +704,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-io"
|
name = "futures-io"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "365a1a1fb30ea1c03a830fdb2158f5236833ac81fa0ad12fe35b29cddc35cb04"
|
checksum = "acc499defb3b348f8d8f3f66415835a9131856ff7714bf10dadfc4ec4bdb29a1"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-locks"
|
name = "futures-locks"
|
||||||
@ -719,10 +719,11 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-macro"
|
name = "futures-macro"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "668c6733a182cd7deb4f1de7ba3bf2120823835b3bcfbeacf7d2c4a773c1bb8b"
|
checksum = "a4c40298486cdf52cc00cd6d6987892ba502c7656a16a4192a9992b1ccedd121"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"autocfg 1.0.1",
|
||||||
"proc-macro-hack",
|
"proc-macro-hack",
|
||||||
"proc-macro2",
|
"proc-macro2",
|
||||||
"quote",
|
"quote",
|
||||||
@ -731,15 +732,15 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-sink"
|
name = "futures-sink"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5c5629433c555de3d82861a7a4e3794a4c40040390907cfbfd7143a92a426c23"
|
checksum = "a57bead0ceff0d6dde8f465ecd96c9338121bb7717d3e7b108059531870c4282"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-task"
|
name = "futures-task"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ba7aa51095076f3ba6d9a1f702f74bd05ec65f555d70d2033d55ba8d69f581bc"
|
checksum = "8a16bef9fc1a4dddb5bee51c989e3fbba26569cbb0e31f5b303c184e3dd33dae"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-timer"
|
name = "futures-timer"
|
||||||
@ -753,10 +754,11 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "futures-util"
|
name = "futures-util"
|
||||||
version = "0.3.14"
|
version = "0.3.15"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "3c144ad54d60f23927f0a6b6d816e4271278b64f005ad65e4e35291d2de9c025"
|
checksum = "feb5c238d27e2bf94ffdfd27b2c29e3df4a68c4193bb6427384259e2bf191967"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"autocfg 1.0.1",
|
||||||
"futures-channel",
|
"futures-channel",
|
||||||
"futures-core",
|
"futures-core",
|
||||||
"futures-io",
|
"futures-io",
|
||||||
@ -895,9 +897,9 @@ dependencies = [
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "html-minifier"
|
name = "html-minifier"
|
||||||
version = "3.0.12"
|
version = "3.0.13"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "5a3b862e637e82b3134913fdd0aa0b8e79b7486fe88878f6bab0d09daf4996af"
|
checksum = "70f11cbdecf4fde3b74532e8d6d5926b6354adfda3fe259ed585402f9ae0dbaf"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"cow-utils",
|
"cow-utils",
|
||||||
"educe",
|
"educe",
|
||||||
@ -1284,9 +1286,9 @@ checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "minifier"
|
name = "minifier"
|
||||||
version = "0.0.40"
|
version = "0.0.41"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "ed8f02a863a23d5797a6e72ea7102bd0ee38ceba1256b522aeddb70ff14b32c9"
|
checksum = "5594542d20834f2b974f5e5fb8e0cf1c67a2119dcadc29ef5d93a081fb30cc08"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"macro-utils",
|
"macro-utils",
|
||||||
]
|
]
|
||||||
@ -2659,9 +2661,9 @@ checksum = "cda74da7e1a664f795bb1f8a87ec406fb89a02522cf6e50620d016add6dbbf5c"
|
|||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "tokio"
|
name = "tokio"
|
||||||
version = "1.5.0"
|
version = "1.6.0"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "83f0c8e7c0addab50b663055baf787d0af7f413a46e6e7fb9559a4e4db7137a5"
|
checksum = "bd3076b5c8cc18138b8f8814895c11eb4de37114a5d127bafdc5e55798ceef37"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"autocfg 1.0.1",
|
"autocfg 1.0.1",
|
||||||
"bytes",
|
"bytes",
|
||||||
|
@ -22,5 +22,5 @@ crieur-chatbot = {version = "0.1", path="crieur-chatbot"}
|
|||||||
dotenv = "0.15.0"
|
dotenv = "0.15.0"
|
||||||
env_logger = "0.8.3"
|
env_logger = "0.8.3"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
tokio = { version = "1.5.0", features = ["full"] }
|
tokio = { version = "1.6.0", features = ["full"] }
|
||||||
tracing-subscriber = "0.2.18"
|
tracing-subscriber = "0.2.18"
|
||||||
|
@ -16,14 +16,14 @@ hyper-rustls = "0.22.1"
|
|||||||
cookie = "0.15.0"
|
cookie = "0.15.0"
|
||||||
lol_html = "0.3.0"
|
lol_html = "0.3.0"
|
||||||
indoc = "1.0.3"
|
indoc = "1.0.3"
|
||||||
html-minifier = "3.0.12"
|
html-minifier = "3.0.13"
|
||||||
bytes = "1.0.1"
|
bytes = "1.0.1"
|
||||||
base64 = "0.13.0"
|
base64 = "0.13.0"
|
||||||
futures = "0.3.14"
|
futures = "0.3.15"
|
||||||
derive_builder = "0.10.2"
|
derive_builder = "0.10.2"
|
||||||
nipper = "0.1.9"
|
nipper = "0.1.9"
|
||||||
log = "0.4.14"
|
log = "0.4.14"
|
||||||
env_logger = "0.8.3"
|
env_logger = "0.8.3"
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
tokio = "1.5.0"
|
tokio = "1.6.0"
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
use anyhow::{anyhow, bail, Result};
|
use anyhow::{anyhow, bail, Result};
|
||||||
use async_trait::async_trait;
|
use async_trait::async_trait;
|
||||||
use cookie::Cookie;
|
use cookie::Cookie;
|
||||||
|
use indoc::indoc;
|
||||||
use url::Host;
|
use url::Host;
|
||||||
|
|
||||||
use crate::newspaper::{Metadata, Newspaper};
|
use crate::newspaper::{Metadata, Newspaper};
|
||||||
@ -81,9 +82,42 @@ impl Newspaper for CourrierInternational {
|
|||||||
None => bail!("404 not found"),
|
None => bail!("404 not found"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let elements_to_remove = &[
|
||||||
|
// navigation elements
|
||||||
|
"header.site-header",
|
||||||
|
"footer.site-footer",
|
||||||
|
// Social buttons
|
||||||
|
"#toolbox-share",
|
||||||
|
".toolbox-share",
|
||||||
|
".toolbox-print",
|
||||||
|
".toolbox-respond",
|
||||||
|
".toolbox-zen",
|
||||||
|
".toolbox-newsletter",
|
||||||
|
".toolbox-offer",
|
||||||
|
".box-article-offer-friend-abo",
|
||||||
|
// unused services
|
||||||
|
".article-aside",
|
||||||
|
".article-secondary",
|
||||||
|
".article-subject-readmore",
|
||||||
|
// misc
|
||||||
|
".element-invisible",
|
||||||
|
".gptcontainer",
|
||||||
|
];
|
||||||
|
|
||||||
|
// FIXME: it doesn't work because the aside is in the article body
|
||||||
|
//
|
||||||
|
let toolbox_style = indoc! {"
|
||||||
|
aside.article-toolbox {
|
||||||
|
position: sticky;
|
||||||
|
top: 1em;
|
||||||
|
}
|
||||||
|
"};
|
||||||
|
|
||||||
let single_page_html = tools::self_contained_html::Config {
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
downloader: Some(&downloader),
|
downloader: Some(&downloader),
|
||||||
base_url: Some(&url),
|
base_url: Some(&url),
|
||||||
|
elements_to_remove,
|
||||||
|
styles_to_add: &[toolbox_style],
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
.run(&html)
|
.run(&html)
|
||||||
|
@ -86,7 +86,7 @@ impl Newspaper for Mediapart {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Move to const
|
// TODO: Move to const
|
||||||
let elements_to_remove = [
|
let elements_to_remove = &[
|
||||||
// header
|
// header
|
||||||
".fb-root",
|
".fb-root",
|
||||||
".skipLinks",
|
".skipLinks",
|
||||||
@ -107,7 +107,7 @@ impl Newspaper for Mediapart {
|
|||||||
let single_page_html = tools::self_contained_html::Config {
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
downloader: Some(&downloader),
|
downloader: Some(&downloader),
|
||||||
base_url: Some(&url),
|
base_url: Some(&url),
|
||||||
elements_to_remove: &elements_to_remove,
|
elements_to_remove,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
.run(&html)
|
.run(&html)
|
||||||
|
@ -91,7 +91,7 @@ impl Newspaper for MondeDiplo {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Move to const
|
// TODO: Move to const
|
||||||
let elements_to_remove = [
|
let elements_to_remove = &[
|
||||||
// navigation elements
|
// navigation elements
|
||||||
"#tout-en-haut.preentete",
|
"#tout-en-haut.preentete",
|
||||||
"#entete.connecte",
|
"#entete.connecte",
|
||||||
@ -110,7 +110,7 @@ impl Newspaper for MondeDiplo {
|
|||||||
let single_page_html = tools::self_contained_html::Config {
|
let single_page_html = tools::self_contained_html::Config {
|
||||||
downloader: Some(&downloader),
|
downloader: Some(&downloader),
|
||||||
base_url: Some(&url),
|
base_url: Some(&url),
|
||||||
elements_to_remove: &elements_to_remove,
|
elements_to_remove,
|
||||||
..Default::default()
|
..Default::default()
|
||||||
}
|
}
|
||||||
.run(&html)
|
.run(&html)
|
||||||
|
@ -59,6 +59,8 @@ where
|
|||||||
let downloader = self.downloader.expect("Downloader not defined");
|
let downloader = self.downloader.expect("Downloader not defined");
|
||||||
// TODO: split/refactor this function :
|
// TODO: split/refactor this function :
|
||||||
// - ¿ find a way to ease the |get urls| -> |async download| -> |replace| workflow ?
|
// - ¿ find a way to ease the |get urls| -> |async download| -> |replace| workflow ?
|
||||||
|
// - put each modification (ex: style in the `foreach`) in functions, maybe using
|
||||||
|
// (¿sealed?) Traits : https://git.alpaga.dev/poc/crieur/issues/42
|
||||||
// - `element_to_remove` , `base_url` and `downloader` should be in a configuration structure
|
// - `element_to_remove` , `base_url` and `downloader` should be in a configuration structure
|
||||||
let (style_urls, html) = {
|
let (style_urls, html) = {
|
||||||
let document = Document::from(html.as_ref());
|
let document = Document::from(html.as_ref());
|
||||||
@ -84,9 +86,8 @@ where
|
|||||||
let stylesheets = document.select("link[href][rel=\"stylesheet\"]");
|
let stylesheets = document.select("link[href][rel=\"stylesheet\"]");
|
||||||
let styles_url = stylesheets
|
let styles_url = stylesheets
|
||||||
.iter()
|
.iter()
|
||||||
.map(|stylesheet| {
|
.map(|style_link| {
|
||||||
if let Some(src) = stylesheet.attr("href") {
|
if let Some(src) = style_link.attr("href") {
|
||||||
//TODO: does it work with absolute urls ?
|
|
||||||
base_url.join(src.as_ref()).ok()
|
base_url.join(src.as_ref()).ok()
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
@ -110,13 +111,19 @@ where
|
|||||||
styles
|
styles
|
||||||
.iter()
|
.iter()
|
||||||
.zip(downloaded_styles.iter())
|
.zip(downloaded_styles.iter())
|
||||||
.for_each(|(mut stylesheet, inner_css)| {
|
.for_each(|(mut style_link, inner_css)| {
|
||||||
if let Some(Some(inner_css)) = inner_css {
|
if let Some(Some(inner_css)) = inner_css {
|
||||||
let css = String::from_utf8(inner_css.to_vec()).unwrap();
|
let css = String::from_utf8(inner_css.to_vec()).unwrap();
|
||||||
let css = format!("<style>{}</style>", css);
|
let media_query = style_link.attr("media");
|
||||||
stylesheet.replace_with_html(css);
|
let css = match media_query {
|
||||||
|
Some(media_query) => {
|
||||||
|
format!("<style media=\"{}\">{}</style>", media_query, css)
|
||||||
|
}
|
||||||
|
None => format!("<style>{}</style>", css),
|
||||||
|
};
|
||||||
|
style_link.replace_with_html(css);
|
||||||
} else {
|
} else {
|
||||||
stylesheet.remove();
|
style_link.remove();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
String::from(document.html())
|
String::from(document.html())
|
||||||
@ -192,6 +199,7 @@ where
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
|
// TODO: reduce boilerplate, DRY
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
|
|
||||||
@ -362,6 +370,49 @@ mod tests {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[tokio::test]
|
||||||
|
async fn download_css_with_media_query() -> Result<()> {
|
||||||
|
let downloader = CssDownloader {};
|
||||||
|
|
||||||
|
let html = indoc! {"
|
||||||
|
<html>
|
||||||
|
<head>
|
||||||
|
<link rel=\"stylesheet\" href=\"main.css\" media=\"print\">
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"};
|
||||||
|
|
||||||
|
let wanted_html = indoc! {"
|
||||||
|
<html><head>
|
||||||
|
<style media=\"print\">
|
||||||
|
section#warning {
|
||||||
|
color: red;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
</body></html>
|
||||||
|
"};
|
||||||
|
let mut minifier = HTMLMinifier::new();
|
||||||
|
minifier.digest(wanted_html)?;
|
||||||
|
let minified = String::from_utf8(minifier.get_html().into())?;
|
||||||
|
|
||||||
|
let base_url = Url::parse("http://example.com")?;
|
||||||
|
assert_eq!(
|
||||||
|
Config {
|
||||||
|
downloader: Some(&downloader),
|
||||||
|
base_url: Some(&base_url),
|
||||||
|
..Default::default()
|
||||||
|
}
|
||||||
|
.run(html)
|
||||||
|
.await,
|
||||||
|
minified
|
||||||
|
);
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
struct PngDownloader;
|
struct PngDownloader;
|
||||||
#[async_trait]
|
#[async_trait]
|
||||||
impl Download for PngDownloader {
|
impl Download for PngDownloader {
|
||||||
|
Loading…
Reference in New Issue
Block a user