From cee0af6c3c3c0b8e997a201cbf52a4a9d221b10b Mon Sep 17 00:00:00 2001 From: koalp Date: Thu, 13 May 2021 20:30:27 +0200 Subject: [PATCH] fix: only select images that have non-data src Previously, when an image's src was a data URL, the code tried to parse it as a regular URL and failed, instead of keeping the embedded data. This has been fixed so that images whose src starts with 'data:' are not modified. --- crieur-retrieve/src/tools/self_contained_html.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crieur-retrieve/src/tools/self_contained_html.rs b/crieur-retrieve/src/tools/self_contained_html.rs index 32b8c85..44c3e11 100644 --- a/crieur-retrieve/src/tools/self_contained_html.rs +++ b/crieur-retrieve/src/tools/self_contained_html.rs @@ -92,7 +92,7 @@ where // let image_urls = { let document = Document::from(&html); - let imgs = document.select("img"); + let imgs = document.select("img:not([src^=\"data:\"])"); imgs.iter() .map(|image| { @@ -115,7 +115,7 @@ where let html = { let document = Document::from(&html); - let imgs = document.select("img"); + let imgs = document.select("img:not([src^=\"data:\"])"); imgs.iter() .zip(downloaded_images.iter())