Add retrieval from Le Monde diplomatique. Previously, 404 pages were injected into the document when downloading styles. Now, the downloader returns None when documents are not found.
33 lines
912 B
Rust
33 lines
912 B
Rust
use std::convert::TryInto;
|
|
use std::env;
|
|
|
|
use anyhow::Result;
|
|
use crieur_retrieve::{ArticleLocation, Url};
|
|
use dotenv::dotenv;
|
|
use log::info;
|
|
|
|
#[tokio::main]
|
|
async fn main() -> Result<()> {
|
|
dotenv().ok();
|
|
tracing_subscriber::fmt()
|
|
.with_writer(std::io::stderr)
|
|
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
|
|
.init();
|
|
|
|
let url = match env::args().nth(1) {
|
|
Some(url) => Url::parse(&url)?,
|
|
None => "https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long".try_into()?,
|
|
};
|
|
|
|
info!("Trying to download article from {}", url);
|
|
|
|
// TODO: shorten this, maybe an helper function ?
|
|
let article_location = ArticleLocation::builder().url(url)?.build()?;
|
|
|
|
let article_str = article_location.retrieve_html().await?;
|
|
|
|
println!("{}", article_str);
|
|
|
|
Ok(())
|
|
}
|