crieur/examples/cli_downloader.rs
koalp 970f510cd1
feat: add retrieval from le monde diplomatique
Add retrieval from le monde diplomatique

Previously, 404 pages were injected in the document when downloading
styles.
Now, the downloader returns None when documents are not found.
2021-05-22 04:41:01 +02:00

33 lines
912 B
Rust

use std::convert::TryInto;
use std::env;
use anyhow::Result;
use crieur_retrieve::{ArticleLocation, Url};
use dotenv::dotenv;
use log::info;
/// Example command-line downloader.
///
/// Takes the article URL from the first command-line argument, or falls back
/// to a built-in default article when no argument is given. The URL is turned
/// into an `ArticleLocation`, the result of `retrieve_html()` is awaited, and
/// the returned value is printed to stdout.
///
/// # Errors
///
/// Returns an error if the URL cannot be parsed, if the `ArticleLocation`
/// cannot be built from it, or if `retrieve_html()` fails.
#[tokio::main]
async fn main() -> Result<()> {
    // Article fetched when no URL is supplied on the command line.
    const DEFAULT_ARTICLE: &str = "https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long";

    // Any error from loading a `.env` file is deliberately discarded.
    dotenv().ok();

    // Logs are written to stderr; the article itself is printed to stdout below.
    let env_filter = tracing_subscriber::EnvFilter::from_default_env();
    tracing_subscriber::fmt()
        .with_writer(std::io::stderr)
        .with_env_filter(env_filter)
        .init();

    let url = if let Some(arg) = env::args().nth(1) {
        Url::parse(&arg)?
    } else {
        DEFAULT_ARTICLE.try_into()?
    };

    info!("Trying to download article from {}", url);

    // TODO: shorten this, maybe with a helper function?
    let location = ArticleLocation::builder().url(url)?.build()?;
    let html = location.retrieve_html().await?;

    println!("{}", html);

    Ok(())
}