A basic chatbot application that downloads articles from one newspaper has been added. It can download HTML pages and is called with the !html command. ArticleLocation has been refactored to own its internal data.
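A minimal sketch of what the command handling might look like, assuming the chat framework hands the handler the raw message text; handle_html_command and its signature are hypothetical, while the retrieval calls mirror the example below.

use std::env;

use anyhow::Result;
use crieur_retrieve::{ArticleLocation, Mediapart, Url};

// Hypothetical handler: reacts to messages of the form `!html <url>` and
// replies with the downloaded HTML page. The surrounding chat wiring is
// assumed, not part of the example below.
async fn handle_html_command(text: &str) -> Result<Option<String>> {
    // Ignore anything that is not an !html command.
    let raw_url = match text.strip_prefix("!html ") {
        Some(rest) => rest.trim(),
        None => return Ok(None),
    };

    // Same newspaper setup as in the full example.
    let mut mediapart = Mediapart::new();
    mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?.into()));

    let article_location = ArticleLocation::builder()
        .url(Url::parse(raw_url)?)?
        .newspaper(mediapart)
        .build()?;

    Ok(Some(article_location.retrieve_html().await?))
}

The complete example application: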
use std::convert::TryInto;
use std::env;

use anyhow::Result;
use crieur_retrieve::{newspaper::Newspaper, ArticleLocation, Mediapart, Url};
use dotenv::dotenv;
use log::info;

#[tokio::main]
async fn main() -> Result<()> {
    dotenv().ok();
    env_logger::init();

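    // Take the article URL from the first CLI argument, falling back to a
    // sample Mediapart article.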
    let url = match env::args().nth(1) {
        Some(url) => Url::parse(&url)?,
        None => "https://www.mediapart.fr/journal/france/030421/l-hotel-dieu-patients-et-medecins-tentent-de-percer-les-mysteres-du-covid-long".try_into()?,
    };

    // TODO: remove this in favor of default newspapers
    let mut mediapart = Mediapart::new()
        //.login(USERNAME, PASSWORD)
        ;

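    // The MPRUUID session cookie authenticates the requests to Mediapart.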
    mediapart.login_cookie = Some(("MPRUUID".into(), env::var("MEDIAPART_COOKIE")?.into()));
    info!("Trying to download article from {}", url);

    // TODO: shorten this, maybe a helper function?
    let article_location = ArticleLocation::builder()
        .url(url)?
        .newspaper(mediapart)
        .build()?;

    let article_str = article_location.retrieve_html().await?;

    println!("{}", article_str);

    Ok(())
}
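To run the example, set MEDIAPART_COOKIE in the environment (or in a .env file, which dotenv loads at startup) and optionally pass an article URL as the first argument; without one, the hard-coded Mediapart article above is fetched.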