Skip to content

Commit

Permalink
feat: add --id flag and model not found validation (#32)
Browse files Browse the repository at this point in the history
  • Loading branch information
baduker authored Nov 11, 2024
1 parent 2c2121c commit 312ffc0
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 16 deletions.
2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "untitled"
version = "0.4.0"
version = "0.4.1"
edition = "2021"

[dependencies]
Expand Down
9 changes: 5 additions & 4 deletions src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,6 @@ pub struct Cli {
#[arg(short, long, value_name = "FILE", default_value = "untitled.toml")]
pub config: PathBuf,

#[arg(long, default_value = "false")]
pub full_size_image: bool,

#[command(subcommand)]
pub command: Option<Commands>,
}
Expand All @@ -36,8 +33,12 @@ pub enum Commands {
#[arg(short, long, value_name = "URL")]
url: Option<String>,

/// Use just the girl's gallery ID
#[arg(short, long, value_name = "ID", conflicts_with = "url")]
id: Option<String>,

/// Download full-size images
#[arg(long)]
#[arg(long, default_value = "false")]
full_size_image: bool,
},
#[command(about = "Updates girl's galleries")]
Expand Down
21 changes: 18 additions & 3 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ mod scraper;
mod utilities;

use crate::scraper::collector::scrape;
use crate::utilities::validate_id;
use clap::Parser;
use cli::{Cli, Commands};
use config::{print_config, read_or_create_config, MyConfig};
Expand All @@ -19,10 +20,24 @@ fn main() {
}
Some(Commands::Scrape {
url,
id,
full_size_image,
}) => match url {
Some(url) => scrape(&config, Some(&url), full_size_image),
None => scrape(&config, None, false),
}) => match (url, id) {
(Some(url), None) => scrape(&config, Some(&url), full_size_image),
(None, Some(id)) => {
if !validate_id(&id) {
eprintln!(
"Girl's page ID's are only numbers! Double check the id and try again."
);
eprintln!("Note: id's between 0 and 5 are invalid.");
return;
}
let constructed_url =
format!("https://www.kindgirls.com/old/girls.php?id={}", id);
scrape(&config, Some(&constructed_url), full_size_image)
}
(None, None) => eprintln!("You need to specify either a girl's page URL or ID!"),
(Some(_), Some(_)) => eprintln!("You can't use both URL and ID at the same time!"),
},
Some(Commands::Update) => {
scraper::updater::Updater::update(&config).unwrap();
Expand Down
18 changes: 16 additions & 2 deletions src/scraper/collector.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use super::structs::{Bio, Gallery, Girl, Selectors, Stats, Video, Visuals};
use crate::config::Config;
use crate::scraper::downloader::{Downloader, DownloaderImpl};
use crate::utilities::{build_video_src_url, parse_video_duration, splitter, todays_date};
use crate::utilities::{build_video_src_url, parse_video_duration, splitter, today_date};
use reqwest::blocking::Client;
use reqwest::header::{HeaderMap, HeaderValue, USER_AGENT};
use reqwest::Error;
Expand All @@ -24,6 +24,12 @@ pub fn scrape<T: Config>(config: &T, url: Option<&str>, full_size_image: bool) {
match body {
Ok(content) => {
let document = Html::parse_document(&content);

if is_model_not_found(&document) {
println!("Model not found! Please check the URl or ID and try again.");
return;
}

let girl = collect_girl(url, &document, full_size_image);
// Debug only; TODO: remove when logging is implemented
// println!("{:?}", girl);
Expand Down Expand Up @@ -171,9 +177,17 @@ fn collect_stats(visuals: &Visuals) -> Stats {
}
}

/// Reports whether the parsed page is the site's "model not found" placeholder.
///
/// Looks up the first element matching `Selectors::MODEL_NOT_FOUND` and checks
/// whether any of its text fragments contains the phrase "Model not found".
/// Returns `false` when no element matches the selector.
fn is_model_not_found(document: &Html) -> bool {
    // The selector string is a compile-time constant, so a parse failure is a programming error.
    let not_found_selector = Selector::parse(Selectors::MODEL_NOT_FOUND).unwrap();
    document
        .select(&not_found_selector)
        .next()
        .map_or(false, |node| {
            node.text()
                .any(|fragment| fragment.contains("Model not found"))
        })
}

pub(crate) fn collect_girl(url: &str, document: &Html, full_size_image: bool) -> Girl {
let is_single_gallery = Girl::is_single_gallery(url);
let last_update: Option<String> = Some(todays_date());
let last_update: Option<String> = Some(today_date());
let bio = collect_bio(document, url);
let galleries = collect_gallery(document, full_size_image);
let videos = collect_videos(document);
Expand Down
2 changes: 2 additions & 0 deletions src/scraper/structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ impl Selectors {
pub const GALLERY_IMAGE_SRC: &'static str = r#".gal_list a img"#;
pub const GALLERY_IMAGE_FULL_SIZE_SRC: &'static str = r#".gal_full a img"#;
pub const MODEL_VIDEOS: &'static str = r#".video_list a"#;
// CSS selector syntax: prefix IDs with `#` and classes with `.`.
pub const MODEL_NOT_FOUND: &'static str = "#cuerpo";
}

#[derive(Debug, Serialize, Deserialize)]
Expand Down
14 changes: 9 additions & 5 deletions src/scraper/updater.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use crate::config::Config;
use crate::scraper::collector::{collect_girl, fetch};
use crate::scraper::downloader::Downloader;
use crate::scraper::structs::Girl;
use crate::utilities::todays_date;
use crate::utilities::today_date;

pub(crate) struct Updater;

Expand All @@ -21,7 +21,6 @@ impl Updater {
let dir = dir?;
let path = dir.path();
if path.is_dir() {
println!("Updating: {:?}", path);
Self::update_girl_content(config, &dir.path())?;
}
}
Expand All @@ -42,7 +41,10 @@ impl Updater {
let mut existing_girl: Girl = serde_json::from_str(&content)?;

if let Some(link) = &existing_girl.bio.link {
println!("Checking updates for {}", existing_girl.bio.get_name());
println!(
"Checking for new content for {}",
existing_girl.bio.get_name()
);

let body = fetch(link)?;
let document = scraper::Html::parse_document(&body);
Expand All @@ -51,14 +53,14 @@ impl Updater {
let has_new_content = Self::compare_content(&existing_girl, &new_girl);

if has_new_content {
println!("New content found for {}", existing_girl.bio.get_name());
println!("New content found!");
Self::prompt_and_download(config, &new_girl)?;

// Update the existing girl with new content and timestamp
existing_girl.is_single_gallery = false;
existing_girl.content = new_girl.content;
existing_girl.stats = new_girl.stats;
existing_girl.last_update = Some(todays_date());
existing_girl.last_update = Some(today_date());

// Save updated JSON
let json = serde_json::to_string_pretty(&existing_girl)?;
Expand All @@ -80,8 +82,10 @@ impl Updater {
.as_ref()
.map(|v| v.len())
.unwrap_or(0);

let new_videos = new.content.videos.as_ref().map(|v| v.len()).unwrap_or(0);

// This is a simple quantity comparison, which should be enough for now
new_galleries > existing_galleries || new_videos > existing_videos
}

Expand Down
6 changes: 5 additions & 1 deletion src/utilities.rs
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,10 @@ pub fn format_date(date_str: &str) -> Option<String> {
.map(|date| date.format("%d-%m-%Y").to_string())
}

pub fn todays_date() -> String {
/// Returns the current local date formatted as `DD-MM-YYYY`.
pub fn today_date() -> String {
    let now = chrono::Local::now();
    let formatted = now.format("%d-%m-%Y");
    formatted.to_string()
}

/// Checks that `id` looks like a numeric page ID.
///
/// Returns `true` only when `id` is non-empty and consists solely of ASCII
/// decimal digits (`0`-`9`). An empty string is rejected, as are Unicode
/// numeric characters outside ASCII (for example `½` or `٣`), since neither
/// forms a usable `id=` query value.
///
/// No range check is performed here; only the character set is validated.
pub fn validate_id(id: &str) -> bool {
    // `all` is vacuously true on an empty iterator, so emptiness must be checked explicitly.
    !id.is_empty() && id.bytes().all(|b| b.is_ascii_digit())
}

0 comments on commit 312ffc0

Please sign in to comment.