root/src/booru/org.rs

// org.rs
//
// Copyright 2020 nee <nee-git@hidamari.blue>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// SPDX-License-Identifier: GPL-3.0-or-later
use crate::booru::Booru;
use crate::booru::Boorus;
use crate::booru::Post;
use crate::booru::Posts;
use crate::booru::http::http_client;
use anyhow::Result;
use serde::Deserialize;

/// A single post scraped from a booru.org listing page.
///
/// This booru does not have an API, so instances are assembled by
/// `slice_post` from the listing HTML rather than decoded from a response.
#[derive(Deserialize, Debug, Clone)]
pub struct OrgbooruPost {
    /// Numeric post id, parsed from the `posts[<id>]` marker in the page.
    id: u64,
    /// Image file name: the text following `thumbnail_` in the thumbnail URL.
    filename: String,
    /// Storage directory segment found after `thumbnails//` in the thumbnail URL.
    directory: String,
    /// Path segment that follows `.org/` in the thumbnail URL.
    sub_booru: String,
    /// Raw contents of the thumbnail's `title` attribute; `tags()` splits it on spaces.
    tags: String,
    /// Domain of the booru this post was fetched from. Never deserialized;
    /// filled in by the scraping code.
    #[serde(skip_deserializing)]
    domain: String,
}

impl Post for OrgbooruPost {
    /// Numeric id of the post.
    fn id(&self) -> u64 {
        self.id
    }

    /// Sort key for the post; identical to the post id.
    fn sort_id(&self) -> u64 {
        self.id
    }

    /// Always 0: this post type stores no image dimensions.
    fn width(&self) -> u32 {
        0
    }

    /// Always 0: this post type stores no image dimensions.
    fn height(&self) -> u32 {
        0
    }

    /// URL of the sample image.
    ///
    /// This booru does not generate samples, so the full image URL is returned.
    fn sample_url(&self) -> String {
        self.full_url()
    }

    /// URL of the thumbnail on `thumbs.booru.org`.
    fn thumb_url(&self) -> String {
        format!(
            "https://thumbs.booru.org/{}/thumbnails/{}/thumbnail_{}",
            self.sub_booru, self.directory, self.filename
        )
    }

    /// URL of the full-size image on `img.booru.org`.
    fn full_url(&self) -> String {
        format!(
            "https://img.booru.org/{}/images/{}/{}",
            self.sub_booru, self.directory, self.filename
        )
    }

    /// Owned copy of the image file name.
    fn filename(&self) -> String {
        self.filename.clone()
    }

    /// Tags of the post, split on single spaces.
    ///
    /// Empty fragments and `score:` entries are dropped.
    fn tags(&self) -> Vec<&str> {
        self.tags
            .split(' ')
            .filter(|tag| !(tag.is_empty() || tag.starts_with("score:")))
            .collect()
    }

    /// URL of the post's view page on its booru.
    fn web_url(&self) -> String {
        format!(
            "https://{}/index.php?page=post&s=view&id={}",
            self.domain, self.id
        )
    }

    /// Domain of the booru this post belongs to.
    fn domain(&self) -> &String {
        &self.domain
    }

    /// Boxed trait-object copy of this post.
    fn clone_post(&self) -> Box<dyn Post> {
        Box::new(self.clone())
    }

    /// Copy of this post wrapped in the `Posts::Orgbooru` variant.
    fn imp(&self) -> Posts {
        let copy = self.clone();
        Posts::Orgbooru(copy)
    }
}

/// Booru backend for sites whose listing pages are fetched as HTML and
/// scraped for posts (see `fetch_index` and `find_posts`).
#[derive(Deserialize, Debug, Clone)]
pub struct Orgbooru {
    /// Domain of the booru; inserted after `https://` when building request URLs.
    domain: String,
}
impl Booru for Orgbooru {
    /// Returns `current_page + last_result_count`.
    ///
    /// NOTE(review): presumably the value is used as the `pid` post offset
    /// passed to `fetch_index` rather than a page index — confirm against the
    /// callers of the `Booru` trait.
    fn next_page(&self, current_page: u32, last_result_count: u32) -> u32 {
        current_page + last_result_count
    }
    /// Returns a boxed trait-object copy of this booru.
    fn clone_booru(&self) -> Box<dyn Booru> {
        Box::new(self.clone())
    }
    /// Reports whether `domain` contains `.booru.org`; `_hash` is ignored.
    fn check_api_by_domain(&self, _hash: u64, domain: &str) -> bool {
        domain.contains(".booru.org")
    }
    /// Returns the domain this booru was created with.
    fn get_domain(&self) -> &String {
        &self.domain
    }
    /// Returns a copy of this booru wrapped in the `Boorus::Orgbooru` variant.
    fn imp(&self) -> Boorus {
        Boorus::Orgbooru(self.clone())
    }
}
/// Extracts the text between the next `start` marker and the `end` marker
/// that follows it, searching `r` from byte offset `p`.
///
/// Returns the byte offset just past the `end` marker together with an owned
/// copy of the enclosed text, so the offset can be fed into the next search.
///
/// Returns `None` when either marker is missing, and also when `p` lies past
/// the end of `r` or is not on a UTF-8 character boundary (instead of
/// panicking on the slice).
fn slice_between(r: &str, p: usize, start: &str, end: &str) -> Option<(usize, String)> {
    // `get` rather than indexing: an invalid offset yields `None`, not a panic.
    let tail = r.get(p..)?;
    let content_start = p + tail.find(start)? + start.len();
    // `content_start` ends a match found by `find`, so it is a valid boundary.
    let content_end = content_start + r[content_start..].find(end)?;
    Some((
        content_end + end.len(),
        r[content_start..content_end].to_owned(),
    ))
}

/// Parses one post out of the listing HTML `r`, starting at byte offset `p`.
///
/// The fields are read in the order they are searched for here: sub-booru,
/// directory, file name, tags and finally the post id. Each search resumes
/// where the previous one ended.
///
/// Returns the offset just past the id marker together with the parsed post,
/// or `None` when any marker is missing or the id is not a valid `u64`.
/// `domain` is copied into the resulting post.
fn slice_post(domain: &str, r: &str, p: usize) -> Option<(usize, Box<OrgbooruPost>)> {
    let (after_sub_booru, sub_booru) = slice_between(r, p, ".org/", "/")?;
    let (after_directory, directory) = slice_between(r, after_sub_booru, "thumbnails//", "/")?;
    let (after_filename, filename) = slice_between(r, after_directory, "thumbnail_", "\"")?;
    let (after_tags, tags) = slice_between(r, after_filename, "title=\" ", "\"")?;
    let (next_pos, raw_id) = slice_between(r, after_tags, "posts[", "]")?;

    let id = raw_id.parse::<u64>().ok()?;
    let post = OrgbooruPost {
        id,
        filename,
        directory,
        sub_booru,
        tags,
        domain: domain.to_string(),
    };
    Some((next_pos, Box::new(post)))
}

impl Orgbooru {
    /// Creates a booru backend for the site at `domain`.
    pub fn new(domain: String) -> Orgbooru {
        Orgbooru { domain }
    }

    /// Collects every post that can be parsed out of the listing HTML `r`.
    ///
    /// Scanning jumps from one `https://thumbs.booru.org/` link to the next
    /// and stops at the first link that cannot be parsed into a post, or when
    /// no further link is found.
    fn find_posts(&self, r: String) -> Vec<Box<dyn Post>> {
        let mut results: Vec<Box<dyn Post>> = Vec::new();
        let mut pos = 0;
        while let Some(offset) = r[pos..].find("https://thumbs.booru.org/") {
            match slice_post(&self.domain, &r, pos + offset) {
                Some((next_pos, post)) => {
                    // `slice_post` already stored the domain in the post, so
                    // it is not cloned a second time here.
                    results.push(post);
                    pos = next_pos;
                }
                None => break,
            }
        }
        results
    }

    /// Fetches one listing page and returns the posts found on it.
    ///
    /// `search` is the tag query; an empty query is sent as `all`. `pid` is
    /// passed through as the `pid` query parameter.
    ///
    /// # Errors
    ///
    /// Returns an error when sending the request or reading the response
    /// body fails.
    pub async fn fetch_index(
        &self,
        search: &str,
        pid: u32,
    ) -> Result<std::vec::Vec<Box<dyn Post>>> {
        let tags = if search.is_empty() { "all" } else { search };
        // NOTE(review): `search` is inserted verbatim; presumably callers pass
        // an already URL-safe query — confirm.
        let url = format!(
            "https://{}/index.php?page=post&s=list&pid={}&tags={}",
            self.domain, pid, tags
        );
        println!("fetching {}", url);
        let body = http_client().get(&url).send().await?.text().await?;
        Ok(self.find_posts(body))
    }
}