root/src/booru/mod.rs

// booru/mod.rs
//
// Copyright 2020 nee <nee-git@hidamari.blue>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program.  If not, see <http://www.gnu.org/licenses/>.
//
// SPDX-License-Identifier: GPL-3.0-or-later
use anyhow::{Result, bail};
use brotli::BrotliDecompress;
use serde::Deserialize;
use std::collections::HashMap;
use std::hash::Hasher;
use std::io::Read;
use std::io::{BufRead, BufReader};
// use std::io::{BufWriter}
// use std::io::Write;
// use brotli::enc::BrotliCompress;
// use brotli::enc::BrotliEncoderParams;

use crate::booru::http::http_client;

pub(crate) mod dan;
pub(crate) mod e621;
pub(crate) mod gel;
pub(crate) mod http;
pub(crate) mod local;
pub(crate) mod moe;
pub(crate) mod org;
pub(crate) mod safe;
pub(crate) mod zero;

pub use local::Localbooru;

/// Category a tag can belong to.
///
/// The explicit discriminants are the numeric codes understood by the
/// `From<u8>` conversion below. Code `2` has no category of its own and is
/// the discriminant of [`TagType::Unknown`].
#[derive(Copy, Clone, PartialEq, Debug)]
#[repr(u8)]
pub enum TagType {
    General = 0,
    Artist = 1,
    Unknown = 2,
    Copyright = 3,
    Character = 4,
    Meta = 5,
    Deprecated = 6,
}

impl From<u8> for TagType {
    /// Maps a numeric category code to its variant.
    ///
    /// Codes without a dedicated variant (everything above `6`) fall back to
    /// [`TagType::Unknown`].
    fn from(code: u8) -> TagType {
        // Indexed by discriminant: position N holds the variant whose code is N.
        const BY_CODE: [TagType; 7] = [
            TagType::General,
            TagType::Artist,
            TagType::Unknown,
            TagType::Copyright,
            TagType::Character,
            TagType::Meta,
            TagType::Deprecated,
        ];
        BY_CODE
            .get(usize::from(code))
            .copied()
            .unwrap_or(TagType::Unknown)
    }
}
pub type TagDB = HashMap<String, (TagType, u32)>;
pub fn tag_db_empty() -> TagDB {
    HashMap::<String, (TagType, u32)>::new()
}

impl TagType {
    /// Returns the CSS class name for this tag category.
    ///
    /// Every name is a string literal, so the result is `&'static str` and may
    /// outlive the `TagType` value it was obtained from (for example a
    /// temporary produced by `TagType::from`).
    pub fn css_class(&self) -> &'static str {
        match self {
            TagType::General => "General",
            TagType::Artist => "Artist",
            TagType::Unknown => "Unknown",
            TagType::Copyright => "Copyright",
            TagType::Character => "Character",
            TagType::Meta => "Meta",
            TagType::Deprecated => "Deprecated",
        }
    }
}

/// Content rating levels.
///
/// Each discriminant is the ASCII byte of the rating's lowercase initial
/// (`s`, `q`, `e`).
// NOTE(review): nothing in this module references `Rating`, which is presumably
// why `dead_code` is allowed here — confirm whether other modules use it.
#[allow(dead_code)]
#[derive(Clone, Copy, PartialEq, Debug)]
#[repr(u8)]
pub enum Rating {
    Safe = b's',
    Questionable = b'q',
    Explicit = b'e',
}

pub trait Post: std::fmt::Debug + std::marker::Send + std::marker::Sync {
    fn id(&self) -> u64;
    fn sort_id(&self) -> u64;
    fn width(&self) -> u32;
    fn height(&self) -> u32;

    fn sample_url(&self) -> String;
    fn thumb_url(&self) -> String;
    fn full_url(&self) -> String;
    fn filename(&self) -> String;

    fn tags(&self) -> Vec<&str>;
    fn tags_character(&self, db: &TagDB) -> Vec<&str> {
        filter_tags(self.tags(), db, TagType::Character)
    }
    fn tags_copyright(&self, db: &TagDB) -> Vec<&str> {
        filter_tags(self.tags(), db, TagType::Copyright)
    }
    fn tags_artist(&self, db: &TagDB) -> Vec<&str> {
        filter_tags(self.tags(), db, TagType::Artist)
    }
    fn tags_general(&self, db: &TagDB) -> Vec<&str> {
        filter_tags(self.tags(), db, TagType::General)
    }
    fn tags_meta(&self, db: &TagDB) -> Vec<&str> {
        filter_tags(self.tags(), db, TagType::Meta)
    }
    fn tags_unknown(&self, db: &TagDB) -> Vec<&str> {
        self.tags()
            .drain(..)
            .filter(|t| db.get(&t.to_string()).map(|_| false).unwrap_or(true))
            .collect()
    }
    fn web_url(&self) -> String;
    fn domain(&self) -> &String;
    fn clone_post(&self) -> std::boxed::Box<dyn Post>;
    fn imp(&self) -> Posts;
    fn is_local(&self) -> bool {
        false
    }

    fn open_web(&self) {
        info!("{}", self.web_url());
        gtk::UriLauncher::new(&self.web_url()).launch(
            gtk::Window::NONE,
            gtk::gio::Cancellable::NONE,
            |_| {},
        );
    }
}

/// Wiki or artist entry as returned by `Boorus::fetch_wiki` and
/// `Boorus::fetch_artist`.
///
/// Deserialized with serde; `other_names` and `links` fall back to an empty
/// list when the field is absent from the input.
#[derive(Deserialize, Debug, Default)]
pub struct WikiEntry {
    // pub id: u64,
    /// Title of the entry.
    pub title: String,
    /// Body text of the entry.
    pub body: String,
    /// Alternative names; empty when the input has no such field.
    #[serde(default)]
    pub other_names: Vec<String>,
    /// Links attached to the entry; empty when the input has no such field.
    #[serde(default)]
    pub links: Vec<String>,
}

/// Keeps only the tags that `db` classifies as `tag_type`, preserving order.
///
/// Tags without an entry in `db` are dropped.
fn filter_tags<'a>(tags: Vec<&'a str>, db: &TagDB, tag_type: TagType) -> Vec<&'a str> {
    tags.into_iter()
        // `String: Borrow<str>` lets the map be queried with the `&str`
        // directly, avoiding a heap allocation per tag.
        .filter(|tag| matches!(db.get(*tag), Some((kind, _)) if *kind == tag_type))
        .collect()
}

/// Object-safe description of a booru backend.
///
/// The async fetch operations live on the concrete [`Boorus`] enum, which is
/// reachable through [`Booru::imp`].
pub trait Booru: std::fmt::Debug + std::marker::Send {
    /// Computes the page number to request next from the current page and the
    /// number of results the last request returned.
    // NOTE(review): paging semantics are backend-specific — confirm per implementor.
    fn next_page(&self, current_page: u32, last_result_count: u32) -> u32;
    /// Returns a boxed copy of this backend as a trait object.
    fn clone_booru(&self) -> Box<dyn Booru>;
    /// Domain of this backend; also what equality between `dyn Booru` values
    /// compares.
    fn get_domain(&self) -> &String;
    /// Reports whether this backend handles `domain`. `make_booru` passes the
    /// `DefaultHasher` hash of the domain's bytes as `hash`.
    fn check_api_by_domain(&self, hash: u64, domain: &str) -> bool;
    /// Returns this backend as a value of the concrete [`Boorus`] enum.
    fn imp(&self) -> Boorus;
    /// Whether this is a local backend; `false` unless overridden.
    fn is_local(&self) -> bool {
        false
    }
    /// Whether this backend offers a wiki; `false` unless overridden.
    fn has_wiki(&self) -> bool {
        false
    }
}

/// Two backends are considered equal when they report the same domain.
impl PartialEq for dyn Booru {
    fn eq(&self, other: &Self) -> bool {
        let (ours, theirs) = (self.get_domain(), other.get_domain());
        ours == theirs
    }
}

// async fn check_api(this: &dyn Booru) -> bool {
//     !this.imp().fetch_index("", 0).await.is_empty()
// }

/// Builds the booru backend responsible for `domain`.
///
/// Every known backend is asked through [`Booru::check_api_by_domain`] whether
/// it handles the domain; it receives both the domain and the `DefaultHasher`
/// hash of the domain's bytes. Candidates are tried in the fixed order listed
/// below and the first one that accepts is returned, so the outcome is
/// deterministic even if more than one backend accepts the same domain.
///
/// The domain and its hash are printed to stdout on every call.
///
/// Returns `None` when no backend accepts the domain.
pub fn make_booru(domain: &String) -> Option<Box<dyn Booru>> {
    let candidates: [Box<dyn Booru>; 8] = [
        Box::new(moe::Moebooru::new(domain.clone())),
        Box::new(dan::Danbooru::new(domain.clone())),
        Box::new(e621::E621::new(domain.clone())),
        Box::new(gel::Gelbooru::new(domain.clone())),
        Box::new(safe::Safebooru::new(domain.clone())),
        Box::new(org::Orgbooru::new(domain.clone())),
        Box::new(zero::Zerobooru::new(domain.clone())),
        Box::new(local::Localbooru::default()),
    ];

    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    hasher.write(domain.as_bytes());
    let hash = hasher.finish();
    println!("{} {}", domain, hash);

    // try by preprogrammed domain hashes
    // TODO rethink probing the remote API as a fallback when nothing matches
    //
    // `IntoIterator::into_iter` is spelled out so the array is consumed by
    // value regardless of the crate's edition; the matching backend is moved
    // out instead of being cloned.
    IntoIterator::into_iter(candidates).find(|booru| booru.check_api_by_domain(hash, domain))
}

/// Closed set of the concrete booru backends.
///
/// Returned by [`Booru::imp`]; the async fetch operations are implemented on
/// this enum and dispatch to the wrapped backend.
pub enum Boorus {
    Danbooru(crate::booru::dan::Danbooru),
    E621(crate::booru::e621::E621),
    Gelbooru(crate::booru::gel::Gelbooru),
    Moebooru(crate::booru::moe::Moebooru),
    Orgbooru(crate::booru::org::Orgbooru),
    Safebooru(crate::booru::safe::Safebooru),
    Zerobooru(crate::booru::zero::Zerobooru),
    Localbooru(crate::booru::local::Localbooru),
}

/// Closed set of the concrete post types, one per backend.
///
/// Returned by [`Post::imp`]. Zerobooru is the only backend with two post
/// types (`ZerobooruIndex` and `ZerobooruDetail`).
pub enum Posts {
    Danbooru(crate::booru::dan::DanbooruPost),
    E621(crate::booru::e621::E621Post),
    Gelbooru(crate::booru::gel::GelbooruPost),
    Moebooru(crate::booru::moe::MoebooruPost),
    Orgbooru(crate::booru::org::OrgbooruPost),
    Safebooru(crate::booru::safe::SafebooruPost),
    ZerobooruIndex(crate::booru::zero::ZeroIndexPost),
    ZerobooruDetail(crate::booru::zero::ZeroDetailPost),
    Localbooru(crate::booru::local::LocalbooruPost),
}

impl Boorus {
    /// Asks the wrapped backend for the posts matching `search`, passing `pid`
    /// through unchanged.
    ///
    /// A failed fetch is reported on stderr and yields an empty list, so
    /// callers cannot distinguish "no results" from "request failed".
    pub async fn fetch_index(&self, search: &str, pid: u32) -> Vec<Box<dyn Post>> {
        let outcome = match self {
            Boorus::Danbooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::E621(booru) => booru.fetch_index(search, pid).await,
            Boorus::Gelbooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::Moebooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::Orgbooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::Safebooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::Zerobooru(booru) => booru.fetch_index(search, pid).await,
            Boorus::Localbooru(booru) => booru.fetch_index(search, pid).await,
        };
        match outcome {
            Ok(posts) => posts,
            Err(err) => {
                eprintln!("Error fetching: {err:#?}");
                Vec::new()
            }
        }
    }

    /// Fetches the wiki entry for `search`.
    ///
    /// # Errors
    ///
    /// Fails for every backend other than Danbooru, E621 and Moebooru, and
    /// whenever the backend's own request fails.
    pub async fn fetch_wiki(&self, search: &str) -> Result<WikiEntry> {
        match self {
            Boorus::Danbooru(booru) => booru.fetch_wiki(search).await,
            Boorus::E621(booru) => booru.fetch_wiki(search).await,
            Boorus::Moebooru(booru) => booru.fetch_wiki(search).await,
            Boorus::Gelbooru(_)
            | Boorus::Orgbooru(_)
            | Boorus::Safebooru(_)
            | Boorus::Zerobooru(_)
            | Boorus::Localbooru(_) => bail!("no wiki for this booru"),
        }
    }

    /// Fetches the artist entry for `search`.
    ///
    /// # Errors
    ///
    /// Fails for every backend other than Danbooru, E621 and Moebooru, and
    /// whenever the backend's own request fails.
    pub async fn fetch_artist(&self, search: &str) -> Result<WikiEntry> {
        match self {
            Boorus::Danbooru(booru) => booru.fetch_artist(search).await,
            Boorus::E621(booru) => booru.fetch_artist(search).await,
            Boorus::Moebooru(booru) => booru.fetch_artist(search).await,
            Boorus::Gelbooru(_)
            | Boorus::Orgbooru(_)
            | Boorus::Safebooru(_)
            | Boorus::Zerobooru(_)
            | Boorus::Localbooru(_) => bail!("no wiki for this booru"),
        }
    }

    /// Returns `true` only for the `Localbooru` variant.
    pub fn is_local(&self) -> bool {
        matches!(self, Boorus::Localbooru(_))
    }
}

impl Posts {
    pub async fn fetch_full(&self) -> Result<(bytes::Bytes, Option<std::boxed::Box<dyn Post>>)> {
        match self {
            Posts::Danbooru(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::E621(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::Gelbooru(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::Moebooru(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::Orgbooru(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::Safebooru(post) => {
                fetch_full_generic(Box::<dyn Post>::from(Box::new(post.clone()))).await
            }
            Posts::ZerobooruIndex(post) => post.fetch_full().await,
            Posts::ZerobooruDetail(post) => post.fetch_full().await,
            Posts::Localbooru(post) => fetch_full_local(post.clone()).await,
        }
    }
    pub fn post(&self) -> &dyn Post {
        match &self {
            Posts::Danbooru(post) => post,
            Posts::E621(post) => post,
            Posts::Gelbooru(post) => post,
            Posts::Moebooru(post) => post,
            Posts::Orgbooru(post) => post,
            Posts::Safebooru(post) => post,
            Posts::ZerobooruIndex(post) => post,
            Posts::ZerobooruDetail(post) => post,
            Posts::Localbooru(post) => post,
        }
    }
}

/// Downloads the file behind `this.full_url()` with the shared HTTP client.
///
/// Returns the response body; the second tuple element is always `None`.
///
/// # Errors
///
/// Fails when sending the request or reading the body fails.
async fn fetch_full_generic(
    this: std::boxed::Box<dyn Post>,
) -> Result<(bytes::Bytes, Option<std::boxed::Box<dyn Post>>)> {
    println!("fetching the img {}", this.full_url());
    let target = this.full_url();

    let response = http_client().get(&target).send().await?;
    let body = response.bytes().await?;
    Ok((body, None))
}

/// Reads the file of a local post from disk.
///
/// The path is resolved by `crate::download::download_read_path` from the
/// post's `full_url`. The second tuple element is always `None`.
///
/// # Errors
///
/// Fails when the file cannot be read.
async fn fetch_full_local(
    this: local::LocalbooruPost,
) -> Result<(bytes::Bytes, Option<std::boxed::Box<dyn Post>>)> {
    let location = crate::download::download_read_path(this.full_url());
    let raw = tokio::fs::read(location).await?;
    let payload: bytes::Bytes = raw.into();
    Ok((payload, None))
}

/// Decodes the HTML entities `&amp;` and `&#039;` in a tag for display.
///
/// Entities are replaced in the order listed below.
// NOTE(review): because `&amp;` is decoded first, a double-escaped
// `&amp;#039;` ends up as `'`. Confirm this is intended before reordering.
pub fn display_tag(tag: &str) -> String {
    const ENTITIES: [(&str, &str); 2] = [("&amp;", "&"), ("&#039;", "'")];
    ENTITIES
        .iter()
        .fold(tag.to_owned(), |text, &(entity, plain)| {
            text.replace(entity, plain)
        })
}

/// Tag database embedded into the binary at compile time. `load_tag_db`
/// brotli-decompresses and parses it at runtime.
static TAG_DB_BINARY_BROTLI: &[u8] = include_bytes!("../../data/tags/merged.boorutags.br");
// TODO: is there any way to load this statically? Maybe as a list of pairs if
// a HashMap is impossible?
// const TAG_DB: TagDB = load_tag_db();

/// Decompresses the embedded tag database and parses it into a [`TagDB`].
///
/// # Panics
///
/// Panics if the embedded blob cannot be brotli-decompressed, which would mean
/// the data file compiled into the binary is corrupt.
pub fn load_tag_db() -> TagDB {
    let mut compressed = BufReader::new(TAG_DB_BINARY_BROTLI);
    let mut uncompressed: Vec<u8> = Vec::new();
    BrotliDecompress(&mut compressed, &mut uncompressed)
        .expect("embedded tag database must be valid brotli data");
    parse_tag_db(&uncompressed)
}

/// Parses the uncompressed tag database.
///
/// The data is a sequence of sections, one per tag type:
///
/// * one byte holding the tag type as an ASCII digit,
/// * then any number of entries, each a little-endian `u32` usage count
///   followed by the tag name terminated by a space,
/// * an entry whose count is `0` ends the section.
///
/// Parsing stops quietly at the end of the data or at a byte that is not a
/// section header. A truncated or non-UTF-8 entry is logged and everything
/// parsed up to that point is returned.
fn parse_tag_db(data: &[u8]) -> TagDB {
    let mut db: TagDB = HashMap::new();
    let mut reader = BufReader::new(data);

    loop {
        // Section header: a single ASCII digit naming the tag type.
        let tag_type = match read_u8(&mut reader) {
            Ok(digit) if digit.is_ascii_digit() => TagType::from(digit - b'0'),
            _ => break,
        };

        loop {
            // the amount this tag was used aka. how popular it is
            let count = read_u32(&mut reader);
            // The tag name is read even when the count is missing, so the
            // reader advances exactly as far in both cases. The outcome of
            // `read_until` is deliberately ignored: a short read just yields
            // a shorter (possibly empty) name.
            let mut raw_tag = Vec::<u8>::new();
            let _ = reader.read_until(b' ', &mut raw_tag);

            match (count, String::from_utf8(raw_tag)) {
                // empty entry marks the end of the tag_type
                (Ok(0), Ok(_)) => break,
                (Ok(count), Ok(tag)) => {
                    // `trim` drops the trailing space delimiter.
                    db.insert(tag.trim().to_string(), (tag_type, count));
                }
                _ => {
                    error!("TagDB: unexpected eof at tag");
                    return db;
                }
            }
        }
    }
    db
}

/// Reads exactly one byte from `reader`.
///
/// # Errors
///
/// Fails when the reader cannot supply a byte, e.g. at the end of the input.
fn read_u8<R: Read>(mut reader: R) -> Result<u8, Box<dyn std::error::Error>> {
    let mut byte = 0u8;
    reader.read_exact(std::slice::from_mut(&mut byte))?;
    Ok(byte)
}

/// Reads four bytes from `reader` and decodes them as a little-endian `u32`.
///
/// # Errors
///
/// Fails when fewer than four bytes are available.
fn read_u32<R: Read>(mut reader: R) -> Result<u32, Box<dyn std::error::Error>> {
    let mut le_bytes = [0u8; 4];
    match reader.read_exact(&mut le_bytes) {
        Ok(()) => Ok(u32::from_le_bytes(le_bytes)),
        Err(io_err) => Err(io_err.into()),
    }
}