first! :^)

master
MMaker 2023-05-26 17:41:57 -04:00
commit bd6b1e5f09
Signed by: mmaker
GPG Key ID: CCE79B8FEDA40FB2
6 changed files with 316 additions and 0 deletions

1
.gitignore vendored 100644
View File

@ -0,0 +1 @@
*.dll

9
README.md 100644
View File

@ -0,0 +1,9 @@
# Install
Assuming a virtual environment is used (e.g. one created with `python -m virtualenv venv` and activated):
```
pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url https://download.pytorch.org/whl/cu118
pip install -r requirements.txt
python app.py -p [PATH]
```

54
app.py 100644
View File

@ -0,0 +1,54 @@
import argparse
import time
from pathlib import Path
import interrogator
from PIL import Image
# File suffixes (lower-case, with the leading dot) that are treated as images.
EXTS = ['.jpg', '.jpeg', '.webp', '.png']

# Command line: a single required option, -p/--path, naming the directory
# that is searched for images. Parsing happens at import time.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-p', '--path',
    type=str,
    required=True,
)
args = parser.parse_args()
def get_tags(model: interrogator.Interrogator, file: Path):
    """Tag one image file and report how long it took.

    Args:
        model: Interrogator used to score the image.
        file: Path of the image to open.

    Returns:
        A list of strings: the highest-scoring rating first, followed by the
        post-processed tags whose confidence is at least 0.35 (underscores
        replaced by spaces). Empty if the model returned a falsy result.
    """
    start = time.time()
    tags = []
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the model is done with it; otherwise every
    # processed file leaves a handle open for the rest of the batch.
    with Image.open(file) as image:
        res = model.interrogate(image)
    if res:
        ratings, tag_scores = res[0], res[1]
        # The rating with the highest confidence wins.
        rating = max(ratings, key=ratings.get)  # type: ignore
        pp_tags = list(model.postprocess_tags(
            tag_scores,
            threshold=0.35,
            additional_tags=[],
            sort_by_alphabetical_order=False,
            replace_underscore=True,
            escape_tag=False
        ).keys())
        tags = [rating] + pp_tags
    end = time.time()
    print(f"{end - start:.2f}s - {str(file)}")
    return tags
if __name__ == '__main__':
    # Collect image files below the given path. rglob('*') also yields
    # directories, so keep regular files only: a directory whose name ends
    # in an image extension cannot be opened as an image.
    files = [
        p for p in Path(args.path).rglob('*')
        if p.is_file() and p.suffix.lower() in EXTS
    ]
    model = interrogator.WaifuDiffusionInterrogator(
        'wd14-swinv2-v2-git',
        repo_id='SmilingWolf/wd-v1-4-swinv2-tagger-v2'
    )
    model.load()
    for file in files:
        tags = get_tags(model, file)
        if not tags:
            continue
        # Write one tag per line to a .txt file next to the image
        # (same name, suffix replaced).
        with open(file.with_suffix('.txt'), 'w', encoding='utf8') as f:
            f.write('\n'.join(tags))

54
dbimutils.py 100644
View File

@ -0,0 +1,54 @@
# DanBooru IMage Utility functions
import cv2
import numpy as np
from PIL import Image
def smart_imread(img, flag=cv2.IMREAD_UNCHANGED):
    """Read an image file into an OpenCV array.

    Files whose name ends in ".gif" (any letter case) are opened with PIL,
    converted to RGB and then to BGR channel order; every other file is read
    with cv2.imread.

    Args:
        img: Path of the image, as a string or path-like object.
        flag: Read flag forwarded to cv2.imread (unused for GIF files).

    Returns:
        Whatever cv2.cvtColor / cv2.imread returns for the file.
    """
    path = str(img)
    # Compare case-insensitively so "picture.GIF" takes the PIL route too.
    if path.lower().endswith(".gif"):
        # The context manager closes the file handle once the frame has been
        # converted; convert() returns an independent in-memory image.
        with Image.open(path) as gif:
            rgb = gif.convert("RGB")
        return cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2BGR)
    return cv2.imread(path, flag)
def smart_24bit(img):
    """Convert an image array to 8-bit, 3-channel form.

    - uint16 data is scaled down to uint8 (division by 257).
    - 2-D (single channel) images are expanded with cv2 GRAY2BGR.
    - 4-channel images get pixels with alpha == 0 painted white, then the
      alpha channel is dropped with cv2 BGRA2BGR.
    - Anything else is returned unchanged.

    Args:
        img: numpy image array.

    Returns:
        The converted array. The input array is not modified.
    """
    # Compare dtypes by equality; identity ("is") only works by accident of
    # numpy caching its built-in dtype objects.
    if img.dtype == np.uint16:
        img = (img / 257).astype(np.uint8)
    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        # Copy first so painting the transparent pixels does not overwrite
        # the caller's array.
        img = img.copy()
        trans_mask = img[:, :, 3] == 0
        img[trans_mask] = [255, 255, 255, 255]
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    return img
def make_square(img, target_size):
    """Pad an image with white borders until it is square.

    The side of the result is the largest of the image height, the image
    width and target_size. The padding is split between the two sides of
    each axis, with the extra pixel (if any) going to the bottom/right.

    Args:
        img: numpy image array; the first two axes are height and width.
        target_size: Minimum side length of the padded image.

    Returns:
        The padded image produced by cv2.copyMakeBorder.
    """
    height, width = img.shape[:2]
    side = max(max(height, width), target_size)

    pad_h = side - height
    pad_w = side - width

    top = pad_h // 2
    bottom = pad_h - top
    left = pad_w // 2
    right = pad_w - left

    white = [255, 255, 255]
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white
    )
def smart_resize(img, size):
    """Resize a square image to size x size.

    Assumes the image has already gone through make_square, so only the
    first dimension is inspected. Shrinking uses INTER_AREA, enlarging uses
    INTER_CUBIC, and an image that already has the requested size is
    returned as-is.
    """
    current = img.shape[0]
    if current > size:
        return cv2.resize(img, (size, size), interpolation=cv2.INTER_AREA)
    if current < size:
        return cv2.resize(img, (size, size), interpolation=cv2.INTER_CUBIC)
    return img

189
interrogator.py 100644
View File

@ -0,0 +1,189 @@
# i'm not sure if it's okay to add this file to the repository
import dbimutils
import os
import pandas as pd
import numpy as np
import re
from typing import Tuple, List, Dict
from PIL import Image
from pathlib import Path
from huggingface_hub import hf_hub_download
# Matches a single backslash or parenthesis; used with a substitution that
# prefixes the matched character with a backslash.
tag_escape_pattern = re.compile(r'([\\()])')
# select a device to process
# NOTE(review): not referenced anywhere in the code visible here.
tf_device_name = '/gpu:0'
class Interrogator:
    """Base class for image taggers.

    Subclasses implement load() and interrogate(); this class provides the
    shared tag post-processing and model unloading.
    """

    @staticmethod
    def postprocess_tags(
        tags: Dict[str, float],
        threshold=0.35,
        additional_tags: List[str] = [],
        exclude_tags: List[str] = [],
        sort_by_alphabetical_order=False,
        add_confident_as_weight=False,
        replace_underscore=False,
        replace_underscore_excludes: List[str] = [],
        escape_tag=False
    ) -> Dict[str, float]:
        """Filter, order and format a tag -> confidence mapping.

        Args:
            tags: Mapping of tag name to confidence. Not modified.
            threshold: Tags with a confidence below this are dropped.
            additional_tags: Tags added with confidence 1.0 before filtering.
            exclude_tags: Tags that are always dropped.
            sort_by_alphabetical_order: Sort by tag name (ascending) instead
                of by confidence (descending).
            add_confident_as_weight: Format each tag as "(tag:confidence)".
            replace_underscore: Replace "_" with " " in tag names.
            replace_underscore_excludes: Tags that keep their underscores.
            escape_tag: Prefix backslashes and parentheses with a backslash.

        Returns:
            A new dict of formatted tag -> confidence, in sorted order.
        """
        # Work on a copy: adding the additional tags must not leak into the
        # dict the caller passed in. (The list defaults are only read, never
        # mutated, so sharing them across calls is harmless.)
        scored = dict(tags)
        for extra in additional_tags:
            scored[extra] = 1.0

        # Sets give constant-time membership tests inside the loop.
        excluded = set(exclude_tags)
        keep_underscore = set(replace_underscore_excludes)

        # sort by tag name or confident
        if sort_by_alphabetical_order:
            ordered = sorted(scored.items(), key=lambda item: item[0])
        else:
            ordered = sorted(
                scored.items(), key=lambda item: item[1], reverse=True
            )

        result = {}
        for tag, confidence in ordered:
            # filter tags
            if not confidence >= threshold or tag in excluded:
                continue

            new_tag = tag
            if replace_underscore and tag not in keep_underscore:
                new_tag = new_tag.replace('_', ' ')
            if escape_tag:
                new_tag = tag_escape_pattern.sub(r'\\\1', new_tag)
            if add_confident_as_weight:
                new_tag = f'({new_tag}:{confidence})'

            result[new_tag] = confidence
        return result

    def __init__(self, name: str) -> None:
        """Store the display name; model and tags stay unset until load()."""
        self.name = name
        self.model = None
        self.tags = None

    def load(self):
        """Load the model and tag list. Must be implemented by subclasses."""
        raise NotImplementedError()

    def unload(self) -> bool:
        """Drop the loaded model and tag list.

        Returns:
            True if a model was loaded and has been removed, else False.
        """
        unloaded = False
        if hasattr(self, 'model') and self.model is not None:
            del self.model
            unloaded = True
            print(f'Unloaded {self.name}')
        if hasattr(self, 'tags'):
            del self.tags
        return unloaded

    def interrogate(
        self,
        image: Image.Image
    ) -> Tuple[
        Dict[str, float],  # rating confidents
        Dict[str, float]  # tag confidents
    ]:
        """Score an image. Must be implemented by subclasses.

        Returns:
            A (rating confidences, tag confidences) pair of dicts.
        """
        raise NotImplementedError()
class WaifuDiffusionInterrogator(Interrogator):
    """Interrogator that runs an ONNX model fetched via hf_hub_download."""

    def __init__(
        self,
        name: str,
        model_path='model.onnx',
        tags_path='selected_tags.csv',
        **kwargs
    ) -> None:
        """Remember where the model and tag list come from.

        Args:
            name: Display name used in log messages.
            model_path: File name of the ONNX model inside the repository.
            tags_path: File name of the tag CSV inside the repository.
            **kwargs: Forwarded to hf_hub_download; download() reads
                kwargs['repo_id'] for its log message.
        """
        super().__init__(name)
        self.model_path = model_path
        self.tags_path = tags_path
        self.kwargs = kwargs

    def download(self) -> Tuple[os.PathLike, os.PathLike]:
        """Fetch the model and tag files and return their local paths."""
        print(f"Loading {self.name} model file from {self.kwargs['repo_id']}")

        model_path = Path(hf_hub_download(
            **self.kwargs, filename=self.model_path))
        tags_path = Path(hf_hub_download(
            **self.kwargs, filename=self.tags_path))
        return model_path, tags_path

    def load(self) -> None:
        """Download the files, create the inference session, read the tags."""
        model_path, tags_path = self.download()

        # Imported here rather than at module level, so onnxruntime is only
        # needed once a model is actually loaded.
        from onnxruntime import InferenceSession

        # https://onnxruntime.ai/docs/execution-providers/
        # https://github.com/toriato/stable-diffusion-webui-wd14-tagger/commit/e4ec460122cf674bbf984df30cdb10b4370c1224#r92654958
        providers = ['CUDAExecutionProvider']
        self.model = InferenceSession(str(model_path), providers=providers)
        print(f'Loaded {self.name} model from {model_path}')

        self.tags = pd.read_csv(tags_path)

    def interrogate(
        self,
        image: Image.Image
    ) -> Tuple[
        Dict[str, float],  # rating confidents
        Dict[str, float]  # tag confidents
    ]:
        """Run the model on an image.

        Args:
            image: PIL image to tag.

        Returns:
            A (rating confidences, tag confidences) pair of dicts mapping
            the names from the tag CSV to the model's scores.

        Raises:
            ValueError: If the number of model outputs differs from the
                number of rows in the tag CSV.
        """
        # init model
        if not hasattr(self, 'model') or self.model is None:
            self.load()

        # code for converting the image and running the model is taken from the link below
        # thanks, SmilingWolf!
        # https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags/blob/main/app.py

        # convert an image to fit the model
        model_input = self.model.get_inputs()[0]
        _, height, _, _ = model_input.shape

        # alpha to white
        image = image.convert('RGBA')
        new_image = Image.new('RGBA', image.size, 'WHITE')
        new_image.paste(image, mask=image)
        image = new_image.convert('RGB')
        image = np.asarray(image)  # type: ignore

        # PIL RGB to OpenCV BGR
        image = image[:, :, ::-1]  # type: ignore

        image = dbimutils.make_square(image, height)
        image = dbimutils.smart_resize(image, height)  # type: ignore
        image = image.astype(np.float32)  # type: ignore
        image = np.expand_dims(image, 0)  # type: ignore

        # evaluate model
        input_name = model_input.name
        label_name = self.model.get_outputs()[0].name
        confidents = self.model.run([label_name], {input_name: image})[0]

        # Pair names with scores directly rather than copying part of the
        # DataFrame and assigning a column to it for every image, which is
        # the chained-assignment pattern pandas warns about.
        names = self.tags['name'].tolist()
        scores = np.asarray(confidents[0]).tolist()
        if len(names) != len(scores):
            raise ValueError(
                f'Model returned {len(scores)} scores '
                f'but the tag list has {len(names)} entries'
            )

        # first 4 items are for rating (general, sensitive, questionable, explicit)
        ratings = dict(zip(names[:4], scores[:4]))

        # rest are regular tags
        tags = dict(zip(names[4:], scores[4:]))

        return ratings, tags

9
requirements.txt 100644
View File

@ -0,0 +1,9 @@
huggingface-hub==0.14.1
numpy==1.24.3
onnxruntime-gpu==1.15.0
opencv-contrib-python==4.7.0.72
packaging==23.1
pandas==2.0.1
Pillow==9.5.0
protobuf==4.23.2
tqdm==4.65.0