first! :^)
commit
bd6b1e5f09
|
@ -0,0 +1 @@
|
|||
*.dll
|
|
@ -0,0 +1,9 @@
|
|||
# Install
|
||||
|
||||
Assuming venv usage (i.e. `python -m virtualenv venv`)
|
||||
|
||||
```
|
||||
pip install torch==2.0.1 torchvision==0.15.2 --extra-index-url https://download.pytorch.org/whl/cu118
|
||||
pip install -r requirements.txt
|
||||
python app.py -p [PATH]
|
||||
```
|
|
@ -0,0 +1,54 @@
|
|||
import argparse
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import interrogator
|
||||
from PIL import Image
|
||||
|
||||
# Lower-case file suffixes (leading dot included) that are treated as images.
EXTS = ['.jpg', '.jpeg', '.webp', '.png']

# Command-line interface: -p/--path is mandatory and is kept as a plain string.
# NOTE: the arguments are parsed at import time, so importing this module
# without -p/--path on the command line makes argparse exit.
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--path', required=True, type=str)
args = parser.parse_args()
|
||||
|
||||
def get_tags(model: interrogator.Interrogator, file: Path):
    """Tag a single image file and print how long it took.

    Args:
        model: Interrogator used to score the image and post-process the tags.
        file: Path of the image to open.

    Returns:
        A list whose first item is the highest-scoring rating label, followed
        by the post-processed tag names (confidence >= 0.35, underscores
        replaced by spaces). An empty list when ``model.interrogate`` returns
        a falsy result.
    """
    # perf_counter is monotonic, so the reported duration cannot be skewed by
    # wall-clock adjustments.
    start = time.perf_counter()

    # Open through a context manager so the file handle is released as soon
    # as the model is done with the image; otherwise one handle per image
    # stays open until garbage collection.
    with Image.open(file) as image:
        res = model.interrogate(image)

    tags = []
    if res:
        # res[0] holds the rating confidences, res[1] the tag confidences.
        rating = max(res[0], key=res[0].get)  # type: ignore
        pp_tags = list(model.postprocess_tags(
            res[1],
            threshold=0.35,
            additional_tags=[],
            sort_by_alphabetical_order=False,
            replace_underscore=True,
            escape_tag=False
        ).keys())
        tags = [rating] + pp_tags

    elapsed = time.perf_counter() - start
    print(f"{elapsed:.2f}s - {str(file)}")
    return tags
|
||||
|
||||
if __name__ == '__main__':
    # Walk the target directory recursively and keep regular files whose
    # suffix (case-insensitive) is a known image extension. The is_file()
    # check keeps directories that merely look like images (e.g. "x.png/")
    # away from the tagger.
    files = [
        p for p in Path(args.path).rglob('*')
        if p.is_file() and p.suffix.lower() in EXTS
    ]

    model = interrogator.WaifuDiffusionInterrogator(
        'wd14-swinv2-v2-git',
        repo_id='SmilingWolf/wd-v1-4-swinv2-tagger-v2'
    )
    model.load()

    for file in files:
        tags = get_tags(model, file)
        if not tags:
            continue

        # Write the tags, one per line, to a sidecar file next to the image:
        # same directory and stem, ".txt" suffix. Plain 'w' is enough because
        # the file is never read back.
        with open(file.with_suffix('.txt'), 'w', encoding='utf8') as f:
            f.write('\n'.join(tags))
|
|
@ -0,0 +1,54 @@
|
|||
# DanBooru IMage Utility functions
|
||||
|
||||
import cv2
|
||||
import numpy as np
|
||||
from PIL import Image
|
||||
|
||||
|
||||
def smart_imread(img, flag=cv2.IMREAD_UNCHANGED):
    """Read an image file into an OpenCV-style (BGR) numpy array.

    Files ending in ".gif" (any letter case) are decoded with PIL, converted
    to RGB and then reordered to BGR; every other file goes through
    ``cv2.imread`` with ``flag``.

    Args:
        img: Path of the image file (string or path-like).
        flag: ``cv2.imread`` read flag; ignored for GIF files.

    Returns:
        The decoded image as a numpy array. For non-GIF files this is
        whatever ``cv2.imread`` returns (per the OpenCV docs, ``None`` when
        the file cannot be read).
    """
    path = str(img)

    # Compare case-insensitively so "x.GIF" takes the PIL route as well.
    if path.lower().endswith(".gif"):
        # The context manager closes the file once the RGB copy exists.
        with Image.open(path) as gif:
            rgb = gif.convert("RGB")
        return cv2.cvtColor(np.array(rgb), cv2.COLOR_RGB2BGR)

    return cv2.imread(path, flag)
|
||||
|
||||
|
||||
def smart_24bit(img):
    """Normalise an image array to 8-bit, 3-channel BGR.

    * uint16 data is scaled down to uint8.
    * 2-D (single channel) arrays are expanded to BGR.
    * 4-channel arrays have their fully transparent pixels painted white
      before the alpha channel is dropped.

    The input array is never modified; arrays that already have three
    channels and are not uint16 are returned as-is.

    Args:
        img: numpy image array, either 2-D or 3-D with the channel axis last.

    Returns:
        The converted numpy array.
    """
    if img.dtype == np.uint16:
        # 65535 / 257 == 255, so this maps the full 16-bit range onto 8 bits.
        img = (img / 257).astype(np.uint8)

    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    elif img.shape[2] == 4:
        # Work on a copy: the masked assignment below would otherwise
        # overwrite pixels in the caller's array.
        img = img.copy()
        trans_mask = img[:, :, 3] == 0
        img[trans_mask] = [255, 255, 255, 255]
        img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

    return img
|
||||
|
||||
|
||||
def make_square(img, target_size):
    """Pad an image with white borders until it is square.

    The side of the result is the largest of the image height, the image
    width and ``target_size``. Padding is split between the two sides of each
    axis; when the amount is odd, the extra pixel goes to the bottom/right.

    Args:
        img: numpy image array whose first two axes are (height, width).
        target_size: Minimum side length of the padded result.

    Returns:
        The padded array produced by ``cv2.copyMakeBorder``.
    """
    height, width = img.shape[:2]
    side = max(height, width, target_size)

    pad_vertical = side - height
    pad_horizontal = side - width

    top = pad_vertical // 2
    bottom = pad_vertical - top
    left = pad_horizontal // 2
    right = pad_horizontal - left

    white = [255, 255, 255]
    return cv2.copyMakeBorder(
        img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=white
    )
|
||||
|
||||
|
||||
def smart_resize(img, size):
    """Resize a square image to ``size`` x ``size``.

    Only the first axis is inspected, so the image is assumed to have gone
    through ``make_square`` already. Shrinking uses INTER_AREA, enlarging
    uses INTER_CUBIC, and an image that already has the requested size is
    returned untouched.

    Args:
        img: Square numpy image array.
        size: Target side length in pixels.

    Returns:
        The resized array, or ``img`` itself when no resize is needed.
    """
    current = img.shape[0]
    if current == size:
        return img

    interpolation = cv2.INTER_AREA if current > size else cv2.INTER_CUBIC
    return cv2.resize(img, (size, size), interpolation=interpolation)
|
|
@ -0,0 +1,189 @@
|
|||
# i'm not sure if it's okay to add this file to the repository
|
||||
import dbimutils
|
||||
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import re
|
||||
|
||||
from typing import Dict, List, Optional, Tuple
|
||||
from PIL import Image
|
||||
|
||||
from pathlib import Path
|
||||
from huggingface_hub import hf_hub_download
|
||||
|
||||
# Device to process on.
# NOTE(review): nothing visible in this file reads this value - confirm it is
# still needed before relying on it.
tf_device_name = '/gpu:0'

# Matches a single backslash or parenthesis and captures it, so that a
# substitution can re-emit the character behind an escaping backslash.
tag_escape_pattern = re.compile(r'([\\()])')
|
||||
|
||||
|
||||
class Interrogator:
    """Base class for image taggers.

    Subclasses provide ``load`` and ``interrogate``; ``postprocess_tags`` is a
    shared helper that filters, orders and formats the tag confidences that
    ``interrogate`` returns.
    """

    @staticmethod
    def postprocess_tags(
        tags: Dict[str, float],

        threshold=0.35,
        additional_tags: Optional[List[str]] = None,
        exclude_tags: Optional[List[str]] = None,
        sort_by_alphabetical_order=False,
        add_confident_as_weight=False,
        replace_underscore=False,
        replace_underscore_excludes: Optional[List[str]] = None,
        escape_tag=False
    ) -> Dict[str, float]:
        """Filter, order and format a tag -> confidence mapping.

        The input mapping is left untouched; a new dict is returned.

        Args:
            tags: Tag name -> confidence.
            threshold: Tags with a confidence below this value are dropped.
            additional_tags: Tags to add with a confidence of 1.0.
            exclude_tags: Tag names (as they appear in ``tags``) to drop.
            sort_by_alphabetical_order: Order by tag name ascending when
                true, otherwise by confidence descending.
            add_confident_as_weight: Format each key as ``(tag:confidence)``.
            replace_underscore: Replace ``_`` with a space in tag names.
            replace_underscore_excludes: Tag names that keep their
                underscores even when ``replace_underscore`` is set.
            escape_tag: Prefix backslashes and parentheses in tag names with
                a backslash.

        Returns:
            A dict of formatted tag name -> confidence in the chosen order.
        """
        # Copy before merging the additional tags in, so the caller's dict is
        # not modified as a side effect.
        scored = dict(tags)
        for extra in additional_tags or ():
            scored[extra] = 1.0

        excluded = frozenset(exclude_tags or ())
        keep_underscores = frozenset(replace_underscore_excludes or ())

        if sort_by_alphabetical_order:
            ordered = sorted(scored.items(), key=lambda item: item[0])
        else:
            ordered = sorted(
                scored.items(), key=lambda item: item[1], reverse=True
            )

        result: Dict[str, float] = {}
        for tag, confidence in ordered:
            if not (confidence >= threshold) or tag in excluded:
                continue

            formatted = tag
            if replace_underscore and tag not in keep_underscores:
                formatted = formatted.replace('_', ' ')

            if escape_tag:
                formatted = tag_escape_pattern.sub(r'\\\1', formatted)

            if add_confident_as_weight:
                formatted = f'({formatted}:{confidence})'

            # If two tags format to the same key, the first one fixes the
            # position and the last one supplies the value.
            result[formatted] = confidence

        return result

    def __init__(self, name: str) -> None:
        """Store the display name; model and tag table start out unloaded."""
        self.name = name
        self.model = None
        self.tags = None

    def load(self):
        """Load the model and its tag table. Must be overridden."""
        raise NotImplementedError()

    def unload(self) -> bool:
        """Drop the model and tag table references.

        Both attributes are deleted from the instance rather than reset to
        ``None``, so code that runs afterwards must check with ``hasattr``.

        Returns:
            True when a loaded model was released, False otherwise.
        """
        unloaded = False

        if hasattr(self, 'model') and self.model is not None:
            del self.model
            unloaded = True
            print(f'Unloaded {self.name}')

        if hasattr(self, 'tags'):
            del self.tags

        return unloaded

    def interrogate(
        self,
        image: "Image.Image"
    ) -> Tuple[
        Dict[str, float],  # rating confidents
        Dict[str, float]  # tag confidents
    ]:
        """Score an image. Must be overridden.

        Returns:
            A ``(rating confidences, tag confidences)`` pair of dicts.
        """
        raise NotImplementedError()
|
||||
|
||||
|
||||
class WaifuDiffusionInterrogator(Interrogator):
    """Interrogator backed by an ONNX model fetched from the Hugging Face Hub.

    Extra keyword arguments given to the constructor (for example
    ``repo_id``) are forwarded unchanged to ``hf_hub_download``.
    """

    # The first rows of the tag table are rating labels
    # (general, sensitive, questionable, explicit); the rest are regular tags.
    _RATING_COUNT = 4

    def __init__(
        self,
        name: str,
        model_path='model.onnx',
        tags_path='selected_tags.csv',
        **kwargs
    ) -> None:
        """Remember which files to download; nothing is fetched yet.

        Args:
            name: Display name used in log messages.
            model_path: File name of the ONNX model inside the repository.
            tags_path: File name of the tag CSV inside the repository.
            **kwargs: Forwarded to ``hf_hub_download``; ``repo_id`` is also
                read by ``download`` for its log message.
        """
        super().__init__(name)
        self.model_path = model_path
        self.tags_path = tags_path
        self.kwargs = kwargs

    def download(self) -> Tuple[os.PathLike, os.PathLike]:
        """Fetch the model and tag files and return their local paths.

        Raises:
            KeyError: If no ``repo_id`` keyword was given to the constructor.
        """
        print(f"Loading {self.name} model file from {self.kwargs['repo_id']}")

        model_path = Path(hf_hub_download(
            **self.kwargs, filename=self.model_path))
        tags_path = Path(hf_hub_download(
            **self.kwargs, filename=self.tags_path))
        return model_path, tags_path

    def load(self) -> None:
        """Download the files, create the ONNX session and read the tag CSV."""
        model_path, tags_path = self.download()

        # Imported lazily, so onnxruntime is only needed once a model is
        # actually loaded.
        from onnxruntime import InferenceSession

        # https://onnxruntime.ai/docs/execution-providers/
        # https://github.com/toriato/stable-diffusion-webui-wd14-tagger/commit/e4ec460122cf674bbf984df30cdb10b4370c1224#r92654958
        # Providers are tried in order: prefer CUDA and name the CPU provider
        # explicitly as the fallback.
        providers = ['CUDAExecutionProvider', 'CPUExecutionProvider']

        self.model = InferenceSession(str(model_path), providers=providers)

        print(f'Loaded {self.name} model from {model_path}')

        self.tags = pd.read_csv(tags_path)

    def interrogate(
        self,
        image: "Image.Image"
    ) -> Tuple[
        Dict[str, float],  # rating confidents
        Dict[str, float]  # tag confidents
    ]:
        """Run the model on ``image``.

        The model is loaded on first use.

        Returns:
            A ``(ratings, tags)`` pair of dicts mapping names from the tag
            table to the confidences the model produced for them.

        Raises:
            ValueError: If the model output length does not match the number
                of rows in the tag table.
        """
        # init model
        if not hasattr(self, 'model') or self.model is None:
            self.load()

        # code for converting the image and running the model is taken from the link below
        # thanks, SmilingWolf!
        # https://huggingface.co/spaces/SmilingWolf/wd-v1-4-tags/blob/main/app.py

        # convert an image to fit the model
        # assumes a 4-D input whose second dimension is the (square) side
        # length - TODO confirm against the model
        model_input = self.model.get_inputs()[0]
        _, height, _, _ = model_input.shape

        # alpha to white
        image = image.convert('RGBA')
        new_image = Image.new('RGBA', image.size, 'WHITE')
        new_image.paste(image, mask=image)
        image = new_image.convert('RGB')
        image = np.asarray(image)  # type: ignore

        # PIL RGB to OpenCV BGR
        image = image[:, :, ::-1]  # type: ignore

        image = dbimutils.make_square(image, height)
        image = dbimutils.smart_resize(image, height)  # type: ignore
        image = image.astype(np.float32)  # type: ignore
        image = np.expand_dims(image, 0)  # type: ignore

        # evaluate model
        input_name = model_input.name
        label_name = self.model.get_outputs()[0].name
        confidents = self.model.run([label_name], {input_name: image})[0]

        # Pair every tag name with its score directly, instead of building a
        # DataFrame slice and assigning a column to it on each call.
        names = self.tags['name'].tolist()
        scores = confidents[0].tolist()
        if len(names) != len(scores):
            raise ValueError(
                f'Model returned {len(scores)} scores '
                f'for {len(names)} tags'
            )

        split = self._RATING_COUNT
        ratings = dict(zip(names[:split], scores[:split]))
        tags = dict(zip(names[split:], scores[split:]))

        return ratings, tags
|
|
@ -0,0 +1,9 @@
|
|||
huggingface-hub==0.14.1
|
||||
numpy==1.24.3
|
||||
onnxruntime-gpu==1.15.0
|
||||
opencv-contrib-python==4.7.0.72
|
||||
packaging==23.1
|
||||
pandas==2.0.1
|
||||
Pillow==9.5.0
|
||||
protobuf==4.23.2
|
||||
tqdm==4.65.0
|
Loading…
Reference in New Issue