Add sanitizer for captions in Textual inversion

This commit is contained in:
Raphael Stoeckli 2022-10-05 21:57:18 +02:00 committed by AUTOMATIC1111
parent 0e92c36707
commit 2499fb4e19

@ -1,5 +1,8 @@
from cmath import log
import os
from PIL import Image, ImageOps
import platform
import sys
import tqdm
from modules import shared, images
@ -25,6 +28,7 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
def save_pic_with_caption(image, index):
if process_caption:
caption = "-" + shared.interrogator.generate_caption(image)
caption = sanitize_caption(os.path.join(dst, f"{index:05}-{subindex[0]}"), caption, ".png")
else:
caption = filename
caption = os.path.splitext(caption)[0]
@ -75,3 +79,27 @@ def preprocess(process_src, process_dst, process_flip, process_split, process_ca
if process_caption:
shared.interrogator.send_blip_to_ram()
def sanitize_caption(base_path, original_caption, suffix):
operating_system = platform.system().lower()
if (operating_system == "windows"):
invalid_path_characters = "\\/:*?\"<>|"
max_path_length = 259
else:
invalid_path_characters = "/" #linux/macos
max_path_length = 1023
caption = original_caption
for invalid_character in invalid_path_characters:
caption = caption.replace(invalid_character, "")
fixed_path_length = len(base_path) + len(suffix)
if fixed_path_length + len(caption) <= max_path_length:
return caption
caption_tokens = caption.split()
new_caption = ""
for token in caption_tokens:
last_caption = new_caption
new_caption = new_caption + token + " "
if (len(new_caption) + fixed_path_length - 1 > max_path_length):
break
print(f"\nPath will be too long. Truncated caption: {original_caption}\nto: {last_caption}", file=sys.stderr)
return last_caption.strip()