# png-image-info / app.py
# trojblue's picture
# Update app.py
# 8353d61 verified
import io
import json
import struct
import zlib
from typing import List, Dict, Any, Optional, Union
import gradio as gr
from PIL import Image, PngImagePlugin
# -------- THEME (shared by every tab of the app) --------
# Soft Gradio theme with indigo/violet hues and large corner radii.
theme = gr.themes.Soft(
    primary_hue="indigo",
    secondary_hue="violet",
    radius_size="lg",
)
# =================================================
# ========== PNG Text Chunk Reader (tab 1) ========
# =================================================
# Fixed 8-byte signature that every PNG file starts with.
PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"


def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]:
    """
    Parse PNG chunks and extract tEXt, zTXt, and iTXt entries.

    The chunk stream is walked from just after the signature until an IEND
    chunk, a truncated chunk, or the end of ``data`` is reached. CRCs are
    skipped, not verified. A text chunk that is malformed (no NUL separator,
    corrupt zlib stream) or that uses an unsupported compression method is
    skipped; the remaining chunks are still returned.

    Args:
        data: Raw bytes of the file.

    Returns:
        One dict per decoded chunk, in file order. Every dict has ``type``,
        ``keyword`` and ``text``; iTXt entries additionally carry
        ``language_tag`` and ``translated_keyword``.

    Raises:
        ValueError: If ``data`` does not start with the PNG signature.
    """
    if not data.startswith(PNG_SIGNATURE):
        raise ValueError("Not a PNG file.")

    pos = len(PNG_SIGNATURE)
    end = len(data)
    out: List[Dict[str, Any]] = []

    while pos + 8 <= end:
        # Chunk header: 4-byte big-endian payload length + 4-byte type.
        length, ctype = struct.unpack_from(">I4s", data, pos)
        pos += 8
        if pos + length + 4 > end:
            # Payload or CRC runs past the end of the data: truncated file.
            break
        cdata = data[pos:pos + length]
        pos += length + 4  # payload plus the 4-byte CRC (not verified)

        if ctype == b"IEND":
            break

        try:
            if ctype == b"tEXt":
                # Latin-1: key\0value
                null_idx = cdata.index(b"\x00")
                out.append({
                    "type": "tEXt",
                    "keyword": cdata[:null_idx].decode("latin-1", "replace"),
                    "text": cdata[null_idx + 1:].decode("latin-1", "replace"),
                })
            elif ctype == b"zTXt":
                # key\0compression_method(1) + compressed data
                null_idx = cdata.index(b"\x00")
                method = cdata[null_idx + 1:null_idx + 2]
                if method != b"\x00":
                    # Only method 0 (zlib/deflate) can be decoded here.
                    continue
                text = zlib.decompress(cdata[null_idx + 2:]).decode("latin-1", "replace")
                out.append({
                    "type": "zTXt",
                    "keyword": cdata[:null_idx].decode("latin-1", "replace"),
                    "text": text,
                })
            elif ctype == b"iTXt":
                # UTF-8: key\0flag(1)method(1)lang\0translated\0text
                i0 = cdata.index(b"\x00")
                key = cdata[:i0].decode("latin-1", "replace")
                comp_flag = cdata[i0 + 1:i0 + 2]
                comp_method = cdata[i0 + 2:i0 + 3]
                rest = cdata[i0 + 3:]
                i1 = rest.index(b"\x00")
                language_tag = rest[:i1].decode("ascii", "replace")
                rest2 = rest[i1 + 1:]
                i2 = rest2.index(b"\x00")
                translated_keyword = rest2[:i2].decode("utf-8", "replace")
                text_bytes = rest2[i2 + 1:]
                if comp_flag == b"\x01":
                    if comp_method != b"\x00":
                        # Flagged as compressed with a method we cannot
                        # decode; decoding the raw bytes would yield garbage.
                        continue
                    text_bytes = zlib.decompress(text_bytes)
                out.append({
                    "type": "iTXt",
                    "keyword": key,
                    "language_tag": language_tag,
                    "translated_keyword": translated_keyword,
                    "text": text_bytes.decode("utf-8", "replace"),
                })
        except (ValueError, zlib.error):
            # ValueError: missing NUL separator; zlib.error: corrupt stream.
            # Best effort: drop this chunk and keep scanning.
            continue

    return out
def read_png_info(file_obj) -> Dict[str, Any]:
    """
    Collect PNG text metadata from an uploaded file.

    ``file_obj`` is either an object with a ``read()`` method or something
    ``open()`` accepts as a path. The result combines the text chunks found
    by ``_parse_png_text_chunks``, Pillow's ``.info`` mapping (bytes values
    decoded to text), and a ``quick_fields`` section with the ``parameters``
    and ``Software`` values, taken from the chunks first and from Pillow's
    info otherwise.
    """
    if hasattr(file_obj, "read"):
        raw = file_obj.read()
    else:
        with open(file_obj, "rb") as handle:
            raw = handle.read()

    text_chunks = _parse_png_text_chunks(raw)

    try:
        pil_info = dict(Image.open(io.BytesIO(raw)).info)
        for field in list(pil_info):
            value = pil_info[field]
            if isinstance(value, (bytes, bytearray)):
                # Binary values are shown as text so the result stays displayable.
                try:
                    pil_info[field] = value.decode("utf-8", "replace")
                except Exception:
                    pil_info[field] = repr(value)
            elif isinstance(value, PngImagePlugin.PngInfo):
                pil_info[field] = "PngInfo(...)"
    except Exception as e:
        # Pillow problems are reported inside the result instead of raised.
        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}

    def _first_text(keyword):
        # First chunk carrying this keyword wins; otherwise use Pillow's value.
        for chunk in text_chunks:
            if chunk.get("keyword") == keyword:
                return chunk["text"]
        return pil_info.get(keyword)

    return {
        "found_text_chunks": text_chunks,
        "pil_info": pil_info,
        "quick_fields": {
            "parameters": _first_text("parameters"),
            "Software": _first_text("Software"),
        },
    }
def infer_png_text(file):
    """
    Callback for the PNG tab: return PNG text info or an ``{"error": ...}`` dict.

    ``file`` may be ``None`` (nothing uploaded), an object exposing a ``name``
    attribute, or a value passed to ``read_png_info`` unchanged. Any exception
    is converted into an error dict rather than propagated.
    """
    if file is None:
        return {"error": "Please upload a PNG file."}
    try:
        # Prefer the object's ``name`` attribute when it has one.
        source = getattr(file, "name", file)
        return read_png_info(source)
    except Exception as exc:
        return {"error": str(exc)}
# =================================================
# ========== NovelAI LSB Reader (tab 2) ===========
# =================================================
# (User-provided logic, lightly wrapped for Gradio.)
import numpy as np
import gzip
from pathlib import Path
from io import BytesIO
def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray:
    """
    Pack the least significant bit of every element of ``alpha`` into bytes.

    The array is transposed and flattened, trailing elements that do not fill
    a whole group of eight are dropped, and each group of eight bits becomes
    one byte (first bit is the most significant). The result has shape
    ``(n_bytes, 1)``.
    """
    flat = alpha.T.reshape((-1,))
    usable = (flat.shape[0] // 8) * 8  # largest multiple of 8 that fits
    bits = np.bitwise_and(flat[:usable], 1)
    return np.packbits(bits.reshape((-1, 8)), axis=1)
class LSBReader:
    """
    Sequential reader over the bytes packed from the last channel's LSBs.

    ``data`` holds the packed bytes and ``pos`` is the index of the next byte
    to be read.
    """

    def __init__(self, data: np.ndarray):
        # Only the last channel (index -1) of the array is used.
        self.data = _pack_lsb_bytes(data[..., -1])
        self.pos = 0

    def read_bytes(self, n: int) -> bytearray:
        """Return the next ``n`` bytes (fewer if the data runs out) and advance by ``n``."""
        start = self.pos
        self.pos = start + n
        window = self.data[start:start + n]
        return bytearray(window.flatten().tolist())

    def read_int32(self) -> Optional[int]:
        """Read a 4-byte big-endian integer, or return None if fewer than 4 bytes remain."""
        raw = self.read_bytes(4)
        if len(raw) != 4:
            return None
        return int.from_bytes(raw, 'big')
def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
    """
    Extract embedded metadata from a PNG image generated by NovelAI.

    The payload is read from the least significant bits of the alpha channel:
    the magic string ``stealth_pngcomp``, a 4-byte big-endian length in bits,
    then that many bits of gzip-compressed UTF-8 JSON. When the decoded JSON
    has a string ``Comment`` field that itself contains JSON, it is replaced
    by the parsed value.

    Args:
        image: PIL image; it is converted to RGBA before reading.

    Returns:
        The decoded JSON payload.

    Raises:
        ValueError: If the magic string is missing, the declared length is
            invalid or larger than what the image holds, or the JSON is
            malformed.
        OSError: If the payload is not valid gzip data.
    """
    image_array = np.array(image.convert("RGBA"))
    if image_array.shape[-1] != 4 or len(image_array.shape) != 3:
        raise ValueError("Image must be in RGBA format")

    reader = LSBReader(image_array)
    magic = "stealth_pngcomp"
    if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic:
        raise ValueError("Invalid magic number (not NovelAI stealth payload)")

    bit_len = reader.read_int32()
    if bit_len is None or bit_len <= 0:
        raise ValueError("Invalid payload length")

    json_len = bit_len // 8
    compressed_json = reader.read_bytes(json_len)
    if len(compressed_json) != json_len:
        # read_bytes returns fewer bytes when the image runs out of pixels;
        # fail clearly here instead of with a gzip end-of-stream error.
        raise ValueError("Invalid payload length (exceeds image capacity)")

    json_data = json.loads(gzip.decompress(bytes(compressed_json)).decode("utf-8"))

    if isinstance(json_data, dict) and isinstance(json_data.get("Comment"), str):
        try:
            json_data["Comment"] = json.loads(json_data["Comment"])
        except ValueError:
            # Leave as-is if not valid JSON (JSONDecodeError is a ValueError).
            pass
    return json_data
def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
    """
    Extract NovelAI metadata from a PIL image or from an image file path.

    Args:
        image: An already opened PIL image, or a ``str``/``Path`` to open.

    Returns:
        The metadata returned by ``_extract_nai_metadata_from_image``.

    Raises:
        ValueError: If ``image`` is neither a path nor a PIL image, or the
            extraction itself rejects the image.
    """
    if isinstance(image, (str, Path)):
        # Open via a context manager so the file handle is released; the
        # extraction reads the pixel data before the block exits.
        with Image.open(image) as opened:
            return _extract_nai_metadata_from_image(opened)
    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a file path (string/Path) or a PIL Image")
    return _extract_nai_metadata_from_image(image)
def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
    """
    Return the ``Description`` field of the NovelAI metadata of an encoded image.

    ``hf_img`` must provide the encoded image under the ``'bytes'`` key; a
    missing key raises ``KeyError``. ``None`` is returned when the metadata
    has no ``Description`` entry.
    """
    encoded = hf_img['bytes']
    decoded_image = Image.open(BytesIO(encoded))
    return extract_nai_metadata(decoded_image).get('Description')
def infer_nai(image: Optional[Image.Image]):
    """
    Callback for the NovelAI tab.

    Returns a ``(description, payload)`` pair: the ``Description`` value and
    the full metadata on success, or ``None`` and an ``{"error": ...}`` dict
    when no image was given or extraction raised.
    """
    if image is None:
        return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}
    try:
        meta = extract_nai_metadata(image)
        caption = meta.get("Description")
    except Exception as exc:
        return None, {"error": str(exc)}
    return caption, meta
# =================================================
# =========== Similarity Metrics (tab 3) ===========
# =================================================
def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
    """
    Load an image file as an RGB uint8 numpy array.

    The file is opened in a context manager so its handle is closed once the
    pixels have been converted and copied into the array.
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)
def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]:
    """
    Compute element-wise similarity metrics between two equally shaped arrays.

    Returns a dict with ``pixel_diff_pct`` and ``pixel_match`` (share of
    array elements that differ / are equal), ``mse``, ``mae``, ``max_abs``
    and ``psnr`` (relative to a peak of 255; infinite when ``mse`` is zero).

    Raises:
        ValueError: If the two arrays do not have the same shape.
    """
    if img_a.shape != img_b.shape:
        raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}")

    # Subtract in float32 so uint8 inputs cannot wrap around.
    delta = img_a.astype(np.float32) - img_b.astype(np.float32)
    magnitude = np.abs(delta)
    mse = float(np.mean(delta ** 2))
    match_fraction = float(np.mean(img_a == img_b))

    psnr = float("inf") if mse == 0.0 else float(20.0 * np.log10(255.0 / np.sqrt(mse)))

    return {
        "pixel_diff_pct": float(100.0 * (1.0 - match_fraction)),
        "pixel_match": match_fraction,
        "mse": mse,
        "mae": float(np.mean(magnitude)),
        "max_abs": float(np.max(magnitude)),
        "psnr": psnr,
    }
def compute_similarity_report(files: Optional[List[str]]) -> str:
    """
    Build a text report of pixel metrics between the uploaded images.

    The first file is the base image. Metrics are computed for the base
    against every other image, then for every pair of non-base images. Images
    are labelled by file name; when several uploads share a name, later ones
    get a numeric suffix (``name (2)``, ``name (3)``, ...) so none of them
    replaces another.

    Args:
        files: Paths of the uploaded images, base image first.

    Returns:
        A prompt to upload more files when fewer than two are given, the
        report (one line per pair followed by the metrics as JSON), or an
        ``Error computing metrics: ...`` message if loading or comparing
        fails.
    """
    if not files or len(files) < 2:
        return "Upload at least two images to compare (first file is treated as base)."
    try:
        images: Dict[str, np.ndarray] = {}
        for file_path in files:
            name = Path(file_path).name
            # Uploads from different folders can share a basename; keep each
            # one under its own label instead of overwriting the earlier one.
            label, counter = name, 2
            while label in images:
                label = f"{name} ({counter})"
                counter += 1
            images[label] = _load_rgb_image(file_path)

        labels = list(images)
        base_name, other_keys = labels[0], labels[1:]
        base_img = images[base_name]

        metrics: Dict[str, Dict[str, float]] = {}
        # Base vs others
        for name in other_keys:
            metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, images[name])
        # Pairwise among non-base images
        for i, k1 in enumerate(other_keys):
            for k2 in other_keys[i + 1:]:
                metrics[f"{k1}_vs_{k2}"] = _pixel_metrics(images[k1], images[k2])

        lines = [
            "=== similarity metrics ===",
            f"Base image: {base_name}",
        ]
        for name, vals in metrics.items():
            lines.append(
                f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
                f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
                f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
                f"psnr={vals['psnr']:.2f}dB"
            )

        # json.dumps would write a bare Infinity/NaN token, which is not valid
        # JSON; identical images have an infinite PSNR, so emit such values
        # as strings ("inf", "nan") instead.
        json_safe = {
            pair: {key: (val if np.isfinite(val) else str(val)) for key, val in vals.items()}
            for pair, vals in metrics.items()
        }
        lines.append("\nMetrics (JSON):")
        lines.append(json.dumps(json_safe, indent=2))
        return "\n".join(lines)
    except Exception as exc:  # pragma: no cover - handled for UI
        return f"Error computing metrics: {exc}"
# =================================================
# =============== Gradio App (three tabs) =========
# =================================================
# UI definition: one Blocks app with three tabs, each wired to one callback.
with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo:
    gr.Markdown(
        "# PNG Tools\n"
        "Three utilities: PNG text-chunk metadata, NovelAI LSB metadata, "
        "and image similarity metrics."
    )
    with gr.Tabs():
        # Tab 1: the text-chunk report refreshes whenever the uploaded file changes.
        with gr.Tab("PNG ImageInfo Reader"):
            with gr.Row():
                inp_png = gr.File(label="PNG file", file_types=[".png"])
                out_png = gr.JSON(label="pngImageInfo")
            inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
            gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")
        # Tab 2: extraction runs on button click and fills both outputs.
        with gr.Tab("NovelAI Reader"):
            with gr.Row():
                nai_img = gr.Image(label="Upload PNG (RGBA preferred)", type="pil", height=360)
            with gr.Row():
                nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary")
            with gr.Row():
                nai_desc = gr.Textbox(label="Description (if present)", lines=4)
                nai_json = gr.JSON(label="Decoded NovelAI JSON")
            nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json])
        # Tab 3: the callback receives file paths (type="filepath").
        with gr.Tab("Similarity Metrics"):
            gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.")
            files_in = gr.Files(
                label="Image files",
                # Explicit list ensures WebP acceptance across Gradio builds
                file_types=[
                    ".png", ".jpg", ".jpeg", ".webp", ".gif",
                    ".bmp", ".tif", ".tiff", ".jfif"
                ],
                type="filepath",
                interactive=True,
            )
            with gr.Row():
                metrics_btn = gr.Button("Compute Similarity", variant="primary")
            metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True)
            metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()