Spaces:

trojblue
/

png-image-info

Running

File size: 13,753 Bytes

import io
import json
import struct
import zlib
from typing import List, Dict, Any, Optional, Union

import gradio as gr
from PIL import Image, PngImagePlugin

# -------- THEME: Gradio "Soft" theme, indigo/violet hues, radius_size "lg" --------
# Passed to gr.Blocks(theme=...) where the app is assembled further down.
theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg")

# =================================================
# ========== PNG Text Chunk Reader (tab 1) ========
# =================================================

PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"  # fixed 8-byte magic that starts every PNG stream


def _decode_text_chunk(cdata: bytes) -> Optional[Dict[str, Any]]:
    """Decode a tEXt payload (``keyword NUL text``, both Latin-1); None if malformed."""
    keyword, sep, text = cdata.partition(b"\x00")
    if not sep:  # no NUL separator -> not a valid tEXt payload
        return None
    return {
        "type": "tEXt",
        "keyword": keyword.decode("latin-1", "replace"),
        "text": text.decode("latin-1", "replace"),
    }


def _decode_ztxt_chunk(cdata: bytes) -> Optional[Dict[str, Any]]:
    """Decode a zTXt payload (``keyword NUL method(1) zlib-data``); None if unsupported."""
    keyword, sep, rest = cdata.partition(b"\x00")
    # Method 0 (zlib/deflate) is the only compression method handled here.
    if not sep or rest[:1] != b"\x00":
        return None
    return {
        "type": "zTXt",
        "keyword": keyword.decode("latin-1", "replace"),
        "text": zlib.decompress(rest[1:]).decode("latin-1", "replace"),
    }


def _decode_itxt_chunk(cdata: bytes) -> Optional[Dict[str, Any]]:
    """
    Decode an iTXt payload; None if malformed or compressed with an unknown method.

    Layout: ``keyword NUL flag(1) method(1) language NUL translated NUL text``.
    The flag and method bytes are NOT NUL-separated from each other.
    """
    keyword, sep, rest = cdata.partition(b"\x00")
    if not sep:
        return None
    comp_flag, comp_method = rest[:1], rest[1:2]

    language_tag, sep, rest = rest[2:].partition(b"\x00")
    if not sep:
        return None
    translated_keyword, sep, text_bytes = rest.partition(b"\x00")
    if not sep:
        return None

    if comp_flag == b"\x01":
        if comp_method != b"\x00":
            # Compressed with a method we cannot undo: skip the entry instead of
            # presenting the still-compressed bytes as if they were text.
            return None
        text_bytes = zlib.decompress(text_bytes)

    return {
        "type": "iTXt",
        "keyword": keyword.decode("latin-1", "replace"),
        "language_tag": language_tag.decode("ascii", "replace"),
        "translated_keyword": translated_keyword.decode("utf-8", "replace"),
        "text": text_bytes.decode("utf-8", "replace"),
    }


# Chunk type -> payload decoder for the three textual chunk kinds.
_TEXT_CHUNK_DECODERS = {
    b"tEXt": _decode_text_chunk,
    b"zTXt": _decode_ztxt_chunk,
    b"iTXt": _decode_itxt_chunk,
}


def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]:
    """
    Parse PNG chunks and extract tEXt, zTXt, and iTXt entries.

    Chunks are walked in file order until IEND, the end of the data, or a
    chunk whose declared length runs past the end of the data. Chunk CRCs are
    skipped, not verified. Malformed or undecodable text chunks are ignored
    (best effort) rather than aborting the whole parse.

    Args:
        data: Complete contents of the PNG file.

    Returns:
        One dict per decoded text chunk, in file order. Every dict has
        "type", "keyword" and "text"; iTXt entries additionally carry
        "language_tag" and "translated_keyword".

    Raises:
        ValueError: If ``data`` does not start with the PNG signature.
    """
    if not data.startswith(PNG_SIGNATURE):
        raise ValueError("Not a PNG file.")

    pos = len(PNG_SIGNATURE)
    total = len(data)
    out: List[Dict[str, Any]] = []

    # Each chunk is: length(4, big-endian) + type(4) + payload(length) + CRC(4).
    while pos + 8 <= total:
        length, ctype = struct.unpack_from(">I4s", data, pos)
        pos += 8

        if pos + length + 4 > total:
            break  # truncated chunk: stop, keeping what was decoded so far

        cdata = data[pos:pos + length]
        pos += length + 4  # step over the payload and the unchecked CRC

        if ctype == b"IEND":
            break

        decoder = _TEXT_CHUNK_DECODERS.get(ctype)
        if decoder is None:
            continue

        try:
            entry = decoder(cdata)
        except zlib.error:
            entry = None  # corrupt compressed text: skip just this chunk
        if entry is not None:
            out.append(entry)

    return out


def read_png_info(file_obj) -> Dict[str, Any]:
    """
    Given an uploaded file (path or file-like), return structured PNG text info.
    Also surface Pillow's .info (which often contains 'parameters').

    Args:
        file_obj: Either an object with a ``read()`` method returning the raw
            file bytes, or a filesystem path that is opened in binary mode.

    Returns:
        A dict with three keys:
        - "found_text_chunks": entries from ``_parse_png_text_chunks``.
        - "pil_info": a copy of Pillow's ``Image.info`` with bytes values
          decoded as UTF-8, or ``{"_error": ...}`` if Pillow cannot open it.
        - "quick_fields": the "parameters" and "Software" texts, taken from
          the parsed chunks first and from ``pil_info`` as a fallback.

    Raises:
        ValueError: Propagated from ``_parse_png_text_chunks`` when the data
            is not a PNG.
    """
    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        with open(file_obj, "rb") as f:
            data = f.read()

    chunks = _parse_png_text_chunks(data)

    try:
        # Context manager so Pillow releases the image as soon as .info is copied.
        with Image.open(io.BytesIO(data)) as img:
            pil_info = dict(img.info)
    except Exception as e:
        # Best effort: the manually parsed chunks are still returned.
        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}
    else:
        for k, v in list(pil_info.items()):
            if isinstance(v, (bytes, bytearray)):
                # errors="replace" cannot raise, so no fallback is needed.
                pil_info[k] = v.decode("utf-8", "replace")
            elif isinstance(v, PngImagePlugin.PngInfo):
                pil_info[k] = "PngInfo(...)"

    def _quick_field(keyword: str) -> Any:
        """Text of the first parsed chunk with this keyword, else Pillow's value."""
        return next(
            (c["text"] for c in chunks if c.get("keyword") == keyword),
            pil_info.get(keyword),
        )

    return {
        "found_text_chunks": chunks,
        "pil_info": pil_info,
        "quick_fields": {
            "parameters": _quick_field("parameters"),
            "Software": _quick_field("Software"),
        },
    }


def infer_png_text(file):
    """
    Gradio callback for the PNG text-chunk tab.

    Accepts either a path or an object exposing a ``name`` attribute (in which
    case that attribute is used as the path). Returns the structure produced
    by ``read_png_info``, or ``{"error": message}`` when nothing was uploaded
    or reading fails.
    """
    if file is None:
        return {"error": "Please upload a PNG file."}

    try:
        # Prefer the object's .name when it has one; otherwise treat it as a path.
        source = getattr(file, "name", file)
        return read_png_info(source)
    except Exception as exc:
        return {"error": str(exc)}


# =================================================
# ========== NovelAI LSB Reader (tab 2) ===========
# =================================================

# (User-provided logic, lightly wrapped for Gradio.)
import numpy as np
import gzip
from pathlib import Path
from io import BytesIO

def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray:
    """
    Collect the lowest bit of every value in ``alpha`` and pack them into bytes.

    The array is transposed before flattening, so for a 2-D input the values
    are read column by column. Trailing values that do not fill a whole byte
    are dropped. The first bit read becomes the most significant bit of its
    byte (``np.packbits`` default).

    Returns:
        An array of shape ``(n_bytes, 1)`` holding the packed bytes.
    """
    flat = alpha.T.reshape(-1)
    usable = (flat.shape[0] // 8) * 8  # largest multiple of 8 that fits
    lsb_bits = np.bitwise_and(flat[:usable], 1)
    return np.packbits(lsb_bits.reshape(-1, 8), axis=1)


class LSBReader:
    """
    Sequential byte reader over the LSB stream hidden in an image's last channel.

    ``data`` holds the packed bytes and ``pos`` is the index of the next byte
    to hand out.
    """

    def __init__(self, data: np.ndarray):
        # Only the last channel (alpha, for RGBA input) is used as the bit source.
        self.data = _pack_lsb_bytes(data[..., -1])
        self.pos = 0

    def read_bytes(self, n: int) -> bytearray:
        """Return the next ``n`` bytes (fewer if the stream runs out) and advance."""
        start = self.pos
        stop = start + n
        self.pos = stop  # the cursor moves by n even when fewer bytes remain
        window = self.data[start:stop]
        return bytearray(window.flatten().tolist())

    def read_int32(self) -> Optional[int]:
        """Read a 4-byte big-endian integer; None if fewer than 4 bytes remain."""
        raw = self.read_bytes(4)
        if len(raw) != 4:
            return None
        return int.from_bytes(raw, 'big')


def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
    """
    Decode the metadata NovelAI hides in the alpha-channel LSBs of an image.

    Stream layout as read here: the magic string ``stealth_pngcomp``, a 4-byte
    big-endian payload length in bits, then a gzip-compressed UTF-8 JSON
    document. If the decoded JSON has a string "Comment" that is itself valid
    JSON, it is replaced by the parsed value.

    Raises:
        ValueError: If the converted image is not a 3-D array with 4 channels,
            the magic string does not match, or the payload length is missing
            or not positive.
        Errors from gzip decompression and JSON decoding propagate unchanged.
    """
    pixels = np.array(image.convert("RGBA"))
    if pixels.shape[-1] != 4 or pixels.ndim != 3:
        raise ValueError("Image must be in RGBA format")

    stream = LSBReader(pixels)

    expected_magic = "stealth_pngcomp"
    found_magic = stream.read_bytes(len(expected_magic)).decode("utf-8", "replace")
    if found_magic != expected_magic:
        raise ValueError("Invalid magic number (not NovelAI stealth payload)")

    payload_bits = stream.read_int32()
    if payload_bits is None or payload_bits <= 0:
        raise ValueError("Invalid payload length")

    # The length field counts bits; the payload itself is read in whole bytes.
    gz_blob = bytes(stream.read_bytes(payload_bits // 8))
    metadata = json.loads(gzip.decompress(gz_blob).decode("utf-8"))

    if "Comment" in metadata and isinstance(metadata["Comment"], str):
        try:
            metadata["Comment"] = json.loads(metadata["Comment"])
        except Exception:
            # Not valid JSON: keep the original string untouched.
            pass

    return metadata


def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
    """
    Extract NovelAI stealth metadata from an image or an image file path.

    Args:
        image: An already-open PIL image, or a path (``str``/``Path``) to open.

    Returns:
        The decoded metadata dict from ``_extract_nai_metadata_from_image``.

    Raises:
        ValueError: If ``image`` is neither a path nor a PIL image, or if the
            image carries no valid payload.
    """
    if isinstance(image, (str, Path)):
        # We opened this file ourselves, so close it deterministically once the
        # pixels have been decoded, including when extraction raises.
        with Image.open(image) as opened:
            return _extract_nai_metadata_from_image(opened)

    if not isinstance(image, Image.Image):
        raise ValueError("Input must be a file path (string/Path) or a PIL Image")

    # Caller-owned image: leave its lifetime to the caller.
    return _extract_nai_metadata_from_image(image)


def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
    """
    Return the "Description" field of the NovelAI metadata in an encoded image.

    Args:
        hf_img: Mapping whose 'bytes' entry holds the encoded image file
            contents (presumably a Hugging Face datasets image record;
            verify against the caller).

    Returns:
        The "Description" value, or None when the metadata has no such key.

    Raises:
        KeyError: If ``hf_img`` has no 'bytes' entry.
    """
    raw = hf_img['bytes']
    metadata = extract_nai_metadata(Image.open(BytesIO(raw)))
    return metadata.get('Description')


def infer_nai(image: Optional[Image.Image]):
    """
    Gradio callback for the NovelAI tab.

    Returns a ``(description, metadata)`` pair. When no image is supplied or
    extraction fails, the description is None and the second element is
    ``{"error": message}``.
    """
    if image is None:
        return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}

    try:
        meta = extract_nai_metadata(image)
        # Looked up inside the try so a non-dict payload is reported as an error.
        return meta.get("Description"), meta
    except Exception as exc:
        return None, {"error": str(exc)}


# =================================================
# =========== Similarity Metrics (tab 3) ===========
# =================================================

def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
    """
    Load an image file as an RGB uint8 numpy array.

    The file is opened in a ``with`` block so its handle is released as soon
    as the pixels are converted, rather than left to garbage collection.
    This matters for multi-frame formats, whose file stays open after load.
    """
    with Image.open(path) as img:
        return np.array(img.convert("RGB"), dtype=np.uint8)


def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]:
    """
    Compare two equally-shaped image arrays element by element.

    Returns a dict with:
        pixel_diff_pct: percentage of array elements that differ.
        pixel_match: fraction of array elements that are equal. Equality is
            per element (channel value), not per whole pixel.
        mse / mae / max_abs: mean squared, mean absolute and maximum absolute
            difference, computed in float32.
        psnr: peak signal-to-noise ratio in dB for a 255 peak; ``inf`` when
            the arrays are identical.

    Raises:
        ValueError: If the two arrays differ in shape.
    """
    if img_a.shape != img_b.shape:
        raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}")

    # Subtract in float32 so unsigned inputs cannot wrap around.
    delta = img_a.astype(np.float32) - img_b.astype(np.float32)
    magnitude = np.abs(delta)

    mse = float(np.mean(delta ** 2))
    match_ratio = float(np.mean(img_a == img_b))

    if mse == 0.0:
        psnr = float("inf")
    else:
        psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse)))

    return {
        "pixel_diff_pct": float(100.0 * (1.0 - match_ratio)),
        "pixel_match": match_ratio,
        "mse": mse,
        "mae": float(np.mean(magnitude)),
        "max_abs": float(np.max(magnitude)),
        "psnr": psnr,
    }


def compute_similarity_report(files: Optional[List[str]]) -> str:
    """
    Build a text report of pixel-wise similarity between the given images.

    The first file is the base image. It is compared against every other
    file, and the remaining files are then compared pairwise with each other.

    Args:
        files: Paths of the images to compare, base image first.

    Returns:
        One of:
        - a multi-line report: one summary line per compared pair, followed by
          the same metrics as a JSON document;
        - a prompt to upload more images when fewer than two are given;
        - ``"Error computing metrics: ..."`` if loading or comparing fails
          (for example when two images differ in size).
    """
    if not files or len(files) < 2:
        return "Upload at least two images to compare (first file is treated as base)."

    try:
        # Keep an ordered list of (label, pixels). Keying a dict by basename
        # would let two uploads that share a filename overwrite each other and
        # silently drop comparisons, so repeated names get a " (n)" suffix.
        labelled = []
        taken = set()
        for file_path in files:
            name = Path(file_path).name
            label, attempt = name, 1
            while label in taken:
                attempt += 1
                label = f"{name} ({attempt})"
            taken.add(label)
            labelled.append((label, _load_rgb_image(file_path)))

        base_name = labelled[0][0]

        # Every unordered pair in upload order: base vs. each other image
        # first, then the non-base images among themselves.
        metrics: Dict[str, Dict[str, float]] = {}
        for idx, (first_name, first_img) in enumerate(labelled):
            for second_name, second_img in labelled[idx + 1:]:
                metrics[f"{first_name}_vs_{second_name}"] = _pixel_metrics(first_img, second_img)

        lines = [
            "=== similarity metrics ===",
            f"Base image: {base_name}",
        ]
        for name, vals in metrics.items():
            lines.append(
                (
                    f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
                    f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
                    f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
                    f"psnr={vals['psnr']:.2f}dB"
                )
            )

        # JSON has no literal for infinity/NaN, and psnr is inf for identical
        # images; emit such values as strings so the section stays valid JSON.
        json_ready = {
            pair: {
                key: (val if float("-inf") < val < float("inf") else str(val))
                for key, val in vals.items()
            }
            for pair, vals in metrics.items()
        }

        lines.append("\nMetrics (JSON):")
        lines.append(json.dumps(json_ready, indent=2))

        return "\n".join(lines)
    except Exception as exc:  # pragma: no cover - handled for UI
        return f"Error computing metrics: {exc}"


# =================================================
# =============== Gradio App (three tabs) =========
# =================================================

# Assemble the UI: one tab per utility, each wired to its callback defined above.
with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo:
    gr.Markdown(
        "# PNG Tools\n"
        "Three utilities: PNG text-chunk metadata, NovelAI LSB metadata, and image similarity metrics."
    )

    with gr.Tabs():
        # Tab 1: parse tEXt/zTXt/iTXt chunks as soon as a file is selected.
        with gr.Tab("PNG ImageInfo Reader"):
            with gr.Row():
                inp_png = gr.File(label="PNG file", file_types=[".png"])
            out_png = gr.JSON(label="pngImageInfo")
            inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
            gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")

        # Tab 2: decode the payload hidden in the alpha channel's low bits.
        with gr.Tab("NovelAI Reader"):
            with gr.Row():
                nai_img = gr.Image(
                    label="Upload PNG (RGBA preferred)",
                    type="pil",
                    # Gradio converts uploads to `image_mode` before calling the
                    # callback and defaults to "RGB", which would discard the
                    # alpha channel that carries the payload.
                    image_mode="RGBA",
                    height=360,
                )
            with gr.Row():
                nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary")
            with gr.Row():
                nai_desc = gr.Textbox(label="Description (if present)", lines=4)
            nai_json = gr.JSON(label="Decoded NovelAI JSON")

            nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json])

        # Tab 3: pixel-wise comparison of several uploads against the first one.
        with gr.Tab("Similarity Metrics"):
            gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.")
            files_in = gr.Files(
                label="Image files",
                # Explicit list ensures WebP acceptance across Gradio builds
                file_types=[
                    ".png", ".jpg", ".jpeg", ".webp", ".gif",
                    ".bmp", ".tif", ".tiff", ".jfif"
                ],
                type="filepath",
                interactive=True,
            )
            with gr.Row():
                metrics_btn = gr.Button("Compute Similarity", variant="primary")
            metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True)
            metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out)

# Start the Gradio server only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()