import gzip
import io
import json
import struct
import zlib
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

import gradio as gr
import numpy as np
from PIL import Image, PngImagePlugin

# -------- THEME --------
theme = gr.themes.Soft(primary_hue="indigo", secondary_hue="violet", radius_size="lg")

# =================================================
# ========== PNG Text Chunk Reader (tab 1) ========
# =================================================

PNG_SIGNATURE = b"\x89PNG\r\n\x1a\n"
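
# For reference: after the 8-byte signature, a PNG file is a sequence of chunks,
# each laid out as length(4, big-endian) | type(4, ASCII) | data(length) | CRC(4).
# The parser below walks that layout and keeps only the textual chunk types
# (tEXt, zTXt, iTXt); pixel data and all other chunks are skipped.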


def _parse_png_text_chunks(data: bytes) -> List[Dict[str, Any]]:
    """
    Parse PNG chunks and extract tEXt, zTXt, and iTXt entries.
    """
    if not data.startswith(PNG_SIGNATURE):
        raise ValueError("Not a PNG file.")
    pos = len(PNG_SIGNATURE)
    out = []
    while pos + 8 <= len(data):
        # Read chunk length and type
        length = struct.unpack(">I", data[pos:pos + 4])[0]
        ctype = data[pos + 4:pos + 8]
        pos += 8
        if pos + length + 4 > len(data):
            break
        cdata = data[pos:pos + length]
        pos += length
        # Skip CRC (4 bytes)
        pos += 4
        if ctype == b"tEXt":
            # Latin-1: key\0value
            try:
                null_idx = cdata.index(b"\x00")
                key = cdata[:null_idx].decode("latin-1", "replace")
                text = cdata[null_idx + 1:].decode("latin-1", "replace")
                out.append({"type": "tEXt", "keyword": key, "text": text})
            except Exception:
                pass
        elif ctype == b"zTXt":
            # key\0 + compression_method(1) + compressed data
            try:
                null_idx = cdata.index(b"\x00")
                key = cdata[:null_idx].decode("latin-1", "replace")
                method = cdata[null_idx + 1:null_idx + 2]
                comp = cdata[null_idx + 2:]
                if method == b"\x00":  # zlib/deflate
                    text = zlib.decompress(comp).decode("latin-1", "replace")
                    out.append({"type": "zTXt", "keyword": key, "text": text})
            except Exception:
                pass
| elif ctype == b"iTXt": | |
| # UTF-8: key\0flag(1)\0method(1)\0lang\0translated\0text | |
| try: | |
| i0 = cdata.index(b"\x00") | |
| key = cdata[:i0].decode("latin-1", "replace") | |
| comp_flag = cdata[i0+1:i0+2] | |
| comp_method = cdata[i0+2:i0+3] | |
| rest = cdata[i0+3:] | |
| i1 = rest.index(b"\x00") | |
| language_tag = rest[:i1].decode("ascii", "replace") | |
| rest2 = rest[i1+1:] | |
| i2 = rest2.index(b"\x00") | |
| translated_keyword = rest2[:i2].decode("utf-8", "replace") | |
| text_bytes = rest2[i2+1:] | |
| if comp_flag == b"\x01" and comp_method == b"\x00": | |
| text = zlib.decompress(text_bytes).decode("utf-8", "replace") | |
| else: | |
| text = text_bytes.decode("utf-8", "replace") | |
| out.append({ | |
| "type": "iTXt", | |
| "keyword": key, | |
| "language_tag": language_tag, | |
| "translated_keyword": translated_keyword, | |
| "text": text, | |
| }) | |
| except Exception: | |
| pass | |
| if ctype == b"IEND": | |
| break | |
| return out | |
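
# Quick sanity check (hypothetical path shown; any PNG with text chunks works):
#   with open("image.png", "rb") as f:
#       chunks = _parse_png_text_chunks(f.read())
#   # e.g. [{"type": "tEXt", "keyword": "parameters", "text": "..."}]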


def read_png_info(file_obj) -> Dict[str, Any]:
    """
    Given an uploaded file (path or file-like), return structured PNG text info.
    Also surface Pillow's .info (which often contains 'parameters').
    """
    if hasattr(file_obj, "read"):
        data = file_obj.read()
    else:
        with open(file_obj, "rb") as f:
            data = f.read()
    chunks = _parse_png_text_chunks(data)
    try:
        img = Image.open(io.BytesIO(data))
        pil_info = dict(img.info)
        for k, v in list(pil_info.items()):
            if isinstance(v, (bytes, bytearray)):
                try:
                    pil_info[k] = v.decode("utf-8", "replace")
                except Exception:
                    pil_info[k] = repr(v)
            elif isinstance(v, PngImagePlugin.PngInfo):
                pil_info[k] = "PngInfo(...)"
    except Exception as e:
        pil_info = {"_error": f"Pillow failed to open PNG: {e}"}
    response = {
        "found_text_chunks": chunks,
        "pil_info": pil_info,
        "quick_fields": {
            "parameters": next((c["text"] for c in chunks if c.get("keyword") == "parameters"), pil_info.get("parameters")),
            "Software": next((c["text"] for c in chunks if c.get("keyword") == "Software"), pil_info.get("Software")),
        },
    }
    return response


def infer_png_text(file):
    if file is None:
        return {"error": "Please upload a PNG file."}
    try:
        return read_png_info(file.name if hasattr(file, "name") else file)
    except Exception as e:
        return {"error": str(e)}


# =================================================
# ========== NovelAI LSB Reader (tab 2) ===========
# =================================================
# (User-provided logic, lightly wrapped for Gradio.)


def _pack_lsb_bytes(alpha: np.ndarray) -> np.ndarray:
    """
    Pack the least significant bits (LSB) from an image's alpha channel into bytes.
    """
    alpha = alpha.T.reshape((-1,))
    alpha = alpha[:(alpha.shape[0] // 8) * 8]
    alpha = np.bitwise_and(alpha, 1)
    alpha = alpha.reshape((-1, 8))
    alpha = np.packbits(alpha, axis=1)
    return alpha
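
# Payload layout as read below: the alpha channel is scanned column by column
# (hence the transpose above), one bit per pixel. The bitstream starts with the
# ASCII magic "stealth_pngcomp", followed by a 32-bit big-endian length in bits,
# followed by that many bits of gzip-compressed JSON (keys such as "Description"
# and "Comment" in NovelAI output).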


class LSBReader:
    """
    Utility class for reading hidden data from an image's alpha channel using LSB encoding.
    """

    def __init__(self, data: np.ndarray):
        self.data = _pack_lsb_bytes(data[..., -1])
        self.pos = 0

    def read_bytes(self, n: int) -> bytearray:
        """Read `n` bytes from the bitstream."""
        n_bytes = self.data[self.pos:self.pos + n]
        self.pos += n
        return bytearray(n_bytes.flatten().tolist())

    def read_int32(self) -> Optional[int]:
        """Read a 4-byte big-endian integer from the bitstream."""
        bytes_list = self.read_bytes(4)
        return int.from_bytes(bytes_list, "big") if len(bytes_list) == 4 else None


def _extract_nai_metadata_from_image(image: Image.Image) -> dict:
    """
    Extract embedded metadata from a PNG image generated by NovelAI.
    """
    image_array = np.array(image.convert("RGBA"))
    if image_array.shape[-1] != 4 or len(image_array.shape) != 3:
        raise ValueError("Image must be in RGBA format")
    reader = LSBReader(image_array)
    magic = "stealth_pngcomp"
    if reader.read_bytes(len(magic)).decode("utf-8", "replace") != magic:
        raise ValueError("Invalid magic number (not a NovelAI stealth payload)")
    bit_len = reader.read_int32()
    if bit_len is None or bit_len <= 0:
        raise ValueError("Invalid payload length")
    json_len = bit_len // 8
    compressed_json = reader.read_bytes(json_len)
    json_data = json.loads(gzip.decompress(bytes(compressed_json)).decode("utf-8"))
    if "Comment" in json_data and isinstance(json_data["Comment"], str):
        try:
            json_data["Comment"] = json.loads(json_data["Comment"])
        except Exception:
            # Leave as-is if not valid JSON
            pass
    return json_data


def extract_nai_metadata(image: Union[Image.Image, str, Path]) -> dict:
    if isinstance(image, (str, Path)):
        image = Image.open(image)
    elif not isinstance(image, Image.Image):
        raise ValueError("Input must be a file path (string/Path) or a PIL Image")
    return _extract_nai_metadata_from_image(image)
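
# Typical use (hypothetical filename):
#   meta = extract_nai_metadata("nai_image.png")
#   print(meta.get("Description"))  # the prompt, if present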


def extract_nai_caption_from_hf_img(hf_img: dict) -> Optional[str]:
    image_bytes = hf_img["bytes"]
    pil_image = Image.open(BytesIO(image_bytes))
    metadata = extract_nai_metadata(pil_image)
    return metadata.get("Description")


def infer_nai(image: Optional[Image.Image]):
    if image is None:
        return None, {"error": "Please upload a PNG with alpha channel (RGBA)."}
    try:
        meta = extract_nai_metadata(image)
        description = meta.get("Description")
        return description, meta
    except Exception as e:
        return None, {"error": str(e)}


# =================================================
# ========== Similarity Metrics (tab 3) ===========
# =================================================


def _load_rgb_image(path: Union[str, Path]) -> np.ndarray:
    """Load an image file as an RGB uint8 numpy array."""
    img = Image.open(path).convert("RGB")
    return np.array(img, dtype=np.uint8)


def _pixel_metrics(img_a: np.ndarray, img_b: np.ndarray) -> Dict[str, float]:
    """Compute basic pixel-wise similarity metrics between two RGB images."""
    if img_a.shape != img_b.shape:
        raise ValueError(f"Image size mismatch: {img_a.shape} vs {img_b.shape}")
    diff = img_a.astype(np.float32) - img_b.astype(np.float32)
    abs_diff = np.abs(diff)
    mse = float(np.mean(diff ** 2))
    mae = float(np.mean(abs_diff))
    max_abs = float(np.max(abs_diff))
    # Fraction of per-channel values that match exactly, and its complement as a percentage.
    pixel_match = float(np.mean(img_a == img_b))
    pixel_diff_pct = float(100.0 * (1.0 - pixel_match))
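    # PSNR for 8-bit images: 20 * log10(MAX / sqrt(MSE)) with MAX = 255,
    # reported as +inf when the images are identical (MSE == 0).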
    if mse == 0.0:
        psnr = float("inf")
    else:
        psnr = float(20.0 * np.log10(255.0 / np.sqrt(mse)))
    return {
        "pixel_diff_pct": pixel_diff_pct,
        "pixel_match": pixel_match,
        "mse": mse,
        "mae": mae,
        "max_abs": max_abs,
        "psnr": psnr,
    }


def compute_similarity_report(files: Optional[List[str]]) -> str:
    if not files or len(files) < 2:
        return "Upload at least two images to compare (the first file is treated as the base)."
    try:
        images: Dict[str, np.ndarray] = {}
        base_name = None
        base_img = None
        for idx, file_path in enumerate(files):
            name = Path(file_path).name
            images[name] = _load_rgb_image(file_path)
            if idx == 0:
                base_name = name
                base_img = images[name]
        if base_name is None or base_img is None:
            return "Failed to load the base image."
        metrics: Dict[str, Dict[str, float]] = {}
        # Base vs. others
        for name, img in images.items():
            if name == base_name:
                continue
            metrics[f"{base_name}_vs_{name}"] = _pixel_metrics(base_img, img)
        # Pairwise among non-base images
        other_keys = [k for k in images.keys() if k != base_name]
        for i in range(len(other_keys)):
            for j in range(i + 1, len(other_keys)):
                k1, k2 = other_keys[i], other_keys[j]
                metrics[f"{k1}_vs_{k2}"] = _pixel_metrics(images[k1], images[k2])
        lines = [
            "=== similarity metrics ===",
            f"Base image: {base_name}",
        ]
        for name, vals in metrics.items():
            lines.append(
                f"{name}: pixel_diff_pct={vals['pixel_diff_pct']:.6f}%, "
                f"pixel_match={vals['pixel_match']:.6f}, mse={vals['mse']:.6e}, "
                f"mae={vals['mae']:.6e}, max_abs={vals['max_abs']:.6e}, "
                f"psnr={vals['psnr']:.2f}dB"
            )
        lines.append("\nMetrics (JSON):")
        lines.append(json.dumps(metrics, indent=2))
        return "\n".join(lines)
    except Exception as exc:  # pragma: no cover - handled for UI
        return f"Error computing metrics: {exc}"


# =================================================
# ============ Gradio App (three tabs) ============
# =================================================

with gr.Blocks(title="PNG Tools — ImageInfo & NovelAI Reader", theme=theme, analytics_enabled=False) as demo:
    gr.Markdown("# PNG Tools\nThree utilities: PNG text-chunk metadata, NovelAI LSB metadata, and pixel similarity metrics.")
    with gr.Tabs():
        with gr.Tab("PNG ImageInfo Reader"):
            with gr.Row():
                inp_png = gr.File(label="PNG file", file_types=[".png"])
                out_png = gr.JSON(label="pngImageInfo")
            inp_png.change(fn=infer_png_text, inputs=inp_png, outputs=out_png)
            gr.Markdown("Tip: Stable Diffusion ‘parameters’ often appear under a **tEXt** chunk with keyword `parameters`.")
| with gr.Tab("NovelAI Reader"): | |
| with gr.Row(): | |
| nai_img = gr.Image(label="Upload PNG (RGBA preferred)", type="pil", height=360) | |
| with gr.Row(): | |
| nai_btn = gr.Button("Extract NovelAI Metadata", variant="primary") | |
| with gr.Row(): | |
| nai_desc = gr.Textbox(label="Description (if present)", lines=4) | |
| nai_json = gr.JSON(label="Decoded NovelAI JSON") | |
| nai_btn.click(fn=infer_nai, inputs=nai_img, outputs=[nai_desc, nai_json]) | |
| with gr.Tab("Similarity Metrics"): | |
| gr.Markdown("Upload multiple images; the first file is treated as the base for comparisons.") | |
| files_in = gr.Files( | |
| label="Image files", | |
| # Explicit list ensures WebP acceptance across Gradio builds | |
| file_types=[ | |
| ".png", ".jpg", ".jpeg", ".webp", ".gif", | |
| ".bmp", ".tif", ".tiff", ".jfif" | |
| ], | |
| type="filepath", | |
| interactive=True, | |
| ) | |
| with gr.Row(): | |
| metrics_btn = gr.Button("Compute Similarity", variant="primary") | |
| metrics_out = gr.Textbox(label="Similarity report", lines=14, show_copy_button=True) | |
| metrics_btn.click(fn=compute_similarity_report, inputs=files_in, outputs=metrics_out) | |


if __name__ == "__main__":
    demo.launch()
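    # Optional (not part of the original launch call): demo.launch(share=True)
    # also exposes a temporary public URL when running outside Hugging Face Spaces.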