AI-Clothes-Changer

Running on Zero

App Files Files Community

frogleo committed 1 day ago

Commit

fdf1179

verified ·

1 Parent(s): c260184

Update app.py

Browse files

Files changed (1) hide show

app.py +118 -104

app.py CHANGED Viewed

@@ -26,6 +26,9 @@ from detectron2.data.detection_utils import convert_PIL_to_numpy,_apply_exif_ori
 from torchvision.transforms.functional import to_pil_image
 from PIL import Image, ImageDraw, ImageFont
 # Enhanced logging configuration
 logging.basicConfig(
@@ -164,119 +167,130 @@ def _infer(person,garment,denoise_steps,seed):
     progress(0,desc="Starting")
     device = "cuda"
-    openpose_model.preprocessor.body_estimation.model.to(device)
-    pipe.to(device)
-    pipe.unet_encoder.to(device)
-    personRGB =  person.convert("RGB")
-    crop_size = personRGB.size
-    human_img = personRGB.resize((768,1024))
-    garm_img= garment.convert("RGB").resize((768,1024))
-    progress(0.1,desc="Mask generating")
-    keypoints = openpose_model(human_img.resize((384,512)))
-    model_parse, _ = parsing_model(human_img.resize((384,512)))
-    mask, mask_gray = get_mask_location('hd', "upper_body", model_parse, keypoints)
-    mask = mask.resize((768,1024))
-    mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
-    mask_gray = to_pil_image((mask_gray+1.0)/2.0)
-    progress(0.3,desc="DensePose processing")
-    human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
-    human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
-    args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
-    # verbosity = getattr(args, "verbosity", None)
-    pose_img = args.func(args,human_img_arg)
-    pose_img = pose_img[:,:,::-1]
-    pose_img = Image.fromarray(pose_img).resize((768,1024))
-    progress(0.5,desc="Image generating")
-    def callback(pipe, step, timestep, callback_kwargs):
-        progress_value = 0.5 + ((step+1.0)/denoise_steps)*(0.5/1.0)
-        progress(progress_value, desc=f"Image generating, {step + 1}/{denoise_steps} steps")
-        return callback_kwargs
-    with torch.no_grad():
-        # Extract the images
-        with torch.cuda.amp.autocast():
-            with torch.no_grad():
-                prompt = "model is wearing clothing"
-                negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
-                with torch.inference_mode():
-                    (
-                        prompt_embeds,
-                        negative_prompt_embeds,
-                        pooled_prompt_embeds,
-                        negative_pooled_prompt_embeds,
-                    ) = pipe.encode_prompt(
-                        prompt,
-                        num_images_per_prompt=1,
-                        do_classifier_free_guidance=True,
-                        negative_prompt=negative_prompt,
-                    )
-                prompt = "a photo of clothing"
-                negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
-                if not isinstance(prompt, List):
-                        prompt = [prompt] * 1
-                if not isinstance(negative_prompt, List):
-                    negative_prompt = [negative_prompt] * 1
-                with torch.inference_mode():
-                    (
-                        prompt_embeds_c,
-                        _,
-                        _,
-                        _,
-                    ) = pipe.encode_prompt(
-                        prompt,
-                        num_images_per_prompt=1,
-                        do_classifier_free_guidance=False,
-                        negative_prompt=negative_prompt,
-                    )
-                pose_img =  tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
-                garm_tensor =  tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
-                generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
-                images = pipe(
-                    prompt_embeds=prompt_embeds.to(device,torch.float16),
-                    negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
-                    pooled_prompt_embeds=pooled_prompt_embeds.to(device,torch.float16),
-                    negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
-                    num_inference_steps=denoise_steps,
-                    generator=generator,
-                    strength = 1.0,
-                    pose_img = pose_img.to(device,torch.float16),
-                    text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
-                    cloth = garm_tensor.to(device,torch.float16),
-                    mask_image=mask,
-                    image=human_img,
-                    height=1024,
-                    width=768,
-                    ip_adapter_image = garm_img.resize((768,1024)),
-                    guidance_scale=2.0,
-                    callback_on_step_end=callback
-                )[0]
-    out_img = images[0].resize(crop_size)
-    # NSFW 检测
-    if nsfw_model and nsfw_processor:
-        if detect_nsfw(out_img):
-            error_info = {
-                "error": "Generated image contains NSFW content and cannot be displayed. Please modify your prompt and try again.",
-                "status": "failed"
-            }
-            return None, error_info
-    info = {
-        "status": "success"
-    }
-    progress(1,desc="Complete")
-    return out_img, info
 def infer(person,garment,denoise_steps,seed):

 from torchvision.transforms.functional import to_pil_image
 from PIL import Image, ImageDraw, ImageFont
+class GenerationError(Exception):
+    """Custom exception for generation errors"""
+    pass
 # Enhanced logging configuration
 logging.basicConfig(
     progress(0,desc="Starting")
     device = "cuda"
+    try:
+        openpose_model.preprocessor.body_estimation.model.to(device)
+        pipe.to(device)
+        pipe.unet_encoder.to(device)
+        personRGB =  person.convert("RGB")
+        crop_size = personRGB.size
+        human_img = personRGB.resize((768,1024))
+        garm_img= garment.convert("RGB").resize((768,1024))
+        progress(0.1,desc="Mask generating")
+        keypoints = openpose_model(human_img.resize((384,512)))
+        model_parse, _ = parsing_model(human_img.resize((384,512)))
+        mask, mask_gray = get_mask_location('hd', "upper_body", model_parse, keypoints)
+        mask = mask.resize((768,1024))
+        mask_gray = (1-transforms.ToTensor()(mask)) * tensor_transfrom(human_img)
+        mask_gray = to_pil_image((mask_gray+1.0)/2.0)
+        progress(0.3,desc="DensePose processing")
+        human_img_arg = _apply_exif_orientation(human_img.resize((384,512)))
+        human_img_arg = convert_PIL_to_numpy(human_img_arg, format="BGR")
+        args = apply_net.create_argument_parser().parse_args(('show', './configs/densepose_rcnn_R_50_FPN_s1x.yaml', './ckpt/densepose/model_final_162be9.pkl', 'dp_segm', '-v', '--opts', 'MODEL.DEVICE', 'cuda'))
+        # verbosity = getattr(args, "verbosity", None)
+        pose_img = args.func(args,human_img_arg)
+        pose_img = pose_img[:,:,::-1]
+        pose_img = Image.fromarray(pose_img).resize((768,1024))
+        progress(0.5,desc="Image generating")
+        def callback(pipe, step, timestep, callback_kwargs):
+            progress_value = 0.5 + ((step+1.0)/denoise_steps)*(0.5/1.0)
+            progress(progress_value, desc=f"Image generating, {step + 1}/{denoise_steps} steps")
+            return callback_kwargs
+        with torch.no_grad():
+            # Extract the images
+            with torch.cuda.amp.autocast():
+                with torch.no_grad():
+                    prompt = "model is wearing clothing"
+                    negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+                    with torch.inference_mode():
+                        (
+                            prompt_embeds,
+                            negative_prompt_embeds,
+                            pooled_prompt_embeds,
+                            negative_pooled_prompt_embeds,
+                        ) = pipe.encode_prompt(
+                            prompt,
+                            num_images_per_prompt=1,
+                            do_classifier_free_guidance=True,
+                            negative_prompt=negative_prompt,
+                        )
+                    prompt = "a photo of clothing"
+                    negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality"
+                    if not isinstance(prompt, List):
+                            prompt = [prompt] * 1
+                    if not isinstance(negative_prompt, List):
+                        negative_prompt = [negative_prompt] * 1
+                    with torch.inference_mode():
+                        (
+                            prompt_embeds_c,
+                            _,
+                            _,
+                            _,
+                        ) = pipe.encode_prompt(
+                            prompt,
+                            num_images_per_prompt=1,
+                            do_classifier_free_guidance=False,
+                            negative_prompt=negative_prompt,
+                        )
+                    pose_img =  tensor_transfrom(pose_img).unsqueeze(0).to(device,torch.float16)
+                    garm_tensor =  tensor_transfrom(garm_img).unsqueeze(0).to(device,torch.float16)
+                    generator = torch.Generator(device).manual_seed(seed) if seed is not None else None
+                    images = pipe(
+                        prompt_embeds=prompt_embeds.to(device,torch.float16),
+                        negative_prompt_embeds=negative_prompt_embeds.to(device,torch.float16),
+                        pooled_prompt_embeds=pooled_prompt_embeds.to(device,torch.float16),
+                        negative_pooled_prompt_embeds=negative_pooled_prompt_embeds.to(device,torch.float16),
+                        num_inference_steps=denoise_steps,
+                        generator=generator,
+                        strength = 1.0,
+                        pose_img = pose_img.to(device,torch.float16),
+                        text_embeds_cloth=prompt_embeds_c.to(device,torch.float16),
+                        cloth = garm_tensor.to(device,torch.float16),
+                        mask_image=mask,
+                        image=human_img,
+                        height=1024,
+                        width=768,
+                        ip_adapter_image = garm_img.resize((768,1024)),
+                        guidance_scale=2.0,
+                        callback_on_step_end=callback
+                    )[0]
+        out_img = images[0].resize(crop_size)
+        # NSFW 检测
+        if nsfw_model and nsfw_processor:
+            if detect_nsfw(out_img):
+                msg = "Generated image contains NSFW content and cannot be displayed. Please modify your prompt and try again."
+                raise Exception(msg)
+        info = {
+            "status": "success"
+        }
+        progress(1,desc="Complete")
+        return out_img, info
+    except GenerationError as e:
+        error_info = {
+            "error": str(e),
+            "status": "failed",
+        }
+        return None, error_info
+    except Exception as e:
+        error_info = {
+            "error": str(e),
+            "status": "failed",
+        }
+        return None, error_info
 def infer(person,garment,denoise_steps,seed):