# Image_Analyzer / model_loader.py
import io
import os

import google.generativeai as genai
from PIL import Image
from dotenv import load_dotenv

# Load environment variables and configure the Gemini client once at import time.
# NOTE: the GOOGLE_API_KEY variable name is an assumption; adjust it to match the
# key name used in your .env file.
load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


# Convert a PIL Image to raw bytes, since the Gemini API expects image data.
def get_image_bytes(image):
    """Convert a PIL Image to PNG bytes for the Gemini API."""
    buffer = io.BytesIO()
    image.save(buffer, format="PNG")
    return buffer.getvalue()


# Load the Gemini vision model.
def load_gemini_model():
    # Gemini 1.5 Flash supports multimodal (text + image) inputs (as of April 2025).
    model = genai.GenerativeModel(model_name="gemini-1.5-flash")
    return model


# Generate a basic one-sentence caption with Gemini.
def generate_caption_with_gemini(image, model):
    image_bytes = get_image_bytes(image)
    # Basic captioning prompt
    prompt = "Describe what you see in this image in a single sentence."
    response = model.generate_content(
        [prompt, {"mime_type": "image/png", "data": image_bytes}]
    )
    return response.text


# Generate a detailed description with Gemini using a selectable prompt style.
def generate_detailed_description(image, model, prompt_type, custom_prompt=None):
    image_bytes = get_image_bytes(image)
    # Prompt templates, keyed by prompt type
    prompts = {
        "basic": "Please provide a description of the objects and their relationships in the image. Focus on identifying the most prominent objects and their actions.",
        "chain_of_thought": "Step 1: Look at the image carefully and identify all visible objects. Step 2: Consider what actions are occurring in the scene, if any. Step 3: Conclude with a description of the environment, highlighting any emotions or atmosphere present in the image.",
        "story": "Look at the image carefully. Based on what you see, create an imaginative story that could explain the scene in the picture. Include details that make the story engaging and creative.",
        "emotional": "Analyze the image and describe the emotions conveyed by the scene. Focus on the mood, facial expressions (if any), and the overall atmosphere.",
        "object": "List and describe all objects visible in the image in detail. Mention their characteristics, colors, and any notable features.",
        "context": "Describe the context of the image. What could be the location, time of day, or situation in the scene? Try to infer as much information as possible based on the image.",
        "action": "Look at the image and describe the main action or movement happening. Who or what is performing the action, and what is the outcome?",
    }
    # A custom prompt overrides the presets; unknown types fall back to "basic".
    if prompt_type == "custom" and custom_prompt:
        final_prompt = custom_prompt
    else:
        final_prompt = prompts.get(prompt_type, prompts["basic"])
    # Generate the response from the chosen prompt plus the image bytes
    response = model.generate_content(
        [final_prompt, {"mime_type": "image/png", "data": image_bytes}]
    )
    return response.text
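

# The block below is a minimal usage sketch, not part of the original module:
# it assumes a hypothetical sample image at "sample.jpg" and that a valid API
# key was picked up by the .env loading at the top of this file.
if __name__ == "__main__":
    # Load a test image and the Gemini model, then run both generation helpers.
    test_image = Image.open("sample.jpg")
    gemini_model = load_gemini_model()

    print("Caption:", generate_caption_with_gemini(test_image, gemini_model))
    print(
        "Detailed (chain_of_thought):",
        generate_detailed_description(test_image, gemini_model, "chain_of_thought"),
    )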