File size: 5,445 Bytes
fbbdeab
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
"""
Validation module - file-level and AI-assisted validation of uploaded fish images
"""

import os
import re

from PIL import Image

from .config import MAX_FILE_SIZE_MB, MIN_IMAGE_SIZE_PX, VALID_EXTENSIONS

class FishImageValidator:
    """Validate fish images before they are used for disease diagnosis.

    Two independent stages are offered:

    * ``validate_file`` - local checks (existence, extension, file size,
      image integrity, minimum dimensions) using the filesystem and Pillow.
    * ``validate_with_gemini`` - optional content check that asks a Gemini
      vision model whether the picture shows one clearly visible fish.
    """

    # Fallback text used when the model gave no usable explanation.
    _DEFAULT_REASON = "See validation details"
    # Reasons are shown to the user, so keep them short.
    _MAX_REASON_LENGTH = 150

    # Spelled-out answers occasionally returned instead of a digit for
    # FISH_COUNT. Vague plurals are mapped to 2, i.e. "more than one".
    _COUNT_WORDS = {
        "ZERO": 0, "NONE": 0, "NO": 0,
        "ONE": 1, "SINGLE": 1,
        "TWO": 2, "THREE": 3, "FOUR": 4, "FIVE": 5,
        "SIX": 6, "SEVEN": 7, "EIGHT": 8, "NINE": 9, "TEN": 10,
        "MULTIPLE": 2, "SEVERAL": 2, "MANY": 2,
    }

    # Instruction sent to the model together with the image. The field names
    # in the "EXACT format" section are the ones parsed by _evaluate_answer.
    _GEMINI_PROMPT = """Analyze this image for fish disease diagnosis.

Answer these questions:

1. Is there a FISH visible in this image? (Can be a real photo, medical/diagnostic image, or isolated fish specimen on any background including transparent/solid backgrounds)
2. How many fish are in the image?
3. Is the fish body clearly visible (not just head or tail)?
4. Can you see enough fish detail for disease assessment?

Respond in this EXACT format:
CONTAINS_FISH: YES or NO
FISH_COUNT: [number]
BODY_VISIBLE: YES or NO
SUFFICIENT_DETAIL: YES or NO
REASON: [brief explanation if any answer is NO]

IMPORTANT NOTES:
- Isolated fish on transparent, white, or solid backgrounds ARE ACCEPTABLE (common in medical datasets)
- Focus on whether the FISH ITSELF is clear and detailed, not the background
- Reject only if it's clearly NOT a fish (toy, cartoon, drawing of non-fish subject)"""

    def __init__(self, max_size_mb=MAX_FILE_SIZE_MB,
                 min_size_px=MIN_IMAGE_SIZE_PX,
                 valid_extensions=VALID_EXTENSIONS):
        """
        Store the validation limits.

        Args:
            max_size_mb: Largest accepted file size, in units of 1024*1024 bytes.
            min_size_px: Smallest accepted width and height, in pixels.
            valid_extensions: Iterable of accepted filename suffixes; they are
                matched case-insensitively against the end of the path.
        """
        self.max_size_mb = max_size_mb
        self.min_size_px = min_size_px
        self.valid_extensions = valid_extensions

    def validate_file(self, file_path):
        """
        Validate file exists, type, size, and image integrity

        Args:
            file_path: Path of the image as ``str``, ``bytes`` or any
                ``os.PathLike`` object.

        Returns:
            tuple: (is_valid: bool, message: str, image: PIL.Image or None)
                The image is an RGB copy detached from the file on disk; it is
                ``None`` whenever validation fails.
        """
        try:
            path = os.fspath(file_path)
        except TypeError:
            # None or some other non-path object: there is nothing to look up.
            return False, "❌ File not found", None
        if isinstance(path, bytes):
            path = os.fsdecode(path)

        # Check 1: File exists (directories are not acceptable uploads)
        if not os.path.isfile(path):
            return False, "❌ File not found", None

        # Check 2: File extension
        suffixes = tuple(ext.lower() for ext in self.valid_extensions)
        if not path.lower().endswith(suffixes):
            return False, f"❌ Invalid file type. Accepted: {', '.join(self.valid_extensions)}", None

        # Check 3: File size
        try:
            file_size_mb = os.path.getsize(path) / (1024 * 1024)
        except OSError as e:
            return False, f"❌ Cannot read file: {e}", None
        if file_size_mb > self.max_size_mb:
            return False, f"❌ File too large ({file_size_mb:.1f}MB). Max: {self.max_size_mb}MB", None

        # Check 4: Valid image file
        try:
            # verify() leaves the Image object unusable, so it gets its own
            # short-lived handle and the file is opened again for real use.
            with Image.open(path) as probe:
                probe.verify()

            with Image.open(path) as source:
                # Check 5: Image dimensions - read from the header, before
                # paying for a full decode of an image that will be rejected.
                width, height = source.size
                if width < self.min_size_px or height < self.min_size_px:
                    return False, f"❌ Image too small ({width}x{height}px). Min: {self.min_size_px}x{self.min_size_px}px", None

                # convert() decodes the pixels into a new image, so the result
                # stays valid after the file handle is closed.
                img = source.convert('RGB')
        except Exception as e:
            # Pillow plugins raise a wide range of exception types for broken
            # files; any of them means the upload cannot be used.
            return False, f"❌ Invalid or corrupted image: {e}", None

        return True, "✅ File validation passed", img

    def validate_with_gemini(self, image, gemini_model):
        """
        AI-based validation with Gemini Vision
        Handles edge cases: multiple fish, dead fish, toys, drawings, partial fish
        Accepts dataset images with transparent or solid backgrounds

        Args:
            image: Image handed to the model together with the prompt.
            gemini_model: Object exposing ``generate_content(list)`` whose
                result has a ``text`` attribute, or ``None`` to skip this stage.

        Returns:
            tuple: (is_valid: bool, message: str)
                Validation passes (with a warning message) when the model is
                disabled or the request fails, so an outage never blocks users.
        """
        if gemini_model is None:
            return True, "⚠️ Gemini validation disabled"

        try:
            response = gemini_model.generate_content([self._GEMINI_PROMPT, image])
            return self._evaluate_answer(response.text.strip())
        except Exception as e:
            # Graceful degradation - log but don't block
            print(f"⚠️ Gemini validation error: {e}")
            return True, "⚠️ AI validation skipped (error occurred)"

    def _evaluate_answer(self, answer):
        """Turn the model's structured reply into an (is_valid, message) pair.

        Fields that are missing or unparseable are treated as "no objection",
        so only an explicit negative answer rejects the image.
        """
        # Check 1: Contains fish?
        if self._is_negative(self._parse_field(answer, "CONTAINS_FISH")):
            reason = self._extract_reason(answer)
            return False, f"❌ No fish detected. {reason}"

        fish_count = self._parse_count(self._parse_field(answer, "FISH_COUNT"))

        # Check 2: Fish count zero
        if fish_count == 0:
            return False, "❌ No fish found in image"

        # Check 3: Multiple fish
        if fish_count is not None and fish_count > 1:
            return False, "❌ Multiple fish detected. Upload single fish only"

        # Check 4: Body visible
        if self._is_negative(self._parse_field(answer, "BODY_VISIBLE")):
            return False, "❌ Fish body not clearly visible"

        # Check 5: Sufficient detail
        if self._is_negative(self._parse_field(answer, "SUFFICIENT_DETAIL")):
            reason = self._extract_reason(answer)
            return False, f"❌ Insufficient detail for diagnosis. {reason}"

        return True, "✅ Valid fish image detected"

    @staticmethod
    def _parse_field(response_text, field_name):
        """Return the first word following ``field_name`` on its line, upper-cased.

        Punctuation between the name and the value (colon, spaces, markdown
        asterisks, brackets) is skipped, so ``**FISH_COUNT:** [2]`` yields
        ``"2"``. Returns ``None`` when the field or its value is absent.
        """
        pattern = rf"\b{re.escape(field_name)}\b[^\w\n]*(\w+)"
        match = re.search(pattern, response_text, re.IGNORECASE)
        return match.group(1).upper() if match else None

    @staticmethod
    def _is_negative(value):
        """Tell whether a parsed YES/NO field holds a negative answer."""
        # Prefix match keeps variants such as "NOT" or "NONE" negative, as the
        # earlier substring-based check did.
        return value is not None and value.startswith("NO")

    @classmethod
    def _parse_count(cls, token):
        """Convert a parsed FISH_COUNT token to an int, or ``None`` if unknown."""
        if token is None:
            return None
        if token.isdecimal():
            return int(token)
        return cls._COUNT_WORDS.get(token)

    @staticmethod
    def _extract_reason(response_text):
        """Extract reason from Gemini response

        Returns the text after the last ``REASON:`` marker (matched
        case-insensitively), trimmed and limited to 150 characters, or a
        generic fallback when the marker is missing or nothing follows it.
        """
        parts = re.split(r"REASON\s*:", response_text, flags=re.IGNORECASE)
        if len(parts) > 1:
            # Drop surrounding whitespace and stray markdown emphasis markers.
            reason = parts[-1].strip(" \t\r\n*")
            if reason:
                return reason[:FishImageValidator._MAX_REASON_LENGTH]  # Limit length
        return FishImageValidator._DEFAULT_REASON