galihboy commited on
Commit
944da81
·
verified ·
1 Parent(s): 401a72d

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -729
app.py DELETED
@@ -1,729 +0,0 @@
1
import gradio as gr
from sentence_transformers import SentenceTransformer
import json
import numpy as np
import os
import httpx
import hashlib
from dotenv import load_dotenv

# Load environment variables from a local .env file (no-op on HF Spaces,
# where secrets arrive through the environment directly).
load_dotenv()

# Google GenAI SDK (new library)
from google import genai
from google.genai import types

# ==================== CONFIGURATION ====================

# Embedding model - auto-downloaded from the HF Hub on first use.
HF_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"

# Optional local path for development; ignored when it does not exist.
LOCAL_MODEL_PATH = r"E:\huggingface_models\hub\models--sentence-transformers--paraphrase-multilingual-MiniLM-L12-v2\snapshots"

# Supabase credentials come from environment variables for safety.
# On an HF Space: Settings > Repository secrets; locally: export them
# or rely on the empty-string defaults for testing.
SUPABASE_URL = os.environ.get("SUPABASE_URL", "")
SUPABASE_KEY = os.environ.get("SUPABASE_KEY", "")

# Gemini model selection (alternatives: gemini-2.5-flash, gemini-2.5-flash-lite).
GEMINI_MODEL = os.environ.get("GEMINI_MODEL", "gemini-2.5-pro")

# Collect numbered keys GEMINI_API_KEY_1 .. GEMINI_API_KEY_9 for rotation.
_numbered_keys = (os.environ.get(f"GEMINI_API_KEY_{i}", "") for i in range(1, 10))
GEMINI_API_KEYS = [key for key in _numbered_keys if key]

# Fall back to the single unnumbered key when no numbered keys are set.
if not GEMINI_API_KEYS:
    _single_key = os.environ.get("GEMINI_API_KEY", "")
    if _single_key:
        GEMINI_API_KEYS.append(_single_key)

# Zero-based index of the key currently in use (advanced on rate limits).
current_key_index = 0
49
-
50
def get_gemini_client():
    """Return a `genai.Client` bound to the currently active API key.

    Returns:
        A `genai.Client`, or None when no API keys are configured.
    """
    # Note: the original declared `global current_key_index` here, but the
    # variable is only read, never assigned, so the declaration is removed.
    if not GEMINI_API_KEYS:
        return None
    return genai.Client(api_key=GEMINI_API_KEYS[current_key_index])
56
-
57
def rotate_api_key():
    """Advance the round-robin pointer to the next Gemini API key.

    A no-op when zero or one key is configured. Returns the (possibly
    updated) zero-based index of the active key.
    """
    global current_key_index
    total_keys = len(GEMINI_API_KEYS)
    if total_keys > 1:
        current_key_index = (current_key_index + 1) % total_keys
        print(f"🔄 Rotated to API key #{current_key_index + 1}")
    return current_key_index
64
-
65
def call_gemini_with_retry(prompt: str, max_retries=None):
    """Call Gemini, rotating through API keys on rate-limit errors.

    Args:
        prompt: Text prompt sent to the model.
        max_retries: Maximum attempts; defaults to the number of keys so
            every configured key gets one chance.

    Returns:
        A ``(response, error)`` tuple: ``(response, None)`` on success,
        ``(None, error_message)`` on failure.
    """
    if not GEMINI_API_KEYS:
        return None, "No API keys configured"

    if max_retries is None:
        max_retries = len(GEMINI_API_KEYS)

    # Substrings that identify a quota/rate-limit failure worth retrying
    # on the next key (matched against the lowercased exception text).
    rate_limit_markers = ("429", "rate", "quota", "resource")
    last_error = None

    for _ in range(max_retries):
        try:
            client = get_gemini_client()
            response = client.models.generate_content(
                model=GEMINI_MODEL,
                contents=prompt
            )
            return response, None

        except Exception as e:
            error_str = str(e).lower()
            last_error = str(e)

            if any(marker in error_str for marker in rate_limit_markers):
                print(f"⚠️ Rate limit hit on key #{current_key_index + 1}: {e}")
                rotate_api_key()
                continue
            # Non-rate-limit error: fail fast instead of burning other keys.
            return None, str(e)

    return None, f"All API keys exhausted. Last error: {last_error}"
100
-
101
- # Initialize and print status
102
# Report Gemini configuration at import time so a misconfigured Space
# is visible immediately in the startup logs.
if not GEMINI_API_KEYS:
    print("⚠️ No Gemini API keys found")
else:
    print(f"✅ Gemini configured with {len(GEMINI_API_KEYS)} API key(s)")
    print(f" Model: {GEMINI_MODEL}")
107
-
108
def get_model_path():
    """Resolve the embedding-model location for the current environment.

    Prefers a local snapshot directory (development machine); falls back
    to the Hugging Face Hub model name (Space deployment, auto-download).
    """
    if os.path.exists(LOCAL_MODEL_PATH):
        snapshots = os.listdir(LOCAL_MODEL_PATH)
        if snapshots:
            # The original comment asked for the *latest* snapshot but the
            # code took listdir()[0], whose order is arbitrary. Pick the
            # most recently modified snapshot instead.
            candidates = [os.path.join(LOCAL_MODEL_PATH, name) for name in snapshots]
            return max(candidates, key=os.path.getmtime)
    # Fall back to the HF Hub (deployment on a Space).
    return HF_MODEL_NAME
118
-
119
- # Load model saat startup
120
- print("Loading model...")
121
- model_path = get_model_path()
122
- print(f"Using model from: {model_path}")
123
- model = SentenceTransformer(model_path)
124
- print("Model loaded successfully!")
125
-
126
-
127
def get_embedding(text: str):
    """Embed a single text; returns {"embedding": [...]} or {"error": ...}."""
    if not text or not text.strip():
        return {"error": "Text tidak boleh kosong"}

    try:
        vector = model.encode(text.strip())
        return {"embedding": vector.tolist()}
    except Exception as exc:
        return {"error": str(exc)}
137
-
138
-
139
def get_embeddings_batch(texts_json: str):
    """Embed a JSON array of texts; returns {"embeddings": [...]} or {"error": ...}."""
    try:
        parsed = json.loads(texts_json)
        if not isinstance(parsed, list):
            return {"error": "Input harus JSON array"}
        if not parsed:
            return {"error": "Array tidak boleh kosong"}

        # Drop entries that are empty or whitespace-only.
        cleaned = [item.strip() for item in parsed if item and item.strip()]
        if not cleaned:
            return {"error": "Semua text kosong"}

        vectors = model.encode(cleaned)
        return {"embeddings": vectors.tolist()}
    except json.JSONDecodeError:
        return {"error": "Invalid JSON format. Gunakan format: [\"teks 1\", \"teks 2\"]"}
    except Exception as exc:
        return {"error": str(exc)}
161
-
162
-
163
def calculate_similarity(text1: str, text2: str):
    """Cosine similarity of two texts via their embeddings.

    Returns {"similarity": float, "percentage": "NN.NN%"} or {"error": ...}.
    """
    if not text1 or not text1.strip():
        return {"error": "Text 1 tidak boleh kosong"}
    if not text2 or not text2.strip():
        return {"error": "Text 2 tidak boleh kosong"}

    try:
        vec_a, vec_b = model.encode([text1.strip(), text2.strip()])

        # Cosine similarity: dot product over the product of norms.
        denom = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
        similarity = np.dot(vec_a, vec_b) / denom

        return {
            "similarity": float(similarity),
            "percentage": f"{similarity * 100:.2f}%"
        }
    except Exception as exc:
        return {"error": str(exc)}
184
-
185
-
186
- # ==================== SUPABASE PROXY FUNCTIONS ====================
187
-
188
def get_supabase_headers():
    """Build the standard header set for Supabase REST calls."""
    headers = {
        "apikey": SUPABASE_KEY,
        "Authorization": f"Bearer {SUPABASE_KEY}",
        "Content-Type": "application/json",
    }
    # Ask PostgREST to echo the affected rows back in the response.
    headers["Prefer"] = "return=representation"
    return headers
196
-
197
-
198
def db_get_all_embeddings():
    """Fetch every cached proposal-embedding row from Supabase.

    Returns:
        {"data": rows, "count": n} on success, otherwise {"error": ...}
        (with a "detail" field for non-200 Supabase responses).
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return {"error": "Supabase not configured"}

    try:
        url = f"{SUPABASE_URL}/rest/v1/proposal_embeddings?select=nim,content_hash,embedding_combined,embedding_judul,embedding_deskripsi,embedding_problem,embedding_metode,nama,judul"

        with httpx.Client(timeout=30.0) as client:
            response = client.get(url, headers=get_supabase_headers())

        if response.status_code == 200:
            # Parse the body once and reuse it; the original called
            # response.json() twice, decoding the payload twice.
            rows = response.json()
            return {"data": rows, "count": len(rows)}
        return {"error": f"Supabase error: {response.status_code}", "detail": response.text}
    except Exception as e:
        return {"error": str(e)}
215
-
216
-
217
def db_get_embedding(nim: str, content_hash: str):
    """Fetch the cached embedding row for one (nim, content_hash) pair.

    Returns:
        {"data": row_or_None, "found": bool} on success, else {"error": ...}.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return {"error": "Supabase not configured"}

    try:
        # URL-encode the caller-supplied values so special characters
        # cannot break (or inject into) the PostgREST filter string.
        from urllib.parse import quote
        url = (
            f"{SUPABASE_URL}/rest/v1/proposal_embeddings"
            f"?nim=eq.{quote(nim)}&content_hash=eq.{quote(content_hash)}&select=*"
        )

        with httpx.Client(timeout=30.0) as client:
            response = client.get(url, headers=get_supabase_headers())

        if response.status_code == 200:
            data = response.json()
            return {"data": data[0] if data else None, "found": len(data) > 0}
        return {"error": f"Supabase error: {response.status_code}"}
    except Exception as e:
        return {"error": str(e)}
235
-
236
-
237
def db_save_embedding(data_json: str):
    """Upsert a proposal-embedding row into Supabase.

    Expects a JSON object string containing at least `nim`,
    `content_hash` and `embedding_combined`; the per-section embeddings
    and display fields are optional.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return {"error": "Supabase not configured"}

    try:
        record = json.loads(data_json)

        # Mandatory fields: the upsert key plus the main vector.
        if not record.get("nim") or not record.get("content_hash"):
            return {"error": "nim and content_hash are required"}
        if not record.get("embedding_combined"):
            return {"error": "embedding_combined is required"}

        endpoint = f"{SUPABASE_URL}/rest/v1/proposal_embeddings"
        headers = get_supabase_headers()
        # merge-duplicates turns the POST into an upsert on conflict.
        headers["Prefer"] = "resolution=merge-duplicates,return=representation"

        payload = {
            "nim": record["nim"],
            "content_hash": record["content_hash"],
            "embedding_combined": record["embedding_combined"],
            "embedding_judul": record.get("embedding_judul"),
            "embedding_deskripsi": record.get("embedding_deskripsi"),
            "embedding_problem": record.get("embedding_problem"),
            "embedding_metode": record.get("embedding_metode"),
            "nama": record.get("nama"),
            "judul": record.get("judul")
        }

        with httpx.Client(timeout=30.0) as client:
            response = client.post(endpoint, headers=headers, json=payload)

        if response.status_code in [200, 201]:
            return {"success": True, "data": response.json()}
        return {"error": f"Supabase error: {response.status_code}", "detail": response.text}
    except json.JSONDecodeError:
        return {"error": "Invalid JSON format"}
    except Exception as e:
        return {"error": str(e)}
279
-
280
-
281
def db_check_connection():
    """Ping Supabase with a minimal query and report connectivity."""
    if not SUPABASE_URL or not SUPABASE_KEY:
        return {"connected": False, "error": "Supabase URL or KEY not configured"}

    try:
        probe_url = f"{SUPABASE_URL}/rest/v1/proposal_embeddings?select=id&limit=1"

        with httpx.Client(timeout=10.0) as client:
            response = client.get(probe_url, headers=get_supabase_headers())

        # Truncate the URL in the report so secrets-adjacent config
        # never floods the UI/logs.
        shown_url = SUPABASE_URL[:30] + "..." if len(SUPABASE_URL) > 30 else SUPABASE_URL
        return {
            "connected": response.status_code == 200,
            "status_code": response.status_code,
            "supabase_url": shown_url
        }
    except Exception as e:
        return {"connected": False, "error": str(e)}
299
-
300
-
301
- # ==================== LLM CACHE FUNCTIONS (SUPABASE) ====================
302
-
303
def db_get_llm_analysis(pair_hash: str):
    """Look up a cached LLM verdict by pair hash.

    Returns the cached row (with `from_cache=True`) or None on a miss,
    misconfiguration, or any failure.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return None

    try:
        url = f"{SUPABASE_URL}/rest/v1/llm_analysis?pair_hash=eq.{pair_hash}&select=*"

        with httpx.Client(timeout=10.0) as client:
            response = client.get(url, headers=get_supabase_headers())

        if response.status_code != 200:
            return None
        rows = response.json()
        if not rows:
            return None

        cached = rows[0]
        # JSONB columns may come back as strings; normalise to a dict.
        if isinstance(cached.get('similar_aspects'), str):
            cached['similar_aspects'] = json.loads(cached['similar_aspects'])
        cached['from_cache'] = True
        return cached
    except Exception as e:
        print(f"Error getting cached LLM analysis: {e}")
        return None
327
-
328
-
329
def db_save_llm_analysis(pair_hash: str, proposal1_judul: str, proposal2_judul: str, result: dict):
    """Persist an LLM comparison result to the Supabase cache table.

    Returns True on success, False otherwise; failures are logged only,
    so a broken cache never breaks the analysis itself.
    """
    if not SUPABASE_URL or not SUPABASE_KEY:
        return False

    try:
        endpoint = f"{SUPABASE_URL}/rest/v1/llm_analysis"
        headers = get_supabase_headers()
        headers["Prefer"] = "resolution=merge-duplicates"  # Upsert

        row = {
            "pair_hash": pair_hash,
            # Titles are truncated to fit the 500-char columns.
            "proposal1_judul": proposal1_judul[:500] if proposal1_judul else "",
            "proposal2_judul": proposal2_judul[:500] if proposal2_judul else "",
            "similarity_score": result.get("similarity_score"),
            "verdict": result.get("verdict"),
            "reasoning": result.get("reasoning"),
            "saran": result.get("saran"),
            "similar_aspects": json.dumps(result.get("similar_aspects", {})),
            "differentiator": result.get("differentiator"),
            "model_used": result.get("model_used", GEMINI_MODEL)
        }

        with httpx.Client(timeout=10.0) as client:
            response = client.post(endpoint, headers=headers, json=row)

        if response.status_code in [200, 201]:
            print(f"✅ LLM result cached: {pair_hash[:8]}...")
            return True
        print(f"⚠️ Failed to cache LLM result: {response.status_code}")
        return False
    except Exception as e:
        print(f"Error saving LLM analysis: {e}")
        return False
364
-
365
-
366
- # ==================== LLM FUNCTIONS (GEMINI) ====================
367
-
368
def generate_pair_hash(proposal1: dict, proposal2: dict) -> str:
    """Build an order-independent cache key for a proposal pair.

    Each proposal's identifying fields are hashed individually, then the
    two digests are combined in sorted order so (A, B) and (B, A) map to
    the same key.
    """
    def _digest(p):
        content = f"{p.get('nim', '')}|{p.get('judul', '')}|{p.get('deskripsi', '')}|{p.get('problem', '')}|{p.get('metode', '')}"
        return hashlib.md5(content.encode()).hexdigest()[:16]

    low, high = sorted((_digest(proposal1), _digest(proposal2)))
    return hashlib.md5(f"{low}|{high}".encode()).hexdigest()[:32]
379
-
380
-
381
def llm_analyze_pair(proposal1_json: str, proposal2_json: str, use_cache: bool = True):
    """Compare two full proposals with Gemini and cache the verdict.

    Args:
        proposal1_json: JSON object string (nim, nama, judul, deskripsi,
            problem, metode).
        proposal2_json: Same shape as proposal1_json.
        use_cache: When True, return a previously cached Supabase result
            for this pair instead of calling Gemini again.

    Returns:
        The parsed analysis dict (augmented with pair_hash/model metadata)
        or {"error": ...} on any failure.
    """
    if not GEMINI_API_KEYS:
        return {"error": "Gemini API key not configured. Set GEMINI_API_KEY_1, GEMINI_API_KEY_2, etc in .env file"}

    try:
        proposal1 = json.loads(proposal1_json)
        proposal2 = json.loads(proposal2_json)
    except json.JSONDecodeError:
        return {"error": "Invalid JSON format for proposals"}

    # Order-independent key for the cache lookup/save.
    pair_hash = generate_pair_hash(proposal1, proposal2)

    if use_cache:
        cached_result = db_get_llm_analysis(pair_hash)
        if cached_result:
            print(f"📦 Using cached LLM result: {pair_hash[:8]}...")
            return cached_result

    # Build the (Indonesian-language) grading prompt. Descriptions and
    # problem statements are capped at 500 chars to bound token usage.
    prompt = f"""Anda adalah penilai kemiripan proposal skripsi yang ahli dan berpengalaman. Analisis dua proposal berikut dengan KRITERIA AKADEMIK yang benar.

ATURAN PENILAIAN PENTING:
1. Proposal skripsi dianggap BERMASALAH hanya jika KETIGA aspek ini SAMA: Topik/Domain + Dataset/Objek Penelitian + Metode/Algoritma
2. Jika METODE BERBEDA (walaupun topik & dataset sama) → AMAN, karena memberikan kontribusi ilmiah berbeda
3. Jika DATASET/OBJEK BERBEDA (walaupun topik & metode sama) → AMAN, karena studi kasus berbeda
4. Jika TOPIK/DOMAIN BERBEDA → AMAN
5. Penelitian replikasi dengan variasi adalah HAL YANG WAJAR dalam dunia akademik

PROPOSAL 1:
- NIM: {proposal1.get('nim', 'N/A')}
- Nama: {proposal1.get('nama', 'N/A')}
- Judul: {proposal1.get('judul', 'N/A')}
- Deskripsi: {proposal1.get('deskripsi', 'N/A')[:500] if proposal1.get('deskripsi') else 'N/A'}
- Problem Statement: {proposal1.get('problem', 'N/A')[:500] if proposal1.get('problem') else 'N/A'}
- Metode: {proposal1.get('metode', 'N/A')}

PROPOSAL 2:
- NIM: {proposal2.get('nim', 'N/A')}
- Nama: {proposal2.get('nama', 'N/A')}
- Judul: {proposal2.get('judul', 'N/A')}
- Deskripsi: {proposal2.get('deskripsi', 'N/A')[:500] if proposal2.get('deskripsi') else 'N/A'}
- Problem Statement: {proposal2.get('problem', 'N/A')[:500] if proposal2.get('problem') else 'N/A'}
- Metode: {proposal2.get('metode', 'N/A')}

ANALISIS dengan cermat, lalu berikan output JSON (HANYA JSON, tanpa markdown):
{{
"similarity_score": <0-100, tinggi HANYA jika topik+dataset+metode SEMUA sama>,
"verdict": "<BERMASALAH jika score>=80, PERLU_REVIEW jika 50-79, AMAN jika <50>",
"similar_aspects": {{
"topik": <true/false - apakah tema/domain penelitian sama>,
"dataset": <true/false - apakah objek/data penelitian sama>,
"metode": <true/false - apakah algoritma/metode sama>,
"pendekatan": <true/false - apakah framework/pendekatan sama>
}},
"differentiator": "<aspek pembeda utama: metode/dataset/domain/tidak_ada>",
"reasoning": "<analisis mendalam 4-5 kalimat: jelaskan persamaan dan perbedaan dari aspek topik, dataset, dan metode. Jelaskan mengapa proposal ini aman/bermasalah berdasarkan kriteria akademik>",
"saran": "<nasihat konstruktif 2-3 kalimat untuk mahasiswa: jika aman, beri saran penguatan diferensiasi. Jika bermasalah, beri warning dan alternatif arah penelitian>"
}}"""

    # Call Gemini with retry/key rotation.
    response, error = call_gemini_with_retry(prompt)

    if error:
        return {"error": f"Gemini API error: {error}"}

    # BUGFIX: the original error path probed `'response_text' in dir()`,
    # a fragile scope check. Pre-initialize so the variable always exists.
    response_text = ""
    try:
        response_text = response.text.strip()

        # Strip a markdown code fence if the model wrapped its JSON.
        if response_text.startswith("```"):
            lines = response_text.split("\n")
            response_text = "\n".join(lines[1:-1])  # Remove first and last lines

        result = json.loads(response_text)
        result["pair_hash"] = pair_hash
        result["model_used"] = GEMINI_MODEL
        result["api_key_used"] = current_key_index + 1
        result["from_cache"] = False

        # Best-effort cache save (failures are logged, not raised).
        db_save_llm_analysis(
            pair_hash=pair_hash,
            proposal1_judul=proposal1.get('judul', ''),
            proposal2_judul=proposal2.get('judul', ''),
            result=result
        )

        return result

    except json.JSONDecodeError as e:
        return {
            "error": "Failed to parse LLM response as JSON",
            "raw_response": response_text,
            "parse_error": str(e)
        }
480
-
481
-
482
def llm_check_status():
    """Report Gemini configuration and run a tiny connectivity probe."""
    if not GEMINI_API_KEYS:
        return {
            "configured": False,
            "error": "No GEMINI_API_KEY found in environment"
        }

    probe_response, probe_error = call_gemini_with_retry("Respond with only: OK")

    if probe_error:
        return {
            "configured": True,
            "total_keys": len(GEMINI_API_KEYS),
            "model": GEMINI_MODEL,
            "status": "error",
            "error": probe_error
        }

    return {
        "configured": True,
        "total_keys": len(GEMINI_API_KEYS),
        "current_key": current_key_index + 1,
        "model": GEMINI_MODEL,
        "status": "connected",
        "test_response": probe_response.text.strip()[:50]
    }
509
-
510
-
511
def llm_analyze_simple(judul1: str, judul2: str, metode1: str, metode2: str):
    """Simplified pairwise analysis using only titles and methods (for quick testing).

    Unlike llm_analyze_pair, this variant does no Supabase caching and
    sends only the two titles and methods to Gemini.
    Returns the parsed analysis dict or {"error": ...}.
    """
    if not GEMINI_API_KEYS:
        return {"error": "Gemini API key not configured"}

    # Indonesian-language grading prompt; the model is instructed to
    # answer with bare JSON (no markdown).
    prompt = f"""Anda adalah penilai kemiripan proposal skripsi yang ahli. Bandingkan dua proposal berikut dengan KRITERIA AKADEMIK yang benar.

ATURAN PENILAIAN PENTING:
1. Proposal skripsi dianggap BERMASALAH hanya jika KETIGA aspek ini SAMA: Topik/Domain + Dataset + Metode
2. Jika METODE BERBEDA (walaupun topik sama) → AMAN, karena kontribusi berbeda
3. Jika DATASET BERBEDA (walaupun topik & metode sama) → AMAN, karena studi kasus berbeda
4. Jika TOPIK/DOMAIN BERBEDA → AMAN

Proposal 1:
- Judul: {judul1}
- Metode: {metode1}

Proposal 2:
- Judul: {judul2}
- Metode: {metode2}

ANALISIS dengan cermat, lalu berikan output JSON (HANYA JSON, tanpa markdown):
{{
"similarity_score": <0-100, tinggi HANYA jika topik+dataset+metode SEMUA sama>,
"verdict": "<BERMASALAH jika score>=80, PERLU_REVIEW jika 50-79, AMAN jika <50>",
"topik_sama": <true/false>,
"metode_sama": <true/false>,
"differentiator": "<aspek pembeda utama: metode/dataset/domain/tidak_ada>",
"reasoning": "<analisis mendalam 3-4 kalimat: jelaskan persamaan, perbedaan, dan mengapa aman/bermasalah>",
"saran": "<nasihat konstruktif untuk mahasiswa, misal: cara memperkuat diferensiasi, atau warning jika terlalu mirip>"
}}"""

    response, error = call_gemini_with_retry(prompt)

    if error:
        return {"error": error}

    try:
        response_text = response.text.strip()

        # Strip a markdown code fence if the model wrapped its JSON.
        if response_text.startswith("```"):
            lines = response_text.split("\n")
            response_text = "\n".join(lines[1:-1])

        result = json.loads(response_text)
        result["model_used"] = GEMINI_MODEL
        result["api_key_used"] = current_key_index + 1
        return result

    except json.JSONDecodeError as e:
        return {"error": f"Failed to parse response: {e}", "raw": response_text}
562
-
563
-
564
# Gradio Interface
# The Blocks app doubles as a human-facing demo and, via the
# /gradio_api/call/<fn> endpoints, a JSON API for the monitoring front-end.
with gr.Blocks(title="Semantic Embedding API") as demo:
    gr.Markdown("# 🔤 Semantic Embedding API")
    gr.Markdown("API untuk menghasilkan text embedding menggunakan `paraphrase-multilingual-MiniLM-L12-v2`")
    gr.Markdown("**Model**: Multilingual, mendukung 50+ bahasa termasuk Bahasa Indonesia")

    # --- Tab: embed a single text ---
    with gr.Tab("🔢 Single Embedding"):
        gr.Markdown("Generate embedding vector untuk satu teks")
        text_input = gr.Textbox(
            label="Input Text",
            placeholder="Masukkan teks untuk di-embed...",
            lines=2
        )
        single_output = gr.JSON(label="Embedding Result")
        single_btn = gr.Button("Generate Embedding", variant="primary")
        single_btn.click(fn=get_embedding, inputs=text_input, outputs=single_output)

    # --- Tab: embed many texts at once ---
    with gr.Tab("📦 Batch Embedding"):
        gr.Markdown("Generate embeddings untuk multiple teks sekaligus")
        batch_input = gr.Textbox(
            label="JSON Array of Texts",
            placeholder='["teks pertama", "teks kedua", "teks ketiga"]',
            lines=4
        )
        batch_output = gr.JSON(label="Embeddings Result")
        batch_btn = gr.Button("Generate Embeddings", variant="primary")
        batch_btn.click(fn=get_embeddings_batch, inputs=batch_input, outputs=batch_output)

    # --- Tab: cosine similarity between two texts ---
    with gr.Tab("📊 Similarity Check"):
        gr.Markdown("Hitung kemiripan semantik antara dua teks")
        with gr.Row():
            sim_text1 = gr.Textbox(label="Text 1", placeholder="Teks pertama...", lines=2)
            sim_text2 = gr.Textbox(label="Text 2", placeholder="Teks kedua...", lines=2)
        sim_output = gr.JSON(label="Similarity Result")
        sim_btn = gr.Button("Calculate Similarity", variant="primary")
        sim_btn.click(fn=calculate_similarity, inputs=[sim_text1, sim_text2], outputs=sim_output)

    # --- Tab: Supabase embedding cache (reads only; writes are API-only) ---
    with gr.Tab("💾 Database (Supabase)"):
        gr.Markdown("### Supabase Cache Operations")
        gr.Markdown("Proxy untuk akses Supabase (API key aman di server)")
        gr.Markdown("*Note: Operasi write (save) hanya tersedia melalui API untuk keamanan.*")

        with gr.Row():
            db_check_btn = gr.Button("🔌 Check Connection", variant="secondary")
            db_check_output = gr.JSON(label="Connection Status")
        db_check_btn.click(fn=db_check_connection, outputs=db_check_output)

        gr.Markdown("---")

        gr.Markdown("#### Get All Cached Embeddings")
        db_all_btn = gr.Button("📥 Get All Embeddings", variant="primary")
        db_all_output = gr.JSON(label="All Embeddings")
        db_all_btn.click(fn=db_get_all_embeddings, outputs=db_all_output)

        gr.Markdown("---")

        gr.Markdown("#### Get Single Embedding by NIM")
        with gr.Row():
            db_nim_input = gr.Textbox(label="NIM", placeholder="10121xxx")
            db_hash_input = gr.Textbox(label="Content Hash", placeholder="abc123...")
        db_get_btn = gr.Button("🔍 Get Embedding", variant="primary")
        db_get_output = gr.JSON(label="Embedding Result")
        db_get_btn.click(fn=db_get_embedding, inputs=[db_nim_input, db_hash_input], outputs=db_get_output)

    # --- Tab: Gemini-powered similarity analysis ---
    with gr.Tab("🤖 LLM Analysis (Gemini)"):
        gr.Markdown("### Analisis Kemiripan dengan LLM")
        gr.Markdown("Menggunakan Google Gemini untuk analisis mendalam dengan penjelasan")

        with gr.Row():
            llm_check_btn = gr.Button("🔌 Check Gemini Status", variant="secondary")
            llm_check_output = gr.JSON(label="Gemini Status")
        llm_check_btn.click(fn=llm_check_status, outputs=llm_check_output)

        gr.Markdown("---")

        # Quick mode: titles + methods only, no caching.
        gr.Markdown("#### Quick Analysis (Judul + Metode saja)")
        with gr.Row():
            with gr.Column():
                llm_judul1 = gr.Textbox(label="Judul Proposal 1", placeholder="Analisis Sentimen dengan SVM...", lines=2)
                llm_metode1 = gr.Textbox(label="Metode 1", placeholder="Support Vector Machine")
            with gr.Column():
                llm_judul2 = gr.Textbox(label="Judul Proposal 2", placeholder="Klasifikasi Sentimen dengan SVM...", lines=2)
                llm_metode2 = gr.Textbox(label="Metode 2", placeholder="Support Vector Machine")

        llm_simple_btn = gr.Button("🚀 Analyze (Quick)", variant="primary")
        llm_simple_output = gr.JSON(label="Quick Analysis Result")
        llm_simple_btn.click(
            fn=llm_analyze_simple,
            inputs=[llm_judul1, llm_judul2, llm_metode1, llm_metode2],
            outputs=llm_simple_output
        )

        gr.Markdown("---")

        # Full mode: complete proposal JSON, with Supabase caching.
        gr.Markdown("#### Full Analysis (Complete Proposal Data)")
        gr.Markdown("*Hasil di-cache ke Supabase. Request yang sama akan menggunakan cache.*")
        with gr.Row():
            llm_proposal1 = gr.Textbox(
                label="Proposal 1 (JSON)",
                placeholder='{"nim": "123", "nama": "Ahmad", "judul": "...", "deskripsi": "...", "problem": "...", "metode": "..."}',
                lines=5
            )
            llm_proposal2 = gr.Textbox(
                label="Proposal 2 (JSON)",
                placeholder='{"nim": "456", "nama": "Budi", "judul": "...", "deskripsi": "...", "problem": "...", "metode": "..."}',
                lines=5
            )

        with gr.Row():
            llm_use_cache = gr.Checkbox(label="Gunakan Cache", value=True, info="Uncheck untuk force refresh dari Gemini")
            llm_full_btn = gr.Button("🔍 Analyze (Full)", variant="primary")

        llm_full_output = gr.JSON(label="Full Analysis Result")
        llm_full_btn.click(
            fn=llm_analyze_pair,
            inputs=[llm_proposal1, llm_proposal2, llm_use_cache],
            outputs=llm_full_output
        )

        gr.Markdown("""
**Output mencakup:**
- `similarity_score`: Skor 0-100 (tinggi hanya jika topik+dataset+metode sama)
- `verdict`: BERMASALAH / PERLU_REVIEW / AMAN
- `reasoning`: Analisis mendalam dari AI
- `similar_aspects`: Aspek yang mirip (topik/dataset/metode/pendekatan)
- `differentiator`: Pembeda utama
- `saran`: Nasihat untuk mahasiswa
- `from_cache`: true jika hasil dari cache
""")

    # --- Developer-facing API usage notes ---
    with gr.Accordion("📡 API Usage (untuk Developer)", open=False):
        gr.Markdown("""
### Endpoints

#### Embedding
- `get_embedding` - Single text embedding
- `get_embeddings_batch` - Batch text embeddings
- `calculate_similarity` - Compare two texts

#### Database (Supabase Proxy)
- `db_check_connection` - Test Supabase connection
- `db_get_all_embeddings` - Get all cached embeddings
- `db_get_embedding` - Get embedding by NIM + hash
- `db_save_embedding` - Save embedding to cache

### Example API Call
```javascript
// Get all cached embeddings
const response = await fetch("YOUR_SPACE_URL/gradio_api/call/db_get_all_embeddings", {
    method: "POST",
    headers: { "Content-Type": "application/json" },
    body: JSON.stringify({ data: [] })
});
const result = await response.json();
const eventId = result.event_id;

// Get result
const dataResponse = await fetch(`YOUR_SPACE_URL/gradio_api/call/db_get_all_embeddings/${eventId}`);
```
""")

    gr.Markdown("---")
    gr.Markdown("*Dibuat untuk Monitoring Proposal Skripsi KK E - UNIKOM*")

# Launch with the Gradio API enabled (the default), so the functions
# above are callable as endpoints by the monitoring front-end.
demo.launch()