import os
import time
import traceback

import requests
import pandas as pd
import gradio as gr
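
# Runtime configuration, read from environment variables (with the template's defaults).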
API_URL = os.getenv("API_URL", "https://agents-course-unit4-scoring.hf.space")
SPACE_ID = os.getenv("SPACE_ID", "framsouza/Final_Assignment_Template")
MODEL_ID = os.getenv("MODEL_ID", "meta-llama/Llama-2-7b-instruct")
HF_TOKEN = os.getenv("HUGGINGFACEHUB_API_TOKEN")

if not HF_TOKEN or not SPACE_ID:
    raise RuntimeError(
        "❌ Please set both SPACE_ID and HUGGINGFACEHUB_API_TOKEN in your Space Secrets."
    )

HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"}

WELCOME = """
## GAIA Benchmark Runner 🚀

Build your agent, score **≥30%** to earn your Certificate,
and see where you land on the Student Leaderboard!
"""

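# Thin wrapper around the Hugging Face serverless Inference API: each question is
# sent to MODEL_ID as a single text-generation request.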
class GAIAAgent:
    def __init__(self, model_id: str):
        print(f"[DEBUG] Initializing GAIAAgent with model={model_id}")
        self.model_id = model_id
        self.headers = HEADERS

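    # Send one prompt to the Inference API and return the generated text.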
    def answer(self, prompt: str) -> str:
        payload = {
            "inputs": prompt,
            "parameters": {
                "max_new_tokens": 512,
                "temperature": 0.2
            }
        }
        url = f"https://api-inference.huggingface.co/models/{self.model_id}"
        resp = requests.post(url, headers=self.headers, json=payload, timeout=60)
        resp.raise_for_status()
        data = resp.json()
        if isinstance(data, list) and data and "generated_text" in data[0]:
            return data[0]["generated_text"].strip()
        return str(data)


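# Fetch the GAIA questions, answer each one with the agent, submit everything to the
# scoring API, and return (status markdown, results DataFrame) for the UI.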
def run_and_submit_all():
    try:
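        # 1) Resolve the username that owns the API token.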
        who = requests.get("https://huggingface.co/api/whoami-v2", headers=HEADERS, timeout=10)
        who.raise_for_status()
        # whoami-v2 returns the account name at the top level of its JSON payload.
        username = who.json().get("name")
        if not username:
            return "❌ Could not fetch your HF username. Check your token.", pd.DataFrame()

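        # 2) Pull the question set from the scoring API.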
        q_resp = requests.get(f"{API_URL}/questions", timeout=15)
        q_resp.raise_for_status()
        questions = q_resp.json() or []
        if not questions:
            return "❌ No questions returned; check your API_URL.", pd.DataFrame()

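        # 3) Answer every task, keeping a display row and a submission entry per task,
        #    with a short pause between model calls.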
        agent = GAIAAgent(MODEL_ID)
        results = []
        payload = []
        for task in questions:
            tid = task["task_id"]
            q = task.get("question", "")
            try:
                ans = agent.answer(q)
            except Exception as e:
                ans = f"ERROR: {e}"
            results.append({"Task ID": tid, "Question": q, "Answer": ans})
            payload.append({"task_id": tid, "submitted_answer": ans})
            time.sleep(0.5)

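        # 4) Submit all answers in one request and read back the score.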
        submission = {
            "username": username,
            "agent_code": f"https://huggingface.co/spaces/{SPACE_ID}/tree/main",
            "answers": payload
        }
        s_resp = requests.post(f"{API_URL}/submit", json=submission, timeout=60)
        s_resp.raise_for_status()
        data = s_resp.json()

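        # Build a Markdown summary of the result for the status panel.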
        status = (
            f"✅ **Submission Successful!**\n\n"
            f"**User:** {data.get('username')}\n"
            f"**Score:** {data.get('score')}% "
            f"({data.get('correct_count')}/{data.get('total_attempted')} correct)\n"
            f"**Message:** {data.get('message')}"
        )
        return status, pd.DataFrame(results)

    except Exception as e:
        tb = traceback.format_exc()
        print("[ERROR] Unhandled exception:\n", tb)
        return f"❌ Unexpected error:\n{e}\n\nSee logs for details.", pd.DataFrame()


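# Single-button Gradio UI: clicking the button runs the full fetch/answer/submit flow
# and fills the status panel plus the per-task answer table.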
with gr.Blocks() as demo:
    gr.Markdown(WELCOME)
    run_btn = gr.Button("▶️ Run GAIA Benchmark")
    status = gr.Markdown()
    table_df = gr.Dataframe(headers=["Task ID", "Question", "Answer"], wrap=True)

    run_btn.click(
        fn=run_and_submit_all,
        inputs=[],
        outputs=[status, table_df]
    )


if __name__ == "__main__":
    demo.launch()