Spaces:

autogenCTF
/

agent_ctf_leaderboard

Running

胥基 committed on Mar 29, 2024

Commit

2c1761f

1 Parent(s): 9eea524

change app

Files changed (3) hide show

__pycache__/content.cpython-310.pyc ADDED Viewed

Binary file (3.37 kB). View file

__pycache__/scorer.cpython-310.pyc ADDED Viewed

Binary file (2.65 kB). View file

app.py CHANGED Viewed

@@ -26,7 +26,7 @@ RESULTS_DATASET = f"{OWNER}/CTFAIA_results_public"
 LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
 api = HfApi()
-YEAR_VERSION = "2024"
 os.makedirs("scored", exist_ok=True)
@@ -56,8 +56,8 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
 # Gold answers
 gold_results = {}
-gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}_all", token=TOKEN)
-gold_results = {split: {row["task_id"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
 def restart_space():
@@ -113,17 +113,17 @@ def add_new_eval(
                 if "model_answer" not in task:
                     raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
                 answer = task["model_answer"]
-                task_id = task["task_id"]
                 try:
-                    level = int(gold_results[val_or_test][task_id]["Level"])
                 except KeyError:
-                    return format_error(f"{task_id} not found in split {val_or_test}. Are you sure you submitted the correct file?")
-                score = question_scorer(task['model_answer'], gold_results[val_or_test][task_id]["Final answer"])
                 scored_file.write(
                     json.dumps({
-                        "id": task_id,
                         "model_answer": answer,
                         "score": score,
                         "level": level

 LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
 api = HfApi()
+YEAR_VERSION = "default"
 os.makedirs("scored", exist_ok=True)
 # Gold answers
 gold_results = {}
+gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}", token=TOKEN)
+gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
 def restart_space():
                 if "model_answer" not in task:
                     raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
                 answer = task["model_answer"]
+                task_name = task["task_name"]
                 try:
+                    level = int(gold_results[val_or_test][task_name]["Level"])
                 except KeyError:
+                    return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
+                score = question_scorer(task['model_answer'], gold_results[val_or_test][task_name]["Final answer"])
                 scored_file.write(
                     json.dumps({
+                        "id": task_name,
                         "model_answer": answer,
                         "score": score,
                         "level": level