胥基
committed on
Commit
·
2c1761f
1
Parent(s):
9eea524
change app
Browse files
- __pycache__/content.cpython-310.pyc +0 -0
- __pycache__/scorer.cpython-310.pyc +0 -0
- app.py +8 -8
__pycache__/content.cpython-310.pyc
ADDED
|
Binary file (3.37 kB). View file
|
|
|
__pycache__/scorer.cpython-310.pyc
ADDED
|
Binary file (2.65 kB). View file
|
|
|
app.py
CHANGED
|
@@ -26,7 +26,7 @@ RESULTS_DATASET = f"{OWNER}/CTFAIA_results_public"
|
|
| 26 |
LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
|
| 27 |
api = HfApi()
|
| 28 |
|
| 29 |
-
YEAR_VERSION = "
|
| 30 |
|
| 31 |
os.makedirs("scored", exist_ok=True)
|
| 32 |
|
|
@@ -56,8 +56,8 @@ eval_dataframe_test = get_dataframe_from_results(eval_results=eval_results, spli
|
|
| 56 |
|
| 57 |
# Gold answers
|
| 58 |
gold_results = {}
|
| 59 |
-
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}
|
| 60 |
-
gold_results = {split: {row["
|
| 61 |
|
| 62 |
|
| 63 |
def restart_space():
|
|
@@ -113,17 +113,17 @@ def add_new_eval(
|
|
| 113 |
if "model_answer" not in task:
|
| 114 |
raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
|
| 115 |
answer = task["model_answer"]
|
| 116 |
-
|
| 117 |
try:
|
| 118 |
-
level = int(gold_results[val_or_test][
|
| 119 |
except KeyError:
|
| 120 |
-
return format_error(f"{
|
| 121 |
|
| 122 |
-
score = question_scorer(task['model_answer'], gold_results[val_or_test][
|
| 123 |
|
| 124 |
scored_file.write(
|
| 125 |
json.dumps({
|
| 126 |
-
"id":
|
| 127 |
"model_answer": answer,
|
| 128 |
"score": score,
|
| 129 |
"level": level
|
|
|
|
| 26 |
LEADERBOARD_PATH = f"{OWNER}/agent_ctf_leaderboard"
|
| 27 |
api = HfApi()
|
| 28 |
|
| 29 |
+
YEAR_VERSION = "default"
|
| 30 |
|
| 31 |
os.makedirs("scored", exist_ok=True)
|
| 32 |
|
|
|
|
| 56 |
|
| 57 |
# Gold answers
|
| 58 |
gold_results = {}
|
| 59 |
+
gold_dataset = load_dataset(INTERNAL_DATA_DATASET, f"{YEAR_VERSION}", token=TOKEN)
|
| 60 |
+
gold_results = {split: {row["task_name"]: row for row in gold_dataset[split]} for split in ["test", "validation"]}
|
| 61 |
|
| 62 |
|
| 63 |
def restart_space():
|
|
|
|
| 113 |
if "model_answer" not in task:
|
| 114 |
raise format_error(f"Line {ix} contains no model_answer key. Please fix it and resubmit your file.")
|
| 115 |
answer = task["model_answer"]
|
| 116 |
+
task_name = task["task_name"]
|
| 117 |
try:
|
| 118 |
+
level = int(gold_results[val_or_test][task_name]["Level"])
|
| 119 |
except KeyError:
|
| 120 |
+
return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
|
| 121 |
|
| 122 |
+
score = question_scorer(task['model_answer'], gold_results[val_or_test][task_name]["Final answer"])
|
| 123 |
|
| 124 |
scored_file.write(
|
| 125 |
json.dumps({
|
| 126 |
+
"id": task_name,
|
| 127 |
"model_answer": answer,
|
| 128 |
"score": score,
|
| 129 |
"level": level
|