Commit 0db2cde
Parent(s): dcdf99d

internal comet

Files changed:
- evaluator/comet.py  +18 -12
- evaluator/comet_internal.py  +28 -0
evaluator/comet.py
CHANGED
@@ -1,28 +1,34 @@
  import os
+ import requests

+ # Set the Hugging Face Inference API URL and token
+ HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
+ HF_API_TOKEN = os.getenv("HF_API_TOKEN")  # Ensure this is set in your environment

  def calculate_comet(source_sentences, translations, references):
      """
+     Calculate COMET scores using the Hugging Face Inference API.
      :param source_sentences: List of source sentences.
      :param translations: List of translated sentences (hypotheses).
      :param references: List of reference translations.
      :return: List of COMET scores (one score per sentence pair).
      """
+     headers = {
+         "Authorization": f"Bearer {HF_API_TOKEN}",
+         "Content-Type": "application/json"
+     }

+     # Prepare data for the API
      data = [
+         {"source": src, "translation": mt, "reference": ref}
          for src, mt, ref in zip(source_sentences, translations, references)
      ]

+     # Make the API call
+     response = requests.post(HF_API_URL, headers=headers, json={"inputs": data})
+     response.raise_for_status()  # Raise an error for bad responses
+
+     # Parse the response
+     results = response.json()
+     scores = [item["score"] for item in results]  # Extract scores from the response
      return scores
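A minimal usage sketch of the API-backed scorer, assuming HF_API_TOKEN is exported in the environment and that the inference endpoint returns one {"score": ...} object per input pair; the sentence lists below are purely illustrative:

from evaluator.comet import calculate_comet

# Illustrative inputs; any three parallel lists of equal length work.
sources = ["Der Hund bellt.", "Ich trinke gerne Kaffee."]
translations = ["The dog barks.", "I like drinking coffee."]
references = ["The dog is barking.", "I enjoy drinking coffee."]

scores = calculate_comet(sources, translations, references)
for src, score in zip(sources, scores):
    print(f"{score:.4f}  {src}")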
evaluator/comet_internal.py
ADDED
@@ -0,0 +1,28 @@
+ import os
+ from comet import download_model, load_from_checkpoint
+
+ # Set a custom cache directory for COMET
+ os.environ["COMET_CACHE"] = "/tmp"
+
+ def calculate_comet(source_sentences, translations, references):
+     """
+     Calculate COMET scores for a list of translations.
+     :param source_sentences: List of source sentences.
+     :param translations: List of translated sentences (hypotheses).
+     :param references: List of reference translations.
+     :return: List of COMET scores (one score per sentence pair).
+     """
+     # Download and load the COMET model
+     model_path = download_model("Unbabel/wmt22-comet-da")  # Use a supported model
+     model = load_from_checkpoint(model_path)
+
+     # Prepare data for COMET
+     data = [
+         {"src": src, "mt": mt, "ref": ref}
+         for src, mt, ref in zip(source_sentences, translations, references)
+     ]
+
+     # Compute COMET scores
+     results = model.predict(data, batch_size=8, gpus=0)
+     scores = results["scores"]  # Extract the scores from the results
+     return scores
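A hedged sketch of how a caller might choose between the two implementations; the USE_INTERNAL_COMET flag and the selection logic below are hypothetical and not part of this commit:

import os

# Hypothetical switch: use the local checkpoint (comet_internal) when requested,
# otherwise the Hugging Face Inference API version (comet).
if os.getenv("USE_INTERNAL_COMET") == "1":
    from evaluator.comet_internal import calculate_comet
else:
    from evaluator.comet import calculate_comet

scores = calculate_comet(
    ["Bonjour tout le monde."],
    ["Hello everyone."],
    ["Hello, everyone."],
)
print(scores)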