morgankavanagh's picture
Fixes of Docker, chrf, comet_hf, interface
d130b8e
from collections import Counter
def calculate_chrf(
    hypothesis: str,
    reference: str,
    char_order: int = 6,
    beta: float = 2.0,
) -> float:
    """
    Compute the character n-gram F-score (chrF) between a hypothesis and a reference string.

    For every n-gram order from 1 to ``char_order`` the clipped n-gram overlap is
    turned into a precision (overlap / hypothesis n-grams) and a recall
    (overlap / reference n-grams). Precision and recall are averaged over the
    orders that can actually be compared, and combined into an F-beta score.

    Only orders for which BOTH strings contain at least one n-gram take part in
    the average. Dividing by ``char_order`` unconditionally would penalise
    strings shorter than ``char_order`` characters (two identical 3-character
    strings would score 50 instead of 100).

    :param hypothesis:
        A string representing the hypothesis text.
    :param reference:
        A string representing the reference text.
    :param char_order:
        The maximum n-gram order to consider. Default is 6.
        This means that unigrams, bigrams, trigrams, fourgrams, fivegrams and sixgrams will be considered.
    :param beta:
        The weight of recall in the F-score. Default is 2.0.
    :return:
        The chrF score scaled to the range 0-100. Returns 0.0 when no n-gram
        order can be compared (an empty hypothesis or reference, or
        ``char_order`` below 1) or when the strings share no n-grams.
    """

    def get_ngrams(text: str, n: int) -> Counter:
        """Count every character n-gram of length ``n`` in ``text``."""
        return Counter(text[i:i + n] for i in range(len(text) - n + 1))

    # Running sums of the per-order precision and recall.
    precision_sum = 0.0
    recall_sum = 0.0
    # Number of n-gram orders that both strings are long enough to provide.
    effective_order = 0

    for n in range(1, char_order + 1):
        hyp_ngrams = get_ngrams(hypothesis, n)
        ref_ngrams = get_ngrams(reference, n)

        # A string with no n-grams of length n has none of any greater
        # length either, so no higher order can contribute.
        if not hyp_ngrams or not ref_ngrams:
            break

        # Counter intersection keeps the minimum count of each shared n-gram
        # (clipped matches).
        intersection = sum((hyp_ngrams & ref_ngrams).values())

        precision_sum += intersection / sum(hyp_ngrams.values())
        recall_sum += intersection / sum(ref_ngrams.values())
        effective_order += 1

    # Nothing comparable: empty input or a non-positive char_order.
    if effective_order == 0:
        return 0.0

    # Average precision and recall across the comparable n-gram orders.
    precision_avg = precision_sum / effective_order
    recall_avg = recall_sum / effective_order

    # Weighted harmonic mean (F-beta); beta > 1 favours recall.
    beta_squared = beta ** 2
    denominator = beta_squared * precision_avg + recall_avg
    if denominator == 0:
        return 0.0

    chrf = (1 + beta_squared) * (precision_avg * recall_avg) / denominator
    return chrf * 100  # Scale to percentage