from collections import Counter


def _char_ngram_counts(text: str, n: int) -> Counter:
    """Count every contiguous character n-gram of length *n* in *text*."""
    # range() is empty when the text is shorter than n, so the result is an
    # empty Counter in that case.
    return Counter(text[i:i + n] for i in range(len(text) - n + 1))


def calculate_chrf(
    hypothesis: str,
    reference: str,
    char_order: int = 6,
    beta: float = 2.0,
) -> float:
    """
    Compute the character n-gram F-score (chrF) between a hypothesis and a
    reference string.

    For every n-gram order from 1 to ``char_order`` the clipped n-gram overlap
    is turned into a precision (overlap / hypothesis n-grams) and a recall
    (overlap / reference n-grams). Precision and recall are averaged over the
    *effective* orders only, i.e. the orders for which both strings contain at
    least one n-gram, so that strings shorter than ``char_order`` are not
    penalised for n-grams they cannot possibly contain (two identical short
    strings score 100).

    :param hypothesis: A string representing the hypothesis text.
    :param reference: A string representing the reference text.
    :param char_order: The maximum n-gram order to consider. Default is 6,
        meaning that n-grams of length 1 through 6 are considered.
    :param beta: The weight of recall relative to precision in the F-score.
        Default is 2.0.
    :return: The chrF score scaled to the range 0-100. Returns 0.0 when there
        is no n-gram overlap, when either string is empty, or when
        ``char_order`` is smaller than 1.
    """
    precision_sum = 0.0
    recall_sum = 0.0
    effective_order = 0

    for n in range(1, char_order + 1):
        hyp_ngrams = _char_ngram_counts(hypothesis, n)
        ref_ngrams = _char_ngram_counts(reference, n)

        # A string shorter than n has no n-grams of this or any higher order,
        # so no later order can contribute either.
        if not hyp_ngrams or not ref_ngrams:
            break

        # Counter intersection keeps the minimum count of each shared n-gram,
        # which is the clipped match count.
        matches = sum((hyp_ngrams & ref_ngrams).values())

        precision_sum += matches / sum(hyp_ngrams.values())
        recall_sum += matches / sum(ref_ngrams.values())
        effective_order += 1

    # Nothing was comparable (empty input or char_order < 1).
    if effective_order == 0:
        return 0.0

    precision_avg = precision_sum / effective_order
    recall_avg = recall_sum / effective_order

    # No overlap at any order; also guards the division below.
    if precision_avg + recall_avg == 0:
        return 0.0

    # Weighted harmonic mean of precision and recall (F-beta).
    beta_squared = beta ** 2
    chrf = (
        (1 + beta_squared)
        * (precision_avg * recall_avg)
        / (beta_squared * precision_avg + recall_avg)
    )
    return chrf * 100  # Scale to percentage