File size: 1,986 Bytes
5d5c713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d130b8e
5d5c713
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
from collections import Counter

def calculate_chrf(
        hypothesis: str,
        reference: str,
        char_order: int = 6,
        beta: float = 2.0,
) -> float:
    """
    Compute the character n-gram F-score (chrF) between a hypothesis and a reference string.

    For every n-gram order from 1 to ``char_order`` the clipped n-gram overlap is
    turned into a precision (relative to the hypothesis) and a recall (relative to
    the reference). Precision and recall are averaged over the *effective* orders,
    i.e. the orders for which both strings are long enough to contain at least one
    n-gram, and then combined into an F-beta score. Averaging over the effective
    orders only means that identical strings shorter than ``char_order`` still
    score 100 instead of being penalised for orders they cannot contain.

    All characters, including whitespace, are treated as part of the n-grams.

    :param hypothesis:
        A string representing the hypothesis text.
    :param reference:
        A string representing the reference text.
    :param char_order:
        The maximum n-gram order to consider. Default is 6.
        This means that unigrams, bigrams, trigrams, fourgrams, fivegrams and sixgrams will be considered.
    :param beta:
        The weight of recall in the F-score. Default is 2.0.
    :return:
        The chrF score scaled to the range 0-100. Returns 0.0 when there is no
        n-gram overlap, or when no order can be evaluated (an empty string on
        either side, or ``char_order`` smaller than 1).
    """

    def get_ngrams(text, n):
        """Count the character n-grams of length ``n`` in ``text``."""
        return Counter(text[i:i + n] for i in range(len(text) - n + 1))

    precision_sum = 0.0
    recall_sum = 0.0
    # Number of orders that actually contributed to the sums above.
    effective_order = 0

    for n in range(1, char_order + 1):
        hyp_ngrams = get_ngrams(hypothesis, n)
        ref_ngrams = get_ngrams(reference, n)

        # A string shorter than n has no n-grams of this or any higher order,
        # so no later order can contribute either.
        if not hyp_ngrams or not ref_ngrams:
            break

        # Counter intersection keeps the minimum count per n-gram (clipped matches).
        matches = sum((hyp_ngrams & ref_ngrams).values())

        precision_sum += matches / sum(hyp_ngrams.values())
        recall_sum += matches / sum(ref_ngrams.values())
        effective_order += 1

    # Nothing to average: avoids dividing by zero for empty inputs or char_order < 1.
    if effective_order == 0:
        return 0.0

    precision_avg = precision_sum / effective_order
    recall_avg = recall_sum / effective_order

    # Precision and recall share the same match counts, so they are either both
    # zero or both positive; this guard therefore also protects the division below.
    if precision_avg + recall_avg == 0:
        return 0.0

    beta_squared = beta ** 2
    chrf = (1 + beta_squared) * (precision_avg * recall_avg) / (beta_squared * precision_avg + recall_avg)
    return chrf * 100  # Scale to percentage