morgankavanagh commited on
Commit
1fb8162
·
1 Parent(s): db8ecbb
.gitignore CHANGED
@@ -1,3 +1,3 @@
1
  myvenv/
2
  tests.py/
3
- test_hf_aip.py
 
1
  myvenv/
2
  tests.py/
3
+ text_hf_aip.py
evaluator/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (181 Bytes). View file
 
evaluator/__pycache__/chrf.cpython-312.pyc ADDED
Binary file (2.25 kB). View file
 
evaluator/__pycache__/comet.cpython-312.pyc ADDED
Binary file (1.78 kB). View file
 
evaluator/comet_hf.py CHANGED
@@ -2,7 +2,8 @@ import os
2
  import requests
3
 
4
  # Set the Hugging Face Inference API URL and token
5
- HF_API_URL = "https://api-inference.huggingface.co/models/Unbabel/wmt20-comet-da"
 
6
  HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Ensure this is set in your environment
7
 
8
  def calculate_comet(source_sentences, translations, references):
 
2
  import requests
3
 
4
  # Set the Hugging Face Inference API URL and token
5
+ HF_API_URL = "https://huggingface.co/Unbabel/wmt22-comet-da"
6
+
7
  HF_API_TOKEN = os.getenv("HF_API_TOKEN") # Ensure this is set in your environment
8
 
9
  def calculate_comet(source_sentences, translations, references):
interface.py CHANGED
@@ -3,7 +3,7 @@ import requests
3
  import json
4
  import os
5
  from evaluator.chrf import calculate_chrf
6
- from evaluator.comet import calculate_comet # Import the COMET function
7
  from pathlib import Path
8
 
9
  # OpenAI API URL and key
 
3
  import json
4
  import os
5
  from evaluator.chrf import calculate_chrf
6
+ from evaluator.comet_hf import calculate_comet # Import the COMET function
7
  from pathlib import Path
8
 
9
  # OpenAI API URL and key
interface_local.py ADDED
@@ -0,0 +1,139 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
+ from evaluator.chrf import calculate_chrf
6
+ from evaluator.comet import calculate_comet # Import the COMET function
7
+ from pathlib import Path
8
+
9
# OpenAI Chat Completions endpoint and credentials.
OPENAI_API_URL = "https://api.openai.com/v1/chat/completions"

# Fail fast at import time if the key is missing, rather than at first request.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found. Please set this environment variable.")

# Display name -> OpenAI model identifier.
CHATGPT_MODELS = {
    "GPT-4": "gpt-4"
}
20
+
21
def improve_translations(system_prompt, temperature, top_p):
    """Post-edit draft machine translations with GPT-4 and evaluate the results.

    For each German source sentence, asks GPT-4 to improve the beam-search
    draft translation, then scores both the drafts ("Draft 1") and the
    improved versions ("Draft 2") against the reference translations using
    ChrF and COMET.

    Args:
        system_prompt: System message steering the assistant's behavior.
        temperature: Sampling temperature forwarded to the OpenAI API.
        top_p: Nucleus-sampling parameter forwarded to the OpenAI API.

    Returns:
        A ``(sentence_pairs, scores, evaluation_message)`` tuple:
        ``sentence_pairs`` is a list of [source, draft 1, draft 2, reference]
        rows; ``scores`` is a list of [metric, draft-1 avg, draft-2 avg,
        change] rows; ``evaluation_message`` summarizes the relative changes.

    Raises:
        requests.HTTPError: If an OpenAI API call returns an error status.
        ValueError: If no sentence triples could be loaded from disk.
        OSError: If one of the data files cannot be read.
    """
    # Load the evaluation data shipped alongside the app.
    data_dir = Path(__file__).parent / "evaluator" / "mt_data"
    source_sentences = (data_dir / "source_sentences.txt").read_text(encoding="utf-8").splitlines()
    beam_search_translations = (data_dir / "beam_search_translations.txt").read_text(encoding="utf-8").splitlines()
    reference_translations = (data_dir / "reference_translations.txt").read_text(encoding="utf-8").splitlines()

    # The headers never change, so build them once outside the loop.
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }

    improved_translations = []
    sentence_pairs = []  # rows of [source, draft 1, draft 2, reference]

    for source, target, reference in zip(source_sentences, beam_search_translations, reference_translations):
        # Construct the post-editing prompt for this sentence pair.
        user_prompt = f"""
        As an expert translation post editor, your task is to improve the English translation (Target) for the below German text (Source)
        Source: {source}
        Target: {target}
        Your output should be your improved version of the target text only. Do not add any comments or explanations before or after the improved version of the target text.
        """

        payload = {
            "model": CHATGPT_MODELS["GPT-4"],
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt}
            ],
            "temperature": temperature,
            "top_p": top_p,
            "max_tokens": 512
        }

        # Call the OpenAI API and fail loudly on HTTP errors.
        response = requests.post(OPENAI_API_URL, headers=headers, json=payload)
        response.raise_for_status()
        data = response.json()

        # Extract the improved translation; the split is defensive in case the
        # model prefixes its answer with "Improved Translation:" despite the prompt.
        output = data["choices"][0]["message"]["content"]
        improved_translation = output.split("Improved Translation:")[-1].strip()
        improved_translations.append(improved_translation)

        sentence_pairs.append([source, target, improved_translation, reference])

    if not sentence_pairs:
        # Guard against empty/mismatched data files; the averaging below
        # would otherwise raise a bare ZeroDivisionError.
        raise ValueError("No sentence triples found in evaluator/mt_data.")

    # Sentence-level ChrF for the drafts and the post-edited versions.
    beam_chrf_scores = [
        calculate_chrf(beam_translation, reference)
        for beam_translation, reference in zip(beam_search_translations, reference_translations)
    ]
    improved_chrf_scores = [
        calculate_chrf(improved_translation, reference)
        for improved_translation, reference in zip(improved_translations, reference_translations)
    ]

    # COMET scores (source-aware metric) for both translation sets.
    beam_comet_scores = calculate_comet(source_sentences, beam_search_translations, reference_translations)
    improved_comet_scores = calculate_comet(source_sentences, improved_translations, reference_translations)

    # Corpus-level averages.
    average_beam_chrf = sum(beam_chrf_scores) / len(beam_chrf_scores)
    average_improved_chrf = sum(improved_chrf_scores) / len(improved_chrf_scores)
    average_beam_comet = sum(beam_comet_scores) / len(beam_comet_scores)
    average_improved_comet = sum(improved_comet_scores) / len(improved_comet_scores)

    # Absolute change from draft 1 to draft 2.
    chrf_change = average_improved_chrf - average_beam_chrf
    comet_change = average_improved_comet - average_beam_comet

    sentence_pairs_df = sentence_pairs  # rows for the sentence-pair table
    scores_df = [
        ["ChrF", round(average_beam_chrf, 2), round(average_improved_chrf, 2), round(chrf_change, 2)],
        ["COMET", round(average_beam_comet, 2), round(average_improved_comet, 2), round(comet_change, 2)]
    ]

    # BUG FIX: the original divided the improved average by the score *change*
    # (e.g. 60 / 0.5 -> "12000%") and crashed with ZeroDivisionError when the
    # change was zero. A percentage change is (change / baseline) * 100.
    chrf_pct = (chrf_change / average_beam_chrf * 100) if average_beam_chrf else 0.0
    comet_pct = (comet_change / average_beam_comet * 100) if average_beam_comet else 0.0
    evaluation_message = f"ChrF Change: {chrf_pct:.2f}%, COMET Change: {comet_pct:.2f}%"
    return sentence_pairs_df, scores_df, evaluation_message
104
+
105
# Gradio UI: system prompt + sampling controls in, two tables + a summary out.
_interface_inputs = [
    gr.Textbox(label="System Prompt", placeholder="Define the assistant's behavior here..."),
    gr.Slider(value=1, minimum=0, maximum=1.9, step=0.1, label="Temperature"),
    gr.Slider(value=1, minimum=0, maximum=1, step=0.01, label="Top P"),
]

_interface_outputs = [
    gr.Dataframe(headers=["Source text", "Draft 1", "Draft 2", "Reference"], label="Sentence Pairs"),
    gr.Dataframe(headers=["Metric", "Draft 1", "Draft 2", "Change"], label="Scores"),
    gr.Textbox(label="Evaluation Results"),
]

app = gr.Interface(
    fn=improve_translations,
    inputs=_interface_inputs,
    outputs=_interface_outputs,
    title="Translation Post-Editing and Evaluation",
    description="Improve translations using GPT-4 and evaluate the results with ChrF and COMET.",
)
121
+
122
+
123
+
124
if __name__ == "__main__":
    # Walk a small range of ports in case the default (7860) is already taken.
    for port in range(7860, 7870):
        try:
            app.launch(
                server_name="127.0.0.1",  # localhost only
                server_port=port,
                share=False,  # don't create a public URL
                debug=True,   # show detailed errors
            )
        except OSError:
            # Port in use: give up (and re-raise) only after the last attempt.
            if port == 7869:
                print("Could not find an available port between 7860-7869")
                raise
        else:
            break