new feature added
Browse files
- app.py +4 -1
- utils/helper_functions.py +76 -0
app.py
CHANGED
@@ -157,7 +157,10 @@ if prompt := st.chat_input(initial_input):
     for i in range(final_ref.shape[0]):
         this_content = final_ref["answers"][i]
         if len(this_content) > 3:
-            this_score = calculate_sts_openai_score(question, this_content)
+            arr1 = openai_text_embedding(question)
+            arr2 = openai_text_embedding(this_content)
+            # this_score = calculate_sts_openai_score(question, this_content)
+            this_score = quantized_influence(arr1, arr2)
         else:
             this_score = 0
         independent_ai_judge_score.append(this_score)
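The per-answer judge score now comes from quantized_influence over OpenAI embeddings, replacing the cosine-based calculate_sts_openai_score (kept as a comment). A minimal sketch of the new scoring path, assuming openai_text_embedding and quantized_influence are importable from utils.helper_functions (the question and answer strings below are invented for illustration):

    import numpy as np
    from utils.helper_functions import openai_text_embedding, quantized_influence  # assumed repo layout

    question = "What services are offered?"   # hypothetical user query
    answer = "We offer housing support."      # hypothetical retrieved answer

    arr1 = np.asarray(openai_text_embedding(question))  # embed the question
    arr2 = np.asarray(openai_text_embedding(answer))    # embed the candidate answer
    score = quantized_influence(arr1, arr2)             # higher = stronger association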
utils/helper_functions.py
CHANGED
@@ -18,6 +18,21 @@ from scipy.spatial.distance import cosine
 openai.api_key = os.environ["OPENAI_API_KEY"]
 
 
+def merge_dataframes(dataframes: List[pd.DataFrame]) -> pd.DataFrame:
+    """Merges a list of DataFrames, keeping only specific columns."""
+    # Concatenate the list of dataframes
+    combined_dataframe = pd.concat(
+        dataframes, ignore_index=True
+    )  # Combine all dataframes into one
+
+    # Ensure the resulting dataframe only contains the columns "context", "questions", "answers"
+    combined_dataframe = combined_dataframe[
+        ["context", "questions", "answers"]
+    ]  # Filter for specific columns
+
+    return combined_dataframe  # Return the merged and filtered DataFrame
+
+
 def call_chatgpt(prompt: str) -> str:
     """
     Uses the OpenAI API to generate an AI response to a prompt.
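merge_dataframes concatenates the Q&A shards and keeps only the context/questions/answers columns. A minimal usage sketch, with toy frames invented for illustration and assuming the repo's utils package is importable:

    import pandas as pd
    from utils.helper_functions import merge_dataframes  # assumed repo layout

    df_a = pd.DataFrame({"context": ["c1"], "questions": ["q1"], "answers": ["a1"], "extra": [0]})
    df_b = pd.DataFrame({"context": ["c2"], "questions": ["q2"], "answers": ["a2"]})

    merged = merge_dataframes([df_a, df_b])
    print(merged.columns.tolist())  # ['context', 'questions', 'answers']; 'extra' is dropped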
@@ -69,6 +84,46 @@ def calculate_sts_openai_score(sentence1: str, sentence2: str) -> float:
     return similarity_score
 
 
+def add_dist_score_column(
+    dataframe: pd.DataFrame,
+    sentence: str,
+) -> pd.DataFrame:
+    dataframe["stsopenai"] = dataframe["questions"].apply(
+        lambda x: calculate_sts_openai_score(str(x), sentence)
+    )
+
+    sorted_dataframe = dataframe.sort_values(by="stsopenai", ascending=False)
+    return sorted_dataframe.iloc[:5, :]
+
+
+def convert_to_list_of_dict(df: pd.DataFrame) -> List[Dict[str, str]]:
+    """
+    Reads in a pandas DataFrame and produces a list of chat-format dictionaries keyed by 'role' and 'content'.
+
+    Args:
+        df: A pandas DataFrame with columns named 'questions' and 'answers'.
+
+    Returns:
+        A list of dictionaries, alternating a 'user' message per question and an 'assistant' message per answer.
+    """
+
+    # Initialize an empty list to store the dictionaries
+    result = []
+
+    # Loop through each row of the DataFrame
+    for index, row in df.iterrows():
+        # Create a user message from the question and an assistant message from the answer
+        qa_dict_quest = {"role": "user", "content": row["questions"]}
+        qa_dict_ans = {"role": "assistant", "content": row["answers"]}
+
+        # Add both messages to the result list
+        result.append(qa_dict_quest)
+        result.append(qa_dict_ans)
+
+    # Return the list of dictionaries
+    return result
+
+
 def query(payload: Dict[str, Any]) -> Dict[str, Any]:
     """
     Sends a JSON payload to a predefined API URL and returns the JSON response.
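convert_to_list_of_dict emits OpenAI chat-format messages (dicts keyed by "role" and "content", one user/assistant pair per row), ready to use as few-shot history. A small sketch with invented data, assuming the repo's utils package is importable:

    import pandas as pd
    from utils.helper_functions import convert_to_list_of_dict  # assumed repo layout

    df = pd.DataFrame({"questions": ["q1", "q2"], "answers": ["a1", "a2"]})
    print(convert_to_list_of_dict(df))
    # [{'role': 'user', 'content': 'q1'}, {'role': 'assistant', 'content': 'a1'},
    #  {'role': 'user', 'content': 'q2'}, {'role': 'assistant', 'content': 'a2'}]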
@@ -123,3 +178,24 @@ def llama2_7b_ysa(prompt: str) -> str:
     response: str = output[0]["generated_text"]
 
     return response
+
+
+def quantize_to_4bit(arr: Union[np.ndarray, Any]) -> np.ndarray:
+    """Converts an array to a 4-bit representation by normalizing and scaling its values."""
+    if not isinstance(arr, np.ndarray):  # Ensure input is a numpy array
+        arr = np.array(arr)
+    arr_min = arr.min()  # Find minimum value
+    arr_max = arr.max()  # Find maximum value
+    normalized_arr = (arr - arr_min) / (arr_max - arr_min)  # Normalize values to [0, 1]
+    return np.round(normalized_arr * 15).astype(int)  # Scale to 0-15 and round
+
+
+def quantized_influence(arr1: np.ndarray, arr2: np.ndarray) -> float:
+    """Calculates a weighted measure of influence based on quantized versions of the input arrays."""
+    arr1_4bit = quantize_to_4bit(arr1)  # Quantize arr1 to 4-bit
+    arr2_4bit = quantize_to_4bit(arr2)  # Quantize arr2 to 4-bit
+    unique_values = np.unique(arr1_4bit)  # Find unique values in arr1_4bit
+    y_bar_global = np.mean(arr2_4bit)  # Compute global average of arr2_4bit
+    # Weight squared deviations of each bin's local mean from the global mean by squared bin sizes
+    weighted_local_averages = [(np.mean(arr2_4bit[arr1_4bit == val]) - y_bar_global) ** 2 * len(arr2_4bit[arr1_4bit == val]) ** 2 for val in unique_values]
+    return np.mean(weighted_local_averages) / np.std(arr2_4bit)  # Normalize by the std of arr2_4bit