#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""
https://help.aliyun.com/zh/model-studio/qwen-api-reference
https://help.aliyun.com/zh/model-studio/models
https://help.aliyun.com/zh/model-studio/models?spm=a2c4g.11186623.0.i4#d4ccf72f23jh9
https://help.aliyun.com/zh/model-studio/text-generation?spm=a2c4g.11186623.0.0.6b772e068nnT1J#24e54b27d4agt

Deep-Thinking
https://help.aliyun.com/zh/model-studio/deep-thinking?spm=a2c4g.11186623.0.0.56076f58IJd4mP
"""
import argparse
from datetime import datetime
import json
import os
from pathlib import Path
import sys
import time
from zoneinfo import ZoneInfo  # bundled with Python 3.9+, no extra install needed

# Make the project root importable before loading project_settings.
pwd = os.path.abspath(os.path.dirname(__file__))
sys.path.append(os.path.join(pwd, "../"))

from openai import OpenAI

from project_settings import environment, project_path


def get_args():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--model_name",
        # default="qwen3-max-2025-09-23",
        # default="qwen3-max-preview",
        # default="qwen-plus-2025-12-01",
        # default="qwen-turbo-2025-07-15",
        default="qwen-flash-2025-07-28",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_name",
        default="agent-nxcloud-zh-375-choice.jsonl",
        type=str
    )
    parser.add_argument(
        "--eval_dataset_dir",
        default=(project_path / "data/dataset").as_posix(),
        type=str
    )
    parser.add_argument(
        "--eval_data_dir",
        default=(project_path / "data/eval_data").as_posix(),
        type=str
    )
    parser.add_argument(
        "--client",
        default="shenzhen_sase",
        type=str
    )
    parser.add_argument(
        "--service",
        default="aliyun_api_key_bj",
        # default="aliyun_api_key_sgp",
        type=str
    )
    parser.add_argument(
        "--create_time_str",
        # default="null",
        default="20251209_140530",
        type=str
    )
    parser.add_argument(
        "--interval",
        default=1,
        type=int
    )
    args = parser.parse_args()
    return args
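

# Example invocation (a sketch; the script filename "eval_qwen_choice.py" is assumed):
#     python3 eval_qwen_choice.py \
#         --model_name qwen-flash-2025-07-28 \
#         --service aliyun_api_key_bj \
#         --create_time_str null \
#         --interval 1
# Passing "null" for --create_time_str starts a fresh run stamped with the current
# Asia/Shanghai time; otherwise the script resumes the run named by the timestamp.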


def conversation_to_str(conversation: list):
    """Flatten a list of {"role", "content"} turns into "role: content" lines."""
    conversation_str = ""
    for turn in conversation:
        role = turn["role"]
        content = turn["content"]
        row_ = f"{role}: {content}\n"
        conversation_str += row_
    return conversation_str
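
# For example (illustrative values), conversation_to_str([
#     {"role": "assistant", "content": "您好"},
#     {"role": "user", "content": "不需要"},
# ]) returns "assistant: 您好\nuser: 不需要\n".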


# System prompt (kept in Chinese to match the zh evaluation data): it tells the model
# to classify the customer's intent in a sales call into one of six labels,
# A trial-class acceptance, B complaint/do-not-call, C inconvenient time,
# D voicemail/auto-reply, E two explicit refusals after two pitches, F default,
# and to answer with a single uppercase letter only.
system_prompt = """
你是一位专业的电话对话分析专家,负责根据客服与客户之间的通话内容判断客户意图类别。
请仔细分析用户提供的完整对话,并严格按照以下规则进行分类:
- **A**:客户**明确同意参加试听课**(如“好啊,安排一下”)。仅询问细节、模糊回应(如“嗯嗯”“好的”)不算。
- **B**:客户**投诉、辱骂、或明确要求停止拨打此类电话**(如“别再打了!”)。仅拒绝试听(如“不用了”)不属于 B。
- **C**:客户表示**当前时刻不方便通话,例如提到“在开车”、“不方便”等**。
- **D**:对话为**语音留言/自动应答**,或包含“留言”“voicemail”“message”“已录音”等关键词,或出现**逐字念出的数字串**(如“九零九五……”)。
- **E**:客服**完成两次独立推销后**,客户**两次都表达了明确拒绝,仅一次不算做E分类**。
- **F**:客户未表达明确意愿,或以上情况均不符合(默认类别)。
**输出要求:**
- 仅输出一个大写字母(A、B、C、D、E 或 F);
- 不要任何解释、标点、空格、换行、JSON、引号或其他字符;
- 输出必须且只能是单个字母。
"""


def main():
    args = get_args()

    eval_dataset_dir = Path(args.eval_dataset_dir)
    eval_dataset_dir.mkdir(parents=True, exist_ok=True)
    eval_data_dir = Path(args.eval_data_dir)
    eval_data_dir.mkdir(parents=True, exist_ok=True)

    if args.create_time_str == "null":
        tz = ZoneInfo("Asia/Shanghai")
        now = datetime.now(tz)
        create_time_str = now.strftime("%Y%m%d_%H%M%S")
        # create_time_str = "20250724_090615"
    else:
        create_time_str = args.create_time_str

    eval_dataset = eval_dataset_dir / args.eval_dataset_name
    model_name_ = args.model_name.replace("/", "#")
    output_file = eval_data_dir / f"aliyun_nxcloud_choice/aliyun/{model_name_}/{args.client}/{args.service}/{create_time_str}/{args.eval_dataset_name}"
    output_file.parent.mkdir(parents=True, exist_ok=True)

    api_key = environment.get(args.service, dtype=str)
    if args.service == "aliyun_api_key_bj":
        base_url = "https://dashscope.aliyuncs.com/compatible-mode/v1"
    elif args.service == "aliyun_api_key_sgp":
        base_url = "https://dashscope-intl.aliyuncs.com/compatible-mode/v1"
    else:
        raise AssertionError(f"invalid service: {args.service}")

    client = OpenAI(
        base_url=base_url,
        # The Aliyun (DashScope) API key is read from the project environment settings.
        api_key=api_key
    )

    total = 0
    total_correct = 0

    # Resume support: if the output file already exists, reload the finished sample
    # indices and the running totals so an interrupted run can continue where it stopped.
    finished_idx_set = set()
    if os.path.exists(output_file.as_posix()):
        with open(output_file.as_posix(), "r", encoding="utf-8") as f:
            for row in f:
                row = json.loads(row)
                idx = row["idx"]
                total = row["total"]
                total_correct = row["total_correct"]
                finished_idx_set.add(idx)
    print(f"finished count: {len(finished_idx_set)}")

    with open(eval_dataset.as_posix(), "r", encoding="utf-8") as fin, open(output_file.as_posix(), "a+", encoding="utf-8") as fout:
        for row in fin:
            row = json.loads(row)

            idx = row["idx"]
            # system_prompt = row["system_prompt"]
            conversation = row["conversation"]
            examples = row["examples"]
            choices = row["choices"]
            response = row["response"]

            if idx in finished_idx_set:
                continue

            # conversation
            conversation_str = conversation_to_str(conversation)

            # few-shot examples: each example contributes its conversation plus the expected letter
            examples_str = ""
            for example in examples:
                conversation_ = example["conversation"]
                outputs = example["outputs"]
                output = outputs["output"]
                explanation = outputs["explanation"]

                examples_str += conversation_to_str(conversation_)
                # output_json = {"Explanation": explanation, "output": output}
                # output_json_str = json.dumps(output_json, ensure_ascii=False)
                # examples_str += f"\nOutput: {output_json_str}\n"
                examples_str += f"\nOutput: {output}\n\n"
            # print(examples_str)

            # NOTE: choices_str is assembled here but not used in the current prompt layout.
            choices_str = ""
            for choice in choices:
                condition = choice["condition"]
                choice_letter = choice["choice_letter"]
                row_ = f"{condition}, output: {choice_letter}\n"
                choices_str += row_
            # choices_str += "\nRemember to output ONLY the corresponding letter.\nYour output is:"
            # choices_str += "\nPlease use only 10-15 words to explain.\nOutput:"

            # prompt = f"{system_prompt}\n\n**Output**\n{choices_}\n**Examples**\n{examples_}"
            prompt1 = f"{system_prompt}\n\n**Examples**\n{examples_str}"
            prompt2 = f"**Conversation**\n{conversation_str}\n\nOutput:"
            # print(prompt1)
            # print(prompt2)

            # Two-message layout: system = rubric + few-shot examples, user = the conversation to classify.
            messages = list()
            messages.append(
                {"role": "system", "content": prompt1},
            )
            messages.append(
                {"role": "user", "content": prompt2},
            )
            # print(f"messages: {json.dumps(messages, ensure_ascii=False, indent=4)}")

            try:
                time.sleep(args.interval)
                print(f"sleep: {args.interval}")

                time_begin = time.time()
                completion = client.chat.completions.create(
                    model=args.model_name,
                    messages=messages,
                    temperature=0.01,
                    # enable_thinking is not a standard OpenAI parameter, so it is passed via extra_body
                    extra_body={"enable_thinking": False},
                    stream=False,
                )
                time_cost = time.time() - time_begin
                print(f"time_cost: {time_cost}")
            except Exception as e:
                print(f"request failed, error type: {type(e)}, error text: {str(e)}")
                continue
| # print(f"completion: {completion}") | |
| prediction = completion.choices[0].message.content | |
| rid = completion.id | |
| correct = 1 if prediction == response else 0 | |
| total += 1 | |
| total_correct += correct | |
| score = total_correct / total | |
| row_ = { | |
| "idx": idx, | |
| "rid": rid, | |
| "messages": messages, | |
| "response": response, | |
| "prediction": prediction, | |
| "correct": correct, | |
| "total": total, | |
| "total_correct": total_correct, | |
| "score": score, | |
| "time_cost": time_cost, | |
| } | |
| row_ = json.dumps(row_, ensure_ascii=False) | |
| fout.write(f"{row_}\n") | |
| fout.flush() | |
| return | |


if __name__ == "__main__":
    main()