Spaces: Runtime error
import streamlit as st
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM
import pandas as pd
import torch
import matplotlib.pyplot as plt
import numpy as np
import re

st.set_page_config(page_title="🛡️ 智盾内容安全审核平台", layout="wide")

# Sidebar navigation: map display labels to internal page keys.
PAGES = {
    "🏠 首页": "home",
    "📋 审核": "moderation",
    "📊 分析": "analysis",
    "🧠 产品能力": "capability",
    "🧾 策略配置": "strategy",
}

if "page" not in st.session_state:
    st.session_state.page = "home"
if "analysis_input_text" not in st.session_state:
    st.session_state.analysis_input_text = ""

selected_page = st.sidebar.radio("📌 导航", list(PAGES.keys()))
# NOTE: this assignment runs on every rerun, so it overwrites any page switch made
# programmatically elsewhere (e.g. the "深度分析" jump below) unless the radio
# selection is updated as well.
st.session_state.page = PAGES[selected_page]
def parse_scores_from_llm_output(text):
    # Pull "label: score" pairs out of free-form model output. A label must be a
    # single run of Chinese characters or Latin letters (no spaces); a score must
    # look like 0.xx or 1.0.
    matches = re.findall(r"([\u4e00-\u9fa5A-Za-z]+)[::]?\s*(0\.\d+|1\.0+)", text)
    score_dict = {}
    for label, score in matches:
        try:
            score_dict[label.strip()] = float(score)
        except ValueError:
            continue
    return score_dict
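# Sanity check on hypothetical model output, for reference:
#   parse_scores_from_llm_output("- 人身攻击: 0.82\n- 暴力: 0.10")
#   -> {"人身攻击": 0.82, "暴力": 0.1}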
def plot_radar_chart(score_dict):
    labels = list(score_dict.keys())
    scores = list(score_dict.values())
    angles = np.linspace(0, 2 * np.pi, len(labels), endpoint=False).tolist()
    scores += scores[:1]  # repeat the first point so the polygon closes
    angles += angles[:1]
    fig, ax = plt.subplots(figsize=(5, 5), subplot_kw=dict(polar=True))
    ax.plot(angles, scores, "o-", linewidth=2)
    ax.fill(angles, scores, alpha=0.25)
    ax.set_thetagrids(np.degrees(angles[:-1]), labels)
    ax.set_ylim(0, 1)
    st.pyplot(fig)
def plot_bar_chart(score_dict):
    labels = list(score_dict.keys())
    scores = list(score_dict.values())
    fig, ax = plt.subplots()
    ax.barh(labels, scores)
    ax.set_xlim(0, 1)
    ax.set_xlabel("分数 (0-1)")
    st.pyplot(fig)
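# NOTE (assumption): the Chinese tick labels in both charts render as empty boxes
# unless matplotlib can find a CJK font on the Space, e.g. install fonts-noto-cjk
# via packages.txt and then set:
#   plt.rcParams["font.sans-serif"] = ["Noto Sans CJK SC"]
#   plt.rcParams["axes.unicode_minus"] = False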
if st.session_state.page == "home":
    st.title("🛡️ 智盾内容安全审核平台")
    st.markdown("欢迎使用智盾平台。本系统支持多维度内容审核与分析,适用于金融、政府、互联网等高风险行业。")
elif st.session_state.page == "moderation":
    st.title("📋 文本内容审核")
    model_choice = st.selectbox("选择模型", ["BERT", "DeepSeek", "GPT"])
    model_map = {
        "BERT": "uer/roberta-base-finetuned-jd-binary-chinese",
        "DeepSeek": "deepseek-ai/deepseek-llm-7b-chat",
        "GPT": "IDEA-CCNL/Taiyi-CLUE-small",
    }
    model_name = model_map[model_choice]
    input_text = st.text_area("请输入待审核文本:", height=150)
    if st.button("🚨 开始审核"):
        with st.spinner("模型分析中..."):
            if model_choice == "DeepSeek":
                tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
                model = AutoModelForCausalLM.from_pretrained(
                    model_name,
                    device_map="auto",
                    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                    trust_remote_code=True,
                )
                prompt = f"""你是一个中文内容审核助手,请对以下文本内容进行分析。
任务包括:
1. 判断是否存在违规内容,并简要解释原因。
2. 给出以下五个风险维度的评分(0-1之间):
- 人身攻击
- 暴力
- 政治敏感
- 舆论攻击
- 歧视成分
输出格式如下:
- 人身攻击: x.xx
- 暴力: x.xx
- 政治敏感: x.xx
- 舆论攻击: x.xx
- 歧视成分: x.xx
文本如下:
{input_text}
"""
                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                outputs = model.generate(**inputs, max_new_tokens=512)
                result = tokenizer.decode(outputs[0], skip_special_tokens=True)
                st.code(result)
                st.session_state.analysis_input_text = input_text
                # BUG: a button nested inside another button's if-block never fires in
                # Streamlit; the outer button reads False again on the rerun triggered
                # by the inner click, so this branch is unreachable as written (see the
                # reworked pattern sketched after this page's code).
                if st.button("🔍 深度分析此文本"):
                    st.session_state.page = "analysis"
                    st.rerun()  # replaces st.experimental_rerun(), removed in newer Streamlit
            else:
                classifier = pipeline(
                    "text-classification",
                    model=model_name,
                    tokenizer=model_name,
                    device=0 if torch.cuda.is_available() else -1,
                )
                result = classifier(input_text)[0]
                st.write(f"标签: {result['label']} / 置信度: {result['score']:.2f}")
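# A more reliable pattern for the nested "深度分析" flow above (hypothetical rework,
# not part of the original app): persist the first click in session_state, then
# branch on the flag instead of nesting the two buttons.
#
#     if st.button("🚨 开始审核"):
#         st.session_state.moderation_done = True
#     if st.session_state.get("moderation_done"):
#         ...  # run the model and render results as above
#         if st.button("🔍 深度分析此文本"):
#             st.session_state.page = "analysis"
#             st.rerun()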
elif st.session_state.page == "analysis":
    st.title("📊 文本风险分析")
    input_text = st.session_state.analysis_input_text or st.text_area("请输入需要分析的文本:", height=150)
    if st.button("🔍 分析文本"):
        with st.spinner("分析中..."):
            tokenizer = AutoTokenizer.from_pretrained("deepseek-ai/deepseek-llm-7b-chat", trust_remote_code=True)
            model = AutoModelForCausalLM.from_pretrained(
                "deepseek-ai/deepseek-llm-7b-chat",
                device_map="auto",
                torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
                trust_remote_code=True,
            )
            # The requested output format must use the same single-token Chinese labels
            # that parse_scores_from_llm_output can match; the original multi-word
            # English labels (e.g. "Public opinion attack") were only captured partially.
            prompt = f"""
你是一个中文文本风险分析专家,请根据以下文本的内容,对以下五个维度评分(范围0-1,1为最严重):
- 人身攻击
- 暴力
- 政治敏感
- 舆论攻击
- 歧视成分
输出格式如下:
- 人身攻击: x.xx
- 暴力: x.xx
- 政治敏感: x.xx
- 舆论攻击: x.xx
- 歧视成分: x.xx
文本如下:
{input_text}
"""
            inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
            outputs = model.generate(**inputs, max_new_tokens=512)
            result = tokenizer.decode(outputs[0], skip_special_tokens=True)
            st.markdown("### 🧾 模型原始输出")
            st.code(result)
            scores = parse_scores_from_llm_output(result)
            if scores:
                plot_bar_chart(scores)
                plot_radar_chart(scores)
                avg = np.mean(list(scores.values()))
                if avg >= 0.7:
                    level = "严重 🔴"
                elif avg >= 0.4:
                    level = "中等 🟠"
                else:
                    level = "轻微 🟢"
                st.success(f"综合风险等级:{level}(平均分 {avg:.2f})")
elif st.session_state.page == "capability":
    st.title("🧠 产品能力")
    features = [
        ("📝 字词错误", "错别字、音近字、形近字、多字、重叠、颠倒、异形词等"),
        ("📌 常识错误", "标点符号、地名关联、表达不当、语义错误、不语名词等"),
        ("🚫 敏感词过滤", "涉及暴恐、色情、违禁、侮辱、歧视等不健康词语"),
        ("⚠️ 政治性差错", "领导人姓名、职务、讲话、政治口号、固定表述等"),
        ("📄 文本比对", "快速找出两个文本之间的差异之处,高清高亮显示"),
        ("📐 格式错误", "参照国家标准和党政公文规范,自动识别格式问题"),
        ("🤖 智能写作", "自动生成新闻稿、公告、任务文书,响应快速"),
        ("🌐 网站巡检", "自动抓取网页历史快照,输出违规风险报告"),
    ]
    # Lay the feature cards out two per row.
    for i in range(0, len(features), 2):
        col1, col2 = st.columns(2)
        for col, feat in zip((col1, col2), features[i:i+2]):
            with col.expander(feat[0], expanded=True):
                st.markdown(f"**功能描述:** {feat[1]}")
                st.button(f"👉 体验 {feat[0]}", key=feat[0])
elif st.session_state.page == "strategy":
    st.title("📋 冒犯性内容风控策略配置")
    st.markdown("配置不同风险维度的评分阈值,并设定处理策略。")
    st.markdown("---")
    # (dimension, default threshold, default action)
    risk_dimensions = [
        ("人身攻击", 0.75, "封禁账号 + 内容拦截"),
        ("暴力", 0.70, "内容拦截 + 人工复审"),
        ("政治敏感", 0.65, "限流处理 + 推送复审"),
        ("舆论攻击", 0.60, "记录风险 + 降权曝光"),
        ("歧视成分", 0.60, "提醒整改 + 限流"),
    ]
    config = {}
    for dim, default_thresh, default_action in risk_dimensions:
        st.subheader(f"🧠 风险维度:{dim}")
        col1, col2 = st.columns([1, 2])
        with col1:
            thresh = st.slider(f"{dim} 风险评分阈值", 0.0, 1.0, default_thresh, 0.01, key=f"{dim}_slider")
        with col2:
            action = st.text_input(f"{dim} 处理策略", value=default_action, key=f"{dim}_action")
        config[dim] = (thresh, action)
    if st.button("💾 保存配置"):
        st.success("✅ 策略保存成功(模拟)")
        st.json(config)
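The "Runtime error" banner is consistent with how the app constructs deepseek-llm-7b-chat inside the button handlers: every click re-downloads and re-loads the full model, which on a Space easily exhausts memory or the request timeout. A minimal sketch of a cached loader, assuming Streamlit 1.18+ for st.cache_resource (the function name load_deepseek is illustrative, not part of the original app):

@st.cache_resource  # executed once per process; later reruns reuse the same objects
def load_deepseek(model_name: str = "deepseek-ai/deepseek-llm-7b-chat"):
    # Assumption: same loading arguments as the inline calls above.
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        trust_remote_code=True,
    )
    return tokenizer, model

Both the moderation and analysis branches would then call tokenizer, model = load_deepseek() instead of building the model inline, and the same st.cache_resource pattern fits the pipeline(...) classifiers. Even cached, a 7B model needs roughly 14 GB in float16 (about double in float32 on CPU), which exceeds the free CPU tier on Spaces, so a smaller model or a hosted inference endpoint may be needed regardless.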