Spaces:
Running
Running
| import json | |
| import re | |
| import os | |
# 1. Text-cleaning helper (mirrors the logic used in train_final.py).

# Matches every character that is NOT a precomposed Hangul syllable
# (U+AC00 .. U+D7A3), an ASCII letter, an ASCII digit, or a plain space.
# The Hangul bounds are written as \u escapes on purpose: a literal Hangul
# range is silently corrupted if the file is ever decoded with the wrong
# code page, which turns the class into an unrelated character range.
_DISALLOWED_CHARS = re.compile(r'[^\uAC00-\uD7A3a-zA-Z0-9 ]')


def clean_text(text: str) -> str:
    """Strip every character except Hangul syllables, ASCII letters, digits and spaces.

    Args:
        text: Value to clean. It is passed through ``str()`` first, so
            non-string inputs (e.g. numbers, ``None``) are accepted and
            cleaned in their string form.

    Returns:
        The input with all disallowed characters deleted. Note that tabs and
        newlines are removed as well; only the plain space U+0020 is kept.
    """
    return _DISALLOWED_CHARS.sub('', str(text))
# 2. Locate and load the labelled training data (assumed to live under ./data/).
file_path = './data/training-label.json'

# Number of leading conversations to preview. The status message below is
# derived from this value so the reported count can never drift away from
# the slice that is actually processed.
sample_size = 5

try:
    with open(file_path, 'r', encoding='utf-8') as f:
        training_data_raw = json.load(f)

    shown_count = min(sample_size, len(training_data_raw))
    print(f"✅ '{file_path}' 파일 로딩 성공. 총 {len(training_data_raw)}개 데이터 중 {shown_count}개만 추출합니다.")
    print("---------------------------------------------------\n")

    # 3. Build a raw-vs-cleaned comparison for the first `sample_size` entries.
    comparison_data = []

    # training_data_raw is indexed as a list with one entry per conversation.
    for i, data in enumerate(training_data_raw[:sample_size]):
        # Join every utterance of the conversation with single spaces to form
        # the raw text (same approach as explore_data.py).
        raw_text = " ".join(data['talk']['content'].values())
        cleaned_text = clean_text(raw_text)

        # Emotion code from the original record, kept for reference only.
        emotion_type = data['profile']['emotion']['type']

        comparison_data.append({
            'ID': i + 1,
            'Emotion': emotion_type,
            'Raw Text': raw_text,
            'Cleaned Text': cleaned_text
        })

    # 4. Print the comparison, one separator-delimited section per entry.
    for item in comparison_data:
        print(f"--- ID: {item['ID']} (감정 코드: {item['Emotion']}) ---")
        print(f" 원본 (Raw) : {item['Raw Text']}")
        print(f" 정제 (Clean): {item['Cleaned Text']}")
        print("-" * 30)

except FileNotFoundError:
    # Show the absolute path so a wrong working directory is easy to spot.
    print(f"❌ 오류: 데이터 파일을 찾을 수 없습니다. 경로를 확인하세요: {os.path.abspath(file_path)}")
except Exception as e:
    # Top-level boundary of the script: malformed JSON or records missing the
    # expected 'talk' / 'profile' keys end up here and are reported, not raised.
    print(f"❌ 데이터 처리 중 오류 발생: {e}")