Spaces:
Sleeping
Sleeping
File size: 1,984 Bytes
e221c83 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
import json
import re
import os
# 1. Text-cleaning function definition (same logic as train_final.py)
def clean_text(text: str) -> str:
    """Remove every character that is not Hangul, an ASCII letter, a digit, or a space.

    The argument is coerced with ``str()`` before filtering, so non-string
    values are accepted. Only the plain space character is kept; tabs,
    newlines, punctuation and all other symbols are dropped.

    Args:
        text: The value to clean.

    Returns:
        The filtered text containing only Hangul syllables (U+AC00-U+D7A3),
        ``a-z``, ``A-Z``, ``0-9`` and spaces.
    """
    # The Hangul syllable range is spelled with \u escapes (understood by the
    # ``re`` module) so the pattern cannot be corrupted by a mis-decoded source
    # file; the previous literal was mojibake that matched Thai code points.
    return re.sub(r'[^\uAC00-\uD7A3a-zA-Z0-9 ]', '', str(text))
# 2. Set the file path and load the data (assumes the file lives under data/)
# Location of the labelled training data, relative to the working directory.
file_path = './data/training-label.json'

# Number of leading records to preview. Used for both the slice and the status
# message so the two cannot disagree (the message used to claim 10 while the
# slice took 5).
SAMPLE_SIZE = 5

try:
    with open(file_path, 'r', encoding='utf-8') as f:
        training_data_raw = json.load(f)

    # training_data_raw is expected to be a list of conversation records
    # (anything else fails here and is reported by the generic handler below).
    sample_records = training_data_raw[:SAMPLE_SIZE]

    print(f"✅ '{file_path}' 파일 로딩 성공. 총 {len(training_data_raw)}개 데이터 중 {len(sample_records)}개만 추출합니다.")
    print("---------------------------------------------------\n")

    # 3. Process the sampled records and collect raw vs. cleaned text.
    comparison_data = []
    for i, data in enumerate(sample_records):
        # Join every utterance of the conversation with spaces to form the
        # raw text (same approach as explore_data.py).
        raw_text = " ".join(data['talk']['content'].values())
        cleaned_text = clean_text(raw_text)

        # Emotion code from the record's profile, kept for reference only.
        emotion_type = data['profile']['emotion']['type']

        comparison_data.append({
            'ID': i + 1,
            'Emotion': emotion_type,
            'Raw Text': raw_text,
            'Cleaned Text': cleaned_text,
        })

    # 4. Print the comparison.
    for item in comparison_data:
        print(f"--- ID: {item['ID']} (감정 코드: {item['Emotion']}) ---")
        print(f" 원본 (Raw) : {item['Raw Text']}")
        print(f" 정제 (Clean): {item['Cleaned Text']}")
        print("-" * 30)

except FileNotFoundError:
    # Show the absolute path so a wrong working directory is easy to spot.
    print(f"❌ 오류: 데이터 파일을 찾을 수 없습니다. 경로를 확인하세요: {os.path.abspath(file_path)}")
except Exception as e:
    # Top-level boundary of the script: report any parsing/schema problem
    # (bad JSON, missing keys, unexpected types) instead of a traceback.
    print(f"❌ 데이터 처리 중 오류 발생: {e}")