File size: 1,984 Bytes
e221c83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import json
import re
import os

# 1. Text-cleaning helper (same logic as train_final.py).
def clean_text(text: str) -> str:
    """Remove every character except Hangul syllables, ASCII letters, digits and spaces.

    Args:
        text: Value to clean. It is coerced with ``str()`` first, so
            non-string inputs (e.g. numbers) are accepted.

    Returns:
        The input with all disallowed characters deleted. Only the plain
        space character is kept as whitespace, so tabs and newlines are
        removed. Standalone Hangul jamo (outside U+AC00-U+D7A3) are removed
        as well.
    """
    # The Hangul syllable range is written with \u escapes (U+AC00 "ga" to
    # U+D7A3 "hih") so the pattern survives encoding round trips; a
    # mis-decoded literal range here makes re.sub raise "bad character range".
    return re.sub(r'[^\uAC00-\uD7A3a-zA-Z0-9 ]', '', str(text))

# 2. Configure the input path and load the data (the file is assumed to be under data/).
file_path = './data/training-label.json'

# Number of leading records shown in the raw-vs-cleaned comparison.
# The status message below is derived from this value so the two cannot drift apart.
SAMPLE_COUNT = 5


def build_comparison(records, cleaner, limit):
    """Build raw-vs-cleaned comparison rows for the first ``limit`` records.

    Args:
        records: Sliceable sequence of conversation records. Each record must
            provide ``record['talk']['content']`` (a mapping whose values are
            the utterance strings) and ``record['profile']['emotion']['type']``.
        cleaner: Callable applied to the joined raw text of each record.
        limit: Maximum number of records taken from the start of ``records``.

    Returns:
        A list of dicts with the keys ``'ID'`` (1-based position),
        ``'Emotion'``, ``'Raw Text'`` and ``'Cleaned Text'``.

    Raises:
        KeyError: If a record lacks one of the keys listed above.
        TypeError: If ``records`` cannot be sliced or an utterance is not a string.
    """
    rows = []
    for position, record in enumerate(records[:limit], start=1):
        # Join every utterance of the conversation with single spaces to form
        # the raw text (same approach as explore_data.py).
        raw_text = " ".join(record['talk']['content'].values())
        rows.append({
            'ID': position,
            # Emotion code of the source record, kept for reference only.
            'Emotion': record['profile']['emotion']['type'],
            'Raw Text': raw_text,
            'Cleaned Text': cleaner(raw_text),
        })
    return rows


try:
    with open(file_path, 'r', encoding='utf-8') as f:
        training_data_raw = json.load(f)

    # Report the number of records that will actually be shown.
    shown_count = min(SAMPLE_COUNT, len(training_data_raw))
    print(f"✅ '{file_path}' 파일 로딩 성공. 총 {len(training_data_raw)}개 데이터 중 {shown_count}개만 추출합니다.")
    print("---------------------------------------------------\n")

    # 3. Process the first SAMPLE_COUNT records and pair raw text with cleaned text.
    # training_data_raw is a list with one entry per conversation.
    comparison_data = build_comparison(training_data_raw, clean_text, SAMPLE_COUNT)

    # 4. Print the results.
    for item in comparison_data:
        print(f"--- ID: {item['ID']} (감정 코드: {item['Emotion']}) ---")
        print(f"  원본 (Raw) : {item['Raw Text']}")
        print(f"  정제 (Clean): {item['Cleaned Text']}")
        print("-" * 30)

except FileNotFoundError:
    # Show the absolute path so a wrong working directory is easy to spot.
    print(f"❌ 오류: 데이터 파일을 찾을 수 없습니다. 경로를 확인하세요: {os.path.abspath(file_path)}")
except Exception as e:
    # Top-level boundary of a demo script: report any other failure
    # (malformed JSON, unexpected record shape, ...) instead of a traceback.
    print(f"❌ 데이터 처리 중 오류 발생: {e}")