File size: 2,740 Bytes
6266635
b80c16d
6266635
b80c16d
 
6266635
b80c16d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
import gradio as gr
from gliner import GLiNER

# Instantiate the GLiNER model once at import time; redact_pii reuses this
# module-level instance on every call instead of reloading it per request.
model = GLiNER.from_pretrained("nvidia/gliner-pii")

# Fallback PII entity labels, used by redact_pii when the user supplies no
# custom labels.
DEFAULT_LABELS = [
    "person_name",
    "email",
    "phone_number",
    "address",
    "ssn",
    "credit_card_number",
    "date",
    "user_name",
]

def redact_pii(text, labels_str, threshold):
    """
    Redact PII from text using the GLiNER model

    Args:
        text (str): Input text to redact
        labels_str (str): Comma-separated string of labels; empty, blank,
            or comma-only input falls back to DEFAULT_LABELS
        threshold (float): Confidence threshold for entity detection

    Returns:
        str: Redacted text with PII replaced by <REDACTED>. Empty or
            whitespace-only text is returned unchanged (None becomes "")
            without invoking the model.
    """
    # Nothing to redact: skip model inference entirely
    if not text or not text.strip():
        return text or ""

    # Process labels, dropping blanks left by stray commas (e.g. "email, ,ssn,")
    labels = [
        label.strip()
        for label in (labels_str or "").split(",")
        if label.strip()
    ]
    if not labels:
        labels = DEFAULT_LABELS

    # Predict entities
    entities = model.predict_entities(text, labels, threshold=threshold)

    # Order spans by position instead of trusting the model's output order,
    # and merge overlapping spans so no offset is redacted twice.
    spans = []
    for start, end in sorted((entity["start"], entity["end"]) for entity in entities):
        if spans and start < spans[-1][1]:
            spans[-1][1] = max(spans[-1][1], end)
        else:
            spans.append([start, end])

    # Rebuild the text left to right: keep the gaps, replace each span
    pieces = []
    cursor = 0
    for start, end in spans:
        pieces.append(text[cursor:start])
        pieces.append("<REDACTED>")
        cursor = end
    pieces.append(text[cursor:])

    return "".join(pieces)

# Build the Gradio interface: inputs (text, labels, threshold) in the left
# column, the read-only redacted result in the right column.
with gr.Blocks(title="GLiNER PII Redaction App") as demo:
    gr.Markdown("# GLiNER PII Redaction App")
    gr.Markdown("This app uses the NVIDIA GLiNER PII model to detect and redact personally identifiable information from text.")

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text (Plain, Markdown, or HTML)",
                lines=10,
                placeholder="Enter the text you want to redact..."
            )
            custom_labels = gr.Textbox(
                label="Custom PII Labels (comma-separated)",
                # Derived from DEFAULT_LABELS so the hint always shows the
                # labels actually used when this field is left empty.
                placeholder=", ".join(DEFAULT_LABELS)
            )
            threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.5,
                label="Confidence Threshold"
            )
            submit_btn = gr.Button("Redact PII")

        with gr.Column():
            output_text = gr.Textbox(
                label="Redacted Output",
                lines=10,
                interactive=False
            )

    # Wire the button to the redaction function; the inputs list order
    # matches redact_pii's positional parameters.
    submit_btn.click(
        fn=redact_pii,
        inputs=[input_text, custom_labels, threshold],
        outputs=output_text
    )

# Launch the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    demo.launch()