|
|
import gradio as gr |
|
|
from gliner import GLiNER |
|
|
|
|
|
|
|
|
# Load the PII model once at import time so every request reuses the same
# instance instead of reloading it per call.
model = GLiNER.from_pretrained("nvidia/gliner-pii")
|
|
|
|
|
|
|
|
# Entity labels requested from the model when the user supplies no custom
# labels.
DEFAULT_LABELS = [
    "person_name",
    "email",
    "phone_number",
    "address",
    "ssn",
    "credit_card_number",
    "date",
    "user_name",
]
|
|
|
|
|
def redact_pii(text, labels_str, threshold):
    """Redact PII from text using the GLiNER model.

    Args:
        text (str): Input text to redact.
        labels_str (str): Comma-separated entity labels. ``None``, an empty
            string, or a string holding only separators/whitespace selects
            ``DEFAULT_LABELS``.
        threshold (float): Confidence threshold forwarded to
            ``model.predict_entities``.

    Returns:
        str: The text with each detected span replaced by ``<REDACTED>``.
        Empty or whitespace-only input is returned without calling the
        model (``None`` becomes ``""``).
    """
    # Nothing to scan, so skip the model call entirely.
    if not text or not text.strip():
        return text or ""

    # Drop blank entries such as those produced by "a,,b" or a trailing
    # comma so they are never sent to the model as labels.
    requested = [part.strip() for part in (labels_str or "").split(",")]
    labels = [label for label in requested if label] or DEFAULT_LABELS

    entities = model.predict_entities(text, labels, threshold=threshold)
    spans = [(entity["start"], entity["end"]) for entity in entities]
    return _replace_spans(text, spans)


def _replace_spans(text, spans, placeholder="<REDACTED>"):
    """Return text with each (start, end) span replaced by placeholder."""
    pieces = []
    cursor = 0  # index just past the last redacted character
    for start, end in sorted(spans):
        if start >= cursor:
            pieces.append(text[cursor:start])
            pieces.append(placeholder)
        # A span starting before the cursor overlaps the previous one and is
        # already hidden by that placeholder; only extend the covered range.
        cursor = max(cursor, end)
    pieces.append(text[cursor:])
    return "".join(pieces)
|
|
|
|
|
|
|
|
# UI definition: inputs and the trigger button in the left column, the
# read-only result in the right column.
with gr.Blocks(title="GLiNER PII Redaction App") as demo:
    gr.Markdown("# GLiNER PII Redaction App")
    gr.Markdown(
        "This app uses the NVIDIA GLiNER PII model to detect and redact "
        "personally identifiable information from text."
    )

    with gr.Row():
        with gr.Column():
            input_text = gr.Textbox(
                label="Input Text (Plain, Markdown, or HTML)",
                lines=10,
                placeholder="Enter the text you want to redact...",
            )
            custom_labels = gr.Textbox(
                label="Custom PII Labels (comma-separated)",
                # Built from DEFAULT_LABELS so the hint always matches the
                # labels used when this field is left empty.
                placeholder=", ".join(DEFAULT_LABELS),
            )
            threshold = gr.Slider(
                minimum=0.0,
                maximum=1.0,
                step=0.05,
                value=0.5,
                label="Confidence Threshold",
            )
            submit_btn = gr.Button("Redact PII")

        with gr.Column():
            output_text = gr.Textbox(
                label="Redacted Output",
                lines=10,
                interactive=False,
            )

    # Event wiring has to happen inside the Blocks context.
    submit_btn.click(
        fn=redact_pii,
        inputs=[input_text, custom_labels, threshold],
        outputs=output_text,
    )
|
|
|
|
|
# Start the Gradio server only when run as a script, so importing this
# module does not launch the app.
if __name__ == "__main__":
    demo.launch()
|
|
|