Spaces:

OSS-forge
/

HighQualityPython

Running

File size: 3,781 Bytes

ca49620

from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import torch

# ==========================
#  Load your model
# ==========================

# Hugging Face Hub repository that provides both the tokenizer and the weights.
MODEL_ID = "OSS-forge/DeepSeek-Coder-1.3B-cleaned"

# Device that tokenized inputs are moved to before generation.
device = "cuda" if torch.cuda.is_available() else "cpu"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

if device == "cuda":
    # With device_map="auto" the loader decides where the weights live, so the
    # model must not be moved again with .to(): the extra move is redundant and
    # can conflict with the placement chosen at load time.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
else:
    # CPU path: no device map is used, so place the model explicitly.
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_ID,
        torch_dtype=torch.float32,
    )
    model.to(device)

# Inference only: switch the model to evaluation mode.
model.eval()


# ==========================
#  Prompt builder
# ==========================

def build_instruction_prompt(instruction: str) -> str:
    """Wrap *instruction* in the instruction/response prompt template.

    The instruction is stripped of surrounding whitespace and placed between
    the ``### Instruction:`` and ``### Response:`` markers, preceded by the
    fixed system sentence. The returned prompt ends with a newline after the
    response marker.
    """
    system_sentence = (
        "You are an AI programming assistant, utilizing the DeepSeek Coder model, "
        "developed by DeepSeek Company, and you only answer questions related to "
        "computer science."
    )
    cleaned = instruction.strip()
    sections = [
        system_sentence,
        "### Instruction:",
        cleaned,
        "### Response:",
    ]
    # One section per line, with a trailing newline after the response marker.
    return "\n".join(sections) + "\n"


# ==========================
#  Gradio logic
# ==========================

def generate_code(instruction, chat_history, is_first_time):
    """Generate code for one instruction and append the exchange to the chat.

    Args:
        instruction: Raw text from the instruction textbox.
        chat_history: Messages currently shown in the chatbot, or ``None``.
            Entries appended here are dicts with ``role`` and ``content`` keys.
        is_first_time: When true, any incoming history is discarded and the
            conversation starts from an empty list.

    Returns:
        A 3-tuple ``(chat_history, textbox_update, False)``: the (possibly
        extended) history, a ``gr.update`` for the instruction textbox, and
        the new value of the "first time" flag, which is always ``False``.
        For a blank instruction the history is returned without a new exchange.
    """
    if chat_history is None or is_first_time:
        chat_history = []

    # Treat a missing value like an empty string instead of failing on .strip().
    instruction = (instruction or "").strip()
    if not instruction:
        return chat_history, gr.update(value=instruction), False

    prompt = build_instruction_prompt(instruction)

    # NOTE(review): truncation presumably drops tokens from the end of the
    # prompt (the library's default side), which would cut the trailing
    # "### Response:" marker for very long instructions - TODO confirm.
    inputs = tokenizer(
        prompt,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    # A token missing from the vocabulary does not raise here; the lookup
    # yields None or the unknown-token id. Check for both explicitly and fall
    # back to the regular EOS token so generation always has a stop token.
    stop_id = tokenizer.convert_tokens_to_ids("<|EOT|>")
    if stop_id is None or stop_id == tokenizer.unk_token_id:
        stop_id = tokenizer.eos_token_id

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=512,
            do_sample=False,  # greedy decoding
            pad_token_id=stop_id,
            eos_token_id=stop_id,
        )

    # The output sequence starts with the prompt tokens; keep only what follows.
    input_len = inputs["input_ids"].shape[1]
    generated_tokens = outputs[0, input_len:]
    code = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()

    user_message = f"**Instruction**:\n{instruction}"
    ai_message = f"**Generated code**:\n```python\n{code}\n```"

    # Build a new list rather than mutating the one passed in.
    chat_history = chat_history + [
        {"role": "user", "content": user_message},
        {"role": "assistant", "content": ai_message},
    ]

    # Clear the textbox after a successful generation.
    return chat_history, gr.update(value=""), False


def reset_interface():
    """Return the values that put the interface back into its initial state.

    Returns:
        A 3-tuple of an empty chat history, a ``gr.update`` that clears the
        instruction textbox, and ``True`` for the "first time" flag.
    """
    empty_history = []
    cleared_textbox = gr.update(value="")
    first_time_flag = True
    return empty_history, cleared_textbox, first_time_flag


# ==========================
#  Gradio UI
# ==========================

with gr.Blocks(title="Python Code Generator") as demo:
    # Page heading followed by a one-line description.
    gr.Markdown(value="# 🧠 Python Code Generator")
    gr.Markdown(
        value="Generate Python code from natural language instructions using your Hugging Face model."
    )

    with gr.Row():
        # Left column: instruction entry and the two action buttons.
        with gr.Column(scale=2):
            _instruction_placeholder = (
                "Describe the code you want. E.g., 'Write a Python function "
                "that checks if a number is prime.'"
            )
            instruction_input = gr.Textbox(
                label="Instruction",
                placeholder=_instruction_placeholder,
                lines=4,
            )

            # Flag passed to generate_code as is_first_time; starts as True.
            is_first = gr.State(value=True)

            submit_btn = gr.Button(value="Generate Code")
            reset_btn = gr.Button(value="Start Over")

        # Right column: the conversation view.
        with gr.Column(scale=3):
            chat_output = gr.Chatbot(label="Conversation", height=500)

    # "Generate Code": run the model and refresh chat, textbox and flag.
    submit_btn.click(
        generate_code,
        [instruction_input, chat_output, is_first],
        [chat_output, instruction_input, is_first],
    )

    # "Start Over": takes no inputs and resets the same three components.
    reset_btn.click(
        reset_interface,
        inputs=None,
        outputs=[chat_output, instruction_input, is_first],
    )

if __name__ == "__main__":
    # Script entry point: enable the request queue (at most 10 pending
    # requests), then start the app.
    print("Launching Gradio interface...")
    queued_demo = demo.queue(max_size=10)
    queued_demo.launch()