Spaces:

JabriA
/

OCR

Sleeping

JabriA committed on Apr 14

Commit

b01b23a

1 Parent(s): 84f2a1e

Initial commit

Files changed (3) hide show

app.py CHANGED Viewed

@@ -1,7 +1,45 @@
 import gradio as gr
-def greet(name):
-    return "Hello " + name + "!!"
-demo = gr.Interface(fn=greet, inputs="text", outputs="text")
-demo.launch()

 import gradio as gr
+from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import torch
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained(
+    'ucaslcl/GOT-OCR2_0',
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    device_map='cuda' if torch.cuda.is_available() else 'cpu',
+    use_safetensors=True,
+    pad_token_id=tokenizer.eos_token_id
+)
+model = model.eval()
+if torch.cuda.is_available():
+    model = model.cuda()
+# OCR function
+def ocr_from_image(image, ocr_type):
+    if image is None:
+        return "Please upload an image."
+    image_path = "uploaded_image.jpg"
+    image.save(image_path)
+    res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
+    return res
+# OCR types to choose from
+ocr_types = ["ocr", "format"]
+# Gradio interface
+iface = gr.Interface(
+    fn=ocr_from_image,
+    inputs=[
+        gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
+        gr.Radio(ocr_types, label="OCR Type", value="ocr")
+    ],
+    outputs="text",
+    title="🧠 GOT-OCR2.0 Transformer OCR",
+    description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
+)
+if __name__ == "__main__":
+    iface.launch()

app.py.bak ADDED Viewed

+import gradio as gr
+from transformers import AutoModel, AutoTokenizer
+from PIL import Image
+import torch
+# Load model and tokenizer
+tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
+model = AutoModel.from_pretrained(
+    'ucaslcl/GOT-OCR2_0',
+    trust_remote_code=True,
+    low_cpu_mem_usage=True,
+    device_map='cuda' if torch.cuda.is_available() else 'cpu',
+    use_safetensors=True,
+    pad_token_id=tokenizer.eos_token_id
+)
+model = model.eval()
+if torch.cuda.is_available():
+    model = model.cuda()
+# OCR function
+def ocr_from_image(image, ocr_type):
+    image_path = "temp.jpg"
+    image.save(image_path)
+    res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
+    return res
+# Gradio interface
+ocr_types = ["ocr", "format"]
+iface = gr.Interface(
+    fn=ocr_from_image,
+    inputs=[
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Radio(ocr_types, label="OCR Type", value="ocr")
+    ],
+    outputs="text",
+    title="GOT-OCR2.0: OCR with Transformers",
+    description="Upload an image and select OCR type (plain text or formatted)."
+)
+if __name__ == "__main__":
+    iface.launch()

requirements.txt ADDED Viewed

+gradio
+torch==2.0.1
+torchvision==0.15.2
+transformers==4.37.2
+tiktoken==0.6.0
+verovio==4.3.1
+accelerate==0.28.0