Spaces:

JabriA
/

OCR

Sleeping

App Files Community

JabriA committed on Apr 14

Commit

b35952c

1 Parent(s): b01b23a

Initial commit

Browse files

Files changed (3) hide show

app.py +35 -30
app.py.bak +8 -5
app1.py.bak +54 -0

app.py CHANGED Viewed

@@ -1,45 +1,50 @@
 import gradio as gr
-from transformers import AutoModel, AutoTokenizer
 from PIL import Image
 import torch
-# Load model and tokenizer
-tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
-model = AutoModel.from_pretrained(
-    'ucaslcl/GOT-OCR2_0',
-    trust_remote_code=True,
-    low_cpu_mem_usage=True,
-    device_map='cuda' if torch.cuda.is_available() else 'cpu',
-    use_safetensors=True,
-    pad_token_id=tokenizer.eos_token_id
-)
-model = model.eval()
-if torch.cuda.is_available():
-    model = model.cuda()
-# OCR function
-def ocr_from_image(image, ocr_type):
-    if image is None:
-        return "Please upload an image."
-    image_path = "uploaded_image.jpg"
-    image.save(image_path)
-    res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
-    return res
-# OCR types to choose from
 ocr_types = ["ocr", "format"]
-# Gradio interface
 iface = gr.Interface(
     fn=ocr_from_image,
     inputs=[
-        gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
-        gr.Radio(ocr_types, label="OCR Type", value="ocr")
     ],
     outputs="text",
-    title="🧠 GOT-OCR2.0 Transformer OCR",
-    description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
 )
 if __name__ == "__main__":
     iface.launch()

+import os
 import gradio as gr
+from transformers import TrOCRProcessor, TrOCRForConditionalGeneration
 from PIL import Image
 import torch
+# 🔄 Chargement du modele et du processor
+model_name = "microsoft/trocr-base-handwritten"
+model = TrOCRForConditionalGeneration.from_pretrained(model_name)
+processor = TrOCRProcessor.from_pretrained(model_name)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+model.eval()
+# 🧠 Fonction OCR
+def ocr_from_image(image_file, ocr_type):
+    if image_file is None:
+        return "Veuillez importer une image."
+    # Pretraitement de l'image
+    image = Image.open(image_file.name).convert("RGB")
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
+    # Generation de texte
+    with torch.no_grad():
+        generated_ids = model.generate(pixel_values)
+    # Decodage du texte genere
+    generated_text = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_text
+# 🔘 Types d'OCR (juste pour l'interface ici)
 ocr_types = ["ocr", "format"]
+# 🎨 Interface Gradio
 iface = gr.Interface(
     fn=ocr_from_image,
     inputs=[
+        gr.File(label="Importer une image", file_types=[".jpg", ".jpeg", ".png"]),
+        gr.Radio(ocr_types, label="Type d'OCR", value="ocr")
     ],
     outputs="text",
+    title="?? OCR manuscrit avec TrOCR",
+    description="Importez une image manuscrite pour extraire le texte avec le modele Microsoft TrOCR."
 )
+# 🚀 Lancement
 if __name__ == "__main__":
     iface.launch()

app.py.bak CHANGED Viewed

@@ -19,23 +19,26 @@ if torch.cuda.is_available():
 # OCR function
 def ocr_from_image(image, ocr_type):
-    image_path = "temp.jpg"
     image.save(image_path)
     res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
     return res
-# Gradio interface
 ocr_types = ["ocr", "format"]
 iface = gr.Interface(
     fn=ocr_from_image,
     inputs=[
-        gr.Image(type="pil", label="Upload Image"),
         gr.Radio(ocr_types, label="OCR Type", value="ocr")
     ],
     outputs="text",
-    title="GOT-OCR2.0: OCR with Transformers",
-    description="Upload an image and select OCR type (plain text or formatted)."
 )
 if __name__ == "__main__":

 # OCR function
 def ocr_from_image(image, ocr_type):
+    if image is None:
+        return "Please upload an image."
+    image_path = "uploaded_image.jpg"
     image.save(image_path)
     res = model.chat(tokenizer, image_path, ocr_type=ocr_type)
     return res
+# OCR types to choose from
 ocr_types = ["ocr", "format"]
+# Gradio interface
 iface = gr.Interface(
     fn=ocr_from_image,
     inputs=[
+        gr.File(label="Upload Image", file_types=[".jpg", ".jpeg", ".png"]),
         gr.Radio(ocr_types, label="OCR Type", value="ocr")
     ],
     outputs="text",
+    title="🧠 GOT-OCR2.0 Transformer OCR",
+    description="Upload an image file and select the OCR type: plain text (`ocr`) or formatted (`format`)."
 )
 if __name__ == "__main__":

app1.py.bak ADDED Viewed

	@@ -0,0 +1,54 @@

+import os
+import gradio as gr
+from transformers import TrOCRProcessor, TrOCRForConditionalGeneration
+from PIL import Image
+import torch
+# 🛡️ Configuration du proxy si nécessaire
+os.environ["HTTP_PROXY"] = "http://meditelproxy.meditel.int:80"
+os.environ["HTTPS_PROXY"] = "http://meditelproxy.meditel.int:80"
+# 🔄 Chargement du modèle et du processor
+model_name = "microsoft/trocr-base-handwritten"
+model = TrOCRForConditionalGeneration.from_pretrained(model_name)
+processor = TrOCRProcessor.from_pretrained(model_name)
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model.to(device)
+model.eval()
+# 🧠 Fonction OCR
+def ocr_from_image(image_file, ocr_type):
+    if image_file is None:
+        return "Veuillez importer une image."
+    # Prétraitement de l'image
+    image = Image.open(image_file.name).convert("RGB")
+    pixel_values = processor(images=image, return_tensors="pt").pixel_values.to(device)
+    # Génération de texte
+    with torch.no_grad():
+        generated_ids = model.generate(pixel_values)
+    # Décodage du texte généré
+    generated_text = processor.tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return generated_text
+# 🔘 Types d’OCR (juste pour l’interface ici)
+ocr_types = ["ocr", "format"]
+# 🎨 Interface Gradio
+iface = gr.Interface(
+    fn=ocr_from_image,
+    inputs=[
+        gr.File(label="Importer une image", file_types=[".jpg", ".jpeg", ".png"]),
+        gr.Radio(ocr_types, label="Type d’OCR", value="ocr")
+    ],
+    outputs="text",
+    title="🧠 OCR manuscrit avec TrOCR",
+    description="Importez une image manuscrite pour extraire le texte avec le modèle Microsoft TrOCR."
+)
+# 🚀 Lancement
+if __name__ == "__main__":
+    iface.launch()