Spaces:

JeffMII
/

CEC-Learning

Sleeping

Jeff Myers II committed on Jun 17

Commit

cb3e313

1 Parent(s): 297702e

Removed 8-bit quantization and changed model_id to google/gemma-3n-E4B-it-litert-preview

Files changed (1) hide show

Gemma.py CHANGED Viewed

@@ -1,8 +1,6 @@
-from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
-from transformers.utils import quantization_config
 from huggingface_hub import login
 import spaces
-import torch
 import json
 import os
@@ -12,17 +10,18 @@ class GemmaLLM:
     def __init__(self):
         login(token=os.environ.get("GEMMA_TOKEN"))
-        quant_config = quantization_config.BitsAndBytesConfig(
-            load_in_8bit=True,
-            llm_int8_threshold=6.0,
-            llm_int8_has_fp16_weight=False,
-        )
-        model_id = "google/gemma-3-4b-it"
-        model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quant_config)
-        tokenizer = AutoTokenizer.from_pretrained(model_id)
-        self.model = pipeline("text-generation", model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16)
     @spaces.GPU
     def generate(self, message) -> str:

+from transformers import pipeline
 from huggingface_hub import login
 import spaces
 import json
 import os
     def __init__(self):
         login(token=os.environ.get("GEMMA_TOKEN"))
+        # quant_config = quantization_config.BitsAndBytesConfig(
+        #     load_in_8bit=True,
+        #     llm_int8_threshold=6.0,
+        #     llm_int8_has_fp16_weight=False,
+        # )
+        # model_id = "google/gemma-3-4b-it"
+        model_id = "google/gemma-3n-E4B-it-litert-preview"
+        # model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quant_config)
+        # tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.model = pipeline("text-generation", model_id)
     @spaces.GPU
     def generate(self, message) -> str: