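Fix: GPU usage issue (remove the @spaces.GPU decorator from load_model; load the model globally right after load_model is defined, before generate_text, without the try/except wrapper)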
Browse files
app.py
CHANGED
|
@@ -53,7 +53,6 @@ def combine_model_parts(model_dir="split_models", output_file="checkpoints/last.
|
|
| 53 |
print(f"Model combined successfully: {output_file}")
|
| 54 |
return output_file
|
| 55 |
|
| 56 |
-
@spaces.GPU(enable_queue=True)
|
| 57 |
def load_model():
|
| 58 |
"""
|
| 59 |
Load the SmollmV2 model and tokenizer.
|
|
@@ -89,6 +88,8 @@ def load_model():
|
|
| 89 |
except Exception as e:
|
| 90 |
raise RuntimeError(f"Error loading model: {str(e)}")
|
| 91 |
|
|
|
|
|
|
|
| 92 |
|
| 93 |
@spaces.GPU(enable_queue=True)
|
| 94 |
def generate_text(prompt, num_tokens, temperature=0.8, top_p=0.9):
|
|
@@ -142,13 +143,6 @@ def generate_text(prompt, num_tokens, temperature=0.8, top_p=0.9):
|
|
| 142 |
except Exception as e:
|
| 143 |
return f"Error during text generation: {str(e)}"
|
| 144 |
|
| 145 |
-
# Load the model globally
|
| 146 |
-
try:
|
| 147 |
-
model, tokenizer, device = load_model()
|
| 148 |
-
except Exception as e:
|
| 149 |
-
print(f"Error initializing model: {str(e)}")
|
| 150 |
-
raise
|
| 151 |
-
|
| 152 |
# Create the Gradio interface
|
| 153 |
demo = gr.Interface(
|
| 154 |
fn=generate_text,
|
|
|
|
| 53 |
print(f"Model combined successfully: {output_file}")
|
| 54 |
return output_file
|
| 55 |
|
|
|
|
| 56 |
def load_model():
|
| 57 |
"""
|
| 58 |
Load the SmollmV2 model and tokenizer.
|
|
|
|
| 88 |
except Exception as e:
|
| 89 |
raise RuntimeError(f"Error loading model: {str(e)}")
|
| 90 |
|
| 91 |
+
# Load the model globally
|
| 92 |
+
model, tokenizer, device = load_model()
|
| 93 |
|
| 94 |
@spaces.GPU(enable_queue=True)
|
| 95 |
def generate_text(prompt, num_tokens, temperature=0.8, top_p=0.9):
|
|
|
|
| 143 |
except Exception as e:
|
| 144 |
return f"Error during text generation: {str(e)}"
|
| 145 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
# Create the Gradio interface
|
| 147 |
demo = gr.Interface(
|
| 148 |
fn=generate_text,
|