Keeby-smilyai committed on
Commit
1f69c1b
·
verified ·
1 Parent(s): 2e9b43f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -58
app.py CHANGED
@@ -1,61 +1,4 @@
1
- # ==============================================================================
2
- # Optimized send_message handler with reduced UI updates
3
- # ==============================================================================
4
- def send_message(message, show_thinking, temperature, model_choice, max_tokens, show_raw):
5
- global stop_generation
6
- stop_generation.clear()
7
-
8
- if not message.strip():
9
- return "", "", "⚡ 0.0 tok/s", gr.update(interactive=True), gr.update(interactive=False)
10
-
11
- # Disable send button, enable stop button (ONCE at start)
12
- yield "", "", "⚡ Generating...", gr.update(interactive=False), gr.update(interactive=True)
13
-
14
- # Switch backend based on selection
15
- backend = available_models[model_choice]
16
-
17
- # Create single-turn history
18
- history = [{"role": "user", "content": message}]
19
-
20
- # Show user message immediately (no button updates)
21
- yield "", render_history(history, show_thinking, show_raw), "⚡ Generating...", gr.update(), gr.update()
22
-
23
- # Generate prompt (single turn, no history)
24
- prompt = f"User: {message}\nSam: <think>"
25
-
26
- # Start assistant message
27
- history.append({"role": "assistant", "content": "<think>"})
28
-
29
- # Stream response
30
- last_tokens_per_sec = 0
31
- was_stopped = False
32
-
33
- for chunk_data in generate_response_stream(prompt, temperature, backend, max_tokens):
34
- if len(chunk_data) == 5: # New format with stopped flag
35
- new_chunk, in_thinking, tokens_per_sec, avg_tokens_per_sec, stopped = chunk_data
36
-
37
- if stopped:
38
- was_stopped = True
39
- print(" ✅ Generation stopped successfully")
40
- break
41
-
42
- if new_chunk: # Only update if there's actual content
43
- history[-1]["content"] += new_chunk
44
-
45
- last_tokens_per_sec = avg_tokens_per_sec
46
-
47
- # Update UI WITHOUT button updates for speed (use gr.update() to skip)
48
- speed_text = f"⚡ {tokens_per_sec:.1f} tok/s"
49
- yield "", render_history(history, show_thinking, show_raw), speed_text, gr.update(), gr.update()
50
-
51
- # Final yield - enable send button, disable stop button (ONCE at end)
52
- if was_stopped:
53
- final_speed = f"🛑 Stopped at {last_tokens_per_sec:.1f} tok/s"
54
- else:
55
- final_speed = f"✅ {last_tokens_per_sec:.1f} tok/s (avg)"
56
-
57
- print(f" 📊 Final speed: {final_speed}")
58
- yield "", render_history(history, show_thinking, show_raw), final_speed, gr.update(interactive=True), gr.update(interactive=False)import os
59
  os.environ['KERAS_BACKEND'] = 'tensorflow'
60
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
61
 
 
1
+ import os
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  os.environ['KERAS_BACKEND'] = 'tensorflow'
3
  os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"
4