Fred808 committed (verified)
Commit e8ef0cf · 1 Parent(s): 9bd5c0b

Update app.py

Files changed (1)
  1. app.py (+10, -12)
app.py CHANGED
@@ -3,27 +3,26 @@ import requests
 import base64
 from pydantic import BaseModel
 from typing import Optional, List
-
 import re
 
 app = FastAPI()
 
-# NVIDIA API endpoint and API key
-NVIDIA_API_URL = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.2-90b-vision-instruct/chat/completions"
+# New NVIDIA API endpoint and API key (adjust for the new model)
+NVIDIA_API_URL = "https://ai.api.nvidia.com/v1/gr/meta/llama-3.1-nemotron-nano-vl-8b-v1/chat/completions"
 API_KEY = "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"  # Replace securely in production
 
 class ChatMessage(BaseModel):
-    role: str  # "user" or "assistant" or "system"
+    role: str  # "user", "assistant", or "system"
     content: str
 
 class TextRequest(BaseModel):
     messages: List[ChatMessage]
-    max_tokens: Optional[int] = 512
+    max_tokens: Optional[int] = 1024
     temperature: Optional[float] = 1.0
-    top_p: Optional[float] = 1.0
+    top_p: Optional[float] = 0.01
 
 PRE_PROMPT_MESSAGES = [
-    {"role": "system", "content": "You are a helpful multimodal assistant powered by LLaMA 3.2 Vision-Instruct."},
+    {"role": "system", "content": "You are a helpful multimodal assistant powered by LLaMA 3.1 Nemotron Nano VL-8B."},
 ]
 
 def call_nvidia_api(payload: dict):
@@ -42,12 +41,12 @@ async def chat_with_text(request: TextRequest):
     messages = PRE_PROMPT_MESSAGES + [msg.dict() for msg in request.messages]
 
     payload = {
-        "model": "meta/llama-3.2-90b-vision-instruct",
+        "model": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
         "messages": messages,
         "max_tokens": request.max_tokens,
         "temperature": request.temperature,
         "top_p": request.top_p,
-        "stream": False,
+        "stream": True,  # Set to True if you want streaming
     }
     try:
         response = call_nvidia_api(payload)
@@ -55,7 +54,6 @@ async def chat_with_text(request: TextRequest):
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
 
-
 @app.post("/chat/vision")
 async def chat_from_text_with_image_url(request: TextRequest):
     # Find image URLs in the last user message(s)
@@ -81,12 +79,12 @@ async def chat_from_text_with_image_url(request: TextRequest):
     messages = PRE_PROMPT_MESSAGES + new_messages
 
     payload = {
-        "model": "meta/llama-3.2-90b-vision-instruct",
+        "model": "nvidia/llama-3.1-nemotron-nano-vl-8b-v1",
         "messages": messages,
         "max_tokens": request.max_tokens,
         "temperature": request.temperature,
         "top_p": request.top_p,
-        "stream": False,
+        "stream": True,  # Set to True if you want streaming
     }
 
     try:
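A note on the last change: the payload now sets "stream": True, but the body of call_nvidia_api sits outside the diff hunks, so it is not visible whether the helper actually consumes a stream. If the NVIDIA endpoint behaves like other OpenAI-compatible chat APIs, a streaming response arrives as server-sent events ("data: {...}" lines ending in [DONE]) rather than a single JSON object. Below is a minimal sketch of a helper that handles both modes under that assumption; the body is hypothetical, not the committed implementation, and it reuses the module-level API_KEY and NVIDIA_API_URL from app.py.

import json
import requests

def call_nvidia_api(payload: dict) -> str:
    # Hypothetical body -- the committed implementation is outside the diff hunks.
    headers = {
        "Authorization": f"Bearer {API_KEY}",
        "Accept": "text/event-stream" if payload.get("stream") else "application/json",
    }
    response = requests.post(
        NVIDIA_API_URL,
        headers=headers,
        json=payload,
        stream=bool(payload.get("stream")),
    )
    response.raise_for_status()

    if not payload.get("stream"):
        # Non-streaming: one JSON body containing the whole completion.
        return response.json()["choices"][0]["message"]["content"]

    # Streaming: accumulate the "data: {...}" SSE delta chunks into one string.
    parts = []
    for line in response.iter_lines(decode_unicode=True):
        if not line or not line.startswith("data: "):
            continue
        data = line[len("data: "):]
        if data == "[DONE]":
            break
        delta = json.loads(data)["choices"][0].get("delta", {})
        parts.append(delta.get("content") or "")
    return "".join(parts)

With this shape, the handlers can keep returning a single assembled string regardless of the stream flag.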
 
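The commit also leaves the API key committed verbatim next to the "Replace securely in production" comment. A conventional fix is to read it from the environment instead of the source; a minimal sketch, where the variable name NVIDIA_API_KEY is an arbitrary choice for illustration:

import os

# Hypothetical hardening: read the key from the environment instead of the source.
# The variable name NVIDIA_API_KEY is an arbitrary choice for this sketch.
API_KEY = os.environ.get("NVIDIA_API_KEY")
if not API_KEY:
    raise RuntimeError("Set the NVIDIA_API_KEY environment variable before starting the app")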
 
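For reference, a request matching the TextRequest model after this commit might look like the following. This assumes the app is served locally on port 8000 and that chat_with_text is mounted at POST /chat; the route decorator itself sits outside the diff hunks, so the path is a guess.

import requests

# Hypothetical client call; assumes localhost:8000 and a /chat route.
body = {
    "messages": [
        {"role": "user", "content": "Summarize what this service does."}
    ],
    "max_tokens": 256,   # optional; defaults to 1024 after this commit
    "temperature": 0.7,  # optional; defaults to 1.0
}
resp = requests.post("http://localhost:8000/chat", json=body, timeout=60)
resp.raise_for_status()
print(resp.json())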