File size: 10,023 Bytes
8cca16d 8046fd8 8cca16d 922a4e4 8046fd8 dee8140 6241817 8cca16d 75c2039 bbbeaa5 8cca16d 6318955 4e39fe5 8cca16d 6f35cd3 5c75995 592ac8f 5c75995 8a5710d 5c75995 8046fd8 5c75995 8a5710d 8046fd8 8a5710d 8046fd8 8a5710d 6faa4f6 8a5710d 1c1855a 8cca16d 2fdd252 8cca16d dedb583 8cca16d 63f8a32 1dcc8ff e883e28 c77c1e9 373ea19 1094c35 cd76f13 63f8a32 8cca16d c872d7b 8cca16d 6241817 3a44cee 6241817 8cca16d 1e32346 4c813ac 3fe53bb 6241817 8cca16d c872d7b 8cca16d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
import os
import gradio as gr
from duckduckgo_search import DDGS
import sqlite3
import json
import requests
from typing import List, Dict, Optional
import time
from bs4 import BeautifulSoup
import urllib.parse
#specialtoken=os.getenv("SPECIALTOKEN")+"deepseek-r1-0528"
specialtoken=os.getenv("SPECIALTOKEN")+"models/"+"openai-large"#"openai-roblox"
#plants=['Turmeric', 'Aloe Vera', 'Neem', 'Tulsi', 'Ashwagandha', 'Ginger', 'Basil', 'Peppermint', 'Lavender', 'Eucalyptus', 'Chamomile', 'Sandalwood', 'Giloy', 'Haritaki', 'Brahmi', 'Gotu Kola', 'Holy Basil', 'Fenugreek', 'Licorice', 'Fennel', 'Cinnamon', 'Clove', 'Black Pepper', 'Cardamom', 'Neem', 'Indian Gooseberry', 'Saffron', 'Thyme', 'Valerian', 'Marigold', 'Ginseng', 'Dandelion', 'Hibiscus', 'Milk Thistle', 'Magnolia', "St. John's Wort", 'Yarrow', 'Calendula', 'Coriander', 'Senna', 'Echinacea', 'Moringa', 'Plantain', 'Amla', 'Shatavari', 'Peppermint', 'Chamomile', 'Gotu Kola', 'Ashoka', 'Arnica', 'Burdock Root', "Cat's Claw", "Devil's Claw", 'Elderberry', 'Feverfew', 'Ginkgo Biloba', 'Goldenseal', 'Hawthorn', 'Kava', 'Lemon Balm', 'Marshmallow Root', 'Nettle', 'Olive Leaf', 'Passionflower', 'Red Clover', 'Reishi Mushroom', 'Rhodiola', 'Sage', 'Saw Palmetto', 'Slippery Elm', 'Stinging Nettle', 'Witch Hazel', 'Yellow Dock', 'Ashitaba', 'Bael', 'Bacopa', 'Cumin', 'Guduchi', 'Jamun', 'Jatamansi', 'Karela', 'Gudmar', 'Schisandra', 'Baikal Skullcap', 'Mullein', 'Chrysanthemum', 'Catuaba', 'Dong Quai', 'Jiaogulan', 'Muira Puama', 'Catnip', 'Olive']
#plants = ["Echinacea", "Ginkgo biloba", "Turmeric"]
PROMPT_TEMPLATE = """Extract detailed information about a plant from any reliable source. Provide the information in a JSON object with the following keys: ["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family", "Origin", "Growth Habitat", "Active Components", "Treatable Conditions", "Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects", "Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images", "Related Videos", "Sources"].
Use only string values for each key. If any information is missing, set the value as an empty string (""). Format the output as a valid JSON object with proper syntax.
Example: For the plant 'Name', provide the information accordingly.
---
**Plant Name:** {plant_name}
Additional source of information about that plant: '''{content}'''
Please generate the JSON output for the above plant, ensuring it adheres to the specified keys and format.
"""
def fetch_page_content(url: str):
"""Get webpage content with error handling"""
try:
response = requests.get(url, timeout=10)
soup = BeautifulSoup(response.text, 'html.parser')
# Remove unwanted elements
for element in soup(['script', 'style', 'header', 'footer', 'nav']):
element.decompose()
text = soup.get_text(separator='\n', strip=True)
for each in ["Page not available","403 Forbidden"]:
if each in text:
return "No information found!"
return text[:3500] # Limit to 3.5k characters
except Exception as e:
return f"Error fetching page: {str(e)}"
def search_full_plant_information(plant_name:str):
""" """
query = f"{plant_name} plant medicinal uses scientific information site:.edu OR site:.gov OR site:.org"
search_results=""
for attempt in range(3): # Retry up to 3 times
try:
search_results = DDGS().text(keywords=query, max_results=5)
except Exception as e:
if "Ratelimit" in str(e):
try:
wait_time = 2 ** attempt # Exponential backoff
print(f"Rate limit hit, retrying in {wait_time} seconds...")
time.sleep(wait_time)
except Exception as e:
pass
if search_results:
content=""
for result in search_results:
#content+=requests.get(f"{specialtoken}/Analyze this:{result['body']}").text
content+=result['body']+" "
content+=fetch_page_content(result['href'])+" "
time.sleep(2)
prompt = PROMPT_TEMPLATE.format(plant_name=plant_name, content=content)
#encodeURI
prompt=urllib.parse.quote(prompt)
response = requests.get(f"{specialtoken}/{prompt}")
#print (response.text)
return response.text
else:
#URI encoded:
content="Get any information from any source about:"+plant_name
prompt = PROMPT_TEMPLATE.format(plant_name=plant_name, content=content)
#encodeURI
prompt=urllib.parse.quote(prompt)
response = requests.get(f"{specialtoken}/{prompt}")
print(prompt)
return response.text
return f"No data Found for {plant_name}!"
DB_NAME="plants.db"
def save_to_db(plant_data: Dict) -> bool:
"""Save processed plant data to database"""
try:
conn = sqlite3.connect(DB_NAME)
cursor = conn.cursor()
# Convert arrays to strings if they exist
for field in ["Alternate Names", "Active Components", "Treatable Conditions",
"Preparation Methods", "Contraindications", "Side Effects",
"Interactions"]:
if field in plant_data:
if isinstance(plant_data[field], list):
plant_data[field] = ", ".join(plant_data[field])
elif not isinstance(plant_data[field], str):
plant_data[field] = str(plant_data[field])
columns = []
values = []
for key, value in plant_data.items():
if key.lower() == "error": # Skip error field
continue
columns.append(key.lower().replace(" ", "_"))
values.append(str(value) if value else None)
columns_str = ", ".join(columns)
placeholders = ", ".join(["?"] * len(columns))
cursor.execute(
f"INSERT INTO plants ({columns_str}) VALUES ({placeholders})",
values
)
conn.commit()
conn.close()
return True
except Exception as e:
print(f"Database save error: {e}")
return False
def process_plants(plants_array: List[str]) -> str:
"""Main processing pipeline"""
results = []
for plant in plants_array:
plant = plant.strip()
if not plant:
continue
print(f"Processing {plant}...")
plant_data = search_full_plant_information(plant)
if plant_data:
pass
#save_success = save_to_db(plant_data)
#plant_data["Database_Save_Success"] = save_success
#results.append(plant_data)
time.sleep(2) # Rate limiting
return print(results)
def split_and_search(text:str):
all_data=""
plants=text.split(",")
for each in plants:
sp=search_full_plant_information(each.strip()).replace('```json','').replace('```','').split('---')[0]+",\n" #.split('**Sponsored**')[0].split('**Sponsor**')[0]
#yield sp
if len(sp) < 10:
emptydata="{"+f'''
"Name": "No data for {plant_name}",
"Scientific Name": "",
"Alternate Names": "",
"Description": "",
"Plant Family": "",
"Origin": "",
"Growth Habitat": "",
"Active Components": "",
"Treatable Conditions": "",
"Preparation Methods": "",
"Dosage": "",
"Duration": "",
"Contraindications": "",
"Side Effects": "",
"Interactions": "",
"Part Used": "",
"Harvesting Time": "",
"Storage Tips": "",
"Images": "",
"Related Videos": "",
"Sources": ""'''+"}"
sp=emptydata#f"No data Found for {plant_name}!"
all_data+=sp
time.sleep(1)
return all_data
#For View:
def get_all_plants() -> List[Dict]:
"""Retrieve all plants from database"""
try:
conn = sqlite3.connect(DB_NAME)
conn.row_factory = sqlite3.Row
cursor = conn.cursor()
cursor.execute("SELECT `_rowid_`,* FROM plants ORDER BY `_rowid_` DESC")
plants = [dict(row) for row in cursor.fetchall()]
conn.close()
return plants
except Exception as e:
print(f"Database retrieval error: {e}")
return [{"Error": "Failed to retrieve data from database"}]
#use it here :
#process_plants(plants)
#or use interface:
with gr.Blocks(title="AI-Powered Medicinal Plants Database") as app:
gr.Markdown("# ๐ฟ AI-Powered Medicinal Plants Database")
with gr.Tab("Fetch & Process Plants"):
gr.Markdown("### Enter plant names (comma separated)")
with gr.Row():
plant_input = gr.Textbox(label="Plant Names",
placeholder="e.g., Neem, Peppermint, Aloe Vera")
fetch_btn = gr.Button("Process Plants", variant="primary")
output_area = gr.Textbox(label="AI-Processed Results", lines=12, interactive=False)
#json_output = gr.JSON(label="AI-Processed Results")
fetch_btn.click(
fn=split_and_search,
#fn=lambda x: process_plants([p.strip() for p in x.split(",")]),
inputs=plant_input,
outputs=output_area #json_output
)
with gr.Tab("View Database"):
gr.Markdown("### Stored Plant Information")
with gr.Row():
refresh_btn = gr.Button("Refresh Data", variant="secondary")
clear_db = gr.Button("Clear Database", variant="stop")
db_table = gr.Dataframe(
headers=["id", "name", "scientific_name", "description"],
datatype=["number", "str", "str", "str"],
col_count=(4, "fixed"),
interactive=True
)
refresh_btn.click(
fn=get_all_plants,
outputs=db_table
)
if __name__ == "__main__":
app.launch(debug=True, share=False) |