File size: 10,023 Bytes
8cca16d
 
 
 
 
 
 
 
 
8046fd8
 
8cca16d
922a4e4
8046fd8
dee8140
6241817
8cca16d
75c2039
 
 
 
 
 
 
 
 
 
 
 
bbbeaa5
8cca16d
 
 
 
 
 
 
 
 
 
 
 
 
 
6318955
4e39fe5
8cca16d
 
 
 
 
 
6f35cd3
5c75995
 
 
592ac8f
5c75995
8a5710d
 
 
 
 
 
 
5c75995
 
 
 
 
 
 
 
8046fd8
 
5c75995
 
 
8a5710d
8046fd8
8a5710d
 
8046fd8
 
8a5710d
6faa4f6
8a5710d
1c1855a
8cca16d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2fdd252
 
 
 
8cca16d
 
 
dedb583
8cca16d
63f8a32
 
 
1dcc8ff
e883e28
c77c1e9
373ea19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1094c35
cd76f13
63f8a32
8cca16d
c872d7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cca16d
 
 
 
 
 
 
 
 
 
 
 
6241817
3a44cee
6241817
8cca16d
 
1e32346
4c813ac
3fe53bb
6241817
8cca16d
 
c872d7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8cca16d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
import os
import gradio as gr
from duckduckgo_search import DDGS
import sqlite3
import json
import requests
from typing import List, Dict, Optional
import time
from bs4 import BeautifulSoup
import urllib.parse


# Base URL for the text-generation service: the SPECIALTOKEN env var holds the
# service URL prefix and the model path is appended to it.
# NOTE(review): os.getenv returns None when SPECIALTOKEN is unset, which makes
# this concatenation raise TypeError at import time — confirm the env var is
# always provided in deployment.
#specialtoken=os.getenv("SPECIALTOKEN")+"deepseek-r1-0528"
specialtoken=os.getenv("SPECIALTOKEN")+"models/"+"openai-large"#"openai-roblox"
#plants=['Turmeric', 'Aloe Vera', 'Neem', 'Tulsi', 'Ashwagandha', 'Ginger', 'Basil', 'Peppermint', 'Lavender', 'Eucalyptus', 'Chamomile', 'Sandalwood', 'Giloy', 'Haritaki', 'Brahmi', 'Gotu Kola', 'Holy Basil', 'Fenugreek', 'Licorice', 'Fennel', 'Cinnamon', 'Clove', 'Black Pepper', 'Cardamom', 'Neem', 'Indian Gooseberry', 'Saffron', 'Thyme', 'Valerian', 'Marigold', 'Ginseng', 'Dandelion', 'Hibiscus', 'Milk Thistle', 'Magnolia', "St. John's Wort", 'Yarrow', 'Calendula', 'Coriander', 'Senna', 'Echinacea', 'Moringa', 'Plantain', 'Amla', 'Shatavari', 'Peppermint', 'Chamomile', 'Gotu Kola', 'Ashoka', 'Arnica', 'Burdock Root', "Cat's Claw", "Devil's Claw", 'Elderberry', 'Feverfew', 'Ginkgo Biloba', 'Goldenseal', 'Hawthorn', 'Kava', 'Lemon Balm', 'Marshmallow Root', 'Nettle', 'Olive Leaf', 'Passionflower', 'Red Clover', 'Reishi Mushroom', 'Rhodiola', 'Sage', 'Saw Palmetto', 'Slippery Elm', 'Stinging Nettle', 'Witch Hazel', 'Yellow Dock', 'Ashitaba', 'Bael', 'Bacopa', 'Cumin', 'Guduchi', 'Jamun', 'Jatamansi', 'Karela', 'Gudmar', 'Schisandra', 'Baikal Skullcap', 'Mullein', 'Chrysanthemum', 'Catuaba', 'Dong Quai', 'Jiaogulan', 'Muira Puama', 'Catnip', 'Olive']
#plants = ["Echinacea", "Ginkgo biloba", "Turmeric"]

# Prompt sent to the LLM endpoint.  {plant_name} and {content} are substituted
# via str.format() in search_full_plant_information; the model is instructed to
# reply with a single flat JSON object using exactly the keys listed below,
# with string values only (missing data becomes "").
PROMPT_TEMPLATE = """Extract detailed information about a plant from any reliable source. Provide the information in a JSON object with the following keys: ["Name", "Scientific Name", "Alternate Names", "Description", "Plant Family", "Origin", "Growth Habitat", "Active Components", "Treatable Conditions", "Preparation Methods", "Dosage", "Duration", "Contraindications", "Side Effects", "Interactions", "Part Used", "Harvesting Time", "Storage Tips", "Images", "Related Videos", "Sources"].

Use only string values for each key. If any information is missing, set the value as an empty string (""). Format the output as a valid JSON object with proper syntax. 

Example: For the plant 'Name', provide the information accordingly.

---

**Plant Name:** {plant_name}
Additional source of information about that plant: '''{content}'''

Please generate the JSON output for the above plant, ensuring it adheres to the specified keys and format.
"""

def fetch_page_content(url: str) -> str:
    """Download *url* and return its visible text, truncated to 3,500 chars.

    Non-content elements (scripts, styles, page chrome) are stripped before
    text extraction.  Returns the sentinel "No information found!" for error
    pages and an "Error fetching page: ..." message when the request itself
    fails, so callers always receive a plain string.
    """
    try:
        response = requests.get(url, timeout=10)
        # BUG FIX: previously HTTP error responses (404/403/5xx) were parsed
        # as if they were real content; bail out on any non-2xx status.
        if not response.ok:
            return "No information found!"
        soup = BeautifulSoup(response.text, 'html.parser')

        # Remove unwanted elements so only the article text remains.
        for element in soup(['script', 'style', 'header', 'footer', 'nav']):
            element.decompose()

        text = soup.get_text(separator='\n', strip=True)
        # Some hosts return a 200 page whose body is really an error message.
        for marker in ["Page not available", "403 Forbidden"]:
            if marker in text:
                return "No information found!"
        return text[:3500]  # Limit to 3.5k characters to keep prompts small
    except Exception as e:
        # A single bad URL must not abort the whole pipeline; report as text.
        return f"Error fetching page: {str(e)}"

def search_full_plant_information(plant_name: str) -> str:
    """Search the web for *plant_name* and ask the LLM endpoint for a JSON summary.

    DuckDuckGo is queried up to 3 times with exponential backoff on rate
    limits.  When results are available, their snippets and page contents are
    embedded in the prompt; otherwise the model is asked to answer from its
    own knowledge.  Returns the raw model response text.
    """
    query = f"{plant_name} plant medicinal uses scientific information site:.edu OR site:.gov OR site:.org"
    search_results = None
    for attempt in range(3):  # Retry up to 3 times
        try:
            search_results = DDGS().text(keywords=query, max_results=5)
        except Exception as e:
            if "Ratelimit" in str(e):
                wait_time = 2 ** attempt  # Exponential backoff
                print(f"Rate limit hit, retrying in {wait_time} seconds...")
                time.sleep(wait_time)
            # BUG FIX: the no-results fallback used to run inside this loop,
            # so a rate-limited first attempt returned immediately and the
            # retries never happened.  Retry first; fall back only after the
            # loop is exhausted.
            continue
        if search_results:
            break

    if search_results:
        content = ""
        for result in search_results:
            content += result['body'] + " "
            content += fetch_page_content(result['href']) + " "
            time.sleep(2)  # be polite to the scraped sites
    else:
        # No search data at all: ask the model to answer from its own knowledge.
        content = "Get any information from any source about:" + plant_name

    prompt = PROMPT_TEMPLATE.format(plant_name=plant_name, content=content)
    # The endpoint takes the whole prompt in the URL path, so URI-encode it.
    prompt = urllib.parse.quote(prompt)
    response = requests.get(f"{specialtoken}/{prompt}")
    return response.text

DB_NAME = "plants.db"

def save_to_db(plant_data: Dict) -> bool:
    """Insert one plant record into the `plants` table.

    Known multi-valued fields are normalized to comma-separated strings and
    any "error" key is skipped.  Returns True on success, False on any
    failure (the error is printed, not raised).
    """
    try:
        # BUG FIX: work on a copy so the caller's dict is not mutated.
        data = dict(plant_data)

        # Convert list values to strings for the TEXT columns.
        for field in ["Alternate Names", "Active Components", "Treatable Conditions",
                      "Preparation Methods", "Contraindications", "Side Effects",
                      "Interactions"]:
            if field in data:
                if isinstance(data[field], list):
                    data[field] = ", ".join(data[field])
                elif not isinstance(data[field], str):
                    data[field] = str(data[field])

        columns = []
        values = []
        for key, value in data.items():
            if key.lower() == "error":  # Skip error field
                continue
            columns.append(key.lower().replace(" ", "_"))
            values.append(str(value) if value else None)

        columns_str = ", ".join(columns)
        placeholders = ", ".join(["?"] * len(columns))

        conn = sqlite3.connect(DB_NAME)
        try:
            # Values go through placeholders; column names are derived from the
            # (AI-produced) keys, so a failed INSERT is caught below.
            conn.execute(
                f"INSERT INTO plants ({columns_str}) VALUES ({placeholders})",
                values,
            )
            conn.commit()
        finally:
            # BUG FIX: the connection previously leaked when execute() raised.
            conn.close()
        return True
    except Exception as e:
        print(f"Database save error: {e}")
        return False


def process_plants(plants_array: List[str]) -> str:
    """Run the search pipeline for each plant name and return the combined text.

    Blank entries are skipped.  BUG FIX: the function previously ended with
    `return print(results)`, which always returned None (despite the `-> str`
    annotation), and `results` was never populated.
    """
    results: List[str] = []
    for plant in plants_array:
        plant = plant.strip()
        if not plant:
            continue

        print(f"Processing {plant}...")
        plant_data = search_full_plant_information(plant)

        if plant_data:
            results.append(plant_data)
            # Persisting to the database is currently disabled:
            #save_success = save_to_db(plant_data)

        time.sleep(2)  # Rate limiting between plants

    return "\n".join(results)

def split_and_search(text: str) -> str:
    """Process a comma-separated list of plant names and concatenate results.

    Each model response is stripped of markdown code fences and anything after
    a '---' divider; responses that come back too short are replaced with an
    empty JSON record so the output still has one object per requested plant.

    BUG FIX: the empty-record branch referenced an undefined `plant_name`
    variable, raising NameError whenever it ran; it now uses the current name.
    """
    all_data = ""
    for raw_name in text.split(","):
        name = raw_name.strip()
        sp = search_full_plant_information(name).replace('```json', '').replace('```', '').split('---')[0] + ",\n"
        if len(sp) < 10:
            # Placeholder record with every expected key set to "".
            emptydata = "{" + f'''
"Name": "No data for {name}",
"Scientific Name": "",
"Alternate Names": "",
"Description": "",
"Plant Family": "",
"Origin": "",
"Growth Habitat": "",
"Active Components": "",
"Treatable Conditions": "",
"Preparation Methods": "",
"Dosage": "",
"Duration": "",
"Contraindications": "",
"Side Effects": "",
"Interactions": "",
"Part Used": "",
"Harvesting Time": "",
"Storage Tips": "",
"Images": "",
"Related Videos": "",
"Sources": ""''' + "}"
            sp = emptydata
        all_data += sp
        time.sleep(1)  # rate limiting between plants
    return all_data

#For View:
def get_all_plants() -> List[Dict]:
    """Return every row of the `plants` table, newest first.

    Rows come back as plain dicts (including the SQLite rowid).  On any
    failure a one-element list with an "Error" entry is returned instead of
    raising, so the UI always receives tabular data.
    """
    try:
        conn = sqlite3.connect(DB_NAME)
        try:
            conn.row_factory = sqlite3.Row
            cursor = conn.cursor()
            cursor.execute("SELECT `_rowid_`,* FROM plants ORDER BY `_rowid_` DESC")
            return [dict(row) for row in cursor.fetchall()]
        finally:
            # BUG FIX: the connection previously leaked when execute() raised.
            conn.close()
    except Exception as e:
        print(f"Database retrieval error: {e}")
        return [{"Error": "Failed to retrieve data from database"}]


#use it here :
#process_plants(plants)

#or use interface:
# Gradio UI, built at import time as a module-level side effect.  Tab 1 runs
# the LLM search pipeline on user-entered plant names; tab 2 views the local
# SQLite database.  The app itself is launched in the __main__ guard.
with gr.Blocks(title="AI-Powered Medicinal Plants Database") as app:
    # NOTE(review): the heading below renders as mojibake ("๐ŸŒฟ") — likely an
    # encoding issue in the source; confirm and restore the intended emoji.
    gr.Markdown("# ๐ŸŒฟ AI-Powered Medicinal Plants Database")
    with gr.Tab("Fetch & Process Plants"):
        gr.Markdown("### Enter plant names (comma separated)")
        with gr.Row():
            plant_input = gr.Textbox(label="Plant Names", 
                                   placeholder="e.g., Neem, Peppermint, Aloe Vera")
            fetch_btn = gr.Button("Process Plants", variant="primary")

        # Results are shown as raw text; a JSON component variant is kept
        # commented out below for reference.
        output_area = gr.Textbox(label="AI-Processed Results", lines=12, interactive=False)
        #json_output = gr.JSON(label="AI-Processed Results")
        
        # Clicking runs split_and_search on the comma-separated input string.
        fetch_btn.click(
            fn=split_and_search,
            #fn=lambda x: process_plants([p.strip() for p in x.split(",")]),
            inputs=plant_input,
            outputs=output_area #json_output
        )

    with gr.Tab("View Database"):
        gr.Markdown("### Stored Plant Information")
        with gr.Row():
            refresh_btn = gr.Button("Refresh Data", variant="secondary")
            # NOTE(review): clear_db has no click handler wired up, so the
            # "Clear Database" button currently does nothing — confirm intent.
            clear_db = gr.Button("Clear Database", variant="stop")
        
        db_table = gr.Dataframe(
            headers=["id", "name", "scientific_name", "description"],
            datatype=["number", "str", "str", "str"],
            col_count=(4, "fixed"),
            interactive=True
        )
        
        # Refresh reloads all rows from SQLite into the table.
        refresh_btn.click(
            fn=get_all_plants,
            outputs=db_table
        )

if __name__ == "__main__":
    # share=False keeps the app local (no public tunnel); debug=True enables
    # Gradio's verbose error reporting during development.
    app.launch(debug=True, share=False)