Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,76 +1,98 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
-
from datasets import list_datasets, load_dataset
|
| 3 |
import pandas as pd
|
| 4 |
-
import
|
| 5 |
|
| 6 |
-
|
| 7 |
-
# Hugging Face datasets.list_datasets no longer supports with_community_datasets parameter
|
| 8 |
-
all_dsets = list_datasets()
|
| 9 |
-
# Filter by query substring
|
| 10 |
-
matches = [ds for ds in all_dsets if query.lower() in ds.lower()] if query else all_dsets
|
| 11 |
-
return matches[:50] # maximal 50 Ergebnisse
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
# Schema: column name to feature type
|
| 19 |
-
schema = {col: str(ds.features[col]) for col in ds.column_names}
|
| 20 |
-
# Examples DataFrame
|
| 21 |
-
examples = ds.select(range(min(len(ds), num_examples))).to_pandas()
|
| 22 |
-
# Statistics: total samples and column types
|
| 23 |
-
stats = {"Anzahl Samples": len(ds)}
|
| 24 |
-
stats.update({col: str(ds.features[col]) for col in ds.column_names})
|
| 25 |
-
return schema, examples, stats
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
df = ds[column].to_pandas()
|
| 35 |
-
buffer = io.StringIO()
|
| 36 |
-
df.to_csv(buffer, index=False)
|
| 37 |
-
csv_text = buffer.getvalue()
|
| 38 |
-
return f"CSV für Spalte '{column}' erzeugt.", csv_text
|
| 39 |
|
| 40 |
-
with gr.Blocks() as demo:
|
| 41 |
-
gr.Markdown("## 📊 DataScout – Hugging Face Dataset Explorer")
|
| 42 |
-
with gr.Row():
|
| 43 |
-
query = gr.Textbox(label="Dataset suchen", placeholder="z.B. imdb")
|
| 44 |
-
search_btn = gr.Button("🔍 Suchen")
|
| 45 |
-
results = gr.Dropdown(label="Gefundene Datasets", choices=[], interactive=True)
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
|
|
|
| 59 |
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
explore_btn.click(fn=explore_dataset, inputs=[results, split, num_examples], outputs=[schema_out, examples_out, stats_out])
|
| 63 |
-
# Best-effort loading of the column list
def get_columns(name):
    """Return the column names of dataset ``name``, or ``[]`` if unavailable.

    Args:
        name: Dataset identifier passed to ``load_dataset``. A falsy value
            (``None`` or ``""``) short-circuits to an empty list.

    Returns:
        The ``column_names`` of the dataset's "train" split, or an empty
        list when no name was given or loading failed.
    """
    import logging

    if not name:
        return []
    try:
        ds = load_dataset(name, split="train")
        return ds.column_names
    except Exception:
        # Deliberate best-effort: this is wired to a UI change event, so a
        # dataset that cannot be loaded must not raise. The failure is logged
        # rather than swallowed so the cause can still be diagnosed.
        logging.getLogger(__name__).exception(
            "Could not read columns of dataset %r", name
        )
        return []
|
| 72 |
-
results.change(fn=get_columns, inputs=results, outputs=col_dropdown)
|
| 73 |
-
export_btn.click(fn=export_column, inputs=[results, split, col_dropdown], outputs=[export_msg, export_csv])
|
| 74 |
|
| 75 |
if __name__ == "__main__":
|
| 76 |
-
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
+
from huggingface_hub import HfApi, SpaceSort, SpaceLibraries, SpaceHardware
|
| 4 |
|
| 5 |
+
api = HfApi()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
|
| 7 |
+
# Choices offered by the filter dropdowns. "All" means "do not filter".
LIBRARY_OPTIONS = ["All", "Gradio", "Streamlit", "Flask", "FastAPI", "Transformers", "Diffusers"]
LICENSE_OPTIONS = ["All", "Apache-2.0", "MIT", "BSD-3-Clause", "GPL-3.0"]
HARDWARE_OPTIONS = ["All", "CPU", "GPU"]
VISIBILITY_OPTIONS = ["All", "Public", "Private"]

# Maps each "Sort By" label to the string sort key understood by
# `HfApi.list_spaces(sort=...)`. Plain strings are used because the `sort`
# argument is documented as a string key; the Hub offers no "runs" sort, so
# that entry is replaced by the trending score.
SORT_OPTIONS = {
    "Last Modified": "last_modified",
    "First Indexed": "created_at",
    "Likes": "likes",
    "Trending": "trending_score",
}
DIRECTION_OPTIONS = ["Descending", "Ascending"]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
|
| 22 |
+
def search_spaces(query, library, license, tags, visibility, hardware, sort_by, direction, limit):
    """Search Hugging Face Spaces and return the matches as a results table.

    Only keyword arguments that ``HfApi.list_spaces`` documents are sent to
    the Hub (``search``, ``filter``, ``sort``, ``direction``, ``limit``,
    ``full``). Library, license and free-form tags are merged into the single
    tag ``filter``. Visibility and hardware have no server-side parameter, so
    they are applied to the returned items; the final table can therefore
    hold fewer than ``limit`` rows.

    Args:
        query: Free-text search string; empty means no text filter.
        library: Entry of ``LIBRARY_OPTIONS``; "All" disables the filter.
        license: Entry of ``LICENSE_OPTIONS``; "All" disables the filter.
        tags: Comma-separated tags; blank entries are ignored.
        visibility: "All", "Public" or "Private".
        hardware: "All", "CPU" or "GPU".
        sort_by: Key of ``SORT_OPTIONS``.
        direction: "Descending" or "Ascending".
        limit: Maximum number of Spaces requested from the Hub.

    Returns:
        pandas.DataFrame: one row per Space, with the same columns as the
        "Results" table headers.
    """
    columns = [
        "Name", "Author", "Library", "SDK", "Tags", "Hardware",
        "Visibility", "Likes", "Runs", "Last Modified", "URL",
    ]

    # Library, license and user tags all travel in the tag filter.
    # NOTE(review): assumes the Hub tags licenses as "license:<id>" and SDKs
    # by their lowercase name - confirm against the Hub tag list.
    tag_filter = []
    if library and library != "All":
        tag_filter.append(library.lower())
    if license and license != "All":
        tag_filter.append(f"license:{license.lower()}")
    if tags:
        tag_filter.extend(t.strip() for t in tags.split(",") if t.strip())

    # Accept either a plain string or an enum-like object exposing `.value`.
    sort_value = SORT_OPTIONS.get(sort_by)
    sort_key = getattr(sort_value, "value", sort_value) or "last_modified"

    spaces = api.list_spaces(
        search=query or None,
        filter=tag_filter or None,
        sort=sort_key,
        # The API sorts descending for -1 and ascending otherwise.
        direction=-1 if direction == "Descending" else None,
        # Sliders may deliver a float; the API expects an integer.
        limit=int(limit) if limit else None,
        full=True,
    )

    want_private = {"Public": False, "Private": True}.get(visibility)

    rows = []
    for s in spaces:
        is_private = bool(getattr(s, "private", False))
        if want_private is not None and is_private != want_private:
            continue

        # NOTE(review): hardware is read from `runtime.hardware` when the Hub
        # returns runtime info - confirm that `full=True` includes it. Spaces
        # with unknown hardware cannot match a CPU/GPU filter.
        runtime = getattr(s, "runtime", None)
        hw = str(getattr(runtime, "hardware", None) or getattr(s, "hardware", None) or "")
        if hardware in ("CPU", "GPU"):
            is_cpu = hw.lower().startswith("cpu")
            if not hw or (hardware == "CPU") != is_cpu:
                continue

        modified = getattr(s, "last_modified", None) or getattr(s, "lastModified", None)
        rows.append({
            "Name": s.id,
            "Author": getattr(s, "author", "") or "",
            # `spaceType` and `runs` are not guaranteed attributes; the
            # columns are kept so the table layout stays unchanged.
            "Library": getattr(s, "spaceType", "") or "",
            "SDK": getattr(s, "sdk", "") or "",
            "Tags": ", ".join(getattr(s, "tags", None) or []),
            "Hardware": hw,
            "Visibility": "Private" if is_private else "Public",
            "Likes": getattr(s, "likes", "") or 0,
            "Runs": getattr(s, "runs", ""),
            # Stringified so the value is always serialisable for the UI.
            "Last Modified": str(modified) if modified is not None else "",
            "URL": f"https://huggingface.co/spaces/{s.id}",
        })
    return pd.DataFrame(rows, columns=columns)
|
| 70 |
|
| 71 |
+
# Page layout: three rows of filter controls, a search button, the results
# table and a footer.
# NOTE(review): nesting is reconstructed from statement order because the
# original indentation was lost - confirm which widgets sit inside each row.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🔍 HF Spaces Explorer with Advanced Filters")

    # Row 1: free-text query plus library and license pickers.
    with gr.Row():
        query = gr.Textbox(placeholder="Enter keywords...", label="Search Query")
        library = gr.Dropdown(choices=LIBRARY_OPTIONS, value="All", label="Library")
        license = gr.Dropdown(choices=LICENSE_OPTIONS, value="All", label="License")

    # Row 2: tag, visibility and hardware filters.
    with gr.Row():
        tags = gr.Textbox(
            placeholder="e.g. text-generation, image-classification",
            label="Tags (comma-separated)",
        )
        visibility = gr.Dropdown(choices=VISIBILITY_OPTIONS, value="All", label="Visibility")
        hardware = gr.Dropdown(choices=HARDWARE_OPTIONS, value="All", label="Hardware")

    # Row 3: ordering and result-count controls.
    with gr.Row():
        sort_by = gr.Dropdown(
            choices=list(SORT_OPTIONS.keys()),
            value="Last Modified",
            label="Sort By",
        )
        direction = gr.Radio(choices=DIRECTION_OPTIONS, value="Descending", label="Direction")
        limit = gr.Slider(minimum=1, maximum=100, value=20, step=1, label="Max Results")

    search_btn = gr.Button(value="🔎 Search")
    results = gr.Dataframe(
        headers=[
            "Name",
            "Author",
            "Library",
            "SDK",
            "Tags",
            "Hardware",
            "Visibility",
            "Likes",
            "Runs",
            "Last Modified",
            "URL",
        ],
        label="Results",
    )

    # The input order must match the positional parameters of search_spaces.
    search_btn.click(
        fn=search_spaces,
        inputs=[
            query,
            library,
            license,
            tags,
            visibility,
            hardware,
            sort_by,
            direction,
            limit,
        ],
        outputs=results,
    )

    gr.Markdown(value="---")
    gr.Markdown(
        value="Enhanced with tag, hardware & visibility filters. Built on `huggingface_hub` and Gradio Blocks."
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
# Start the Gradio app only when this file is executed as a script.
if __name__ == "__main__":
    # share=True additionally requests a public share link from Gradio.
    demo.launch(share=True)
|