Spaces:

Chris4K
/

Hub-Search

Runtime error

App Files Files Community

Chris4K committed on May 22, 2025

Commit

dc72cab

verified ·

1 Parent(s): f250035

Update app.py

Browse files

Files changed (1) hide show

app.py +86 -64

app.py CHANGED Viewed

@@ -1,76 +1,98 @@
 import gradio as gr
-from datasets import list_datasets, load_dataset
 import pandas as pd
-import io
-# Search helper: returns at most 50 entries from list_datasets() whose name
-# contains `query`, compared case-insensitively. A falsy query skips the
-# filtering and returns the first 50 entries of the full listing.
-def list_available_datasets(query: str):
-    # Hugging Face datasets.list_datasets no longer supports with_community_datasets parameter
-    all_dsets = list_datasets()
-    # Filter by query substring
-    matches = [ds for ds in all_dsets if query.lower() in ds.lower()] if query else all_dsets
-    return matches[:50]  # at most 50 results
-# Loads one split of a dataset and returns a 3-tuple (schema, examples, stats):
-#   schema   - dict mapping each column name to str() of its feature type
-#   examples - pandas DataFrame with the first `num_examples` rows (fewer if the split is shorter)
-#   stats    - dict with the sample count plus the same column -> type entries as schema
-# If loading fails, returns an empty schema, an empty DataFrame and a stats
-# dict carrying a zero count and the error text.
-def explore_dataset(dataset_name: str, split: str, num_examples: int):
-    try:
-        ds = load_dataset(dataset_name, split=split)
-    except Exception as e:
-        # Report the failure through the stats output instead of raising.
-        return {}, pd.DataFrame(), {"Anzahl Samples": 0, "Fehler": str(e)}
-    # Schema: column name to feature type
-    schema = {col: str(ds.features[col]) for col in ds.column_names}
-    # Examples DataFrame
-    examples = ds.select(range(min(len(ds), num_examples))).to_pandas()
-    # Statistics: total samples and column types
-    stats = {"Anzahl Samples": len(ds)}
-    stats.update({col: str(ds.features[col]) for col in ds.column_names})
-    return schema, examples, stats
-# Serialises one column of a dataset split to CSV text.
-# Returns a (status message, csv text) pair; the csv text is "" when the
-# dataset cannot be loaded or the column does not exist.
-def export_column(dataset_name: str, split: str, column: str):
-    try:
-        ds = load_dataset(dataset_name, split=split)
-    except Exception as e:
-        # The exception detail is not included in the returned message.
-        return "Fehler beim Laden des Datasets.", ""
-    if column not in ds.column_names:
-        return "Spalte nicht gefunden.", ""
-    # NOTE(review): indexing a dataset by column name presumably yields a plain
-    # sequence, which would have no to_pandas() method - confirm against the
-    # datasets version in use.
-    df = ds[column].to_pandas()
-    # Write the CSV into an in-memory buffer so it can be returned as text.
-    buffer = io.StringIO()
-    df.to_csv(buffer, index=False)
-    csv_text = buffer.getvalue()
-    return f"CSV für Spalte '{column}' erzeugt.", csv_text
-# Gradio UI of the previous revision: dataset search, split/sample exploration
-# and single-column CSV export.
-with gr.Blocks() as demo:
-    gr.Markdown("## 📊 DataScout – Hugging Face Dataset Explorer")
-    # Search row: free-text query plus the button that triggers the lookup.
-    with gr.Row():
-        query = gr.Textbox(label="Dataset suchen", placeholder="z.B. imdb")
-        search_btn = gr.Button("🔍 Suchen")
-    # Inputs for exploring a dataset.
-    results = gr.Dropdown(label="Gefundene Datasets", choices=[], interactive=True)
-    split = gr.Dropdown(label="Split wählen", choices=["train", "test", "validation"], value="train")
-    num_examples = gr.Slider(label="Anzahl Beispiele", minimum=1, maximum=20, value=5, step=1)
-    explore_btn = gr.Button("👁️ Dataset erkunden")
-    # Outputs of explore_dataset, in the order it returns them.
-    schema_out = gr.JSON(label="Schema")
-    examples_out = gr.Dataframe(label="Beispiele")
-    stats_out = gr.JSON(label="Statistiken")
-    # CSV export controls and outputs.
-    col_dropdown = gr.Dropdown(label="Spalte für CSV-Export", choices=[], interactive=True)
-    export_btn = gr.Button("📥 CSV erzeugen")
-    export_msg = gr.Textbox(label="Status")
-    export_csv = gr.TextArea(label="CSV-Ausgabe", lines=10)
-    # Events
-    search_btn.click(fn=list_available_datasets, inputs=query, outputs=results)
-    explore_btn.click(fn=explore_dataset, inputs=[results, split, num_examples], outputs=[schema_out, examples_out, stats_out])
-    # Safely load the column list
-    def get_columns(name):
-        # No dataset selected yet: nothing to offer.
-        if not name:
-            return []
-        try:
-            # Always reads the "train" split, regardless of the split dropdown.
-            ds = load_dataset(name, split="train")
-            return ds.column_names
-        except Exception:
-            # Any loading failure results in an empty column list.
-            return []
-    results.change(fn=get_columns, inputs=results, outputs=col_dropdown)
-    export_btn.click(fn=export_column, inputs=[results, split, col_dropdown], outputs=[export_msg, export_csv])
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import pandas as pd
+from huggingface_hub import HfApi, SpaceSort, SpaceLibraries, SpaceHardware
+api = HfApi()
+# Mapping user-friendly labels to API values
+LIBRARY_OPTIONS = ["All", "Gradio", "Streamlit", "Flask", "FastAPI", "Transformers", "Diffusers"]
+LICENSE_OPTIONS = ["All", "Apache-2.0", "MIT", "BSD-3-Clause", "GPL-3.0"]
+HARDWARE_OPTIONS = ["All", "CPU", "GPU"]
+VISIBILITY_OPTIONS = ["All", "Public", "Private"]
+SORT_OPTIONS = {
+    "Last Modified": SpaceSort.LAST_MODIFIED,
+    "First Indexed": SpaceSort.FIRST_INDEXED,
+    "Likes": SpaceSort.LIKES,
+    "Runs": SpaceSort.RUNS
+}
+DIRECTION_OPTIONS = ["Descending", "Ascending"]
+def search_spaces(query, library, license, tags, visibility, hardware, sort_by, direction, limit):
+    # Prepare filters
+    lib_filter = None if library == "All" else getattr(SpaceLibraries, library.upper(), None)
+    license_filter = None if license == "All" else license
+    hardware_filter = None
+    if hardware == "CPU":
+        hardware_filter = SpaceHardware.CPU
+    elif hardware == "GPU":
+        hardware_filter = SpaceHardware.GPU
+    vis_filter = None
+    if visibility == "Public":
+        vis_filter = False  # private=False
+    elif visibility == "Private":
+        vis_filter = True
+    # Tags: comma-separated
+    tag_list = [t.strip() for t in tags.split(",")] if tags else None
+    sort_value = SORT_OPTIONS.get(sort_by, SpaceSort.LAST_MODIFIED)
+    reverse = False if direction == "Descending" else True
+    spaces = api.list_spaces(
+        search=query or None,
+        library_filter=lib_filter,
+        license=license_filter,
+        hardware=hardware_filter,
+        private=vis_filter,
+        sort=sort_value,
+        direction="asc" if reverse else "desc",
+        limit=limit,
+        task=tag_list  # filters by tags/tasks
+    )
+    # Build list of dicts
+    data = []
+    for s in spaces:
+        data.append({
+            "Name": s.id,
+            "Author": s.author,
+            "Library": s.spaceType,
+            "SDK": s.sdk,
+            "Tags": ", ".join(s.tags) if hasattr(s, 'tags') else "",
+            "Hardware": s.hardware if hasattr(s, 'hardware') else "",
+            "Visibility": "Private" if s.private else "Public",
+            "Likes": s.likes,
+            "Runs": s.runs,
+            "Last Modified": s.lastModified,
+            "URL": f"https://huggingface.co/spaces/{s.id}"
+        })
+    return data
+with gr.Blocks() as demo:
+    gr.Markdown("# 🔍 HF Spaces Explorer with Advanced Filters")
+    with gr.Row():
+        query = gr.Textbox(label="Search Query", placeholder="Enter keywords...")
+        library = gr.Dropdown(LIBRARY_OPTIONS, label="Library", value="All")
+        license = gr.Dropdown(LICENSE_OPTIONS, label="License", value="All")
+    with gr.Row():
+        tags = gr.Textbox(label="Tags (comma-separated)", placeholder="e.g. text-generation, image-classification")
+        visibility = gr.Dropdown(VISIBILITY_OPTIONS, label="Visibility", value="All")
+        hardware = gr.Dropdown(HARDWARE_OPTIONS, label="Hardware", value="All")
+    with gr.Row():
+        sort_by = gr.Dropdown(list(SORT_OPTIONS.keys()), label="Sort By", value="Last Modified")
+        direction = gr.Radio(DIRECTION_OPTIONS, label="Direction", value="Descending")
+        limit = gr.Slider(1, 100, label="Max Results", value=20, step=1)
+    search_btn = gr.Button("🔎 Search")
+    results = gr.Dataframe(headers=["Name", "Author", "Library", "SDK", "Tags", "Hardware", "Visibility", "Likes", "Runs", "Last Modified", "URL"], label="Results")
+    search_btn.click(
+        fn=search_spaces,
+        inputs=[query, library, license, tags, visibility, hardware, sort_by, direction, limit],
+        outputs=results
+    )
+    gr.Markdown("---")
+    gr.Markdown("Enhanced with tag, hardware & visibility filters. Built on `huggingface_hub` and Gradio Blocks.")
 if __name__ == "__main__":
+    demo.launch(share=True)