Chris4K committed on
Commit
dc72cab
·
verified ·
1 Parent(s): f250035

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +86 -64
app.py CHANGED
@@ -1,76 +1,98 @@
1
  import gradio as gr
2
- from datasets import list_datasets, load_dataset
3
  import pandas as pd
4
- import io
5
 
6
- def list_available_datasets(query: str):
7
- # Hugging Face datasets.list_datasets no longer supports with_community_datasets parameter
8
- all_dsets = list_datasets()
9
- # Filter by query substring
10
- matches = [ds for ds in all_dsets if query.lower() in ds.lower()] if query else all_dsets
11
- return matches[:50] # maximal 50 Ergebnisse
12
 
13
- def explore_dataset(dataset_name: str, split: str, num_examples: int):
14
- try:
15
- ds = load_dataset(dataset_name, split=split)
16
- except Exception as e:
17
- return {}, pd.DataFrame(), {"Anzahl Samples": 0, "Fehler": str(e)}
18
- # Schema: column name to feature type
19
- schema = {col: str(ds.features[col]) for col in ds.column_names}
20
- # Examples DataFrame
21
- examples = ds.select(range(min(len(ds), num_examples))).to_pandas()
22
- # Statistics: total samples and column types
23
- stats = {"Anzahl Samples": len(ds)}
24
- stats.update({col: str(ds.features[col]) for col in ds.column_names})
25
- return schema, examples, stats
26
 
27
- def export_column(dataset_name: str, split: str, column: str):
28
- try:
29
- ds = load_dataset(dataset_name, split=split)
30
- except Exception as e:
31
- return "Fehler beim Laden des Datasets.", ""
32
- if column not in ds.column_names:
33
- return "Spalte nicht gefunden.", ""
34
- df = ds[column].to_pandas()
35
- buffer = io.StringIO()
36
- df.to_csv(buffer, index=False)
37
- csv_text = buffer.getvalue()
38
- return f"CSV für Spalte '{column}' erzeugt.", csv_text
39
 
40
- with gr.Blocks() as demo:
41
- gr.Markdown("## 📊 DataScout – Hugging Face Dataset Explorer")
42
- with gr.Row():
43
- query = gr.Textbox(label="Dataset suchen", placeholder="z.B. imdb")
44
- search_btn = gr.Button("🔍 Suchen")
45
- results = gr.Dropdown(label="Gefundene Datasets", choices=[], interactive=True)
46
 
47
- split = gr.Dropdown(label="Split wählen", choices=["train", "test", "validation"], value="train")
48
- num_examples = gr.Slider(label="Anzahl Beispiele", minimum=1, maximum=20, value=5, step=1)
49
- explore_btn = gr.Button("👁️ Dataset erkunden")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
- schema_out = gr.JSON(label="Schema")
52
- examples_out = gr.Dataframe(label="Beispiele")
53
- stats_out = gr.JSON(label="Statistiken")
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
- col_dropdown = gr.Dropdown(label="Spalte für CSV-Export", choices=[], interactive=True)
56
- export_btn = gr.Button("📥 CSV erzeugen")
57
- export_msg = gr.Textbox(label="Status")
58
- export_csv = gr.TextArea(label="CSV-Ausgabe", lines=10)
 
59
 
60
- # Events
61
- search_btn.click(fn=list_available_datasets, inputs=query, outputs=results)
62
- explore_btn.click(fn=explore_dataset, inputs=[results, split, num_examples], outputs=[schema_out, examples_out, stats_out])
63
- # Sicheres Laden der Spaltenliste
64
- def get_columns(name):
65
- if not name:
66
- return []
67
- try:
68
- ds = load_dataset(name, split="train")
69
- return ds.column_names
70
- except Exception:
71
- return []
72
- results.change(fn=get_columns, inputs=results, outputs=col_dropdown)
73
- export_btn.click(fn=export_column, inputs=[results, split, col_dropdown], outputs=[export_msg, export_csv])
74
 
75
  if __name__ == "__main__":
76
- demo.launch()
 
1
"""Gradio app that searches Hugging Face Spaces and shows the matches in a table.

Tag-like filters (library, license, free-form tags) are sent to the Hub as
``filter`` tags.  Visibility and hardware cannot be expressed as
``HfApi.list_spaces`` arguments, so they are applied client-side on the
returned ``SpaceInfo`` objects.
"""

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi
from huggingface_hub.utils import HfHubHTTPError

api = HfApi()

# User-facing dropdown choices.  "All" always means "do not filter".
LIBRARY_OPTIONS = ["All", "Gradio", "Streamlit", "Flask", "FastAPI", "Transformers", "Diffusers"]
LICENSE_OPTIONS = ["All", "Apache-2.0", "MIT", "BSD-3-Clause", "GPL-3.0"]
HARDWARE_OPTIONS = ["All", "CPU", "GPU"]
VISIBILITY_OPTIONS = ["All", "Public", "Private"]

# Label -> Hub sort key.  `list_spaces` takes plain strings; the camelCase Hub
# property names are used because they are forwarded unchanged by the client.
# The Hub exposes no "runs" counter, so that option is replaced by "Trending".
SORT_OPTIONS = {
    "Last Modified": "lastModified",
    "First Indexed": "createdAt",
    "Likes": "likes",
    "Trending": "trendingScore",
}
DIRECTION_OPTIONS = ["Descending", "Ascending"]

# Column order of the results table (shared by the DataFrame and the widget).
RESULT_COLUMNS = [
    "Name",
    "Author",
    "SDK",
    "Tags",
    "Hardware",
    "Visibility",
    "Likes",
    "Trending",
    "Last Modified",
    "URL",
]

# Properties requested from the Hub for every listed Space; "runtime" carries
# the hardware flavour needed for the Hardware column and filter.
_EXPAND_FIELDS = [
    "author",
    "lastModified",
    "likes",
    "private",
    "runtime",
    "sdk",
    "tags",
    "trendingScore",
]

# When filtering client-side, scan up to this many times `limit` candidates so
# that the table can still be filled without paging through the whole Hub.
_SCAN_FACTOR = 10


def _build_tag_filters(library, license, tags):
    """Return the list of Hub tags implied by the library/license/tags inputs."""
    filters = []
    if library and library != "All":
        # NOTE(review): assumes Spaces carry their SDK/library as a lowercase tag.
        filters.append(library.lower())
    if license and license != "All":
        # NOTE(review): assumes the Hub's "license:<id>" tag convention applies
        # to Spaces as it does to models - confirm.
        filters.append(f"license:{license.lower()}")
    if tags:
        # Drop empty fragments produced by inputs such as "a,,b" or "a, ".
        filters.extend(part.strip() for part in tags.split(",") if part.strip())
    return filters


def _hardware_name(space):
    """Return the Space's current (or requested) hardware flavour, or ''."""
    runtime = getattr(space, "runtime", None)
    flavour = getattr(runtime, "hardware", None) or getattr(runtime, "requested_hardware", None)
    # The flavour may be an enum member or a plain string depending on whether
    # the client recognises it; `.value` yields the raw Hub string for enums.
    return getattr(flavour, "value", flavour) or ""


def _hardware_class(flavour):
    """Map a hardware flavour to 'CPU', 'GPU' (any accelerator) or None if unknown."""
    if not flavour:
        return None
    return "CPU" if flavour.lower().startswith("cpu") else "GPU"


def _space_to_row(space, flavour):
    """Convert one ``SpaceInfo`` into a dict keyed by ``RESULT_COLUMNS``."""
    modified = getattr(space, "last_modified", None)
    if modified is None:
        modified_text = ""
    elif hasattr(modified, "isoformat"):
        modified_text = modified.isoformat()
    else:
        modified_text = str(modified)
    return {
        "Name": space.id,
        "Author": getattr(space, "author", None) or space.id.split("/")[0],
        "SDK": getattr(space, "sdk", None) or "",
        # `tags` may be None, which would crash a bare ", ".join(...).
        "Tags": ", ".join(getattr(space, "tags", None) or []),
        "Hardware": flavour,
        "Visibility": "Private" if getattr(space, "private", False) else "Public",
        "Likes": getattr(space, "likes", None) or 0,
        "Trending": getattr(space, "trending_score", None) or 0,
        "Last Modified": modified_text,
        "URL": f"https://huggingface.co/spaces/{space.id}",
    }


def search_spaces(query, library, license, tags, visibility, hardware, sort_by, direction, limit):
    """Search Spaces on the Hub and return the matches as a DataFrame.

    Args:
        query: Free-text search string; blank means no text search.
        library: One of ``LIBRARY_OPTIONS``.
        license: One of ``LICENSE_OPTIONS``.
        tags: Comma-separated extra tags, or an empty string.
        visibility: One of ``VISIBILITY_OPTIONS`` (filtered client-side).
        hardware: One of ``HARDWARE_OPTIONS`` (filtered client-side; Spaces
            whose hardware is unknown never match "CPU" or "GPU").
        sort_by: A key of ``SORT_OPTIONS``; unknown labels sort by last modified.
        direction: "Descending" or "Ascending".
        limit: Maximum number of rows to return.

    Returns:
        A ``pandas.DataFrame`` with the columns in ``RESULT_COLUMNS``.

    Raises:
        gr.Error: If the Hub request fails, so the UI shows a readable message.
    """
    limit = max(1, int(limit))  # the slider may deliver a float
    tag_filters = _build_tag_filters(library, license, tags)
    want_private = {"Public": False, "Private": True}.get(visibility)
    want_hardware = hardware if hardware in ("CPU", "GPU") else None
    filters_client_side = want_private is not None or want_hardware is not None

    rows = []
    try:
        listing = api.list_spaces(
            search=(query or "").strip() or None,
            filter=tag_filters or None,
            sort=SORT_OPTIONS.get(sort_by, "lastModified"),
            # The client only knows -1 (descending); omitting it sorts ascending.
            direction=-1 if direction == "Descending" else None,
            limit=limit * _SCAN_FACTOR if filters_client_side else limit,
            expand=_EXPAND_FIELDS,
        )
        # `list_spaces` is lazy: HTTP errors surface while iterating.
        for space in listing:
            if want_private is not None and bool(getattr(space, "private", False)) != want_private:
                continue
            flavour = _hardware_name(space)
            if want_hardware is not None and _hardware_class(flavour) != want_hardware:
                continue
            rows.append(_space_to_row(space, flavour))
            if len(rows) >= limit:
                break
    except HfHubHTTPError as err:
        raise gr.Error(f"Hugging Face Hub request failed: {err}") from err

    # gr.Dataframe renders a DataFrame directly; a list of dicts is not tabular input.
    return pd.DataFrame(rows, columns=RESULT_COLUMNS)


with gr.Blocks() as demo:
    gr.Markdown("# 🔍 HF Spaces Explorer with Advanced Filters")
    with gr.Row():
        query = gr.Textbox(label="Search Query", placeholder="Enter keywords...")
        library = gr.Dropdown(LIBRARY_OPTIONS, label="Library", value="All")
        license_choice = gr.Dropdown(LICENSE_OPTIONS, label="License", value="All")
    with gr.Row():
        tags = gr.Textbox(
            label="Tags (comma-separated)",
            placeholder="e.g. text-generation, image-classification",
        )
        visibility = gr.Dropdown(VISIBILITY_OPTIONS, label="Visibility", value="All")
        hardware = gr.Dropdown(HARDWARE_OPTIONS, label="Hardware", value="All")
    with gr.Row():
        sort_by = gr.Dropdown(list(SORT_OPTIONS), label="Sort By", value="Last Modified")
        direction = gr.Radio(DIRECTION_OPTIONS, label="Direction", value="Descending")
        limit = gr.Slider(1, 100, label="Max Results", value=20, step=1)
    search_btn = gr.Button("🔎 Search")
    results = gr.Dataframe(headers=RESULT_COLUMNS, label="Results")

    search_btn.click(
        fn=search_spaces,
        inputs=[query, library, license_choice, tags, visibility, hardware, sort_by, direction, limit],
        outputs=results,
    )

    gr.Markdown("---")
    gr.Markdown(
        "Enhanced with tag, hardware & visibility filters. "
        "Built on `huggingface_hub` and Gradio Blocks."
    )

if __name__ == "__main__":
    demo.launch(share=True)