from pathlib import Path

import gradio as gr
import pandas as pd
from gradio_leaderboard import Leaderboard

custom_css = """
.logo {
    width: 300px;
    height: auto;
    max-width: 100%;
    margin: 0 auto;
    object-fit: contain;
    padding-bottom: 0;
}
.text {
    font-size: 16px !important;
}
.tabs button {
    font-size: 20px;
}
.subtabs button {
    font-size: 20px;
}
h1, h2 {
    margin: 0;
    padding-top: 0;
}
"""

# Override config validation to work around a bug in gradio_leaderboard
Leaderboard.raise_error_if_incorrect_config = lambda self: None

abs_path = Path(__file__).parent / "data"

# Load the JSONL file into a pandas DataFrame
df = pd.read_json(abs_path / "text_to_image.jsonl", lines=True)

# Format URL column, handling None/empty URLs
df["URL"] = df.apply(
    lambda row: f'<a target="_blank" href="{row["URL"]}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">link</a>'
    if pd.notna(row["URL"]) and row["URL"]
    else "",
    axis=1,
)

# Compute aggregate GenEval score if individual GenEval columns exist but "GenEval" doesn't
if "GenEval" not in df.columns:
    # Find all GenEval-related columns (e.g., "GenEval (Single Object) (VQA)")
    geneval_cols = [col for col in df.columns if col.startswith("GenEval")]
    if geneval_cols:
        # Compute mean of all GenEval columns, ignoring NaN values
        df["GenEval"] = df[geneval_cols].mean(axis=1, skipna=True)
        # If a row has no GenEval values at all, set the aggregate to None
        df["GenEval"] = df["GenEval"].where(df[geneval_cols].notna().any(axis=1), None)

# Reorder columns: fixed descriptive columns first, then all remaining metric columns
df = df[
    [
        "URL",
        "Platform",
        "Owner",
        "Device",
        "Model",
        "Optimization",
        "Median Inference Time",
        "Price per Image",
    ]
    + [
        col
        for col in df.columns.tolist()
        if col
        not in [
            "URL",
            "Model",
            "Median Inference Time",
            "Price per Image",
            "Platform",
            "Owner",
            "Device",
            "Optimization",
        ]
    ]
]

# Sort by GenEval if it exists, otherwise try other common metrics
sort_column = None
if "GenEval" in df.columns:
    sort_column = "GenEval"
elif "HPS (v2.1)" in df.columns:
    sort_column = "HPS (v2.1)"
elif "GenAI-Bench (VQA)" in df.columns:
    sort_column = "GenAI-Bench (VQA)"
elif len(df.columns) > 0:
    # Sort by first numeric column if available
    numeric_cols = df.select_dtypes(include=[float, int]).columns.tolist()
    if numeric_cols:
        sort_column = numeric_cols[0]

if sort_column:
    df = df.sort_values(by=sort_column, ascending=False, na_position="last")
# Otherwise keep the original row order

# Format numeric columns to have at most 4 decimal places
numeric_cols = df.select_dtypes(include=[float, int]).columns.tolist()
for col in numeric_cols:
    df[col] = df[col].apply(lambda x: round(x, 4) if pd.notna(x) else x)

with gr.Blocks("ParityError/Interstellar", fill_width=True, css=custom_css) as demo:
    gr.HTML(
        """
            <div style="text-align: center;">
                <img src="https://huggingface.co/datasets/PrunaAI/documentation-images/resolve/main/inferbench/logo2-cropped.png" style="width: 200px; height: auto; max-width: 100%; margin: 0 auto;">
                <h1>🏋️ InferBench 🏋️</h1>
                <h2>A cost/quality/speed Leaderboard for Inference Providers!</h2>
            </div>
            """
    )
    with gr.Tabs():
        with gr.TabItem("Text-to-Image Leaderboard [WIP]"):
            Leaderboard(
                value=df,
                select_columns=df.columns.tolist(),
                # The first six columns (URL through Optimization) render as
                # markdown; every remaining metric column is numeric
                datatype=["markdown"] * 6
                + ["number"] * (len(df.columns.tolist()) - 6),
                filter_columns=[
                    "Platform",
                    "Owner",
                    "Device",
                    "Model",
                    "Optimization",
                ],
            )
            gr.Markdown(
                """
                > **💡 Note:** Each efficiency metric and quality metric captures only one dimension of model capacity. Rankings may vary when considering other metrics. This leaderboard is a work in progress and will be updated regularly. For now, some metrics are not computed on the entire benchmark.
                """
            )
        with gr.TabItem("About"):
            with gr.Row():
                with gr.Column():
                    gr.Markdown(
                        """
                        # 📊 Text-to-Image Leaderboard

                        This leaderboard compares the performance of different text-to-image providers.

                        We started with a comprehensive benchmark comparing our very own FLUX-juiced with the “FLUX.1 [dev]” endpoints offered by:

                        - Replicate: https://replicate.com/black-forest-labs/flux-dev
                        - Fal: https://fal.ai/models/fal-ai/flux/dev
                        - Fireworks AI: https://fireworks.ai/models/fireworks/flux-1-dev-fp8
                        - Together AI: https://www.together.ai/models/flux-1-dev

                        We also included the following non-FLUX providers:

                        - AWS Nova Canvas: https://aws.amazon.com/ai/generative-ai/nova/creative/

                        All of these inference providers offer their own implementations, but they don't always disclose the optimisation methods used in the background, and most endpoints have different response times and performance measures.

                        For comparison purposes we used the same generation set-up for all the providers (a reference sketch follows the list):

                        - 28 inference steps
                        - 1024×1024 resolution
                        - Guidance scale of 3.5
                        - H100 GPU (80GB)—only reported by Replicate
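
                        For reference, the equivalent call with the open-source `diffusers` library looks roughly like this (a sketch of the shared settings only, not any provider's actual serving code):

                        ```python
                        import torch
                        from diffusers import FluxPipeline

                        # FLUX.1 [dev] with the benchmark settings:
                        # 28 steps, 1024x1024, guidance scale 3.5
                        pipe = FluxPipeline.from_pretrained(
                            "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
                        ).to("cuda")
                        image = pipe(
                            "a photo of an astronaut riding a horse",
                            num_inference_steps=28,
                            height=1024,
                            width=1024,
                            guidance_scale=3.5,
                        ).images[0]
                        ```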

                        Although we tested with this specific Pruna configuration and hardware, the applied compression methods work with different configurations and hardware too!

                        > We published a full blog post on [the creation of our FLUX-juiced endpoint](https://www.pruna.ai/blog/flux-juiced-the-fastest-image-generation-endpoint).
                        """
                    )
                with gr.Column():
                    gr.Markdown(
                        """
                        # 🧃 FLUX.1-dev (juiced)

                        FLUX.1-dev (juiced) is our optimized version of FLUX.1-dev, delivering up to **2.6x faster inference** than the official Replicate API, **without sacrificing image quality**.

                        Under the hood, it uses a custom combination of:

                        - **Graph compilation** for optimized execution paths
                        - **Inference-time caching** for repeated operations

                        We won’t go deep into the internals here, but here’s the gist:

                        > We combine compiler-level execution graph optimization with selective caching of heavy operations (like attention layers), allowing inference to skip redundant computations without any loss in fidelity.
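
                        As a generic illustration of the first ingredient (this uses plain `torch.compile`, not Pruna Pro's own compiler, whose internals are not public), graph compilation can be applied to an open FLUX pipeline like so:

                        ```python
                        import torch
                        from diffusers import FluxPipeline

                        pipe = FluxPipeline.from_pretrained(
                            "black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16
                        ).to("cuda")
                        # Compile the denoising transformer: the first call traces and
                        # optimizes the execution graph, later calls reuse it
                        pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune")
                        ```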

                        These techniques are generalized and plug-and-play via the **Pruna Pro** pipeline, and can be applied to nearly any diffusion-based image model, not just FLUX. For a free but still very juicy model, you can use our open-source solution.

                        > 🧪 Try FLUX-juiced now → [replicate.com/prunaai/flux.1-juiced](https://replicate.com/prunaai/flux.1-juiced)

                        ## Sample Images

                        The prompts were randomly sampled from the [parti-prompts dataset](https://github.com/google-research/parti). The reported times represent the full duration of each API call.
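
                        A minimal sketch of how such wall-clock timing can be computed (`generate` and `prompts` are hypothetical stand-ins, not the actual benchmark code):

                        ```python
                        import statistics
                        import time

                        def time_call(generate, prompt: str) -> float:
                            # Wall-clock the full duration of one API call, in seconds
                            start = time.perf_counter()
                            generate(prompt)  # hypothetical provider text-to-image call
                            return time.perf_counter() - start

                        durations = [time_call(generate, p) for p in prompts]
                        print(f"Median inference time: {statistics.median(durations):.2f}s")
                        ```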

                        > **For samples, check out the [Pruna Notion page](https://pruna.notion.site/FLUX-1-dev-vs-Pruna-s-FLUX-juiced-1d270a039e5f80c6a2a3c00fc0d75ef0)**
                        """
                    )

        with gr.Accordion("🌍 Join the Pruna AI community!", open=False):
            gr.HTML(
                """
                    <a rel="nofollow" href="https://twitter.com/PrunaAI"><img alt="Twitter" src="https://img.shields.io/twitter/follow/PrunaAI?style=social"></a>
                    <a rel="nofollow" href="https://github.com/PrunaAI/pruna"><img alt="GitHub" src="https://img.shields.io/github/stars/prunaai/pruna"></a>
                    <a rel="nofollow" href="https://www.linkedin.com/company/93832878/admin/feed/posts/?feedType=following"><img alt="LinkedIn" src="https://img.shields.io/badge/LinkedIn-Connect-blue"></a>
                    <a rel="nofollow" href="https://discord.com/invite/rskEr4BZJx"><img alt="Discord" src="https://img.shields.io/badge/Discord-Join%20Us-blue?style=social&amp;logo=discord"></a>
                    <a rel="nofollow" href="https://www.reddit.com/r/PrunaAI/"><img alt="Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/PrunaAI?style=social"></a>
                """
            )
        with gr.Accordion("Citation", open=True):
            gr.Markdown(
                """
                ```bibtex
                @article{InferBench,
                    title={InferBench: A Leaderboard for Inference Providers},
                    author={PrunaAI},
                    year={2025},
                    howpublished={\\url{https://huggingface.co/spaces/PrunaAI/InferBench}}
                }
                ```
                """
            )


if __name__ == "__main__":
    demo.launch(ssr_mode=False)