Update app.py
app.py CHANGED
@@ -12,6 +12,8 @@ pipes = {
 inputs = [
     gr.Image(type='pil',
              label="Image"),
+    gr.Textbox(lines=1,
+               label="Candidate Labels", placeholder="Add a class label, one by one"),
     gr.Radio(choices=[
         "ViT/B-16",
         "ViT/L-14",
@@ -20,8 +22,6 @@ inputs = [
         label="Prompt Template Prompt",
         placeholder="Optional prompt template as prefix",
         value="a photo of a {}"),
-    gr.Textbox(lines=1,
-               label="Candidate Labels", placeholder="Add a class label, one by one",),
 ]
 images="festival.jpg"
 
@@ -35,7 +35,7 @@ def shot(image, labels_text, model_name, hypothesis_template):
 iface = gr.Interface(shot,
                      inputs,
                      "label",
-                     examples=[["festival.jpg", "ViT/B-16", "a photo of a {}"]],
+                     examples=[["festival.jpg", "lantern, firecracker, couplet", "ViT/B-16", "a photo of a {}"]],
                      description="""<p>Chinese CLIP is a contrastive-learning-based vision-language foundation model pretrained on large-scale Chinese data. For more information, please refer to the paper and official github. Also, Chinese CLIP has already been merged into Huggingface Transformers! <br><br>
 Paper: <a href='https://arxiv.org/pdf/2403.02714'>https://arxiv.org/pdf/2403.02714</a> <br>
 To begin with the demo, provide a picture (either upload manually, or select from the given examples) and add class labels one by one. Optionally, you can also add template as a prefix to the class labels. <br>""",
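The commit moves the Candidate Labels textbox ahead of the model Radio so that the order of inputs matches the parameter order of shot(image, labels_text, model_name, hypothesis_template), and it extends the examples row with a matching labels value. gr.Interface fills example fields positionally, so each row in examples must line up one-to-one with the inputs list. Below is a minimal sketch of the relevant part of app.py after this change; the pipes dict, the Radio's label, any further model choices, and the body of shot() are not visible in the diff, so the stubs for them here are assumptions.

import gradio as gr

inputs = [
    gr.Image(type='pil', label="Image"),
    # Moved ahead of the Radio so its position matches labels_text,
    # the second parameter of shot().
    gr.Textbox(lines=1, label="Candidate Labels",
               placeholder="Add a class label, one by one"),
    # The Radio's label and any further choices are not shown in the
    # diff; "Model" is a hypothetical label for illustration.
    gr.Radio(choices=["ViT/B-16", "ViT/L-14"], label="Model"),
    gr.Textbox(lines=1, label="Prompt Template Prompt",
               placeholder="Optional prompt template as prefix",
               value="a photo of a {}"),
]

def shot(image, labels_text, model_name, hypothesis_template):
    # Stub: the real function runs zero-shot classification with the
    # selected Chinese CLIP pipeline; here we just return uniform scores.
    labels = [label.strip() for label in labels_text.split(",")]
    return {label: 1.0 / len(labels) for label in labels}

iface = gr.Interface(shot,
                     inputs,
                     "label",
                     # Each examples row follows the inputs order:
                     # image, candidate labels, model, prompt template.
                     # festival.jpg ships with the Space's repository.
                     examples=[["festival.jpg", "lantern, firecracker, couplet",
                                "ViT/B-16", "a photo of a {}"]],
                     description="Chinese CLIP zero-shot image classification demo.")

if __name__ == "__main__":
    iface.launch()

Before this change, the example row ["festival.jpg", "ViT/B-16", "a photo of a {}"] supplied no value for the Candidate Labels field; moving the textbox and adding "lantern, firecracker, couplet" keeps the example consistent with all four inputs.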