Merge pull request #3 from rerun-io/alexander/fix-lerobot-download
Files changed:
- Dockerfile +6 -0
- app.py +13 -9
- dataset_conversion.py +92 -4
- requirements.txt +2 -0
Dockerfile CHANGED

@@ -3,6 +3,9 @@
 FROM python:3.11.8
 
+# Dependencies for opencv
+RUN apt update && apt upgrade -y && apt install -y ffmpeg
+
 # Set up a new user named "user" with user ID 1000
 RUN useradd -m -u 1000 user

@@ -22,6 +25,9 @@ RUN pip install --no-cache-dir --upgrade pip
 # Copy the current directory contents into the container at $HOME/app setting the owner to the user
 COPY --chown=user . $HOME/app
 
+# https://github.com/huggingface/lerobot/issues/105
+RUN pip install --no-cache-dir --upgrade cmake
+
 # Install requirements.txt
 RUN pip install --no-cache-dir --upgrade -r requirements.txt
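The two added RUN lines only install system/build dependencies (ffmpeg for OpenCV's video backend, cmake for building lerobot). A minimal sanity check, not part of this PR, that could be run inside the built image to confirm video decoding works; "sample.mp4" is a placeholder path:

    # Hypothetical check: OpenCV should be able to open a clip now that ffmpeg is installed.
    import cv2

    cap = cv2.VideoCapture("sample.mp4")  # placeholder path to any test clip
    print("opened:", cap.isOpened(), "fps:", cap.get(cv2.CAP_PROP_FPS))
    cap.release()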
app.py CHANGED

@@ -18,8 +18,9 @@ from datasets import load_dataset
 from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from gradio_huggingfacehub_search import HuggingfaceHubSearch
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 
-from dataset_conversion import log_dataset_to_rerun
+from dataset_conversion import log_dataset_to_rerun, log_lerobot_dataset_to_rerun
 
 CUSTOM_PATH = "/"

@@ -50,14 +51,17 @@ def show_dataset(dataset_id: str, episode_index: int) -> str:
     rr.save(filename.as_posix())
 
-    … (8 removed lines not shown in this view)
+    if "/" in dataset_id and dataset_id.split("/")[0] == "lerobot":
+        dataset = LeRobotDataset(dataset_id)
+        log_lerobot_dataset_to_rerun(dataset, episode_index)
+    else:
+        dataset = load_dataset(dataset_id, split="train", streaming=True)
+
+        # This is for LeRobot datasets (https://huggingface.co/lerobot):
+        ds_subset = dataset.filter(
+            lambda frame: "episode_index" not in frame or frame["episode_index"] == episode_index
+        )
+        log_dataset_to_rerun(ds_subset)
 
     return filename.as_posix()
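For reference, a minimal standalone sketch of the new code path that show_dataset() takes for LeRobot datasets. The dataset id "lerobot/pusht", the application id, and the output filename are example values I chose, not taken from this PR:

    import rerun as rr
    from lerobot.common.datasets.lerobot_dataset import LeRobotDataset

    from dataset_conversion import log_lerobot_dataset_to_rerun

    dataset_id = "lerobot/pusht"  # example id; any id under the "lerobot" org takes the new branch
    rr.init("dataset_preview")    # example application id
    rr.save("episode_0.rrd")      # stream everything logged below into an .rrd file, as show_dataset() does
    if "/" in dataset_id and dataset_id.split("/")[0] == "lerobot":
        log_lerobot_dataset_to_rerun(LeRobotDataset(dataset_id), episode_index=0)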
dataset_conversion.py CHANGED

@@ -1,17 +1,55 @@
 from __future__ import annotations
 
 import logging
+from pathlib import PosixPath
 from typing import Any
 
+import cv2
 import numpy as np
 import rerun as rr
+import torch
+from lerobot.common.datasets.lerobot_dataset import LeRobotDataset
 from PIL import Image
 from tqdm import tqdm
 
 logger = logging.getLogger(__name__)
 
 
-def to_rerun(column_name: str, value: Any) -> Any:
+def get_frame(
+    video_path: PosixPath, timestamp: float, video_cache: dict[PosixPath, tuple[np.ndarray, float]] | None = None
+) -> np.ndarray:
+    """
+    Extracts a specific frame from a video.
+
+    `video_path`: path to the video.
+    `timestamp`: timestamp of the wanted frame.
+    `video_cache`: cache to prevent reading the same video file twice.
+    """
+
+    if video_cache is None:
+        video_cache = {}
+    if video_path not in video_cache:
+        cap = cv2.VideoCapture(str(video_path))
+        frames = []
+        while cap.isOpened():
+            success, frame = cap.read()
+            if success:
+                frames.append(frame)
+            else:
+                break
+        frame_rate = cap.get(cv2.CAP_PROP_FPS)
+        video_cache[video_path] = (frames, frame_rate)
+
+    frames, frame_rate = video_cache[video_path]
+    return frames[int(timestamp * frame_rate)]
+
+
+def to_rerun(
+    column_name: str,
+    value: Any,
+    video_cache: dict[PosixPath, tuple[np.ndarray, float]] | None = None,
+    videos_dir: PosixPath | None = None,
+) -> Any:
     """Do our best to interpret the value and convert it to a Rerun-compatible archetype."""
     if isinstance(value, Image.Image):
         if "depth" in column_name:

@@ -27,22 +65,47 @@ def to_rerun(column_name: str, value: Any) -> Any:
         return rr.TextDocument(str(value))  # Fallback to text
     elif isinstance(value, float) or isinstance(value, int):
         return rr.Scalar(value)
+    elif isinstance(value, torch.Tensor):
+        if value.dim() == 0:
+            return rr.Scalar(value.item())
+        elif value.dim() == 1:
+            return rr.BarChart(value)
+        elif value.dim() == 2 and "depth" in column_name:
+            return rr.DepthImage(value)
+        elif value.dim() == 2:
+            return rr.Image(value)
+        elif value.dim() == 3 and (value.shape[2] == 3 or value.shape[2] == 4):
+            return rr.Image(value)  # Treat it as a RGB or RGBA image
+        else:
+            return rr.Tensor(value)
+    elif isinstance(value, dict) and "path" in value and "timestamp" in value:
+        path = (videos_dir or PosixPath("./")) / PosixPath(value["path"])
+        timestamp = value["timestamp"]
+        return rr.Image(get_frame(path, timestamp, video_cache=video_cache))
     else:
         return rr.TextDocument(str(value))  # Fallback to text
 
 
-def log_dataset_to_rerun(dataset: Any) -> None:
-    # Special time-like columns for LeRobot datasets (https://huggingface.co/
+def log_lerobot_dataset_to_rerun(dataset: LeRobotDataset, episode_index: int) -> None:
+    # Special time-like columns for LeRobot datasets (https://huggingface.co/lerobot/):
     TIME_LIKE = {"index", "frame_id", "timestamp"}
 
     # Ignore these columns (again, LeRobot-specific):
     IGNORE = {"episode_data_index_from", "episode_data_index_to", "episode_id"}
 
-    … (1 removed line not shown in this view)
+    hf_ds_subset = dataset.hf_dataset.filter(
+        lambda frame: "episode_index" not in frame or frame["episode_index"] == episode_index
+    )
+
+    video_cache: dict[PosixPath, tuple[np.ndarray, float]] = {}
+
+    for row in tqdm(hf_ds_subset):
         # Handle time-like columns first, since they set a state (time is an index in Rerun):
         for column_name in TIME_LIKE:
             if column_name in row:
                 cell = row[column_name]
+                if isinstance(cell, torch.Tensor) and cell.dim() == 0:
+                    cell = cell.item()
                 if isinstance(cell, int):
                     rr.set_time_sequence(column_name, cell)
                 elif isinstance(cell, float):

@@ -54,5 +117,30 @@ def log_dataset_to_rerun(dataset: Any) -> None:
         for column_name, cell in row.items():
             if column_name in TIME_LIKE or column_name in IGNORE:
                 continue
+            else:
+                rr.log(
+                    column_name,
+                    to_rerun(column_name, cell, video_cache=video_cache, videos_dir=dataset.videos_dir.parent),
+                )
+
+
+def log_dataset_to_rerun(dataset: Any) -> None:
+    TIME_LIKE = {"index", "frame_id", "timestamp"}
+
+    for row in tqdm(dataset):
+        # Handle time-like columns first, since they set a state (time is an index in Rerun):
+        for column_name in TIME_LIKE:
+            if column_name in row:
+                cell = row[column_name]
+                if isinstance(cell, int):
+                    rr.set_time_sequence(column_name, cell)
+                elif isinstance(cell, float):
+                    rr.set_time_seconds(column_name, cell)  # assume seconds
+                else:
+                    print(f"Unknown time-like column {column_name} with value {cell}")
 
+        # Now log actual data columns:
+        for column_name, cell in row.items():
+            if column_name in TIME_LIKE:
+                continue
             rr.log(column_name, to_rerun(column_name, cell))
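To make the new video handling concrete: the added to_rerun() branch expects a video-backed cell to be a dict carrying a relative "path" and a "timestamp", which it resolves against videos_dir and decodes through get_frame(). A small usage sketch; the file paths, timestamp, and entity names below are placeholders of mine, not values from this PR:

    from pathlib import PosixPath

    import rerun as rr

    from dataset_conversion import get_frame, to_rerun

    rr.init("cell_demo")       # example application id
    rr.save("cell_demo.rrd")

    video_cache: dict = {}     # shared across cells so each .mp4 is decoded at most once

    # A video-backed cell shaped the way the new to_rerun() branch expects (placeholder values):
    cell = {"path": "videos/cam_0_episode_000000.mp4", "timestamp": 1.5}
    rr.log("observation.image", to_rerun("observation.image", cell, video_cache=video_cache, videos_dir=PosixPath(".")))

    # Or grab the raw frame directly; OpenCV returns an (H, W, 3) BGR array:
    frame = get_frame(PosixPath("videos/cam_0_episode_000000.mp4"), timestamp=1.5, video_cache=video_cache)
    print(frame.shape)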
requirements.txt CHANGED

@@ -5,4 +5,6 @@ gradio_huggingfacehub_search
 pillow
 rerun-sdk>=0.15.0,<0.16.0
 tqdm
+opencv-python
 webdataset
+git+https://github.com/huggingface/lerobot@7bb5b15f4c0393ba16b73f6482611892301401d7#egg=lerobot