AmnaHassan committed on
Commit ad43496 · verified · 1 Parent(s): 0f85d48

Update app.py

Files changed (1)
  1. app.py +227 -102
app.py CHANGED
@@ -1,23 +1,41 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
-from mpl_toolkits.mplot3d import Axes3D
-import gradio as gr
 from io import BytesIO
 import base64
-import random

-# ---------- ENVIRONMENT SETUP ----------
 GRID_SIZE = 8
 ACTIONS = ['up', 'down', 'left', 'right']

 class CarEnvironment:
-    def __init__(self):
-        self.reset()
-
-    def reset(self):
         self.car = (0, 0)
         self.goal = (GRID_SIZE - 1, GRID_SIZE - 1)
-        self.obstacles = [(random.randint(1, GRID_SIZE-2), random.randint(1, GRID_SIZE-2)) for _ in range(10)]
         self.steps = 0
         return self.car
@@ -36,10 +54,10 @@ class CarEnvironment:
         self.steps += 1

         if new_pos in self.obstacles:
-            reward = -5
             done = True
         elif new_pos == self.goal:
-            reward = 10
             done = True
         else:
             reward = -0.1
@@ -49,118 +67,225 @@ class CarEnvironment:
         return new_pos, reward, done

 # ---------- Q-LEARNING ----------
-def q_learning(env, episodes=100):
     q_table = np.zeros((GRID_SIZE, GRID_SIZE, len(ACTIONS)))
-    alpha, gamma, epsilon = 0.5, 0.9, 0.3
-
-    for _ in range(episodes):
         state = env.reset()
         done = False
-        while not done:
-            if random.uniform(0, 1) < epsilon:
                 action = random.choice(ACTIONS)
             else:
                 action = ACTIONS[np.argmax(q_table[state[0], state[1]])]
             next_state, reward, done = env.step(action)
-            old_value = q_table[state[0], state[1], ACTIONS.index(action)]
-            next_max = np.max(q_table[next_state[0], next_state[1]])
-            q_table[state[0], state[1], ACTIONS.index(action)] = old_value + alpha * (reward + gamma * next_max - old_value)
             state = next_state
-    return q_table

-def simulate_path(env, q_table):
     state = env.reset()
     path = [state]
     done = False
-    while not done and len(path) < 100:
         action = ACTIONS[np.argmax(q_table[state[0], state[1]])]
         next_state, _, done = env.step(action)
         path.append(next_state)
         state = next_state
     return path

-# ---------- VISUALIZATION ----------
-def render_scene(path, obstacles, goal, view="3D"):
-    fig = plt.figure(figsize=(6, 6))
-
-    if view == "3D":
-        ax = fig.add_subplot(111, projection="3d")
-        X, Y = np.meshgrid(np.arange(GRID_SIZE), np.arange(GRID_SIZE))
-        Z = np.zeros_like(X)
-        ax.plot_surface(X, Y, Z, color='gray', alpha=0.3)
-
-        # Obstacles
-        for (x, y) in obstacles:
-            ax.bar3d(y, x, 0, 1, 1, 2, color='red', alpha=0.8)
-
-        # Path
-        for (x, y) in path:
-            ax.bar3d(y, x, 0, 1, 1, 0.3, color='dodgerblue', alpha=0.6)
-
-        # Car (last position)
-        car_x, car_y = path[-1]
-        ax.bar3d(car_y, car_x, 0, 1, 1, 1, color='yellow', alpha=1.0)
-
-        # Goal
-        ax.bar3d(goal[1], goal[0], 0, 1, 1, 0.1, color='lime', alpha=1.0)
-
-        ax.set_xlim(0, GRID_SIZE)
-        ax.set_ylim(0, GRID_SIZE)
-        ax.set_zlim(0, 3)
-        ax.set_title("3D Autonomous Car Navigation", fontsize=14, color="white", pad=20)
-        ax.set_facecolor("#1a1a1a")
-
-    else:  # 2D View
-        ax = fig.add_subplot(111)
-        ax.set_xlim(0, GRID_SIZE)
-        ax.set_ylim(0, GRID_SIZE)
-        ax.set_xticks(range(GRID_SIZE))
-        ax.set_yticks(range(GRID_SIZE))
-        ax.grid(True, linestyle='--', alpha=0.4)
-        ax.set_facecolor("#121212")
-
-        for (x, y) in obstacles:
-            ax.add_patch(plt.Rectangle((y, GRID_SIZE-1-x), 1, 1, color="crimson"))
-
-        for (x, y) in path:
-            ax.add_patch(plt.Rectangle((y, GRID_SIZE-1-x), 1, 1, color="dodgerblue", alpha=0.4))
-
-        ax.add_patch(plt.Rectangle((goal[1], GRID_SIZE-1-goal[0]), 1, 1, color="lime"))
-        car_x, car_y = path[-1]
-        ax.add_patch(plt.Rectangle((car_y, GRID_SIZE-1-car_x), 1, 1, color="gold"))
-
-        ax.set_title("2D Autonomous Car Path", color="white", fontsize=14)
-
-    plt.tight_layout()
     buf = BytesIO()
-    plt.savefig(buf, format="png", bbox_inches="tight", facecolor="#1a1a1a")
     plt.close(fig)
     buf.seek(0)
-    img_data = base64.b64encode(buf.read()).decode("utf-8")
-    return f"<img src='data:image/png;base64,{img_data}' style='border-radius:12px; width:100%;'/>"
-
-# ---------- SIMULATION FUNCTION ----------
-def run_simulation(view):
-    env = CarEnvironment()
-    q_table = q_learning(env)
-    path = simulate_path(env, q_table)
-    return render_scene(path, env.obstacles, env.goal, view)
-
-# ---------- GRADIO INTERFACE ----------
-with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="violet")) as demo:
-    gr.Markdown(
-        """
-        <div style='text-align:center; background: linear-gradient(90deg, #0d47a1, #311b92);
-        color:white; padding:20px; border-radius:12px;'>
-        <h1>🚗 AI Car Navigation Simulator</h1>
-        <p>Watch a Reinforcement Learning Agent learn to drive towards its goal while avoiding obstacles — in 2D or 3D!</p>
-        </div>
-        """
-    )
-    view_mode = gr.Radio(["2D", "3D"], value="3D", label="Choose View Mode")
-    run_btn = gr.Button("▶️ Run Simulation", variant="primary")
-    output = gr.HTML(label="Simulation Output")
-
-    run_btn.click(fn=run_simulation, inputs=view_mode, outputs=output)

 demo.launch()
 
+import random
 import numpy as np
+import matplotlib
+matplotlib.use("Agg")  # for headless servers like Hugging Face Spaces
 import matplotlib.pyplot as plt
+from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
 from io import BytesIO
 import base64
+from PIL import Image
+import gradio as gr

+# ---------- ENVIRONMENT ----------
 GRID_SIZE = 8
 ACTIONS = ['up', 'down', 'left', 'right']

 class CarEnvironment:
+    def __init__(self, obstacles=None, seed=None):
+        self.seed = seed
+        self.reset(obstacles)

+    def reset(self, obstacles=None):
+        if self.seed is not None:
+            random.seed(self.seed)
+            np.random.seed(self.seed)
         self.car = (0, 0)
         self.goal = (GRID_SIZE - 1, GRID_SIZE - 1)
+        # deterministic obstacles if provided, else random but reproducible with seed
+        if obstacles:
+            self.obstacles = obstacles
+        else:
+            # ensure obstacles don't overlap start/goal
+            obs = set()
+            while len(obs) < int(GRID_SIZE * 1.25):
+                x = random.randint(1, GRID_SIZE - 2)
+                y = random.randint(1, GRID_SIZE - 2)
+                if (x, y) not in [(0,0), self.goal]:
+                    obs.add((x,y))
+            self.obstacles = list(obs)
         self.steps = 0
         return self.car
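A quick way to exercise the new constructor outside the app (an illustrative sketch, not part of the commit; it only assumes the definitions above):

env = CarEnvironment(seed=42)                  # reproducible obstacle layout
print(env.car, env.goal)                       # (0, 0) (7, 7) for GRID_SIZE = 8
print(len(env.obstacles))                      # int(8 * 1.25) = 10 obstacle cells
replay = CarEnvironment(obstacles=env.obstacles, seed=42)  # reuse the same map later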
 
 
         self.steps += 1

         if new_pos in self.obstacles:
+            reward = -5.0
             done = True
         elif new_pos == self.goal:
+            reward = 10.0
             done = True
         else:
             reward = -0.1
 
         return new_pos, reward, done

 # ---------- Q-LEARNING ----------
+def q_learning(env, episodes=500, alpha=0.7, gamma=0.95, epsilon=0.1):
     q_table = np.zeros((GRID_SIZE, GRID_SIZE, len(ACTIONS)))
+    rewards_per_episode = []
+    for ep in range(episodes):
         state = env.reset()
+        total = 0.0
         done = False
+        steps = 0
+        while not done and steps < 400:
+            if random.random() < epsilon:
                 action = random.choice(ACTIONS)
             else:
                 action = ACTIONS[np.argmax(q_table[state[0], state[1]])]
+
             next_state, reward, done = env.step(action)
+            ai = ACTIONS.index(action)
+            old = q_table[state[0], state[1], ai]
+            # Temporal difference update (Q-learning)
+            q_table[state[0], state[1], ai] = old + alpha * (reward + gamma * np.max(q_table[next_state[0], next_state[1]]) - old)
+
+            total += reward
             state = next_state
+            steps += 1
+        rewards_per_episode.append(total)
+    return q_table, rewards_per_episode
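The temporal-difference line above is the standard tabular Q-learning update, Q(s, a) <- Q(s, a) + alpha * (r + gamma * max_a' Q(s', a') - Q(s, a)). A tiny worked example with the new defaults (numbers are made up purely for illustration):

old, reward, next_max = 0.0, -0.1, 2.0   # hypothetical current Q-value, step reward, best next-state value
alpha, gamma = 0.7, 0.95                 # defaults from q_learning above
new_q = old + alpha * (reward + gamma * next_max - old)
print(round(new_q, 2))                   # 0.7 * (-0.1 + 0.95 * 2.0) = 1.26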

+# ---------- SIMULATION / PATH ----------
+def simulate_path(env, q_table, max_steps=200):
     state = env.reset()
     path = [state]
     done = False
+    steps = 0
+    while not done and steps < max_steps:
         action = ACTIONS[np.argmax(q_table[state[0], state[1]])]
         next_state, _, done = env.step(action)
         path.append(next_state)
         state = next_state
+        steps += 1
     return path
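One detail of the greedy rollout above: np.argmax returns the first index on ties, so any state whose Q-values are still all zero maps to 'up'. A one-line check (illustrative only, assumes the imports and ACTIONS defined above):

untrained = np.zeros(len(ACTIONS))       # Q-values of a never-visited state
print(ACTIONS[np.argmax(untrained)])     # 'up' -- ties resolve to the first action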

+# ---------- RENDER HELPERS ----------
+def fig_to_pil(fig, facecolor=None):
     buf = BytesIO()
+    fig.savefig(buf, format="png", bbox_inches='tight', facecolor=facecolor)
     plt.close(fig)
     buf.seek(0)
+    return Image.open(buf).convert("RGBA")
+
+def render_frame_3d(path, obstacles, goal, elev=30, azim=45):
+    fig = plt.figure(figsize=(6,6), facecolor="#111111")
+    ax = fig.add_subplot(111, projection="3d")
+    # floor
+    X, Y = np.meshgrid(np.arange(GRID_SIZE+1), np.arange(GRID_SIZE+1))
+    Z = np.zeros_like(X)
+    ax.plot_surface(X, Y, Z, color='gray', alpha=0.08)
+    # obstacles
+    for (x,y) in obstacles:
+        ax.bar3d(y, x, 0, 1, 1, 1.8, color='red', alpha=0.9)
+    # path bars (lower)
+    for (x,y) in path:
+        ax.bar3d(y, x, 0, 1, 1, 0.25, color='deepskyblue', alpha=0.6)
+    # car (top)
+    car_x, car_y = path[-1]
+    ax.bar3d(car_y, car_x, 0, 1, 1, 0.9, color='gold', alpha=1.0, edgecolor='k')
+    # goal
+    ax.bar3d(goal[1], goal[0], 0, 1, 1, 0.12, color='lime', alpha=1.0)
+    ax.set_xlim(-0.5, GRID_SIZE - 0.5)
+    ax.set_ylim(-0.5, GRID_SIZE - 0.5)
+    ax.set_zlim(0, 3)
+    ax.view_init(elev=elev, azim=azim)
+    ax.set_xticks([])
+    ax.set_yticks([])
+    ax.set_zticks([])
+    ax.set_facecolor("#111111")
+    return fig_to_pil(fig, facecolor="#111111")
+
+def render_frame_2d(path, obstacles, goal):
+    fig = plt.figure(figsize=(6,6), facecolor="#111111")
+    ax = fig.add_subplot(111)
+    ax.set_xlim(0, GRID_SIZE)
+    ax.set_ylim(0, GRID_SIZE)
+    ax.set_xticks(np.arange(0.5, GRID_SIZE, 1))
+    ax.set_yticks(np.arange(0.5, GRID_SIZE, 1))
+    ax.set_xticklabels([])
+    ax.set_yticklabels([])
+    ax.grid(True, color='#2a2a2a', linestyle='--', linewidth=1)
+    ax.set_facecolor("#0f0f0f")
+    # draw obstacles
+    for (x,y) in obstacles:
+        ax.add_patch(plt.Rectangle((y, GRID_SIZE-1-x), 1, 1, color='crimson'))
+    # draw path
+    for (x,y) in path:
+        ax.add_patch(plt.Rectangle((y, GRID_SIZE-1-x), 1, 1, color='deepskyblue', alpha=0.6))
+    # car
+    car_x, car_y = path[-1]
+    ax.add_patch(plt.Rectangle((car_y, GRID_SIZE-1-car_x), 1, 1, color='gold'))
+    # goal
+    ax.add_patch(plt.Rectangle((goal[1], GRID_SIZE-1-goal[0]), 1, 1, color='lime'))
+    ax.set_title("2D View", color="white")
+    return fig_to_pil(fig, facecolor="#111111")
+
+def frames_to_gif(frames, duration_ms=300):
+    # frames: list of PIL.Image
+    # duration_ms per frame
+    buf = BytesIO()
+    # convert to P mode for smaller size & better GIF compatibility
+    frames[0].save(buf, format='GIF', save_all=True, append_images=frames[1:],
+                   duration=duration_ms, loop=0, disposal=2, optimize=True)
+    buf.seek(0)
+    return buf.read()
+
+def img_bytes_to_datauri(img_bytes, mime='image/gif'):
+    return "data:{};base64,{}".format(mime, base64.b64encode(img_bytes).decode('utf-8'))
+
+def plot_reward_curve(rewards):
+    fig = plt.figure(figsize=(6,3), facecolor="#111111")
+    ax = fig.add_subplot(111)
+    ax.plot(rewards, linewidth=1.5)
+    ax.set_xlabel("Episode", color="white")
+    ax.set_ylabel("Total Reward", color="white")
+    ax.set_facecolor("#111111")
+    ax.tick_params(colors="white")
+    fig.tight_layout()
+    return fig_to_pil(fig, facecolor="#111111")
+
+# ---------- GRADIO CALLBACKS & STATE ----------
+def train_agent(episodes, alpha, gamma, epsilon, seed):
+    # create reproducible environment for training
+    env = CarEnvironment(seed=seed)
+    q_table, rewards = q_learning(env, episodes=episodes, alpha=alpha, gamma=gamma, epsilon=epsilon)
+    reward_img = plot_reward_curve(rewards)
+    # store q_table and obstacles/goal for later simulation
+    metadata = {
+        "q_table": q_table,
+        "obstacles": env.obstacles.copy(),
+        "goal": env.goal,
+        "seed": seed
+    }
+    # return metadata as state, and reward image as data URI
+    buf = BytesIO()
+    reward_img.save(buf, format="PNG")
+    buf.seek(0)
+    reward_datauri = "data:image/png;base64," + base64.b64encode(buf.read()).decode("utf-8")
+    return metadata, reward_datauri, f"Trained for {episodes} episodes. Reward (last): {round(rewards[-1], 2)}"
+
+def start_drive(view_mode, speed_ms, rotate_camera, state):
+    # state should contain q_table and map details
+    if not state:
+        return None, "No trained agent found. Please train the agent first."
+    q_table = state["q_table"]
+    obstacles = state["obstacles"]
+    goal = state["goal"]
+    seed = state.get("seed", None)
+    env = CarEnvironment(obstacles=obstacles, seed=seed)
+    path = simulate_path(env, q_table, max_steps=200)
+    # Create frames
+    frames = []
+    # small camera motion parameters
+    base_elev = 30
+    base_azim = 45
+    for i in range(1, len(path)+1):
+        subpath = path[:i]
+        if view_mode == "3D":
+            elev = base_elev + (rotate_camera * (i/len(path)) * 10)
+            azim = base_azim + (rotate_camera * (i/len(path)) * 40)
+            frame = render_frame_3d(subpath, obstacles, goal, elev=elev, azim=azim)
+        else:
+            frame = render_frame_2d(subpath, obstacles, goal)
+        frames.append(frame)
+    # hold on final frame longer
+    if len(frames) >= 1:
+        frames.append(frames[-1])
+    gif_bytes = frames_to_gif(frames, duration_ms=max(50, int(speed_ms)))
+    datauri = img_bytes_to_datauri(gif_bytes, mime='image/gif')
+    info = f"Drive simulated: {len(path)-1} steps. View: {view_mode}. Speed: {speed_ms} ms/frame."
+    return datauri, info
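The old render_scene wrapped its PNG in an <img> tag before handing it to gr.HTML; a similar wrapper for the new GIF data URI would look like the hypothetical helper below (an assumption for illustration, not something this commit defines):

def datauri_to_img_tag(datauri):
    # hypothetical helper: embed the GIF data URI as an image for an HTML output
    return f"<img src='{datauri}' style='width:100%; border-radius:12px;'/>"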
+
+# ---------- GRADIO LAYOUT ----------
+with gr.Blocks(theme=gr.themes.Soft(primary_hue="violet")) as demo:
+    gr.Markdown("""<div style="text-align:center; padding:18px; border-radius:10px;
+                background: linear-gradient(90deg,#0d47a1,#4a148c); color:white">
+                <h2>🚗 AI Car Navigation Lab — Animated 2D / 3D Demo</h2>
+                <p style="margin:0">Train a tabular Q-learning agent, visualize training, then run an animated drive (GIF)</p>
+                </div>""")
+
+    with gr.Row():
+        with gr.Column(scale=1):
+            gr.Markdown("### ▶ Training Controls")
+            episodes = gr.Slider(100, 3000, step=100, value=600, label="Training Episodes")
+            alpha = gr.Slider(0.05, 1.0, step=0.05, value=0.7, label="Learning rate α")
+            gamma = gr.Slider(0.1, 0.999, step=0.01, value=0.95, label="Discount factor γ")
+            epsilon = gr.Slider(0.0, 1.0, step=0.05, value=0.15, label="Exploration ε")
+            seed = gr.Number(value=42, label="Random seed (reproducible map)", precision=0)
+            train_btn = gr.Button("🧠 Train Agent", variant="primary")
+            reward_output = gr.Image(label="Reward Curve (training)", interactive=False)
+            train_status = gr.Textbox(label="Training status", interactive=False)
+        with gr.Column(scale=1):
+            gr.Markdown("### ▶ Simulation & Animation")
+            view_mode = gr.Radio(["3D", "2D"], value="3D", label="Visualization Mode")
+            speed_slider = gr.Slider(50, 1000, step=10, value=250, label="Animation Speed (ms per frame)")
+            rotate_cam = gr.Slider(0, 1, step=0.1, value=0.6, label="Subtle camera rotation (3D only)")
+            drive_btn = gr.Button("▶ Start Drive", variant="secondary")
+            gif_output = gr.HTML(label="Animated Drive (GIF)")
+            drive_info = gr.Textbox(label="Simulation info", interactive=False)
+
+    # hidden state to hold the trained model & environment metadata
+    state = gr.State(value=None)
+
+    # wire up callbacks
+    train_btn.click(fn=train_agent, inputs=[episodes, alpha, gamma, epsilon, seed],
+                    outputs=[state, reward_output, train_status])
+
+    drive_btn.click(fn=start_drive, inputs=[view_mode, speed_slider, rotate_cam, state],
+                    outputs=[gif_output, drive_info])
+
+    # helpful footer
+    gr.Markdown("""
+    **Notes:** The agent is tabular Q-learning. Use the sliders to tune hyperparameters.
+    The animation is a GIF generated on-the-fly; download it from the GIF image if you want a clip.
+    """)

 demo.launch()
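For quick testing outside the Gradio UI, the same pieces can be driven from a short script (an illustrative sketch; the __main__ guard and the print are not part of app.py):

if __name__ == "__main__":
    env = CarEnvironment(seed=42)
    q_table, rewards = q_learning(env, episodes=600, alpha=0.7, gamma=0.95, epsilon=0.15)
    path = simulate_path(env, q_table)
    print(f"trained {len(rewards)} episodes; drive took {len(path) - 1} steps; "
          f"reached goal: {path[-1] == env.goal}")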