diff --git a/04-spiral_classification.ipynb b/04-spiral_classification.ipynb
index bce1314..09abfad 100644
--- a/04-spiral_classification.ipynb
+++ b/04-spiral_classification.ipynb
@@ -15,7 +15,7 @@
    "source": [
     "import torch\n",
     "from torch import nn, optim\n",
-    "from math import pi as π"
+    "from math import pi as π # convenient constant for angles"
    ]
   },
   {
@@ -33,7 +33,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "set_default()"
+    "set_default() # apply plotting style defaults from res.plot_lib"
    ]
   },
   {
@@ -42,7 +42,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "# Use GPU if available for faster full-batch training\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -59,11 +67,12 @@
    "outputs": [],
    "source": [
     "seed = 12345\n",
+    "# Fix RNG for reproducibility across data and training\n",
     "torch.manual_seed(seed)\n",
     "N = 1000 # num_samples_per_class\n",
     "n = 2 # input dimensions\n",
     "K = 5 # num_classes\n",
-    "d = 100 # num_hidden_units"
+    "d = 100 # num_hidden_units\n"
    ]
   },
   {
@@ -74,8 +83,9 @@
    "source": [
     "# Generate spirals\n",
     "\n",
+    "# Radii grow linearly with t to spread points; small noise on angle for class overlap\n",
     "t = torch.linspace(0, 1, N)\n",
-    "a = 0.8 * t + 0.2 # amplitude 0.2 → 1.0\n",
+    "a = 0.8 * t + 0.2 # amplitude 0.2 -> 1.0\n",
     "X = list()\n",
     "y = list()\n",
     "for k in range(K):\n",
@@ -85,9 +95,12 @@
     "X = torch.cat(X)\n",
     "y = torch.cat(y)\n",
     "\n",
+    "# Keep CPU copies for plotting, send copies to device for training\n",
+    "X_dev, y_dev = X.to(device), y.to(device)\n",
+    "\n",
     "print(\"Shapes:\")\n",
     "print(\"X:\", tuple(X.size()))\n",
-    "print(\"y:\", tuple(y.size()))"
+    "print(\"y:\", tuple(y.size()))\n"
    ]
   },
   {
@@ -97,7 +110,7 @@
    "outputs": [],
    "source": [
     "# And visualise them\n",
-    "plot_data(X, y)"
+    "plot_data(X, y) # colors correspond to class labels"
    ]
   },
   {
@@ -113,8 +126,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "learning_rate = 1e-3\n",
-    "lambda_l2 = 1e-5"
+    "learning_rate = 1e-3 # stable default for Adam\n",
+    "lambda_l2 = 1e-5 # small weight decay to regularize"
    ]
   },
   {
@@ -124,11 +137,12 @@
    "outputs": [],
    "source": [
     "# Model definition\n",
+    "# Toggle ReLU to compare linear vs non-linear decision boundaries; optional 2D bottleneck for visualization\n",
     "model = nn.Sequential(\n",
     "    nn.Linear(n, d),\n",
     "    # nn.ReLU(), # Comment this line for a linear model\n",
     "    nn.Linear(d, K) # (Optional) Comment this line and uncomment the next one to display 2D embeddings below\n",
-    "    # nn.Linear(d, 2), nn.Linear(2, K)\n",
+    "    # nn.Linear(d, 2), nn.Linear(2, K),\n",
     ")\n",
     "model.to(device) # possibly send to CUDA\n",
     "\n",
@@ -138,28 +152,29 @@
     "# Using Adam optimiser\n",
     "optimiser = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
     "\n",
-    "# Full-batch training loop\n",
+    "# Full-batch training loop (not mini-batch; deterministic given the fixed seed)\n",
     "for t in range(2_000):\n",
-    "    \n",
     "    # Feed forward to get the linear sum s\n",
-    "    s = model(X)\n",
-    "    \n",
+    "    s = model(X_dev)\n",
+    "\n",
     "    # Compute the free energy F and loss L\n",
-    "    F = C(s, y)\n",
+    "    F = C(s, y_dev)\n",
     "    L = F.mean()\n",
-    "    \n",
+    "\n",
     "    # Zero the gradients\n",
     "    optimiser.zero_grad()\n",
-    "    \n",
+    "\n",
-    "    # Backward pass to compute and accumulate the gradient\n",
-    "    # of the free energy w.r.t our learnable params\n",
+    "    # Backward pass to compute and accumulate the gradient of the free energy w.r.t params\n",
     "    L.backward()\n",
-    "    \n",
+    "\n",
     "    # Update params\n",
     "    optimiser.step()\n",
-    "    \n",
-    "    # Display epoch, L, and accuracy\n",
-    "    overwrite(f'[EPOCH]: {t}, [LOSS]: {L.item():.6f}, [ACCURACY]: {acc(s, y):.3f}')"
+    "\n",
+    "    # Display epoch, L, and accuracy (uses on-device tensors)\n",
+    "    overwrite(f'[EPOCH]: {t}, [LOSS]: {L.item():.6f}, [ACCURACY]: {acc(s, y_dev):.3f}')\n",
+    "\n",
+    "# Move model back to CPU for downstream plotting utilities\n",
+    "model_cpu = model.to('cpu')\n"
    ]
   },
   {
@@ -169,8 +184,8 @@
    "outputs": [],
    "source": [
     "# Plot trained model\n",
-    "print(model)\n",
-    "plot_model(X, y, model)"
+    "print(model_cpu)\n",
+    "plot_model(X, y, model_cpu)\n"
    ]
   },
   {
@@ -180,7 +195,7 @@
    "outputs": [],
    "source": [
     "# (Optional) Plot internal 2D embeddings if available\n",
-    "plot_embeddings(X, y, model, zoom=10)"
+    "plot_embeddings(X, y, model_cpu, zoom=10)\n"
    ]
   },
   {
@@ -191,11 +206,12 @@
    "source": [
     "# Compute linear output s for a fine grid over the input space\n",
     "\n",
-    "mesh = torch.arange(-1.5, 1.5, 0.01)\n",
+    "mesh = torch.arange(-1.5, 1.5, 0.01) # step balances resolution vs compute\n",
     "xx, yy = torch.meshgrid(mesh, mesh, indexing='ij')\n",
     "grid = torch.stack((xx.reshape(-1), yy.reshape(-1)), dim=1)\n",
     "with torch.no_grad():\n",
-    "    s = model(grid)"
+    "    s = model_cpu(grid)\n",
+    "    s = s.detach().cpu() # keep on CPU for plotting\n"
    ]
   },
   {
@@ -204,8 +220,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Choice of free energy\n",
-    "\n",
+    "# Choice of free energy (toggle to inspect different energy landscapes)\n",
     "fe = 'cross-entropy'\n",
     "# fe = 'negative linear output'"
    ]
   },
   {
@@ -216,7 +231,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Switch to non-interactive matplotlib\n",
+    "# Switch to non-interactive matplotlib (inline) for static plots\n",
     "%matplotlib inline\n",
     "set_default()"
    ]
   },
   {
@@ -240,7 +255,8 @@
     "\n",
     "for k in range(K):\n",
     "    if fe == 'cross-entropy':\n",
-    "        F = C(s, torch.LongTensor(1).fill_(k).expand(s.size(0)))\n",
+    "        target = torch.full((s.size(0),), k, dtype=torch.long, device='cpu') # CPU for plotting\n",
+    "        F = C(s, target)\n",
     "        F = F.reshape(xx.shape)\n",
     "        plot_2d_energy_levels(X, y, (xx, yy, F, k, K), (0, 35), (1, 35, 4))\n",
     "\n",
@@ -277,7 +293,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Cross-entropy\n",
+    "# Cross-entropy (uses the last computed F from the loop above)\n",
     "if fe == 'cross-entropy':\n",
     "    fig, ax = plot_3d_energy_levels(X, y, (xx, yy, F, k, K), (0, 18), (0, 19, 1), (0, 19, 2))\n",
     "elif fe == 'negative linear output':\n",
@@ -320,7 +336,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -334,7 +350,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.12"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
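Note on the device-selection change above: the same CUDA / MPS / CPU cascade is repeated verbatim in 10-autoencoder.ipynb and 11-VAE.ipynb below. A minimal sketch of a shared helper the notebooks could import instead — get_device is a hypothetical name, e.g. living next to set_default in res/plot_lib.py:

    import torch

    def get_device() -> torch.device:
        """Pick the best available backend: CUDA, then Apple MPS, then CPU."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        if torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    device = get_device()
    print(f"Using device: {device}")
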
diff --git a/10-autoencoder.ipynb b/10-autoencoder.ipynb
index 597de24..e1cfcfd 100644
--- a/10-autoencoder.ipynb
+++ b/10-autoencoder.ipynb
@@ -39,23 +39,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Displaying routine\n",
+    "# Displaying routine (handles optional inputs and detaches tensors before plotting)\n",
     "\n",
     "def display_images(in_, out, n=1):\n",
     "    for N in range(n):\n",
     "        if in_ is not None:\n",
-    "            in_pic = to_img(in_.cpu().data)\n",
+    "            in_pic = to_img(in_.detach().cpu())\n",
     "            plt.figure(figsize=(18, 6))\n",
     "            for i in range(4):\n",
     "                plt.subplot(1,4,i+1)\n",
     "                plt.imshow(in_pic[i+4*N])\n",
     "                plt.axis('off')\n",
-    "        out_pic = to_img(out.cpu().data)\n",
+    "        out_pic = to_img(out.detach().cpu())\n",
     "        plt.figure(figsize=(18, 6))\n",
     "        for i in range(4):\n",
     "            plt.subplot(1,4,i+1)\n",
     "            plt.imshow(out_pic[i+4*N])\n",
-    "            plt.axis('off')"
+    "            plt.axis('off')\n"
    ]
   },
   {
@@ -65,7 +65,7 @@
    "outputs": [],
    "source": [
     "# Define data loading step\n",
-    "\n",
+    "# Normalize to roughly (-1, 1) so Tanh decoder output matches input scale\n",
     "batch_size = 256\n",
     "\n",
     "img_transform = transforms.Compose([\n",
@@ -74,7 +74,7 @@
     "])\n",
     "\n",
     "dataset = MNIST('./data', transform=img_transform, download=True)\n",
-    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)\n"
    ]
   },
   {
@@ -83,7 +83,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -101,6 +108,7 @@
     "class Autoencoder(nn.Module):\n",
     "    def __init__(self):\n",
     "        super().__init__()\n",
+    "        # Single hidden layer MLP with Tanh bottleneck\n",
     "        self.encoder = nn.Sequential(\n",
     "            nn.Linear(28 * 28, d),\n",
     "            nn.Tanh(),\n",
@@ -116,7 +124,7 @@
     "        return ỹ\n",
     "    \n",
     "ae = Autoencoder().to(device)\n",
-    "criterion = nn.MSELoss()"
+    "criterion = nn.MSELoss()\n"
    ]
   },
   {
@@ -163,14 +171,15 @@
     "        # img_bad = (img * noise).to(device) # comment out for standard AE\n",
     "        # ===================forward=====================\n",
     "        output = ae(img) # feed <img> (for std AE) or <img_bad> (for denoising AE)\n",
-    "        loss = criterion(output, img.data)\n",
+    "        loss = criterion(output, img.detach()) # detach target to avoid grad through input\n",
     "        # ===================backward====================\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        optimizer.step()\n",
     "    # ===================log========================\n",
     "    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')\n",
-    "    display_images(None, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE"
+    "    # visualize clean vs reconstructed; if using denoising, pass (img_bad, output)\n",
+    "    display_images(None, output)\n"
    ]
   },
   {
@@ -257,18 +266,19 @@
     "        img, _ = data\n",
     "        img = img.to(device)\n",
     "        img = img.view(img.size(0), -1)\n",
+    "        # multiplicative dropout mask to create corrupted input\n",
     "        noise = do(torch.ones(img.shape)).to(device)\n",
     "        img_bad = (img * noise).to(device) # comment out for standard AE\n",
     "        # ===================forward=====================\n",
     "        output = model(img_bad) # feed <img> (for std AE) or <img_bad> (for denoising AE)\n",
-    "        loss = criterion(output, img.data)\n",
+    "        loss = criterion(output, img.detach()) # target stays the clean image\n",
     "        # ===================backward====================\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        optimizer.step()\n",
     "    # ===================log========================\n",
     "    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')\n",
-    "    display_images(img_bad, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE"
+    "    display_images(img_bad, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE\n"
    ]
   },
   {
@@ -306,7 +316,9 @@
     "dst_NS = list()\n",
     "\n",
     "for i in range(0, 5):\n",
-    "    corrupted_img = ((img_bad.data.cpu()[i].view(28, 28) / 4 + 0.5) * 255).byte().numpy()\n",
+    "    # OpenCV expects uint8 images; convert normalized tensor back to [0,255]\n",
+    "    corrupted_img = ((img_bad.detach().cpu()[i].view(28, 28) / 4 + 0.5) * 255).byte().numpy()\n",
+    "    # Mask marks missing pixels (0 = keep, non-zero = hole); derived from the dropout noise\n",
     "    mask = 2 - noise.cpu()[i].view(28, 28).byte().numpy()\n",
     "    dst_TELEA.append(inpaint(corrupted_img, mask, 3, INPAINT_TELEA))\n",
     "    dst_NS.append(inpaint(corrupted_img, mask, 3, INPAINT_NS))\n",
     "\n",
     "tns_TELEA = [torch.from_numpy(d) for d in dst_TELEA]\n",
     "tns_NS = [torch.from_numpy(d) for d in dst_NS]\n",
     "\n",
     "TELEA = torch.stack(tns_TELEA).float()\n",
-    "NS = torch.stack(tns_NS).float()"
+    "NS = torch.stack(tns_NS).float()\n"
    ]
   },
   {
@@ -376,18 +388,19 @@
     "N = 16\n",
     "samples = torch.Tensor(N, 28 * 28).to(device)\n",
     "for i in range(N):\n",
-    "    samples[i] = i / (N - 1) * img[B].data + (1 - i / (N - 1) ) * img[A].data\n",
+    "    # linear blend between digit A and B to probe latent continuity\n",
+    "    samples[i] = i / (N - 1) * img[B].detach() + (1 - i / (N - 1)) * img[A].detach()\n",
     "with torch.no_grad():\n",
     "    reconstructions = model(samples)[0]\n",
     "\n",
     "plt.title(f'{A = }, {B = }')\n",
-    "plt.plot(samples.sub(reconstructions).pow(2).sum(dim=(1)), '-o')"
+    "plt.plot(samples.sub(reconstructions).pow(2).sum(dim=1).cpu(), '-o') # move to CPU so matplotlib can plot\n"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -401,7 +414,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
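Note on the .data -> .detach() changes in this file: both return a tensor that shares storage with the original, but .detach() stays tracked by autograd's version counter, so an in-place edit of a detached view that autograd still needs is caught at backward time, while the same edit through .data silently produces wrong gradients. A minimal sketch (not from the notebook) of the difference:

    import torch

    x = torch.ones(3, requires_grad=True)
    y = x.exp()            # autograd saves y, since exp's backward reuses its output
    y.detach().add_(1)     # in-place edit through .detach() bumps y's version counter
    # y.sum().backward()   # would now raise a RuntimeError: a saved tensor was modified
    # y.data.add_(1)       # the same edit via .data would go unnoticed -> silently wrong grads

For the loss targets here, img comes straight from the dataloader and carries no grad, so .detach() is effectively a no-op and the swap is a safe modernization.
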
diff --git a/11-VAE.ipynb b/11-VAE.ipynb
index 6ce9aa1..f53e9f8 100644
--- a/11-VAE.ipynb
+++ b/11-VAE.ipynb
@@ -25,16 +25,17 @@
     "# Displaying routine\n",
     "\n",
     "def display_images(in_, out, n=1, label='', count=False, energy=None):\n",
+    "    \"\"\"Display MNIST inputs/outputs; can annotate indices and optional energy.\"\"\"\n",
     "    for N in range(n):\n",
     "        if in_ is not None:\n",
-    "            in_pic = in_.data.cpu().view(-1, 28, 28)\n",
+    "            in_pic = in_.detach().cpu().view(-1, 28, 28)\n",
     "            plt.figure(figsize=(18, 4))\n",
     "            plt.suptitle(label + ' – real test data / reconstructions', color='w', fontsize=16)\n",
     "            for i in range(4):\n",
     "                plt.subplot(1,4,i+1)\n",
     "                plt.imshow(in_pic[i+4*N])\n",
     "                plt.axis('off')\n",
-    "        out_pic = out.data.cpu().view(-1, 28, 28)\n",
+    "        out_pic = out.detach().cpu().view(-1, 28, 28)\n",
     "        plt.figure(figsize=(18, 6))\n",
     "        for i in range(4):\n",
     "            plt.subplot(1,4,i+1)\n",
     "            plt.imshow(out_pic[i+4*N])\n",
     "            plt.axis('off')\n",
     "            c = 4 * N + i\n",
     "            if count: plt.title(str(c), color='w')\n",
-    "            if count and energy is not None: plt.title(f'{c}, e={energy[c].item():.2f}', color='w')\n"
+    "            if count and energy is not None: plt.title(f'{c}, e={energy[c].item():.2f}', color='w')"
    ]
   },
   {
@@ -85,7 +86,14 @@
    "source": [
     "# Defining the device\n",
     "\n",
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -117,9 +125,9 @@
     "\n",
     "    def reparameterise(self, mu, logvar):\n",
     "        if self.training:\n",
-    "            std = logvar.mul(0.5).exp_()\n",
-    "            eps = std.data.new(std.size()).normal_()\n",
-    "            return eps.mul(std).add_(mu)\n",
+    "            std = (0.5 * logvar).exp()\n",
+    "            eps = torch.randn_like(std)\n",
+    "            return eps * std + mu\n",
     "        else:\n",
     "            return mu\n",
     "\n",
@@ -254,16 +262,16 @@
     "# Choose starting and ending point for the interpolation -> shows original and reconstructed\n",
     "\n",
     "A, B = 0, 6\n",
-    "sample = model.decoder(torch.stack((mu[A].data, mu[B].data), 0))\n",
+    "sample = model.decoder(torch.stack((mu[A].detach(), mu[B].detach()), 0))\n",
     "display_images(None, torch.stack(((\n",
-    "    y[A].data.view(-1),\n",
-    "    y[B].data.view(-1),\n",
-    "    sample.data[0],\n",
-    "    sample.data[1],\n",
-    "    sample.data[0],\n",
-    "    sample.data[1],\n",
-    "    y[A].data.view(-1) - sample.data[0],\n",
-    "    y[B].data.view(-1) - sample.data[1]\n",
+    "    y[A].detach().view(-1),\n",
+    "    y[B].detach().view(-1),\n",
+    "    sample.detach()[0],\n",
+    "    sample.detach()[1],\n",
+    "    sample.detach()[0],\n",
+    "    sample.detach()[1],\n",
+    "    y[A].detach().view(-1) - sample.detach()[0],\n",
+    "    y[B].detach().view(-1) - sample.detach()[1]\n",
     ")), 0), 2)"
    ]
   },
   {
@@ -280,7 +288,7 @@
     "samples = torch.Tensor(N, 28, 28).to(device)\n",
     "for i in range(N):\n",
     "    # code[i] = i / (N - 1) * mu[B].data + (1 - i / (N - 1) ) * mu[A].data\n",
-    "    samples[i] = i / (N - 1) * y[B].data + (1 - i / (N - 1) ) * y[A].data\n",
+    "    samples[i] = i / (N - 1) * y[B].detach() + (1 - i / (N - 1) ) * y[A].detach()\n",
     "# samples = model.decoder(code)\n",
     "display_images(None, samples, N // 4, count=True)"
    ]
   },
   {
@@ -310,7 +318,7 @@
     "N = 16\n",
     "samples = torch.Tensor(N, 28, 28).to(device)\n",
     "for i in range(N):\n",
-    "    samples[i] = i / (N - 1) * y[B].data + (1 - i / (N - 1) ) * y[A].data\n",
+    "    samples[i] = i / (N - 1) * y[B].detach() + (1 - i / (N - 1) ) * y[A].detach()\n",
     "with torch.no_grad():\n",
     "    reconstructions = model(samples)[0].view(-1, 28, 28)\n",
     "\n",
@@ -377,7 +385,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -391,7 +399,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
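Note on the rewritten reparameterise above: the math is unchanged — z = μ + σ·ε with ε ~ N(0, I) and σ = exp(logvar / 2) — but torch.randn_like(std) allocates the noise with the same shape, dtype, and device as std, so the sampler works on CUDA/MPS without the deprecated .data-based allocation. A standalone sketch of the trick under those assumptions:

    import torch

    def reparameterise(mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
        # z = mu + std * eps keeps the sample differentiable w.r.t. mu and logvar
        std = (0.5 * logvar).exp()     # logvar = log(sigma^2), so std = exp(logvar / 2)
        eps = torch.randn_like(std)    # same shape/dtype/device as std
        return mu + eps * std

    mu, logvar = torch.zeros(4, 2), torch.zeros(4, 2)
    z = reparameterise(mu, logvar)     # plain N(0, 1) samples here, shape (4, 2)
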
"plt.xlabel('discrete time index', fontsize=12)\n", @@ -227,14 +229,15 @@ "def path_planning_with_cost(x_x, x_y, s, T, epochs, stepsize, cost_f, ax=None, ax_lims=None, debug=False):\n", " \"\"\"\n", " Path planning for tricycle\n", - " x_x: x component of postion vector\n", - " x_y: y component of postion vector\n", + " x_x: x component of position vector\n", + " x_y: y component of position vector\n", " s: initial speed\n", " T: time steps\n", " epochs: number of epochs for back propagation\n", " stepsize: stepsize for back propagation\n", - " cost_f: cost funciton that takes the trajectory and the tuple (x, y) - target.\n", + " cost_f: cost function that takes the trajectory and the tuple (x, y) - target.\n", " ax: axis to plot the trajectory\n", + " Returns: list of per-epoch costs\n", " \"\"\"\n", " ax = ax or plt.gca()\n", " plt.plot(0, 0, 'gx', markersize=20, markeredgewidth=5)\n", @@ -260,7 +263,8 @@ " if epoch == 0: \n", " plot_τ(ax, τ, ax_lims=ax_lims)\n", " if epoch == epochs-1:\n", - " plot_τ(ax, τ, car=True, ax_lims=ax_lims)" + " plot_τ(ax, τ, car=True, ax_lims=ax_lims)\n", + " return costs" ] }, { @@ -340,7 +344,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -354,7 +358,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.9.12" } }, "nbformat": 4,