diff --git a/04-spiral_classification.ipynb b/04-spiral_classification.ipynb
index bce1314..09abfad 100644
--- a/04-spiral_classification.ipynb
+++ b/04-spiral_classification.ipynb
@@ -15,7 +15,7 @@
    "source": [
     "import torch\n",
     "from torch import nn, optim\n",
-    "from math import pi as π"
+    "from math import pi as π # convenient constant for angles"
    ]
   },
   {
@@ -33,7 +33,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "set_default()"
+    "set_default() # apply plotting style defaults from res.plot_lib"
    ]
   },
   {
@@ -42,7 +42,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "# Use GPU if available for faster full-batch training\n",
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -59,11 +67,12 @@
    "outputs": [],
    "source": [
     "seed = 12345\n",
+    "# Fix RNG for reproducibility across data and training\n",
     "torch.manual_seed(seed)\n",
     "N = 1000 # num_samples_per_class\n",
     "n = 2 # input dimensions\n",
     "K = 5 # num_classes\n",
-    "d = 100 # num_hidden_units"
+    "d = 100 # num_hidden_units\n"
    ]
   },
   {
@@ -74,8 +83,9 @@
    "source": [
     "# Generate spirals\n",
     "\n",
+    "# Radii grow linearly with t to spread points; small noise on angle for class overlap\n",
     "t = torch.linspace(0, 1, N)\n",
-    "a = 0.8 * t + 0.2 # amplitude 0.2 → 1.0\n",
+    "a = 0.8 * t + 0.2 # amplitude 0.2 -> 1.0\n",
     "X = list()\n",
     "y = list()\n",
     "for k in range(K):\n",
@@ -85,9 +95,12 @@
     "X = torch.cat(X)\n",
     "y = torch.cat(y)\n",
     "\n",
+    "# Keep CPU copies for plotting, send copies to device for training\n",
+    "X_dev, y_dev = X.to(device), y.to(device)\n",
+    "\n",
     "print(\"Shapes:\")\n",
     "print(\"X:\", tuple(X.size()))\n",
-    "print(\"y:\", tuple(y.size()))"
+    "print(\"y:\", tuple(y.size()))\n"
    ]
   },
   {
@@ -97,7 +110,7 @@
    "outputs": [],
    "source": [
     "# And visualise them\n",
-    "plot_data(X, y)"
+    "plot_data(X, y) # colors correspond to class labels"
    ]
   },
   {
@@ -113,8 +126,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "learning_rate = 1e-3\n",
-    "lambda_l2 = 1e-5"
+    "learning_rate = 1e-3 # stable default for Adam\n",
+    "lambda_l2 = 1e-5 # small weight decay to regularize"
    ]
   },
   {
@@ -124,11 +137,12 @@
    "outputs": [],
    "source": [
     "# Model definition\n",
+    "# Toggle ReLU to compare linear vs non-linear decision boundaries; optional 2D bottleneck for visualization\n",
     "model = nn.Sequential(\n",
     "    nn.Linear(n, d),\n",
     "    # nn.ReLU(), # Comment this line for a linear model\n",
     "    nn.Linear(d, K) # (Optional) Comment this line and uncomment the next one to display 2D embeddings below\n",
-    "    # nn.Linear(d, 2), nn.Linear(2, K)\n",
+    "    # nn.Linear(d, 2), nn.Linear(2, K),\n",
     ")\n",
     "model.to(device) # possibly send to CUDA\n",
     "\n",
@@ -138,28 +152,29 @@
     "# Using Adam optimiser\n",
     "optimiser = optim.Adam(model.parameters(), lr=learning_rate, weight_decay=lambda_l2) # built-in L2\n",
     "\n",
-    "# Full-batch training loop\n",
+    "# Full-batch training loop (not mini-batch; deterministic given the fixed seed)\n",
     "for t in range(2_000):\n",
-    "    \n",
     "    # Feed forward to get the linear sum s\n",
-    "    s = model(X)\n",
-    "    \n",
+    "    s = model(X_dev)\n",
+    "\n",
     "    # Compute the free energy F and loss L\n",
-    "    F = C(s, y)\n",
+    "    F = C(s, y_dev)\n",
     "    L = F.mean()\n",
-    "    \n",
+    "\n",
     "    # Zero the gradients\n",
     "    optimiser.zero_grad()\n",
-    "    \n",
+    "\n",
-    "    # Backward pass to compute and accumulate the gradient\n",
-    "    # of the free energy w.r.t our learnable params\n",
+    "    # Backward pass to compute and accumulate the gradient of the free energy w.r.t params\n",
     "    L.backward()\n",
-    "    \n",
+    "\n",
     "    # Update params\n",
     "    optimiser.step()\n",
-    "    \n",
-    "    # Display epoch, L, and accuracy\n",
-    "    overwrite(f'[EPOCH]: {t}, [LOSS]: {L.item():.6f}, [ACCURACY]: {acc(s, y):.3f}')"
+    "\n",
+    "    # Display epoch, L, and accuracy (uses on-device tensors)\n",
+    "    overwrite(f'[EPOCH]: {t}, [LOSS]: {L.item():.6f}, [ACCURACY]: {acc(s, y_dev):.3f}')\n",
+    "\n",
+    "# Move model back to CPU for downstream plotting utilities\n",
+    "model_cpu = model.to('cpu')\n"
    ]
   },
   {
@@ -169,8 +184,8 @@
    "outputs": [],
    "source": [
     "# Plot trained model\n",
-    "print(model)\n",
-    "plot_model(X, y, model)"
+    "print(model_cpu)\n",
+    "plot_model(X, y, model_cpu)\n"
    ]
   },
   {
@@ -180,7 +195,7 @@
    "outputs": [],
    "source": [
     "# (Optional) Plot internal 2D embeddings if available\n",
-    "plot_embeddings(X, y, model, zoom=10)"
+    "plot_embeddings(X, y, model_cpu, zoom=10)\n"
    ]
   },
   {
@@ -191,11 +206,12 @@
    "source": [
     "# Compute linear output s for a fine grid over the input space\n",
     "\n",
-    "mesh = torch.arange(-1.5, 1.5, 0.01)\n",
+    "mesh = torch.arange(-1.5, 1.5, 0.01) # step balances resolution vs compute\n",
     "xx, yy = torch.meshgrid(mesh, mesh, indexing='ij')\n",
     "grid = torch.stack((xx.reshape(-1), yy.reshape(-1)), dim=1)\n",
     "with torch.no_grad():\n",
-    "    s = model(grid)"
+    "    s = model_cpu(grid)\n",
+    "    s = s.detach().cpu() # keep on CPU for plotting\n"
    ]
   },
   {
@@ -204,8 +220,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Choice of free energy\n",
-    "\n",
+    "# Choice of free energy (toggle to inspect different energy landscapes)\n",
     "fe = 'cross-entropy'\n",
     "# fe = 'negative linear output'"
    ]
   },
   {
@@ -216,7 +231,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Switch to non-interactive matplotlib\n",
+    "# Switch to non-interactive matplotlib (inline) for static plots\n",
     "%matplotlib inline\n",
     "set_default()"
    ]
   },
   {
@@ -240,7 +255,8 @@
     "\n",
     "for k in range(K):\n",
     "    if fe == 'cross-entropy':\n",
-    "        F = C(s, torch.LongTensor(1).fill_(k).expand(s.size(0)))\n",
+    "        target = torch.full((s.size(0),), k, dtype=torch.long, device='cpu') # CPU for plotting\n",
+    "        F = C(s, target)\n",
     "        F = F.reshape(xx.shape)\n",
     "        plot_2d_energy_levels(X, y, (xx, yy, F, k, K), (0, 35), (1, 35, 4))\n",
     "\n",
@@ -277,7 +293,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Cross-entropy\n",
+    "# Cross-entropy (uses the last computed F from the loop above)\n",
     "if fe == 'cross-entropy':\n",
     "    fig, ax = plot_3d_energy_levels(X, y, (xx, yy, F, k, K), (0, 18), (0, 19, 1), (0, 19, 2))\n",
     "elif fe == 'negative linear output':\n",
@@ -320,7 +336,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -334,7 +350,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.12"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
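Note on the device-selection change above: the same CUDA / MPS / CPU cascade is repeated verbatim in 10-autoencoder.ipynb and 11-VAE.ipynb below. A minimal sketch of a shared helper the notebooks could import instead — get_device is a hypothetical name, e.g. living next to set_default in res/plot_lib.py:

    import torch

    def get_device() -> torch.device:
        """Pick the best available backend: CUDA, then Apple MPS, then CPU."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        if torch.backends.mps.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    device = get_device()
    print(f"Using device: {device}")
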
diff --git a/10-autoencoder.ipynb b/10-autoencoder.ipynb
index 597de24..e1cfcfd 100644
--- a/10-autoencoder.ipynb
+++ b/10-autoencoder.ipynb
@@ -39,23 +39,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Displaying routine\n",
+    "# Displaying routine (handles optional inputs and detaches tensors before plotting)\n",
     "\n",
     "def display_images(in_, out, n=1):\n",
     "    for N in range(n):\n",
     "        if in_ is not None:\n",
-    "            in_pic = to_img(in_.cpu().data)\n",
+    "            in_pic = to_img(in_.detach().cpu())\n",
     "            plt.figure(figsize=(18, 6))\n",
     "            for i in range(4):\n",
     "                plt.subplot(1,4,i+1)\n",
     "                plt.imshow(in_pic[i+4*N])\n",
     "                plt.axis('off')\n",
-    "        out_pic = to_img(out.cpu().data)\n",
+    "        out_pic = to_img(out.detach().cpu())\n",
     "        plt.figure(figsize=(18, 6))\n",
     "        for i in range(4):\n",
     "            plt.subplot(1,4,i+1)\n",
     "            plt.imshow(out_pic[i+4*N])\n",
-    "            plt.axis('off')"
+    "            plt.axis('off')\n"
    ]
   },
   {
@@ -65,7 +65,7 @@
    "outputs": [],
    "source": [
     "# Define data loading step\n",
-    "\n",
+    "# Normalize to roughly (-1, 1) so Tanh decoder output matches input scale\n",
     "batch_size = 256\n",
     "\n",
     "img_transform = transforms.Compose([\n",
@@ -74,7 +74,7 @@
     "])\n",
     "\n",
     "dataset = MNIST('./data', transform=img_transform, download=True)\n",
-    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)"
+    "dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)\n"
    ]
   },
   {
@@ -83,7 +83,14 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -101,6 +108,7 @@
     "class Autoencoder(nn.Module):\n",
     "    def __init__(self):\n",
     "        super().__init__()\n",
+    "        # Single hidden layer MLP with Tanh bottleneck\n",
     "        self.encoder = nn.Sequential(\n",
     "            nn.Linear(28 * 28, d),\n",
     "            nn.Tanh(),\n",
@@ -116,7 +124,7 @@
     "        return ỹ\n",
     "    \n",
     "ae = Autoencoder().to(device)\n",
-    "criterion = nn.MSELoss()"
+    "criterion = nn.MSELoss()\n"
    ]
   },
   {
@@ -163,14 +171,15 @@
     "        # img_bad = (img * noise).to(device) # comment out for standard AE\n",
     "        # ===================forward=====================\n",
     "        output = ae(img) # feed <img> (for std AE) or <img_bad> (for denoising AE)\n",
-    "        loss = criterion(output, img.data)\n",
+    "        loss = criterion(output, img.detach()) # detach target to avoid grad through input\n",
     "        # ===================backward====================\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        optimizer.step()\n",
     "    # ===================log========================\n",
     "    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')\n",
-    "    display_images(None, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE"
+    "    # visualize clean vs reconstructed; if using denoising, pass (img_bad, output)\n",
+    "    display_images(None, output)\n"
    ]
   },
   {
@@ -257,18 +266,19 @@
     "        img, _ = data\n",
     "        img = img.to(device)\n",
     "        img = img.view(img.size(0), -1)\n",
+    "        # multiplicative dropout mask to create corrupted input\n",
     "        noise = do(torch.ones(img.shape)).to(device)\n",
     "        img_bad = (img * noise).to(device) # comment out for standard AE\n",
     "        # ===================forward=====================\n",
     "        output = model(img_bad) # feed <img> (for std AE) or <img_bad> (for denoising AE)\n",
-    "        loss = criterion(output, img.data)\n",
+    "        loss = criterion(output, img.detach()) # target stays the clean image\n",
     "        # ===================backward====================\n",
     "        optimizer.zero_grad()\n",
     "        loss.backward()\n",
     "        optimizer.step()\n",
     "    # ===================log========================\n",
     "    print(f'epoch [{epoch + 1}/{num_epochs}], loss:{loss.item():.4f}')\n",
-    "    display_images(img_bad, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE"
+    "    display_images(img_bad, output) # pass (None, output) for std AE, (img_bad, output) for denoising AE\n"
    ]
   },
   {
@@ -306,7 +316,9 @@
     "dst_NS = list()\n",
     "\n",
     "for i in range(0, 5):\n",
-    "    corrupted_img = ((img_bad.data.cpu()[i].view(28, 28) / 4 + 0.5) * 255).byte().numpy()\n",
+    "    # OpenCV expects uint8 images; convert normalized tensor back to [0,255]\n",
+    "    corrupted_img = ((img_bad.detach().cpu()[i].view(28, 28) / 4 + 0.5) * 255).byte().numpy()\n",
+    "    # Mask marks missing pixels (0 = keep, non-zero = hole); derived from the dropout noise\n",
     "    mask = 2 - noise.cpu()[i].view(28, 28).byte().numpy()\n",
     "    dst_TELEA.append(inpaint(corrupted_img, mask, 3, INPAINT_TELEA))\n",
     "    dst_NS.append(inpaint(corrupted_img, mask, 3, INPAINT_NS))\n",
     "\n",
     "tns_TELEA = [torch.from_numpy(d) for d in dst_TELEA]\n",
     "tns_NS = [torch.from_numpy(d) for d in dst_NS]\n",
     "\n",
     "TELEA = torch.stack(tns_TELEA).float()\n",
-    "NS = torch.stack(tns_NS).float()"
+    "NS = torch.stack(tns_NS).float()\n"
    ]
   },
   {
@@ -376,18 +388,19 @@
     "N = 16\n",
     "samples = torch.Tensor(N, 28 * 28).to(device)\n",
     "for i in range(N):\n",
-    "    samples[i] = i / (N - 1) * img[B].data + (1 - i / (N - 1) ) * img[A].data\n",
+    "    # linear blend between digit A and B to probe latent continuity\n",
+    "    samples[i] = i / (N - 1) * img[B].detach() + (1 - i / (N - 1)) * img[A].detach()\n",
     "with torch.no_grad():\n",
     "    reconstructions = model(samples)[0]\n",
     "\n",
     "plt.title(f'{A = }, {B = }')\n",
-    "plt.plot(samples.sub(reconstructions).pow(2).sum(dim=(1)), '-o')"
+    "plt.plot(samples.sub(reconstructions).pow(2).sum(dim=1).cpu(), '-o') # move to CPU so matplotlib can plot\n"
    ]
   }
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -401,7 +414,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.13"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
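Note on the .data -> .detach() changes in this file: both return a tensor that shares storage with the original, but .detach() stays tracked by autograd's version counter, so an in-place edit of a detached view that autograd still needs is caught at backward time, while the same edit through .data silently produces wrong gradients. A minimal sketch (not from the notebook) of the difference:

    import torch

    x = torch.ones(3, requires_grad=True)
    y = x.exp()            # autograd saves y, since exp's backward reuses its output
    y.detach().add_(1)     # in-place edit through .detach() bumps y's version counter
    # y.sum().backward()   # would now raise a RuntimeError: a saved tensor was modified
    # y.data.add_(1)       # the same edit via .data would go unnoticed -> silently wrong grads

For the loss targets here, img comes straight from the dataloader and carries no grad, so .detach() is effectively a no-op and the swap is a safe modernization.
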
diff --git a/11-VAE.ipynb b/11-VAE.ipynb
index 6ce9aa1..f53e9f8 100644
--- a/11-VAE.ipynb
+++ b/11-VAE.ipynb
@@ -25,16 +25,17 @@
     "# Displaying routine\n",
     "\n",
     "def display_images(in_, out, n=1, label='', count=False, energy=None):\n",
+    "    \"\"\"Display MNIST inputs/outputs; can annotate indices and optional energy.\"\"\"\n",
     "    for N in range(n):\n",
     "        if in_ is not None:\n",
-    "            in_pic = in_.data.cpu().view(-1, 28, 28)\n",
+    "            in_pic = in_.detach().cpu().view(-1, 28, 28)\n",
     "            plt.figure(figsize=(18, 4))\n",
     "            plt.suptitle(label + ' – real test data / reconstructions', color='w', fontsize=16)\n",
     "            for i in range(4):\n",
     "                plt.subplot(1,4,i+1)\n",
     "                plt.imshow(in_pic[i+4*N])\n",
     "                plt.axis('off')\n",
-    "        out_pic = out.data.cpu().view(-1, 28, 28)\n",
+    "        out_pic = out.detach().cpu().view(-1, 28, 28)\n",
     "        plt.figure(figsize=(18, 6))\n",
     "        for i in range(4):\n",
     "            plt.subplot(1,4,i+1)\n",
     "            plt.imshow(out_pic[i+4*N])\n",
     "            plt.axis('off')\n",
     "            c = 4 * N + i\n",
     "            if count: plt.title(str(c), color='w')\n",
-    "            if count and energy is not None: plt.title(f'{c}, e={energy[c].item():.2f}', color='w')\n"
+    "            if count and energy is not None: plt.title(f'{c}, e={energy[c].item():.2f}', color='w')"
    ]
   },
   {
@@ -85,7 +86,14 @@
    "source": [
     "# Defining the device\n",
     "\n",
-    "device = torch.device(\"cuda:0\" if torch.cuda.is_available() else \"cpu\")"
+    "if torch.cuda.is_available():\n",
+    "    device = torch.device(\"cuda\")\n",
+    "elif torch.backends.mps.is_available():\n",
+    "    device = torch.device(\"mps\")\n",
+    "else:\n",
+    "    device = torch.device(\"cpu\")\n",
+    "\n",
+    "print(f\"Using device: {device}\")"
    ]
   },
   {
@@ -117,9 +125,9 @@
     "\n",
     "    def reparameterise(self, mu, logvar):\n",
     "        if self.training:\n",
-    "            std = logvar.mul(0.5).exp_()\n",
-    "            eps = std.data.new(std.size()).normal_()\n",
-    "            return eps.mul(std).add_(mu)\n",
+    "            std = (0.5 * logvar).exp()\n",
+    "            eps = torch.randn_like(std)\n",
+    "            return eps * std + mu\n",
     "        else:\n",
     "            return mu\n",
     "\n",
@@ -254,16 +262,16 @@
     "# Choose starting and ending point for the interpolation -> shows original and reconstructed\n",
     "\n",
     "A, B = 0, 6\n",
-    "sample = model.decoder(torch.stack((mu[A].data, mu[B].data), 0))\n",
+    "sample = model.decoder(torch.stack((mu[A].detach(), mu[B].detach()), 0))\n",
     "display_images(None, torch.stack(((\n",
-    "    y[A].data.view(-1),\n",
-    "    y[B].data.view(-1),\n",
-    "    sample.data[0],\n",
-    "    sample.data[1],\n",
-    "    sample.data[0],\n",
-    "    sample.data[1],\n",
-    "    y[A].data.view(-1) - sample.data[0],\n",
-    "    y[B].data.view(-1) - sample.data[1]\n",
+    "    y[A].detach().view(-1),\n",
+    "    y[B].detach().view(-1),\n",
+    "    sample.detach()[0],\n",
+    "    sample.detach()[1],\n",
+    "    sample.detach()[0],\n",
+    "    sample.detach()[1],\n",
+    "    y[A].detach().view(-1) - sample.detach()[0],\n",
+    "    y[B].detach().view(-1) - sample.detach()[1]\n",
     ")), 0), 2)"
    ]
   },
   {
@@ -280,7 +288,7 @@
     "samples = torch.Tensor(N, 28, 28).to(device)\n",
     "for i in range(N):\n",
     "    # code[i] = i / (N - 1) * mu[B].data + (1 - i / (N - 1) ) * mu[A].data\n",
-    "    samples[i] = i / (N - 1) * y[B].data + (1 - i / (N - 1) ) * y[A].data\n",
+    "    samples[i] = i / (N - 1) * y[B].detach() + (1 - i / (N - 1) ) * y[A].detach()\n",
     "# samples = model.decoder(code)\n",
     "display_images(None, samples, N // 4, count=True)"
    ]
   },
   {
@@ -310,7 +318,7 @@
     "N = 16\n",
     "samples = torch.Tensor(N, 28, 28).to(device)\n",
     "for i in range(N):\n",
-    "    samples[i] = i / (N - 1) * y[B].data + (1 - i / (N - 1) ) * y[A].data\n",
+    "    samples[i] = i / (N - 1) * y[B].detach() + (1 - i / (N - 1) ) * y[A].detach()\n",
     "with torch.no_grad():\n",
     "    reconstructions = model(samples)[0].view(-1, 28, 28)\n",
     "\n",
@@ -377,7 +385,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
+   "display_name": "base",
    "language": "python",
    "name": "python3"
   },
@@ -391,7 +399,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.13.2"
+   "version": "3.9.12"
   }
  },
  "nbformat": 4,
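Note on the rewritten reparameterise above: the math is unchanged — z = μ + σ·ε with ε ~ N(0, I) and σ = exp(logvar / 2) — but torch.randn_like(std) allocates the noise with the same shape, dtype, and device as std, so the sampler works on CUDA/MPS without the deprecated .data-based allocation. A standalone sketch of the trick under those assumptions:

    import torch

    def reparameterise(mu: torch.Tensor, logvar: torch.Tensor) -> torch.Tensor:
        # z = mu + std * eps keeps the sample differentiable w.r.t. mu and logvar
        std = (0.5 * logvar).exp()     # logvar = log(sigma^2), so std = exp(logvar / 2)
        eps = torch.randn_like(std)    # same shape/dtype/device as std
        return mu + eps * std

    mu, logvar = torch.zeros(4, 2), torch.zeros(4, 2)
    z = reparameterise(mu, logvar)     # plain N(0, 1) samples here, shape (4, 2)
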
"plt.xlabel('discrete time index', fontsize=12)\n", @@ -227,14 +229,15 @@ "def path_planning_with_cost(x_x, x_y, s, T, epochs, stepsize, cost_f, ax=None, ax_lims=None, debug=False):\n", " \"\"\"\n", " Path planning for tricycle\n", - " x_x: x component of postion vector\n", - " x_y: y component of postion vector\n", + " x_x: x component of position vector\n", + " x_y: y component of position vector\n", " s: initial speed\n", " T: time steps\n", " epochs: number of epochs for back propagation\n", " stepsize: stepsize for back propagation\n", - " cost_f: cost funciton that takes the trajectory and the tuple (x, y) - target.\n", + " cost_f: cost function that takes the trajectory and the tuple (x, y) - target.\n", " ax: axis to plot the trajectory\n", + " Returns: list of per-epoch costs\n", " \"\"\"\n", " ax = ax or plt.gca()\n", " plt.plot(0, 0, 'gx', markersize=20, markeredgewidth=5)\n", @@ -260,7 +263,8 @@ " if epoch == 0: \n", " plot_τ(ax, τ, ax_lims=ax_lims)\n", " if epoch == epochs-1:\n", - " plot_τ(ax, τ, car=True, ax_lims=ax_lims)" + " plot_τ(ax, τ, car=True, ax_lims=ax_lims)\n", + " return costs" ] }, { @@ -340,7 +344,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "base", "language": "python", "name": "python3" }, @@ -354,7 +358,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.1" + "version": "3.9.12" } }, "nbformat": 4,