From 560594b7b7eb5622d5f115892932353fc9dd8128 Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:26:25 -0500 Subject: [PATCH 1/6] add gurobi jupyter notebook baseline Signed-off-by: Christopher Poon --- src/baseline/gurobi_qubo.ipynb | 177 +++++++++++++++++++++++++++++++++ 1 file changed, 177 insertions(+) create mode 100644 src/baseline/gurobi_qubo.ipynb diff --git a/src/baseline/gurobi_qubo.ipynb b/src/baseline/gurobi_qubo.ipynb new file mode 100644 index 0000000..c5a3060 --- /dev/null +++ b/src/baseline/gurobi_qubo.ipynb @@ -0,0 +1,177 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "Need to have Gurobi Optimizer and obtain license.\n", + "\n", + "1. Install Gurobi\n", + "- https://www.gurobi.com/downloads/gurobi-software/\n", + "- https://support.gurobi.com/hc/en-us/articles/4534161999889-How-do-I-install-Gurobi-Optimizer\n", + "2. Obtain License\n", + "- https://portal.gurobi.com/iam/licenses/list/\n", + "\n", + "\n", + "%pip install gurobipy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import os\n", + "import json\n", + "import numpy as np\n", + "import networkx as nx\n", + "from natsort import natsorted\n", + "import gurobipy as gp\n", + "from math import sqrt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def read_nxgraph(filename: str) -> nx.Graph(): # type: ignore\n", + " graph = nx.Graph()\n", + " with open(filename, 'r') as file:\n", + " # lines = []\n", + " line = file.readline()\n", + " is_first_line = True\n", + " while line is not None and line != '':\n", + " if '//' not in line:\n", + " if is_first_line:\n", + " strings = line.split(\" \")\n", + " num_nodes = int(strings[0])\n", + " num_edges = int(strings[1])\n", + " nodes = list(range(num_nodes))\n", + " graph.add_nodes_from(nodes)\n", + " is_first_line = False\n", + " else:\n", + " node1, node2, weight = line.split()\n", + " graph.add_edge(int(node1), int(node2), weight=weight)\n", + " line = file.readline()\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def gurobi_maxcut(graph):\n", + "\n", + " # Create QUBO matrix\n", + " nodes = len(list(graph.nodes))\n", + " J = nx.to_numpy_array(graph)\n", + "\n", + " # Construct gurobi model\n", + " model = gp.Model(\"maxcut_qubo\")\n", + "\n", + " # Set time limit 1 hr\n", + " model.Params.LogFile = \"../logs/gurobi.log\"\n", + " model.setParam('TimeLimit', 3600)\n", + " model.Params.LogToConsole = 1\n", + " model.setParam(\"MIPGap\", 0.0)\n", + "\n", + " # Create variable for each vertex\n", + " x = model.addVars(nodes, vtype=gp.GRB.BINARY)\n", + "\n", + " objective = gp.quicksum(\n", + " -J[i, j] * (2 * x[i] - 1) * (2 * x[j] - 1) * (1/sqrt(nodes))\n", + " for i in range(nodes) for j in range(i+1, nodes) if J[i, j] != 0.0\n", + " )\n", + " model.setObjective(objective, gp.GRB.MINIMIZE)\n", + " \n", + " # Solve\n", + " model.optimize()\n", + " obj_val = model.ObjVal\n", + " obj_bnd = model.ObjBound\n", + " solution = \"\"\n", + " for i in range(nodes):\n", + " solution += f\"{int(x[i].X)}\"\n", + "\n", + " return obj_val, solution" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# graph = read_nxgraph(\"../data/vna/ER/100_SK_seed40.txt\")\n", + "# obj_val, sol = gurobi_maxcut(graph)" + ] + }, 
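The Gurobi and CPLEX notebooks minimize the scaled Ising objective -(1/sqrt(n)) * sum_{i<j} J_ij * (2*x_i - 1) * (2*x_j - 1), and the COPT notebook minimizes the same quantity in linearized but unscaled form; each notebook then appends the raw objective value to its results file. The snippet below is a minimal sanity-check sketch, not part of the original notebook: it recomputes that scaled energy from the returned 0/1 solution string, assuming graph comes from read_nxgraph and solution from gurobi_maxcut above (the helper name recompute_energy is introduced here only for illustration).

import numpy as np
import networkx as nx
from math import sqrt

def recompute_energy(graph, solution):
    # Same coupling matrix the solver wrapper builds internally
    J = nx.to_numpy_array(graph)
    # Map the 0/1 assignment string back to -1/+1 spins
    s = 2 * np.array([int(c) for c in solution]) - 1
    # Scaled Ising energy: -(1/sqrt(n)) * sum_{i<j} J_ij * s_i * s_j
    return float(-(s @ np.triu(J, 1) @ s) / sqrt(len(s)))

# Usage sketch, mirroring the commented-out cell above:
# graph = read_nxgraph("../data/vna/ER/100_SK_seed40.txt")
# obj_val, sol = gurobi_maxcut(graph)
# recompute_energy(graph, sol)  # should agree with obj_val up to the solver's gap tolerance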
+ { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 4081189 3127413 -8.84763 56 71 -7.15263 -8.84763 23.7% 29.7 1190s\n" + ] + } + ], + "source": [ + "# Parse all files in data_dir \n", + "data_dir = f\"../data/vna/SK\"\n", + "input_files = [ f for f in os.listdir(data_dir) ]\n", + "input_files = natsorted(input_files)\n", + "\n", + "# Write directories & files\n", + "results_dir = f\"../results/{'/'.join(data_dir.split('/')[2:])}\"\n", + "solutions_dir = f\"../solutions/{'/'.join(data_dir.split('/')[2:])}\"\n", + "os.makedirs(results_dir, exist_ok=True)\n", + "os.makedirs(solutions_dir, exist_ok=True)\n", + "\n", + "i = 0\n", + "for file in input_files:\n", + " i += 1\n", + " if i > 25: break\n", + "\n", + " graph = read_nxgraph(f'{data_dir}/{file}') \n", + " obj_val, sol = gurobi_maxcut(graph)\n", + "\n", + " with open(f\"{results_dir}/GUROBI.txt\", \"a\") as f:\n", + " f.write(f\"{obj_val}\\n\")\n", + "\n", + " with open(f\"{solutions_dir}/GUROBI.txt\", \"a\") as f:\n", + " f.write(f\"{sol}\\n\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} From 6cfc790f5ca29d870d04bfd85076af42f46800ee Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:27:00 -0500 Subject: [PATCH 2/6] add copt jupyter notebook baseline Signed-off-by: Christopher Poon --- src/baseline/copt_qubo.ipynb | 165 +++++++++++++++++++++++++++++++++++ 1 file changed, 165 insertions(+) create mode 100644 src/baseline/copt_qubo.ipynb diff --git a/src/baseline/copt_qubo.ipynb b/src/baseline/copt_qubo.ipynb new file mode 100644 index 0000000..3874649 --- /dev/null +++ b/src/baseline/copt_qubo.ipynb @@ -0,0 +1,165 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "30310187", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import os\n", + "import json\n", + "import numpy as np\n", + "import networkx as nx\n", + "from natsort import natsorted\n", + "import coptpy as cp\n", + "from coptpy import COPT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a1628fb1", + "metadata": {}, + "outputs": [], + "source": [ + "def read_nxgraph(filename: str) -> nx.Graph(): # type: ignore\n", + " graph = nx.Graph()\n", + " with open(filename, 'r') as file:\n", + " line = file.readline()\n", + " is_first_line = True\n", + " while line is not None and line != '':\n", + " if '//' not in line:\n", + " if is_first_line:\n", + " strings = line.split(\" \")\n", + " num_nodes = int(strings[0])\n", + " num_edges = int(strings[1])\n", + " nodes = list(range(num_nodes))\n", + " graph.add_nodes_from(nodes)\n", + " is_first_line = False\n", + " else:\n", + " node1, node2, weight = line.split()\n", + " graph.add_edge(int(node1), int(node2), weight=weight)\n", + " line = file.readline()\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05705cb8", + "metadata": {}, + "outputs": [], + "source": [ + "def copt_maxcut(graph):\n", + "\n", + " # Create QUBO matrix\n", + " nodes = len(list(graph.nodes))\n", + " J = nx.to_numpy_array(graph)\n", + 
"\n", + " # Create COPT environment\n", + " env = cp.Envr()\n", + " \n", + " # Construct model\n", + " model = env.createModel(\"qubo\")\n", + "\n", + " # Set time limit 1 hr\n", + " model.setLogFile(\"../logs/copt.log\")\n", + " model.setParam(COPT.Param.TimeLimit, 3600.0)\n", + " model.setParam(COPT.Param.RelGap, 0.0)\n", + "\n", + " # Create variable for each vertex\n", + " x = [ model.addVar(vtype=COPT.BINARY, name=f\"x_{i}\") for i in range(nodes) ]\n", + " \n", + " # Linearize x_i * x_j to y_ij\n", + " y = np.zeros((nodes, nodes), dtype=object)\n", + " for i in range(nodes):\n", + " for j in range(i +1, nodes):\n", + " y[i, j] = model.addVar(vtype=COPT.BINARY, name=f\"y_{i},{j}\")\n", + " model.addConstr(y[i, j] <= x[i])\n", + " model.addConstr(y[i, j] <= x[j])\n", + " model.addConstr(y[i, j] >= x[i] + x[j] - 1)\n", + "\n", + " # Objective function\n", + " model.setObjective(cp.quicksum(\n", + " -J[i, j] * ( (4 * y[i,j]) + (-2 * x[i]) + (-2 * x[j]) + 1 )\n", + " for i in range(nodes) for j in range(i + 1, nodes) if J[i, j] != 0.0 \n", + " ), sense=COPT.MINIMIZE)\n", + "\n", + " # Solve\n", + " model.solve()\n", + " sol = \"\"\n", + " for i in range(nodes):\n", + " sol += f\"{int(x[i].x)}\"\n", + " \n", + " return model.BestObj, sol" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4397de58", + "metadata": {}, + "outputs": [], + "source": [ + "# graph = read_nxgraph(\"../data/RL/spin_glass/10x10/GSRL_100_ID2.txt\")\n", + "# obj_val, sol = copt_maxcut(graph)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ada14dc", + "metadata": {}, + "outputs": [], + "source": [ + "# Parse all files in data_dir \n", + "data_dir = f\"../data/vna/EA/EA_40x40\"\n", + "input_files = [ f for f in os.listdir(data_dir) ]\n", + "input_files = natsorted(input_files)\n", + "\n", + "# Write directories & files\n", + "results_dir = f\"../results/{'/'.join(data_dir.split('/')[2:])}\"\n", + "solutions_dir = f\"../solutions/{'/'.join(data_dir.split('/')[2:])}\"\n", + "os.makedirs(results_dir, exist_ok=True)\n", + "os.makedirs(solutions_dir, exist_ok=True)\n", + "\n", + "i = 0\n", + "for file in input_files:\n", + " i += 1\n", + " if i > 25: break\n", + "\n", + " graph = read_nxgraph(f'{data_dir}/{file}') \n", + " obj_val, sol = copt_maxcut(graph)\n", + "\n", + " with open(f\"{results_dir}/COPT.txt\", \"a\") as f:\n", + " f.write(f\"{obj_val}\\n\")\n", + "\n", + " with open(f\"{solutions_dir}/COPT.txt\", \"a\") as f:\n", + " f.write(f\"{sol}\\n\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From fe697336258a01ef1de3b60ff6f798714cb603fb Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:27:39 -0500 Subject: [PATCH 3/6] add cplex jupyter notebook baseline Signed-off-by: Christopher Poon --- src/baseline/cplex_qubo.ipynb | 178 ++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) create mode 100644 src/baseline/cplex_qubo.ipynb diff --git a/src/baseline/cplex_qubo.ipynb b/src/baseline/cplex_qubo.ipynb new file mode 100644 index 0000000..cec75f9 --- /dev/null +++ b/src/baseline/cplex_qubo.ipynb @@ -0,0 +1,178 @@ +{ + "cells": [ + { + 
"cell_type": "markdown", + "id": "038c7bab", + "metadata": {}, + "source": [ + "# Prerequisites\n", + "Need to have IBM ILOG CPLEX Optimization Studio installed locally on your machine.\n", + "\n", + "For students:\n", + "https://www.ibm.com/academic/\n", + "1. Access sofware downloads\n", + "2. Sign in\n", + "3. Data Science\n", + "4. ILOG CPLEX Optimization Studio\n", + "5. Selecy HTTP as Download method\n", + "\n", + "Otherwise:\n", + "https://www.ibm.com/products/ilog-cplex-optimization-studio/pricing\n", + "\n", + "%pip install docplex" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fb72cf52", + "metadata": {}, + "outputs": [], + "source": [ + "# Imports\n", + "import os\n", + "import json\n", + "import numpy as np\n", + "import networkx as nx\n", + "from math import sqrt\n", + "from natsort import natsorted\n", + "import docplex\n", + "from docplex.mp.model import Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2a551ba", + "metadata": {}, + "outputs": [], + "source": [ + "def read_nxgraph(filename: str) -> nx.Graph(): # type: ignore\n", + " graph = nx.Graph()\n", + " with open(filename, 'r') as file:\n", + " # lines = []\n", + " line = file.readline()\n", + " is_first_line = True\n", + " while line is not None and line != '':\n", + " if '//' not in line:\n", + " if is_first_line:\n", + " strings = line.split(\" \")\n", + " num_nodes = int(strings[0])\n", + " num_edges = int(strings[1])\n", + " nodes = list(range(num_nodes))\n", + " graph.add_nodes_from(nodes)\n", + " is_first_line = False\n", + " else:\n", + " node1, node2, weight = line.split()\n", + " graph.add_edge(int(node1), int(node2), weight=weight)\n", + " line = file.readline()\n", + " return graph" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae9519dd", + "metadata": {}, + "outputs": [], + "source": [ + "def cplex_maxcut(graph):\n", + "\n", + " # Create QUBO matrix\n", + " nodes = len(list(graph.nodes))\n", + " J = nx.to_numpy_array(graph)\n", + " \n", + " # Construct model\n", + " model = Model(\"maxcut_qubo\")\n", + "\n", + " # Set time limit 1 hr\n", + " model.set_time_limit(900)\n", + " model.set_log_output(True) \n", + " model.parameters.mip.tolerances.mipgap=0.0\n", + "\n", + " # Create variable for each vertex\n", + " x = model.binary_var_list(nodes)\n", + "\n", + " # Objective function\n", + " objective = model.sum(\n", + " -J[i, j] * (2 * x[i] - 1) * (2 * x[j] - 1) * (1/sqrt(nodes))\n", + " for i in range(nodes) for j in range(i + 1, nodes) if J[i, j] != 0.0\n", + " )\n", + " model.minimize(objective)\n", + " \n", + " # Solve\n", + " solution = model.solve()\n", + " sol = \"\"\n", + " for i in range(nodes):\n", + " sol += f\"{int(x[i].solution_value)}\"\n", + " return float(solution.objective_value), sol\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6943d3d6", + "metadata": {}, + "outputs": [], + "source": [ + "# graph = read_nxgraph(\"../data/RL/spin_glass/10x10/GSRL_100_ID2.txt\")\n", + "# obj_val, sol = cplex_maxcut(graph)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "86913789", + "metadata": {}, + "outputs": [], + "source": [ + "# Parse all files in data_dir \n", + "data_dir = f\"../data/vna/SK\"\n", + "input_files = [ f for f in os.listdir(data_dir) ]\n", + "input_files = natsorted(input_files)\n", + "\n", + "# Write directories & files\n", + "results_dir = f\"../results/{'/'.join(data_dir.split('/')[2:])}\"\n", + "solutions_dir = 
f\"../solutions/{'/'.join(data_dir.split('/')[2:])}\"\n", + "os.makedirs(results_dir, exist_ok=True)\n", + "os.makedirs(solutions_dir, exist_ok=True)\n", + "\n", + "i = 0\n", + "for file in input_files:\n", + " i += 1\n", + " if i > 25: break\n", + " \n", + " graph = read_nxgraph(f'{data_dir}/{file}') \n", + " obj_val, sol = cplex_maxcut(graph)\n", + "\n", + " with open(f\"{results_dir}/CPLEX.txt\", \"a\") as f:\n", + " f.write(f\"{obj_val}\\n\")\n", + "\n", + " with open(f\"{solutions_dir}/CPLEX.txt\", \"a\") as f:\n", + " f.write(f\"{sol}\\n\")\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.9" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 15b171691ac029c05a3ed3d9a9d19a287bc2b18c Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:32:54 -0500 Subject: [PATCH 4/6] update mcpg to read all graphs --- src/algorithms/mcpg/mcpg.py | 405 +++++++++++++++++++++++++++++++----- 1 file changed, 353 insertions(+), 52 deletions(-) diff --git a/src/algorithms/mcpg/mcpg.py b/src/algorithms/mcpg/mcpg.py index 3665b48..6351add 100644 --- a/src/algorithms/mcpg/mcpg.py +++ b/src/algorithms/mcpg/mcpg.py @@ -1,33 +1,187 @@ -""" -Copyright (c) 2024 Cheng Chen, Ruitao Chen, Tianyou Li, Zaiwen Wen -All rights reserved. -Redistribution and use in source and binary forms, with or without modification, are permitted provided that -the following conditions are met: -1. Redistributions of source code must retain the above copyright notice, this list of conditions and the - following disclaimer. -2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions - and the following disclaimer in the documentation and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR -ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-""" import torch +import os +from natsort import natsorted +from torch_scatter import scatter +from torch.distributions.bernoulli import Bernoulli +from torch_geometric.data import Data import yaml import argparse import time -from dataloader import dataloader_select -from model import simple -from sampling import sampler_select, sample_initializer +''' +Sampler +''' +def sample_initializer(problem_type, probs, config, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'), data=None): + if problem_type in ["r_cheegercut", "n_cheegercut"]: + samples = torch.zeros(config['total_mcmc_num'], data.num_nodes) + index = data.sorted_degree_nodes[- config['total_mcmc_num']:] + for i in range(config['total_mcmc_num']): + samples[i][index[i]] = 1 + samples = samples.repeat(config['repeat_times'], 1) + return samples.t() + m = Bernoulli(probs) + samples = m.sample([config['total_mcmc_num'] * config['repeat_times']]) + samples = samples.detach().to(device) + return samples.t() +def metro_sampling(probs, start_status, max_transfer_time, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + num_node = len(probs) + num_chain = start_status.shape[1] + index_col = torch.tensor(list(range(num_chain))).to(device) + probs = probs.detach().to(device) + samples = start_status.bool().to(device) + + count = 0 + for t in range(max_transfer_time * 5): + if count >= num_chain*max_transfer_time: + break + index_row = torch.randint(low=0, high=num_node, size=[ + num_chain], device=device) + chosen_probs_base = probs[index_row] + chosen_value = samples[index_row, index_col] + chosen_probs = torch.where( + chosen_value, chosen_probs_base, 1-chosen_probs_base) + accept_rate = (1 - chosen_probs) / chosen_probs + r = torch.rand(num_chain, device=device) + is_accept = (r < accept_rate) + samples[index_row, index_col] = torch.where( + is_accept, ~chosen_value, chosen_value) + + count += is_accept.sum() + + return samples.float().to(device) + +def mcpg_sampling_ising(data, + start_result, probs, + num_ls, change_times, total_mcmc_num, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + probs = probs.to(torch.device("cpu")) + num_edges = data.num_edges + edges = data.edge_index + nlr_graph = edges[0] + nlc_graph = edges[1] + edge_weight = data.edge_attr + edge_weight_sum = data.edge_weight_sum + graph_probs = start_result.clone() + # get probs + graph_probs = metro_sampling( + probs, graph_probs, change_times, device) + start = graph_probs.clone() + + temp = graph_probs[data.sorted_degree_nodes[0]].clone() + graph_probs += temp + graph_probs = graph_probs % 2 + + graph_probs = (graph_probs - 0.5) * 2 + 0.5 + + # local search + temp = torch.zeros(4, graph_probs.size(dim=1)).to(device) + expected_cut = torch.zeros(graph_probs.size(dim=1)) + cnt = 0 + while True: + cnt += 1 + for i in range(num_edges): + index = data.sorted_degree_edges[i] + node_r = nlr_graph[index] + node_c = nlc_graph[index] + edges_r = data.n0_edges[index] + edges_c = data.n1_edges[index] + add_0 = data.add[0][index] + add_1 = data.add[1][index] + add_2 = data.add[2][index] + + temp_r_v = torch.mm(edges_r, graph_probs[data.n0[index]]) + temp_c_v = torch.mm(edges_c, graph_probs[data.n1[index]]) + + temp[1] = temp_r_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_0 + temp[2] = temp_c_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_1 + temp[0] = temp[1] + temp[2] + torch.rand(graph_probs.size(dim=1), + device=torch.device(device)) * 0.1 - add_2 + + max_index = 
torch.argmax(temp, dim=0) + graph_probs[node_r] = torch.floor(max_index / 2) + graph_probs[node_c] = max_index % 2 + + if cnt >= num_ls: + break + + # Convert Spins back to Ising model conventions -1 or +1 + spins = 2 * graph_probs - 1 # [nvar, chains] + + # raw energy per chain: E(s) = - sum J_ij s_i s_j + energy_vec = (edge_weight * (spins[nlr_graph] * spins[nlc_graph])).sum(dim=0) # [chains] + + # full‐chain advantage for policy gradient + adv = (energy_vec - energy_vec.mean()).to(device) # [chains] + + # pick the best chain out of each repeat group + energy_reshape = energy_vec.view(-1, total_mcmc_num) # [repeat_times, total_mcmc_num] + idx = torch.argmin(energy_reshape, dim=0) # [total_mcmc_num] + for i0 in range(total_mcmc_num): + idx[i0] = i0 + idx[i0] * total_mcmc_num + temp_min = energy_vec[idx] # [total_mcmc_num] + temp_min_info = graph_probs[:, idx] # [nvar, total_mcmc_num] + start_samples = start # [nvar, chains] + + # return best energy state, probs of the best energy states, the total probs, and the advantage + return temp_min, temp_min_info, start_samples, adv + +''' +Network +''' +class simple(torch.nn.Module): + def __init__(self, output_num): + super(simple, self).__init__() + self.lin = torch.nn.Linear(1, output_num) + self.sigmoid = torch.nn.Sigmoid() + + def reset_parameters(self): + self.lin.reset_parameters() + + def forward(self, alpha = 0.1, start_samples=None, value=None, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + x = torch.ones(1).to(device) + x = self.lin(x) + x = self.sigmoid(x) + + x = (x-0.5) * 0.6 + 0.5 + probs = x + probs = probs.squeeze() + retdict = {} + reg = probs * torch.log(probs) + (1-probs) * torch.log(1-probs ) + reg = torch.mean(reg) + if start_samples == None: + retdict["output"] = [probs.squeeze(-1), "hist"] # output + retdict["reg"] = [reg, "sequence"] + retdict["loss"] = [alpha * reg, "sequence"] + return retdict + + res_samples = value.t().detach() + + start_samples_idx = start_samples * \ + probs + (1 - start_samples) * (1 - probs) + log_start_samples_idx = torch.log(start_samples_idx) + log_start_samples = log_start_samples_idx.sum(dim=1) + loss_ls = torch.mean(log_start_samples * res_samples) + loss = loss_ls + alpha * reg + + retdict["output"] = [probs.squeeze(-1), "hist"] # output + retdict["reg"] = [reg, "sequence"] + retdict["loss"] = [loss, "sequence"] + return retdict + + def __repr__(self): + return self.__class__.__name__ + +''' +Algorithm +''' def mcpg_solver(nvar, config, data, verbose=False): device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') - sampler = sampler_select(config["problem_type"]) + sampler = mcpg_sampling_ising change_times = int(nvar/10) # transition times for metropolis sampling net = simple(nvar) @@ -56,11 +210,11 @@ def mcpg_solver(nvar, config, data, verbose=False): probs = (torch.zeros(nvar)+0.5).to(device) tensor_probs = sample_initializer( config["problem_type"], probs, config, data=data) - temp_min, temp_min_info, temp_start_samples, value = sampler( + temp_max, temp_max_info, temp_start_samples, value = sampler( data, tensor_probs, probs, config['num_ls'], 0, config['total_mcmc_num']) - now_min_res = temp_min - now_min_info = temp_min_info - tensor_probs = temp_min_info.clone() + now_max_res = temp_max + now_max_info = temp_max_info + tensor_probs = temp_max_info.clone() tensor_probs = tensor_probs.repeat(1, config['repeat_times']) start_samples = temp_start_samples.t().to(device) @@ -68,37 +222,168 @@ def mcpg_solver(nvar, config, data, verbose=False): if 
epoch % config['sample_epoch_num'] == 0 and epoch > 0: probs = retdict["output"][0] probs = probs.detach() - temp_min, temp_min_info, start_samples_temp, value = sampler( + temp_max, temp_max_info, start_samples_temp, value = sampler( data, tensor_probs, probs, config['num_ls'], change_times, config['total_mcmc_num']) # update now_max for i0 in range(config['total_mcmc_num']): - if temp_min[i0] < now_min_res[i0]: - now_min_res[i0] = temp_min[i0] - now_min_info[:, i0] = temp_min_info[:, i0] + if temp_max[i0] < now_max_res[i0]: + now_max_res[i0] = temp_max[i0] + now_max_info[:, i0] = temp_max_info[:, i0] # update if min is too small - now_min = min(now_min_res).item() - now_min_index = torch.argmax(now_min_res) - now_max = max(now_min_res).item() - now_max_index = torch.argmin(now_min_res) - now_min_res[now_min_index] = now_min - now_min_info[:, now_max_index] = now_min_info[:, now_min_index] - temp_min_info[:, now_max_index] = now_min_info[:, now_min_index] + now_max = min(now_max_res).item() + now_max_index = torch.argmin(now_max_res) + + now_min = max(now_max_res).item() + now_min_index = torch.argmax(now_max_res) + + now_max_res[now_min_index] = now_max + + now_max_info[:, now_min_index] = now_max_info[:, now_max_index] + temp_max_info[:, now_min_index] = now_max_info[:, now_max_index] # select best samples - tensor_probs = temp_min_info.clone() + tensor_probs = temp_max_info.clone() tensor_probs = tensor_probs.repeat(1, config['repeat_times']) # construct the start point for next iteration start_samples = start_samples_temp.t() if verbose: - print("o {:.3f}".format((min(now_min_res).item()))) + if config["problem_type"] == "maxsat" and len(data.pdata) == 7: + res = max(now_max_res).item() + if res > data.pdata[5] * data.pdata[6]: + res -= data.pdata[5] * data.pdata[6] + print("o {:.3f}".format(res)) + elif "obj_type" in config and config["obj_type"] == "neg": + print("o {:.3f}".format((-min(now_max_res).item()))) + else: + print("o {:f}".format((min(now_max_res).item()))) del (retdict) - total_min = now_min_res - best_sort = torch.argsort(now_min_res, descending=False) - total_best_info = torch.squeeze(now_min_info[:, best_sort[0]]) + total_max = now_max_res + best_sort = torch.argsort(now_max_res, descending=False) + total_best_info = torch.squeeze(now_max_info[:, best_sort[0]]) + + return min(total_max).item(), total_best_info, now_max_res, now_max_info + +''' +Dataloader +''' +def maxcut_dataloader(path, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + with open(path) as f: + fline = f.readline() + fline = fline.split() + num_nodes, num_edges = int(fline[0]), int(fline[1]) + edge_index = torch.LongTensor(2, num_edges) + edge_attr = torch.Tensor(num_edges, 1) + cnt = 0 + while True: + lines = f.readlines(num_edges * 2) + if not lines: + break + for line in lines: + line = line.rstrip('\n').split() + edge_index[0][cnt] = int(line[0]) - 1 + edge_index[1][cnt] = int(line[1]) - 1 + edge_attr[cnt][0] = float(line[2]) + cnt += 1 + data_maxcut = Data(num_nodes=num_nodes, + edge_index=edge_index, edge_attr=edge_attr) + data_maxcut = data_maxcut.to(device) + data_maxcut.edge_weight_sum = float(torch.sum(data_maxcut.edge_attr)) + + data_maxcut = append_neighbors(data_maxcut) + + data_maxcut.single_degree = [] + data_maxcut.weighted_degree = [] + tensor_abs_weighted_degree = [] + for i0 in range(data_maxcut.num_nodes): + data_maxcut.single_degree.append(len(data_maxcut.neighbors[i0])) + data_maxcut.weighted_degree.append( + float(torch.sum(data_maxcut.neighbor_edges[i0]))) 
+ tensor_abs_weighted_degree.append( + float(torch.sum(torch.abs(data_maxcut.neighbor_edges[i0])))) + tensor_abs_weighted_degree = torch.tensor(tensor_abs_weighted_degree) + data_maxcut.sorted_degree_nodes = torch.argsort( + tensor_abs_weighted_degree, descending=True) - return min(total_min).item(), total_best_info, now_min_res, now_min_info + edge_degree = [] + add = torch.zeros(3, num_edges).to(device) + for i0 in range(num_edges): + edge_degree.append(abs(edge_attr[i0].item())*( + tensor_abs_weighted_degree[edge_index[0][i0]]+tensor_abs_weighted_degree[edge_index[1][i0]])) + node_r = edge_index[0][i0] + node_c = edge_index[1][i0] + add[0][i0] = - data_maxcut.weighted_degree[node_r] / \ + 2 + data_maxcut.edge_attr[i0] - 0.05 + add[1][i0] = - data_maxcut.weighted_degree[node_c] / \ + 2 + data_maxcut.edge_attr[i0] - 0.05 + add[2][i0] = data_maxcut.edge_attr[i0]+0.05 + + for i0 in range(num_nodes): + data_maxcut.neighbor_edges[i0] = data_maxcut.neighbor_edges[i0].unsqueeze( + 0) + data_maxcut.add = add + edge_degree = torch.tensor(edge_degree) + data_maxcut.sorted_degree_edges = torch.argsort( + edge_degree, descending=True) + + return data_maxcut, num_nodes + +def append_neighbors(data, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + data.neighbors = [] + data.neighbor_edges = [] + num_nodes = data.num_nodes + for i in range(num_nodes): + data.neighbors.append([]) + data.neighbor_edges.append([]) + edge_number = data.edge_index.shape[1] + + for index in range(0, edge_number): + row = data.edge_index[0][index] + col = data.edge_index[1][index] + edge_weight = data.edge_attr[index][0].item() + + data.neighbors[row].append(col.item()) + data.neighbor_edges[row].append(edge_weight) + data.neighbors[col].append(row.item()) + data.neighbor_edges[col].append(edge_weight) + + data.n0 = [] + data.n1 = [] + data.n0_edges = [] + data.n1_edges = [] + for index in range(0, edge_number): + row = data.edge_index[0][index] + col = data.edge_index[1][index] + data.n0.append(data.neighbors[row].copy()) + data.n1.append(data.neighbors[col].copy()) + data.n0_edges.append(data.neighbor_edges[row].copy()) + data.n1_edges.append(data.neighbor_edges[col].copy()) + i = 0 + for i in range(len(data.n0[index])): + if data.n0[index][i] == col: + break + data.n0[index].pop(i) + data.n0_edges[index].pop(i) + for i in range(len(data.n1[index])): + if data.n1[index][i] == row: + break + data.n1[index].pop(i) + data.n1_edges[index].pop(i) + + data.n0[index] = torch.LongTensor(data.n0[index]).to(device) + data.n1[index] = torch.LongTensor(data.n1[index]).to(device) + data.n0_edges[index] = torch.tensor( + data.n0_edges[index]).unsqueeze(0).to(device) + data.n1_edges[index] = torch.tensor( + data.n1_edges[index]).unsqueeze(0).to(device) + + for i in range(num_nodes): + data.neighbors[i] = torch.LongTensor(data.neighbors[i]).to(device) + data.neighbor_edges[i] = torch.tensor( + data.neighbor_edges[i]).to(device) + + return data if __name__ == "__main__": device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -113,14 +398,30 @@ def mcpg_solver(nvar, config, data, verbose=False): config = yaml.safe_load(f) path = args.problem_instance - start_time = time.perf_counter() - dataloader = dataloader_select(config["problem_type"]) - data, nvar = dataloader(path) - dataloader_t = time.perf_counter() - res, solutions, _, _ = mcpg_solver(nvar, config, data, verbose=True) - mcpg_t = time.perf_counter() - + input_files = [ f for f in os.listdir(path) ] + input_files = natsorted(input_files) + + 
os.makedirs(f"energies/{path[5:]}", exist_ok=True) + i = 0 + for instance in input_files: + start_time = time.perf_counter() + dataloader = maxcut_dataloader + data, nvar = dataloader(f"{path}/{instance}") + dataloader_t = time.perf_counter() + res, solutions, _, _ = mcpg_solver(nvar, config, data, verbose=True) + mcpg_t = time.perf_counter() + + if "obj_type" in config and config["obj_type"] == "neg": + print("OUTPUT: {:.2f}".format(-res)) + else: + print("OUTPUT: {:f}".format(res)) + + with open(f"energies/{path[5:]}/MCPG.txt", "a") as f: + f.write(f"{res}\n") + + print("DATA LOADING TIME: {:f}".format(dataloader_t - start_time)) + print("MCPG RUNNING TIME: {:f}".format(mcpg_t - dataloader_t)) - print("OUTPUT: {:f}".format(res)) - print("DATA LOADING TIME: {:.4f}".format(dataloader_t - start_time)) - print("MCPG RUNNING TIME: {:.4f}".format(mcpg_t - dataloader_t)) + if i == 0: break + + From 354cfb50e0034d8739fb4b3abc377f8eda956594 Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:33:37 -0500 Subject: [PATCH 5/6] update vca to read all graphs --- src/algorithms/vca/vca.py | 607 +++++++++++++++++++++++++++++++++++--- 1 file changed, 566 insertions(+), 41 deletions(-) diff --git a/src/algorithms/vca/vca.py b/src/algorithms/vca/vca.py index e3fd434..3aa657c 100644 --- a/src/algorithms/vca/vca.py +++ b/src/algorithms/vca/vca.py @@ -1,25 +1,529 @@ import tensorflow as tf import numpy as np import argparse +from math import sqrt +import os import time import random -from config import config -from DilatedRNN import DilatedRNNWavefunction -from utils import * +from natsort import natsorted +from typing import Any, Optional, Union, Text, Sequence, Tuple, List + +Tensor = Any + +''' +Config +''' +class Config: + def __init__(self, graph_path, seed): + Nx, Ny, Jz = self.read_graph(graph_path) + self.seed = seed + + self.Nx = Nx + self.Ny = Ny + self.N = Nx*Ny #total number of sites + self.Jz = Jz + + '''model''' + self.num_units = 40 #number of memory units + self.activation_function = tf.nn.elu #non-linear activation function for the 2D Tensorized RNN cell + + '''training''' + self.numsamples = 50 #number of samples used for training + self.lr = 5*(1e-4) #learning rate + self.T0 = 2 #Initial temperature + self.Bx0 = 0 #Initial magnetic field + self.num_warmup_steps = 1000 #number of warmup steps + self.num_annealing_steps = 2**8 #number of annealing steps + self.num_equilibrium_steps = 5 #number of training steps after each annealing step + + def read_graph(self, graph_path): + edge_list = dict() + with open(graph_path, "r") as f: + line = f.readline() + is_first_line = True + while line is not None and line != '': + if is_first_line: + nodes, edges = line.split(" ") + num_nodes = int(nodes) + num_edges = int(edges) + is_first_line = False + else: + node1, node2, weight = line.split(" ") + edge_list[(int(node1), int(node2))] = float(weight) + line = f.readline() + + Nx = int(sqrt(num_nodes)) + Ny = Nx + Jz = np.zeros((Nx, Ny, 2), dtype=np.float64) + for i in range(Nx): + for j in range(Ny): + if i != Nx-1: + right = edge_list.get((i+j*Ny+1, i+1+j*Ny+1)) + if right is not None: + Jz[i, j, 0] = right + if j != Ny-1: + down = edge_list.get((i+j*Ny+1, i+(j+1)*Ny+1)) + if down is not None: + Jz[i, j, 1] = down + return Nx, Ny, Jz + +''' +Network +''' +class MDRNNWavefunction(object): + def __init__(self,systemsize_x = None, systemsize_y = None,cell=None,activation=None,num_units = None,scope='RNNWavefunction',seed = 111): + self.graph=tf.Graph() + self.scope=scope #Label of the 
RNN wavefunction + self.Nx=systemsize_x + self.Ny=systemsize_y + + random.seed(seed) # `python` built-in pseudo-random generator + np.random.seed(seed) # numpy pseudo-random generator + + #Defining the neural network + with self.graph.as_default(): + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + + tf.set_random_seed(seed) # tensorflow pseudo-random generator + + #Defining the 2D Tensorized RNN cell with non-weight sharing + self.rnn=[cell(num_units = num_units, activation = activation, name="rnn_"+str(0)+str(i),dtype=tf.float64) for i in range(self.Nx*self.Ny)] + self.dense = [tf.layers.Dense(2,activation=tf.nn.softmax,name='wf_dense'+str(i), dtype = tf.float64) for i in range(self.Nx*self.Ny)] + + def sample(self,numsamples,inputdim): + with self.graph.as_default(): #Call the default graph, used if willing to create multiple graphs. + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + self.inputdim=inputdim + self.outputdim=self.inputdim + self.numsamples=numsamples + + samples=[[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + rnn_states = {} + inputs = {} + + for ny in range(self.Ny): #Loop over the boundaries for initialization + if ny%2==0: + nx = -1 + # print(nx,ny) + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + if ny%2==1: + nx = self.Nx + # print(nx,ny) + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + + for nx in range(self.Nx): #Loop over the boundaries for initialization + ny = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
+ + #Making a loop over the sites with the 2DRNN + for ny in range(self.Ny): + + if ny%2 == 0: + for nx in range(self.Nx): #left to right -def vca_solver(config: config): + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + samples[nx][ny] = sample_temp + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) + + + if ny%2 == 1: + + for nx in range(self.Nx-1,-1,-1): #right to left + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + samples[nx][ny] = sample_temp + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) + + + self.samples=tf.transpose(tf.stack(values=samples,axis=0), perm = [2,0,1]) + + probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) + one_hot_samples=tf.one_hot(self.samples,depth=self.inputdim, dtype = tf.float64) + self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) + + return self.samples,self.log_probs + + def log_probability(self,samples,inputdim): + with self.graph.as_default(): + + self.inputdim=inputdim + self.outputdim=self.inputdim + + self.numsamples=tf.shape(samples)[0] + + #Initial input to feed to the lstm + self.outputdim=self.inputdim + + + samples_=tf.transpose(samples, perm = [1,2,0]) + rnn_states = {} + inputs = {} + + for ny in range(self.Ny): #Loop over the boundaries for initialization + if ny%2==0: + nx = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + if ny%2==1: + nx = self.Nx + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + + for nx in range(self.Nx): #Loop over the boundaries for initialization + ny = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
+ + + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + + #Making a loop over the sites with the 2DRNN + for ny in range(self.Ny): + + if ny%2 == 0: + + for nx in range(self.Nx): #left to right + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) + + if ny%2 == 1: + + for nx in range(self.Nx-1,-1,-1): #right to left + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) + + probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) + one_hot_samples=tf.one_hot(samples,depth=self.inputdim, dtype = tf.float64) + + self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) + + return self.log_probs + +class MDTensorizedRNNCell(tf.contrib.rnn.RNNCell): + """The 2D Tensorized RNN cell. + """ + def __init__(self, num_units = None, activation = None, name=None, dtype = None, reuse=None): + super(MDTensorizedRNNCell, self).__init__(_reuse=reuse, name=name) + # save class variables + self._num_in = 2 + self._num_units = num_units + self._state_size = num_units + self._output_size = num_units + self.activation = activation + + # set up input -> hidden connection + self.W = tf.get_variable("W_"+name, shape=[num_units, 2*num_units, 2*self._num_in], + initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) + + self.b = tf.get_variable("b_"+name, shape=[num_units], + initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) + + @property + def input_size(self): + return self._num_in # real + + @property + def state_size(self): + return self._state_size # real + + @property + def output_size(self): + return self._output_size # real + + def call(self, inputs, states): + + inputstate_mul = tf.einsum('ij,ik->ijk', tf.concat(states, 1),tf.concat(inputs,1)) + # prepare input linear combination + state_mul = tensordot(tf, inputstate_mul, self.W, axes=[[1,2],[1,2]]) # [batch_sz, num_units] + + preact = state_mul + self.b + + output = self.activation(preact) # [batch_sz, num_units] C + + new_state = output + + return output, new_state + +def tensordot(tf, + a, + b, + axes, + name: Optional[Text] = None) -> Tensor: + + def _tensordot_should_flip(contraction_axes: List[int], + free_axes: List[int]) -> bool: + # NOTE: This will fail if the arguments contain any Tensors. 
+ if contraction_axes and free_axes: + return bool(np.mean(contraction_axes) < np.mean(free_axes)) + return False + + def _tranpose_if_necessary(tensor: Tensor, perm: List[int]) -> Tensor: + if perm == list(range(len(perm))): + return tensor + return tf.transpose(tensor, perm) + + def _reshape_if_necessary(tensor: Tensor, + new_shape: List[int]) -> Tensor: + cur_shape = tensor.get_shape().as_list() + if (len(new_shape) == len(cur_shape) and + all(d0 == d1 for d0, d1 in zip(cur_shape, new_shape))): + return tensor + return tf.reshape(tensor, new_shape) + + def _tensordot_reshape( + a: Tensor, axes: Union[Sequence[int], Tensor], is_right_term=False + ) -> Tuple[Tensor, Union[List[int], Tensor], Optional[List[int]], bool]: + if a.get_shape().is_fully_defined() and isinstance(axes, (list, tuple)): + shape_a = a.get_shape().as_list() + # NOTE: This will fail if axes contains any tensors + axes = [i if i >= 0 else i + len(shape_a) for i in axes] + free = [i for i in range(len(shape_a)) if i not in axes] + flipped = _tensordot_should_flip(axes, free) + + free_dims = [shape_a[i] for i in free] + prod_free = int(np.prod([shape_a[i] for i in free])) + prod_axes = int(np.prod([shape_a[i] for i in axes])) + perm = axes + free if flipped else free + axes + new_shape = [prod_axes, prod_free] if flipped else [prod_free, prod_axes] + transposed_a = _tranpose_if_necessary(a, perm) + reshaped_a = _reshape_if_necessary(transposed_a, new_shape) + transpose_needed = (not flipped) if is_right_term else flipped + return reshaped_a, free_dims, free_dims, transpose_needed + if a.get_shape().ndims is not None and isinstance(axes, (list, tuple)): + shape_a = a.get_shape().as_list() + axes = [i if i >= 0 else i + len(shape_a) for i in axes] + free = [i for i in range(len(shape_a)) if i not in axes] + flipped = _tensordot_should_flip(axes, free) + perm = axes + free if flipped else free + axes + + axes_dims = [shape_a[i] for i in axes] + free_dims = [shape_a[i] for i in free] + free_dims_static = free_dims + axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") + free = tf.convert_to_tensor(free, dtype=tf.dtypes.int32, name="free") + shape_a = tf.shape(a) + transposed_a = _tranpose_if_necessary(a, perm) + else: + free_dims_static = None + shape_a = tf.shape(a) + rank_a = tf.rank(a) + axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") + axes = tf.where(axes >= 0, axes, axes + rank_a) + free, _ = tf.compat.v1.setdiff1d(tf.range(rank_a), axes) + flipped = is_right_term + perm = ( + tf.concat([axes, free], 0) if flipped else tf.concat([free, axes], 0)) + transposed_a = tf.transpose(a, perm) + + free_dims = tf.gather(shape_a, free) + axes_dims = tf.gather(shape_a, axes) + prod_free_dims = tf.reduce_prod(free_dims) + prod_axes_dims = tf.reduce_prod(axes_dims) + + if flipped: + new_shape = tf.stack([prod_axes_dims, prod_free_dims]) + else: + new_shape = tf.stack([prod_free_dims, prod_axes_dims]) + reshaped_a = tf.reshape(transposed_a, new_shape) + transpose_needed = (not flipped) if is_right_term else flipped + return reshaped_a, free_dims, free_dims_static, transpose_needed + + def _tensordot_axes(a: Tensor, axes + ) -> Tuple[Any, Any]: + """Generates two sets of contraction axes for the two tensor arguments.""" + a_shape = a.get_shape() + if isinstance(axes, tf.compat.integral_types): + if axes < 0: + raise ValueError("'axes' must be at least 0.") + if a_shape.ndims is not None: + if axes > a_shape.ndims: + raise ValueError("'axes' must not be larger than the number of " + "dimensions of 
tensor %s." % a) + return (list(range(a_shape.ndims - axes, + a_shape.ndims)), list(range(axes))) + rank = tf.rank(a) + return (tf.range(rank - axes, rank, + dtype=tf.int32), tf.range(axes, dtype=tf.int32)) + if isinstance(axes, (list, tuple)): + if len(axes) != 2: + raise ValueError("'axes' must be an integer or have length 2.") + a_axes = axes[0] + b_axes = axes[1] + if isinstance(a_axes, tf.compat.integral_types) and \ + isinstance(b_axes, tf.compat.integral_types): + a_axes = [a_axes] + b_axes = [b_axes] + # NOTE: This fails if either a_axes and b_axes are Tensors. + if len(a_axes) != len(b_axes): + raise ValueError( + "Different number of contraction axes 'a' and 'b', %s != %s." % + (len(a_axes), len(b_axes))) + + # The contraction indices do not need to be permuted. + # Sort axes to avoid unnecessary permutations of a. + # NOTE: This fails if either a_axes and b_axes contain Tensors. + # pylint: disable=len-as-condition + if len(a_axes) > 0: + a_axes, b_axes = list(zip(*sorted(zip(a_axes, b_axes)))) + + return a_axes, b_axes + axes = tf.convert_to_tensor(axes, name="axes", dtype=tf.int32) + return axes[0], axes[1] + + with tf.compat.v1.name_scope(name, "Tensordot", [a, b, axes]) as _name: + a = tf.convert_to_tensor(a, name="a") + b = tf.convert_to_tensor(b, name="b") + a_axes, b_axes = _tensordot_axes(a, axes) + a_reshape, a_free_dims, a_free_dims_static, a_transp = _tensordot_reshape( + a, a_axes) + b_reshape, b_free_dims, b_free_dims_static, b_transp = _tensordot_reshape( + b, b_axes, is_right_term=True) + + ab_matmul = tf.matmul( + a_reshape, b_reshape, transpose_a=a_transp, transpose_b=b_transp) + + if isinstance(a_free_dims, list) and isinstance(b_free_dims, list): + return tf.reshape(ab_matmul, a_free_dims + b_free_dims, name=_name) + a_free_dims = tf.convert_to_tensor(a_free_dims, dtype=tf.dtypes.int32) + b_free_dims = tf.convert_to_tensor(b_free_dims, dtype=tf.dtypes.int32) + product = tf.reshape( + ab_matmul, tf.concat([a_free_dims, b_free_dims], 0), name=_name) + if a_free_dims_static is not None and b_free_dims_static is not None: + product.set_shape(a_free_dims_static + b_free_dims_static) + return product + +''' +Utils +''' +def Ising2D_diagonal_matrixelements(Jz, samples): + numsamples = samples.shape[0] + Nx = samples.shape[1] + Ny = samples.shape[2] + + N = Nx*Ny #Total number of spins + + local_energies = np.zeros((numsamples), dtype = np.float64) + + for i in range(Nx-1): #diagonal elements (right neighbours) + values = samples[:,i]+samples[:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) + + for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) + values = samples[:,:,i]+samples[:,:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) + + return local_energies + +def Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess): + numsamples = samples.shape[0] + Nx = samples.shape[1] + Ny = samples.shape[2] + + N = Nx*Ny #Total number of spins + + local_energies = np.zeros((numsamples), dtype = np.float64) + + for i in range(Nx-1): #diagonal elements (right neighbours) + values = 
samples[:,i]+samples[:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) + + for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) + values = samples[:,:,i]+samples[:,:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) + + + queue_samples[0] = samples #storing the diagonal samples + + if Bx != 0: + for i in range(Nx): #Non-diagonal elements + for j in range(Ny): + valuesT = np.copy(samples) + valuesT[:,i,j][samples[:,i,j]==1] = 0 #Flip + valuesT[:,i,j][samples[:,i,j]==0] = 1 #Flip + + queue_samples[i*Ny+j+1] = valuesT + + len_sigmas = (N+1)*numsamples + steps = len_sigmas//50000+1 #I want a maximum in batch size just to not allocate too much memory + # print("Total num of steps =", steps) + queue_samples_reshaped = np.reshape(queue_samples, [(N+1)*numsamples, Nx,Ny]) + for i in range(steps): + if i < steps-1: + cut = slice((i*len_sigmas)//steps,((i+1)*len_sigmas)//steps) + else: + cut = slice((i*len_sigmas)//steps,len_sigmas) + log_probs[cut] = sess.run(log_probs_tensor, feed_dict={samples_placeholder:queue_samples_reshaped[cut]}) + + log_probs_reshaped = np.reshape(log_probs, [N+1,numsamples]) + for j in range(numsamples): + local_energies[j] += -Bx*np.sum(np.exp(0.5*log_probs_reshaped[1:,j]-0.5*log_probs_reshaped[0,j])) + + return local_energies + +''' +Variational Classical Annealing +''' +def run_vca(config: Config): seed = config.seed tf.compat.v1.reset_default_graph() - random.seed(seed) # `python` built-in pseudo-random generator - np.random.seed(seed) # numpy pseudo-random generator + random.seed(seed) # `python` built-in pseudo-random generator + np.random.seed(seed) # numpy pseudo-random generator tf.compat.v1.set_random_seed(seed) # tensorflow pseudo-random generator + Nx = config.Nx + Ny = config.Ny N = config.N Jz = config.Jz num_units = config.num_units - num_layers = config.num_layers activation_function = config.activation_function numsamples = config.numsamples @@ -40,10 +544,9 @@ def vca_solver(config: config): print("\nNumber of annealing steps = {0}".format(num_annealing_steps)) print("Number of training steps = {0}".format(num_steps)) - units = [num_units] * num_layers - DRNNWF = DilatedRNNWavefunction(N, units=units, layers=num_layers, cell=tf.nn.rnn_cell.BasicRNNCell, activation=activation_function, seed=seed) # contains the graph with the RNNs - with tf.compat.v1.variable_scope(DRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): - with DRNNWF.graph.as_default(): + MDRNNWF = MDRNNWavefunction(systemsize_x = Nx, systemsize_y = Ny ,num_units = num_units,cell=MDTensorizedRNNCell, activation = activation_function, seed = seed) #contains the graph with the RNNs + with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with MDRNNWF.graph.as_default(): global_step = tf.Variable(0, trainable=False) learningrate_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=[]) @@ -54,11 +557,12 @@ def vca_solver(config: config): #Defining Tensorflow placeholders Eloc=tf.compat.v1.placeholder(dtype=tf.float64,shape=[numsamples]) - sampleplaceholder_forgrad=tf.compat.v1.placeholder(dtype=tf.int32,shape=[numsamples,N]) - 
log_probs_forgrad = DRNNWF.log_probability(sampleplaceholder_forgrad,inputdim=2) - samples_placeholder=tf.compat.v1.placeholder(dtype=tf.int32,shape=(None,N)) - log_probs_tensor=DRNNWF.log_probability(samples_placeholder,inputdim=2) - samplesandprobs = DRNNWF.sample(numsamples=numsamples,inputdim=2) + sampleplaceholder_forgrad=tf.compat.v1.placeholder(dtype=tf.int32,shape=[numsamples,Nx,Ny]) + log_probs_forgrad = MDRNNWF.log_probability(sampleplaceholder_forgrad,inputdim=2) + + samples_placeholder=tf.compat.v1.placeholder(dtype=tf.int32,shape=(None,Nx, Ny)) + log_probs_tensor=MDRNNWF.log_probability(samples_placeholder,inputdim=2) + samplesandprobs = MDRNNWF.sample(numsamples=numsamples,inputdim=2) T_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=()) @@ -67,34 +571,30 @@ def vca_solver(config: config): cost = tf.reduce_mean(tf.multiply(log_probs_forgrad,tf.stop_gradient(Floc))) - tf.reduce_mean(log_probs_forgrad)*tf.reduce_mean(tf.stop_gradient(Floc)) gradients, variables = zip(*optimizer.compute_gradients(cost)) - #Calculate Gradients--------------- - #Define the optimization step optstep=optimizer.apply_gradients(zip(gradients,variables), global_step = global_step) - #Tensorflow saver to checkpoint saver=tf.compat.v1.train.Saver() - #For initialization init=tf.compat.v1.global_variables_initializer() initialize_parameters = tf.initialize_all_variables() config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True - sess=tf.compat.v1.Session(graph=DRNNWF.graph, config=config) + sess=tf.compat.v1.Session(graph=MDRNNWF.graph, config=config) sess.run(init) - # Run Variational Annealing - with tf.compat.v1.variable_scope(DRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): - with DRNNWF.graph.as_default(): - # To store data + ## Run Variational Annealing + with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with MDRNNWF.graph.as_default(): + #To store data meanEnergy=[] varEnergy=[] varFreeEnergy = [] meanFreeEnergy = [] - samples = np.ones((numsamples, N), dtype=np.int32) - queue_samples = np.zeros((N+1, numsamples, N), dtype = np.int32) + samples = np.ones((numsamples, Nx, Ny), dtype=np.int32) + queue_samples = np.zeros((N+1, numsamples, Nx, Ny), dtype = np.int32) log_probs = np.zeros((N+1)*numsamples, dtype=np.float64) T = T0 #initializing temperature @@ -119,12 +619,12 @@ def vca_solver(config: config): samples, log_probabilities = sess.run(samplesandprobs) # Estimating the local energies - local_energies = Fullyconnected_localenergies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess) + local_energies = Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess) meanE = np.mean(local_energies) varE = np.var(local_energies) - # adding elements to be saved + #adding elements to be saved meanEnergy.append(meanE) varEnergy.append(varE) @@ -143,40 +643,65 @@ def vca_solver(config: config): if it == num_annealing_steps*num_equilibrium_steps + num_warmup_steps: Nsteps = 20 - numsamples_estimation = 10**5 #Num samples to be obtained at the end + numsamples_estimation = 10**6 #Num samples to be obtained at the end numsamples_perstep = numsamples_estimation//Nsteps #The number of steps taken to get "numsamples_estimation" samples (to avoid memory allocation issues) - samplesandprobs_final = DRNNWF.sample(numsamples=numsamples_perstep,inputdim=2) + samplesandprobs_final = MDRNNWF.sample(numsamples=numsamples_perstep,inputdim=2) energies = 
np.zeros((numsamples_estimation)) - solutions = np.zeros((numsamples_estimation, N)) + solutions = np.zeros((numsamples_estimation, Nx, Ny)) print("\nSaving energy and variance before the end of annealing") for i in range(Nsteps): + # print("\nsampling started") samples_final, _ = sess.run(samplesandprobs_final) - energies[i*numsamples_perstep:(i+1)*numsamples_perstep] = Fullyconnected_diagonal_matrixelements(Jz,samples_final) + # print("\nsampling finished") + energies[i*numsamples_perstep:(i+1)*numsamples_perstep] = Ising2D_diagonal_matrixelements(Jz,samples_final) solutions[i*numsamples_perstep:(i+1)*numsamples_perstep] = samples_final print("Sampling step:" , i+1, "/", Nsteps) print("meanE = ", np.mean(energies)) print("varE = ", np.var(energies)) print("minE = ",np.min(energies)) - print("Elapsed time is =", time.time()-start, " seconds") return np.mean(energies), np.min(energies) - # Run gradient descent step + #Run gradient descent step sess.run(optstep,feed_dict={Eloc:local_energies, sampleplaceholder_forgrad: samples, learningrate_placeholder: lr, T_placeholder:T}) if it%5 == 0: print("Elapsed time is =", time.time()-start, " seconds") print('\n\n') + + - +''' +Run VCA +''' if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("problem_instance", type=str, help="input the data file for the problem instance") - args = parser.parse_args() - path = args.problem_instance - seed = 0 - vca_config = config(path, seed) - mean_energies, min_energies = vca_solver(vca_config) \ No newline at end of file + args = parser.parse_args() + data_dir = args.problem_instance + input_files = [ f for f in os.listdir(data_dir) ] + input_files = natsorted(input_files) + + results_dir = f"results/{'/'.join(data_dir.split('/')[1:])}" + os.makedirs(results_dir, exist_ok=True) + + i = 0 + for file in input_files: + i += 1 + if i > 25: break + # if i > 1: break + + vca_config = Config(f'{data_dir}/{file}', i) + vca_config.num_units = 40 + vca_config.numsamples = 50 + vca_config.T0 = 1 + vca_config.lr = 5*(1e-4) + vca_config.num_warmup_steps = 1000 + + mean_energies, min_energies = run_vca(vca_config) + + with open(f"{results_dir}/VCA.txt", "a") as f: + f.write(f"{min_energies}\n") From f260a1c70b51dfddd931d4fef38de7f97e67017f Mon Sep 17 00:00:00 2001 From: Christopher Poon Date: Fri, 6 Feb 2026 17:48:46 -0500 Subject: [PATCH 6/6] re add files, rename files to single file --- src/algorithms/mcpg/mcpg.py | 405 ++------------ src/algorithms/mcpg/mcpg_single_file.py | 427 ++++++++++++++ src/algorithms/vca/vca.py | 607 ++------------------ src/algorithms/vca/vca_single_file.py | 707 ++++++++++++++++++++++++ 4 files changed, 1227 insertions(+), 919 deletions(-) create mode 100644 src/algorithms/mcpg/mcpg_single_file.py create mode 100644 src/algorithms/vca/vca_single_file.py diff --git a/src/algorithms/mcpg/mcpg.py b/src/algorithms/mcpg/mcpg.py index 6351add..3665b48 100644 --- a/src/algorithms/mcpg/mcpg.py +++ b/src/algorithms/mcpg/mcpg.py @@ -1,187 +1,33 @@ +""" +Copyright (c) 2024 Cheng Chen, Ruitao Chen, Tianyou Li, Zaiwen Wen +All rights reserved. +Redistribution and use in source and binary forms, with or without modification, are permitted provided that +the following conditions are met: +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the + following disclaimer. +2. 
Redistributions in binary form must reproduce the above copyright notice, this list of conditions + and the following disclaimer in the documentation and/or other materials provided with the distribution. +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" import torch -import os -from natsort import natsorted -from torch_scatter import scatter -from torch.distributions.bernoulli import Bernoulli -from torch_geometric.data import Data import yaml import argparse import time -''' -Sampler -''' -def sample_initializer(problem_type, probs, config, - device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'), data=None): - if problem_type in ["r_cheegercut", "n_cheegercut"]: - samples = torch.zeros(config['total_mcmc_num'], data.num_nodes) - index = data.sorted_degree_nodes[- config['total_mcmc_num']:] - for i in range(config['total_mcmc_num']): - samples[i][index[i]] = 1 - samples = samples.repeat(config['repeat_times'], 1) - return samples.t() - m = Bernoulli(probs) - samples = m.sample([config['total_mcmc_num'] * config['repeat_times']]) - samples = samples.detach().to(device) - return samples.t() +from dataloader import dataloader_select +from model import simple +from sampling import sampler_select, sample_initializer -def metro_sampling(probs, start_status, max_transfer_time, - device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): - num_node = len(probs) - num_chain = start_status.shape[1] - index_col = torch.tensor(list(range(num_chain))).to(device) - probs = probs.detach().to(device) - samples = start_status.bool().to(device) - - count = 0 - for t in range(max_transfer_time * 5): - if count >= num_chain*max_transfer_time: - break - index_row = torch.randint(low=0, high=num_node, size=[ - num_chain], device=device) - chosen_probs_base = probs[index_row] - chosen_value = samples[index_row, index_col] - chosen_probs = torch.where( - chosen_value, chosen_probs_base, 1-chosen_probs_base) - accept_rate = (1 - chosen_probs) / chosen_probs - r = torch.rand(num_chain, device=device) - is_accept = (r < accept_rate) - samples[index_row, index_col] = torch.where( - is_accept, ~chosen_value, chosen_value) - - count += is_accept.sum() - - return samples.float().to(device) - -def mcpg_sampling_ising(data, - start_result, probs, - num_ls, change_times, total_mcmc_num, - device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): - probs = probs.to(torch.device("cpu")) - num_edges = data.num_edges - edges = data.edge_index - nlr_graph = edges[0] - nlc_graph = edges[1] - edge_weight = data.edge_attr - edge_weight_sum = data.edge_weight_sum - graph_probs = start_result.clone() - # get probs - graph_probs = metro_sampling( - probs, graph_probs, change_times, device) - start = graph_probs.clone() - - temp = graph_probs[data.sorted_degree_nodes[0]].clone() - graph_probs 
+= temp - graph_probs = graph_probs % 2 - - graph_probs = (graph_probs - 0.5) * 2 + 0.5 - - # local search - temp = torch.zeros(4, graph_probs.size(dim=1)).to(device) - expected_cut = torch.zeros(graph_probs.size(dim=1)) - cnt = 0 - while True: - cnt += 1 - for i in range(num_edges): - index = data.sorted_degree_edges[i] - node_r = nlr_graph[index] - node_c = nlc_graph[index] - edges_r = data.n0_edges[index] - edges_c = data.n1_edges[index] - add_0 = data.add[0][index] - add_1 = data.add[1][index] - add_2 = data.add[2][index] - - temp_r_v = torch.mm(edges_r, graph_probs[data.n0[index]]) - temp_c_v = torch.mm(edges_c, graph_probs[data.n1[index]]) - - temp[1] = temp_r_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_0 - temp[2] = temp_c_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_1 - temp[0] = temp[1] + temp[2] + torch.rand(graph_probs.size(dim=1), - device=torch.device(device)) * 0.1 - add_2 - - max_index = torch.argmax(temp, dim=0) - graph_probs[node_r] = torch.floor(max_index / 2) - graph_probs[node_c] = max_index % 2 - - if cnt >= num_ls: - break - - # Convert Spins back to Ising model conventions -1 or +1 - spins = 2 * graph_probs - 1 # [nvar, chains] - - # raw energy per chain: E(s) = - sum J_ij s_i s_j - energy_vec = (edge_weight * (spins[nlr_graph] * spins[nlc_graph])).sum(dim=0) # [chains] - - # full‐chain advantage for policy gradient - adv = (energy_vec - energy_vec.mean()).to(device) # [chains] - - # pick the best chain out of each repeat group - energy_reshape = energy_vec.view(-1, total_mcmc_num) # [repeat_times, total_mcmc_num] - idx = torch.argmin(energy_reshape, dim=0) # [total_mcmc_num] - for i0 in range(total_mcmc_num): - idx[i0] = i0 + idx[i0] * total_mcmc_num - temp_min = energy_vec[idx] # [total_mcmc_num] - temp_min_info = graph_probs[:, idx] # [nvar, total_mcmc_num] - start_samples = start # [nvar, chains] - - # return best energy state, probs of the best energy states, the total probs, and the advantage - return temp_min, temp_min_info, start_samples, adv - -''' -Network -''' -class simple(torch.nn.Module): - def __init__(self, output_num): - super(simple, self).__init__() - self.lin = torch.nn.Linear(1, output_num) - self.sigmoid = torch.nn.Sigmoid() - - def reset_parameters(self): - self.lin.reset_parameters() - - def forward(self, alpha = 0.1, start_samples=None, value=None, - device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): - x = torch.ones(1).to(device) - x = self.lin(x) - x = self.sigmoid(x) - - x = (x-0.5) * 0.6 + 0.5 - probs = x - probs = probs.squeeze() - retdict = {} - reg = probs * torch.log(probs) + (1-probs) * torch.log(1-probs ) - reg = torch.mean(reg) - if start_samples == None: - retdict["output"] = [probs.squeeze(-1), "hist"] # output - retdict["reg"] = [reg, "sequence"] - retdict["loss"] = [alpha * reg, "sequence"] - return retdict - - res_samples = value.t().detach() - - start_samples_idx = start_samples * \ - probs + (1 - start_samples) * (1 - probs) - log_start_samples_idx = torch.log(start_samples_idx) - log_start_samples = log_start_samples_idx.sum(dim=1) - loss_ls = torch.mean(log_start_samples * res_samples) - loss = loss_ls + alpha * reg - - retdict["output"] = [probs.squeeze(-1), "hist"] # output - retdict["reg"] = [reg, "sequence"] - retdict["loss"] = [loss, "sequence"] - return retdict - - def __repr__(self): - return self.__class__.__name__ - -''' -Algorithm -''' def mcpg_solver(nvar, config, data, verbose=False): device = torch.device('cuda' if torch.cuda.is_available() else 
'cpu') - sampler = mcpg_sampling_ising + sampler = sampler_select(config["problem_type"]) change_times = int(nvar/10) # transition times for metropolis sampling net = simple(nvar) @@ -210,11 +56,11 @@ def mcpg_solver(nvar, config, data, verbose=False): probs = (torch.zeros(nvar)+0.5).to(device) tensor_probs = sample_initializer( config["problem_type"], probs, config, data=data) - temp_max, temp_max_info, temp_start_samples, value = sampler( + temp_min, temp_min_info, temp_start_samples, value = sampler( data, tensor_probs, probs, config['num_ls'], 0, config['total_mcmc_num']) - now_max_res = temp_max - now_max_info = temp_max_info - tensor_probs = temp_max_info.clone() + now_min_res = temp_min + now_min_info = temp_min_info + tensor_probs = temp_min_info.clone() tensor_probs = tensor_probs.repeat(1, config['repeat_times']) start_samples = temp_start_samples.t().to(device) @@ -222,168 +68,37 @@ def mcpg_solver(nvar, config, data, verbose=False): if epoch % config['sample_epoch_num'] == 0 and epoch > 0: probs = retdict["output"][0] probs = probs.detach() - temp_max, temp_max_info, start_samples_temp, value = sampler( + temp_min, temp_min_info, start_samples_temp, value = sampler( data, tensor_probs, probs, config['num_ls'], change_times, config['total_mcmc_num']) # update now_max for i0 in range(config['total_mcmc_num']): - if temp_max[i0] < now_max_res[i0]: - now_max_res[i0] = temp_max[i0] - now_max_info[:, i0] = temp_max_info[:, i0] + if temp_min[i0] < now_min_res[i0]: + now_min_res[i0] = temp_min[i0] + now_min_info[:, i0] = temp_min_info[:, i0] # update if min is too small - now_max = min(now_max_res).item() - now_max_index = torch.argmin(now_max_res) - - now_min = max(now_max_res).item() - now_min_index = torch.argmax(now_max_res) - - now_max_res[now_min_index] = now_max - - now_max_info[:, now_min_index] = now_max_info[:, now_max_index] - temp_max_info[:, now_min_index] = now_max_info[:, now_max_index] + now_min = min(now_min_res).item() + now_min_index = torch.argmax(now_min_res) + now_max = max(now_min_res).item() + now_max_index = torch.argmin(now_min_res) + now_min_res[now_min_index] = now_min + now_min_info[:, now_max_index] = now_min_info[:, now_min_index] + temp_min_info[:, now_max_index] = now_min_info[:, now_min_index] # select best samples - tensor_probs = temp_max_info.clone() + tensor_probs = temp_min_info.clone() tensor_probs = tensor_probs.repeat(1, config['repeat_times']) # construct the start point for next iteration start_samples = start_samples_temp.t() if verbose: - if config["problem_type"] == "maxsat" and len(data.pdata) == 7: - res = max(now_max_res).item() - if res > data.pdata[5] * data.pdata[6]: - res -= data.pdata[5] * data.pdata[6] - print("o {:.3f}".format(res)) - elif "obj_type" in config and config["obj_type"] == "neg": - print("o {:.3f}".format((-min(now_max_res).item()))) - else: - print("o {:f}".format((min(now_max_res).item()))) + print("o {:.3f}".format((min(now_min_res).item()))) del (retdict) - total_max = now_max_res - best_sort = torch.argsort(now_max_res, descending=False) - total_best_info = torch.squeeze(now_max_info[:, best_sort[0]]) - - return min(total_max).item(), total_best_info, now_max_res, now_max_info - -''' -Dataloader -''' -def maxcut_dataloader(path, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): - with open(path) as f: - fline = f.readline() - fline = fline.split() - num_nodes, num_edges = int(fline[0]), int(fline[1]) - edge_index = torch.LongTensor(2, num_edges) - edge_attr = torch.Tensor(num_edges, 1) - cnt 
= 0 - while True: - lines = f.readlines(num_edges * 2) - if not lines: - break - for line in lines: - line = line.rstrip('\n').split() - edge_index[0][cnt] = int(line[0]) - 1 - edge_index[1][cnt] = int(line[1]) - 1 - edge_attr[cnt][0] = float(line[2]) - cnt += 1 - data_maxcut = Data(num_nodes=num_nodes, - edge_index=edge_index, edge_attr=edge_attr) - data_maxcut = data_maxcut.to(device) - data_maxcut.edge_weight_sum = float(torch.sum(data_maxcut.edge_attr)) - - data_maxcut = append_neighbors(data_maxcut) - - data_maxcut.single_degree = [] - data_maxcut.weighted_degree = [] - tensor_abs_weighted_degree = [] - for i0 in range(data_maxcut.num_nodes): - data_maxcut.single_degree.append(len(data_maxcut.neighbors[i0])) - data_maxcut.weighted_degree.append( - float(torch.sum(data_maxcut.neighbor_edges[i0]))) - tensor_abs_weighted_degree.append( - float(torch.sum(torch.abs(data_maxcut.neighbor_edges[i0])))) - tensor_abs_weighted_degree = torch.tensor(tensor_abs_weighted_degree) - data_maxcut.sorted_degree_nodes = torch.argsort( - tensor_abs_weighted_degree, descending=True) + total_min = now_min_res + best_sort = torch.argsort(now_min_res, descending=False) + total_best_info = torch.squeeze(now_min_info[:, best_sort[0]]) - edge_degree = [] - add = torch.zeros(3, num_edges).to(device) - for i0 in range(num_edges): - edge_degree.append(abs(edge_attr[i0].item())*( - tensor_abs_weighted_degree[edge_index[0][i0]]+tensor_abs_weighted_degree[edge_index[1][i0]])) - node_r = edge_index[0][i0] - node_c = edge_index[1][i0] - add[0][i0] = - data_maxcut.weighted_degree[node_r] / \ - 2 + data_maxcut.edge_attr[i0] - 0.05 - add[1][i0] = - data_maxcut.weighted_degree[node_c] / \ - 2 + data_maxcut.edge_attr[i0] - 0.05 - add[2][i0] = data_maxcut.edge_attr[i0]+0.05 - - for i0 in range(num_nodes): - data_maxcut.neighbor_edges[i0] = data_maxcut.neighbor_edges[i0].unsqueeze( - 0) - data_maxcut.add = add - edge_degree = torch.tensor(edge_degree) - data_maxcut.sorted_degree_edges = torch.argsort( - edge_degree, descending=True) - - return data_maxcut, num_nodes - -def append_neighbors(data, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): - data.neighbors = [] - data.neighbor_edges = [] - num_nodes = data.num_nodes - for i in range(num_nodes): - data.neighbors.append([]) - data.neighbor_edges.append([]) - edge_number = data.edge_index.shape[1] - - for index in range(0, edge_number): - row = data.edge_index[0][index] - col = data.edge_index[1][index] - edge_weight = data.edge_attr[index][0].item() - - data.neighbors[row].append(col.item()) - data.neighbor_edges[row].append(edge_weight) - data.neighbors[col].append(row.item()) - data.neighbor_edges[col].append(edge_weight) - - data.n0 = [] - data.n1 = [] - data.n0_edges = [] - data.n1_edges = [] - for index in range(0, edge_number): - row = data.edge_index[0][index] - col = data.edge_index[1][index] - data.n0.append(data.neighbors[row].copy()) - data.n1.append(data.neighbors[col].copy()) - data.n0_edges.append(data.neighbor_edges[row].copy()) - data.n1_edges.append(data.neighbor_edges[col].copy()) - i = 0 - for i in range(len(data.n0[index])): - if data.n0[index][i] == col: - break - data.n0[index].pop(i) - data.n0_edges[index].pop(i) - for i in range(len(data.n1[index])): - if data.n1[index][i] == row: - break - data.n1[index].pop(i) - data.n1_edges[index].pop(i) - - data.n0[index] = torch.LongTensor(data.n0[index]).to(device) - data.n1[index] = torch.LongTensor(data.n1[index]).to(device) - data.n0_edges[index] = torch.tensor( - 
data.n0_edges[index]).unsqueeze(0).to(device) - data.n1_edges[index] = torch.tensor( - data.n1_edges[index]).unsqueeze(0).to(device) - - for i in range(num_nodes): - data.neighbors[i] = torch.LongTensor(data.neighbors[i]).to(device) - data.neighbor_edges[i] = torch.tensor( - data.neighbor_edges[i]).to(device) - - return data + return min(total_min).item(), total_best_info, now_min_res, now_min_info if __name__ == "__main__": device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') @@ -398,30 +113,14 @@ def append_neighbors(data, device=torch.device('cuda' if torch.cuda.is_available config = yaml.safe_load(f) path = args.problem_instance - input_files = [ f for f in os.listdir(path) ] - input_files = natsorted(input_files) - - os.makedirs(f"energies/{path[5:]}", exist_ok=True) - i = 0 - for instance in input_files: - start_time = time.perf_counter() - dataloader = maxcut_dataloader - data, nvar = dataloader(f"{path}/{instance}") - dataloader_t = time.perf_counter() - res, solutions, _, _ = mcpg_solver(nvar, config, data, verbose=True) - mcpg_t = time.perf_counter() - - if "obj_type" in config and config["obj_type"] == "neg": - print("OUTPUT: {:.2f}".format(-res)) - else: - print("OUTPUT: {:f}".format(res)) - - with open(f"energies/{path[5:]}/MCPG.txt", "a") as f: - f.write(f"{res}\n") - - print("DATA LOADING TIME: {:f}".format(dataloader_t - start_time)) - print("MCPG RUNNING TIME: {:f}".format(mcpg_t - dataloader_t)) + start_time = time.perf_counter() + dataloader = dataloader_select(config["problem_type"]) + data, nvar = dataloader(path) + dataloader_t = time.perf_counter() + res, solutions, _, _ = mcpg_solver(nvar, config, data, verbose=True) + mcpg_t = time.perf_counter() + - if i == 0: break - - + print("OUTPUT: {:f}".format(res)) + print("DATA LOADING TIME: {:.4f}".format(dataloader_t - start_time)) + print("MCPG RUNNING TIME: {:.4f}".format(mcpg_t - dataloader_t)) diff --git a/src/algorithms/mcpg/mcpg_single_file.py b/src/algorithms/mcpg/mcpg_single_file.py new file mode 100644 index 0000000..6351add --- /dev/null +++ b/src/algorithms/mcpg/mcpg_single_file.py @@ -0,0 +1,427 @@ +import torch +import os +from natsort import natsorted +from torch_scatter import scatter +from torch.distributions.bernoulli import Bernoulli +from torch_geometric.data import Data +import yaml +import argparse +import time + +''' +Sampler +''' +def sample_initializer(problem_type, probs, config, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu'), data=None): + if problem_type in ["r_cheegercut", "n_cheegercut"]: + samples = torch.zeros(config['total_mcmc_num'], data.num_nodes) + index = data.sorted_degree_nodes[- config['total_mcmc_num']:] + for i in range(config['total_mcmc_num']): + samples[i][index[i]] = 1 + samples = samples.repeat(config['repeat_times'], 1) + return samples.t() + m = Bernoulli(probs) + samples = m.sample([config['total_mcmc_num'] * config['repeat_times']]) + samples = samples.detach().to(device) + return samples.t() + +def metro_sampling(probs, start_status, max_transfer_time, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + num_node = len(probs) + num_chain = start_status.shape[1] + index_col = torch.tensor(list(range(num_chain))).to(device) + + probs = probs.detach().to(device) + samples = start_status.bool().to(device) + + count = 0 + for t in range(max_transfer_time * 5): + if count >= num_chain*max_transfer_time: + break + index_row = torch.randint(low=0, high=num_node, size=[ + num_chain], device=device) + chosen_probs_base 
= probs[index_row] + chosen_value = samples[index_row, index_col] + chosen_probs = torch.where( + chosen_value, chosen_probs_base, 1-chosen_probs_base) + accept_rate = (1 - chosen_probs) / chosen_probs + r = torch.rand(num_chain, device=device) + is_accept = (r < accept_rate) + samples[index_row, index_col] = torch.where( + is_accept, ~chosen_value, chosen_value) + + count += is_accept.sum() + + return samples.float().to(device) + +def mcpg_sampling_ising(data, + start_result, probs, + num_ls, change_times, total_mcmc_num, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + probs = probs.to(torch.device("cpu")) + num_edges = data.num_edges + edges = data.edge_index + nlr_graph = edges[0] + nlc_graph = edges[1] + edge_weight = data.edge_attr + edge_weight_sum = data.edge_weight_sum + graph_probs = start_result.clone() + # get probs + graph_probs = metro_sampling( + probs, graph_probs, change_times, device) + start = graph_probs.clone() + + temp = graph_probs[data.sorted_degree_nodes[0]].clone() + graph_probs += temp + graph_probs = graph_probs % 2 + + graph_probs = (graph_probs - 0.5) * 2 + 0.5 + + # local search + temp = torch.zeros(4, graph_probs.size(dim=1)).to(device) + expected_cut = torch.zeros(graph_probs.size(dim=1)) + cnt = 0 + while True: + cnt += 1 + for i in range(num_edges): + index = data.sorted_degree_edges[i] + node_r = nlr_graph[index] + node_c = nlc_graph[index] + edges_r = data.n0_edges[index] + edges_c = data.n1_edges[index] + add_0 = data.add[0][index] + add_1 = data.add[1][index] + add_2 = data.add[2][index] + + temp_r_v = torch.mm(edges_r, graph_probs[data.n0[index]]) + temp_c_v = torch.mm(edges_c, graph_probs[data.n1[index]]) + + temp[1] = temp_r_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_0 + temp[2] = temp_c_v + torch.rand(graph_probs.size(dim=1), device=device) * 0.1 + add_1 + temp[0] = temp[1] + temp[2] + torch.rand(graph_probs.size(dim=1), + device=torch.device(device)) * 0.1 - add_2 + + max_index = torch.argmax(temp, dim=0) + graph_probs[node_r] = torch.floor(max_index / 2) + graph_probs[node_c] = max_index % 2 + + if cnt >= num_ls: + break + + # Convert Spins back to Ising model conventions -1 or +1 + spins = 2 * graph_probs - 1 # [nvar, chains] + + # raw energy per chain: E(s) = - sum J_ij s_i s_j + energy_vec = (edge_weight * (spins[nlr_graph] * spins[nlc_graph])).sum(dim=0) # [chains] + + # full‐chain advantage for policy gradient + adv = (energy_vec - energy_vec.mean()).to(device) # [chains] + + # pick the best chain out of each repeat group + energy_reshape = energy_vec.view(-1, total_mcmc_num) # [repeat_times, total_mcmc_num] + idx = torch.argmin(energy_reshape, dim=0) # [total_mcmc_num] + for i0 in range(total_mcmc_num): + idx[i0] = i0 + idx[i0] * total_mcmc_num + temp_min = energy_vec[idx] # [total_mcmc_num] + temp_min_info = graph_probs[:, idx] # [nvar, total_mcmc_num] + start_samples = start # [nvar, chains] + + # return best energy state, probs of the best energy states, the total probs, and the advantage + return temp_min, temp_min_info, start_samples, adv + +''' +Network +''' +class simple(torch.nn.Module): + def __init__(self, output_num): + super(simple, self).__init__() + self.lin = torch.nn.Linear(1, output_num) + self.sigmoid = torch.nn.Sigmoid() + + def reset_parameters(self): + self.lin.reset_parameters() + + def forward(self, alpha = 0.1, start_samples=None, value=None, + device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + x = torch.ones(1).to(device) + x = self.lin(x) + x = 
self.sigmoid(x) + + x = (x-0.5) * 0.6 + 0.5 + probs = x + probs = probs.squeeze() + retdict = {} + reg = probs * torch.log(probs) + (1-probs) * torch.log(1-probs ) + reg = torch.mean(reg) + if start_samples == None: + retdict["output"] = [probs.squeeze(-1), "hist"] # output + retdict["reg"] = [reg, "sequence"] + retdict["loss"] = [alpha * reg, "sequence"] + return retdict + + res_samples = value.t().detach() + + start_samples_idx = start_samples * \ + probs + (1 - start_samples) * (1 - probs) + log_start_samples_idx = torch.log(start_samples_idx) + log_start_samples = log_start_samples_idx.sum(dim=1) + loss_ls = torch.mean(log_start_samples * res_samples) + loss = loss_ls + alpha * reg + + retdict["output"] = [probs.squeeze(-1), "hist"] # output + retdict["reg"] = [reg, "sequence"] + retdict["loss"] = [loss, "sequence"] + return retdict + + def __repr__(self): + return self.__class__.__name__ + +''' +Algorithm +''' +def mcpg_solver(nvar, config, data, verbose=False): + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + sampler = mcpg_sampling_ising + change_times = int(nvar/10) # transition times for metropolis sampling + + net = simple(nvar) + net.to(device).reset_parameters() + optimizer = torch.optim.Adam(net.parameters(), lr=config['lr_init']) + + start_samples = None + for epoch in range(config['max_epoch_num']): + + if epoch % config['reset_epoch_num'] == 0: + net.to(device).reset_parameters() + regular = config['regular_init'] + + net.train() + if epoch <= 0: + retdict = net(regular, None, None) + else: + retdict = net(regular, start_samples, value) + + retdict["loss"][0].backward() + torch.nn.utils.clip_grad_norm_(net.parameters(), 1) + optimizer.step() + + # get start samples + if epoch == 0: + probs = (torch.zeros(nvar)+0.5).to(device) + tensor_probs = sample_initializer( + config["problem_type"], probs, config, data=data) + temp_max, temp_max_info, temp_start_samples, value = sampler( + data, tensor_probs, probs, config['num_ls'], 0, config['total_mcmc_num']) + now_max_res = temp_max + now_max_info = temp_max_info + tensor_probs = temp_max_info.clone() + tensor_probs = tensor_probs.repeat(1, config['repeat_times']) + start_samples = temp_start_samples.t().to(device) + + # get samples + if epoch % config['sample_epoch_num'] == 0 and epoch > 0: + probs = retdict["output"][0] + probs = probs.detach() + temp_max, temp_max_info, start_samples_temp, value = sampler( + data, tensor_probs, probs, config['num_ls'], change_times, config['total_mcmc_num']) + # update now_max + for i0 in range(config['total_mcmc_num']): + if temp_max[i0] < now_max_res[i0]: + now_max_res[i0] = temp_max[i0] + now_max_info[:, i0] = temp_max_info[:, i0] + + # update if min is too small + now_max = min(now_max_res).item() + now_max_index = torch.argmin(now_max_res) + + now_min = max(now_max_res).item() + now_min_index = torch.argmax(now_max_res) + + now_max_res[now_min_index] = now_max + + now_max_info[:, now_min_index] = now_max_info[:, now_max_index] + temp_max_info[:, now_min_index] = now_max_info[:, now_max_index] + + # select best samples + tensor_probs = temp_max_info.clone() + tensor_probs = tensor_probs.repeat(1, config['repeat_times']) + # construct the start point for next iteration + start_samples = start_samples_temp.t() + if verbose: + if config["problem_type"] == "maxsat" and len(data.pdata) == 7: + res = max(now_max_res).item() + if res > data.pdata[5] * data.pdata[6]: + res -= data.pdata[5] * data.pdata[6] + print("o {:.3f}".format(res)) + elif "obj_type" in config and 
config["obj_type"] == "neg": + print("o {:.3f}".format((-min(now_max_res).item()))) + else: + print("o {:f}".format((min(now_max_res).item()))) + del (retdict) + + total_max = now_max_res + best_sort = torch.argsort(now_max_res, descending=False) + total_best_info = torch.squeeze(now_max_info[:, best_sort[0]]) + + return min(total_max).item(), total_best_info, now_max_res, now_max_info + +''' +Dataloader +''' +def maxcut_dataloader(path, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + with open(path) as f: + fline = f.readline() + fline = fline.split() + num_nodes, num_edges = int(fline[0]), int(fline[1]) + edge_index = torch.LongTensor(2, num_edges) + edge_attr = torch.Tensor(num_edges, 1) + cnt = 0 + while True: + lines = f.readlines(num_edges * 2) + if not lines: + break + for line in lines: + line = line.rstrip('\n').split() + edge_index[0][cnt] = int(line[0]) - 1 + edge_index[1][cnt] = int(line[1]) - 1 + edge_attr[cnt][0] = float(line[2]) + cnt += 1 + data_maxcut = Data(num_nodes=num_nodes, + edge_index=edge_index, edge_attr=edge_attr) + data_maxcut = data_maxcut.to(device) + data_maxcut.edge_weight_sum = float(torch.sum(data_maxcut.edge_attr)) + + data_maxcut = append_neighbors(data_maxcut) + + data_maxcut.single_degree = [] + data_maxcut.weighted_degree = [] + tensor_abs_weighted_degree = [] + for i0 in range(data_maxcut.num_nodes): + data_maxcut.single_degree.append(len(data_maxcut.neighbors[i0])) + data_maxcut.weighted_degree.append( + float(torch.sum(data_maxcut.neighbor_edges[i0]))) + tensor_abs_weighted_degree.append( + float(torch.sum(torch.abs(data_maxcut.neighbor_edges[i0])))) + tensor_abs_weighted_degree = torch.tensor(tensor_abs_weighted_degree) + data_maxcut.sorted_degree_nodes = torch.argsort( + tensor_abs_weighted_degree, descending=True) + + edge_degree = [] + add = torch.zeros(3, num_edges).to(device) + for i0 in range(num_edges): + edge_degree.append(abs(edge_attr[i0].item())*( + tensor_abs_weighted_degree[edge_index[0][i0]]+tensor_abs_weighted_degree[edge_index[1][i0]])) + node_r = edge_index[0][i0] + node_c = edge_index[1][i0] + add[0][i0] = - data_maxcut.weighted_degree[node_r] / \ + 2 + data_maxcut.edge_attr[i0] - 0.05 + add[1][i0] = - data_maxcut.weighted_degree[node_c] / \ + 2 + data_maxcut.edge_attr[i0] - 0.05 + add[2][i0] = data_maxcut.edge_attr[i0]+0.05 + + for i0 in range(num_nodes): + data_maxcut.neighbor_edges[i0] = data_maxcut.neighbor_edges[i0].unsqueeze( + 0) + data_maxcut.add = add + edge_degree = torch.tensor(edge_degree) + data_maxcut.sorted_degree_edges = torch.argsort( + edge_degree, descending=True) + + return data_maxcut, num_nodes + +def append_neighbors(data, device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')): + data.neighbors = [] + data.neighbor_edges = [] + num_nodes = data.num_nodes + for i in range(num_nodes): + data.neighbors.append([]) + data.neighbor_edges.append([]) + edge_number = data.edge_index.shape[1] + + for index in range(0, edge_number): + row = data.edge_index[0][index] + col = data.edge_index[1][index] + edge_weight = data.edge_attr[index][0].item() + + data.neighbors[row].append(col.item()) + data.neighbor_edges[row].append(edge_weight) + data.neighbors[col].append(row.item()) + data.neighbor_edges[col].append(edge_weight) + + data.n0 = [] + data.n1 = [] + data.n0_edges = [] + data.n1_edges = [] + for index in range(0, edge_number): + row = data.edge_index[0][index] + col = data.edge_index[1][index] + data.n0.append(data.neighbors[row].copy()) + 
data.n1.append(data.neighbors[col].copy()) + data.n0_edges.append(data.neighbor_edges[row].copy()) + data.n1_edges.append(data.neighbor_edges[col].copy()) + i = 0 + for i in range(len(data.n0[index])): + if data.n0[index][i] == col: + break + data.n0[index].pop(i) + data.n0_edges[index].pop(i) + for i in range(len(data.n1[index])): + if data.n1[index][i] == row: + break + data.n1[index].pop(i) + data.n1_edges[index].pop(i) + + data.n0[index] = torch.LongTensor(data.n0[index]).to(device) + data.n1[index] = torch.LongTensor(data.n1[index]).to(device) + data.n0_edges[index] = torch.tensor( + data.n0_edges[index]).unsqueeze(0).to(device) + data.n1_edges[index] = torch.tensor( + data.n1_edges[index]).unsqueeze(0).to(device) + + for i in range(num_nodes): + data.neighbors[i] = torch.LongTensor(data.neighbors[i]).to(device) + data.neighbor_edges[i] = torch.tensor( + data.neighbor_edges[i]).to(device) + + return data + +if __name__ == "__main__": + device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') + parser = argparse.ArgumentParser() + parser.add_argument("config_file", type=str, + help="input the configuration file for the mcpg solver") + parser.add_argument("problem_instance", type=str, + help="input the data file for the problem instance") + + args = parser.parse_args() + with open(args.config_file) as f: + config = yaml.safe_load(f) + + path = args.problem_instance + input_files = [ f for f in os.listdir(path) ] + input_files = natsorted(input_files) + + os.makedirs(f"energies/{path[5:]}", exist_ok=True) + i = 0 + for instance in input_files: + start_time = time.perf_counter() + dataloader = maxcut_dataloader + data, nvar = dataloader(f"{path}/{instance}") + dataloader_t = time.perf_counter() + res, solutions, _, _ = mcpg_solver(nvar, config, data, verbose=True) + mcpg_t = time.perf_counter() + + if "obj_type" in config and config["obj_type"] == "neg": + print("OUTPUT: {:.2f}".format(-res)) + else: + print("OUTPUT: {:f}".format(res)) + + with open(f"energies/{path[5:]}/MCPG.txt", "a") as f: + f.write(f"{res}\n") + + print("DATA LOADING TIME: {:f}".format(dataloader_t - start_time)) + print("MCPG RUNNING TIME: {:f}".format(mcpg_t - dataloader_t)) + + if i == 0: break + + diff --git a/src/algorithms/vca/vca.py b/src/algorithms/vca/vca.py index 3aa657c..e3fd434 100644 --- a/src/algorithms/vca/vca.py +++ b/src/algorithms/vca/vca.py @@ -1,529 +1,25 @@ import tensorflow as tf import numpy as np import argparse -from math import sqrt -import os import time import random -from natsort import natsorted -from typing import Any, Optional, Union, Text, Sequence, Tuple, List - -Tensor = Any - -''' -Config -''' -class Config: - def __init__(self, graph_path, seed): - Nx, Ny, Jz = self.read_graph(graph_path) - self.seed = seed - - self.Nx = Nx - self.Ny = Ny - self.N = Nx*Ny #total number of sites - self.Jz = Jz - - '''model''' - self.num_units = 40 #number of memory units - self.activation_function = tf.nn.elu #non-linear activation function for the 2D Tensorized RNN cell - - '''training''' - self.numsamples = 50 #number of samples used for training - self.lr = 5*(1e-4) #learning rate - self.T0 = 2 #Initial temperature - self.Bx0 = 0 #Initial magnetic field - self.num_warmup_steps = 1000 #number of warmup steps - self.num_annealing_steps = 2**8 #number of annealing steps - self.num_equilibrium_steps = 5 #number of training steps after each annealing step - - def read_graph(self, graph_path): - edge_list = dict() - with open(graph_path, "r") as f: - line = f.readline() - is_first_line = 
True - while line is not None and line != '': - if is_first_line: - nodes, edges = line.split(" ") - num_nodes = int(nodes) - num_edges = int(edges) - is_first_line = False - else: - node1, node2, weight = line.split(" ") - edge_list[(int(node1), int(node2))] = float(weight) - line = f.readline() - - Nx = int(sqrt(num_nodes)) - Ny = Nx - Jz = np.zeros((Nx, Ny, 2), dtype=np.float64) - for i in range(Nx): - for j in range(Ny): - if i != Nx-1: - right = edge_list.get((i+j*Ny+1, i+1+j*Ny+1)) - if right is not None: - Jz[i, j, 0] = right - if j != Ny-1: - down = edge_list.get((i+j*Ny+1, i+(j+1)*Ny+1)) - if down is not None: - Jz[i, j, 1] = down - return Nx, Ny, Jz - -''' -Network -''' -class MDRNNWavefunction(object): - def __init__(self,systemsize_x = None, systemsize_y = None,cell=None,activation=None,num_units = None,scope='RNNWavefunction',seed = 111): - self.graph=tf.Graph() - self.scope=scope #Label of the RNN wavefunction - self.Nx=systemsize_x - self.Ny=systemsize_y - - random.seed(seed) # `python` built-in pseudo-random generator - np.random.seed(seed) # numpy pseudo-random generator - - #Defining the neural network - with self.graph.as_default(): - with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): - - tf.set_random_seed(seed) # tensorflow pseudo-random generator - - #Defining the 2D Tensorized RNN cell with non-weight sharing - self.rnn=[cell(num_units = num_units, activation = activation, name="rnn_"+str(0)+str(i),dtype=tf.float64) for i in range(self.Nx*self.Ny)] - self.dense = [tf.layers.Dense(2,activation=tf.nn.softmax,name='wf_dense'+str(i), dtype = tf.float64) for i in range(self.Nx*self.Ny)] - - def sample(self,numsamples,inputdim): - with self.graph.as_default(): #Call the default graph, used if willing to create multiple graphs. - with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): - self.inputdim=inputdim - self.outputdim=self.inputdim - self.numsamples=numsamples - - samples=[[[] for nx in range(self.Nx)] for ny in range(self.Ny)] - probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] - rnn_states = {} - inputs = {} - - for ny in range(self.Ny): #Loop over the boundaries for initialization - if ny%2==0: - nx = -1 - # print(nx,ny) - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. - - if ny%2==1: - nx = self.Nx - # print(nx,ny) - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. - - - for nx in range(self.Nx): #Loop over the boundaries for initialization - ny = -1 - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
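# --- Editor's note: illustrative sketch, not part of the original patch ----
# The boundary entries created above seed zero RNN states and zero inputs for
# the "virtual" sites just outside the lattice (nx = -1 or Nx, ny = -1), so
# every real site can read a horizontal and a vertical neighbour. The loop
# that follows then visits the lattice in a zigzag (boustrophedon) order.
# `zigzag_order` below is a hypothetical helper, added only to illustrate
# that ordering; it does not exist in this codebase:
#
#   def zigzag_order(Nx, Ny):
#       order = []
#       for ny in range(Ny):
#           xs = range(Nx) if ny % 2 == 0 else range(Nx - 1, -1, -1)
#           order += [(nx, ny) for nx in xs]
#       return order
#
#   zigzag_order(3, 2)  # -> [(0, 0), (1, 0), (2, 0), (2, 1), (1, 1), (0, 1)]
# ----------------------------------------------------------------------------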
- - #Making a loop over the sites with the 2DRNN - for ny in range(self.Ny): - - if ny%2 == 0: +from config import config +from DilatedRNN import DilatedRNNWavefunction +from utils import * - for nx in range(self.Nx): #left to right - rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) - - output=self.dense[ny*self.Nx+nx](rnn_output) - sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) - samples[nx][ny] = sample_temp - probs[nx][ny] = output - inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) - - - if ny%2 == 1: - - for nx in range(self.Nx-1,-1,-1): #right to left - - rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) - - output=self.dense[ny*self.Nx+nx](rnn_output) - sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) - samples[nx][ny] = sample_temp - probs[nx][ny] = output - inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) - - - self.samples=tf.transpose(tf.stack(values=samples,axis=0), perm = [2,0,1]) - - probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) - one_hot_samples=tf.one_hot(self.samples,depth=self.inputdim, dtype = tf.float64) - self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) - - return self.samples,self.log_probs - - def log_probability(self,samples,inputdim): - with self.graph.as_default(): - - self.inputdim=inputdim - self.outputdim=self.inputdim - - self.numsamples=tf.shape(samples)[0] - - #Initial input to feed to the lstm - self.outputdim=self.inputdim - - - samples_=tf.transpose(samples, perm = [1,2,0]) - rnn_states = {} - inputs = {} - - for ny in range(self.Ny): #Loop over the boundaries for initialization - if ny%2==0: - nx = -1 - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. - - if ny%2==1: - nx = self.Nx - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. - - - for nx in range(self.Nx): #Loop over the boundaries for initialization - ny = -1 - rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) - inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
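# --- Editor's note: illustrative sketch, not part of the original patch ----
# log_probability() repeats the same zigzag pass as sample(), but instead of
# drawing new spins it feeds the supplied `samples` back in (one-hot), so the
# per-site softmax outputs can be gathered into
#     log p(s) = sum_{nx, ny} log( probs[nx, ny, s_{nx, ny}] ).
# A NumPy sketch of that final reduction, assuming probs has shape
# [numsamples, Nx, Ny, 2] and samples has shape [numsamples, Nx, Ny];
# `log_prob_of_samples` is a hypothetical name used only for illustration:
#
#   import numpy as np
#   def log_prob_of_samples(probs, samples):
#       picked = np.take_along_axis(probs, samples[..., None], axis=-1)[..., 0]
#       return np.log(picked).sum(axis=(1, 2))
# ----------------------------------------------------------------------------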
- - - with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): - probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] - - #Making a loop over the sites with the 2DRNN - for ny in range(self.Ny): - - if ny%2 == 0: - - for nx in range(self.Nx): #left to right - - rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) - - output=self.dense[ny*self.Nx+nx](rnn_output) - sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) - probs[nx][ny] = output - inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) - - if ny%2 == 1: - - for nx in range(self.Nx-1,-1,-1): #right to left - - rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) - - output=self.dense[ny*self.Nx+nx](rnn_output) - sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) - probs[nx][ny] = output - inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) - - probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) - one_hot_samples=tf.one_hot(samples,depth=self.inputdim, dtype = tf.float64) - - self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) - - return self.log_probs - -class MDTensorizedRNNCell(tf.contrib.rnn.RNNCell): - """The 2D Tensorized RNN cell. - """ - def __init__(self, num_units = None, activation = None, name=None, dtype = None, reuse=None): - super(MDTensorizedRNNCell, self).__init__(_reuse=reuse, name=name) - # save class variables - self._num_in = 2 - self._num_units = num_units - self._state_size = num_units - self._output_size = num_units - self.activation = activation - - # set up input -> hidden connection - self.W = tf.get_variable("W_"+name, shape=[num_units, 2*num_units, 2*self._num_in], - initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) - - self.b = tf.get_variable("b_"+name, shape=[num_units], - initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) - - @property - def input_size(self): - return self._num_in # real - - @property - def state_size(self): - return self._state_size # real - - @property - def output_size(self): - return self._output_size # real - - def call(self, inputs, states): - - inputstate_mul = tf.einsum('ij,ik->ijk', tf.concat(states, 1),tf.concat(inputs,1)) - # prepare input linear combination - state_mul = tensordot(tf, inputstate_mul, self.W, axes=[[1,2],[1,2]]) # [batch_sz, num_units] - - preact = state_mul + self.b - - output = self.activation(preact) # [batch_sz, num_units] C - - new_state = output - - return output, new_state - -def tensordot(tf, - a, - b, - axes, - name: Optional[Text] = None) -> Tensor: - - def _tensordot_should_flip(contraction_axes: List[int], - free_axes: List[int]) -> bool: - # NOTE: This will fail if the arguments contain any Tensors. 
- if contraction_axes and free_axes: - return bool(np.mean(contraction_axes) < np.mean(free_axes)) - return False - - def _tranpose_if_necessary(tensor: Tensor, perm: List[int]) -> Tensor: - if perm == list(range(len(perm))): - return tensor - return tf.transpose(tensor, perm) - - def _reshape_if_necessary(tensor: Tensor, - new_shape: List[int]) -> Tensor: - cur_shape = tensor.get_shape().as_list() - if (len(new_shape) == len(cur_shape) and - all(d0 == d1 for d0, d1 in zip(cur_shape, new_shape))): - return tensor - return tf.reshape(tensor, new_shape) - - def _tensordot_reshape( - a: Tensor, axes: Union[Sequence[int], Tensor], is_right_term=False - ) -> Tuple[Tensor, Union[List[int], Tensor], Optional[List[int]], bool]: - if a.get_shape().is_fully_defined() and isinstance(axes, (list, tuple)): - shape_a = a.get_shape().as_list() - # NOTE: This will fail if axes contains any tensors - axes = [i if i >= 0 else i + len(shape_a) for i in axes] - free = [i for i in range(len(shape_a)) if i not in axes] - flipped = _tensordot_should_flip(axes, free) - - free_dims = [shape_a[i] for i in free] - prod_free = int(np.prod([shape_a[i] for i in free])) - prod_axes = int(np.prod([shape_a[i] for i in axes])) - perm = axes + free if flipped else free + axes - new_shape = [prod_axes, prod_free] if flipped else [prod_free, prod_axes] - transposed_a = _tranpose_if_necessary(a, perm) - reshaped_a = _reshape_if_necessary(transposed_a, new_shape) - transpose_needed = (not flipped) if is_right_term else flipped - return reshaped_a, free_dims, free_dims, transpose_needed - if a.get_shape().ndims is not None and isinstance(axes, (list, tuple)): - shape_a = a.get_shape().as_list() - axes = [i if i >= 0 else i + len(shape_a) for i in axes] - free = [i for i in range(len(shape_a)) if i not in axes] - flipped = _tensordot_should_flip(axes, free) - perm = axes + free if flipped else free + axes - - axes_dims = [shape_a[i] for i in axes] - free_dims = [shape_a[i] for i in free] - free_dims_static = free_dims - axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") - free = tf.convert_to_tensor(free, dtype=tf.dtypes.int32, name="free") - shape_a = tf.shape(a) - transposed_a = _tranpose_if_necessary(a, perm) - else: - free_dims_static = None - shape_a = tf.shape(a) - rank_a = tf.rank(a) - axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") - axes = tf.where(axes >= 0, axes, axes + rank_a) - free, _ = tf.compat.v1.setdiff1d(tf.range(rank_a), axes) - flipped = is_right_term - perm = ( - tf.concat([axes, free], 0) if flipped else tf.concat([free, axes], 0)) - transposed_a = tf.transpose(a, perm) - - free_dims = tf.gather(shape_a, free) - axes_dims = tf.gather(shape_a, axes) - prod_free_dims = tf.reduce_prod(free_dims) - prod_axes_dims = tf.reduce_prod(axes_dims) - - if flipped: - new_shape = tf.stack([prod_axes_dims, prod_free_dims]) - else: - new_shape = tf.stack([prod_free_dims, prod_axes_dims]) - reshaped_a = tf.reshape(transposed_a, new_shape) - transpose_needed = (not flipped) if is_right_term else flipped - return reshaped_a, free_dims, free_dims_static, transpose_needed - - def _tensordot_axes(a: Tensor, axes - ) -> Tuple[Any, Any]: - """Generates two sets of contraction axes for the two tensor arguments.""" - a_shape = a.get_shape() - if isinstance(axes, tf.compat.integral_types): - if axes < 0: - raise ValueError("'axes' must be at least 0.") - if a_shape.ndims is not None: - if axes > a_shape.ndims: - raise ValueError("'axes' must not be larger than the number of " - "dimensions of 
tensor %s." % a) - return (list(range(a_shape.ndims - axes, - a_shape.ndims)), list(range(axes))) - rank = tf.rank(a) - return (tf.range(rank - axes, rank, - dtype=tf.int32), tf.range(axes, dtype=tf.int32)) - if isinstance(axes, (list, tuple)): - if len(axes) != 2: - raise ValueError("'axes' must be an integer or have length 2.") - a_axes = axes[0] - b_axes = axes[1] - if isinstance(a_axes, tf.compat.integral_types) and \ - isinstance(b_axes, tf.compat.integral_types): - a_axes = [a_axes] - b_axes = [b_axes] - # NOTE: This fails if either a_axes and b_axes are Tensors. - if len(a_axes) != len(b_axes): - raise ValueError( - "Different number of contraction axes 'a' and 'b', %s != %s." % - (len(a_axes), len(b_axes))) - - # The contraction indices do not need to be permuted. - # Sort axes to avoid unnecessary permutations of a. - # NOTE: This fails if either a_axes and b_axes contain Tensors. - # pylint: disable=len-as-condition - if len(a_axes) > 0: - a_axes, b_axes = list(zip(*sorted(zip(a_axes, b_axes)))) - - return a_axes, b_axes - axes = tf.convert_to_tensor(axes, name="axes", dtype=tf.int32) - return axes[0], axes[1] - - with tf.compat.v1.name_scope(name, "Tensordot", [a, b, axes]) as _name: - a = tf.convert_to_tensor(a, name="a") - b = tf.convert_to_tensor(b, name="b") - a_axes, b_axes = _tensordot_axes(a, axes) - a_reshape, a_free_dims, a_free_dims_static, a_transp = _tensordot_reshape( - a, a_axes) - b_reshape, b_free_dims, b_free_dims_static, b_transp = _tensordot_reshape( - b, b_axes, is_right_term=True) - - ab_matmul = tf.matmul( - a_reshape, b_reshape, transpose_a=a_transp, transpose_b=b_transp) - - if isinstance(a_free_dims, list) and isinstance(b_free_dims, list): - return tf.reshape(ab_matmul, a_free_dims + b_free_dims, name=_name) - a_free_dims = tf.convert_to_tensor(a_free_dims, dtype=tf.dtypes.int32) - b_free_dims = tf.convert_to_tensor(b_free_dims, dtype=tf.dtypes.int32) - product = tf.reshape( - ab_matmul, tf.concat([a_free_dims, b_free_dims], 0), name=_name) - if a_free_dims_static is not None and b_free_dims_static is not None: - product.set_shape(a_free_dims_static + b_free_dims_static) - return product - -''' -Utils -''' -def Ising2D_diagonal_matrixelements(Jz, samples): - numsamples = samples.shape[0] - Nx = samples.shape[1] - Ny = samples.shape[2] - - N = Nx*Ny #Total number of spins - - local_energies = np.zeros((numsamples), dtype = np.float64) - - for i in range(Nx-1): #diagonal elements (right neighbours) - values = samples[:,i]+samples[:,i+1] - valuesT = np.copy(values) - valuesT[values==2] = +1 #If both spins are up - valuesT[values==0] = +1 #If both spins are down - valuesT[values==1] = -1 #If they are opposite - - local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) - - for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) - values = samples[:,:,i]+samples[:,:,i+1] - valuesT = np.copy(values) - valuesT[values==2] = +1 #If both spins are up - valuesT[values==0] = +1 #If both spins are down - valuesT[values==1] = -1 #If they are opposite - - local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) - - return local_energies - -def Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess): - numsamples = samples.shape[0] - Nx = samples.shape[1] - Ny = samples.shape[2] - - N = Nx*Ny #Total number of spins - - local_energies = np.zeros((numsamples), dtype = np.float64) - - for i in range(Nx-1): #diagonal elements (right neighbours) - values = 
samples[:,i]+samples[:,i+1] - valuesT = np.copy(values) - valuesT[values==2] = +1 #If both spins are up - valuesT[values==0] = +1 #If both spins are down - valuesT[values==1] = -1 #If they are opposite - - local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) - - for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) - values = samples[:,:,i]+samples[:,:,i+1] - valuesT = np.copy(values) - valuesT[values==2] = +1 #If both spins are up - valuesT[values==0] = +1 #If both spins are down - valuesT[values==1] = -1 #If they are opposite - - local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) - - - queue_samples[0] = samples #storing the diagonal samples - - if Bx != 0: - for i in range(Nx): #Non-diagonal elements - for j in range(Ny): - valuesT = np.copy(samples) - valuesT[:,i,j][samples[:,i,j]==1] = 0 #Flip - valuesT[:,i,j][samples[:,i,j]==0] = 1 #Flip - - queue_samples[i*Ny+j+1] = valuesT - - len_sigmas = (N+1)*numsamples - steps = len_sigmas//50000+1 #I want a maximum in batch size just to not allocate too much memory - # print("Total num of steps =", steps) - queue_samples_reshaped = np.reshape(queue_samples, [(N+1)*numsamples, Nx,Ny]) - for i in range(steps): - if i < steps-1: - cut = slice((i*len_sigmas)//steps,((i+1)*len_sigmas)//steps) - else: - cut = slice((i*len_sigmas)//steps,len_sigmas) - log_probs[cut] = sess.run(log_probs_tensor, feed_dict={samples_placeholder:queue_samples_reshaped[cut]}) - - log_probs_reshaped = np.reshape(log_probs, [N+1,numsamples]) - for j in range(numsamples): - local_energies[j] += -Bx*np.sum(np.exp(0.5*log_probs_reshaped[1:,j]-0.5*log_probs_reshaped[0,j])) - - return local_energies - -''' -Variational Classical Annealing -''' -def run_vca(config: Config): +def vca_solver(config: config): seed = config.seed tf.compat.v1.reset_default_graph() - random.seed(seed) # `python` built-in pseudo-random generator - np.random.seed(seed) # numpy pseudo-random generator + random.seed(seed) # `python` built-in pseudo-random generator + np.random.seed(seed) # numpy pseudo-random generator tf.compat.v1.set_random_seed(seed) # tensorflow pseudo-random generator - Nx = config.Nx - Ny = config.Ny N = config.N Jz = config.Jz num_units = config.num_units + num_layers = config.num_layers activation_function = config.activation_function numsamples = config.numsamples @@ -544,9 +40,10 @@ def run_vca(config: Config): print("\nNumber of annealing steps = {0}".format(num_annealing_steps)) print("Number of training steps = {0}".format(num_steps)) - MDRNNWF = MDRNNWavefunction(systemsize_x = Nx, systemsize_y = Ny ,num_units = num_units,cell=MDTensorizedRNNCell, activation = activation_function, seed = seed) #contains the graph with the RNNs - with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): - with MDRNNWF.graph.as_default(): + units = [num_units] * num_layers + DRNNWF = DilatedRNNWavefunction(N, units=units, layers=num_layers, cell=tf.nn.rnn_cell.BasicRNNCell, activation=activation_function, seed=seed) # contains the graph with the RNNs + with tf.compat.v1.variable_scope(DRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with DRNNWF.graph.as_default(): global_step = tf.Variable(0, trainable=False) learningrate_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=[]) @@ -557,12 +54,11 @@ def run_vca(config: Config): #Defining Tensorflow placeholders Eloc=tf.compat.v1.placeholder(dtype=tf.float64,shape=[numsamples]) - 
sampleplaceholder_forgrad=tf.compat.v1.placeholder(dtype=tf.int32,shape=[numsamples,Nx,Ny]) - log_probs_forgrad = MDRNNWF.log_probability(sampleplaceholder_forgrad,inputdim=2) - - samples_placeholder=tf.compat.v1.placeholder(dtype=tf.int32,shape=(None,Nx, Ny)) - log_probs_tensor=MDRNNWF.log_probability(samples_placeholder,inputdim=2) - samplesandprobs = MDRNNWF.sample(numsamples=numsamples,inputdim=2) + sampleplaceholder_forgrad=tf.compat.v1.placeholder(dtype=tf.int32,shape=[numsamples,N]) + log_probs_forgrad = DRNNWF.log_probability(sampleplaceholder_forgrad,inputdim=2) + samples_placeholder=tf.compat.v1.placeholder(dtype=tf.int32,shape=(None,N)) + log_probs_tensor=DRNNWF.log_probability(samples_placeholder,inputdim=2) + samplesandprobs = DRNNWF.sample(numsamples=numsamples,inputdim=2) T_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=()) @@ -571,30 +67,34 @@ def run_vca(config: Config): cost = tf.reduce_mean(tf.multiply(log_probs_forgrad,tf.stop_gradient(Floc))) - tf.reduce_mean(log_probs_forgrad)*tf.reduce_mean(tf.stop_gradient(Floc)) gradients, variables = zip(*optimizer.compute_gradients(cost)) + #Calculate Gradients--------------- + #Define the optimization step optstep=optimizer.apply_gradients(zip(gradients,variables), global_step = global_step) + #Tensorflow saver to checkpoint saver=tf.compat.v1.train.Saver() + #For initialization init=tf.compat.v1.global_variables_initializer() initialize_parameters = tf.initialize_all_variables() config = tf.compat.v1.ConfigProto() config.gpu_options.allow_growth = True - sess=tf.compat.v1.Session(graph=MDRNNWF.graph, config=config) + sess=tf.compat.v1.Session(graph=DRNNWF.graph, config=config) sess.run(init) - ## Run Variational Annealing - with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): - with MDRNNWF.graph.as_default(): - #To store data + # Run Variational Annealing + with tf.compat.v1.variable_scope(DRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with DRNNWF.graph.as_default(): + # To store data meanEnergy=[] varEnergy=[] varFreeEnergy = [] meanFreeEnergy = [] - samples = np.ones((numsamples, Nx, Ny), dtype=np.int32) - queue_samples = np.zeros((N+1, numsamples, Nx, Ny), dtype = np.int32) + samples = np.ones((numsamples, N), dtype=np.int32) + queue_samples = np.zeros((N+1, numsamples, N), dtype = np.int32) log_probs = np.zeros((N+1)*numsamples, dtype=np.float64) T = T0 #initializing temperature @@ -619,12 +119,12 @@ def run_vca(config: Config): samples, log_probabilities = sess.run(samplesandprobs) # Estimating the local energies - local_energies = Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess) + local_energies = Fullyconnected_localenergies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess) meanE = np.mean(local_energies) varE = np.var(local_energies) - #adding elements to be saved + # adding elements to be saved meanEnergy.append(meanE) varEnergy.append(varE) @@ -643,65 +143,40 @@ def run_vca(config: Config): if it == num_annealing_steps*num_equilibrium_steps + num_warmup_steps: Nsteps = 20 - numsamples_estimation = 10**6 #Num samples to be obtained at the end + numsamples_estimation = 10**5 #Num samples to be obtained at the end numsamples_perstep = numsamples_estimation//Nsteps #The number of steps taken to get "numsamples_estimation" samples (to avoid memory allocation issues) - samplesandprobs_final = MDRNNWF.sample(numsamples=numsamples_perstep,inputdim=2) + samplesandprobs_final = 
DRNNWF.sample(numsamples=numsamples_perstep,inputdim=2) energies = np.zeros((numsamples_estimation)) - solutions = np.zeros((numsamples_estimation, Nx, Ny)) + solutions = np.zeros((numsamples_estimation, N)) print("\nSaving energy and variance before the end of annealing") for i in range(Nsteps): - # print("\nsampling started") samples_final, _ = sess.run(samplesandprobs_final) - # print("\nsampling finished") - energies[i*numsamples_perstep:(i+1)*numsamples_perstep] = Ising2D_diagonal_matrixelements(Jz,samples_final) + energies[i*numsamples_perstep:(i+1)*numsamples_perstep] = Fullyconnected_diagonal_matrixelements(Jz,samples_final) solutions[i*numsamples_perstep:(i+1)*numsamples_perstep] = samples_final print("Sampling step:" , i+1, "/", Nsteps) print("meanE = ", np.mean(energies)) print("varE = ", np.var(energies)) print("minE = ",np.min(energies)) + print("Elapsed time is =", time.time()-start, " seconds") return np.mean(energies), np.min(energies) - #Run gradient descent step + # Run gradient descent step sess.run(optstep,feed_dict={Eloc:local_energies, sampleplaceholder_forgrad: samples, learningrate_placeholder: lr, T_placeholder:T}) if it%5 == 0: print("Elapsed time is =", time.time()-start, " seconds") print('\n\n') - - -''' -Run VCA -''' + if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("problem_instance", type=str, help="input the data file for the problem instance") - args = parser.parse_args() - data_dir = args.problem_instance - input_files = [ f for f in os.listdir(data_dir) ] - input_files = natsorted(input_files) - - results_dir = f"results/{'/'.join(data_dir.split('/')[1:])}" - os.makedirs(results_dir, exist_ok=True) - - i = 0 - for file in input_files: - i += 1 - if i > 25: break - # if i > 1: break - - vca_config = Config(f'{data_dir}/{file}', i) - vca_config.num_units = 40 - vca_config.numsamples = 50 - vca_config.T0 = 1 - vca_config.lr = 5*(1e-4) - vca_config.num_warmup_steps = 1000 - - mean_energies, min_energies = run_vca(vca_config) - - with open(f"{results_dir}/VCA.txt", "a") as f: - f.write(f"{min_energies}\n") + + path = args.problem_instance + seed = 0 + vca_config = config(path, seed) + mean_energies, min_energies = vca_solver(vca_config) \ No newline at end of file diff --git a/src/algorithms/vca/vca_single_file.py b/src/algorithms/vca/vca_single_file.py new file mode 100644 index 0000000..3aa657c --- /dev/null +++ b/src/algorithms/vca/vca_single_file.py @@ -0,0 +1,707 @@ +import tensorflow as tf +import numpy as np +import argparse +from math import sqrt +import os +import time +import random +from natsort import natsorted +from typing import Any, Optional, Union, Text, Sequence, Tuple, List + +Tensor = Any + +''' +Config +''' +class Config: + def __init__(self, graph_path, seed): + Nx, Ny, Jz = self.read_graph(graph_path) + self.seed = seed + + self.Nx = Nx + self.Ny = Ny + self.N = Nx*Ny #total number of sites + self.Jz = Jz + + '''model''' + self.num_units = 40 #number of memory units + self.activation_function = tf.nn.elu #non-linear activation function for the 2D Tensorized RNN cell + + '''training''' + self.numsamples = 50 #number of samples used for training + self.lr = 5*(1e-4) #learning rate + self.T0 = 2 #Initial temperature + self.Bx0 = 0 #Initial magnetic field + self.num_warmup_steps = 1000 #number of warmup steps + self.num_annealing_steps = 2**8 #number of annealing steps + self.num_equilibrium_steps = 5 #number of training steps after each annealing step + + def read_graph(self, graph_path): + edge_list = 
dict() + with open(graph_path, "r") as f: + line = f.readline() + is_first_line = True + while line is not None and line != '': + if is_first_line: + nodes, edges = line.split(" ") + num_nodes = int(nodes) + num_edges = int(edges) + is_first_line = False + else: + node1, node2, weight = line.split(" ") + edge_list[(int(node1), int(node2))] = float(weight) + line = f.readline() + + Nx = int(sqrt(num_nodes)) + Ny = Nx + Jz = np.zeros((Nx, Ny, 2), dtype=np.float64) + for i in range(Nx): + for j in range(Ny): + if i != Nx-1: + right = edge_list.get((i+j*Ny+1, i+1+j*Ny+1)) + if right is not None: + Jz[i, j, 0] = right + if j != Ny-1: + down = edge_list.get((i+j*Ny+1, i+(j+1)*Ny+1)) + if down is not None: + Jz[i, j, 1] = down + return Nx, Ny, Jz + +''' +Network +''' +class MDRNNWavefunction(object): + def __init__(self,systemsize_x = None, systemsize_y = None,cell=None,activation=None,num_units = None,scope='RNNWavefunction',seed = 111): + self.graph=tf.Graph() + self.scope=scope #Label of the RNN wavefunction + self.Nx=systemsize_x + self.Ny=systemsize_y + + random.seed(seed) # `python` built-in pseudo-random generator + np.random.seed(seed) # numpy pseudo-random generator + + #Defining the neural network + with self.graph.as_default(): + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + + tf.set_random_seed(seed) # tensorflow pseudo-random generator + + #Defining the 2D Tensorized RNN cell with non-weight sharing + self.rnn=[cell(num_units = num_units, activation = activation, name="rnn_"+str(0)+str(i),dtype=tf.float64) for i in range(self.Nx*self.Ny)] + self.dense = [tf.layers.Dense(2,activation=tf.nn.softmax,name='wf_dense'+str(i), dtype = tf.float64) for i in range(self.Nx*self.Ny)] + + def sample(self,numsamples,inputdim): + with self.graph.as_default(): #Call the default graph, used if willing to create multiple graphs. + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + self.inputdim=inputdim + self.outputdim=self.inputdim + self.numsamples=numsamples + + samples=[[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + rnn_states = {} + inputs = {} + + for ny in range(self.Ny): #Loop over the boundaries for initialization + if ny%2==0: + nx = -1 + # print(nx,ny) + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + if ny%2==1: + nx = self.Nx + # print(nx,ny) + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + + for nx in range(self.Nx): #Loop over the boundaries for initialization + ny = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
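+
+            # Note on ordering: the zero states and inputs defined above act as
+            # boundary conditions; the sweep below then visits the lattice in a
+            # zigzag (even rows left-to-right, odd rows right-to-left), so every
+            # site is conditioned on one horizontal and one vertical neighbour
+            # that has already been visited, via their hidden states and one-hot samples.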
+ + #Making a loop over the sites with the 2DRNN + for ny in range(self.Ny): + + if ny%2 == 0: + + for nx in range(self.Nx): #left to right + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + samples[nx][ny] = sample_temp + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) + + + if ny%2 == 1: + + for nx in range(self.Nx-1,-1,-1): #right to left + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + samples[nx][ny] = sample_temp + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(sample_temp,depth=self.outputdim, dtype = tf.float64) + + + self.samples=tf.transpose(tf.stack(values=samples,axis=0), perm = [2,0,1]) + + probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) + one_hot_samples=tf.one_hot(self.samples,depth=self.inputdim, dtype = tf.float64) + self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) + + return self.samples,self.log_probs + + def log_probability(self,samples,inputdim): + with self.graph.as_default(): + + self.inputdim=inputdim + self.outputdim=self.inputdim + + self.numsamples=tf.shape(samples)[0] + + #Initial input to feed to the lstm + self.outputdim=self.inputdim + + + samples_=tf.transpose(samples, perm = [1,2,0]) + rnn_states = {} + inputs = {} + + for ny in range(self.Ny): #Loop over the boundaries for initialization + if ny%2==0: + nx = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + if ny%2==1: + nx = self.Nx + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. + + + for nx in range(self.Nx): #Loop over the boundaries for initialization + ny = -1 + rnn_states[str(nx)+str(ny)]=self.rnn[0].zero_state(self.numsamples,dtype=tf.float64) + inputs[str(nx)+str(ny)] = tf.zeros((self.numsamples,inputdim), dtype = tf.float64) #Feed the table b in tf. 
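+
+            # Same zigzag sweep as in sample(), except the input fed at each site is
+            # the one-hot encoding of the provided `samples` rather than a freshly
+            # drawn configuration; the per-site conditional probabilities are collected
+            # and summed in log-space below to give the log-probability of each sample.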
+ + + with tf.variable_scope(self.scope,reuse=tf.AUTO_REUSE): + probs = [[[] for nx in range(self.Nx)] for ny in range(self.Ny)] + + #Making a loop over the sites with the 2DRNN + for ny in range(self.Ny): + + if ny%2 == 0: + + for nx in range(self.Nx): #left to right + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx-1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx-1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) + + if ny%2 == 1: + + for nx in range(self.Nx-1,-1,-1): #right to left + + rnn_output, rnn_states[str(nx)+str(ny)] = self.rnn[ny*self.Nx+nx]((inputs[str(nx+1)+str(ny)],inputs[str(nx)+str(ny-1)]), (rnn_states[str(nx+1)+str(ny)],rnn_states[str(nx)+str(ny-1)])) + + output=self.dense[ny*self.Nx+nx](rnn_output) + sample_temp=tf.reshape(tf.multinomial(tf.log(output),num_samples=1),[-1,]) + probs[nx][ny] = output + inputs[str(nx)+str(ny)]=tf.one_hot(samples_[nx,ny],depth=self.outputdim,dtype = tf.float64) + + probs=tf.transpose(tf.stack(values=probs,axis=0),perm=[2,0,1,3]) + one_hot_samples=tf.one_hot(samples,depth=self.inputdim, dtype = tf.float64) + + self.log_probs=tf.reduce_sum(tf.reduce_sum(tf.log(tf.reduce_sum(tf.multiply(probs,one_hot_samples),axis=3)),axis=2),axis=1) + + return self.log_probs + +class MDTensorizedRNNCell(tf.contrib.rnn.RNNCell): + """The 2D Tensorized RNN cell. + """ + def __init__(self, num_units = None, activation = None, name=None, dtype = None, reuse=None): + super(MDTensorizedRNNCell, self).__init__(_reuse=reuse, name=name) + # save class variables + self._num_in = 2 + self._num_units = num_units + self._state_size = num_units + self._output_size = num_units + self.activation = activation + + # set up input -> hidden connection + self.W = tf.get_variable("W_"+name, shape=[num_units, 2*num_units, 2*self._num_in], + initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) + + self.b = tf.get_variable("b_"+name, shape=[num_units], + initializer=tf.contrib.layers.xavier_initializer(), dtype = dtype) + + @property + def input_size(self): + return self._num_in # real + + @property + def state_size(self): + return self._state_size # real + + @property + def output_size(self): + return self._output_size # real + + def call(self, inputs, states): + + inputstate_mul = tf.einsum('ij,ik->ijk', tf.concat(states, 1),tf.concat(inputs,1)) + # prepare input linear combination + state_mul = tensordot(tf, inputstate_mul, self.W, axes=[[1,2],[1,2]]) # [batch_sz, num_units] + + preact = state_mul + self.b + + output = self.activation(preact) # [batch_sz, num_units] C + + new_state = output + + return output, new_state + +def tensordot(tf, + a, + b, + axes, + name: Optional[Text] = None) -> Tensor: + + def _tensordot_should_flip(contraction_axes: List[int], + free_axes: List[int]) -> bool: + # NOTE: This will fail if the arguments contain any Tensors. 
+ if contraction_axes and free_axes: + return bool(np.mean(contraction_axes) < np.mean(free_axes)) + return False + + def _tranpose_if_necessary(tensor: Tensor, perm: List[int]) -> Tensor: + if perm == list(range(len(perm))): + return tensor + return tf.transpose(tensor, perm) + + def _reshape_if_necessary(tensor: Tensor, + new_shape: List[int]) -> Tensor: + cur_shape = tensor.get_shape().as_list() + if (len(new_shape) == len(cur_shape) and + all(d0 == d1 for d0, d1 in zip(cur_shape, new_shape))): + return tensor + return tf.reshape(tensor, new_shape) + + def _tensordot_reshape( + a: Tensor, axes: Union[Sequence[int], Tensor], is_right_term=False + ) -> Tuple[Tensor, Union[List[int], Tensor], Optional[List[int]], bool]: + if a.get_shape().is_fully_defined() and isinstance(axes, (list, tuple)): + shape_a = a.get_shape().as_list() + # NOTE: This will fail if axes contains any tensors + axes = [i if i >= 0 else i + len(shape_a) for i in axes] + free = [i for i in range(len(shape_a)) if i not in axes] + flipped = _tensordot_should_flip(axes, free) + + free_dims = [shape_a[i] for i in free] + prod_free = int(np.prod([shape_a[i] for i in free])) + prod_axes = int(np.prod([shape_a[i] for i in axes])) + perm = axes + free if flipped else free + axes + new_shape = [prod_axes, prod_free] if flipped else [prod_free, prod_axes] + transposed_a = _tranpose_if_necessary(a, perm) + reshaped_a = _reshape_if_necessary(transposed_a, new_shape) + transpose_needed = (not flipped) if is_right_term else flipped + return reshaped_a, free_dims, free_dims, transpose_needed + if a.get_shape().ndims is not None and isinstance(axes, (list, tuple)): + shape_a = a.get_shape().as_list() + axes = [i if i >= 0 else i + len(shape_a) for i in axes] + free = [i for i in range(len(shape_a)) if i not in axes] + flipped = _tensordot_should_flip(axes, free) + perm = axes + free if flipped else free + axes + + axes_dims = [shape_a[i] for i in axes] + free_dims = [shape_a[i] for i in free] + free_dims_static = free_dims + axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") + free = tf.convert_to_tensor(free, dtype=tf.dtypes.int32, name="free") + shape_a = tf.shape(a) + transposed_a = _tranpose_if_necessary(a, perm) + else: + free_dims_static = None + shape_a = tf.shape(a) + rank_a = tf.rank(a) + axes = tf.convert_to_tensor(axes, dtype=tf.dtypes.int32, name="axes") + axes = tf.where(axes >= 0, axes, axes + rank_a) + free, _ = tf.compat.v1.setdiff1d(tf.range(rank_a), axes) + flipped = is_right_term + perm = ( + tf.concat([axes, free], 0) if flipped else tf.concat([free, axes], 0)) + transposed_a = tf.transpose(a, perm) + + free_dims = tf.gather(shape_a, free) + axes_dims = tf.gather(shape_a, axes) + prod_free_dims = tf.reduce_prod(free_dims) + prod_axes_dims = tf.reduce_prod(axes_dims) + + if flipped: + new_shape = tf.stack([prod_axes_dims, prod_free_dims]) + else: + new_shape = tf.stack([prod_free_dims, prod_axes_dims]) + reshaped_a = tf.reshape(transposed_a, new_shape) + transpose_needed = (not flipped) if is_right_term else flipped + return reshaped_a, free_dims, free_dims_static, transpose_needed + + def _tensordot_axes(a: Tensor, axes + ) -> Tuple[Any, Any]: + """Generates two sets of contraction axes for the two tensor arguments.""" + a_shape = a.get_shape() + if isinstance(axes, tf.compat.integral_types): + if axes < 0: + raise ValueError("'axes' must be at least 0.") + if a_shape.ndims is not None: + if axes > a_shape.ndims: + raise ValueError("'axes' must not be larger than the number of " + "dimensions of 
tensor %s." % a) + return (list(range(a_shape.ndims - axes, + a_shape.ndims)), list(range(axes))) + rank = tf.rank(a) + return (tf.range(rank - axes, rank, + dtype=tf.int32), tf.range(axes, dtype=tf.int32)) + if isinstance(axes, (list, tuple)): + if len(axes) != 2: + raise ValueError("'axes' must be an integer or have length 2.") + a_axes = axes[0] + b_axes = axes[1] + if isinstance(a_axes, tf.compat.integral_types) and \ + isinstance(b_axes, tf.compat.integral_types): + a_axes = [a_axes] + b_axes = [b_axes] + # NOTE: This fails if either a_axes and b_axes are Tensors. + if len(a_axes) != len(b_axes): + raise ValueError( + "Different number of contraction axes 'a' and 'b', %s != %s." % + (len(a_axes), len(b_axes))) + + # The contraction indices do not need to be permuted. + # Sort axes to avoid unnecessary permutations of a. + # NOTE: This fails if either a_axes and b_axes contain Tensors. + # pylint: disable=len-as-condition + if len(a_axes) > 0: + a_axes, b_axes = list(zip(*sorted(zip(a_axes, b_axes)))) + + return a_axes, b_axes + axes = tf.convert_to_tensor(axes, name="axes", dtype=tf.int32) + return axes[0], axes[1] + + with tf.compat.v1.name_scope(name, "Tensordot", [a, b, axes]) as _name: + a = tf.convert_to_tensor(a, name="a") + b = tf.convert_to_tensor(b, name="b") + a_axes, b_axes = _tensordot_axes(a, axes) + a_reshape, a_free_dims, a_free_dims_static, a_transp = _tensordot_reshape( + a, a_axes) + b_reshape, b_free_dims, b_free_dims_static, b_transp = _tensordot_reshape( + b, b_axes, is_right_term=True) + + ab_matmul = tf.matmul( + a_reshape, b_reshape, transpose_a=a_transp, transpose_b=b_transp) + + if isinstance(a_free_dims, list) and isinstance(b_free_dims, list): + return tf.reshape(ab_matmul, a_free_dims + b_free_dims, name=_name) + a_free_dims = tf.convert_to_tensor(a_free_dims, dtype=tf.dtypes.int32) + b_free_dims = tf.convert_to_tensor(b_free_dims, dtype=tf.dtypes.int32) + product = tf.reshape( + ab_matmul, tf.concat([a_free_dims, b_free_dims], 0), name=_name) + if a_free_dims_static is not None and b_free_dims_static is not None: + product.set_shape(a_free_dims_static + b_free_dims_static) + return product + +''' +Utils +''' +def Ising2D_diagonal_matrixelements(Jz, samples): + numsamples = samples.shape[0] + Nx = samples.shape[1] + Ny = samples.shape[2] + + N = Nx*Ny #Total number of spins + + local_energies = np.zeros((numsamples), dtype = np.float64) + + for i in range(Nx-1): #diagonal elements (right neighbours) + values = samples[:,i]+samples[:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) + + for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) + values = samples[:,:,i]+samples[:,:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) + + return local_energies + +def Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess): + numsamples = samples.shape[0] + Nx = samples.shape[1] + Ny = samples.shape[2] + + N = Nx*Ny #Total number of spins + + local_energies = np.zeros((numsamples), dtype = np.float64) + + for i in range(Nx-1): #diagonal elements (right neighbours) + values = 
samples[:,i]+samples[:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[i,:,0]), axis = 1) + + for i in range(Ny-1): #diagonal elements (upward neighbours (or downward, it depends on the way you see the lattice :))) + values = samples[:,:,i]+samples[:,:,i+1] + valuesT = np.copy(values) + valuesT[values==2] = +1 #If both spins are up + valuesT[values==0] = +1 #If both spins are down + valuesT[values==1] = -1 #If they are opposite + + local_energies += np.sum(valuesT*(-Jz[:,i,1]), axis = 1) + + + queue_samples[0] = samples #storing the diagonal samples + + if Bx != 0: + for i in range(Nx): #Non-diagonal elements + for j in range(Ny): + valuesT = np.copy(samples) + valuesT[:,i,j][samples[:,i,j]==1] = 0 #Flip + valuesT[:,i,j][samples[:,i,j]==0] = 1 #Flip + + queue_samples[i*Ny+j+1] = valuesT + + len_sigmas = (N+1)*numsamples + steps = len_sigmas//50000+1 #I want a maximum in batch size just to not allocate too much memory + # print("Total num of steps =", steps) + queue_samples_reshaped = np.reshape(queue_samples, [(N+1)*numsamples, Nx,Ny]) + for i in range(steps): + if i < steps-1: + cut = slice((i*len_sigmas)//steps,((i+1)*len_sigmas)//steps) + else: + cut = slice((i*len_sigmas)//steps,len_sigmas) + log_probs[cut] = sess.run(log_probs_tensor, feed_dict={samples_placeholder:queue_samples_reshaped[cut]}) + + log_probs_reshaped = np.reshape(log_probs, [N+1,numsamples]) + for j in range(numsamples): + local_energies[j] += -Bx*np.sum(np.exp(0.5*log_probs_reshaped[1:,j]-0.5*log_probs_reshaped[0,j])) + + return local_energies + +''' +Variational Classical Annealing +''' +def run_vca(config: Config): + seed = config.seed + tf.compat.v1.reset_default_graph() + random.seed(seed) # `python` built-in pseudo-random generator + np.random.seed(seed) # numpy pseudo-random generator + tf.compat.v1.set_random_seed(seed) # tensorflow pseudo-random generator + + Nx = config.Nx + Ny = config.Ny + N = config.N + Jz = config.Jz + + num_units = config.num_units + activation_function = config.activation_function + + numsamples = config.numsamples + lr = config.lr + T0 = config.T0 + Bx0 = config.Bx0 + num_warmup_steps = config.num_warmup_steps + num_annealing_steps = config.num_annealing_steps + num_equilibrium_steps = config.num_equilibrium_steps + + print('\n') + print("Number of spins =", N) + print("Initial_temperature =", T0) + print('Seed = ', seed) + + num_steps = num_annealing_steps*num_equilibrium_steps + num_warmup_steps + + print("\nNumber of annealing steps = {0}".format(num_annealing_steps)) + print("Number of training steps = {0}".format(num_steps)) + + MDRNNWF = MDRNNWavefunction(systemsize_x = Nx, systemsize_y = Ny ,num_units = num_units,cell=MDTensorizedRNNCell, activation = activation_function, seed = seed) #contains the graph with the RNNs + with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with MDRNNWF.graph.as_default(): + + global_step = tf.Variable(0, trainable=False) + learningrate_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=[]) + learningrate = tf.compat.v1.train.exponential_decay(learningrate_placeholder, global_step, 100, 1.0, staircase=True) + + #Defining the optimizer + optimizer=tf.compat.v1.train.AdamOptimizer(learning_rate=learningrate) + + #Defining Tensorflow placeholders + Eloc=tf.compat.v1.placeholder(dtype=tf.float64,shape=[numsamples]) + 
sampleplaceholder_forgrad=tf.compat.v1.placeholder(dtype=tf.int32,shape=[numsamples,Nx,Ny]) + log_probs_forgrad = MDRNNWF.log_probability(sampleplaceholder_forgrad,inputdim=2) + + samples_placeholder=tf.compat.v1.placeholder(dtype=tf.int32,shape=(None,Nx, Ny)) + log_probs_tensor=MDRNNWF.log_probability(samples_placeholder,inputdim=2) + samplesandprobs = MDRNNWF.sample(numsamples=numsamples,inputdim=2) + + T_placeholder = tf.compat.v1.placeholder(dtype=tf.float64,shape=()) + + #Here we define a fake cost function that would allows to get the gradients of free energy using the tf.stop_gradient trick + Floc = Eloc + T_placeholder*log_probs_forgrad + cost = tf.reduce_mean(tf.multiply(log_probs_forgrad,tf.stop_gradient(Floc))) - tf.reduce_mean(log_probs_forgrad)*tf.reduce_mean(tf.stop_gradient(Floc)) + + gradients, variables = zip(*optimizer.compute_gradients(cost)) + + optstep=optimizer.apply_gradients(zip(gradients,variables), global_step = global_step) + + saver=tf.compat.v1.train.Saver() + + init=tf.compat.v1.global_variables_initializer() + initialize_parameters = tf.initialize_all_variables() + + config = tf.compat.v1.ConfigProto() + config.gpu_options.allow_growth = True + + sess=tf.compat.v1.Session(graph=MDRNNWF.graph, config=config) + sess.run(init) + + ## Run Variational Annealing + with tf.compat.v1.variable_scope(MDRNNWF.scope,reuse=tf.compat.v1.AUTO_REUSE): + with MDRNNWF.graph.as_default(): + #To store data + meanEnergy=[] + varEnergy=[] + varFreeEnergy = [] + meanFreeEnergy = [] + samples = np.ones((numsamples, Nx, Ny), dtype=np.int32) + queue_samples = np.zeros((N+1, numsamples, Nx, Ny), dtype = np.int32) + log_probs = np.zeros((N+1)*numsamples, dtype=np.float64) + + T = T0 #initializing temperature + Bx = Bx0 #initializing magnetic field + + sess.run(initialize_parameters) #Reinitialize the parameters + + start = time.time() + for it in range(len(meanEnergy),num_steps+1): + #Annealing + if it>=num_warmup_steps and it <= num_annealing_steps*num_equilibrium_steps + num_warmup_steps and it % num_equilibrium_steps == 0: + annealing_step = (it-num_warmup_steps)/num_equilibrium_steps + T = T0*(1-annealing_step/num_annealing_steps) + Bx = Bx0*(1-annealing_step/num_annealing_steps) + + #Showing current status after that the annealing starts + if it%num_equilibrium_steps==0: + if it <= num_annealing_steps*num_equilibrium_steps + num_warmup_steps and it>=num_warmup_steps: + annealing_step = (it-num_warmup_steps)/num_equilibrium_steps + print("\nAnnealing step: {0}/{1}".format(annealing_step,num_annealing_steps)) + + samples, log_probabilities = sess.run(samplesandprobs) + + # Estimating the local energies + local_energies = Ising2D_local_energies(Jz, Bx, samples, queue_samples, log_probs_tensor, samples_placeholder, log_probs, sess) + + meanE = np.mean(local_energies) + varE = np.var(local_energies) + + #adding elements to be saved + meanEnergy.append(meanE) + varEnergy.append(varE) + + meanF = np.mean(local_energies+T*log_probabilities) + varF = np.var(local_energies+T*log_probabilities) + + meanFreeEnergy.append(meanF) + varFreeEnergy.append(varF) + + if it%num_equilibrium_steps==0: + print('mean(E): {0}, mean(F): {1}, var(E): {2}, var(F): {3}, #samples {4}, #Training step {5}'.format(meanE,meanF,varE,varF,numsamples, it)) + print("Temperature: ", T) + print("Magnetic field: ", Bx) + + #Here we produce samples at the end of annealing + if it == num_annealing_steps*num_equilibrium_steps + num_warmup_steps: + + Nsteps = 20 + numsamples_estimation = 10**6 #Num samples to be obtained at 
the end + numsamples_perstep = numsamples_estimation//Nsteps #The number of steps taken to get "numsamples_estimation" samples (to avoid memory allocation issues) + + samplesandprobs_final = MDRNNWF.sample(numsamples=numsamples_perstep,inputdim=2) + energies = np.zeros((numsamples_estimation)) + solutions = np.zeros((numsamples_estimation, Nx, Ny)) + print("\nSaving energy and variance before the end of annealing") + + for i in range(Nsteps): + # print("\nsampling started") + samples_final, _ = sess.run(samplesandprobs_final) + # print("\nsampling finished") + energies[i*numsamples_perstep:(i+1)*numsamples_perstep] = Ising2D_diagonal_matrixelements(Jz,samples_final) + solutions[i*numsamples_perstep:(i+1)*numsamples_perstep] = samples_final + print("Sampling step:" , i+1, "/", Nsteps) + print("meanE = ", np.mean(energies)) + print("varE = ", np.var(energies)) + print("minE = ",np.min(energies)) + return np.mean(energies), np.min(energies) + + #Run gradient descent step + sess.run(optstep,feed_dict={Eloc:local_energies, sampleplaceholder_forgrad: samples, learningrate_placeholder: lr, T_placeholder:T}) + + if it%5 == 0: + print("Elapsed time is =", time.time()-start, " seconds") + print('\n\n') + + + +''' +Run VCA +''' +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument("problem_instance", type=str, + help="input the data file for the problem instance") + + args = parser.parse_args() + data_dir = args.problem_instance + input_files = [ f for f in os.listdir(data_dir) ] + input_files = natsorted(input_files) + + results_dir = f"results/{'/'.join(data_dir.split('/')[1:])}" + os.makedirs(results_dir, exist_ok=True) + + i = 0 + for file in input_files: + i += 1 + if i > 25: break + # if i > 1: break + + vca_config = Config(f'{data_dir}/{file}', i) + vca_config.num_units = 40 + vca_config.numsamples = 50 + vca_config.T0 = 1 + vca_config.lr = 5*(1e-4) + vca_config.num_warmup_steps = 1000 + + mean_energies, min_energies = run_vca(vca_config) + + with open(f"{results_dir}/VCA.txt", "a") as f: + f.write(f"{min_energies}\n")
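+
+# Example invocation (hypothetical path; any directory of instance files in the
+# "<num_nodes> <num_edges>" / "<node1> <node2> <weight>" format read above works):
+#   python src/algorithms/vca/vca_single_file.py data/<instance_set>
+# One minimum energy per processed instance is appended to results/<instance_set>/VCA.txt.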