From 522012b23d82c7405b81b18e0e412c425d1d8a0a Mon Sep 17 00:00:00 2001
From: Toru Tamaki
Date: Thu, 25 Dec 2025 19:01:00 +0900
Subject: [PATCH 1/2] add PPXA and ConsensusADMM algorithms with corresponding documentation and tests

- Parallel Proximal Algorithm (PPXA) for minimizing the sum of proximable functions
- ConsensusADMM for solving the global consensus problem with ADMM
- add tests in test_solver.py
  - test_ConsensusADMM_with_ADMM
  - test_ConsensusADMM_with_ADMM_for_Lasso
  - test_ConsensusADMM_with_GPG
  - test_PPXA_with_ADMM
  - test_PPXA_with_GPG
---
 docs/source/api/index.rst           |   2 +
 pyproximal/optimization/__init__.py |   2 +
 pyproximal/optimization/primal.py   | 292 ++++++++++++++++++++++++++++
 pytests/test_solver.py              | 221 +++++++++++++++++++++
 4 files changed, 517 insertions(+)

diff --git a/docs/source/api/index.rst b/docs/source/api/index.rst
index 7184220..ddf5c47 100755
--- a/docs/source/api/index.rst
+++ b/docs/source/api/index.rst
@@ -171,6 +171,8 @@ Primal
    ProximalPoint
    TwIST
    DouglasRachfordSplitting
+   PPXA
+   ConsensusADMM
 
 .. currentmodule:: pyproximal.optimization.palm
 
diff --git a/pyproximal/optimization/__init__.py b/pyproximal/optimization/__init__.py
index 3e7e555..694aac1 100644
--- a/pyproximal/optimization/__init__.py
+++ b/pyproximal/optimization/__init__.py
@@ -20,6 +20,8 @@
     TwIST                           Two-step Iterative Shrinkage/Threshold
     PlugAndPlay                     Plug-and-Play Prior with ADMM
     DouglasRachfordSplitting        Douglas-Rachford algorithm
+    PPXA                            Parallel Proximal Algorithm
+    ConsensusADMM                   Consensus problem with ADMM
 
 A list of solvers in ``pyproximal.optimization.proximaldual`` using both
 proximal and dual proximal operators:
 
diff --git a/pyproximal/optimization/primal.py b/pyproximal/optimization/primal.py
index d93db4c..f92885c 100644
--- a/pyproximal/optimization/primal.py
+++ b/pyproximal/optimization/primal.py
@@ -1793,3 +1793,295 @@ def DouglasRachfordSplitting(
         print(f"\nTotal time (s) = {time.time() - tstart:.2f}")
         print("---------------------------------------------------------\n")
     return x, y
+
+
+def PPXA(  # pylint: disable=invalid-name
+    prox_ops: List[ProxOperator],
+    x0: NDArray | List[NDArray],
+    tau: float,
+    eta: float = 1.0,
+    weights: NDArray | List[float] | None = None,
+    niter: int = 1000,
+    tol: Optional[float] = 1e-7,
+    callback: Optional[Callable[..., None]] = None,
+    show: bool = False,
+) -> NDArray:
+    r"""Parallel Proximal Algorithm (PPXA)
+
+    Solves the following minimization problem using the
+    Parallel Proximal Algorithm (PPXA):
+
+    .. math::
+
+        \mathbf{x} = \argmin_\mathbf{x} \sum_{i=1}^m f_i(\mathbf{x})
+
+    where :math:`f_i(\mathbf{x})` are any convex
+    functions that have known proximal operators.
+
+    Parameters
+    ----------
+    prox_ops : :obj:`list`
+        A list of proximable functions :math:`f_1, \ldots, f_m`.
+    x0 : :obj:`np.ndarray` or :obj:`list`
+        Initial vector :math:`\mathbf{x}` of all :math:`f_i` if 1D :obj:`np.ndarray`,
+        or :math:`\mathbf{x}_{i}` of each :math:`f_i` for :math:`i=1,\ldots,m`
+        if :obj:`list` or 2D :obj:`np.ndarray`.
+    tau : :obj:`float`
+        Positive scalar weight
+    eta : :obj:`float`, optional
+        Relaxation parameter (must be between 0 and 2, 0 excluded).
+    weights : :obj:`np.ndarray` or :obj:`list` or :obj:`None`, optional
+        Weights :math:`\sum_{i=1}^m w_i = 1, \ 0 < w_i < 1`.
+        Defaults to None, which means :math:`w_1 = \cdots = w_m = \frac{1}{m}.`
+    niter : :obj:`int`, optional
+        The maximum number of iterations.
+    tol : :obj:`float`, optional
+        Tolerance on change of the solution (used as stopping criterion).
+        If ``tol=0``, run until ``niter`` is reached.
+    callback : :obj:`callable`, optional
+        Function with signature (``callback(x)``) to call after each iteration
+        where ``x`` is the current model vector
+    show : :obj:`bool`, optional
+        Display iterations log
+
+    Returns
+    -------
+    x : :obj:`numpy.ndarray`
+        Inverted model
+
+    See Also
+    --------
+    ConsensusADMM: Consensus ADMM
+
+    Notes
+    -----
+    The Parallel Proximal Algorithm (PPXA) can be expressed by the following
+    recursion [1]_, [2]_, [3]_, [4]_:
+
+    * :math:`\mathbf{y}_{i}^{(0)} = \mathbf{x}` or :math:`\mathbf{y}_{i}^{(0)} = \mathbf{x}_{i}` for :math:`i=1,\ldots,m`
+    * :math:`\mathbf{x}^{(0)} = \sum_{i=1}^m w_i \mathbf{y}_{i}^{(0)}`
+    * for :math:`k = 1, \ldots`
+
+        * for :math:`i = 1, \ldots, m`
+
+            * :math:`\mathbf{p}_{i}^{(k)} = \prox_{\frac{\tau}{w_i} f_i} (\mathbf{y}_{i}^{(k)})`
+
+        * :math:`\mathbf{p}^{(k)} = \sum_{i=1}^{m} w_i \mathbf{p}_{i}^{(k)}`
+        * for :math:`i = 1, \ldots, m`
+
+            * :math:`\mathbf{y}_{i}^{(k+1)} = \mathbf{y}_{i}^{(k)} + \eta (2 \mathbf{p}^{(k)} - \mathbf{x}^{(k)} - \mathbf{p}_i^{(k)})`
+
+        * :math:`\mathbf{x}^{(k+1)} = \mathbf{x}^{(k)} + \eta (\mathbf{p}^{(k)} - \mathbf{x}^{(k)})`
+
+    where :math:`0 < \eta < 2` and
+    :math:`\sum_{i=1}^m w_i = 1, \ 0 < w_i < 1`.
+    In the current implementation, :math:`w_i = 1 / m` when not provided.
+
+    References
+    ----------
+    .. [1] Combettes, P.L., Pesquet, J.-C., 2008. A proximal decomposition
+       method for solving convex variational inverse problems. Inverse Problems
+       24, 065014. Algorithm 3.1. https://doi.org/10.1088/0266-5611/24/6/065014
+       https://arxiv.org/abs/0807.2617
+    .. [2] Combettes, P.L., Pesquet, J.-C., 2011. Proximal Splitting Methods in
+       Signal Processing, in Fixed-Point Algorithms for Inverse Problems in
+       Science and Engineering, Springer, pp. 185-212. Algorithm 10.27.
+       https://doi.org/10.1007/978-1-4419-9569-8_10
+    .. [3] Bauschke, H.H., Combettes, P.L., 2011. Convex Analysis and Monotone
+       Operator Theory in Hilbert Spaces, 1st ed, CMS Books in Mathematics.
+       Springer, New York, NY. Proposition 27.8.
+       https://doi.org/10.1007/978-1-4419-9467-7
+    .. [4] Ryu, E.K., Yin, W., 2022. Large-Scale Convex Optimization: Algorithms
+       & Analyses via Monotone Operators. Cambridge University Press,
+       Cambridge. Exercise 2.38. https://doi.org/10.1017/9781009160865
+       https://large-scale-book.mathopt.com/
+
+    """
+    if show:
+        tstart = time.time()
+        print(
+            "Parallel Proximal Algorithm\n"
+            "---------------------------------------------------------"
+        )
+        for i, prox_op in enumerate(prox_ops):
+            print(f"Proximal operator (f{i}): {type(prox_op)}")
+        print(f"tau = {tau:10e}\tniter = {niter:d}\n")
+        head = "   Itn       x[0]          J=sum_i f_i"
+        print(head)
+
+    ncp = get_array_module(x0)
+
+    m = len(prox_ops)
+    if weights is None:
+        w = ncp.full(m, 1. / m)
+    else:
+        w = ncp.asarray(weights)
+
+    if isinstance(x0, list) or x0.ndim == 2:
+        y = ncp.asarray(x0)  # yi_0 = xi_0, for i = 1, ..., m
+    else:
+        y = ncp.full((m, x0.size), x0)  # y1_0 = y2_0 = ... = ym_0 = x0
+
+    x = ncp.mean(y, axis=0)
+    x_old = x.copy()
+
+    for iiter in range(niter):
+
+        p = ncp.stack([prox_ops[i].prox(y[i], tau / w[i]) for i in range(m)])
+        pn = ncp.sum(w[:, None] * p, axis=0)
+        y = y + eta * (2 * pn - x - p)
+        x = x + eta * (pn - x)
+
+        if callback is not None:
+            callback(x)
+
+        if show:
+            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
+                pf = ncp.sum([prox_ops[i](x) for i in range(m)])
+                print(
+                    f"{iiter + 1:6d}  {ncp.real(to_numpy(x[0])):12.5e}  "
+                    f"{pf:10.3e}"
+                )
+
+        if ncp.abs(x - x_old).max() < tol:
+            break
+
+        x_old = x
+
+    if show:
+        print(f"\nTotal time (s) = {time.time() - tstart:.2f}")
+        print("---------------------------------------------------------\n")
+
+    return x
+
+
+def ConsensusADMM(  # pylint: disable=invalid-name
+    prox_ops: List[ProxOperator],
+    x0: NDArray,
+    tau: float,
+    niter: int = 1000,
+    tol: Optional[float] = 1e-7,
+    callback: Optional[Callable[..., None]] = None,
+    show: bool = False,
+) -> NDArray:
+    r"""Consensus ADMM
+
+    Solves the following global consensus problem using ADMM:
+
+    .. math::
+
+        \argmin_{\mathbf{x}_1, \mathbf{x}_2, \ldots, \mathbf{x}_m}
+        \sum_{i=1}^m f_i(\mathbf{x}_i) \quad \text{s.t.}
+        \quad \mathbf{x}_1 = \mathbf{x}_2 = \cdots = \mathbf{x}_m
+
+    where :math:`f_i(\mathbf{x})` are any convex
+    functions that have known proximal operators.
+
+    Parameters
+    ----------
+    prox_ops : :obj:`list`
+        A list of proximable functions :math:`f_1, \ldots, f_m`.
+    x0 : :obj:`np.ndarray`
+        Initial vector
+    tau : :obj:`float`
+        Positive scalar weight
+    niter : :obj:`int`, optional
+        The maximum number of iterations.
+    tol : :obj:`float`, optional
+        Tolerance on change of the solution (used as stopping criterion).
+        If ``tol=0``, run until ``niter`` is reached.
+    callback : :obj:`callable`, optional
+        Function with signature (``callback(x)``) to call after each iteration
+        where ``x`` is the current model vector
+    show : :obj:`bool`, optional
+        Display iterations log
+
+    Returns
+    -------
+    x : :obj:`numpy.ndarray`
+        Inverted model
+
+    See Also
+    --------
+    ADMM: Alternating Direction Method of Multipliers
+    PPXA: Parallel Proximal Algorithm
+
+    Notes
+    -----
+    The ADMM for the consensus problem can be expressed by the following
+    recursion [1]_, [2]_:
+
+    * :math:`\bar{\mathbf{x}}^{(0)} = \mathbf{x}`
+    * for :math:`k = 1, \ldots`
+
+        * for :math:`i = 1, \ldots, m`
+
+            * :math:`\mathbf{x}_i^{(k+1)} = \mathrm{prox}_{\tau f_i} \left(\bar{\mathbf{x}}^{(k)} - \mathbf{y}_i^{(k)}\right)`
+
+        * :math:`\bar{\mathbf{x}}^{(k+1)} = \frac{1}{m} \sum_{i=1}^m \mathbf{x}_i^{(k+1)}`
+
+        * for :math:`i = 1, \ldots, m`
+
+            * :math:`\mathbf{y}_i^{(k+1)} = \mathbf{y}_i^{(k)} + \mathbf{x}_i^{(k+1)} - \bar{\mathbf{x}}^{(k+1)}`
+
+    The current implementation returns :math:`\bar{\mathbf{x}}`.
+
+    References
+    ----------
+    .. [1] Boyd, S., Parikh, N., Chu, E., Peleato, B., Eckstein, J., 2011.
+       Distributed Optimization and Statistical Learning via the Alternating
+       Direction Method of Multipliers. Foundations and Trends in Machine Learning,
+       Vol. 3, No. 1, pp 1-122. Section 7.1. https://doi.org/10.1561/2200000016
+       https://stanford.edu/~boyd/papers/pdf/admm_distr_stats.pdf
+    .. [2] Parikh, N., Boyd, S., 2014. Proximal Algorithms. Foundations and
+       Trends in Optimization, Vol. 1, No. 3, pp 127-239.
+       Section 5.2.1. https://doi.org/10.1561/2400000003
+       https://web.stanford.edu/~boyd/papers/pdf/prox_algs.pdf
+
+    """
+    if show:
+        tstart = time.time()
+        print(
+            "Consensus ADMM\n"
+            "---------------------------------------------------------"
+        )
+        for i, prox_op in enumerate(prox_ops):
+            print(f"Proximal operator (f{i}): {type(prox_op)}")
+        print(f"tau = {tau:10e}\tniter = {niter:d}\n")
+        head = "   Itn       x[0]          J=sum_i f_i"
+        print(head)
+
+    ncp = get_array_module(x0)
+
+    m = len(prox_ops)
+    x_bar = x0.copy()
+    x_bar_old = x0.copy()
+    y = ncp.zeros((m, x0.size))
+
+    for iiter in range(niter):
+
+        x = ncp.stack([prox_ops[i].prox(x_bar - y[i], tau) for i in range(m)])
+        x_bar = ncp.mean(x, axis=0)
+        y = y + x - x_bar
+
+        if callback is not None:
+            callback(x_bar)
+
+        if show:
+            if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
+                pf = ncp.sum([prox_ops[i](x_bar) for i in range(m)])
+                print(
+                    f"{iiter + 1:6d}  {ncp.real(to_numpy(x_bar[0])):12.5e}  "
+                    f"{pf:10.3e}"
+                )
+
+        if ncp.abs(x_bar - x_bar_old).max() < tol:
+            break
+
+        x_bar_old = x_bar
+
+    if show:
+        print(f"\nTotal time (s) = {time.time() - tstart:.2f}")
+        print("---------------------------------------------------------\n")
+
+    return x_bar
diff --git a/pytests/test_solver.py b/pytests/test_solver.py
index a6a1f42..b961224 100644
--- a/pytests/test_solver.py
+++ b/pytests/test_solver.py
@@ -1,3 +1,5 @@
+from typing import Dict, Any
+
 import numpy as np
 import pytest
 from numpy.testing import assert_array_almost_equal
@@ -11,6 +13,8 @@
     GeneralizedProximalGradient,
     LinearizedADMM,
     ProximalGradient,
+    PPXA,
+    ConsensusADMM,
 )
 from pyproximal.proximal import L1, L2
 
@@ -224,3 +228,220 @@ def test_ADMM_DRS(par):
 
     assert_array_almost_equal(xadmm, xdrs_g, decimal=2)
     assert_array_almost_equal(xadmm, xdrs_f, decimal=2)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_PPXA_with_ADMM(par: Dict[str, Any]) -> None:
+    """Check equivalency of PPXA and ADMM
+    when using a single regularization term
+    """
+    np.random.seed(0)
+
+    n, m = par["n"], par["m"]
+
+    # Define sparse model
+    x = np.zeros(m)
+    x[2], x[4] = 1, 0.5
+
+    # Random mixing matrix
+    R = np.random.normal(0.0, 1.0, (n, m))
+    Rop = MatrixMult(R)
+
+    y = Rop @ x
+
+    l2 = L2(Op=Rop, b=y, niter=50, warm=False)
+    l1 = L1(sigma=5e-1)
+
+    # Step size
+    L = (Rop.H * Rop).eigs(1).real.item()
+    tau = 0.5 / L
+
+    xadmm, _ = ADMM(
+        l2, l1, x0=np.zeros(m), tau=tau, niter=15000, show=True)
+    xppxa = PPXA(
+        [l2, l1], x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+
+    assert_array_almost_equal(xadmm, xppxa, decimal=2)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_PPXA_with_GPG(par: Dict[str, Any]) -> None:
+    """Check equivalency of PPXA and GeneralizedProximalGradient
+    """
+    np.random.seed(0)
+
+    n, m = par["n"], par["m"]
+
+    # Define sparse model
+    x = np.zeros(m)
+    x[2], x[4] = 1, 0.5
+
+    g = np.zeros_like(x)
+    g[1], g[2] = 1, 0.5
+
+    # Random mixing matrices
+    R1 = np.random.normal(0.0, 1.0, (n, m))
+    Rop1 = MatrixMult(R1)
+    y1 = Rop1 @ x
+
+    R2 = np.random.normal(0.0, 1.0, (n, m))
+    Rop2 = MatrixMult(R2)
+    y2 = Rop2 @ x
+
+    l2_1 = L2(Op=Rop1, b=y1, niter=50, warm=False)
+    l2_2 = L2(Op=Rop2, b=y2, niter=50, warm=False)
+    l1_1 = L1(sigma=5e-1)
+    l1_2 = L1(sigma=2.5e-1, g=g)
+
+    # Step size
+    L = (Rop1.H * Rop1).eigs(1).real.item()
+    tau = 0.5 / L
+
+    xgpg = GeneralizedProximalGradient(
+        [l2_1, l2_2],
+        [l1_1, l1_2],
+        x0=np.zeros(m), tau=tau, niter=150, show=True)
+    xppxa = PPXA(
+        [l2_1, l2_2, l1_1, l1_2],
+        x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+
+    assert_array_almost_equal(xgpg, xppxa, decimal=2)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_ConsensusADMM_with_ADMM(par: Dict[str, Any]) -> None:
+    """Check equivalency of ConsensusADMM and ADMM
+    when using two proximable functions
+    """
+    np.random.seed(0)
+
+    n, m = par["n"], par["m"]
+
+    # Define sparse model
+    x = np.zeros(m)
+    x[2], x[4] = 1, 0.5
+
+    # Random mixing matrix
+    R = np.random.normal(0.0, 1.0, (n, m))
+    Rop = MatrixMult(R)
+
+    y = Rop @ x
+
+    l2 = L2(Op=Rop, b=y, niter=50, warm=False)
+    l1 = L1(sigma=5e-1)
+
+    # Step size
+    L = (Rop.H * Rop).eigs(1).real.item()
+    tau = 0.5 / L
+
+    xadmm, _ = ADMM(
+        l2, l1, x0=np.zeros(m), tau=tau, niter=15000, show=True)
+    xcadmm = ConsensusADMM(
+        [l2, l1], x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+
+    assert_array_almost_equal(xadmm, xcadmm, decimal=2)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_ConsensusADMM_with_ADMM_for_Lasso(par: Dict[str, Any]) -> None:
+    """Check equivalency of ConsensusADMM and ADMM
+    when using more than two proximable functions (Lasso)
+    """
+    m = par["m"]
+    lmd = 1e-2
+    niter = 15000
+    n_l2_ops = 3
+
+    np.random.seed(0)
+
+    # Define sparse model
+    x_true = np.zeros(m)
+    nnz = np.random.randint(3, m // 2)
+    support = np.random.choice(m, size=nnz, replace=False)
+    x_true[support] = np.random.normal(0.0, 1.0, size=len(support))
+
+    # Random mixing matrices
+    R_list, y_list = [], []
+    for ni in np.random.randint(3, 10, size=n_l2_ops):
+        R = np.random.normal(0.0, 1.0, size=(ni, m))
+        R_list.append(R)
+        y_list.append(R @ x_true)
+
+    # 1/2||R1||_2^2, 1/2||R2||_2^2, 1/2||R3||_2^2
+    l2_ops = [
+        L2(Op=MatrixMult(Ri), b=yi, niter=50, warm=False)
+        for Ri, yi in zip(R_list, y_list)
+    ]
+
+    # 1/2 || [R1; R2; R3] ||_2^2
+    Rop_stack = MatrixMult(np.vstack(R_list))
+    y_stack = np.concatenate(y_list)
+    l2_stack = L2(Op=Rop_stack, b=y_stack, niter=50, warm=False)
+
+    # ||x||_1
+    l1_op = L1(sigma=lmd)
+
+    # Step size
+    L = (Rop_stack.H * Rop_stack).eigs(1).real.item()
+    tau = 0.5 / L
+
+    # 1/2||R1||_2^2 + 1/2||R2||_2^2 + 1/2||R3||_2^2 + ||x||_1
+    x_cons = ConsensusADMM(
+        prox_ops=[*l2_ops, l1_op],
+        x0=np.zeros(m),
+        tau=tau,
+        niter=niter,
+        tol=1e-7,
+        show=False,
+    )
+
+    # 1/2 || [R1; R2; R3] ||_2^2 + ||x||_1
+    x_lasso, _ = ADMM(
+        l2_stack, l1_op, x0=np.zeros(m), tau=tau, niter=niter, show=False
+    )
+
+    assert_array_almost_equal(x_cons, x_lasso, decimal=2)
+
+
+@pytest.mark.parametrize("par", [(par1), (par2)])
+def test_ConsensusADMM_with_GPG(par: Dict[str, Any]) -> None:
+    """Check equivalency of ConsensusADMM and GeneralizedProximalGradient
+    """
+
+    n, m = par["n"], par["m"]
+
+    # Define sparse model
+    x = np.zeros(m)
+    x[2], x[4] = 1, 0.5
+
+    g = np.zeros_like(x)
+    g[1], g[2] = 1, 0.5
+
+    # Random mixing matrices
+    R1 = np.random.normal(0.0, 1.0, (n, m))
+    Rop1 = MatrixMult(R1)
+    y1 = Rop1 @ x
+
+    R2 = np.random.normal(0.0, 1.0, (n, m))
+    Rop2 = MatrixMult(R2)
+    y2 = Rop2 @ x
+
+    l2_1 = L2(Op=Rop1, b=y1, niter=50, warm=False)
+    l2_2 = L2(Op=Rop2, b=y2, niter=50, warm=False)
+    l1_1 = L1(sigma=5e-1)
+    l1_2 = L1(sigma=2.5e-1, g=g)
+
+    # Step size
+    L = (Rop1.H * Rop1).eigs(1).real.item()
+    tau = 0.5 / L
+
+    xgpg = GeneralizedProximalGradient(
+        [l2_1, l2_2],
+        [l1_1, l1_2],
+        x0=np.zeros(m), tau=tau, niter=150, show=True)
+    xcadmm = ConsensusADMM(
+        [l2_1, l2_2, l1_1, l1_2],
+        x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+
+    assert_array_almost_equal(xgpg, xcadmm, decimal=2)
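
A quick way to sanity-check the PPXA recursion introduced in this patch is to run it standalone. The sketch below is illustrative only and not part of the patch: prox_l2 and prox_l1 are hand-written closed-form proximal operators (not pyproximal API) for f1(x) = 0.5*||x - b||_2^2 and f2(x) = sigma*||x||_1, so the fixed point can be compared against the closed-form soft-thresholding solution.

    import numpy as np

    # prox_{tau f}(v) = argmin_x f(x) + ||x - v||^2 / (2 * tau), in closed form
    def prox_l2(v, tau, b):  # f1(x) = 0.5 * ||x - b||_2^2
        return (v + tau * b) / (1.0 + tau)

    def prox_l1(v, tau, sigma):  # f2(x) = sigma * ||x||_1
        return np.sign(v) * np.maximum(np.abs(v) - tau * sigma, 0.0)

    rng = np.random.default_rng(0)
    b = rng.normal(size=50)
    sigma, tau, eta, m = 0.5, 1.0, 1.0, 2
    w = np.full(m, 1.0 / m)                  # w_i = 1/m, as when weights=None

    y = np.zeros((m, b.size))                # y_i^0
    x = np.sum(w[:, None] * y, axis=0)       # x^0 = sum_i w_i y_i^0
    for _ in range(500):
        p = np.stack([prox_l2(y[0], tau / w[0], b),    # p_i^k = prox_{(tau/w_i) f_i}(y_i^k)
                      prox_l1(y[1], tau / w[1], sigma)])
        pbar = np.sum(w[:, None] * p, axis=0)          # p^k = sum_i w_i p_i^k
        y = y + eta * (2 * pbar - x - p)               # y_i^{k+1}
        x = x + eta * (pbar - x)                       # x^{k+1}

    # The minimizer of 0.5 * ||x - b||_2^2 + sigma * ||x||_1 is soft-thresholding of b
    print(np.max(np.abs(x - prox_l1(b, 1.0, sigma))))  # -> ~0 after convergence
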
From aaab2ec91d2247f4df331d503edfe1e30f9d0265 Mon Sep 17 00:00:00 2001
From: Toru Tamaki
Date: Mon, 5 Jan 2026 13:57:27 +0900
Subject: [PATCH 2/2] PR of PPXA and ConsensusADMM

refined primal.py
- prox_ops --> proxfs
- np --> numpy
- fix description of niter
- remove () in math superscripts
- add some comments

test_solver.py
- change niter to fit each test with seeds 0 to 499
- change tau to random value
- change the order of arguments to assert_array_almost_equal (actual <--> desired)
---
 pyproximal/optimization/primal.py | 75 ++++++++++++++++------------
 pytests/test_solver.py            | 82 ++++++++++++++++++++++---------
 2 files changed, 102 insertions(+), 55 deletions(-)

diff --git a/pyproximal/optimization/primal.py b/pyproximal/optimization/primal.py
index f92885c..f7a568a 100644
--- a/pyproximal/optimization/primal.py
+++ b/pyproximal/optimization/primal.py
@@ -1796,7 +1796,7 @@ def DouglasRachfordSplitting(
 
 
 def PPXA(  # pylint: disable=invalid-name
-    prox_ops: List[ProxOperator],
+    proxfs: List[ProxOperator],
     x0: NDArray | List[NDArray],
     tau: float,
     eta: float = 1.0,
@@ -1820,21 +1820,22 @@ def PPXA(  # pylint: disable=invalid-name
 
     Parameters
     ----------
-    prox_ops : :obj:`list`
+    proxfs : :obj:`list`
         A list of proximable functions :math:`f_1, \ldots, f_m`.
-    x0 : :obj:`np.ndarray` or :obj:`list`
-        Initial vector :math:`\mathbf{x}` of all :math:`f_i` if 1D :obj:`np.ndarray`,
-        or :math:`\mathbf{x}_{i}` of each :math:`f_i` for :math:`i=1,\ldots,m`
-        if :obj:`list` or 2D :obj:`np.ndarray`.
+    x0 : :obj:`numpy.ndarray` or :obj:`list`
+        Initial vector :math:`\mathbf{x}` for all :math:`f_i` if a 1-dimensional array
+        is provided, or initial vectors :math:`\mathbf{x}_{i}` for each :math:`f_i`
+        for :math:`i=1,\ldots,m` if a :obj:`list` of 1-dimensional arrays or a 2-dimensional
+        array of size ``(m, d)`` is provided, where ``d`` is the dimension of :math:`\mathbf{x}_{i}`.
     tau : :obj:`float`
         Positive scalar weight
     eta : :obj:`float`, optional
         Relaxation parameter (must be between 0 and 2, 0 excluded).
-    weights : :obj:`np.ndarray` or :obj:`list` or :obj:`None`, optional
+    weights : :obj:`numpy.ndarray` or :obj:`list` or :obj:`None`, optional
         Weights :math:`\sum_{i=1}^m w_i = 1, \ 0 < w_i < 1`.
         Defaults to None, which means :math:`w_1 = \cdots = w_m = \frac{1}{m}.`
     niter : :obj:`int`, optional
-        The maximum number of iterations.
+        Number of iterations of iterative scheme.
     tol : :obj:`float`, optional
         Tolerance on change of the solution (used as stopping criterion).
@@ -1858,20 +1859,20 @@ def PPXA(  # pylint: disable=invalid-name
     The Parallel Proximal Algorithm (PPXA) can be expressed by the following
     recursion [1]_, [2]_, [3]_, [4]_:
 
-    * :math:`\mathbf{y}_{i}^{(0)} = \mathbf{x}` or :math:`\mathbf{y}_{i}^{(0)} = \mathbf{x}_{i}` for :math:`i=1,\ldots,m`
-    * :math:`\mathbf{x}^{(0)} = \sum_{i=1}^m w_i \mathbf{y}_{i}^{(0)}`
+    * :math:`\mathbf{y}_{i}^{0} = \mathbf{x}` or :math:`\mathbf{y}_{i}^{0} = \mathbf{x}_{i}` for :math:`i=1,\ldots,m`
+    * :math:`\mathbf{x}^{0} = \sum_{i=1}^m w_i \mathbf{y}_{i}^{0}`
     * for :math:`k = 1, \ldots`
 
         * for :math:`i = 1, \ldots, m`
 
-            * :math:`\mathbf{p}_{i}^{(k)} = \prox_{\frac{\tau}{w_i} f_i} (\mathbf{y}_{i}^{(k)})`
+            * :math:`\mathbf{p}_{i}^{k} = \prox_{\frac{\tau}{w_i} f_i} (\mathbf{y}_{i}^{k})`
 
-        * :math:`\mathbf{p}^{(k)} = \sum_{i=1}^{m} w_i \mathbf{p}_{i}^{(k)}`
+        * :math:`\mathbf{p}^{k} = \sum_{i=1}^{m} w_i \mathbf{p}_{i}^{k}`
         * for :math:`i = 1, \ldots, m`
 
-            * :math:`\mathbf{y}_{i}^{(k+1)} = \mathbf{y}_{i}^{(k)} + \eta (2 \mathbf{p}^{(k)} - \mathbf{x}^{(k)} - \mathbf{p}_i^{(k)})`
+            * :math:`\mathbf{y}_{i}^{k+1} = \mathbf{y}_{i}^{k} + \eta (2 \mathbf{p}^{k} - \mathbf{x}^{k} - \mathbf{p}_i^{k})`
 
-        * :math:`\mathbf{x}^{(k+1)} = \mathbf{x}^{(k)} + \eta (\mathbf{p}^{(k)} - \mathbf{x}^{(k)})`
+        * :math:`\mathbf{x}^{k+1} = \mathbf{x}^{k} + \eta (\mathbf{p}^{k} - \mathbf{x}^{k})`
 
     where :math:`0 < \eta < 2` and
     :math:`\sum_{i=1}^m w_i = 1, \ 0 < w_i < 1`.
@@ -1903,15 +1904,16 @@ def PPXA(  # pylint: disable=invalid-name
             "Parallel Proximal Algorithm\n"
             "---------------------------------------------------------"
         )
-        for i, prox_op in enumerate(prox_ops):
-            print(f"Proximal operator (f{i}): {type(prox_op)}")
+        for i, proxf in enumerate(proxfs):
+            print(f"Proximal operator (f{i}): {type(proxf)}")
         print(f"tau = {tau:10e}\tniter = {niter:d}\n")
         head = "   Itn       x[0]          J=sum_i f_i"
         print(head)
 
     ncp = get_array_module(x0)
 
-    m = len(prox_ops)
+    # initialize model
+    m = len(proxfs)
     if weights is None:
         w = ncp.full(m, 1. / m)
     else:
         w = ncp.asarray(weights)
@@ -1925,24 +1927,28 @@ def PPXA(  # pylint: disable=invalid-name
     x = ncp.mean(y, axis=0)
     x_old = x.copy()
 
+    # iterate
     for iiter in range(niter):
 
-        p = ncp.stack([prox_ops[i].prox(y[i], tau / w[i]) for i in range(m)])
+        p = ncp.stack([proxfs[i].prox(y[i], tau / w[i]) for i in range(m)])
         pn = ncp.sum(w[:, None] * p, axis=0)
         y = y + eta * (2 * pn - x - p)
         x = x + eta * (pn - x)
 
+        # run callback
         if callback is not None:
             callback(x)
 
+        # show iteration logger
         if show:
             if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
-                pf = ncp.sum([prox_ops[i](x) for i in range(m)])
+                pf = ncp.sum([proxfs[i](x) for i in range(m)])
                 print(
                     f"{iiter + 1:6d}  {ncp.real(to_numpy(x[0])):12.5e}  "
                     f"{pf:10.3e}"
                 )
 
+        # break if tolerance condition is met
         if ncp.abs(x - x_old).max() < tol:
             break
@@ -1956,7 +1962,7 @@
 
 
 def ConsensusADMM(  # pylint: disable=invalid-name
-    prox_ops: List[ProxOperator],
+    proxfs: List[ProxOperator],
     x0: NDArray,
     tau: float,
     niter: int = 1000,
@@ -1979,14 +1985,14 @@ def ConsensusADMM(  # pylint: disable=invalid-name
 
     Parameters
     ----------
-    prox_ops : :obj:`list`
+    proxfs : :obj:`list`
         A list of proximable functions :math:`f_1, \ldots, f_m`.
-    x0 : :obj:`np.ndarray`
+    x0 : :obj:`numpy.ndarray`
         Initial vector
     tau : :obj:`float`
         Positive scalar weight
     niter : :obj:`int`, optional
-        The maximum number of iterations.
+        Number of iterations of iterative scheme.
     tol : :obj:`float`, optional
         Tolerance on change of the solution (used as stopping criterion).
         If ``tol=0``, run until ``niter`` is reached.
@@ -2011,18 +2017,18 @@ def ConsensusADMM(  # pylint: disable=invalid-name
     The ADMM for the consensus problem can be expressed by the following
     recursion [1]_, [2]_:
 
-    * :math:`\bar{\mathbf{x}}^{(0)} = \mathbf{x}`
+    * :math:`\bar{\mathbf{x}}^{0} = \mathbf{x}`
     * for :math:`k = 1, \ldots`
 
         * for :math:`i = 1, \ldots, m`
 
-            * :math:`\mathbf{x}_i^{(k+1)} = \mathrm{prox}_{\tau f_i} \left(\bar{\mathbf{x}}^{(k)} - \mathbf{y}_i^{(k)}\right)`
+            * :math:`\mathbf{x}_i^{k+1} = \mathrm{prox}_{\tau f_i} \left(\bar{\mathbf{x}}^{k} - \mathbf{y}_i^{k}\right)`
 
-        * :math:`\bar{\mathbf{x}}^{(k+1)} = \frac{1}{m} \sum_{i=1}^m \mathbf{x}_i^{(k+1)}`
+        * :math:`\bar{\mathbf{x}}^{k+1} = \frac{1}{m} \sum_{i=1}^m \mathbf{x}_i^{k+1}`
 
         * for :math:`i = 1, \ldots, m`
 
-            * :math:`\mathbf{y}_i^{(k+1)} = \mathbf{y}_i^{(k)} + \mathbf{x}_i^{(k+1)} - \bar{\mathbf{x}}^{(k+1)}`
+            * :math:`\mathbf{y}_i^{k+1} = \mathbf{y}_i^{k} + \mathbf{x}_i^{k+1} - \bar{\mathbf{x}}^{k+1}`
 
     The current implementation returns :math:`\bar{\mathbf{x}}`.
 
@@ -2045,36 +2051,41 @@ def ConsensusADMM(  # pylint: disable=invalid-name
             "Consensus ADMM\n"
             "---------------------------------------------------------"
         )
-        for i, prox_op in enumerate(prox_ops):
-            print(f"Proximal operator (f{i}): {type(prox_op)}")
+        for i, proxf in enumerate(proxfs):
+            print(f"Proximal operator (f{i}): {type(proxf)}")
         print(f"tau = {tau:10e}\tniter = {niter:d}\n")
         head = "   Itn       x[0]          J=sum_i f_i"
         print(head)
 
     ncp = get_array_module(x0)
 
-    m = len(prox_ops)
+    # initialize model
+    m = len(proxfs)
     x_bar = x0.copy()
     x_bar_old = x0.copy()
     y = ncp.zeros((m, x0.size))
 
+    # iterate
     for iiter in range(niter):
 
-        x = ncp.stack([prox_ops[i].prox(x_bar - y[i], tau) for i in range(m)])
+        x = ncp.stack([proxfs[i].prox(x_bar - y[i], tau) for i in range(m)])
         x_bar = ncp.mean(x, axis=0)
         y = y + x - x_bar
 
+        # run callback
         if callback is not None:
             callback(x_bar)
 
+        # show iteration logger
         if show:
             if iiter < 10 or niter - iiter < 10 or iiter % (niter // 10) == 0:
-                pf = ncp.sum([prox_ops[i](x_bar) for i in range(m)])
+                pf = ncp.sum([proxfs[i](x_bar) for i in range(m)])
                 print(
                     f"{iiter + 1:6d}  {ncp.real(to_numpy(x_bar[0])):12.5e}  "
                     f"{pf:10.3e}"
                 )
 
+        # break if tolerance condition is met
         if ncp.abs(x_bar - x_bar_old).max() < tol:
             break
diff --git a/pytests/test_solver.py b/pytests/test_solver.py
index b961224..2c532eb 100644
--- a/pytests/test_solver.py
+++ b/pytests/test_solver.py
@@ -257,11 +257,20 @@ def test_PPXA_with_ADMM(par: Dict[str, Any]) -> None:
     tau = 0.5 / L
 
     xadmm, _ = ADMM(
-        l2, l1, x0=np.zeros(m), tau=tau, niter=15000, show=True)
+        l2, l1,
+        x0=np.zeros(m),
+        tau=tau,
+        niter=2000,  # niter=1500 makes this test fail for seeds 0 to 499
+        show=True,
+    )
     xppxa = PPXA(
-        [l2, l1], x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+        [l2, l1],
+        x0=np.zeros(m),
+        tau=np.random.uniform(3 * tau, 5 * tau),
+        show=True,
+    )
 
-    assert_array_almost_equal(xadmm, xppxa, decimal=2)
+    assert_array_almost_equal(xppxa, xadmm, decimal=2)
 
 
 @pytest.mark.parametrize("par", [(par1), (par2)])
@@ -300,12 +309,19 @@ def test_PPXA_with_GPG(par: Dict[str, Any]) -> None:
     xgpg = GeneralizedProximalGradient(
         [l2_1, l2_2],
         [l1_1, l1_2],
-        x0=np.zeros(m), tau=tau, niter=150, show=True)
+        x0=np.zeros(m),
+        tau=tau,
+        niter=200,  # niter=150 makes this test fail for seeds 0 to 499
+        show=True,
+    )
     xppxa = PPXA(
         [l2_1, l2_2, l1_1, l1_2],
-        x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+        x0=np.zeros(m),
+        tau=np.random.uniform(3 * tau, 5 * tau),
+        show=True
+    )
 
-    assert_array_almost_equal(xgpg, xppxa, decimal=2)
+    assert_array_almost_equal(xppxa, xgpg, decimal=2)
 
 
 @pytest.mark.parametrize("par", [(par1), (par2)])
@@ -335,11 +351,20 @@ def test_ConsensusADMM_with_ADMM(par: Dict[str, Any]) -> None:
     tau = 0.5 / L
 
     xadmm, _ = ADMM(
-        l2, l1, x0=np.zeros(m), tau=tau, niter=15000, show=True)
+        l2, l1,
+        x0=np.zeros(m),
+        tau=tau,
+        niter=2000,  # niter=1500 makes this test fail for seeds 0 to 499
+        show=True
+    )
     xcadmm = ConsensusADMM(
-        [l2, l1], x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+        [l2, l1],
+        x0=np.random.normal(0.0, 1.0, m),  # x0=np.zeros(m),
+        tau=np.random.uniform(3 * tau, 5 * tau),
+        show=True,
+    )
 
-    assert_array_almost_equal(xadmm, xcadmm, decimal=2)
+    assert_array_almost_equal(xcadmm, xadmm, decimal=2)
 
 
 @pytest.mark.parametrize("par", [(par1), (par2)])
@@ -349,7 +374,6 @@ def test_ConsensusADMM_with_ADMM_for_Lasso(par: Dict[str, Any]) -> None:
     """
     m = par["m"]
     lmd = 1e-2
-    niter = 15000
     n_l2_ops = 3
 
     np.random.seed(0)
@@ -386,27 +410,32 @@ def test_ConsensusADMM_with_ADMM_for_Lasso(par: Dict[str, Any]) -> None:
     tau = 0.5 / L
 
     # 1/2||R1||_2^2 + 1/2||R2||_2^2 + 1/2||R3||_2^2 + ||x||_1
-    x_cons = ConsensusADMM(
-        prox_ops=[*l2_ops, l1_op],
-        x0=np.zeros(m),
-        tau=tau,
-        niter=niter,
-        tol=1e-7,
-        show=False,
+    xcadmm = ConsensusADMM(
+        [*l2_ops, l1_op],
+        x0=np.random.normal(0.0, 1.0, m),  # x0=np.zeros(m),
+        tau=np.random.uniform(3 * tau, 5 * tau),
+        niter=20000,  # niter=15000 makes this test fail for seeds 0 to 499
+        show=True,
     )
 
     # 1/2 || [R1; R2; R3] ||_2^2 + ||x||_1
-    x_lasso, _ = ADMM(
-        l2_stack, l1_op, x0=np.zeros(m), tau=tau, niter=niter, show=False
+    xadmm, _ = ADMM(
+        l2_stack,
+        l1_op,
+        x0=np.zeros(m),
+        tau=tau,
+        niter=15000,  # niter=10000 makes this test fail for seeds 0 to 499
+        show=True,
     )
 
-    assert_array_almost_equal(x_cons, x_lasso, decimal=2)
+    assert_array_almost_equal(xcadmm, xadmm, decimal=2)
 
 
 @pytest.mark.parametrize("par", [(par1), (par2)])
 def test_ConsensusADMM_with_GPG(par: Dict[str, Any]) -> None:
     """Check equivalency of ConsensusADMM and GeneralizedProximalGradient
     """
+    np.random.seed(0)
 
     n, m = par["n"], par["m"]
@@ -439,9 +468,16 @@ def test_ConsensusADMM_with_GPG(par: Dict[str, Any]) -> None:
     xgpg = GeneralizedProximalGradient(
         [l2_1, l2_2],
         [l1_1, l1_2],
-        x0=np.zeros(m), tau=tau, niter=150, show=True)
+        x0=np.zeros(m),
+        tau=tau,
+        niter=200,  # niter=150 makes this test fail for seeds 0 to 499
+        show=True,
+    )
     xcadmm = ConsensusADMM(
         [l2_1, l2_2, l1_1, l1_2],
-        x0=np.zeros(m), tau=tau, niter=15000, show=True, tol=1e-7)
+        x0=np.zeros(m),
+        tau=np.random.uniform(3 * tau, 5 * tau),
+        show=True,
+    )
 
-    assert_array_almost_equal(xgpg, xcadmm, decimal=2)
+    assert_array_almost_equal(xcadmm, xgpg, decimal=2)
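
For completeness, a minimal usage sketch of the two solvers as they stand after this series, mirroring the setup of the tests (the L1/L2/MatrixMult calls are taken from the tests; the import path is assumed to be pyproximal.optimization.primal as listed in the API docs, and problem sizes, tau, and niter are illustrative):

    import numpy as np
    from pylops import MatrixMult
    from pyproximal.proximal import L1, L2
    from pyproximal.optimization.primal import PPXA, ConsensusADMM

    np.random.seed(0)
    n, m = 30, 10

    # Sparse model and random mixing matrix, as in the tests
    x_true = np.zeros(m)
    x_true[2], x_true[4] = 1.0, 0.5
    R = np.random.normal(0.0, 1.0, (n, m))
    Rop = MatrixMult(R)
    y = Rop @ x_true

    # minimize 0.5 * ||R x - y||_2^2 + sigma * ||x||_1
    l2 = L2(Op=Rop, b=y, niter=50, warm=False)
    l1 = L1(sigma=5e-1)
    tau = 0.5 / (Rop.H * Rop).eigs(1).real.item()

    x_ppxa = PPXA([l2, l1], x0=np.zeros(m), tau=tau, niter=2000)
    x_cadmm = ConsensusADMM([l2, l1], x0=np.zeros(m), tau=tau, niter=2000)
    print(np.max(np.abs(x_ppxa - x_cadmm)))  # the two solutions should agree closely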