optsuite · TomasOrtega · Aug 21, 2024 · Aug 21, 2024
diff --git a/Convex/Algorithm/GradientDescent.lean b/Convex/Algorithm/GradientDescent.lean
@@ -111,7 +111,7 @@ open InnerProductSpace Set
 
 variable {f : E → ℝ} {f' : E → E}
 
-variable {l : NNReal} {xm x₀ : E}{a : ℝ}
+variable {l : NNReal} {xm x₀ : E} {a : ℝ}
 
 variable {alg : Gradient_Descent_fix_stepsize f f' x₀}
 
@@ -142,7 +142,7 @@ lemma convex_lipschitz (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁)
         · simp;
           calc l / 2 * a * a = (l * a) * (a / 2) := by ring_nf
                 _  ≤ 1 * (a / 2) := by
-                  apply mul_le_mul_of_le_of_le _ (by linarith) (by positivity) (by linarith)
+                  apply mul_le_mul_of_nonneg _ (by linarith) (by positivity) (by linarith)
                   · exact (le_div_iff ha₂).mp ha₁
                 _  = - a / 2 + a := by ring_nf
         · exact sq_nonneg ‖f' x‖
@@ -153,15 +153,13 @@ lemma point_descent_for_convex (hfun : ConvexOn ℝ Set.univ f) (step₂ : alg.a
     ∀ k : ℕ, f (alg.x (k + 1)) ≤ f xm + 1 / ((2 : ℝ) * alg.a)
       * (‖alg.x k - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2) := by
   have step₂ : alg.l ≤ 1 / alg.a := by
-    rw [le_one_div alg.step₁] at step₂; exact step₂; linarith [alg.hl]
+    rw [le_one_div alg.step₁] at step₂; exact step₂; exact alg.hl
   intro k
-  have : LipschitzWith alg.l f' := alg.smooth
-  have : alg.l > 0 := alg.hl
   have descent: ∀ x : E, f (x - alg.a • (f' x)) ≤
       f xm + 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by
     intro x
     have t1 : 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) = 1 := by
-      field_simp; ring_nf; apply mul_inv_cancel; linarith [alg.step₁]
+      ring_nf; field_simp [ne_of_gt alg.step₁]
     have t2 : inner (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 =
         1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by
       symm
@@ -179,7 +177,7 @@ lemma point_descent_for_convex (hfun : ConvexOn ℝ Set.univ f) (step₂ : alg.a
           _ = inner (f' x) (x - xm) - alg.a / (2 : ℝ)
               * ‖f' x‖ ^ 2 := by ring_nf; simp; left; rw [pow_two,mul_self_mul_inv alg.a]
     calc f (x - alg.a • (f' x)) ≤ f x - alg.a / 2 * ‖f' x‖ ^ 2 := by
-              exact convex_lipschitz alg.diff this step₂ alg.step₁ alg.smooth x
+              exact convex_lipschitz alg.diff alg.hl step₂ alg.step₁ alg.smooth x
             _   ≤ f xm + inner (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 := by
               linarith [convex_function alg.diff hfun x xm]
             _   = f xm + 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by
@@ -194,7 +192,7 @@ lemma gradient_method (hfun: ConvexOn ℝ Set.univ f) (step₂ : alg.a ≤ 1 / a
     ∀ k : ℕ, f (alg.x (k + 1)) - f xm ≤ 1 / (2 * (k + 1) * alg.a) * ‖x₀ - xm‖ ^ 2 := by
   intro k
   have step1₂ : alg.l ≤ 1 / alg.a := by
-    rw [le_one_div alg.step₁] at step₂; exact step₂; linarith [alg.hl]
+    rw [le_one_div alg.step₁] at step₂; exact step₂; exact alg.hl
   have : LipschitzWith alg.l f' := alg.smooth
   have : alg.l > 0 := alg.hl
   have tα : 1 / ((2 : ℝ) * (k + 1) * alg.a) > 0 := by
@@ -219,7 +217,7 @@ lemma gradient_method (hfun: ConvexOn ℝ Set.univ f) (step₂ : alg.a ≤ 1 / a
         _ ≤ f xm + 1 / (2 * alg.a) * (‖alg.x 0 - xm‖ ^ 2 - ‖alg.x (0 + 1) - xm‖ ^ 2) :=
             xdescent
         _ = alg.a⁻¹ * 2⁻¹ * (‖x₀ - xm‖^ 2 - ‖alg.x 1 - xm‖ ^ 2) + f xm := by
-            rw [alg.initial]; simp; ring_nf
+            rw [alg.initial, one_div, mul_inv_rev, zero_add]; ring_nf
     · specialize xdescent (j + 1)
       calc
         _ = (Finset.range (j + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm)
@@ -238,7 +236,7 @@ lemma gradient_method (hfun: ConvexOn ℝ Set.univ f) (step₂ : alg.a ≤ 1 / a
   specialize sum_prop k
   have h : f (alg.x (k + 1)) - f xm ≤ 1 / (2 * (k + 1) * alg.a) *
      (‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2) := by
-    have tt1 : 0 ≤ k + (1 : ℝ) := by exact add_nonneg (Nat.cast_nonneg k) zero_le_one
+    have tt1 : 0 ≤ k + (1 : ℝ) := add_nonneg (Nat.cast_nonneg k) zero_le_one
     calc
       _ ≤ (Finset.range (k + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm) / (k + 1) :=
         sum_prop_1

diff --git a/Convex/Algorithm/GradientDescentStronglyConvex.lean b/Convex/Algorithm/GradientDescentStronglyConvex.lean
@@ -38,7 +38,7 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m >
   let phi : E → ℝ := fun x ↦ l / 2 * ‖x‖ ^ 2 - f x
   have convphi : ConvexOn ℝ univ phi := by
     apply lipschitz_to_lnorm_sub_convex
-    apply cov; simp; apply hf; rw [← lipschitzOn_univ] at h₂; apply h₂; apply hl
+    apply cov; simp; apply hf; rw [← lipschitzOnWith_univ] at h₂; apply h₂; apply hl
   let g : E → ℝ := fun x ↦ f x - m / 2 * ‖x‖ ^ 2
   let g' : E → E := fun x ↦ f' x - m • x
   let h : E → ℝ := fun x ↦ (l - m) / 2 * ‖x‖ ^ 2 - g x
@@ -135,10 +135,11 @@ lemma lipschitz_derivxm_eq_zero (h₁ : ∀ x : E, HasGradientAt f (f' x) x)
   have eq4 : ‖f' xm‖ ^ 2 / (2 * l) = 0 := by linarith
   field_simp at eq4; exact eq4
 
-variable (hsc: StrongConvexOn univ m f) {alg : Gradient_Descent_fix_stepsize f f' x₀}
+variable {alg : Gradient_Descent_fix_stepsize f f' x₀}
 
 lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm)
-    (step₂ : alg.a ≤ 2 / (m + alg.l)) : ∀ k : ℕ , ‖alg.x k - xm‖ ^ 2 ≤ (1 - alg.a *
+    (step₂ : alg.a ≤ 2 / (m + alg.l)) (hsc : StrongConvexOn univ m f) :
+    ∀ k : ℕ , ‖alg.x k - xm‖ ^ 2 ≤ (1 - alg.a *
     (2 * m * alg.l / (m + alg.l))) ^ k * ‖x₀ - xm‖ ^ 2 := by
   have : LipschitzWith alg.l f' := alg.smooth
   have : alg.l > (0 : ℝ) := alg.hl

diff --git a/Convex/Algorithm/LASSO.lean b/Convex/Algorithm/LASSO.lean
@@ -3,7 +3,7 @@ Copyright (c) 2024 Yuxuan Wu, Chenyi Li. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
 Authors: Yuxuan Wu, Chenyi Li
 -/
-import Mathlib.Analysis.NormedSpace.Star.Matrix
+import Mathlib.Analysis.CStarAlgebra.Matrix
 import Mathlib.LinearAlgebra.Matrix.DotProduct
 import Convex.Algorithm.ProximalGradient
 
@@ -148,8 +148,7 @@ theorem affine_sq_gradient :  ∀ x : (EuclideanSpace ℝ (Fin n)),
   have φeq : φ = fun x : (EuclideanSpace ℝ (Fin n)) => f x - h x + (1 / 2) * b ⬝ᵥ b := by
     ext z; simp [φ]; rw [norm2eq_dot]; simp [f, h]
     rw [← sub_add, dotProduct_comm _ b, sub_sub, ← two_mul, mul_add, mul_sub, ← mul_assoc]
-    rw [inv_mul_cancel, one_mul]
-    simp
+    field_simp
   have φ'eq : φ' = fun x : (EuclideanSpace ℝ (Fin n)) => f' x - h' x := by
     ext y z; simp [φ', f', h']
     rw [Matrix.mulVec_sub Aᵀ]; simp
@@ -211,8 +210,7 @@ theorem norm_one_proximal
     ext z; rw [Pi.smul_apply]; simp [g]; rw [lasso]; simp; rw [mul_assoc]
   rw [← geqth]
   show prox_prop ((t * μ) • (fun (x : EuclideanSpace ℝ (Fin n)) => ‖x‖₁)) x xm
-  have tμpos : 0 < t * μ := by
-    apply mul_pos; linarith [tpos]; linarith [μpos]
+  have tμpos : 0 < t * μ := mul_pos tpos μpos
   rw [prox_iff_subderiv_smul (fun x : (EuclideanSpace ℝ (Fin n)) => ‖x‖₁) norm_one_convex tμpos]
   rw [← mem_SubderivAt, HasSubgradientAt]
   intro y
@@ -237,9 +235,7 @@ theorem norm_one_proximal
         exact aux; apply mul_nonneg; apply mul_nonneg
         apply abs_nonneg; simp; linarith [μpos]; simp; linarith [tpos]
       _ = |y i| := by
-        rw [mul_assoc _ (t⁻¹) t, inv_mul_cancel, mul_one]
-        rw [mul_assoc _ (μ⁻¹) μ, inv_mul_cancel, mul_one]
-        linarith [μpos]; linarith [tpos]
+        field_simp; linarith [μpos]
   · rw [eq_ite_iff, or_iff_right] at abs_subg
     rcases abs_subg with ⟨_, abs_subg⟩
     let sgnxm := sign (xm i)
@@ -256,7 +252,7 @@ theorem norm_one_proximal
       rw [eq1]; simp; nth_rw 3 [mul_sub]
       rw [← sub_add, real_sign_mul_abs]; simp
       nth_rw 2 [mul_comm (sign (x i))]
-      rw [← mul_assoc _ (t * μ), ← mul_inv, mul_comm μ t, inv_mul_cancel, one_mul]
+      rw [← mul_assoc _ (t * μ), ← mul_inv, mul_comm μ t, inv_mul_cancel₀ (ne_of_gt tμpos), one_mul]
       by_cases hx : 0 < x i
       · have eq2 : sign (sign (x i) * (|x i| - t * μ)) = 1 := by
           apply Real.sign_of_pos; apply mul_pos
@@ -276,7 +272,6 @@ theorem norm_one_proximal
             _ < 0 := by linarith
           linarith [ieq]
         rw [eq2]; symm; apply Real.sign_of_neg xneg
-      linarith [μpos, tpos]
     rw [aux2] at aux; linarith [aux]
     push_neg; intro hxm'; contrapose! hxm'; exact hxm
 

diff --git a/Convex/Algorithm/NesterovAccelerationFirst.lean b/Convex/Algorithm/NesterovAccelerationFirst.lean
@@ -37,9 +37,9 @@ class Nesterov_first (f h: E → ℝ) (f' : E → E) (x0 : E) :=
   (update2 : ∀ k : ℕ, prox_prop (t k • h) (y k - t k • f' (y k)) (x (k + 1)))
 
 variable {alg : Nesterov_first f h f' x0}
-variable {xm : E} (minφ : IsMinOn (f + h) univ xm)
+variable {xm : E}
 
-theorem Nesterov_first_converge : ∀ k, f (alg.x (k + 1)) + h (alg.x (k + 1)) -
+theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : ∀ k, f (alg.x (k + 1)) + h (alg.x (k + 1)) -
     f xm - h xm ≤ (alg.γ k) ^ 2 / (2 * alg.t k) * ‖x0 - xm‖ ^ 2 := by
   have h1 : ∀ k : ℕ, alg.y k - alg.x (k + 1) - (alg.t k) • (f' (alg.y k))
       ∈ (SubderivAt ((alg.t k) • h) (alg.x (k + 1))) := by
@@ -277,9 +277,9 @@ theorem Nesterov_first_converge : ∀ k, f (alg.x (k + 1)) + h (alg.x (k + 1)) -
           linarith [alg.tbound 0]
         _ = ‖alg.x 0 - xm‖ ^ 2 := by
           rw [← add_sub, sub_self, add_zero, mul_add, ← mul_assoc]; ring_nf
-          rw [mul_inv_cancel, one_mul, one_mul, alg.oriy, norm_sub_rev (alg.x 1) xm]
+          rw [mul_inv_cancel₀ (by linarith [alg.tbound 0]), one_mul, one_mul, alg.oriy, norm_sub_rev (alg.x 1) xm]
           rw [add_comm (⟪alg.x 1 - alg.x 0, xm - alg.x 1⟫ * 2), mul_comm, ← norm_add_sq_real]
-          simp; rw [norm_sub_rev]; linarith [alg.tbound 0]
+          simp; rw [norm_sub_rev]
       rw [alg.initial]; apply div_pos; rw [sq_pos_iff]
       linarith [(alg.γbound k).1]; linarith [alg.tbound k]
 
@@ -329,9 +329,9 @@ instance {f h: E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_first_fix_stepsi
 
 variable {alg : Nesterov_first_fix_stepsize f h f' x0}
 
-variable {xm : E} (minφ : IsMinOn (f + h) univ xm)
+variable {xm : E}
 
-theorem Nesterov_first_fix_stepsize_converge:
+theorem Nesterov_first_fix_stepsize_converge (minφ : IsMinOn (f + h) univ xm) :
     ∀ (k : ℕ), f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm ≤
     2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by
   intro k

diff --git a/Convex/Algorithm/NesterovAccelerationSecond.lean b/Convex/Algorithm/NesterovAccelerationSecond.lean
@@ -41,9 +41,9 @@ class Nesterov_second (f h : E → ℝ) (f' : E → E) (x0 : E) :=
 
 variable {alg : Nesterov_second f h f' x0}
 
-variable {xm : E} (minφ : IsMinOn (f + h) Set.univ xm)
+variable {xm : E}
 
-theorem Nesterov_second_convergence :
+theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm) :
     ∀ (k : ℕ), f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm ≤
       (alg.γ (k + 1)) ^ 2 / (2 * alg.t (k + 1)) * ‖x0 - xm‖ ^ 2 := by
   let φ := fun z : E ↦ f z + h z
@@ -52,13 +52,13 @@ theorem Nesterov_second_convergence :
       ∈ (SubderivAt (alg.t k • h) (alg.y k)) := by
     intro k; obtain h1 := alg.update2 k
     rw [prox_iff_subderiv] at h1
-    have upd2 := @SubderivAt.pos_smul _ _ _ ((alg.t k / alg.γ k) • h) (alg.y k) (alg.γ k) (alg.γbound k).1
+    have upd2 := @SubderivAt.pos_smul _ _ _ _ ((alg.t k / alg.γ k) • h) (alg.y k) (alg.γ k) (alg.γbound k).1
     rw [← smul_assoc, smul_eq_mul, mul_div, mul_comm, ← mul_div, div_self, mul_one] at upd2
     rw [upd2]
     use (alg.y (↑k - 1) - (alg.t ↑k / alg.γ ↑k) • f' (alg.z k) - alg.y ↑k)
     constructor
-    . exact h1
-    . simp
+    · exact h1
+    · simp
       rw [sub_right_comm, smul_sub, ← smul_assoc, smul_eq_mul]
       rw [mul_div, mul_comm, ← mul_div, div_self, mul_one]
       linarith [(alg.γbound k).1]
@@ -81,16 +81,16 @@ theorem Nesterov_second_convergence :
     have mem1 : (alg.x (k - 1)) ∈ univ := by simp
     have mem2 : alg.y k ∈ univ := by simp
     by_cases eq1 : alg.γ k = 1
-    . simp [eq1]
+    · simp [eq1]
       obtain update3 := alg.update3 k
       simp [eq1] at update3
       rw [update3]
-    . push_neg at eq1
+    · push_neg at eq1
       have pos : 1 - alg.γ k > 0 := by
         apply lt_iff_le_and_ne.mpr
         constructor
-        . linarith [(alg.γbound k).2]
-        . contrapose eq1
+        · linarith [(alg.γbound k).2]
+        · contrapose eq1
           push_neg at *
           linarith [eq1]
       specialize fall mem1 mem2 pos ((alg.γbound k).1) (by linarith)
@@ -132,9 +132,9 @@ theorem Nesterov_second_convergence :
     apply le_trans (hieq4 k)
     simp only [add_le_add_iff_left]
     by_cases nm0 : ‖alg.x ↑k - alg.z k‖ ^ 2 = 0
-    . rw [nm0]
+    · rw [nm0]
       simp
-    . push_neg at nm0
+    · push_neg at nm0
       have ax : ‖alg.x ↑k - alg.z k‖ ^ 2 > 0 := by
         apply lt_of_le_of_ne
         simp
@@ -186,13 +186,13 @@ theorem Nesterov_second_convergence :
           + ((alg.γ k) ^ 2 / (2 * alg.t k)) * ‖alg.y k - alg.y (k - 1)‖ ^ 2 := by
         simp only [add_le_add_iff_right]
         by_cases eq1 : alg.γ k = 1
-        . simp [eq1]
-        . push_neg at eq1
+        · simp [eq1]
+        · push_neg at eq1
           have pos : 1 - alg.γ k > 0 := by
             apply lt_iff_le_and_ne.mpr
             constructor
-            . linarith [(alg.γbound k).2]
-            . contrapose eq1
+            · linarith [(alg.γbound k).2]
+            · contrapose eq1
               push_neg at *
               linarith [eq1]
           apply (mul_le_mul_left pos).mpr
@@ -228,7 +228,7 @@ theorem Nesterov_second_convergence :
           + alg.γ ↑k * (f (alg.z k) + ⟪f' (alg.z k), alg.y ↑k - alg.z k⟫_ℝ)
           + alg.γ ↑k ^ 2 / (2 * alg.t ↑k) * ‖alg.y ↑k - alg.y (↑k - 1)‖ ^ 2 := by
         simp
-        have gpos : alg.γ k > 0 := by exact (alg.γbound k).1
+        have gpos : alg.γ k > 0 := (alg.γbound k).1
         apply (mul_le_mul_left gpos).mpr
         apply Convex_first_order_condition' (alg.h₁ (alg.z k)) alg.convf
         simp
@@ -315,7 +315,7 @@ theorem Nesterov_second_convergence :
       + 1 / 2 * ‖alg.y (k + 1) - xm‖ ^ 2 ≤
       alg.t 1 / (alg.γ 1 ^ 2) * (φ (alg.x 1) - φ xm) + 1 / 2 * ‖alg.y 1 - xm‖ ^ 2 := by
     induction' k with k ik
-    . simp
+    · simp
     have ine := decrease (Nat.toPNat' (k + 1))
     simp only [Nat.toPNat'_coe, add_pos_iff, zero_lt_one, or_true, ↓reduceIte] at ine
     apply le_trans ine
@@ -422,9 +422,9 @@ instance {f h : E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_second_fix_step
 
 variable {alg : Nesterov_second_fix_stepsize f h f' x0}
 
-variable {xm : E} (minφ : IsMinOn (f + h) univ xm)
+variable {xm : E}
 
-theorem Nesterov_second_fix_stepsize_converge:
+theorem Nesterov_second_fix_stepsize_converge (minφ : IsMinOn (f + h) univ xm) :
     ∀ (k : ℕ), f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm ≤
     2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by
   intro k
@@ -442,7 +442,7 @@ theorem Nesterov_second_fix_stepsize_converge:
         ‖x0 - xm‖ ^ 2 =
         Nesterov_second.γ f h f' x0 (k + 1) ^ 2 / (2 * Nesterov_second.t f h f' x0 (k + 1)) *
         ‖x0 - xm‖ ^ 2 := rfl
-      rw [h1, h2]; apply Nesterov_second_convergence minφ
+      rw [h1, h2]; apply Nesterov_second_convergence minφ _
     _ ≤ 2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by
       apply mul_le_mul_of_nonneg_right _ (sq_nonneg _)
       rw [alg.γeq (k + 1), alg.teq (k + 1)]; field_simp

diff --git a/Convex/Algorithm/NesterovSmooth.lean b/Convex/Algorithm/NesterovSmooth.lean
@@ -131,7 +131,7 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 /
         _ = alg.l / 2 * (‖alg.y k - (1 - γ k) • (alg.x (k - 1)) - γ k • xm‖ ^ 2 -
                 ‖alg.x k - (1 - γ k) • alg.x (k - 1) - γ k • xm‖ ^ 2) := by
             rw [smul_add, smul_smul]; simp
-            left; rw [mul_inv_cancel (by linarith), one_smul, sub_smul, one_smul, add_comm, sub_add]
+            left; rw [mul_inv_cancel₀ (ne_of_gt hzs), one_smul, sub_smul, one_smul, add_comm, sub_add]
   have this2 : alg.l / 2 * (‖alg.y k - (1 - γ k) • (alg.x (k - 1)) - γ k • xm‖ ^ 2 -
       ‖alg.x k - (1 - γ k) • alg.x (k - 1) - γ k • xm‖ ^ 2) = alg.l *
       (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k))
@@ -199,7 +199,7 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f)
     · specialize IH j (le_refl _)
       specialize h5 (j + 1)
       have y1: ↑(j + 1) - (1 : ℕ) = j := by simp
-      have y2: j + 1 - 1 = j := by exact Iff.mp PNat.natPred_inj rfl
+      have y2: j + 1 - 1 = j := Iff.mp PNat.natPred_inj rfl
       apply le_trans h5 _
       rw [y1, y2]
       exact IH