From 271da9d93244429837e56bef427416c5f7b0c21a Mon Sep 17 00:00:00 2001 From: or4nge19 Date: Sun, 21 Sep 2025 01:59:32 +0200 Subject: [PATCH 1/2] Update lean-toolchain --- lean-toolchain | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lean-toolchain b/lean-toolchain index 4f86f95..66a6d41 100644 --- a/lean-toolchain +++ b/lean-toolchain @@ -1 +1 @@ -leanprover/lean4:v4.13.0 +leanprover/lean4:v4.24.0-rc1 \ No newline at end of file From 8e68c677a24aa6965b87cd1db2ab66152ebe00e1 Mon Sep 17 00:00:00 2001 From: or4nge19 Date: Tue, 23 Sep 2025 19:43:38 +0200 Subject: [PATCH 2/2] update Optlib to lean4:v4.24.0-rc1 --- Optlib/Algorithm.lean | 2 +- Optlib/Algorithm/ADMM/Inv_bounded.lean | 21 +- Optlib/Algorithm/ADMM/Lemma.lean | 796 ++++++++++-------- Optlib/Algorithm/ADMM/Scheme.lean | 3 +- ...em_converge.lean => Theorem_converge.lean} | 14 +- Optlib/Algorithm/BCD/Convergence.lean | 134 +-- Optlib/Algorithm/BCD/Scheme.lean | 9 +- Optlib/Algorithm/GD/GradientDescent.lean | 33 +- .../GD/GradientDescentStronglyConvex.lean | 142 ++-- Optlib/Algorithm/LASSO.lean | 109 +-- .../Nesterov/NesterovAccelerationFirst.lean | 43 +- .../Nesterov/NesterovAccelerationSecond.lean | 50 +- Optlib/Algorithm/Nesterov/NesterovSmooth.lean | 119 +-- Optlib/Algorithm/ProximalGradient.lean | 53 +- Optlib/Algorithm/SubgradientMethod.lean | 313 ++++--- Optlib/Convex/BanachSubgradient.lean | 73 +- Optlib/Convex/ClosedCone.lean | 8 +- Optlib/Convex/ConicCaratheodory.lean | 5 +- Optlib/Convex/ConvexFunction.lean | 123 +-- Optlib/Convex/Farkas.lean | 132 +-- ...sionalConvexFunctionsLocallyLipschitz.lean | 41 +- Optlib/Convex/ImageSubgradientClosed.lean | 2 +- Optlib/Convex/QuasiConvexFirstOrder.lean | 6 +- Optlib/Convex/StronglyConvex.lean | 94 ++- Optlib/Convex/Subgradient.lean | 91 +- Optlib/Differential/Calculation.lean | 18 +- Optlib/Differential/GradientDiv.lean | 67 +- Optlib/Differential/Lemmas.lean | 146 ++-- Optlib/Differential/Subdifferential.lean | 46 +- Optlib/Function/KL.lean 
| 32 +- Optlib/Function/L1Space.lean | 102 ++- Optlib/Function/Lsmooth.lean | 111 +-- Optlib/Function/MinimaClosedFunction.lean | 16 +- Optlib/Function/Proximal.lean | 285 ++++--- Optlib/Optimality/Constrained_Problem.lean | 474 +++++++---- ...malityConditionOfUnconstrainedProblem.lean | 30 +- Optlib/Optimality/Weak_Duality.lean | 20 +- lake-manifest.json | 118 +-- lakefile.lean | 2 +- 39 files changed, 2210 insertions(+), 1673 deletions(-) rename Optlib/Algorithm/ADMM/{Theroem_converge.lean => Theorem_converge.lean} (99%) diff --git a/Optlib/Algorithm.lean b/Optlib/Algorithm.lean index 1f5852f..06250b4 100644 --- a/Optlib/Algorithm.lean +++ b/Optlib/Algorithm.lean @@ -1,7 +1,7 @@ import Optlib.Algorithm.ADMM.Inv_bounded import Optlib.Algorithm.ADMM.Lemma import Optlib.Algorithm.ADMM.Scheme -import Optlib.Algorithm.ADMM.Theroem_converge +import Optlib.Algorithm.ADMM.Theorem_converge import Optlib.Algorithm.BCD.Convergence import Optlib.Algorithm.BCD.Scheme import Optlib.Algorithm.GD.GradientDescent diff --git a/Optlib/Algorithm/ADMM/Inv_bounded.lean b/Optlib/Algorithm/ADMM/Inv_bounded.lean index e05881b..9791cb1 100644 --- a/Optlib/Algorithm/ADMM/Inv_bounded.lean +++ b/Optlib/Algorithm/ADMM/Inv_bounded.lean @@ -4,6 +4,7 @@ import Mathlib.Topology.MetricSpace.Sequences noncomputable section open Set InnerProductSpace Topology Filter LinearMap ContinuousLinearMap InnerProduct Function + variable {X Y:Type*} [NormedAddCommGroup X] [InnerProductSpace ℝ X] [NormedAddCommGroup Y] [InnerProductSpace ℝ Y] @@ -14,14 +15,18 @@ lemma KerA_bot (fullrank: Injective A): ker A = ⊥ := ker_eq_bot.2 fullrank variable [CompleteSpace X] [CompleteSpace Y] lemma KerA_eq_KerA'A : ker A = ker (A†.comp A) := by - ext x; constructor; simp - · intro h; rw[h]; continuity - · intro h; simp at h - have : ((inner (A x) (A x)):ℝ) = (0:ℝ) := by - calc - _ = (inner x ((A†) (A x)):ℝ) := by rw [ContinuousLinearMap.adjoint_inner_right] - _ = (0:ℝ) := by rw [h, inner_zero_right] - apply 
inner_self_eq_zero.1 this + ext x; constructor + · intro hx + simp [ContinuousLinearMap.comp_apply]; simp_all + · intro hx + have hx' : (A†) (A x) = 0 := by + simpa [ContinuousLinearMap.comp_apply] using hx + have hinner : ⟪A x, A x⟫_ℝ = ⟪x, (A†) (A x)⟫_ℝ := by + dsimp only + exact Eq.symm (ContinuousLinearMap.adjoint_inner_right A x (A x)) + have : ⟪A x, A x⟫_ℝ = 0 := by + simpa [hx', inner_zero_right] using hinner + exact inner_self_eq_zero.mp this lemma KerA'A_bot (fullrank: Injective A) : ker (A†.comp A) = ⊥ := by rw[← KerA_eq_KerA'A] diff --git a/Optlib/Algorithm/ADMM/Lemma.lean b/Optlib/Algorithm/ADMM/Lemma.lean index 2582e62..1a92d37 100644 --- a/Optlib/Algorithm/ADMM/Lemma.lean +++ b/Optlib/Algorithm/ADMM/Lemma.lean @@ -83,7 +83,7 @@ def υ [Setting E₁ E₂ F admm admm_kkt] : ℕ → F := fun n => (y n) + ((1 - τ) * ρ) • (A₁ (x₁ n) + A₂ (x₂ n) - b) def M [Setting E₁ E₂ F admm admm_kkt] : ℕ+ → ℝ := - fun n => ((1 - τ) * ρ) * (inner (A₂ ((x₂ n) - (x₂ n.natPred))) (A₁ (x₁ n.natPred) + A₂ (x₂ n.natPred) - b)) + fun n => ((1 - τ) * ρ) * ⟪A₂ ((x₂ n) - (x₂ n.natPred)), A₁ (x₁ n.natPred) + A₂ (x₂ n.natPred) - b⟫ lemma f₁_continuous [Setting E₁ E₂ F admm admm_kkt]: ContinuousOn f₁ univ := FiniteDimensionalConvexFunctionsContinous convex_univ isOpen_univ OptProblem.cf₁ @@ -184,7 +184,7 @@ lemma norm_covex1 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , let g := A₁ have h2 : u ∘ g = f := by ext x - simp only [Function.comp_apply] + simp only [Function.comp_apply]; exact rfl
rw[← h2] have h3 : ⇑g ⁻¹' univ = univ := by simp only [preimage_univ] @@ -218,7 +218,7 @@ lemma norm_covex2 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , let g := A₂ have h2 : u ∘ g = f := by ext x - simp only [Function.comp_apply] + simp only [Function.comp_apply]; exact rfl rw[← h2] have h3 : ⇑g ⁻¹' univ = univ := by simp only [preimage_univ] @@ -276,7 +276,7 @@ lemma ADMM_iter_process₁'_eq3_2' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : rw[this] show HasGradientAt ((fun x => ⟪c , (A₁ x)⟫ + c₁)) (A₁† c) x rw[hasGradientAt_iff_hasFDerivAt] - apply HasFDerivAt.add_const _ c₁ + refine HasFDerivAt.add_const c₁ ?_ show HasGradientAt ((fun x => ⟪c , (A₁ x)⟫)) (A₁† c) x apply ADMM_iter_process₁'_eq3_2'_1 @@ -313,56 +313,60 @@ lemma Gradient_of_quadratic_forms { α β : Type*} rw[HasGradient_iff_Convergence_Point] intro ε εpos rcases (le_iff_eq_or_lt.1 $ norm_nonneg A) with h | h - · use ε,εpos - intro x hx - symm at h - rw[norm_eq_zero] at h - simp[h] - have := norm_nonneg (s - x) - rwa[mul_nonneg_iff_right_nonneg_of_pos εpos] + · use ε + constructor + · exact εpos + · intro x hx + symm at h + rw[norm_eq_zero] at h + simp[h] + have := norm_nonneg (s - x) + rwa[mul_nonneg_iff_right_nonneg_of_pos εpos] · use ε / ‖A‖ ^ 2 - field_simp - intro x hx - have hzero : 0 < ‖A‖ ^ 2 := by apply sq_pos_of_pos h - let t := x - s - have t1 : s + t = x := by - show s + (x - s) = x - simp only [add_sub_cancel] - have : ⟪A x, A x⟫ - ⟪A s, A s⟫ - ⟪(2 : ℝ) • (A†) (A s), x - s⟫ = - ⟪A (x - s) , A (x - s)⟫ := by - rw[← t1] - simp only [map_add, add_sub_cancel_left] - show ⟪A s + A t , A s + A t⟫ - ⟪A s, A s⟫ - ⟪(2 : ℝ) • (A†) (A s), t⟫ = - ⟪A t , A t⟫ - rw[real_inner_add_add_self] - rw[real_inner_smul_left,ContinuousLinearMap.adjoint_inner_left] - ring - rw[this,real_inner_self_eq_norm_sq] - simp only [abs_pow, abs_norm, ge_iff_le] - calc - _ = ‖A (s - x)‖ ^ 2 := by - rw[norm_comm] - _ ≤ (‖A‖ * ‖s - x‖) ^ 2:= by - rw[sq,sq,← mul_self_le_mul_self_iff] - apply ContinuousLinearMap.le_opNorm - apply 
norm_nonneg - simp[h , norm_nonneg (s - x)] - _ = ‖A‖ ^ 2 * ‖s - x‖ ^ 2 := by - linarith - rcases (le_iff_eq_or_lt.1 $ norm_nonneg (s - x)) with h1 | _ - · rw[← h1] - simp only [ne_eq, OfNat.ofNat_ne_zero, not_false_eq_true, zero_pow, mul_zero, le_refl] - · calc - _ = ‖A‖ ^ 2 * ‖s - x‖ * ‖s - x‖:= by - nth_rw 2 [sq]; - rw[mul_assoc] - _ ≤ ‖A‖ ^ 2 * ‖s - x‖ * (ε / ‖A‖ ^ 2) :=by - have :0 ≤ ‖A‖ ^ 2 * ‖s - x‖ := by - simp[hzero,norm_nonneg (s - x)] - apply mul_le_mul_of_nonneg_left hx this - _ = _ := by - field_simp[hzero] - ring_nf + constructor + · + have hzero : 0 < ‖A‖ ^ 2 := by apply sq_pos_of_pos h + exact div_pos εpos hzero + · intro x hx + have hzero : 0 < ‖A‖ ^ 2 := by apply sq_pos_of_pos h + let t := x - s + have t1 : s + t = x := by + show s + (x - s) = x + simp only [add_sub_cancel] + have : ⟪A x, A x⟫ - ⟪A s, A s⟫ - ⟪(2 : ℝ) • (A†) (A s), x - s⟫ = + ⟪A (x - s) , A (x - s)⟫ := by + rw[← t1] + simp only [map_add, add_sub_cancel_left] + show ⟪A s + A t , A s + A t⟫ - ⟪A s, A s⟫ - ⟪(2 : ℝ) • (A†) (A s), t⟫ = + ⟪A t , A t⟫ + rw[real_inner_add_add_self] + rw[real_inner_smul_left,ContinuousLinearMap.adjoint_inner_left] + ring + rw[this,real_inner_self_eq_norm_sq] + simp only [ge_iff_le] + calc + _ = ‖A (s - x)‖ ^ 2 := by + rw[norm_comm]; simp + _ ≤ (‖A‖ * ‖s - x‖) ^ 2:= by + rw[sq,sq,← mul_self_le_mul_self_iff] + apply ContinuousLinearMap.le_opNorm + apply norm_nonneg + simp[h , norm_nonneg (s - x)] + _ = ‖A‖ ^ 2 * ‖s - x‖ ^ 2 := by + linarith + rcases (le_iff_eq_or_lt.1 $ norm_nonneg (s - x)) with h1 | _ + · rw[← h1] + simp only [ne_eq, OfNat.ofNat_ne_zero, not_false_eq_true, zero_pow, mul_zero, le_refl] + · calc + _ = ‖A‖ ^ 2 * ‖s - x‖ * ‖s - x‖:= by + nth_rw 2 [sq]; + rw[mul_assoc] + _ ≤ ‖A‖ ^ 2 * ‖s - x‖ * (ε / ‖A‖ ^ 2) :=by + have :0 ≤ ‖A‖ ^ 2 * ‖s - x‖ := by + simp[hzero,norm_nonneg (s - x)] + apply mul_le_mul_of_nonneg_left hx this + _ = _ := by + field_simp[hzero] #check add_sub lemma ADMM_iter_process₁'_eq3_3' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , 
@@ -554,7 +558,7 @@ lemma ADMM_iter_process₂'_eq3_2' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : rw[this] show HasGradientAt (fun x => ⟪c , (A₂ x)⟫ + c₁) (A₂† c) x rw[hasGradientAt_iff_hasFDerivAt] - apply HasFDerivAt.add_const _ c₁ + apply HasFDerivAt.add_const c₁ _ show HasGradientAt ((fun x => ⟪c , (A₂ x)⟫)) (A₂† c) x apply inner_gradient @@ -846,28 +850,29 @@ lemma Φ_isdescending_eq3' [Setting E₁ E₂ F admm admm_kkt] : ∀ n : ℕ+ , exact this lemma subgradientAt_mono_u [Setting E₁ E₂ F admm admm_kkt] : ∀ n : ℕ+, - (0 : ℝ) ≤ (inner (u (n) + A₁† y') (x₁ (n) - x₁')) := by + (0 : ℝ) ≤ ⟪u n + A₁† y', x₁ n - x₁'⟫ := by intro n calc - _= inner (u (n) - (- A₁† y')) (x₁ (n) - x₁') := by simp[v] - _≥ (0 : ℝ) := by + _ = ⟪u n - (- A₁† y'), x₁ n - x₁'⟫ := by + simp [sub_eq_add_neg] + _ ≥ (0 : ℝ) := by apply subgradientAt_mono apply u_inthesubgradient exact admm_kkt.h.subgrad₁ lemma subgradientAt_mono_v [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, - (0 : ℝ) ≤ (inner (v (n) + A₂† y') (x₂ (n) - x₂')) := by + (0 : ℝ) ≤ ⟪v n + A₂† y', x₂ n - x₂'⟫ := by intro n calc - _= inner (v (n) - (- A₂† y')) (x₂ (n) - x₂') := by simp[v] - _≥ (0 : ℝ) := by + _ = ⟪v n - (- A₂† y'), x₂ n - x₂'⟫ := by simp [v] + _ ≥ (0 : ℝ) := by apply subgradientAt_mono apply v_inthesubgradient exact admm_kkt.h.subgrad₂ lemma expended_u_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, (0 : ℝ) ≤ - (inner ( -ey (n + 1) - ((1-τ) * ρ) • (A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) - - (ρ • (A₂ (x₂ (n) - x₂ (n+1))))) (A₁ (e₁ (n + 1)))):= by + ⟪ -ey (n + 1) - ((1-τ) * ρ) • (A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) + - (ρ • (A₂ (x₂ (n) - x₂ (n+1)))), A₁ (e₁ (n + 1)) ⟫ := by intro n let Ae1 := A₁ (e₁ (n + 1)) let e' := e₁ (n + 1) @@ -878,9 +883,10 @@ lemma expended_u_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, (0 : ℝ) let x_diff := x₁ (n + 1) - x₁' let succ_n := Nat.toPNat' (n + 1) calc - _= inner (𝕜 := ℝ) block Ae1 := by rfl - _= inner (A₁† block) (e') := by rw [ContinuousLinearMap.adjoint_inner_left] - _= inner (u' + A₁† y') (x_diff) := by 
+ _= ⟪block, Ae1⟫ := by rfl + _= ⟪A₁† block, e'⟫ := by + simpa [Ae1, e'] using (ContinuousLinearMap.adjoint_inner_left A₁ e' block).symm + _= ⟪u' + A₁† y', x_diff⟫ := by let block₁ := y (n + 1) + ((1-τ) * ρ) • (A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) + (ρ • (A₂ (x₂ (n) - x₂ (n+1)))) have split_block : -block = block₁ - y' := by simp[block, block₁] @@ -908,12 +914,11 @@ lemma expended_u_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, (0 : ℝ) rw [← u'_eq, Aty'_eq, add_comm, sub_eq_add_neg] simp[e', x_diff] rfl - _= (inner (u (succ_n) + A₁† y') (x₁ (succ_n) - x₁')) := by rfl + _= ⟪u (succ_n) + A₁† y', x₁ (succ_n) - x₁'⟫ := by rfl _≥ 0 := by apply subgradientAt_mono_u lemma expended_v_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, - (inner ( -ey (n + 1) - ((1 - τ) * ρ) • ((A₁ (e₁ (n + 1))) + (A₂ (e₂ (n + 1))))) - (A₂ (e₂ (n + 1)))) ≥ (0 : ℝ) := by + ⟪ -ey (n + 1) - ((1 - τ) * ρ) • ((A₁ (e₁ (n + 1))) + (A₂ (e₂ (n + 1)))) , A₂ (e₂ (n + 1)) ⟫ ≥ (0 : ℝ) := by intro n let succ_n := Nat.toPNat' (n + 1) let ey' := ey (succ_n) @@ -923,149 +928,170 @@ lemma expended_v_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, let v_k_1 := v (succ_n) let x_diff := x₂ (succ_n) - x₂' calc - _ = inner ( -ey'- ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) (A₂ e₂') := by rfl - _ = inner (A₂† (-ey'- ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂'))) (e₂') := by rw [ContinuousLinearMap.adjoint_inner_left] - _ = inner (-A₂† (ey'+ ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂'))) (e₂') := by - rw [sub_eq_add_neg,← neg_add,A₂†.map_neg] - _ = inner (-A₂† (y_k_1 - y' + ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂'))) (e₂') := by - have sub : ey' = y_k_1 - y' := by simp [ey', y_k_1] ;rfl + _ = ⟪ -ey' - ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂') , A₂ e₂' ⟫ := by rfl + _ = ⟪ A₂† (-ey' - ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) , e₂' ⟫ := by + rw [ContinuousLinearMap.adjoint_inner_left] + _ = ⟪ -A₂† (ey' + ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) , e₂' ⟫ := by + rw [sub_eq_add_neg, ← neg_add, A₂†.map_neg] + _ = ⟪ -A₂† (y_k_1 - y' + ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) , e₂' ⟫ := by + have 
sub : ey' = y_k_1 - y' := by simp [ey', y_k_1] ; rfl rw [sub] - _ = inner (-A₂† (y_k_1 + ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) + A₂† y') (e₂') := by - rw [sub_eq_add_neg, add_comm y_k_1, add_assoc,A₂†.map_add] + _ = ⟪ -A₂† (y_k_1 + ((1 - τ) * ρ) • (A₁ e₁' + A₂ e₂')) + A₂† y' , e₂' ⟫ := by + rw [sub_eq_add_neg, add_comm y_k_1, add_assoc, A₂†.map_add] simp - _ = inner (v_k_1 + A₂† y') x_diff := rfl + _ = ⟪ v_k_1 + A₂† y' , x_diff ⟫ := by rfl _ ≥ (0 : ℝ) := by apply subgradientAt_mono_v lemma starRingEnd_eq_R (x : ℝ) : (starRingEnd ℝ) x = x := rfl -lemma expended_u_v_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n , (inner (ey (n + 1)) (-(A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))))) +set_option maxHeartbeats 500000 in +lemma expended_u_v_gt_zero [Setting E₁ E₂ F admm admm_kkt]: ∀ n, + ⟪ ey (n + 1), -(A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) ⟫ - (1 - τ) * ρ * ‖A₁ (e₁ (n+1)) + A₂ (e₂ (n+1))‖^2 - + ρ * (inner (-A₂ (x₂ (n) - x₂ (n + 1))) (A₁ (e₁ (n + 1)))) ≥ 0 := by - intro n - let A_e_sum := (A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1)) - let A_x_sum := -A₂ (x₂ (n) - x₂ (n + 1)) - let ey' := ey (n + 1) - let Ae1 := A₁ (e₁ (n + 1)) - let Ae2 := A₂ (e₂ (n + 1)) - calc - _ = inner ey' (-(A_e_sum)) - (1 - τ) * ρ * (inner A_e_sum A_e_sum) - + ρ * (inner (A_x_sum) (Ae1)) := by rw [norm_sq_eq_inner (𝕜:=ℝ) (A_e_sum)];rfl - _ = inner ey' (-(A_e_sum)) + inner (- ((1 - τ) * ρ) • A_e_sum) A_e_sum - + ρ * (inner A_x_sum Ae1) := by rw [smul_left,starRingEnd_eq_R];ring - _ = inner (-ey') A_e_sum + inner (- ((1 - τ) * ρ) • A_e_sum) A_e_sum - + ρ * (inner A_x_sum Ae1) := by - rw [inner_neg_right (𝕜 := ℝ), inner_neg_left (𝕜 := ℝ), inner_neg_left (𝕜 := ℝ)] - _ = inner (-ey' - ((1 - τ) * ρ) • A_e_sum) A_e_sum + ρ * (inner A_x_sum Ae1) := by - rw [← add_left];ring_nf - have sub: -ey' + (τ * ρ - ρ) • A_e_sum = -ey' - (-(τ * ρ) + ρ) • A_e_sum := by - rw [← sub_eq_zero,sub_eq_add_neg] - rw [sub_eq_add_neg (G := F) (-ey') ((-(τ * ρ) + ρ) • A_e_sum)] - rw [← neg_one_smul (R := ℝ) (-ey' + -((-(τ * ρ) + ρ) • A_e_sum))] - rw 
[smul_add (-1) (-ey') (-((-(τ * ρ) + ρ) • A_e_sum))] - rw [neg_smul_neg, neg_smul_neg,one_smul, one_smul] - rw [add_assoc, add_comm, add_assoc,add_comm ey' ((-(τ * ρ) + ρ) • A_e_sum)] - rw [add_assoc] - rw [add_neg_cancel, add_zero] - rw [← add_smul (τ * ρ - ρ) (-(τ * ρ) + ρ) (A_e_sum)] - rw [add_comm (-(τ * ρ)) ρ, ← add_assoc] - rw [sub_eq_add_neg, add_assoc (τ * ρ) (-ρ) ρ, add_comm (-ρ) ρ, add_neg_cancel, add_zero, add_neg_cancel, zero_smul] - rw [sub] - _ = inner (-ey' - ((1 - τ) * ρ) • A_e_sum) (Ae1 + Ae2) + ρ * (inner A_x_sum Ae1) := by rfl - _ = inner (-ey' - ((1 - τ) * ρ) • A_e_sum) Ae1 + inner (-ey' - ((1 - τ) * ρ) • A_e_sum) Ae2 - + ρ * (inner A_x_sum Ae1) := by rw [inner_add_right] - _ = inner (-ey' - ((1 - τ) * ρ) • A_e_sum) Ae2 - + inner (-ey' - ((1 - τ) * ρ) • A_e_sum + ρ • A_x_sum) Ae1 := by - rw [inner_add_left,add_assoc] - rw [inner_smul_left A_x_sum Ae1 ρ, starRingEnd_eq_R, add_comm];ring - _ = (inner ( -ey (n + 1) - ((1 - τ) * ρ) • ((A₁ (e₁ (n + 1))) + (A₂ (e₂ (n + 1))))) - (A₂ (e₂ (n + 1)))) + - (inner ( -ey (n + 1) - ((1-τ) * ρ) • (A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1))) - (ρ • (A₂ (x₂ (n) - x₂ (n+1))))) - (A₁ (e₁ (n + 1)))) := by - have sub : ρ • (A₂ (x₂ (n + 1)) - A₂ (x₂ (n))) = -1 • ρ • (A₂ (x₂ (n)) - A₂ (x₂ (n + 1))) := by - rw [smul_comm,neg_one_smul,neg_sub] - simp[ey', A_e_sum, Ae2, A_x_sum, Ae1] - nth_rw 5 [sub_eq_add_neg] - rw [← neg_one_smul (R := ℝ) (ρ • (A₂ (x₂ n) - A₂ (x₂ (n + 1)))),sub] - simp only [Int.reduceNeg, neg_smul, one_smul] - _ ≥ 0 := by - apply add_nonneg - apply expended_v_gt_zero - apply expended_u_gt_zero - -lemma substitution1 [Setting E₁ E₂ F admm admm_kkt]: ∀ n , - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) = ρ * (inner (A₂ (x₂ n - x₂ (n+1))) (A₂ (e₂ (n+1))) ) := by - intro n - rw [neg_mul (ρ) (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))))] - rw [← mul_neg] - rw [← inner_neg_left (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1)))] - rw [← map_neg A₂ (x₂ (n+1) - x₂ n)] - rw [neg_sub (x₂ (n+1)) (x₂ n)] + + ρ * ⟪ -A₂ (x₂ n - 
x₂ (n + 1)), A₁ (e₁ (n + 1)) ⟫ ≥ 0 := by + intro n + set A_e_sum := A₁ (e₁ (n + 1)) + A₂ (e₂ (n + 1)) with hAes + set Ae1 := A₁ (e₁ (n + 1)) with hAe1 + set Ae2 := A₂ (e₂ (n + 1)) with hAe2 + have hv : ⟪ -ey (n + 1) - ((1 - τ) * ρ) • A_e_sum, Ae2 ⟫ ≥ (0 : ℝ) := by + simpa [A_e_sum, Ae2] using (expended_v_gt_zero (admm:=admm) (admm_kkt:=admm_kkt) n) + have hu : ⟪ -ey (n + 1) - ((1 - τ) * ρ) • A_e_sum - ρ • A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ ≥ (0 : ℝ) := by + simpa [A_e_sum, Ae1, sub_eq_add_neg] using (expended_u_gt_zero (admm:=admm) (admm_kkt:=admm_kkt) n) + have hsum : + 0 ≤ ⟪ -ey (n + 1) - ((1 - τ) * ρ) • A_e_sum, Ae2 ⟫ + + ⟪ -ey (n + 1) - ((1 - τ) * ρ) • A_e_sum - ρ • A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ := + add_nonneg hv hu + set U := -ey (n + 1) - ((1 - τ) * ρ) • A_e_sum with hU + set Z := A₂ (x₂ n - x₂ (n + 1)) with hZ + have hrewrite : + ⟪ ey (n + 1), -A_e_sum ⟫ + - (1 - τ) * ρ * ‖A_e_sum‖^2 + + ρ * ⟪ -A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ + = + ⟪ U, Ae2 ⟫ + ⟪ U - ρ • Z, Ae1 ⟫ := by + have h1 : ⟪ U - ρ • Z, Ae1 ⟫ = ⟪ U, Ae1 ⟫ + ⟪ -ρ • Z, Ae1 ⟫ := by + simpa [sub_eq_add_neg] using (inner_add_left U (-ρ • Z) Ae1) + have h2 : ⟪ U, Ae2 ⟫ + ⟪ U, Ae1 ⟫ = ⟪ U, Ae1 + Ae2 ⟫ := by + rw [add_comm] + simpa using (inner_add_right (𝕜 := ℝ) U Ae1 Ae2).symm + have h3 : Ae1 + Ae2 = A_e_sum := by simp [hAes] + have h4 : ⟪ U, A_e_sum ⟫ = ⟪ -ey (n + 1), A_e_sum ⟫ + ⟪ -((1 - τ) * ρ) • A_e_sum, A_e_sum ⟫ := by + have : U = (-ey (n + 1)) + ( -((1 - τ) * ρ) • A_e_sum) := by + simp [U, sub_eq_add_neg] + simp [this, inner_add_left] + have h5 : ⟪ -ey (n + 1), A_e_sum ⟫ = ⟪ ey (n + 1), -A_e_sum ⟫ := by + simp [inner_neg_right] + have h6 : ⟪ -((1 - τ) * ρ) • A_e_sum, A_e_sum ⟫ = -(1 - τ) * ρ * ‖A_e_sum‖^2 := by + simp [real_inner_smul_left, real_inner_self_eq_norm_sq, mul_comm, mul_assoc]; grind + have h7 : ⟪ -ρ • Z, Ae1 ⟫ = ρ * ⟪ -Z, Ae1 ⟫ := by + simp [real_inner_smul_left] + have h8 : -Z = -A₂ (x₂ n - x₂ (n + 1)) := by simp [Z] + have h6' : + -(1 - τ) * ρ * ‖A_e_sum‖^2 + = -⟪((1 - τ) * ρ) • A_e_sum, 
A_e_sum⟫ := by + simpa [inner_neg_left] using h6.symm + calc + ⟪ ey (n + 1), -A_e_sum ⟫ - (1 - τ) * ρ * ‖A_e_sum‖^2 + ρ * ⟪ -A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ + = (⟪ ey (n + 1), -A_e_sum ⟫ + (-(1 - τ) * ρ * ‖A_e_sum‖^2)) + ρ * ⟪ -A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ := by + ring + _ = (⟪ -ey (n + 1), A_e_sum ⟫ + ⟪ -((1 - τ) * ρ) • A_e_sum, A_e_sum ⟫) + ρ * ⟪ -Z, Ae1 ⟫ := by + rw [h5, h6', h8]; simp + _ = ⟪ U, A_e_sum ⟫ + ⟪ -ρ • Z, Ae1 ⟫ := by + have := congrArg (fun t => t + ⟪ -ρ • Z, Ae1 ⟫) h4 + simpa [real_inner_smul_left] using this.symm + _ = ⟪ U, Ae1 + Ae2 ⟫ + ⟪ -ρ • Z, Ae1 ⟫ := by + simp [h3] + _ = (⟪ U, Ae2 ⟫ + ⟪ U, Ae1 ⟫) + ⟪ -ρ • Z, Ae1 ⟫ := by + simp [inner_add_right, add_comm] + _ = ⟪ U, Ae2 ⟫ + ⟪ U - ρ • Z, Ae1 ⟫ := by + simp [sub_eq_add_neg, inner_add_left, real_inner_smul_left, add_comm, add_left_comm, add_assoc] + have : ⟪ ey (n + 1), -A_e_sum ⟫ + - (1 - τ) * ρ * ‖A_e_sum‖^2 + + ρ * ⟪ -A₂ (x₂ n - x₂ (n + 1)), Ae1 ⟫ ≥ 0 := by + have h' : 0 ≤ ⟪ U, Ae2 ⟫ + ⟪ U - ρ • Z, Ae1 ⟫ := hsum + simpa [hrewrite.symm] using h' + simp [A_e_sum, Ae1] at this + exact le_of_le_of_eq hsum (id (Eq.symm hrewrite)) + +lemma substitution1 [Setting E₁ E₂ F admm admm_kkt]: + ∀ n, + -ρ * (⟪A₂ (x₂ (n + 1) - x₂ n), A₂ (e₂ (n + 1))⟫) + = ρ * (⟪A₂ (x₂ n - x₂ (n + 1)), A₂ (e₂ (n + 1))⟫) := by + intro n + rw [neg_mul ρ (⟪A₂ (x₂ (n + 1) - x₂ n), A₂ (e₂ (n + 1))⟫)] + rw [← mul_neg] + rw [← inner_neg_left] + rw [← map_neg A₂ (x₂ (n + 1) - x₂ n)] + simp [neg_sub] lemma substitution2 [Setting E₁ E₂ F admm admm_kkt]: ∀ n , A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b - A₂ (e₂ (n+1)) = A₁ (e₁ (n+1)) := by intro n have h := Φ_isdescending_eq3 n simp [h] -lemma Φ_isdescending_inequ1 [Setting E₁ E₂ F admm admm_kkt]: ∀ n , 1/(τ * ρ) * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) +lemma Φ_isdescending_inequ1 [Setting E₁ E₂ F admm admm_kkt]: ∀ n , 1/(τ * ρ) * (⟪ey (n+1), (ey n) - (ey (n+1))⟫) - (1-τ)*ρ*‖admm.A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 - + ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - -ρ 
* (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) ≥ 0 := by + + ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) + - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) ≥ 0 := by intro n let pm1 := 1 / (τ * ρ) let pm2 := (1 - τ) * ρ - have h1: pm1 * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) - = (inner (ey (n + 1)) (-((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1))))) := by - calc pm1 * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) - _ = (inner (ey (n+1)) ( pm1 • ((ey n)-(ey (n+1))) )) := by - rw [← real_inner_smul_right (ey (n+1)) ((ey n)-(ey (n+1))) pm1] - _ = (inner (ey (n+1)) ( pm1 • -((ey (n+1))-(ey n)) )) := by + have h1: pm1 * ⟪ey (n+1), (ey n) - (ey (n+1))⟫ + = ⟪ey (n + 1), -((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1)))⟫ := by + calc pm1 * ⟪ey (n+1), (ey n) - (ey (n+1))⟫ + _ = ⟪ey (n+1), pm1 • ((ey n) - (ey (n+1)))⟫ := by + rw [← real_inner_smul_right (ey (n+1)) ((ey n) - (ey (n+1))) pm1] + _ = ⟪ey (n+1), pm1 • -((ey (n+1)) - (ey n))⟫ := by rw [← neg_sub (ey (n+1)) (ey n)] - _ = (inner (ey (n+1)) ( -(pm1 • ((ey (n+1))-(ey n))) )) := by + _ = ⟪ey (n+1), -(pm1 • ((ey (n+1)) - (ey n)))⟫ := by rw [smul_neg] - _ = (inner (ey (n+1)) ( -(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b) )) := by + _ = ⟪ey (n+1), -(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)⟫ := by rw [← Φ_isdescending_eq2, ← Φ_isdescending_eq1] - _ = (inner (ey (n+1)) (-(A₁ (e₁ (n+1)) + A₂ (e₂ (n+1))))) := by + _ = ⟪ey (n+1), -(A₁ (e₁ (n+1)) + A₂ (e₂ (n+1)))⟫ := by rw [Φ_isdescending_eq3] have h2: pm2*‖admm.A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 = pm2*‖admm.A₁ (e₁ (n+1)) + A₂ (e₂ (n+1))‖^2 := by rw [Φ_isdescending_eq3] - have h3: ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) -ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) - = ρ * (inner (-A₂ (x₂ (n) - x₂ (n + 1))) (A₁ (e₁ (n+1)))) := by - calc ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - -ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) - _ = ρ * (inner (- (A₂ (x₂ (n) - x₂ (n+1)))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - - ρ * (inner (A₂ 
(x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) := by + have h3: ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) + = ρ * (⟪-A₂ (x₂ (n) - x₂ (n + 1)), A₁ (e₁ (n+1))⟫) := by + calc ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) + - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) + _ = ρ * (⟪- (A₂ (x₂ (n) - x₂ (n+1))), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) + - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) := by rw [← neg_sub (x₂ n) (x₂ (n+1))] rw [map_neg A₂ (x₂ (n) - x₂ (n+1))] - _ = - ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) := by + _ = - ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) + - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) := by rw [inner_neg_left (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)] simp - _ = - ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - + ρ * (inner (A₂ (x₂ n - x₂ (n+1))) (A₂ (e₂ (n+1))) ) := by + _ = - ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) + + ρ * (⟪A₂ (x₂ n - x₂ (n+1)), A₂ (e₂ (n+1))⟫) := by rw [← substitution1] - simp only [map_sub, neg_mul];rfl - _ = ρ * (inner (A₂ (x₂ n - x₂ (n+1))) (A₂ (e₂ (n+1))) ) - - ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) := by + simp only [map_sub, neg_mul]; rfl + _ = ρ * (⟪A₂ (x₂ n - x₂ (n+1)), A₂ (e₂ (n+1))⟫) + - ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) := by ring - _ = ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₂ (e₂ (n+1)) - (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b))):= by + _ = ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₂ (e₂ (n+1)) - (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)⟫) := by rw [← mul_sub] rw [← inner_sub_right (A₂ (x₂ (n) - x₂ (n+1))) (A₂ (e₂ (n+1))) ((A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b))] - _ = - ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b - A₂ (e₂ (n+1)))) := by + _ = - ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₁ (x₁ 
(n+1)) + A₂ (x₂ (n+1)) - b - A₂ (e₂ (n+1))⟫) := by rw [← neg_sub (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b) (A₂ (e₂ (n+1)))] rw [inner_neg_right] simp only [map_sub, mul_neg, neg_mul] - _ = - ρ * (inner (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (e₁ (n+1)))) := by + _ = - ρ * (⟪A₂ (x₂ (n) - x₂ (n+1)), A₁ (e₁ (n+1))⟫) := by rw [substitution2] - _ = ρ * (inner (-A₂ (x₂ (n) - x₂ (n + 1))) (A₁ (e₁ (n+1)))) := by - rw [neg_mul (ρ) (inner (A₂ (x₂ (n) - x₂ (n + 1))) (A₁ (e₁ (n+1))))] + _ = ρ * (⟪-A₂ (x₂ (n) - x₂ (n + 1)), A₁ (e₁ (n+1))⟫) := by + rw [neg_mul ρ (⟪A₂ (x₂ (n) - x₂ (n + 1)), A₁ (e₁ (n+1))⟫)] rw [← mul_neg] rw [← inner_neg_left (A₂ (x₂ (n) - x₂ (n+1))) (A₁ (e₁ (n+1)))] - rw [h1,h2] - have h4: (inner (ey (n + 1)) (-((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1))))) + rw [h1, h2] + have h4: (⟪ey (n + 1), -((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1)))⟫) - pm2*‖admm.A₁ (e₁ (n+1)) + A₂ (e₂ (n+1))‖^2 + - (ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) -ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) )) = (inner (ey (n + 1)) (-((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1))))) + (ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫)) = + (⟪ey (n + 1), -((A₁ (e₁ (n + 1))) + A₂ (e₂ (n + 1)))⟫) - pm2*‖admm.A₁ (e₁ (n+1)) + A₂ (e₂ (n+1))‖^2 + - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) -ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) := by ring - rw [← h4,h3] + ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b⟫) - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) := by + ring + rw [← h4, h3] exact expended_u_v_gt_zero n lemma A'υ_inthesubgradient [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , - (A₂†) (υ n) ∈ SubderivAt f₂ (x₂ n):= by @@ -1075,110 +1101,135 @@ lemma A'υ_inthesubgradient [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , rw[Φ_isdescending_eq3' , ← this] apply v_inthesubgradient -lemma Φ_isdescending_inequ2 [Setting E₁ E₂ F admm admm_kkt]:∀ n : ℕ+ , - inner ( - ( A₂† ( υ (n+1) - υ n ))) ((x₂ (n+1)) - 
(x₂ n)) ≥ ( 0 : ℝ ) := by - intro n - let A₂T := A₂† - let A₂υn := - (A₂T ((υ) n)) - let A₂υn1 := - (A₂T ((υ) (n+1))) - have h1 : A₂υn ∈ SubderivAt f₂ (x₂ n) := by apply A'υ_inthesubgradient - have h2 : A₂υn1 ∈ SubderivAt f₂ (x₂ (n+1)) := by apply A'υ_inthesubgradient (n+1) - have mono : inner (A₂υn1 - A₂υn) (x₂ (n+1) - x₂ n) ≥ (0:ℝ):= subgradientAt_mono h2 h1 - have h: -(A₂T ((υ (n+1)) - (υ n))) = A₂υn1 - A₂υn := by - calc - -(A₂T ((υ (n+1)) - (υ n))) = - (A₂T (υ (n+1)) - A₂T (υ n)) := by continuity - _= (A₂T ((υ) n)) - (A₂T ((υ) (n+1))) := by simp - _= - (A₂T ((υ) (n+1))) - (-(A₂T ((υ) n))) := by rw [sub_neg_eq_add,add_comm (- (A₂T ((υ) (n+1)))),sub_eq_add_neg] - _= A₂υn1 - A₂υn := by rfl - rwa [h] - -lemma Φ_isdescending_inequ3 [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) ≤ M (n+1) := by - intro n - let A₂_x_diff := A₂ (x₂ (n+1) - x₂ n) - let r_n := A₁ (x₁ n) + A₂ (x₂ n) - b - let r_n_1 := A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b - let υ_diff := υ (n+1) - υ n - let y_diff := y (n+1) - y n - let x_diff := x₂ (n+1) - x₂ n - let A₂T := A₂† - have h: ρ * (inner A₂_x_diff r_n_1) = - M (n+1) + inner υ_diff A₂_x_diff := by - calc - ρ * (inner A₂_x_diff r_n_1) = - (1 - τ) * ρ * (inner A₂_x_diff r_n_1) + (τ * ρ) * (inner A₂_x_diff r_n_1) := by - linarith - _= (1 - τ) * ρ * (inner A₂_x_diff r_n_1) + (inner A₂_x_diff ((τ * ρ) • r_n_1)) := by - rw [inner_smul_right] - _= (1 - τ) * ρ * (inner A₂_x_diff r_n_1) + (inner A₂_x_diff y_diff) := by - have : (τ * ρ) • r_n_1 = y_diff := by - simp [r_n_1, y_diff] - rw [Φ_isdescending_eq1, ← mul_smul, mul_div, mul_one, div_self, one_smul] - intro H - rw [mul_eq_zero] at H - rcases H with _ | _ - · linarith [admm.htau] - · linarith [admm.hrho] - rw [this] - _= (1 - τ) * ρ * (inner A₂_x_diff r_n) - (1 - τ) * ρ * (inner A₂_x_diff r_n) - + (1 - τ) * ρ * (inner A₂_x_diff r_n_1) + (inner A₂_x_diff y_diff) := by - rw [sub_self ((1 - τ) * ρ * (inner A₂_x_diff r_n)), zero_add] - 
_= M (n+1) - (1 - τ) * ρ * (inner A₂_x_diff r_n) - + (1 - τ) * ρ * (inner A₂_x_diff r_n_1) + (inner A₂_x_diff y_diff) := by - rw [M]; rfl - _= (1 - τ) * ρ * ((inner A₂_x_diff r_n_1) - (inner A₂_x_diff r_n)) + - M (n+1) + (inner A₂_x_diff y_diff) := by - ring - _= (1 - τ) * ρ * (inner A₂_x_diff (r_n_1 - r_n)) + - M (n+1) + (inner A₂_x_diff y_diff) := by - rw [← inner_sub_right] - _= inner A₂_x_diff (((1 - τ) * ρ) • (r_n_1 - r_n)) + - M (n+1) + (inner A₂_x_diff y_diff) := by - rw [inner_smul_right] - _= inner A₂_x_diff (υ_diff - y_diff) + - M (n+1) + (inner A₂_x_diff y_diff) := by - have : ((1 - τ) * ρ) • (r_n_1 - r_n) = υ_diff - y_diff := by - rw [smul_sub] - have h1: ((1 - τ) * ρ) • r_n_1 = υ (n+1) - y (n+1) := by +lemma Φ_isdescending_inequ2 [Setting E₁ E₂ F admm admm_kkt]: + ∀ n : ℕ+, + ⟪-(A₂† (υ (n + 1) - υ n)), x₂ (n + 1) - x₂ n⟫ ≥ (0 : ℝ) := by + intro n + let A₂T := A₂† + let A₂υn := - (A₂T ((υ) n)) + let A₂υn1 := - (A₂T ((υ) (n + 1))) + have h1 : A₂υn ∈ SubderivAt f₂ (x₂ n) := by apply A'υ_inthesubgradient + have h2 : A₂υn1 ∈ SubderivAt f₂ (x₂ (n + 1)) := by apply A'υ_inthesubgradient (n + 1) + have mono : ⟪A₂υn1 - A₂υn, x₂ (n + 1) - x₂ n⟫ ≥ (0 : ℝ) := subgradientAt_mono h2 h1 + have h : + -(A₂T (υ (n + 1) - υ n)) = A₂υn1 - A₂υn := by + calc + -(A₂T (υ (n + 1) - υ n)) + = -(A₂T (υ (n + 1)) - A₂T (υ n)) := by continuity + _ = (A₂T ((υ) n)) - (A₂T ((υ) (n + 1))) := by simp + _ = - (A₂T ((υ) (n + 1))) - (-(A₂T ((υ) n))) := by + rw [sub_neg_eq_add, add_comm (- (A₂T ((υ) (n + 1)))), sub_eq_add_neg] + _ = A₂υn1 - A₂υn := by rfl + simp_all only [sub_neg_eq_add, ge_iff_le, map_sub, neg_sub, A₂υn, A₂T, A₂υn1] + +lemma Φ_isdescending_inequ3 [Setting E₁ E₂ F admm admm_kkt]: + ∀ n : ℕ+, + ρ * ⟪A₂ (x₂ (n + 1) - x₂ n), + A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b⟫ ≤ M (n + 1) := by + intro n + let A₂_x_diff := A₂ (x₂ (n + 1) - x₂ n) + let r_n := A₁ (x₁ n) + A₂ (x₂ n) - b + let r_n_1 := A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b + let υ_diff := υ (n + 1) - υ n + let y_diff := y (n + 
1) - y n + let x_diff := x₂ (n + 1) - x₂ n + let A₂T := A₂† + have h : + ρ * ⟪A₂_x_diff, r_n_1⟫ + = M (n + 1) + ⟪υ_diff, A₂_x_diff⟫ := by + calc + ρ * ⟪A₂_x_diff, r_n_1⟫ + = (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1⟫ + + (τ * ρ) * ⟪A₂_x_diff, r_n_1⟫ := by linarith + _ = (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1⟫ + + ⟪A₂_x_diff, (τ * ρ) • r_n_1⟫ := by + rw [inner_smul_right] + _ = (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1⟫ + + ⟪A₂_x_diff, y_diff⟫ := by + have : (τ * ρ) • r_n_1 = y_diff := by + simp [r_n_1, y_diff] + rw [Φ_isdescending_eq1, ← mul_smul, mul_div, mul_one, div_self, one_smul] + intro H + rw [mul_eq_zero] at H + rcases H with hτ | hρ + · linarith [admm.htau] + · linarith [admm.hrho] + simp [this] + _ = (1 - τ) * ρ * ⟪A₂_x_diff, r_n⟫ + - (1 - τ) * ρ * ⟪A₂_x_diff, r_n⟫ + + (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1⟫ + + ⟪A₂_x_diff, y_diff⟫ := by + rw [sub_self ((1 - τ) * ρ * ⟪A₂_x_diff, r_n⟫), zero_add] + _ = M (n + 1) - (1 - τ) * ρ * ⟪A₂_x_diff, r_n⟫ + + (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1⟫ + + ⟪A₂_x_diff, y_diff⟫ := by + rw [M]; rfl + _ = (1 - τ) * ρ * (⟪A₂_x_diff, r_n_1⟫ - ⟪A₂_x_diff, r_n⟫) + + M (n + 1) + ⟪A₂_x_diff, y_diff⟫ := by + ring + _ = (1 - τ) * ρ * ⟪A₂_x_diff, r_n_1 - r_n⟫ + + M (n + 1) + ⟪A₂_x_diff, y_diff⟫ := by + rw [← inner_sub_right] + _ = ⟪A₂_x_diff, ((1 - τ) * ρ) • (r_n_1 - r_n)⟫ + + M (n + 1) + ⟪A₂_x_diff, y_diff⟫ := by + rw [inner_smul_right] + _ = ⟪A₂_x_diff, υ_diff - y_diff⟫ + + M (n + 1) + ⟪A₂_x_diff, y_diff⟫ := by + have : ((1 - τ) * ρ) • (r_n_1 - r_n) = υ_diff - y_diff := by + rw [smul_sub] + have h1 : ((1 - τ) * ρ) • r_n_1 = υ (n + 1) - y (n + 1) := by rw [υ, add_sub_assoc, add_sub_left_comm, sub_self, add_zero] - have h2: ((1 - τ) * ρ) • r_n = υ n - y n := by + have h2 : ((1 - τ) * ρ) • r_n = υ n - y n := by rw [υ, add_sub_assoc, add_sub_left_comm, sub_self, add_zero] - rw [h1, h2, sub_sub_eq_add_sub, sub_add_comm, add_sub_assoc, sub_add_comm, sub_add] - rw [this] - _= M (n+1) + (inner A₂_x_diff υ_diff) := by - rw [inner_sub_right] - ring - _= M (n+1) + (inner υ_diff 
A₂_x_diff) := by - rw [real_inner_comm] - have mono: (inner υ_diff A₂_x_diff) ≤ (0:ℝ) := by - simp [υ_diff, A₂_x_diff] - rw [← map_sub A₂] - have : ((inner υ_diff A₂_x_diff):ℝ) = ((inner (A₂T υ_diff) x_diff):ℝ) := by - rw [ContinuousLinearMap.adjoint_inner_left] - rw [this] - apply neg_nonneg.1 - rw [← inner_neg_left] - apply Φ_isdescending_inequ2 - linarith - -lemma Φ_isdescending_inequ4 [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, 1 / (τ * ρ) * (inner (ey (n + 1)) ((ey n) - (ey (n + 1)))) - - (1 - τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + M (n + 1) - - ρ * (inner (A₂ (x₂ (n + 1) - x₂ n)) (A₂ (e₂ (n+1))) ) ≥ 0:= by - intro n - let a := 1/(τ*ρ) * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) - - (1-τ)*ρ*‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 - let b0 := M (n+1) - let c := ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))) ) - let d := ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)) - have dleqb: d ≤ b0 := by apply Φ_isdescending_inequ3 - have h : a + d - c ≥ 0 := by apply Φ_isdescending_inequ1 - have : a + b0 - c ≥ 0 := by linarith - exact this + simp [h1, h2, sub_sub_eq_add_sub, sub_add_comm, add_sub_assoc, sub_add_comm, sub_add] + ring_nf; grind + simp [this] + _ = M (n + 1) + ⟪A₂_x_diff, υ_diff⟫ := by + rw [inner_sub_right]; ring + _ = M (n + 1) + ⟪υ_diff, A₂_x_diff⟫ := by + rw [real_inner_comm] + have mono : ⟪υ_diff, A₂_x_diff⟫ ≤ (0 : ℝ) := by + -- move A₂ to the other side via the adjoint + have hEq : (⟪υ_diff, A₂_x_diff⟫ : ℝ) = ⟪A₂T υ_diff, x_diff⟫ := by + rw [ContinuousLinearMap.adjoint_inner_left] + -- from Φ_isdescending_inequ2 we have ⟪-A₂T υ_diff, x_diff⟫ ≥ 0 + have hmono := Φ_isdescending_inequ2 (admm:=admm) (admm_kkt:=admm_kkt) n + have hneg : ⟪-A₂T υ_diff, x_diff⟫ ≥ (0 : ℝ) := by + simpa [υ_diff, x_diff] using hmono + -- hence ⟪A₂T υ_diff, x_diff⟫ ≤ 0 + have hnonpos : ⟪A₂T υ_diff, x_diff⟫ ≤ (0 : ℝ) := by + have h' : 0 ≤ -⟪A₂T υ_diff, x_diff⟫ := by + have h0 : -⟪A₂T υ_diff, x_diff⟫ ≥ 0 := by + simpa [inner_neg_left] using hneg + 
simpa [ge_iff_le] using h0 + exact (neg_nonneg.mp h') + simpa [hEq] using hnonpos + -- conclude by comparing with M + linarith + +lemma Φ_isdescending_inequ4 [Setting E₁ E₂ F admm admm_kkt]: + ∀ n : ℕ+, + 1 / (τ * ρ) * (⟪ey (n + 1), (ey n) - (ey (n + 1))⟫) + - (1 - τ) * ρ * ‖A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b‖ ^ 2 + M (n + 1) + - ρ * (⟪A₂ (x₂ (n + 1) - x₂ n), A₂ (e₂ (n + 1))⟫) ≥ 0 := by + intro n + let a := + 1 / (τ * ρ) * (⟪ey (n + 1), (ey n) - (ey (n + 1))⟫) + - (1 - τ) * ρ * ‖A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b‖ ^ 2 + let b0 := M (n + 1) + let c := ρ * (⟪A₂ (x₂ (n + 1) - x₂ n), A₂ (e₂ (n + 1))⟫) + let d := ρ * (⟪A₂ (x₂ (n + 1) - x₂ n), + A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b⟫) + have dleqb : d ≤ b0 := by apply Φ_isdescending_inequ3 + have h : a + d - c ≥ 0 := by apply Φ_isdescending_inequ1 + have : a + b0 - c ≥ 0 := by linarith + exact this lemma inner_eq_norm {X : Type*} [NormedAddCommGroup X] [InnerProductSpace ℝ X] - (a₁ a₂ : X) : inner a₁ a₂ = 1/2 * (‖a₁‖^2 + ‖a₂‖^2 - ‖a₁- a₂‖^2) := by - rw [norm_sub_sq (𝕜 := ℝ) a₁ a₂];ring_nf; - rfl + (a₁ a₂ : X) : + ⟪a₁, a₂⟫ = 1 / 2 * (‖a₁‖ ^ 2 + ‖a₂‖ ^ 2 - ‖a₁ - a₂‖ ^ 2) := by + rw [norm_sub_sq (𝕜 := ℝ) a₁ a₂]; ring_nf + rfl lemma Φ_isdescending_eq2' [Setting E₁ E₂ F admm admm_kkt]: ∀ n , (τ * ρ) • (A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b) = ey (n+1) - ey n:=by @@ -1195,68 +1246,91 @@ lemma Φ_isdescending_eq2' [Setting E₁ E₂ F admm admm_kkt]: lemma Φ_isdescending_inequ5' [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, 1 / (τ * ρ) * (‖ey n‖^2 - ‖ey (n+1)‖^2) - (2 - τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * (M (n+1)) - ρ * ‖A₂ (x₂ (n+1) - x₂ n)‖^2 - ρ * ‖A₂ (e₂ (n+1))‖^2 + ρ * ‖A₂ (e₂ n)‖ ^ 2 - = 2 * (1 / (τ * ρ) * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) - + = 2 * (1 / (τ * ρ) * (⟪ey (n+1), (ey n) - (ey (n+1))⟫) - (1 - τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 - + M (n+1) - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))))) := by - intro n - have h₄'' : ‖A₂ (x₂') - A₂ (x₂ n)‖ = ‖- (A₂ (x₂ n) - A₂ (x₂'))‖ := by 
simp only [neg_sub] - have h₄' : ‖A₂ (x₂ (n+1) - x₂ n) - A₂ (e₂ (n+1))‖ = ‖A₂ (e₂ n)‖ := by rw [e₂]; rw[e₂]; simp only [map_sub,sub_sub_sub_cancel_left]; rw [h₄'', norm_neg] - have h₆ : ‖ey (n+1) - ey n‖ = (τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖ - := by rw [←Φ_isdescending_eq2', norm_smul]; simp only [norm_mul, Real.norm_eq_abs,mul_eq_mul_right_iff, norm_eq_zero] - left - have h1: τ ≥ 0 := by rcases admm.htau with ⟨h₁, _⟩; apply le_of_lt h₁ - have h2: ρ ≥ 0 := by apply le_of_lt admm.hrho - have h3: |τ| = τ := by apply abs_eq_self.mpr h1 - have h4: |ρ| = ρ := by apply abs_eq_self.mpr h2 - rw [h3, h4] - symm - calc 2 * (1/(τ*ρ) * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) - (1-τ)*ρ*‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + M (n+1) - ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1))))) - _ = 2 / (τ * ρ) * (inner (ey (n+1)) ((ey n)-(ey (n+1)))) + + M (n+1) - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫)) := by + intro n + have h₄'' : ‖A₂ (x₂') - A₂ (x₂ n)‖ = ‖- (A₂ (x₂ n) - A₂ (x₂'))‖ := by + simp [neg_sub] + have h₄' : ‖A₂ (x₂ (n+1) - x₂ n) - A₂ (e₂ (n+1))‖ = ‖A₂ (e₂ n)‖ := by + rw [e₂, e₂]; simp [map_sub, sub_sub_sub_cancel_left, h₄''] + exact + norm_neg ((OptProblem.A₂ E₁) (ADMM.x₂ E₁ F ↑n) - (OptProblem.A₂ E₁) admm_kkt.x₂) + have hτpos : 0 < τ := (admm.htau).1 + have hρpos : 0 < ρ := admm.hrho + have hτρ_ne : τ * ρ ≠ 0 := mul_ne_zero (ne_of_gt hτpos) (ne_of_gt hρpos) + have hτρ_abs : |τ * ρ| = τ * ρ := by + have : 0 ≤ τ * ρ := mul_nonneg (le_of_lt hτpos) (le_of_lt hρpos) + exact abs_of_nonneg this + have h₆ : ‖ey (n+1) - ey n‖ = (τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖ := by + rw [← Φ_isdescending_eq2' (admm:=admm) (admm_kkt:=admm_kkt) (n:=n)] + have hnonneg : 0 ≤ τ * ρ := mul_nonneg (le_of_lt hτpos) (le_of_lt hρpos) + simp [norm_smul, Real.norm_eq_abs]; ring_nf; simp_all + symm + calc + 2 * (1/(τ*ρ) * (⟪ey (n+1), (ey n) - (ey (n+1))⟫) + - (1-τ)*ρ*‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + + M (n+1) + - ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫)) + = + 2 / (τ * 
ρ) * (⟪ey (n+1), (ey n) - (ey (n+1))⟫) - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 2 * ρ * (inner (A₂ (x₂ (n+1) - x₂ n)) (A₂ (e₂ (n+1)))) := by ring - _ = 2 / (τ * ρ) * (1 / 2 * (‖ey (n+1)‖^2 + ‖ey n‖^2 - ‖ey (n+1) - ey n‖^2)-‖ey (n+1)‖^2) + - 2 * ρ * (⟪A₂ (x₂ (n+1) - x₂ n), A₂ (e₂ (n+1))⟫) := by ring + _ = + 2 / (τ * ρ) * (1 / 2 * (‖ey (n+1)‖^2 + ‖ey n‖^2 - ‖ey (n+1) - ey n‖^2) - ‖ey (n+1)‖^2) - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 2 * ρ * ( 1 / 2 * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (x₂ (n+1) - x₂ n) - A₂ (e₂ (n+1))‖^2)) - := by nth_rw 2 [inner_eq_norm]; rw [inner_sub_right]; rw [inner_eq_norm, real_inner_self_eq_norm_sq] - _ = 2 / (τ * ρ) * (1 / 2 * (‖ey n‖^2 - ‖ey (n+1) - ey n‖^2-‖ey (n+1)‖^2)) + - 2 * ρ * ( 1 / 2 * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (x₂ (n+1) - x₂ n) - A₂ (e₂ (n+1))‖^2)) := by + nth_rw 2 [inner_eq_norm] + rw [inner_sub_right, inner_eq_norm, real_inner_self_eq_norm_sq] + _ = + 2 / (τ * ρ) * (1 / 2 * (‖ey n‖^2 - ‖ey (n+1) - ey n‖^2 - ‖ey (n+1)‖^2)) - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 2 * ρ * ( 1 / 2 * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) - := by rw [h₄']; ring_nf - _ = 1 / (τ * ρ) * ((‖ey n‖^2 - ((τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖)^2-‖ey (n+1)‖^2)) + - 2 * ρ * ( 1 / 2 * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) := by + rw [h₄']; ring_nf + _ = + 1 / (τ * ρ) * (‖ey n‖^2 - ((τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖)^2 - ‖ey (n+1)‖^2) - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 1 * ρ * ((‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) - := by rw [h₆]; ring_nf - _ = 1 / (τ * ρ) * ((‖ey n‖^2 -‖ey (n+1)‖^2)) - 1 / (τ * ρ) * (τ * ρ) ^ 2 * (‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖)^2 + - ρ * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2) := by + rw [h₆]; ring_nf + _ = + 1 / (τ * ρ) * (‖ey n‖^2 - 
‖ey (n+1)‖^2) + - 1 / (τ * ρ) * (τ * ρ) ^ 2 * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖^2 - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 1 * ρ * ((‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) - := by ring - _ = 1 / (τ * ρ) * ((‖ey n‖^2 -‖ey (n+1)‖^2)) - (τ * ρ) * (‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖)^2 + - ρ * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2) := by + ring + _ = + 1 / (τ * ρ) * (‖ey n‖^2 - ‖ey (n+1)‖^2) + - (τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖^2 - 2 * (1-τ) * ρ * ‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2 * M (n+1) - - 1 * ρ * ((‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2)) - := by nth_rw 2 [div_eq_mul_inv]; rw [one_mul]; nth_rw 3 [pow_two]; simp [inv_mul_cancel] - left; rw [mul_assoc] - nth_rw 2 [← mul_assoc] - nth_rw 2 [← mul_assoc] - nth_rw 2 [← mul_assoc] - rw [inv_mul_cancel₀, one_mul] - repeat rw [← mul_assoc] - rw [inv_mul_cancel₀, one_mul] - apply ne_of_gt admm.hrho - rcases admm.htau with ⟨h₁, _⟩ - apply ne_of_gt h₁ - _ = 1/(τ*ρ) * (‖ey n‖^2 - ‖ey (n+1)‖^2) + - ρ * (‖A₂ (x₂ (n+1) - x₂ n)‖^2 + ‖A₂ (e₂ (n+1))‖^2 - ‖A₂ (e₂ n)‖^2) := by + -- clean cancellation: (1/(τρ)) * (τρ)^2 = τρ + have hcancel : 1 / (τ * ρ) * ((τ * ρ) ^ 2) = τ * ρ := by + calc + 1 / (τ * ρ) * ((τ * ρ) ^ 2) + = (τ * ρ)⁻¹ * ((τ * ρ) * (τ * ρ)) := by simp [one_div, pow_two, mul_comm, mul_left_comm, mul_assoc] + _ = ((τ * ρ)⁻¹ * (τ * ρ)) * (τ * ρ) := by ac_rfl + _ = 1 * (τ * ρ) := by simp; grind -- [inv_mul_cancel hτρ_ne] + _ = τ * ρ := by simp + -- apply the scalar identity to the squared norm term + have hscale : + 1 / (τ * ρ) * (τ * ρ) ^ 2 * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖^2 + = (τ * ρ) * ‖(A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b)‖^2 := by + simp; grind + simp; grind + _ = + 1/(τ*ρ) * (‖ey n‖^2 - ‖ey (n+1)‖^2) - (2-τ)*ρ*‖A₁ (x₁ (n+1)) + A₂ (x₂ (n+1)) - b‖^2 + 2*(M (n+1)) - -ρ * ‖A₂ (x₂ (n+1) - x₂ n)‖^2 - -ρ * ‖A₂ (e₂ (n+1))‖^2 - +ρ * ‖A₂ (e₂ n)‖^2 - := by ring_nf + - ρ * ‖A₂ (x₂ (n+1) - x₂ 
n)‖^2 + - ρ * ‖A₂ (e₂ (n+1))‖^2 + + ρ * ‖A₂ (e₂ n)‖^2 := by + ring_nf lemma Φ_isdescending_inequ5 [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , 1 / (τ * ρ) * (‖ey n‖ ^ 2 - ‖ey (n+1)‖ ^ 2) - (2 - τ) * ρ * ‖A₁ (x₁ (n + 1)) + A₂ (x₂ (n + 1)) - b‖ ^ 2 + 2 * M (n+1) @@ -1267,7 +1341,7 @@ lemma Φ_isdescending_inequ5 [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+ , · norm_num apply Φ_isdescending_inequ4 n -lemma basic_inequ₁' (n : ℕ+) : 2 * inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ n) + A₂ (x₂ n) - b) +lemma basic_inequ₁' (n : ℕ+) : 2 * ⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ n) + A₂ (x₂ n) - b⟫ ≤ ‖A₂ (x₂ n - x₂ (n + 1))‖ ^ 2 + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖ ^ 2 := by have norm_abs: ‖A₂ (x₂ n - x₂ (n+1))‖^2 = ‖A₂ (x₂ (n+1) - x₂ (n))‖^2:= by @@ -1276,8 +1350,8 @@ lemma basic_inequ₁' (n : ℕ+) : 2 * inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ rw [this] rw [←sub_nonneg]; have : ‖A₂ (x₂ n - x₂ (n+1))‖^2 - + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖^2 - 2 * inner (A₂ (x₂ (n+1) - x₂ (n))) (A₁ (x₁ n) + A₂ (x₂ n) - b) - = ‖A₂ (x₂ n - x₂ (n+1))‖^2 - 2 * inner (A₂ (x₂ (n+1) - x₂ (n))) (A₁ (x₁ n) + A₂ (x₂ n) - b) + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖^2 + + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖^2 - 2 * ⟪A₂ (x₂ (n+1) - x₂ (n)), A₁ (x₁ n) + A₂ (x₂ n) - b⟫ + = ‖A₂ (x₂ n - x₂ (n+1))‖^2 - 2 * ⟪A₂ (x₂ (n+1) - x₂ (n)), A₁ (x₁ n) + A₂ (x₂ n) - b⟫ + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖^2 := by ring_nf rw [this, norm_abs, ← norm_sub_sq_real] apply pow_two_nonneg @@ -1289,8 +1363,7 @@ lemma M_le [Setting E₁ E₂ F admm admm_kkt](n : ℕ+)(htau : 0 < τ ∧ τ = (1 - τ) * ρ * (‖A₂ (x₂ n - x₂ (n + 1))‖ ^ 2 + ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖ ^ 2 ) := by ring rw [this] - have : 2 * M (n + 1) = (1 - τ) * ρ * ( 2 * inner (A₂ (x₂ (n + 1) - x₂ (n))) - (A₁ (x₁ n) + A₂ (x₂ n) - b) ) := by + have : 2 * M (n + 1) = (1 - τ) * ρ * ( 2 * ⟪A₂ (x₂ (n + 1) - x₂ (n)), A₁ (x₁ n) + A₂ (x₂ n) - b⟫ ) := by dsimp [M] have : (n + 1).natPred = n := rfl simp only [this] @@ -1370,7 +1443,7 @@ lemma Φ_isdescending_inequ6 [Setting E₁ E₂ F admm admm_kkt](htau : 0 < τ + τ * ρ * ‖A₂ (x₂ (n+1) - x₂ n)‖^2) := by rfl 
exact res -lemma basic_inequ₂ (n : ℕ+) : - 2 * inner (A₂ (x₂ (n+1) - x₂ n)) (A₁ (x₁ n) + A₂ (x₂ n) - b) +lemma basic_inequ₂ (n : ℕ+) : - 2 * ⟪A₂ (x₂ (n+1) - x₂ n), A₁ (x₁ n) + A₂ (x₂ n) - b⟫ ≤ τ * ‖A₂ (x₂ (n+1) - x₂ n)‖^2 + 1 / τ * ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖ ^ 2 := by rw [← sub_nonneg] have h : τ ≥ 0 := by @@ -1415,18 +1488,18 @@ lemma Φ_isdescending_inequ7 [Setting E₁ E₂ F admm admm_kkt](htau : 1 < τ ) let c₁ := (1 - (1 / τ)) * ρ * ‖e_sum‖^2; let c₂ := (1 - (1 / τ)) * ρ * ‖e_sum_1‖^2 let d₁ := (1 + τ - τ^2) * ρ * ‖A₂ x_diff‖^2; let d₂ := (1 + 1 / τ - τ) * ρ * ‖r_n_1‖^2 have M_inequ : 2 * (M (n+1)) ≤ (τ^2 - τ) * ρ * ‖A₂ x_diff‖ ^ 2 + (1 - 1 / τ) * ρ * ‖r_n‖ ^ 2 := by - have h1: 2 * (M (n+1)) = (τ - 1) * ρ * (-2 * (inner (A₂ x_diff) (r_n))) := by + have h1: 2 * (M (n+1)) = (τ - 1) * ρ * (-2 * ⟪A₂ x_diff, r_n⟫) := by calc - _= 2 * (1 - τ) * ρ * (inner (A₂ x_diff) (r_n)) := by + _= 2 * (1 - τ) * ρ * (⟪A₂ x_diff, r_n⟫) := by dsimp [M,x_diff,r_n] have : (n + 1).natPred = n := rfl simp only [this] ring_nf - _= (τ - 1) * ρ * (-2 * (inner (A₂ x_diff) (r_n))) := by ring + _= (τ - 1) * ρ * (-2 * (⟪A₂ x_diff, r_n⟫)) := by ring rw [h1] - have h2: (τ - 1) * ρ * (-2 * (inner (A₂ x_diff) (r_n))) ≤ (τ - 1) * ρ * (τ * ‖A₂ x_diff‖^2 + have h2: (τ - 1) * ρ * (-2 * ⟪A₂ x_diff, r_n⟫) ≤ (τ - 1) * ρ * (τ * ‖A₂ x_diff‖^2 + 1 / τ * ‖A₁ (x₁ n) + A₂ (x₂ n) - b‖^2) := by - have iequ: -2 * (inner (A₂ x_diff) (r_n)) ≤ τ * ‖A₂ x_diff‖^2 + (1/τ) * ‖r_n‖^2 := by + have iequ: -2 * ⟪A₂ x_diff, r_n⟫ ≤ τ * ‖A₂ x_diff‖^2 + (1/τ) * ‖r_n‖^2 := by simp only [x_diff, r_n]; apply basic_inequ₂ have cpos: (τ - 1) * ρ ≥ 0 := by apply mul_nonneg_iff.2 @@ -1472,15 +1545,24 @@ lemma τ_segment [Setting E₁ E₂ F admm admm_kkt] : (0 < τ ∧ τ ≤ 1) ∨ lemma τ_min1_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : min τ (1 + τ - τ ^ 2) = τ := by rcases h with ⟨h1, h2⟩ apply min_eq_left - have h3: τ ^ 2 ≤ 1 := by - apply pow_le_one;linarith;linarith - linarith + have h3 : τ ^ 2 ≤ 1 := by + have hτ0 : 0 ≤ τ := 
le_of_lt h1 + have hmul : τ * τ ≤ τ * 1 := mul_le_mul_of_nonneg_left h2 hτ0 + simp [pow_two]; grind + have hnonneg : 0 ≤ 1 - τ ^ 2 := sub_nonneg.mpr h3 + have := add_le_add_left hnonneg τ + simpa [sub_eq_add_neg, add_comm, add_left_comm, add_assoc] using this lemma τ_min1_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1 ) : min τ (1 + τ - τ ^ 2) = 1 + τ - τ ^ 2 := by apply min_eq_right - have : 1 < τ ^ 2 := by - apply one_lt_pow;exact h;linarith - linarith + have hτpos : 0 < τ := lt_trans zero_lt_one h + have hτlt : τ < τ ^ 2 := by + have := mul_lt_mul_of_pos_right h hτpos + simpa [pow_two] using this + have hge : 1 ≤ τ ^ 2 := le_of_lt (lt_trans h hτlt) + have hnonpos : 1 - τ ^ 2 ≤ 0 := sub_nonpos.mpr hge + have := add_le_add_left hnonpos τ + simpa [sub_eq_add_neg, add_comm, add_left_comm, add_assoc] using this lemma τ_min2_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : min 1 (1 + 1 / τ - τ ) = 1 := by rcases h with ⟨h1, h2⟩ @@ -1494,12 +1576,17 @@ lemma τ_min2_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : m lemma τ_min2_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1 ) : min 1 (1 + 1 / τ - τ ) = 1 + 1 / τ - τ := by apply min_eq_right - have : τ > 1 / τ := - calc - _ > 1 := h - _ > 1 / τ := by - rw [one_div, ← inv_one];apply inv_lt_inv_of_lt;linarith;exact h - linarith + have hτpos : 0 < τ := lt_trans zero_lt_one h + have hle : (1 : ℝ) ≤ τ := le_of_lt h + have h_inv_le_one : 1 / τ ≤ 1 := by + have h1pos : (0 : ℝ) < 1 := zero_lt_one + simp [one_div]; · expose_names; exact inv_le_one_of_one_le₀ hle --using inv_le_inv_of_le h1pos hle + have h_div_le : 1 / τ ≤ τ := le_trans h_inv_le_one hle + have : 1 + 1 / τ - τ ≤ 1 := by + have hdiff_nonpos : 1 / τ - τ ≤ 0 := sub_nonpos.mpr h_div_le + have := add_le_add_left hdiff_nonpos (1 : ℝ) + simpa [sub_eq_add_neg] using this + exact this lemma τ_min3_1 [Setting E₁ E₂ F admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : max (1 - τ) (1 - 1 / τ) = 1 - τ := by rcases h with ⟨h1, h2⟩ @@ -1513,12 +1600,25 @@ lemma τ_min3_1 [Setting E₁ E₂ F 
admm admm_kkt] (h: 0 < τ ∧ τ ≤ 1) : m lemma τ_min3_2 [Setting E₁ E₂ F admm admm_kkt] (h: τ > 1) : max (1 - τ) (1 - 1 / τ) = 1 - 1 / τ := by apply max_eq_right - have : τ > 1 / τ := - calc - _ > 1 := h - _ > 1 / τ := by - rw [one_div, ← inv_one];apply inv_lt_inv_of_lt;linarith;exact h - linarith + have hτpos : 0 < τ := lt_trans zero_lt_one h + have hτsq_ge1 : 1 ≤ τ ^ 2 := by + have htlt : τ < τ ^ 2 := by + simpa [pow_two] using (mul_lt_mul_of_pos_left h hτpos) + exact le_of_lt (lt_trans h htlt) + have h_div_le : 1 / τ ≤ τ := by + have hnonneg : 0 ≤ τ⁻¹ := inv_nonneg.mpr (le_of_lt hτpos) + have hmul : 1 * τ⁻¹ ≤ τ ^ 2 * τ⁻¹ := mul_le_mul_of_nonneg_right hτsq_ge1 hnonneg + have hτne : τ ≠ 0 := ne_of_gt hτpos + have hr : τ ^ 2 * τ⁻¹ = τ := by + calc + τ ^ 2 * τ⁻¹ = τ * (τ * τ⁻¹) := by + simp [pow_two, mul_assoc] + _ = τ * 1 := by simp; grind + _ = τ := by simp + simpa [one_div, one_mul, hr] using hmul + have : 1 - τ ≤ 1 - 1 / τ := by + simpa [sub_eq_add_neg] using (sub_le_sub_left h_div_le 1) + exact this lemma Φ_isdescending [Setting E₁ E₂ F admm admm_kkt]: ∀ n : ℕ+, (Φ n ) - (Φ (n + 1) ) ≥ (min τ (1 + τ - τ ^ 2) )* ρ * ‖A₂ (x₂ n - x₂ (n + 1))‖ ^ 2 + (min 1 (1 + 1 / τ - τ )) * ρ * diff --git a/Optlib/Algorithm/ADMM/Scheme.lean b/Optlib/Algorithm/ADMM/Scheme.lean index 8a701f6..9a8a9d8 100644 --- a/Optlib/Algorithm/ADMM/Scheme.lean +++ b/Optlib/Algorithm/ADMM/Scheme.lean @@ -1,5 +1,6 @@ import Optlib.Function.Proximal import Mathlib.Topology.MetricSpace.Sequences +import Mathlib noncomputable section @@ -30,7 +31,7 @@ def Admm_sub_Isunique {E : Type*}(f : E → ℝ)(x : E)(_h : IsMinOn f univ x): -- Augmented Lagrangian Function def Augmented_Lagrangian_Function (opt : OptProblem E₁ E₂ F) (ρ : ℝ) : E₁ × E₂ × F → ℝ := fun (x₁ , x₂ , y) => (opt.f₁ x₁) + (opt.f₂ x₂) + - inner y ((opt.A₁ x₁) + (opt.A₂ x₂) - opt.b) + ρ / 2 * ‖(opt.A₁ x₁) + (opt.A₂ x₂) - opt.b‖ ^ 2 + @inner ℝ F _ y ((opt.A₁ x₁) + (opt.A₂ x₂) - opt.b) + ρ / 2 * ‖(opt.A₁ x₁) + (opt.A₂ x₂) - opt.b‖ ^ 2 -- The basic 
iteration format of ADMM class ADMM extends (OptProblem E₁ E₂ F) where diff --git a/Optlib/Algorithm/ADMM/Theroem_converge.lean b/Optlib/Algorithm/ADMM/Theorem_converge.lean similarity index 99% rename from Optlib/Algorithm/ADMM/Theroem_converge.lean rename to Optlib/Algorithm/ADMM/Theorem_converge.lean index 4563a6b..bc7e5f2 100644 --- a/Optlib/Algorithm/ADMM/Theroem_converge.lean +++ b/Optlib/Algorithm/ADMM/Theorem_converge.lean @@ -82,10 +82,12 @@ lemma nonneg₁ [Setting E₁ E₂ F admm admm_kkt]: min τ (1 + τ - τ ^ 2) > lemma nonneg₂ [Setting E₁ E₂ F admm admm_kkt]: min 1 (1 + 1 / τ - τ) > 0 := by rcases admm.htau with ⟨h1, _⟩ - have h2: 1 + 1/τ - τ > 0 := by - field_simp;rw [← sq] - have h3 : 1 + τ - τ ^ 2 > 0 := nonneg_prime - linarith + have h2 : 1 + 1 / τ - τ > 0 := by + have h3 : 0 < 1 + τ - τ ^ 2 := nonneg_prime + have hquot : 0 < (1 + τ - τ ^ 2) / τ := by exact div_pos h3 h1 + have hrew : (1 + τ - τ ^ 2) / τ = 1 + 1 / τ - τ := by + field_simp [one_div]; simp; grind + simpa [hrew] using hquot apply lt_min one_pos h2 lemma Φ₁_nonneg [Setting E₁ E₂ F admm admm_kkt]: @@ -674,7 +676,7 @@ lemma Φ_Summable₁' [Setting E₁ E₂ F admm admm_kkt] : intro n let φ₀ := (fun i : ℕ => Φ i.succ) have : ∀ i ∈ Finset.range n , (φ₀ i)-(φ₀ (i+1)) = (Φ i.succ ) - (Φ (i.succ + 1)) := by - simp only [Finset.mem_range, Nat.succ_eq_add_one, implies_true] + simp only [Finset.mem_range, Nat.succ_eq_add_one]; grind have h : Finset.range n =Finset.range n := rfl rw[← Finset.sum_congr h this , Finset.sum_range_sub'] simp only [φ₀] @@ -705,7 +707,7 @@ Summable g:=by apply hgf apply NNReal.summable_of_le this rw[← NNReal.summable_coe] - exact hf + exact hf; grind lemma Φ_inequ₁ [Setting E₁ E₂ F admm admm_kkt] (m : ℕ+): (min 1 (1 + 1 / τ - τ )) * ρ * ‖A₁ (e₁ (m+1)) + A₂ (e₂ (m+1))‖ ^ 2 ≤ Φ m - Φ (m + 1) := by diff --git a/Optlib/Algorithm/BCD/Convergence.lean b/Optlib/Algorithm/BCD/Convergence.lean index 35be560..96262b9 100644 --- a/Optlib/Algorithm/BCD/Convergence.lean +++ 
b/Optlib/Algorithm/BCD/Convergence.lean @@ -3,8 +3,8 @@ Copyright (c) 2024 Chenyi Li, Bowen Yang, Yifan Bai. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Bowen Yang, Yifan Bai -/ +import Mathlib.Tactic import Optlib.Algorithm.BCD.Scheme - /-! # Block Coordinate Descent @@ -59,7 +59,6 @@ lemma f_subdiff_block (hf : u ∈ f_subdifferential f x) (hg : v ∈ f_subdiffer have ε2pos : 0 < ε / 2 := by positivity filter_upwards [Eventually.prod_nhds (hf _ ε2pos) (hg _ ε2pos)] with z ⟨hfz, hyz⟩ rw [WithLp.prod_inner_apply] - simp only [WithLp.sub_fst, WithLp.sub_snd] let z' : WithLp 2 (E × F) := (x, y) show f z.1 + g z.2 - (f x + g y) - (⟪u, z.1 - x⟫ + ⟪v, z.2 - y⟫) ≥ -ε * ‖z - z'‖ have h1 : ‖z.1 - x‖ ≤ ‖z - z'‖ := fst_norm_le_prod_L2 (z - z') @@ -91,16 +90,16 @@ theorem PALM_Descent (h : E → ℝ) {h' : E → E} (Lₕ : NNReal) rw [this] at u₁prox have : u₁ - (u - t • h' u) = (u₁ - u) + t • h' u := by abel rw [this] at u₁prox - simp [norm_add_sq_real, this] at u₁prox + simp [norm_add_sq_real] at u₁prox have ha : t * σ u₁ + ‖u₁ - u‖ ^ 2 / 2 + ⟪u₁ - u, t • h' u⟫ ≤ t * σ u := by linarith [u₁prox] rw [inner_smul_right] at ha - have : t * (‖u₁ - u‖ ^ 2 / (2 * t)) = ‖u₁ - u‖ ^ 2 / 2 := by field_simp; ring + have : t * (‖u₁ - u‖ ^ 2 / (2 * t)) = ‖u₁ - u‖ ^ 2 / 2 := by field_simp rw [← this] at ha have : t * σ u₁ + t * (‖u₁ - u‖ ^ 2 / (2 * t)) + t * ⟪u₁ - u, h' u⟫ = t * (σ u₁ + ‖u₁ - u‖ ^ 2 / (2 * t) + ⟪u₁ - u, h' u⟫) := by ring rw [this] at ha have hne : ⟪u₁ - u, h' u⟫ ≤ σ u - σ u₁ - ‖u₁ - u‖ ^ 2 / (2 * t) := by - linarith [(mul_le_mul_left h₅).1 ha] + linarith [(mul_le_mul_iff_right₀ h₅).1 ha] rw [real_inner_comm] at hne calc _ ≤ h u + σ u - σ u₁ - ‖u₁ - u‖ ^ 2 / (2 * t) + ↑Lₕ / 2 * ‖u₁ - u‖ ^ 2 + σ u₁ := by @@ -156,7 +155,7 @@ theorem Sufficient_Descent1 (γ : ℝ) (hγ : γ > 1) _ = _ := by simp only [WithLp.prod_norm_sq_eq_of_L2] rw [Prod.fst_sub, Prod.snd_sub, BCD.z, BCD.z] - ring_nf; simp + ring_nf; simp; grind /- the value is 
monotone -/ theorem Sufficient_Descent2 (γ : ℝ) (hγ : γ > 1) @@ -274,7 +273,7 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) have cpos' : (alg.c k)⁻¹ ≥ 0 := by simp; apply le_of_lt (alg.cpos γ hγ ck k) have dpos' : (alg.d k)⁻¹ ≥ 0 := by simp; apply le_of_lt (alg.dpos γ hγ dk k) have h1 : ‖(alg.subdiff k).1‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by - simp only [BCD.subdiff, BCD.A_kx, Prod.fst_add, grad_fun_comp, grad_comp, sub_add]; + simp only [BCD.subdiff]; rw [A_k, A_kx, A_ky]; simp let a := (alg.c k)⁻¹ • (alg.x k - alg.x (k + 1)) calc @@ -296,7 +295,7 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) _ = (1 / alg.c k) * ‖(alg.z (k + 1) - alg.z k).1‖ := by rw [z]; simp; left; rw [z]; simp _ ≤ (1 / alg.c k) * ‖alg.z (k + 1) - alg.z k‖ := by have : ‖(alg.z (k + 1) - alg.z k).1‖ ≤ ‖alg.z (k + 1) - alg.z k‖ := fst_norm_le_prod_L2 _ - simp; apply mul_le_mul_of_nonneg_left this cpos' + simp; apply mul_le_mul_of_nonneg_left this; apply cpos' _ = (γ * l) * ‖alg.z (k + 1) - alg.z k‖ := by rw [ck k]; simp have inequ₂ : ‖gradient H (alg.x (k + 1), alg.y (k + 1)) - gradient H (alg.x k, alg.y k)‖ ≤ l * ‖alg.z (k+1) - alg.z k‖ := by @@ -307,7 +306,7 @@ theorem Ψ_subdiff_bound (γ : ℝ) (hγ : γ > 1) _ = l * ‖alg.z (k+1) - alg.z k‖ := by repeat rw [z]; simp; left; rfl linarith have h2 : ‖(alg.subdiff k).2‖ ≤ l * (γ + 1) * ‖alg.z (k + 1) - alg.z k‖ := by - simp only [BCD.subdiff, BCD.A_kx, Prod.fst_add, grad_fun_comp, grad_comp, sub_add]; + simp only [BCD.subdiff] rw [A_k, A_kx, A_ky]; simp let a := (alg.d k)⁻¹ • (alg.y k - alg.y (k + 1)) calc @@ -452,7 +451,18 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : rintro ε epos simp at defle; simp by_cases Cpos : 0 < C - · rcases defle (ε / (C / (γ * l))) (by field_simp [alg.lpos, Cpos]) with ⟨nn,ieq⟩ + · rcases + defle (ε / (C / (γ * ↑l))) + (by + have hγpos : (0 : ℝ) < γ := by + have : γ > 1 := hγ; linarith + have hlpos : (0 : ℝ) < (↑l : ℝ) := alg.lpos + have hden : 0 < C / (γ * (↑l)) := by + exact 
div_pos Cpos (mul_pos hγpos hlpos) + have hnum : 0 < ε := epos + have : 0 < ε / (C / (γ * (↑l))) := div_pos hnum hden + simpa [div_div_eq_mul_div, mul_comm, mul_left_comm, mul_assoc] using this) + with ⟨nn, ieq⟩ use nn; rintro b nleb; rw [ck] calc _ ≤ ‖k b‖ * ‖(1 / (γ * ↑l)) • grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖ @@ -460,7 +470,13 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : _ ≤ ε / (C / (γ * ↑l))*‖(1 / (γ * ↑l)) • grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖:= by apply mul_le_mul (le_of_lt (ieq b nleb)); trivial repeat apply norm_nonneg - field_simp [alg.lpos, Cpos]; positivity + have hγlpos : 0 < γ * (↑l : ℝ) := by + have hγpos : (0 : ℝ) < γ := by + have : γ > 1 := hγ + linarith + exact mul_pos hγpos alg.lpos + field_simp [alg.lpos, Cpos, hγlpos] at * + positivity _ = ε / (C / (γ * ↑l))*(1 / (γ * ↑l)) * ‖grad_fst H (alg.y (α b - 1)) (alg.x (α b - 1))‖:= by rw [mul_assoc]; apply mul_eq_mul_left_iff.mpr left; exact norm_smul_of_nonneg (by positivity) (grad_fst H _ _) @@ -505,13 +521,12 @@ lemma fconv (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : apply norm_nonneg exact assx calc - ‖z_.1 - alg.x (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by - apply (div_lt_div_right _).mpr - apply this - linarith - _=(ε/(γ*l)/3):= by - apply mul_div_cancel_left₀ - linarith + ‖z_.1 - alg.x (α x - 1)‖ ^ 2 / 2 + < (2 * (ε / (γ * l) / 3)) / 2 := by + have h := this + linarith [h] + _ = (ε / (γ * l) / 3) := by + ring simp at h1 h2 h3; simp only [ck] at h1 h2 h3; simp rcases h1 with ⟨m1,ie1⟩ rcases h2 with ⟨m2,ie2⟩ @@ -761,14 +776,12 @@ lemma gconv (γ : ℝ) (hγ : γ > 1) (ck: ∀ k, alg.c k = 1 / (γ * l)) (dk: refine (Real.lt_sqrt ?hy).mp ?_ apply norm_nonneg exact assx - calc - ‖z_.2 - alg.y (α x - 1)‖ ^ 2 / 2<(2*(ε/(γ*l)/3))/2:= by - apply (div_lt_div_right _).mpr - apply this - linarith - _=(ε/(γ*l)/3):= by - apply mul_div_cancel_left₀ - linarith + have h_sq : ‖z_.2 - alg.y (α x - 1)‖ ^ 2 / 2 < ε / (γ * l) / 3 := by + have h := this + 
have hpos : (0 : ℝ) < 2 := by norm_num + have this' := (div_lt_div_of_pos_right h hpos) + rwa [mul_div_cancel_left₀ _ (ne_of_gt hpos)] at this' + exact h_sq simp at h1 h2 h3 simp only [dk] at h1 h2 h3 simp @@ -820,7 +833,7 @@ lemma limitset_property_1 (γ : ℝ) (hγ : γ > 1) have hz : ∀ (n : ℕ), alg.z n ∈ alg.z '' univ:= by intro n; use n; constructor; exact Set.mem_univ n; rfl rcases (tendsto_subseq_of_bounded (bd) (hz)) with ⟨a, _ , φ, ⟨hmφ,haφ⟩⟩ use a; simp [limit_set] - rw [mapClusterPt_iff]; intro s hs + rw [mapClusterPt_iff_frequently]; intro s hs apply Filter.frequently_iff.mpr intro U hU; rw [Filter.mem_atTop_sets] at hU rcases hU with ⟨ax,hax⟩; rw [mem_nhds_iff] at hs @@ -935,21 +948,33 @@ lemma limitset_property_3 (γ : ℝ) (hγ : γ > 1) have sum_ne_zero : ∀ z, (EMetric.infEdist z A).toReal + (EMetric.infEdist z B).toReal ≠ 0:= by intro z eq0 have inA : z ∈ A := by - apply EMetric.mem_closure_iff_infEdist_zero.mpr - have : (EMetric.infEdist z A).toReal = 0 := by - linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), - @ENNReal.toReal_nonneg (EMetric.infEdist z B)] - exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) - ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_a) (id (Eq.symm this)))).symm - simp; constructor; rw [isOpen_compl_iff]; apply IsClosed.inter isClosed_setOf_clusterPt closea - have inB : z ∈ B :=by - apply EMetric.mem_closure_iff_infEdist_zero.mpr - have : (EMetric.infEdist z B).toReal = 0 := by - linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), - @ENNReal.toReal_nonneg (EMetric.infEdist z B)] - exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) - ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_b) (id (Eq.symm this)))).symm - simp; constructor; rw [isOpen_compl_iff]; apply IsClosed.inter isClosed_setOf_clusterPt closeb + -- first show z ∈ closure A from infEdist z A = 0 + have hzcl : z ∈ closure A := by + apply EMetric.mem_closure_iff_infEdist_zero.mpr + have h0 : 
(EMetric.infEdist z A).toReal = 0 := by + linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), + @ENNReal.toReal_nonneg (EMetric.infEdist z B)] + exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) + ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_a) (id (Eq.symm h0)))).symm + -- A is closed, so closure A = A + have hAclosed : IsClosed A := by + have hlim : IsClosed (limit_set alg.z) := isClosed_setOf_clusterPt + simpa [A] using hlim.inter closea + simpa [hAclosed.closure_eq] using hzcl + have inB : z ∈ B := by + -- first show z ∈ closure B from infEdist z B = 0 + have hzcl : z ∈ closure B := by + apply EMetric.mem_closure_iff_infEdist_zero.mpr + have h0 : (EMetric.infEdist z B).toReal = 0 := by + linarith [eq0, @ENNReal.toReal_nonneg (EMetric.infEdist z A), + @ENNReal.toReal_nonneg (EMetric.infEdist z B)] + exact (((fun {x y} hx hy ↦ (ENNReal.toReal_eq_toReal_iff' hx hy).mp) + ENNReal.top_ne_zero.symm (Metric.infEdist_ne_top nez_b) (id (Eq.symm h0)))).symm + -- B is closed, so closure B = B + have hBclosed : IsClosed B := by + have hlim : IsClosed (limit_set alg.z) := isClosed_setOf_clusterPt + simpa [B] using hlim.inter closeb + simpa [hBclosed.closure_eq] using hzcl obtain hzin : z ∈ A ∩ B := mem_inter inA inB rw [disjoint_AB] at hzin; contradiction have contω : Continuous ω := by @@ -1086,7 +1111,7 @@ lemma limitset_property_4 (γ : ℝ) (hγ : γ > 1) have monopsi : Antitone (alg.ψ ∘ alg.z) := antitone_nat_of_succ_le (Sufficient_Descent2 γ hγ ck dk) rcases tendsto_of_antitone monopsi with h1 | h2 - obtain notbd := unbounded_of_tendsto_atBot h1 + obtain notbd := Filter.not_bddBelow_of_tendsto_atBot h1 apply absurd notbd; push_neg exact BddBelow.mono (by simp; apply range_comp_subset_range) lbdψ; exact h2 rcases decent_ψ with ⟨ψ_final, hψ⟩ @@ -1139,7 +1164,7 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) (ck : ∀ k, alg.c k = 1 / (γ * l)) (dk : ∀ k, alg.d k = 1 / (γ * l)) (bd : Bornology.IsBounded (alg.z '' univ)) (hψ : KL_function 
alg.ψ) (lbdψ : BddBelow (alg.ψ '' univ)): ∃ M : ℝ, ∀ n, - ∑ k in Finset.range n, ‖alg.z (k + 1) - alg.z k‖ ≤ M := by + ∑ k ∈ Finset.range n, ‖alg.z (k + 1) - alg.z k‖ ≤ M := by have :∃ z_∈ closure (alg.z '' univ), ∃ α:ℕ → ℕ,StrictMono α∧Tendsto (fun n ↦ alg.z (α n)) atTop (𝓝 z_):= by have hcs : IsSeqCompact (closure (alg.z '' univ)) := by @@ -1268,15 +1293,18 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) repeat rw [← ENNReal.ofReal_mul] apply (ENNReal.ofReal_le_ofReal_iff _).2 apply (mul_le_mul_iff_of_pos_right hposd).mpr hub - field_simp; simp [c]; simp; field_simp; simp [c] + all_goals + try { exact mul_nonneg (le_of_lt ρpos) (norm_nonneg _) } + try { exact norm_nonneg _ } + try { simp [c] } + aesop _ ≥ (EMetric.infEdist 0 (subdifferential ψ (z (n + LL + 1)))) * ENNReal.ofReal (d n) := by apply mul_le_mul_right' this _ ≥ 1 := by rw [mul_comm]; exact (ieq (n + LL + 1) (by linarith)).2 - simp - field_simp - simp [c] - field_simp - simp [c] + all_goals + try { simp [c] } + try { exact mul_nonneg (le_of_lt ρpos) (norm_nonneg _) } + aesop have hsd : ρ1 / 2 * (c (n + 1)) ^ 2 ≤ b n := by obtain h := suff_des.2 (n + LL + 1) rw [add_right_comm n LL 1] at h @@ -1357,12 +1385,12 @@ theorem Limited_length (γ : ℝ) (hγ : γ > 1) _ ≤ alg.ψ (alg.z i) -alg.ψ (alg.z (i + 1)) := suff_des.2 i _ = 0 := by simp [this i ige,this (i+1) (Nat.le_add_right_of_le ige)] apply dist_eq_zero.mp (by rw [NormedAddCommGroup.dist_eq, this]) - use ∑ k in Finset.range N, ‖alg.z (k + 1) - alg.z k‖ + use ∑ k ∈ Finset.range N, ‖alg.z (k + 1) - alg.z k‖ intro n; by_cases nlen : n ≤ N · refine Finset.sum_le_sum_of_subset_of_nonneg (GCongr.finset_range_subset_of_le nlen) ?_ exact fun a _ _ ↦norm_nonneg (alg.z (a + 1) - alg.z a) push_neg at nlen - have eq0 : ∑ i in (Finset.range n \ Finset.range N), ‖alg.z (i + 1) - alg.z i‖ = 0 := by + have eq0 : ∑ i ∈ (Finset.range n \ Finset.range N), ‖alg.z (i + 1) - alg.z i‖ = 0 := by apply Finset.sum_eq_zero; rintro x xin; simp at xin exact norm_sub_eq_zero_iff.mpr 
(eq0 x xin.2) refine Finset.sum_sdiff_le_sum_sdiff.mp ?_ @@ -1380,7 +1408,7 @@ theorem Convergence_to_critpt (γ : ℝ) (hγ : γ > 1) apply cauchySeq_of_summable_dist rcases Limited_length γ hγ ck dk bd hψ lbdψ with ⟨M,sumle⟩ apply @summable_of_sum_range_le _ M _ _ - intro n; simp; exact dist_nonneg + intro n; exact dist_nonneg intro n calc _ = ∑ k ∈ Finset.range n, ‖alg.z (k + 1) - alg.z k‖ := diff --git a/Optlib/Algorithm/BCD/Scheme.lean b/Optlib/Algorithm/BCD/Scheme.lean index acc6539..290a613 100644 --- a/Optlib/Algorithm/BCD/Scheme.lean +++ b/Optlib/Algorithm/BCD/Scheme.lean @@ -12,6 +12,7 @@ import Optlib.Function.Proximal import Optlib.Differential.Subdifferential import Mathlib.Topology.EMetricSpace.Lipschitz + /-! # Block Coordinate Descent @@ -32,7 +33,7 @@ variable [NormedAddCommGroup E] [InnerProductSpace ℝ E] variable [NormedAddCommGroup F] [InnerProductSpace ℝ F] variable {H : WithLp 2 (E × F) → ℝ} -lemma diff_from_l2 (h : Differentiable ℝ H) : @Differentiable ℝ _ (E × F) _ _ ℝ _ _ H := by +lemma diff_from_l2 (h : Differentiable ℝ H) : @Differentiable ℝ _ (E × F) _ _ _ ℝ _ _ _ H := by apply Differentiable.comp h apply IsBoundedLinearMap.differentiable exact instIsBoundedLinearMapL2equiv @@ -40,12 +41,12 @@ lemma diff_from_l2 (h : Differentiable ℝ H) : @Differentiable ℝ _ (E × F) _ theorem diff_prod₁ (h : Differentiable ℝ H) (y : F) : Differentiable ℝ (fun x ↦ H (x, y)) := by apply Differentiable.comp (diff_from_l2 h) - exact Differentiable.prod differentiable_id' (differentiable_const y) + exact Differentiable.prodMk differentiable_fun_id (differentiable_const y) theorem diff_prod₂ (h : Differentiable ℝ H) (x : E) : Differentiable ℝ (fun y ↦ H (x, y)) := by apply Differentiable.comp (diff_from_l2 h) - exact Differentiable.prod (differentiable_const x) differentiable_id' + exact Differentiable.prodMk (differentiable_const x) differentiable_fun_id end diff @@ -185,7 +186,7 @@ instance Proper_Prod : ProperSpace (WithLp 2 (E × F)) where constructor · exact 
le_trans this hball · exact le_trans this ((add_comm (‖x' - x‖ ^ 2) _) ▸ hball) - apply IsCompact.of_isClosed_subset h (@Metric.isClosed_ball (WithLp 2 (E × F)) _ _ _) hsub + apply IsCompact.of_isClosed_subset h (@Metric.isClosed_closedBall (WithLp 2 (E × F)) _ _ _) hsub /-- Assumption: f and g are lower semicontinuous, H is continuously differentiable diff --git a/Optlib/Algorithm/GD/GradientDescent.lean b/Optlib/Algorithm/GD/GradientDescent.lean index 04590cf..81a5545 100644 --- a/Optlib/Algorithm/GD/GradientDescent.lean +++ b/Optlib/Algorithm/GD/GradientDescent.lean @@ -4,7 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Ziyu Wang, Zaiwen Wen -/ import Optlib.Function.Lsmooth - +import Mathlib.Tactic /-! # GradientDescent @@ -58,7 +58,7 @@ lemma mono_sum_prop_primal' (mono : ∀ k : ℕ, f (g (k + 1)) ≤ f (g k)): + f (g (n.succ + 1)) / (n.succ + 1) := by rw [Finset.sum_range_succ, add_div] _ ≥ n.succ * f (g (n.succ + 1)) / (n.succ + 1) + f (g (n.succ + 1)) / (n.succ + 1) := by simp; exact h - _ = f (g (n + 2)) := by field_simp; ring_nf + _ = f (g (n + 2)) := by field_simp -- the sumation property of the gradient method omit [NormedAddCommGroup E] in @@ -75,7 +75,7 @@ lemma mono_sum_prop (mono : ∀ k: ℕ, f (g (k + 1)) ≤ f (g k)): _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) / (j + 2) - f xm * 1 + f xm := by rw [Nat.succ_eq_add_one j]; simp - ring_nf; rw [add_assoc, one_add_one_eq_two] + ring_nf _ = (Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) / (j + 2) - f xm * ((j + 2) / (j + 2)) + f xm := by field_simp _ = ((Finset.range (j.succ + 1)).sum (fun (k : ℕ) ↦ f (g (k + 1))) @@ -88,13 +88,13 @@ noncomputable section gradient_descent variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] -class GradientDescent (f : E → ℝ) (f' : E → E) (x0 : E) := +class GradientDescent (f : E → ℝ) (f' : E → E) (x0 : E) where (x : ℕ → E) (a : ℕ → ℝ) (l : NNReal) (diff : ∀ x₁, 
HasGradientAt f (f' x₁) x₁) (smooth : LipschitzWith l f') (update : ∀ k : ℕ, x (k + 1) = x k - a k • f' (x k)) (hl : l > 0) (step₁ : ∀ k, a k > 0) (initial : x 0 = x0) -class Gradient_Descent_fix_stepsize (f : E → ℝ) (f' : E → E) (x0 : E) := +class Gradient_Descent_fix_stepsize (f : E → ℝ) (f' : E → E) (x0 : E) where (x : ℕ → E) (a : ℝ) (l : NNReal) (diff : ∀ x₁, HasGradientAt f (f' x₁) x₁) (smooth : LipschitzWith l f') (update : ∀ k : ℕ, x (k + 1) = x k - a • f' (x k)) @@ -123,7 +123,7 @@ variable {alg : Gradient_Descent_fix_stepsize f f' x₀} -- equivalent description of the convexity of a smooth function lemma convex_function (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (hfun: ConvexOn ℝ Set.univ f) : - ∀ x y, f x ≤ f y + inner (f' x) (x - y) := by + ∀ x y, f x ≤ f y + ⟪f' x, x - y⟫_ℝ := by intro x y obtain this := Convex_first_order_condition' (h₁ x) hfun (by trivial) y (by trivial) rw [← neg_sub, inner_neg_right] at this @@ -135,14 +135,14 @@ lemma convex_lipschitz (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) ∀ x : E, f (x - a • (f' x)) ≤ f x - a / 2 * ‖f' x‖ ^ 2 := by intro x calc - _ ≤ f x + inner (f' x) (x - a • (f' x) - x) + l / 2 * ‖x - a • (f' x) - x‖ ^ 2 := + _ ≤ f x + ⟪f' x, x - a • (f' x) - x⟫_ℝ + l / 2 * ‖x - a • (f' x) - x‖ ^ 2 := lipschitz_continuos_upper_bound' h₁ h₃ x (x - a • (f' x)) _ = f x + ((l.1 / 2 * a * a -a) * ‖f' x‖ ^ 2) := by simp; ring_nf; simp rw [real_inner_smul_right, real_inner_self_eq_norm_sq, norm_smul]; simp rw [abs_of_pos ha₂]; ring_nf _ ≤ f x + (- a / 2* ‖(f' x)‖ ^2) := by - simp only [add_le_add_iff_left, gt_iff_lt, norm_pos_iff, ne_eq] + simp only [add_le_add_iff_left] apply mul_le_mul_of_nonneg_right · simp; calc l / 2 * a * a = (l * a) * (a / 2) := by ring_nf @@ -167,25 +167,25 @@ lemma point_descent_for_convex (hfun : ConvexOn ℝ Set.univ f) (step₂ : alg.a intro x have t1 : 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) = 1 := by field_simp; ring_nf; apply mul_inv_cancel₀; linarith [alg.step₁] - have t2 : inner (f' x) (x - xm) - 
alg.a / 2 * ‖f' x‖ ^ 2 = + have t2 : ⟪f' x, x - xm⟫_ℝ - alg.a / 2 * ‖f' x‖ ^ 2 = 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by symm have t2₁ : ‖x - alg.a • (f' x) - xm‖ ^ 2 = - ‖x - xm‖ ^ 2 - ((2 : ℝ) * alg.a) * inner (f' x) (x - xm) + ‖alg.a • (f' x)‖ ^ 2 := by + ‖x - xm‖ ^ 2 - ((2 : ℝ) * alg.a) * ⟪f' x, x - xm⟫_ℝ + ‖alg.a • (f' x)‖ ^ 2 := by rw [sub_right_comm]; simp [norm_sub_sq_real (x - xm) _] ring_nf; rw [real_inner_smul_right, real_inner_comm]; calc - _ = 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) * (inner (f' x) (x - xm)) + _ = 1 / ((2 : ℝ) * alg.a) * ((2 : ℝ) * alg.a) * (⟪f' x, x - xm⟫_ℝ) + 1 / ((2 : ℝ) * alg.a) * (- ‖alg.a • (f' x)‖ ^ 2) := by rw [t2₁]; ring_nf - _ = inner (f' x) (x - xm) + 1 / ((2 : ℝ) * alg.a) + _ = ⟪f' x, x - xm⟫_ℝ + 1 / ((2 : ℝ) * alg.a) * (- ‖alg.a • (f' x)‖ ^ 2) := by rw [t1, one_mul] - _ = inner (f' x) (x - xm) - 1 / ((2 : ℝ) * alg.a) * (alg.a * alg.a) * (‖f' x‖ ^ 2) := by + _ = ⟪f' x, x - xm⟫_ℝ - 1 / ((2 : ℝ) * alg.a) * (alg.a * alg.a) * (‖f' x‖ ^ 2) := by rw [norm_smul _ _]; simp; rw [abs_of_pos alg.step₁]; ring_nf - _ = inner (f' x) (x - xm) - alg.a / (2 : ℝ) + _ = ⟪f' x, x - xm⟫_ℝ - alg.a / (2 : ℝ) * ‖f' x‖ ^ 2 := by ring_nf; simp; left; rw [pow_two,mul_self_mul_inv alg.a] calc f (x - alg.a • (f' x)) ≤ f x - alg.a / 2 * ‖f' x‖ ^ 2 := by exact convex_lipschitz alg.diff this step₂ alg.step₁ alg.smooth x - _ ≤ f xm + inner (f' x) (x - xm) - alg.a / 2 * ‖f' x‖ ^ 2 := by + _ ≤ f xm + ⟪f' x, x - xm⟫_ℝ - alg.a / 2 * ‖f' x‖ ^ 2 := by linarith [convex_function alg.diff hfun x xm] _ = f xm + 1 / ((2 : ℝ) * alg.a) * (‖x - xm‖ ^ 2 - ‖x - alg.a • (f' x) - xm‖ ^ 2) := by rw [add_sub_assoc, t2] @@ -229,8 +229,7 @@ lemma gradient_method (hfun: ConvexOn ℝ Set.univ f) (step₂ : alg.a ≤ 1 / a calc _ = (Finset.range (j + 1)).sum (fun (k : ℕ) ↦ f (alg.x (k + 1)) - f xm) + f (alg.x (j + 2)) - f xm := by - rw [Finset.sum_range_succ (fun (k : ℕ)↦ f (alg.x (k+1))-f (xm)) j.succ] - rw [Nat.succ_eq_add_one j]; ring_nf; rw 
[add_sub] + simp [Finset.sum_range_succ, add_comm, add_left_comm]; grind _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) + f (alg.x (j + 2)) - f xm := by linarith _ ≤ 1 / (2 * alg.a) * (‖x₀ - xm‖ ^ 2 - ‖alg.x (j + 1) - xm‖ ^ 2) diff --git a/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean b/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean index d4cd46e..64d0d5c 100644 --- a/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean +++ b/Optlib/Algorithm/GD/GradientDescentStronglyConvex.lean @@ -28,10 +28,11 @@ variable {f : E → ℝ} {m : ℝ} {f' : E → E} {xm x₀ : E} {x : ℕ → E} variable {a : ℝ} {x y : E} {l : NNReal} open Set +open scoped InnerProductSpace BigOperators RealInnerProductSpace theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > 0) (hf : ∀ x, HasGradientAt f (f' x) x) (h₂ : LipschitzWith l f') (hl : l > (0 : ℝ)): - inner (f' x - f' y) (x - y) ≥ m * l / (m + l) * ‖x - y‖ ^ 2 + ⟪f' x - f' y, x - y⟫_ℝ ≥ m * l / (m + l) * ‖x - y‖ ^ 2 + 1 / (m + l) * ‖f' x - f' y‖ ^ 2 := by rw [StrongConvexOn, UniformConvexOn] at hsc rcases hsc with ⟨cov, hsc⟩ @@ -52,13 +53,13 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > use cov have convh : ConvexOn ℝ univ h := by have (x : E) : h x = phi x := by - field_simp [phi, h]; ring_nf + simp [phi, h]; ring_nf; simp; grind rw [ConvexOn]; use cov; intro x xin y yin a b apos bpos absum1 rw [this, this, this] rw [ConvexOn] at convphi apply convphi.2 xin yin apos bpos absum1 by_cases coef: 0 < l - m - · have eq1 : inner (g' x - g' y) (x - y) ≥ 1 / (l - m) * ‖g' x - g' y‖ ^ 2 := by + · have eq1 : ⟪g' x - g' y, x - y⟫_ℝ ≥ 1 / (l - m) * ‖g' x - g' y‖ ^ 2 := by apply convex_to_lower gderiv show ConvexOn ℝ univ h; apply convh; apply coef; apply convg let alpha : E := f' x - f' y @@ -67,47 +68,57 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > simp [g']; rw [smul_sub]; rw [← sub_add, ← sub_add]; simp rw [sub_right_comm] 
rw [eq2] at eq1 - have eq3 (u v : E) : inner (u - m • v) v ≥ 1 / (l - m) * ‖u - m • v‖ ^ 2 - → inner u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by - have : ‖u - m • v‖ ^ 2 = ‖u‖ ^ 2 + m ^ 2 * ‖v‖ ^ 2 - 2 * m * inner u v := by + have eq3 (u v : E) : ⟪u - m • v, v⟫_ℝ ≥ 1 / (l - m) * ‖u - m • v‖ ^ 2 + → ⟪u, v⟫_ℝ ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by + have : ‖u - m • v‖ ^ 2 = ‖u‖ ^ 2 + m ^ 2 * ‖v‖ ^ 2 - 2 * m * ⟪u, v⟫_ℝ := by rw [norm_sub_sq_real, inner_smul_right]; ring_nf; rw [norm_smul]; simp rw [mul_pow, sq_abs] rw [this] - intro h0; rw [inner_sub_left, inner_smul_left] at h0; field_simp at h0 - rw [real_inner_self_eq_norm_sq, div_le_iff₀] at h0 - field_simp at h0; rw [sub_mul, sub_add_eq_add_sub, le_sub_iff_add_le] at h0 - rw [mul_right_comm, mul_sub] at h0; ring_nf at h0 - rw [mul_right_comm, ← add_mul] at h0 + intro h0 + rw [inner_sub_left, inner_smul_left] at h0 + simp [real_inner_self_eq_norm_sq] at h0 + rw [inv_mul_eq_div] at h0 + rw [div_le_iff₀ coef] at h0 + rw [sub_mul, add_sub_assoc, le_sub_iff_add_le] at h0 + rw [mul_right_comm, mul_sub] at h0 + ring_nf at h0 + rw [mul_right_comm] at h0 have mlpos : 0 < m + l := by linarith rw [ge_iff_le] - field_simp; rw [add_comm] - calc - _ ≤ ((m + l) * inner u v) / (m + l) := by - rw [div_le_div_right]; apply h0; apply mlpos - _ = inner u v := by field_simp - apply coef - show inner alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 + -- Rearrange h0 to isolate ⟪u, v⟫ on the right, then divide by (m + l) > 0 + have h1 : ‖u‖ ^ 2 + m * l * ‖v‖ ^ 2 ≤ (m + l) * ⟪u, v⟫_ℝ := by + have h0' := add_le_add_right h0 (2 * m * ⟪u, v⟫_ℝ) + ring_nf at h0' + simpa [add_mul] using h0' + have hdiv : + (‖u‖ ^ 2 + m * l * ‖v‖ ^ 2) / (m + l) ≤ ⟪u, v⟫_ℝ := by + have h1' : ‖u‖ ^ 2 + m * l * ‖v‖ ^ 2 ≤ ⟪u, v⟫_ℝ * (m + l) := by + simpa [mul_comm] using h1 + exact (div_le_iff₀ mlpos).2 h1' + -- Rewrite the left-hand side as the desired combination + simpa [div_eq_mul_inv, mul_add, 
add_comm, add_left_comm, add_assoc, mul_comm, mul_left_comm, mul_assoc] using hdiv + show ⟪alpha, beta⟫_ℝ ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 apply eq3 - show inner (alpha - m • beta) (x - y) ≥ 1 / (l - m) * ‖alpha - m • beta‖ ^ 2 + show ⟪alpha - m • beta, x - y⟫_ℝ ≥ 1 / (l - m) * ‖alpha - m • beta‖ ^ 2 apply eq1 · let alpha : E := f' x - f' y let beta : E := x - y - have eq1 : inner alpha beta ≥ m * ‖beta‖ ^ 2 := by - show inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 + have eq1 : ⟪alpha, beta⟫_ℝ ≥ m * ‖beta‖ ^ 2 := by + show ⟪f' x - f' y, x - y⟫_ℝ ≥ m * ‖x - y‖ ^ 2 apply Strong_Convex_lower; rw [StrongConvexOn, UniformConvexOn] use cov; simp; apply hf; simp; simp - have eq2 : inner alpha beta ≥ 1 / l * ‖alpha‖ ^ 2 := by - show inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 + have eq2 : ⟪alpha, beta⟫_ℝ ≥ 1 / l * ‖alpha‖ ^ 2 := by + show ⟪f' x - f' y, x - y⟫_ℝ ≥ 1 / l * ‖f' x - f' y‖ ^ 2 apply lipschitz_to_lower hf h₂ apply StrictConvexOn.convexOn; apply StrongConvexOn.strictConvexOn rw [StrongConvexOn, UniformConvexOn]; use cov; apply mp; apply hl rw [ge_iff_le] at eq1 rw [ge_iff_le] at eq2 have mlpos : 0 < m + l := by linarith - have eq3 (u v : E) (h1 : m * ‖v‖ ^ 2 ≤ inner u v) (h2 : 1 / l * ‖u‖ ^ 2 ≤ inner u v): - inner u v ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by - field_simp; rw [div_le_iff₀ mlpos, mul_comm _ (m + l), add_mul] - have eq4 : m * l * ‖v‖ ^ 2 ≤ m * inner u v := by + have eq3 (u v : E) (h1 : m * ‖v‖ ^ 2 ≤ ⟪u, v⟫_ℝ) (h2 : 1 / l * ‖u‖ ^ 2 ≤ ⟪u, v⟫_ℝ): + ⟪u, v⟫_ℝ ≥ m * l / (m + l) * ‖v‖ ^ 2 + 1 / (m + l) * ‖u‖ ^ 2 := by + field_simp; rw [ge_iff_le, div_le_iff₀ mlpos, mul_comm _ (m + l), add_mul] + have eq4 : m * l * ‖v‖ ^ 2 ≤ m * ⟪u, v⟫_ℝ := by calc _ ≤ m * m * ‖v‖ ^ 2 := by rw [mul_comm m l, mul_assoc, mul_assoc] @@ -115,12 +126,12 @@ theorem Strong_convex_Lipschitz_smooth (hsc: StrongConvexOn univ m f) (mp : m > apply mul_le_mul_of_nonneg_right simp at coef; apply coef rw 
[mul_nonneg_iff_right_nonneg_of_pos]; simp; apply mp - _ ≤ m * inner u v := by - rw [mul_assoc, mul_le_mul_left]; apply h1; apply mp - have eq5 : ‖u‖ ^ 2 ≤ l * inner u v := by + _ ≤ m * ⟪u, v⟫_ℝ := by + rw [mul_assoc, mul_le_mul_iff_right₀]; apply h1; apply mp + have eq5 : ‖u‖ ^ 2 ≤ l * ⟪u, v⟫_ℝ := by field_simp at h2; rw [mul_comm, ← div_le_iff₀]; apply h2; apply hl linarith - show inner alpha beta ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 + show ⟪alpha, beta⟫_ℝ ≥ m * l / (m + l) * ‖beta‖ ^ 2 + 1 / (m + l) * ‖alpha‖ ^ 2 apply eq3; apply eq2; apply eq1 lemma lipschitz_derivxm_eq_zero (h₁ : ∀ x : E, HasGradientAt f (f' x) x) @@ -133,7 +144,7 @@ lemma lipschitz_derivxm_eq_zero (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have eq3 : 0 ≤ ‖f' xm‖ ^ 2 / (2 * l) := by apply div_nonneg; simp; linarith have eq4 : ‖f' xm‖ ^ 2 / (2 * l) = 0 := by linarith - field_simp at eq4; exact eq4 + field_simp at eq4; simp_all variable (hsc: StrongConvexOn univ m f) {alg : Gradient_Descent_fix_stepsize f f' x₀} @@ -151,13 +162,13 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) calc _ = ‖alg.x k - xm - alg.a • f' (alg.x k)‖ ^ 2 := by rw [sub_right_comm] - _ = ‖alg.x k - xm‖ ^ 2 - 2 * alg.a * inner (alg.x k - xm) (f' (alg.x k)) + _ = ‖alg.x k - xm‖ ^ 2 - 2 * alg.a * ⟪alg.x k - xm, f' (alg.x k)⟫_ℝ + alg.a ^ 2 * ‖f' (alg.x k)‖ ^ 2 := by rw [norm_sub_sq_real, inner_smul_right] ring_nf; rw [norm_smul]; simp; rw [mul_pow, sq_abs] _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x k - xm‖ ^ 2 + alg.a * (alg.a - 2 / (m + alg.l)) * ‖f' (alg.x k)‖ ^ 2 := by - have : inner (alg.x k - xm) (f' (alg.x k)) ≥ + have : ⟪alg.x k - xm, f' (alg.x k)⟫_ℝ ≥ m * alg.l / (m + alg.l) * ‖alg.x k - xm‖ ^ 2 + 1 / (m + alg.l) * ‖f' (alg.x k)‖ ^ 2 := by have eq1 : f' (alg.x k) = f' (alg.x k) - f' xm := by @@ -174,12 +185,11 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) _ = 2 * alg.a * ((m * alg.l / (m + alg.l)) * ‖alg.x k - xm‖ ^ 2 + (1 / 
(m + alg.l)) * ‖f' (alg.x k)‖ ^ 2) := by - field_simp; rw [mul_add, mul_comm alg.a 2, ← mul_assoc, ← mul_assoc, mul_comm alg.a 2] - ring_nf - _ ≤ 2 * alg.a * inner (alg.x k - xm) (f' (alg.x k)) := by + field_simp + _ ≤ 2 * alg.a * ⟪alg.x k - xm, f' (alg.x k)⟫_ℝ := by rw [ge_iff_le] at this have twoapos : 0 < 2 * alg.a := by linarith [alg.step₁] - rw [mul_le_mul_left twoapos]; apply this + rw [mul_le_mul_iff_right₀ twoapos]; apply this _ ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) * ‖alg.x k - xm‖ ^ 2 := by simp have eq2 : alg.a * (alg.a - 2 / (m + alg.l)) ≤ 0 := by @@ -188,24 +198,44 @@ lemma gradient_method_strong_convex (hm : m > 0) (min : IsMinOn f univ xm) have eq3 : 0 ≤ ‖f' (alg.x k)‖ ^ 2 := by simp apply mul_nonpos_of_nonpos_of_nonneg eq2 eq3 have eq : 0 ≤ (1 - alg.a * (2 * m * alg.l / (m + alg.l))) := by - have : 0 < m + alg.l := by linarith - field_simp; rw [div_nonneg_iff]; left - constructor - · simp - calc - alg.a * (2 * m * alg.l) ≤ 2 / (m + alg.l) * (2 * m * alg.l) := by - rw [mul_le_mul_right]; linarith [step₂, alg.step₁]; - apply mul_pos <;> linarith - _ ≤ (m + alg.l) ^ 2 / (m + alg.l) := by - field_simp; rw [div_le_div_right this] - ring_nf - calc - _ ≤ m * alg.l * 4 + (m - alg.l) ^ 2 := by - simp; apply sq_nonneg - _ = m * alg.l * 2 + m ^ 2 + alg.l ^ 2 := by ring_nf - _ = (m + alg.l) := by - rw [pow_two]; simp - · linarith + have hpos_ml : 0 < m + alg.l := by linarith + let t : ℝ := (2 * m * alg.l) / (m + alg.l) + have t_nonneg : 0 ≤ t := by + have num_nonneg : 0 ≤ 2 * m * alg.l := by + have h2 : 0 ≤ (2 : ℝ) := by norm_num + exact mul_nonneg (mul_nonneg h2 (le_of_lt hm)) (le_of_lt alg.hl) + exact div_nonneg num_nonneg (le_of_lt hpos_ml) + have hmono_ge : 1 - alg.a * t ≥ 1 - (2 / (m + alg.l)) * t := by + have hmul := mul_le_mul_of_nonneg_right step₂ t_nonneg + have hneg' : -(2 / (m + alg.l)) * t ≤ -(alg.a * t) := by + simpa [ge_iff_le] using (neg_le_neg hmul) + simp [sub_eq_add_neg]; grind + have hfrac_le_one : (2 / (m + alg.l)) * t ≤ 1 := by + 
have denom_pos : 0 < (m + alg.l) ^ 2 := by + simpa [pow_two] using mul_pos hpos_ml hpos_ml + have hrewrite : + (2 / (m + alg.l)) * t = + (4 * m * alg.l) * ((m + alg.l) ^ 2)⁻¹ := by + unfold t + simp [div_eq_mul_inv, pow_two, mul_comm, mul_left_comm, mul_assoc]; grind + have hsq : 4 * m * alg.l ≤ (m + alg.l) ^ 2 := by + have hnonneg : 0 ≤ (m - alg.l) ^ 2 := by + simpa using sq_nonneg (m - alg.l) + have hbase : 0 ≤ m ^ 2 + alg.l ^ 2 - 2 * m * alg.l := by + simp [pow_two, sub_eq_add_neg, add_assoc, + mul_comm, mul_left_comm]; grind + have : 4 * m * alg.l ≤ m ^ 2 + alg.l ^ 2 + 2 * m * alg.l := by + linarith + simp [pow_two, mul_comm, mul_left_comm]; grind + have : (4 * m * alg.l) * ((m + alg.l) ^ 2)⁻¹ ≤ 1 := by + have h := mul_le_mul_of_nonneg_right hsq (inv_nonneg.mpr (le_of_lt denom_pos)) + have denom_ne_zero : (m + alg.l) ^ 2 ≠ 0 := ne_of_gt denom_pos + simpa [mul_comm, mul_left_comm, mul_assoc, denom_ne_zero] using h + simpa [hrewrite] using this + have hRHS_nonneg : 0 ≤ 1 - (2 / (m + alg.l)) * t := sub_nonneg.mpr hfrac_le_one + have hmono_le : 1 - (2 / (m + alg.l)) * t ≤ 1 - alg.a * t := by + simpa [ge_iff_le] using hmono_ge + exact le_trans hRHS_nonneg hmono_le intro k induction' k with q IH1 · simp; rw [alg.initial] diff --git a/Optlib/Algorithm/LASSO.lean b/Optlib/Algorithm/LASSO.lean index 9e9dc64..83ea9bd 100644 --- a/Optlib/Algorithm/LASSO.lean +++ b/Optlib/Algorithm/LASSO.lean @@ -31,7 +31,7 @@ local notation "‖" x "‖₂" => @Norm.norm (EuclideanSpace ℝ (Fin m)) (PiLp local notation "‖" x "‖₁" => (Finset.sum Finset.univ (fun (i : Fin n) => ‖x i‖)) open Set Real Matrix Finset - +open scoped InnerProductSpace RealInnerProductSpace EuclideanSpace /- `u ⬝ Av = Aᵀu ⬝ v` for u v in EuclideanSpace -/ lemma dot_mul_eq_transpose_mul_dot (u : EuclideanSpace ℝ (Fin m)) (v : EuclideanSpace ℝ (Fin n)) : @@ -53,8 +53,9 @@ lemma norm2eq_dot (x : EuclideanSpace ℝ (Fin m)) : ‖x‖₂ ^ 2 = x ⬝ᵥ /- `⟪x, y⟫_ℝ = x ⬝ y` for x y in EuclideanSpace -/ -lemma real_inner_eq_dot (x y : 
EuclideanSpace ℝ (Fin m)) : inner x y = x ⬝ᵥ y := by - simp; rw [dotProduct] +lemma real_inner_eq_dot (x y : EuclideanSpace ℝ (Fin m)) : ⟪x, y⟫_ℝ = x ⬝ᵥ y := by + simpa [real_inner_comm, dotProduct_comm] using + (EuclideanSpace.inner_eq_star_dotProduct (x := y) (y := x)) /- gradient of a quadratic in ℝⁿ -/ @@ -80,11 +81,8 @@ lemma quadratic_gradient : ∀ x : (EuclideanSpace ℝ (Fin n)), · apply div_pos εpos; rw [sq_pos_iff]; linarith [normApos] intro y ydist; rw [inner_smul_left] - simp; rw [← dotProduct] - have aux1 : (fun x_1 ↦ ((Aᵀ * A) *ᵥ x) x_1) ⬝ᵥ (fun x_1 ↦ y x_1 - x x_1) - = (Aᵀ * A) *ᵥ x ⬝ᵥ (y - x) := by - rw [dotProduct, dotProduct]; simp - rw [aux1, ← mulVec_mulVec, ← dot_mul_eq_transpose_mul_dot _ (y - x), Matrix.mulVec_sub, + simp [real_inner_eq_dot] + rw [← mulVec_mulVec, ← dot_mul_eq_transpose_mul_dot _ (y - x), Matrix.mulVec_sub, dotProduct_sub] ring_nf have aux2 (u v : Fin m → ℝ) : u ⬝ᵥ u + (v ⬝ᵥ v - v ⬝ᵥ u * 2) = (u - v) ⬝ᵥ (u - v) := by @@ -120,11 +118,12 @@ private lemma linear_gradient : ∀ x : (EuclideanSpace ℝ (Fin n)), intro ε εpos use ε; use εpos intro y _ - rw [dot_mul_eq_transpose_mul_dot, dot_mul_eq_transpose_mul_dot, ← dotProduct_sub] - rw [EuclideanSpace.inner_eq_star_dotProduct]; simp - repeat rw [dotProduct] - simp - apply mul_nonneg; linarith [εpos]; apply norm_nonneg + rw [dot_mul_eq_transpose_mul_dot (u := b) (v := y), + dot_mul_eq_transpose_mul_dot (u := b) (v := x)] + simp only [real_inner_eq_dot] + rw [← dotProduct_sub] + rw [sub_self, norm_zero] + exact mul_nonneg (le_of_lt εpos) (norm_nonneg (x - y)) /- gradient of the square of an affine map in ℝⁿ -/ @@ -182,7 +181,7 @@ lemma norm_one_convex : ConvexOn ℝ univ (fun x : (EuclideanSpace ℝ (Fin n)) intro i _ simp calc - |a * x i + b * y i| ≤ |a * x i| + |b * y i| := by apply abs_add + |a * x i + b * y i| ≤ |a * x i| + |b * y i| := abs_add_le (a * x i) (b * y i) _ = a * |x i| + b * |y i| := by rw [abs_mul, abs_mul, abs_of_nonneg anneg, abs_of_nonneg bnneg] @@ -216,8 +215,11 @@ 
theorem norm_one_proximal rw [prox_iff_subderiv_smul (fun x : (EuclideanSpace ℝ (Fin n)) => ‖x‖₁) norm_one_convex tμpos] rw [← mem_SubderivAt, HasSubgradientAt] intro y - simp; rw [← sum_add_distrib]; apply sum_le_sum + simp [real_inner_eq_dot, dotProduct] -- expand dot products into explicit sums + rw [← sum_add_distrib] + apply sum_le_sum intro i _ + rw [← le_sub_iff_add_le'] let abs_subg := SubderivAt_abs (xm i) by_cases hxm : xm i = 0 · rw [hxm]; simp @@ -228,7 +230,7 @@ theorem norm_one_proximal · simp [hx] at minpoint; exact minpoint calc μ⁻¹ * t⁻¹ * x i * y i ≤ μ⁻¹ * t⁻¹ * |x i * y i| := by - rw [mul_assoc _ (x i), mul_le_mul_left] + rw [mul_assoc _ (x i), mul_le_mul_iff_right₀] apply le_abs_self; rw [← mul_inv, inv_pos]; apply mul_pos linarith [μpos]; linarith [tpos] _ ≤ |y i| * μ⁻¹ * t⁻¹ * t * μ := by @@ -244,39 +246,50 @@ theorem norm_one_proximal rcases abs_subg with ⟨_, abs_subg⟩ let sgnxm := sign (xm i) have aux : sgnxm ∈ SubderivAt abs (xm i) := by - rw [abs_subg]; simp + rw [abs_subg]; simp; rfl rw [← mem_SubderivAt, HasSubgradientAt] at aux specialize aux (y i) - have aux2 : inner sgnxm (y i - xm i) = μ⁻¹ * t⁻¹ * (x i - xm i) * (y i - xm i) := by - simp [sgnxm]; left - rw [minpoint]; simp; rw [minpoint] at hxm; simp at hxm; push_neg at hxm + have aux2 : ⟪sgnxm, (y i - xm i)⟫_ℝ = μ⁻¹ * t⁻¹ * (x i - xm i) * (y i - xm i) := by + simp [sgnxm] + rw [minpoint] at hxm; simp at hxm; push_neg at hxm rcases hxm with ⟨xiieq0, ieq⟩ have eq1 : max (|x i| - t * μ) 0 = |x i| - t * μ := by apply max_eq_left; linarith - rw [eq1]; simp; nth_rw 3 [mul_sub] - rw [← sub_add, real_sign_mul_abs]; simp - nth_rw 2 [mul_comm (sign (x i))] - rw [← mul_assoc _ (t * μ), ← mul_inv, mul_comm μ t, inv_mul_cancel₀, one_mul] - by_cases hx : 0 < x i - · have eq2 : sign (sign (x i) * (|x i| - t * μ)) = 1 := by - apply Real.sign_of_pos; apply mul_pos - calc - 0 < 1 := by simp - 1 = sign (x i) := by - symm; apply Real.sign_of_pos hx - linarith [ieq] - rw [eq2]; symm; apply 
Real.sign_of_pos hx - · have xneg : x i < 0 := by - contrapose! xiieq0; linarith - have eq2 : sign (sign (x i) * (|x i| - t * μ)) = -1 := by - apply Real.sign_of_neg; apply mul_neg_of_neg_of_pos + have hxmi : xm i = Real.sign (x i) * (|x i| - t * μ) := by + simp [minpoint i, eq1] + have hxabs : Real.sign (x i) * |x i| = x i := real_sign_mul_abs (x i) + have coeff : μ⁻¹ * t⁻¹ * (x i - xm i) = Real.sign (x i) := by + have hxmx : x i - xm i = Real.sign (x i) * (t * μ) := by calc - sign (x i) = -1 := by - apply Real.sign_of_neg xneg - _ < 0 := by linarith - linarith [ieq] - rw [eq2]; symm; apply Real.sign_of_neg xneg - linarith [μpos, tpos] + x i - xm i + = Real.sign (x i) * |x i| - Real.sign (x i) * (|x i| - t * μ) := by + rw [hxabs, hxmi] + _ = Real.sign (x i) * (|x i| - (|x i| - t * μ)) := by + ring + _ = Real.sign (x i) * (t * μ) := by + ring + field_simp [hxmx, tpos.ne', μpos.ne']; grind + have sgnxm_eq : Real.sign (xm i) = Real.sign (x i) := by + by_cases hx : 0 < x i + · have eq2 : Real.sign (Real.sign (x i) * (|x i| - t * μ)) = 1 := by + apply Real.sign_of_pos + have pos : 0 < |x i| - t * μ := by linarith [ieq] + have sgnpos : 0 < Real.sign (x i) := by + simp [Real.sign_of_pos hx] + exact mul_pos sgnpos pos + have : Real.sign (xm i) = 1 := by simpa [hxmi] using eq2 + simp [Real.sign_of_pos hx, this] + · have xneg : x i < 0 := by + contrapose! xiieq0; linarith + have eq2 : Real.sign (Real.sign (x i) * (|x i| - t * μ)) = -1 := by + apply Real.sign_of_neg + have pos : 0 < |x i| - t * μ := by linarith [ieq] + have sgnneg : Real.sign (x i) < 0 := by + simp [Real.sign_of_neg xneg] + exact mul_neg_of_neg_of_pos sgnneg pos + have : Real.sign (xm i) = -1 := by simpa [hxmi] using eq2 + simp [Real.sign_of_neg xneg, this] + simp [sgnxm_eq, coeff]; grind rw [aux2] at aux; linarith [aux] push_neg; intro hxm'; contrapose! 
hxm'; exact hxm @@ -298,7 +311,7 @@ open Set Real Matrix Finset NNReal structure LASSO (A : Matrix (Fin m) (Fin n) ℝ) (b : (Fin m) → ℝ) (μ : ℝ) (μpos : 0 < μ) (Ane0 : A ≠ 0) - (x₀ : (EuclideanSpace ℝ (Fin n))) := + (x₀ : (EuclideanSpace ℝ (Fin n))) where (f h : (EuclideanSpace ℝ (Fin n)) → ℝ) (f' : (EuclideanSpace ℝ (Fin n)) → (EuclideanSpace ℝ (Fin n))) (L : ℝ≥0) (t : ℝ) (xm : (EuclideanSpace ℝ (Fin n))) (x y : ℕ → (EuclideanSpace ℝ (Fin n))) @@ -364,10 +377,10 @@ instance {A : Matrix (Fin m) (Fin n) ℝ} {b : (Fin m) → ℝ} {μ : ℝ} {μpo calc |μ| * |Finset.sum Finset.univ fun i ↦ (|y i| - |x i|)| ≤ |μ| * Finset.sum Finset.univ fun i ↦ |(|y i| - |x i|)| := by - rw [mul_le_mul_left]; apply Finset.abs_sum_le_sum_abs + rw [mul_le_mul_iff_right₀]; apply Finset.abs_sum_le_sum_abs simp; linarith [μpos] _ ≤ |μ| * (n * (ε / n / μ)) := by - rw [mul_le_mul_left] + rw [mul_le_mul_iff_right₀] calc (Finset.sum Finset.univ fun i ↦ |(|y i| - |x i|)|) ≤ (Finset.sum Finset.univ (fun _ ↦ (ε / n / μ))) := by @@ -376,8 +389,8 @@ instance {A : Matrix (Fin m) (Fin n) ℝ} {b : (Fin m) → ℝ} {μ : ℝ} {μpo _ = (n * (ε / n / μ)) := by simp simp; linarith [μpos] _ = ε := by - field_simp; rw [mul_comm, ← mul_assoc, mul_comm ε] - simp; left; linarith + field_simp + simp [abs_of_pos μpos] minphi : IsMinOn (p.f + p.h) Set.univ p.xm := p.minphi tpos : 0 < p.t := by rw [p.teq]; simp diff --git a/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean b/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean index 794055e..8ccf6b7 100644 --- a/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean +++ b/Optlib/Algorithm/Nesterov/NesterovAccelerationFirst.lean @@ -25,7 +25,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real -class Nesterov_first (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_first (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (x y : ℕ → E) (t γ : ℕ → ℝ) (hl : l > (0 : ℝ)) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf : ConvexOn ℝ univ f) (h₂ : LipschitzWith 
l f') (convh : ConvexOn ℝ univ h) @@ -66,7 +66,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : linarith [(alg.tbound k).1] _ ≤ (1 / alg.t k) * (alg.t k * h z - ⟪alg.y k - alg.x (k + 1) - (alg.t k) • (f' (alg.y k)), z - alg.x (k + 1)⟫) := by - rw [mul_le_mul_left]; apply add_le_add_right; exact hieq1 z k + rw [mul_le_mul_iff_right₀]; apply add_le_add_right; exact hieq1 z k simp; linarith [(alg.tbound k).1] _ = h z + ⟪(f' (alg.y k)) + (1 / alg.t k) • (alg.x (k + 1) - alg.y k), z - alg.x (k + 1)⟫ := by @@ -83,7 +83,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : apply lipschitz_continuos_upper_bound' alg.h₁ alg.h₂ y x _ ≤ f y + ⟪f' y, x - y⟫ + 1 / (2 * alg.t k) * ‖x - y‖ ^ 2 := by apply add_le_add_left; apply mul_le_mul_of_nonneg_right - rw [← mul_one_div, ← one_div_mul_one_div, mul_comm, mul_le_mul_left] + rw [← mul_one_div, ← one_div_mul_one_div, mul_comm, mul_le_mul_iff_right₀] rw [le_one_div]; exact (alg.tbound k).2; exact alg.hl; exact (alg.tbound k).1 simp; apply sq_nonneg let φ := fun z : E ↦ f z + h z @@ -137,7 +137,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : (1 - alg.γ k) * ((1 / alg.t k) • ⟪alg.x (k + 1) - alg.y k, alg.x k - alg.x (k + 1)⟫ + 1 / (2 * alg.t k) * ‖alg.x (k + 1) - alg.y k‖ ^ 2) := by apply add_le_add - · rw [mul_le_mul_left]; exact ieq2; linarith [(alg.γbound k).1] + · rw [mul_le_mul_iff_right₀]; exact ieq2; linarith [(alg.γbound k).1] · apply mul_le_mul_of_nonneg_left; exact ieq1; linarith [(alg.γbound k).2] _ = (alg.γ k) * (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, xm - alg.x (k + 1)⟫ + (1 - alg.γ k) * (1 / alg.t k) * ⟪alg.x (k + 1) - alg.y k, alg.x k - alg.x (k + 1)⟫ + @@ -155,7 +155,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : let v := fun k : ℕ+ ↦ alg.x (k - 1) + (1 / (alg.γ (k - 1))) • (alg.x k - alg.x (k - 1)) have eq : ∀ k : ℕ+, alg.y k = (1 - alg.γ k) • alg.x k + (alg.γ k) • (v k) := by intro k - simp [φ, v]; rw [alg.update1 k, 
sub_smul, sub_add_eq_add_sub, ← smul_add, ← add_sub, one_smul] + simp [v]; rw [alg.update1 k, sub_smul, sub_add_eq_add_sub, ← smul_add, ← add_sub, one_smul] rw [add_left_cancel_iff, ← smul_sub, mul_div_assoc, ← smul_eq_mul, smul_assoc] have h2 : ((1 - alg.γ (k - 1)) / alg.γ (k - 1)) • (alg.x k - alg.x (k - 1)) = alg.x (k - 1) + (alg.γ (k - 1))⁻¹ • (alg.x k - alg.x (k - 1)) - alg.x k := by @@ -212,11 +212,16 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : intro n let cond := alg.cond n simp [α] - rw [mul_div_assoc, mul_div_assoc, ← mul_assoc, mul_comm _ 2, mul_assoc, mul_le_mul_left] + rw [mul_div_assoc, mul_div_assoc, ← mul_assoc, mul_comm _ 2, mul_assoc, mul_le_mul_iff_right₀] rw [← mul_div_assoc]; exact cond; simp have h10 (n : ℕ) : α n * (alg.γ n ^ (2 : ℕ) / (2 * alg.t n)) = 1 := by - field_simp [α]; rw [mul_comm, div_self]; apply mul_ne_zero - simp; linarith [(alg.γbound n).1]; linarith [alg.tbound n] + have hγnz : alg.γ n ^ 2 ≠ 0 := by + have hγpos : 0 < alg.γ n := (alg.γbound n).1 + exact ne_of_gt (pow_pos hγpos 2) + have htnz : 2 * alg.t n ≠ 0 := by + have htpos : 0 < alg.t n := (alg.tbound n).1 + exact ne_of_gt (by linarith [htpos]) + simp [α, pow_two, mul_comm, mul_left_comm, mul_div_mul_comm]; aesop have decrease (n : ℕ+) : (α n) * (φ (alg.x (n + 1)) - φ xm) + ‖v (n + 1) - xm‖ ^ 2 ≤ (α (n - 1)) * (φ (alg.x n) - φ xm) + ‖v n - xm‖ ^ 2 := by calc @@ -228,7 +233,7 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) : * (‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2)) + (α n) * ((1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ‖v (n + 1) - xm‖ ^ 2 := by rw [mul_add]; repeat apply add_le_add_right - rw [mul_le_mul_left]; exact φieq4 n; exact αpos n + rw [mul_le_mul_iff_right₀]; exact φieq4 n; exact αpos n _ = ‖v n - xm‖ ^ 2 - ‖v (n + 1) - xm‖ ^ 2 + (α n) * ((1 - alg.γ n) * (φ (alg.x n) - φ xm)) + ‖v (n + 1) - xm‖ ^ 2 := by rw [← mul_assoc, h10, one_mul] @@ -258,22 +263,22 @@ theorem Nesterov_first_converge (minφ : IsMinOn (f + h) univ xm) 
: calc f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm = alg.γ k ^ 2 / (2 * alg.t k) * ((α k) * (φ (alg.x (↑k + 1))- φ xm)) := by - rw [sub_sub, ← mul_assoc, mul_comm _ (α k), h10 k]; simp + rw [sub_sub, ← mul_assoc, mul_comm _ (α k), h10 k]; simp; grind _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * nr k := by - rw [mul_le_mul_left]; simp [nr]; apply div_pos + rw [mul_le_mul_iff_right₀]; simp [nr]; apply div_pos rw [sq_pos_iff]; linarith [(alg.γbound k).1]; linarith [alg.tbound k] _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * nr 0 := by - rw [mul_le_mul_left]; exact bound; apply div_pos + rw [mul_le_mul_iff_right₀]; exact bound; apply div_pos rw [sq_pos_iff]; linarith [(alg.γbound k).1]; linarith [alg.tbound k] _ ≤ alg.γ k ^ 2 / (2 * alg.t k) * ‖x0 - xm‖ ^ 2 := by - rw [mul_le_mul_left]; simp [nr, v, α]; rw [alg.oriγ]; simp + rw [mul_le_mul_iff_right₀]; simp [nr, v, α]; rw [alg.oriγ]; simp specialize φieq3 0; rw [alg.oriγ] at φieq3; simp at φieq3 calc 2 * alg.t 0 * (φ (alg.x 1) - φ xm) + ‖alg.x 1 - xm‖ ^ 2 ≤ 2 * alg.t 0 * ((alg.t 0)⁻¹ * ⟪alg.x 1 - alg.y 0, xm - alg.x 1⟫ + (alg.t 0)⁻¹ * 2⁻¹ * ‖alg.x 1 - alg.y 0‖ ^ 2 + φ xm - φ xm) + ‖alg.x 1 - xm‖ ^ 2 := by - apply add_le_add_right; rw [mul_le_mul_left]; simp; linarith [φieq3] + apply add_le_add_right; rw [mul_le_mul_iff_right₀]; simp; linarith [φieq3] linarith [alg.tbound 0] _ = ‖alg.x 0 - xm‖ ^ 2 := by rw [← add_sub, sub_self, add_zero, mul_add, ← mul_assoc]; ring_nf @@ -292,7 +297,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real PNat -class Nesterov_first_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_first_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -314,8 +319,8 @@ instance {f h: E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_first_fix_stepsi initial := p.initial cond := by intro n; simp [p.teq n, p.teq (n - 1), p.γeq n, p.γeq (n - 1)]; 
field_simp - rw [mul_assoc, ← div_div, div_le_div_right, pow_two, ← mul_assoc, mul_div_assoc] - rw [div_self, add_sub]; ring_nf; simp; linarith; linarith [p.hl] + ring_nf + norm_num tbound := by intro k; rw [p.teq k]; simp; exact p.hl hl := p.hl @@ -351,8 +356,6 @@ theorem Nesterov_first_fix_stepsize_converge (minφ : IsMinOn (f + h) univ xm): rw [h1, h2]; apply Nesterov_first_converge minφ _ ≤ 2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by apply mul_le_mul_of_nonneg_right; rw [alg.γeq k, alg.teq k]; field_simp - rw [pow_two, add_comm]; rw [mul_comm ((k + 2 : ℝ) ^ 2), ← div_div, div_le_div_right] - rw [mul_rotate, ← mul_div, div_self, mul_one] - simp; field_simp; apply sq_nonneg + rw [add_comm (2 : ℝ) (↑k)]; apply sq_nonneg end Nesterov_first_fix_stepsize diff --git a/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean b/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean index 052e315..99517dd 100644 --- a/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean +++ b/Optlib/Algorithm/Nesterov/NesterovAccelerationSecond.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Yuxuan Wu, Chenyi Li -/ import Optlib.Function.Proximal +import Mathlib.Tactic /-! 
# NesterovAccelerationSecond @@ -26,7 +27,7 @@ variable {f h : E → ℝ} {f' : E → E} open Set Real -class Nesterov_second (f h : E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_second (f h : E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (x y : ℕ → E) (z : ℕ+ → E) (t γ : ℕ → ℝ) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ Set.univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -45,7 +46,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): ∀ (k : ℕ), f (alg.x (k + 1)) + h (alg.x (k + 1)) - f xm - h xm ≤ (alg.γ (k + 1)) ^ 2 / (2 * alg.t (k + 1)) * ‖x0 - xm‖ ^ 2 := by let φ := fun z : E ↦ f z + h z - have φdef : ∀ z : E, φ z = f z + h z := by simp + have φdef : ∀ z : E, φ z = f z + h z := by aesop have h1 : ∀ k : ℕ+, alg.γ k • (alg.y (k - 1) - alg.y k) - alg.t k • (f' (alg.z k)) ∈ (SubderivAt (alg.t k • h) (alg.y k)) := by intro k; obtain h1 := alg.update2 k @@ -103,7 +104,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): ⟪alg.γ k • (alg.y (k - 1) - alg.y k) - alg.t k • (f' (alg.z k)), w - (alg.y k)⟫ := by intro w k rw [← mul_div_right_comm, ← mul_div, ← mul_sub] - apply (mul_le_mul_right (bsc1 k)).mp + apply (mul_le_mul_iff_left₀ (bsc1 k)).mp rw [mul_comm, ← mul_assoc, div_mul, div_self, div_one] rw [mul_assoc] nth_rw 3 [mul_comm] @@ -138,7 +139,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): simp symm apply nm0 - apply (mul_le_mul_right ax).mpr + apply (mul_le_mul_iff_left₀ ax).mpr have lc2 : alg.l / 2 > (0 : ℝ) := by linarith [alg.hl] have tc2 : (2 * alg.t ↑k) > 0 := by linarith [(alg.tbound k).1] rw [one_div] @@ -193,7 +194,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): . 
contrapose eq1 push_neg at * linarith [eq1] - apply (mul_le_mul_left pos).mpr + apply (mul_le_mul_iff_right₀ pos).mpr apply Convex_first_order_condition' (alg.h₁ (alg.z k)) alg.convf simp simp @@ -227,7 +228,7 @@ theorem Nesterov_second_convergence (minφ : IsMinOn (f + h) Set.univ xm): + alg.γ ↑k ^ 2 / (2 * alg.t ↑k) * ‖alg.y ↑k - alg.y (↑k - 1)‖ ^ 2 := by simp have gpos : alg.γ k > 0 := by exact (alg.γbound k).1 - apply (mul_le_mul_left gpos).mpr + apply (mul_le_mul_iff_right₀ gpos).mpr apply Convex_first_order_condition' (alg.h₁ (alg.z k)) alg.convf simp simp @@ -367,7 +368,7 @@ variable {f h : E → ℝ} {f' : E → E} {x0 : E} open Set Real PNat -class Nesterov_second_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) := +class Nesterov_second_fix_stepsize (f h: E → ℝ) (f' : E → E) (x0 : E) where (l : NNReal) (hl : l > (0 : ℝ)) (x y : ℕ → E) (z : ℕ+ → E) (t γ : ℕ → ℝ) (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (convf: ConvexOn ℝ Set.univ f) (h₂ : LipschitzWith l f') (convh : ConvexOn ℝ univ h) @@ -385,20 +386,24 @@ instance {f h : E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_second_fix_step convf := p.convf h₂ := p.h₂ convh := p.convh - x := p.x; y := p.y; t := p.t; γ := p.γ; + x := p.x; y := p.y; z := p.z; t := p.t; γ := p.γ; oriy := p.oriy oriγ := by simp [p.γeq 1]; norm_num initial := p.initial cond := by - intro n; rw [p.teq n, p.teq (n + 1), p.γeq n, p.γeq (n + 1)]; field_simp [p.hl] - rw [← div_div, ← div_div, ← div_div] - repeat apply div_le_div_of_nonneg_right _ (by positivity) - rw [pow_two, ← mul_assoc, mul_div_assoc, div_self, mul_one] - · calc - _ = n ^ 2 + (2 : ℝ) * n := by ring_nf - _ ≤ 1 + (2 : ℝ) * n + n ^ 2 := by linarith - _ = (1 + n) ^ 2 := by rw [add_pow_two]; simp - · linarith + intro n + have hn0 : (↑n : ℕ) ≠ 0 := by + have : 1 ≤ (↑n : ℕ) := by simpa using n.2 + exact Nat.pos_iff_ne_zero.mp ((Nat.succ_le_iff).1 (by simpa using this)) + have h1 : 0 < (1 + (↑n : ℕ) : ℝ) := by positivity + have h2 : 0 < (1 + (↑n : ℕ) + 1 : ℝ) := by positivity + simp 
[p.γeq, p.teq, hn0, pow_two] + have hn1 : (↑n + 1 : ℕ) ≠ 0 := by simp + simp only [ge_iff_le] + field_simp [h1, h2] + ring_nf + simp_all only [ne_eq, ne_zero, not_false_eq_true, Nat.add_eq_zero, one_ne_zero, and_self, one_div, + le_add_iff_nonneg_right, inv_pos, Nat.ofNat_pos, mul_nonneg_iff_of_pos_right, inv_nonneg, NNReal.zero_le_coe] tbound := by intro k; rw [p.teq k]; simp; exact p.hl hl := p.hl @@ -409,7 +414,7 @@ instance {f h : E → ℝ} {f' : E → E} {x0 : E} [p : Nesterov_second_fix_step simp [hk]; positivity · by_cases hk : k = 0 rw [hk]; simp; norm_num; push_neg at hk - simp [hk]; rw [div_le_iff₀ (by positivity)]; simp [hk] + simp [hk]; rw [div_le_iff₀ (by positivity)]; simp have : (k : ℝ) ≥ 1 := by rw [← Nat.pos_iff_ne_zero, Nat.lt_iff_add_one_le, zero_add] at hk; simp [hk] linarith @@ -443,7 +448,12 @@ theorem Nesterov_second_fix_stepsize_converge (minφ : IsMinOn (f + h) Set.univ _ ≤ 2 * alg.l / (k + 2) ^ 2 * ‖x0 - xm‖ ^ 2 := by apply mul_le_mul_of_nonneg_right _ (sq_nonneg _) rw [alg.γeq (k + 1), alg.teq (k + 1)]; field_simp - rw [pow_two, add_comm]; rw [add_assoc, one_add_one_eq_two, ← div_div] - apply le_of_eq; ring_nf + simp only [pow_two] + have h_nonzero : 1 + k ≠ 0 := by simp + simp only [Nat.add_eq_zero, one_ne_zero, and_false, ↓reduceIte, Nat.cast_add, Nat.cast_one, + ge_iff_le] + field_simp + ring_nf + simp_all only [ne_eq, Nat.add_eq_zero, one_ne_zero, false_and, not_false_eq_true, le_refl] end Nesterov_second_fix_stepsize diff --git a/Optlib/Algorithm/Nesterov/NesterovSmooth.lean b/Optlib/Algorithm/Nesterov/NesterovSmooth.lean index 5b3fda2..01fd1c3 100644 --- a/Optlib/Algorithm/Nesterov/NesterovSmooth.lean +++ b/Optlib/Algorithm/Nesterov/NesterovSmooth.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Ziyu Wang, Zaiwen Wen -/ import Optlib.Function.Lsmooth +import Mathlib.Tactic /-! 
# NesterovSmooth @@ -20,8 +21,9 @@ variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteS section open Set +open scoped Set InnerProductSpace RealInnerProductSpace -class Nesterov (f : E → ℝ) (f' : E → E) (γ : ℕ+ → ℝ) (initial_point : E) := +class Nesterov (f : E → ℝ) (f' : E → E) (γ : ℕ+ → ℝ) (initial_point : E) where (x : ℕ → E) (y : ℕ+ → E) (v : ℕ → E) (l : NNReal) (diff : ∀ x₁, HasGradientAt f (f' x₁) x₁) (update1 : ∀ (k : ℕ+), y k = (1 - γ k) • x (k - 1) + γ k • v (k - 1)) @@ -36,37 +38,41 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / ∀ (k : ℕ+), f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) ≤ alg.l * (γ k) ^ 2 / 2 * (‖alg.v (k - 1) - xm‖ ^ 2 - ‖alg.v k - xm‖ ^ 2) := by have h2 : ∀ (k : ℕ+), ∀ x' : E , f (alg.x k) - f x' ≤ alg.l * - inner (alg.x k - alg.y k) (x' - alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + (⟪alg.x k - alg.y k, x' - alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by intro k x' rw [sub_le_iff_le_add', ← add_assoc] have : (f' (alg.y k)) = alg.l.1 • (alg.y k - alg.x k) := by - have update2 : ∀ (k : ℕ+), alg.x k = alg.y k - (1 / alg.l.1) • (f' (alg.y k)) := alg.update2 - specialize update2 k - have : alg.l > 0 := alg.hl - rw [eq_sub_iff_add_eq', ← eq_sub_iff_add_eq] at update2 - rw [← update2, smul_smul] - field_simp - have t1 : f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k) ≤ f x' := by + have update2 := alg.update2 k + have h1 := (eq_sub_iff_add_eq').1 update2 + have h2 : alg.y k - alg.x k = (1 / alg.l.1) • f' (alg.y k) := by + exact sub_eq_iff_eq_add.mpr (id (Eq.symm h1)) + have hlne : alg.l.1 ≠ 0 := by exact ne_of_gt alg.hl + have hsmul := congrArg (fun t => alg.l.1 • t) h2 + have : alg.l.1 • (alg.y k - alg.x k) = f' (alg.y k) := by + simp_all only [NNReal.val_eq_coe, one_div, add_sub_cancel, sub_sub_cancel, ne_eq, NNReal.coe_eq_zero, + not_false_eq_true, smul_inv_smul₀] + exact this.symm + have t1 : f (alg.y k) + ⟪f' (alg.y k), x' - alg.y k⟫_ℝ ≤ f x' := by 
exact Convex_first_order_condition' (alg.diff (alg.y k)) hfun (by trivial) x' (by trivial) calc - _ ≤ f (alg.y k) + inner (f' (alg.y k)) (alg.x k - alg.y k) + + _ ≤ f (alg.y k) + ⟪f' (alg.y k), alg.x k - alg.y k⟫_ℝ + alg.l.1 / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by exact lipschitz_continuos_upper_bound' alg.diff alg.smooth (alg.y k) (alg.x k) - _ = f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k + (alg.x k - x')) + + _ = f (alg.y k) + ⟪f' (alg.y k), x' - alg.y k + (alg.x k - x')⟫_ℝ + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [add_comm (x' - alg.y k), add_sub (alg.x k - x'), sub_add, sub_self, sub_zero]; simp - _ = f (alg.y k) + inner (f' (alg.y k)) (x' - alg.y k) + inner (f' (alg.y k)) (alg.x k - x') + _ = f (alg.y k) + ⟪f' (alg.y k), x' - alg.y k⟫_ℝ + ⟪f' (alg.y k), alg.x k - x'⟫_ℝ + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [inner_add_right, ← add_assoc] - _ ≤ f x' + inner (f' (alg.y k)) (alg.x k - x') + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + _ ≤ f x' + ⟪f' (alg.y k), alg.x k - x'⟫_ℝ + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [add_le_add_iff_right, add_le_add_iff_right]; exact t1 - _ = f x' + inner (alg.l.1 • (alg.y k - alg.x k)) (alg.x k - x') + + _ = f x' + ⟪alg.l.1 • (alg.y k - alg.x k), alg.x k - x'⟫_ℝ + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [this] - _ = f x' + alg.l * inner (alg.x k - alg.y k) (x' - alg.x k) + + _ = f x' + alg.l * (⟪alg.x k - alg.y k, x' - alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [real_inner_smul_left, ← inner_neg_neg, neg_sub, neg_sub]; simp have h3 : ∀ (k : ℕ+), f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) ≤ - alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + alg.l * (⟪alg.x k - alg.y k, (1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - + alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by intro k have : f (alg.x k) - f xm - (1 - γ k) * (f (alg.x (k - 1)) - f xm) = γ k * (f (alg.x k) - f xm) + (1 - γ k) 
* (f (alg.x k) - f (alg.x (k - 1))) := by ring_nf @@ -84,24 +90,24 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / rw [smul_sub, smul_sub, add_sub, ← add_sub_right_comm, sub_sub, ← add_smul] ring_nf; rw [one_smul, add_comm] calc - _ ≤ γ k * (alg.l * (inner (alg.x k - alg.y k) (xm - alg.x k)) + alg.l / 2 * - ‖alg.x k - alg.y k‖ ^ 2) + (1 - γ k) * (alg.l * (inner (alg.x k - alg.y k) - (alg.x (k - 1) - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2) := by + _ ≤ γ k * (alg.l * (⟪alg.x k - alg.y k, xm - alg.x k⟫_ℝ) + alg.l / 2 * + ‖alg.x k - alg.y k‖ ^ 2) + (1 - γ k) * (alg.l * (⟪alg.x k - alg.y k, + alg.x (k - 1) - alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2) := by apply add_le_add · exact mul_le_mul_of_nonneg_left (h2 k xm) hz · exact mul_le_mul_of_nonneg_left (h2 k (alg.x (k - 1))) (by linarith) - _ = alg.l * (γ k * (inner (alg.x k - alg.y k) (xm - alg.x k))) + alg.l * ((1 - γ k) * - (inner (alg.x k - alg.y k) (alg.x (k - 1) - alg.x k))) + + _ = alg.l * (γ k * (⟪alg.x k - alg.y k, xm - alg.x k⟫_ℝ)) + alg.l * ((1 - γ k) * + (⟪alg.x k - alg.y k, alg.x (k - 1) - alg.x k⟫_ℝ)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by ring_nf - _ = alg.l * inner (alg.x k - alg.y k) (γ k • (xm - alg.x k)) + alg.l * - (inner (alg.x k - alg.y k) ((1 - γ k) • - (alg.x (k - 1) - alg.x k))) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + _ = alg.l * (⟪alg.x k - alg.y k, γ k • (xm - alg.x k)⟫_ℝ) + alg.l * + (⟪alg.x k - alg.y k, (1 - γ k) • + (alg.x (k - 1) - alg.x k)⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [← inner_smul_right _ _ (γ k), ← inner_smul_right _ _ (1 - γ k)] - _ = alg.l * inner (alg.x k - alg.y k) (γ k • (xm - alg.x k) + (1 - γ k) • - (alg.x (k - 1) - alg.x k)) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by + _ = alg.l * (⟪alg.x k - alg.y k, γ k • (xm - alg.x k) + (1 - γ k) • + (alg.x (k - 1) - alg.x k)⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [← mul_add, ← inner_add_right (alg.x k - alg.y k)] - _ = alg.l * inner (alg.x k - alg.y 
k) ((1 - γ k) • (alg.x (k - 1)) + - ((γ k) • xm)- alg.x k) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [this] + _ = alg.l * (⟪alg.x k - alg.y k, (1 - γ k) • (alg.x (k - 1)) + + ((γ k) • xm)- alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [this] intro k have hz : γ k ≥ (0 : ℝ) := by rw [hg k] @@ -134,33 +140,34 @@ lemma one_iter (hfun : ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / left; rw [mul_inv_cancel₀ (by linarith), one_smul, sub_smul, one_smul, add_comm, sub_add] have this2 : alg.l / 2 * (‖alg.y k - (1 - γ k) • (alg.x (k - 1)) - γ k • xm‖ ^ 2 - ‖alg.x k - (1 - γ k) • alg.x (k - 1) - γ k • xm‖ ^ 2) = alg.l * - (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k)) + (⟪alg.x k - alg.y k, (1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm)- alg.x k⟫_ℝ) + alg.l / 2 * ‖alg.x k - alg.y k‖ ^ 2 := by rw [sub_sub, sub_sub, norm_sub_sq_real, norm_sub_sq_real, norm_sub_sq_real] calc - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l / 2 * 2 * (inner (alg.x k) - ((1 - γ k) • alg.x (↑k - 1) + γ k • xm) - inner (alg.y k) - ((1 - γ k) • alg.x (↑k - 1) + γ k • xm)) := by ring_nf - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) - ((1 - γ k) • alg.x (↑k - 1) + γ k • xm) := by rw [← inner_sub_left]; ring_nf - _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) - ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k + alg.x k) := by + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l / 2 * 2 * (⟪alg.x k, + (1 - γ k) • alg.x (↑k - 1) + γ k • xm⟫_ℝ - ⟪alg.y k, + (1 - γ k) • alg.x (↑k - 1) + γ k • xm⟫_ℝ) := by ring_nf + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * (⟪alg.x k - alg.y k, + (1 - γ k) • alg.x (↑k - 1) + γ k • xm⟫_ℝ) := by rw [← inner_sub_left]; ring_nf + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * (⟪alg.x k - alg.y k, + (1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k + alg.x k⟫_ℝ) := by rw [sub_add, sub_self, sub_zero] - 
_ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * inner (alg.x k - alg.y k) - (alg.x k) + alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) - + ((γ k) • xm) - alg.x k)) := by + _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * (⟪alg.x k - alg.y k, + alg.x k⟫_ℝ) + alg.l * (⟪alg.x k - alg.y k, (1 - γ k) • (alg.x (k - 1)) + + ((γ k) • xm) - alg.x k⟫_ℝ) := by rw [inner_add_right, mul_add]; ring_nf _ = alg.l / 2 * (‖alg.y k‖ ^ 2 - ‖alg.x k‖ ^ 2) + alg.l * ‖alg.x k‖ ^ 2 - - alg.l * inner (alg.x k) (alg.y k) + alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) - • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k)) := by + alg.l * (⟪alg.x k, alg.y k⟫_ℝ) + alg.l * (⟪alg.x k - alg.y k, (1 - γ k) + • (alg.x (k - 1)) + ((γ k) • xm) - alg.x k⟫_ℝ) := by rw [inner_sub_left, mul_sub, mul_sub, real_inner_self_eq_norm_sq] rw [real_inner_comm, add_sub]; - _ = alg.l * (inner (alg.x k - alg.y k) ((1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) - - alg.x k)) + alg.l / 2 * (‖alg.x k‖ ^ 2 - 2 * - inner (alg.x k) (alg.y k) + ‖alg.y k‖ ^ 2) := by ring_nf + _ = alg.l * (⟪alg.x k - alg.y k, (1 - γ k) • (alg.x (k - 1)) + ((γ k) • xm) + - alg.x k⟫_ℝ) + alg.l / 2 * (‖alg.x k‖ ^ 2 - 2 * + ⟪alg.x k, alg.y k⟫_ℝ + ‖alg.y k‖ ^ 2) := by ring_nf rw [this1, this2] exact h3 k +set_option maxHeartbeats 0 in theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) (hg : ∀ (k : ℕ+), γ k = 2 / (k + 1)) (min : IsMinOn f Set.univ xm) (con : ∀ k : ℕ+ , (1 - γ k) / (γ k) ^ 2 ≤ 1 / (γ (k - 1)) ^ 2): @@ -176,18 +183,24 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) specialize con k have : (γ k) ^ 2 > 0 := by rw [hg k] - simp only [Real.rpow_two, div_pow, gt_iff_lt] + simp only [div_pow, gt_iff_lt] apply div_pos (by linarith) apply sq_pos_of_ne_zero exact Nat.cast_add_one_ne_zero ↑k - rw [← div_le_div_right this, sub_div, mul_div_right_comm (1 - γ k)] at h4 + have hpos : 0 < (γ k) ^ 2 := by + rw [hg k] + simp only [div_pow] + apply div_pos (by linarith) + apply 
sq_pos_of_ne_zero + exact Nat.cast_add_one_ne_zero ↑k + rw [← div_le_div_iff_of_pos_right this, sub_div, mul_div_right_comm (1 - γ k)] at h4 rw [← one_mul (f (alg.x k) - f xm), mul_div_right_comm 1] at h4 rw [mul_div_right_comm (alg.l).1, mul_assoc, mul_comm (γ k ^ 2)] at h4 rw [← mul_assoc, mul_div_assoc] at h4 rw [div_self (by linarith), mul_one, mul_sub (alg.l.1 / 2)] at h4 rw [tsub_le_iff_left, add_sub, le_sub_iff_add_le] at h4 apply le_trans h4 - simp only [Real.rpow_two, ge_iff_le, add_le_add_iff_right, gt_iff_lt, sub_pos, sub_neg] + simp only [add_le_add_iff_right] have : f xm ≤ f (alg.x (k - 1)):= min (by trivial) apply mul_le_mul_of_nonneg_right _ (by linarith) exact con @@ -210,7 +223,7 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) rw [alg.initial1, sub_self, zero_mul, sub_zero] at h4 rw [alg.initial1, sub_self, zero_div, zero_mul, zero_add] simp - simp only [PNat.one_coe, Real.rpow_two, one_pow, mul_one, le_refl, tsub_eq_zero_of_le] at h4 + simp only [PNat.one_coe, one_pow, mul_one, le_refl, tsub_eq_zero_of_le] at h4 rw [← le_sub_iff_add_le, ← mul_sub] exact h4 have h8 : ∀ (k : ℕ+), 1 / (γ k) ^ 2 * (f (alg.x k) - f xm) + alg.l / 2 @@ -226,16 +239,16 @@ theorem nesterov_algorithm_smooth (hfun: ConvexOn ℝ Set.univ f) have : alg.l > 0 := alg.hl apply mul_nonneg _ _ · positivity - · simp only [Real.rpow_two, sq_nonneg] + · simp only [sq_nonneg] have h10 : alg.l / (2 : ℝ) * ‖x₀ - xm‖ ^ 2 / ((1 :ℝ) / (2 / (k + 1)) ^ 2) = 2 * alg.l / ((k + 1) ^ 2) * ‖x₀ - xm‖ ^ 2 := by - simp [Nat.cast_add_one_ne_zero ↑k]; field_simp; ring_nf + simp; field_simp rw [hg k] at h9 rw [← le_div_iff₀'] at h9 · rw [h10] at h9 exact h9 - · simp only [Real.rpow_two, div_pow, one_div, inv_div] + · simp only [div_pow, one_div, inv_div] apply div_pos · apply sq_pos_of_ne_zero exact Nat.cast_add_one_ne_zero ↑k - · simp only [gt_iff_lt, zero_lt_two, pow_pos] + · simp only [zero_lt_two, pow_pos] diff --git a/Optlib/Algorithm/ProximalGradient.lean 
b/Optlib/Algorithm/ProximalGradient.lean index 51d60d2..f656750 100644 --- a/Optlib/Algorithm/ProximalGradient.lean +++ b/Optlib/Algorithm/ProximalGradient.lean @@ -20,13 +20,14 @@ import Optlib.Function.Proximal section method open Set +open scoped RealInnerProductSpace InnerProductSpace variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] variable [ProperSpace E] variable {xm x₀: E} {s : Set E} {f : E → ℝ} {f' : E → E} {h : E → ℝ} variable {t : ℝ} {x : ℕ → E} {L : NNReal} -class proximal_gradient_method (f h: E → ℝ) (f' : E → E) (x₀ : E) := +class proximal_gradient_method (f h: E → ℝ) (f' : E → E) (x₀ : E) where (xm : E) (t : ℝ) (x : ℕ → E) (L : NNReal) (fconv : ConvexOn ℝ univ f) (hconv : ConvexOn ℝ univ h) (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (h₂ : LipschitzWith L f') @@ -58,78 +59,78 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), rw [one_div_mul_cancel, one_smul] at eq2; exact eq2 linarith [alg.tpos]; exact alg.hconv; linarith [alg.tpos] have fieq1 : ∀ x : E, f (x - alg.t • Gt x) ≤ - f x - alg.t * inner (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := by + f x - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := by intro x let y := x - alg.t • Gt x - have ieq1 : f y ≤ f x + inner (f' x) (y - x) + alg.L / 2 * ‖y - x‖ ^ 2 := by + have ieq1 : f y ≤ f x + ⟪f' x, y - x⟫_ℝ + alg.L / 2 * ‖y - x‖ ^ 2 := by apply lipschitz_continuos_upper_bound' alg.h₁ alg.h₂ have eq3 : y - x = - alg.t • Gt x := by simp [Gt, y] rw [eq3] at ieq1; rw [inner_smul_right, norm_smul, mul_pow] at ieq1 rw [← mul_assoc, mul_comm ] at ieq1 simp at ieq1; rw [← sub_eq_add_neg] at ieq1; simp; linarith [alg.tpos] have fieq2 : ∀ x : E, - f (x - alg.t • Gt x) ≤ f x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by + f (x - alg.t • Gt x) ≤ f x - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t / 2 * ‖Gt x‖ ^ 2 := by intro x calc f (x - alg.t • Gt x) ≤ - f x - alg.t * inner (f' x) (Gt x) + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := fieq1 x - _ ≤ f 
x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2 := by + f x - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t ^ 2 * alg.L / 2 * ‖Gt x‖ ^ 2 := fieq1 x + _ ≤ f x - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t / 2 * ‖Gt x‖ ^ 2 := by apply add_le_add_left apply mul_le_mul_of_nonneg_right apply div_le_div_of_nonneg_right _ (by norm_num) calc alg.t ^ 2 * alg.L ≤ alg.t * (1 / alg.L) * alg.L := by rw [pow_two]; apply mul_le_mul_of_nonneg_right - rw [mul_le_mul_left alg.tpos]; exact alg.step; simp + rw [mul_le_mul_iff_right₀ alg.tpos]; exact alg.step; simp _ = alg.t := by field_simp; rw [← mul_div, div_self (by linarith [alg.hL]), mul_one] exact sq_nonneg _ - have fieq3 : ∀ x z : E, f x + inner (f' x) (z - x) ≤ f z := by + have fieq3 : ∀ x z : E, f x + ⟪f' x, z - x⟫_ℝ ≤ f z := by intro x z apply Convex_first_order_condition' (alg.h₁ x) alg.fconv simp; simp have hieq1 : ∀ x z : E, - h (x - alg.t • Gt x) + inner (Gt x - f' x) (z - x + alg.t • Gt x) ≤ h z := by + h (x - alg.t • Gt x) + ⟪Gt x - f' x, z - x + alg.t • Gt x⟫_ℝ ≤ h z := by intro x z specialize hG x rw [← mem_SubderivAt, HasSubgradientAt] at hG specialize hG z; rw [sub_add]; apply hG have hieq2 : ∀ x z : E, - h (x - alg.t • Gt x) ≤ h z - inner (Gt x - f' x) (z - x + alg.t • Gt x) := by + h (x - alg.t • Gt x) ≤ h z - ⟪Gt x - f' x, z - x + alg.t • Gt x⟫_ℝ := by intro x z; linarith [hieq1 x z] have univieq : ∀ x z : E, - φ (x - alg.t • Gt x) ≤ φ z + inner (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + φ (x - alg.t • Gt x) ≤ φ z + ⟪Gt x, x - z⟫_ℝ - alg.t / 2 * ‖Gt x‖ ^ 2 := by intro x z calc - φ (x - alg.t • Gt x) ≤ (f x - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) - + (h z - inner (Gt x - f' x) (z - x + alg.t • Gt x)) := by + φ (x - alg.t • Gt x) ≤ (f x - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t / 2 * ‖Gt x‖ ^ 2) + + (h z - ⟪Gt x - f' x, z - x + alg.t • Gt x⟫_ℝ) := by linarith [fieq2 x, hieq2 x z] - _ ≤ (f z - inner (f' x) (z - x) - alg.t * inner (f' x) (Gt x) + alg.t / 2 * ‖Gt x‖ ^ 2) - + (h z - inner (Gt x - f' x) (z - x + alg.t • Gt 
x)) := by + _ ≤ (f z - ⟪f' x, z - x⟫_ℝ - alg.t * ⟪f' x, Gt x⟫_ℝ + alg.t / 2 * ‖Gt x‖ ^ 2) + + (h z - ⟪Gt x - f' x, z - x + alg.t • Gt x⟫_ℝ) := by linarith [fieq3 x z] - _ = φ z + inner (Gt x) (x - z) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + _ = φ z + ⟪Gt x, x - z⟫_ℝ - alg.t / 2 * ‖Gt x‖ ^ 2 := by rw [← inner_smul_right, sub_sub, ← inner_add_right] rw [inner_sub_left, ← sub_add, add_rotate, ← add_comm_sub, ← add_sub] rw [← add_sub, sub_self, add_zero, add_rotate, inner_add_right, ← neg_sub x z] - rw [inner_neg_right, ← sub_sub, sub_neg_eq_add, add_comm _ (inner (Gt x) (x - z))] - rw [add_comm _ (inner (Gt x) (x - z)), ← add_sub _ (φ z), ← add_sub, add_assoc] + rw [inner_neg_right, ← sub_sub, sub_neg_eq_add, add_comm _ ⟪Gt x, x - z⟫_ℝ] + rw [add_comm _ ⟪Gt x, x - z⟫_ℝ, ← add_sub _ (φ z), ← add_sub, add_assoc] rw [add_assoc, add_left_cancel_iff] rw [inner_smul_right, real_inner_self_eq_norm_sq] rw [add_comm_sub, ← add_sub] have (a : ℝ): alg.t / 2 * a - alg.t * a = - alg.t / 2 * a := by ring - rw [this, sub_eq_add_neg, ← add_assoc, add_comm (h z) (f z)]; field_simp + rw [this, sub_eq_add_neg, ← add_assoc, add_comm (h z) (f z)]; field_simp; grind have φieq1 : ∀ x : E, φ (x - alg.t • Gt x) - φ alg.xm ≤ (1 / (2 * alg.t)) * (‖x - alg.xm‖ ^ 2 - ‖x - alg.t • Gt x - alg.xm‖ ^ 2) := by intro x calc - φ (x - alg.t • Gt x) - φ alg.xm ≤ inner (Gt x) (x - alg.xm) - alg.t / 2 * ‖Gt x‖ ^ 2 := by + φ (x - alg.t • Gt x) - φ alg.xm ≤ ⟪Gt x, x - alg.xm⟫_ℝ - alg.t / 2 * ‖Gt x‖ ^ 2 := by linarith [univieq x alg.xm] _ = (1 / (2 * alg.t)) * (‖x - alg.xm‖ ^ 2 - ‖x - alg.t • Gt x - alg.xm‖ ^ 2) := by - have aux (p q : E) : inner p q - alg.t / 2 * ‖p‖ ^ 2 = + have aux (p q : E) : ⟪p, q⟫_ℝ - alg.t / 2 * ‖p‖ ^ 2 = 1 / (2 * alg.t) * (‖q‖ ^ 2 - ‖q - alg.t • p‖ ^ 2) := by rw [norm_sub_sq_real]; field_simp; ring_nf rw [inner_smul_right, real_inner_comm]; nth_rw 2 [mul_comm _ (alg.t)⁻¹]; rw [norm_smul, mul_pow, pow_two ‖alg.t‖] - simp; rw [mul_comm _ (inner q p), mul_assoc _ alg.t, mul_inv_cancel₀, ← 
mul_assoc] + simp; rw [mul_comm _ ⟪q, p⟫_ℝ, mul_assoc _ alg.t, mul_inv_cancel₀, ← mul_assoc] rw [← mul_assoc, inv_mul_cancel₀]; simp repeat linarith [alg.tpos] rw [sub_right_comm]; apply aux @@ -144,7 +145,7 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), rw [iter i] calc φ ((alg.x i) - alg.t • Gt (alg.x i)) ≤ φ (alg.x i) - + inner (Gt (alg.x i)) ((alg.x i) - (alg.x i)) + + ⟪Gt (alg.x i), (alg.x i) - (alg.x i)⟫_ℝ - alg.t / 2 * ‖Gt (alg.x i)‖ ^ 2 := by linarith [univieq (alg.x i) (alg.x i)] _ ≤ φ (alg.x i) := by @@ -194,12 +195,14 @@ theorem proximal_gradient_method_converge : ∀ (k : ℕ+), _ ≤ 2 * alg.t * ((1 / (2 * alg.t)) * ‖(alg.x 0) - alg.xm‖ ^ 2 - (1 / (2 * alg.t)) * ‖(alg.x k) - alg.xm‖ ^ 2) := by - rw [mul_le_mul_left] + rw [mul_le_mul_iff_right₀] let ieq' := ieq k; simp at ieq' simp; apply ieq'; linarith [alg.tpos] _ = ‖(alg.x 0) - alg.xm‖ ^ 2 - ‖(alg.x k) - alg.xm‖ ^ 2 := by rw [← mul_sub, ← mul_assoc, mul_one_div_cancel]; simp; linarith [alg.tpos] _ ≤ ‖x₀ - alg.xm‖ ^ 2 := by rw [alg.ori]; simp - field_simp; linarith [alg.tpos] + · field_simp + simp only [Nat.ofNat_pos, mul_pos_iff_of_pos_left, Nat.cast_pos, PNat.pos]; + exact proximal_gradient_method.tpos end method diff --git a/Optlib/Algorithm/SubgradientMethod.lean b/Optlib/Algorithm/SubgradientMethod.lean index 7150965..df6fe7e 100644 --- a/Optlib/Algorithm/SubgradientMethod.lean +++ b/Optlib/Algorithm/SubgradientMethod.lean @@ -20,7 +20,7 @@ import Optlib.Convex.Subgradient -/ open Filter Topology Set InnerProductSpace Finset - +open scoped Set InnerProductSpace RealInnerProductSpace Mathlib /-! 
### Convergence of Subgradient method -/ section @@ -39,21 +39,21 @@ theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : Contin simp at hx₂' rcases hx₂' with ⟨gx, hx₁⟩ have hx₃ : ‖gx‖ ≤ G := by rcases h hx₁ with hx; apply hx - rcases hx₁ y with hx₂ - have hx₄ : f x - f y ≤ inner gx (x - y) := by + have hx₂ := hx₁ y + have hx₄ : f x - f y ≤ @inner ℝ E _ gx (x - y) := by rw [add_comm] at hx₂ - have : f x ≤ f y - inner gx (y - x) := le_sub_left_of_add_le hx₂ + have : f x ≤ f y - @inner ℝ E _ gx (y - x) := le_sub_left_of_add_le hx₂ rw [sub_eq_add_neg, ← inner_neg_right, neg_sub] at this exact sub_left_le_of_le_add this have hy₂' : Nonempty (SubderivAt f y) := SubderivAt.nonempty hf hc y simp at hy₂' rcases hy₂' with ⟨gy, hy₁⟩ have hy₃ : ‖gy‖ ≤ G := by rcases h hy₁ with hy; apply hy - rcases hy₁ x with hy₂ - have hy₄: f x - f y ≥ inner gy (x - y) := by + have hy₂ := hy₁ x + have hy₄: f x - f y ≥ @inner ℝ E _ gy (x - y) := by calc - _ ≥ f y + inner gy (x - y) - f y := by apply sub_le_sub_right hy₂ - _ = inner gy (x - y) := by ring + _ ≥ f y + @inner ℝ E _ gy (x - y) - f y := by apply sub_le_sub_right hy₂ + _ = @inner ℝ E _ gy (x - y) := by ring have hG₁: ↑G = ENNReal.ofReal ↑G := by simp rw [edist_dist, edist_dist, hG₁] have hG₂ : ENNReal.ofReal (↑G * (dist x y)) = ENNReal.ofReal ↑G * ENNReal.ofReal (dist x y) := by @@ -66,14 +66,14 @@ theorem bounded_subgradient_to_Lipschitz (hf : ConvexOn ℝ univ f) (hc : Contin apply abs_le.mpr constructor · calc - f x - f y ≥ inner gy (x - y) := hy₄ + f x - f y ≥ @inner ℝ E _ gy (x - y) := hy₄ _ ≥ - (‖gy‖ * ‖x - y‖) := by apply neg_le_of_neg_le rw [← inner_neg_right, neg_sub, norm_sub_rev] apply real_inner_le_norm _ ≥ - (↑G * ‖x - y‖) := neg_le_neg (mul_le_mul_of_nonneg_right hy₃ (norm_nonneg _)) · calc - f x - f y ≤ inner gx (x - y) := hx₄ + f x - f y ≤ @inner ℝ E _ gx (x - y) := hx₄ _ ≤ ‖gx‖ * ‖x - y‖ := real_inner_le_norm _ _ _ ≤ ↑G * ‖x - y‖ := mul_le_mul_of_nonneg_right hx₃ (norm_nonneg _) @@ -85,14 +85,14 @@ 
theorem Lipschitz_to_bounded_subgradient (h : LipschitzWith G f ) : rcases h₁ with ⟨x, g, h₂, h₃⟩ let y : E := x + ((1 / ‖g‖) • g) have hy : y = x + ((1 / ‖g‖) • g) := by rfl - rcases h₂ y with hy₂ + have hy₂ := h₂ y rw[LipschitzWith] at h have hg₁ : ‖g‖ ≠ 0 := by apply ne_of_gt (lt_of_le_of_lt _ h₃) simp only [NNReal.zero_le_coe] - have hl : inner g (y - x) = ‖g‖ := by + have hl : @inner ℝ E _ g (y - x) = ‖g‖ := by rw[hy ,add_comm, ← add_sub, sub_self, add_zero, inner_smul_right, inner_self_eq_norm_sq_to_K] - field_simp; apply pow_two + field_simp; simp rw [hl] at hy₂ have _ : f y - f x ≥ ‖g‖ := by calc @@ -113,7 +113,15 @@ theorem Lipschitz_to_bounded_subgradient (h : LipschitzWith G f ) : calc f y - f x ≤ |f y - f x|:= by apply le_abs_self _ ≤ ↑G * (‖1 / ‖g‖‖ * ‖g‖) := by apply h₃' - _ = ↑G := by field_simp + _ = ↑G := by + have hgnz : ‖g‖ ≠ 0 := hg₁ + have hnorm : ‖(1 / ‖g‖ : ℝ)‖ = 1 / ‖g‖ := by + have hnonneg : 0 ≤ (1 / ‖g‖ : ℝ) := by + rw [one_div]; exact inv_nonneg.mpr (norm_nonneg g) + rw [Real.norm_of_nonneg hnonneg] + have hcancel : (1 / ‖g‖) * ‖g‖ = (1 : ℝ) := by + rw [one_div]; exact inv_mul_cancel₀ hg₁ + rw [hnorm, hcancel]; simp _ < ‖g‖ := by apply h₃ linarith @@ -134,7 +142,7 @@ variable (hf : ConvexOn ℝ univ f) open Finset -class subgradient_method (f : E → ℝ) (x₀ : E) := +class subgradient_method (f : E → ℝ) (x₀ : E) where (x g : ℕ → E) (a : ℕ → ℝ) (ha : ∀ n, a n > 0) (G : NNReal) (lipschitz : LipschitzWith G f) @@ -151,86 +159,110 @@ theorem subgradient_method_converge: (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * (Finset.range (k + 1)).sum (fun i => alg.a i ^ 2) := by intro k - have h' : ∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ alg.G := Lipschitz_to_bounded_subgradient alg.lipschitz - by_cases k₀ : k = 0 - · simp [k₀] - rcases (alg.hg 0) xm with hk₀ - rcases h' (alg.hg 0) with h₀' - rw [← mul_pow] - apply le_trans _ (two_mul_le_add_sq _ _) - rw [mul_assoc, mul_assoc]; apply (mul_le_mul_left 
two_pos).mpr - rw [mul_comm, ← mul_assoc]; apply (mul_le_mul_right (alg.ha 0)).mpr - have : f (alg.x 0) - f xm ≤ - inner (alg.g 0) (xm - alg.x 0) := by - simp [hk₀]; rw[add_comm]; apply hk₀ - apply le_trans this _ - rw [← inner_neg_right,neg_sub, alg.initial] - apply le_trans (real_inner_le_norm _ _) _; rw [mul_comm] - apply mul_le_mul_of_nonneg_left h₀' (norm_nonneg _) - · have heq : (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) = - {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} := by simp [Set.ext_iff] - have h₁ : ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → ‖alg.x (i+1) - xm‖ ^ 2 ≤ ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i - * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + alg.G ^ 2 * alg.a i ^ 2 := by - intro i ⟨ _ ,hi₂⟩ - rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul, mul_pow, sub_eq_add_neg] - have : ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = - x} - f xm) + ↑alg.G ^ 2 * alg.a i ^ 2 = ‖alg.x i - xm‖ ^ 2 + - (2 * alg.a i * (sInf {x | ∃ i ∈ - Finset.range (k + 1), f (alg.x i) = x} - f xm)) + ↑alg.G ^ 2 * alg.a i ^ 2 := by ring - rw [this] - have inq₁: ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 ≤ ↑alg.G ^ 2 * alg.a i ^ 2 := by - rw[mul_comm]; simp - rcases h' (alg.hg i) with hi - apply mul_le_mul_of_nonneg_right _ (sq_nonneg _) - · apply pow_le_pow_left; apply norm_nonneg; apply hi - have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) - ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by - rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr - rw [inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr - rcases (alg.hg i) xm with hxm - calc - _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq] - _ ≤ f (alg.x i) - f xm := by - simp - have : f (alg.x i) ∈ Set.range fun (x : Finset.range (k + 1)) => f (alg.x x) := by - simp; use i - constructor - · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k) - · simp - apply csInf_le _ this; apply 
Finite.bddBelow_range - _ ≤ inner (alg.x i - xm) (alg.g i) := by - simp; apply le_add_of_sub_left_le - rw [sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm] - apply hxm - rw [add_assoc, add_assoc] - apply add_le_add_left; apply add_le_add - · apply neg_le_neg; apply inq₂ - · apply inq₁ - have h₁' : ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + - 1)} - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2 := by - intro i ⟨hi₁, hi₂⟩ - rcases h₁ ⟨hi₁, hi₂⟩ with hii - have : 2 * (alg.a i) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) ≤ - ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2:= by - linarith [hii] - rw [mul_assoc, mul_comm, mul_assoc, mul_comm _ 2] at this - apply this - have h₂ : (Finset.range (k + 1)).sum (fun i => (alg.a i) * (2 * (sInf {f (alg.x i) | i ∈ - Finset.range (k + 1)} - f xm))) ≤ (Finset.range (k + 1)).sum - (fun i => ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2) := by - apply Finset.sum_le_sum; intro i hi; apply h₁' - constructor - · simp - · have : i < k + 1 := by - apply Finset.mem_range.mp; apply hi - apply (Nat.lt_add_one_iff).mp this - rw [← sum_mul, ← mul_assoc, mul_comm _ 2, sum_add_distrib] at h₂ - rw [sum_range_sub' _, ← mul_sum, alg.initial] at h₂ - calc - _ = (2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x) * (sInf {x | ∃ i ∈ - Finset.range (k + 1), f (alg.x i) = x} - f xm) := by simp - _ ≤ ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + ↑alg.G ^ 2 * Finset.sum (Finset.range - (k + 1)) fun x => alg.a x ^ 2 := by apply h₂ - _ ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x ^ 2 := by simp + have h' : ∀ ⦃x : E⦄ , ∀ ⦃g⦄ , g ∈ SubderivAt f x → ‖g‖ ≤ alg.G := + Lipschitz_to_bounded_subgradient alg.lipschitz + have heq : + (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) = + {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} := by + simp [Set.ext_iff] + have h₁ : 
+ ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → + ‖alg.x (i+1) - xm‖ ^ 2 ≤ + ‖alg.x i - xm‖ ^ 2 + - 2 * alg.a i * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + + alg.G ^ 2 * alg.a i ^ 2 := by + intro i ⟨_, hi₂⟩ + rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul, mul_pow, sub_eq_add_neg] + have : ‖alg.x i - xm‖ ^ 2 + - 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) + + ↑alg.G ^ 2 * alg.a i ^ 2 + = ‖alg.x i - xm‖ ^ 2 + + - (2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)) + + ↑alg.G ^ 2 * alg.a i ^ 2 := by + ring + rw [this] + have inq₁ : ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 ≤ ↑alg.G ^ 2 * alg.a i ^ 2 := by + rw [mul_comm]; simp + have hi := h' (alg.hg i) + have hi_sq : ‖alg.g i‖ ^ 2 ≤ (alg.G : ℝ) ^ 2 := by + apply pow_le_pow_left₀ (norm_nonneg _) hi 2 + exact mul_le_mul_of_nonneg_right hi_sq (sq_nonneg _) + have inq₂ : + 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) + ≤ 2 * @inner ℝ E _ (alg.x i - xm) (alg.a i • alg.g i) := by + have hxm := (alg.hg i) xm + have base_range : + sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm + ≤ @inner ℝ E _ (alg.x i - xm) (alg.g i) := by + have hxmem : + f (alg.x i) ∈ Set.range (fun (x : Finset.range (k + 1)) => f (alg.x x)) := by + simp; use i + constructor + · exact lt_of_le_of_lt hi₂ (Nat.lt_succ_self k) + · simp + have h₁ : + sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) ≤ f (alg.x i) := by + apply csInf_le _ hxmem + apply Finite.bddBelow_range + have h₂ : + f (alg.x i) - f xm ≤ @inner ℝ E _ (alg.x i - xm) (alg.g i) := by + have h3 : f (alg.x i) - f xm ≤ - @inner ℝ E _ (alg.g i) (xm - alg.x i) := + (sub_le_iff_le_add).2 (by simpa [add_comm, sub_eq_add_neg] using hxm) + rw [← inner_neg_right, neg_sub, real_inner_comm] at h3 + exact h3 + exact le_trans (sub_le_sub_right h₁ _) h₂ + have base : + sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm + ≤ @inner ℝ E _ (alg.x i - xm) (alg.g i) := by + 
simpa [heq] using base_range + have hnonneg : 0 ≤ 2 * alg.a i := by + have h2 : (0 : ℝ) ≤ 2 := by norm_num + exact mul_nonneg h2 (le_of_lt (alg.ha i)) + have hmul := + mul_le_mul_of_nonneg_left base hnonneg + simpa [mul_left_comm, mul_comm, mul_assoc, inner_smul_right] using hmul + apply add_le_add + · apply add_le_add + · rfl + · apply neg_le_neg; exact inq₂ + · exact inq₁ + have h₁' : + ∀ ⦃i : ℕ⦄, i ≥ 0 ∧ i ≤ k → + alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm)) + ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2 := by + intro i ⟨hi₁, hi₂⟩ + rcases h₁ ⟨hi₁, hi₂⟩ with hii + have : 2 * (alg.a i) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2 := by + linarith [hii] + rw [mul_assoc, mul_comm, mul_assoc, mul_comm _ 2] at this + exact this + have h₂ : + (Finset.range (k + 1)).sum (fun i => (alg.a i) * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm))) + ≤ (Finset.range (k + 1)).sum + (fun i => ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + alg.G ^ 2 * (alg.a i) ^ 2) := by + apply Finset.sum_le_sum + intro i hi + apply h₁' + constructor + · simp + · have : i < k + 1 := Finset.mem_range.mp hi + exact (Nat.lt_add_one_iff).mp this + rw [← sum_mul, ← mul_assoc, mul_comm _ 2, sum_add_distrib] at h₂ + have h₃ : ∑ x ∈ Finset.range (k + 1), (‖alg.x x - xm‖ ^ 2 - ‖alg.x (x + 1) - xm‖ ^ 2) = + ‖alg.x 0 - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 := by + exact sum_range_sub' (fun i ↦ ‖subgradient_method.x f x₀ i - xm‖ ^ 2) (k + 1) + rw [h₃, ← mul_sum, alg.initial] at h₂ + calc + _ = (2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x) * + (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) := by simp + _ ≤ ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + + ↑alg.G ^ 2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x ^ 2 := by + exact h₂ + _ ≤ ‖x₀ - xm‖ ^ 2 + alg.G ^ 2 * Finset.sum (Finset.range (k + 1)) fun x => alg.a x ^ 2 := by + 
simp +omit [CompleteSpace E] in /-- convergence with fixed step size --/ theorem subgradient_method_fix_step_size {t : ℝ} (ha' : ∀ (n : ℕ), alg.a n = t) : @@ -248,14 +280,15 @@ theorem subgradient_method_fix_step_size {t : ℝ} simp apply mul_pos _ ht · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one - apply (mul_le_mul_left hpos).mp + apply (mul_le_mul_iff_right₀ hpos).mp calc 2 * ((↑k + 1) * t) * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) = 2 * ((↑k + 1) * t) * (sInf {x | ∃ i < k + 1, f (alg.x i) = x} - f xm) := by simp _ ≤ ‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * ((↑k + 1) * t ^ 2) := by apply hk _ = 2 * ((↑k + 1) * t) * (‖x₀ - xm‖ ^ 2 / (2 * (↑k + 1) * t) + ↑alg.G ^ 2 * t / 2) := by - field_simp; ring + field_simp +omit [CompleteSpace E] in /-- convergence with fixed $‖x^{i+1}-x^{i}‖$ --/ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) (ha' : ∀ (n : ℕ), alg.a n * ‖alg.g n‖ = s) (hs : s > 0): @@ -270,17 +303,11 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) have h₁ : ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → ‖alg.x (i+1) - xm‖ ^ 2 ≤ ‖alg.x i - xm‖ ^ 2 - 2 * (alg.a i) * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2:= by intro i ⟨_, hi₂⟩ - rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul,mul_pow, sub_eq_add_neg] - have : ‖alg.x i - xm‖ ^ 2 - 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), - f (alg.x i) = x} - f xm) + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 = ‖alg.x i - xm‖ ^ 2 + - -(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)) - + ‖alg.a i‖ ^ 2 * ‖alg.g i‖ ^ 2 := by ring - rw [this] have inq₂: 2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm) - ≤ 2 * inner (alg.x i - xm) (alg.a i • alg.g i) := by - rw [mul_assoc]; apply (mul_le_mul_left two_pos).mpr - rw[inner_smul_right]; apply (mul_le_mul_left (alg.ha i)).mpr - rcases (alg.hg i) xm with hxm + ≤ 2 * @inner ℝ E _ (alg.x i - xm) (alg.a i • 
alg.g i) := by + rw [mul_assoc]; apply (mul_le_mul_iff_right₀ two_pos).mpr + rw[inner_smul_right]; apply (mul_le_mul_iff_right₀ (alg.ha i)).mpr + have hxm := (alg.hg i) xm calc _ = sInf (Set.range fun (x : Finset.range (k + 1)) => f (alg.x x)) - f xm := by rw [← heq] _ ≤ f (alg.x i)- f xm := by @@ -291,14 +318,24 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) · apply lt_of_le_of_lt hi₂; apply (Nat.lt_succ_self k) · simp apply csInf_le _ this; apply Finite.bddBelow_range - _ ≤ inner (alg.x i - xm) (alg.g i) := by - simp; apply le_add_of_sub_left_le - rw[sub_eq_add_neg, ← inner_neg_left, neg_sub, real_inner_comm]; apply hxm - rw[add_assoc, add_assoc] - apply add_le_add_left - apply add_le_add - · apply neg_le_neg; apply inq₂ - · simp + _ ≤ @inner ℝ E _ (alg.x i - xm) (alg.g i) := by + have h3 : f (alg.x i) - f xm ≤ - @inner ℝ E _ (alg.g i) (xm - alg.x i) := by + exact (sub_le_iff_le_add).2 (by simpa [add_comm, sub_eq_add_neg] using hxm) + rw [← inner_neg_right, neg_sub, real_inner_comm] at h3 + exact h3 + rw [alg.update i, sub_right_comm, norm_sub_sq_real, norm_smul, mul_pow, sub_eq_add_neg] + have hneg : + -(2 * @inner ℝ E _ (alg.x i - xm) (alg.a i • alg.g i)) + ≤ -(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm)) := by + exact neg_le_neg inq₂ + have : + ‖alg.x i - xm‖ ^ 2 + + (-(2 * @inner ℝ E _ (alg.x i - xm) (alg.a i • alg.g i))) + ≤ + ‖alg.x i - xm‖ ^ 2 + + (-(2 * alg.a i * (sInf {x | ∃ i ∈ Finset.range (k + 1), f (alg.x i) = x} - f xm))) := by + exact add_le_add_left hneg _ + simpa [sub_eq_add_neg, mul_assoc] using this have h₁' : ∀ ⦃i : ℕ⦄ , i ≥ 0 ∧ i ≤ k → alg.a i * (2 * (sInf {f (alg.x i) | i ∈ Finset.range (k + 1)} - f xm)) ≤ ‖alg.x i - xm‖ ^ 2 - ‖alg.x (i+1) - xm‖ ^ 2 + s ^ 2 := by intro i ⟨hi₁, hi₂⟩ @@ -319,7 +356,7 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) · have : i < k + 1 := Finset.mem_range.mp hi apply (Nat.lt_add_one_iff).mp this rw [← Finset.sum_mul, ← 
mul_assoc, mul_comm _ 2, Finset.sum_add_distrib] at h₂ - rw [Finset.sum_range_sub', alg.initial, Finset.sum_const] at h₂ + rw [Finset.sum_range_sub' (fun i => ‖alg.x i - xm‖ ^ 2) (k + 1), alg.initial, Finset.sum_const] at h₂ simp at h₂ have hG : (NNReal.toReal alg.G) > 0 := by apply lt_of_lt_of_le _ (h' (alg.hg 0)) @@ -332,7 +369,7 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) apply Finset.sum_le_sum intro i _ rw [← (ha' i)] - apply (div_le_iff₀ hG).mpr ((mul_le_mul_left (alg.ha i)).mpr (h' (alg.hg i))) + apply (div_le_iff₀ hG).mpr ((mul_le_mul_iff_right₀ (alg.ha i)).mpr (h' (alg.hg i))) have hpos₁ : (↑k + 1) * (s / ↑alg.G) > 0 := by apply mul_pos · apply add_pos_of_nonneg_of_pos (Nat.cast_nonneg k) zero_lt_one @@ -367,9 +404,15 @@ theorem subgradient_method_fixed_distance {s : ℝ} (hm : IsMinOn f univ xm) _ ≤ (‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 + (k + 1) * s ^ 2) / (2 * (k + 1) * (s / alg.G)) := by apply (le_div_iff₀' hpos₁').mpr h₂' _ ≤ (‖x₀ - xm‖ ^ 2 + (↑k + 1) * s ^ 2) / (2 * (↑k + 1) * (s / ↑alg.G)) := by - apply (div_le_div_right hpos₁').mpr; simp + apply (div_le_div_iff_of_pos_right hpos₁').mpr + have hneg_le_zero : - ‖alg.x (k + 1) - xm‖ ^ 2 ≤ 0 := by + exact neg_nonpos.mpr (sq_nonneg _) + have hA : + ‖x₀ - xm‖ ^ 2 - ‖alg.x (k + 1) - xm‖ ^ 2 ≤ ‖x₀ - xm‖ ^ 2 := by + simp + simp _ = alg.G * ‖x₀ - xm‖ ^ 2 / (2 * (k + 1) * s) + alg.G * s / 2 := by - field_simp; ring + field_simp /- @@ -411,7 +454,7 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) calc ‖x₀ - xm‖ ^ 2 < Finset.sum (Finset.range (b + 1)) alg.a * ε := by apply h₂ _ = 2 * Finset.sum (Finset.range (b + 1)) alg.a * (ε / 2) := by - field_simp;ring + field_simp have ha₂ : ∃ a₂, ∀ (b : ℕ), a₂ ≤ b → alg.G ^ 2 * (Finset.range (b + 1)).sum (fun i => (alg.a i) ^ 2) / (2 * (Finset.range (b + 1)).sum alg.a) < ε / 2 := by by_cases hG : ↑alg.G = 0 @@ -486,18 +529,22 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) (2 * 
Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 * Finset.sum (Finset.range (b - a₂)) fun x => alg.a (a₂ + 1 + x) ^ 2) / (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by - field_simp; rw[← mul_add]; simp - left + field_simp obtain heq := Finset.sum_range_add (fun i => alg.a i ^ 2) (a₂ + 1) (b - a₂) - have h₃' : (b + 1) = a₂ + 1 + (b - a₂) := by - rw[(Nat.add_comm a₂ 1), Nat.add_assoc, (Nat.add_sub_cancel' hba₂), Nat.add_comm] - rw[h₃']; apply heq + have h₃' : a₂ + 1 + (b - a₂) = b + 1 := by + have hb' : a₂ + (b - a₂) = b := Nat.add_sub_cancel' hba₂ + simp [ Nat.add_comm, Nat.add_left_comm]; grind + simpa [h₃'.symm] using heq _ < ε / 4 + ε / 4 := by apply add_lt_add · rcases hasA b hba₁ with h₃; simp [s₁] at h₃ obtain h₃₁ := (div_lt_iff₀ εpos).mp h₃ obtain h₃₂ := (div_lt_iff₀' hpos'').mpr h₃₁ - obtain h₃₃ := (div_lt_div_right zero_lt_four).mpr h₃₂ + have h₃₃ : + (2 * ↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) (fun x => alg.a x ^ 2)) / + Finset.sum (Finset.range (b + 1)) alg.a / 4 < ε / 4 := by + have hmul := (mul_lt_mul_of_pos_right h₃₂ (by norm_num : (0 : ℝ) < (1 / 4))) + simpa [div_eq_mul_inv, mul_comm, mul_left_comm, mul_assoc] using hmul calc _ = (2 * ↑alg.G ^ 2 * Finset.sum (Finset.range (a₂ + 1)) fun x => alg.a x ^ 2) / Finset.sum (Finset.range (b + 1)) alg.a / 4 := by field_simp;ring @@ -506,9 +553,9 @@ lemma subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) calc _ ≤ ↑alg.G ^ 2 * Finset.sum (Finset.range (b - a₂)) (fun x => (ε / (2 * ↑alg.G ^ 2)) * alg.a (a₂ + 1 + x)) := by - apply (mul_le_mul_left hpos').mpr; apply Finset.sum_le_sum; intro i _ + apply (mul_le_mul_iff_right₀ hpos').mpr; apply Finset.sum_le_sum; intro i _ have hposi : alg.a (a₂ + 1 + i) > 0 := by apply (alg.ha (a₂ + 1 + i)) - rw [pow_two]; apply (mul_le_mul_right hposi).mpr + rw [pow_two]; apply (mul_le_mul_iff_left₀ hposi).mpr have : a₂ + 1 + i ≥ a₂ := by rw[Nat.add_assoc]; apply Nat.le_add_right rcases ha₂ (a₂ + 1 + i) this with hai₂ @@ -574,13 +621,13 @@ lemma 
subgradient_method_diminishing_step_size (hm : IsMinOn f univ xm) _ ≤ (‖x₀ - xm‖ ^ 2 + ↑alg.G ^ 2 * Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2) / (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by apply (le_div_iff₀' hpos).mpr; simp at hb₁; apply hb₁ - _ = ‖x₀ - xm‖ ^ 2 / (2 * Finset.sum (Finset.range (b + 1)) alg.a) + (↑alg.G ^ 2 * - Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2) / - (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by - simp [div_add_div_same] + _ = ‖x₀ - xm‖ ^ 2 / (2 * Finset.sum (Finset.range (b + 1)) alg.a) + + (↑alg.G ^ 2 * Finset.sum (Finset.range (b + 1)) fun i => alg.a i ^ 2) / + (2 * Finset.sum (Finset.range (b + 1)) alg.a) := by + field_simp _ < ε / 2 + ε / 2 := by - apply add_lt_add; apply hba₁'; apply hba₂' - _ = ε := by field_simp + apply add_lt_add; exact hba₁'; exact hba₂' + _ = ε := by field_simp; ring obtain h₁' := Filter.Tendsto.add_const (f xm) h₁ simp at h₁'; simp; apply h₁' diff --git a/Optlib/Convex/BanachSubgradient.lean b/Optlib/Convex/BanachSubgradient.lean index 3077448..0f20ed9 100644 --- a/Optlib/Convex/BanachSubgradient.lean +++ b/Optlib/Convex/BanachSubgradient.lean @@ -3,10 +3,15 @@ Copyright (c) 2023 Wanyi He. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. 
Author: Wanyi He, Chenyi Li, Zichen Wang -/ -import Mathlib.Analysis.NormedSpace.HahnBanach.Separation -import Mathlib.LinearAlgebra.Dual - +import Mathlib.Algebra.Order.Ring.Star +import Mathlib.Analysis.InnerProductSpace.Basic +import Mathlib.Analysis.Normed.Operator.Bilinear +import Mathlib.Analysis.Normed.Order.Lattice +import Mathlib.Analysis.NormedSpace.HahnBanach.Separation +import Mathlib.Data.Real.StarOrdered +import Mathlib.GroupTheory.MonoidLocalization.Basic +import Mathlib.LinearAlgebra.Dual.Lemmas section variable {E : Type*} [SeminormedAddCommGroup E] @@ -23,7 +28,7 @@ lemma EpigraphInterior_existence (hc : ContinuousOn f (interior s)) (hx : x ∈ have h1 : IsOpen t := IsOpen.preimage continuous_fst isOpen_interior have h2: ContinuousOn (fun p : (E × ℝ) => f p.fst) t := ContinuousOn.comp hc continuousOn_fst (fun ⦃x⦄ a => a) - apply ContinuousOn.isOpen_inter_preimage (h2.prod continuousOn_snd) h1 isOpen_lt_prod + apply ContinuousOn.isOpen_inter_preimage (h2.prodMk continuousOn_snd) h1 isOpen_lt_prod have h' : {p : E × ℝ| p.1 ∈ interior s ∧ f p.1 < p.2} ⊆ {p | p.1 ∈ s ∧ f p.1 ≤ p.2} := fun p ⟨hp1, hp2⟩ => ⟨interior_subset hp1, le_of_lt hp2⟩ apply interior_mono h' @@ -60,7 +65,7 @@ lemma Continuous_epi_open {f₁ : E → ℝ} (hcon : ContinuousOn f₁ univ) : have : {(x, y) : E × ℝ | y > f₁ x} = {(x, y) : E × ℝ | x ∈ univ ∧ y > f₁ x} := by ext z; simp rw [this] - apply ContinuousOn.isOpen_inter_preimage (h2.prod continuousOn_snd) h1 isOpen_lt_prod + apply ContinuousOn.isOpen_inter_preimage (h2.prodMk continuousOn_snd) h1 isOpen_lt_prod end noncomputable section @@ -88,6 +93,8 @@ def Banach_SubderivWithinAt (f : E → ℝ) (s : Set E) (x : E) : Set (E →L[ def Epi (f : E → ℝ) (s : Set E) : Set (E × ℝ) := {p : E × ℝ | p.1 ∈ s ∧ f p.1 ≤ p.2} +set_option maxHeartbeats 0 + in theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) (hc : ContinuousOn f (interior s)) (hx : x ∈ interior s) : Set.Nonempty (Banach_SubderivWithinAt f s x) := by @@ -127,38 +134,45 @@ 
theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) have hgu' : g.1 x + g.2 (f x) < g.1 a.1 + g.2 a.2 := by obtain hg1 := hg a; obtain hg2 := hg (x , f x) rw[← hg1 , ← hg2]; apply hφ a ha - simp only [hu, hu] at hgu'; exact hgu' + simp only [hu] at hgu'; exact hgu' have hu0 : u > 0 := by specialize hgu (x, f x + 1) (EpigraphInterior_existence hc hx (f x + 1) (lt_add_one (f x))) dsimp at hgu; linarith let h := - (1 / u) • g.1 - have : ∀ (x : E), ‖h x‖ ≤ ((1 / u) * ‖φ‖) * ‖x‖ := by - intro x; field_simp [h]; simp only [abs_of_pos hu0] - apply div_le_div_of_nonneg_right _ (by linarith) + have hbound : ∀ (x : E), ‖h x‖ ≤ ((1 / u) * ‖φ‖) * ‖x‖ := by + intro x + have hpos : 0 ≤ (1 / u) := by + have : 0 < 1 / u := by exact one_div_pos.mpr hu0 + exact this.le calc - |φ (x, 0)| = ‖φ (x, 0)‖ := rfl - _ ≤ ‖φ‖ * ‖(x , (0 : ℝ))‖ := ContinuousLinearMap.le_opNorm φ (x, 0) - _ = ‖φ‖ * ‖x‖ := by - simp only [Prod.norm_def, norm_zero, max_eq_left (norm_nonneg x)] + ‖h x‖ + = ‖-(1 / u)‖ * ‖g.1 x‖ := by + simp [h] + _ = (1 / u) * ‖g.1 x‖ := by + simp [Real.norm_eq_abs]; grind only + _ = (1 / u) * ‖φ (x, 0)‖ := by + have hx0 : φ (x, 0) = g.1 x := by + simpa [hu] using hg (x, 0) + simp [hx0] + _ ≤ (1 / u) * (‖φ‖ * ‖(x, (0 : ℝ))‖) := by + have := ContinuousLinearMap.le_opNorm φ (x, 0) + exact mul_le_mul_of_nonneg_left this hpos + _ = (1 / u) * ‖φ‖ * ‖x‖ := by + simp [mul_left_comm, mul_comm, Prod.norm_def, norm_zero] have hh : ∃ (C : ℝ), ∀ (x : E), ‖h x‖ ≤ C * ‖x‖ := by use ((1 / u) * ‖φ‖) let h' := (LinearMap.mkContinuousOfExistsBound h hh) have key1 : ∀ a ∈ interior (Epi f s) , h' (a.1 - x) + f x < a.2 := by - dsimp [h']; intro a ha - specialize hgu a ha; dsimp [g] at hgu - have uneq : u ≠ 0 := by linarith - rw [← mul_lt_mul_iff_of_pos_left hu0]; field_simp - have eq1 : u * (-φ (a.1 - x, 0) + f x * u) / u = u * f x - φ (a.1 - x, 0) := by - field_simp; ring_nf - have eq2 : φ (x, 0) - φ (a.1, 0) = -φ (a.1 - x, 0) := by - have : φ (x, 0) - φ (a.1, 0) = φ ((x, 0) - (a.1, 0)) := 
by - simp only [φ.map_sub] - simp only [this, Prod.mk_sub_mk, sub_zero] - have : (-(1 : ℝ)) • (a.1 - x, (0 : ℝ)) = (x - a.1, 0) := by simp - rw [← this, ContinuousLinearMap.map_smulₛₗ]; simp - field_simp [h, g, eq1, eq2, hgu] - rw [div_lt_iff₀ (by positivity)]; rw [← mul_lt_mul_iff_of_pos_left hu0] at hgu - linarith + intro a ha + change h (a.1 - x) + f x < a.2 + have hsub : g.1 x + u * f x - g.1 a.1 < u * a.2 := by + have := sub_lt_sub_right (hgu a ha) (g.1 a.1) + simpa [sub_eq_add_neg, add_comm, add_left_comm, add_assoc] using this + have hne : u ≠ 0 := ne_of_gt hu0 + have hmul : u * (h (a.1 - x) + f x) < u * a.2 := by + simpa [h, mul_add, sub_eq_add_neg, map_sub, add_comm, add_left_comm, add_assoc, + mul_comm, mul_left_comm, mul_assoc, hne] using hsub + exact (mul_lt_mul_iff_of_pos_left hu0).1 hmul have key2₀ : ∀ a ∈ (Epi f s), a.1 ∈ interior s → h' (a.1 - x) + f x ≤ a.2 := by intro a ha posa @@ -181,8 +195,7 @@ theorem Banach_SubderivWithinAt.Nonempty (hf : ConvexOn ℝ s f) exact tendsto_const_nhds apply le_of_tendsto_of_tendsto' cleft ?_ hxn simp only [an, hfa] - exact can2 - + rw [← hfa]; grind only [cases eager Prod] have key2₁ : ∀ a ∈ (Epi f s), a.1 ∉ interior s → h' (a.1 - x) + f x ≤ a.2 := by intro a ha _ let an : ℕ → E × ℝ := fun n => ((n : ℝ) / (n + 1)) • a + ((1 : ℝ) / (n + 1)) • (x, f x) diff --git a/Optlib/Convex/ClosedCone.lean b/Optlib/Convex/ClosedCone.lean index 263f705..5aadd3c 100644 --- a/Optlib/Convex/ClosedCone.lean +++ b/Optlib/Convex/ClosedCone.lean @@ -20,7 +20,7 @@ This file contains the following parts of closed cone. 
section ClosedCone -open Finset Matrix +open Finset Matrix Topology variable {n : ℕ} {s : Finset ℕ} {V : ℕ → (EuclideanSpace ℝ (Fin n))} variable {x : EuclideanSpace ℝ (Fin n)} @@ -90,13 +90,13 @@ lemma cone_eq_finite_union (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ (Fi · intro xin let mem_x := conic_Caratheodory s V x xin rcases mem_x with ⟨τ, τsubs, xinτ, idpτ, _⟩ - simp [finite_F, F, idx_set, idx_to_cone] + simp [F, idx_set, idx_to_cone] use τ - · simp [finite_F, F, idx_set, idx_to_cone] + · simp [F, idx_set, idx_to_cone] intro τ τsubs _ xinτ apply cone_subset_of_idx_subset' s τ τsubs V xinτ · intro C Cin - simp [finite_F, F] at Cin; rcases Cin with ⟨τ, τin, Ceq⟩ + simp [F] at Cin; rcases Cin with ⟨τ, τin, Ceq⟩ use τ; constructor · rw [← Ceq] · simp [idx_set] at τin; exact τin.2 diff --git a/Optlib/Convex/ConicCaratheodory.lean b/Optlib/Convex/ConicCaratheodory.lean index 9f6bf21..9a6e7b1 100644 --- a/Optlib/Convex/ConicCaratheodory.lean +++ b/Optlib/Convex/ConicCaratheodory.lean @@ -80,7 +80,7 @@ private lemma mem_conic_erase (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ by_cases hi : i ∈ s · linarith [αpos ⟨i, hi⟩] · simp [α, hi, β]; linarith [tin i] - · have hαj₀ : α j₀ = 0 := by field_simp [α, β] + · have hαj₀ : α j₀ = 0 := by grind only [cases eager Subtype, cases Or] rw [hαj₀, ← xdecompose]; simp [α] have aux : (Finset.sum s fun x ↦ (t x - β x / k j₀) • V x) = (Finset.sum s fun x ↦ t x • V x) - (1 / k j₀) • (Finset.sum s fun x ↦ β x • V x) := by @@ -127,7 +127,8 @@ theorem conic_Caratheodory (s : Finset ℕ) (V : ℕ → (EuclideanSpace ℝ (Fi apply Finset.erase_subset specialize τcardmin τ'subs xinerase simp [to_card] at τcardmin - absurd τcardmin; simp + absurd τcardmin; grind only [= Set.setOf_true, = subset_iff, = Set.setOf_false, + usr card_ne_zero_of_mem, = mem_erase, cases eager Subtype, cases Or] · intro σ σsubs; specialize τcardmin σ simp [idx, to_card] at τcardmin apply τcardmin σsubs diff --git a/Optlib/Convex/ConvexFunction.lean 
b/Optlib/Convex/ConvexFunction.lean index 378e2eb..304b1c2 100644 --- a/Optlib/Convex/ConvexFunction.lean +++ b/Optlib/Convex/ConvexFunction.lean @@ -101,7 +101,7 @@ theorem Convex_first_order_condition {s : Set E} have x1nbhd: ‖x - x'‖ ≤ δ := by rw[h1, h2] have h3: b * ‖x - y‖ ≤ b1 * ‖x - y‖:= by - rw [mul_le_mul_right] + rw [mul_le_mul_iff_left₀] apply min_le_left exact h₃ have h4: b1 * ‖x - y‖ = δ := by @@ -217,47 +217,57 @@ variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteS variable {f : E → ℝ} {f' : E → E} {s : Set E} {x : E} theorem Convex_first_order_condition' (h : HasGradientAt f (f' x) x) (hf : ConvexOn ℝ s f) - (xs : x ∈ s) : ∀ (y : E), y ∈ s → f x + inner (f' x) (y - x) ≤ f y := by + (xs : x ∈ s) : ∀ (y : E), y ∈ s → f x + ⟪f' x, y - x⟫_ℝ ≤ f y := by show ∀ (y : E), y ∈ s → f x + (toDual ℝ E) (f' x) (y - x) ≤ f y apply Convex_first_order_condition _ hf xs apply h -theorem Convex_first_order_condition_inverse' (h : ∀ x ∈ s , HasGradientAt f (f' x) x) - (h₁ : Convex ℝ s) (h₂ : ∀ x : E, x ∈ s → ∀ y : E, y ∈ s → f x + inner (f' x) (y - x) ≤ f y) : +theorem Convex_first_order_condition_inverse' + (h : ∀ x ∈ s, HasGradientAt f (f' x) x) + (h₁ : Convex ℝ s) + (h₂ : ∀ x ∈ s, ∀ y ∈ s, f x + ⟪f' x, y - x⟫_ℝ ≤ f y) : ConvexOn ℝ s f := by apply Convex_first_order_condition_inverse - intro x; specialize h x - rw [hasGradientAt_iff_hasFDerivAt] at h - apply h; apply h₁; apply h₂ + intro x hx + have : HasFDerivAt f (toDual ℝ E (f' x)) x := by + simpa [hasGradientAt_iff_hasFDerivAt] using (h x hx) + exact this + · exact h₁ + · intro x xs y ys + simpa using h₂ x xs y ys theorem Convex_first_order_condition_iff' (h₁ : Convex ℝ s) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, f x + inner (f' x) (y - x) ≤ f y := + ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, f x + ⟪f' x, y - x⟫_ℝ ≤ f y := ⟨fun h₂ x xs ↦ Convex_first_order_condition' (h x xs) h₂ xs, Convex_first_order_condition_inverse' h h₁⟩ -theorem 
Convex_monotone_gradient' (hfun: ConvexOn ℝ s f) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by - let g := fun x ↦ (toDual ℝ E) (f' x) - have h' : ∀ x ∈ s, HasFDerivAt f (g x) x := h - have equiv : ∀ x y : E, inner (f' x - f' y) (x - y) = (g x - g y) (x - y) := by +theorem Convex_monotone_gradient' + (hfun : ConvexOn ℝ s f) + (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : + ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ (0 : ℝ) := by + let g : E → (E →L[ℝ] ℝ) := fun x ↦ toDual ℝ E (f' x) + have hg : ∀ x ∈ s, HasFDerivAt f (g x) x := by + intro x hx; simpa [g, hasGradientAt_iff_hasFDerivAt] using h x hx + have equiv : ∀ x y : E, ⟪f' x - f' y, x - y⟫_ℝ = (g x - g y) (x - y) := by intro x y - rw [← InnerProductSpace.toDual_apply] - simp only [map_sub, ContinuousLinearMap.coe_sub', Pi.sub_apply, toDual_apply, g] + have hlin : toDual ℝ E (f' x - f' y) = (g x - g y) := by + simp [g] + have ht : ⟪f' x - f' y, x - y⟫_ℝ = (toDual ℝ E (f' x - f' y)) (x - y) := by rfl + simp_all only [map_sub, ContinuousLinearMap.coe_sub', Pi.sub_apply, toDual_apply, g] intro x hx y hy - rw [equiv] - exact Convex_monotone_gradient hfun h' x hx y hy + simpa [equiv] using Convex_monotone_gradient hfun hg x hx y hy theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) - (mono: ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ)) : ConvexOn ℝ s f := by + (mono: ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ (0 : ℝ)) : ConvexOn ℝ s f := by apply Convex_first_order_condition_inverse' hf h₁ intro x xs y ys let g := fun t : ℝ ↦ f (x + t • (y - x)) - let g' := fun t : ℝ ↦ (inner (f' (x + t • (y - x))) (y - x) : ℝ) + let g' := fun t : ℝ ↦ ⟪f' (x + t • (y - x)), y - x⟫_ℝ have h1 : ∀ r ∈ Icc 0 1, HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ (x + r • (y - x)) have : g = f ∘ h := rfl rw [this]; intro t ht - have : inner (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl + have : ⟪f' 
(x + t • (y - x)), y - x⟫_ℝ = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + t • (y - x) ∈ s := by @@ -270,13 +280,13 @@ theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGr rw [one_smul] at this; exact HasDerivAt.const_add x this have e1 : f x = g 0 := by simp [g] have e2 : f y = g 1 := by simp [g] - have e3 : inner (f' x) (y - x) = g' 0 := by simp [g'] + have e3 : ⟪f' x, y - x⟫_ℝ = g' 0 := by simp [g'] rw [e1, e2, e3] have mono' : ∀ t ∈ Ioo 0 1, g' t ≥ g' 0 := by intro t ht; simp [g']; rw [← sub_nonneg, ← inner_sub_left] rcases ht with ⟨ht1, ht2⟩ - have hh: inner (f' (x + t • (y - x)) - f' x) (x + t • (y - x) - x) ≥ (0 : ℝ) := by + have hh: ⟪f' (x + t • (y - x)) - f' x, x + t • (y - x) - x⟫_ℝ ≥ (0 : ℝ) := by apply mono (x + t • (y - x)) _ x xs have e4 : x + t • (y - x) = (1 - t) • x + t • y := by rw [smul_sub, add_sub, sub_smul, one_smul, add_sub_right_comm] @@ -300,7 +310,7 @@ theorem monotone_gradient_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGr simp; constructor; linarith; linarith theorem monotone_gradient_iff_convex' (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x): - ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := + ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ (0 : ℝ) := ⟨fun h ↦ Convex_monotone_gradient' h hf, fun h ↦ monotone_gradient_convex' h₁ hf h⟩ theorem monotone_gradient_convex {f' : E → (E →L[ℝ] ℝ)} (h₁ : Convex ℝ s) @@ -310,16 +320,17 @@ theorem monotone_gradient_convex {f' : E → (E →L[ℝ] ℝ)} (h₁ : Convex have h' : ∀ x ∈ s, HasGradientAt f (g x) x := by intro x' hx' exact HasFDerivAt.hasGradientAt (hf x' hx') - have equiv : ∀ x y : E, inner (g x - g y) (x - y) = (f' x - f' y) (x - y) := by + have equiv : ∀ x y : E, ⟪g x - g y, x - y⟫_ℝ = (f' x - f' y) (x - y) := by intro x y - rw [← InnerProductSpace.toDual_apply]; simp [g] - have mono' : ∀ x ∈ s, ∀ y ∈ s, inner (g 
x - g y) (x - y) ≥ (0 : ℝ) := by + change (toDual ℝ E (g x - g y)) (x - y) = (f' x - f' y) (x - y) + simp [g, map_sub, ContinuousLinearMap.coe_sub', Pi.sub_apply] + have mono' : ∀ x ∈ s, ∀ y ∈ s, ⟪g x - g y, x - y⟫_ℝ ≥ (0 : ℝ) := by intro x hx y hy specialize mono x hx y hy rw [equiv]; exact mono exact monotone_gradient_convex' h₁ h' mono' -theorem montone_gradient_iff_convex {f' : E → (E →L[ℝ] ℝ)} +theorem monotone_gradient_iff_convex {f' : E → (E →L[ℝ] ℝ)} (h₁ : Convex ℝ s) (hf : ∀ x ∈ s, HasFDerivAt f (f' x) x): ConvexOn ℝ s f ↔ ∀ x ∈ s, ∀ y ∈ s, (f' x - f' y) (x - y) ≥ (0 : ℝ) := ⟨fun h ↦ Convex_monotone_gradient h hf, fun h ↦ monotone_gradient_convex h₁ hf h⟩ @@ -339,14 +350,14 @@ variable {f : E → ℝ} {f' : E → E} {s : Set E} theorem monotone_gradient_strict_convex (hs : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) - (mono: ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ)) : + (mono: ∀ x ∈ s, ∀ y ∈ s, x ≠ y → ⟪f' x - f' y, x - y⟫_ℝ > (0 : ℝ)) : StrictConvexOn ℝ s f := by rw [StrictConvexOn]; use hs intro x xin y yin xney a b apos bpos absum1 by_contra h₀; push_neg at h₀ have anneg : 0 ≤ a := by linarith have bnneg : 0 ≤ b := by linarith - have mono' : ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by + have mono' : ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ (0 : ℝ) := by intro x xin y yin by_cases h : x = y · rw [h]; simp @@ -363,14 +374,14 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have : a = 1 - b := by linarith rw [this, sub_smul, add_comm_sub, ← smul_sub]; simp apply Convex.add_smul_sub_mem hs xin yin; simp; use bnneg; linarith - have eq1 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (x + c • (z - x))) (z - x) = f z - f x := by + have eq1 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ ⟪f' (x + c • (z - x)), z - x⟫_ℝ = f z - f x := by apply lagrange hs hf x xin z zin - have eq2 : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (z + c • (y - z))) (y - z) = f y - f z := by + have eq2' : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ ⟪f' (z + c • (y - z)), y - 
z⟫_ℝ = f y - f z := by apply lagrange hs hf z zin y yin rcases eq1 with ⟨c, cin, e1⟩ - rcases eq2 with ⟨d, din, e2⟩ - have eq3 : b * inner (f' (z + d • (y - z))) (y - z) - - a * inner (f' (x + c • (z - x))) (z - x) = 0 := by + rcases eq2' with ⟨d, din, e2⟩ + have eq3 : b * ⟪f' (z + d • (y - z)), y - z⟫_ℝ - + a * ⟪f' (x + c • (z - x)), z - x⟫_ℝ = 0 := by rw [e1, e2]; simp [z]; ring_nf; rw [add_comm, ← add_assoc] simp at eq2; rw [← eq2]; nth_rw 1 [← mul_one (f (a • x + b • y))]; rw [← absum1]; ring_nf rw [← inner_smul_right, ← inner_smul_right] at eq3 @@ -379,14 +390,14 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have this2 : a • (z - x) = a • b • (y - x) := by simp [z]; nth_rw 2 [← one_smul ℝ x]; rw [← absum1, add_smul]; simp; rw [← smul_sub, smul_comm] rw [this1, this2, ← inner_sub_left, inner_smul_right, inner_smul_right, ← mul_assoc] at eq3 - have eq0 : inner (f' (z + d • (y - z)) - f' (x + c • (z - x))) (y - x) = (0 : ℝ) := by + have eq0 : ⟪f' (z + d • (y - z)) - f' (x + c • (z - x)), y - x⟫_ℝ = (0 : ℝ) := by contrapose! 
eq3 rw [mul_ne_zero_iff] constructor · rw [mul_ne_zero_iff]; constructor <;> linarith · exact eq3 have zeq : z = x + b • (y - x) := by - nth_rw 1 [← one_smul ℝ x]; rw [← absum1, add_smul, smul_sub]; simp + nth_rw 1 [← one_smul ℝ x]; rw [← absum1, add_smul, smul_sub]; simp; rfl let u : E := z + d • (y - z) let v : E := x + c • (z - x) have ueq : u = x + (b + d) • (y - x) - d • b • (y - x) := by @@ -400,8 +411,8 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) have usubv : u - v = (b + d - d * b - c * b) • (y - x) := by rw [ueq, veq, ← smul_assoc, ← smul_assoc, ← sub_sub]; simp rw [← add_sub, ← sub_smul (b + d) (d * b)]; simp; rw [← sub_smul] - have eeq0 : inner (f' u - f' v) (u - v) = (0 : ℝ) := by - show inner (f' (z + d • (y - z)) - f' (x + c • (z - x))) (u - v) = (0 : ℝ) + have eeq0 : ⟪f' u - f' v, u - v⟫_ℝ = (0 : ℝ) := by + show ⟪f' (z + d • (y - z)) - f' (x + c • (z - x)), u - v⟫_ℝ = (0 : ℝ) rw [usubv, inner_smul_right, eq0]; simp have coefne0 : b + d - d * b - c * b > 0 := by nth_rw 1 [← mul_one d]; rw [← absum1]; simp; ring_nf @@ -412,7 +423,7 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) _ < b + d * a := by have : 0 < d * a := by apply mul_pos dpos apos linarith - have neq0 : inner (f' u - f' v) (u - v) > (0 : ℝ) := by + have neq0 : ⟪f' u - f' v, u - v⟫_ℝ > (0 : ℝ) := by have uin : u ∈ s := by show z + d • (y - z) ∈ s apply Convex.add_smul_sub_mem hs zin yin; simp; simp at din @@ -433,24 +444,24 @@ theorem monotone_gradient_strict_convex (hs : Convex ℝ s) theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (h₁ : StrictConvexOn ℝ s f ) : - ∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ) := by + ∀ x ∈ s, ∀ y ∈ s, x ≠ y → ⟪f' x - f' y, x - y⟫_ℝ > (0 : ℝ) := by intro x xin y yin xney have convf : ConvexOn ℝ s f := by apply StrictConvexOn.convexOn h₁ rw [StrictConvexOn] at h₁ rcases h₁ with ⟨hs, fsconv⟩ - have : inner (f' x - f' y) (x - y) ≥ (0 : ℝ) := by + have : ⟪f' x - f' y, x - y⟫_ℝ ≥ (0 
: ℝ) := by apply Convex_monotone_gradient' convf hf x xin y yin by_contra h0; push_neg at h0 - have eq : inner (f' x - f' y) (x - y) = (0 : ℝ) := by linarith - have eq1 : f x + inner (f' x) (y - x) ≤ f y := by + have eq : ⟪f' x - f' y, x - y⟫_ℝ = (0 : ℝ) := by linarith + have eq1 : f x + ⟪f' x, y - x⟫_ℝ ≤ f y := by apply Convex_first_order_condition' (hf x xin) convf xin y yin - have eq2 : f y + inner (f' y) (x - y) ≤ f x := by + have eq2 : f y + ⟪f' y, x - y⟫_ℝ ≤ f x := by apply Convex_first_order_condition' (hf y yin) convf yin x xin - have eq2' : f y ≤ f x + inner (f' x) (y - x) := by - rw [← add_zero (inner (f' x) (y - x)), ← eq, inner_sub_left, add_sub, ← inner_add_right] + have eq2' : f y ≤ f x + ⟪f' x, y - x⟫_ℝ := by + rw [← add_zero (⟪f' x, y - x⟫_ℝ), ← eq, inner_sub_left, add_sub, ← inner_add_right] simp; apply eq2 - have eq3 : f y - f x = inner (f' x) (y - x) := by linarith - have extc : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ inner (f' (x + c • (y - x))) (y - x) = f y - f x := by + have eq3 : f y - f x = ⟪f' x, y - x⟫_ℝ := by linarith + have extc : ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ ⟪f' (x + c • (y - x)), y - x⟫_ℝ = f y - f x := by apply lagrange hs hf x xin y yin rcases extc with ⟨c, cin, e1⟩ let z : E := x + c • (y - x) @@ -458,18 +469,18 @@ theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x apply Convex.add_smul_sub_mem hs xin yin; simp; simp at cin; rcases cin with ⟨cpos, cl1⟩ constructor <;> linarith simp at cin; rcases cin with ⟨cpos, cl1⟩ - have eq0 : inner (f' z - f' x) (z - x) = (0 : ℝ) := by + have eq0 : ⟪f' z - f' x, z - x⟫_ℝ = (0 : ℝ) := by simp [z]; rw [inner_smul_right, inner_sub_left, ← eq3, e1]; simp - have eq4 : f x + inner (f' x) (z - x) ≤ f z := by + have eq4 : f x + ⟪f' x, z - x⟫_ℝ ≤ f z := by apply Convex_first_order_condition' (hf x xin) convf xin z zin - have eq5 : f z + inner (f' z) (x - z) ≤ f x := by + have eq5 : f z + ⟪f' z, x - z⟫_ℝ ≤ f x := by apply Convex_first_order_condition' (hf z zin) convf zin x xin - have eq5' : f 
z ≤ f x + inner (f' x) (z - x) := by - rw [← add_zero (inner (f' x) (z - x)), ← eq0, inner_sub_left] - rw [add_sub, add_comm (inner (f' x) (z - x))] + have eq5' : f z ≤ f x + ⟪f' x, z - x⟫_ℝ := by + rw [← add_zero (⟪f' x, z - x⟫_ℝ), ← eq0, inner_sub_left] + rw [add_sub, add_comm (⟪f' x, z - x⟫_ℝ)] rw [← add_sub, ← inner_sub_right, sub_self, inner_zero_right, add_zero] rw [← sub_neg_eq_add, ← inner_neg_right, neg_sub]; linarith - have eq6 : f z = inner (f' x) (z - x) + f x := by linarith + have eq6 : f z = ⟪f' x, z - x⟫_ℝ + f x := by linarith have f1 : f z = (1 - c) • f x + c • f y := by rw [eq6]; simp [z]; rw [inner_smul_right, ← eq3]; ring_nf have f2 : f z < (1 - c) • f x + c • f y := by @@ -485,7 +496,7 @@ theorem strict_convex_monotone_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x theorem strict_convex_iff_monotone_gradient (hs: Convex ℝ s) (h : ∀ x ∈ s, HasGradientAt f (f' x) x) : - (∀ x ∈ s, ∀ y ∈ s, x ≠ y → inner (f' x - f' y) (x - y) > (0 : ℝ)) + (∀ x ∈ s, ∀ y ∈ s, x ≠ y → ⟪f' x - f' y, x - y⟫_ℝ > (0 : ℝ)) ↔ StrictConvexOn ℝ s f := by constructor exact monotone_gradient_strict_convex hs h diff --git a/Optlib/Convex/Farkas.lean b/Optlib/Convex/Farkas.lean index 38db9b1..30785d0 100644 --- a/Optlib/Convex/Farkas.lean +++ b/Optlib/Convex/Farkas.lean @@ -3,13 +3,11 @@ Copyright (c) 2024 Shengyang Xu, Chenyi Li. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Shengyang Xu, Chenyi Li -/ -import Mathlib.Analysis.Convex.Cone.Basic -import Mathlib.Analysis.Calculus.LocalExtr.Basic +import Mathlib.Algebra.Order.Ring.Star +import Mathlib.Analysis.InnerProductSpace.Dual import Mathlib.Analysis.NormedSpace.HahnBanach.Separation -import Mathlib.Analysis.InnerProductSpace.PiL2 -import Mathlib.Data.Matrix.Rank -import Mathlib.LinearAlgebra.FiniteDimensional -import Optlib.Differential.Calculation +import Mathlib.Data.Int.Star +import Mathlib.Data.Real.StarOrdered import Optlib.Convex.ClosedCone /-! 
@@ -55,9 +53,9 @@ lemma polyhedra_iff_cone {σ : Finset ℕ} : ∀ (b : ℕ → EuclideanSpace ℝ simp [ht]; specialize cpos i ht; exact cpos; simp [ht] rw [h] let f : ℕ → EuclideanSpace ℝ (Fin n) := fun i ↦ (c1 i) • (b i) - have htt : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c1 x • b x) := by simp [f] + have htt : ∑ x ∈ σ.attach, f x = Finset.sum (attach σ) fun x => (c1 x • b x) := by simp [f] have h1 : ∀ i : σ, c1 i • b i = c i • b i := by intro i; simp [c1] - have ht : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by + have ht : ∑ x ∈ σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by rw [← htt]; apply Finset.sum_congr; simp intro i _; simp [f, c1] nth_rw 1 [Finset.sum_attach] at htt @@ -67,7 +65,7 @@ lemma polyhedra_iff_cone {σ : Finset ℕ} : ∀ (b : ℕ → EuclideanSpace ℝ use c1; constructor · intro i _; exact cpos i let f : ℕ → EuclideanSpace ℝ (Fin n) := fun i ↦ (c i) • (b i) - have : ∑ x in σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by simp [f] + have : ∑ x ∈ σ.attach, f x = Finset.sum (attach σ) fun x => (c x • b x) := by simp [f] rw [← h]; simp [c1]; rw [← this, Finset.sum_attach] private lemma leq_tendsto_zero {a x : ℝ} (ha : a < 0) (h : ∀ t > 0, t * x > a) : 0 ≤ x := by @@ -162,12 +160,27 @@ lemma general_polyhedra_is_polyhedra_empty (τ σ : Finset ℕ) (he : ¬(τ ∪ ∃ μ c, {z | ∃ (lam : τ → ℝ), ∃ (mu : σ → ℝ), (∀ i, 0 ≤ mu i) ∧ z = Finset.sum univ (fun i ↦ lam i • a i) + Finset.sum univ (fun i ↦ mu i • b i)} = cone μ c := by - simp at he; rw [Finset.union_eq_empty] at he - intro a b; simp [he] + simp at he; rw [← Finset.union_eq_empty] at he + intro a b; simp use ∅; use (fun _ => 0) simp [cone, quadrant]; ext x; simp; constructor - · intro x0; simp [x0]; use (fun _ => 0); simp - · intro cond; simp [cond.2] + · intro a_1 + simp_all only [union_eq_empty, notMem_empty, IsEmpty.forall_iff, implies_true, true_and] + obtain ⟨left, right⟩ := he + obtain ⟨w, h⟩ := a_1 + obtain ⟨w_1, h⟩ := h + subst h right 
left + simp_all only [attach_empty, sum_empty, add_zero, and_true] + apply Exists.intro + · intro i + rfl + · intro a_1 + simp_all only [union_eq_empty, notMem_empty, IsEmpty.forall_iff, implies_true, true_and] + obtain ⟨left, right⟩ := he + obtain ⟨left_1, right_1⟩ := a_1 + obtain ⟨w, h⟩ := left_1 + subst right right_1 left + simp_all only [attach_empty, sum_empty, add_zero, exists_const] lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).Nonempty) : ∀ (a : ℕ → EuclideanSpace ℝ (Fin n)), ∀ (b : ℕ → EuclideanSpace ℝ (Fin n)), @@ -180,11 +193,11 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N let τ2 := Finset.image (fun x => x + 2 * m) τ let μ := σ ∪ τ1 ∪ τ2 have mt1emp : σ ∩ τ1 = ∅ := by - simp only [τ1]; apply s_inter_t1_empty he; simp + simp only [τ1]; apply s_inter_t1_empty he; simp; rfl have mt2emp : σ ∩ τ2 = ∅ := by - simp only [τ2]; apply s_inter_t2_empty he; simp + simp only [τ2]; apply s_inter_t2_empty he; simp; rfl have t1t2emp : τ1 ∩ τ2 = ∅ := by - simp only [τ1, τ2]; apply t1_inter_t2_empty he; simp + simp only [τ1, τ2]; apply t1_inter_t2_empty he; simp; rfl have disj_st : Disjoint σ (τ1 ∪ τ2) := by rw [Finset.disjoint_iff_inter_eq_empty, Finset.inter_union_distrib_left]; simp [mt1emp, mt2emp] have disj_tt : Disjoint τ1 τ2 := by @@ -219,10 +232,10 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N simp [hs, ht1, ht2] use w; use wnneg rw [xeq, tau_decpn] - have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x in σ, (fun y => w y • c y) x := by + have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x ∈ σ, (fun y => w y • c y) x := by nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j; simp [w, c, cσ] - have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x in τ1, (fun y => w y • c y) x := by + have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x ∈ τ1, (fun y => w y • c y) x := by rw [shift_sum τ m (fun y => lamp y • cτ1 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j @@ -230,7 +243,7 
@@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N contrapose mt1emp; simp at mt1emp; push_neg; rw [← Finset.nonempty_iff_ne_empty] use x; simp [τ1, mt1emp, x.2] simp [w, c, hns] - have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x in τ2, (fun y => w y • c y) x := by + have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x ∈ τ2, (fun y => w y • c y) x := by rw [shift_sum τ (2 * m) (fun y => lamn y • cτ2 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j @@ -250,10 +263,10 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N let lamn : ℕ → ℝ := fun i => if i ∈ τ then w (i + 2 * m) else 0 let lam : τ → ℝ := fun i => lamp i.1 - lamn i.1 let mu : ℕ → ℝ := fun i => if i ∈ σ then w i else 0 - have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x in σ, (fun y => w y • c y) x := by + have eq1 : ∑ x : { x // x ∈ σ }, mu x • b x = ∑ x ∈ σ, (fun y => w y • c y) x := by nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j; simp [mu, c, cσ] - have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x in τ1, (fun y => w y • c y) x := by + have eq2 : ∑ x : τ, (fun y => lamp y • cτ1 y) x = ∑ x ∈ τ1, (fun y => w y • c y) x := by rw [shift_sum τ m (fun y => lamp y • cτ1 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j @@ -262,8 +275,8 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N use x; simp [τ1, mt1emp, x.2] rcases exist_of_mem_shift x.2 with ⟨a, eq⟩ have hin : x.1 - m ∈ τ := by rw [eq]; simp - simp [mu, lamp, c, hns, hin]; rw [eq]; simp - have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x in τ2, (fun y => w y • c y) x := by + simp [lamp, c, hns, hin]; rw [eq]; simp + have eq3 : ∑ x : τ, (fun y => lamn y • cτ2 y) x = ∑ x ∈ τ2, (fun y => w y • c y) x := by rw [shift_sum τ (2 * m) (fun y => lamn y • cτ2 y)] nth_rw 2 [← Finset.sum_attach]; simp; congr ext x j @@ -275,7 +288,7 @@ lemma general_polyhedra_is_polyhedra_ne (τ σ : Finset ℕ) (he : (τ ∪ σ).N use x; simp [τ2, t1t2emp, x.2] rcases 
exist_of_mem_shift x.2 with ⟨a, eq⟩ have hin : x.1 - 2 * m ∈ τ := by rw [eq]; simp - simp [mu, lamn, c, hns, hnt, hin]; rw [eq]; simp + simp [lamn, c, hns, hnt, hin]; rw [eq]; simp rw [← eq1, ← eq2, ← eq3] at xeq; simp at xeq simp; use lam; use (fun i => mu i); constructor · intro a ain; simp [mu, ain]; linarith [wnneg a] @@ -301,29 +314,48 @@ lemma general_polyhedra_is_closed : IsClosed {z | ∃ (lam : τ → ℝ), ∃ (m theorem Farkas : (∃ (lam : τ → ℝ), ∃ (mu : σ → ℝ), (∀ i, 0 ≤ mu i) ∧ c = Finset.sum univ (fun i ↦ lam i • a i) + Finset.sum univ (fun i ↦ mu i • b i)) ↔ - ¬ (∃ (z : EuclideanSpace ℝ (Fin n)), (∀ i ∈ τ, inner (a i) z = (0 : ℝ)) - ∧ (∀ i ∈ σ, inner (b i) z ≥ (0 : ℝ)) ∧ (inner c z < (0 : ℝ))) := by + ¬ (∃ (z : EuclideanSpace ℝ (Fin n)), (∀ i ∈ τ, inner (𝕜 := ℝ) (a i) z = (0 : ℝ)) + ∧ (∀ i ∈ σ, inner (𝕜 := ℝ) (b i) z ≥ (0 : ℝ)) ∧ (inner (𝕜 := ℝ) c z < (0 : ℝ))) := by constructor intro h; rcases h with ⟨lam, mu, ⟨h1, h2⟩⟩ by_contra h3 rcases h3 with ⟨z, ⟨h31, ⟨h32, h33⟩⟩⟩ - have : inner c z ≥ (0 : ℝ) := by + have : inner (𝕜 := ℝ) c z ≥ (0 : ℝ) := by + classical + have h31' : ∀ i : τ, inner (𝕜 := ℝ) (a i) z = 0 := fun i => h31 i i.2 + have h32' : ∀ i : σ, inner (𝕜 := ℝ) (b i) z ≥ 0 := fun i => h32 i i.2 calc - _ = inner (Finset.sum univ (fun i ↦ lam i • a i)) z - + inner (Finset.sum univ (fun i ↦ mu i • b i)) z := by rw [h2]; simp [inner_add_left] - _ = Finset.sum univ (fun i ↦ inner (lam i • a i) z) - + Finset.sum univ (fun i ↦ inner (mu i • b i) z) := by - rw [sum_inner, sum_inner] - _ = Finset.sum univ (fun i ↦ lam i * inner (a i) z) - + Finset.sum univ (fun i ↦ mu i * inner (b i) z) := by - congr; ext i; rw [inner_smul_left]; simp - ext i; rw [inner_smul_left]; simp - _ = Finset.sum univ (fun i ↦ mu i * inner (b i) z) := by simp [h31] + _ = inner (𝕜 := ℝ) (Finset.sum univ (fun i ↦ lam i • a i)) z + + inner (𝕜 := ℝ) (Finset.sum univ (fun i ↦ mu i • b i)) z := by + rw [h2]; simp [inner_add_left] + _ = Finset.sum univ (fun i ↦ inner (𝕜 := ℝ) (lam i • a i) z) + 
+ Finset.sum univ (fun i ↦ inner (𝕜 := ℝ) (mu i • b i) z) := by + rw [@sum_inner]; rw [@sum_inner] + _ = Finset.sum univ (fun i ↦ lam i * inner (𝕜 := ℝ) (a i) z) + + Finset.sum univ (fun i ↦ mu i * inner (𝕜 := ℝ) (b i) z) := by + have hsumA : + Finset.sum univ (fun i ↦ inner (𝕜 := ℝ) (lam i • a i) z) + = Finset.sum univ (fun i ↦ lam i * inner (𝕜 := ℝ) (a i) z) := by + refine Finset.sum_congr rfl ?_ + intro i _ + simp [inner_smul_left] + have hsumB : + Finset.sum univ (fun i ↦ inner (𝕜 := ℝ) (mu i • b i) z) + = Finset.sum univ (fun i ↦ mu i * inner (𝕜 := ℝ) (b i) z) := by + refine Finset.sum_congr rfl ?_ + intro i _ + simp [inner_smul_left] + rw [hsumA, hsumB] + _ = Finset.sum univ (fun i ↦ mu i * inner (𝕜 := ℝ) (b i) z) := by + have hz : ∀ i : τ, lam i * inner (𝕜 := ℝ) (a i) z = 0 := by + intro i; simp [h31' i] + simp [hz] _ ≥ 0 := by apply Finset.sum_nonneg; intro i _ - obtain h1i := h1 i; obtain h2i := h32 i i.2; positivity + have h1i := h1 i + have h2i := h32' i + positivity linarith - intro h; by_contra h1 let S := {z | ∃ (lam : τ → ℝ), ∃ (mu : σ → ℝ), (∀ i, 0 ≤ mu i) ∧ z = Finset.sum univ (fun i ↦ lam i • a i) + Finset.sum univ (fun i ↦ mu i • b i)} @@ -365,16 +397,20 @@ theorem Farkas : apply h1; use lam; use mu obtain sep := geometric_hahn_banach_point_closed scon sc cn rcases sep with ⟨f, u, ⟨sep1, sep2⟩⟩ - have feq : ∃ d : EuclideanSpace ℝ (Fin n), ∀ x, f x = inner d x := by - use ((toDual ℝ (EuclideanSpace ℝ (Fin n))).symm f); simp + have feq : ∃ d : EuclideanSpace ℝ (Fin n), ∀ x, f x = inner (𝕜 := ℝ) d x := by + refine ⟨((toDual ℝ (EuclideanSpace ℝ (Fin n))).symm f), ?_⟩ + intro x + have h := (toDual ℝ (EuclideanSpace ℝ (Fin n))).apply_symm_apply f + have hx := congrArg (fun g => g x) h + simp rcases feq with ⟨d, feq⟩ have uleq : u < 0 := by have : 0 ∈ S := by simp [S]; use 0; use 0; simp specialize sep2 0 this; rw [feq 0, inner_zero_right] at sep2; exact sep2 - have hc : inner c d < (0 : ℝ) := by + have hc : inner (𝕜 := ℝ) c d < (0 : ℝ) := by rw 
[real_inner_comm, ← feq c] apply lt_trans sep1 uleq - have hb : ∀ i : σ, inner (b i) d ≥ (0 : ℝ) := by + have hb : ∀ i : σ, inner (𝕜 := ℝ) (b i) d ≥ (0 : ℝ) := by intro i have : ∀ t > (0 : ℝ), (t • b i) ∈ S := by intro t ht @@ -384,22 +420,22 @@ theorem Farkas : apply leq_tendsto_zero uleq intro t ht specialize sep2 (t • b i) (this t ht); - rw [feq, inner_smul_right, real_inner_comm] at sep2; exact sep2 - have ha : ∀ i : τ, inner (a i) d = (0 : ℝ) := by + rw [feq (t • b i), inner_smul_right, real_inner_comm] at sep2; exact sep2 + have ha : ∀ i : τ, inner (𝕜 := ℝ) (a i) d = (0 : ℝ) := by intro i have : ∀ t : ℝ, (t • a i) ∈ S := by intro t simp only [S]; use (fun j ↦ if j = i then t else 0); use 0; constructor; intro _; simp; simp only [Pi.zero_apply, zero_smul, sum_const_zero, - ite_smul, zero_add]; simp + ite_smul]; simp rw [le_antisymm_iff]; constructor · apply geq_tendsto_zero uleq intro t _ - specialize sep2 (t • a i) (this t); rw [feq, inner_smul_right, real_inner_comm] at sep2 + specialize sep2 (t • a i) (this t); rw [feq (t • a i), inner_smul_right, real_inner_comm] at sep2 linarith apply leq_tendsto_zero uleq intro t _ - specialize sep2 (t • a i) (this t); rw [feq, inner_smul_right, real_inner_comm] at sep2 + specialize sep2 (t • a i) (this t); rw [feq (t • a i), inner_smul_right, real_inner_comm] at sep2 linarith apply absurd h push_neg; use d; diff --git a/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean b/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean index 5e90ea4..6f2bd2a 100644 --- a/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean +++ b/Optlib/Convex/FiniteDimensionalConvexFunctionsLocallyLipschitz.lean @@ -14,6 +14,8 @@ import Mathlib.Topology.Algebra.Module.FiniteDimension import Mathlib.Analysis.InnerProductSpace.PiL2 import Optlib.Function.L1Space +open NormedSpace InnerProductSpace + /-! 
# Finite-Dimensional Convex Functions and Their Lipschitz Properties @@ -61,7 +63,7 @@ open scoped Pointwise section Boundedness -variable {X : Type*} [SeminormedAddCommGroup X] [NormedSpace ℝ X] +variable {X : Type*} [NormedAddCommGroup X] [NormedSpace ℝ X] {x₀ : X}{r : ℝ}{f : X → ℝ} /-- @@ -103,7 +105,7 @@ lemma Bounded_of_UpperBounded (hf : ConvexOn ℝ (ball x₀ r) f) simp only [smul_eq_mul, a] at h have h' : - f y + 2 * f x₀ ≤ f x := by linarith have fy_pos : - |m| ≤ - f y := by - simp only [neg_le_neg_iff, ge_iff_le] + simp only [neg_le_neg_iff] apply le_trans (hm y y_pos) (le_abs_self m) constructor · calc @@ -145,7 +147,7 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) --type conversion rw[edist_dist,edist_dist] rw[ENNReal.coe_nnreal_eq] - simp only [NNReal.coe_mk, ge_iff_le] + simp only [NNReal.coe_mk] rw[← ENNReal.ofReal_mul K_pos] rw[ENNReal.ofReal_le_ofReal_iff (mul_nonneg K_pos dist_nonneg)] --type conversion @@ -162,8 +164,7 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) have uy_pos : uy ∈ ball x₀ r := sub hu let z := uy + (ε / ‖uy - vx‖) • (uy - vx) have sub_pos : 0 < ‖uy - vx‖ := by - apply norm_pos_iff'.mpr - exact sub_ne_zero_of_ne h + exact norm_pos_iff.mpr (sub_ne_zero_of_ne h) have z_pos : z ∈ ball x₀ r := by simp only [mem_ball,dist_eq_norm,z] calc @@ -193,8 +194,8 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) apply div_pos hε.1 this have b_pos : 0 < b := by apply div_pos - rw[norm_pos_iff'] - exact sub_ne_zero_of_ne h + rw [@norm_sub_pos_iff] + exact h apply this have a_add_b_one : a + b = 1 := by simp[a,b] @@ -215,7 +216,7 @@ lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) have h1 : (ε + ‖uy - vx‖) * f uy ≤ ε * f vx + ‖uy - vx‖ * f z:= by rw[← h_combin] at h simp[a,b] at h - rw[← mul_le_mul_left this] at h + rw[← mul_le_mul_iff_right₀ this] at h field_simp at h exact h have h2 : ε * (f uy - f vx) ≤ 2 * M * ‖uy - vx‖ := by @@ -238,15 +239,16 @@ 
lemma Lipschitz_of_Bounded [T0Space X](hf : ConvexOn ℝ (ball x₀ r) f) _ ≤ M + M :=by linarith _ = 2 * M :=by linarith calc - _ ≤ 2 * M * ‖uy - vx‖ / ε := by rwa[le_div_iff₀' hε.1] + _ ≤ 2 * M * ‖uy - vx‖ / ε := by rwa [le_div_iff₀' hε.1] _ = (2 * M / ε) * ‖uy - vx‖ := by ring _ ≤ _ := by - simp[K] - apply mul_le_mul_of_nonneg_right _ (le_of_lt sub_pos) - rw[div_le_div_right hε.1] - apply mul_le_mul_of_nonneg_left - apply le_abs_self - norm_num + simp [K] + have hcoeff : (2 * M / ε) ≤ (2 * |M| / ε) := by + have hbase : (2 : ℝ) * M ≤ 2 * |M| := by + have h2 : 0 ≤ (2 : ℝ) := by norm_num + exact mul_le_mul_of_nonneg_left (le_abs_self M) h2 + exact div_le_div_of_nonneg_right hbase (le_of_lt hε.1) + exact mul_le_mul_of_nonneg_right hcoeff (le_of_lt sub_pos) by_cases h : x = y · rw[h] simp only [sub_self, abs_zero, norm_zero, mul_zero, le_refl] @@ -325,8 +327,8 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) have bi_pos : ∀ i : ι , ‖b i‖ ≠ 0 := by intro i refine norm_ne_zero_iff.mpr ?_ - exact Basis.ne_zero b i - change Basis ι ℝ α at b + exact Module.Basis.ne_zero b i + change Module.Basis ι ℝ α at b by_cases hn : n = 0 · have : Module.finrank ℝ α = 0 := by show n = 0;apply hn; @@ -368,7 +370,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) simp only [neg_add_cancel_comm]; rw[this] apply hr₀.2 - simp only [mem_ball, dist_self_add_left,dist_add_self_left] + simp only [mem_ball,dist_add_self_left] rw[norm_smul,norm_div,norm_norm,div_mul_cancel₀] simp[r] calc @@ -381,7 +383,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) simp only [neg_add_cancel_comm] rw[this] apply hr₀.2 - simp only [mem_ball, dist_self_add_left,dist_add_self_left,neg_smul, norm_neg] + simp only [mem_ball, dist_add_self_left,neg_smul, norm_neg] rw[norm_smul,norm_div,norm_norm,div_mul_cancel₀] simp[r] calc @@ -435,6 +437,7 @@ lemma LocallyUpperBounded (hs_convex : Convex ℝ s)(hs_isopen : IsOpen s) apply 
ConvexOn.le_sup_of_mem_convexHull hf _ hx apply subset_convexHull +omit [FiniteDimensional ℝ α] in lemma LocallyLipschitz_of_LocallyUpperBounded (hs : IsOpen s) (h : ∀ x ∈ s , ∃ t ∈ 𝓝[s] x , Convex ℝ t ∧ IsOpen t ∧ BddAbove (f '' t)) (hf : ConvexOn ℝ s f) diff --git a/Optlib/Convex/ImageSubgradientClosed.lean b/Optlib/Convex/ImageSubgradientClosed.lean index 3dcfafd..01f2fa6 100644 --- a/Optlib/Convex/ImageSubgradientClosed.lean +++ b/Optlib/Convex/ImageSubgradientClosed.lean @@ -1,5 +1,5 @@ import Optlib.Function.Proximal -import Mathlib.Topology.Instances.EReal +import Mathlib open Set InnerProductSpace Topology Filter diff --git a/Optlib/Convex/QuasiConvexFirstOrder.lean b/Optlib/Convex/QuasiConvexFirstOrder.lean index 0b16ec5..32768fc 100644 --- a/Optlib/Convex/QuasiConvexFirstOrder.lean +++ b/Optlib/Convex/QuasiConvexFirstOrder.lean @@ -35,7 +35,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs let ε := (f' x) (y - x) / (2 * ‖x-y‖) have εpos: 0 < ε := by apply div_pos H - exact Real.mul_pos two_pos h₃ + simp; grind specialize h₁ ε εpos rcases h₁ with ⟨δ, dpos, converge⟩ let b1:= δ /(‖x - y‖) @@ -60,7 +60,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs _= (1 : ℝ) • x - a • x - b • y:= by rw [one_smul] _= b • (x - y) := by - rw [← sub_smul 1 a]; simp [a, b, sum_a_b]; rw[smul_sub b x y] + rw [← sub_smul 1 a]; simp [a, b]; rw[smul_sub b x y] have h01 : x' - x = b • (y - x) :=by rw [← neg_inj, ← smul_neg, neg_sub, neg_sub]; exact h10 have h1 : ‖x - x'‖ = ‖b • (x - y)‖ := by @@ -71,7 +71,7 @@ theorem Quasiconvex_first_order_condition_right (h : HasFDerivAt f (f' x) x) (xs have x1nbhd: ‖x - x'‖ ≤ δ := by rw [h1, h2] have h3: b * ‖x - y‖ ≤ b1 * ‖x - y‖:= by - rw [mul_le_mul_right] + rw [mul_le_mul_iff_left₀] apply min_le_left exact h₃ have h4: b1 * ‖x - y‖= δ := by diff --git a/Optlib/Convex/StronglyConvex.lean b/Optlib/Convex/StronglyConvex.lean index c36d40e..1371535 100644 --- 
a/Optlib/Convex/StronglyConvex.lean +++ b/Optlib/Convex/StronglyConvex.lean @@ -6,6 +6,7 @@ Authors: Chenyi Li, Ziyu Wang import Mathlib.Analysis.InnerProductSpace.PiL2 import Mathlib.Analysis.Convex.Strong import Optlib.Function.Lsmooth +import Optlib.Convex.ConvexFunction /-! the properties of strongly convex function and gradient descent method @@ -18,6 +19,7 @@ variable {s : Set E} {f : E → ℝ} {m : ℝ} {xm xm': E} {f' : E → E} {mp : section Strongly_Convex open Set InnerProductSpace +open scoped RealInnerProductSpace theorem Strongly_Convex_Bound (m : ℝ) (strongly_convex: StrongConvexOn s m f): ∀ ⦃x⦄, x ∈ s → ∀ ⦃y⦄, y ∈ s → @@ -76,7 +78,7 @@ theorem Strongly_Convex_Unique_Minima (hsc: StrongConvexOn s m f) {mp : m > 0} . linarith . apply pow_pos; linarith apply absurd (min xs) - simp [← xeq] + simp calc f x ≤ f xm - 2⁻¹ * 2⁻¹ * (m / 2 * ‖xm - xm'‖ ^ 2) := by apply sc _ < f xm := by apply lt_of_sub_pos; simp; apply nng @@ -98,40 +100,71 @@ lemma strongconvex_of_convex_add_sq (f : E → ℝ) (x : E) (hfun : ConvexOn ℝ rw [mul_add, ← add_assoc, ← add_sub _ (a * (‖y - x‖ ^ 2 / 2)), add_assoc] apply add_le_add · rw [← smul_eq_mul, ← smul_eq_mul] - apply hfun yin zin anneg bnneg absum1 - · field_simp; rw [div_le_div_right, add_sub] - have eq1 : a • y + b • z - x = a • (y - x) + b • (z - x) := by + exact hfun yin zin anneg bnneg absum1 + · have eq1 : a • y + b • z - x = a • (y - x) + b • (z - x) := by rw [smul_sub, smul_sub, add_comm_sub, sub_sub, ← add_smul, add_comm b a] rw [absum1, one_smul, ← add_sub] - have eq2 (u v : E) : ‖a • u + b • v‖ ^ 2 = b * ‖v‖ ^ 2 - + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by - rw [norm_add_sq_real, norm_sub_sq_real] - rw [inner_smul_left, inner_smul_right, norm_smul, norm_smul]; field_simp - rw [add_comm (b * ‖v‖ ^ 2), mul_pow, sq_abs, mul_pow, sq_abs] - rw [mul_add, ← sub_sub, mul_sub, ← sub_add] - rw [add_sub_right_comm, add_sub_right_comm, ← sub_mul, ← add_sub, ← sub_mul] - nth_rw 3 [← mul_one a]; rw [← absum1, mul_add] - nth_rw 5 [← 
mul_one b]; rw [← absum1, mul_add, mul_comm b a] - rw [pow_two, pow_two b]; simp; rw [add_right_comm, add_left_cancel_iff] - rw [mul_mul_mul_comm, mul_comm a 2, mul_assoc] + have eq2 (u v : E) : + ‖a • u + b • v‖ ^ 2 = + b * ‖v‖ ^ 2 + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by + have an := abs_of_nonneg anneg + have bn := abs_of_nonneg bnneg + calc + ‖a • u + b • v‖ ^ 2 + = ‖a • u‖ ^ 2 + 2 * ⟪a • u, b • v⟫_ℝ + ‖b • v‖ ^ 2 := by + simpa using norm_add_sq_real (a • u) (b • v) + _ = ‖a • u‖ ^ 2 + ‖b • v‖ ^ 2 + 2 * ⟪a • u, b • v⟫_ℝ := by + ring + _ = (‖a‖ * ‖u‖) ^ 2 + (‖b‖ * ‖v‖) ^ 2 + 2 * (a * b * ⟪u, v⟫_ℝ) := by + simp [norm_smul, real_inner_smul_left, real_inner_smul_right, + mul_comm, mul_left_comm, mul_assoc] + _ = (a * ‖u‖) ^ 2 + (b * ‖v‖) ^ 2 + 2 * a * b * ⟪u, v⟫_ℝ := by + simp [Real.norm_eq_abs, an, bn] + ring + _ = a ^ 2 * ‖u‖ ^ 2 + b ^ 2 * ‖v‖ ^ 2 + 2 * a * b * ⟪u, v⟫_ℝ := by + ring + _ = a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2 - a * b * (‖u‖ ^ 2 + ‖v‖ ^ 2 - 2 * ⟪u, v⟫_ℝ) := by + have h1 : a = 1 - b := by linarith + have h2 : b = 1 - a := by linarith + have ha : a * (a * ‖u‖ ^ 2) = a * ((1 - b) * ‖u‖ ^ 2) := by simp [h1] + have hb : b * (b * ‖v‖ ^ 2) = b * ((1 - a) * ‖v‖ ^ 2) := by simp [h2] + calc + a ^ 2 * ‖u‖ ^ 2 + b ^ 2 * ‖v‖ ^ 2 + 2 * a * b * ⟪u, v⟫_ℝ + = a * (a * ‖u‖ ^ 2) + b * (b * ‖v‖ ^ 2) + 2 * a * b * ⟪u, v⟫_ℝ := by ring + _ = a * ((1 - b) * ‖u‖ ^ 2) + b * ((1 - a) * ‖v‖ ^ 2) + + 2 * a * b * ⟪u, v⟫_ℝ := by simp [ha, hb] + _ = a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2 + - a * b * (‖u‖ ^ 2 + ‖v‖ ^ 2 - 2 * ⟪u, v⟫_ℝ) := by ring + _ = b * ‖v‖ ^ 2 + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by + rw [norm_sub_sq_real]; ring + have eq2_div (u v : E) : + ‖a • u + b • v‖ ^ 2 / 2 = + b * (‖v‖ ^ 2 / 2) + a * (‖u‖ ^ 2 / 2) - a * b * ( (2 : ℝ)⁻¹ * ‖u - v‖ ^ 2) := by + have h := eq2 u v + have h' := congrArg (fun t : ℝ => t / 2) h + simpa [div_eq_mul_inv, sub_eq_add_neg, mul_add, add_mul, + mul_comm, mul_left_comm, mul_assoc] using h' have eq3 : y - z = (y - x) - (z - x) := by simp - have eq4 (u v : 
E) : ‖a • u + b • v‖ ^ 2 ≤ b * ‖v‖ ^ 2 - + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 := by rw [eq2] let u := y - x let v := z - x - rw [eq1, eq3]; - show ‖a • u + b • v‖ ^ 2 ≤ b * ‖v‖ ^ 2 + a * ‖u‖ ^ 2 - a * b * ‖u - v‖ ^ 2 - apply eq4 u v - simp + have : ‖a • u + b • v‖ ^ 2 / 2 + ≤ b * (‖v‖ ^ 2 / 2) + a * (‖u‖ ^ 2 / 2) - a * b * ((2 : ℝ)⁻¹ * ‖u - v‖ ^ 2) := by + exact (eq2_div u v).le + rw [eq1, eq3] + simp only [div_eq_mul_inv] + grind end Strongly_Convex section variable [CompleteSpace E] +open Set InnerProductSpace +open scoped RealInnerProductSpace +open scoped InnerProductSpace theorem Strong_Convex_lower (hsc : StrongConvexOn s m f) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : - ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2 := by + ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ m * ‖x - y‖ ^ 2 := by intro x xs y ys have cvx := strongConvexOn_iff_convex.mp hsc have grd := sub_normsquare_gradient hf m @@ -139,10 +172,10 @@ theorem Strong_Convex_lower (hsc : StrongConvexOn s m f) (hf : ∀ x ∈ s, HasG rw [sub_sub, add_sub, add_comm, ← add_sub, ← sub_sub, inner_sub_left, ← smul_sub] at grm apply le_of_sub_nonneg at grm rw [real_inner_smul_left, real_inner_self_eq_norm_sq] at grm - apply grm + exact grm theorem Lower_Strong_Convex (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : Convex ℝ s) - (h : ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - y) ≥ m * ‖x - y‖ ^ 2) : + (h : ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ m * ‖x - y‖ ^ 2) : StrongConvexOn s m f := by apply strongConvexOn_iff_convex.mpr have grd := sub_normsquare_gradient hf m @@ -152,20 +185,21 @@ theorem Lower_Strong_Convex (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : C rw [sub_sub, add_sub, add_comm, ← add_sub, ← sub_sub, inner_sub_left, ← smul_sub] apply sub_nonneg_of_le rw [real_inner_smul_left, real_inner_self_eq_norm_sq] - apply h + exact h theorem Strong_Convex_iff_lower (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (hs : Convex ℝ s) : - StrongConvexOn s m f ↔ ∀ x ∈ s, ∀ y ∈ s, inner (f' x - f' y) (x - 
y) ≥ m * ‖x - y‖ ^ 2 := - ⟨fun hsc x xs y ys ↦ Strong_Convex_lower hsc hf x xs y ys, fun h ↦ Lower_Strong_Convex hf hs h⟩ + StrongConvexOn s m f ↔ ∀ x ∈ s, ∀ y ∈ s, ⟪f' x - f' y, x - y⟫_ℝ ≥ m * ‖x - y‖ ^ 2 := + ⟨fun hsc x xs y ys ↦ Strong_Convex_lower hsc hf x xs y ys, + fun h ↦ Lower_Strong_Convex hf hs h⟩ theorem Strong_Convex_second_lower (hsc: StrongConvexOn s m f) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : ∀ x ∈ s, ∀ y ∈ s, - f y ≥ f x + inner (f' x) (y - x) + m / 2 * ‖y - x‖ ^ 2 := by + f y ≥ f x + ⟪f' x, y - x⟫_ℝ + m / 2 * ‖y - x‖ ^ 2 := by intro x xs y ys have cvx := strongConvexOn_iff_convex.mp hsc have grd := sub_normsquare_gradient hf m x xs let g := fun x ↦ f' x - m • x - have : g x = f' x - m • x := by rfl + have : g x = f' x - m • x := rfl rw [← this] at grd have foc := Convex_first_order_condition' grd cvx xs y ys rw [this] at foc @@ -179,7 +213,7 @@ theorem Strong_Convex_second_lower (hsc: StrongConvexOn s m f) nth_rw 1 [← sub_self y] at foc rw [← sub_self x] at foc rw [sub_add, ← sub_add y x x, add_comm (y - x), inner_sub_right x, inner_add_right y] at foc - rw [real_inner_comm x y, sub_right_comm (inner x y), ← sub_sub, sub_self, sub_sub 0] at foc + rw [real_inner_comm x y, sub_right_comm ⟪x, y⟫_ℝ, ← sub_sub, sub_self, sub_sub 0] at foc rw [← inner_add_left, zero_sub, mul_neg, sub_neg_eq_add] at foc have : m = m / 2 * 2 := by simp nth_rw 1 [this] at foc diff --git a/Optlib/Convex/Subgradient.lean b/Optlib/Convex/Subgradient.lean index c9c48f2..c0780c5 100644 --- a/Optlib/Convex/Subgradient.lean +++ b/Optlib/Convex/Subgradient.lean @@ -189,11 +189,11 @@ theorem SubderivAt.convex : ∀ x, Convex ℝ (SubderivAt f x) := by have ineq1 : a • f y ≥ a • f x + a • ⟪g₁, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h1 y) lea - have ineq2 : b • f y ≥ b • f x + b • inner g₂ (y - x) := by + have ineq2 : b • f y ≥ b • f x + b • ⟪g₂, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h2 y) leb - have eq : (a • f x + a • inner g₁ (y - 
x)) + (b • f x + b • inner g₂ (y - x)) - = f x + inner (a • g₁ + b • g₂) (y - x) := by + have eq : (a • f x + a • ⟪g₁, y - x⟫) + (b • f x + b • ⟪g₂, y - x⟫) + = f x + ⟪a • g₁ + b • g₂, y - x⟫ := by rw [add_add_add_comm, ← Eq.symm (Convex.combo_self abeq (f x))] apply congrArg (HAdd.hAdd (f x)) rw [inner_add_left, inner_smul_left, inner_smul_left]; rfl @@ -208,11 +208,11 @@ theorem SubderivWithinAt.convex : ∀ x ∈ s, Convex ℝ (SubderivWithinAt f s have ineq1 : a • f y ≥ a • f x + a • ⟪g₁, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h1 y ys) lea - have ineq2 : b • f y ≥ b • f x + b • inner g₂ (y - x) := by + have ineq2 : b • f y ≥ b • f x + b • ⟪g₂, y - x⟫ := by rw [← smul_add] apply smul_le_smul_of_nonneg_left (h2 y ys) leb - have eq : (a • f x + a • inner g₁ (y - x)) + (b • f x + b • inner g₂ (y - x)) - = f x + inner (a • g₁ + b • g₂) (y - x) := by + have eq : (a • f x + a • ⟪g₁, y - x⟫) + (b • f x + b • ⟪g₂, y - x⟫) + = f x + ⟪a • g₁ + b • g₂, y - x⟫ := by rw [add_add_add_comm, ← Eq.symm (Convex.combo_self abeq (f x))] apply congrArg (HAdd.hAdd (f x)) rw [inner_add_left, inner_smul_left, inner_smul_left]; rfl @@ -224,8 +224,8 @@ theorem subgradientAt_mono {u v : E} {f : E → ℝ}{y : E} (hu : u ∈ SubderivAt f x) (hv : v ∈ SubderivAt f y) : ⟪u - v, x - y⟫ ≥ (0 : ℝ):= by specialize hu y; specialize hv x have ineq1 : ⟪u, x - y⟫ ≥ f x - f y := by - rw [congrArg (inner u) (Eq.symm (neg_sub y x)), inner_neg_right]; linarith - have _ : inner v (x - y) ≤ f x - f y := Iff.mpr le_sub_iff_add_le' hv + rw [congrArg (⟪u, ·⟫) (Eq.symm (neg_sub y x)), inner_neg_right]; linarith + have _ : ⟪v, x - y⟫ ≤ f x - f y := Iff.mpr le_sub_iff_add_le' hv rw [inner_sub_left]; linarith end congr @@ -259,7 +259,7 @@ theorem SubderivAt.nonempty (hf : ConvexOn ℝ univ f) (hc : ContinuousOn f univ have : x ∈ interior univ := by simp rw [← interior_univ] at hc obtain h := SubderivWithinAt.Nonempty hf hc x this - simp [h] + simp only [nonempty_subtype] rcases h with ⟨a, ha⟩ exact 
⟨a, ha⟩ @@ -286,7 +286,7 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) · use g; intro y ys apply Convex_first_order_condition' h hf (interior_subset hx) y ys intro g' hg'; by_contra neq - apply not_le_of_lt (norm_sub_pos_iff.mpr neq) + apply not_le_of_gt (norm_sub_pos_iff.mpr neq) let v := g' - g; obtain vneq := sub_ne_zero.mpr neq have : Tendsto (fun (t : ℝ) => (f (x + t • v) - f x - ⟪g, t • v⟫) * ‖t • v‖⁻¹) (𝓝[>] 0) (𝓝 0) := by @@ -353,7 +353,7 @@ theorem SubderivWithinAt_eq_gradient {f'x : E} (hx : x ∈ interior s) rw [this] rw [eq2, eq3, mul_eq_mul_right_iff]; left; rw [inner_sub_left] - rw [mem_setOf, eq1, mul_le_mul_right tvpos] + rw [mem_setOf, eq1, mul_le_mul_iff_left₀ tvpos] apply sub_le_sub_right (le_sub_iff_add_le'.mpr (ineq1 mems)) /-- Alternarive version for FDeriv --/ @@ -432,9 +432,9 @@ variable {f : E → ℝ} {g : E} {x : E} {s : Set E} theorem HasSubgradientAt.pos_smul {c : ℝ} (h : HasSubgradientAt f g x) (hc : 0 < c) : HasSubgradientAt (c • f) (c • g) x := by intro y; rw [inner_smul_left] - have ineq : c * f y ≥ c * (f x + inner g (y - x)) := (mul_le_mul_left hc).mpr (h y) - have eq : c * (f x + inner g (y - x)) = c * f x + c * inner g (y - x) := - mul_add c (f x) (inner g (y - x)) + have ineq : c * f y ≥ c * (f x + ⟪g, y - x⟫) := (mul_le_mul_iff_right₀ hc).mpr (h y) + have eq : c * (f x + ⟪g, y - x⟫) = c * f x + c * ⟪g, y - x⟫ := + mul_add c (f x) (⟪g, y - x⟫) exact Eq.trans_le (id eq.symm) ineq theorem SubderivAt.pos_smul {c : ℝ} (hc : 0 < c) : @@ -447,19 +447,19 @@ theorem SubderivAt.pos_smul {c : ℝ} (hc : 0 < c) : have neq : c ≠ 0 := ne_of_gt hc calc f y = c⁻¹ * (c * f y) := (eq_inv_mul_iff_mul_eq₀ neq).mpr rfl - _ ≥ c⁻¹ * (c * f x + inner g (y - x)) := + _ ≥ c⁻¹ * (c * f x + ⟪g, y - x⟫) := mul_le_mul_of_nonneg_left (hg y) (inv_nonneg.mpr (le_of_lt hc)) - _ = f x + inner (c⁻¹ • g) (y - x) := by + _ = f x + ⟪c⁻¹ • g, y - x⟫ := by rw [mul_add, inner_smul_left, ← ((eq_inv_mul_iff_mul_eq₀ neq).mpr rfl)] rfl exact 
smul_inv_smul₀ (ne_of_gt hc) g rintro ⟨gg, hgg, eq⟩; intro y calc - c * f y ≥ c * (f x + inner gg (y - x)) := (mul_le_mul_left hc).mpr (hgg y) - _ = c * f x + c * inner gg (y - x) := mul_add c (f x) (inner gg (y - x)) - _ = c * f x + inner (c • gg) (y - x) := by + c * f y ≥ c * (f x + ⟪gg, y - x⟫) := (mul_le_mul_iff_right₀ hc).mpr (hgg y) + _ = c * f x + c * ⟪gg, y - x⟫ := mul_add c (f x) (⟪gg, y - x⟫) + _ = c * f x + ⟪c • gg, y - x⟫ := by rw [inner_smul_left]; exact rfl - _ = c * f x + inner g (y - x) := by rw [← eq] + _ = c * f x + ⟪g, y - x⟫ := by rw [← eq] /-- Subderivatives of the sum of two functions is a subset of the sum of the subderivatives of the two functions --/ @@ -493,7 +493,7 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( rw [SubderivAt, SubderivAt, SubderivAt, Set.subset_def] intro g hg rw [Set.mem_setOf] at hg; rw [Set.mem_add] - let S₁ := {(x, y) : E × ℝ | y > f₁ (x + x₀) - f₁ x₀ - inner g x} + let S₁ := {(x, y) : E × ℝ | y > f₁ (x + x₀) - f₁ x₀ - ⟪g, x⟫} let S₂ := {(x, y) : E × ℝ | y ≤ f₂ x₀ - f₂ (x + x₀)} have hs1 : Convex ℝ S₁ := by @@ -538,7 +538,7 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( apply le_trans (add_le_add hi hj) hh have hint : Disjoint S₁ S₂ := by rw [disjoint_iff]; by_contra joint - obtain ⟨⟨x, y⟩, ⟨hp1, hp2⟩⟩ := nmem_singleton_empty.mp joint + obtain ⟨⟨x, y⟩, ⟨hp1, hp2⟩⟩ := Set.notMem_singleton_empty.mp joint rw [Set.mem_setOf] at hp1 hp2 specialize hg (x + x₀); rw [← add_sub, sub_self, add_zero] at hg apply not_le_of_gt ?_ hg @@ -550,7 +550,7 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( f₁ x₀ + f₂ x₀ + ⟪g, x⟫_ℝ - (f₁ (x + x₀) + f₂ (x + x₀)) := by ring rwa [hh x₀, hh (x + x₀), ← eq] have hso : IsOpen S₁ := by - apply Continuous_epi_open (f₁ := fun x ↦ f₁ (x + x₀) - f₁ x₀ - inner g x) + apply Continuous_epi_open (f₁ := fun x ↦ f₁ (x + x₀) - f₁ x₀ - ⟪g, x⟫) apply ContinuousOn.sub · apply ContinuousOn.sub · apply ContinuousOn.comp (g := f₁) (f := fun x ↦ x + 
x₀) (t := univ) hcon @@ -560,19 +560,16 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( apply ContinuousOn.inner continuousOn_const continuousOn_id obtain ⟨f, c, ⟨hsl, hsr⟩⟩ := geometric_hahn_banach_open hs1 hso hs2 hint - have eq : ∃ a : E, ∃ b : ℝ, ∀ (p : E × ℝ), f p = inner a p.1 + b * p.2 := by + have eq : ∃ a : E, ∃ b : ℝ, ∀ (p : E × ℝ), f p = ⟪a, p.1⟫ + b * p.2 := by let f1 := ContinuousLinearMap.comp f (ContinuousLinearMap.inl ℝ E ℝ) let f2 := ContinuousLinearMap.comp f (ContinuousLinearMap.inr ℝ E ℝ) use (toDual ℝ E).symm f1 - use (toDual ℝ ℝ).symm f2 + use (f2 1) intro p - have : ((toDual ℝ ℝ).symm f2) * p.2 = inner (((toDual ℝ ℝ).symm f2)) p.2 := by - simp [f2] - have : ((toDual ℝ ℝ).symm f2) * p.2 = f2 p.2 := by - rw [this] - simp only [toDual_symm_apply, ContinuousLinearMap.coe_comp', Function.comp_apply, - ContinuousLinearMap.inl_apply, ContinuousLinearMap.inr_apply] - rw [this]; simp [f1, f2] + have hmul : (f2 1) * p.2 = f2 p.2 := by + have h := f2.map_smul p.2 (1 : ℝ) + simpa [smul_eq_mul, mul_comm] using h.symm + rw [hmul]; simp [f1, f2] have : (p.1, (0 : ℝ)) + ((0 : E), p.2) = p := by simp nth_rw 1 [← this]; rw [ContinuousLinearMap.map_add] rcases eq with ⟨a, b, hab⟩ @@ -596,9 +593,11 @@ theorem SubderivAt.add {f₁ f₂ : E → ℝ} (h₁ : ConvexOn ℝ univ f₁) ( apply div_pos_of_neg_of_neg hc (by linarith) specialize (htp (c / (2 * b)) pos); field_simp [hb] at htp have eq : b * c / (2 * b) = c / 2 := by - ring_nf; simp; field_simp [hb] - rw [mul_div_right_comm, div_self (by linarith), one_mul] - rw [eq] at htp; linarith + have hb0 : b ≠ 0 := ne_of_lt hb + simpa [mul_comm] using (mul_div_mul_right (c) (2 : ℝ) hb0) + have eq' : b * c / (b * 2) = c / 2 := by + simpa [mul_comm] using eq + rw [eq'] at htp; linarith have bleq0 : b < 0 := by rw [ceq0] at htp specialize htp 1 (by linarith); rw [mul_one] at htp; linarith @@ -653,7 +652,7 @@ theorem SubderivAt_of_norm_at_zero : SubderivAt (fun (x : E) => ‖x‖) 0 = {g apply not_lt.mpr hg this 
intro hg y calc - ‖(0 : E)‖ + inner g (y - 0) = inner g y := by simp only [norm_zero, zero_add, sub_zero] + ‖(0 : E)‖ + ⟪g, y - 0⟫ = ⟪g, y⟫ := by simp only [norm_zero, zero_add, sub_zero] _ ≤ ‖g‖ * ‖y‖ := real_inner_le_norm g y _ ≤ 1 * ‖y‖ := mul_le_mul_of_nonneg_right hg (norm_nonneg y) _ = ‖y‖ := by simp only [one_mul] @@ -681,12 +680,12 @@ theorem SubderivAt_abs (x : ℝ) : · by_contra gne by_cases glt : g < 1 · specialize hg 0 + have hinner : ⟪g, -x⟫_ℝ = g * (-x) := by simp; grind have ineq : (0 : ℝ) < 0 := by calc - 0 ≥ x + g * (-x):= by - simp only [abs_zero, zero_sub, abs_pos_of_pos, abs_of_pos hx] at hg - have : inner g (-x) = g * (-x) := by rfl - rwa [this] at hg + 0 ≥ x + g * (-x) := by + simp only [abs_zero, zero_sub, abs_of_pos hx] at hg + rwa [hinner] at hg _ = x * (1 - g) := by ring _ > 0 := mul_pos hx (by linarith) exact LT.lt.false ineq @@ -696,7 +695,7 @@ theorem SubderivAt_abs (x : ℝ) : apply glt have h1: g ≤ 1 := by calc - g = inner g 1 := by simp + g = ⟪g, 1⟫ := by simp _ ≤ 1 := hg simp only [Real.sign_of_pos hx] at gne exact Ne.lt_of_le gne h1 @@ -706,14 +705,14 @@ theorem SubderivAt_abs (x : ℝ) : by_cases glt : g < -1 · specialize hg (x - 1) have : x - 1 < 0 := by linarith - simp only [abs_of_neg this, abs_of_neg hx, abs_zero, zero_sub] at hg + simp only [abs_of_neg this, abs_of_neg hx] at hg have : -g ≤ 1 := by calc - -g = inner g (x - 1 - x) := by simp + -g = ⟪g, x - 1 - x⟫ := by simp _ ≤ 1 := by linarith [hg] linarith specialize hg 0 - have eq1 : inner g (-x) = g * (-x) := rfl + have eq1 : ⟪g, -x⟫_ℝ = g * (-x) := by simp; grind have eq2 : -x + g * -x = -x * (1 + g) := by ring simp only [abs_zero, zero_sub, abs_of_neg hx, eq1, eq2] at hg have : -x * (1 + g) > 0 := by @@ -725,13 +724,13 @@ theorem SubderivAt_abs (x : ℝ) : by_cases hx : x > 0 · simp only [Real.sign_of_pos hx] at hg calc - |x| + inner g (y - x) = x + inner 1 (y - x) := by rw [abs_of_pos hx, hg] + |x| + ⟪g, y - x⟫ = x + ⟪1, y - x⟫ := by rw [abs_of_pos hx, hg] _ = y := by simp _ ≤ 
|y| := le_abs_self y have hx : x < 0 := Ne.lt_of_le h (not_lt.mp hx) simp only [Real.sign_of_neg hx] at hg calc - |x| + inner g (y - x) = -x + inner (-1) (y - x) := by rw [abs_of_neg hx, hg] + |x| + ⟪g, y - x⟫ = -x + ⟪-1, y - x⟫ := by rw [abs_of_neg hx, hg] _ = -y := by simp; ring _ ≤ |y| := neg_le_abs y diff --git a/Optlib/Differential/Calculation.lean b/Optlib/Differential/Calculation.lean index 89e163e..c3da920 100644 --- a/Optlib/Differential/Calculation.lean +++ b/Optlib/Differential/Calculation.lean @@ -189,22 +189,22 @@ open BigOperators Asymptotics variable {ι : Type*} {u : Finset ι} {A : ι → F → 𝕜} {A' : ι → F} theorem HasGradientAtFilter.sum (h : ∀ i ∈ u, HasGradientAtFilter (A i) (A' i) x L) : - HasGradientAtFilter (fun y => ∑ i in u, A i y) (∑ i in u, A' i) x L := by - have : ∑ i in u, (toDual 𝕜 F) (A' i) = (toDual 𝕜 F) (∑ i in u, A' i) := by + HasGradientAtFilter (fun y => ∑ i ∈ u, A i y) (∑ i ∈ u, A' i) x L := by + have : ∑ i ∈ u, (toDual 𝕜 F) (A' i) = (toDual 𝕜 F) (∑ i ∈ u, A' i) := by rw [map_sum] rw [HasGradientAtFilter, ← this]; unfold HasGradientAtFilter at h - exact HasFDerivAtFilter.sum h + exact HasFDerivAtFilter.fun_sum h theorem HasGradientWithinAt.sum (h : ∀ i ∈ u, HasGradientWithinAt (A i) (A' i) s x) : - HasGradientWithinAt (fun y => ∑ i in u, A i y) (∑ i in u, A' i) s x := by + HasGradientWithinAt (fun y => ∑ i ∈ u, A i y) (∑ i ∈ u, A' i) s x := by exact HasGradientAtFilter.sum h theorem HasGradientAt.sum (h : ∀ i ∈ u, HasGradientAt (A i) (A' i) x) : - HasGradientAt (fun y => ∑ i in u, A i y) (∑ i in u, A' i) x := by + HasGradientAt (fun y => ∑ i ∈ u, A i y) (∑ i ∈ u, A' i) x := by exact HasGradientAtFilter.sum h theorem gradient_sum (h : ∀ i ∈ u, DifferentiableAt 𝕜 (A i) x) : - ∇ (fun y => ∑ i in u, A i y) x = ∑ i in u, ∇ (A i) x := + ∇ (fun y => ∑ i ∈ u, A i y) x = ∑ i ∈ u, ∇ (A i) x := (HasGradientAt.sum fun i hi => (h i hi).hasGradientAt).gradient end Sum @@ -229,7 +229,7 @@ theorem HasGradientAt.neg (h : HasGradientAt f f' x) : 
theorem gradient_neg : ∇ (fun y => - f y) x = - ∇ f x := by unfold gradient - simp only [fderiv_neg, map_neg] + simp only [fderiv_fun_neg, map_neg] end Neg @@ -300,8 +300,8 @@ open ContinuousLinearMap lemma equiv_lemma_mul : c x • (toDual 𝕜 F) d' + d x • (toDual 𝕜 F) c' = (toDual 𝕜 F) ((starRingEnd 𝕜) (c x) • d' + (starRingEnd 𝕜) (d x) • c'):= by - simp - congr <;> exact SemilinearMapClass.map_smul_inv _ _ _ + simp only [map_add] + congr <;> rw [← @SemilinearMapClass.map_smul_inv] theorem HasGradientAt.mul (hc : HasGradientAt c c' x) (hd : HasGradientAt d d' x) : HasGradientAt (fun y => c y * d y) diff --git a/Optlib/Differential/GradientDiv.lean b/Optlib/Differential/GradientDiv.lean index 57ad10f..2cf92e0 100644 --- a/Optlib/Differential/GradientDiv.lean +++ b/Optlib/Differential/GradientDiv.lean @@ -48,7 +48,7 @@ lemma Simplifying₁ (h₁ : a ≠ 0) (h₂ : b ≠ 0) (h₃ : ‖b‖ / 2 ≤ simp only [one_div, div_inv_eq_mul, one_mul] have l₃ : |b * b * b| / 2 = |b * b| * (|b| / 2) := by rw [mul_div, abs_mul] have l₄ : |b * b * a| = |b * b| * |a| := by rw [abs_mul] - rw [l₃, l₄, mul_le_mul_left] + rw [l₃, l₄, mul_le_mul_iff_right₀] apply h₃ rw [abs_pos] simp only [ne_eq, mul_eq_zero, or_self] @@ -81,7 +81,7 @@ lemma div_div_mul (h₁ : a / b ≤ c) (h₂ : 0 < a) (h₃ : 0 < b) (h₄ : 0 < 1 / c ≤ b / a := by have : a ≤ c * b := Iff.mp (div_le_iff₀ h₃) h₁ have : a ≤ b * c := by linarith - apply Iff.mpr (div_le_div_iff h₄ h₂) + apply Iff.mpr (div_le_div_iff₀ h₄ h₂) rw [one_mul] apply this @@ -251,10 +251,10 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : have h₂ : min δ₀ δ₂ ≤ δ₂ := by exact min_le_right δ₀ δ₂ apply le_trans h' h₂ - have zp1 :‖f x * (f x - f x' + inner grad (x' - x)) / (f x * f x * f x')‖ = - ‖(f x - f x' + inner grad (x' - x)) / (f x * f x')‖ := by + have zp1 :‖f x * (f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x * f x')‖ = + ‖(f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x')‖ := by rw [mul_comm, mul_assoc (f x) (f x) (f x'), - div_mul_eq_div_div 
((f x - f x' + inner grad (x' - x)) * (f x)) (f x) (f x * f x'), mul_div_cancel_right₀] + div_mul_eq_div_div ((f x - f x' + ⟪grad, (x' - x)⟫) * (f x)) (f x) (f x * f x'), mul_div_cancel_right₀] apply h₁ have zp2 : ‖f x‖ * ‖f x‖/2 ≤ ‖f x * f x'‖ := by @@ -305,16 +305,16 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : _ = (ε / 2) * ‖x' - x‖ := by rw [div_self (mul_ne_zero l l), mul_one, norm_sub_rev] calc - ‖f x * (f x - f x' + inner grad (x' - x)) / (f x * f x * f x')‖ = - ‖(f x - f x' + inner grad (x' - x)) / (f x * f x')‖ := by + ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x * f x')‖ = + ‖(f x - f x' + ⟪grad, (x' - x)⟫) / (f x * f x')‖ := by apply zp1 - _ = ‖(f x - f x' + inner grad (x' - x))‖ * ‖1/(f x * f x')‖ := by + _ = ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ * ‖1/(f x * f x')‖ := by apply Vert_div - _ ≤ ‖(f x - f x' + inner grad (x' - x))‖ * (2 / (‖f x‖ * ‖f x‖)) := by + _ ≤ ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ * (2 / (‖f x‖ * ‖f x‖)) := by apply mul_le_mul_of_nonneg_left zp3 apply norm_nonneg _ ≤ ((ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖) * (2 / (‖f x‖ * ‖f x‖)) := by - have : ‖(f x - f x' + inner grad (x' - x))‖ ≤ (ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖ := by + have : ‖(f x - f x' + ⟪grad, (x' - x)⟫)‖ ≤ (ε * ‖f x‖ * ‖f x‖/4) * ‖x - x'‖ := by apply hδ₂ apply hp₂ apply mul_le_mul_of_nonneg_right this @@ -385,22 +385,23 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : apply Eq.symm (mul_div_mul_left (⟪grad, (x' - x)⟫) ((f x) * (f x)) l') have k₆ : (f x - f x') * f x /(f x' * f x * f x) + f x' * (⟪grad, (x' - x)⟫)/(f x' * f x * f x) = ((f x - f x') * f x + f x' * (⟪grad, (x' - x)⟫))/(f x' * f x * f x) := by - apply div_add_div_same ((f x - f x') * f x) (f x' * (⟪grad, (x' - x)⟫)) (f x' * f x * f x) - have k₇ : ((f x - f x') * f x + f x' * inner grad (x' - x)) / (f x' * f x * f x) = - (f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / (f x' * f x * f x) := by - 
have h' : (f x - f x') * f x + f x' * inner grad (x' - x) = - f x * (f x - f x' + inner grad (x' - x)) + - (f x' * (inner grad (x' - x)) - f x * (inner grad (x' - x))) := by + simpa using + (add_div ((f x - f x') * f x) (f x' * inner ℝ grad (x' - x)) (f x' * f x * f x)).symm + have k₇ : ((f x - f x') * f x + f x' * ⟪grad, (x' - x)⟫) / (f x' * f x * f x) = + (f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / (f x' * f x * f x) := by + have h' : (f x - f x') * f x + f x' * ⟪grad, (x' - x)⟫ = + f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * (⟪grad, (x' - x)⟫) - f x * (⟪grad, (x' - x)⟫)) := by linarith rw [h'] - have k₈ : (f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / - (f x * f x * f x') = f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x') + (f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + have k₈ : (f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / + (f x * f x * f x') = f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x') + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x') := by - apply add_div ((f x) * (f x - f x' + inner grad (x' - x))) - (f x' * inner grad (x' - x) - f x * inner grad (x' - x)) (f x * f x * f x') + apply add_div ((f x) * (f x - f x' + ⟪grad, (x' - x)⟫)) + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫) (f x * f x * f x') have k₉ : f x' * f x * f x = f x * f x * f x' := by linarith have p₁ : ‖1 / f x' - 1 / f x - (- (⟪grad, (x' - x)⟫))/((f x) * (f x))‖ ≤ ε * ‖x' - x‖ := by rw [k₄, k₁, k₂, k₅] @@ -409,22 +410,22 @@ theorem HasGradientAt.one_div (hf : HasGradientAt f grad x)(h₁: ¬ f x = (0 : rw [this] rw [k₆, k₇, k₉] calc - ‖(f x * (f x - f x' + inner grad (x' - x)) + - (f x' * inner grad (x' - x) - f x * inner grad (x' - x))) / - (f x * f x * f x')‖ = ‖f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x') + (f x' * inner grad (x' - x) - 
f x * inner grad (x' - x))/ + ‖(f x * (f x - f x' + ⟪grad, (x' - x)⟫) + + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)) / + (f x * f x * f x')‖ = ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x') + (f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')‖ := by rw [k₈] - _ ≤ ‖f x * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x')‖ + ‖(f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + _ ≤ ‖f x * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x')‖ + ‖(f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')‖ := by - apply norm_add_le ((f x) * (f x - f x' + inner grad (x' - x))/ - (f x * f x * f x')) ((f x' * inner grad (x' - x) - f x * inner grad (x' - x))/ + apply norm_add_le ((f x) * (f x - f x' + ⟪grad, (x' - x)⟫)/ + (f x * f x * f x')) ((f x' * ⟪grad, (x' - x)⟫ - f x * ⟪grad, (x' - x)⟫)/ (f x * f x * f x')) _ ≤ (ε/2) * ‖x' - x‖ + (ε/2) * ‖x' - x‖ := by exact add_le_add (hδ₅ x' hp₂) (hδ₄ x' hp₁) _ = ε * ‖x' - x‖ := by linarith - have j₁ : ‖1 / f x' - 1 / f x - (- (⟪grad, (x' - x)⟫))/((f x) * (f x))‖ = ‖1 / f x' - 1 / f x - - inner ((-(1 / f x ^ ↑2) • grad)) (x' - x)‖ := by + have j₁ : ‖1 / f x' - 1 / f x - (- (⟪grad, (x' - x)⟫))/((f x) * (f x))‖ = + ‖1 / f x' - 1 / f x - ⟪(-(1 / f x ^ (2 : ℕ)) • grad), (x' - x)⟫‖ := by congr; rw [k₃] rw [j₁] at p₁ have l1 : ‖x - x'‖ = ‖x' - x‖ := by diff --git a/Optlib/Differential/Lemmas.lean b/Optlib/Differential/Lemmas.lean index a8f3203..af00602 100644 --- a/Optlib/Differential/Lemmas.lean +++ b/Optlib/Differential/Lemmas.lean @@ -3,10 +3,13 @@ Copyright (c) 2023 Chenyi Li. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. 
Authors: Chenyi Li -/ -import Mathlib.Analysis.Calculus.MeanValue +import Mathlib.Algebra.Lie.OfAssociative +import Mathlib.Algebra.Order.Ring.Star import Mathlib.Analysis.Calculus.ContDiff.Defs -import Mathlib.Topology.Semicontinuous +import Mathlib.Analysis.Calculus.Deriv.MeanValue import Mathlib.Analysis.Normed.Lp.ProdLp +import Mathlib.Data.Real.StarOrdered +import Mathlib.Order.Filter.ENNReal import Optlib.Differential.Calculation /-! @@ -164,7 +167,7 @@ lemma continuous_positive_direction [NormedSpace ℝ E] (h : ContinuousAt f x) ( obtain ⟨δ, hδ1, hδ2⟩ := continuous_positive_neighborhood h hx by_cases hv : v = 0 · rw [hv]; simp; use 1; constructor; linarith; intro t _ _; exact hx - have : ‖v‖ > 0 := norm_pos_iff'.mpr hv + have : ‖v‖ > 0 := norm_pos_iff.mpr hv use δ / (2 * ‖v‖); constructor; positivity intro y hy obtain hδ2 := hδ2 (x + y • v) @@ -173,7 +176,7 @@ lemma continuous_positive_direction [NormedSpace ℝ E] (h : ContinuousAt f x) ( simp at hy; rw [norm_smul]; simp; rw [abs_of_nonneg hy.1] calc _ ≤ δ / (2 * ‖v‖) * ‖v‖ := (mul_le_mul_iff_of_pos_right this).mpr hy.2 - _ = δ / 2 := by field_simp; ring + _ = δ / 2 := by field_simp _ < δ := by linarith exact hδ2 this @@ -269,26 +272,24 @@ variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteS variable {x p y : E} {f : E → ℝ} {f' : E → E} {s : Set E} -open Topology InnerProductSpace Set Filter Tendsto +open Topology InnerProductSpace Set Filter theorem HasGradient_Convergence (h : HasGradientAt f (f' x) x) : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, ‖x - x'‖ ≤ δ - → ‖f x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by + → ‖f x' - f x - ((toDual ℝ E) (f' x)) (x' - x)‖ ≤ ε * ‖x - x'‖ := by rw [hasGradientAt_iff_hasFDerivAt] at h - show ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ (x' : E), ‖x - x'‖ ≤ δ - → ‖f x' - f x - ((toDual ℝ E) (f' x)) (x' - x)‖ ≤ ε * ‖x - x'‖ apply HasFDeriv_Convergence exact h theorem Convergence_HasGradient (h : ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f 
x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖) : + ‖x - x'‖ ≤ δ → ‖f x' - f x - ((toDual ℝ E) (f' x)) (x' - x)‖ ≤ ε * ‖x - x'‖) : HasGradientAt f (f' x) x := by rw [hasGradientAt_iff_hasFDerivAt] exact HasFDeriv_iff_Convergence_Point.mpr h -theorem HasGradient_iff_Convergence_Point {f'x : E}: - HasGradientAt f f'x x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f x' - f x - inner (f'x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by +theorem HasGradient_iff_Convergence_Point {g : E}: + HasGradientAt f g x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, + ‖x - x'‖ ≤ δ → ‖f x' - f x - (@inner ℝ E _ g (x' - x))‖ ≤ ε * ‖x - x'‖ := by constructor · intro h; apply HasGradient_Convergence exact h @@ -296,7 +297,7 @@ theorem HasGradient_iff_Convergence_Point {f'x : E}: theorem HasGradient_iff_Convergence : HasGradientAt f (f' x) x ↔ ∀ ε > (0 : ℝ), ∃ δ > (0 : ℝ), ∀ x' : E, - ‖x - x'‖ ≤ δ → ‖f x' - f x - inner (f' x) (x' - x)‖ ≤ ε * ‖x - x'‖ := by + ‖x - x'‖ ≤ δ → ‖f x' - f x - (@inner ℝ E _ (f' x) (x' - x))‖ ≤ ε * ‖x - x'‖ := by constructor apply HasGradient_Convergence apply Convergence_HasGradient @@ -308,20 +309,39 @@ lemma gradient_norm_sq_eq_two_self (x : E) : intro e ep use e constructor - . linarith - . 
intro x' dles - rw [← norm_neg (x - x'), neg_sub] at dles - rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq, inner_sub_right] - rw [real_inner_smul_left, real_inner_smul_left]; ring_nf - rw [add_sub, add_sub_right_comm, mul_two, ← sub_sub] - rw [← inner_sub_left, sub_add, ← inner_sub_right] - rw [real_inner_comm, ← inner_sub_left, real_inner_self_eq_norm_sq] - rw [abs_of_nonneg, pow_two, ← norm_neg (x - x'), neg_sub] - apply mul_le_mul_of_nonneg_right dles (norm_nonneg (x' - x)) - apply pow_two_nonneg + · linarith + · intro x' dles + have hId : + ⟪x', x'⟫_ℝ - ⟪x, x⟫_ℝ - 2 * ⟪x, x' - x⟫_ℝ + = ⟪x' - x, x' - x⟫_ℝ := by + calc + ⟪x', x'⟫_ℝ - ⟪x, x⟫_ℝ - 2 * ⟪x, x' - x⟫_ℝ + = ⟪x', x'⟫_ℝ - ⟪x, x⟫_ℝ - 2 * (⟪x, x'⟫_ℝ - ⟪x, x⟫_ℝ) := by + simp [inner_sub_right] + _ = ⟪x', x'⟫_ℝ + ⟪x, x⟫_ℝ - 2 * ⟪x, x'⟫_ℝ := by + ring_nf + _ = ⟪x', x'⟫_ℝ - ⟪x', x⟫_ℝ - ⟪x, x'⟫_ℝ + ⟪x, x⟫_ℝ := by + have hxcomm : ⟪x', x⟫_ℝ = ⟪x, x'⟫_ℝ := real_inner_comm x x' + have : ⟪x', x'⟫_ℝ + ⟪x, x⟫_ℝ - 2 * ⟪x, x'⟫_ℝ + = ⟪x', x'⟫_ℝ - ⟪x, x'⟫_ℝ - ⟪x, x'⟫_ℝ + ⟪x, x⟫_ℝ := by + ring_nf + simpa [hxcomm, add_comm, add_left_comm, add_assoc, sub_eq_add_neg] using this + _ = ⟪x' - x, x' - x⟫_ℝ := by + simp_rw [inner_sub_left, inner_sub_right, sub_eq_add_neg, add_comm, add_left_comm, add_assoc] + grind only + have hId2 : + ‖x'‖ ^ 2 - ‖x‖ ^ 2 - 2 * ⟪x, x' - x⟫_ℝ = ‖x' - x‖ ^ 2 := by + simpa [real_inner_self_eq_norm_sq] using hId + have hle' : ‖x - x'‖ ≤ e := dles + have hineq : ‖x' - x‖ ^ 2 ≤ e * ‖x - x'‖ := by + have : ‖x - x'‖ ^ 2 ≤ e * ‖x - x'‖ := by + simpa [pow_two] using mul_le_mul_of_nonneg_right hle' (norm_nonneg _) + simpa [norm_sub_rev] using this + rw [hId2, abs_of_nonneg (pow_two_nonneg _)] + exact hineq lemma gradient_of_inner_const (x : E) (a : E): - HasGradientAt (fun x ↦ (inner a x : ℝ)) a x := by + HasGradientAt (fun x ↦ (⟪a, x⟫_ℝ)) a x := by apply HasGradient_iff_Convergence_Point.mpr simp only [gt_iff_lt, Real.norm_eq_abs] intros ε εpos @@ -336,43 +356,29 @@ lemma 
gradient_of_const_mul_norm (l : ℝ) (z : E) : let h := fun x : E => ‖x‖ ^ 2 have e1 : (l • z) = (l / 2) • (2 : ℝ) • z := by rw [smul_smul]; simp have : (fun (x : E) => l / 2 * ‖x‖ ^ 2) = (fun (x : E) => (l / 2) • h x) := by - ext; simp + ext; simp only [smul_eq_mul, mul_eq_mul_left_iff, div_eq_zero_iff, OfNat.ofNat_ne_zero, + or_false]; grind only have h1 : HasGradientAt h ((2 : ℝ) • z) z := gradient_norm_sq_eq_two_self z rw [this, e1]; refine HasGradientAt.const_smul' (l / 2) h1 lemma gradient_of_sq : ∀ u : E, HasGradientAt (fun u ↦ ‖u - x‖ ^ 2 / 2) (u - x) u := by - intro s - rw [HasGradient_iff_Convergence_Point] - simp; intro e ep; use e - constructor - · linarith - · intro x' dles; field_simp; rw [abs_div]; simp - have eq1 (u v : E) (e : ℝ) (dle : ‖u - v‖ ≤ e) : - |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| ≤ e * ‖u - v‖ := by - rw [← norm_neg (u - v), neg_sub] at dle; - rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq, inner_sub_right] - rw [real_inner_smul_left, real_inner_smul_left]; ring_nf - rw [add_sub, add_sub_right_comm, mul_two, ← sub_sub] - rw [← inner_sub_left, sub_add, ← inner_sub_right] - rw [real_inner_comm, ← inner_sub_left, real_inner_self_eq_norm_sq] - rw [abs_of_nonneg, pow_two, ← norm_neg (u - v), neg_sub] - apply mul_le_mul_of_nonneg_right dle (norm_nonneg (v - u)) - apply pow_two_nonneg - let u := s - x - have hu : u = s - x := rfl - let v := x' - x - have hv : v = x' - x := rfl - rw [← real_inner_smul_left] - have eq2 : s - x' = u - v := by rw [hu, hv]; simp - have eq3 : x' - s = v - u := by rw [hu, hv]; simp - rw [eq2, eq3] - show |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| / 2 ≤ e * ‖u - v‖ - calc - |‖v‖ ^ 2 - ‖u‖ ^ 2 - inner ((2 : ℝ) • u) (v - u)| / 2 ≤ (e * ‖u - v‖) / 2 := by - rw [div_le_div_right] - apply eq1; rw [hu, hv]; simp; apply dles; simp - _ ≤ e * ‖u - v‖ := by - field_simp + intro u + have hT : HasFDerivAt (fun u : E ↦ u - x) (1 : E →L[ℝ] E) u := by + simpa using + (hasFDerivAt_id (𝕜 := ℝ) (E := E) 
u).sub_const x + have hg0 : HasGradientAt (fun z : E ↦ ‖z‖ ^ 2) ((2 : ℝ) • (u - x)) (u - x) := by + simpa using (gradient_norm_sq_eq_two_self (u - x)) + have hF : HasFDerivAt (fun u : E ↦ ‖u - x‖ ^ 2) + ((toDual ℝ E) ((2 : ℝ) • (u - x))) u := by + simpa [Function.comp] using + (hasGradientAt_iff_hasFDerivAt.mp hg0).comp u hT + have hG : HasGradientAt (fun u : E ↦ ‖u - x‖ ^ 2) + ((2 : ℝ) • (u - x)) u := + (hasGradientAt_iff_hasFDerivAt.mpr hF) + have hG' : HasGradientAt + (fun u : E ↦ (1 / 2 : ℝ) * ‖u - x‖ ^ 2) (u - x) u := by + simpa [smul_eq_mul] using hG.const_mul (1 / 2 : ℝ) + simpa [div_eq_inv_mul, mul_comm] using hG' lemma sub_normsquare_gradient (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) (m : ℝ): ∀ x ∈ s, HasGradientAt (fun x ↦ f x - m / 2 * ‖x‖ ^ 2) (f' x - m • x) x := by @@ -431,14 +437,14 @@ open InnerProductSpace Set -/ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : - ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner (f' (x + t • p)) p := by + ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + ⟪f' (x + t • p), p⟫_ℝ := by let g := fun r : ℝ ↦ f (x + r • p) - let g' := fun r : ℝ ↦ (inner (f' (x + r • p)) p : ℝ) + let g' := fun r : ℝ ↦ (⟪f' (x + r • p), p⟫_ℝ : ℝ) have h1 : ∀ r , HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ x + r • p have : g = f ∘ h := by rfl rw [this]; intro r - have : inner (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl + have : ⟪f' (x + r • p), p⟫_ℝ = toDual ℝ E (f' (x + r • p)) p := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp exact hf (x + r • p) @@ -449,7 +455,9 @@ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : rw [one_smul] at this; exact this have e1 : f (x + p) = g 1 := by simp [g] have e2 : f x = g 0 := by simp [g] - have e3 : ∀ t, inner (f' (x + t • p)) p = g' t := by simp [] + have e3 : ∀ t, ⟪f' (x + t • p), p⟫_ℝ = g' t := by + intro t + simp_all only [one_smul, zero_smul, add_zero, g, g'] rw [e1, e2] have : ∃ c ∈ Set.Ioo 0 1, 
g' c = (g 1 - g 0) / (1 - 0) := by apply exists_hasDerivAt_eq_slope g g' (by norm_num) @@ -465,14 +473,14 @@ lemma expansion (hf : ∀ x : E, HasGradientAt f (f' x) x) (x p : E) : rw [e3 c]; simp [h2] lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, HasGradientAt f (f' y) y) : - ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + inner (f' (x + t • p)) p := by + ∃ t : ℝ, t > 0 ∧ t < 1 ∧ f (x + p) = f x + ⟪f' (x + t • p), p⟫_ℝ := by let g := fun r : ℝ ↦ f (x + r • p) - let g' := fun r : ℝ ↦ (inner (f' (x + r • p)) p : ℝ) + let g' := fun r : ℝ ↦ (⟪f' (x + r • p), p⟫_ℝ : ℝ) have h1 : ∀ r ∈ Icc 0 1, HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ x + r • p have : g = f ∘ h := by rfl rw [this]; intro r hr - have : inner (f' (x + r • p)) p = toDual ℝ E (f' (x + r • p)) p := rfl + have : ⟪f' (x + r • p), p⟫_ℝ = toDual ℝ E (f' (x + r • p)) p := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + r • p ∈ Metric.closedBall x ‖p‖ := by @@ -486,7 +494,7 @@ lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, H rw [one_smul] at this; exact this have e1 : f (x + p) = g 1 := by simp [g] have e2 : f x = g 0 := by simp [g] - have e3 : ∀ t, inner (f' (x + t • p)) p = g' t := by simp [] + have e3 : ∀ t, ⟪f' (x + t • p), p⟫_ℝ = g' t := by grind only rw [e1, e2] have : ∃ c ∈ Set.Ioo 0 1, g' c = (g 1 - g 0) / (1 - 0) := by apply exists_hasDerivAt_eq_slope g g' (by norm_num) @@ -501,15 +509,15 @@ lemma general_expansion (x p : E) (hf : ∀ y ∈ Metric.closedBall x ‖p‖, H theorem lagrange (hs : Convex ℝ s) (hf : ∀ x ∈ s, HasGradientAt f (f' x) x) : ∀ x ∈ s, ∀ y ∈ s, ∃ c : ℝ, c ∈ Set.Ioo 0 1 ∧ - inner (f' (x + c • (y - x))) (y - x) = f y - f x := by + ⟪f' (x + c • (y - x)), (y - x)⟫_ℝ = f y - f x := by intro x xs y ys let g := fun t : ℝ ↦ f (x + t • (y - x)) - let g' := fun t : ℝ ↦ (inner (f' (x + t • (y - x))) (y - x) : ℝ) + let g' := fun t : ℝ ↦ (⟪f' (x + t • (y - x)), (y - x)⟫_ℝ : ℝ) have h1 : ∀ r ∈ 
Icc 0 1 , HasDerivAt g (g' r) r := by let h := fun r : ℝ ↦ (x + r • (y - x)) have : g = f ∘ h := rfl rw [this]; intro t ht - have : inner (f' (x + t • (y - x))) (y - x) = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl + have : ⟪f' (x + t • (y - x)), (y - x)⟫_ℝ = toDual ℝ E (f' (x + t • (y - x))) (y - x) := rfl simp [g']; rw [this]; apply HasFDerivAt.comp_hasDerivAt · apply hasGradientAt_iff_hasFDerivAt.mp have : x + t • (y - x) ∈ s := by diff --git a/Optlib/Differential/Subdifferential.lean b/Optlib/Differential/Subdifferential.lean index 9c8720d..57f814d 100644 --- a/Optlib/Differential/Subdifferential.lean +++ b/Optlib/Differential/Subdifferential.lean @@ -1,6 +1,6 @@ import Mathlib.Order.LiminfLimsup import Mathlib.Topology.Defs.Filter -import Mathlib.Data.Real.EReal +import Mathlib.Data.EReal.Basic import Optlib.Differential.Calculation import Optlib.Function.Proximal @@ -14,7 +14,7 @@ variable {f g : E → ℝ} {x y u v : E} {c : ℝ} /- the general differential function used in the definition -/ def differential_fun (x : E) (f : E → ℝ) (u : E) := - fun y ↦ Real.toEReal ((f y - f x - inner u (y - x)) / ‖y - x‖) + fun y ↦ Real.toEReal ((f y - f x - inner (𝕜 := ℝ) u (y - x)) / ‖y - x‖) /- the definition of the Frechet subdifferential-/ def f_subdifferential (f : E → ℝ) (x : E) : Set E := @@ -35,9 +35,9 @@ def critial_point (f : E → ℝ) : Set E := /-- equivalence of Frechet subdifferential -/ theorem has_f_subdiff_iff : u ∈ f_subdifferential f x ↔ - ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner u (y - x) ≥ -ε * ‖y - x‖ := by - have h0 : (∀ ε > 0, ∀ᶠ y in 𝓝[≠] x, f y - f x - inner u (y - x) > -ε * ‖y - x‖) - ↔ ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner u (y - x) ≥ -ε * ‖y - x‖ := by + ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner (𝕜 := ℝ) u (y - x) ≥ -ε * ‖y - x‖ := by + have h0 : (∀ ε > 0, ∀ᶠ y in 𝓝[≠] x, f y - f x - inner (𝕜 := ℝ) u (y - x) > -ε * ‖y - x‖) + ↔ ∀ ε > 0, ∀ᶠ y in 𝓝 x, f y - f x - inner (𝕜 := ℝ) u (y - x) ≥ -ε * ‖y - x‖ := by constructor · intro h ε εpos specialize h ε 
εpos @@ -144,7 +144,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) have yin': y' ∈ univ:= by simp specialize convfun xin yin' - have pos: 0 < (1 / 2) * ((f x) + inner g (y' - x) - f y') / ‖y' - x‖:=by + have pos: 0 < (1 / 2) * ((f x) + inner (𝕜 := ℝ) g (y' - x) - f y') / ‖y' - x‖:=by apply div_pos · apply mul_pos simp; simp @@ -155,7 +155,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rw [yeq'] at hy' simp at hy' rw[← gt_iff_lt] at pos - specialize hg ((1 / 2) * ((f x) + inner g (y' - x) - f y')/‖y' - x‖) + specialize hg ((1 / 2) * ((f x) + inner (𝕜 := ℝ) g (y' - x) - f y')/‖y' - x‖) specialize hg pos simp at hg rw[Filter.Eventually,mem_nhds_iff] at hg @@ -185,7 +185,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp simp @@ -199,14 +199,14 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp · apply norm_pos_iff.mpr by_contra yeq' rw[sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp simp @@ -226,7 +226,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) by_contra yeq' rw[norm_eq_zero,sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp refine div_mul_cancel₀ δ' nonzero @@ -236,7 +236,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) apply lt_of_le_of_lt apply min_le_left exact lt_two_mul_self posδ - simp + --simp apply 
div_nonneg apply le_min linarith @@ -247,8 +247,8 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rcases hx1 with ⟨x1s,x1t⟩ rw[mem_setOf] at x1s rcases x1s with ⟨r,rpos,rltone,x1eq⟩ - have x1in: x1 ∈ {x_1 | inner g (x_1 - x) ≤ - f x_1 - f x + 2⁻¹ * (f x + inner g (y' - x) - f y') / ‖y' - x‖ * ‖x_1 - x‖}:=by + have x1in: x1 ∈ {x_1 | inner (𝕜 := ℝ) g (x_1 - x) ≤ + f x_1 - f x + 2⁻¹ * (f x + inner (𝕜 := ℝ) g (y' - x) - f y') / ‖y' - x‖ * ‖x_1 - x‖}:=by apply mem_of_subset_of_mem tin assumption rw[mem_setOf,x1eq] at x1in @@ -266,7 +266,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) have r2pos: 0 < (1 -r)/2:=by linarith have req: r + (1-r) = 1:=by simp specialize convfun rnonneg rleone req - have nonneg: 0 ≤ f y' - f x - inner g (y' - x):=by + have nonneg: 0 ≤ f y' - f x - inner (𝕜 := ℝ) g (y' - x):=by apply nonneg_of_mul_nonneg_right _ r2pos rw[mul_sub, ← sub_self_div_two (1 - r), sub_mul, sub_mul (1 - r)] simp @@ -283,17 +283,17 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) rw[neg_mul,← sub_eq_add_neg ((1 - r) * f y'),← mul_sub, mul_assoc, mul_comm (1 - r) ‖y' - x‖] rw[← mul_assoc, div_mul, div_self] simp - rw[mul_comm (2⁻¹ * inner g (y' - x) + 2⁻¹ * (f x - f y')), - mul_add, add_comm ((1 - r) * (2⁻¹ * inner g (y' - x)))] + rw[mul_comm (2⁻¹ * inner (𝕜 := ℝ) g (y' - x) + 2⁻¹ * (f x - f y')), + mul_add, add_comm ((1 - r) * (2⁻¹ * inner (𝕜 := ℝ) g (y' - x)))] rw[← add_assoc, ← mul_assoc, ← mul_assoc,inv_eq_one_div] linarith by_contra yeq' rw[norm_eq_zero,sub_eq_zero] at yeq' rw[yeq'] at hy' - rw[sub_self, inner_zero_right,add_zero, lt_iff_not_le] at hy' + rw[sub_self, inner_zero_right,add_zero, lt_iff_not_ge] at hy' apply hy' simp - have nonneg': ¬ 0 > f y' - f x - inner g (y' - x):=by linarith + have nonneg': ¬ 0 > f y' - f x - inner (𝕜 := ℝ) g (y' - x):=by linarith apply nonneg' simp linarith @@ -321,7 +321,7 @@ theorem convex_f_f_subdiff_eq_subgradient (f : E → ℝ) (x : E) theorem f_subdiff_neg_f_subdiff_unique 
(hu : u ∈ f_subdifferential f x) (hv : v ∈ f_subdifferential (- f) x) : u = - v := by rw [has_f_subdiff_iff] at * - have h : ∀ ε > 0, ∀ᶠ y in 𝓝 x, inner (u + v) (y - x) ≤ ε * ‖y - x‖ := by + have h : ∀ ε > 0, ∀ᶠ y in 𝓝 x, inner (𝕜 := ℝ) (u + v) (y - x) ≤ ε * ‖y - x‖ := by intro ε εpos have ε2pos : 0 < ε / 2 := by positivity filter_upwards [hu _ ε2pos, hv _ ε2pos] with y huy hvy @@ -366,7 +366,7 @@ theorem f_subdiff_smul (h : u ∈ f_subdifferential (c • f) x) (cpos : 0 < c) filter_upwards [h _ (mul_pos cpos εpos)] with y hy rw [real_inner_smul_left] simp only [Pi.smul_apply, smul_eq_mul, neg_mul, neg_le_sub_iff_le_add] at hy - apply (mul_le_mul_left cpos).mp + apply (mul_le_mul_iff_right₀ cpos).mp field_simp linarith @@ -503,8 +503,8 @@ theorem f_subdiff_add' (f : E → ℝ ) (g : E → ℝ ) (g' : E → E) (x : E) specialize hg ε2pos filter_upwards [zin _ ε2pos, hg ] with a za ga simp at ga - have h: - (g a - g x - inner (g' x) (a - x)) ≥ -(ε / 2) * ‖a - x‖:=by - change -(ε / 2) * ‖a - x‖ ≤ - (g a - g x - inner (g' x) (a - x)) + have h: - (g a - g x - inner (𝕜 := ℝ) (g' x) (a - x)) ≥ -(ε / 2) * ‖a - x‖:=by + change -(ε / 2) * ‖a - x‖ ≤ - (g a - g x - inner (𝕜 := ℝ) (g' x) (a - x)) rw[neg_mul, neg_le_neg_iff] apply le_trans; apply le_abs_self; assumption rw[inner_sub_left]; diff --git a/Optlib/Function/KL.lean b/Optlib/Function/KL.lean index a0ecccb..8b94c3b 100644 --- a/Optlib/Function/KL.lean +++ b/Optlib/Function/KL.lean @@ -26,8 +26,8 @@ lemma subdifferential_Graph' (f : E → ℝ) : use fun n => (u n, f (u n), v n) constructor · intro n; simp; exact (hv n).1 - · apply Tendsto.prod_mk_nhds u_conv - (Tendsto.prod_mk_nhds fun_conv ((forall_and_right _ _).1 hv).2) + · apply Tendsto.prodMk_nhds u_conv + (Tendsto.prodMk_nhds fun_conv ((forall_and_right _ _).1 hv).2) · intro h simp [subdifferential_Graph, subdifferential] simp at h @@ -63,7 +63,7 @@ theorem GraphOfSubgradientIsClosed {f : E → ℝ} exact this rw [nhds_prod_eq,Filter.tendsto_prod_iff'] at hconv; simp at hconv - exact 
Tendsto.prod_mk_nhds hconv.1 (Tendsto.prod_mk_nhds hf hconv.2) + exact Tendsto.prodMk_nhds hconv.1 (Tendsto.prodMk_nhds hf hconv.2) /- Definition of Φ_η, the family of desingularizing function -/ def desingularizing_function (η : ℝ) := {φ : ℝ → ℝ | (ConcaveOn ℝ (Ico 0 η) φ) -- ∧ (∀ x ∈ Ioo 0 η, φ x > 0) @@ -103,12 +103,16 @@ lemma desingularizing_function_is_nonneg (φ : ℝ → ℝ) (η : ℝ) (h : φ obtain h_lag := exists_deriv_eq_slope φ hx₁ Cont_φ Diff_φ rcases h_lag with ⟨c, ⟨hc, hval⟩⟩ use c, hc - field_simp [hval] + -- Rearrange hval: deriv φ c = (φ x - φ 0) / (x - 0) to φ x = φ 0 + deriv φ c * (x - 0) + have : φ x = φ 0 + deriv φ c * (x - 0) := by + rw [hval] + field_simp [ne_of_gt hx₁]; simp + exact this choose y hy₁ hy₂ using hhh simp [hy₂, h₂]; field_simp; rcases hy₁ with ⟨hy₁,hy₁'⟩ have yleq: y < η := by linarith - exact h₅ y hy₁ yleq + exact Left.mul_pos (h₅ y hy₁ yleq) hx₁ -- Definition of KL property with specific desingularizing function def KL_point_with_reparameter (σ : E → ℝ) (u : E) (φ : ℝ → ℝ) : Prop := @@ -161,9 +165,9 @@ lemma const_mul_special_concave : ∀ c > 0, (fun t => c⁻¹ * t) ∈ desingula rw [fun_smul_eq_mul]; apply ContDiff.contDiffOn; apply contDiff_const_smul have h₄: ContinuousAt (fun t ↦ c⁻¹ * t) 0 := by rw [fun_smul_eq_mul]; apply (continuousAt_const_smul_iff₀ _).2 - apply continuousAt_id; field_simp + apply continuousAt_id; field_simp; simp; grind have h₅: ∀ (x : ℝ), 0 < x → x < c / 2 → 0 < deriv (fun t ↦ c⁻¹ * t) x := by - intro x _ _; rw [deriv_of_const_mul_func]; field_simp; exact cpos + intro x _ _; rw [deriv_of_const_mul_func]; field_simp; simp; grind exact ⟨h₁, h₃, h₄, h₅⟩ @@ -243,7 +247,7 @@ lemma edist_geq_const (h_noncrit : 0 ∉ subdifferential f x) : intro n exact (hv n).1 have v_to_zero: Tendsto v atTop (𝓝 0) := by - rw [dist_zero_left] at hv + rw [dist_zero] at hv have : Tendsto (fun n => ‖v n‖) atTop (𝓝 0) := by apply squeeze_zero (by simp) _ tendsto_one_div_add_atTop_nhds_zero_nat intro n @@ -251,7 +255,7 @@ lemma 
edist_geq_const (h_noncrit : 0 ∉ subdifferential f x) : apply tendsto_zero_iff_norm_tendsto_zero.2 this show (x, 0) ∈ subdifferential_Graph f apply GraphOfSubgradientIsClosed v_in_subdiff - (Filter.Tendsto.prod_mk_nhds u_to_x v_to_zero) fu_to_fx + (Filter.Tendsto.prodMk_nhds u_to_x v_to_zero) fu_to_fx contradiction /-- Non-critical KL property is naturally true -/ @@ -322,8 +326,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp (ENNReal.ofReal (deriv φ (f x - f u))) * EMetric.infEdist 0 (subdifferential f x) ≥ 1 := by -- case : Ω = ∅ by_cases h_nonempty : Ω = ∅ - · push_neg at h_nonempty - use 1, (by simp), 1, (by simp), (fun t => 2⁻¹ * t) + · use 1, (by simp), 1, (by simp), (fun t => 2⁻¹ * t) constructor rw [← div_self] exact (const_mul_special_concave 2 (by simp)) @@ -452,7 +455,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp rw [ContinuousAt] have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by ext c; simp [φ_sum] rw [this] - simp [φ_sum] + simp apply tendsto_finset_sum intro c hc obtain cont := (hφ c (mem_t_in_Ω c hc)).2.2.2.1 @@ -463,6 +466,7 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp have : φ_sum = (fun c => ∑ x ∈ ht2.toFinset, φ x c) := by ext c; simp [φ_sum] rw [this] have : deriv (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) y = ∑ x ∈ ht2.toFinset, deriv (φ x) y := by + rw [funext (fun c => (Finset.sum_apply c _ _).symm)] apply deriv_sum intro c hc have η_inequ: y < η c := by @@ -527,7 +531,9 @@ theorem uniformized_KL_property {f : E → ℝ} {Ω : Set E} (h_compact : IsComp simp [φ_sum] have equ₁: deriv (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) (f u - μ) = ∑ x ∈ ht2.toFinset, deriv (φ x) (f u - μ) := by - apply deriv_sum + have : (fun c ↦ ∑ x ∈ ht2.toFinset, φ x c) = ∑ x ∈ ht2.toFinset, φ x := by + ext c; exact Eq.symm (Finset.sum_apply c ht2.toFinset φ) + rw [this, deriv_sum] intro c hc have σu_pos : f u - μ > 0 := by linarith [hu2] have η_inequ: (f u - μ) < η c := by 
diff --git a/Optlib/Function/L1Space.lean b/Optlib/Function/L1Space.lean index d3d0436..39bade6 100644 --- a/Optlib/Function/L1Space.lean +++ b/Optlib/Function/L1Space.lean @@ -6,6 +6,8 @@ Authors: Zichen Wang import Mathlib.Analysis.Normed.Lp.PiLp import Mathlib.Topology.Algebra.Module.FiniteDimension import Mathlib.Analysis.InnerProductSpace.Basic +import Mathlib + /-! # l₁ Space and Continuous Linear Maps between l₁ Space and Finite Dimensional Space @@ -20,7 +22,7 @@ This file contains several key definitions and theorems that involve continuous - `f` : A noncomputable function mapping basis vectors to the `l₁` space. -- `σ` : A noncomputable map constructed using `Basis.constrL` which is shown to be continuous. +- `σ` : A noncomputable map constructed using `Module.Basis.constrL` which is shown to be continuous. ## Main Theorems @@ -54,7 +56,7 @@ open scoped Pointwise Module noncomputable def f : Fin (Module.finrank ℝ α) → PiLp 1 (fun _ : Fin (Module.finrank ℝ α) => ℝ) := fun i j => if i = j then ‖(Module.finBasis ℝ α) i‖ else 0 -noncomputable def σ := Basis.constrL (Module.finBasis ℝ α) f +noncomputable def σ := Module.Basis.constrL (Module.finBasis ℝ α) f theorem continuous_map_sigma : Continuous (σ (α := α)):= by exact ContinuousLinearMap.continuous σ @@ -91,7 +93,7 @@ theorem sigma_decompose_apply : ∀ x , ∀ j , (σ x) j = rw[← PiLp.ext_iff] calc _ = σ (∑ i , (((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i):= by - congr;exact Eq.symm (Basis.sum_repr (Module.finBasis ℝ α) x) + congr;exact Eq.symm (Module.Basis.sum_repr (Module.finBasis ℝ α) x) _ = ∑ i , σ ((((Module.finBasis ℝ α).repr x) i) • (Module.finBasis ℝ α) i):= by simp only [map_sum, map_smul] _ = _ := by @@ -134,7 +136,7 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank rw[← map_sub] at hx₀ have sum_le_r : ∑ i , ‖(b).equivFun (x₀ - x) i‖ * ‖(b) i‖ / r ≤ 1 := by rw[← Finset.sum_div] - simp only [Basis.equivFun_apply, Pi.sub_apply] + simp only 
[Module.Basis.equivFun_apply] rw[← l1_norm_eq (x₀ - x)] apply le_of_lt apply Bound.div_lt_one_of_pos_of_lt hr hx₀ @@ -142,13 +144,13 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank let ι := Fin n let ι₀ := Fin (n + 2) let w₀ := (b).equivFun (x₀ - x) - have repr : ∑ i , w₀ i • b i = x₀ - x := Basis.sum_equivFun b (x₀ - x) + have repr : ∑ i , w₀ i • b i = x₀ - x := Module.Basis.sum_equivFun b (x₀ - x) let w₁ : ι → ℝ := fun i => |(b).equivFun (x₀ - x) i| * ‖b i‖ / r let sum := ∑ i : ι, w₁ i have sum_pos : 1 - sum ≥ 0 := by - simp only [sum,w₁,ge_iff_le, gt_iff_lt,sub_pos,Pi.sub_apply, sub_nonneg] + simp only [sum,w₁,ge_iff_le, sub_nonneg] apply sum_le_r let w : ι₀ → ℝ @@ -177,7 +179,7 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank have hw₀ : ∀ (i : ι₀), 0 ≤ w i := by intro ⟨i,hi⟩ by_cases h : i < n - · simp only [Pi.sub_apply, h, ↓reduceDIte, ge_iff_le, w, w₁] + · simp only [h, ↓reduceDIte, ge_iff_le, w, w₁] apply div_nonneg _ (le_of_lt hr) apply mul_nonneg apply abs_nonneg @@ -196,33 +198,65 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank have hz : ∀ (i : ι₀), z i ∈ ((⋃ i , {(r / ‖b i‖) • (b i)}) ∪ (⋃ i ,{- (r / ‖b i‖) • (b i)})) := by intro i - simp only [dite_eq_ite, z] - by_cases h₁ : (i : ℕ) = n + 1 - · simp[h₁] - simp only [h₁, ↓reduceIte] - by_cases h₂ : (i : ℕ) = n - · simp[h₂] - simp only [h₂, ↓reduceIte, add_right_inj] - let use_i : ι := ⟨i ,lem_i i.2 h₁ h₂⟩ - simp only [↓reduceDIte] - let a := (b).equivFun (x₀ - x) use_i - rcases lt_trichotomy a 0 with ha | ha | ha - · right - have : (b).equivFun (x₀ - x) use_i ≠ 0 := by linarith - simp at this - rw[sign_neg ha] - simp[this, ↓reduceIte] - · left; - simp only [a] at ha - rw[ha,sign_zero] - simp; - · left - rw[sign_pos ha] - simp + rcases i with ⟨k, hk⟩ + by_cases h₁ : k = n + 1 + · refine Or.inr ?_ + refine Set.mem_iUnion.2 ?_ + refine ⟨fin0, ?_⟩ + simp [z, dite_eq_ite, h₁] + · by_cases h₂ : k = n + · refine Or.inl 
?_ + refine Set.mem_iUnion.2 ?_ + refine ⟨fin0, ?_⟩ + simp [z, dite_eq_ite, h₂] + · have hlt : k < n := lem_i hk (by exact h₁) (by exact h₂) + let use_i : ι := ⟨k, hlt⟩ + let a : ℝ := (b).equivFun (x₀ - x) use_i + have hz_form : + z ⟨k, hk⟩ = + if a = 0 then (r / ‖b use_i‖) • b use_i + else ((SignType.sign a) * (r / ‖b use_i‖)) • b use_i := by + simp [z, dite_eq_ite, h₁, h₂, a] + rfl + have hrepr_sub : + ((b).repr x₀) use_i - ((b).repr x) use_i = a := by + simp [Pi.sub_apply, Module.Basis.equivFun_apply, a] + rcases lt_trichotomy a 0 with hlt0 | heq | hgt0 + · have ha0 : a ≠ 0 := ne_of_lt hlt0 + refine Or.inr ?_ + refine Set.mem_iUnion.2 ?_ + refine ⟨use_i, ?_⟩ + have hz_neg' : + z ⟨k, hk⟩ = ((SignType.sign a) * (r / ‖b use_i‖)) • b use_i := by + simp [hz_form, ha0] + have hz_neg : + z ⟨k, hk⟩ = - (r / ‖b use_i‖) • b use_i := by + have hsign : SignType.sign a = (-1 : ℝ) := by simp [*] + simpa [hsign, smul_smul, neg_one_smul] using hz_neg' + exact Set.mem_singleton_iff.mpr hz_neg + · refine Or.inl ?_ + refine Set.mem_iUnion.2 ?_ + refine ⟨use_i, ?_⟩ + have hz_zero : + z ⟨k, hk⟩ = (r / ‖b use_i‖) • b use_i := by + simp [hz_form, heq] + exact Set.mem_singleton_iff.mpr hz_zero + · have ha0 : a ≠ 0 := ne_of_gt hgt0 + refine Or.inl ?_ + refine Set.mem_iUnion.2 ?_ + refine ⟨use_i, ?_⟩ + have hz_pos' : + z ⟨k, hk⟩ = ((SignType.sign a) * (r / ‖b use_i‖)) • b use_i := by + simp [hz_form, ha0] + have hz_pos : + z ⟨k, hk⟩ = (r / ‖b use_i‖) • b use_i := by + have hsign : SignType.sign a = (1 : ℝ) := by simp [*] + simpa [hsign, one_mul] using hz_pos' + exact Set.mem_singleton_iff.mpr hz_pos have bi_pos : ∀ i : ι , ‖b i‖ ≠ 0 := by intro i refine norm_ne_zero_iff.mpr ?_ - exact Basis.ne_zero b i + exact Module.Basis.ne_zero b i have hx : ∑ i : ι₀, w i • z i = x₀ - x := by rw[Fin.sum_univ_castSucc,Fin.sum_univ_castSucc] @@ -242,7 +276,7 @@ theorem l1Ball_sub_convexHull{x : α}{r : ℝ}(hr : r > 0)(hn : Module.finrank simp only [neg_smul, dite_eq_ite, Fin.coe_castSucc, h₁, ↓reduceIte, 
h₂, Fin.eta, z] have : w₁ i • ((SignType.sign ((b).equivFun (x₀ - x) i)) * (r / ‖b i‖)) = w₀ i := by - simp only [Pi.sub_apply, smul_eq_mul, w₁, w₀] + simp only [smul_eq_mul, w₁, w₀] calc _ = |(b).equivFun (x₀ - x) i| * (‖b i‖ / r) * (SignType.sign ((b).equivFun (x₀ - x) i)) * (r / ‖b i‖) := by rw[← mul_div] @@ -274,7 +308,7 @@ theorem sigma_is_injective : Function.Injective σ (α := α) := by let z := x - y let n := Module.finrank ℝ α let bs := Module.finBasis ℝ α - have hz : z = ∑ i : Fin n , (bs.repr z i)• bs i := Eq.symm (Basis.sum_repr bs z) + have hz : z = ∑ i : Fin n , (bs.repr z i)• bs i := Eq.symm (Module.Basis.sum_repr bs z) change σ z = 0 at h rw[hz] at h simp at h @@ -289,7 +323,7 @@ theorem sigma_is_injective : Function.Injective σ (α := α) := by intro i rw[smul_eq_zero] left - have : ‖(Module.finBasis ℝ α) i‖ ≠ 0:= norm_ne_zero_iff.mpr $ Basis.ne_zero (Module.finBasis ℝ α) i + have : ‖(Module.finBasis ℝ α) i‖ ≠ 0:= norm_ne_zero_iff.mpr $ Module.Basis.ne_zero (Module.finBasis ℝ α) i have h1 : (bs.repr z) i * ‖(Module.finBasis ℝ α) i‖ = 0 := by rw[← hi , h, PiLp.zero_apply] apply eq_zero_of_ne_zero_of_mul_right_eq_zero this h1 diff --git a/Optlib/Function/Lsmooth.lean b/Optlib/Function/Lsmooth.lean index 1ff68dd..60308f3 100644 --- a/Optlib/Function/Lsmooth.lean +++ b/Optlib/Function/Lsmooth.lean @@ -76,12 +76,12 @@ theorem lipschitz_continuous_upper_bound {E : Type*} apply HasDerivAt.add · apply HasDerivAt.const_add · apply hasDerivAt_mul_const - · have : l * ‖y - x‖ ^ 2 * t = (2 * t) * (l * ‖y - x‖ ^ 2 / 2) := by field_simp; ring_nf + · have : l * ‖y - x‖ ^ 2 * t = (2 * t) * (l * ‖y - x‖ ^ 2 / 2) := by field_simp rw [this]; apply HasDerivAt.mul_const obtain hd := HasDerivAt.pow (n := 2) (hasDerivAt_id' t) simp at hd; exact hd suffices g 1 ≤ u 1 by - simp [u, g, u', LL, g'] at this + simp [u, g, LL, g'] at this rw [map_sub]; linarith apply image_le_of_deriv_right_le_deriv_boundary (a := 0) (b := 2) · exact HasDerivAt.continuousOn (fun x _ ↦ gderiv x) @@ 
-106,7 +106,8 @@ open InnerProductSpace Set variable {f : E → ℝ} {a : ℝ} {f' : E → E} {l : NNReal} -theorem lower_to_lipschitz (h₂ : ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2) +theorem lower_to_lipschitz + (h₂ : ∀ x y : E, ⟪f' x - f' y, x - y⟫_ℝ ≥ (1 / (l : ℝ)) * ‖f' x - f' y‖ ^ 2) (hl : l > 0) : LipschitzWith l f' := by rw [lipschitzWith_iff_norm_sub_le] intro x y @@ -120,18 +121,17 @@ theorem lower_to_lipschitz (h₂ : ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / apply real_inner_le_norm _ = (1 / l * ‖f' x - f' y‖) * (l * ‖x - y‖) := by field_simp - ring_nf have H₂ : 1 / l > 0 := by apply one_div_pos.mpr hl cases lt_or_ge 0 (‖f' x - f' y‖) case inl h => apply le_of_mul_le_mul_left H₁ apply mul_pos _ h - · simp [H₂, hl] + · simp [hl] case inr h => apply le_trans h apply mul_nonneg - · simp [hl] + · simp apply norm_nonneg _ end @@ -145,16 +145,16 @@ variable {f : E → ℝ} {a : ℝ} {f' : E → E} {xm : E} {l : NNReal} theorem lipschitz_continuos_upper_bound' (h₁ : ∀ x₁ : E, HasGradientAt f (f' x₁) x₁) (h₂ : LipschitzWith l f') : - ∀ x y : E, f y ≤ f x + inner (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by + ∀ x y : E, f y ≤ f x + ⟪f' x, y - x⟫_ℝ + l / 2 * ‖y - x‖ ^ 2 := by intro x y rw [lipschitzWith_iff_norm_sub_le] at h₂ let g := fun x ↦ (toDual ℝ E) (f' x) have h' : ∀ x : E, HasFDerivAt f (g x) x := h₁ - have equiv : ∀ x y : E, inner (f' x) (y - x) = (g x) (y - x) := by + have equiv : ∀ x y : E, ⟪f' x, y - x⟫_ℝ = (g x) (y - x) := by intro x y rw [InnerProductSpace.toDual_apply] have h₂' : LipschitzWith l g := by - simp only [g, equiv] + simp only [g] rw [lipschitzWith_iff_norm_sub_le] intro x y have h1 : ∀ x : E, ‖(toDual ℝ E) x‖ =‖x‖ := by @@ -177,13 +177,15 @@ theorem lipschitz_minima_lower_bound (h₁ : ∀ x : E, HasGradientAt f (f' x) x have eq : f xm ≤ f x - 1 / (2 * l) * ‖f' x‖ ^ 2 := by calc _ ≤ f y := by apply min - _ ≤ f x + inner (f' x) (y - x) + l / 2 * ‖y - x‖ ^ 2 := by + _ ≤ f x + ⟪f' x, y - x⟫_ℝ + l / 2 * ‖y - x‖ ^ 2 := by apply 
lipschitz_continuos_upper_bound' h₁ h₂ _ = f x - 1 / (2 * l) * ‖f' x‖ ^ 2 := by rw [add_assoc]; rw [sub_eq_add_neg (f x), add_left_cancel_iff.2] - field_simp [y]; rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq] - rw [inner_smul_right, inner_smul_left, inner_smul_right] - field_simp; ring_nf + have hyx : y - x = - ((1 : ℝ) / (l : ℝ)) • f' x := by simp [y] + have hl0 : (l : ℝ) ≠ 0 := by exact ne_of_gt hl + have hα : 0 ≤ (1 : ℝ) / (l : ℝ) := by exact one_div_nonneg.mpr (le_of_lt hl) + simp [hyx, real_inner_smul_right, real_inner_self_eq_norm_sq, norm_smul] + field_simp [hl0]; ring_nf linarith end @@ -198,18 +200,18 @@ variable {x y : E} {s v : Set E} {l : NNReal} open Set theorem lipschitz_to_lnorm_sub_convex (hs : Convex ℝ s) - (h₁ : ∀ x ∈ s, HasGradientAt f (f' x) x) (h₂ : LipschitzOnWith l f' s) (hl : l > 0) : + (h₁ : ∀ x ∈ s, HasGradientAt f (f' x) x) (h₂ : LipschitzOnWith l f' s) (_ : l > 0) : ConvexOn ℝ s (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) := by rw [lipschitzOnWith_iff_norm_sub_le] at h₂ let g' : E → E := fun x ↦ l.1 • x - f' x - have H₂ : ∀ x ∈ s, ∀ y ∈ s, inner (g' x - g' y) (x - y) ≥ (0 : ℝ) := by + have H₂ : ∀ x ∈ s, ∀ y ∈ s, ⟪g' x - g' y, x - y⟫_ℝ ≥ (0 : ℝ) := by intro x hx y hy calc - _ = l.1 * (inner (x - y) (x - y)) - inner (f' x - f' y) (x - y) := by + _ = l.1 * (⟪x - y, x - y⟫_ℝ) - ⟪f' x - f' y, x - y⟫_ℝ := by simp [g'] rw [← sub_add, sub_right_comm, sub_add, inner_sub_left, ← smul_sub, inner_smul_left] simp only [conj_trivial] - _ = l * ‖x - y‖ ^ 2 - inner (f' x - f' y) (x - y) := by + _ = l * ‖x - y‖ ^ 2 - ⟪f' x - f' y, x - y⟫_ℝ := by simp; left apply real_inner_self_eq_norm_sq _ ≥ l * ‖x - y‖ ^ 2 - ‖f' x - f' y‖ * ‖x - y‖ := by @@ -221,7 +223,7 @@ theorem lipschitz_to_lnorm_sub_convex (hs : Convex ℝ s) rw [pow_two, ← mul_assoc] apply mul_le_mul (h₂ hx hy); linarith; apply norm_nonneg apply mul_nonneg _ (norm_nonneg _) - simp [hl] + simp _ = 0 := by simp have H₃ : ∀ x ∈ s, HasGradientAt (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) (g' x) x := by 
intro x hx @@ -241,9 +243,9 @@ theorem lipschitz_to_lnorm_sub_convex (hs : Convex ℝ s) theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) (h₂ : ConvexOn ℝ Set.univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x)) (lp : l > 0) (hfun: ConvexOn ℝ Set.univ f) (x : E) (y : E) : - inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + ⟪f' x - f' y, x - y⟫_ℝ ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by rw [ConvexOn] at hfun - let fs : E → (E → ℝ) := fun s => (fun x => f x - inner (f' s) x) + let fs : E → (E → ℝ) := fun s => (fun x => f x - ⟪f' s, x⟫_ℝ) have hfunconvex : ∀ s : E, ConvexOn ℝ Set.univ (fs s) := by intro s rw [ConvexOn] @@ -281,22 +283,22 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) rw [inner_add_right, real_inner_smul_right, real_inner_smul_right] calc _ = (l / 2) * ‖a • x₁ + b • y₁‖ ^ 2 - f (a • x₁ + b • y₁) + - (a * inner (f' s) x₁ + b * inner (f' s) y₁) := by ring_nf + (a * ⟪f' s, x₁⟫_ℝ + b * ⟪f' s, y₁⟫_ℝ) := by ring_nf _ ≤ a • (l / 2 * ‖x₁‖ ^ 2 - f x₁) + b • (l / 2 * ‖y₁‖ ^ 2 - f y₁) + - (a * inner (f' s) x₁ + b * inner (f' s) y₁) := by apply add_le_add_right h₂' - _ = a • (l / 2 * ‖x₁‖ ^ 2 - (f x₁ - inner (f' s) x₁)) + b • - (l / 2 * ‖y₁‖ ^ 2 - (f y₁ - inner (f' s) y₁)) := by simp; ring_nf + (a * ⟪f' s, x₁⟫_ℝ + b * ⟪f' s, y₁⟫_ℝ) := by apply add_le_add_right h₂' + _ = a • (l / 2 * ‖x₁‖ ^ 2 - (f x₁ - ⟪f' s, x₁⟫_ℝ)) + b • + (l / 2 * ‖y₁‖ ^ 2 - (f y₁ - ⟪f' s, y₁⟫_ℝ)) := by simp; ring_nf let gs' := fun s ↦ (fun z ↦ l • z - (fs' s z)) have hgx₁ : ∀ s x : E, HasGradientAt (gs s) ((gs' s) x) x := by intro s z apply HasGradientAt.sub (gradient_of_const_mul_norm l z) (hfconx₁ s z) - have hgx₂ : ∀ s z₁ z₂ : E, (gs s) z₁ + inner (gs' s z₁) (z₂ - z₁) ≤ gs s z₂ := by + have hgx₂ : ∀ s z₁ z₂ : E, (gs s) z₁ + ⟪gs' s z₁, z₂ - z₁⟫_ℝ ≤ gs s z₂ := by intro s z₁ z₂ apply Convex_first_order_condition' (hgx₁ s z₁) (hgxconvex s) · simp only [Set.mem_univ] · simp only [Set.mem_univ] have hfx₂ : ∀ (s x y₁ : E), (fs s) y₁ ≤ fs s x + - inner (fs' 
s x) (y₁ - x) + l / 2 * ‖y₁ - x‖ ^ 2 := by + ⟪fs' s x, y₁ - x⟫_ℝ + l / 2 * ‖y₁ - x‖ ^ 2 := by intro s z₁ z₂ simp only [fs, fs'] rcases hgx₂ s z₁ z₂ with hgx₂' @@ -304,30 +306,31 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have t₈ : gs s z₂ = l / 2 * ‖z₂‖ ^ 2 - fs s z₂ := by rfl have t₉ : gs' s z₁ = l • z₁ - fs' s z₁ := by rfl rw [t₇, t₈, t₉] at hgx₂' - have t₁₀ : fs s z₂ + (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + inner (l • z₁ - fs' s z₁) (z₂ - z₁)) + have t₁₀ : fs s z₂ + (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + ⟪l • z₁ - fs' s z₁, z₂ - z₁⟫_ℝ) ≤ l / 2 * ‖z₂‖ ^ 2 := by apply add_le_of_le_sub_left hgx₂' have t₁₁ : fs s z₂ ≤ l / 2 * ‖z₂‖ ^ 2 - (l / 2 * ‖z₁‖ ^ 2 - fs s z₁ + - inner (l • z₁ - fs' s z₁) (z₂ - z₁)) := by + ⟪l • z₁ - fs' s z₁, z₂ - z₁⟫_ℝ) := by rw [add_comm] at t₁₀ apply le_sub_left_of_add_le t₁₀ - simp only [] at t₁₁; rw [← sub_add (l / 2 * ‖z₁‖ ^ 2) _ _] at t₁₁ + rw [← sub_add (l / 2 * ‖z₁‖ ^ 2) _ _] at t₁₁ calc _ ≤ l / 2 * ‖z₂‖ ^ 2 - (l / 2 * ‖z₁‖ ^ 2 - f z₁ + - inner (f' s) z₁ + inner (l • z₁ - fs' s z₁) (z₂ - z₁)) := by apply t₁₁ - _ = l / 2 * ‖z₂‖ ^ 2 -(l / 2 * ‖z₁‖ ^ 2 - f z₁ + inner (f' s) z₁ + - (l * (inner z₁ z₂ - ‖z₁‖ ^ 2) - inner (f' z₁ - f' s) (z₂ - z₁))) := by + ⟪f' s, z₁⟫_ℝ + ⟪l • z₁ - fs' s z₁, z₂ - z₁⟫_ℝ) := by apply t₁₁ + _ = l / 2 * ‖z₂‖ ^ 2 -(l / 2 * ‖z₁‖ ^ 2 - f z₁ + ⟪f' s, z₁⟫_ℝ + + (l * (⟪z₁, z₂⟫_ℝ - ‖z₁‖ ^ 2) - ⟪f' z₁ - f' s, z₂ - z₁⟫_ℝ)) := by rw [inner_sub_left, inner_smul_left] - simp; rw [inner_sub_right, real_inner_self_eq_norm_sq];left ; simp - _ = f z₁ - inner (f' s) z₁ + inner (f' z₁ - f' s) (z₂ - z₁) + - l / 2 * (‖z₂‖ ^ 2 - 2 * inner z₂ z₁ + ‖z₁‖ ^ 2) := by + simp; rw [inner_sub_right, real_inner_self_eq_norm_sq] + _ = f z₁ - ⟪f' s, z₁⟫_ℝ + ⟪f' z₁ - f' s, z₂ - z₁⟫_ℝ + + l / 2 * (‖z₂‖ ^ 2 - 2 * ⟪z₂, z₁⟫_ℝ + ‖z₁‖ ^ 2) := by field_simp; ring_nf; rw [real_inner_comm] - _ = f z₁ - inner (f' s) z₁ + inner (f' z₁ - f' s) (z₂ - z₁) + l / 2 * ‖z₂ - z₁‖ ^ 2 := by + _ = f z₁ - ⟪f' s, z₁⟫_ℝ + ⟪f' z₁ - f' s, z₂ - z₁⟫_ℝ + l / 2 
* ‖z₂ - z₁‖ ^ 2 := by rw [← norm_sub_sq_real] have hfs₃ : ∀ s : E, IsMinOn (fs s) univ s := by intro s apply first_order_convex (hfconx₁ s) (hfunconvex s) - simp only [fs, fs', sub_self] + simp_all only [mem_univ, smul_eq_mul, tsub_le_iff_right, forall_const, true_and, gt_iff_lt, sub_self, + fs, fs', gs, gs'] have hfy₃ : IsMinOn (fs y) _ y := hfs₃ y have hfx₄ : fs x x ≤ fs x y - 1 / (2 * l) * ‖fs' x y‖ ^ 2 := by have : fs x x ≤ fs x (y - (1 / l) • fs' x y) := by @@ -338,7 +341,7 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) apply le_trans this rcases hfx₂ x y (y - (1 / l) • fs' x y) with hfx₂' calc - _ ≤ fs x y + inner (fs' x y) (y - (1 / l) • fs' x y - y) + _ ≤ fs x y + ⟪fs' x y, y - (1 / l) • fs' x y - y⟫_ℝ + l / 2 * ‖y - (1 / l) • fs' x y - y‖ ^ 2 := by apply hfx₂' _ = fs x y - 1 / (2 * l) * ‖fs' x y‖ ^ 2 := by have : y - (1 / l) • fs' x y - y = - (1 / l) • fs' x y := by simp @@ -354,14 +357,14 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) apply le_trans this rcases hfx₂ y x (x - (1 / l) • fs' y x) with hfy₂' calc - _ ≤ fs y x + inner (fs' y x) (x - (1 / l) • fs' y x - x) + _ ≤ fs y x + ⟪fs' y x, x - (1 / l) • fs' y x - x⟫_ℝ + l / 2 * ‖x - (1 / l) • fs' y x - x‖ ^ 2 := by apply hfy₂' _ = fs y x - 1 / (2 * l) * ‖fs' y x‖ ^ 2 := by have : x - (1 / l) • fs' y x - x = - (1 / l) • fs' y x := by simp rw [this, real_inner_smul_right] rw [← real_inner_self_eq_norm_sq, ← real_inner_self_eq_norm_sq, real_inner_smul_right] rw [real_inner_smul_left]; field_simp; ring - have hh₁: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f y - f x - inner (f' x) (y - x) := by + have hh₁: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f y - f x - ⟪f' x, y - x⟫_ℝ := by calc (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ fs x y - fs x x := by have : f' x - f' y = - fs' x y := by @@ -371,20 +374,20 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) have : ‖- fs' x y‖ = ‖fs' x y‖ :=by apply norm_neg rw [this] linarith [hfx₄] - _ = f y - f x 
- inner (f' x) (y - x) := by - have t₄: fs x y = f y - inner (f' x) y := by rfl - have t₅: fs x x = f x - inner (f' x) x := by rfl + _ = f y - f x - ⟪f' x, y - x⟫_ℝ := by + have t₄: fs x y = f y - ⟪f' x, y⟫_ℝ := by rfl + have t₅: fs x x = f x - ⟪f' x, x⟫_ℝ := by rfl rw [t₄,t₅,inner_sub_right] ring - have hh₂: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f x - f y - inner (f' y) (x - y) := by + have hh₂: (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ f x - f y - ⟪f' y, x - y⟫_ℝ := by calc (1 / (2 * l)) * ‖f' x - f' y‖ ^ 2 ≤ fs y x -fs y y := by - have : f' x - f' y = fs' y x := by simp + have : f' x - f' y = fs' y x := by aesop rw [this] linarith [hfy₄] - _ = f x - f y - inner (f' y) (x - y) := by - have t₄' : fs y y = f y - inner (f' y) y := by rfl - have t₅' : fs y x = f x - inner (f' y) x := by rfl + _ = f x - f y - ⟪f' y, x - y⟫_ℝ := by + have t₄' : fs y y = f y - ⟪f' y, y⟫_ℝ := by rfl + have t₅' : fs y x = f x - ⟪f' y, x⟫_ℝ := by rfl rw [t₄', t₅', inner_sub_right] ring calc @@ -392,24 +395,24 @@ theorem convex_to_lower {l : ℝ} (h₁ : ∀ x : E, HasGradientAt f (f' x) x) field_simp rw [← mul_two,mul_comm] ring - _ ≤ (f y - f x - inner (f' x) (y - x)) + (f x - f y - inner (f' y) (x - y)) := by + _ ≤ (f y - f x - ⟪f' x, y - x⟫_ℝ) + (f x - f y - ⟪f' y, x - y⟫_ℝ) := by apply add_le_add hh₁ hh₂ - _ = inner (f' x - f' y) (x - y) := by + _ = ⟪f' x - f' y, x - y⟫_ℝ := by rw [inner_sub_left] - have t₆ : (inner (f' x) (y - x) : ℝ) = - (inner (f' x) (x - y) : ℝ) := by + have t₆ : (⟪f' x, y - x⟫_ℝ : ℝ) = - (⟪f' x, x - y⟫_ℝ : ℝ) := by rw [inner_sub_right, inner_sub_right]; ring rw[t₆]; ring theorem lipschitz_to_lower (h₁ : ∀ x, HasGradientAt f (f' x) x) (h₂ : LipschitzWith l f') (hfun : ConvexOn ℝ Set.univ f) (hl : l > 0) : - ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + ∀ x y, ⟪f' x - f' y, x - y⟫_ℝ ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by obtain convex : ConvexOn ℝ Set.univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) := lipschitz_to_lnorm_sub_convex convex_univ (fun x _ => h₁ x) 
(lipschitzOnWith_univ.mpr h₂) hl exact convex_to_lower h₁ convex hl hfun theorem lower_iff_lipschitz (h₁ : ∀ x, HasGradientAt f (f' x) x) (hfun: ConvexOn ℝ Set.univ f) (hl : l > 0) : LipschitzWith l f' ↔ - ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := + ∀ x y, ⟪f' x - f' y, x - y⟫_ℝ ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := ⟨fun h ↦ lipschitz_to_lower h₁ h hfun hl, fun h ↦ lower_to_lipschitz h hl⟩ theorem lipshictz_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) @@ -424,7 +427,7 @@ theorem lipshictz_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) theorem lower_iff_lnorm_sub_convex (h₁ : ∀ x, HasGradientAt f (f' x) x) (hfun: ConvexOn ℝ Set.univ f) (hl : l > 0) : ConvexOn ℝ univ (fun x ↦ l / 2 * ‖x‖ ^ 2 - f x) - ↔ ∀ x y, inner (f' x - f' y) (x - y) ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by + ↔ ∀ x y, ⟪f' x - f' y, x - y⟫_ℝ ≥ 1 / l * ‖f' x - f' y‖ ^ 2 := by rw [← lipshictz_iff_lnorm_sub_convex h₁ hfun hl] rw [lower_iff_lipschitz h₁ hfun hl] diff --git a/Optlib/Function/MinimaClosedFunction.lean b/Optlib/Function/MinimaClosedFunction.lean index 44c01dc..a4126ad 100644 --- a/Optlib/Function/MinimaClosedFunction.lean +++ b/Optlib/Function/MinimaClosedFunction.lean @@ -33,14 +33,14 @@ private lemma l0 {f : E → F}(y : F) (h : (f ⁻¹' Set.Iic y).Nonempty) : · exact Exists.intro x xeq · exact Exists.intro x xeq have h₁ : sInf {f x | x ∈ f ⁻¹' Set.Iic y} ≤ sInf {f x | x ∈ (f ⁻¹' Set.Iic y)ᶜ} := by - apply sInf_le_sInf_of_forall_exists_le + apply sInf_le_sInf_of_isCoinitialFor intro y' ynsub rcases h with ⟨x', xsub⟩; use f x' constructor · exact ⟨x', xsub, rfl⟩ rcases ynsub with ⟨x, xnsub, xeq⟩ apply le_trans xsub (Eq.trans_ge xeq (le_of_lt _)) - simp only [← Set.preimage_setOf_eq, ← Set.preimage_compl, Set.compl_Iic, Set.Ioi_def] at xnsub + simp only [← Set.preimage_compl, Set.compl_Iic] at xnsub assumption calc sInf {f x | x ∈ f ⁻¹' Set.Iic y} = @@ -63,6 +63,7 @@ variable [TopologicalSpace E] [TopologicalSpace F] [OrderTopology F] variable 
[FirstCountableTopology E] [FirstCountableTopology F] +omit [DenselyOrdered F] in /- If a premiage of `f` is nonempty and compact, then its minimum point set `{x | IsMinOn f univ x}` is nonempty -/ theorem IsMinOn.of_isCompact_preimage (hf : LowerSemicontinuous f) {y : F} @@ -82,13 +83,14 @@ theorem IsMinOn.of_isCompact_preimage (hf : LowerSemicontinuous f) {y : F} apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) have inepi : (x', sInf {f x | x ∈ (f ⁻¹' Set.Iic y)}) ∈ {p : E × F | f p.1 ≤ p.2} := (IsClosed.isSeqClosed (LowerSemicontinuous.isClosed_epigraph hf)) - (fun n => Eq.le (by rfl)) (Tendsto.prod_mk_nhds cxk cfxk) + (fun n => Eq.le (by rfl)) (Filter.Tendsto.prodMk_nhds cxk cfxk) use x'; intro xx _ apply le_of_eq_of_le · apply le_antisymm inepi (sInf_le (Exists.intro x' ⟨xsub', rfl⟩)) · apply le_of_eq_of_le (l0 y h1) (sInf_le (by use xx)) -/- If a premiage of `f` is nonempty and compact, +omit [DenselyOrdered F] in +/- If a preimage of `f` is nonempty and compact, then its minimum point set `{x | IsMinOn f univ x}` is compact -/ theorem IsCompact_isMinOn_of_isCompact_preimage (hf : LowerSemicontinuous f) {y : F} @@ -114,9 +116,9 @@ section variable {𝕜 : Type _} {f : E → F} variable [AddCommMonoid E] [CompleteLinearOrder F] -variable [LinearOrderedRing 𝕜] [DenselyOrdered 𝕜] [Module 𝕜 E] +variable [Ring 𝕜] [LinearOrder 𝕜] [IsStrictOrderedRing 𝕜] [DenselyOrdered 𝕜] [Module 𝕜 E] -def strong_quasi (f : E → F) (𝕜 : Type _) [LinearOrderedRing 𝕜] [Module 𝕜 E] : Prop := +def strong_quasi (f : E → F) (𝕜 : Type _) [Ring 𝕜] [LinearOrder 𝕜] [IsStrictOrderedRing 𝕜] [Module 𝕜 E] : Prop := ∀ ⦃x⦄, ∀ ⦃y⦄, x ≠ y → ∀ ⦃a b : 𝕜⦄, 0 < a → 0 < b → a + b = 1 → f ((a • x : E) + (b • y : E)) < max (f x) (f y) @@ -133,6 +135,6 @@ theorem isMinOn_unique {x y : E} (hf' : strong_quasi f 𝕜) apply hf' neq lta lta' eqone simp only [isMinOn_iff] at hy specialize hy (a • x + (1 - a) • y) trivial - apply not_le_of_lt h hy + apply not_le_of_gt h hy end diff --git 
a/Optlib/Function/Proximal.lean b/Optlib/Function/Proximal.lean index 2ad865f..bedccf2 100644 --- a/Optlib/Function/Proximal.lean +++ b/Optlib/Function/Proximal.lean @@ -17,6 +17,8 @@ set_option linter.unusedVariables false -/ noncomputable section +set_option linter.unusedSectionVars false + open Set InnerProductSpace Topology Filter variable {E : Type*} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] @@ -64,11 +66,13 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous _ ≤ f z + ‖z - x‖ ^ 2 / 2 := by simp; linarith [sq_nonneg ‖z - x‖] have hg : LowerSemicontinuous g := by - apply LowerSemicontinuous.add hc - apply Continuous.lowerSemicontinuous - apply continuous_iff_continuousOn_univ.2 - apply HasGradientAt.continuousOn - intro u _; apply gradient_of_sq u + refine hc.add ?_ + have hcont : Continuous (fun z : E => ‖z - x‖ ^ 2 / 2) := by + have h1 : Continuous (fun z : E => ‖z - x‖) := (continuous_id.sub continuous_const).norm + have h2 : Continuous (fun z : E => ‖z - x‖ ^ 2) := h1.pow 2 + have h3 : Continuous (fun z : E => ‖z - x‖ ^ 2 * (1 / 2)) := h2.mul continuous_const + simpa [div_eq_mul_inv, mul_comm] using h3 + exact hcont.lowerSemicontinuous have S_bddbelow : BddBelow ImS := by use L; rw [mem_lowerBounds] rintro gy ⟨y0, _, gyeq⟩; rw [← gyeq]; exact boundg y0 @@ -101,7 +105,9 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous rw [add_right_comm, mul_comm]; simp; linarith [sq_nonneg b] calc 0 ≤ a ^ 2 / 2 := by linarith [sq_nonneg a] - _ ≤ b * 2 / 2 := by rw [div_le_div_right]; exact h1; linarith + _ ≤ b * 2 / 2 := by + have h := mul_le_mul_of_nonneg_right h1 (by norm_num : 0 ≤ (1 / 2 : ℝ)) + simpa [div_eq_mul_inv, mul_comm, mul_left_comm, mul_assoc] using h _ ≤ b + 1 := by simp linarith apply aux ieq @@ -119,10 +125,13 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) have inepi : (x', sInf ImS) ∈ epi := 
by let p := fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c) - have pnin : ∀ c : ℕ, p c ∈ epi := by simp [epi] + have pnin : ∀ c : ℕ, p c ∈ epi := by + simp [epi] + exact fun c ↦ + Std.IsPreorder.le_refl (g (p c).1) apply IsClosed.isSeqClosed epi_closed pnin show Tendsto (fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c)) atTop (𝓝 (x', sInf ImS)) - apply Tendsto.prod_mk_nhds cxk cfxk + apply Tendsto.prodMk_nhds cxk cfxk have minima_ieq : g x' ≤ sInf ImS := inepi have minima : ∀ w : E, g x' ≤ g w := by intro w @@ -134,7 +143,7 @@ theorem prox_set_compact_of_lowersemi (f : E → ℝ) (hc : LowerSemicontinuous linarith · have gwnin : g x < g w := by simp [g, S] at hw; simp [g]; exact hw - have gxin : g x ∈ ImS := by use x; simp [g, ImS, S] + have gxin : g x ∈ ImS := by use x; simp [g, S] have legw : sInf ImS ≤ g w := by rw [Real.sInf_le_iff S_bddbelow neImS] intro _ epos; use g x; use gxin; linarith @@ -170,44 +179,60 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) have subd: ∃ z : E, Nonempty (SubderivAt f z) := by use x; apply SubderivAt.nonempty hconv hc have hc : LowerSemicontinuous f := - Continuous.lowerSemicontinuous (continuous_iff_continuousOn_univ.mpr hc) + Continuous.lowerSemicontinuous (continuousOn_univ.mp hc) rcases subd with ⟨z, a, ain⟩ rw [← mem_SubderivAt, HasSubgradientAt] at ain let g := fun u ↦ f u + ‖u - x‖ ^ 2 / 2 let epi := {p : (E × ℝ) | g p.1 ≤ p.2} - have second_lower_bound (y : E) : g y ≥ f z + inner a (y - z) + ‖y - x‖ ^ 2 / 2 := by - simp [g] - specialize ain y; linarith - have lower_bound (y : E) : f z + inner a (x - z) - ‖a‖ ^ 2 / 2 ≤ g y := by + have second_lower_bound (y : E) : g y ≥ f z + ⟪a, y - z⟫_ℝ + ‖y - x‖ ^ 2 / 2 := by + have h := ain y + have h' := add_le_add_right h (‖y - x‖ ^ 2 / 2) + simpa [g, add_comm, add_left_comm, add_assoc] using h' + have lower_bound (y : E) : f z + ⟪a, x - z⟫_ℝ - ‖a‖ ^ 2 / 2 ≤ g y := by have : y - z = x - z + (y - x) := by simp specialize second_lower_bound y rw [this, 
inner_add_right, ← add_assoc, add_assoc] at second_lower_bound - have : 0 ≤ ‖a‖ ^ 2 / 2 + inner a (y - x) + ‖y - x‖ ^ 2 / 2 := by - field_simp; rw [mul_comm, ← norm_add_sq_real] - apply div_nonneg (sq_nonneg ‖a + (y - x)‖) - norm_num + have : 0 ≤ ‖a‖ ^ 2 / 2 + ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2 / 2 := by + have hrewrite : + ‖a‖ ^ 2 / 2 + ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2 / 2 + = ‖a + (y - x)‖ ^ 2 / 2 := by + have h1 : + (‖a‖ ^ 2 + 2 * ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2) / 2 + = ‖a + (y - x)‖ ^ 2 / 2 := by + simpa using + (congrArg (fun t : ℝ => t / 2) (norm_add_sq_real a (y - x))).symm + have h2 : + ‖a‖ ^ 2 / 2 + ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2 / 2 + = (‖a‖ ^ 2 + 2 * ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2) / 2 := by + ring + simpa [h2] using h1 + have hnonneg : 0 ≤ ‖a + (y - x)‖ ^ 2 / 2 := by + exact div_nonneg (sq_nonneg ‖a + (y - x)‖) (by norm_num) + simpa [hrewrite] using hnonneg calc - f z + inner a (x - z) - ‖a‖ ^ 2 / 2 ≤ f z + inner a (x - z) - ‖a‖ ^ 2 / 2 + - (‖a‖ ^ 2 / 2 + inner a (y - x) + ‖y - x‖ ^ 2 / 2) := le_add_of_nonneg_right this - _ = f z + inner a (x - z) + (inner a (y - x) + ‖y - x‖ ^ 2 / 2) := by ring + f z + ⟪a, x - z⟫_ℝ - ‖a‖ ^ 2 / 2 ≤ f z + ⟪a, x - z⟫_ℝ - ‖a‖ ^ 2 / 2 + + (‖a‖ ^ 2 / 2 + ⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2 / 2) := le_add_of_nonneg_right this + _ = f z + ⟪a, x - z⟫_ℝ + (⟪a, y - x⟫_ℝ + ‖y - x‖ ^ 2 / 2) := by ring _ ≤ g y := second_lower_bound have hg : LowerSemicontinuous g := by - apply LowerSemicontinuous.add hc - apply Continuous.lowerSemicontinuous - apply continuous_iff_continuousOn_univ.2 - apply HasGradientAt.continuousOn - intro u _; apply gradient_of_sq u + have hcont : Continuous (fun z : E => ‖z - x‖ ^ 2 / 2) := by + have h1 : Continuous (fun z : E => ‖z - x‖) := (continuous_id.sub continuous_const).norm + have h2 : Continuous (fun z : E => ‖z - x‖ ^ 2) := h1.pow 2 + have h3 : Continuous (fun z : E => ‖z - x‖ ^ 2 * (1 / 2)) := h2.mul continuous_const + simpa [div_eq_mul_inv, mul_comm] using h3 + exact hc.add hcont.lowerSemicontinuous have epi_closed : IsClosed epi := 
by apply bounded_lowersemicontinuous_to_epi_closed · exact lowerSemicontinuousOn_univ_iff.2 hg - use (f z + inner a (x - z) - ‖a‖ ^ 2 / 2) + refine ⟨f z + ⟪a, x - z⟫_ℝ - ‖a‖ ^ 2 / 2, ?_⟩ + intro y; exact lower_bound y let S := {y : E| g y ≤ g z} have eq : S = (g ⁻¹' Set.Iic (g z)) := by constructor let ImS := {g y | y ∈ S} have neImS : Set.Nonempty ImS := by use g z; simp [ImS, S]; use z have S_bddbelow : BddBelow ImS := by - use (f z + inner a (x - z) - ‖a‖ ^ 2 / 2) + use (f z + ⟪a, x - z⟫_ℝ - ‖a‖ ^ 2 / 2) rw [mem_lowerBounds] rintro gy ⟨y0, _, gyeq⟩ rw [← gyeq]; exact lower_bound y0 @@ -221,33 +246,42 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) intro u uin simp [S] at uin apply mem_closedBall_iff_norm.2 - have norm_bound: ‖u - (x - a)‖ ≤ ‖z - (x - a)‖ + 2 := by - have ieq : f z + inner a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by - calc - f z + inner a (u - z) + ‖u - x‖ ^ 2 / 2 ≤ g u := second_lower_bound u - _ ≤ f z + ‖z - x‖ ^ 2 / 2 := uin - _ ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by linarith - rw [add_assoc, add_assoc, add_le_add_iff_left] at ieq - have eq : inner a (u - z) + ‖u - x‖ ^ 2 / 2 = - (‖u - (x - a)‖ ^ 2 - ‖a‖ ^ 2 + 2 * inner (x - z) a) / 2 := by - field_simp; rw [← sub_add, norm_add_sq_real]; ring_nf - rw [add_assoc, ← add_mul, ← inner_add_left, add_comm, real_inner_comm]; simp - rw [eq] at ieq - have ieq2 : ‖u - (x - a)‖ ^ 2 ≤ ‖z - (x - a)‖ ^ 2 + 2 := by - field_simp at ieq; rw [div_le_div_right, sub_add, sub_le_iff_le_add] at ieq - rw [add_right_comm, add_comm (‖z - x‖ ^ 2), norm_sub_rev z x] at ieq - rw [real_inner_comm, ← norm_sub_sq_real, ← sub_add a, sub_add_comm] at ieq - rw [sub_add] at ieq; exact ieq; norm_num - have : |‖z - (x - a)‖ + 2| = ‖z - (x - a)‖ + 2 := by - apply abs_of_pos; apply add_pos_of_nonneg_of_pos (norm_nonneg (z - (x - a))) - simp - rw [← abs_norm, ← this, ← sq_le_sq, add_sq] + have ieq : f z + ⟪a, u - z⟫_ℝ + ‖u - x‖ ^ 2 / 2 ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by calc - ‖u - (x - a)‖ ^ 
2 ≤ ‖z - (x - a)‖ ^ 2 + 2 := ieq2 - _ ≤ ‖z - (x - a)‖ ^ 2 + 2 * ‖z - (x - a)‖ * 2 + 2 ^ 2 := by - rw [add_assoc, add_le_add_iff_left]; apply le_add_of_nonneg_of_le - simp; norm_num - exact norm_bound + f z + ⟪a, u - z⟫_ℝ + ‖u - x‖ ^ 2 / 2 ≤ g u := second_lower_bound u + _ ≤ f z + ‖z - x‖ ^ 2 / 2 := uin + _ ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by linarith + _ ≤ f z + ‖z - x‖ ^ 2 / 2 + 1 := by linarith + rw [add_assoc, add_assoc, add_le_add_iff_left] at ieq + have eq : ⟪a, u - z⟫_ℝ + ‖u - x‖ ^ 2 / 2 = + (‖u - (x - a)‖ ^ 2 - ‖a‖ ^ 2 + 2 * ⟪x - z, a⟫_ℝ) / 2 := by + field_simp; rw [← sub_add, norm_add_sq_real]; ring_nf + rw [add_assoc, ← add_mul, ← inner_add_left, add_comm, real_inner_comm]; simp + rw [eq] at ieq + have ieq2 : ‖u - (x - a)‖ ^ 2 ≤ ‖z - (x - a)‖ ^ 2 + 2 := by + -- clear the division by 2 on both sides + have ieq' := + (mul_le_mul_of_nonneg_right ieq (by norm_num : 0 ≤ (2 : ℝ))) + -- simplify ((·)/2)*2 = · and (·/2 + 1)*2 = · + 2 + have ieq' : + ‖u - (x - a)‖ ^ 2 - ‖a‖ ^ 2 + 2 * ⟪x - z, a⟫_ℝ ≤ ‖z - x‖ ^ 2 + 2 := by + have h2 : (2 : ℝ) ≠ 0 := by norm_num + simpa [add_mul, mul_add, div_eq_mul_inv, h2] using ieq' + rw [sub_add, sub_le_iff_le_add] at ieq' + rw [add_right_comm, add_comm (‖z - x‖ ^ 2), norm_sub_rev z x] at ieq' + rw [real_inner_comm, ← norm_sub_sq_real, ← sub_add a, sub_add_comm] at ieq' + rw [sub_add] at ieq' + exact ieq' + have : |‖z - (x - a)‖ + 2| = ‖z - (x - a)‖ + 2 := by + apply abs_of_pos; apply add_pos_of_nonneg_of_pos (norm_nonneg (z - (x - a))) + simp + rw [← abs_norm, ← this, ← sq_le_sq, add_sq] + calc + ‖u - (x - a)‖ ^ 2 ≤ ‖z - (x - a)‖ ^ 2 + 2 := ieq2 + _ ≤ ‖z - (x - a)‖ ^ 2 + 2 * ‖z - (x - a)‖ * 2 + 2 ^ 2 := by + rw [add_assoc, add_le_add_iff_left]; apply le_add_of_nonneg_of_le + simp; norm_num +-- exact norm_bound have compactb : IsCompact B := isCompact_closedBall (x - a) (‖z - (x - a)‖ + 2) rw [← closure_eq_iff_isClosed] at closeds; rw [← closeds] apply IsCompact.closure_of_subset compactb sinb @@ -260,10 +294,10 @@ theorem 
prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) apply Tendsto.comp cfx (StrictMono.tendsto_atTop mono) have inepi : (x', sInf ImS) ∈ epi := by let p := fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c) - have pnin : ∀ c : ℕ, p c ∈ epi := by simp [epi] + have pnin : ∀ c : ℕ, p c ∈ epi := by simp [epi]; exact fun c ↦ Std.IsPreorder.le_refl (g (p c).1) apply IsClosed.isSeqClosed epi_closed pnin show Tendsto (fun c ↦ (((fun n ↦ xn n) ∘ k) c, (g ∘ xn ∘ k) c)) atTop (𝓝 (x', sInf ImS)) - apply Tendsto.prod_mk_nhds cxk cfxk + apply Tendsto.prodMk_nhds cxk cfxk have minima_ieq : g x' ≤ sInf ImS := inepi have minima : ∀ w : E, g x' ≤ g w := by intro w @@ -275,7 +309,7 @@ theorem prox_set_compact_of_convex (f : E → ℝ) (hc : ContinuousOn f univ) linarith · have gwnin : g z < g w := by simp [S] at hw; simp [g]; exact hw - have gzin : g z ∈ ImS := by use z; simp [ImS, S] + have gzin : g z ∈ ImS := by use z; simp [S] have legw : sInf ImS ≤ g w := by rw [Real.sInf_le_iff S_bddbelow neImS] intro _ epos; use g z; use gzin; linarith @@ -361,37 +395,42 @@ lemma convex_of_norm_sq {s : Set E} (x : E) (conv: Convex ℝ s) : ConvexOn ℝ s (fun (u : E) ↦ ‖u - x‖ ^ 2 / 2) := by rw [ConvexOn]; use conv intro y _ z _ a b anneg bnneg absum1 - field_simp have eq1 : a • y + b • z - x = a • (y - x) + b • (z - x) := by - rw [smul_sub, smul_sub, add_comm_sub, sub_sub, ← add_smul, add_comm b a] - rw [absum1, one_smul, ← add_sub] + rw [smul_sub, smul_sub, add_comm_sub, sub_sub, ← add_smul, add_comm b a, absum1, + one_smul, ← add_sub] rw [eq1] - have ieq1 (u v : E) : ‖a • u + b • v‖ ^ 2 / 2 ≤ (a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2) / 2 := by - rw [div_le_div_right, norm_add_sq_real, add_comm, ← add_assoc] - rw [norm_smul, norm_smul, mul_pow, mul_pow]; simp - nth_rw 3 [← mul_one a]; nth_rw 3 [← one_mul b] - rw [← absum1]; ring_nf; rw [add_right_comm] - apply add_le_add_right - rw [add_comm]; apply add_le_add_right - calc - inner (a • u) (b • v) * 2 ≤ ‖a • u‖ * ‖b • v‖ * 2 := by - rw 
[mul_le_mul_right] - apply real_inner_le_norm - simp - _ = a * b * (2 * ‖u‖ * ‖v‖) := by - rw [norm_smul, norm_smul]; simp - rw [abs_of_nonneg anneg, abs_of_nonneg bnneg]; ring - _ ≤ a * b * (‖u‖ ^ 2 + ‖v‖ ^ 2) := by - by_cases a * b > 0 - · rw [mul_le_mul_left] - apply two_mul_le_add_pow_two - linarith - · have ieq2 : 0 ≤ a * b := by apply mul_nonneg anneg bnneg - have ieq3 : 0 = a * b := by linarith - rw [← ieq3]; simp - _ = b * ‖v‖ ^ 2 * a + b * a * ‖u‖ ^ 2 := by ring - simp - apply ieq1 + have ieq1 (u v : E) : + ‖a • u + b • v‖ ^ 2 / 2 ≤ a * (‖u‖ ^ 2 / 2) + b * (‖v‖ ^ 2 / 2) := by + have hbase : + ‖a • u + b • v‖ ^ 2 ≤ a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2 := by + rw [norm_add_sq_real, add_comm, ← add_assoc] + rw [norm_smul, norm_smul, mul_pow, mul_pow]; simp + nth_rw 3 [← mul_one a]; nth_rw 3 [← one_mul b] + rw [← absum1]; ring_nf; rw [add_right_comm] + apply add_le_add_right + rw [add_comm]; apply add_le_add_right + calc + ⟪a • u, b • v⟫_ℝ * 2 + ≤ ‖a • u‖ * ‖b • v‖ * 2 := by + have h := real_inner_le_norm (a • u) (b • v) + exact mul_le_mul_of_nonneg_right h (by norm_num) + _ = a * b * (2 * ‖u‖ * ‖v‖) := by + rw [norm_smul, norm_smul]; simp + rw [abs_of_nonneg anneg, abs_of_nonneg bnneg]; ring + _ ≤ b * ‖v‖ ^ 2 * a + b * a * ‖u‖ ^ 2 := by + have hab_nonneg : 0 ≤ a * b := mul_nonneg anneg bnneg + have hineq : 2 * ‖u‖ * ‖v‖ ≤ ‖u‖ ^ 2 + ‖v‖ ^ 2 := by + simpa using two_mul_le_add_pow_two (‖u‖) (‖v‖) + have hmul : + a * b * (2 * ‖u‖ * ‖v‖) ≤ a * b * (‖u‖ ^ 2 + ‖v‖ ^ 2) := + mul_le_mul_of_nonneg_left hineq hab_nonneg + simp; grind + have : (1 / 2 : ℝ) * ‖a • u + b • v‖ ^ 2 + ≤ (1 / 2 : ℝ) * (a * ‖u‖ ^ 2 + b * ‖v‖ ^ 2) := + mul_le_mul_of_nonneg_left hbase (by norm_num) + simp; grind + have h := ieq1 (y - x) (z - x) + simpa [smul_eq_mul, div_eq_mul_inv, mul_add, mul_comm, mul_left_comm, mul_assoc] using h /- Sub-derivative at x equal to sub-derivative within univ at x @@ -425,20 +464,20 @@ theorem proximal_shift (a : E) {t : ℝ} (tnz : t ≠ 0) (f : E → ℝ): calc t ^ 2 * f (t • z + 
a) + ‖t • z - t • x‖ ^ 2 / 2 = t ^ 2 * (f (t • z + a) + ‖z - x‖ ^ 2 / 2) := by - rw [← smul_sub, norm_smul, mul_pow, mul_add]; field_simp + rw [← smul_sub, norm_smul, mul_pow, mul_add]; field_simp; simp; grind _ ≤ t ^ 2 * (f y + ‖t⁻¹ • (y - a) - x‖ ^ 2 / 2) := by - rw [mul_le_mul_left]; use cond; rw [sq_pos_iff]; use tnz + rw [mul_le_mul_iff_right₀]; use cond; rw [sq_pos_iff]; use tnz _ = t ^ 2 * f y + ‖t • ((1 / t) • (y - a) - x)‖ ^ 2 / 2 := by - rw [mul_add, norm_smul, mul_pow]; field_simp + rw [mul_add, norm_smul, mul_pow]; field_simp; simp; grind _ = t ^ 2 * f y + ‖y - (t • x + a)‖ ^ 2 / 2 := by - rw [smul_sub, ← smul_assoc, smul_eq_mul, ← sub_sub, sub_right_comm]; field_simp + rw [smul_sub, ← smul_assoc, smul_eq_mul, ← sub_sub, sub_right_comm]; field_simp; simp use tnz · intro cond y specialize cond (t • y + a) rw [← smul_sub, norm_smul, mul_pow] at cond; simp at cond rw [← smul_sub, norm_smul, mul_pow] at cond; simp at cond rw [mul_div_assoc, ← mul_add, mul_div_assoc, ← mul_add] at cond - rw [mul_le_mul_left] at cond; use cond; rw [sq_pos_iff]; use tnz + rw [mul_le_mul_iff_right₀] at cond; use cond; rw [sq_pos_iff]; use tnz /- relation of proximal between a function and its scale @@ -452,8 +491,8 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): constructor · intro cond y specialize cond (t • y) - have tsq : 0 < t ^ 2 := by field_simp - rw [← mul_le_mul_left tsq] + have tsq : 0 < t ^ 2 := by field_simp; aesop + rw [← mul_le_mul_iff_right₀ tsq] calc t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) = t * f (t⁻¹ • z) + ‖z - x‖ ^ 2 / 2 := by @@ -466,11 +505,11 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): rw [← smul_assoc, smul_eq_mul, inv_mul_cancel₀]; simp rw [smul_sub, ← smul_assoc, smul_eq_mul, mul_inv_cancel₀]; simp; repeat linarith _ = t ^ 2 * (t⁻¹ * f y + ‖y - t⁻¹ • x‖ ^ 2 / 2) := by - rw [mul_add, norm_smul, mul_pow]; field_simp + rw [mul_add, norm_smul, mul_pow]; field_simp; simp; grind · intro cond y specialize cond 
(t⁻¹ • y) - have tsq : 0 < t ^ 2 := by field_simp - rw [← mul_le_mul_left tsq] at cond + have tsq : 0 < t ^ 2 := by field_simp; exact sq_pos_of_pos tpos + rw [← mul_le_mul_iff_right₀ tsq] at cond calc t * f (t⁻¹ • z) + ‖z - x‖ ^ 2 / 2 = t ^ 2 * (t⁻¹ * f (t⁻¹ • z) + ‖t⁻¹ • z - t⁻¹ • x‖ ^ 2 / 2) := by @@ -479,7 +518,7 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): rw [← pow_two, mul_inv_cancel₀]; repeat simp; repeat linarith _ ≤ t ^ 2 * (t⁻¹ * f (t⁻¹ • y) + ‖t⁻¹ • y - t⁻¹ • x‖ ^ 2 / 2) := cond _ = t ^ 2 * (t⁻¹ * f (t⁻¹ • y)) + ‖t • (t⁻¹ • y - t⁻¹ • x)‖ ^ 2 / 2 := by - rw [mul_add, norm_smul, mul_pow]; field_simp + rw [mul_add, norm_smul, mul_pow]; field_simp; simp; grind _ = t * f (t⁻¹ • y) + ‖y - x‖ ^ 2 / 2 := by rw [pow_two t, ← mul_assoc, mul_assoc _ _ (t⁻¹), mul_inv_cancel₀] rw [smul_sub, ← smul_assoc, smul_eq_mul, mul_inv_cancel₀]; simp @@ -489,22 +528,27 @@ theorem proximal_scale {t : ℝ} (tpos : 0 < t) (f : E → ℝ): change of proximal when added a linear components -/ theorem proximal_add_linear (a : E) (f : E → ℝ): - ∀ z : E, prox_prop (fun x ↦ f x + inner a x) x z ↔ + ∀ z : E, prox_prop (fun x ↦ f x + ⟪a, x⟫_ℝ) x z ↔ prox_prop f (x - a) z := by intro z rw [prox_prop, prox_prop, isMinOn_univ_iff, isMinOn_univ_iff] have aux (v : E) : ‖v - (x - a)‖ ^ 2 / 2 = - ‖v - x‖ ^ 2 / 2 + inner a v + (‖a‖ ^ 2 / 2 - inner a x) := by - rw [← sub_add, norm_add_sq_real, real_inner_comm, inner_sub_right]; ring_nf + ‖v - x‖ ^ 2 / 2 + ⟪a, v⟫_ℝ + (‖a‖ ^ 2 / 2 - ⟪a, x⟫_ℝ) := by + have hx : v - (x - a) = (v - x) + a := by + simp [sub_eq_add_neg, add_comm, add_left_comm] + have h := norm_add_sq_real (v - x) a + have h' := congrArg (fun t : ℝ => t / 2) (by simpa [hx] using h) + have h2 : (2 : ℝ) ≠ 0 := by norm_num + have hinner : ⟪v - x, a⟫_ℝ = ⟪a, v⟫_ℝ - ⟪a, x⟫_ℝ := by + simp [real_inner_comm, inner_sub_right] + grind constructor · intro cond y specialize cond y - rw [aux, aux, add_comm _ (inner a z), add_comm _ (inner a y)] - linarith + grind · intro cond y specialize 
cond y - rw [aux, aux, add_comm _ (inner a z), add_comm _ (inner a y)] at cond - linarith + grind /- change of proximal when added a square components @@ -525,15 +569,15 @@ theorem proximal_add_sq (a : E) {l : ℝ} (lpos : 0 < l) (f : E → ℝ): rw [add_sub_right_comm]; simp; rw [mul_sub, ← add_sub_right_comm, ← add_sub_assoc] nth_rw 3 [← one_mul (‖v‖ ^ 2)]; rw [← add_mul, ← mul_assoc l, mul_comm l 2, sub_sub] rw [mul_assoc, ← mul_add, ← inner_smul_right _ _ l, ← inner_add_right] - field_simp; rw [mul_comm]; simp + field_simp; simp constructor · intro cond y specialize cond y - rw [aux, aux]; simp; rw [← mul_add, ← mul_add, mul_le_mul_left] + rw [aux, aux]; simp; rw [← mul_add, ← mul_add, mul_le_mul_iff_right₀] linarith [cond]; simp; linarith · intro cond y specialize cond y - rw [aux, aux] at cond; simp at cond; rw [← mul_add, ← mul_add, mul_le_mul_left] at cond + rw [aux, aux] at cond; simp at cond; rw [← mul_add, ← mul_add, mul_le_mul_iff_right₀] at cond linarith [cond]; simp; linarith end properties @@ -553,12 +597,11 @@ theorem prox_iff_subderiv (f : E → ℝ) (hfun : ConvexOn ℝ univ f) : let g := fun u ↦ ‖u - x‖ ^ 2 / 2 have hg : ConvexOn ℝ Set.univ g := by apply convex_of_norm_sq x (convex_univ) have hcg : ContinuousOn g univ := by - simp [g]; apply ContinuousOn.div - apply ContinuousOn.pow _ - · apply ContinuousOn.norm - apply ContinuousOn.sub continuousOn_id continuousOn_const - · apply continuousOn_const - · simp + have h1 : Continuous (fun z : E => ‖z - x‖) := + (continuous_id.sub continuous_const).norm + have h2 : Continuous (fun z : E => ‖z - x‖ ^ 2) := h1.pow 2 + have h3 : Continuous (fun z : E => ‖z - x‖ ^ 2 * (1 / 2)) := h2.mul continuous_const + simp; exact Continuous.div_const h2 2 show 0 ∈ SubderivAt (f + g) u ↔ x - u ∈ SubderivAt f u have : SubderivAt (f + g) u = SubderivAt (g + f) u := by unfold SubderivAt; ext z; rw [Set.mem_setOf, Set.mem_setOf]; @@ -638,15 +681,17 @@ theorem prox_iff_subderiv_smul (f : E → ℝ) {t : ℝ} (hfun : ConvexOn ℝ un · intro 
cond y specialize cond y; simp at cond rw [inner_smul_left]; simp - rw [← mul_le_mul_left ht]; ring_nf; field_simp + rw [← mul_le_mul_iff_right₀ ht]; ring_nf; field_simp exact cond · intro cond y - specialize cond y; rw [inner_smul_left] at cond; field_simp at cond - simp - have hrect : 0 < t⁻¹ := by - simp; linarith - rw [← mul_le_mul_left hrect]; ring_nf; field_simp - exact cond + specialize cond y + have hmul0 := mul_le_mul_of_nonneg_left cond (le_of_lt ht) + have hmul : t * f y ≥ t * f u + t * ((1 / t) * ⟪x - u, y - u⟫_ℝ) := by + simpa [mul_add, mul_assoc, inner_smul_left] using hmul0 + have htne : t ≠ 0 := ne_of_gt ht + have eqt : t * ((1 / t) * ⟪x - u, y - u⟫_ℝ) = ⟪x - u, y - u⟫_ℝ := by + field_simp [htne] + simp; grind exact gconv end diff --git a/Optlib/Optimality/Constrained_Problem.lean b/Optlib/Optimality/Constrained_Problem.lean index 03cba1d..f3e09a1 100644 --- a/Optlib/Optimality/Constrained_Problem.lean +++ b/Optlib/Optimality/Constrained_Problem.lean @@ -3,14 +3,7 @@ Copyright (c) 2024 Chenyi Li. All rights reserved. Released under Apache 2.0 license as described in the file LICENSE. Authors: Chenyi Li, Shengyang Xu, Yuxuan Wu -/ -import Mathlib.Analysis.Convex.Cone.Basic -import Mathlib.Analysis.Calculus.LocalExtr.Basic -import Mathlib.Analysis.NormedSpace.HahnBanach.Separation -import Mathlib.Data.Matrix.Rank -import Mathlib.LinearAlgebra.FiniteDimensional -import Mathlib.Analysis.Calculus.Implicit -import Mathlib.Analysis.Calculus.MeanValue -import Mathlib.Analysis.InnerProductSpace.Calculus +import Mathlib import Optlib.Differential.Calculation import Optlib.Convex.Farkas import Optlib.Differential.Lemmas @@ -34,6 +27,7 @@ This file contains the following parts of constrained optimization problem. 
open InnerProductSpace Set BigOperators set_option linter.unusedVariables false +set_option maxHeartbeats 500000 noncomputable section @@ -46,7 +40,7 @@ variable {τ σ : Finset ℕ} The equality constraints are a set of functions from a Hilbert space to ℝ. The inequality constraints are a set of functions from a Hilbert space to ℝ. -/ -structure Constrained_OptimizationProblem (E : Type _) (τ σ : Finset ℕ) := +structure Constrained_OptimizationProblem (E : Type _) (τ σ : Finset ℕ) where (domain : Set E) (equality_constraints : (i : ℕ) → E → ℝ) (inequality_constraints : (j : ℕ) → E → ℝ) @@ -57,7 +51,7 @@ namespace Constrained_OptimizationProblem variable {p : Constrained_OptimizationProblem E τ σ} {x : E} -open Topology InnerProductSpace Set Filter Tendsto +open Topology InnerProductSpace Set Filter /- The feasible point is a point that satisfies all the constraints. @@ -145,13 +139,19 @@ section linear variable {E : Type _} [NormedAddCommGroup E] [InnerProductSpace ℝ E] -def IsLinear (f : E → ℝ) : Prop := ∃ a, ∃ b, f = fun x ↦ (inner x a : ℝ) + b +def IsLinear (f : E → ℝ) : Prop := ∃ a, ∃ b, f = fun x ↦ ⟪x, a⟫_ℝ + b -lemma IsLinear_iff (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner x a : ℝ) + b := by rfl +lemma IsLinear_iff (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ ⟪x, a⟫_ℝ + b := by rfl -lemma IsLinear_iff' (f : E → ℝ) : IsLinear f ↔ ∃ a b, f = fun x ↦ (inner a x : ℝ) + b := by +lemma IsLinear_iff' (f : E → ℝ) : + IsLinear f ↔ ∃ a b, f = fun x ↦ ⟪a, x⟫_ℝ + b := by constructor - repeat rintro ⟨a, b, rfl⟩; exact ⟨a, b, by ext x; simp; exact real_inner_comm _ _⟩ + · rintro ⟨a, b, rfl⟩ + exact ⟨a, b, by + funext x; simp [real_inner_comm]⟩ + · rintro ⟨a, b, rfl⟩ + exact ⟨a, b, by + funext x; simp [real_inner_comm]⟩ end linear @@ -168,7 +168,7 @@ end section Constrained_OptimizationProblem_property_general -open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter Tendsto +open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter 
variable {E : Type _} [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] @@ -249,7 +249,7 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM calc _ ≤ (‖v‖ + ε) / c n := (le_div_iff₀' cpos).mpr this _ ≤ (‖v‖ + ε) / (2 * (‖v‖ + ε) / ε) := div_le_div_of_nonneg_left (by positivity) (by positivity) ha1 - _ = ε / 2 := by field_simp [εpos]; ring_nf + _ = ε / 2 := by field_simp [εpos] _ < ε := by linarith simp; exact this have h1 : z = (fun n ↦ d n + loc) := by @@ -257,16 +257,16 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM rw [h1] convert Filter.Tendsto.add_const loc this rw [zero_add] - have hz : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) + have hz : (fun n ↦ f (z n) - f loc - ⟪z n - loc, gradient f loc⟫_ℝ) =o[atTop] (fun n ↦ z n - loc) := by have : HasGradientAt f (gradient f loc) loc := hf.hasGradientAt rw [hasGradientAt_iff_isLittleO] at this - have heq : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) = - (fun n ↦ f (z n) - f loc - inner (gradient f loc) (z n - loc)) := by + have heq : (fun n ↦ f (z n) - f loc - ⟪z n - loc, gradient f loc⟫_ℝ) = + (fun n ↦ f (z n) - f loc - ⟪gradient f loc, z n - loc⟫_ℝ) := by ext n; rw [real_inner_comm] rw [heq] apply Asymptotics.IsLittleO.comp_tendsto this hzt - have hz1 : (fun n ↦ f (z n) - f loc - (1 / c n) * inner v (gradient f loc)) + have hz1 : (fun n ↦ f (z n) - f loc - (1 / c n) * ⟪v, gradient f loc⟫_ℝ) =o[atTop] (fun n ↦ 1 / c n) := by have t1: (fun n ↦ z n - loc) =O[atTop] (fun n ↦ 1 / c n) := by rw [Asymptotics.isBigO_iff] @@ -295,9 +295,9 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM rw [norm_smul, Real.norm_eq_abs, abs_of_pos cpos] at this; field_simp; exact (le_div_iff₀' cpos).mpr this rw [← hzd n]; exact this; apply cpos - have t2 : (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) + have t2 : (fun n ↦ f (z n) - f loc - ⟪z n - loc, gradient f loc⟫_ℝ) =o[atTop] 
(fun n ↦ 1 / c n) := Asymptotics.IsLittleO.trans_isBigO hz t1 - have t3 : (fun n ↦ (inner (z n - loc - (1 / c n) • v) (gradient f loc) : ℝ)) + have t3 : (fun n ↦ (⟪z n - loc - (1 / c n) • v, gradient f loc⟫_ℝ : ℝ)) =o[atTop] (fun n ↦ 1 / c n) := by have t5: (fun n ↦ z n - loc - (1 / c n) • v) =o[atTop] (fun n ↦ 1 / c n) := by rw [← Asymptotics.isLittleO_norm_norm] @@ -308,7 +308,7 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM apply Filter.Eventually.mp hcp apply Filter.Eventually.of_forall intro n hcn1; rw [mul_comm, ← Real.norm_eq_abs, ← norm_smul] - congr; field_simp; rw [smul_sub, smul_smul]; field_simp + congr; field_simp; rw [smul_sub, smul_smul]; field_simp; ring_nf; simp rw [Filter.tendsto_congr' this]; have : Tendsto (fun (n : ℕ) => c n • d n - v) atTop (𝓝 (v - v)) := by apply Filter.Tendsto.sub vt2 tendsto_const_nhds @@ -322,7 +322,7 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM rw [Asymptotics.isLittleO_iff] at t5; have pos1 : ‖gradient f loc‖ ≠ (0 : ℝ) := by by_contra hhh; simp at hhh - have : inner v (gradient f loc) = (0 : ℝ) := by rw [hhh, inner_zero_right] + have : ⟪v, gradient f loc⟫_ℝ = (0 : ℝ) := by rw [hhh, inner_zero_right] linarith have pos2 : ‖gradient f loc‖ > (0 : ℝ) := by positivity have : c1 / ‖gradient f loc‖ > (0 : ℝ) := by positivity @@ -332,18 +332,18 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM intro n hn; calc _ ≤ ‖z n - loc - (1 / c n) • v‖ * ‖gradient f loc‖ := norm_inner_le_norm _ _ _ ≤ c1 / ‖gradient f loc‖ * ‖1 / c n‖ * ‖gradient f loc‖ := - (mul_le_mul_right pos2).mpr hn - _ ≤ c1 * ‖1 / c n‖ := by ring_nf; field_simp [pos1] - have t4 : (fun n => f (z n) - f loc - 1 / c n * Inner.inner v (gradient f loc)) = - (fun n ↦ f (z n) - f loc - inner (z n - loc) (gradient f loc)) + - (fun n ↦ (inner (z n - loc - (1 / c n) • v) (gradient f loc) : ℝ)) := by - ext n; dsimp; simp [inner_sub_left, inner_add_left, inner_smul_left] + 
(mul_le_mul_iff_left₀ pos2).mpr hn + _ ≤ c1 * ‖1 / c n‖ := by ring_nf; field_simp [pos1]; simp + have t4 : (fun n => f (z n) - f loc - (1 / c n) * ⟪v, gradient f loc⟫_ℝ) = + (fun n ↦ f (z n) - f loc - ⟪z n - loc, gradient f loc⟫_ℝ) + + (fun n ↦ (⟪z n - loc - (1 / c n) • v, gradient f loc⟫_ℝ : ℝ)) := by + ext n; dsimp; simp [inner_sub_left, inner_smul_left] rw [t4]; apply Asymptotics.IsLittleO.add t2 t3 have hz2 : ∀ᶠ (n : ℕ) in atTop, f (z n) ≤ f loc + (1 / 2) * - (1 / c n) * inner v (gradient f loc) := by + (1 / c n) * ⟪v, gradient f loc⟫_ℝ := by rw [Asymptotics.isLittleO_iff] at hz1 - have : (- (1 / 2 : ℝ) * inner v (gradient f loc)) > 0 := by - simp [proneg];rw [mul_comm]; apply mul_neg_of_neg_of_pos proneg (by norm_num) + have : (- (1 / 2 : ℝ) * ⟪v, gradient f loc⟫_ℝ) > 0 := by + simp; rw [mul_comm]; apply mul_neg_of_neg_of_pos proneg (by norm_num) specialize hz1 this apply Filter.Eventually.mp hz1 apply Filter.Eventually.mp hz3 @@ -352,8 +352,8 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM rw [Real.norm_eq_abs, Real.norm_eq_abs, abs_le, abs_of_pos hn] at hn1 rcases hn1 with ⟨_, hn1⟩ rw [sub_le_iff_le_add, sub_le_iff_le_add] at hn1 - have : -(1 / 2) * inner v (gradient f loc) * (1 / c n) + 1 / c n * inner v - (gradient f loc) + f loc = f loc + 1 / 2 * (1 / c n) * inner v (gradient f loc) := by + have : -(1 / 2) * ⟪v, gradient f loc⟫_ℝ * (1 / c n) + 1 / c n * ⟪v, + gradient f loc⟫_ℝ + f loc = f loc + 1 / 2 * (1 / c n) * ⟪v, gradient f loc⟫_ℝ := by ring_nf rw [this] at hn1; exact hn1 have hz4 : ∀ᶠ (n : ℕ) in atTop, f (z n) < f loc := by @@ -361,13 +361,13 @@ lemma posTangentCone_localmin_inner_pos {f : E → ℝ} {loc : E} (hl : IsLocalM apply Filter.Eventually.mp hz3 apply Filter.Eventually.of_forall intro n hn1 hn2 - have : 1 / 2 * (1 / c n) * (inner v (gradient f loc)) < 0 := by + have : 1 / 2 * (1 / c n) * (⟪v, gradient f loc⟫_ℝ) < 0 := by apply mul_neg_of_pos_of_neg · apply Right.mul_pos; simp; exact hn1 · exact proneg linarith 
have hz5 : ∀ᶠ (n : ℕ) in atTop, z n ∈ s1 := by - simp only [s1, mem_inter_iff, Metric.mem_ball, dist_self_add_left] + simp only [s1, mem_inter_iff, Metric.mem_ball] apply Filter.Eventually.and · rw [Filter.tendsto_atTop'] at hzt simp; @@ -418,19 +418,22 @@ theorem linearized_feasible_directions_contain_tagent_cone (xf : x ∈ p.FeasSet . intro i itau apply ge_antisymm . apply posTangentCone_localmin_inner_pos (imin i itau) (diffable i itau) v hv - . rw [← neg_neg (inner (gradient (equality_constraints p i) x) v)] - apply neg_nonpos_of_nonneg - rw [← inner_neg_left] - have a₁ : ∀ i ∈ τ, DifferentiableAt ℝ (-equality_constraints p i) x := + . have a₁ : ∀ i ∈ τ, DifferentiableAt ℝ (-(p.equality_constraints i)) x := fun i itau ↦ DifferentiableAt.neg (diffable i itau) - have a₂ : - gradient (equality_constraints p i) x = - gradient (-equality_constraints p i) x := by + have a₂ : - gradient (p.equality_constraints i) x = + gradient (-(p.equality_constraints i)) x := by symm apply HasGradientAt.gradient apply HasGradientAt.neg exact DifferentiableAt.hasGradientAt (diffable i itau) - rw [a₂] - apply posTangentCone_localmin_inner_pos (negimin i itau) (a₁ i itau) v hv + have hpos : + 0 ≤ ⟪gradient (-(p.equality_constraints i)) x, v⟫_ℝ := + posTangentCone_localmin_inner_pos (negimin i itau) (a₁ i itau) v hv + have hpos' : 0 ≤ -⟪gradient (p.equality_constraints i) x, v⟫_ℝ := by + have : 0 ≤ ⟪- gradient (p.equality_constraints i) x, v⟫_ℝ := by + simpa [a₂] using hpos + simpa [inner_neg_left] using this + exact (neg_nonneg).1 hpos' . 
intro j hj jact rw [active_set] at jact; simp at jact rcases jact with jtau | jsigma @@ -467,7 +470,7 @@ theorem local_Minimum_TangentCone (loc : E) (hl : p.Local_Minimum loc) theorem local_Minimum_TangentCone' (loc : E) (hl : p.Local_Minimum loc) (hf : Differentiable ℝ p.objective) : posTangentConeAt p.FeasSet loc ∩ {d | ⟪gradient p.objective loc, d⟫_ℝ < (0 : ℝ)} = ∅ := by - rw [Set.eq_empty_iff_forall_not_mem] + rw [Set.eq_empty_iff_forall_notMem] intro d ⟨hd1, hd2⟩ simp at hd2 obtain hd1 := local_Minimum_TangentCone loc hl hf d hd1 @@ -567,7 +570,7 @@ lemma StrictFderivAt_of_FderivAt_of_ContinuousAt with ⟨ε, ε0, hε⟩ refine ⟨ε, ε0, ?_⟩ rintro ⟨a, b⟩ h - rw [← ball_prod_same, prod_mk_mem_set_prod_eq] at h + rw [← ball_prod_same, prodMk_mem_set_prod_eq] at h have hf' : ∀ x' ∈ Metric.ball x ε, ‖c' x' - c' x‖ ≤ μ := fun x' H' => by rw [← dist_eq_norm] exact le_of_lt (hε H').2 @@ -608,7 +611,7 @@ lemma exist_forall_forall_exist (P : ℕ → ℝ → Prop) (s : Finset ℕ) (hs have po : ∀ y ∈ s1, y > 0 := by intro y hy simp [s1] at hy; rcases hy with ⟨a, ha1, ha2⟩ - simp only [gt_iff_lt, and_imp, ha1, ↓reduceDIte, f] at ha2; rw [← ha2] + simp only [gt_iff_lt, ha1, ↓reduceDIte, f] at ha2; rw [← ha2] exact (h a ha1).choose_spec.1 have up : ∀ y ∈ s1, tm ≤ y := fun y a ↦ Finset.min'_le s1 y a use tm; constructor @@ -637,7 +640,7 @@ end Constrained_OptimizationProblem_property_general section Constrained_OptimizationProblem_property_finite_dimensional -open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter Tendsto Matrix +open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter Matrix variable {n : ℕ} {x : EuclideanSpace ℝ (Fin n)} variable {τ σ : Finset ℕ} {p : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ} @@ -696,7 +699,7 @@ lemma LICQ_existZ (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) · simp rw [Matrix.rank_eq_finrank_span_row, finrank_span_eq_card] simp; rw [Nat.sub_add_cancel]; apply mlen - let base_indep := 
Basis.linearIndependent base + let base_indep := Module.Basis.linearIndependent base simp only [Z] rw [linearIndependent_iff''] intro s g cond sum @@ -708,11 +711,11 @@ lemma LICQ_existZ (x : EuclideanSpace ℝ (Fin n)) (LIx : p.LICQ x) · intro cond; rw [cond]; simp [coe] · intro cond; simp [coe] at cond; exact cond rw [coe_zero]; simp only [coe] - rw [← sum]; simp + rw [← sum]; simp; rfl lemma mulVec_eq_toEuclidean {s : Type*} (M : Matrix s (Fin n) ℝ) (y : EuclideanSpace ℝ (Fin n)) : M *ᵥ y = (toEuclideanLin M) y := by - rw [Matrix.toEuclideanLin_apply]; ext j; simp [Matrix.mulVec, Matrix.dotProduct] + rw [Matrix.toEuclideanLin_apply]; ext j; simp; rfl lemma inj_iff_full_finrank {s t : Type*} {M : Matrix s t ℝ} [Fintype s] [Fintype t] (hn : Fintype.card s = Fintype.card t) : @@ -794,15 +797,15 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} rw [eventually_iff, Metric.mem_nhds_iff] at h; rcases h with ⟨ε, _, _⟩ intro i; by_cases hi : i.1 ∈ τ · rw [ceq, Jzeq, Aeq]; simp [hi] - rw [HasStrictFDerivAt]; + rw [hasStrictFDerivAt_iff_isLittleO]; have eq : (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.equality_constraints i.1 p_1.1 - p.equality_constraints i.1 p_1.2 - ((EuclideanSpace.proj i).comp (LinearMap.toContinuousLinearMap (toEuclideanLin fun i ↦ gradc x i))) (p_1.1 - p_1.2)) = (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.equality_constraints i.1 p_1.1 - p.equality_constraints i.1 p_1.2 - - inner (gradient (p.equality_constraints ↑i) x) (p_1.1 - p_1.2) ):= by + ⟪gradient (p.equality_constraints ↑i) x, p_1.1 - p_1.2⟫_ℝ ):= by ext q; rw [inner_sub_right, gradceq]; simp [toEuclideanLin_apply, mulVec, dotProduct, hi] - rw [← Finset.sum_sub_distrib]; apply Finset.sum_congr; rfl; exact fun _ _ ↦ by ring_nf + rw [← inner_sub_right]; simp [inner]; grind rw [eq] specialize conte i hi exact StrictFderivAt_of_FderivAt_of_ContinuousAt conte @@ -813,7 +816,7 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin 
n)} rw [Finset.mem_filter] at hi2 exact hi2.1 rw [ceq, Jzeq, Aeq]; simp [hi] - rw [HasStrictFDerivAt]; + rw [hasStrictFDerivAt_iff_isLittleO]; have eq : (fun p_1 : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ p.inequality_constraints i.1 p_1.1 - p.inequality_constraints i.1 p_1.2 - ((EuclideanSpace.proj i).comp (LinearMap.toContinuousLinearMap (toEuclideanLin fun i ↦ gradc x i))) @@ -821,7 +824,7 @@ lemma LICQ_strictfderiv_Ax_elem {x : EuclideanSpace ℝ (Fin n)} p.inequality_constraints i.1 p_1.1 - p.inequality_constraints i.1 p_1.2 - ⟪gradient (p.inequality_constraints ↑i) x, p_1.1 - p_1.2⟫_ℝ ):= by ext q; rw [inner_sub_right, gradceq]; simp [toEuclideanLin_apply, mulVec, dotProduct, hi] - rw [← Finset.sum_sub_distrib]; apply Finset.sum_congr; rfl; exact fun _ _ ↦ by ring_nf + rw [← inner_sub_right]; simp [inner]; grind rw [eq] specialize conti i hi' exact StrictFderivAt_of_FderivAt_of_ContinuousAt conti @@ -862,13 +865,20 @@ lemma LICQ_implicit_f {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} (v : EuclideanS · rw [Rxeq0]; rw [NormedAddCommGroup.tendsto_nhds_zero]; simp; apply Rtleε · simp -lemma eq_lemma {y z : EuclideanSpace ℝ (Fin n)} {n : ℕ} (h : ‖(n : ℝ) • y‖ ≠ 0) : - (1 / ‖y‖) • (y - (1 / (n : ℝ)) • z) = (1 / ‖(n : ℝ) • y‖) • ((n : ℝ) • y - z) := by - rw [norm_smul] at h; simp at h - have eq : z = (n : ℝ) • (1 / n : ℝ) • z := by +lemma eq_lemma {y z : EuclideanSpace ℝ (Fin n)} {k : ℕ} + (h : ‖(k : ℝ) • y‖ ≠ 0) : + (1 / ‖y‖) • (y - (1 / (k : ℝ)) • z) + = (1 / ‖(k : ℝ) • y‖) • ((k : ℝ) • y - z) := by + rw [norm_smul] at h + simp at h + have eq : z = (k : ℝ) • (1 / k : ℝ) • z := by rw [smul_smul]; field_simp; rw [div_self, one_smul]; simp [h] nth_rw 2 [eq] - rw [← smul_sub, smul_smul, norm_smul]; field_simp; rw [← div_div, div_self]; simp [h] + rw [← smul_sub, smul_smul, norm_smul] + field_simp + have norm_cast : ‖(k : ℝ)‖ = (k : ℝ) := by simp + rw [norm_cast] + field_simp [h.1] lemma comap1 {x : EuclideanSpace ℝ (Fin n)} {m : ℕ} {Mx : EuclideanSpace ℝ (Fin n) 
→L[ℝ] EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)} @@ -903,31 +913,48 @@ lemma comap2 (hv : v ≠ 0): rw [Metric.mem_nhds_iff] at smem; rcases smem with ⟨a, apos, ha⟩ let μ := a / (a + ‖v‖) have eq : μ * ‖v‖ = (1 - μ) * a := by - field_simp [μ]; rw [mul_comm] + field_simp [μ]; rw [mul_comm]; rw [@mul_div] + simp [μ]; ring_nf; field_simp; ring have vpos : 0 < ‖v‖ := by refine lt_of_le_of_ne (norm_nonneg v) ?_; symm; simp [hv] have μle : 0 < 1 - μ := by - field_simp [μ, hv] - apply add_pos ?_ vpos; linarith + simp [μ]; rw [div_lt_one_iff] + left + constructor + · exact Right.add_pos' apos vpos + · linarith [apos, vpos] have μpos : 0 < μ := by - field_simp [μ]; apply add_pos_of_pos_of_nonneg _ (norm_nonneg v); linarith + simp [μ]; apply div_pos apos; exact add_pos apos (by positivity) let r := min μ ‖v‖ use Metric.ball 0 r; constructor · apply Metric.ball_mem_nhds; simp [r]; exact ⟨μpos, hv⟩ · intro z zin; simp at zin; have ze : z ≠ 0 := by by_contra hz; simp [hz] at zin; simp [r] at zin - simp [ze] at zin; rw [norm_smul] at zin; field_simp at zin - have : 0 < ‖z‖ := by refine lt_of_le_of_ne (norm_nonneg z) ?_; symm; simp [ze] - rw [div_lt_iff₀ this] at zin + simp [ze] at zin + rw [norm_smul] at zin + have hzpos : 0 < ‖z‖ := by refine lt_of_le_of_ne (norm_nonneg z) ?_; symm; simp [ze] + have zin' : ‖z‖⁻¹ * ‖z - v‖ < r := by + simpa [Real.norm_eq_abs, abs_of_pos (inv_pos.2 hzpos)] using zin + have hmul' : ‖z - v‖ < r * ‖z‖ := by + have h := mul_lt_mul_of_pos_right zin' hzpos + have hzne : ‖z‖ ≠ 0 := ne_of_gt hzpos + simpa [one_div, hzne, mul_comm, mul_left_comm, mul_assoc] using h have ieq : ‖z - v‖ < μ * ‖z - v‖ + (1 - μ) * a := by calc - _ < r * ‖z‖ := zin - _ ≤ μ * ‖z‖ := by rw [mul_le_mul_right this]; simp [r] + ‖z - v‖ < r * ‖z‖ := hmul' + _ ≤ μ * ‖z‖ := by + apply mul_le_mul_of_nonneg_right + · simp [r] + · exact norm_nonneg z _ ≤ μ * (‖z - v‖ + ‖v‖) := by - rw [mul_le_mul_left μpos, add_comm]; apply norm_le_norm_add_norm_sub' - _ ≤ μ * ‖z - v‖ + (1 - μ) 
* a := by rw [mul_add]; apply add_le_add_left; rw [eq] - rw [← sub_lt_iff_lt_add'] at ieq; nth_rw 1 [← one_mul (‖z - v‖)] at ieq + apply mul_le_mul_of_nonneg_left + rw [add_comm]; apply norm_le_norm_add_norm_sub' + exact le_of_lt μpos + _ ≤ μ * ‖z - v‖ + (1 - μ) * a := by + rw [mul_add]; apply add_le_add_left; rw [eq] + rw [← sub_lt_iff_lt_add'] at ieq + nth_rw 1 [← one_mul (‖z - v‖)] at ieq rw [← sub_mul, mul_lt_mul_left μle] at ieq apply ha; simp; rw [dist_eq_norm]; simp [ieq] @@ -944,22 +971,37 @@ lemma LICQ_tendsto {x : EuclideanSpace ℝ (Fin n)} {m N : ℕ} (deriv : Tendsto ((fun x' ↦ ‖x' - x‖⁻¹ * ‖Rz x' - Rz x - Mx (x' - x)‖) ∘ d) atTop (𝓝 0)) : Tendsto (fun i : ℕ ↦ (i : ℝ) • (d i - x)) atTop (𝓝 v) := by have dne : ∀ i ≥ N.succ, d i ≠ x := by - contrapose! hfd; rcases hfd with ⟨i, igeN, dieq⟩; simp at igeN - use i; constructor + contrapose! hfd + rcases hfd with ⟨i, igeN, dieq⟩ + simp at igeN + refine ⟨i, ?_, ?_⟩ · simp; linarith [igeN] - · rw [dieq, Rxeq0, Rteq]; symm; rw [smul_ne_zero_iff]; simp; constructor - · linarith [Nat.lt_of_add_one_le igeN] - · contrapose! vne0; apply Mxbij.1; rw [vne0]; simp + · rw [dieq, Rxeq0, Rteq]; symm + rw [smul_ne_zero_iff]; constructor + · have hi_pos : (0 : ℕ) < i := Nat.zero_lt_of_lt igeN + have hi0 : (i : ℝ) ≠ 0 := by + have : (0 : ℝ) < (i : ℝ) := by + exact_mod_cast hi_pos + exact ne_of_gt this + exact one_div_ne_zero hi0 + · contrapose! 
vne0 + apply Mxbij.1 + rw [vne0]; simp have eq1 : ((fun x' ↦ ‖x' - x‖⁻¹ * ‖Rz x' - Rz x - Mx (x' - x)‖) ∘ d) = - fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Rz (d i) - Rz x - Mx (d i - x)‖ := by ext i; simp + fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Rz (d i) - Rz x - Mx (d i - x)‖ := by + ext i; simp have eq2 : (fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Rz (d i) - Rz x - Mx (d i - x)‖) = fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Rz (d i) - Rt (1 / (i : ℝ)) - Rz x - Mx (d i - x - (1 / (i : ℝ)) • v)‖ := by - ext i; rw [Rteq]; simp; left + ext i + rw [Rteq]; simp + left rw [sub_right_comm _ _ (Rz x), sub_sub (Rz (d i) - Rz x), add_comm, sub_add_cancel] have eq3 : (fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Rz (d i) - Rt (1 / (i : ℝ)) - Rz x - Mx (d i - x - (1 / (i : ℝ)) • v)‖) - =ᶠ[atTop] (fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Mx (d i - x - (1 / (i : ℝ)) • v)‖) := by - rw [EventuallyEq, eventually_atTop]; use N - intro i igeN; specialize hfd i igeN + =ᶠ[atTop] (fun i : ℕ ↦ ‖d i - x‖⁻¹ * ‖Mx (d i - x - (1 / (i : ℝ)) • v)‖) := by + rw [EventuallyEq, eventually_atTop] + refine ⟨N, ?_⟩ + intro i igeN + specialize hfd i igeN rw [hfd, Rxeq0, sub_self, zero_sub, neg_zero, zero_sub, norm_neg] rw [eq1, eq2] at deriv obtain deriv := Filter.Tendsto.congr' eq3 deriv @@ -970,23 +1012,46 @@ lemma LICQ_tendsto {x : EuclideanSpace ℝ (Fin n)} {m N : ℕ} ext i; simp [NMx, deriv']; rw [norm_smul]; simp rw [eq4] at deriv have comap_le : Filter.comap NMx (𝓝 0) ≤ (𝓝 0) := by - simp only [NMx]; exact comap1 v vne0 Mxbij + simp only [NMx] + exact comap1 v vne0 Mxbij obtain lim := Tendsto.of_tendsto_comp deriv comap_le let φ : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (Fin n) := fun z ↦ if (‖z‖ = 0) then v else ‖z‖⁻¹ • (z - v) have eq5 : deriv' =ᶠ[atTop] φ ∘ (fun i : ℕ ↦ (i : ℝ) • (d i - x)) := by rw [EventuallyEq, eventually_atTop] - have : 0 < ‖v‖ := by refine lt_of_le_of_ne (norm_nonneg v) ?_; symm; simp [vne0] - specialize dtend ‖v‖ this; rcases dtend with ⟨N₁, dtend⟩ - use max N₁ N.succ; intro i igeN; simp only [ge_iff_le, max_le_iff] at igeN + have : 0 < ‖v‖ := by + 
refine lt_of_le_of_ne (norm_nonneg v) ?_ + symm; simp [vne0] + rcases dtend ‖v‖ this with ⟨N₁, dtend⟩ + refine ⟨max N₁ N.succ, ?_⟩ + intro i igeN + simp only [ge_iff_le, max_le_iff] at igeN specialize dtend i igeN.1 have neq : ‖(i : ℝ) • (d i - x)‖ ≠ 0 := by - rw [norm_smul]; apply mul_ne_zero; simp; linarith [Nat.lt_of_add_one_le igeN.2] - specialize dne i igeN.2; simp; apply sub_ne_zero_of_ne dne - field_simp [deriv', φ, neq]; apply eq_lemma neq + rw [norm_smul] + apply mul_ne_zero + · have i_pos : 0 < i := lt_of_lt_of_le (Nat.succ_pos N) igeN.2 + have hi_real_ne : (i : ℝ) ≠ 0 := by + have : (0 : ℝ) < (i : ℝ) := by exact_mod_cast i_pos + exact ne_of_gt this + simpa [norm_eq_zero] using hi_real_ne + · have hdiff_ne : d i - x ≠ 0 := by + exact sub_ne_zero.mpr (dne i igeN.2) + simpa [norm_eq_zero] using hdiff_ne + have i_ne_zero : i ≠ 0 := by + exact ne_of_gt (lt_of_lt_of_le (Nat.succ_pos N) igeN.2) + refine (calc + deriv' i + = (1 / ‖d i - x‖) • (d i - x - (1 / (i : ℝ)) • v) := by + simp [deriv', one_div] + _ = (1 / ‖(i : ℝ) • (d i - x)‖) • ((i : ℝ) • (d i - x) - v) := by + simpa using eq_lemma (y := d i - x) (z := v) (k := i) neq + _ = φ ((i : ℝ) • (d i - x)) := by + simp [φ, one_div]; grind) obtain lim' := Filter.Tendsto.congr' eq5 lim refine Filter.Tendsto.of_tendsto_comp lim' ?_ - simp only [φ]; exact comap2 vne0 + simp only [φ] + exact comap2 vne0 /- Linearized feasible directions equal tagent cone when LICQ holds @@ -1032,11 +1097,11 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone fun z ↦ (fun i ↦ gradc z i) -- compose the gradient matrix let Jz : EuclideanSpace ℝ (Fin n) → EuclideanSpace ℝ (Fin n) →L[ℝ] EuclideanSpace ℝ (p.active_set x) := fun z ↦ (LinearMap.toContinuousLinearMap (toEuclideanLin (A z))) -- change the Jacobi into linear transformation - have cgrad_atx : Jz x = (LinearMap.toContinuousLinearMap (toEuclideanLin Ax)) := by simp [Jz, A, gradc] -- A x = Ax + have cgrad_atx : Jz x = (LinearMap.toContinuousLinearMap (toEuclideanLin 
Ax)) := by simp [Jz, A, gradc]; rfl -- A x = Ax have Rzgrad : HasStrictFDerivAt Rz Mx x := by - simp only [Rz, Ax] - apply HasStrictFDerivAt.prod + simp only [Rz] + apply HasStrictFDerivAt.prodMk · rw [← cgrad_atx] rw [hasStrictFDerivAt_euclidean] refine LICQ_strictfderiv_Ax_elem c ?_ gradc ?_ A ?_ Jz ?_ conte conti @@ -1044,12 +1109,15 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone · let N : EuclideanSpace ℝ (Fin n) →L[ℝ] (Fin (n - m) → ℝ) := (LinearMap.toContinuousLinearMap (toEuclideanLin Zᵀ)) show HasStrictFDerivAt (fun y : EuclideanSpace ℝ (Fin n) ↦ Zᵀ *ᵥ (y - x)) N x - rw [HasStrictFDerivAt] - have aux : (fun p : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) - ↦ Zᵀ *ᵥ (p.1 - x) - Zᵀ *ᵥ (p.2 - x) - N (p.1 - p.2)) = 0 := by - ext y j; rw [← mulVec_sub, sub_sub, add_sub_cancel]; rw [mulVec_eq_toEuclidean] - simp [N]; apply sub_eq_zero_of_eq; tauto - rw [aux]; simp + rw [hasStrictFDerivAt_iff_isLittleO] + have aux : + (fun p : EuclideanSpace ℝ (Fin n) × EuclideanSpace ℝ (Fin n) ↦ + Zᵀ *ᵥ (p.1 - x) - Zᵀ *ᵥ (p.2 - x) - N (p.1 - p.2)) = 0 := by + ext y j + rw [← mulVec_sub, sub_sub, add_sub_cancel, mulVec_eq_toEuclidean] + simp [N]; grind + rw [aux] + simp have Rxeq0 : Rz x = 0 := by simp [Rz, c]; ext i; @@ -1073,7 +1141,7 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone refine LICQ_injM z m Z Ax ?_ mlen ?_ eq2 eq1 ⟨heq1, heq2⟩ simp [m] obtain hAx := LICQ_Axfullrank x LIx; simp at hAx - show Ax.rank = (active_set x).card; apply hAx; simp only [Ax] + show Ax.rank = (active_set x).card; apply hAx; simp only [Ax]; rfl have Mxsurj : LinearMap.range Mx = ⊤ := by show LinearMap.range (Mx : EuclideanSpace ℝ (Fin n) →ₗ[ℝ] EuclideanSpace ℝ (p.active_set x) × (Fin (n - m) → ℝ)) = ⊤ rw [← LinearMap.ker_eq_bot_iff_range_eq_top_of_finrank_eq_finrank] @@ -1101,19 +1169,27 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone use (fun n ↦ d n - x); constructor · use max N N'; intro nn hnn; simp [FeasSet, FeasPoint] - specialize 
hfd nn (le_of_max_le_left hnn); simp [Rz, Rt, Mx] at hfd; rw [← mulVec_eq_toEuclidean] at hfd - rcases hfd with ⟨hv1, hv2⟩ - have Axeq : (nn : ℝ)⁻¹ • Ax *ᵥ v = fun i : (p.active_set x) ↦ ((nn : ℝ)⁻¹ * (gradc x i) ⬝ᵥ v) := by - simp [Ax]; ext i; simp; left; simp [mulVec] - have Axroweq : ∀ i : (p.active_set x), c (d nn) i = (nn : ℝ)⁻¹ * (gradc x i) ⬝ᵥ v := by - rw [Axeq] at hv1; simp [hv1] + have hfd_nn := hfd nn (le_of_max_le_left hnn) + simp [Rz, Rt, Mx] at hfd_nn; rw [← mulVec_eq_toEuclidean] at hfd_nn + rcases hfd_nn with ⟨hv1, hv2⟩ + have Axeq : (nn : ℝ)⁻¹ • Ax *ᵥ v = + fun i => (nn : ℝ)⁻¹ * ⟪gradc x i, v⟫_ℝ := by + ext i + have hdot : (Ax *ᵥ v) i = ⟪gradc x i, v⟫_ℝ := by + simp [Ax, Matrix.mulVec, dotProduct, EuclideanSpace.inner_eq_star_dotProduct, mul_comm] + simp [hdot] + have Axroweq : ∀ i : (p.active_set x), c (d nn) i = (nn : ℝ)⁻¹ * ⟪gradc x i, v⟫_ℝ := by + intro i + have : c (d nn) = (nn : ℝ)⁻¹ • Ax *ᵥ v := hv1 + rw [this, Axeq] constructor; constructor · rw [hdomain]; simp · intro i hi - have iina : i ∈ (p.active_set x) := by simp [active_set, hi] obtain h := hvh1 i hi - obtain eq := Axroweq ⟨i, iina⟩; simp [c, hi, gradc] at eq - rw [eq]; simp; right; apply h + obtain eq := Axroweq ⟨i, by simp [active_set, hi]⟩ + simp [c, hi, gradc] at eq + rw [eq] + exact mul_eq_zero.mpr (Or.inr h) constructor · rw [hdomain]; simp · intro j hj @@ -1124,13 +1200,20 @@ theorem LICQ_linearized_feasible_directions_sub_posTangentCone by_cases hj1 : j ∈ p.active_set x · have jin : j ∈ σ ∩ (p.active_set x) := by simp [hj1, hj] obtain h := hvh2 j jin - obtain eq := Axroweq ⟨j, hj1⟩; simp [c, hj1, notin, gradc] at eq - rw [eq]; field_simp - rw [div_nonneg_iff]; left; simp at h; simp [dotProduct, h] + obtain eq := Axroweq ⟨j, hj1⟩ + simp [c, notin, gradc] at eq + rw [eq] + have inv_nonneg : 0 ≤ (nn : ℝ)⁻¹ := by + have : 0 ≤ (nn : ℝ) := by exact_mod_cast (Nat.zero_le nn) + exact inv_nonneg.mpr this + have : 0 ≤ (nn : ℝ)⁻¹ * ⟪gradient (p.inequality_constraints j) x, v⟫_ℝ := + 
mul_nonneg inv_nonneg h + simpa [gradc, notin, hj1] · specialize inactive j; simp [hj, hj1] at inactive specialize inactive (d nn) specialize dtendx nn (le_of_max_le_right hnn); rw [← dist_eq_norm] at dtendx - specialize inactive dtendx; linarith [inactive] + have hpos : 0 < p.inequality_constraints j (d nn) := inactive dtendx + exact le_of_lt hpos constructor · exact tendsto_natCast_atTop_atTop @@ -1179,9 +1262,9 @@ lemma subtype_sum (σ τ : Finset ℕ) (f : σ → EuclideanSpace ℝ (Fin n)) have : ∑ i, g i = ∑ i : {x // x ∈ σ ∩ τ}, f {val := i.1, property := by obtain hi := i.2; rw [Finset.mem_inter] at hi; exact hi.1} := by congr; ext i; rw [h2] - rw [this]; simp [h3] + rw [this]; simp let f₁ : ℕ → EuclideanSpace ℝ (Fin n):= fun i => if h : i ∈ σ then f ⟨i, h⟩ else 0 - have eq1 : ∑ i ∈ σ.attach, f i = ∑ i in σ, f₁ i := by + have eq1 : ∑ i ∈ σ.attach, f i = ∑ i ∈ σ, f₁ i := by simp [f₁]; nth_rw 2 [← Finset.sum_attach]; congr; simp have eq2 : ∑ i ∈ (σ ∩ τ).attach, f {val := i.1, property := by obtain hi := i.2; rw [Finset.mem_inter] at hi; exact hi.1} = @@ -1198,6 +1281,8 @@ lemma subtype_sum (σ τ : Finset ℕ) (f : σ → EuclideanSpace ℝ (Fin n)) apply Finset.sum_eq_zero feq0 apply Finset.disjoint_sdiff_inter σ τ +open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter + theorem first_order_neccessary_general (p1 : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ) (loc : EuclideanSpace ℝ (Fin n)) (hl : p1.Local_Minimum loc) (hf : Differentiable ℝ p1.objective) @@ -1221,54 +1306,95 @@ theorem first_order_neccessary_general (p1 : Constrained_OptimizationProblem (Eu mu {val := i.1, property := by simp [k]} else 0 use lam; use mu1; constructor · unfold Lagrange_function - rw [gradient_sub, gradient_sub, gradient_sum, gradient_sum]; rw [h2] - rw [sub_sub, ← sub_add_sub_comm]; - have : ∑ x , lam x • gradient (p1.equality_constraints x) loc - ∑ i, - gradient (fun m => lam i * p1.equality_constraints i m) loc = 0 := by - rw [← 
Finset.sum_sub_distrib]; apply Finset.sum_eq_zero - intro i _; rw [gradient_const_mul']; simp - exact (he1 i i.2) - rw [this, zero_add, sub_eq_zero]; symm; - have : ∑ i, gradient (fun m => mu1 i * p1.inequality_constraints (↑i) m) loc = - ∑ i, mu1 i • gradient (p1.inequality_constraints (↑i)) loc := by - congr; ext i; rw [← gradient_const_mul']; exact (hc1 i i.2) - rw [this]; - let f := fun i ↦ mu1 i • gradient (p1.inequality_constraints ↑i) loc - let g := fun i ↦ mu i • gradient (p1.inequality_constraints ↑i) loc - have : ∑ i, f i = ∑ i, g i := by - apply subtype_sum σ (p1.active_set loc) f g - · intro i; simp [f, g]; simp [mu1]; - obtain hi := i.2; rw [Finset.mem_inter] at hi; simp [hi.2] - intro i hi; simp [f]; left; simp [mu1, hi] - simp only [f, g] at this; exact this - intro i _; apply DifferentiableAt.const_mul; exact (hc1 i i.2) - intro i _; apply DifferentiableAt.const_mul; exact (he1 i i.2) - exact hf.differentiableAt - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (he1 i i.2) - apply DifferentiableAt.sub hf.differentiableAt - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (he1 i i.2) - apply DifferentiableAt.sum; intro i _; apply DifferentiableAt.const_mul - exact (hc1 i i.2) + have hDiffEqSum : + DifferentiableAt ℝ + (fun m : EuclideanSpace ℝ (Fin n) => + ∑ i : {x // x ∈ τ}, + lam i * p1.equality_constraints (↑i) m) loc := by + change + DifferentiableAt ℝ + (fun m : EuclideanSpace ℝ (Fin n) => + ∑ i ∈ (Finset.univ : Finset {x // x ∈ τ}), + lam i * p1.equality_constraints (↑i) m) loc + refine DifferentiableAt.fun_sum ?_ + intro i _ + exact DifferentiableAt.const_mul (he1 i.1 i.2) (lam i) + have hDiffObjSubEq : + DifferentiableAt ℝ + (fun m : EuclideanSpace ℝ (Fin n) => + p1.objective m - ∑ i : {x // x ∈ τ}, lam i * p1.equality_constraints (↑i) m) loc := by + have := hf loc + have := this.sub hDiffEqSum + simpa using this + have hDiffIneqSum : + DifferentiableAt ℝ + (fun m : EuclideanSpace 
ℝ (Fin n) => + ∑ j : {x // x ∈ σ}, + mu1 j * p1.inequality_constraints (↑j) m) loc := by + change + DifferentiableAt ℝ + (fun m : EuclideanSpace ℝ (Fin n) => + ∑ j ∈ (Finset.univ : Finset {x // x ∈ σ}), + mu1 j * p1.inequality_constraints (↑j) m) loc + refine DifferentiableAt.fun_sum ?_ + intro j _ + exact DifferentiableAt.const_mul (hc1 j.1 j.2) (mu1 j) + rw [gradient_sub, gradient_sub] + · + have h_grad_sum_eq : gradient (fun m ↦ ∑ i, lam i * p1.equality_constraints (↑i) m) loc = + ∑ i, gradient (fun m ↦ lam i * p1.equality_constraints (↑i) m) loc := by + apply gradient_sum + intro i _; exact DifferentiableAt.const_mul (he1 i.1 i.2) (lam i) + rw [h_grad_sum_eq] + have h_grad_sum_ineq : gradient (fun m ↦ ∑ j, mu1 j * p1.inequality_constraints (↑j) m) loc = + ∑ j, gradient (fun m ↦ mu1 j * p1.inequality_constraints (↑j) m) loc := by + apply gradient_sum + intro i _; exact DifferentiableAt.const_mul (hc1 i.1 i.2) (mu1 i) + rw [h_grad_sum_ineq, h2] + rw [sub_sub, ← sub_add_sub_comm] + have : ∑ x, lam x • gradient (p1.equality_constraints x) loc - + ∑ i, gradient (fun m => lam i * p1.equality_constraints i m) loc = 0 := by + rw [← Finset.sum_sub_distrib]; apply Finset.sum_eq_zero + intro i _; rw [gradient_const_mul']; simp; exact (he1 i.1 i.2) + rw [this, zero_add, sub_eq_zero]; symm + have : ∑ i, gradient (fun m => mu1 i * p1.inequality_constraints (↑i) m) loc = + ∑ i, mu1 i • gradient (p1.inequality_constraints (↑i)) loc := by + congr; ext i; rw [← gradient_const_mul']; exact (hc1 i i.property) + rw [this] + let f := fun i ↦ mu1 i • gradient (p1.inequality_constraints ↑i) loc + let g := fun i ↦ mu i • gradient (p1.inequality_constraints ↑i) loc + have : ∑ i, f i = ∑ i, g i := by + apply subtype_sum σ (p1.active_set loc) f g + · intro i; simp [f, g, mu1]; + obtain hi := i.2; rw [Finset.mem_inter] at hi; simp [hi.2] + · intro i hi; simp [f, mu1, hi] + simp only [f, g] at this; exact this + · exact hf loc + · exact hDiffEqSum + · exact hDiffObjSubEq + · exact 
hDiffIneqSum constructor · intro j; simp [mu1] - by_cases ht : j.1 ∈ p1.active_set loc - · simp [ht]; exact h1 {val := j, property :=by simp [ht]} - simp [ht] - intro j; simp [mu1] - by_cases ht : j.1 ∈ p1.active_set loc - · simp [ht]; right; - have heq : j.1 ∈ σ ∩ p1.active_set loc := by simp [ht] - unfold active_set at heq - simp at heq - rcases heq with hl | hl - · obtain neq := p1.eq_ine_not_intersect - exfalso; - apply absurd neq; push_neg; - apply Finset.ne_empty_of_mem (a := j.1) (by simp [hl]) - exact hl - simp [ht] + by_cases h : j.1 ∈ p1.active_set loc + · simp [h]; exact h1 {val := j.1, property := by simp [h]} + · simp [h] + · intro j; simp [mu1] + by_cases h : j.1 ∈ p1.active_set loc + · simp [h] + unfold active_set at h + rw [Finset.mem_union] at h + cases h with + | inl h_eq => + exfalso + have : j.1 ∈ τ ∧ j.1 ∈ σ := ⟨h_eq, j.2⟩ + have : j.1 ∈ τ ∩ σ := Finset.mem_inter.mpr this + rw [p1.eq_ine_not_intersect] at this + simp at this + | inr h_ineq => + rw [Finset.mem_filter] at h_ineq + right + exact h_ineq.2 + · simp [h] /- Karush–Kuhn–Tucker conditions @@ -1291,20 +1417,20 @@ end Constrained_OptimizationProblem_property_finite_dimensional section Constrained_OptimizationProblem_property_linear -open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter Tendsto Matrix +open Constrained_OptimizationProblem Topology InnerProductSpace Set Filter Matrix variable {n : ℕ} {x : EuclideanSpace ℝ (Fin n)} variable {τ σ : Finset ℕ} {p : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ} theorem LinearCQ_linear_constraint_eq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ τ, ∃ a, ∃ b, (equality_constraints p i) = fun y ↦ (inner a y : ℝ) + b := by + ∀ i ∈ τ, ∃ a, ∃ b, (equality_constraints p i) = fun y ↦ ⟪a, y⟫_ℝ + b := by intro i hi simp [LinearCQ] at Lx obtain Lx := (Lx).1 i ((equality_constraint_active_set x) hi) hi exact (IsLinear_iff' _).mp Lx theorem LinearCQ_linear_constraint_gradient_eq (x : EuclideanSpace ℝ (Fin 
n)) (Lx : p.LinearCQ x) : - ∀ i ∈ τ, ∃ a, ∃ b, ((equality_constraints p i) = fun y ↦ (inner a y : ℝ) + b) ∧ + ∀ i ∈ τ, ∃ a, ∃ b, ((equality_constraints p i) = fun y ↦ ⟪a, y⟫_ℝ + b) ∧ gradient (equality_constraints p i) x = a := by intro i hi obtain ⟨a, b, hab⟩ := LinearCQ_linear_constraint_eq x Lx i hi @@ -1313,14 +1439,14 @@ theorem LinearCQ_linear_constraint_gradient_eq (x : EuclideanSpace ℝ (Fin n)) exact (gradient_of_inner_const x a).gradient theorem LinearCQ_linear_constraint_ineq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, (inequality_constraints p i) = fun y ↦ (inner a y : ℝ) + b := by + ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, (inequality_constraints p i) = fun y ↦ ⟪a, y⟫_ℝ + b := by intro i hi - simp only [LinearCQ, and_imp] at Lx + simp only [LinearCQ] at Lx obtain Lx := (Lx).2 i hi exact (IsLinear_iff' _).mp Lx theorem LinearCQ_linear_constraint_gradient_ineq (x : EuclideanSpace ℝ (Fin n)) (Lx : p.LinearCQ x) : - ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, ((inequality_constraints p i) = fun y ↦ (inner a y : ℝ) + b) ∧ + ∀ i ∈ p.active_set x ∩ σ, ∃ a, ∃ b, ((inequality_constraints p i) = fun y ↦ ⟪a, y⟫_ℝ + b) ∧ gradient (inequality_constraints p i) x = a := by intro i hi obtain ⟨a, b, hab⟩ := LinearCQ_linear_constraint_ineq x Lx i hi @@ -1355,7 +1481,7 @@ theorem Linear_linearized_feasible_directions_eq_posTangentCone obtain ⟨a, c, ⟨hab, hg⟩⟩ := LinearCQ_linear_constraint_gradient_eq x Lx i hi simp [FeasSet, FeasPoint] at xf obtain ⟨⟨_, h2⟩, ⟨_, _⟩⟩ := xf - obtain h2 := h2 i hi; rw [← h2]; rw [hab]; simp only [RCLike.inner_apply, conj_trivial] + obtain h2 := h2 i hi; rw [← h2]; rw [hab]; simp only have : ⟪a, z n⟫_ℝ = 0 := by obtain hv1 := hv1 i hi rw [hg] at hv1 @@ -1378,7 +1504,7 @@ theorem Linear_linearized_feasible_directions_eq_posTangentCone have : j ∈ τ ∩ σ := by simp [hj, hh] rw [p.eq_ine_not_intersect] at this; tauto simp [this] at hj1 - rw [← hj1.2, hab]; simp only [RCLike.inner_apply, conj_trivial] + rw [← 
hj1.2, hab]; simp only rw [inner_add_right] linarith simp [z] @@ -1389,12 +1515,12 @@ theorem Linear_linearized_feasible_directions_eq_posTangentCone linarith constructor · apply Filter.Tendsto.atTop_div_const ht_ - apply tendsto_atTop_add_nonneg_right' + refine Tendsto.atTop_add_nonneg ?_ ?_ · exact tendsto_natCast_atTop_atTop - apply Filter.Eventually.of_forall; exact fun x ↦ zero_le_one' ℝ + exact fun x ↦ zero_le_one' ℝ apply tendsto_atTop_of_eventually_const (i₀ := 1) intro i hi; simp [c, z] - rw [smul_smul]; field_simp + rw [smul_smul]; field_simp; simp theorem first_order_neccessary_LinearCQ (p1 : Constrained_OptimizationProblem (EuclideanSpace ℝ (Fin n)) τ σ) diff --git a/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean b/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean index 757ec1e..7ba257f 100644 --- a/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean +++ b/Optlib/Optimality/OptimalityConditionOfUnconstrainedProblem.lean @@ -18,7 +18,7 @@ open Set InnerProductSpace x with d is less than zero. 
-/ def DescentDirection (d : E) (x : E) (_ : HasGradientAt f (f' x) x) : Prop := - inner (f' x) d < (0 : ℝ) + ⟪f' x, d⟫_ℝ < (0 : ℝ) /- For any vector d, there does not exist a descent direction for the function f @@ -30,19 +30,19 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) intro d by_contra h have : ∃ t : ℝ , f (xm + t • d) < f xm := by - have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, inner (f' (xm + a • d)) d < (0 : ℝ)) := by - let g := fun r : ℝ ↦ (inner (f' (xm + r • d)) d : ℝ) - have hg0 : g 0 = inner (f' xm) d := by simp [g] + have h₁ : ∃ T : ℝ , T > 0 ∧ (∀ a ∈ Icc (- T) T, ⟪f' (xm + a • d), d⟫_ℝ < (0 : ℝ)) := by + let g := fun r : ℝ ↦ (⟪f' (xm + r • d), d⟫_ℝ : ℝ) + have hg0 : g 0 = ⟪f' xm, d⟫_ℝ := by simp [g] have hc : ContinuousOn g univ := by - simp [g] + change ContinuousOn (fun r : ℝ => ⟪f' (xm + r • d), d⟫_ℝ) univ apply ContinuousOn.inner · apply ContinuousOn.comp hfc · apply ContinuousOn.add continuousOn_const apply ContinuousOn.smul continuousOn_id continuousOn_const - · simp + · intro _ _; simp · exact continuousOn_const - have hu : ∃ u < (0 : ℝ) , inner (f' xm) d ≤ u := by - use (inner (f' xm) d / 2) + have hu : ∃ u < (0 : ℝ) , ⟪f' xm, d⟫_ℝ ≤ u := by + use (⟪f' xm, d⟫_ℝ / 2) rw [DescentDirection] at h constructor · linarith @@ -51,7 +51,7 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) rw [← hg0] at hu2 have hc' : ∃ T, T > 0 ∧ (∀ a ∈ Icc (- T) T, g a < 0) := by have : univ ∈ nhds (0 : ℝ) := by simp - rcases continuous (ContinuousOn.continuousAt hc this) with h1 + rcases Metric.continuousAt_iff.mp (ContinuousOn.continuousAt hc this) with h1 specialize h1 (- u / 2) (by linarith) rcases h1 with ⟨T, ⟨hT1, hT2⟩⟩ use T / 2; constructor @@ -61,10 +61,10 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) simp; rw [abs_lt]; simp at ha rcases ha with ⟨ha1, ha2⟩ constructor; linarith; linarith - specialize hT2 a this - simp at hT2 - rw [abs_lt] at hT2 - rcases 
hT2 with ⟨_, hs2⟩ + specialize hT2 this + have : |g a - g 0| < -u / 2 := hT2 + rw [abs_lt] at this + rcases this with ⟨_, hs2⟩ rw [sub_lt_iff_lt_add] at hs2 apply lt_trans hs2 · linarith @@ -72,7 +72,7 @@ theorem optimal_no_descent_direction (hf : ∀ x : E, HasGradientAt f (f' x) x) use T rcases h₁ with ⟨T, ⟨hT1,hT2⟩⟩ have h₂ : ∃ t1 : ℝ, t1 ≥ -T ∧ t1 ≤ T ∧ f (xm + T • d) = - f xm + inner (f' (xm + t1 • d)) (T • d) := by + f xm + ⟪f' (xm + t1 • d), T • d⟫_ℝ := by rcases (expansion hf xm (T • d)) with ⟨ts,⟨ts1,⟨ts2,ts3⟩⟩⟩ use (ts • T) constructor @@ -111,7 +111,7 @@ theorem first_order_unconstrained (hf : ∀ x : E, HasGradientAt f (f' x) x) (mi -/ theorem first_order_convex (hf : ∀ x : E, HasGradientAt f (f' x) x) (hcon : ConvexOn ℝ univ f) (hfm : f' xm = 0) : IsMinOn f univ xm := by - have : ∀ y , f y ≥ f xm + inner (f' xm) (y - xm) := by + have : ∀ y , f y ≥ f xm + ⟪f' xm, y - xm⟫_ℝ := by intro y apply Convex_first_order_condition' (hf xm) hcon (by trivial) · trivial diff --git a/Optlib/Optimality/Weak_Duality.lean b/Optlib/Optimality/Weak_Duality.lean index 337bcfc..1a827c9 100644 --- a/Optlib/Optimality/Weak_Duality.lean +++ b/Optlib/Optimality/Weak_Duality.lean @@ -180,7 +180,7 @@ variable {E : Type _} {τ σ : Finset ℕ} variable [NormedAddCommGroup E] [InnerProductSpace ℝ E] [CompleteSpace E] variable {p : Constrained_OptimizationProblem E τ σ} -lemma ConcaveOn.sum {α 𝕜 : Type*} [OrderedSemiring 𝕜] [AddCommMonoid α][SMul 𝕜 α] +lemma ConcaveOn.sum {α 𝕜 : Type*} [Semiring 𝕜] [PartialOrder 𝕜] [IsOrderedRing 𝕜] [AddCommMonoid α][SMul 𝕜 α] {ι : Type*} [DecidableEq ι] {s : Finset ι} {t : s → α → 𝕜} {d : Set α} (h : ∀ i : s, ConcaveOn 𝕜 d (t i)) (hd : Convex 𝕜 d): ConcaveOn 𝕜 d (fun x => ∑ i : s, t i x) := by @@ -203,14 +203,14 @@ theorem convex_problem_convex_Lagrange {p : Constrained_OptimizationProblem E τ unfold Lagrange_function apply ConvexOn.sub · apply ConvexOn.sub h - simp [hτ]; apply concaveOn_const 0 - exact convex_univ - apply ConcaveOn.sum _ convex_univ - 
intro i - apply ConcaveOn.smul - · unfold KKT_point at hKKT - exact hKKT.2.2.1 i - exact hi i i.2 + cases hτ + simpa using (concaveOn_const (0 : ℝ) convex_univ) + apply ConcaveOn.sum + · intro i + apply ConcaveOn.smul + · exact hKKT.2.2.1 i + · exact hi i i.2 + · exact convex_univ omit [CompleteSpace E] in theorem diff_problem_diff_Lagrange {p : Constrained_OptimizationProblem E τ σ} @@ -223,7 +223,7 @@ theorem diff_problem_diff_Lagrange {p : Constrained_OptimizationProblem E τ σ} · apply DifferentiableAt.sub · exact hf simp [hτ] - apply DifferentiableAt.sum + refine DifferentiableAt.fun_sum ?_-- DifferentiableAt.sum intro i _ apply DifferentiableAt.const_mul _ (lambda2 i) apply conti i i.2 diff --git a/lake-manifest.json b/lake-manifest.json index f0afaa7..3dc230f 100644 --- a/lake-manifest.json +++ b/lake-manifest.json @@ -1,52 +1,32 @@ {"version": "1.1.0", "packagesDir": ".lake/packages", "packages": - [{"url": "https://github.com/leanprover-community/batteries", + [{"url": "https://github.com/leanprover-community/mathlib4", "type": "git", "subDir": null, - "scope": "leanprover-community", - "rev": "31a10a332858d6981dbcf55d54ee51680dd75f18", - "name": "batteries", - "manifestFile": "lake-manifest.json", - "inputRev": "main", - "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/quote4", - "type": "git", - "subDir": null, - "scope": "leanprover-community", - "rev": "1357f4f49450abb9dfd4783e38219f4ce84f9785", - "name": "Qq", + "scope": "", + "rev": "efcc0aa5d8cb0da159f343000d325b6f33f8942b", + "name": "mathlib", "manifestFile": "lake-manifest.json", "inputRev": "master", - "inherited": true, + "inherited": false, "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover-community/aesop", + {"url": "https://github.com/leanprover-community/plausible", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "5f934891e11d70a1b86e302fdf9cecfc21e8de46", - "name": "aesop", + "rev": 
"9f492660e9837df43fd885a2ad05c520da9ff9f5", + "name": "plausible", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/ProofWidgets4", + {"url": "https://github.com/leanprover-community/LeanSearchClient", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "23268f52d3505955de3c26a42032702c25cfcbf8", - "name": "proofwidgets", - "manifestFile": "lake-manifest.json", - "inputRev": "v0.0.44", - "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/lean4-cli", - "type": "git", - "subDir": null, - "scope": "leanprover", - "rev": "2cf1030dc2ae6b3632c84a09350b675ef3e347d0", - "name": "Cli", + "rev": "99657ad92e23804e279f77ea6dbdeebaa1317b98", + "name": "LeanSearchClient", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, @@ -55,81 +35,61 @@ "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "984d7ee170b75d6b03c0903e0b750ee2c6d1e3fb", + "rev": "90f3b0f429411beeeb29bbc248d799c18a2d520d", "name": "importGraph", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/LeanSearchClient", + {"url": "https://github.com/leanprover-community/ProofWidgets4", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": "7bedaed1ef024add1e171cc17706b012a9a37802", - "name": "LeanSearchClient", + "rev": "556caed0eadb7901e068131d1be208dd907d07a2", + "name": "proofwidgets", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "v0.0.74", "inherited": true, - "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/plausible", + "configFile": "lakefile.lean"}, + {"url": "https://github.com/leanprover-community/aesop", "type": "git", "subDir": null, "scope": "leanprover-community", - "rev": 
"d212dd74414e997653cd3484921f4159c955ccca", - "name": "plausible", + "rev": "9e8de5716b162ec8983a89711a186d13ff871c22", + "name": "aesop", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "master", "inherited": true, "configFile": "lakefile.toml"}, - {"url": "https://github.com/leanprover-community/mathlib4", - "type": "git", - "subDir": null, - "scope": "", - "rev": "d7317655e2826dc1f1de9a0c138db2775c4bb841", - "name": "mathlib", - "manifestFile": "lake-manifest.json", - "inputRev": "v4.13.0", - "inherited": false, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/acmepjz/md4lean", + {"url": "https://github.com/leanprover-community/quote4", "type": "git", "subDir": null, - "scope": "", - "rev": "5e95f4776be5e048364f325c7e9d619bb56fb005", - "name": "MD4Lean", + "scope": "leanprover-community", + "rev": "345a958916d27982d4ecb4500fba0ebb21096651", + "name": "Qq", "manifestFile": "lake-manifest.json", - "inputRev": "main", + "inputRev": "master", "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/fgdorais/lean4-unicode-basic", + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover-community/batteries", "type": "git", "subDir": null, - "scope": "", - "rev": "107e98b3e7603628d9bfd817b4704488d8a25e96", - "name": "UnicodeBasic", + "scope": "leanprover-community", + "rev": "e91bd23a55766cfbe2a0bad429057e998b4ed370", + "name": "batteries", "manifestFile": "lake-manifest.json", "inputRev": "main", "inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/dupuisf/BibtexQuery", + "configFile": "lakefile.toml"}, + {"url": "https://github.com/leanprover/lean4-cli", "type": "git", "subDir": null, - "scope": "", - "rev": "bdc2fc30b1e834b294759a5d391d83020a90058e", - "name": "BibtexQuery", + "scope": "leanprover", + "rev": "b62fd39acc32da6fb8bb160c82d1bbc3cb3c186e", + "name": "Cli", "manifestFile": "lake-manifest.json", - "inputRev": "master", + "inputRev": "main", 
"inherited": true, - "configFile": "lakefile.lean"}, - {"url": "https://github.com/leanprover/doc-gen4.git", - "type": "git", - "subDir": null, - "scope": "", - "rev": "c2156beadb1a4d049ff3b19fe396c5403025aac5", - "name": "«doc-gen4»", - "manifestFile": "lake-manifest.json", - "inputRev": "c2156beadb1a4d049ff3b19fe396c5403025aac5", - "inherited": false, - "configFile": "lakefile.lean"}], + "configFile": "lakefile.toml"}], "name": "optlib", "lakeDir": ".lake"} diff --git a/lakefile.lean b/lakefile.lean index 26f93ec..b4409ab 100644 --- a/lakefile.lean +++ b/lakefile.lean @@ -10,7 +10,7 @@ package optlib where @[default_target] lean_lib Optlib where -require mathlib from git "https://github.com/leanprover-community/mathlib4"@"v4.13.0" +require mathlib from git "https://github.com/leanprover-community/mathlib4"@"master" meta if get_config? env = some "CI_BUILD" then require «doc-gen4» from git