From 9f724c50e2ac3c395e1309498f2667599b95bd0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Wed, 30 Oct 2024 16:48:46 +0100 Subject: [PATCH 01/13] Set default optimizer to :adam --- lib/scholar/linear/logistic_regression.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index c49887e2..2d8c2b60 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -34,7 +34,7 @@ defmodule Scholar.Linear.LogisticRegression do ], optimizer: [ type: {:custom, Scholar.Options, :optimizer, []}, - default: :sgd, + default: :adam, doc: """ The optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details). """ From 045684f32116c861f3474049fe0025ac21b87f67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Wed, 30 Oct 2024 16:49:28 +0100 Subject: [PATCH 02/13] Remove :mode from docs --- lib/scholar/linear/logistic_regression.ex | 4 ---- 1 file changed, 4 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 2d8c2b60..0b232c2f 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -68,10 +68,6 @@ defmodule Scholar.Linear.LogisticRegression do * `:bias` - Bias added to the decision function. - * `:mode` - Indicates whether the problem is binary classification (`:num_classes` set to 2) - or multinomial (`:num_classes` is bigger than 2). For binary classification set to `:binary`, otherwise - set to `:multinomial`. - ## Examples iex> x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]]) From eec4ebf3d09ebaf1c7aab3d40bf5891d34c6c5db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Thu, 31 Oct 2024 10:11:04 +0100 Subject: [PATCH 03/13] Add average reduction in loss computation (bug fix) --- lib/scholar/linear/logistic_regression.ex | 25 +++++++++++------------ 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 0b232c2f..92a84211 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -25,13 +25,6 @@ defmodule Scholar.Linear.LogisticRegression do regression. """ ], - learning_loop_unroll: [ - type: :boolean, - default: false, - doc: ~S""" - If `true`, the learning loop is unrolled. 
- """ - ], optimizer: [ type: {:custom, Scholar.Options, :optimizer, []}, default: :adam, @@ -91,8 +84,8 @@ defmodule Scholar.Linear.LogisticRegression do "expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}" end - {n_samples, _} = Nx.shape(x) - y = LinearHelpers.validate_y_shape(y, n_samples, __MODULE__) + {num_samples, num_features} = Nx.shape(x) + y = LinearHelpers.validate_y_shape(y, num_samples, __MODULE__) opts = NimbleOptions.validate!(opts, @opts_schema) @@ -104,13 +97,12 @@ defmodule Scholar.Linear.LogisticRegression do {f1, f2} -> {f1, f2} end - n = Nx.axis_size(x, -1) num_classes = opts[:num_classes] coef = Nx.broadcast( Nx.tensor(1.0, type: to_float_type(x)), - {n, num_classes} + {num_features, num_classes} ) bias = Nx.broadcast(Nx.tensor(0, type: to_float_type(x)), {num_classes}) @@ -181,7 +173,14 @@ defmodule Scholar.Linear.LogisticRegression do defnp loss_and_grad(coeff, bias, xs, ys) do value_and_grad({coeff, bias}, fn {coeff, bias} -> - -Nx.sum(ys * log_softmax(Nx.dot(xs, coeff) + bias), axes: [-1]) + xs + |> Nx.dot(coeff) + |> Nx.add(bias) + |> log_softmax() + |> Nx.multiply(ys) + |> Nx.sum(axes: [1]) + |> Nx.negate() + |> Nx.mean() end) end @@ -242,6 +241,6 @@ defmodule Scholar.Linear.LogisticRegression do > """ defn predict_probability(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do - softmax(Nx.dot(x, [1], coeff, [0]) + bias) + softmax(Nx.dot(x, coeff) + bias) end end From b70631b797b4897cb70e6b7e770d778de8b3c361 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Sun, 11 Jan 2026 21:56:03 +0100 Subject: [PATCH 04/13] Update --- lib/scholar/linear/logistic_regression.ex | 218 ++++++++++++------ .../linear/logistic_regression_test.exs | 49 +++- 2 files changed, 194 insertions(+), 73 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 92a84211..b8553787 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -1,12 +1,11 @@ defmodule Scholar.Linear.LogisticRegression do @moduledoc """ - Logistic regression in both binary and multinomial variants. + Multiclass logistic regression. Time complexity is $O(N * K * I)$ where $N$ is the number of samples, $K$ is the number of features, and $I$ is the number of iterations. """ import Nx.Defn import Scholar.Shared - alias Scholar.Linear.LinearHelpers @derive {Nx.Container, containers: [:coefficients, :bias]} defstruct [:coefficients, :bias] @@ -15,28 +14,44 @@ defmodule Scholar.Linear.LogisticRegression do num_classes: [ required: true, type: :pos_integer, - doc: "number of classes contained in the input tensors." + doc: "Number of output classes." ], - iterations: [ + max_iterations: [ type: :pos_integer, default: 1000, - doc: """ - number of iterations of gradient descent performed inside logistic - regression. - """ + doc: "Maximum number of gradient descent iterations to perform." ], optimizer: [ type: {:custom, Scholar.Options, :optimizer, []}, - default: :adam, + default: :sgd, + doc: """ + Optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details). + """ + ], + alpha: [ + type: {:custom, Scholar.Options, :non_negative_number, []}, + default: 1.0, doc: """ - The optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details). + Constant that multiplies the regularization term, controlling regularization strength. + If 0, no regularization is applied. 
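      How the penalty is split between L1 and L2 terms is controlled by the `:l1_ratio` option below.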
""" ], - eps: [ - type: :float, - default: 1.0e-8, - doc: - "The convergence tolerance. If the `abs(loss) < size(x) * :eps`, the algorithm is considered to have converged." + l1_ratio: [ + type: {:custom, Scholar.Options, :non_negative_number, []}, + default: 0.0, + doc: """ + The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. + Setting `l1_ratio` to 0 gives pure L2 regularization, and setting it to 1 gives pure L1 regularization. + For values between 0 and 1, a penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2` is used. + """ + ], + tol: [ + type: {:custom, Scholar.Options, :non_negative_number, []}, + default: 1.0e-4, + doc: """ + Convergence tolerance. If the infinity norm of the gradient is less than `:tol`, + the algorithm is considered to have converged. + """ ] ] @@ -46,9 +61,6 @@ defmodule Scholar.Linear.LogisticRegression do Fits a logistic regression model for sample inputs `x` and sample targets `y`. - Depending on number of classes the function chooses either binary - or multinomial logistic regression. - ## Options #{NimbleOptions.docs(@opts_schema)} @@ -69,26 +81,41 @@ defmodule Scholar.Linear.LogisticRegression do %Scholar.Linear.LogisticRegression{ coefficients: Nx.tensor( [ - [2.5531527996063232, -0.5531544089317322], - [-0.35652396082878113, 2.3565237522125244] + [0.09002052247524261, -0.09002052992582321], + [-0.1521512120962143, 0.1521512120962143] ] ), - bias: Nx.tensor( - [-0.28847914934158325, 0.28847917914390564] - ) + bias: Nx.tensor([-0.05300388112664223, 0.053003907203674316]) } """ deftransform fit(x, y, opts \\ []) do if Nx.rank(x) != 2 do raise ArgumentError, - "expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}" + "expected x to have shape {num_samples, num_features}, got tensor with shape: #{inspect(Nx.shape(x))}" + end + + if Nx.rank(y) != 1 do + raise ArgumentError, + "expected y to have shape {num_samples}, got tensor with shape: #{inspect(Nx.shape(y))}" end {num_samples, num_features} = Nx.shape(x) - y = LinearHelpers.validate_y_shape(y, num_samples, __MODULE__) + + if Nx.axis_size(y, 0) != num_samples do + raise ArgumentError, + "expected x and y to have the same number of samples, got #{num_samples} and #{Nx.axis_size(y, 0)}" + end opts = NimbleOptions.validate!(opts, @opts_schema) + {l1_ratio, opts} = Keyword.pop!(opts, :l1_ratio) + + unless l1_ratio >= 0.0 and l1_ratio <= 1.0 do + raise ArgumentError, + "expected l1_ratio to be between 0 and 1, got: #{inspect(l1_ratio)}" + end + + type = to_float_type(x) {optimizer, opts} = Keyword.pop!(opts, :optimizer) {optimizer_init_fn, optimizer_update_fn} = @@ -101,18 +128,35 @@ defmodule Scholar.Linear.LogisticRegression do coef = Nx.broadcast( - Nx.tensor(1.0, type: to_float_type(x)), + Nx.tensor(0.0, type: type), {num_features, num_classes} ) - bias = Nx.broadcast(Nx.tensor(0, type: to_float_type(x)), {num_classes}) + bias = Nx.broadcast(Nx.tensor(0.0, type: type), {num_classes}) + + coef_optimizer_state = optimizer_init_fn.(coef) |> as_type(type) + bias_optimizer_state = optimizer_init_fn.(bias) |> as_type(type) - coef_optimizer_state = optimizer_init_fn.(coef) |> as_type(to_float_type(x)) - bias_optimizer_state = optimizer_init_fn.(bias) |> as_type(to_float_type(x)) + {alpha, opts} = Keyword.pop!(opts, :alpha) + {tol, opts} = Keyword.pop!(opts, :tol) + alpha = Nx.tensor(alpha, type: type) + l1_ratio = Nx.tensor(l1_ratio, type: type) + tol = Nx.tensor(tol, type: type) opts = Keyword.put(opts, :optimizer_update_fn, optimizer_update_fn) - fit_n(x, 
y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts) + fit_n( + x, + y, + coef, + bias, + alpha, + l1_ratio, + tol, + coef_optimizer_state, + bias_optimizer_state, + opts + ) end deftransformp as_type(container, target_type) do @@ -127,11 +171,20 @@ defmodule Scholar.Linear.LogisticRegression do end) end - # Logistic Regression training loop - - defnp fit_n(x, y, coef, bias, coef_optimizer_state, bias_optimizer_state, opts) do + defnp fit_n( + x, + y, + coef, + bias, + alpha, + l1_ratio, + tol, + coef_optimizer_state, + bias_optimizer_state, + opts + ) do num_samples = Nx.axis_size(x, 0) - iterations = opts[:iterations] + max_iterations = opts[:max_iterations] num_classes = opts[:num_classes] optimizer_update_fn = opts[:optimizer_update_fn] @@ -141,12 +194,15 @@ defmodule Scholar.Linear.LogisticRegression do |> Nx.broadcast({num_samples, num_classes}) |> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1)) - {{final_coef, final_bias}, _} = - while {{coef, bias}, - {x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state, - has_converged = Nx.u8(0), iter = 0}}, - iter < iterations and not has_converged do - {loss, {coef_grad, bias_grad}} = loss_and_grad(coef, bias, x, y_one_hot) + {final_coef, final_bias, _} = + while {coef, bias, + {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, coef_optimizer_state, + bias_optimizer_state, converged? = Nx.u8(0), iter = Nx.u32(0)}}, + iter < max_iterations and not converged? do + {coef_grad, bias_grad} = + grad({coef, bias}, fn {coef, bias} -> + compute_loss(coef, bias, alpha, l1_ratio, x, y_one_hot) + end) {coef_updates, coef_optimizer_state} = optimizer_update_fn.(coef_grad, coef_optimizer_state, coef) @@ -158,11 +214,12 @@ defmodule Scholar.Linear.LogisticRegression do bias = Polaris.Updates.apply_updates(bias, bias_updates) - has_converged = Nx.sum(Nx.abs(loss)) < Nx.size(x) * opts[:eps] + converged? 
= + Nx.reduce_max(Nx.abs(coef_grad)) < tol and Nx.reduce_max(Nx.abs(bias_grad)) < tol - {{coef, bias}, - {x, iterations, y_one_hot, coef_optimizer_state, bias_optimizer_state, has_converged, - iter + 1}} + {coef, bias, + {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, coef_optimizer_state, + bias_optimizer_state, converged?, iter + 1}} end %__MODULE__{ @@ -171,17 +228,42 @@ defmodule Scholar.Linear.LogisticRegression do } end - defnp loss_and_grad(coeff, bias, xs, ys) do - value_and_grad({coeff, bias}, fn {coeff, bias} -> - xs - |> Nx.dot(coeff) - |> Nx.add(bias) - |> log_softmax() - |> Nx.multiply(ys) - |> Nx.sum(axes: [1]) - |> Nx.negate() - |> Nx.mean() - end) + defnp compute_regularization(coeff, alpha, l1_ratio) do + if alpha > 0.0 do + reg = + cond do + l1_ratio == 0.0 -> + # L2 regularization + Nx.sum(coeff * coeff) + + l1_ratio == 1.0 -> + # L1 regularization + Nx.sum(Nx.abs(coeff)) + + # Elastic-Net regularization + true -> + l1_ratio * Nx.sum(Nx.abs(coeff)) + + (1 - l1_ratio) * Nx.sum(coeff * coeff) + end + + alpha * reg + else + 0.0 + end + end + + defnp compute_loss(coeff, bias, alpha, l1_ratio, xs, ys) do + reg = compute_regularization(coeff, alpha, l1_ratio) + + xs + |> Nx.dot(coeff) + |> Nx.add(bias) + |> log_softmax() + |> Nx.multiply(ys) + |> Nx.sum(axes: [1]) + |> Nx.negate() + |> Nx.mean() + |> Nx.add(reg) end defnp log_softmax(x) do @@ -214,14 +296,16 @@ defmodule Scholar.Linear.LogisticRegression do iex> y = Nx.tensor([1, 0, 1]) iex> model = Scholar.Linear.LogisticRegression.fit(x, y, num_classes: 2) iex> Scholar.Linear.LogisticRegression.predict(model, Nx.tensor([[-3.0, 5.0]])) - #Nx.Tensor< - s32[1] - [1] - > + Nx.tensor([1]) """ defn predict(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do - inter = Nx.dot(x, [1], coeff, [0]) + bias - Nx.argmax(inter, axis: 1) + if Nx.rank(x) != 2 do + raise ArgumentError, + "expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}" + end + + logits = Nx.dot(x, coeff) + bias + Nx.argmax(logits, axis: 1) end @doc """ @@ -233,14 +317,14 @@ defmodule Scholar.Linear.LogisticRegression do iex> y = Nx.tensor([1, 0, 1]) iex> model = Scholar.Linear.LogisticRegression.fit(x, y, num_classes: 2) iex> Scholar.Linear.LogisticRegression.predict_probability(model, Nx.tensor([[-3.0, 5.0]])) - #Nx.Tensor< - f32[1][2] - [ - [6.470913388456623e-11, 1.0] - ] - > + Nx.tensor([[0.10269401967525482, 0.8973060250282288]]) """ defn predict_probability(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do + if Nx.rank(x) != 2 do + raise ArgumentError, + "expected x to have shape {n_samples, n_features}, got tensor with shape: #{inspect(Nx.shape(x))}" + end + softmax(Nx.dot(x, coeff) + bias) end end diff --git a/test/scholar/linear/logistic_regression_test.exs b/test/scholar/linear/logistic_regression_test.exs index 8fc2d374..e8a630a6 100644 --- a/test/scholar/linear/logistic_regression_test.exs +++ b/test/scholar/linear/logistic_regression_test.exs @@ -45,7 +45,11 @@ defmodule Scholar.Linear.LogisticRegressionTest do y = Nx.tensor([1, 2]) assert_raise NimbleOptions.ValidationError, - "invalid value for :optimizer option: expected :optimizer to be either a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple", + """ + invalid value for :optimizer option: expected :optimizer to be either \ + a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple + """, + # "invalid value for :optimizer option: expected :optimizer to be either a valid 
0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple", fn -> LogisticRegression.fit(x, y, num_classes: 2, @@ -54,14 +58,14 @@ defmodule Scholar.Linear.LogisticRegressionTest do end end - test "when :iterations is not a positive integer" do + test "when :max_iterations is not a positive integer" do x = Nx.tensor([[1, 2], [3, 4]]) y = Nx.tensor([1, 2]) assert_raise NimbleOptions.ValidationError, - "invalid value for :iterations option: expected positive integer, got: 0", + "invalid value for :max_iterations option: expected positive integer, got: 0", fn -> - LogisticRegression.fit(x, y, num_classes: 2, iterations: 0) + LogisticRegression.fit(x, y, num_classes: 2, max_iterations: 0) end end @@ -70,7 +74,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do y = Nx.tensor([1, 2]) assert_raise ArgumentError, - "expected x to have shape {n_samples, n_features}, got tensor with shape: {2}", + "expected x to have shape {num_samples, num_features}, got tensor with shape: {2}", fn -> LogisticRegression.fit(x, y, num_classes: 2) end end @@ -79,7 +83,10 @@ defmodule Scholar.Linear.LogisticRegressionTest do y = Nx.tensor([[0, 1], [1, 0]]) assert_raise ArgumentError, - "Scholar.Linear.LogisticRegression expected y to have shape {n_samples}, got tensor with shape: {2, 2}", + """ + Scholar.Linear.LogisticRegression expected y to have shape {num_samples}, \ + got tensor with shape: {2, 2} + """, fn -> LogisticRegression.fit(x, y, num_classes: 2) end end end @@ -97,4 +104,34 @@ defmodule Scholar.Linear.LogisticRegressionTest do assert pred == col_pred end end + + describe "linearly separable data" do + test "1D" do + key = Nx.Random.key(12) + {x1, key} = Nx.Random.uniform(key, -1.0, 0.0, shape: {1000, 1}) + {x2, _key} = Nx.Random.uniform(key, 0.0, 1.0, shape: {1000, 1}) + x = Nx.concatenate([x1, x2]) + y1 = Nx.broadcast(0, {1000}) + y2 = Nx.broadcast(1, {1000}) + y = Nx.concatenate([y1, y2]) + model = LogisticRegression.fit(x, y, num_classes: 2) + y_pred = LogisticRegression.predict(model, x) + accuracy = Scholar.Metrics.Classification.accuracy(y, y_pred) + assert Nx.equal(accuracy, 1) + end + + test "2D" do + key = Nx.Random.key(12) + {x1, key} = Nx.Random.uniform(key, -1.0, 0.0, shape: {1000, 2}) + {x2, _key} = Nx.Random.uniform(key, 0.0, 1.0, shape: {1000, 2}) + x = Nx.concatenate([x1, x2]) + y1 = Nx.broadcast(0, {1000}) + y2 = Nx.broadcast(1, {1000}) + y = Nx.concatenate([y1, y2]) + model = LogisticRegression.fit(x, y, num_classes: 2) + y_pred = LogisticRegression.predict(model, x) + accuracy = Scholar.Metrics.Classification.accuracy(y, y_pred) + assert Nx.equal(accuracy, 1) + end + end end From aff58fbe0b02aac103ff482bdb8f1adee94ccdd8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Sun, 11 Jan 2026 22:06:24 +0100 Subject: [PATCH 05/13] Update --- lib/scholar/linear/logistic_regression.ex | 70 +++++++++++------------ 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index b8553787..0db6b75f 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -126,16 +126,16 @@ defmodule Scholar.Linear.LogisticRegression do num_classes = opts[:num_classes] - coef = + w = Nx.broadcast( Nx.tensor(0.0, type: type), {num_features, num_classes} ) - bias = Nx.broadcast(Nx.tensor(0.0, type: type), {num_classes}) + b = Nx.broadcast(Nx.tensor(0.0, type: type), {num_classes}) - coef_optimizer_state = optimizer_init_fn.(coef) |> 
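# Note: as_type/2 (added in PATCH 04) casts only the float leaves of the
# optimizer state to the input's float type; non-float leaves are left untouched.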
as_type(type) - bias_optimizer_state = optimizer_init_fn.(bias) |> as_type(type) + w_optimizer_state = optimizer_init_fn.(w) |> as_type(type) + b_optimizer_state = optimizer_init_fn.(b) |> as_type(type) {alpha, opts} = Keyword.pop!(opts, :alpha) {tol, opts} = Keyword.pop!(opts, :tol) @@ -148,13 +148,13 @@ defmodule Scholar.Linear.LogisticRegression do fit_n( x, y, - coef, - bias, + w, + b, alpha, l1_ratio, tol, - coef_optimizer_state, - bias_optimizer_state, + w_optimizer_state, + b_optimizer_state, opts ) end @@ -174,13 +174,13 @@ defmodule Scholar.Linear.LogisticRegression do defnp fit_n( x, y, - coef, - bias, + w, + b, alpha, l1_ratio, tol, - coef_optimizer_state, - bias_optimizer_state, + w_optimizer_state, + b_optimizer_state, opts ) do num_samples = Nx.axis_size(x, 0) @@ -194,37 +194,37 @@ defmodule Scholar.Linear.LogisticRegression do |> Nx.broadcast({num_samples, num_classes}) |> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1)) - {final_coef, final_bias, _} = - while {coef, bias, - {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, coef_optimizer_state, - bias_optimizer_state, converged? = Nx.u8(0), iter = Nx.u32(0)}}, + {coef, bias, _} = + while {w, b, + {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state, + b_optimizer_state, converged? = Nx.u8(0), iter = Nx.u32(0)}}, iter < max_iterations and not converged? do - {coef_grad, bias_grad} = - grad({coef, bias}, fn {coef, bias} -> - compute_loss(coef, bias, alpha, l1_ratio, x, y_one_hot) + {w_grad, b_grad} = + grad({w, b}, fn {w, b} -> + compute_loss(w, b, alpha, l1_ratio, x, y_one_hot) end) - {coef_updates, coef_optimizer_state} = - optimizer_update_fn.(coef_grad, coef_optimizer_state, coef) + {w_updates, w_optimizer_state} = + optimizer_update_fn.(w_grad, w_optimizer_state, w) - coef = Polaris.Updates.apply_updates(coef, coef_updates) + w = Polaris.Updates.apply_updates(w, w_updates) - {bias_updates, bias_optimizer_state} = - optimizer_update_fn.(bias_grad, bias_optimizer_state, bias) + {b_updates, b_optimizer_state} = + optimizer_update_fn.(b_grad, b_optimizer_state, b) - bias = Polaris.Updates.apply_updates(bias, bias_updates) + b = Polaris.Updates.apply_updates(b, bias_updates) converged? 
= - Nx.reduce_max(Nx.abs(coef_grad)) < tol and Nx.reduce_max(Nx.abs(bias_grad)) < tol + Nx.reduce_max(Nx.abs(w_grad)) < tol and Nx.reduce_max(Nx.abs(bias_grad)) < tol - {coef, bias, - {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, coef_optimizer_state, - bias_optimizer_state, converged?, iter + 1}} + {w, b, + {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state, + b_optimizer_state, converged?, iter + 1}} end %__MODULE__{ - coefficients: final_coef, - bias: final_bias + coefficients: coef, + bias: bias } end @@ -252,12 +252,12 @@ defmodule Scholar.Linear.LogisticRegression do end end - defnp compute_loss(coeff, bias, alpha, l1_ratio, xs, ys) do - reg = compute_regularization(coeff, alpha, l1_ratio) + defnp compute_loss(w, b, alpha, l1_ratio, xs, ys) do + reg = compute_regularization(w, alpha, l1_ratio) xs - |> Nx.dot(coeff) - |> Nx.add(bias) + |> Nx.dot(w) + |> Nx.add(b) |> log_softmax() |> Nx.multiply(ys) |> Nx.sum(axes: [1]) From 01e5b5d7dd6b9a671d7afffd9bf1ad2ada305f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Sun, 11 Jan 2026 22:23:10 +0100 Subject: [PATCH 06/13] Bug fix --- lib/scholar/linear/logistic_regression.ex | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 0db6b75f..d43c10d0 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -212,10 +212,10 @@ defmodule Scholar.Linear.LogisticRegression do {b_updates, b_optimizer_state} = optimizer_update_fn.(b_grad, b_optimizer_state, b) - b = Polaris.Updates.apply_updates(b, bias_updates) + b = Polaris.Updates.apply_updates(b, b_updates) converged? = - Nx.reduce_max(Nx.abs(w_grad)) < tol and Nx.reduce_max(Nx.abs(bias_grad)) < tol + Nx.reduce_max(Nx.abs(w_grad)) < tol and Nx.reduce_max(Nx.abs(b_grad)) < tol {w, b, {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state, @@ -228,22 +228,22 @@ defmodule Scholar.Linear.LogisticRegression do } end - defnp compute_regularization(coeff, alpha, l1_ratio) do + defnp compute_regularization(w, alpha, l1_ratio) do if alpha > 0.0 do reg = cond do l1_ratio == 0.0 -> # L2 regularization - Nx.sum(coeff * coeff) + Nx.sum(w * w) l1_ratio == 1.0 -> # L1 regularization - Nx.sum(Nx.abs(coeff)) + Nx.sum(Nx.abs(w)) # Elastic-Net regularization true -> - l1_ratio * Nx.sum(Nx.abs(coeff)) + - (1 - l1_ratio) * Nx.sum(coeff * coeff) + l1_ratio * Nx.sum(Nx.abs(w)) + + (1 - l1_ratio) * Nx.sum(w * w) end alpha * reg From f16795dfe41c920eabb2a9b629c4266664fa0fe5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Wed, 14 Jan 2026 15:07:35 +0100 Subject: [PATCH 07/13] Fix some unit tests --- test/scholar/linear/logistic_regression_test.exs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/test/scholar/linear/logistic_regression_test.exs b/test/scholar/linear/logistic_regression_test.exs index e8a630a6..c6ac0b2b 100644 --- a/test/scholar/linear/logistic_regression_test.exs +++ b/test/scholar/linear/logistic_regression_test.exs @@ -47,9 +47,8 @@ defmodule Scholar.Linear.LogisticRegressionTest do assert_raise NimbleOptions.ValidationError, """ invalid value for :optimizer option: expected :optimizer to be either \ - a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple + a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple\ """, - # "invalid value for :optimizer option: expected 
:optimizer to be either a valid 0-arity function in Polaris.Optimizers or a valid {init_fn, update_fn} tuple", fn -> LogisticRegression.fit(x, y, num_classes: 2, @@ -84,7 +83,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do assert_raise ArgumentError, """ - Scholar.Linear.LogisticRegression expected y to have shape {num_samples}, \ + expected y to have shape {num_samples}, \ got tensor with shape: {2, 2} """, fn -> LogisticRegression.fit(x, y, num_classes: 2) end From dc280a6c1327a0dda1260c6f4843aeaa48342a74 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Wed, 14 Jan 2026 19:32:03 +0100 Subject: [PATCH 08/13] Update tests --- test/scholar/linear/logistic_regression_test.exs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/scholar/linear/logistic_regression_test.exs b/test/scholar/linear/logistic_regression_test.exs index c6ac0b2b..69cda7be 100644 --- a/test/scholar/linear/logistic_regression_test.exs +++ b/test/scholar/linear/logistic_regression_test.exs @@ -10,7 +10,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do res = LogisticRegression.predict(model, x_test) accuracy = Scholar.Metrics.Classification.accuracy(res, y_test) - assert Nx.greater_equal(accuracy, 0.96) == Nx.u8(1) + assert Nx.to_number(accuracy) >= 0.96 end describe "errors" do @@ -84,7 +84,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do assert_raise ArgumentError, """ expected y to have shape {num_samples}, \ - got tensor with shape: {2, 2} + got tensor with shape: {2, 2}\ """, fn -> LogisticRegression.fit(x, y, num_classes: 2) end end @@ -116,7 +116,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do model = LogisticRegression.fit(x, y, num_classes: 2) y_pred = LogisticRegression.predict(model, x) accuracy = Scholar.Metrics.Classification.accuracy(y, y_pred) - assert Nx.equal(accuracy, 1) + assert Nx.to_number(accuracy) == 1.0 end test "2D" do @@ -130,7 +130,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do model = LogisticRegression.fit(x, y, num_classes: 2) y_pred = LogisticRegression.predict(model, x) accuracy = Scholar.Metrics.Classification.accuracy(y, y_pred) - assert Nx.equal(accuracy, 1) + assert Nx.to_number(accuracy) == 1.0 end end end From 524d163117f902934da3f97f19c959a2d3f5acfa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Fri, 16 Jan 2026 12:55:14 +0100 Subject: [PATCH 09/13] Update --- lib/scholar/linear/logistic_regression.ex | 250 +++++++----------- .../linear/logistic_regression_test.exs | 10 +- 2 files changed, 106 insertions(+), 154 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index d43c10d0..9c1f1301 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -21,30 +21,14 @@ defmodule Scholar.Linear.LogisticRegression do default: 1000, doc: "Maximum number of gradient descent iterations to perform." ], - optimizer: [ - type: {:custom, Scholar.Options, :optimizer, []}, - default: :sgd, - doc: """ - Optimizer name or {init, update} pair of functions (see `Polaris.Optimizers` for more details). - """ - ], alpha: [ type: {:custom, Scholar.Options, :non_negative_number, []}, default: 1.0, doc: """ - Constant that multiplies the regularization term, controlling regularization strength. + Constant that multiplies the L2 regularization term, controlling regularization strength. If 0, no regularization is applied. 
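      Concretely, `alpha * Nx.sum(w * w)` is added to the loss (see `compute_loss/5` below).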
""" ], - l1_ratio: [ - type: {:custom, Scholar.Options, :non_negative_number, []}, - default: 0.0, - doc: """ - The Elastic-Net mixing parameter, with `0 <= l1_ratio <= 1`. - Setting `l1_ratio` to 0 gives pure L2 regularization, and setting it to 1 gives pure L1 regularization. - For values between 0 and 1, a penalty of the form `l1_ratio * L1 + (1 - l1_ratio) * L2` is used. - """ - ], tol: [ type: {:custom, Scholar.Options, :non_negative_number, []}, default: 1.0e-4, @@ -81,11 +65,10 @@ defmodule Scholar.Linear.LogisticRegression do %Scholar.Linear.LogisticRegression{ coefficients: Nx.tensor( [ - [0.09002052247524261, -0.09002052992582321], - [-0.1521512120962143, 0.1521512120962143] - ] + [0.0915902629494667, -0.09159023314714432], + [-0.1507941037416458, 0.1507941335439682] ), - bias: Nx.tensor([-0.05300388112664223, 0.053003907203674316]) + bias: Nx.tensor([-0.06566660106182098, 0.06566664576530457]) } """ deftransform fit(x, y, opts \\ []) do @@ -99,7 +82,7 @@ defmodule Scholar.Linear.LogisticRegression do "expected y to have shape {num_samples}, got tensor with shape: #{inspect(Nx.shape(y))}" end - {num_samples, num_features} = Nx.shape(x) + num_samples = Nx.axis_size(x, 0) if Nx.axis_size(y, 0) != num_samples do raise ArgumentError, @@ -108,24 +91,25 @@ defmodule Scholar.Linear.LogisticRegression do opts = NimbleOptions.validate!(opts, @opts_schema) - {l1_ratio, opts} = Keyword.pop!(opts, :l1_ratio) - - unless l1_ratio >= 0.0 and l1_ratio <= 1.0 do - raise ArgumentError, - "expected l1_ratio to be between 0 and 1, got: #{inspect(l1_ratio)}" - end - type = to_float_type(x) - {optimizer, opts} = Keyword.pop!(opts, :optimizer) - {optimizer_init_fn, optimizer_update_fn} = - case optimizer do - atom when is_atom(atom) -> apply(Polaris.Optimizers, atom, []) - {f1, f2} -> {f1, f2} - end + {alpha, opts} = Keyword.pop!(opts, :alpha) + alpha = Nx.tensor(alpha, type: type) + {tol, opts} = Keyword.pop!(opts, :tol) + tol = Nx.tensor(tol, type: type) + {max_iterations, opts} = Keyword.pop!(opts, :max_iterations) + max_iterations = Nx.tensor(max_iterations, type: :u32) + + fit_n(x, y, alpha, max_iterations, tol, opts) + end + defnp fit_n(x, y, alpha, max_iterations, tol, opts) do num_classes = opts[:num_classes] + {num_samples, num_features} = Nx.shape(x) + + type = to_float_type(x) + # Initialize weights and bias with zeros w = Nx.broadcast( Nx.tensor(0.0, type: type), @@ -134,92 +118,64 @@ defmodule Scholar.Linear.LogisticRegression do b = Nx.broadcast(Nx.tensor(0.0, type: type), {num_classes}) - w_optimizer_state = optimizer_init_fn.(w) |> as_type(type) - b_optimizer_state = optimizer_init_fn.(b) |> as_type(type) - - {alpha, opts} = Keyword.pop!(opts, :alpha) - {tol, opts} = Keyword.pop!(opts, :tol) - alpha = Nx.tensor(alpha, type: type) - l1_ratio = Nx.tensor(l1_ratio, type: type) - tol = Nx.tensor(tol, type: type) - - opts = Keyword.put(opts, :optimizer_update_fn, optimizer_update_fn) - - fit_n( - x, - y, - w, - b, - alpha, - l1_ratio, - tol, - w_optimizer_state, - b_optimizer_state, - opts - ) - end - - deftransformp as_type(container, target_type) do - Nx.Defn.Composite.traverse(container, fn t -> - type = Nx.type(t) - - if Nx.Type.float?(type) and not Nx.Type.complex?(type) do - Nx.as_type(t, target_type) - else - t - end - end) - end - - defnp fit_n( - x, - y, - w, - b, - alpha, - l1_ratio, - tol, - w_optimizer_state, - b_optimizer_state, - opts - ) do - num_samples = Nx.axis_size(x, 0) - max_iterations = opts[:max_iterations] - num_classes = opts[:num_classes] - optimizer_update_fn = 
opts[:optimizer_update_fn] - + # One-hot encoding of target labels y_one_hot = y |> Nx.new_axis(1) |> Nx.broadcast({num_samples, num_classes}) |> Nx.equal(Nx.iota({num_samples, num_classes}, axis: 1)) - {coef, bias, _} = - while {w, b, - {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state, - b_optimizer_state, converged? = Nx.u8(0), iter = Nx.u32(0)}}, - iter < max_iterations and not converged? do - {w_grad, b_grad} = - grad({w, b}, fn {w, b} -> - compute_loss(w, b, alpha, l1_ratio, x, y_one_hot) - end) - - {w_updates, w_optimizer_state} = - optimizer_update_fn.(w_grad, w_optimizer_state, w) + # Define Armijo parameters + c = Nx.tensor(1.0e-4, type: type) + rho = Nx.tensor(0.5, type: type) - w = Polaris.Updates.apply_updates(w, w_updates) + eta_min = + case type do + {:f, 32} -> Nx.tensor(1.0e-6, type: type) + {:f, 64} -> Nx.tensor(1.0e-8, type: type) + _ -> Nx.tensor(1.0e-6, type: type) + end - {b_updates, b_optimizer_state} = - optimizer_update_fn.(b_grad, b_optimizer_state, b) + armijo_params = %{ + c: c, + rho: rho, + eta_min: eta_min + } - b = Polaris.Updates.apply_updates(b, b_updates) + {coef, bias, _} = + while {w, b, + {alpha, x, y_one_hot, max_iterations, tol, armijo_params, iter = Nx.u32(0), + converged? = Nx.u8(0)}}, + iter < max_iterations and not converged? do + logits = Nx.dot(x, w) + b + probabilities = softmax(logits) + residuals = probabilities - y_one_hot + + # Compute loss + loss = + logits + |> log_softmax() + |> Nx.multiply(y_one_hot) + |> Nx.sum(axes: [1]) + |> Nx.mean() + |> Nx.negate() + |> Nx.add(alpha * Nx.sum(w * w)) + + # Compute gradients + grad_w = Nx.dot(x, [0], residuals, [0]) / num_samples + 2 * alpha * w + grad_b = Nx.sum(residuals, axes: [0]) / num_samples + + # Perform line search to find step size + eta = + armijo_line_search(w, b, alpha, x, y_one_hot, loss, grad_w, grad_b, armijo_params) + + w = w - eta * grad_w + b = b - eta * grad_b converged? 
= - Nx.reduce_max(Nx.abs(w_grad)) < tol and Nx.reduce_max(Nx.abs(b_grad)) < tol + Nx.reduce_max(Nx.abs(grad_w)) < tol and Nx.reduce_max(Nx.abs(grad_b)) < tol - {w, b, - {x, y_one_hot, max_iterations, alpha, l1_ratio, tol, w_optimizer_state, - b_optimizer_state, converged?, iter + 1}} + {w, b, {alpha, x, y_one_hot, max_iterations, tol, armijo_params, iter + 1, converged?}} end %__MODULE__{ @@ -228,63 +184,59 @@ defmodule Scholar.Linear.LogisticRegression do } end - defnp compute_regularization(w, alpha, l1_ratio) do - if alpha > 0.0 do - reg = - cond do - l1_ratio == 0.0 -> - # L2 regularization - Nx.sum(w * w) - - l1_ratio == 1.0 -> - # L1 regularization - Nx.sum(Nx.abs(w)) - - # Elastic-Net regularization - true -> - l1_ratio * Nx.sum(Nx.abs(w)) + - (1 - l1_ratio) * Nx.sum(w * w) - end - - alpha * reg - else - 0.0 - end - end + defnp armijo_line_search(w, b, alpha, x, y, loss, grad_w, grad_b, armijo_params) do + c = armijo_params[:c] + rho = armijo_params[:rho] + eta_min = armijo_params[:eta_min] - defnp compute_loss(w, b, alpha, l1_ratio, xs, ys) do - reg = compute_regularization(w, alpha, l1_ratio) + type = to_float_type(x) + dir_w = -grad_w + dir_b = -grad_b + # Directional derivative + slope = Nx.sum(dir_w * grad_w) + Nx.sum(dir_b * grad_b) + + {eta, _} = + while {eta = Nx.tensor(1.0, type: type), + {w, b, alpha, x, y, loss, dir_w, dir_b, slope, c, rho, eta_min}}, + compute_loss(w + eta * dir_w, b + eta * dir_b, alpha, x, y) > loss + c * eta * slope and + eta > eta_min do + eta = eta * rho + + {eta, {w, b, alpha, x, y, loss, dir_w, dir_b, slope, c, rho, eta_min}} + end + + eta + end - xs + defnp compute_loss(w, b, alpha, x, y) do + x |> Nx.dot(w) |> Nx.add(b) |> log_softmax() - |> Nx.multiply(ys) + |> Nx.multiply(y) |> Nx.sum(axes: [1]) - |> Nx.negate() |> Nx.mean() - |> Nx.add(reg) + |> Nx.negate() + |> Nx.add(alpha * Nx.sum(w * w)) + end + + defnp softmax(logits) do + max = stop_grad(Nx.reduce_max(logits, axes: [1], keep_axes: true)) + normalized_exp = (logits - max) |> Nx.exp() + normalized_exp / Nx.sum(normalized_exp, axes: [1], keep_axes: true) end defnp log_softmax(x) do - shifted = x - stop_grad(Nx.reduce_max(x, axes: [-1], keep_axes: true)) + shifted = x - stop_grad(Nx.reduce_max(x, axes: [1], keep_axes: true)) shifted |> Nx.exp() - |> Nx.sum(axes: [-1], keep_axes: true) + |> Nx.sum(axes: [1], keep_axes: true) |> Nx.log() |> Nx.negate() |> Nx.add(shifted) end - # Normalized softmax - - defnp softmax(t) do - max = stop_grad(Nx.reduce_max(t, axes: [-1], keep_axes: true)) - normalized_exp = (t - max) |> Nx.exp() - normalized_exp / Nx.sum(normalized_exp, axes: [-1], keep_axes: true) - end - @doc """ Makes predictions with the given `model` on inputs `x`. 
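  For each sample, the predicted class is the argmax of the logits `Nx.dot(x, coefficients) + bias`.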
@@ -317,7 +269,7 @@ defmodule Scholar.Linear.LogisticRegression do iex> y = Nx.tensor([1, 0, 1]) iex> model = Scholar.Linear.LogisticRegression.fit(x, y, num_classes: 2) iex> Scholar.Linear.LogisticRegression.predict_probability(model, Nx.tensor([[-3.0, 5.0]])) - Nx.tensor([[0.10269401967525482, 0.8973060250282288]]) + Nx.tensor([[0.10075931251049042, 0.8992406725883484]]) """ defn predict_probability(%__MODULE__{coefficients: coeff, bias: bias} = _model, x) do if Nx.rank(x) != 2 do diff --git a/test/scholar/linear/logistic_regression_test.exs b/test/scholar/linear/logistic_regression_test.exs index 69cda7be..99767a1b 100644 --- a/test/scholar/linear/logistic_regression_test.exs +++ b/test/scholar/linear/logistic_regression_test.exs @@ -6,7 +6,7 @@ defmodule Scholar.Linear.LogisticRegressionTest do test "Iris Data Set - multinomial logistic regression test" do {x_train, x_test, y_train, y_test} = iris_data() - model = LogisticRegression.fit(x_train, y_train, num_classes: 3) + model = LogisticRegression.fit(x_train, y_train, num_classes: 3, alpha: 0.0) res = LogisticRegression.predict(model, x_test) accuracy = Scholar.Metrics.Classification.accuracy(res, y_test) @@ -107,8 +107,8 @@ defmodule Scholar.Linear.LogisticRegressionTest do describe "linearly separable data" do test "1D" do key = Nx.Random.key(12) - {x1, key} = Nx.Random.uniform(key, -1.0, 0.0, shape: {1000, 1}) - {x2, _key} = Nx.Random.uniform(key, 0.0, 1.0, shape: {1000, 1}) + {x1, key} = Nx.Random.uniform(key, -2, -1, shape: {1000, 1}) + {x2, _key} = Nx.Random.uniform(key, 1, 2, shape: {1000, 1}) x = Nx.concatenate([x1, x2]) y1 = Nx.broadcast(0, {1000}) y2 = Nx.broadcast(1, {1000}) @@ -121,8 +121,8 @@ defmodule Scholar.Linear.LogisticRegressionTest do test "2D" do key = Nx.Random.key(12) - {x1, key} = Nx.Random.uniform(key, -1.0, 0.0, shape: {1000, 2}) - {x2, _key} = Nx.Random.uniform(key, 0.0, 1.0, shape: {1000, 2}) + {x1, key} = Nx.Random.uniform(key, -2, -1, shape: {1000, 2}) + {x2, _key} = Nx.Random.uniform(key, 1, 2, shape: {1000, 2}) x = Nx.concatenate([x1, x2]) y1 = Nx.broadcast(0, {1000}) y2 = Nx.broadcast(1, {1000}) From 6283b8d22d180b5b701e8b75476b09ea3bd3d759 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Fri, 16 Jan 2026 13:00:23 +0100 Subject: [PATCH 10/13] Fix tests --- lib/scholar/linear/logistic_regression.ex | 1 + .../linear/logistic_regression_test.exs | 31 ------------------- 2 files changed, 1 insertion(+), 31 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 9c1f1301..b6c8c79f 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -67,6 +67,7 @@ defmodule Scholar.Linear.LogisticRegression do [ [0.0915902629494667, -0.09159023314714432], [-0.1507941037416458, 0.1507941335439682] + ] ), bias: Nx.tensor([-0.06566660106182098, 0.06566664576530457]) } diff --git a/test/scholar/linear/logistic_regression_test.exs b/test/scholar/linear/logistic_regression_test.exs index 99767a1b..3248f0cd 100644 --- a/test/scholar/linear/logistic_regression_test.exs +++ b/test/scholar/linear/logistic_regression_test.exs @@ -40,23 +40,6 @@ defmodule Scholar.Linear.LogisticRegressionTest do fn -> LogisticRegression.fit(x, y) end end - test "when :optimizer is invalid" do - x = Nx.tensor([[1, 2], [3, 4]]) - y = Nx.tensor([1, 2]) - - assert_raise NimbleOptions.ValidationError, - """ - invalid value for :optimizer option: expected :optimizer to be either \ - a valid 0-arity function in 
Polaris.Optimizers or a valid {init_fn, update_fn} tuple\ - """, - fn -> - LogisticRegression.fit(x, y, - num_classes: 2, - optimizer: :invalid_optimizer - ) - end - end - test "when :max_iterations is not a positive integer" do x = Nx.tensor([[1, 2], [3, 4]]) y = Nx.tensor([1, 2]) @@ -90,20 +73,6 @@ defmodule Scholar.Linear.LogisticRegressionTest do end end - describe "column target tests" do - @tag :wip - test "column target" do - {x_train, _, y_train, _} = iris_data() - - model = LogisticRegression.fit(x_train, y_train, num_classes: 3) - pred = LogisticRegression.predict(model, x_train) - col_model = LogisticRegression.fit(x_train, y_train |> Nx.new_axis(-1), num_classes: 3) - col_pred = LogisticRegression.predict(col_model, x_train) - assert model == col_model - assert pred == col_pred - end - end - describe "linearly separable data" do test "1D" do key = Nx.Random.key(12) From 9e7d78450bf4dea74f69625a2ce817047e4d1aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Fri, 16 Jan 2026 13:05:21 +0100 Subject: [PATCH 11/13] Fix docstring in model_selection.ex --- lib/scholar/model_selection.ex | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/scholar/model_selection.ex b/lib/scholar/model_selection.ex index 6303971e..20ce653f 100644 --- a/lib/scholar/model_selection.ex +++ b/lib/scholar/model_selection.ex @@ -178,8 +178,8 @@ defmodule Scholar.ModelSelection do iex> y = Nx.tensor([0, 1, 2, 0, 1, 1, 0]) iex> opts = [ ...> num_classes: [3], - ...> iterations: [10, 20, 50], - ...> optimizer: [Polaris.Optimizers.adam(learning_rate: 0.005), Polaris.Optimizers.adam(learning_rate: 0.01)], + ...> max_iterations: [10, 20, 50], + ...> alpha: [0.0, 0.1, 1.0], ...> ] iex> Scholar.ModelSelection.grid_search(x, y, folding_fun, scoring_fun, opts) """ From dcc2629f7111a404c9bbcfc891f9a4fa8ca3db3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Fri, 16 Jan 2026 13:46:12 +0100 Subject: [PATCH 12/13] Move max_iterations from arguments to options --- lib/scholar/linear/logistic_regression.ex | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index b6c8c79f..9259f3f8 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -98,14 +98,13 @@ defmodule Scholar.Linear.LogisticRegression do alpha = Nx.tensor(alpha, type: type) {tol, opts} = Keyword.pop!(opts, :tol) tol = Nx.tensor(tol, type: type) - {max_iterations, opts} = Keyword.pop!(opts, :max_iterations) - max_iterations = Nx.tensor(max_iterations, type: :u32) - fit_n(x, y, alpha, max_iterations, tol, opts) + fit_n(x, y, alpha, tol, opts) end - defnp fit_n(x, y, alpha, max_iterations, tol, opts) do + defnp fit_n(x, y, alpha, tol, opts) do num_classes = opts[:num_classes] + max_iterations = opts[:max_iterations] {num_samples, num_features} = Nx.shape(x) type = to_float_type(x) @@ -145,7 +144,7 @@ defmodule Scholar.Linear.LogisticRegression do {coef, bias, _} = while {w, b, - {alpha, x, y_one_hot, max_iterations, tol, armijo_params, iter = Nx.u32(0), + {alpha, x, y_one_hot, tol, armijo_params, iter = Nx.u32(0), converged? = Nx.u8(0)}}, iter < max_iterations and not converged? do logits = Nx.dot(x, w) + b @@ -176,7 +175,7 @@ defmodule Scholar.Linear.LogisticRegression do converged? 
= Nx.reduce_max(Nx.abs(grad_w)) < tol and Nx.reduce_max(Nx.abs(grad_b)) < tol - {w, b, {alpha, x, y_one_hot, max_iterations, tol, armijo_params, iter + 1, converged?}} + {w, b, {alpha, x, y_one_hot, tol, armijo_params, iter + 1, converged?}} end %__MODULE__{ From 2e230db35525a51ac0b4750199583e0b69ecdf35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krsto=20Prorokovi=C4=87?= Date: Fri, 16 Jan 2026 13:49:06 +0100 Subject: [PATCH 13/13] mix format --- lib/scholar/linear/logistic_regression.ex | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/scholar/linear/logistic_regression.ex b/lib/scholar/linear/logistic_regression.ex index 9259f3f8..9230aee8 100644 --- a/lib/scholar/linear/logistic_regression.ex +++ b/lib/scholar/linear/logistic_regression.ex @@ -144,8 +144,7 @@ defmodule Scholar.Linear.LogisticRegression do {coef, bias, _} = while {w, b, - {alpha, x, y_one_hot, tol, armijo_params, iter = Nx.u32(0), - converged? = Nx.u8(0)}}, + {alpha, x, y_one_hot, tol, armijo_params, iter = Nx.u32(0), converged? = Nx.u8(0)}}, iter < max_iterations and not converged? do logits = Nx.dot(x, w) + b probabilities = softmax(logits)
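
After the full series is applied, training starts from zero-initialized weights,
adds an L2 penalty scaled by `:alpha`, and takes gradient steps sized by an
Armijo backtracking line search; the `:optimizer`, `:eps`, and `:iterations`
options are replaced by `:alpha`, `:tol`, and `:max_iterations`. A minimal
usage sketch of the resulting API (the predict/predict_probability outputs are
the doctest values from PATCH 09; all other values are illustrative):

    x = Nx.tensor([[1.0, 2.0], [3.0, 2.0], [4.0, 7.0]])
    y = Nx.tensor([1, 0, 1])

    # :num_classes is required; :alpha, :tol, and :max_iterations are shown
    # at their defaults and may be omitted.
    model =
      Scholar.Linear.LogisticRegression.fit(x, y,
        num_classes: 2,
        alpha: 1.0,
        tol: 1.0e-4,
        max_iterations: 1000
      )

    Scholar.Linear.LogisticRegression.predict(model, Nx.tensor([[-3.0, 5.0]]))
    #=> Nx.tensor([1])

    Scholar.Linear.LogisticRegression.predict_probability(model, Nx.tensor([[-3.0, 5.0]]))
    #=> Nx.tensor([[0.10075931251049042, 0.8992406725883484]])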