From 8d0525a30b3ce9114d9c9822427c8b4f48967205 Mon Sep 17 00:00:00 2001
From: jkling <jose.kling@pm.me>
Date: Mon, 17 Nov 2025 15:58:22 +0100
Subject: [PATCH 1/4] Constructors implemented

- `HawkesProcess`
- `UnivariateHawkesProcess`
- `MultivariateHawkesProcess`
---
 src/PointProcesses.jl          |   2 +-
 src/hawkes/fit.jl              | 107 +++++++++++
 src/hawkes/hawkes_process.jl   | 312 +++++++++++++++------------------
 src/hawkes/simulation.jl       |  35 ++++
 src/poisson/poisson_process.jl |   2 +
 5 files changed, 287 insertions(+), 171 deletions(-)
 create mode 100644 src/hawkes/fit.jl
 create mode 100644 src/hawkes/simulation.jl

diff --git a/src/PointProcesses.jl b/src/PointProcesses.jl
index b88b51b..2b0891b 100644
--- a/src/PointProcesses.jl
+++ b/src/PointProcesses.jl
@@ -10,7 +10,7 @@ module PointProcesses
 using DensityInterface: DensityInterface, HasDensity, densityof, logdensityof
 using Distributions: Distributions, UnivariateDistribution, MultivariateDistribution
 using Distributions: Categorical, Exponential, Poisson, Uniform, Dirac
-using Distributions: fit, suffstats, probs
+using Distributions: fit, suffstats, probs, mean
 using LinearAlgebra: dot
 using Random: rand
 using Random: AbstractRNG, default_rng
diff --git a/src/hawkes/fit.jl b/src/hawkes/fit.jl
new file mode 100644
index 0000000..d15137c
--- /dev/null
+++ b/src/hawkes/fit.jl
@@ -0,0 +1,107 @@
+"""
+    StatsAPI.fit(rng, ::Type{HawkesProcess{T}}, h::History; step_tol::Float64 = 1e-6, max_iter::Int = 1000) where {T<:Real}
+
+Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
+The relevant calculations are in page 4, equations 6-13.
+
+Let (t₁ < ... < tₙ) be the event times over the interval [0, T). We use the immigrant-descendant representation,
+where immigrants arrive at a constant base rate μ and each each arrival may generate descendants following the
+activation function α exp(-ω(t - tᵢ)).
+
+The algorithm consists in the following steps:
+1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
+2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
+3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
+
+    Dᵢⱼ = ψ ω exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
+
+    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
+4. Update the parameters as
+        μ = (N - D) / T
+        ψ = D / N
+        ω = D / div
+5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
+
+Notice that, in the implementation, the process is normalized so the average inter-event time is equal to 1 and, 
+therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
+
+∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
+
+"""
+function StatsAPI.fit(
+    ::Type{UnivariateHawkesProcess{T}},
+    h::History;
+    step_tol::Float64=1e-6,
+    max_iter::Int=1000,
+    rng::AbstractRNG=default_rng()
+) where {T<:Real}
+    n = nb_events(h)
+    n == 0 && return HawkesProcess(zero(T), zero(T), zero(T))
+
+    tmax = T(duration(h))
+    # Normalize times so average inter-event time is 1 (T -> n)
+    norm_ts = T.(h.times .* (n / tmax))
+
+    # preallocate
+    A = zeros(T, n)            # A[i] = sum_{j<i} exp(-ω (t_i - t_j))
+    S = zeros(T, n)            # S[i] = sum_{j<i} (t_i - t_j) exp(-ω (t_i - t_j))
+    lambda_ts = similar(A)     # λ_i
+
+    # Λ(T) ≈ n after normalization
+    # μ not too close to 0 or 1 so that both base and offspring contribute
+    μ = T(0.2) + (T(0.6) * rand(rng, T))
+    ψ = one(T) - μ                           # ψ = α/ω (branching ratio)
+    t90 = T(0.5) + (T(1.5) * rand(rng, T))   # inverse of the time to decay to 10% in normalized time
+    ω = log(T(10)) * t90
+
+    n_iters = 0
+    step = step_tol + one(T)
+
+    # First event: A[1]=0, S[1]=0, so λ_1 = μ
+    lambda_ts[1] = μ
+
+    while (step >= step_tol) && (n_iters < max_iter)
+        # compute A, S, and λ
+        for i in 2:n
+            Δ = norm_ts[i] - norm_ts[i - 1]
+            e = exp(-ω * Δ)
+            Ai_1 = A[i - 1]
+            A[i] = e * (one(T) + Ai_1)
+            S[i] = e * (S[i - 1] + Δ * (one(T) + Ai_1))
+            lambda_ts[i] = μ + (ψ * ω) * A[i]
+        end
+
+        # E-step aggregates
+        D = zero(T)   # expected number of descendants
+        div = zero(T)   # ∑ (t_i - t_j) D_{ij}
+        for i in 2:n
+            w = (ψ * ω) / lambda_ts[i]  # factor common to all j<i
+            D += w * A[i]
+            div += w * S[i]
+        end
+
+        # Steps 4–5: M-step updates + convergence check
+        new_μ = one(T) - (D / n)
+        new_ψ = D / n
+        new_ω = D / (div + eps(T))   # small guard to avoid div=0
+
+        step = max(abs(μ - new_μ), abs(ψ - new_ψ), abs(ω - new_ω))
+        μ, ψ, ω = new_μ, new_ψ, new_ω
+        n_iters += 1
+
+        # Prepare for next loop: reset λ₁ since A[1]=0 regardless of params
+        lambda_ts[1] = μ
+    end
+
+    n_iters >= max_iter && @warn "Maximum number of iterations reached without convergence."
+
+    # Unnormalize back to original time scale (T -> tmax):
+    # parameters in normalized space (') relate to original by μ0=μ'*(n/tmax), ω0=ω'*(n/tmax), α0=(ψ'ω')*(n/tmax)
+    return HawkesProcess(μ * (n / tmax), ψ * ω * (n / tmax), ω * (n / tmax))
+end
+
+# # Type parameter for `HawkesProcess` was NOT explicitly provided
+# function StatsAPI.fit(HP::Type{HawkesProcess}, h::History{H,M}; kwargs...) where {H<:Real,M}
+#     T = promote_type(Float64, H)
+#     return fit(HP{T}, h; kwargs...)
+# end
diff --git a/src/hawkes/hawkes_process.jl b/src/hawkes/hawkes_process.jl
index 7cf11d1..d30ca24 100644
--- a/src/hawkes/hawkes_process.jl
+++ b/src/hawkes/hawkes_process.jl
@@ -1,185 +1,163 @@
 """
-    HawkesProcess{T<:Real}
+    HawkesProcess{T<:Real,D}
 
-Univariate Hawkes process with exponential decay kernel.
+Hawkes process with exponential decay kernel and mark distribution `D`.
 
 A Hawkes process is a self-exciting point process where each event increases the probability
-of future events. The conditional intensity function is given by:
+of future events. The conditional intensity function is given by
 
-    λ(t) = μ + α ∑_{tᵢ < t} exp(-ω(t - tᵢ))
+    λ(t) = μ + ∑_{tᵢ < t} f(mᵢ) exp(-g(mᵢ)(t - tᵢ)),
 
-where the sum is over all previous event times tᵢ.
+where the sum is over all previous event times (tᵢ, mᵢ).
 
 # Fields
-
 - `μ::T`: baseline intensity (immigration rate)
-- `α::T`: jump size (immediate increase in intensity after an event)  
-- `ω::T`: decay rate (how quickly the excitement fades)
-
-Conditions:
-- μ, α, ω >= 0
-- ψ = α/ω < 1 → Stability condition. ψ is the expected number of events each event generates
-
-Following the notation from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273).
+- `f`: function f:M → R, where M is the space of marks. Jump size.
+- `g`: function f:M → R, where M is the space of marks. Decay rate.
 """
-struct HawkesProcess{T<:Real} <: AbstractPointProcess
+struct HawkesProcess{T<:Real,D} <: AbstractPointProcess
     μ::T
-    α::T
-    ω::T
-
-    function HawkesProcess(μ::T1, α::T2, ω::T3) where {T1,T2,T3}
-        any((μ, α, ω) .< 0) &&
-            throw(DomainError((μ, α, ω), "All parameters must be non-negative."))
-        (α > 0 && α >= ω) &&
-            throw(DomainError((α, ω), "Parameter ω must be strictly smaller than α"))
-        T = promote_type(T1, T2, T3)
-        (μ_T, α_T, ω_T) = convert.(T, (μ, α, ω))
-        new{T}(μ_T, α_T, ω_T)
-    end
+    f
+    g
+    mark_dist::D
 end
 
-function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin, tmax)
-    sim = simulate_poisson_times(rng, hp.μ, tmin, tmax) # Simulate Poisson process with base rate
-    sim_desc = generate_descendants(rng, sim, tmax, hp.α, hp.ω) # Recursively generates descendants from first events
-    append!(sim, sim_desc)
-    sort!(sim)
-    return History(; times=sim, tmin=tmin, tmax=tmax, check=false)
+function Base.show(io::IO, hp::HawkesProcess)
+    mean_mark = mean(hp.mark_dist)
+    return print(io, "HawkesProcess($(hp.μ), $(hp.f(mean_mark)), $(hp.g(mean_mark)), $(hp.mark_dist))")
 end
 
+## Alias 
 """
-    StatsAPI.fit(rng, ::Type{HawkesProcess{T}}, h::History; step_tol::Float64 = 1e-6, max_iter::Int = 1000) where {T<:Real}
+    UnivariateHawkesProcess{R}
 
-Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
-The relevant calculations are in page 4, equations 6-13.
+Unmarked univariate temporal Hawkes process.
 
-Let (t₁ < ... < tₙ) be the event times over the interval [0, T). We use the immigrant-descendant representation,
-where immigrants arrive at a constant base rate μ and each each arrival may generate descendants following the
-activation function α exp(-ω(t - tᵢ)).
+The conditional intensity function is given by
 
-The algorithm consists in the following steps:
-1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
-2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
-3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
+    λ(t) = μ + ∑_{tᵢ < t} α exp(-β(t - tᵢ)).
 
-    Dᵢⱼ = ψ ω exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
+Equivalent to the general definition with f(m) = α and g(m) = ω.
 
-    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
-4. Update the parameters as
-        μ = (N - D) / T
-        ψ = D / N
-        ω = D / div
-5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
+# Fields
+- `μ::R`: baseline intensity (immigration rate)
+- `α::R`: Jump size.
+- `ω::R`: Decay rate.
 
-Notice that, in the implementation, the process is normalized so the average inter-event time is equal to 1 and, 
-therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
+Alias for `HawkesProcess{R,Dirac{Nothing}}`.
+"""
+const UnivariateHawkesProcess{R<:Real} = HawkesProcess{R,Dirac{Nothing}}
 
-∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
+function Base.show(io::IO, pp::UnivariateHawkesProcess)
+    return print(io, "UnivariateHawkesProcess($(pp.μ), $(pp.f(nothing)), $(pp.g(nothing)))")
+end
 
 """
-function StatsAPI.fit(
-    rng::AbstractRNG,
-    ::Type{HawkesProcess{T}},
-    h::History;
-    step_tol::Float64=1e-6,
-    max_iter::Int=1000,
-) where {T<:Real}
-    n = nb_events(h)
-    n == 0 && return HawkesProcess(zero(T), zero(T), zero(T))
-
-    tmax = T(duration(h))
-    # Normalize times so average inter-event time is 1 (T -> n)
-    norm_ts = T.(h.times .* (n / tmax))
-
-    # preallocate
-    A = zeros(T, n)            # A[i] = sum_{j<i} exp(-ω (t_i - t_j))
-    S = zeros(T, n)            # S[i] = sum_{j<i} (t_i - t_j) exp(-ω (t_i - t_j))
-    lambda_ts = similar(A)     # λ_i
-
-    # Λ(T) ≈ n after normalization
-    # μ not too close to 0 or 1 so that both base and offspring contribute
-    μ = T(0.2) + (T(0.6) * rand(rng, T))
-    ψ = one(T) - μ                           # ψ = α/ω (branching ratio)
-    t90 = T(0.5) + (T(1.5) * rand(rng, T))   # inverse of the time to decay to 10% in normalized time
-    ω = log(T(10)) * t90
-
-    n_iters = 0
-    step = step_tol + one(T)
-
-    # First event: A[1]=0, S[1]=0, so λ_1 = μ
-    lambda_ts[1] = μ
-
-    while (step >= step_tol) && (n_iters < max_iter)
-        # compute A, S, and λ
-        for i in 2:n
-            Δ = norm_ts[i] - norm_ts[i - 1]
-            e = exp(-ω * Δ)
-            Ai_1 = A[i - 1]
-            A[i] = e * (one(T) + Ai_1)
-            S[i] = e * (S[i - 1] + Δ * (one(T) + Ai_1))
-            lambda_ts[i] = μ + (ψ * ω) * A[i]
-        end
-
-        # E-step aggregates
-        D = zero(T)   # expected number of descendants
-        div = zero(T)   # ∑ (t_i - t_j) D_{ij}
-        for i in 2:n
-            w = (ψ * ω) / lambda_ts[i]  # factor common to all j<i
-            D += w * A[i]
-            div += w * S[i]
-        end
-
-        # Steps 4–5: M-step updates + convergence check
-        new_μ = one(T) - (D / n)
-        new_ψ = D / n
-        new_ω = D / (div + eps(T))   # small guard to avoid div=0
-
-        step = max(abs(μ - new_μ), abs(ψ - new_ψ), abs(ω - new_ω))
-        μ, ψ, ω = new_μ, new_ψ, new_ω
-        n_iters += 1
-
-        # Prepare for next loop: reset λ₁ since A[1]=0 regardless of params
-        lambda_ts[1] = μ
-    end
+    MultivariateHawkesProcess{R}
+
+Unmarked multivariate temporal Hawkes process
+
+For a process with D marginal processes, the conditional intensity
+function of the k-th process is given by
+
+    λₖ(t) = μₖ + ∑_{h=1,...,D} ∑_{tʰᵢ < t} αₖₕ exp(-βₖₕ(t - tʰᵢ)),
 
-    n_iters >= max_iter && @warn "Maximum number of iterations reached without convergence."
+where tʰᵢ is the i-th element in the h-th marginal process.
+μ is a vector of length D with elements μᵢ. α and ω are D×D
+matrices with elements αᵢⱼ and ωᵢⱼ.
 
-    # Unnormalize back to original time scale (T -> tmax):
-    # parameters in normalized space (') relate to original by μ0=μ'*(n/tmax), ω0=ω'*(n/tmax), α0=(ψ'ω')*(n/tmax)
-    return HawkesProcess(μ * (n / tmax), ψ * ω * (n / tmax), ω * (n / tmax))
+# Fields
+- `μ::Vector{<:Real}`: baseline intensity (immigration rate)
+- `α::Matrix{<:Real}`: Jump size.
+- `ω::Matrix{<:Real}`: Decay rate.
+
+Alias for `HawkesProcess{R,Categorical{Float64,Vector{Float64}}}`.
+"""
+const MultivariateHawkesProcess{R<:Real} = HawkesProcess{
+    R,Categorical{Float64,Vector{Float64}}
+}
+# The choice to impose the mark distribution Categorical{Float64,Vector{Float64}} was made on purpose
+## It is mainly due to the fact that Distributions.Categorical makes (most of the time) automatic conversions to Float64
+
+function Base.show(io::IO, pp::MultivariateHawkesProcess)
+    D = length(pp.mark_dist.p)
+    inds = [(i, j) for i in 1:D, j in 1:D]
+    return print(io, "MultivariateHawkesProcess\nμ = $(pp.μ .* pp.mark_dist.p)\nα = $(pp.f.(inds))\nω = $(pp.g.(inds))")
 end
 
-# Type parameter for `HawkesProcess` was explicitly provided
-function StatsAPI.fit(HP::Type{HawkesProcess{T}}, h::History; kwargs...) where {T<:Real}
-    return fit(default_rng(), HP, h; kwargs...)
+## Constructors
+### UnivariatehawkesProcess
+function HawkesProcess(
+    μ::R1, α::R2, ω::R3; check_args::Bool=true
+    ) where {R1<:Real,R2<:Real,R3<:Real}
+    check_args && check_args_Hawkes(μ, α, ω)
+    R = promote_type(R1, R2, R3)
+    fα(_...) = R(α)
+    gω(_...) = R(ω)
+    return HawkesProcess(R(μ), fα, gω, Dirac(nothing))
 end
 
-# Type parameter for `HawkesProcess` was NOT explicitly provided
-function StatsAPI.fit(HP::Type{HawkesProcess}, h::History{H,M}; kwargs...) where {H<:Real,M}
-    T = promote_type(Float64, H)
-    return fit(default_rng(), HP{T}, h; kwargs...)
+### MultivariateHawkesProcess
+function HawkesProcess(
+    μ::Vector{R1}, α::Matrix{R2}, ω::Matrix{R3}; check_args::Bool=true
+    ) where {R1<:Real,R2<:Real,R3<:Real}
+    check_args && check_args_Hawkes(μ, α, ω)
+    R = promote_type(R1, R2, R3)
+    f((i, j)) = R(α[i, j])
+    g((i, j)) = R(ω[i, j])
+    return HawkesProcess(R(sum(μ)), f, g, Categorical(Float64.(μ / sum(μ))))
 end
 
-function time_change(hp::HawkesProcess, h::History{T,M}) where {T<:Real,M}
-    n = nb_events(h)
-    A = zeros(T, n + 1) # Array A in Ozaki (1979)
-    @inbounds for i in 2:n
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
+### Check check
+# Univariate Hawkes
+function check_args_Hawkes(μ::Real, α::Real, ω::Real)
+    if any((μ, α, ω) .< 0)
+        throw(
+            DomainError(
+                "μ = $μ, α = $α, ω = $ω",
+                "HawkesProcess: All parameters must be non-negative.",
+            ),
+        )
     end
-    A[end] = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A[end - 1]) # Used to calculate the integral of the intensity at every event time
-    times = T.(hp.μ .* (h.times .- h.tmin)) # Transformation with respect to base rate
-    T_base = hp.μ * duration(h) # Contribution of base rate to total length of time re-scaled process
-    for i in eachindex(times)
-        times[i] += (hp.α / hp.ω) * ((i - 1) - A[i]) # Add contribution of activation functions
+    return nothing
+end
+
+# Multivariate Hawkes
+function check_args_Hawkes(μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Matrix{<:Real})
+    if !(length(μ) == size(α)[1] && length(μ) == size(α)[2])
+        throw(DimansionMismatch("α must have size $(length(μ))×$(length(μ))")) 
     end
-    return History(;
-        times=times,
-        marks=h.marks,
-        tmin=zero(T),
-        tmax=T(T_base + ((hp.α / hp.ω) * (n - A[end]))),
-        check=false,
-    ) # A time re-scaled process starts at t=0
+    if !(length(μ) == size(ω)[1] && length(μ) == size(ω)[2])
+        throw(DimansionMismatch("ω must have size $(length(μ))×$(length(μ))"))
+    end
+    if any(μ .< zero(μ))
+        throw(
+            DomainError(
+                "μ = $μ",
+                "HawkesProcess: the condition μ ≥ 0 is not satisfied for all dimensions.",
+            ),
+        )
+    end
+    if any(α .< zero(α))
+        throw(
+            DomainError(
+                "α = $α",
+                "HawkesProcess: the condition α ≥ 0 is not satisfied for all dimensions.",
+            ),
+        )
+    end
+    if any(ω .< zero(ω))
+        throw(
+            DomainError(
+                "ω = $ω",
+                "HawkesProcess: the condition ω ≥ 0 is not satisfied for all dimensions.",
+            ),
+        )
+    end
+    return nothing
 end
 
+## Access
 function ground_intensity(hp::HawkesProcess, h::History, t)
     activation = sum(exp.(hp.ω .* (@view h.times[1:(searchsortedfirst(h.times, t) - 1)])))
     return hp.μ + (hp.α * activation / exp(hp.ω * t))
@@ -208,30 +186,24 @@ function DensityInterface.logdensityof(hp::HawkesProcess, h::History)
            ((hp.α / hp.ω) * sum(1 .- exp.(-hp.ω .* (duration(h) .- h.times)))) # Integral of each kernel
 end
 
-#=
-Internal function for simulating Hawkes processes
-The first generation, gen_0, is the `immigrants`, which is a set of event times.
-For each t_g ∈ gen_n, simulate an inhomogeneous Poisson process over the interval [t_g, T]
-with intensity λ(t) = α exp(-ω(t - t_g)) with the inverse method.
-gen_{n+1} is the set of all events simulated from all events in gen_n.
-The algorithm stops when the simulation from one generation results in no further events.
-=#
-function generate_descendants(
-    rng::AbstractRNG, immigrants::Vector{T}, tmax, α, ω
-) where {T<:Real}
-    descendants = T[]
-    next_gen = immigrants
-    while !isempty(next_gen)
-        # OPTIMIZE: Can this be improved by avoiding allocations of `curr_gen` and `next_gen`? Or does the compiler take care of that?
-        curr_gen = copy(next_gen) # The current generation from which we simulate the next one
-        next_gen = eltype(immigrants)[] # Gathers all the descendants from the current generation
-        for parent in curr_gen # Generate the descendants for each individual event with the inverse method
-            activation_integral = (α / ω) * (one(T) - exp(ω * (parent - tmax)))
-            sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
-            @. sim_transf = parent - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
-            append!(next_gen, sim_transf)
-        end
-        append!(descendants, next_gen)
+function time_change(hp::HawkesProcess, h::History{T,M}) where {T<:Real,M}
+    n = nb_events(h)
+    A = zeros(T, n + 1) # Array A in Ozaki (1979)
+    @inbounds for i in 2:n
+        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
     end
-    return descendants
+    A[end] = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A[end - 1]) # Used to calculate the integral of the intensity at every event time
+    times = T.(hp.μ .* (h.times .- h.tmin)) # Transformation with respect to base rate
+    T_base = hp.μ * duration(h) # Contribution of base rate to total length of time re-scaled process
+    for i in eachindex(times)
+        times[i] += (hp.α / hp.ω) * ((i - 1) - A[i]) # Add contribution of activation functions
+    end
+    return History(;
+        times=times,
+        marks=h.marks,
+        tmin=zero(T),
+        tmax=T(T_base + ((hp.α / hp.ω) * (n - A[end]))),
+        check=false,
+    ) # A time re-scaled process starts at t=0
 end
+
diff --git a/src/hawkes/simulation.jl b/src/hawkes/simulation.jl
new file mode 100644
index 0000000..bd3756c
--- /dev/null
+++ b/src/hawkes/simulation.jl
@@ -0,0 +1,35 @@
+function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin, tmax)
+    sim = simulate_poisson_times(rng, hp.μ, tmin, tmax) # Simulate Poisson process with base rate
+    sim_desc = generate_descendants(rng, sim, tmax, hp.α, hp.ω) # Recursively generates descendants from first events
+    append!(sim, sim_desc)
+    sort!(sim)
+    return History(; times=sim, tmin=tmin, tmax=tmax, check=false)
+end
+
+#=
+Internal function for simulating Hawkes processes
+The first generation, gen_0, is the `immigrants`, which is a set of event times.
+For each t_g ∈ gen_n, simulate an inhomogeneous Poisson process over the interval [t_g, T]
+with intensity λ(t) = α exp(-ω(t - t_g)) with the inverse method.
+gen_{n+1} is the set of all events simulated from all events in gen_n.
+The algorithm stops when the simulation from one generation results in no further events.
+=#
+function generate_descendants(
+    rng::AbstractRNG, immigrants::Vector{T}, tmax, α, ω
+) where {T<:Real}
+    descendants = T[]
+    next_gen = immigrants
+    while !isempty(next_gen)
+        # OPTIMIZE: Can this be improved by avoiding allocations of `curr_gen` and `next_gen`? Or does the compiler take care of that?
+        curr_gen = copy(next_gen) # The current generation from which we simulate the next one
+        next_gen = eltype(immigrants)[] # Gathers all the descendants from the current generation
+        for parent in curr_gen # Generate the descendants for each individual event with the inverse method
+            activation_integral = (α / ω) * (one(T) - exp(ω * (parent - tmax)))
+            sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
+            @. sim_transf = parent - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
+            append!(next_gen, sim_transf)
+        end
+        append!(descendants, next_gen)
+    end
+    return descendants
+end
diff --git a/src/poisson/poisson_process.jl b/src/poisson/poisson_process.jl
index 3ff0d1c..8b03716 100644
--- a/src/poisson/poisson_process.jl
+++ b/src/poisson/poisson_process.jl
@@ -64,6 +64,7 @@ function Base.show(io::IO, pp::MultivariatePoissonProcess)
 end
 
 ## Constructors
+### MarkedPoissonProcess
 function PoissonProcess(λ::Vector{R}; check_args::Bool=true) where {R<:Real}
     if check_args
         if any(λ .< zero(λ))
@@ -83,6 +84,7 @@ function PoissonProcess(λ::Vector{R}; check_args::Bool=true) where {R<:Real}
     return PoissonProcess(sum(λ), Categorical(λ / sum(λ)); check_args=check_args)
 end
 
+### UnivariatePoissonProcess
 function PoissonProcess(λ::R; check_args::Bool=true) where {R<:Real}
     return PoissonProcess(λ, Dirac(nothing); check_args=check_args)
 end

From b185a76b72c03464c70afdae7abb9fed4273728f Mon Sep 17 00:00:00 2001
From: jkling <jose.kling@pm.me>
Date: Wed, 19 Nov 2025 12:23:52 +0100
Subject: [PATCH 2/4] Types, constructors and simulation

Everything else still not implemented.
---
 docs/src/api.md              |  13 ++
 src/PointProcesses.jl        |  13 +-
 src/hawkes/fit.jl            |  18 +--
 src/hawkes/hawkes_process.jl | 246 ++++++++++++++++-------------------
 src/hawkes/intensity.jl      |  63 +++++++++
 src/hawkes/simulation.jl     |  71 ++++++----
 src/hawkes/time_change.jl    |  20 +++
 src/history.jl               |  44 ++++---
 src/poisson/simulation.jl    |   2 +-
 src/simulation.jl            |   8 +-
 test/hawkes.jl               |  15 ++-
 11 files changed, 319 insertions(+), 194 deletions(-)
 create mode 100644 src/hawkes/intensity.jl
 create mode 100644 src/hawkes/time_change.jl

diff --git a/docs/src/api.md b/docs/src/api.md
index 8085587..d657037 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -104,6 +104,19 @@ MultivariatePoissonProcessPrior
 HawkesProcess
 ```
 
+### Univariate
+
+```@docs
+UnivariateHawkesProcess
+UnmarkedUnivariateHawkesProcess
+```
+
+### Multivariate
+
+```@docs
+MultivariateHawkesProcess
+```
+
 ## Index
 
 ```@index
diff --git a/src/PointProcesses.jl b/src/PointProcesses.jl
index 2b0891b..ca3ec72 100644
--- a/src/PointProcesses.jl
+++ b/src/PointProcesses.jl
@@ -10,8 +10,8 @@ module PointProcesses
 using DensityInterface: DensityInterface, HasDensity, densityof, logdensityof
 using Distributions: Distributions, UnivariateDistribution, MultivariateDistribution
 using Distributions: Categorical, Exponential, Poisson, Uniform, Dirac
-using Distributions: fit, suffstats, probs, mean
-using LinearAlgebra: dot
+using Distributions: fit, suffstats, probs, mean, support
+using LinearAlgebra: dot, diagm
 using Random: rand
 using Random: AbstractRNG, default_rng
 using StatsAPI: StatsAPI, fit
@@ -46,10 +46,15 @@ export simulate_ogata, simulate
 
 ## Models
 
+### Poisson processes
 export PoissonProcess
 export UnivariatePoissonProcess
 export MultivariatePoissonProcess, MultivariatePoissonProcessPrior
+
+### Hawkes processes
 export HawkesProcess
+export UnivariateHawkesProcess, UnmarkedUnivariateHawkesProcess
+export MultivariateHawkesProcess
 
 # Includes
 
@@ -65,5 +70,9 @@ include("poisson/fit.jl")
 include("poisson/simulation.jl")
 
 include("hawkes/hawkes_process.jl")
+include("hawkes/simulation.jl")
+include("hawkes/intensity.jl")
+include("hawkes/fit.jl")
+include("hawkes/time_change.jl")
 
 end
diff --git a/src/hawkes/fit.jl b/src/hawkes/fit.jl
index d15137c..892f849 100644
--- a/src/hawkes/fit.jl
+++ b/src/hawkes/fit.jl
@@ -1,5 +1,5 @@
 """
-    StatsAPI.fit(rng, ::Type{HawkesProcess{T}}, h::History; step_tol::Float64 = 1e-6, max_iter::Int = 1000) where {T<:Real}
+    StatsAPI.fit(rng, ::Type{UnmarkedUnivariateHawkesProcess{T}}, h::History; step_tol::Float64 = 1e-6, max_iter::Int = 1000) where {T<:Real}
 
 Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
 The relevant calculations are in page 4, equations 6-13.
@@ -29,11 +29,11 @@ therefore, the interval of the process is transformed from T to N. Also, in equa
 
 """
 function StatsAPI.fit(
-    ::Type{UnivariateHawkesProcess{T}},
+    ::Type{<:UnmarkedUnivariateHawkesProcess{T}},
     h::History;
     step_tol::Float64=1e-6,
     max_iter::Int=1000,
-    rng::AbstractRNG=default_rng()
+    rng::AbstractRNG=default_rng(),
 ) where {T<:Real}
     n = nb_events(h)
     n == 0 && return HawkesProcess(zero(T), zero(T), zero(T))
@@ -100,8 +100,10 @@ function StatsAPI.fit(
     return HawkesProcess(μ * (n / tmax), ψ * ω * (n / tmax), ω * (n / tmax))
 end
 
-# # Type parameter for `HawkesProcess` was NOT explicitly provided
-# function StatsAPI.fit(HP::Type{HawkesProcess}, h::History{H,M}; kwargs...) where {H<:Real,M}
-#     T = promote_type(Float64, H)
-#     return fit(HP{T}, h; kwargs...)
-# end
+# Type parameter for `HawkesProcess` was NOT explicitly provided
+function StatsAPI.fit(
+    HP::Type{UnmarkedUnivariateHawkesProcess}, h::History{H,M}; kwargs...
+) where {H<:Real,M}
+    T = promote_type(Float64, H)
+    return fit(HP{T}, h; kwargs...)
+end
diff --git a/src/hawkes/hawkes_process.jl b/src/hawkes/hawkes_process.jl
index d30ca24..51cb379 100644
--- a/src/hawkes/hawkes_process.jl
+++ b/src/hawkes/hawkes_process.jl
@@ -1,116 +1,135 @@
 """
-    HawkesProcess{T<:Real,D}
+    HawkesProcess <: AbstractPointProcess
 
-Hawkes process with exponential decay kernel and mark distribution `D`.
+Common interface for all subtypes of `HawkesProcess`.
+"""
+abstract type HawkesProcess <: AbstractPointProcess end
+
+"""
+    UnivariateHawkesProcess{R<:Real,D} <: HawkesProcess
 
-A Hawkes process is a self-exciting point process where each event increases the probability
-of future events. The conditional intensity function is given by
+Univariate Hawkes process with exponential decay kernel and mark 
+distribution `D`.
 
-    λ(t) = μ + ∑_{tᵢ < t} f(mᵢ) exp(-g(mᵢ)(t - tᵢ)),
+Denote the events of the process by (tᵢ, mᵢ), where tᵢ is the event time
+and mᵢ ∈ M the corresponding mark. The conditional intensity function
+of the Hawkes process is given by
 
-where the sum is over all previous event times (tᵢ, mᵢ).
+λ(t) = μ + ∑_{tᵢ < t} α mᵢ exp(-ω (t - tᵢ)).
+
+Notice that the mark only affects the jump size.
 
 # Fields
-- `μ::T`: baseline intensity (immigration rate)
-- `f`: function f:M → R, where M is the space of marks. Jump size.
-- `g`: function f:M → R, where M is the space of marks. Decay rate.
+- `μ::R`: baseline intensity (immigration rate)
+- `α::R`: jump size
+- `ω::R`: decay rate.
+- `mark_dist::D`: distribution of marks
 """
-struct HawkesProcess{T<:Real,D} <: AbstractPointProcess
+struct UnivariateHawkesProcess{T<:Real,D} <: HawkesProcess
     μ::T
-    f
-    g
+    α::T
+    ω::T
     mark_dist::D
 end
 
-function Base.show(io::IO, hp::HawkesProcess)
-    mean_mark = mean(hp.mark_dist)
-    return print(io, "HawkesProcess($(hp.μ), $(hp.f(mean_mark)), $(hp.g(mean_mark)), $(hp.mark_dist))")
+function Base.show(io::IO, hp::UnivariateHawkesProcess)
+    return print(io, "UnivariateHawkesProcess($(hp.μ), $(hp.α), $(hp.ω), $(hp.mark_dist))")
 end
 
-## Alias 
 """
-    UnivariateHawkesProcess{R}
-
-Unmarked univariate temporal Hawkes process.
+    UnmarkedUnivariateHawkesProcess{R<:Real} <: HawkesProcess
 
-The conditional intensity function is given by
+Unmarked univariate Hawkes process with exponential decay kernel.
 
-    λ(t) = μ + ∑_{tᵢ < t} α exp(-β(t - tᵢ)).
+Denote the events of the process by tᵢ. The conditional intensity function
+of the Hawkes process is given by
 
-Equivalent to the general definition with f(m) = α and g(m) = ω.
+λ(t) = μ + ∑_{tᵢ < t} α exp(-ω (t - tᵢ)).
 
 # Fields
 - `μ::R`: baseline intensity (immigration rate)
-- `α::R`: Jump size.
-- `ω::R`: Decay rate.
+- `α::R`: jump size
+- `ω::R`: decay rate.
 
-Alias for `HawkesProcess{R,Dirac{Nothing}}`.
+Alias for UnivariateHawkesProcess{R, Dirac{Nothing}}.
 """
-const UnivariateHawkesProcess{R<:Real} = HawkesProcess{R,Dirac{Nothing}}
+const UnmarkedUnivariateHawkesProcess{R<:Real} = UnivariateHawkesProcess{R,Dirac{Nothing}}
 
-function Base.show(io::IO, pp::UnivariateHawkesProcess)
-    return print(io, "UnivariateHawkesProcess($(pp.μ), $(pp.f(nothing)), $(pp.g(nothing)))")
+function Base.show(io::IO, hp::UnmarkedUnivariateHawkesProcess)
+    return print(io, "UnmarkedUnivariateHawkesProcess$((hp.μ, hp.α, hp.ω))")
 end
 
 """
-    MultivariateHawkesProcess{R}
+    MultivariateHawkesProcess{R} <: HawkesProcess
 
-Unmarked multivariate temporal Hawkes process
+Unmarked multivariate temporal Hawkes process with exponential decay
 
-For a process with D marginal processes, the conditional intensity
-function of the k-th process is given by
+For a process with m = 1, 2, ..., M marginal processes, the conditional intensity
+function of the m-th process is given by
 
-    λₖ(t) = μₖ + ∑_{h=1,...,D} ∑_{tʰᵢ < t} αₖₕ exp(-βₖₕ(t - tʰᵢ)),
+    λₘ(t) = μₘ + ∑_{l=1,...,M} ∑_{tˡᵢ < t} αₘₗ exp(-βₘₗ(t - tˡᵢ)),
 
-where tʰᵢ is the i-th element in the h-th marginal process.
-μ is a vector of length D with elements μᵢ. α and ω are D×D
-matrices with elements αᵢⱼ and ωᵢⱼ.
+where tˡᵢ is the i-th element in the l-th marginal process.
+μ is a vector of length M with elements μₘ. α and ω are M×M
+matrices with elements αₘₗ and ωₘₗ.
+
+The process is represented as a marked process, where each marginal
+process m = 1, 2, ..., M is represented by events (tᵐᵢ, m), tᵐᵢ being
+the i-th element with mark m.
 
 # Fields
 - `μ::Vector{<:Real}`: baseline intensity (immigration rate)
 - `α::Matrix{<:Real}`: Jump size.
 - `ω::Matrix{<:Real}`: Decay rate.
-
-Alias for `HawkesProcess{R,Categorical{Float64,Vector{Float64}}}`.
 """
-const MultivariateHawkesProcess{R<:Real} = HawkesProcess{
-    R,Categorical{Float64,Vector{Float64}}
-}
-# The choice to impose the mark distribution Categorical{Float64,Vector{Float64}} was made on purpose
-## It is mainly due to the fact that Distributions.Categorical makes (most of the time) automatic conversions to Float64
-
-function Base.show(io::IO, pp::MultivariateHawkesProcess)
-    D = length(pp.mark_dist.p)
-    inds = [(i, j) for i in 1:D, j in 1:D]
-    return print(io, "MultivariateHawkesProcess\nμ = $(pp.μ .* pp.mark_dist.p)\nα = $(pp.f.(inds))\nω = $(pp.g.(inds))")
+struct MultivariateHawkesProcess{T<:Real} <: HawkesProcess
+    μ::T
+    α::Matrix{T}
+    ω::Matrix{T}
+    mark_dist::Categorical{Float64,Vector{Float64}} # To keep consistent with PoissonProcess. Helps in simulation.
 end
 
+function Base.show(io::IO, hp::MultivariateHawkesProcess{T}) where {T<:Real}
+    return print(
+        io,
+        "MultivariateHawkesProcess\nμ = $(T.(hp.μ .* probs(hp.mark_dist)))\nα = $(hp.α)\nω = $(hp.ω)",
+    )
+end
+
+Base.length(mh::MultivariateHawkesProcess) = size(mh.α)[1]
+
 ## Constructors
 ### UnivariatehawkesProcess
-function HawkesProcess(
-    μ::R1, α::R2, ω::R3; check_args::Bool=true
-    ) where {R1<:Real,R2<:Real,R3<:Real}
-    check_args && check_args_Hawkes(μ, α, ω)
-    R = promote_type(R1, R2, R3)
-    fα(_...) = R(α)
-    gω(_...) = R(ω)
-    return HawkesProcess(R(μ), fα, gω, Dirac(nothing))
+function HawkesProcess(μ::Real, α::Real, ω::Real, mark_dist; check_args::Bool=true)
+    check_args && check_args_Hawkes(μ, α, ω, mark_dist)
+    return UnivariateHawkesProcess(promote(μ, α, ω)..., mark_dist)
+end
+
+function HawkesProcess(μ::Real, α::Real, ω::Real; check_args::Bool=true)
+    return HawkesProcess(μ, α, ω, Dirac(nothing); check_args=check_args)
 end
 
 ### MultivariateHawkesProcess
 function HawkesProcess(
-    μ::Vector{R1}, α::Matrix{R2}, ω::Matrix{R3}; check_args::Bool=true
-    ) where {R1<:Real,R2<:Real,R3<:Real}
+    μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Matrix{<:Real}; check_args::Bool=true
+)
     check_args && check_args_Hawkes(μ, α, ω)
-    R = promote_type(R1, R2, R3)
-    f((i, j)) = R(α[i, j])
-    g((i, j)) = R(ω[i, j])
-    return HawkesProcess(R(sum(μ)), f, g, Categorical(Float64.(μ / sum(μ))))
+    R = promote_type(eltype(μ), eltype(α), eltype(ω))
+    return MultivariateHawkesProcess(R(sum(μ)), R.(α), R.(ω), Categorical(μ / sum(μ)))
+end
+
+# For M independent marginal processes
+function HawkesProcess(
+    μ::Vector{<:Real}, α::Vector{<:Real}, ω::Vector{<:Real}; check_args::Bool=true
+)
+    check_args && check_args_Hawkes(μ, diagm(α), diagm(ω))
+    R = promote_type(eltype(μ), eltype(α), eltype(ω))
+    return MultivariateHawkesProcess(R(sum(μ)), R.(α), R.(ω), Categorical(μ / sum(μ)))
 end
 
-### Check check
-# Univariate Hawkes
-function check_args_Hawkes(μ::Real, α::Real, ω::Real)
+# Check args
+## Univariate Hawkes
+function check_args_Hawkes(μ::Real, α::Real, ω::Real, mark_dist)
     if any((μ, α, ω) .< 0)
         throw(
             DomainError(
@@ -119,91 +138,50 @@ function check_args_Hawkes(μ::Real, α::Real, ω::Real)
             ),
         )
     end
+    mean_α = mark_dist isa Dirac{Nothing} ? α : mean(mark_dist) * α
+    if mean_α >= ω
+        throw(
+            DomainError(
+                "α = $(mean_α), ω = $ω",
+                "HawkesProcess: mᵢα must be, on average, smaller than ω. Stability condition.",
+            ),
+        )
+    end
+    if !isa(mark_dist, Dirac{Nothing}) && minimum(mark_dist) < 0
+        throw(
+            DomainError(
+                "Mark distribution support = $((support(mark_dist).lb, support(mark_dist).ub))",
+                "HawkesProcess: Support of mark distribution must be contained in non-negative numbers",
+            ),
+        )
+    end
     return nothing
 end
 
-# Multivariate Hawkes
+## Multivariate Hawkes
 function check_args_Hawkes(μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Matrix{<:Real})
-    if !(length(μ) == size(α)[1] && length(μ) == size(α)[2])
-        throw(DimansionMismatch("α must have size $(length(μ))×$(length(μ))")) 
+    if length(μ) != size(α)[2]
+        throw(DimansionMismatch("α must have size $(length(μ))×$(length(μ))"))
     end
-    if !(length(μ) == size(ω)[1] && length(μ) == size(ω)[2])
+    if length(μ) != size(ω)[2]
         throw(DimansionMismatch("ω must have size $(length(μ))×$(length(μ))"))
     end
-    if any(μ .< zero(μ))
-        throw(
-            DomainError(
-                "μ = $μ",
-                "HawkesProcess: the condition μ ≥ 0 is not satisfied for all dimensions.",
-            ),
-        )
-    end
-    if any(α .< zero(α))
+    if any(sum(α ./ ω; dims=1) .>= 1)
         throw(
             DomainError(
-                "α = $α",
-                "HawkesProcess: the condition α ≥ 0 is not satisfied for all dimensions.",
+                "α = $α, ω = $ω",
+                "HawkesProcess: Sum of α/β over each row must be smaller than 1. Stability condition.",
             ),
         )
     end
-    if any(ω .< zero(ω))
+    if any(μ .< zero(μ)) || any(α .< zero(α)) || any(ω .< zero(ω))
         throw(
             DomainError(
-                "ω = $ω",
-                "HawkesProcess: the condition ω ≥ 0 is not satisfied for all dimensions.",
+                "μ = $μ, α = $α, ω = $ω",
+                "HawkesProcess: All elements of μ, α and ω must be non-negative.",
             ),
         )
     end
+    ω[α .== 0] .= 1 # Protects against division by 0 in simulation
     return nothing
 end
-
-## Access
-function ground_intensity(hp::HawkesProcess, h::History, t)
-    activation = sum(exp.(hp.ω .* (@view h.times[1:(searchsortedfirst(h.times, t) - 1)])))
-    return hp.μ + (hp.α * activation / exp(hp.ω * t))
-end
-
-function integrated_ground_intensity(hp::HawkesProcess, h::History, tmin, tmax)
-    times = event_times(h, h.tmin, tmax)
-    integral = 0
-    for ti in times
-        # Integral of activation function. 'max(tmin - ti, 0)' corrects for events that occurred
-        # inside or outside the interval [tmin, tmax].
-        integral += (exp(-hp.ω * max(tmin - ti, 0)) - exp(-hp.ω * (tmax - ti)))
-    end
-    integral *= hp.α / hp.ω
-    integral += hp.μ * (tmax - tmin) # Integral of base rate
-    return integral
-end
-
-function DensityInterface.logdensityof(hp::HawkesProcess, h::History)
-    A = zeros(nb_events(h)) # Vector A in Ozaki (1979)
-    for i in 2:nb_events(h)
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
-    end
-    return sum(log.(hp.μ .+ (hp.α .* A))) - # Value of intensity at each event
-           (hp.μ * duration(h)) - # Integral of base rate
-           ((hp.α / hp.ω) * sum(1 .- exp.(-hp.ω .* (duration(h) .- h.times)))) # Integral of each kernel
-end
-
-function time_change(hp::HawkesProcess, h::History{T,M}) where {T<:Real,M}
-    n = nb_events(h)
-    A = zeros(T, n + 1) # Array A in Ozaki (1979)
-    @inbounds for i in 2:n
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
-    end
-    A[end] = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A[end - 1]) # Used to calculate the integral of the intensity at every event time
-    times = T.(hp.μ .* (h.times .- h.tmin)) # Transformation with respect to base rate
-    T_base = hp.μ * duration(h) # Contribution of base rate to total length of time re-scaled process
-    for i in eachindex(times)
-        times[i] += (hp.α / hp.ω) * ((i - 1) - A[i]) # Add contribution of activation functions
-    end
-    return History(;
-        times=times,
-        marks=h.marks,
-        tmin=zero(T),
-        tmax=T(T_base + ((hp.α / hp.ω) * (n - A[end]))),
-        check=false,
-    ) # A time re-scaled process starts at t=0
-end
-
diff --git a/src/hawkes/intensity.jl b/src/hawkes/intensity.jl
new file mode 100644
index 0000000..eea94c6
--- /dev/null
+++ b/src/hawkes/intensity.jl
@@ -0,0 +1,63 @@
+# Ground intensity
+function ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, t)
+    times = event_times(h, h.tmin, t)
+    activation = sum(hp.α .* exp.(hp.ω .* times))
+    return hp.μ + (activation / exp(hp.ω * t))
+end
+
+function integrated_ground_intensity(
+    hp::UnmarkedUnivariateHawkesProcess, h::History, tmin, tmax
+)
+    times = event_times(h, h.tmin, tmax)
+    integral = 0
+    for ti in times
+        # Integral of activation function. 'max(tmin - ti, 0)' corrects for events that occurred
+        # inside or outside the interval [tmin, tmax].
+        integral += (exp(-hp.ω * max(tmin - ti, 0)) - exp(-hp.ω * (tmax - ti)))
+    end
+    integral *= hp.α / hp.ω
+    integral += hp.μ * (tmax - tmin) # Integral of base rate
+    return integral
+end
+
+function DensityInterface.logdensityof(hp::UnmarkedUnivariateHawkesProcess, h::History)
+    A = zeros(nb_events(h)) # Vector A in Ozaki (1979)
+    for i in 2:nb_events(h)
+        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
+    end
+    return sum(log.(hp.μ .+ (hp.α .* A))) - # Value of intensity at each event
+           (hp.μ * duration(h)) - # Integral of base rate
+           ((hp.α / hp.ω) * sum(1 .- exp.(-hp.ω .* (duration(h) .- h.times)))) # Integral of each kernel
+end
+
+function ground_intensity(hp::UnivariateHawkesProcess, h::History, t)
+    idx = searchsortedfirst(h.times, t) - 1
+    times = (@view h.times[1:idx])
+    marks = (@view h.marks[1:idx])
+    activation = sum(hp.α .* marks .* exp.(hp.ω .* times))
+    return hp.μ + (activation / exp(hp.ω * t))
+end
+
+function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, tmin, tmax)
+    times = event_times(h, h.tmin, tmax)
+    marks = event_marks(h, h.tmin, tmax)
+    integral = 0
+    for (ti, mi) in zip(times, marks)
+        # Integral of activation function. 'max(tmin - ti, 0)' corrects for events that occurred
+        # inside or outside the interval [tmin, tmax].
+        integral += mi * (exp(-hp.ω * max(tmin - ti, 0)) - exp(-hp.ω * (tmax - ti)))
+    end
+    integral *= hp.α / hp.ω
+    integral += hp.μ * (tmax - tmin) # Integral of base rate
+    return integral
+end
+
+function DensityInterface.logdensityof(hp::UnivariateHawkesProcess, h::History)
+    A = zeros(nb_events(h)) # Vector A in Ozaki (1979)
+    for i in 2:nb_events(h)
+        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (h.marks[i - 1] + A[i - 1])
+    end
+    return sum(log.(hp.μ .+ (hp.α .* A))) - # Value of intensity at each event
+           (hp.μ * duration(h)) - # Integral of base rate
+           ((hp.α / hp.ω) * sum(h.marks .* (1 .- exp.(-hp.ω .* (duration(h) .- h.times))))) # Integral of each kernel
+end
diff --git a/src/hawkes/simulation.jl b/src/hawkes/simulation.jl
index bd3756c..2e0e436 100644
--- a/src/hawkes/simulation.jl
+++ b/src/hawkes/simulation.jl
@@ -1,9 +1,14 @@
-function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin, tmax)
-    sim = simulate_poisson_times(rng, hp.μ, tmin, tmax) # Simulate Poisson process with base rate
-    sim_desc = generate_descendants(rng, sim, tmax, hp.α, hp.ω) # Recursively generates descendants from first events
-    append!(sim, sim_desc)
-    sort!(sim)
-    return History(; times=sim, tmin=tmin, tmax=tmax, check=false)
+function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin::Real, tmax::Real)
+    h = simulate(rng, PoissonProcess(hp.μ, hp.mark_dist), tmin, tmax)
+    sim_desc = generate_descendants!(rng, h, hp, tmax) # Recursively generates descendants from first events
+    perm = sortperm(h.times)
+    h.times .= h.times[perm]
+    h.marks .= h.marks[perm]
+    return h
+end
+
+function simulate(hp::HawkesProcess, tmin::Real, tmax::Real)
+    simulate(default_rng(), hp, tmin, tmax)
 end
 
 #=
@@ -14,22 +19,44 @@ with intensity λ(t) = α exp(-ω(t - t_g)) with the inverse method.
 gen_{n+1} is the set of all events simulated from all events in gen_n.
 The algorithm stops when the simulation from one generation results in no further events.
 =#
-function generate_descendants(
-    rng::AbstractRNG, immigrants::Vector{T}, tmax, α, ω
-) where {T<:Real}
-    descendants = T[]
-    next_gen = immigrants
-    while !isempty(next_gen)
-        # OPTIMIZE: Can this be improved by avoiding allocations of `curr_gen` and `next_gen`? Or does the compiler take care of that?
-        curr_gen = copy(next_gen) # The current generation from which we simulate the next one
-        next_gen = eltype(immigrants)[] # Gathers all the descendants from the current generation
-        for parent in curr_gen # Generate the descendants for each individual event with the inverse method
-            activation_integral = (α / ω) * (one(T) - exp(ω * (parent - tmax)))
-            sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
-            @. sim_transf = parent - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
-            append!(next_gen, sim_transf)
+function generate_descendants!(
+    rng::AbstractRNG, h::History{T,M}, hp::HawkesProcess, tmax
+) where {T<:Real,M}
+    gen_start = 1
+    gen_end = nb_events(h)
+    while gen_start <= gen_end # Last generation is not empty
+        for parent_idx in gen_start:gen_end
+            one_parent!(rng, h, hp, h[parent_idx])
         end
-        append!(descendants, next_gen)
+        gen_start = gen_end + 1
+        gen_end = nb_events(h)
+    end
+    return nothing
+end
+
+function one_parent!(
+    rng::AbstractRNG, h::History{T}, hp::UnivariateHawkesProcess, parent
+) where {T<:Real}
+    α = isnothing(parent[2]) ? hp.α : parent[2] * hp.α
+    activation_integral = (α / hp.ω) * (one(T) - exp(hp.ω * (parent[1] - h.tmax)))
+    sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
+    @. sim_transf = parent[1] - (inv(hp.ω) * log(one(T) - ((hp.ω / α) * sim_transf))) # Inverse of integral of the activation function
+    append!(h.times, sim_transf)
+    append!(h.marks, [rand(rng, hp.mark_dist) for _ in 1:length(sim_transf)])
+    return nothing
+end
+
+function one_parent!(
+    rng::AbstractRNG, h::History{T}, mh::MultivariateHawkesProcess, parent
+) where {T<:Real}
+    for m in 1:length(mh)
+        α = mh.α[m, parent[2]]
+        ω = mh.ω[m, parent[2]]
+        activation_integral = (α / ω) * (one(T) - exp(ω * (parent[1] - h.tmax)))
+        sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
+        @. sim_transf = parent[1] - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
+        append!(h.times, sim_transf)
+        append!(h.marks, fill(m, length(sim_transf)))
     end
-    return descendants
+    return nothing
 end
diff --git a/src/hawkes/time_change.jl b/src/hawkes/time_change.jl
new file mode 100644
index 0000000..b04f014
--- /dev/null
+++ b/src/hawkes/time_change.jl
@@ -0,0 +1,20 @@
+function time_change(hp::UnmarkedUnivariateHawkesProcess, h::History{T,M}) where {T<:Real,M}
+    n = nb_events(h)
+    A = zeros(T, n + 1) # Array A in Ozaki (1979)
+    @inbounds for i in 2:n
+        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
+    end
+    A[end] = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A[end - 1]) # Used to calculate the integral of the intensity at every event time
+    times = T.(hp.μ .* (h.times .- h.tmin)) # Transformation with respect to base rate
+    T_base = hp.μ * duration(h) # Contribution of base rate to total length of time re-scaled process
+    for i in eachindex(times)
+        times[i] += (hp.α / hp.ω) * ((i - 1) - A[i]) # Add contribution of activation functions
+    end
+    return History(;
+        times=times,
+        marks=h.marks,
+        tmin=zero(T),
+        tmax=T(T_base + ((hp.α / hp.ω) * (n - A[end]))),
+        check_args=false,
+    ) # A time re-scaled process starts at t=0
+end
diff --git a/src/history.jl b/src/history.jl
index 2e92c46..7121d7f 100644
--- a/src/history.jl
+++ b/src/history.jl
@@ -16,8 +16,8 @@ struct History{T<:Real,M}
     tmax::T
     marks::Vector{M}
 
-    function History(times, tmin, tmax, marks=fill(nothing, length(times)); check=true)
-        if check
+    function History(times, tmin, tmax, marks=fill(nothing, length(times)); check_args=true)
+        if check_args
             if tmin >= tmax
                 throw(
                     DomainError(
@@ -49,8 +49,8 @@ struct History{T<:Real,M}
     end
 end
 
-function History(; times, tmin, tmax, marks=fill(nothing, length(times)), check=true)
-    History(times, tmin, tmax, marks; check)
+function History(; times, tmin, tmax, marks=fill(nothing, length(times)), check_args=true)
+    History(times, tmin, tmax, marks; check_args)
 end
 
 function Base.show(io::IO, h::History{T,M}) where {T,M}
@@ -59,6 +59,19 @@ function Base.show(io::IO, h::History{T,M}) where {T,M}
     )
 end
 
+"""
+    length(h)
+
+Alias for `nb_events(h)`.
+"""
+Base.length(h::History) = nb_events(h)
+
+Base.firstindex(h::History) = 1
+Base.lastindex(h::History) = length(h)
+Base.getindex(h::History, i::Int) = (h.times[i], h.marks[i])
+Base.getindex(h::History, I::Vararg{Int}) = [(h.times[i], h.marks[i]) for i in I]
+Base.getindex(h::History, I::AbstractVector{<:Int}) = getindex(h, I...)
+
 """
     event_times(h)
 
@@ -112,13 +125,6 @@ Count events in `h`.
 """
 nb_events(h::History) = length(h.marks)
 
-"""
-    length(h)
-
-Alias for `nb_events(h)`.
-"""
-Base.length(h::History) = nb_events(h)
-
 """
     nb_events(h, tmin, tmax)
 
@@ -156,8 +162,8 @@ duration(h::History) = max_time(h) - min_time(h)
 
 Add event `(t, m)` inside the interval `[h.tmin, h.tmax)` at the end of history `h`.
 """
-function Base.push!(h::History, t::Real, m; check=true)
-    if check
+function Base.push!(h::History, t::Real, m; check_args=true)
+    if check_args
         @assert h.tmin <= t < h.tmax
         @assert (length(h) == 0) || (h.times[end] <= t)
     end
@@ -171,8 +177,8 @@ end
 
 Append events `(ts, ms)` inside the interval `[h.tmin, h.tmax)` at the end of history `h`.
 """
-function Base.append!(h::History, ts::Vector{<:Real}, ms; check=true)
-    if check
+function Base.append!(h::History, ts::Vector{<:Real}, ms; check_args=true)
+    if check_args
         perm = sortperm(ts)
         ts .= ts[perm]
         ms .= ms[perm]
@@ -200,7 +206,7 @@ function Base.cat(h1::History, h2::History)
     )
     times = [h1.times; h2.times]
     marks = [h1.marks; h2.marks]
-    return History(; times=times, tmin=h1.tmin, tmax=h2.tmax, marks=marks, check=false)
+    return History(; times=times, tmin=h1.tmin, tmax=h2.tmax, marks=marks, check_args=false)
 end
 
 """
@@ -213,7 +219,9 @@ function time_change(h::History, Λ)
     new_marks = copy(event_marks(h))
     new_tmin = Λ(min_time(h))
     new_tmax = Λ(max_time(h))
-    return History(; times=new_times, marks=new_marks, tmin=new_tmin, tmax=new_tmax)
+    return History(;
+        times=new_times, marks=new_marks, tmin=new_tmin, tmax=new_tmax, check_args=false
+    )
 end
 
 """
@@ -230,7 +238,7 @@ function split_into_chunks(h::History{T,M}, chunk_duration) where {T,M}
     for (a, b) in zip(limits[1:(end - 1)], limits[2:end])
         times = [t for t in event_times(h) if a <= t < b]
         marks = [m for (t, m) in zip(event_times(h), event_marks(h)) if a <= t < b]
-        chunk = History(; times=times, marks=marks, tmin=a, tmax=b, check=false)
+        chunk = History(; times=times, marks=marks, tmin=a, tmax=b, check_args=false)
         push!(chunks, chunk)
     end
     return chunks
diff --git a/src/poisson/simulation.jl b/src/poisson/simulation.jl
index aa22d8d..e4a66c1 100644
--- a/src/poisson/simulation.jl
+++ b/src/poisson/simulation.jl
@@ -1,5 +1,5 @@
 function simulate(rng::AbstractRNG, pp::PoissonProcess, tmin::T, tmax::T) where {T<:Real}
     times = simulate_poisson_times(rng, ground_intensity(pp), tmin, tmax)
     marks = [rand(rng, mark_distribution(pp)) for _ in 1:length(times)]
-    return History(; times=times, marks=marks, tmin=tmin, tmax=tmax)
+    return History(; times=times, marks=marks, tmin=tmin, tmax=tmax, check_args=false)
 end
diff --git a/src/simulation.jl b/src/simulation.jl
index 2c39803..69b48c5 100644
--- a/src/simulation.jl
+++ b/src/simulation.jl
@@ -5,6 +5,7 @@ Simulate the event times of a homogeneous Poisson process with parameter λ on t
 Internal function to use in all other simulation algorithms.
 =#
 function simulate_poisson_times(rng::AbstractRNG, λ, tmin, tmax)
+    λ == 0 && return Float64[]
     N = rand(rng, Poisson(λ * (tmax - tmin)))
     times = [rand(rng, Uniform(tmin, tmax)) for _ in 1:N] # rand(rng, Uniform(tmin, tmax), N) always outputs a `Float64`
     sort!(times)
@@ -19,9 +20,8 @@ Simulate a temporal point process `pp` on interval `[tmin, tmax)` using Ogata's
 # Technical Remark
 To infer the type of the marks, the implementation assumes that there is method of `mark_distribution` without the argument `h` such that it corresponds to the distribution of marks in case the history is empty.
 """
-function simulate_ogata(
-    rng::AbstractRNG, pp::AbstractPointProcess, tmin::T, tmax::T
-) where {T<:Real}
+function simulate_ogata(rng::AbstractRNG, pp::AbstractPointProcess, tmin::Real, tmax::Real)
+    T = promote_type(typeof(tmin), typeof(tmax))
     M = typeof(rand(mark_distribution(pp, tmin)))
     h = History(; times=T[], marks=M[], tmin=tmin, tmax=tmax)
     t = tmin
@@ -36,7 +36,7 @@ function simulate_ogata(
             if U < U_max
                 m = rand(rng, mark_distribution(pp, t + τ, h))
                 if t + τ < tmax
-                    push!(h, t + τ, m; check=false)
+                    push!(h, t + τ, m; check_args=false)
                 end
             end
             t = t + τ
diff --git a/test/hawkes.jl b/test/hawkes.jl
index 781b3b6..f6a4002 100644
--- a/test/hawkes.jl
+++ b/test/hawkes.jl
@@ -1,6 +1,6 @@
 # Constructor
-@test HawkesProcess(1, 1, 2) isa HawkesProcess{Int}
-@test HawkesProcess(1, 1, 2.0) isa HawkesProcess{Float64}
+@test HawkesProcess(1, 1, 2) isa UnmarkedUnivariateHawkesProcess{Int64}
+@test HawkesProcess(1, 1, 2.0) isa UnmarkedUnivariateHawkesProcess{Float64}
 @test_throws DomainError HawkesProcess(1, 1, 1)
 @test_throws DomainError HawkesProcess(-1, 1, 2)
 
@@ -44,12 +44,17 @@ Random.seed!(123)
 params_true = (100.0, 100.0, 200.0)
 model = HawkesProcess(params_true...)
 h_sim = simulate(model, 0.0, 50.0)
-model_est = fit(HawkesProcess, h_sim)
+model_est = fit(UnmarkedUnivariateHawkesProcess, h_sim)
 params_est = (model_est.μ, model_est.α, model_est.ω)
 @test isa(model_est, HawkesProcess)
 @test all((params_true .* 0.9) .<= params_est .<= (params_true .* 1.1))
-@test isa(fit(HawkesProcess, h_big), HawkesProcess{BigFloat})
-@test isa(fit(HawkesProcess{Float32}, h_big), HawkesProcess{Float32})
+@test isa(
+    fit(UnmarkedUnivariateHawkesProcess, h_big), UnmarkedUnivariateHawkesProcess{BigFloat}
+)
+@test isa(
+    fit(UnmarkedUnivariateHawkesProcess{Float32}, h_big),
+    UnmarkedUnivariateHawkesProcess{Float32},
+)
 
 # logdensityof
 @test logdensityof(hp, h) ≈

From 658c36410215fb8d114f8181d207d60b8341a6ad Mon Sep 17 00:00:00 2001
From: jkling <jose.kling@pm.me>
Date: Thu, 4 Dec 2025 10:34:33 +0100
Subject: [PATCH 3/4] Univariate completed

Changed file structure so each type of Hawkes process is in its own
folder.
`Intensity`, `simulation`, `time_change` and `fit` implemented for univariate
processes.
Missing `time_change` and `fit` for multivariate processes.
---
 docs/src/api.md                               |   2 +-
 src/PointProcesses.jl                         |  18 +-
 src/hawkes/fit.jl                             |  72 ++-----
 src/hawkes/hawkes_process.jl                  | 190 +-----------------
 src/hawkes/intensity.jl                       | 143 ++++++++-----
 src/hawkes/multivariate/intensity.jl          |  51 +++++
 .../multivariate/multivariate_hawkes.jl       |  90 +++++++++
 src/hawkes/multivariate/simulation.jl         |  10 +
 src/hawkes/simulation.jl                      |  50 ++---
 src/hawkes/time_change.jl                     |  20 --
 src/hawkes/univariate/fit.jl                  |  41 ++++
 src/hawkes/univariate/intensity.jl            |  44 ++++
 src/hawkes/univariate/simulation.jl           |   8 +
 src/hawkes/univariate/time_change.jl          |  27 +++
 src/hawkes/univariate/univariate_hawkes.jl    |  51 +++++
 src/hawkes/unmarked_univariate/fit.jl         |  55 +++++
 src/hawkes/unmarked_univariate/intensity.jl   |  34 ++++
 src/hawkes/unmarked_univariate/simulation.jl  |   8 +
 src/hawkes/unmarked_univariate/time_change.jl |  22 ++
 .../unmarked_univariate_hawkes.jl             |  50 +++++
 src/simulation.jl                             |   2 +-
 test/bounded_point_process.jl                 |   8 +-
 test/hawkes.jl                                | 132 +++++++++---
 23 files changed, 762 insertions(+), 366 deletions(-)
 create mode 100644 src/hawkes/multivariate/intensity.jl
 create mode 100644 src/hawkes/multivariate/multivariate_hawkes.jl
 create mode 100644 src/hawkes/multivariate/simulation.jl
 delete mode 100644 src/hawkes/time_change.jl
 create mode 100644 src/hawkes/univariate/fit.jl
 create mode 100644 src/hawkes/univariate/intensity.jl
 create mode 100644 src/hawkes/univariate/simulation.jl
 create mode 100644 src/hawkes/univariate/time_change.jl
 create mode 100644 src/hawkes/univariate/univariate_hawkes.jl
 create mode 100644 src/hawkes/unmarked_univariate/fit.jl
 create mode 100644 src/hawkes/unmarked_univariate/intensity.jl
 create mode 100644 src/hawkes/unmarked_univariate/simulation.jl
 create mode 100644 src/hawkes/unmarked_univariate/time_change.jl
 create mode 100644 src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl

diff --git a/docs/src/api.md b/docs/src/api.md
index d657037..a2f7344 100644
--- a/docs/src/api.md
+++ b/docs/src/api.md
@@ -107,8 +107,8 @@ HawkesProcess
 ### Univariate
 
 ```@docs
-UnivariateHawkesProcess
 UnmarkedUnivariateHawkesProcess
+UnivariateHawkesProcess
 ```
 
 ### Multivariate
diff --git a/src/PointProcesses.jl b/src/PointProcesses.jl
index ca3ec72..3351567 100644
--- a/src/PointProcesses.jl
+++ b/src/PointProcesses.jl
@@ -10,7 +10,7 @@ module PointProcesses
 using DensityInterface: DensityInterface, HasDensity, densityof, logdensityof
 using Distributions: Distributions, UnivariateDistribution, MultivariateDistribution
 using Distributions: Categorical, Exponential, Poisson, Uniform, Dirac
-using Distributions: fit, suffstats, probs, mean, support
+using Distributions: fit, suffstats, probs, mean, support, pdf
 using LinearAlgebra: dot, diagm
 using Random: rand
 using Random: AbstractRNG, default_rng
@@ -73,6 +73,20 @@ include("hawkes/hawkes_process.jl")
 include("hawkes/simulation.jl")
 include("hawkes/intensity.jl")
 include("hawkes/fit.jl")
-include("hawkes/time_change.jl")
+include("hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl")
+include("hawkes/unmarked_univariate/intensity.jl")
+include("hawkes/unmarked_univariate/time_change.jl")
+include("hawkes/unmarked_univariate/fit.jl")
+include("hawkes/unmarked_univariate/simulation.jl")
+include("hawkes/univariate/univariate_hawkes.jl")
+include("hawkes/univariate/intensity.jl")
+include("hawkes/univariate/time_change.jl")
+include("hawkes/univariate/fit.jl")
+include("hawkes/univariate/simulation.jl")
+include("hawkes/multivariate/multivariate_hawkes.jl")
+include("hawkes/multivariate/intensity.jl")
+# include("hawkes/multivariate/time_change.jl")
+# include("hawkes/multivariate/fit.jl")
+include("hawkes/multivariate/simulation.jl")
 
 end
diff --git a/src/hawkes/fit.jl b/src/hawkes/fit.jl
index 892f849..f1f0a15 100644
--- a/src/hawkes/fit.jl
+++ b/src/hawkes/fit.jl
@@ -1,50 +1,29 @@
-"""
-    StatsAPI.fit(rng, ::Type{UnmarkedUnivariateHawkesProcess{T}}, h::History; step_tol::Float64 = 1e-6, max_iter::Int = 1000) where {T<:Real}
-
-Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
-The relevant calculations are in page 4, equations 6-13.
-
-Let (t₁ < ... < tₙ) be the event times over the interval [0, T). We use the immigrant-descendant representation,
-where immigrants arrive at a constant base rate μ and each each arrival may generate descendants following the
-activation function α exp(-ω(t - tᵢ)).
-
-The algorithm consists in the following steps:
-1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
-2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
-3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
-
-    Dᵢⱼ = ψ ω exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
-
-    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
-4. Update the parameters as
-        μ = (N - D) / T
-        ψ = D / N
-        ω = D / div
-5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
-
-Notice that, in the implementation, the process is normalized so the average inter-event time is equal to 1 and, 
-therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
-
-∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
-
-"""
-function StatsAPI.fit(
-    ::Type{<:UnmarkedUnivariateHawkesProcess{T}},
-    h::History;
+#=
+Method for fitting the parameters of marked and unmarked Hawkes
+processes. For unmarked processes, the marks in `history` must
+be either all equal to `nothing` or equal to 1.
+=#
+function fit_hawkes_params(
+    ::Type{R},
+    h::History,
+    div_ψ::Real;
     step_tol::Float64=1e-6,
     max_iter::Int=1000,
     rng::AbstractRNG=default_rng(),
-) where {T<:Real}
+) where {R<:Real}
+    T = float(R)
+    div_ψ = T(div_ψ)
     n = nb_events(h)
-    n == 0 && return HawkesProcess(zero(T), zero(T), zero(T))
+    nT = T(n)
+    n == 0 && return (zero(T), zero(T), zero(T))
 
     tmax = T(duration(h))
     # Normalize times so average inter-event time is 1 (T -> n)
     norm_ts = T.(h.times .* (n / tmax))
 
     # preallocate
-    A = zeros(T, n)            # A[i] = sum_{j<i} exp(-ω (t_i - t_j))
-    S = zeros(T, n)            # S[i] = sum_{j<i} (t_i - t_j) exp(-ω (t_i - t_j))
+    A = zeros(T, n)            # A[i] = sum_{j<i} mⱼ exp(-ω (t_i - t_j))
+    S = zeros(T, n)            # S[i] = sum_{j<i} mⱼ (t_i - t_j) exp(-ω (t_i - t_j))
     lambda_ts = similar(A)     # λ_i
 
     # Λ(T) ≈ n after normalization
@@ -65,9 +44,8 @@ function StatsAPI.fit(
         for i in 2:n
             Δ = norm_ts[i] - norm_ts[i - 1]
             e = exp(-ω * Δ)
-            Ai_1 = A[i - 1]
-            A[i] = e * (one(T) + Ai_1)
-            S[i] = e * (S[i - 1] + Δ * (one(T) + Ai_1))
+            A[i] = update_A(A[i - 1], e, h.marks[i - 1]) # Different updates for real marks and no marks
+            S[i] = (Δ * A[i]) + (e * S[i - 1])
             lambda_ts[i] = μ + (ψ * ω) * A[i]
         end
 
@@ -81,8 +59,8 @@ function StatsAPI.fit(
         end
 
         # Steps 4–5: M-step updates + convergence check
-        new_μ = one(T) - (D / n)
-        new_ψ = D / n
+        new_μ = one(T) - (D / nT)
+        new_ψ = D / div_ψ
         new_ω = D / (div + eps(T))   # small guard to avoid div=0
 
         step = max(abs(μ - new_μ), abs(ψ - new_ψ), abs(ω - new_ω))
@@ -97,13 +75,5 @@ function StatsAPI.fit(
 
     # Unnormalize back to original time scale (T -> tmax):
     # parameters in normalized space (') relate to original by μ0=μ'*(n/tmax), ω0=ω'*(n/tmax), α0=(ψ'ω')*(n/tmax)
-    return HawkesProcess(μ * (n / tmax), ψ * ω * (n / tmax), ω * (n / tmax))
-end
-
-# Type parameter for `HawkesProcess` was NOT explicitly provided
-function StatsAPI.fit(
-    HP::Type{UnmarkedUnivariateHawkesProcess}, h::History{H,M}; kwargs...
-) where {H<:Real,M}
-    T = promote_type(Float64, H)
-    return fit(HP{T}, h; kwargs...)
+    return (μ * (nT / tmax), ψ * ω * (nT / tmax), ω * (nT / tmax))
 end
diff --git a/src/hawkes/hawkes_process.jl b/src/hawkes/hawkes_process.jl
index 51cb379..f19e3b9 100644
--- a/src/hawkes/hawkes_process.jl
+++ b/src/hawkes/hawkes_process.jl
@@ -5,183 +5,13 @@ Common interface for all subtypes of `HawkesProcess`.
 """
 abstract type HawkesProcess <: AbstractPointProcess end
 
-"""
-    UnivariateHawkesProcess{R<:Real,D} <: HawkesProcess
-
-Univariate Hawkes process with exponential decay kernel and mark 
-distribution `D`.
-
-Denote the events of the process by (tᵢ, mᵢ), where tᵢ is the event time
-and mᵢ ∈ M the corresponding mark. The conditional intensity function
-of the Hawkes process is given by
-
-λ(t) = μ + ∑_{tᵢ < t} α mᵢ exp(-ω (t - tᵢ)).
-
-Notice that the mark only affects the jump size.
-
-# Fields
-- `μ::R`: baseline intensity (immigration rate)
-- `α::R`: jump size
-- `ω::R`: decay rate.
-- `mark_dist::D`: distribution of marks
-"""
-struct UnivariateHawkesProcess{T<:Real,D} <: HawkesProcess
-    μ::T
-    α::T
-    ω::T
-    mark_dist::D
-end
-
-function Base.show(io::IO, hp::UnivariateHawkesProcess)
-    return print(io, "UnivariateHawkesProcess($(hp.μ), $(hp.α), $(hp.ω), $(hp.mark_dist))")
-end
-
-"""
-    UnmarkedUnivariateHawkesProcess{R<:Real} <: HawkesProcess
-
-Unmarked univariate Hawkes process with exponential decay kernel.
-
-Denote the events of the process by tᵢ. The conditional intensity function
-of the Hawkes process is given by
-
-λ(t) = μ + ∑_{tᵢ < t} α exp(-ω (t - tᵢ)).
-
-# Fields
-- `μ::R`: baseline intensity (immigration rate)
-- `α::R`: jump size
-- `ω::R`: decay rate.
-
-Alias for UnivariateHawkesProcess{R, Dirac{Nothing}}.
-"""
-const UnmarkedUnivariateHawkesProcess{R<:Real} = UnivariateHawkesProcess{R,Dirac{Nothing}}
-
-function Base.show(io::IO, hp::UnmarkedUnivariateHawkesProcess)
-    return print(io, "UnmarkedUnivariateHawkesProcess$((hp.μ, hp.α, hp.ω))")
-end
-
-"""
-    MultivariateHawkesProcess{R} <: HawkesProcess
-
-Unmarked multivariate temporal Hawkes process with exponential decay
-
-For a process with m = 1, 2, ..., M marginal processes, the conditional intensity
-function of the m-th process is given by
-
-    λₘ(t) = μₘ + ∑_{l=1,...,M} ∑_{tˡᵢ < t} αₘₗ exp(-βₘₗ(t - tˡᵢ)),
-
-where tˡᵢ is the i-th element in the l-th marginal process.
-μ is a vector of length M with elements μₘ. α and ω are M×M
-matrices with elements αₘₗ and ωₘₗ.
-
-The process is represented as a marked process, where each marginal
-process m = 1, 2, ..., M is represented by events (tᵐᵢ, m), tᵐᵢ being
-the i-th element with mark m.
-
-# Fields
-- `μ::Vector{<:Real}`: baseline intensity (immigration rate)
-- `α::Matrix{<:Real}`: Jump size.
-- `ω::Matrix{<:Real}`: Decay rate.
-"""
-struct MultivariateHawkesProcess{T<:Real} <: HawkesProcess
-    μ::T
-    α::Matrix{T}
-    ω::Matrix{T}
-    mark_dist::Categorical{Float64,Vector{Float64}} # To keep consistent with PoissonProcess. Helps in simulation.
-end
-
-function Base.show(io::IO, hp::MultivariateHawkesProcess{T}) where {T<:Real}
-    return print(
-        io,
-        "MultivariateHawkesProcess\nμ = $(T.(hp.μ .* probs(hp.mark_dist)))\nα = $(hp.α)\nω = $(hp.ω)",
-    )
-end
-
-Base.length(mh::MultivariateHawkesProcess) = size(mh.α)[1]
-
-## Constructors
-### UnivariatehawkesProcess
-function HawkesProcess(μ::Real, α::Real, ω::Real, mark_dist; check_args::Bool=true)
-    check_args && check_args_Hawkes(μ, α, ω, mark_dist)
-    return UnivariateHawkesProcess(promote(μ, α, ω)..., mark_dist)
-end
-
-function HawkesProcess(μ::Real, α::Real, ω::Real; check_args::Bool=true)
-    return HawkesProcess(μ, α, ω, Dirac(nothing); check_args=check_args)
-end
-
-### MultivariateHawkesProcess
-function HawkesProcess(
-    μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Matrix{<:Real}; check_args::Bool=true
-)
-    check_args && check_args_Hawkes(μ, α, ω)
-    R = promote_type(eltype(μ), eltype(α), eltype(ω))
-    return MultivariateHawkesProcess(R(sum(μ)), R.(α), R.(ω), Categorical(μ / sum(μ)))
-end
-
-# For M independent marginal processes
-function HawkesProcess(
-    μ::Vector{<:Real}, α::Vector{<:Real}, ω::Vector{<:Real}; check_args::Bool=true
-)
-    check_args && check_args_Hawkes(μ, diagm(α), diagm(ω))
-    R = promote_type(eltype(μ), eltype(α), eltype(ω))
-    return MultivariateHawkesProcess(R(sum(μ)), R.(α), R.(ω), Categorical(μ / sum(μ)))
-end
-
-# Check args
-## Univariate Hawkes
-function check_args_Hawkes(μ::Real, α::Real, ω::Real, mark_dist)
-    if any((μ, α, ω) .< 0)
-        throw(
-            DomainError(
-                "μ = $μ, α = $α, ω = $ω",
-                "HawkesProcess: All parameters must be non-negative.",
-            ),
-        )
-    end
-    mean_α = mark_dist isa Dirac{Nothing} ? α : mean(mark_dist) * α
-    if mean_α >= ω
-        throw(
-            DomainError(
-                "α = $(mean_α), ω = $ω",
-                "HawkesProcess: mᵢα must be, on average, smaller than ω. Stability condition.",
-            ),
-        )
-    end
-    if !isa(mark_dist, Dirac{Nothing}) && minimum(mark_dist) < 0
-        throw(
-            DomainError(
-                "Mark distribution support = $((support(mark_dist).lb, support(mark_dist).ub))",
-                "HawkesProcess: Support of mark distribution must be contained in non-negative numbers",
-            ),
-        )
-    end
-    return nothing
-end
-
-## Multivariate Hawkes
-function check_args_Hawkes(μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Matrix{<:Real})
-    if length(μ) != size(α)[2]
-        throw(DimansionMismatch("α must have size $(length(μ))×$(length(μ))"))
-    end
-    if length(μ) != size(ω)[2]
-        throw(DimansionMismatch("ω must have size $(length(μ))×$(length(μ))"))
-    end
-    if any(sum(α ./ ω; dims=1) .>= 1)
-        throw(
-            DomainError(
-                "α = $α, ω = $ω",
-                "HawkesProcess: Sum of α/β over each row must be smaller than 1. Stability condition.",
-            ),
-        )
-    end
-    if any(μ .< zero(μ)) || any(α .< zero(α)) || any(ω .< zero(ω))
-        throw(
-            DomainError(
-                "μ = $μ, α = $α, ω = $ω",
-                "HawkesProcess: All elements of μ, α and ω must be non-negative.",
-            ),
-        )
-    end
-    ω[α .== 0] .= 1 # Protects against division by 0 in simulation
-    return nothing
-end
+# #=
+# If type parameter for `HawkesProcess` was NOT explicitly provided,
+# use `Float64` as the standard type
+# =#
+# function StatsAPI.fit(
+#     HP::Type{<:HawkesProcess}, h::History{H,M}; kwargs...
+# ) where {H<:Real,M}
+#     T = promote_type(Float64, H)
+#     return fit(HP{T}, h; kwargs...)
+# end
diff --git a/src/hawkes/intensity.jl b/src/hawkes/intensity.jl
index eea94c6..9112f95 100644
--- a/src/hawkes/intensity.jl
+++ b/src/hawkes/intensity.jl
@@ -1,63 +1,104 @@
-# Ground intensity
-function ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, t)
-    times = event_times(h, h.tmin, t)
-    activation = sum(hp.α .* exp.(hp.ω .* times))
-    return hp.μ + (activation / exp(hp.ω * t))
-end
+#=
+Calculates the vector `A` as is [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
+A[1] = 0
+A[i] = Σ_{tᵢ<tⱼ} exp(-ω(tᵢ - tⱼ))
+
+For marked processes, the calculation is slightly different
+A[1] = 0
+A[i] = Σ_{tᵢ<tⱼ} mᵢ exp(-ω(tᵢ - tⱼ)), where mᵢ is the i-th mark of the process
+
+This is used to efficiently calculate
 
-function integrated_ground_intensity(
-    hp::UnmarkedUnivariateHawkesProcess, h::History, tmin, tmax
-)
-    times = event_times(h, h.tmin, tmax)
-    integral = 0
-    for ti in times
-        # Integral of activation function. 'max(tmin - ti, 0)' corrects for events that occurred
-        # inside or outside the interval [tmin, tmax].
-        integral += (exp(-hp.ω * max(tmin - ti, 0)) - exp(-hp.ω * (tmax - ti)))
+    λ(tᵢ) = μ + α ∑_{j<i} mⱼ exp(-ω(tᵢ - tⱼ)) = μ + α A[i]
+=#
+function A_Ozaki(times, marks, ω)
+    A = zeros(length(times))
+    for i in 2:length(times)
+        A[i] = update_A(A[i - 1], exp(-ω * (times[i] - times[i - 1])), marks[i - 1])
     end
-    integral *= hp.α / hp.ω
-    integral += hp.μ * (tmax - tmin) # Integral of base rate
-    return integral
+    return A
 end
 
-function DensityInterface.logdensityof(hp::UnmarkedUnivariateHawkesProcess, h::History)
-    A = zeros(nb_events(h)) # Vector A in Ozaki (1979)
-    for i in 2:nb_events(h)
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
+#=
+Uses the idea from [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
+to efficiently calculate the value of the intensity function at any
+time. If tₙ is the last event before time s, then
+
+    λ(s) = μ + α exp(-ω (s - tₙ)) (mₙ + A[n])
+
+The vector A is only calculated up to n.
+=#
+function A_Ozaki_ts(
+    times::AbstractVector{R1}, marks::AbstractVector{M}, ω::R2, t::R3
+) where {R1<:Real,R2<:Real,R3<:Real,M}
+    T = M == Nothing ? float(promote_type(R1, R2, R3)) : float(promote_type(R1, R2, R3, M))
+    A = zero(T)
+    (isempty(times) || t <= times[1]) && return A
+    ind_times = 2
+    while ind_times <= length(times) && times[ind_times] < t
+        A = update_A(
+            A, exp(-ω * (times[ind_times] - times[ind_times - 1])), marks[ind_times - 1]
+        )
+        ind_times += 1
     end
-    return sum(log.(hp.μ .+ (hp.α .* A))) - # Value of intensity at each event
-           (hp.μ * duration(h)) - # Integral of base rate
-           ((hp.α / hp.ω) * sum(1 .- exp.(-hp.ω .* (duration(h) .- h.times)))) # Integral of each kernel
+    A = update_A(A, exp(-ω * (t - times[ind_times - 1])), marks[ind_times - 1])
+    return A
 end
 
-function ground_intensity(hp::UnivariateHawkesProcess, h::History, t)
-    idx = searchsortedfirst(h.times, t) - 1
-    times = (@view h.times[1:idx])
-    marks = (@view h.marks[1:idx])
-    activation = sum(hp.α .* marks .* exp.(hp.ω .* times))
-    return hp.μ + (activation / exp(hp.ω * t))
+#=
+Uses the idea from [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
+to efficiently calculate the values of the intensity function at any
+**ORDERED** set of times.
+=#
+function A_Ozaki_ts(
+    times::AbstractVector{R1}, marks::AbstractVector{M}, ω::R2, ts::AbstractVector{<:R3}
+) where {R1<:Real,R2<:Real,R3<:Real,M}
+    if M == Nothing
+        T = float(promote_type(R1, R2, R3))
+    else
+        T = float(promote_type(R1, R2, R3, M))
+    end
+    A = zeros(T, length(ts))
+    (isempty(times) || ts[end] <= times[1] || isempty(ts)) && return A
+    ind_A = 1
+    while ts[ind_A] <= times[1] # A[i] = 0 for all ts before the first event
+        ind_A += 1
+    end
+    ind_times = 2
+    while ind_A < length(ts)
+        while ind_times <= length(times) && times[ind_times] < ts[ind_A]
+            A[ind_A] = update_A(
+                A[ind_A],
+                exp(-ω * (times[ind_times] - times[ind_times - 1])),
+                marks[ind_times - 1],
+            )
+            ind_times += 1
+        end
+        A[ind_A + 1] = A[ind_A]
+        A[ind_A] = update_A(
+            A[ind_A], exp(-ω * (ts[ind_A] - times[ind_times - 1])), marks[ind_times - 1]
+        )
+        ind_A += 1
+    end
+    if ind_times > length(times) || times[ind_times] >= ts[end]
+        A[end] = update_A(
+            A[end], exp(-ω * (ts[end] - times[ind_times - 1])), marks[ind_times - 1]
+        )
+    else
+        A[end] = update_A(
+            A[end],
+            exp(-ω * (times[ind_times] - times[ind_times - 1])),
+            marks[ind_times - 1],
+        )
+        A[end] = update_A(A[end], exp(-ω * (ts[end] - times[ind_times])), marks[ind_times])
+    end
+    return A
 end
 
-function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, tmin, tmax)
-    times = event_times(h, h.tmin, tmax)
-    marks = event_marks(h, h.tmin, tmax)
-    integral = 0
-    for (ti, mi) in zip(times, marks)
-        # Integral of activation function. 'max(tmin - ti, 0)' corrects for events that occurred
-        # inside or outside the interval [tmin, tmax].
-        integral += mi * (exp(-hp.ω * max(tmin - ti, 0)) - exp(-hp.ω * (tmax - ti)))
-    end
-    integral *= hp.α / hp.ω
-    integral += hp.μ * (tmax - tmin) # Integral of base rate
-    return integral
+function update_A(Ai_1, eωt::Real, _::Nothing)
+    return eωt * (1 + Ai_1)
 end
 
-function DensityInterface.logdensityof(hp::UnivariateHawkesProcess, h::History)
-    A = zeros(nb_events(h)) # Vector A in Ozaki (1979)
-    for i in 2:nb_events(h)
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (h.marks[i - 1] + A[i - 1])
-    end
-    return sum(log.(hp.μ .+ (hp.α .* A))) - # Value of intensity at each event
-           (hp.μ * duration(h)) - # Integral of base rate
-           ((hp.α / hp.ω) * sum(h.marks .* (1 .- exp.(-hp.ω .* (duration(h) .- h.times))))) # Integral of each kernel
+function update_A(Ai_1, eωt::Real, mi_1::Real)
+    return eωt * (mi_1 + Ai_1)
 end
diff --git a/src/hawkes/multivariate/intensity.jl b/src/hawkes/multivariate/intensity.jl
new file mode 100644
index 0000000..746597f
--- /dev/null
+++ b/src/hawkes/multivariate/intensity.jl
@@ -0,0 +1,51 @@
+function ground_intensity(hp::MultivariateHawkesProcess, h::History, ts)
+    return mapreduce(m -> intensity(hp, m, ts, h), +, support(hp.mark_dist))
+end
+
+function intensity(
+    hp::MultivariateHawkesProcess{R1}, m::Int, ts, h::History{R2}
+) where {R1<:Real,R2<:Real}
+    T = float(promote_type(R1, R2, eltype(ts)))
+    m in support(hp.mark_dist) || return T.(zero(T))
+    λt = A_Ozaki_ts(h.times, hp.α[h.marks, m], hp.ω[m], ts)
+    return λt .+ (hp.μ * probs(hp.mark_dist)[m])
+end
+
+function integrated_ground_intensity(hp::MultivariateHawkesProcess, h::History, ts)
+    marks = support(hp.mark_dist)
+    A = map(m -> A_Ozaki_ts(h.times, hp.α[h.marks, m], hp.ω[m], ts), marks)
+    s_marks = sum_marks(hp, h, marks, ts)
+    return (hp.μ .* ts) .+ sum((inv.(hp.ω) .* (s_marks .- A)))
+end
+
+function integrated_ground_intensity(hp::MultivariateHawkesProcess, h::History, a, b)
+    return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
+end
+
+function DensityInterface.logdensityof(hp::MultivariateHawkesProcess, h::History)
+    A = zeros(length(hp.μ))
+    sum_marks = zeros(length(hp.μ))
+    sum_logλ = 0.0
+    for i in 2:nb_events(h)
+        m = h.marks[i - 1]
+        sum_marks .+= hp.α[:, m]
+        A .= exp.(-hp.ω .* (h.times[i] - h.times[i - 1])) .* (hp.α[:, m] .+ A)
+        sum_logλ += log(hp.μ[m] + A[m])
+    end
+    m = h.marks[end]
+    sum_marks .+= hp.α[:, m]
+    A .= exp.(-hp.ω .* (h.tmax - h.times[end])) .* (hp.α[:, m] .+ A)
+    Λ_T = (hp.μ .* durantion(h)) + inv.(hp.ω) .* (sum_marks .- A)
+    return sum_logλ - sum(Λ_T)
+end
+
+function sum_marks(hp::MultivariateHawkesProcess, h::History, marks, t::Real)
+    return map(m -> sum(hp.α[h.marks[1:(searchsortedfirst(h.times, t) - 1)], m]), marks)
+end
+
+function sum_marks(hp::MultivariateHawkesProcess, h::History, marks, ts::Vector{<:Real})
+    return map(
+        m -> [sum(hp.α[h.marks[1:(searchsortedfirst(h.times, t) - 1)], m]) for t in ts],
+        marks,
+    )
+end
diff --git a/src/hawkes/multivariate/multivariate_hawkes.jl b/src/hawkes/multivariate/multivariate_hawkes.jl
new file mode 100644
index 0000000..c36b787
--- /dev/null
+++ b/src/hawkes/multivariate/multivariate_hawkes.jl
@@ -0,0 +1,90 @@
+"""
+    MultivariateHawkesProcess{R} <: HawkesProcess
+
+Unmarked multivariate temporal Hawkes process with exponential decay
+
+For a process with m = 1, 2, ..., M marginal processes, the conditional intensity
+function of the m-th process is given by
+
+    λₘ(t) = μₘ + ∑_{l=1,...,M} ∑_{tˡᵢ < t} αₘₗ exp(-βₘ(t - tˡᵢ)),
+
+where tˡᵢ is the i-th element in the l-th marginal process.
+μ and ω are vectors of length M with elements μₘ, ωₘ. α is a M×M
+matrix with elements αₘₗ.
+
+The process is represented as a marked process, where each marginal
+process m = 1, 2, ..., M is corresponds to events (tᵐᵢ, m), tᵐᵢ being
+the i-th element with mark m.
+
+This is the 'Linear Multidimensional EHP' in Section 2 of
+[Bonnet, Dion-Blanc and Perrin (2024)](https://arxiv.org/pdf/2410.05008v1)
+
+# Fields
+- `μ::Vector{<:Real}`: baseline intensity (immigration rate)
+- `α::Matrix{<:Real}`: Jump size.
+- `ω::Vector{<:Real}`: Decay rate.
+"""
+struct MultivariateHawkesProcess{T<:Real} <: HawkesProcess
+    μ::T
+    α::Matrix{T}
+    ω::Vector{T}
+    mark_dist::Categorical{Float64,Vector{Float64}} # To keep consistent with PoissonProcess. Helps in simulation.
+end
+
+function Base.show(io::IO, hp::MultivariateHawkesProcess{T}) where {T<:Real}
+    return print(
+        io,
+        "MultivariateHawkesProcess\nμ = $(T.(hp.μ .* probs(hp.mark_dist)))\nα = $(hp.α)\nω = $(hp.ω)",
+    )
+end
+
+function HawkesProcess(
+    μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Vector{<:Real}; check_args::Bool=true
+)
+    sum(μ) == 0 && return PoissonProcess(sum(μ))
+    check_args && check_args_Hawkes(μ, α, ω)
+    R = promote_type(eltype(μ), eltype(α), eltype(ω))
+    return MultivariateHawkesProcess(
+        R(sum(μ)), R.(α), R.(ω), Categorical(Float64.(μ / sum(μ)))
+    )
+end
+
+# For M independent marginal processes
+function HawkesProcess(
+    μ::Vector{<:Real}, α::Vector{<:Real}, ω::Vector{<:Real}; check_args::Bool=true
+)
+    return HawkesProcess(μ, diagm(α), ω; check_args=check_args)
+end
+
+function check_args_Hawkes(μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Vector{<:Real})
+    if (length(μ) != size(α)[2])
+        throw(DimensionMismatch("α must have size $(length(μ))×$(length(μ))"))
+    end
+    if (length(μ) != length(ω))
+        throw(DimensionMismatch("α and ω must have the same length"))
+    end
+    if any(α ./ ω' .>= 1)
+        throw(
+            DomainError(
+                "α = $α, ω = $ω",
+                "HawkesProcess: αᵢⱼ/ωⱼ must be smaller than 1 for all i,j. Stability condition.",
+            ),
+        )
+    end
+    if any(ω .<= zero(ω))
+        throw(
+            DomainError(
+                "ω = $ω", "HawkesProcess: All elements of ω must be strictly positive."
+            ),
+        )
+    end
+    if any(μ .< zero(μ)) || any(α .< zero(α))
+        throw(
+            DomainError(
+                "μ = $μ, α = $α",
+                "HawkesProcess: All elements of μ, α and ω must be non-negative.",
+            ),
+        )
+    end
+    return nothing
+end
diff --git a/src/hawkes/multivariate/simulation.jl b/src/hawkes/multivariate/simulation.jl
new file mode 100644
index 0000000..e498ca9
--- /dev/null
+++ b/src/hawkes/multivariate/simulation.jl
@@ -0,0 +1,10 @@
+function one_parent!(
+    rng::AbstractRNG, h::History{T}, mh::MultivariateHawkesProcess, parent_t, parent_m
+) where {T<:Real}
+    for m in 1:length(mh)
+        sim_transf = descendants(rng, parent_t, mh.α[parent_m, m], mh.ω[m], h.tmax)
+        append!(h.times, sim_transf)
+        append!(h.marks, fill(m, length(sim_transf)))
+    end
+    return nothing
+end
diff --git a/src/hawkes/simulation.jl b/src/hawkes/simulation.jl
index 2e0e436..491a05d 100644
--- a/src/hawkes/simulation.jl
+++ b/src/hawkes/simulation.jl
@@ -1,6 +1,6 @@
 function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin::Real, tmax::Real)
-    h = simulate(rng, PoissonProcess(hp.μ, hp.mark_dist), tmin, tmax)
-    sim_desc = generate_descendants!(rng, h, hp, tmax) # Recursively generates descendants from first events
+    h = simulate(rng, PoissonProcess(hp.μ, mark_distribution(hp)), tmin, tmax)
+    sim_desc = generate_descendants!(rng, h, hp) # Recursively generates descendants from first events
     perm = sortperm(h.times)
     h.times .= h.times[perm]
     h.marks .= h.marks[perm]
@@ -13,50 +13,36 @@ end
 
 #=
 Internal function for simulating Hawkes processes
-The first generation, gen_0, is the `immigrants`, which is a set of event times.
+The history `h` contains the event times generated from the base rate of the process, the
+"parents" of the process. This is the starting generation, gen_0.
 For each t_g ∈ gen_n, simulate an inhomogeneous Poisson process over the interval [t_g, T]
-with intensity λ(t) = α exp(-ω(t - t_g)) with the inverse method.
+with intensity λ(t) = α exp(-ω(t - t_g)) using the inverse method.
 gen_{n+1} is the set of all events simulated from all events in gen_n.
 The algorithm stops when the simulation from one generation results in no further events.
 =#
 function generate_descendants!(
-    rng::AbstractRNG, h::History{T,M}, hp::HawkesProcess, tmax
+    rng::AbstractRNG, h::History{T,M}, hp::HawkesProcess
 ) where {T<:Real,M}
-    gen_start = 1
-    gen_end = nb_events(h)
-    while gen_start <= gen_end # Last generation is not empty
+    gen_start = 1              # Starting index of generation gen_0
+    gen_end = nb_events(h)     # Last index of generation gen_0
+    while gen_start <= gen_end # As long as the current genertion is not empty, generate more descendants
         for parent_idx in gen_start:gen_end
-            one_parent!(rng, h, hp, h[parent_idx])
+            # generates the descendants of one single element of the current generation
+            one_parent!(rng, h, hp, h.times[parent_idx], h.marks[parent_idx]) # dispatch on the type of `hp`
         end
+        # gen_i+1 are the descendants of gen_i
         gen_start = gen_end + 1
         gen_end = nb_events(h)
     end
     return nothing
 end
 
-function one_parent!(
-    rng::AbstractRNG, h::History{T}, hp::UnivariateHawkesProcess, parent
+# generates the descendants of one single parent using the inverse transform method
+function descendants(
+    rng::AbstractRNG, parent::T, α::Real, ω::Real, tmax::Real
 ) where {T<:Real}
-    α = isnothing(parent[2]) ? hp.α : parent[2] * hp.α
-    activation_integral = (α / hp.ω) * (one(T) - exp(hp.ω * (parent[1] - h.tmax)))
+    activation_integral = (α / ω) * (one(T) - exp(ω * (parent - tmax)))
     sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
-    @. sim_transf = parent[1] - (inv(hp.ω) * log(one(T) - ((hp.ω / α) * sim_transf))) # Inverse of integral of the activation function
-    append!(h.times, sim_transf)
-    append!(h.marks, [rand(rng, hp.mark_dist) for _ in 1:length(sim_transf)])
-    return nothing
-end
-
-function one_parent!(
-    rng::AbstractRNG, h::History{T}, mh::MultivariateHawkesProcess, parent
-) where {T<:Real}
-    for m in 1:length(mh)
-        α = mh.α[m, parent[2]]
-        ω = mh.ω[m, parent[2]]
-        activation_integral = (α / ω) * (one(T) - exp(ω * (parent[1] - h.tmax)))
-        sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
-        @. sim_transf = parent[1] - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
-        append!(h.times, sim_transf)
-        append!(h.marks, fill(m, length(sim_transf)))
-    end
-    return nothing
+    @. sim_transf = parent - (inv(ω) * log(one(T) - ((ω / α) * sim_transf))) # Inverse of integral of the activation function
+    return sim_transf
 end
diff --git a/src/hawkes/time_change.jl b/src/hawkes/time_change.jl
deleted file mode 100644
index b04f014..0000000
--- a/src/hawkes/time_change.jl
+++ /dev/null
@@ -1,20 +0,0 @@
-function time_change(hp::UnmarkedUnivariateHawkesProcess, h::History{T,M}) where {T<:Real,M}
-    n = nb_events(h)
-    A = zeros(T, n + 1) # Array A in Ozaki (1979)
-    @inbounds for i in 2:n
-        A[i] = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A[i - 1])
-    end
-    A[end] = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A[end - 1]) # Used to calculate the integral of the intensity at every event time
-    times = T.(hp.μ .* (h.times .- h.tmin)) # Transformation with respect to base rate
-    T_base = hp.μ * duration(h) # Contribution of base rate to total length of time re-scaled process
-    for i in eachindex(times)
-        times[i] += (hp.α / hp.ω) * ((i - 1) - A[i]) # Add contribution of activation functions
-    end
-    return History(;
-        times=times,
-        marks=h.marks,
-        tmin=zero(T),
-        tmax=T(T_base + ((hp.α / hp.ω) * (n - A[end]))),
-        check_args=false,
-    ) # A time re-scaled process starts at t=0
-end
diff --git a/src/hawkes/univariate/fit.jl b/src/hawkes/univariate/fit.jl
new file mode 100644
index 0000000..e44ab35
--- /dev/null
+++ b/src/hawkes/univariate/fit.jl
@@ -0,0 +1,41 @@
+"""
+    StatsAPI.fit(::Type{<:UnivariateHawkesProcess{R}}, h::History; step_tol::Float64=1e-6, max_iter::Int=1000, rng::AbstractRNG=default_rng()) where {R<:Real}
+
+Adaptation of the Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273))
+for marked processes. The relevant calculations are in page 4, equations 6-13.
+
+Let (t₁, m₁), (t₂, m₂), ..., (tₙ, mₙ) be a marked event history with  (t₁ < ... < tₙ) over the interval
+[0, T). We use the immigrant-descendant representation, where immigrants arrive at a constant base rate
+μ and each each arrival may generate descendants following the activation function α mᵢ exp(-ω(t - tᵢ)).
+
+The algorithm consists in the following steps:
+1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
+2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
+3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
+
+    Dᵢⱼ = ψ ω mⱼ exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
+
+    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
+4. Update the parameters as
+        μ = (N - D) / T
+        ψ = D / ∑ mᵢ
+        ω = D / div
+5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
+
+Notice that, for numerical stability, the process is normalized so the average inter-event time is equal to 1 and, 
+therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
+
+∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
+"""
+function StatsAPI.fit(
+    HP::Type{<:UnivariateHawkesProcess{R,D}},
+    h::History;
+    step_tol::Float64=1e-6,
+    max_iter::Int=1000,
+    rng::AbstractRNG=default_rng(),
+) where {R,D}
+    sum_marks = sum(h.marks)
+    params = fit_hawkes_params(R, h, sum_marks; step_tol=step_tol, max_iter=max_iter, rng=rng)
+    d = fit(D, h.marks)
+    return HawkesProcess(params..., d)
+end
diff --git a/src/hawkes/univariate/intensity.jl b/src/hawkes/univariate/intensity.jl
new file mode 100644
index 0000000..3a8da69
--- /dev/null
+++ b/src/hawkes/univariate/intensity.jl
@@ -0,0 +1,44 @@
+function ground_intensity(hp::UnivariateHawkesProcess, h::History, ts)
+    return hp.μ .+ (hp.α .* A_Ozaki_ts(h.times, h.marks, hp.ω, ts))
+end
+
+function intensity(
+    hp::UnivariateHawkesProcess{R1,D}, m::M, ts, h::History{R2,M}
+) where {R1<:Real,R2<:Real,M<:Real,D}
+    T = float(promote_type(R1, R2, M, eltype(ts))) # For type stability and coherence
+    m in support(hp.mark_dist) || return zero(T)
+    return ground_intensity(hp, h, ts) .* T(pdf(hp.mark_dist, m)) # `pdf` always returns a `Float64`
+end
+
+function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, ts)
+    A = A_Ozaki_ts(h.times, h.marks, hp.ω, ts)
+    s_marks = sum_marks(h, ts)
+    return (hp.μ .* ts) .+ sum((hp.α / hp.ω) .* (s_marks .- A))
+end
+
+function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, a, b)
+    return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
+end
+
+function DensityInterface.logdensityof(hp::UnivariateHawkesProcess, h::History)
+    A = 0.0
+    sum_marks = 0.0
+    sum_logλ = 0.0
+    for i in 2:nb_events(h)
+        sum_marks += h.marks[i - 1]
+        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (h.marks[i - 1] + A)
+        sum_logλ += log(hp.μ + hp.α * A)
+    end
+    sum_marks += h.marks[end]
+    A = exp(-hp.ω * (h.tmax - h.times[end])) * (h.marks[end] + A)
+    Λ_T = (hp.μ * duration(h)) + inv.(hp.ω) * (sum_marks - A)
+    return sum_logλ - sum(Λ_T)
+end
+
+function sum_marks(h::History, t::Real)
+    return sum(h.marks[1:(searchsortedfirst(h.times, t) - 1)])
+end
+
+function sum_marks(h::History, ts::Vector{<:Real})
+    return [sum(h.marks[1:(searchsortedfirst(h.times, t) - 1)]) for t in ts]
+end
diff --git a/src/hawkes/univariate/simulation.jl b/src/hawkes/univariate/simulation.jl
new file mode 100644
index 0000000..e99a9a7
--- /dev/null
+++ b/src/hawkes/univariate/simulation.jl
@@ -0,0 +1,8 @@
+function one_parent!(
+    rng::AbstractRNG, h::History{T}, hp::UnivariateHawkesProcess, parent_t, parent_m
+) where {T<:Real}
+    sim_transf = descendants(rng, parent_t, parent_m * hp.α, hp.ω, h.tmax)
+    append!(h.times, sim_transf)
+    append!(h.marks, [rand(rng, mark_distribution(hp)) for _ in 1:length(sim_transf)])
+    return nothing
+end
diff --git a/src/hawkes/univariate/time_change.jl b/src/hawkes/univariate/time_change.jl
new file mode 100644
index 0000000..bfb5609
--- /dev/null
+++ b/src/hawkes/univariate/time_change.jl
@@ -0,0 +1,27 @@
+function time_change(hp::UnivariateHawkesProcess, h::History{R,M}) where {R<:Real,M}
+    n = nb_events(h)
+    A = zero(R)
+    sum_marks = zero(R)
+    times_transformed = zeros(R, n)
+    # Loop to calculate i -> ∑_{j<i} mⱼ (1 - exp(-ω (tᵢ - tⱼ)))
+    for i in 2:n
+        Δ = h.times[i] - h.times[i - 1]
+        e = exp(-hp.ω * Δ)
+        A = e * (h.marks[i - 1] + A)
+        sum_marks += h.marks[i - 1]
+        times_transformed[i] = sum_marks - A
+    end
+    A = e * (h.marks[end] + A)
+    sum_marks += h.marks[end]
+    times_transformed .*= hp.α / hp.ω      # Integral of activation functions
+    times_transformed .+= h.times .* hp.μ  # Add integral of base rate
+    tmax_transformed = (hp.α / hp.ω) * (sum_marks - A)
+    tmax_transformed += R(hp.μ * duration(h))
+    return History(;
+        times=times_transformed,
+        marks=h.marks,
+        tmin=zero(R),
+        tmax=tmax_transformed,
+        check_args=false,
+    ) # A time re-scaled process starts at t=0
+end
diff --git a/src/hawkes/univariate/univariate_hawkes.jl b/src/hawkes/univariate/univariate_hawkes.jl
new file mode 100644
index 0000000..96a454c
--- /dev/null
+++ b/src/hawkes/univariate/univariate_hawkes.jl
@@ -0,0 +1,51 @@
+"""
+    UnivariateHawkesProcess{R<:Real,D} <: HawkesProcess
+
+Univariate Hawkes process with exponential decay kernel and mark 
+distribution `D`.
+
+Denote the events of the process by (tᵢ, mᵢ), where tᵢ is the event time
+and mᵢ ∈ M the corresponding mark. The conditional intensity function
+of the Hawkes process is given by
+
+λ(t) = μ + ∑_{tᵢ < t} α mᵢ exp(-ω (t - tᵢ)).
+
+Notice that the mark only affects the jump size.
+
+# Fields
+- `μ::R`: baseline intensity (immigration rate)
+- `α::R`: jump size
+- `ω::R`: decay rate.
+- `mark_dist::D`: distribution of marks
+"""
+struct UnivariateHawkesProcess{T<:Real,D} <: HawkesProcess
+    μ::T
+    α::T
+    ω::T
+    mark_dist::D
+end
+
+function Base.show(io::IO, hp::UnivariateHawkesProcess)
+    return print(io, "UnivariateHawkesProcess($(hp.μ), $(hp.α), $(hp.ω), $(hp.mark_dist))")
+end
+
+mark_distribution(hp::UnivariateHawkesProcess, _...) = hp.mark_dist
+
+function HawkesProcess(μ::Real, α::Real, ω::Real, mark_dist; check_args::Bool=true)
+    check_args && check_args_Hawkes(μ, α, ω, mark_dist)
+    return UnivariateHawkesProcess(promote(μ, α, ω)..., mark_dist)
+end
+
+function check_args_Hawkes(μ::Real, α::Real, ω::Real, mark_dist)
+    mean_α = mark_dist isa Dirac{Nothing} ? α : mean(mark_dist) * α
+    check_args_Hawkes(μ, mean_α, ω)
+    if !isa(mark_dist, Dirac{Nothing}) && minimum(mark_dist) < 0
+        throw(
+            DomainError(
+                "Mark distribution support = $((support(mark_dist).lb, support(mark_dist).ub))",
+                "HawkesProcess: Support of mark distribution must be contained in non-negative numbers",
+            ),
+        )
+    end
+    return nothing
+end
diff --git a/src/hawkes/unmarked_univariate/fit.jl b/src/hawkes/unmarked_univariate/fit.jl
new file mode 100644
index 0000000..a8733f0
--- /dev/null
+++ b/src/hawkes/unmarked_univariate/fit.jl
@@ -0,0 +1,55 @@
+"""
+    StatsAPI.fit(::Type{<:UnmarkedUnivariateHawkesProcess{R}}, h::History; step_tol::Float64=1e-6, max_iter::Int=1000, rng::AbstractRNG=default_rng()) where {R<:Real}
+
+Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
+The relevant calculations are in page 4, equations 6-13.
+
+Let (t₁ < ... < tₙ) be the event times over the interval [0, T). We use the immigrant-descendant representation,
+where immigrants arrive at a constant base rate μ and each each arrival may generate descendants following the
+activation function α exp(-ω(t - tᵢ)).
+
+The algorithm consists in the following steps:
+1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
+2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
+3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
+
+    Dᵢⱼ = ψ ω exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
+
+    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
+4. Update the parameters as
+        μ = (N - D) / T
+        ψ = D / N
+        ω = D / div
+5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
+
+Notice that, for numerical stability, the process is normalized so the average inter-event time is equal to 1 and, 
+therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
+
+∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
+"""
+function StatsAPI.fit(
+    ::Type{<:UnmarkedUnivariateHawkesProcess{R}},
+    h::History;
+    step_tol::Float64=1e-6,
+    max_iter::Int=1000,
+    rng::AbstractRNG=default_rng(),
+) where {R<:Real}
+    unmarked_h = History(; times=h.times, tmin=h.tmin, tmax=h.tmax)
+    params = fit_hawkes_params(
+        R, unmarked_h, nb_events(h); step_tol=step_tol, max_iter=max_iter, rng=rng
+    )
+    return HawkesProcess(params...)
+end
+
+function StatsAPI.fit(
+    ::Type{<:UnmarkedUnivariateHawkesProcess{R}},
+    h::History{R2, Nothing};
+    step_tol::Float64=1e-6,
+    max_iter::Int=1000,
+    rng::AbstractRNG=default_rng(),
+) where {R<:Real, R2<:Real}
+    params = fit_hawkes_params(
+        R, h, nb_events(h); step_tol=step_tol, max_iter=max_iter, rng=rng
+    )
+    return HawkesProcess(params...)
+end
\ No newline at end of file
diff --git a/src/hawkes/unmarked_univariate/intensity.jl b/src/hawkes/unmarked_univariate/intensity.jl
new file mode 100644
index 0000000..a0fc972
--- /dev/null
+++ b/src/hawkes/unmarked_univariate/intensity.jl
@@ -0,0 +1,34 @@
+function ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, ts)
+    return hp.μ .+ (hp.α .* A_Ozaki_ts(h.times, fill(nothing, length(h.times)), hp.ω, ts))
+end
+
+function intensity(
+    hp::UnmarkedUnivariateHawkesProcess{R}, _, ts, h::History
+) where {R<:Real}
+    return ground_intensity(hp, h, ts)
+end
+
+intensity(hp::UnmarkedUnivariateHawkesProcess, ts, h::History) = ground_intensity(hp, h, ts)
+
+function integrated_ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, ts)
+    A = A_Ozaki_ts(h.times, fill(nothing, length(h.times)), hp.ω, ts)
+    ns = map(t -> nb_events(h, h.tmin, t), ts)
+    return (hp.μ .* ts) .+ sum((hp.α / hp.ω) .* (ns .- A))
+end
+
+function integrated_ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, a, b)
+    return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
+end
+
+# logdensityof (log-likelihood)
+function DensityInterface.logdensityof(hp::UnmarkedUnivariateHawkesProcess, h::History)
+    A = 0.0
+    sum_logλ = 0.0
+    for i in 2:nb_events(h)
+        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A)
+        sum_logλ += log(hp.μ + hp.α * A)
+    end
+    A = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A)
+    Λ_T = (hp.μ * duration(h)) + (hp.α / hp.ω) * (nb_events(h) - A)
+    return sum_logλ - sum(Λ_T)
+end
diff --git a/src/hawkes/unmarked_univariate/simulation.jl b/src/hawkes/unmarked_univariate/simulation.jl
new file mode 100644
index 0000000..facfc7d
--- /dev/null
+++ b/src/hawkes/unmarked_univariate/simulation.jl
@@ -0,0 +1,8 @@
+function one_parent!(
+    rng::AbstractRNG, h::History{T}, hp::UnmarkedUnivariateHawkesProcess, parent_t, _
+) where {T<:Real}
+    sim_transf = descendants(rng, parent_t, hp.α, hp.ω, h.tmax)
+    append!(h.times, sim_transf)
+    append!(h.marks, fill(nothing, length(sim_transf)))
+    return nothing
+end
diff --git a/src/hawkes/unmarked_univariate/time_change.jl b/src/hawkes/unmarked_univariate/time_change.jl
new file mode 100644
index 0000000..855d0a6
--- /dev/null
+++ b/src/hawkes/unmarked_univariate/time_change.jl
@@ -0,0 +1,22 @@
+function time_change(hp::UnmarkedUnivariateHawkesProcess, h::History{R,M}) where {R<:Real,M}
+    n = nb_events(h)
+    A = zero(R)
+    times_transformed = zeros(R, n)
+    # Loop to calculate i -> ∑_{j<i} 1 - exp(-ω (tᵢ - tⱼ))
+    for i in 2:n
+        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (one(R) + A)
+        times_transformed[i] = (i - 1) - A
+    end
+    A = exp(-hp.ω * (h.tmax - h.times[end])) * (one(R) + A)
+    times_transformed .*= hp.α / hp.ω      # Integral of activation functions
+    times_transformed .+= h.times .* hp.μ  # Add integral of base rate
+    tmax_transformed = R((hp.α / hp.ω) * (n - A))
+    tmax_transformed += R(hp.μ * duration(h))
+    return History(;
+        times=times_transformed,
+        marks=h.marks,
+        tmin=zero(R),
+        tmax=tmax_transformed,
+        check_args=false,
+    ) # A time re-scaled process starts at t=0
+end
diff --git a/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl b/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl
new file mode 100644
index 0000000..7fe2fba
--- /dev/null
+++ b/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl
@@ -0,0 +1,50 @@
+"""
+    UnmarkedUnivariateHawkesProcess{R<:Real} <: HawkesProcess
+
+Unmarked univariate Hawkes process with exponential decay kernel.
+
+Denote the events of the process by tᵢ. The conditional intensity function
+of the Hawkes process is given by
+
+λ(t) = μ + ∑_{tᵢ < t} α exp(-ω (t - tᵢ)).
+
+# Fields
+- `μ::R`: baseline intensity (immigration rate)
+- `α::R`: jump size
+- `ω::R`: decay rate.
+"""
+struct UnmarkedUnivariateHawkesProcess{T<:Real} <: HawkesProcess
+    μ::T
+    α::T
+    ω::T
+end
+
+function Base.show(io::IO, hp::UnmarkedUnivariateHawkesProcess)
+    return print(io, "UnmarkedUnivariateHawkesProcess$((hp.μ, hp.α, hp.ω))")
+end
+
+mark_distribution(::UnmarkedUnivariateHawkesProcess, _...) = Dirac(nothing)
+
+function HawkesProcess(μ::Real, α::Real, ω::Real; check_args::Bool=true)
+    check_args && check_args_Hawkes(μ, α, ω)
+    return UnmarkedUnivariateHawkesProcess(promote(μ, α, ω)...)
+end
+
+function check_args_Hawkes(μ::Real, α::Real, ω::Real)
+    if any((μ, α, ω) .< 0)
+        throw(
+            DomainError(
+                "μ = $μ, α = $α, ω = $ω",
+                "HawkesProcess: All parameters must be non-negative.",
+            ),
+        )
+    end
+    if α >= ω
+        throw(
+            DomainError(
+                "α = $α, ω = $ω",
+                "HawkesProcess: mᵢα must be, on average, smaller than ω. Stability condition.",
+            ),
+        )
+    end
+end
diff --git a/src/simulation.jl b/src/simulation.jl
index 69b48c5..9844166 100644
--- a/src/simulation.jl
+++ b/src/simulation.jl
@@ -31,7 +31,7 @@ function simulate_ogata(rng::AbstractRNG, pp::AbstractPointProcess, tmin::Real,
         if τ > L
             t = t + L
         elseif τ <= L
-            U_max = ground_intensity(pp, t + τ, h) / B
+            U_max = ground_intensity(pp, h, t + τ) / B
             U = rand(rng, typeof(U_max))
             if U < U_max
                 m = rand(rng, mark_distribution(pp, t + τ, h))
diff --git a/test/bounded_point_process.jl b/test/bounded_point_process.jl
index 2996869..4286673 100644
--- a/test/bounded_point_process.jl
+++ b/test/bounded_point_process.jl
@@ -6,11 +6,11 @@ h = simulate(rng, bpp)
 
 @test min_time(bpp) == 0.0
 @test max_time(bpp) == 1000.0
-@test ground_intensity(bpp, 0, h) == sum(intensities)
-@test mark_distribution(bpp, 100.0, h) == Categorical(intensities / sum(intensities))
+@test ground_intensity(bpp, h, 0) == sum(intensities)
+@test mark_distribution(bpp, h, 100.0) == Categorical(intensities / sum(intensities))
 @test mark_distribution(bpp, 0.0) == Categorical(intensities / sum(intensities))
-@test intensity(bpp, 1, 0, h) == intensities[1]
-@test log_intensity(bpp, 2, 1.0, h) == log(intensities[2])
+@test intensity(bpp, 1, h, 0) == intensities[1]
+@test log_intensity(bpp, 2, h, 1.0) == log(intensities[2])
 
 @test ground_intensity_bound(bpp, 243, h) == (sum(intensities), typemax(Int))
 @test ground_intensity_bound(bpp, 243.0, h) == (sum(intensities), Inf)
diff --git a/test/hawkes.jl b/test/hawkes.jl
index f6a4002..96ca367 100644
--- a/test/hawkes.jl
+++ b/test/hawkes.jl
@@ -1,20 +1,37 @@
 # Constructor
+## UnmarkedUnivariateHawkesProcess
 @test HawkesProcess(1, 1, 2) isa UnmarkedUnivariateHawkesProcess{Int64}
 @test HawkesProcess(1, 1, 2.0) isa UnmarkedUnivariateHawkesProcess{Float64}
 @test_throws DomainError HawkesProcess(1, 1, 1)
 @test_throws DomainError HawkesProcess(-1, 1, 2)
 
+## UnivariateHawkesProcess
+@test HawkesProcess(1, 1, 2, Uniform()) isa UnivariateHawkesProcess{Int64,Uniform{Float64}}
+@test_throws DomainError HawkesProcess(1, 1, 2, Uniform(-1, 1))
+@test_throws DomainError HawkesProcess(1, 1, 2, Uniform(10, 11))
+
+## MultivariateHawkesProcess
+@test HawkesProcess(rand(Float32, 3), rand(Float32, 3), rand(Float32, 3) .+ 1) isa
+    MultivariateHawkesProcess{Float32}
+@test_throws DomainError HawkesProcess(rand(3) .- 1, rand(3, 3), rand(3) .+ 1)
+@test_throws DomainError HawkesProcess(rand(3), rand(3, 3) .+ 1, rand(3))
+@test_throws DimensionMismatch HawkesProcess(rand(3), rand(2, 2), rand(3) .+ 1)
+
+# Time change
 hp = HawkesProcess(1, 1, 2)
-h = History(; times=[1.0, 2.0, 4.0], marks=["a", "b", "c"], tmin=0.0, tmax=5.0)
+hp2 = HawkesProcess(1, 1, 2, Uniform(0, 3))
+hp3 = HawkesProcess([1, 2, 3], [1 0.1 0.1; 0.2 2 0.2; 0.3 0.3 3], [2, 4, 6])
+
+h = History(; times=[1.0, 2.0, 4.0], marks=[3, 2, 1], tmin=0.0, tmax=5.0)
 h_big = History(;
-    times=BigFloat.([1, 2, 4]), marks=["a", "b", "c"], tmin=BigFloat(0), tmax=BigFloat(5)
+    times=BigFloat.([1, 2, 4]), marks=[3.0, 2.0, 1.0], tmin=BigFloat(0), tmax=BigFloat(5)
 )
 
-# Time change
 h_transf = time_change(hp, h)
 integral =
     (hp.μ * duration(h)) +
     (hp.α / hp.ω) * sum([1 - exp(-hp.ω * (h.tmax - ti)) for ti in h.times])
+
 @test isa(h_transf, typeof(h))
 @test h_transf.marks == h.marks
 @test h_transf.tmin ≈ 0
@@ -23,40 +40,107 @@ integral =
 @test isa(time_change(hp, h_big), typeof(h_big))
 
 # Ground intensity
-@test ground_intensity(hp, h, 1) == 1
-@test ground_intensity(hp, h, 2) == 1 + hp.α * exp(-hp.ω * 1)
+## UnmarkedUnivariateHawkesProcess
+@test ground_intensity(hp, h, 1) ≈ 1
+@test ground_intensity(hp, h, 2) ≈ 1 + hp.α * exp(-hp.ω * 1)
+@test ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+    [ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+## UnivariateHawkesProcess
+@test ground_intensity(hp2, h, 1) ≈ 1
+@test ground_intensity(hp2, h, 2) ≈ 1 + 3 * hp2.α * exp(-hp2.ω * 1)
+@test ground_intensity(hp2, h, [1, 2, 3, 4]) ≈
+    [ground_intensity(hp2, h, t) for t in [1, 2, 3, 4]]
+
+## MultivariateHawkesProcess
+@test ground_intensity(hp3, h, 1) ≈ 6
+@test ground_intensity(hp3, h, 2) ≈ 6 + sum(hp3.α[3, :] .* exp.(-hp3.ω .* 1))
+@test ground_intensity(hp3, h, [1, 2, 3, 4]) ≈
+    [ground_intensity(hp3, h, t) for t in [1, 2, 3, 4]]
+
+# intensity
+## UnmarkedUnivariateHawkesProcess
+@test intensity(hp, "a", 1, h) ≈ ground_intensity(hp, h, 1)
+@test intensity(hp, 1, 2, h) ≈ ground_intensity(hp, h, 2)
+@test intensity(hp, 1, [1, 2, 3, 4], h) ≈ [intensity(hp, 1, t, h) for t in [1, 2, 3, 4]]
+
+## UnivariateHawkesProcess
+@test intensity(hp2, 1, 1, h) ≈ ground_intensity(hp2, h, 1) * (1/3)
+@test intensity(hp2, 1, 2, h) ≈ ground_intensity(hp2, h, 2) * (1/3)
+@test intensity(hp2, 4, 2, h) == 0
+@test intensity(hp2, 1, [1, 2, 3, 4], h) ≈ [intensity(hp2, 1, t, h) for t in [1, 2, 3, 4]]
+
+## MultivariateHawkesProcess
+@test intensity(hp3, 1, 1, h) ≈ 1
+@test intensity(hp3, 1, 2, h) ≈ 1 + hp3.α[3, 1] * exp(-hp3.ω[1] * 1)
+@test intensity(hp3, 4, 2, h) == 0
+@test intensity(hp3, 1, [1, 2, 3, 4], h) ≈ [intensity(hp3, 1, t, h) for t in [1, 2, 3, 4]]
 
 # Integrated ground intensity
-@test integrated_ground_intensity(hp, h, h.tmin, h.tmax) ≈ integral
-@test integrated_ground_intensity(hp, h, 2, 3) ≈
-    hp.μ +
-      ((hp.α / hp.ω) * (exp(-hp.ω) - exp(-hp.ω * 2))) +
-      ((hp.α / hp.ω) * (1 - exp(-hp.ω)))
+h1 = History([1], 0, 3, [2])
+h2 = History([1, 2], 0, 3, [2, 1])
+
+## UnmarkedUnivariateHawkesProcess
+@test integrated_ground_intensity(hp, h1, 0, 1) ≈ 1
+@test integrated_ground_intensity(hp, h1, 0, 1000) ≈ 1000.5
+@test integrated_ground_intensity(hp, h2, 0, 1000) ≈ 1001
+# @test integrated_ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+## UnivariateHawkesProcess
+@test integrated_ground_intensity(hp2, h1, 0, 1) ≈ 1
+@test integrated_ground_intensity(hp2, h1, 0, 1000) ≈ 1001
+@test integrated_ground_intensity(hp2, h2, 0, 1000) ≈ 1001.5
+# @test integrated_ground_intensity(hp2, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp2, h, t) for t in [1, 2, 3, 4]]
+
+## MultivariateHawkesProcess
+@test integrated_ground_intensity(hp3, h1, 0, 1) ≈ 6
+@test integrated_ground_intensity(hp3, h1, 0, 1000) ≈ 6000 + sum(hp3.α[2, :] ./ hp3.ω)
+@test integrated_ground_intensity(hp3, h2, 0, 1000) ≈
+    6000 + sum(hp3.α[2, :] ./ hp3.ω) + sum(hp3.α[1, :] ./ hp3.ω)
+# @test integrated_ground_intensity(hp3, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp3, h, t) for t in [1, 2, 3, 4]]
 
-# Rand
+# logdensityof
+# TODO: tests
+@test logdensityof(hp, h) ≈
+    sum(log.(hp.μ .+ (hp.α .* [0, exp(-hp.ω), exp(-hp.ω * 2) + exp(-hp.ω * 3)]))) -
+      integral
+
+# simulate
+# TODO: tests
 h_sim = simulate(hp, 0.0, 10.0)
 @test issorted(h_sim.times)
 @test isa(h_sim, History{Float64,Nothing})
 @test isa(simulate(hp, BigFloat(0), BigFloat(10)), History{BigFloat,Nothing})
 
 # Fit
-Random.seed!(123)
+Random.seed!(1)
 params_true = (100.0, 100.0, 200.0)
-model = HawkesProcess(params_true...)
-h_sim = simulate(model, 0.0, 50.0)
-model_est = fit(UnmarkedUnivariateHawkesProcess, h_sim)
+model_true = HawkesProcess(params_true...)
+h_sim = simulate(model_true, 0.0, 50.0)
+model_est = fit(UnmarkedUnivariateHawkesProcess{Float64}, h_sim)
 params_est = (model_est.μ, model_est.α, model_est.ω)
-@test isa(model_est, HawkesProcess)
+
+@test isa(model_est, UnmarkedUnivariateHawkesProcess)
 @test all((params_true .* 0.9) .<= params_est .<= (params_true .* 1.1))
 @test isa(
-    fit(UnmarkedUnivariateHawkesProcess, h_big), UnmarkedUnivariateHawkesProcess{BigFloat}
-)
-@test isa(
-    fit(UnmarkedUnivariateHawkesProcess{Float32}, h_big),
+    fit(UnmarkedUnivariateHawkesProcess{Float32}, h_sim),
     UnmarkedUnivariateHawkesProcess{Float32},
 )
 
-# logdensityof
-@test logdensityof(hp, h) ≈
-    sum(log.(hp.μ .+ (hp.α .* [0, exp(-hp.ω), exp(-hp.ω * 2) + exp(-hp.ω * 3)]))) -
-      integral
+model_true = HawkesProcess(params_true..., Uniform())
+h_sim = simulate(model_true, 0.0, 50.0)
+model_est = fit(UnivariateHawkesProcess{Float32,Uniform}, h_sim)
+params_est = (model_est.μ, model_est.α, model_est.ω)
+a_est = model_est.mark_dist.a
+b_est = model_est.mark_dist.b
+
+@test isa(model_est, UnivariateHawkesProcess)
+@test params_true[1] * 0.9 <= params_est[1] <= params_true[1] * 1.1
+@test params_true[2] * 0.9 <= params_est[2] <= params_true[2] * 1.1
+@test params_true[3] * 0.9 <= params_est[3] <= params_true[3] * 1.1
+@test a_est < 0.01
+@test b_est > 0.99
+@test typeof(model_est.μ) == Float32

From 6dd55d3217e8f98ea15533289af470c1f0521f84 Mon Sep 17 00:00:00 2001
From: jkling <jose.kling@pm.me>
Date: Wed, 17 Dec 2025 17:51:34 +0100
Subject: [PATCH 4/4] Simplified structure to avoid repetition

- UnivariateHawkesProcess encompasses UnmarkedUnivariateHawkesProcess
- MultivariateHawkesProcess implemented

Missing: fit method for multivariate Hawkes processes
---
 Project.toml                                  |   2 +-
 src/PointProcesses.jl                         |  10 +-
 src/hawkes/fit.jl                             |  79 ---------
 src/hawkes/hawkes_process.jl                  |  47 ++++--
 src/hawkes/intensity.jl                       | 104 ------------
 src/hawkes/multivariate/intensity.jl          | 151 +++++++++++++-----
 .../multivariate/multivariate_hawkes.jl       |  11 +-
 src/hawkes/multivariate/simulation.jl         |   2 +-
 src/hawkes/multivariate/time_change.jl        |  38 +++++
 src/hawkes/simulation.jl                      |   6 +-
 src/hawkes/univariate/fit.jl                  |  94 ++++++++++-
 src/hawkes/univariate/intensity.jl            | 113 +++++++++----
 src/hawkes/univariate/simulation.jl           |  26 ++-
 src/hawkes/univariate/time_change.jl          |  43 +++--
 src/hawkes/univariate/univariate_hawkes.jl    |  28 +++-
 src/hawkes/unmarked_univariate/fit.jl         |  55 -------
 src/hawkes/unmarked_univariate/intensity.jl   |  34 ----
 src/hawkes/unmarked_univariate/simulation.jl  |   8 -
 src/hawkes/unmarked_univariate/time_change.jl |  22 ---
 .../unmarked_univariate_hawkes.jl             |  50 ------
 src/poisson/fit.jl                            |   7 +
 test/hawkes_multivariate.jl                   |  69 ++++++++
 test/hawkes_univariate.jl                     |  81 ++++++++++
 test/hawkes_unmarked.jl                       |  71 ++++++++
 test/runtests.jl                              |  10 +-
 25 files changed, 683 insertions(+), 478 deletions(-)
 delete mode 100644 src/hawkes/fit.jl
 delete mode 100644 src/hawkes/intensity.jl
 create mode 100644 src/hawkes/multivariate/time_change.jl
 delete mode 100644 src/hawkes/unmarked_univariate/fit.jl
 delete mode 100644 src/hawkes/unmarked_univariate/intensity.jl
 delete mode 100644 src/hawkes/unmarked_univariate/simulation.jl
 delete mode 100644 src/hawkes/unmarked_univariate/time_change.jl
 delete mode 100644 src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl
 create mode 100644 test/hawkes_multivariate.jl
 create mode 100644 test/hawkes_univariate.jl
 create mode 100644 test/hawkes_unmarked.jl

diff --git a/Project.toml b/Project.toml
index 2c6f23c..0891661 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,7 +1,7 @@
 name = "PointProcesses"
 uuid = "af0b7596-9bb0-472a-a012-63904f2b4c55"
-authors = ["Guillaume Dalle", "José Kling"]
 version = "0.6.0"
+authors = ["Guillaume Dalle", "José Kling"]
 
 [deps]
 DensityInterface = "b429d917-457f-4dbc-8f4c-0cc954292b1d"
diff --git a/src/PointProcesses.jl b/src/PointProcesses.jl
index 3351567..17bbf78 100644
--- a/src/PointProcesses.jl
+++ b/src/PointProcesses.jl
@@ -71,13 +71,6 @@ include("poisson/simulation.jl")
 
 include("hawkes/hawkes_process.jl")
 include("hawkes/simulation.jl")
-include("hawkes/intensity.jl")
-include("hawkes/fit.jl")
-include("hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl")
-include("hawkes/unmarked_univariate/intensity.jl")
-include("hawkes/unmarked_univariate/time_change.jl")
-include("hawkes/unmarked_univariate/fit.jl")
-include("hawkes/unmarked_univariate/simulation.jl")
 include("hawkes/univariate/univariate_hawkes.jl")
 include("hawkes/univariate/intensity.jl")
 include("hawkes/univariate/time_change.jl")
@@ -85,8 +78,7 @@ include("hawkes/univariate/fit.jl")
 include("hawkes/univariate/simulation.jl")
 include("hawkes/multivariate/multivariate_hawkes.jl")
 include("hawkes/multivariate/intensity.jl")
-# include("hawkes/multivariate/time_change.jl")
-# include("hawkes/multivariate/fit.jl")
+include("hawkes/multivariate/time_change.jl")
 include("hawkes/multivariate/simulation.jl")
 
 end
diff --git a/src/hawkes/fit.jl b/src/hawkes/fit.jl
deleted file mode 100644
index f1f0a15..0000000
--- a/src/hawkes/fit.jl
+++ /dev/null
@@ -1,79 +0,0 @@
-#=
-Method for fitting the parameters of marked and unmarked Hawkes
-processes. For unmarked processes, the marks in `history` must
-be either all equal to `nothing` or equal to 1.
-=#
-function fit_hawkes_params(
-    ::Type{R},
-    h::History,
-    div_ψ::Real;
-    step_tol::Float64=1e-6,
-    max_iter::Int=1000,
-    rng::AbstractRNG=default_rng(),
-) where {R<:Real}
-    T = float(R)
-    div_ψ = T(div_ψ)
-    n = nb_events(h)
-    nT = T(n)
-    n == 0 && return (zero(T), zero(T), zero(T))
-
-    tmax = T(duration(h))
-    # Normalize times so average inter-event time is 1 (T -> n)
-    norm_ts = T.(h.times .* (n / tmax))
-
-    # preallocate
-    A = zeros(T, n)            # A[i] = sum_{j<i} mⱼ exp(-ω (t_i - t_j))
-    S = zeros(T, n)            # S[i] = sum_{j<i} mⱼ (t_i - t_j) exp(-ω (t_i - t_j))
-    lambda_ts = similar(A)     # λ_i
-
-    # Λ(T) ≈ n after normalization
-    # μ not too close to 0 or 1 so that both base and offspring contribute
-    μ = T(0.2) + (T(0.6) * rand(rng, T))
-    ψ = one(T) - μ                           # ψ = α/ω (branching ratio)
-    t90 = T(0.5) + (T(1.5) * rand(rng, T))   # inverse of the time to decay to 10% in normalized time
-    ω = log(T(10)) * t90
-
-    n_iters = 0
-    step = step_tol + one(T)
-
-    # First event: A[1]=0, S[1]=0, so λ_1 = μ
-    lambda_ts[1] = μ
-
-    while (step >= step_tol) && (n_iters < max_iter)
-        # compute A, S, and λ
-        for i in 2:n
-            Δ = norm_ts[i] - norm_ts[i - 1]
-            e = exp(-ω * Δ)
-            A[i] = update_A(A[i - 1], e, h.marks[i - 1]) # Different updates for real marks and no marks
-            S[i] = (Δ * A[i]) + (e * S[i - 1])
-            lambda_ts[i] = μ + (ψ * ω) * A[i]
-        end
-
-        # E-step aggregates
-        D = zero(T)   # expected number of descendants
-        div = zero(T)   # ∑ (t_i - t_j) D_{ij}
-        for i in 2:n
-            w = (ψ * ω) / lambda_ts[i]  # factor common to all j<i
-            D += w * A[i]
-            div += w * S[i]
-        end
-
-        # Steps 4–5: M-step updates + convergence check
-        new_μ = one(T) - (D / nT)
-        new_ψ = D / div_ψ
-        new_ω = D / (div + eps(T))   # small guard to avoid div=0
-
-        step = max(abs(μ - new_μ), abs(ψ - new_ψ), abs(ω - new_ω))
-        μ, ψ, ω = new_μ, new_ψ, new_ω
-        n_iters += 1
-
-        # Prepare for next loop: reset λ₁ since A[1]=0 regardless of params
-        lambda_ts[1] = μ
-    end
-
-    n_iters >= max_iter && @warn "Maximum number of iterations reached without convergence."
-
-    # Unnormalize back to original time scale (T -> tmax):
-    # parameters in normalized space (') relate to original by μ0=μ'*(n/tmax), ω0=ω'*(n/tmax), α0=(ψ'ω')*(n/tmax)
-    return (μ * (nT / tmax), ψ * ω * (nT / tmax), ω * (nT / tmax))
-end
diff --git a/src/hawkes/hawkes_process.jl b/src/hawkes/hawkes_process.jl
index f19e3b9..633c5f5 100644
--- a/src/hawkes/hawkes_process.jl
+++ b/src/hawkes/hawkes_process.jl
@@ -3,15 +3,40 @@
 
 Common interface for all subtypes of `HawkesProcess`.
 """
-abstract type HawkesProcess <: AbstractPointProcess end
+abstract type HawkesProcess{R<:Real,D} <: AbstractPointProcess end
 
-# #=
-# If type parameter for `HawkesProcess` was NOT explicitly provided,
-# use `Float64` as the standard type
-# =#
-# function StatsAPI.fit(
-#     HP::Type{<:HawkesProcess}, h::History{H,M}; kwargs...
-# ) where {H<:Real,M}
-#     T = promote_type(Float64, H)
-#     return fit(HP{T}, h; kwargs...)
-# end
+#=
+    process_mark(distribution, mark)
+
+For all algorithms, an unmarked process and a process
+whose marks are all equal to 1 are equivalent.
+This unifies the calculation of intensities and avoids
+code repetition.
+=#
+process_mark(::Any, ::Nothing) = 1.0
+process_mark(::Any, m::Real) = m
+process_mark(::Type{Dirac{Nothing}}, ::Real) = 1.0
+
+#=
+    update_A(α, ω, t, ti_1, mi_1, Ai_1, distribution)
+
+Used for calculating the elements in the vector A in Ozaki (1979). There, the
+definition of A is
+    A[0] = 0
+    A[i] = exp(-ω (tᵢ - tᵢ₋₁)) * (1 + A[i - 1])
+and λ(tᵢ) = μ + α A[i].
+Here we make two modifications.
+First, if α is not fixed for all events, that is, we have α₁, ..., αₙ
+corresponding to each of the events t₁, ..., tₙ (in the marked case, 
+αᵢ = αmᵢ, mᵢ the i-th mark), then we define
+    A[0] = 0
+    A[i] = exp(-ω (tᵢ - tᵢ₋₁)) * (αᵢ + A[i - 1])
+so λ(tᵢ) = μ + A[i].
+The second is that we allow A to be updated with an arbitrary time t, because we
+can calculate λ(t) as λ(t) = μ + At, with
+    At = exp(-ω (t - tₖ)) * (αₖ + A[k])
+where tₖ is the last event time of the process before t.
+=#
+function update_A(α::R, ω::R, t::Real, ti_1::Real, Ai_1)::float(R) where {R<:Real}
+    return exp(-ω * (t - ti_1)) * (α + Ai_1)
+end
diff --git a/src/hawkes/intensity.jl b/src/hawkes/intensity.jl
deleted file mode 100644
index 9112f95..0000000
--- a/src/hawkes/intensity.jl
+++ /dev/null
@@ -1,104 +0,0 @@
-#=
-Calculates the vector `A` as is [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
-A[1] = 0
-A[i] = Σ_{tᵢ<tⱼ} exp(-ω(tᵢ - tⱼ))
-
-For marked processes, the calculation is slightly different
-A[1] = 0
-A[i] = Σ_{tᵢ<tⱼ} mᵢ exp(-ω(tᵢ - tⱼ)), where mᵢ is the i-th mark of the process
-
-This is used to efficiently calculate
-
-    λ(tᵢ) = μ + α ∑_{j<i} mⱼ exp(-ω(tᵢ - tⱼ)) = μ + α A[i]
-=#
-function A_Ozaki(times, marks, ω)
-    A = zeros(length(times))
-    for i in 2:length(times)
-        A[i] = update_A(A[i - 1], exp(-ω * (times[i] - times[i - 1])), marks[i - 1])
-    end
-    return A
-end
-
-#=
-Uses the idea from [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
-to efficiently calculate the value of the intensity function at any
-time. If tₙ is the last event before time s, then
-
-    λ(s) = μ + α exp(-ω (s - tₙ)) (mₙ + A[n])
-
-The vector A is only calculated up to n.
-=#
-function A_Ozaki_ts(
-    times::AbstractVector{R1}, marks::AbstractVector{M}, ω::R2, t::R3
-) where {R1<:Real,R2<:Real,R3<:Real,M}
-    T = M == Nothing ? float(promote_type(R1, R2, R3)) : float(promote_type(R1, R2, R3, M))
-    A = zero(T)
-    (isempty(times) || t <= times[1]) && return A
-    ind_times = 2
-    while ind_times <= length(times) && times[ind_times] < t
-        A = update_A(
-            A, exp(-ω * (times[ind_times] - times[ind_times - 1])), marks[ind_times - 1]
-        )
-        ind_times += 1
-    end
-    A = update_A(A, exp(-ω * (t - times[ind_times - 1])), marks[ind_times - 1])
-    return A
-end
-
-#=
-Uses the idea from [Ozaki (1979)](https://doi.org/10.1007/bf02480272)
-to efficiently calculate the values of the intensity function at any
-**ORDERED** set of times.
-=#
-function A_Ozaki_ts(
-    times::AbstractVector{R1}, marks::AbstractVector{M}, ω::R2, ts::AbstractVector{<:R3}
-) where {R1<:Real,R2<:Real,R3<:Real,M}
-    if M == Nothing
-        T = float(promote_type(R1, R2, R3))
-    else
-        T = float(promote_type(R1, R2, R3, M))
-    end
-    A = zeros(T, length(ts))
-    (isempty(times) || ts[end] <= times[1] || isempty(ts)) && return A
-    ind_A = 1
-    while ts[ind_A] <= times[1] # A[i] = 0 for all ts before the first event
-        ind_A += 1
-    end
-    ind_times = 2
-    while ind_A < length(ts)
-        while ind_times <= length(times) && times[ind_times] < ts[ind_A]
-            A[ind_A] = update_A(
-                A[ind_A],
-                exp(-ω * (times[ind_times] - times[ind_times - 1])),
-                marks[ind_times - 1],
-            )
-            ind_times += 1
-        end
-        A[ind_A + 1] = A[ind_A]
-        A[ind_A] = update_A(
-            A[ind_A], exp(-ω * (ts[ind_A] - times[ind_times - 1])), marks[ind_times - 1]
-        )
-        ind_A += 1
-    end
-    if ind_times > length(times) || times[ind_times] >= ts[end]
-        A[end] = update_A(
-            A[end], exp(-ω * (ts[end] - times[ind_times - 1])), marks[ind_times - 1]
-        )
-    else
-        A[end] = update_A(
-            A[end],
-            exp(-ω * (times[ind_times] - times[ind_times - 1])),
-            marks[ind_times - 1],
-        )
-        A[end] = update_A(A[end], exp(-ω * (ts[end] - times[ind_times])), marks[ind_times])
-    end
-    return A
-end
-
-function update_A(Ai_1, eωt::Real, _::Nothing)
-    return eωt * (1 + Ai_1)
-end
-
-function update_A(Ai_1, eωt::Real, mi_1::Real)
-    return eωt * (mi_1 + Ai_1)
-end
diff --git a/src/hawkes/multivariate/intensity.jl b/src/hawkes/multivariate/intensity.jl
index 746597f..d0b0063 100644
--- a/src/hawkes/multivariate/intensity.jl
+++ b/src/hawkes/multivariate/intensity.jl
@@ -1,51 +1,128 @@
-function ground_intensity(hp::MultivariateHawkesProcess, h::History, ts)
-    return mapreduce(m -> intensity(hp, m, ts, h), +, support(hp.mark_dist))
+#=
+OPTMIZE: If instead of calculating for a single time `t` we want to return
+         the intensities for a ORDERED vector fo times, these methods calculating
+         be refactored to calculate all the intensities in a single pass.
+=#
+
+function ground_intensity(hp::MultivariateHawkesProcess, h::History{<:Real,<:Int}, t::Real)
+    return mapreduce(m -> intensity(hp, m, t, h), +, support(hp.mark_dist))
 end
 
+#=
+OPTMIZE: Calculating `ground_intensity` from `intensity` is cleaner, but
+         it is more efficient to calculate directly in one pass.
+=#
 function intensity(
-    hp::MultivariateHawkesProcess{R1}, m::Int, ts, h::History{R2}
-) where {R1<:Real,R2<:Real}
-    T = float(promote_type(R1, R2, eltype(ts)))
-    m in support(hp.mark_dist) || return T.(zero(T))
-    λt = A_Ozaki_ts(h.times, hp.α[h.marks, m], hp.ω[m], ts)
-    return λt .+ (hp.μ * probs(hp.mark_dist)[m])
+    hp::MultivariateHawkesProcess{R}, m::Int, t::Real, h::History{<:Real,<:Int}
+) where {R<:Real}
+    T = float(R)
+    m in support(hp.mark_dist) || return zero(T)
+
+    # If t is outside of history, intensity is 0
+    (t < h.tmin || t > h.tmax) && return zero(T)
+
+    μ = T(hp.μ * probs(hp.mark_dist)[m])
+    α = T.(hp.α[:, m])
+    ω = T(hp.ω[m])
+
+    # If t is before first event, intensity is just the base intensity
+    t <= h.times[1] && return μ
+
+    # Calculate the contribution of the activation functions using Ozaki (1979)
+    λt = zero(T)
+    ind_h = 2
+    while ind_h <= nb_events(h) && t > h.times[ind_h]
+        mi_1 = h.marks[ind_h - 1]
+        λt = update_A(α[mi_1], ω, h.times[ind_h], h.times[ind_h - 1], λt)
+        ind_h += 1
+    end
+    mi_1 = h.marks[ind_h - 1]
+    λt = update_A(α[mi_1], ω, t, h.times[ind_h - 1], λt)
+
+    # Add the base intensity
+    λt += μ
+
+    return λt
 end
 
-function integrated_ground_intensity(hp::MultivariateHawkesProcess, h::History, ts)
-    marks = support(hp.mark_dist)
-    A = map(m -> A_Ozaki_ts(h.times, hp.α[h.marks, m], hp.ω[m], ts), marks)
-    s_marks = sum_marks(hp, h, marks, ts)
-    return (hp.μ .* ts) .+ sum((inv.(hp.ω) .* (s_marks .- A)))
+#=
+OPTMIZE: Calculating `integrated`ground`intensity` from `integrated_intensity` is cleaner, but
+         it is more efficient to calculate directly in one pass.
+=#
+function integrated_intensity(
+    hp::MultivariateHawkesProcess{R}, m::Int, h::History{<:Real,<:Int}, t::Real
+) where {R<:Real}
+    T = float(R)
+    m in support(hp.mark_dist) || return zero(T)
+
+    # If t occurs before the history starts, then the integral is 0
+    t < h.tmin && return zero(T)
+
+    μ = T(hp.μ * probs(hp.mark_dist)[m])
+    α = T.(hp.α[:, m])
+    ω = T(hp.ω[m])
+
+    Λt = μ * T(t)
+
+    # If t is before first event, intensity is just the base intensity
+    t <= h.times[1] && return Λt
+
+    # Calculate the contribution of the activation functions using Ozaki (1979)
+    ind_h = 2
+    sum_marks = zero(T)
+    A = zero(T)
+    while ind_h <= nb_events(h) && t > h.times[ind_h]
+        mi_1 = h.marks[ind_h - 1]
+        sum_marks += α[mi_1]
+        A = update_A(α[mi_1], ω, h.times[ind_h], h.times[ind_h - 1], A)
+        ind_h += 1
+    end
+    mi_1 = h.marks[ind_h - 1]
+    sum_marks += α[mi_1]
+    A = update_A(α[mi_1], ω, t, h.times[ind_h - 1], A)
+
+    # Add contribution of intensity functions
+    Λt += inv(ω) * (sum_marks - A)
+
+    return Λt
 end
 
-function integrated_ground_intensity(hp::MultivariateHawkesProcess, h::History, a, b)
-    return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
+function integrated_intensity(
+    hp::MultivariateHawkesProcess, m::Int, h::History{<:Real,<:Int}, a, b
+)
+    return integrated_intensity(hp, m, h, b) - integrated_intensity(hp, m, h, a)
 end
 
-function DensityInterface.logdensityof(hp::MultivariateHawkesProcess, h::History)
-    A = zeros(length(hp.μ))
-    sum_marks = zeros(length(hp.μ))
-    sum_logλ = 0.0
-    for i in 2:nb_events(h)
-        m = h.marks[i - 1]
-        sum_marks .+= hp.α[:, m]
-        A .= exp.(-hp.ω .* (h.times[i] - h.times[i - 1])) .* (hp.α[:, m] .+ A)
-        sum_logλ += log(hp.μ[m] + A[m])
-    end
-    m = h.marks[end]
-    sum_marks .+= hp.α[:, m]
-    A .= exp.(-hp.ω .* (h.tmax - h.times[end])) .* (hp.α[:, m] .+ A)
-    Λ_T = (hp.μ .* durantion(h)) + inv.(hp.ω) .* (sum_marks .- A)
-    return sum_logλ - sum(Λ_T)
+function integrated_ground_intensity(hp, h, t)
+    return mapreduce(m -> integrated_intensity(hp, m, h, t), +, support(hp.mark_dist))
 end
 
-function sum_marks(hp::MultivariateHawkesProcess, h::History, marks, t::Real)
-    return map(m -> sum(hp.α[h.marks[1:(searchsortedfirst(h.times, t) - 1)], m]), marks)
+function integrated_ground_intensity(hp, h, a, b)
+    return mapreduce(m -> integrated_intensity(hp, m, h, a, b), +, support(hp.mark_dist))
 end
 
-function sum_marks(hp::MultivariateHawkesProcess, h::History, marks, ts::Vector{<:Real})
-    return map(
-        m -> [sum(hp.α[h.marks[1:(searchsortedfirst(h.times, t) - 1)], m]) for t in ts],
-        marks,
-    )
+function DensityInterface.logdensityof(
+    hp::MultivariateHawkesProcess{R}, h::History{<:Real,<:Int}
+) where {R<:Real}
+    T = float(R)
+
+    isempty(h.times) && return T(hp.μ * duration(h))
+
+    μ = T.(hp.μ .* probs(hp.mark_dist))
+    α = T.(hp.α)
+    ω = T.(hp.ω)
+
+    A = zero(μ)
+    sum_marks = α[h.marks[1], :]
+    sum_logλ = log(μ[h.marks[1]])
+    for i in 2:nb_events(h)
+        mi_1 = h.marks[i - 1]
+        mi = h.marks[i]
+        sum_marks .+= α[mi, :]
+        A .= update_A.(α[mi_1, :], ω, h.times[i], h.times[i - 1], A)
+        sum_logλ += log(μ[mi] + A[mi])
+    end
+    A .= update_A.(α[h.marks[end], :], ω, h.tmax, h.times[end], A)
+    ΛT = (μ .* duration(h)) .+ inv.(ω) .* (sum_marks .- A)
+    return sum_logλ - sum(ΛT)
 end
diff --git a/src/hawkes/multivariate/multivariate_hawkes.jl b/src/hawkes/multivariate/multivariate_hawkes.jl
index c36b787..74af781 100644
--- a/src/hawkes/multivariate/multivariate_hawkes.jl
+++ b/src/hawkes/multivariate/multivariate_hawkes.jl
@@ -24,7 +24,7 @@ This is the 'Linear Multidimensional EHP' in Section 2 of
 - `α::Matrix{<:Real}`: Jump size.
 - `ω::Vector{<:Real}`: Decay rate.
 """
-struct MultivariateHawkesProcess{T<:Real} <: HawkesProcess
+struct MultivariateHawkesProcess{T} <: HawkesProcess{T,Categorical{Float64,Vector{Float64}}}
     μ::T
     α::Matrix{T}
     ω::Vector{T}
@@ -41,7 +41,6 @@ end
 function HawkesProcess(
     μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Vector{<:Real}; check_args::Bool=true
 )
-    sum(μ) == 0 && return PoissonProcess(sum(μ))
     check_args && check_args_Hawkes(μ, α, ω)
     R = promote_type(eltype(μ), eltype(α), eltype(ω))
     return MultivariateHawkesProcess(
@@ -64,12 +63,8 @@ function check_args_Hawkes(μ::Vector{<:Real}, α::Matrix{<:Real}, ω::Vector{<:
         throw(DimensionMismatch("α and ω must have the same length"))
     end
     if any(α ./ ω' .>= 1)
-        throw(
-            DomainError(
-                "α = $α, ω = $ω",
-                "HawkesProcess: αᵢⱼ/ωⱼ must be smaller than 1 for all i,j. Stability condition.",
-            ),
-        )
+        @warn """HawkesProcess: There are parameters αᵢⱼ and ωⱼ with
+        αᵢⱼ / ωⱼ >= 1. This may cause problems, especially in simulations."""
     end
     if any(ω .<= zero(ω))
         throw(
diff --git a/src/hawkes/multivariate/simulation.jl b/src/hawkes/multivariate/simulation.jl
index e498ca9..5bbe08f 100644
--- a/src/hawkes/multivariate/simulation.jl
+++ b/src/hawkes/multivariate/simulation.jl
@@ -1,7 +1,7 @@
 function one_parent!(
     rng::AbstractRNG, h::History{T}, mh::MultivariateHawkesProcess, parent_t, parent_m
 ) where {T<:Real}
-    for m in 1:length(mh)
+    for m in support(mh.mark_dist)
         sim_transf = descendants(rng, parent_t, mh.α[parent_m, m], mh.ω[m], h.tmax)
         append!(h.times, sim_transf)
         append!(h.marks, fill(m, length(sim_transf)))
diff --git a/src/hawkes/multivariate/time_change.jl b/src/hawkes/multivariate/time_change.jl
new file mode 100644
index 0000000..0485a73
--- /dev/null
+++ b/src/hawkes/multivariate/time_change.jl
@@ -0,0 +1,38 @@
+"""
+    time_change(hp::MultivariateHawkesProcess, h::History{R}) where {R<:Real}
+
+Time rescaling for multivariate Hawkes processes.
+
+Notice that the duration of the rescaled processes might be different. The output
+is a single `History` on the interval from 0 to the length of the longest rescaled
+process.
+
+Assume there are M processes, each of them with Nₘ events, m = 1, ..., M. Then the
+m-th rescaled process will be defined on the interval [0, Nₘ).
+"""
+function time_change(hp::MultivariateHawkesProcess, h::History{R,<:Int}) where {R<:Real}
+    T = float(R)
+    μ = T.(hp.μ .* (probs(hp.mark_dist)))   # Base intensity for each process
+    A = zero(μ)                # For calculating the compensator for each process
+    sum_marks = zero(μ)
+    times_transformed = zero(h.times)   # The rescaled event times
+    times_transformed[1] = (h.times[1] - h.tmin) * μ[h.marks[1]] # No activations before first event
+    for i in 2:nb_events(h)
+        mi_1 = h.marks[i - 1]
+        mi = h.marks[i]
+        α = hp.α[mi_1, :]
+        sum_marks .+= α
+        A .= update_A.(α, hp.ω, h.times[i], h.times[i - 1], A)
+        times_transformed[i] =
+            ((h.times[i] - h.tmin) * μ[mi]) +       # Integral of base rate
+            inv(hp.ω[mi]) * (sum_marks[mi] - A[mi]) # Integral of activations
+    end
+    mn = h.marks[end]
+    α = hp.α[mn, :]
+    sum_marks .+= α
+    A .= update_A.(α, hp.ω, h.tmax, h.times[end], A)
+    ΛT = (μ .* duration(h)) .+ inv.(hp.ω) .* (sum_marks .- A) # Length of the intervals of rescaled processes
+    return History(;
+        times=T.(times_transformed), tmin=zero(T), tmax=T(maximum(ΛT)), marks=event_marks(h)
+    )
+end
diff --git a/src/hawkes/simulation.jl b/src/hawkes/simulation.jl
index 491a05d..111e626 100644
--- a/src/hawkes/simulation.jl
+++ b/src/hawkes/simulation.jl
@@ -1,6 +1,6 @@
 function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin::Real, tmax::Real)
-    h = simulate(rng, PoissonProcess(hp.μ, mark_distribution(hp)), tmin, tmax)
-    sim_desc = generate_descendants!(rng, h, hp) # Recursively generates descendants from first events
+    h = simulate(rng, PoissonProcess(hp.μ, hp.mark_dist), tmin, tmax)
+    generate_descendants!(rng, h, hp) # Recursively generates descendants from first events
     perm = sortperm(h.times)
     h.times .= h.times[perm]
     h.marks .= h.marks[perm]
@@ -8,7 +8,7 @@ function simulate(rng::AbstractRNG, hp::HawkesProcess, tmin::Real, tmax::Real)
 end
 
 function simulate(hp::HawkesProcess, tmin::Real, tmax::Real)
-    simulate(default_rng(), hp, tmin, tmax)
+    return simulate(default_rng(), hp, tmin, tmax)
 end
 
 #=
diff --git a/src/hawkes/univariate/fit.jl b/src/hawkes/univariate/fit.jl
index e44ab35..71fa00b 100644
--- a/src/hawkes/univariate/fit.jl
+++ b/src/hawkes/univariate/fit.jl
@@ -22,20 +22,102 @@ The algorithm consists in the following steps:
         ω = D / div
 5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
 
-Notice that, for numerical stability, the process is normalized so the average inter-event time is equal to 1 and, 
-therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
+Notice that, for numerical stability, the process is normalized so the average inter-event time is equal to 1 and, therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
 
 ∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
 """
 function StatsAPI.fit(
-    HP::Type{<:UnivariateHawkesProcess{R,D}},
+    HP::Type{UnivariateHawkesProcess{R,D}},
     h::History;
     step_tol::Float64=1e-6,
     max_iter::Int=1000,
     rng::AbstractRNG=default_rng(),
 ) where {R,D}
-    sum_marks = sum(h.marks)
-    params = fit_hawkes_params(R, h, sum_marks; step_tol=step_tol, max_iter=max_iter, rng=rng)
-    d = fit(D, h.marks)
+    div_ψ = D == Dirac{Nothing} ? nb_events(h) : sum(h.marks)
+    params = fit_hawkes_params(HP, h, div_ψ; step_tol=step_tol, max_iter=max_iter, rng=rng)
+    d = D == Dirac{Nothing} ? Dirac(nothing) : fit(D, h.marks)
     return HawkesProcess(params..., d)
 end
+
+#=
+Method for fitting the parameters of marked and unmarked Hawkes
+processes.
+`div_ψ` must be passed as an argument. For unmarked processes, 
+`div_ψ` is equal to the number of events in the evet history, for
+marked processes, `div_ψ` is equal to the sum of all the marks.
+=#
+function fit_hawkes_params(
+    ::Type{UnivariateHawkesProcess{R,Dist}},
+    h::History,
+    div_ψ::Real;
+    step_tol::Float64=1e-6,
+    max_iter::Int=1000,
+    rng::AbstractRNG=default_rng(),
+) where {R<:Real,Dist}
+    T = float(R)
+    div_ψ = T(div_ψ)
+    n = nb_events(h)
+    N = T(n)
+    n == 0 && return (zero(T), zero(T), zero(T))
+
+    tmax = T(duration(h))
+    # Normalize times so average inter-event time is 1 (T -> n)
+    norm_ts = T.(h.times .* (n / tmax))
+
+    # preallocate
+    A = zeros(T, n)            # A[i] = sum_{j<i} α mⱼ exp(-ω (t_i - t_j))
+    S = zeros(T, n)            # S[i] = sum_{j<i} α mⱼ (t_i - t_j) exp(-ω (t_i - t_j))
+    lambda_ts = similar(A)     # λ_i
+
+    # Λ(T) ≈ n after normalization
+    # μ not too close to 0 or 1 so that both base and offspring contribute
+    μ = T(0.2) + (T(0.6) * rand(rng, T))
+    ψ = one(T) - μ                           # ψ = α/ω (branching ratio)
+    t90 = T(0.5) + (T(1.5) * rand(rng, T))   # inverse of the time to decay to 10% in normalized time
+    ω = log(T(10)) * t90
+
+    n_iters = 0
+    step = step_tol + one(T)
+
+    # First event: A[1]=0, S[1]=0, so λ_1 = μ
+    lambda_ts[1] = μ
+
+    while (step >= step_tol) && (n_iters < max_iter)
+        # compute A, S, and λ
+        for i in 2:n
+            Δ = norm_ts[i] - norm_ts[i - 1]
+            e = exp(-ω * Δ)
+            mi_1 = process_mark(Dist, h.marks[i - 1])
+            A[i] = e * (mi_1 + A[i - 1]) # Different updates for real marks and no marks
+            S[i] = (Δ * A[i]) + (e * S[i - 1])
+            lambda_ts[i] = μ + (ψ * ω) * A[i]
+        end
+
+        # E-step aggregates
+        D = zero(T)   # expected number of descendants
+        div = zero(T)   # ∑ (t_i - t_j) D_{ij}
+        for i in 2:n
+            w = (ψ * ω) / lambda_ts[i]  # factor common to all j<i
+            D += w * A[i]
+            div += w * S[i]
+        end
+
+        # Steps 4–5: M-step updates + convergence check
+        new_μ = (N - D) / N # N - D = ∑ pᵢᵢ
+        new_ψ = (D / div_ψ)
+        new_ω = D / (div + eps(T))   # small guard to avoid div=0
+
+        step = max(abs(μ - new_μ), abs(ψ - new_ψ), abs(ω - new_ω))
+        μ, ψ, ω = new_μ, new_ψ, new_ω
+        n_iters += 1
+
+        # Prepare for next loop: reset λ₁ since A[1]=0 regardless of params
+        lambda_ts[1] = μ
+    end
+
+    n_iters >= max_iter && @warn "Maximum number of iterations reached without convergence."
+
+    # Unnormalize back to original time scale (T -> tmax):
+    # parameters in normalized space (') relate to original by μ0=μ'*(n/tmax), ω0=ω'*(n/tmax), α0=(ψ'ω')*(n/tmax)
+    return (μ * (N / tmax), ψ * ω * (N / tmax), ω * (N / tmax))
+end
diff --git a/src/hawkes/univariate/intensity.jl b/src/hawkes/univariate/intensity.jl
index 3a8da69..73ec66d 100644
--- a/src/hawkes/univariate/intensity.jl
+++ b/src/hawkes/univariate/intensity.jl
@@ -1,44 +1,99 @@
-function ground_intensity(hp::UnivariateHawkesProcess, h::History, ts)
-    return hp.μ .+ (hp.α .* A_Ozaki_ts(h.times, h.marks, hp.ω, ts))
+function ground_intensity(
+    hp::UnivariateHawkesProcess{R,D}, h::History, t::Real
+) where {R<:Real,D}
+    T = float(R)
+    μ, α, ω = T.((hp.μ, hp.α, hp.ω))
+
+    # If t is outside of history, intensity is 0
+    (t < h.tmin || t > h.tmax) && return zero(T)
+
+    # If t is before first event, intensity is just the base intensity
+    t <= h.times[1] && return μ
+
+    # Calculate the contribution of the activation functions using Ozaki (1979)
+    λt = zero(T)
+    ind_h = 2
+    while ind_h <= nb_events(h) && t > h.times[ind_h]
+        mi_1 = T(process_mark(D, h.marks[ind_h - 1]))
+        λt = update_A(mi_1 * α, ω, h.times[ind_h], h.times[ind_h - 1], λt)
+        ind_h += 1
+    end
+    mi_1 = T(process_mark(D, h.marks[ind_h - 1]))
+    λt = update_A(mi_1 * α, ω, t, h.times[ind_h - 1], λt)
+
+    # Add the base intensity
+    λt += hp.μ
+
+    return λt
 end
 
-function intensity(
-    hp::UnivariateHawkesProcess{R1,D}, m::M, ts, h::History{R2,M}
-) where {R1<:Real,R2<:Real,M<:Real,D}
-    T = float(promote_type(R1, R2, M, eltype(ts))) # For type stability and coherence
+function intensity(hp::UnivariateHawkesProcess{R}, m, t::Real, h::History) where {R<:Real}
+    T = float(R)
     m in support(hp.mark_dist) || return zero(T)
-    return ground_intensity(hp, h, ts) .* T(pdf(hp.mark_dist, m)) # `pdf` always returns a `Float64`
+    return ground_intensity(hp, h, t) * T(pdf(hp.mark_dist, m)) # `pdf` always returns a `Float64`
+end
+
+function intensity(hp::UnmarkedUnivariateHawkesProcess, _, t::Real, h::History)
+    return ground_intensity(hp, h, t)
 end
 
-function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, ts)
-    A = A_Ozaki_ts(h.times, h.marks, hp.ω, ts)
-    s_marks = sum_marks(h, ts)
-    return (hp.μ .* ts) .+ sum((hp.α / hp.ω) .* (s_marks .- A))
+function integrated_ground_intensity(
+    hp::UnivariateHawkesProcess{R,D}, h::History, t
+) where {R<:Real,D}
+    T = float(R)
+    μ, α, ω = T.((hp.μ, hp.α, hp.ω))
+
+    # If t is outside of history, intensity is 0
+    t <= h.tmin && return zero(T)
+
+    # Add the base intensity
+    Λt = μ * T(t)
+
+    # If t is before first event, intensity is just the base intensity
+    t <= h.times[1] && return Λt
+
+    # Calculate the contribution of the activation functions using Ozaki (1979)
+    A = zero(T)
+    sum_marks = zero(T)
+    ind_h = 2
+    while ind_h <= nb_events(h) && t > h.times[ind_h]
+        mi_1 = T(process_mark(D, h.marks[ind_h - 1]))
+        sum_marks += mi_1
+        A = update_A(mi_1 * α, ω, h.times[ind_h], h.times[ind_h - 1], A)
+        ind_h += 1
+    end
+    mi_1 = T(process_mark(D, h.marks[ind_h - 1]))
+    sum_marks += mi_1
+    A = update_A(mi_1 * α, ω, t, h.times[ind_h - 1], A)
+
+    # Add intgral of activation functions
+    Λt += inv(ω) * (sum_marks - A)
+
+    return Λt
 end
 
 function integrated_ground_intensity(hp::UnivariateHawkesProcess, h::History, a, b)
     return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
 end
 
-function DensityInterface.logdensityof(hp::UnivariateHawkesProcess, h::History)
-    A = 0.0
-    sum_marks = 0.0
-    sum_logλ = 0.0
+function DensityInterface.logdensityof(
+    hp::UnivariateHawkesProcess{R,D}, h::History
+) where {R<:Real,D}
+    T = float(R)
+    μ, α, ω = T.((hp.μ, hp.α, hp.ω))
+
+    A = zero(T)
+    sum_marks = zero(T)
+    sum_logλ = zero(T)
     for i in 2:nb_events(h)
-        sum_marks += h.marks[i - 1]
-        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (h.marks[i - 1] + A)
-        sum_logλ += log(hp.μ + hp.α * A)
+        mi_1 = T(process_mark(D, h.marks[i - 1]))
+        sum_marks += mi_1
+        A = update_A(mi_1 * α, ω, h.times[i], h.times[i - 1], A)
+        sum_logλ += log(μ + α * A)
     end
-    sum_marks += h.marks[end]
-    A = exp(-hp.ω * (h.tmax - h.times[end])) * (h.marks[end] + A)
-    Λ_T = (hp.μ * duration(h)) + inv.(hp.ω) * (sum_marks - A)
+    mn = T(process_mark(D, h.marks[end]))
+    sum_marks += mn
+    A = update_A(mn * α, ω, h.tmax, h.times[end], A)
+    Λ_T = (μ * duration(h)) + inv.(ω) * (sum_marks - A)
     return sum_logλ - sum(Λ_T)
 end
-
-function sum_marks(h::History, t::Real)
-    return sum(h.marks[1:(searchsortedfirst(h.times, t) - 1)])
-end
-
-function sum_marks(h::History, ts::Vector{<:Real})
-    return [sum(h.marks[1:(searchsortedfirst(h.times, t) - 1)]) for t in ts]
-end
diff --git a/src/hawkes/univariate/simulation.jl b/src/hawkes/univariate/simulation.jl
index e99a9a7..7896b9a 100644
--- a/src/hawkes/univariate/simulation.jl
+++ b/src/hawkes/univariate/simulation.jl
@@ -1,8 +1,28 @@
 function one_parent!(
-    rng::AbstractRNG, h::History{T}, hp::UnivariateHawkesProcess, parent_t, parent_m
-) where {T<:Real}
-    sim_transf = descendants(rng, parent_t, parent_m * hp.α, hp.ω, h.tmax)
+    rng::AbstractRNG, h::History, hp::UnivariateHawkesProcess, parent_t, parent_m
+)
+    sim_transf = descendants(rng, parent_t, (parent_m * hp.α), hp.ω, h.tmax)
     append!(h.times, sim_transf)
     append!(h.marks, [rand(rng, mark_distribution(hp)) for _ in 1:length(sim_transf)])
     return nothing
 end
+
+function one_parent!(
+    rng::AbstractRNG, h::History, hp::UnmarkedUnivariateHawkesProcess, parent_t, _
+)
+    sim_transf = descendants(rng, parent_t, hp.α, hp.ω, h.tmax)
+    append!(h.times, sim_transf)
+    append!(h.marks, fill(nothing, length(sim_transf)))
+    return nothing
+end
+
+# generates the descendants of one single parent using the inverse transform method
+function descendants(rng::AbstractRNG, parent::R, α::Real, ω::Real, tmax::R) where {R<:Real}
+    T = float(R)
+    αT = T(α)
+    ωT = T(ω)
+    activation_integral = (αT / ωT) * (one(T) - exp(ωT * (parent - tmax)))
+    sim_transf = simulate_poisson_times(rng, one(T), zero(T), activation_integral)
+    @. sim_transf = parent - (inv(ωT) * log(one(T) - ((ωT / αT) * sim_transf))) # Inverse of integral of the activation function
+    return sim_transf
+end
diff --git a/src/hawkes/univariate/time_change.jl b/src/hawkes/univariate/time_change.jl
index bfb5609..33f48d6 100644
--- a/src/hawkes/univariate/time_change.jl
+++ b/src/hawkes/univariate/time_change.jl
@@ -1,27 +1,38 @@
-function time_change(hp::UnivariateHawkesProcess, h::History{R,M}) where {R<:Real,M}
+function time_change(hp::UnivariateHawkesProcess{<:Real,D}, h::History{R}) where {R<:Real,D}
+    T = float(R)
     n = nb_events(h)
-    A = zero(R)
-    sum_marks = zero(R)
-    times_transformed = zeros(R, n)
+    A = zero(T)
+    μ, α, ω = T.((hp.μ, hp.α, hp.ω))
+    sum_marks = zero(T)
+    times_transformed = zeros(T, n)
+
     # Loop to calculate i -> ∑_{j<i} mⱼ (1 - exp(-ω (tᵢ - tⱼ)))
     for i in 2:n
-        Δ = h.times[i] - h.times[i - 1]
-        e = exp(-hp.ω * Δ)
-        A = e * (h.marks[i - 1] + A)
-        sum_marks += h.marks[i - 1]
+        mi_1 = T(process_mark(D, h.marks[i - 1]))
+        A = update_A(mi_1 * α, ω, h.times[i], h.times[i - 1], A)
+        sum_marks += mi_1
         times_transformed[i] = sum_marks - A
     end
-    A = e * (h.marks[end] + A)
-    sum_marks += h.marks[end]
-    times_transformed .*= hp.α / hp.ω      # Integral of activation functions
-    times_transformed .+= h.times .* hp.μ  # Add integral of base rate
-    tmax_transformed = (hp.α / hp.ω) * (sum_marks - A)
-    tmax_transformed += R(hp.μ * duration(h))
+
+    # Calculate ∑_{i=1}^n mᵢ (1 - exp(-ω (tmax - tᵢ)))
+    mn = T(process_mark(D, h.marks[end]))
+    A = update_A(mn * α, ω, h.tmax, h.times[end], A)
+    sum_marks += mn
+
+    # Integral of activation functions
+    times_transformed .*= α / ω      # Integral of activation functions
+    tmax_transformed = (α / ω) * (sum_marks - A)
+
+    # Add integral of base rate
+    times_transformed .+= h.times .* μ
+    tmax_transformed += μ * duration(h)
+
+    # A time re-scaled process starts at t=0
     return History(;
         times=times_transformed,
         marks=h.marks,
-        tmin=zero(R),
+        tmin=zero(T),
         tmax=tmax_transformed,
         check_args=false,
-    ) # A time re-scaled process starts at t=0
+    )
 end
diff --git a/src/hawkes/univariate/univariate_hawkes.jl b/src/hawkes/univariate/univariate_hawkes.jl
index 96a454c..bee332f 100644
--- a/src/hawkes/univariate/univariate_hawkes.jl
+++ b/src/hawkes/univariate/univariate_hawkes.jl
@@ -18,17 +18,23 @@ Notice that the mark only affects the jump size.
 - `ω::R`: decay rate.
 - `mark_dist::D`: distribution of marks
 """
-struct UnivariateHawkesProcess{T<:Real,D} <: HawkesProcess
+struct UnivariateHawkesProcess{T,D} <: HawkesProcess{T,D}
     μ::T
     α::T
     ω::T
     mark_dist::D
 end
 
+const UnmarkedUnivariateHawkesProcess{R<:Real} = UnivariateHawkesProcess{R,Dirac{Nothing}}
+
 function Base.show(io::IO, hp::UnivariateHawkesProcess)
     return print(io, "UnivariateHawkesProcess($(hp.μ), $(hp.α), $(hp.ω), $(hp.mark_dist))")
 end
 
+function Base.show(io::IO, hp::UnmarkedUnivariateHawkesProcess)
+    return print(io, "UnmarkedUnivariateHawkesProcess($(hp.μ), $(hp.α), $(hp.ω))")
+end
+
 mark_distribution(hp::UnivariateHawkesProcess, _...) = hp.mark_dist
 
 function HawkesProcess(μ::Real, α::Real, ω::Real, mark_dist; check_args::Bool=true)
@@ -36,6 +42,26 @@ function HawkesProcess(μ::Real, α::Real, ω::Real, mark_dist; check_args::Bool
     return UnivariateHawkesProcess(promote(μ, α, ω)..., mark_dist)
 end
 
+function HawkesProcess(μ::Real, α::Real, ω::Real; check_args::Bool=true)
+    check_args && check_args_Hawkes(μ, α, ω)
+    return UnivariateHawkesProcess(promote(μ, α, ω)..., Dirac(nothing))
+end
+
+function check_args_Hawkes(μ::Real, α::Real, ω::Real)
+    if any((μ, α, ω) .< 0)
+        throw(
+            DomainError(
+                "μ = $μ, α = $α, ω = $ω",
+                "HawkesProcess: All parameters must be non-negative.",
+            ),
+        )
+    end
+    if α >= ω
+        @warn """HawkesProcess: α / ω >= 1. This may cause problems,
+        especially in simulations."""
+    end
+end
+
 function check_args_Hawkes(μ::Real, α::Real, ω::Real, mark_dist)
     mean_α = mark_dist isa Dirac{Nothing} ? α : mean(mark_dist) * α
     check_args_Hawkes(μ, mean_α, ω)
diff --git a/src/hawkes/unmarked_univariate/fit.jl b/src/hawkes/unmarked_univariate/fit.jl
deleted file mode 100644
index a8733f0..0000000
--- a/src/hawkes/unmarked_univariate/fit.jl
+++ /dev/null
@@ -1,55 +0,0 @@
-"""
-    StatsAPI.fit(::Type{<:UnmarkedUnivariateHawkesProcess{R}}, h::History; step_tol::Float64=1e-6, max_iter::Int=1000, rng::AbstractRNG=default_rng()) where {R<:Real}
-
-Expectation-Maximization algorithm from [E. Lewis, G. Mohler (2011)](https://arxiv.org/pdf/1801.08273)).
-The relevant calculations are in page 4, equations 6-13.
-
-Let (t₁ < ... < tₙ) be the event times over the interval [0, T). We use the immigrant-descendant representation,
-where immigrants arrive at a constant base rate μ and each each arrival may generate descendants following the
-activation function α exp(-ω(t - tᵢ)).
-
-The algorithm consists in the following steps:
-1. Start with some initial guess for the parameters μ, ψ, and ω. ψ = α ω is the branching factor.
-2. Calculate λ(tᵢ; μ, ψ, ω) (`lambda_ts` in the code) using the procedure in [Ozaki (1979)](https://doi.org/10.1007/bf02480272).
-3. For each tᵢ and each j < i, calculate Dᵢⱼ = P(tᵢ is a descendant of tⱼ) as
-
-    Dᵢⱼ = ψ ω exp(-ω(tᵢ - tⱼ)) / λ(tᵢ; μ, ψ, ω).
-
-    Define D = ∑_{j < i} Dᵢⱼ (expected number of descendants) and div = ∑_{j < i} (tᵢ - tⱼ) Dᵢⱼ. 
-4. Update the parameters as
-        μ = (N - D) / T
-        ψ = D / N
-        ω = D / div
-5. If convergence criterion is met, return updated parameters, otherwise, back to step 2.
-
-Notice that, for numerical stability, the process is normalized so the average inter-event time is equal to 1 and, 
-therefore, the interval of the process is transformed from T to N. Also, in equation (8) in the paper,
-
-∑_{i=1:n} pᵢᵢ = ∑_{i=1:n} (1 - ∑_{j < i} Dᵢⱼ) = N - D.
-"""
-function StatsAPI.fit(
-    ::Type{<:UnmarkedUnivariateHawkesProcess{R}},
-    h::History;
-    step_tol::Float64=1e-6,
-    max_iter::Int=1000,
-    rng::AbstractRNG=default_rng(),
-) where {R<:Real}
-    unmarked_h = History(; times=h.times, tmin=h.tmin, tmax=h.tmax)
-    params = fit_hawkes_params(
-        R, unmarked_h, nb_events(h); step_tol=step_tol, max_iter=max_iter, rng=rng
-    )
-    return HawkesProcess(params...)
-end
-
-function StatsAPI.fit(
-    ::Type{<:UnmarkedUnivariateHawkesProcess{R}},
-    h::History{R2, Nothing};
-    step_tol::Float64=1e-6,
-    max_iter::Int=1000,
-    rng::AbstractRNG=default_rng(),
-) where {R<:Real, R2<:Real}
-    params = fit_hawkes_params(
-        R, h, nb_events(h); step_tol=step_tol, max_iter=max_iter, rng=rng
-    )
-    return HawkesProcess(params...)
-end
\ No newline at end of file
diff --git a/src/hawkes/unmarked_univariate/intensity.jl b/src/hawkes/unmarked_univariate/intensity.jl
deleted file mode 100644
index a0fc972..0000000
--- a/src/hawkes/unmarked_univariate/intensity.jl
+++ /dev/null
@@ -1,34 +0,0 @@
-function ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, ts)
-    return hp.μ .+ (hp.α .* A_Ozaki_ts(h.times, fill(nothing, length(h.times)), hp.ω, ts))
-end
-
-function intensity(
-    hp::UnmarkedUnivariateHawkesProcess{R}, _, ts, h::History
-) where {R<:Real}
-    return ground_intensity(hp, h, ts)
-end
-
-intensity(hp::UnmarkedUnivariateHawkesProcess, ts, h::History) = ground_intensity(hp, h, ts)
-
-function integrated_ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, ts)
-    A = A_Ozaki_ts(h.times, fill(nothing, length(h.times)), hp.ω, ts)
-    ns = map(t -> nb_events(h, h.tmin, t), ts)
-    return (hp.μ .* ts) .+ sum((hp.α / hp.ω) .* (ns .- A))
-end
-
-function integrated_ground_intensity(hp::UnmarkedUnivariateHawkesProcess, h::History, a, b)
-    return integrated_ground_intensity(hp, h, b) - integrated_ground_intensity(hp, h, a)
-end
-
-# logdensityof (log-likelihood)
-function DensityInterface.logdensityof(hp::UnmarkedUnivariateHawkesProcess, h::History)
-    A = 0.0
-    sum_logλ = 0.0
-    for i in 2:nb_events(h)
-        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (1 + A)
-        sum_logλ += log(hp.μ + hp.α * A)
-    end
-    A = exp(-hp.ω * (h.tmax - h.times[end])) * (1 + A)
-    Λ_T = (hp.μ * duration(h)) + (hp.α / hp.ω) * (nb_events(h) - A)
-    return sum_logλ - sum(Λ_T)
-end
diff --git a/src/hawkes/unmarked_univariate/simulation.jl b/src/hawkes/unmarked_univariate/simulation.jl
deleted file mode 100644
index facfc7d..0000000
--- a/src/hawkes/unmarked_univariate/simulation.jl
+++ /dev/null
@@ -1,8 +0,0 @@
-function one_parent!(
-    rng::AbstractRNG, h::History{T}, hp::UnmarkedUnivariateHawkesProcess, parent_t, _
-) where {T<:Real}
-    sim_transf = descendants(rng, parent_t, hp.α, hp.ω, h.tmax)
-    append!(h.times, sim_transf)
-    append!(h.marks, fill(nothing, length(sim_transf)))
-    return nothing
-end
diff --git a/src/hawkes/unmarked_univariate/time_change.jl b/src/hawkes/unmarked_univariate/time_change.jl
deleted file mode 100644
index 855d0a6..0000000
--- a/src/hawkes/unmarked_univariate/time_change.jl
+++ /dev/null
@@ -1,22 +0,0 @@
-function time_change(hp::UnmarkedUnivariateHawkesProcess, h::History{R,M}) where {R<:Real,M}
-    n = nb_events(h)
-    A = zero(R)
-    times_transformed = zeros(R, n)
-    # Loop to calculate i -> ∑_{j<i} 1 - exp(-ω (tᵢ - tⱼ))
-    for i in 2:n
-        A = exp(-hp.ω * (h.times[i] - h.times[i - 1])) * (one(R) + A)
-        times_transformed[i] = (i - 1) - A
-    end
-    A = exp(-hp.ω * (h.tmax - h.times[end])) * (one(R) + A)
-    times_transformed .*= hp.α / hp.ω      # Integral of activation functions
-    times_transformed .+= h.times .* hp.μ  # Add integral of base rate
-    tmax_transformed = R((hp.α / hp.ω) * (n - A))
-    tmax_transformed += R(hp.μ * duration(h))
-    return History(;
-        times=times_transformed,
-        marks=h.marks,
-        tmin=zero(R),
-        tmax=tmax_transformed,
-        check_args=false,
-    ) # A time re-scaled process starts at t=0
-end
diff --git a/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl b/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl
deleted file mode 100644
index 7fe2fba..0000000
--- a/src/hawkes/unmarked_univariate/unmarked_univariate_hawkes.jl
+++ /dev/null
@@ -1,50 +0,0 @@
-"""
-    UnmarkedUnivariateHawkesProcess{R<:Real} <: HawkesProcess
-
-Unmarked univariate Hawkes process with exponential decay kernel.
-
-Denote the events of the process by tᵢ. The conditional intensity function
-of the Hawkes process is given by
-
-λ(t) = μ + ∑_{tᵢ < t} α exp(-ω (t - tᵢ)).
-
-# Fields
-- `μ::R`: baseline intensity (immigration rate)
-- `α::R`: jump size
-- `ω::R`: decay rate.
-"""
-struct UnmarkedUnivariateHawkesProcess{T<:Real} <: HawkesProcess
-    μ::T
-    α::T
-    ω::T
-end
-
-function Base.show(io::IO, hp::UnmarkedUnivariateHawkesProcess)
-    return print(io, "UnmarkedUnivariateHawkesProcess$((hp.μ, hp.α, hp.ω))")
-end
-
-mark_distribution(::UnmarkedUnivariateHawkesProcess, _...) = Dirac(nothing)
-
-function HawkesProcess(μ::Real, α::Real, ω::Real; check_args::Bool=true)
-    check_args && check_args_Hawkes(μ, α, ω)
-    return UnmarkedUnivariateHawkesProcess(promote(μ, α, ω)...)
-end
-
-function check_args_Hawkes(μ::Real, α::Real, ω::Real)
-    if any((μ, α, ω) .< 0)
-        throw(
-            DomainError(
-                "μ = $μ, α = $α, ω = $ω",
-                "HawkesProcess: All parameters must be non-negative.",
-            ),
-        )
-    end
-    if α >= ω
-        throw(
-            DomainError(
-                "α = $α, ω = $ω",
-                "HawkesProcess: mᵢα must be, on average, smaller than ω. Stability condition.",
-            ),
-        )
-    end
-end
diff --git a/src/poisson/fit.jl b/src/poisson/fit.jl
index b0f68da..d63d6c9 100644
--- a/src/poisson/fit.jl
+++ b/src/poisson/fit.jl
@@ -8,6 +8,13 @@ function StatsAPI.fit(
     return PoissonProcess(λ, mark_dist)
 end
 
+function StatsAPI.fit(
+    ::Type{PoissonProcess{R,Dirac{Nothing}}}, ss::PoissonProcessStats{R1,R2}
+) where {R<:Real,R1<:Real,R2<:Real}
+    λ = convert(R, ss.nb_events / ss.duration)
+    return PoissonProcess(λ, Dirac(nothing))
+end
+
 function StatsAPI.fit(pptype::Type{<:PoissonProcess}, args...; kwargs...)
     ss = suffstats(pptype, args...; kwargs...)
     return fit(pptype, ss)
diff --git a/test/hawkes_multivariate.jl b/test/hawkes_multivariate.jl
new file mode 100644
index 0000000..b7505ef
--- /dev/null
+++ b/test/hawkes_multivariate.jl
@@ -0,0 +1,69 @@
+# Constructor
+@test HawkesProcess(rand(Float32, 3), rand(Float32, 3), rand(Float32, 3) .+ 1) isa
+    MultivariateHawkesProcess{Float32}
+@test_throws DomainError HawkesProcess(rand(3) .- 1, rand(3, 3), rand(3) .+ 1)
+@test_throws DimensionMismatch HawkesProcess(rand(3), rand(2, 2), rand(3) .+ 1)
+@test_warn r"may cause problems" HawkesProcess(rand(3), rand(3, 3) .+ 1, rand(3))
+
+hp = HawkesProcess([1, 2, 3], [1 0.1 0.1; 0.2 2 0.2; 0.3 0.3 3], [2, 4, 6])
+h = History(; times=[1.0, 2.0, 4.0], marks=[3, 2, 1], tmin=0.0, tmax=5.0)
+h_big = History(;
+    times=BigFloat.([1, 2, 4]), marks=[3, 2, 1], tmin=BigFloat(0), tmax=BigFloat(5)
+)
+
+# Ground intensity
+@test ground_intensity(hp, h, 1) ≈ 6
+@test ground_intensity(hp, h, 2) ≈ 6 + sum(hp.α[3, :] .* exp.(-hp.ω .* 1))
+# @test ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# intensity
+@test intensity(hp, 1, 1, h) ≈ 1
+@test intensity(hp, 1, 2, h) ≈ 1 + hp.α[3, 1] * exp(-hp.ω[1] * 1)
+@test intensity(hp, 4, 2, h) == 0
+# @test intensity(hp, 1, [1, 2, 3, 4], h) ≈ [intensity(hp, 1, t, h) for t in [1, 2, 3, 4]]
+
+h1 = History([1], 0, 3, [2])
+h2 = History([1, 2], 0, 3, [2, 1])
+
+# Integrated ground intensity
+@test integrated_ground_intensity(hp, h1, 0, 1) ≈ 6
+@test integrated_ground_intensity(hp, h1, 0, 1000) ≈ 6000 + sum(hp.α[2, :] ./ hp.ω)
+@test integrated_ground_intensity(hp, h2, 0, 1000) ≈
+    6000 + sum(hp.α[2, :] ./ hp.ω) + sum(hp.α[1, :] ./ hp.ω)
+# @test integrated_ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# Log likelihood (logintensityof)
+@test logdensityof(hp, h1) ≈
+    log(2) - (hp.μ * duration(h1)) - sum((hp.α[2, :] ./ hp.ω) .* (1 .- exp.(-2 .* hp.ω)))
+@test logdensityof(hp, h2) ≈
+    log(2) + log(1 + hp.α[2, 1] * exp(-hp.ω[1])) - (hp.μ * duration(h2)) -
+      sum((hp.α[2, :] ./ hp.ω) .* (1 .- exp.(-2 .* hp.ω))) -
+      sum((hp.α[1, :] ./ hp.ω) .* (1 .- exp.(-hp.ω)))
+
+# time change
+h_transf = time_change(hp, h1)
+integral = maximum(
+    ((hp.μ .* probs(hp.mark_dist)) .* duration(h1)) .+
+    (hp.α[2, :] ./ hp.ω) .* (1 .- exp.(-2 .* hp.ω)),
+)
+
+@test isa(h_transf, History{Float64,Int})
+@test h_transf.marks == h1.marks
+@test h_transf.tmin ≈ 0
+@test h_transf.tmax ≈ integral
+@test h_transf.times ≈ [2.0]
+@test isa(time_change(hp, h_big), typeof(h_big))
+
+# simulate
+h_sim = simulate(hp, 0.0, 10.0)
+hp_mult = HawkesProcess([1, 1], [1, 1], [2, 2])
+hp_univ = HawkesProcess(1, 1, 2)
+n_sims = 10000
+mean_mult = sum([nb_events(simulate(hp_mult, 0, 10)) for _ in 1:n_sims]) / (2 * n_sims)
+mean_univ = sum([nb_events(simulate(hp_univ, 0, 10)) for _ in 1:n_sims]) / n_sims
+
+@test issorted(h_sim.times)
+@test isa(h_sim, History{Float64,Int64})
+@test isapprox(mean_mult, mean_univ, atol=1)
diff --git a/test/hawkes_univariate.jl b/test/hawkes_univariate.jl
new file mode 100644
index 0000000..a9afbf2
--- /dev/null
+++ b/test/hawkes_univariate.jl
@@ -0,0 +1,81 @@
+# Constructor
+@test HawkesProcess(1, 1, 2, Uniform()) isa UnivariateHawkesProcess{Int64,Uniform{Float64}}
+@test_throws DomainError HawkesProcess(1, 1, 2, Uniform(-1, 1))
+@test_warn r"may cause problems" HawkesProcess(1, 1, 2, Uniform(10, 11))
+
+hp = HawkesProcess(1, 1, 2, Uniform(0, 3))
+h = History(; times=[1.0, 2.0, 4.0], marks=[3, 2, 1], tmin=0.0, tmax=5.0)
+h_big = History(;
+    times=BigFloat.([1, 2, 4]), marks=[3.0, 2.0, 1.0], tmin=BigFloat(0), tmax=BigFloat(5)
+)
+
+# Ground intensity
+@test ground_intensity(hp, h, 1) ≈ 1
+@test ground_intensity(hp, h, 2) ≈ 1 + 3 * hp.α * exp(-hp.ω * 1)
+# @test ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# Intensity
+@test intensity(hp, 1, 1, h) ≈ ground_intensity(hp, h, 1) * (1/3)
+@test intensity(hp, 1, 2, h) ≈ ground_intensity(hp, h, 2) * (1/3)
+@test intensity(hp, 4, 2, h) == 0
+# @test intensity(hp, 1, [1, 2, 3, 4], h) ≈ [intensity(hp, 1, t, h) for t in [1, 2, 3, 4]]
+
+h1 = History([1], 0, 3, [2])
+h2 = History([1, 2], 0, 3, [2, 1])
+
+# Integrated ground intensity
+@test integrated_ground_intensity(hp, h1, 0, 1) ≈ 1
+@test integrated_ground_intensity(hp, h1, 0, 1000) ≈ 1001
+@test integrated_ground_intensity(hp, h2, 0, 1000) ≈ 1001.5
+# @test integrated_ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# Log likelihood (logintensityof)
+@test logdensityof(hp, h1) ≈ log(1) - (hp.μ * duration(h1)) - (1 - exp(-4))
+@test logdensityof(hp, h2) ≈
+    log(1) + log(1 + 2 * exp(-2)) - (hp.μ * duration(h2)) - ((1 - exp(-4))) -
+      ((1 - exp(-2)) / 2)
+
+# time change
+h_transf = time_change(hp, h)
+integral =
+    (hp.μ * duration(h)) +
+    (hp.α / hp.ω) *
+    sum([h.marks[i] * (1 - exp(-hp.ω * (h.tmax - h.times[i]))) for i in 1:length(h.times)])
+
+@test isa(h_transf, typeof(h))
+@test h_transf.marks == h.marks
+@test h_transf.tmin ≈ 0
+@test h_transf.tmax ≈ integral
+@test h_transf.times ≈
+    [1, 2 + (3 / 2) * (1 - exp(-2)), 4 + (3 / 2) * (1 - exp(-6)) + (1 - exp(-4))]
+@test isa(time_change(hp, h_big), typeof(h_big))
+
+# Fit
+Random.seed!(1)
+model_true = HawkesProcess(params_true..., Uniform())
+h_sim = simulate(model_true, 0.0, 50.0)
+model_est = fit(UnivariateHawkesProcess{Float32,Uniform}, h_sim)
+params_est = (model_est.μ, model_est.α, model_est.ω)
+a_est = model_est.mark_dist.a
+b_est = model_est.mark_dist.b
+
+@test isa(model_est, UnivariateHawkesProcess)
+@test params_true[1] * 0.9 <= params_est[1] <= params_true[1] * 1.1
+@test params_true[2] * 0.9 <= params_est[2] <= params_true[2] * 1.1
+@test params_true[3] * 0.9 <= params_est[3] <= params_true[3] * 1.1
+@test a_est < 0.01
+@test b_est > 0.99
+@test typeof(model_est.μ) == Float32
+
+# simulate
+hp_univ = HawkesProcess(1, 1, 2, Uniform(0, 2))
+hp_unmk = HawkesProcess(1, 1, 2)
+n_sims = 10000
+mean_univ = sum([nb_events(simulate(hp_univ, 0, 10)) for _ in 1:n_sims]) / n_sims
+mean_unmk = sum([nb_events(simulate(hp_unmk, 0, 10)) for _ in 1:n_sims]) / n_sims
+h_sim = simulate(hp, 0.0, 10.0)
+
+@test issorted(h_sim.times)
+@test isa(h_sim, History{Float64,Float64})
diff --git a/test/hawkes_unmarked.jl b/test/hawkes_unmarked.jl
new file mode 100644
index 0000000..3c23f4b
--- /dev/null
+++ b/test/hawkes_unmarked.jl
@@ -0,0 +1,71 @@
+# Constructor
+@test HawkesProcess(1, 1, 2) isa UnmarkedUnivariateHawkesProcess{Int64}
+@test HawkesProcess(1, 1, 2.0) isa UnmarkedUnivariateHawkesProcess{Float64}
+@test_throws DomainError HawkesProcess(-1, 1, 2)
+@test_warn r"may cause problems" HawkesProcess(1, 1, 1)
+
+# Ground intensity
+hp = HawkesProcess(1, 1, 2)
+h = History(; times=[1.0, 2.0, 4.0], marks=[3, 2, 1], tmin=0.0, tmax=5.0)
+h_big = History(;
+    times=BigFloat.([1, 2, 4]), marks=[3.0, 2.0, 1.0], tmin=BigFloat(0), tmax=BigFloat(5)
+)
+
+@test ground_intensity(hp, h, 1) ≈ 1
+@test ground_intensity(hp, h, 2) ≈ 1 + hp.α * exp(-hp.ω * 1)
+# @test ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# intensity
+@test intensity(hp, "a", 1, h) ≈ ground_intensity(hp, h, 1)
+@test intensity(hp, 1, 2, h) ≈ ground_intensity(hp, h, 2)
+# @test intensity(hp, 1, [1, 2, 3, 4], h) ≈ [intensity(hp, 1, t, h) for t in [1, 2, 3, 4]]
+
+# Integrated ground intensity
+h1 = History([1], 0, 3, [2])
+h2 = History([1, 2], 0, 3, [2, 1])
+
+@test integrated_ground_intensity(hp, h1, 0, 1) ≈ 1
+@test integrated_ground_intensity(hp, h1, 0, 1000) ≈ 1000.5
+@test integrated_ground_intensity(hp, h2, 0, 1000) ≈ 1001
+# @test integrated_ground_intensity(hp, h, [1, 2, 3, 4]) ≈
+#     [integrated_ground_intensity(hp, h, t) for t in [1, 2, 3, 4]]
+
+# Log likelihood (logintensityof)
+@test logdensityof(hp, h1) ≈ log(1) - (hp.μ * duration(h1)) - (1 - exp(-4)) / 2
+@test logdensityof(hp, h2) ≈
+    log(1) + log(1 + exp(-2)) - (hp.μ * duration(h2)) - ((1 - exp(-4)) / 2) -
+      ((1 - exp(-2)) / 2)
+
+# time change
+h_transf = time_change(hp, h)
+integral =
+    (hp.μ * duration(h)) +
+    (hp.α / hp.ω) * sum([1 - exp(-hp.ω * (h.tmax - ti)) for ti in h.times])
+
+@test isa(h_transf, typeof(h))
+@test h_transf.marks == h.marks
+@test h_transf.tmin ≈ 0
+@test h_transf.tmax ≈ integral
+@test h_transf.times ≈ [1, (2 + (1 - exp(-2)) / 2), 4 + (2 - exp(-2 * 3) - exp(-2 * 2)) / 2]
+@test isa(time_change(hp, h_big), typeof(h_big))
+
+# Fit
+# Random.seed!(1)
+params_true = (100.0, 100.0, 200.0)
+model_true = HawkesProcess(params_true...)
+h_sim = simulate(model_true, 0.0, 50.0)
+model_est = fit(UnmarkedUnivariateHawkesProcess{Float64}, h_sim)
+params_est = (model_est.μ, model_est.α, model_est.ω)
+
+@test isa(model_est, UnmarkedUnivariateHawkesProcess)
+@test all((params_true .* 0.9) .<= params_est .<= (params_true .* 1.1))
+@test isa(
+    fit(UnmarkedUnivariateHawkesProcess{Float32}, h_sim),
+    UnmarkedUnivariateHawkesProcess{Float32},
+)
+
+# simulate
+h_sim = simulate(hp, 0.0, 10.0)
+@test issorted(h_sim.times)
+@test isa(h_sim, History{Float64,Nothing})
diff --git a/test/runtests.jl b/test/runtests.jl
index f2273ff..c61c801 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -40,6 +40,14 @@ DocMeta.setdocmeta!(PointProcesses, :DocTestSetup, :(using PointProcesses); recu
         end
     end
     @testset verbose = true "Hawkes" begin
-        include("hawkes.jl")
+        @testset verbose = true "Unmarked Univariate" begin
+            include("hawkes_unmarked.jl")
+        end
+        @testset verbose = true "Univariate" begin
+            include("hawkes_univariate.jl")
+        end
+        @testset verbose = true "Multivariate" begin
+            include("hawkes_multivariate.jl")
+        end
     end
 end