From 240d09490d86f85afd981a89ec68b1765f2c4282 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Fri, 5 Sep 2025 10:13:39 -0400
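Subject: [PATCH 01/15] simx: initial SST-SimX integration redesign

Introduce an IMemBackend interface for the SimX memory model, with two
implementations: MemBackendDram, which wraps the existing DramSim, and
MemBackendSST, which forwards requests to SST through a submit callback
registered with vx_register_submit() and receives completions through
vx_on_mem_complete().

MemSim no longer calls DramSim directly; it drains the crossbar request
queues into the backend, which is selected at compile time with
USE_SST_MEM_BACKEND.

Also add VortexSimulator, a wrapper that steps the processor one cycle
at a time, the VortexGPGPU SST component that drives it, and a
libvortex.so target in the simx Makefile.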
---
 sim/simx/Makefile             |  26 ++++-
 sim/simx/VortexGPGPU.cpp      |  89 ++++++++++++++++
 sim/simx/VortexGPGPU.h        |  32 ++++++
 sim/simx/mem_backend.h        |  13 +++
 sim/simx/mem_backend_dram.cpp |  75 ++++++++++++++
 sim/simx/mem_backend_dram.h   |  51 ++++++++++
 sim/simx/mem_backend_sst.cpp  |  75 ++++++++++++++
 sim/simx/mem_backend_sst.h    |  46 +++++++++
 sim/simx/mem_sim.cpp          | 186 ++++++++++++++++------------------
 sim/simx/vortex_simulator.cpp | 102 +++++++++++++++++++
 sim/simx/vortex_simulator.h   |  43 ++++++++
 11 files changed, 638 insertions(+), 100 deletions(-)
 create mode 100644 sim/simx/VortexGPGPU.cpp
 create mode 100644 sim/simx/VortexGPGPU.h
 create mode 100644 sim/simx/mem_backend.h
 create mode 100644 sim/simx/mem_backend_dram.cpp
 create mode 100644 sim/simx/mem_backend_dram.h
 create mode 100644 sim/simx/mem_backend_sst.cpp
 create mode 100644 sim/simx/mem_backend_sst.h
 create mode 100644 sim/simx/vortex_simulator.cpp
 create mode 100644 sim/simx/vortex_simulator.h
diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 1eca622701..90e18e285d 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -27,6 +27,18 @@ SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp
 SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp
 SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
 
+# SST flags
+SST_CFLAGS := $(shell pkg-config --cflags sst-core)
+SST_CFLAGS += -I../../../sst/sst/sst-core/include
+SST_LFLAGS := $(shell pkg-config --libs sst-core)
+
+VORTEX_SST_SRCS := \
+    $(SRC_DIR)/mem_backend.cpp \
+    $(SRC_DIR)/mem_backend_dram.cpp \
+    $(SRC_DIR)/mem_backend_sst.cpp \
+    $(SRC_DIR)/vortex_simulator.cpp \
+    $(SRC_DIR)/VortexGPGPU.cpp
+
 # Add V extension sources
 ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
   	SRCS += $(SRC_DIR)/voperands.cpp
@@ -72,7 +84,14 @@ PROJECT := simx
 
 .PHONY: all force clean clean-lib clean-exe clean-obj
 
-all: $(DESTDIR)/$(PROJECT)
+all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so
+
+$(DESTDIR)/libvortex.so:
+        $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \
+            -I./sim/simx \
+            $(SRCS) $(VORTEX_SST_SRCS) \
+            -shared -o $@ \
+            $(LDFLAGS) $(SST_LFLAGS)
 
 # build common object files
 $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE)
@@ -113,10 +132,13 @@ $(CONFIG_FILE): force
 clean-lib:
 	rm -f $(DESTDIR)/lib$(PROJECT).so
 
+clean-libvortex:
+	rm -f $(DESTDIR)/libvortex.so
+
 clean-exe:
 	rm -f $(DESTDIR)/$(PROJECT)
 
 clean-obj:
 	rm -rf $(OBJ_DIR)
 
-clean: clean-lib clean-exe clean-obj
\ No newline at end of file
+clean: clean-lib clean-exe clean-obj clean-libvortex
\ No newline at end of file
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
new file mode 100644
index 0000000000..882d2f98ce
--- /dev/null
+++ b/sim/simx/VortexGPGPU.cpp
@@ -0,0 +1,89 @@
+#include <sst/core/sst_config.h>
+#include "VortexGPGPU.h"
+#include "mem_backend_sst.h"   // needed for vx_register_submit and vx_on_mem_complete
+
+using namespace SST;
+using namespace SST::Vortex;
+using SST::Interfaces::StandardMem;
+
+VortexGPGPU *VortexGPGPU::instance_ = nullptr;
+
+VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
+    : Component(id),
+      sim_(std::make_unique<vortex::VortexSimulator>()),
+      memIface_(nullptr) {
+
+    // Parameter: clock frequency (default 1GHz)
+    std::string clockfreq = params.find<std::string>("clock", "1GHz");
+    // Parameter: program path
+    std::string kernel = params.find<std::string>("program", "");
+
+    // Create StandardMem interface; auto-bind to port name "memIface"
+    memIface_ = loadUserSubComponent<StandardMem>(
+        "memIface", ComponentInfo::SHARE_NONE,
+        registerClock(clockfreq,
+                      new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick)),
+        new StandardMem::Handler<VortexGPGPU>(this, &VortexGPGPU::handleMemResp));
+
+    // Register callback so SimX can submit memory to SST
+    instance_ = this;
+    vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) {
+        if (write) {
+            std::vector<uint8_t> zero(size, 0);
+            auto *req = new StandardMem::Write(addr, zero);
+            req->setDst(tag);
+            instance_->memIface_->send(req);
+        } else {
+            auto *req = new StandardMem::Read(addr, size);
+            req->setDst(tag);
+            instance_->memIface_->send(req);
+        }
+    });
+
+    // Load the kernel or ELF
+    if (!sim_->init(kernel)) {
+        SST::Output out;
+        out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n");
+    }
+
+    registerAsPrimaryComponent();
+    primaryComponentDoNotEndSim();
+}
+
+VortexGPGPU::~VortexGPGPU() = default;
+
+void VortexGPGPU::setup() {}
+void VortexGPGPU::finish() {}
+
+bool VortexGPGPU::clockTick(SST::Cycle_t) {
+    // Advance the GPU one cycle
+    bool running = sim_->cycle();
+    if (!running) {
+        primaryComponentOKToEndSim();
+        return false;
+    }
+    return true;
+}
+
+void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
+    // Inform SimX that this request has completed
+    vx_on_mem_complete(req->getDst());
+    delete req;
+}
+
+// Register with SST
+SST_ELI_REGISTER_COMPONENT(
+    VortexGPGPU,
+    "vortex",           // element library name
+    "VortexGPGPU",      // component name
+    SST_ELI_ELEMENT_VERSION(1,0,0),
+    "Headless Vortex GPGPU Simulator",
+    COMPONENT_CATEGORY_PROCESSOR
+)
+SST_ELI_DOCUMENT_PARAMS(
+    {"clock", "Clock frequency", "1GHz"},
+    {"program", "Path to the kernel or ELF to load", ""}
+)
+SST_ELI_DOCUMENT_PORTS(
+    {"memIface", "StandardMem port to connect to the SST memory hierarchy", {}}
+)
diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h
new file mode 100644
index 0000000000..feeb538829
--- /dev/null
+++ b/sim/simx/VortexGPGPU.h
@@ -0,0 +1,32 @@
+// VortexGPGPU.h
+#pragma once
+#include <sst/core/component.h>
+#include <sst/core/interfaces/stdMem.h>
+#include <memory>
+#include <string>
+#include "vortex_simulator.h"  // wrapper around SimX
+
+namespace SST {
+namespace Vortex {
+
+class VortexGPGPU : public SST::Component {
+public:
+    VortexGPGPU(SST::ComponentId_t id, SST::Params& params);
+    ~VortexGPGPU() override;
+
+    void setup() override;
+    void finish() override;
+
+private:
+    bool clockTick(SST::Cycle_t cycle);
+    void handleMemResp(SST::Interfaces::StandardMem::Request* req);
+
+    // static pointer used by lambda in vx_register_submit()
+    static VortexGPGPU* instance_;
+
+    std::unique_ptr<vortex::VortexSimulator> sim_;
+    SST::Interfaces::StandardMem* memIface_;
+};
+
+} // namespace Vortex
+} // namespace SST
diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h
new file mode 100644
index 0000000000..73f63b2ad0
--- /dev/null
+++ b/sim/simx/mem_backend.h
@@ -0,0 +1,13 @@
+#pragma once
+#include <cstdint>
+
+namespace vortex {
+struct IMemBackend {
+    virtual ~IMemBackend() = default;
+    virtual void reset() = 0;
+    virtual void tick() = 0;
+    virtual void send_request(uint64_t addr, bool write,
+                              uint32_t size, uint32_t tag,
+                              uint32_t cid, uint64_t uuid) = 0;
+};
+} // namespace vortex
diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp
new file mode 100644
index 0000000000..8d83a75d2e
--- /dev/null
+++ b/sim/simx/mem_backend_dram.cpp
@@ -0,0 +1,75 @@
+// mem_backend_dram.cpp
+#include "mem_backend_dram.h"
+
+using namespace vortex;
+
+namespace {
+struct CallbackData {
+    MemBackendDram* backend;
+    uint64_t tag;
+};
+} // anonymous namespace
+
+MemBackendDram* MemBackendDram::inst_ = nullptr;
+
+MemBackendDram::MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio)
+    : num_banks_(num_banks)
+    , block_size_(block_size)
+    , lg2_block_size_(0)
+    , dram_sim_(num_banks, block_size, clock_ratio)
+{
+    // Compute log2(block_size_) once; block_size_ is assumed to be a power of two.
+    uint32_t tmp = block_size_;
+    while (tmp > 1) {
+        ++lg2_block_size_;
+        tmp >>= 1;
+    }
+    inst_ = this;
+}
+
+void MemBackendDram::reset() {
+    inflight_.clear();
+    dram_sim_.reset();
+}
+
+void MemBackendDram::tick() {
+    // Retire pending transactions in DramSim
+    dram_sim_.tick();
+}
+
+void MemBackendDram::dram_complete(void* arg) {
+    auto* data = static_cast<CallbackData*>(arg);
+    MemBackendDram* backend = data->backend;
+    uint64_t tag = data->tag;
+    auto it = backend->inflight_.find(tag);
+    if (it != backend->inflight_.end()) {
+        const Info& info = it->second;
+        if (!info.write) {
+            // Form a MemRsp for reads only
+            MemRsp rsp{tag, info.cid, info.uuid};
+            // Route the response to the recorded bank
+            uint32_t bank = info.bank;
+            if (backend->mem_xbar_rsp_cb_)
+                backend->mem_xbar_rsp_cb_(bank, rsp);
+        }
+        backend->inflight_.erase(it);
+    }
+    delete data;
+}
+
+void MemBackendDram::send_request(uint64_t addr, bool write,
+                                  uint32_t size, uint32_t tag,
+                                  uint32_t cid, uint64_t uuid) {
+    // Compute bank index: (addr >> lg2(block_size)) mod num_banks
+    uint32_t bank_idx = 0;
+    if (num_banks_ > 0)
+        bank_idx = static_cast<uint32_t>((addr >> lg2_block_size_) & (num_banks_ - 1));
+    inflight_.emplace(tag, Info{cid, uuid, write, bank_idx});
+    auto* cb_data = new CallbackData{this, tag};
+    // The size is ignored by DramSim because it is configured with block_size_.
+    dram_sim_.send_request(addr, write, &MemBackendDram::dram_complete, cb_data);
+}
+
+void MemBackendDram::complete(uint64_t tag) {
+    // Not used; dram_complete() handles completions
+}
\ No newline at end of file
diff --git a/sim/simx/mem_backend_dram.h b/sim/simx/mem_backend_dram.h
new file mode 100644
index 0000000000..5a6f6f3d11
--- /dev/null
+++ b/sim/simx/mem_backend_dram.h
@@ -0,0 +1,51 @@
+// mem_backend_dram.h
+#pragma once
+#include "mem_backend.h"
+#include "dram_sim.h"
+#include <unordered_map>
+#include <functional>
+#include "types.h"
+
+namespace vortex {
+
+class MemBackendDram : public IMemBackend {
+public:
+    static MemBackendDram* instance() { return inst_; }
+
+    // Construct with the same parameters as MemSim::Config: number of banks,
+    // block size in bytes, and clock ratio. These values are passed to
+    // the underlying DramSim so that the external memory model matches.
+    MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio);
+
+    void reset() override;
+    void tick() override;
+    void send_request(uint64_t addr, bool write,
+                      uint32_t size, uint32_t tag,
+                      uint32_t cid, uint64_t uuid) override;
+
+    // Not used directly; completions are handled by dram_complete().
+    void complete(uint64_t tag);
+
+    // Set by MemSim to push completed responses back to the correct
+    // bank in the crossbar.
+    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
+
+private:
+    struct Info {
+        uint32_t cid;
+        uint64_t uuid;
+        bool write;
+        uint32_t bank;  // bank index computed from the address
+    };
+    std::unordered_map<uint64_t, Info> inflight_;
+    uint32_t num_banks_;
+    uint32_t block_size_;
+    uint32_t lg2_block_size_;
+    static MemBackendDram* inst_;
+    DramSim dram_sim_;
+
+    // Static callback invoked by DramSim when a request completes
+    static void dram_complete(void* arg);
+};
+
+} // namespace vortex
\ No newline at end of file
diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp
new file mode 100644
index 0000000000..3cc9da2588
--- /dev/null
+++ b/sim/simx/mem_backend_sst.cpp
@@ -0,0 +1,75 @@
+// mem_backend_sst.cpp
+// Implementation of the SST-backed memory backend.  This backend forwards
+// all off-chip memory requests to the SST StandardMem interface via a
+// registered callback (vx_submit_fn).  It maintains a table of inflight
+// transactions keyed by the original request tag so that completions can
+// be correlated back to the correct cluster and request.  When a read
+// completion is observed via vx_on_mem_complete(), the backend pushes a
+// MemRsp back into the crossbar using the stored cid/uuid.  Writes
+// complete silently.  Bank routing is currently fixed to bank 0; this
+// preserves correctness but may underutilize bank-level parallelism.
+
+#include "mem_backend_sst.h"
+
+extern "C" {
+
+// Register a submit function provided by the SST component.  The
+// MemBackendSST stores it in a static member so that calls to
+// send_request() can forward requests into SST.
+void vx_register_submit(vx_submit_fn fn) {
+    vortex::MemBackendSST::set_vx_submit_fn(fn);
+}
+
+// Notify MemBackendSST that the SST memory system has completed a
+// request identified by 'tag'.  The backend will produce a MemRsp for
+// reads and erase the entry from its inflight table.
+void vx_on_mem_complete(uint64_t tag) {
+    if (auto inst = vortex::MemBackendSST::instance())
+        inst->complete(tag);
+}
+
+} // extern "C"
+
+using namespace vortex;
+
+// Initialise static pointers
+MemBackendSST* MemBackendSST::inst_ = nullptr;
+vx_submit_fn   MemBackendSST::submit_fn_ = nullptr;
+
+MemBackendSST::MemBackendSST() {
+    // Record this instance so the C wrapper can find us
+    inst_ = this;
+}
+
+void MemBackendSST::reset() {
+    // Drop all inflight transactions; pending responses are ignored
+    inflight_.clear();
+}
+
+void MemBackendSST::send_request(uint64_t addr, bool write,
+                                 uint32_t size, uint32_t tag,
+                                 uint32_t cid, uint64_t uuid) {
+    // Save request metadata so we can form a response on completion
+    inflight_.emplace(tag, Info{cid, uuid, write});
+    // Forward the request into SST.  The SST wrapper will create a
+    // StandardMem::Read or ::Write using this address, size and tag.
+    if (submit_fn_) {
+        submit_fn_(addr, write, size, tag);
+    }
+}
+
+void MemBackendSST::complete(uint64_t tag) {
+    auto it = inflight_.find(tag);
+    if (it == inflight_.end())
+        return;
+    const Info &info = it->second;
+    // Only produce a MemRsp for reads; writes complete silently
+    if (!info.write) {
+        MemRsp rsp{tag, info.cid, info.uuid};
+        // Always route completions to bank 0; adjust if you need per-bank
+        // completion routing in the future.
+        if (mem_xbar_rsp_cb_)
+            mem_xbar_rsp_cb_(0, rsp);
+    }
+    inflight_.erase(it);
+}
diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h
new file mode 100644
index 0000000000..808a52aefd
--- /dev/null
+++ b/sim/simx/mem_backend_sst.h
@@ -0,0 +1,46 @@
+// mem_backend_sst.h
+#pragma once
+#include "mem_backend.h"
+#include <unordered_map>
+#include <functional>
+#include "types.h"
+
+extern "C" {
+  // Function pointer type for SST to call
+  typedef void (*vx_submit_fn)(uint64_t addr, bool write, uint32_t size, uint64_t tag);
+  // SST calls this to register its submit function
+  void vx_register_submit(vx_submit_fn fn);
+  // SST calls this when a memory response completes
+  void vx_on_mem_complete(uint64_t tag);
+}
+
+namespace vortex {
+
+class MemBackendSST : public IMemBackend {
+public:
+    static MemBackendSST* instance() { return inst_; }
+    static vx_submit_fn get_vx_submit_fn() { return submit_fn_; }
+    static void set_vx_submit_fn(vx_submit_fn fn) { submit_fn_ = fn; }
+
+    MemBackendSST();
+    void reset() override;
+    void tick() override {}
+    void send_request(uint64_t addr, bool write,
+                      uint32_t size, uint32_t tag,
+                      uint32_t cid, uint64_t uuid) override;
+
+    // Called from vx_on_mem_complete
+    void complete(uint64_t tag);
+
+    // Set by MemSim to push MemRsp back to crossbar
+    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
+
+
+private:
+    struct Info { uint32_t cid; uint64_t uuid; bool write; };
+    std::unordered_map<uint64_t,Info> inflight_;
+    static MemBackendSST* inst_;
+    static vx_submit_fn submit_fn_;
+};
+
+} // namespace vortex
diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp
index 740ee008b6..61bf174a86 100644
--- a/sim/simx/mem_sim.cpp
+++ b/sim/simx/mem_sim.cpp
@@ -1,127 +1,117 @@
-// Copyright © 2019-2023
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
+// mem_sim.cpp
 #include "mem_sim.h"
 #include <vector>
-#include <queue>
 #include <stdlib.h>
-#include <dram_sim.h>
+#include <math.h>
 
 #include "constants.h"
 #include "types.h"
 #include "debug.h"
+#include "mem_backend.h"
+#include "mem_backend_sst.h"
+#include "mem_backend_dram.h"
 
 using namespace vortex;
 
 class MemSim::Impl {
 private:
-	MemSim*   simobject_;
-	Config    config_;
-	MemCrossBar::Ptr mem_xbar_;
-	DramSim   dram_sim_;
-	mutable PerfStats perf_stats_;
-	struct DramCallbackArgs {
-		MemSim::Impl* memsim;
-		MemReq request;
-		uint32_t bank_id;
-	};
+    MemSim*   simobject_;
+    Config    config_;
+    MemCrossBar::Ptr mem_xbar_;
+    std::unique_ptr<IMemBackend> backend_;
+    mutable PerfStats perf_stats_;
 
 public:
-	Impl(MemSim* simobject, const Config& config)
-		: simobject_(simobject)
-		, config_(config)
-		, dram_sim_(config.num_banks, config.block_size, config.clock_ratio)
-	{
-		char sname[100];
-		snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
-		mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks,
-			[lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
-    	// Custom logic to calculate the output index using bank interleaving
-			return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1));
-		});
-		for (uint32_t i = 0; i < config.num_ports; ++i) {
-			simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
-			mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
-		}
-	}
-
-	~Impl() {
-		//--
-	}
-
-	const PerfStats& perf_stats() const {
-		perf_stats_.bank_stalls = mem_xbar_->collisions();
-		return perf_stats_;
-	}
-
-	void reset() {
-		dram_sim_.reset();
-	}
-
-	void tick() {
-		dram_sim_.tick();
-
-		for (uint32_t i = 0; i < config_.num_banks; ++i) {
-			if (mem_xbar_->ReqOut.at(i).empty())
-				continue;
-
-			auto& mem_req = mem_xbar_->ReqOut.at(i).front();
-
-			// enqueue the request to the memory system
-			auto req_args = new DramCallbackArgs{this, mem_req, i};
-			dram_sim_.send_request(
-				mem_req.addr,
-				mem_req.write,
-				[](void* arg) {
-					auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
-					if (!rsp_args->request.write) {
-						// only send a response for read requests
-						MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
-						rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
-						DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp" << rsp_args->bank_id << ": " << mem_rsp);
-					}
-					delete rsp_args;
-				},
-				req_args
-			);
-
-			DT(3, simobject_->name() << "-mem-req" << i << ": " << mem_req);
-			mem_xbar_->ReqOut.at(i).pop();
-		}
-	}
+    Impl(MemSim* simobject, const Config& config)
+        : simobject_(simobject)
+        , config_(config)
+    {
+        char sname[100];
+        snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
+        mem_xbar_ = MemCrossBar::Create(
+            sname,
+            ArbiterType::RoundRobin,
+            config.num_ports,
+            config.num_banks,
+            [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
+                // Bank interleaving: choose the output index based on address bits
+                return static_cast<uint32_t>((req.addr >> lg2_block_size) & (num_banks - 1));
+            });
+
+        for (uint32_t i = 0; i < config.num_ports; ++i) {
+            simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
+            mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
+        }
+
+        #ifdef USE_SST_MEM_BACKEND
+        backend_ = std::make_unique<MemBackendSST>();
+        #else
+        backend_ = std::make_unique<MemBackendDram>(config.num_banks, config.block_size, config.clock_ratio);
+        #endif
+
+        if (backend_) {
+            backend_->mem_xbar_rsp_cb_ = [this](uint32_t bank, const MemRsp& rsp) {
+                // Push the response into the appropriate crossbar output queue
+                if (bank < mem_xbar_->RspOut.size())
+                    mem_xbar_->RspOut.at(bank).push(rsp, 1);
+            };
+        }
+    }
+
+    const PerfStats& perf_stats() const {
+        perf_stats_.bank_stalls = mem_xbar_->collisions();
+        return perf_stats_;
+    }
+
+    void reset() {
+        if (backend_)
+            backend_->reset();
+    }
+
+    void tick() {
+        // Advance the selected memory backend
+        if (backend_)
+            backend_->tick();
+
+        // Drain requests from each bank and send to the backend
+        for (uint32_t bank = 0; bank < config_.num_banks; ++bank) {
+            if (mem_xbar_->ReqOut.at(bank).empty())
+                continue;
+            auto& mem_req = mem_xbar_->ReqOut.at(bank).front();
+            if (backend_) {
+                backend_->send_request(
+                    mem_req.addr,
+                    mem_req.write,
+                    config_.block_size,
+                    mem_req.tag,
+                    mem_req.cid,
+                    mem_req.uuid);
+            }
+            DT(3, simobject_->name() << "-mem-req" << bank << ": " << mem_req);
+            mem_xbar_->ReqOut.at(bank).pop();
+        }
+    }
 };
 
-///////////////////////////////////////////////////////////////////////////////
-
 MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
-	: SimObject<MemSim>(ctx, name)
-	, MemReqPorts(config.num_ports, this)
-	, MemRspPorts(config.num_ports, this)
-	, impl_(new Impl(this, config))
+    : SimObject<MemSim>(ctx, name)
+    , MemReqPorts(config.num_ports, this)
+    , MemRspPorts(config.num_ports, this)
+    , impl_(new Impl(this, config))
 {}
 
 MemSim::~MemSim() {
-  delete impl_;
+    delete impl_;
 }
 
 void MemSim::reset() {
-  impl_->reset();
+    impl_->reset();
 }
 
 void MemSim::tick() {
-  impl_->tick();
+    impl_->tick();
 }
 
-const MemSim::PerfStats &MemSim::perf_stats() const {
-	return impl_->perf_stats();
+const MemSim::PerfStats& MemSim::perf_stats() const {
+    return impl_->perf_stats();
 }
\ No newline at end of file
diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp
new file mode 100644
index 0000000000..82b211e82c
--- /dev/null
+++ b/sim/simx/vortex_simulator.cpp
@@ -0,0 +1,102 @@
+#include "vortex_simulator.h"
+#include <fstream>
+#include <vector>
+#include <string>
+#include "simobject.h"
+#include "dcrs.h"
+
+namespace vortex {
+
+// Fallback macro definitions in case they are not provided by VX_config.h
+#ifndef NUM_CLUSTERS
+#define NUM_CLUSTERS 1
+#endif
+#ifndef NUM_CORES
+#define NUM_CORES 1
+#endif
+#ifndef NUM_WARPS
+#define NUM_WARPS 1
+#endif
+#ifndef NUM_THREADS
+#define NUM_THREADS 1
+#endif
+#ifndef RAM_PAGE_SIZE
+#define RAM_PAGE_SIZE 4096
+#endif
+#ifndef STARTUP_ADDR
+#define STARTUP_ADDR 0x0
+#endif
+
+static std::string getFileExt(const std::string& filename) {
+    auto pos = filename.find_last_of('.');
+    if (pos == std::string::npos) return "";
+    return filename.substr(pos + 1);
+}
+
+VortexSimulator::VortexSimulator() : halted_(true) {}
+
+bool VortexSimulator::init(const std::string& kernelPath) {
+    // Initialize the architecture from macros or fallbacks
+    arch_.num_clusters = NUM_CLUSTERS;
+    arch_.num_cores    = NUM_CORES;
+    arch_.num_warps    = NUM_WARPS;
+    arch_.num_threads  = NUM_THREADS;
+    arch_.global_mem_size = 1ULL << 30; // 1 GiB of global memory
+
+    ram_ = RAM(arch_.global_mem_size, RAM_PAGE_SIZE);
+    proc_ = std::make_unique<Processor>(arch_);
+    proc_->attach_ram(&ram_);
+
+    // Load a kernel binary if provided
+    if (!kernelPath.empty()) {
+        std::string ext = getFileExt(kernelPath);
+        if (ext == "bin") {
+            std::ifstream in(kernelPath, std::ios::binary);
+            if (!in.good()) return false;
+            std::vector<uint8_t> data((std::istreambuf_iterator<char>(in)),
+                                      std::istreambuf_iterator<char>());
+            ram_.loadBinImage(data.data(), data.size(), 0x0);
+        } else if (ext == "hex") {
+            std::ifstream in(kernelPath);
+            if (!in.good()) return false;
+            std::vector<uint8_t> bytes;
+            std::string byteStr;
+            while (in >> byteStr) {
+                uint8_t val = static_cast<uint8_t>(std::stoul(byteStr, nullptr, 16));
+                bytes.push_back(val);
+            }
+            ram_.loadBinImage(bytes.data(), bytes.size(), 0x0);
+        } else {
+            return false;
+        }
+    }
+
+    // Write start address to DCRs for each cluster
+    for (uint32_t cid = 0; cid < arch_.num_clusters; ++cid) {
+        proc_->impl_->dcr_write(cid, DCR_LSU_BASE, STARTUP_ADDR);
+        proc_->impl_->dcr_write(cid, DCR_HALT, 0);
+    }
+
+    halted_ = false;
+    return true;
+}
+
+bool VortexSimulator::cycle() {
+    if (halted_) return false;
+    SimPlatform::instance().tick();
+    bool anyRunning = false;
+    for (auto cluster : proc_->impl_->clusters_) {
+        if (cluster->running()) {
+            anyRunning = true;
+            break;
+        }
+    }
+    halted_ = !anyRunning;
+    return !halted_;
+}
+
+bool VortexSimulator::isHalted() const {
+    return halted_;
+}
+
+} // namespace vortex
\ No newline at end of file
diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h
new file mode 100644
index 0000000000..eef604aca1
--- /dev/null
+++ b/sim/simx/vortex_simulator.h
@@ -0,0 +1,43 @@
+// vortex_simulator.h
+#pragma once
+
+#include "processor.h"  // for Processor, RAM
+#include "arch.h"       // for Arch
+#include <memory>
+#include <string>
+
+namespace vortex {
+
+/**
+ * A wrapper class used by the SST integration to drive the Vortex GPU
+ * one cycle at a time.  It encapsulates the architecture definition,
+ * memory subsystem, and processor instance.
+ */
+class VortexSimulator {
+public:
+    VortexSimulator();
+
+    /**
+     * Initializes the simulator.  If @p kernelPath is non-empty, the
+     * kernel image at the given path will be loaded into memory.
+     * Returns false if the image format is not supported.
+     */
+    bool init(const std::string& kernelPath);
+
+    /**
+     * Advances the simulation by one cycle.  Returns false once the
+     * simulation has completed (i.e. all clusters are halted).
+     */
+    bool cycle();
+
+    /** Returns true if the simulation has finished. */
+    bool isHalted() const;
+
+private:
+    Arch arch_;
+    RAM ram_;
+    std::unique_ptr<Processor> proc_;
+    bool halted_;
+};
+
+} // namespace vortex

From 251be55b23c4d1a9597b779be30d93a7474ff7f6 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Fri, 19 Sep 2025 00:34:36 -0400
Subject: [PATCH 02/15] simx: changes needed to build libvortex.so

---
 sim/simx/Makefile             |  30 +++++------
 sim/simx/VortexGPGPU.cpp      |  66 ++++++++++++++----------
 sim/simx/VortexGPGPU.h        |  19 +++++++
 sim/simx/mem_backend.h        |   3 ++
 sim/simx/mem_backend_sst.cpp  |  17 +++----
 sim/simx/mem_backend_sst.h    |   5 +-
 sim/simx/obj/common/util.o    | Bin 0 -> 18136 bytes
 sim/simx/processor.cpp        |  21 ++++++++
 sim/simx/processor.h          |   2 +
 sim/simx/processor_impl.h     |   2 +
 sim/simx/simx_config.stamp    |   1 +
 sim/simx/vortex_simulator.cpp |  92 +++++++++++-----------------------
 12 files changed, 138 insertions(+), 120 deletions(-)
 create mode 100644 sim/simx/obj/common/util.o
 create mode 100644 sim/simx/simx_config.stamp

diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 90e18e285d..6ffd0a2cd9 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -1,5 +1,5 @@
 include ../common.mk
-
+# now you see me 2
 DESTDIR ?= $(CURDIR)
 
 OBJ_DIR = $(DESTDIR)/obj
@@ -25,17 +25,16 @@ SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(
 SRCS += $(SRC_DIR)/decode.cpp $(SRC_DIR)/opc_unit.cpp $(SRC_DIR)/dispatcher.cpp
 SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp
 SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp
+SRCS += $(SRC_DIR)/mem_backend_sst.cpp \
+SRCS += $(SRC_DIR)/mem_backend_dram.cpp \
 SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
 
 # SST flags
-SST_CFLAGS := $(shell pkg-config --cflags sst-core)
-SST_CFLAGS += -I../../../sst/sst/sst-core/include
-SST_LFLAGS := $(shell pkg-config --libs sst-core)
+SST_CFLAGS := $(shell pkg-config --cflags SST-14.1)
+SST_CFLAGS += -I../../../sst/sst/sst-core/include/
+SST_LFLAGS := $(shell pkg-config --libs SST-14.1)
 
 VORTEX_SST_SRCS := \
-    $(SRC_DIR)/mem_backend.cpp \
-    $(SRC_DIR)/mem_backend_dram.cpp \
-    $(SRC_DIR)/mem_backend_sst.cpp \
     $(SRC_DIR)/vortex_simulator.cpp \
     $(SRC_DIR)/VortexGPGPU.cpp
 
@@ -74,9 +73,6 @@ MAIN_OBJ    := $(OBJ_DIR)/main.o
 
 DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d)
 
-# generate .d files alongside .o files
-CXXFLAGS += -MMD -MP -MF $(@:.o=.d)
-
 # optional: pipe through ccache if you have it
 CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX))
 
@@ -86,13 +82,6 @@ PROJECT := simx
 
 all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so
 
-$(DESTDIR)/libvortex.so:
-        $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \
-            -I./sim/simx \
-            $(SRCS) $(VORTEX_SST_SRCS) \
-            -shared -o $@ \
-            $(LDFLAGS) $(SST_LFLAGS)
-
 # build common object files
 $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE)
 	@mkdir -p $(@D)
@@ -116,6 +105,13 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ)
 $(DESTDIR)/lib$(PROJECT).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
 
+$(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS)
+	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \
+	-I./sim/simx \
+	$(OBJS) $(VORTEX_SST_SRCS) \
+	-shared -o $@ \
+	$(LDFLAGS) $(SST_LFLAGS)
+
 # updates the timestamp when flags changed.
 $(CONFIG_FILE): force
 	@mkdir -p $(@D)
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
index 882d2f98ce..11c221f1c0 100644
--- a/sim/simx/VortexGPGPU.cpp
+++ b/sim/simx/VortexGPGPU.cpp
@@ -1,6 +1,9 @@
 #include <sst/core/sst_config.h>
 #include "VortexGPGPU.h"
 #include "mem_backend_sst.h"   // needed for vx_register_submit and vx_on_mem_complete
+#include <vector>
+#include <utility>
+#include <unordered_map>
 
 using namespace SST;
 using namespace SST::Vortex;
@@ -25,21 +28,40 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
                       new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick)),
         new StandardMem::Handler<VortexGPGPU>(this, &VortexGPGPU::handleMemResp));
 
+    if (!memIface_) {
+        SST::Output out;
+        out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n");
+    }
+
     // Register callback so SimX can submit memory to SST
     instance_ = this;
-    vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) {
+    // Track app-specific tags by StandardMem request-id
+        // (e.g., inside your instance_ type)
+
+        vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) {
+
+        StandardMem::Request* req = nullptr;
+
         if (write) {
-            std::vector<uint8_t> zero(size, 0);
-            auto *req = new StandardMem::Write(addr, zero);
-            req->setDst(tag);
-            instance_->memIface_->send(req);
+            std::vector<uint8_t> zeros(static_cast<size_t>(size), 0);
+            // posted=false so we get a WriteResp
+            req = new StandardMem::Write(static_cast<StandardMem::Addr>(addr),
+                                static_cast<uint64_t>(size),
+                                std::move(zeros),
+                                /*posted=*/false);
         } else {
-            auto *req = new StandardMem::Read(addr, size);
-            req->setDst(tag);
-            instance_->memIface_->send(req);
+            req = new StandardMem::Read(static_cast<StandardMem::Addr>(addr),
+                            static_cast<uint64_t>(size));
         }
+
+        // Use the StandardMem-assigned ID to correlate responses
+        const auto id = req->getID();
+        instance_->tag_by_id.emplace(id, tag);
+
+        instance_->memIface_->send(req);
     });
 
+
     // Load the kernel or ELF
     if (!sim_->init(kernel)) {
         SST::Output out;
@@ -67,23 +89,15 @@ bool VortexGPGPU::clockTick(SST::Cycle_t) {
 
 void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
     // Inform SimX that this request has completed
-    vx_on_mem_complete(req->getDst());
+    const auto id = req->getID();
+    const auto it = tag_by_id.find(id);
+    if (it == tag_by_id.end()) {
+        SST::Output out;
+        out.fatal(CALL_INFO, -1, "VortexGPGPU: received response with unknown ID %llu\n", static_cast<unsigned long long>(id)); // id_t is 64-bit; cast keeps the printf format portable
+    }
+    else{
+        vx_on_mem_complete(it->second);
+        tag_by_id.erase(it);
+    }
     delete req;
 }
-
-// Register with SST
-SST_ELI_REGISTER_COMPONENT(
-    VortexGPGPU,
-    "vortex",           // element library name
-    "VortexGPGPU",      // component name
-    SST_ELI_ELEMENT_VERSION(1,0,0),
-    "Headless Vortex GPGPU Simulator",
-    COMPONENT_CATEGORY_PROCESSOR
-)
-SST_ELI_DOCUMENT_PARAMS(
-    {"clock", "Clock frequency", "1GHz"},
-    {"program", "Path to the kernel or ELF to load", ""}
-)
-SST_ELI_DOCUMENT_PORTS(
-    {"memIface", "StandardMem port to connect to the SST memory hierarchy", {}}
-)
diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h
index feeb538829..664124fc5e 100644
--- a/sim/simx/VortexGPGPU.h
+++ b/sim/simx/VortexGPGPU.h
@@ -5,6 +5,7 @@
 #include <memory>
 #include <string>
 #include "vortex_simulator.h"  // wrapper around SimX
+#include <unordered_map>
 
 namespace SST {
 namespace Vortex {
@@ -17,6 +18,23 @@ class VortexGPGPU : public SST::Component {
     void setup() override;
     void finish() override;
 
+    // Register with SST
+    SST_ELI_REGISTER_COMPONENT(
+        VortexGPGPU,
+        "vortex",           // element library name
+        "VortexGPGPU",      // component name
+        SST_ELI_ELEMENT_VERSION(1,0,0),
+        "Headless Vortex GPGPU Simulator",
+        COMPONENT_CATEGORY_PROCESSOR
+    )
+    SST_ELI_DOCUMENT_PARAMS(
+        {"clock", "Clock frequency", "1GHz"},
+        {"program", "Path to the kernel or ELF to load", ""}
+    )
+    SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
+        {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"}
+    )
+
 private:
     bool clockTick(SST::Cycle_t cycle);
     void handleMemResp(SST::Interfaces::StandardMem::Request* req);
@@ -26,6 +44,7 @@ class VortexGPGPU : public SST::Component {
 
     std::unique_ptr<vortex::VortexSimulator> sim_;
     SST::Interfaces::StandardMem* memIface_;
+    std::unordered_map<SST::Interfaces::StandardMem::Request::id_t, uint64_t> tag_by_id;
 };
 
 } // namespace Vortex
diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h
index 73f63b2ad0..d93e39d503 100644
--- a/sim/simx/mem_backend.h
+++ b/sim/simx/mem_backend.h
@@ -1,11 +1,14 @@
 #pragma once
 #include <cstdint>
+#include <functional>
+#include "types.h"
 
 namespace vortex {
 struct IMemBackend {
     virtual ~IMemBackend() = default;
     virtual void reset() = 0;
     virtual void tick() = 0;
+    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
     virtual void send_request(uint64_t addr, bool write,
                               uint32_t size, uint32_t tag,
                               uint32_t cid, uint64_t uuid) = 0;
diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp
index 3cc9da2588..a06014aab1 100644
--- a/sim/simx/mem_backend_sst.cpp
+++ b/sim/simx/mem_backend_sst.cpp
@@ -6,10 +6,10 @@
 // be correlated back to the correct cluster and request.  When a read
 // completion is observed via vx_on_mem_complete(), the backend pushes a
 // MemRsp back into the crossbar using the stored cid/uuid.  Writes
-// complete silently.  Bank routing is currently fixed to bank 0; this
-// preserves correctness but may underutilize bank-level parallelism.
+// complete silently.
 
 #include "mem_backend_sst.h"
+#include <VX_config.h>
 
 extern "C" {
 
@@ -49,10 +49,9 @@ void MemBackendSST::reset() {
 void MemBackendSST::send_request(uint64_t addr, bool write,
                                  uint32_t size, uint32_t tag,
                                  uint32_t cid, uint64_t uuid) {
-    // Save request metadata so we can form a response on completion
-    inflight_.emplace(tag, Info{cid, uuid, write});
-    // Forward the request into SST.  The SST wrapper will create a
-    // StandardMem::Read or ::Write using this address, size and tag.
+    uint32_t lg2_block = log2ceil(size);
+    uint32_t bank = (addr >> lg2_block) & (PLATFORM_MEMORY_NUM_BANKS - 1);
+    inflight_.emplace(tag, Info{cid, uuid, write, bank});
     if (submit_fn_) {
         submit_fn_(addr, write, size, tag);
     }
@@ -66,10 +65,8 @@ void MemBackendSST::complete(uint64_t tag) {
     // Only produce a MemRsp for reads; writes complete silently
     if (!info.write) {
         MemRsp rsp{tag, info.cid, info.uuid};
-        // Always route completions to bank 0; adjust if you need per-bank
-        // completion routing in the future.
         if (mem_xbar_rsp_cb_)
-            mem_xbar_rsp_cb_(0, rsp);
-    }
+            mem_xbar_rsp_cb_(info.bank, rsp);  // guarded by the if above; bank was recorded in send_request()
+    }  // end if (!info.write)
     inflight_.erase(it);
 }
diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h
index 808a52aefd..678dec0b81 100644
--- a/sim/simx/mem_backend_sst.h
+++ b/sim/simx/mem_backend_sst.h
@@ -32,12 +32,9 @@ class MemBackendSST : public IMemBackend {
     // Called from vx_on_mem_complete
     void complete(uint64_t tag);
 
-    // Set by MemSim to push MemRsp back to crossbar
-    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
-
 
 private:
-    struct Info { uint32_t cid; uint64_t uuid; bool write; };
+    struct Info { uint32_t cid; uint64_t uuid; bool write; uint32_t bank;};
     std::unordered_map<uint64_t,Info> inflight_;
     static MemBackendSST* inst_;
     static vx_submit_fn submit_fn_;
diff --git a/sim/simx/obj/common/util.o b/sim/simx/obj/common/util.o
new file mode 100644
index 0000000000000000000000000000000000000000..6e8785424bf8ab0f48eb0b6ff4ced8860499c1df
GIT binary patch
literal 18136
zcmeHOeQ;aVm4C9VL`jIG@U?dM5+I5LRF(2+!%}3&N}h-UZtQ@!5EMn0KNj2a%5vgB
zD517PR7H^8$!^)<V;D+z*e$TM6G~wyb(b_rDdnp$OSkP3I$ekh<#P#v0^Z-bAJXx&
z9)z^}$Ik4XdAj%g?z!ijd+s^s-uIq;eKfwJ%Hc3{aTs4V%6A$yjP^*mEt?TzzELY^
zXQ690t~t1l!A0YJxaQ$%Y>VZeUu)*yYc=zKG)G?db(n?fv+B@e=9>fV@VGhlTyB;*
zcE~%r6<}<^>+yUb5F7c(yQ$eMT!4mSXp#5(QDghecp(q~6wjyJO?#`)Hp~JTbeQK#
zc0(=Z$R6Jn>l&|o+<RKYSUvW;+zYRrywl9TWaeK9KX(w!A<}8)_d$VxnP2XU=YMPF
z-w+*&^=scUix(210L9O{-cvtpeJm&j|4}j-8_&%%8y?RcV~&hFu6VrDds;JC7hN<E
zx1B-nX`|FONDmq>r(e8kruqJFOvg*Gf?IOR|C{M}(LDX2nQu7^?cG<bdz|Ra!u5wI
zBm*(N@$$yY8rPB~KU&*r-thW8FmOH>I3`})9oPvvv)CK($Mf$q(~bsi0a18d$|(rl
z!iUU#qk(R<y3`(K?X9{!GVU|;qk?gGyc4Yf^q7(b4ce{<<ArXwS!nkWwo_rT!gUAZ
zd1zxg-Zb;4aq02mmcV**UvnTtGowa5HRAb6YDcIId)AOw_)F571E%NzMT??{Xyq7D
zyXu4D&4IcpQuFU)Y&Ur$i5b>}dq)FXMVkhgMR`Y~1&aZ*NN|2HAvJ>EBUK<bi%aC7
zH<b+NTEIe>`8Ul%%R$re`$ALA$bkn(_Pg^<a$F3Ye2shYW5n|CJNRr%44SwFqp@-C
z=!a;<0Y}V2JYdEf#=SQ_4Jfu?fB3oZp?LmPJ|DBNB``3}P9x*ftRhyCCUL=*fDis!
zP*W@pI5<Btza|xUtPqte2!As3S5}wSLQr9)p_*bKeXJaSE6f}AkZZ>C2Ph01*EL?%
zm_RUY#Sp_RO(&L@UOBN~a&mH~S<D3n^du)v1SGgq7{u~>L==dS$p6Hq{kXspgfixc
zxEwDk^W5<IEi<4(eoMejF*D$VlEp^@16WfNfmCCSo$a6wkH<kDiKE}oEf)T1_)uql
zN1&w|gW>unYB51&AYxu}&Cx)+?gr_4-7d7dX=r!pJ|d!MK;P$oEM2j%9I4?prRR=n
zp)bY_lP{u}=#M)NBc*W#<HdUeTR{<rQ;L8>1k*%S1S?-5HN~iS{tZlDF&Hu)af302
zDqxoydkq9ln&hr$BA0{@iS9#i2e5H@M;*|dqG&y(3dlb|+9A`H!ZBVDc~vP9ey%gW
zD-eNJvBIT3a==(&Qw^D<UQZ2qM=&ci9u!vAIA~f}yPdVy>o!(LDzLj+)EFc5C8Y|R
zVbWZn1<8<76e3P^#R@C^fFSiPp?Wi-tPnY979Xu90NqFjGig4<n%)iFLxijslwj3u
z=66Vm<!mW7Lt+Uf+$u}zWjR22nbfDdL0Y5RBjezzZ2{9uPJZ1D(mLH%D=Q|TxKhSZ
zetDRNVu?Z^S4-0_rlK@hjYQ@KyL!cLHSM;Ru`OaOGB>Fd!3{z^?szLV3%jj%^o&^q
z><-)lNwJYnyxtopp)gtw@0iPpqk#AO)q(9;KjMOfS5L$yaVg#%AoUPh&tYc|p?9}p
z|Ekic2Hi^AErGgt{uOzH`8<FgWOJ$nqdQTPdRtX}C)MC`4{9o}ohbL~!Y%>f_~9M9
z9-i~adFTXV<Z>m*XBO5RJk;#-yK>K(g+~K-g224s(ZE*JGNY1E(~?n%l1x)+kt-}_
zg+=2=NA|9lvV_&jvS=&W5@_cuq|_D;NzRD=I_a9)D}`6b1H&ahCl`m$N|$L4n6w48
zQ11bnnd+78lJ5yI)UEVfkI5?0$ggH`CAQF+w7xZFA%tzwZ93jL_}+VylgiQJ1+|IN
z6a?4L&Q!7D3UI}uVxgogr0;{Jn*vw?_yGW|Rhg2KC`{omWzxcxw9DTn(`h;UN?BFS
ztCS&W@1z}}w9|RV7SeWtDY~)c(bh73zbkC=k33Z^be(wq!^ug#Kdr_|cWeUF77;M<
z!AI29Ot-s*gVFx971@;br=3*i-;d|vbCh6V$NT;pcKiW$Nh%ka4bTid-cg#yBUlKy
zqp)r{rDGj%Bi5l1QnuR>ct()6zz#bC+o_Fuoa%zxY<Si?dOMl2gqea)j47Dzy%C@2
z+b_l;N|bv0B5oi?ykSy$EU90P4utAfnzq~(lqQ9_G=f%!CMXZ6?Xagn5wzX9U2KG$
zIIWvxBTuI5#WG-hK{}vG(H>^)t-1|A7gj_+*-VHitirzbwwZrg8b+!^XQE;wPNEPc
zrqCGCVinnzKq#L7MBVHbhEw~dJiVAmE@Ehb_RaUjzL~=6l=sa$VG8V<R{_*PAj3i)
z$jZW%f2uS=7Sypo=9MbIRSuVKR4nW#&6OvYDMc&@GfIn7V1y_GNNvF#qqu<-2a>C%
z`>6C2<iYJA_Kxm>b5n#!k5Z@0iTj1iH-j7enKlH9PfClWCo#E!kUL<opn$z8$;I>U
z==>f>P>3#&(BO$DKA4=8&s*<$GZCM6fs{Lu{#fHY?sk1bs>LZO!F}=$k~B`bTmTmk
z$~Tdl3kumQCXpv!8x8CyO6sE}flcV1ippQubBG$w+vp~8Vn}W>PIYP3{?9C=4B07*
z`2TxeDIN}DQK{&ND(!g_4Z<TcGNXFu#e5Uv@J0R_fnuzF;tN0p7v3eGY~&M+Z0}=F
zIUGNdw%rpF$2ouAJ`(;%P6JDANDlU-5`(#HPk-0>=O<PtQkj0-4W)Aa{-NGpe}5+D
z-<a&}=`f7Yu!TA~Fp%!=Fup_<7iN*+B;O#>+X);mt~br%@^-UfUv4fQgWAjcj0(Nt
zwe@nboL2~w5-%!@A70CRENK+KsOE=8`J^SCpm?3)7gdQc|K?-e;I~e2#{+CE|7*Fv
z>inpmX=(Y@s{s9I&7Z4jZ`NrV?542LIa7j^*uiW1kuO%<<Sw6DrMe)j(|k+LO2Z>c
z!b_qq|EOu`{Y!C?+jAj;4EGhe_tLf^&V{=w?4~8zAuYSSaTMjoAtaV;qii*?0jC*#
zWO?o7QtRK#52D!Q^^@?F^Be8<Na9p<*2OIST7LNur$kw1Y`N8nP23~4j!!)iL$VkR
z$rEEBH;l<iN9_+CuA65VBTm=I4C9;51@F6zk?P+&Zk<7$)h9+sPh;}dT@J@L9ETjP
z;(X)YD%TD3jfbk1yjN>{uiEwUeB-y(u0PB-el^qe!hGZ4OxI868++WYAI>-aVUFtq
z^v-ep<9x?Wb6ueS&$)zu!{eH$HSX}Zep72a<a0d>+~@j9t?`<#;k)yVH|Dt>u65jX
zoa???W83jAAP=43`u2QBv6e>NQ%g9uR;kxK$E<Z&#HXw9qd?hZ7%P`AKi^-scEeDA
zZpa@F1(yUvUkne4+wj*GhJvBR3luCyz)uVxE^>*Lm#j6gM&pFxIZ%}2RqeQDjo}#f
zIZirerh6wuhJc@}`o^hl^7wXCHG68dIh#HHk?NJ6P}NnlJ^n^dO{2%x<Z(ym2>KQ^
zCQNnI;~S~Ea<<34a*pY78v#HAs-O5%|2E<|;=IJ;zf5py1f36EbljjKJRfxpu6qeU
zG3h5xyf=Yb_U$JO<udv~-?pl%D0S*NIId}liy6+ml_*Zq^CW#qFDhrlC}bOBg~xYu
zRg<UYCTEk!U#xEO)a`IJdqUf0G<xbsX0G%ca5`_Tn(e6v*667N4QOE>Siz)s@AL#q
zrLs(n9+|mq#tv7p`X=Yi6f?92i)jD~=`VCZO}dTaqA~_Ue7bJUQnzR3I<J@k-5QTd
zw`b;lif;GAFSKq_tLeYwt<}cch{EZ`S@OE6eNN$>Q!%=88tR$B(&aQ}S`hi&ql)n#
zXB19@ykvTD8ndQ@K%C+W%JeR0$s4EkISubr0C&T<su;cSOJKJvT;G#Tb#V&M*LUUG
zX&hI+VJ)Zd90sVS(>P%oNN=}|3U-qVHQhedLNB*9`xvtnUTcxyQMlDVjuW`}v5X))
zA3F^rz&TB-By&>svJ`q!p?qO2oi_Nt*x=u_!Jo3hUjlwCG#yuhMZQD(V;g!GR3~~p
zwU8QjCh$u7pQGqQ;73L0!4)?2tAL-u{MnSljO{k`du;G0ZSW6l@Y$GBCAljFPT5cv
z?eC+&$qo@UFUq-8?nZrjRoN2SBgcW4QAta`zp%kSu)!PPn3ec^%?5wW27lTHKLs0X
zCH_lo@at{xzq7%=V>7Q8ZRk%%>a3*CIvczfIIYV7=0}D08!CHj=-;xzPey94B==$)
z{3aXx2^)MC9HSC{(+0oE2LI3oKMPwzCH|M&;8`2|AshS!8~i03+=CDLmGtr3;2|6Q
zDjVDw%JuXHQv(A=;;Wam=9VQAso~*pIJ}TQIL1<~xo|kuoy;b3*<?>{FqVq8Ch-#u
zFXWGuu@*oZ+Y+tI60Ny~q0UTpQ!?9;=*gwC$y^45lU>1{!E7=ZB56xEX0o~Ta5$99
zB)ZeX^pM)LL`aYRbug9b?chYqvgyH0@5XeZv!^$m7)a*2qia?jg~CzjCYcB0%R>f!
z!t}F{ewNVBVv8Y`a6@bu>a@2-^OERBBe61mX;Wi7ap{T`t<knbTVqo^ny|1uLfbZw
zX|w5ME*)Ld*Jt#l`%(j&!3t~9kVxdZvzbka-b@!B@6*|ACfl+qWh7w4Wa{cfs{86h
zXR@c4TW}FsE;^h`_Yd}D`lCb+i)T~aSx&VW?*h8|(;bPvWN&XK6(zx<#fcVbqRwnO
z9c|fYP)!8Pfj)=b7Lwh*+IHE|4Ub}MM{mo<KEVUVDN9Z#`)pQ>20`3XOflJ;ZVn3#
zj!yocHaH|De_AXU&6fwW>CRvx(KS4r7)WOaGyPbA+~&ka5l5}L`ku^S0)Coau1(pR
zYmk16o=P4|^KWb86IS#Y`Kk!NFkYpRGVYcAK10ajp8lR(w59TzgcCxEDXV@|N~lF#
zE*BBPmL?K34NM@>(?6Kb<|@r2x(1Ysq>&VL4DlLDL|d08T2{3#N{Hxb%}^LBHhocy
z%CsG2JcmQzluZxxCR1s|6#|f47Hd#izGk_6fIe>?CMA=Dg9u%u#n#1tiiRCMg9Dkt
z^t6chd}|}4Vr~DXp8k%+n)KjMUz#SX{ru0F&V?&uF&8V3@3ePb>2XI+i<QtmLfO+U
znJCvgLh8jCyjG?BOUx57gzlvP`Z68DLZ2DAOH#e*WHySy3&mhC$YGC|7SQ$597i6{
zQxov#9?reCoPzP%*~V%zkt~Db4sPyGMO#wXYG`v?)|Jln_VlMK1+5&zBDY7awTj$j
zi8Wk}sT(cAqIF544S6RjBc$!JBU@f_+#{pJ+OW{U0#R1R22IX`<5GpKVycR^=PVK5
z;&nVZjx^N5Q0Wwq?(fQVvtt1C?I<UcBZ>^E8%}@8#D;WNPk$nn%%!?<q96pH0rdTm
znxev7KbK31@%Z~n`4}SJJJGv5e5V)XE{31N@b4)ckE?i|DbqisaC*EsmEn&v{4|C?
z#qiS^{+hyxKfRUF`shCtBK~JGe5-oSC7eE_X!;u!eh#n@!=GR{y;IQipD;cP8BXsN
zs1Tn;xHNse4Ze=y3mAP);iM<Mi`0CsW%Oq=oZcT$A$qRQGYY4Q-fL<;FEV=i%Lt8s
z#PFpIZ^rX372>}Pm!@B(aO-^6+u(x?=lbtr{OcJ1TNypK&%KPEWNUr)FnYd@FEILg
zMt>qr2*o<D1q^RMzvjP);oLv>GMxL>&lPU<EBeC(DuiEzOY`r-`vWS}&DXJC;Y1(7
zrRjIr;I}hAjg0;thI9T8GrWn>)B6l6q|b6(TCNlC8>kS@=Q~^Bq-QfOO+Sy}d|kf9
z_;7uGVS^uLe4>p1nRq`zh2(Pl5*vIo!?|DGp>UcPkGpR(`m@oe=M~3$3o1mv0++`B
zis5`-4>Eiuqko0rbqxQ5!maai&jJyO6+g)a4>FwVzeeGtKbPCfaFU_t^#tR?*YQ<G
z&*O{Umrx-d3vg*ZCotS(IK4-qLi7<_ntrJb-l}lYKgQ_SGyGzPU&rus7=D++3FGtn
z5u@kxnq)YSi$V2@mC`7^_1F4et8mhD6~k|~!K;q}5sGzPY8lSs&aZH*{`W9??uY-%
z=p&G$^?aJq^StqEhI2k|Go0rQpL$<O`g8l7rEt=d%RP_b+#i}4&gm13KVSD=hF=UB
zdR}>klWdLO&-n9o{~4p_>;4YIdAuCw0aFy3FFzMGFr1%@HZh#r=LLn6j@%B186H8O
z)_;x{Hz-zn)+wCy;r3a=aBiP94CnUQ#Q1aj>|*rXJ`XXR+vh)Q@Ru2Xp0_`;!M#2)
z-D!F642JXkwnE|7I9kteKHtX}pAbZ8JO7H&^Z9<DaEkX68T}`WKF;vD^v_=?qz^wo
z9ItTVb1W_`x0ccKb8-Vaf1btYFCxY$#Ai0HnRJ7)%7XuegpJEB_?Hx(vfx^uvzVU5
zKcVQu7JRqDFSOu4Q+UjR>wmRdYr*w*siXzJSk1rBg10Grvjx9W;bRv328I8v1^*|7
z-)_MlRQNwz@W<7>9<<=+D*7K=@I4CuxdmUN`25m>k171WEcmmE&)XLKE=8}`pM0Fh
z#d)fqmYK&XJ+n|DdcI#b3sm|o^=W_j-{!CFVbxR1CHw-Wk4hUNzW$Qo+TV%(LWZA?
zHWk9p#ii#(&puRWt~?K1EKo_$^8nYA^VjiV)pIlB!|kcpndn#J()#RB^j3ZJIukwD
z=Uzom_$9bBA03y3b3Gqs^p`UF#~9wia2?0Q=PL|<PSKN|{JG|rHn{%ZAM3nMReo)q
zmyRdG`MNJ+^c=r{;j~8D4)p&uRIGf~*x)*USm(Q4(OdPugYn_(qVo#z;r9Oyqv!U2
zfZ^Q!;-4Cn{+#|tHuO(2ocp=XTeM%$rO#D5&uLtrvvuCmxISlKs3;`neATAPuiBbk
z-5PBcd|ch<EV!ZczRiN`zr%Qd;cnHY>Pd#rV)(lZ4>DZ)q1IFHJM)x$!sjshE{1bH
zI~YEj(f>WeiO(sj?{0>Z4E??TK86!L<!CBSB0`~x%dKNL$<=?~5oS2?8OJ3`aBy>9
zF1f)7;*G7WyLBs@?o9^qB{*jUJCeDi5!^61Xautvx*_s#uzu<-KG~MW1jWnQ()W@h
zfAcKfkN!FM&?h{`XQ;oo=jwFtW+T{@!l&8cRC*wXFW(z_(~3$=3;GAe$H(BOeci*C
z>b|~oKMaGv(@qER2b^CNA9&S$SN~9OAdAll*&M_Plcc*7o!KNlwgkJfnV|t~{iCk`
z27aNo`YAjj>h5?|(|$)6<rylvwi~bG@fb-i#*%;Oak};cBiB@~VbGFgy{g}{DCbkS
zu1-ROcttdz>Y8(9;{o-2MY=n2>6orjbYDPC@~MxPwa4<ia$AK_2{;ce;(4^=!x&3E
z7vkcjUCF0?fETU5)+2(>O5?w-#*=+`(c^SY>qPAtDy}u{ooc+P0aaV=PkV*1zh(S>
z+N4m{i(lzGlsC2h+WwS>E6IOL$=7*Dv6=R_GT%f;CHZyqk`-lGR>J=%Us>h<4w#Uy
z=ybJ$UjCz0Tjr(hCVLIzC8EI<(SWM&RPyN`x2cf-kt<r|)BZs`En<wg`V-IR(Z>tP
z;$=1l(|Dc7b)TOe9#OPBc{lyug~65V7ox)iie4wupVnUU)A&*)KSac=)G7HCAH0yR
zRCIlj8ZXiEZ<`ulMr2Em)A*$T=<IH_U%MLb*MzFp`fHf>UE*mSKdi><?<|_$I(`EN
zS6Y8l+3yx&jzadMCDr4#{VK^O`Ue!je)V8Nw-n3P@f1_m+ylrM_S3In+`$hGxL4|k
VKKh~bT50@&FH4eP%K*!*@jp&36=DDY

literal 0
HcmV?d00001

diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp
index d3b730dc8b..0cb00de010 100644
--- a/sim/simx/processor.cpp
+++ b/sim/simx/processor.cpp
@@ -159,6 +159,19 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
   return perf;
 }
 
+bool ProcessorImpl::cycle() {
+  SimPlatform::instance().tick();
+  bool anyRunning = false;
+  for (auto& cluster : clusters_) {
+    if (cluster->running()) {
+      anyRunning = true;
+      break;
+    }
+  }
+  perf_mem_latency_ += perf_mem_pending_reads_;
+  return anyRunning;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 
 Processor::Processor(const Arch& arch)
@@ -196,6 +209,14 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) {
   return impl_->dcr_write(addr, value);
 }
 
+bool Processor::cycle() {
+  try {
+    return impl_->cycle();
+  } catch (...) {
+    return false;
+  }
+}
+
 #ifdef VM_ENABLE
 int16_t Processor::set_satp_by_addr(uint64_t base_addr) {
   uint16_t asid = 0;
diff --git a/sim/simx/processor.h b/sim/simx/processor.h
index 741b04f57d..4bb3f23fc6 100644
--- a/sim/simx/processor.h
+++ b/sim/simx/processor.h
@@ -35,6 +35,8 @@ class Processor {
 
   int run();
 
+  bool cycle();
+
   void dcr_write(uint32_t addr, uint32_t value);
 #ifdef VM_ENABLE
   bool is_satp_unset();
diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h
index 952b28222f..7b4537677e 100644
--- a/sim/simx/processor_impl.h
+++ b/sim/simx/processor_impl.h
@@ -38,6 +38,8 @@ class ProcessorImpl {
 
   int run();
 
+  bool cycle();
+
   void dcr_write(uint32_t addr, uint32_t value);
 
 #ifdef VM_ENABLE
diff --git a/sim/simx/simx_config.stamp b/sim/simx/simx_config.stamp
new file mode 100644
index 0000000000..1eb1208835
--- /dev/null
+++ b/sim/simx/simx_config.stamp
@@ -0,0 +1 @@
+-std=c++17 -Wall -Wextra -Wfatal-errors -fPIC -Wno-maybe-uninitialized -I/nethome/jsubburayan3/vortex/sim/simx -I/nethome/jsubburayan3/vortex/sim/common -I/export/nethomes/jsubburayan3/vortex/hw -I/nethome/jsubburayan3/vortex/third_party/softfloat/source/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/spdlog/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/yaml-cpp/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/src -DXLEN_64  -O2 -DNDEBUG
diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp
index 82b211e82c..2e64d5215e 100644
--- a/sim/simx/vortex_simulator.cpp
+++ b/sim/simx/vortex_simulator.cpp
@@ -4,99 +4,65 @@
 #include <string>
 #include "simobject.h"
 #include "dcrs.h"
+#include <VX_config.h>
+#include <VX_types.h>
 
 namespace vortex {
 
-// Fallback macro definitions in case they are not provided by VX_config.h
-#ifndef NUM_CLUSTERS
-#define NUM_CLUSTERS 1
-#endif
-#ifndef NUM_CORES
-#define NUM_CORES 1
-#endif
-#ifndef NUM_WARPS
-#define NUM_WARPS 1
-#endif
-#ifndef NUM_THREADS
-#define NUM_THREADS 1
-#endif
-#ifndef RAM_PAGE_SIZE
-#define RAM_PAGE_SIZE 4096
-#endif
-#ifndef STARTUP_ADDR
-#define STARTUP_ADDR 0x0
-#endif
-
+// Utility to extract file extension
 static std::string getFileExt(const std::string& filename) {
     auto pos = filename.find_last_of('.');
     if (pos == std::string::npos) return "";
     return filename.substr(pos + 1);
 }
 
-VortexSimulator::VortexSimulator() : halted_(true) {}
+VortexSimulator::VortexSimulator()
+: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
+, ram_(0, MEM_PAGE_SIZE)
+, proc_(std::make_unique<Processor>(arch_))
+, halted_(true) {}
 
 bool VortexSimulator::init(const std::string& kernelPath) {
-    // Initialize the architecture from macros or fallbacks
-    arch_.num_clusters = NUM_CLUSTERS;
-    arch_.num_cores    = NUM_CORES;
-    arch_.num_warps    = NUM_WARPS;
-    arch_.num_threads  = NUM_THREADS;
-    arch_.global_mem_size = 1ULL << 30; // 1 GiB of global memory
-
-    ram_ = RAM(arch_.global_mem_size, RAM_PAGE_SIZE);
-    proc_ = std::make_unique<Processor>(arch_);
     proc_->attach_ram(&ram_);
 
-    // Load a kernel binary if provided
+    // Load the kernel image if provided
+    // Load the kernel image if provided
     if (!kernelPath.empty()) {
         std::string ext = getFileExt(kernelPath);
         if (ext == "bin") {
-            std::ifstream in(kernelPath, std::ios::binary);
-            if (!in.good()) return false;
-            std::vector<uint8_t> data((std::istreambuf_iterator<char>(in)),
-                                      std::istreambuf_iterator<char>());
-            ram_.loadBinImage(data.data(), data.size(), 0x0);
+            // Load raw binary at STARTUP_ADDR
+            ram_.loadBinImage(kernelPath.c_str(), STARTUP_ADDR);
         } else if (ext == "hex") {
-            std::ifstream in(kernelPath);
-            if (!in.good()) return false;
-            std::vector<uint8_t> bytes;
-            std::string byteStr;
-            while (in >> byteStr) {
-                uint8_t val = static_cast<uint8_t>(std::stoul(byteStr, nullptr, 16));
-                bytes.push_back(val);
-            }
-            ram_.loadBinImage(bytes.data(), bytes.size(), 0x0);
+            // Load Intel-hex
+            ram_.loadHexImage(kernelPath.c_str());
         } else {
-            return false;
+            return false; // unsupported format
         }
     }
 
-    // Write start address to DCRs for each cluster
-    for (uint32_t cid = 0; cid < arch_.num_clusters; ++cid) {
-        proc_->impl_->dcr_write(cid, DCR_LSU_BASE, STARTUP_ADDR);
-        proc_->impl_->dcr_write(cid, DCR_HALT, 0);
-    }
+    // Program base DCRs (match main.cpp behavior)
+    const uint64_t startup = STARTUP_ADDR;
+    proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu);
+
+    #if (XLEN == 64)
+    proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup >> 32);
+    #endif
+    proc_->dcr_write(VX_DCR_BASE_MPM_CLASS, 0);
 
     halted_ = false;
     return true;
 }
 
 bool VortexSimulator::cycle() {
-    if (halted_) return false;
-    SimPlatform::instance().tick();
-    bool anyRunning = false;
-    for (auto cluster : proc_->impl_->clusters_) {
-        if (cluster->running()) {
-            anyRunning = true;
-            break;
-        }
-    }
-    halted_ = !anyRunning;
-    return !halted_;
+if (halted_) return false;
+// Advance one cycle through the processor interface
+bool running = proc_->cycle(); 
+halted_ = !running;
+return running;
 }
 
 bool VortexSimulator::isHalted() const {
     return halted_;
 }
 
-} // namespace vortex
\ No newline at end of file
+} // namespace vortex

From 258e3e7cf13e056ffebc22be746994841756469c Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Fri, 24 Oct 2025 00:24:22 -0400
Subject: [PATCH 03/15] latest changes - not working

---
 sim/simx/Makefile             |  21 +++-
 sim/simx/VortexGPGPU.cpp      |  45 ++++++-
 sim/simx/VortexGPGPU.h        |  12 +-
 sim/simx/emulator.cpp         |   3 +-
 sim/simx/mem_backend_dram.cpp |   1 +
 sim/simx/mem_backend_sst.cpp  |   7 ++
 sim/simx/vortex_simulator.cpp | 217 +++++++++++++++++++++++++++++++---
 sim/simx/vortex_simulator.h   |  32 +++++
 8 files changed, 315 insertions(+), 23 deletions(-)

diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 6ffd0a2cd9..4aa769f379 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -6,6 +6,9 @@ OBJ_DIR = $(DESTDIR)/obj
 CONFIG_FILE = $(DESTDIR)/simx_config.stamp
 SRC_DIR = $(VORTEX_HOME)/sim/simx
 
+# SST StandardMem bridge (default off)
+SST_USE_STDMEM ?= 0
+
 CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
 CXXFLAGS += -fPIC -Wno-maybe-uninitialized
 CXXFLAGS += -I$(SRC_DIR) -I$(SW_COMMON_DIR) -I$(ROOT_DIR)/hw
@@ -34,9 +37,19 @@ SST_CFLAGS := $(shell pkg-config --cflags SST-14.1)
 SST_CFLAGS += -I../../../sst/sst/sst-core/include/
 SST_LFLAGS := $(shell pkg-config --libs SST-14.1)
 
-VORTEX_SST_SRCS := \
-    $(SRC_DIR)/vortex_simulator.cpp \
-    $(SRC_DIR)/VortexGPGPU.cpp
+ifeq ($(SST_USE_STDMEM),1)
+  CXXFLAGS += -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM
+  LIBVORTEX_SST_DEFS = -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM
+  VORTEX_SST_SRCS := \
+      $(SRC_DIR)/mem_backend_sst.cpp \
+      $(SRC_DIR)/vortex_simulator.cpp \
+      $(SRC_DIR)/VortexGPGPU.cpp
+else
+  LIBVORTEX_SST_DEFS =
+  VORTEX_SST_SRCS := \
+      $(SRC_DIR)/vortex_simulator.cpp \
+      $(SRC_DIR)/VortexGPGPU.cpp
+endif
 
 # Add V extension sources
 ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
@@ -106,7 +119,7 @@ $(DESTDIR)/lib$(PROJECT).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
 
 $(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS)
-	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \
+	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) $(LIBVORTEX_SST_DEFS) \
 	-I./sim/simx \
 	$(OBJS) $(VORTEX_SST_SRCS) \
 	-shared -o $@ \
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
index 11c221f1c0..75336e4fd6 100644
--- a/sim/simx/VortexGPGPU.cpp
+++ b/sim/simx/VortexGPGPU.cpp
@@ -1,6 +1,9 @@
 #include <sst/core/sst_config.h>
 #include "VortexGPGPU.h"
-#include "mem_backend_sst.h"   // needed for vx_register_submit and vx_on_mem_complete
+#ifdef VORTEX_SST_ENABLE_STDMEM
+#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete
+#endif   
+#include <cstdlib>
 #include <vector>
 #include <utility>
 #include <unordered_map>
@@ -9,7 +12,14 @@ using namespace SST;
 using namespace SST::Vortex;
 using SST::Interfaces::StandardMem;
 
+namespace {
+constexpr const char* kDefaultKernelPath = "/nethome/jsubburayan3/vortex/kernel.vxbin";
+constexpr uint32_t    kDefaultLaunchBytes = 64;
+}
+
+#ifdef VORTEX_SST_ENABLE_STDMEM
 VortexGPGPU *VortexGPGPU::instance_ = nullptr;
+#endif
 
 VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     : Component(id),
@@ -18,9 +28,19 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
 
     // Parameter: clock frequency (default 1GHz)
     std::string clockfreq = params.find<std::string>("clock", "1GHz");
+
     // Parameter: program path
     std::string kernel = params.find<std::string>("program", "");
+    if (kernel.empty()) {
+        if (const char* env = std::getenv("VORTEX_DEFAULT_KERNEL"))
+            kernel = env;
+        else
+            kernel = kDefaultKernelPath;
+    }
+    const uint32_t launch_bytes = params.find<uint32_t>("launch_bytes", kDefaultLaunchBytes);
+
 
+#ifdef VORTEX_SST_ENABLE_STDMEM
     // Create StandardMem interface; auto-bind to port name "memIface"
     memIface_ = loadUserSubComponent<StandardMem>(
         "memIface", ComponentInfo::SHARE_NONE,
@@ -32,7 +52,13 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
         SST::Output out;
         out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n");
     }
+#else
+    // No SST memory: just register our clock handler
+    registerClock(clockfreq,
+                  new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick));
+#endif
 
+#ifdef VORTEX_SST_ENABLE_STDMEM
     // Register callback so SimX can submit memory to SST
     instance_ = this;
     // Track app-specific tags by StandardMem request-id
@@ -60,7 +86,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
 
         instance_->memIface_->send(req);
     });
-
+#endif
 
     // Load the kernel or ELF
     if (!sim_->init(kernel)) {
@@ -68,6 +94,17 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
         out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n");
     }
 
+    // Allocate a zero-filled default launch descriptor (done unconditionally here) and hand its address to setStartupArg()
+    if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) { // args: size, alignment=64, readable, writable, out address
+        SST::Output out;
+        out.fatal(CALL_INFO, -1,
+                  "VortexGPGPU: unable to allocate launch descriptor (%u bytes)\n",
+                  launch_bytes);
+    }
+    std::vector<uint8_t> launch_payload(launch_bytes, 0); // launch_bytes zero bytes
+    sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size());
+    sim_->setStartupArg(launch_desc_addr_);
+
     registerAsPrimaryComponent();
     primaryComponentDoNotEndSim();
 }
@@ -88,6 +125,7 @@ bool VortexGPGPU::clockTick(SST::Cycle_t) {
 }
 
 void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
+    #ifdef VORTEX_SST_ENABLE_STDMEM
     // Inform SimX that this request has completed
     const auto id = req->getID();
     const auto it = tag_by_id.find(id);
@@ -100,4 +138,7 @@ void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
         tag_by_id.erase(it);
     }
     delete req;
+    #else
+    delete req; // should never be called without StandardMem
+    #endif
 }
diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h
index 664124fc5e..d35eda898d 100644
--- a/sim/simx/VortexGPGPU.h
+++ b/sim/simx/VortexGPGPU.h
@@ -29,8 +29,10 @@ class VortexGPGPU : public SST::Component {
     )
     SST_ELI_DOCUMENT_PARAMS(
         {"clock", "Clock frequency", "1GHz"},
-        {"program", "Path to the kernel or ELF to load", ""}
+        {"program", "Path to the kernel or ELF to load (defaults to built-in test image)", ""},
+        {"launch_bytes", "Size in bytes of the default launch descriptor", "64"}
     )
+
     SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
         {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"}
     )
@@ -39,12 +41,20 @@ class VortexGPGPU : public SST::Component {
     bool clockTick(SST::Cycle_t cycle);
     void handleMemResp(SST::Interfaces::StandardMem::Request* req);
 
+    #ifdef VORTEX_SST_ENABLE_STDMEM
     // static pointer used by lambda in vx_register_submit()
     static VortexGPGPU* instance_;
+    #endif
 
     std::unique_ptr<vortex::VortexSimulator> sim_;
+    #ifdef VORTEX_SST_ENABLE_STDMEM
     SST::Interfaces::StandardMem* memIface_;
     std::unordered_map<SST::Interfaces::StandardMem::Request::id_t, uint64_t> tag_by_id;
+    #else
+    SST::Interfaces::StandardMem* memIface_ = nullptr;
+    #endif
+
+    uint64_t launch_desc_addr_ = 0;
 };
 
 } // namespace Vortex
diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp
index 3eb62f9c76..7371a553d4 100644
--- a/sim/simx/emulator.cpp
+++ b/sim/simx/emulator.cpp
@@ -16,6 +16,7 @@
 #include <unistd.h>
 #include <math.h>
 #include <assert.h>
+#include <limits>
 #include <util.h>
 
 #include "emulator.h"
@@ -131,7 +132,7 @@ void Emulator::reset() {
 void Emulator::attach_ram(RAM* ram) {
   // bind RAM to memory unit
 #if (XLEN == 64)
-  mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39
+  mmu_.attach(*ram, 0, std::numeric_limits<uint64_t>::max());
 #else
   mmu_.attach(*ram, 0, 0xFFFFFFFF);
 #endif
diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp
index 8d83a75d2e..f1cecae661 100644
--- a/sim/simx/mem_backend_dram.cpp
+++ b/sim/simx/mem_backend_dram.cpp
@@ -51,6 +51,7 @@ void MemBackendDram::dram_complete(void* arg) {
             uint32_t bank = info.bank;
             if (backend->mem_xbar_rsp_cb_)
                 backend->mem_xbar_rsp_cb_(bank, rsp);
+                
         }
         backend->inflight_.erase(it);
     }
diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp
index a06014aab1..3be8227ffd 100644
--- a/sim/simx/mem_backend_sst.cpp
+++ b/sim/simx/mem_backend_sst.cpp
@@ -12,6 +12,7 @@
 #include <VX_config.h>
 
 extern "C" {
+#ifdef VORTEX_SST_ENABLE_STDMEM
 
 // Register a submit function provided by the SST component.  The
 // MemBackendSST stores it in a static member so that calls to
@@ -27,6 +28,12 @@ void vx_on_mem_complete(uint64_t tag) {
     if (auto inst = vortex::MemBackendSST::instance())
         inst->complete(tag);
 }
+#else
+
+void vx_register_submit(vx_submit_fn)      {}
+void vx_on_mem_complete(uint64_t)          {}
+
+#endif
 
 } // extern "C"
 
diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp
index 2e64d5215e..928a029d0f 100644
--- a/sim/simx/vortex_simulator.cpp
+++ b/sim/simx/vortex_simulator.cpp
@@ -1,7 +1,11 @@
 #include "vortex_simulator.h"
+#include <algorithm>
 #include <fstream>
-#include <vector>
+#include <limits>
+#include <optional>
 #include <string>
+#include <utility>
+#include <vector>
 #include "simobject.h"
 #include "dcrs.h"
 #include <VX_config.h>
@@ -20,28 +24,32 @@ VortexSimulator::VortexSimulator()
 : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
 , ram_(0, MEM_PAGE_SIZE)
 , proc_(std::make_unique<Processor>(arch_))
+, kernel_image_{}
+, next_alloc_addr_(kAllocBaseAddr)
 , halted_(true) {}
 
 bool VortexSimulator::init(const std::string& kernelPath) {
     proc_->attach_ram(&ram_);
 
-    // Load the kernel image if provided
-    // Load the kernel image if provided
+    kernel_image_ = {};
+    next_alloc_addr_ = kAllocBaseAddr;
+    ram_.clear();
+    ram_.set_acl(0, kGlobalMemSize, 0);
+
+    bool has_kernel = false;
     if (!kernelPath.empty()) {
-        std::string ext = getFileExt(kernelPath);
-        if (ext == "bin") {
-            // Load raw binary at STARTUP_ADDR
-            ram_.loadBinImage(kernelPath.c_str(), STARTUP_ADDR);
-        } else if (ext == "hex") {
-            // Load Intel-hex
-            ram_.loadHexImage(kernelPath.c_str());
-        } else {
-            return false; // unsupported format
-        }
+        auto image_info = this->loadKernelImage(kernelPath);
+        if (!image_info)
+            return false;
+        kernel_image_ = *image_info;
+        has_kernel = true;
     }
 
-    // Program base DCRs (match main.cpp behavior)
-    const uint64_t startup = STARTUP_ADDR;
+    // Program base DCRs - align startup to loaded kernel when provided
+    uint64_t startup = STARTUP_ADDR;
+    if (has_kernel)
+        startup = kernel_image_.base_addr;
+
     proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu);
 
     #if (XLEN == 64)
@@ -61,6 +69,185 @@ halted_ = !running;
 return running;
 }
 
+bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) {
+    if (addr_out == nullptr || size == 0)
+        return false;
+
+    alignment = normalizeAlignment(alignment);
+    uint64_t base = alignUp(next_alloc_addr_, alignment);
+    uint64_t end = base + size;
+    if (end > kGlobalMemSize)
+        return false;
+
+    uint64_t acl_start = alignDown(base, RAM_PAGE_SIZE);
+    uint64_t acl_end = alignUp(end, RAM_PAGE_SIZE);
+    if (acl_end > kGlobalMemSize)
+        return false;
+
+    int flags = 0;
+    if (readable) flags |= 0x1;
+    if (writable) flags |= 0x2;
+    if (flags != 0)
+        ram_.set_acl(acl_start, acl_end - acl_start, flags);
+
+    *addr_out = base;
+    next_alloc_addr_ = std::max(next_alloc_addr_, acl_end);
+    return true;
+}
+
+bool VortexSimulator::reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable) {
+    if (size == 0)
+        return false;
+
+    uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE);
+    uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE);
+    if (acl_end > kGlobalMemSize)
+        return false;
+
+    int flags = 0;
+    if (readable) flags |= 0x1;
+    if (writable) flags |= 0x2;
+    ram_.set_acl(acl_start, acl_end - acl_start, flags);
+
+    if (acl_end > next_alloc_addr_)
+        next_alloc_addr_ = acl_end;
+    return true;
+}
+
+void VortexSimulator::setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable) {
+    if (size == 0)
+        return;
+    uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE);
+    uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE);
+    int flags = 0;
+    if (readable) flags |= 0x1;
+    if (writable) flags |= 0x2;
+    ram_.set_acl(acl_start, acl_end - acl_start, flags);
+}
+
+void VortexSimulator::writeMemory(uint64_t addr, const void* data, uint64_t size) {
+    if (data == nullptr || size == 0)
+        return;
+    ram_.write(data, addr, size);
+}
+
+void VortexSimulator::setStartupArg(uint64_t arg_addr) {
+    proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG0, static_cast<uint32_t>(arg_addr & 0xffffffffu));
+#if (XLEN == 64)
+    proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG1, static_cast<uint32_t>(arg_addr >> 32));
+#endif
+}
+
+std::optional<KernelImageInfo> VortexSimulator::loadKernelImage(const std::string& path) {
+    KernelImageInfo info{};
+
+    if (path.empty())
+        return info;
+
+    const auto ext = getFileExt(path);
+    if (ext == "bin") {
+        std::ifstream ifs(path, std::ios::binary);
+        if (!ifs)
+            return std::nullopt;
+
+        ifs.seekg(0, std::ios::end);
+        const uint64_t size = static_cast<uint64_t>(ifs.tellg());
+        ifs.seekg(0, std::ios::beg);
+        std::vector<uint8_t> payload(size);
+        if (size && !ifs.read(reinterpret_cast<char*>(payload.data()), size))
+            return std::nullopt;
+
+        if (!reserveMemory(STARTUP_ADDR, size, true, true))
+            return std::nullopt;
+        writeMemory(STARTUP_ADDR, payload.data(), size);
+        setMemoryPermissions(STARTUP_ADDR, size, true, false);
+
+        info.base_addr = STARTUP_ADDR;
+        info.size_bytes = size;
+        return info;
+    }
+
+    if (ext == "hex") {
+        ram_.loadHexImage(path.c_str());
+        info.base_addr = STARTUP_ADDR;
+        info.size_bytes = 0;
+        return info;
+    }
+
+    if (ext == "vxbin") {
+        std::ifstream ifs(path, std::ios::binary);
+        if (!ifs)
+            return std::nullopt;
+
+        uint64_t min_vma = 0;
+        uint64_t max_vma = 0;
+
+        ifs.read(reinterpret_cast<char*>(&min_vma), sizeof(uint64_t));
+        ifs.read(reinterpret_cast<char*>(&max_vma), sizeof(uint64_t));
+        if (!ifs || max_vma < min_vma)
+            return std::nullopt;
+
+        constexpr size_t header_bytes = sizeof(uint64_t) * 2;
+        ifs.seekg(0, std::ios::end);
+        const size_t file_size = static_cast<size_t>(ifs.tellg());
+        if (file_size < header_bytes)
+            return std::nullopt;
+
+        const uint64_t payload_size = static_cast<uint64_t>(file_size - header_bytes);
+        const uint64_t image_span   = max_vma - min_vma;
+        if (image_span == 0)
+            return std::nullopt;
+        ifs.seekg(header_bytes, std::ios::beg);
+
+        std::vector<uint8_t> payload(payload_size);
+        if (payload_size && !ifs.read(reinterpret_cast<char*>(payload.data()), payload_size))
+            return std::nullopt;
+
+        if (!reserveMemory(min_vma, image_span, true, true))
+            return std::nullopt;
+        if (payload_size)
+            writeMemory(min_vma, payload.data(), payload_size);
+        if (image_span > payload_size) {
+            std::vector<uint8_t> zeros(static_cast<size_t>(image_span - payload_size), 0);
+            writeMemory(min_vma + payload_size, zeros.data(), zeros.size());
+        }
+        setMemoryPermissions(min_vma, image_span, true, false);
+
+        info.base_addr = min_vma;
+        info.size_bytes = image_span;
+        return info;
+    }
+
+    return std::nullopt;
+}
+
+uint64_t VortexSimulator::alignUp(uint64_t value, uint64_t alignment) {
+    return (value + alignment - 1) & ~(alignment - 1);
+}
+
+uint64_t VortexSimulator::alignDown(uint64_t value, uint64_t alignment) {
+    return value & ~(alignment - 1);
+}
+
+uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) {
+    if (alignment == 0)
+        alignment = kDefaultAlignment;
+    if (alignment < kDefaultAlignment)
+        alignment = kDefaultAlignment;
+    if ((alignment & (alignment - 1)) == 0)
+        return alignment;
+
+    alignment--;
+    alignment |= alignment >> 1;
+    alignment |= alignment >> 2;
+    alignment |= alignment >> 4;
+    alignment |= alignment >> 8;
+    alignment |= alignment >> 16;
+    alignment |= alignment >> 32;
+    alignment++;
+    return alignment;
+}
+
 bool VortexSimulator::isHalted() const {
     return halted_;
 }
diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h
index eef604aca1..d710b8de65 100644
--- a/sim/simx/vortex_simulator.h
+++ b/sim/simx/vortex_simulator.h
@@ -3,11 +3,19 @@
 
 #include "processor.h"  // for Processor, RAM
 #include "arch.h"       // for Arch
+#include "constants.h"
+#include <cstdint>
 #include <memory>
+#include <optional>
 #include <string>
 
 namespace vortex {
 
+struct KernelImageInfo {
+    uint64_t base_addr = 0;
+    uint64_t size_bytes = 0;
+};
+
 /**
  * A wrapper class used by the SST integration to drive the Vortex GPU
  * one cycle at a time.  It encapsulates the architecture definition,
@@ -24,6 +32,18 @@ class VortexSimulator {
      */
     bool init(const std::string& kernelPath);
 
+    // changes to substitute for run-time wrt memory setup
+    const KernelImageInfo& kernelImage() const { return kernel_image_; }
+    bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out);
+    bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable);
+    void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable);
+    void writeMemory(uint64_t addr, const void* data, uint64_t size);
+
+    RAM& ram() { return ram_; }
+    const RAM& ram() const { return ram_; }
+
+    void setStartupArg(uint64_t arg_addr);
+
     /**
      * Advances the simulation by one cycle.  Returns false once the
      * simulation has completed (i.e. all clusters are halted).
@@ -34,9 +54,21 @@ class VortexSimulator {
     bool isHalted() const;
 
 private:
+    static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull;
+    static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR;
+    static constexpr uint64_t kDefaultAlignment = 64ull;
+
+    static uint64_t alignUp(uint64_t value, uint64_t alignment);
+    static uint64_t alignDown(uint64_t value, uint64_t alignment);
+    static uint64_t normalizeAlignment(uint64_t alignment);
+
+    std::optional<KernelImageInfo> loadKernelImage(const std::string& path);
+
     Arch arch_;
     RAM ram_;
     std::unique_ptr<Processor> proc_;
+    KernelImageInfo kernel_image_;
+    uint64_t next_alloc_addr_;
     bool halted_;
 };
 

From 0d49477fe2afa04977762821d062d5cdfd2f140b Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Tue, 28 Oct 2025 21:55:19 -0400
Subject: [PATCH 04/15] first success - sst now runs simx with hello program

---
 sim/common/mem.cpp            |  13 ++-
 sim/simx/Makefile             |  47 ++++-----
 sim/simx/VortexGPGPU.cpp      |  57 +++++------
 sim/simx/VortexGPGPU.h        |  21 ++--
 sim/simx/cluster.cpp          |   2 +
 sim/simx/core.cpp             |   2 +
 sim/simx/emulator.cpp         |   4 +-
 sim/simx/mem_backend.h        |  16 ---
 sim/simx/mem_backend_dram.cpp |  76 --------------
 sim/simx/mem_backend_dram.h   |  51 ----------
 sim/simx/mem_backend_sst.cpp  |  79 ---------------
 sim/simx/mem_backend_sst.h    |  43 --------
 sim/simx/mem_sim.cpp          | 186 ++++++++++++++++++----------------
 sim/simx/processor.cpp        |  11 +-
 sim/simx/processor_impl.h     |   1 +
 sim/simx/socket.cpp           |   2 +
 sim/simx/vortex_simulator.cpp |  66 +++++++-----
 sim/simx/vortex_simulator.h   |  20 ++--
 18 files changed, 235 insertions(+), 462 deletions(-)
 delete mode 100644 sim/simx/mem_backend.h
 delete mode 100644 sim/simx/mem_backend_dram.cpp
 delete mode 100644 sim/simx/mem_backend_dram.h
 delete mode 100644 sim/simx/mem_backend_sst.cpp
 delete mode 100644 sim/simx/mem_backend_sst.h

diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp
index 96b08ff8a2..64a294b2eb 100644
--- a/sim/common/mem.cpp
+++ b/sim/common/mem.cpp
@@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
   if ((addr & (wordSize_-1))
    || (addr_end & (wordSize_-1))
    || (addr_end > contents_.size())) {
-    std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
+    std::cout << "RamMemDevice::read lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
     throw BadAddress();
   }
 
@@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
   if ((addr & (wordSize_-1))
    || (addr_end & (wordSize_-1))
    || (addr_end > contents_.size())) {
-    std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
+    std::cout << "RamMemDevice::write lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
     throw BadAddress();
   }
 
@@ -108,14 +108,16 @@ bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor
 
 void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
   assert(end >= start);
+  //std::cout << "ADecoder: map() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and md 0x" << &md << std::endl;
   entry_t entry{&md, start, end};
   entries_.emplace_back(entry);
 }
 
 void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
   mem_accessor_t ma;
+  //std::cout << "MemoryUnit::ADecoder::read init lookup of 0x" << std::hex << addr << std::dec << ".\n";
   if (!this->lookup(addr, size, &ma)) {
-    std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
+    std::cout << "MemoryUnit::ADecoder::read lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
     throw BadAddress();
   }
   ma.md->read(data, ma.addr, size);
@@ -124,7 +126,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
 void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
   mem_accessor_t ma;
   if (!this->lookup(addr, size, &ma)) {
-    std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
+    std::cout << "MemoryUnit::ADecoder::write lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
     throw BadAddress();
   }
   ma.md->write(data, ma.addr, size);
@@ -154,6 +156,7 @@ MemoryUnit::MemoryUnit(uint64_t pageSize)
 #endif
 
 void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) {
+  //std::cout << "MemoryUnit: attach() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and m 0x" << &m << std::endl;
   decoder_.map(start, end, m);
 }
 
@@ -510,6 +513,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
   std::ifstream ifs(filename);
   if (!ifs) {
     std::cerr << "Error: " << filename << " not found" << std::endl;
+    //std::cout << "loadBinImage Error: " << filename << " not found" << std::endl;
     std::abort();
   }
 
@@ -521,6 +525,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
 
   this->clear();
   this->write(content.data(), destination, size);
+  //std::cout << "Loaded binary image: " << filename << ", size: " << size << " bytes, destination: 0x" << std::hex << destination << std::dec << std::endl;
 }
 
 void RAM::loadHexImage(const char* filename) {
diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 4aa769f379..22032d7444 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -1,14 +1,11 @@
 include ../common.mk
-# now you see me 2
+
 DESTDIR ?= $(CURDIR)
 
 OBJ_DIR = $(DESTDIR)/obj
 CONFIG_FILE = $(DESTDIR)/simx_config.stamp
 SRC_DIR = $(VORTEX_HOME)/sim/simx
 
-# SST StandardMem bridge (default off)
-SST_USE_STDMEM ?= 0
-
 CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors
 CXXFLAGS += -fPIC -Wno-maybe-uninitialized
 CXXFLAGS += -I$(SRC_DIR) -I$(SW_COMMON_DIR) -I$(ROOT_DIR)/hw
@@ -28,29 +25,16 @@ SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $(
 SRCS += $(SRC_DIR)/decode.cpp $(SRC_DIR)/opc_unit.cpp $(SRC_DIR)/dispatcher.cpp
 SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp
 SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp
-SRCS += $(SRC_DIR)/mem_backend_sst.cpp \
-SRCS += $(SRC_DIR)/mem_backend_dram.cpp \
 SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
 
+VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp
+VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp
+
 # SST flags
 SST_CFLAGS := $(shell pkg-config --cflags SST-14.1)
 SST_CFLAGS += -I../../../sst/sst/sst-core/include/
 SST_LFLAGS := $(shell pkg-config --libs SST-14.1)
 
-ifeq ($(SST_USE_STDMEM),1)
-  CXXFLAGS += -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM
-  LIBVORTEX_SST_DEFS = -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM
-  VORTEX_SST_SRCS := \
-      $(SRC_DIR)/mem_backend_sst.cpp \
-      $(SRC_DIR)/vortex_simulator.cpp \
-      $(SRC_DIR)/VortexGPGPU.cpp
-else
-  LIBVORTEX_SST_DEFS =
-  VORTEX_SST_SRCS := \
-      $(SRC_DIR)/vortex_simulator.cpp \
-      $(SRC_DIR)/VortexGPGPU.cpp
-endif
-
 # Add V extension sources
 ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
   	SRCS += $(SRC_DIR)/voperands.cpp
@@ -81,19 +65,25 @@ COMMON_SRCS := $(filter $(SW_COMMON_DIR)/%.cpp,$(SRCS))
 SRC_SRCS    := $(filter $(SRC_DIR)/%.cpp,$(SRCS))
 COMMON_OBJS := $(patsubst $(SW_COMMON_DIR)/%.cpp,$(OBJ_DIR)/common/%.o,$(COMMON_SRCS))
 SRC_OBJS    := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC_SRCS))
+VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS))
 OBJS        := $(COMMON_OBJS) $(SRC_OBJS)
 MAIN_OBJ    := $(OBJ_DIR)/main.o
 
-DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d)
+DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) $(VORTEX_SST_OBJS:.o=.d)
 
 # optional: pipe through ccache if you have it
 CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX))
 
 PROJECT := simx
+VORTEX_LIB := libvortex.so
 
-.PHONY: all force clean clean-lib clean-exe clean-obj
+.PHONY: all force clean clean-lib clean-exe clean-obj libvortex clean-libvortex
 
-all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so
+#ifdef USE_SST
+all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/$(VORTEX_LIB)
+#else
+#all: $(DESTDIR)/$(PROJECT)
+#endif
 
 # build common object files
 $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE)
@@ -105,6 +95,11 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
 	@mkdir -p $(@D)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
+# build SST-specific source object files
+$(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
+	@mkdir -p $(@D)
+	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@
+
 # build main object file
 $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE)
 	@mkdir -p $(@D)
@@ -118,8 +113,10 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ)
 $(DESTDIR)/lib$(PROJECT).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
 
-$(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS)
-	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) $(LIBVORTEX_SST_DEFS) \
+libvortex: $(DESTDIR)/$(VORTEX_LIB)
+
+$(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS)
+	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) \
 	-I./sim/simx \
 	$(OBJS) $(VORTEX_SST_SRCS) \
 	-shared -o $@ \
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
index 75336e4fd6..536e0e1583 100644
--- a/sim/simx/VortexGPGPU.cpp
+++ b/sim/simx/VortexGPGPU.cpp
@@ -1,8 +1,8 @@
 #include <sst/core/sst_config.h>
 #include "VortexGPGPU.h"
-#ifdef VORTEX_SST_ENABLE_STDMEM
+#ifdef USE_SST_MEM
 #include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete
-#endif   
+#endif
 #include <cstdlib>
 #include <vector>
 #include <utility>
@@ -10,37 +10,29 @@
 
 using namespace SST;
 using namespace SST::Vortex;
+#ifdef USE_SST_MEM
 using SST::Interfaces::StandardMem;
+#endif
 
-namespace {
-constexpr const char* kDefaultKernelPath = "/nethome/jsubburayan3/vortex/kernel.vxbin";
-constexpr uint32_t    kDefaultLaunchBytes = 64;
-}
-
-#ifdef VORTEX_SST_ENABLE_STDMEM
+#ifdef USE_SST_MEM
 VortexGPGPU *VortexGPGPU::instance_ = nullptr;
 #endif
 
 VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     : Component(id),
-      sim_(std::make_unique<vortex::VortexSimulator>()),
-      memIface_(nullptr) {
+      sim_(std::make_unique<vortex::VortexSimulator>()) {
+
+    std::cout << "VortexGPGPU: initializing Vortex GPGPU simulator\n";
 
     // Parameter: clock frequency (default 1GHz)
     std::string clockfreq = params.find<std::string>("clock", "1GHz");
 
     // Parameter: program path
-    std::string kernel = params.find<std::string>("program", "");
-    if (kernel.empty()) {
-        if (const char* env = std::getenv("VORTEX_DEFAULT_KERNEL"))
-            kernel = env;
-        else
-            kernel = kDefaultKernelPath;
-    }
-    const uint32_t launch_bytes = params.find<uint32_t>("launch_bytes", kDefaultLaunchBytes);
+    std::string kernel = params.find<std::string>("program", "/nethome/jsubburayan3/vortex/build/tests/kernel/hello/hello.bin");
 
+    //const uint32_t launch_bytes = params.find<uint32_t>("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used
 
-#ifdef VORTEX_SST_ENABLE_STDMEM
+#ifdef USE_SST_MEM
     // Create StandardMem interface; auto-bind to port name "memIface"
     memIface_ = loadUserSubComponent<StandardMem>(
         "memIface", ComponentInfo::SHARE_NONE,
@@ -58,7 +50,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
                   new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick));
 #endif
 
-#ifdef VORTEX_SST_ENABLE_STDMEM
+#ifdef USE_SST_MEM
     // Register callback so SimX can submit memory to SST
     instance_ = this;
     // Track app-specific tags by StandardMem request-id
@@ -88,13 +80,17 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     });
 #endif
 
-    // Load the kernel or ELF
+    // Load the kernel image
     if (!sim_->init(kernel)) {
         SST::Output out;
         out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n");
     }
+    else{
+        std::cout << "VortexGPGPU: loaded kernel: " << kernel << std::endl;
+    }
 
-    // Set up a default launch descriptor if the caller did not supply one
+    // needed when launch descriptor is used
+    /*
     if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) {
         SST::Output out;
         out.fatal(CALL_INFO, -1,
@@ -104,6 +100,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     std::vector<uint8_t> launch_payload(launch_bytes, 0);
     sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size());
     sim_->setStartupArg(launch_desc_addr_);
+    */
 
     registerAsPrimaryComponent();
     primaryComponentDoNotEndSim();
@@ -114,18 +111,22 @@ VortexGPGPU::~VortexGPGPU() = default;
 void VortexGPGPU::setup() {}
 void VortexGPGPU::finish() {}
 
-bool VortexGPGPU::clockTick(SST::Cycle_t) {
+bool VortexGPGPU::clockTick(SST::Cycle_t cycle) {
     // Advance the GPU one cycle
+    //std::cout << "VortexGPGPU: clockTick came from SST " << std::endl;
     bool running = sim_->cycle();
+    //std::cout << "VortexGPGPU cycle returned: " << running << std::endl;
     if (!running) {
         primaryComponentOKToEndSim();
-        return false;
+        std::cout << "VortexGPGPU: simulation finished\n";
+        return true;
     }
-    return true;
+    //std::cout << "VortexGPGPU clockTick returns false " << std::endl;
+    return false;
 }
 
+#ifdef USE_SST_MEM
 void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
-    #ifdef VORTEX_SST_ENABLE_STDMEM
     // Inform SimX that this request has completed
     const auto id = req->getID();
     const auto it = tag_by_id.find(id);
@@ -138,7 +139,5 @@ void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
         tag_by_id.erase(it);
     }
     delete req;
-    #else
-    delete req; // should never be called without StandardMem
-    #endif
 }
+#endif
diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h
index d35eda898d..61acbaa40f 100644
--- a/sim/simx/VortexGPGPU.h
+++ b/sim/simx/VortexGPGPU.h
@@ -1,7 +1,7 @@
 // VortexGPGPU.h
 #pragma once
 #include <sst/core/component.h>
-#include <sst/core/interfaces/stdMem.h>
+//#include <sst/core/interfaces/stdMem.h>
 #include <memory>
 #include <string>
 #include "vortex_simulator.h"  // wrapper around SimX
@@ -24,7 +24,7 @@ class VortexGPGPU : public SST::Component {
         "vortex",           // element library name
         "VortexGPGPU",      // component name
         SST_ELI_ELEMENT_VERSION(1,0,0),
-        "Headless Vortex GPGPU Simulator",
+        "Vortex GPGPU Simulator",
         COMPONENT_CATEGORY_PROCESSOR
     )
     SST_ELI_DOCUMENT_PARAMS(
@@ -38,23 +38,24 @@ class VortexGPGPU : public SST::Component {
     )
 
 private:
+
     bool clockTick(SST::Cycle_t cycle);
-    void handleMemResp(SST::Interfaces::StandardMem::Request* req);
 
-    #ifdef VORTEX_SST_ENABLE_STDMEM
+    std::unique_ptr<vortex::VortexSimulator> sim_;
+
+    //uint64_t launch_desc_addr_ = 0; // required only when launch descriptor is required
+
+    #ifdef USE_SST_MEM
+    void handleMemResp(SST::Interfaces::StandardMem::Request* req);
+    
     // static pointer used by lambda in vx_register_submit()
     static VortexGPGPU* instance_;
-    #endif
 
-    std::unique_ptr<vortex::VortexSimulator> sim_;
-    #ifdef VORTEX_SST_ENABLE_STDMEM
     SST::Interfaces::StandardMem* memIface_;
     std::unordered_map<SST::Interfaces::StandardMem::Request::id_t, uint64_t> tag_by_id;
-    #else
+    //#else
     SST::Interfaces::StandardMem* memIface_ = nullptr;
     #endif
-
-    uint64_t launch_desc_addr_ = 0;
 };
 
 } // namespace Vortex
diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp
index bab67233b8..84cba9f134 100644
--- a/sim/simx/cluster.cpp
+++ b/sim/simx/cluster.cpp
@@ -103,10 +103,12 @@ void Cluster::set_satp(uint64_t satp) {
 #endif
 
 bool Cluster::running() const {
+  //std::cout << "Cluster: running()" << std::endl;
   for (auto& socket : sockets_) {
     if (socket->running())
       return true;
   }
+  std::cout << "Cluster: running() returns false" << std::endl;
   return false;
 }
 
diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp
index 55791907a3..770c86701a 100644
--- a/sim/simx/core.cpp
+++ b/sim/simx/core.cpp
@@ -440,6 +440,7 @@ int Core::get_exitcode() const {
 
 bool Core::running() const {
   if (emulator_.running() || !pending_instrs_.empty()) {
+    //std::cout << "Core::running() emulator running: " << emulator_.running() << ", pending_instrs size: " << pending_instrs_.size() << std::endl;
   #ifndef NDEBUG
     for (auto& trace : pending_instrs_) {
       DT(5, "pipeline-pending: " << *trace);
@@ -447,6 +448,7 @@ bool Core::running() const {
   #endif
     return true;
   }
+  std::cout << "Core::running() returns false" << std::endl;
   return false;
 }
 
diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp
index 7371a553d4..1028aba435 100644
--- a/sim/simx/emulator.cpp
+++ b/sim/simx/emulator.cpp
@@ -16,7 +16,6 @@
 #include <unistd.h>
 #include <math.h>
 #include <assert.h>
-#include <limits>
 #include <util.h>
 
 #include "emulator.h"
@@ -131,8 +130,9 @@ void Emulator::reset() {
 
 void Emulator::attach_ram(RAM* ram) {
   // bind RAM to memory unit
+  //std::cout << "Emulator: attach_ram()" << std::endl;
 #if (XLEN == 64)
-  mmu_.attach(*ram, 0, std::numeric_limits<uint64_t>::max());
+  mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39
 #else
   mmu_.attach(*ram, 0, 0xFFFFFFFF);
 #endif
diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h
deleted file mode 100644
index d93e39d503..0000000000
--- a/sim/simx/mem_backend.h
+++ /dev/null
@@ -1,16 +0,0 @@
-#pragma once
-#include <cstdint>
-#include <functional>
-#include "types.h"
-
-namespace vortex {
-struct IMemBackend {
-    virtual ~IMemBackend() = default;
-    virtual void reset() = 0;
-    virtual void tick() = 0;
-    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
-    virtual void send_request(uint64_t addr, bool write,
-                              uint32_t size, uint32_t tag,
-                              uint32_t cid, uint64_t uuid) = 0;
-};
-} // namespace vortex
diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp
deleted file mode 100644
index f1cecae661..0000000000
--- a/sim/simx/mem_backend_dram.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-// mem_backend_dram.cpp
-#include "mem_backend_dram.h"
-
-using namespace vortex;
-
-namespace {
-struct CallbackData {
-    MemBackendDram* backend;
-    uint64_t tag;
-};
-} // anonymous namespace
-
-MemBackendDram* MemBackendDram::inst_ = nullptr;
-
-MemBackendDram::MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio)
-    : num_banks_(num_banks)
-    , block_size_(block_size)
-    , lg2_block_size_(0)
-    , dram_sim_(num_banks, block_size, clock_ratio)
-{
-    // Compute log2(block_size_) once; block_size_ is assumed to be a power of two.
-    uint32_t tmp = block_size_;
-    while (tmp > 1) {
-        ++lg2_block_size_;
-        tmp >>= 1;
-    }
-    inst_ = this;
-}
-
-void MemBackendDram::reset() {
-    inflight_.clear();
-    dram_sim_.reset();
-}
-
-void MemBackendDram::tick() {
-    // Retire pending transactions in DramSim
-    dram_sim_.tick();
-}
-
-void MemBackendDram::dram_complete(void* arg) {
-    auto* data = static_cast<CallbackData*>(arg);
-    MemBackendDram* backend = data->backend;
-    uint64_t tag = data->tag;
-    auto it = backend->inflight_.find(tag);
-    if (it != backend->inflight_.end()) {
-        const Info& info = it->second;
-        if (!info.write) {
-            // Form a MemRsp for reads only
-            MemRsp rsp{tag, info.cid, info.uuid};
-            // Route the response to the recorded bank
-            uint32_t bank = info.bank;
-            if (backend->mem_xbar_rsp_cb_)
-                backend->mem_xbar_rsp_cb_(bank, rsp);
-                
-        }
-        backend->inflight_.erase(it);
-    }
-    delete data;
-}
-
-void MemBackendDram::send_request(uint64_t addr, bool write,
-                                  uint32_t size, uint32_t tag,
-                                  uint32_t cid, uint64_t uuid) {
-    // Compute bank index: (addr >> lg2(block_size)) mod num_banks
-    uint32_t bank_idx = 0;
-    if (num_banks_ > 0)
-        bank_idx = static_cast<uint32_t>((addr >> lg2_block_size_) & (num_banks_ - 1));
-    inflight_.emplace(tag, Info{cid, uuid, write, bank_idx});
-    auto* cb_data = new CallbackData{this, tag};
-    // The size is ignored by DramSim because it is configured with block_size_.
-    dram_sim_.send_request(addr, write, &MemBackendDram::dram_complete, cb_data);
-}
-
-void MemBackendDram::complete(uint64_t tag) {
-    // Not used; dram_complete() handles completions
-}
\ No newline at end of file
diff --git a/sim/simx/mem_backend_dram.h b/sim/simx/mem_backend_dram.h
deleted file mode 100644
index 5a6f6f3d11..0000000000
--- a/sim/simx/mem_backend_dram.h
+++ /dev/null
@@ -1,51 +0,0 @@
-// mem_backend_dram.h
-#pragma once
-#include "mem_backend.h"
-#include "dram_sim.h"
-#include <unordered_map>
-#include <functional>
-#include "types.h"
-
-namespace vortex {
-
-class MemBackendDram : public IMemBackend {
-public:
-    static MemBackendDram* instance() { return inst_; }
-
-    // Construct with the same parameters as MemSim::Config: number of banks,
-    // block size in bytes, and clock ratio. These values are passed to
-    // the underlying DramSim so that the external memory model matches.
-    MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio);
-
-    void reset() override;
-    void tick() override;
-    void send_request(uint64_t addr, bool write,
-                      uint32_t size, uint32_t tag,
-                      uint32_t cid, uint64_t uuid) override;
-
-    // Not used directly; completions are handled by dram_complete().
-    void complete(uint64_t tag);
-
-    // Set by MemSim to push completed responses back to the correct
-    // bank in the crossbar.
-    std::function<void(uint32_t bank, const MemRsp& rsp)> mem_xbar_rsp_cb_;
-
-private:
-    struct Info {
-        uint32_t cid;
-        uint64_t uuid;
-        bool write;
-        uint32_t bank;  // bank index computed from the address
-    };
-    std::unordered_map<uint64_t, Info> inflight_;
-    uint32_t num_banks_;
-    uint32_t block_size_;
-    uint32_t lg2_block_size_;
-    static MemBackendDram* inst_;
-    DramSim dram_sim_;
-
-    // Static callback invoked by DramSim when a request completes
-    static void dram_complete(void* arg);
-};
-
-} // namespace vortex
\ No newline at end of file
diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp
deleted file mode 100644
index 3be8227ffd..0000000000
--- a/sim/simx/mem_backend_sst.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// mem_backend_sst.cpp
-// Implementation of the SST-backed memory backend.  This backend forwards
-// all off-chip memory requests to the SST StandardMem interface via a
-// registered callback (vx_submit_fn).  It maintains a table of inflight
-// transactions keyed by the original request tag so that completions can
-// be correlated back to the correct cluster and request.  When a read
-// completion is observed via vx_on_mem_complete(), the backend pushes a
-// MemRsp back into the crossbar using the stored cid/uuid.  Writes
-// complete silently.
-
-#include "mem_backend_sst.h"
-#include <VX_config.h>
-
-extern "C" {
-#ifdef VORTEX_SST_ENABLE_STDMEM
-
-// Register a submit function provided by the SST component.  The
-// MemBackendSST stores it in a static member so that calls to
-// send_request() can forward requests into SST.
-void vx_register_submit(vx_submit_fn fn) {
-    vortex::MemBackendSST::set_vx_submit_fn(fn);
-}
-
-// Notify MemBackendSST that the SST memory system has completed a
-// request identified by 'tag'.  The backend will produce a MemRsp for
-// reads and erase the entry from its inflight table.
-void vx_on_mem_complete(uint64_t tag) {
-    if (auto inst = vortex::MemBackendSST::instance())
-        inst->complete(tag);
-}
-#else
-
-void vx_register_submit(vx_submit_fn)      {}
-void vx_on_mem_complete(uint64_t)          {}
-
-#endif
-
-} // extern "C"
-
-using namespace vortex;
-
-// Initialise static pointers
-MemBackendSST* MemBackendSST::inst_ = nullptr;
-vx_submit_fn   MemBackendSST::submit_fn_ = nullptr;
-
-MemBackendSST::MemBackendSST() {
-    // Record this instance so the C wrapper can find us
-    inst_ = this;
-}
-
-void MemBackendSST::reset() {
-    // Drop all inflight transactions; pending responses are ignored
-    inflight_.clear();
-}
-
-void MemBackendSST::send_request(uint64_t addr, bool write,
-                                 uint32_t size, uint32_t tag,
-                                 uint32_t cid, uint64_t uuid) {
-    uint32_t lg2_block = log2ceil(size);
-    uint32_t bank = (addr >> lg2_block) & (PLATFORM_MEMORY_NUM_BANKS - 1);
-    inflight_.emplace(tag, Info{cid, uuid, write, bank});
-    if (submit_fn_) {
-        submit_fn_(addr, write, size, tag);
-    }
-}
-
-void MemBackendSST::complete(uint64_t tag) {
-    auto it = inflight_.find(tag);
-    if (it == inflight_.end())
-        return;
-    const Info &info = it->second;
-    // Only produce a MemRsp for reads; writes complete silently
-    if (!info.write) {
-        MemRsp rsp{tag, info.cid, info.uuid};
-        if (mem_xbar_rsp_cb_)
-        mem_xbar_rsp_cb_(info.bank, rsp);
-        }
-    inflight_.erase(it);
-}
diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h
deleted file mode 100644
index 678dec0b81..0000000000
--- a/sim/simx/mem_backend_sst.h
+++ /dev/null
@@ -1,43 +0,0 @@
-// mem_backend_sst.h
-#pragma once
-#include "mem_backend.h"
-#include <unordered_map>
-#include <functional>
-#include "types.h"
-
-extern "C" {
-  // Function pointer type for SST to call
-  typedef void (*vx_submit_fn)(uint64_t addr, bool write, uint32_t size, uint64_t tag);
-  // SST calls this to register its submit function
-  void vx_register_submit(vx_submit_fn fn);
-  // SST calls this when a memory response completes
-  void vx_on_mem_complete(uint64_t tag);
-}
-
-namespace vortex {
-
-class MemBackendSST : public IMemBackend {
-public:
-    static MemBackendSST* instance() { return inst_; }
-    static vx_submit_fn get_vx_submit_fn() { return submit_fn_; }
-    static void set_vx_submit_fn(vx_submit_fn fn) { submit_fn_ = fn; }
-
-    MemBackendSST();
-    void reset() override;
-    void tick() override {}
-    void send_request(uint64_t addr, bool write,
-                      uint32_t size, uint32_t tag,
-                      uint32_t cid, uint64_t uuid) override;
-
-    // Called from vx_on_mem_complete
-    void complete(uint64_t tag);
-
-
-private:
-    struct Info { uint32_t cid; uint64_t uuid; bool write; uint32_t bank;};
-    std::unordered_map<uint64_t,Info> inflight_;
-    static MemBackendSST* inst_;
-    static vx_submit_fn submit_fn_;
-};
-
-} // namespace vortex
diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp
index 61bf174a86..740ee008b6 100644
--- a/sim/simx/mem_sim.cpp
+++ b/sim/simx/mem_sim.cpp
@@ -1,117 +1,127 @@
-// mem_sim.cpp
+// Copyright © 2019-2023
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #include "mem_sim.h"
 #include <vector>
+#include <queue>
 #include <stdlib.h>
-#include <math.h>
+#include <dram_sim.h>
 
 #include "constants.h"
 #include "types.h"
 #include "debug.h"
-#include "mem_backend.h"
-#include "mem_backend_sst.h"
-#include "mem_backend_dram.h"
 
 using namespace vortex;
 
 class MemSim::Impl {
 private:
-    MemSim*   simobject_;
-    Config    config_;
-    MemCrossBar::Ptr mem_xbar_;
-    std::unique_ptr<IMemBackend> backend_;
-    mutable PerfStats perf_stats_;
+	MemSim*   simobject_;
+	Config    config_;
+	MemCrossBar::Ptr mem_xbar_;
+	DramSim   dram_sim_;
+	mutable PerfStats perf_stats_;
+	struct DramCallbackArgs {
+		MemSim::Impl* memsim;
+		MemReq request;
+		uint32_t bank_id;
+	};
 
 public:
-    Impl(MemSim* simobject, const Config& config)
-        : simobject_(simobject)
-        , config_(config)
-    {
-        char sname[100];
-        snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
-        mem_xbar_ = MemCrossBar::Create(
-            sname,
-            ArbiterType::RoundRobin,
-            config.num_ports,
-            config.num_banks,
-            [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
-                // Bank interleaving: choose the output index based on address bits
-                return static_cast<uint32_t>((req.addr >> lg2_block_size) & (num_banks - 1));
-            });
-
-        for (uint32_t i = 0; i < config.num_ports; ++i) {
-            simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
-            mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
-        }
-
-        #ifdef USE_SST_MEM_BACKEND
-        backend_ = std::make_unique<MemBackendSST>();
-        #else
-        backend_ = std::make_unique<MemBackendDram>(config.num_banks, config.block_size, config.clock_ratio);
-        #endif
-
-        if (backend_) {
-            backend_->mem_xbar_rsp_cb_ = [this](uint32_t bank, const MemRsp& rsp) {
-                // Push the response into the appropriate crossbar output queue
-                if (bank < mem_xbar_->RspOut.size())
-                    mem_xbar_->RspOut.at(bank).push(rsp, 1);
-            };
-        }
-    }
-
-    const PerfStats& perf_stats() const {
-        perf_stats_.bank_stalls = mem_xbar_->collisions();
-        return perf_stats_;
-    }
-
-    void reset() {
-        if (backend_)
-            backend_->reset();
-    }
-
-    void tick() {
-        // Advance the selected memory backend
-        if (backend_)
-            backend_->tick();
-
-        // Drain requests from each bank and send to the backend
-        for (uint32_t bank = 0; bank < config_.num_banks; ++bank) {
-            if (mem_xbar_->ReqOut.at(bank).empty())
-                continue;
-            auto& mem_req = mem_xbar_->ReqOut.at(bank).front();
-            if (backend_) {
-                backend_->send_request(
-                    mem_req.addr,
-                    mem_req.write,
-                    config_.block_size,
-                    mem_req.tag,
-                    mem_req.cid,
-                    mem_req.uuid);
-            }
-            DT(3, simobject_->name() << "-mem-req" << bank << ": " << mem_req);
-            mem_xbar_->ReqOut.at(bank).pop();
-        }
-    }
+	Impl(MemSim* simobject, const Config& config)
+		: simobject_(simobject)
+		, config_(config)
+		, dram_sim_(config.num_banks, config.block_size, config.clock_ratio)
+	{
+		char sname[100];
+		snprintf(sname, 100, "%s-xbar", simobject->name().c_str());
+		mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks,
+			[lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) {
+    	// Custom logic to calculate the output index using bank interleaving
+			return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1));
+		});
+		for (uint32_t i = 0; i < config.num_ports; ++i) {
+			simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i));
+			mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i));
+		}
+	}
+
+	~Impl() {
+		//--
+	}
+
+	const PerfStats& perf_stats() const {
+		perf_stats_.bank_stalls = mem_xbar_->collisions();
+		return perf_stats_;
+	}
+
+	void reset() {
+		dram_sim_.reset();
+	}
+
+	void tick() {
+		dram_sim_.tick();
+
+		for (uint32_t i = 0; i < config_.num_banks; ++i) {
+			if (mem_xbar_->ReqOut.at(i).empty())
+				continue;
+
+			auto& mem_req = mem_xbar_->ReqOut.at(i).front();
+
+			// enqueue the request to the memory system
+			auto req_args = new DramCallbackArgs{this, mem_req, i};
+			dram_sim_.send_request(
+				mem_req.addr,
+				mem_req.write,
+				[](void* arg) {
+					auto rsp_args = reinterpret_cast<const DramCallbackArgs*>(arg);
+					if (!rsp_args->request.write) {
+						// only send a response for read requests
+						MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid};
+						rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1);
+						DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp" << rsp_args->bank_id << ": " << mem_rsp);
+					}
+					delete rsp_args;
+				},
+				req_args
+			);
+
+			DT(3, simobject_->name() << "-mem-req" << i << ": " << mem_req);
+			mem_xbar_->ReqOut.at(i).pop();
+		}
+	}
 };
 
+///////////////////////////////////////////////////////////////////////////////
+
 MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config)
-    : SimObject<MemSim>(ctx, name)
-    , MemReqPorts(config.num_ports, this)
-    , MemRspPorts(config.num_ports, this)
-    , impl_(new Impl(this, config))
+	: SimObject<MemSim>(ctx, name)
+	, MemReqPorts(config.num_ports, this)
+	, MemRspPorts(config.num_ports, this)
+	, impl_(new Impl(this, config))
 {}
 
 MemSim::~MemSim() {
-    delete impl_;
+  delete impl_;
 }
 
 void MemSim::reset() {
-    impl_->reset();
+  impl_->reset();
 }
 
 void MemSim::tick() {
-    impl_->tick();
+  impl_->tick();
 }
 
-const MemSim::PerfStats& MemSim::perf_stats() const {
-    return impl_->perf_stats();
+const MemSim::PerfStats &MemSim::perf_stats() const {
+	return impl_->perf_stats();
 }
\ No newline at end of file
diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp
index 0cb00de010..acded6dd65 100644
--- a/sim/simx/processor.cpp
+++ b/sim/simx/processor.cpp
@@ -160,15 +160,23 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
 }
 
 bool ProcessorImpl::cycle() {
+  if (!is_cycle_initialized_) {
+    std::cout << "ProcessorImpl: Initializing cycle()\n";
+    SimPlatform::instance().reset();
+    this->reset();
+    is_cycle_initialized_ = true;
+  }
+  //std::cout << "ProcessorImpl: cycle()" << std::endl;
   SimPlatform::instance().tick();
   bool anyRunning = false;
-  for (auto& cluster : clusters_) {
+  for (auto cluster : clusters_) {
     if (cluster->running()) {
       anyRunning = true;
       break;
     }
   }
   perf_mem_latency_ += perf_mem_pending_reads_;
+  //std::cout << "ProcessorImpl: cycle() - returns: " << anyRunning << std::endl;
   return anyRunning;
 }
 
@@ -211,6 +219,7 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) {
 
 bool Processor::cycle() {
   try {
+    //std::cout << "Processor: cycle()" << std::endl;
     return impl_->cycle();
   } catch (...) {
     return false;
diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h
index 7b4537677e..031ce14afa 100644
--- a/sim/simx/processor_impl.h
+++ b/sim/simx/processor_impl.h
@@ -52,6 +52,7 @@ class ProcessorImpl {
 
   void reset();
 
+  bool is_cycle_initialized_ = false;
   const Arch& arch_;
   std::vector<std::shared_ptr<Cluster>> clusters_;
   DCRS dcrs_;
diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp
index b8ba8939f0..a729ad3b76 100644
--- a/sim/simx/socket.cpp
+++ b/sim/simx/socket.cpp
@@ -140,10 +140,12 @@ void Socket::set_satp(uint64_t satp) {
 #endif
 
 bool Socket::running() const {
+  //std::cout << "Socket: running()" << std::endl;
   for (auto& core : cores_) {
     if (core->running())
       return true;
   }
+  std::cout << "Socket: running() returns false" << std::endl;
   return false;
 }
 
diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp
index 928a029d0f..5ace3cede5 100644
--- a/sim/simx/vortex_simulator.cpp
+++ b/sim/simx/vortex_simulator.cpp
@@ -10,66 +10,84 @@
 #include "dcrs.h"
 #include <VX_config.h>
 #include <VX_types.h>
+#include "util.h"
 
 namespace vortex {
 
-// Utility to extract file extension
-static std::string getFileExt(const std::string& filename) {
-    auto pos = filename.find_last_of('.');
-    if (pos == std::string::npos) return "";
-    return filename.substr(pos + 1);
-}
-
 VortexSimulator::VortexSimulator()
 : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
 , ram_(0, MEM_PAGE_SIZE)
 , proc_(std::make_unique<Processor>(arch_))
-, kernel_image_{}
-, next_alloc_addr_(kAllocBaseAddr)
+// , kernel_image_{}
+// , next_alloc_addr_(kAllocBaseAddr)
 , halted_(true) {}
 
 bool VortexSimulator::init(const std::string& kernelPath) {
     proc_->attach_ram(&ram_);
 
-    kernel_image_ = {};
-    next_alloc_addr_ = kAllocBaseAddr;
-    ram_.clear();
-    ram_.set_acl(0, kGlobalMemSize, 0);
+    // kernel_image_ = {};
+    // next_alloc_addr_ = kAllocBaseAddr;
+    // ram_.clear();
+    // ram_.set_acl(0, kGlobalMemSize, 0);
 
-    bool has_kernel = false;
+    // can be used when launch descriptor is required
+    /* bool has_kernel = false;
     if (!kernelPath.empty()) {
         auto image_info = this->loadKernelImage(kernelPath);
         if (!image_info)
             return false;
         kernel_image_ = *image_info;
         has_kernel = true;
-    }
+    } */
 
     // Program base DCRs - align startup to loaded kernel when provided
-    uint64_t startup = STARTUP_ADDR;
+    /* uint64_t startup = STARTUP_ADDR;
     if (has_kernel)
-        startup = kernel_image_.base_addr;
-
-    proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu);
+        startup = kernel_image_.base_addr; */
 
+    // setup base DCRs
+    const uint64_t startup_addr(STARTUP_ADDR);
+    proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
     #if (XLEN == 64)
-    proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup >> 32);
+        proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32);
     #endif
     proc_->dcr_write(VX_DCR_BASE_MPM_CLASS, 0);
 
+    // load program/kernel image; an unsupported image type makes init() fail
+    {
+      std::string program_ext(fileExtension(kernelPath.c_str()));
+      if (program_ext == "bin") {
+        std::cout << "vortex_simulator: Loading binary image: " << kernelPath << " with startup address: 0x" << std::hex << startup_addr << std::dec << std::endl;
+        ram_.loadBinImage(kernelPath.c_str(), startup_addr);
+      } else if (program_ext == "hex") {
+        std::cout << "vortex_simulator: Loading hex image: " << kernelPath << std::endl;
+        ram_.loadHexImage(kernelPath.c_str());
+      } else {
+        std::cerr << "Error: only *.bin or *.hex images supported." << std::endl;
+        return false; // init() returns bool; the former -1 converted to true (success)
+      }
+    }
+
     halted_ = false;
     return true;
 }
 
 bool VortexSimulator::cycle() {
 if (halted_) return false;
+//std::cout << "VortexSimulator: cycle()" << std::endl;
 // Advance one cycle through the processor interface
 bool running = proc_->cycle(); 
 halted_ = !running;
+//std::cout << "VortexSimulator: cycle() returns " << running << std::endl;
 return running;
 }
 
-bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) {
+bool VortexSimulator::isHalted() const {
+    return halted_;
+}
+
+// Required when using launch descriptor and SST memory
+/* bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) {
     if (addr_out == nullptr || size == 0)
         return false;
 
@@ -246,10 +264,6 @@ uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) {
     alignment |= alignment >> 32;
     alignment++;
     return alignment;
-}
-
-bool VortexSimulator::isHalted() const {
-    return halted_;
-}
+} */
 
 } // namespace vortex
diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h
index d710b8de65..b0c890f593 100644
--- a/sim/simx/vortex_simulator.h
+++ b/sim/simx/vortex_simulator.h
@@ -11,11 +11,6 @@
 
 namespace vortex {
 
-struct KernelImageInfo {
-    uint64_t base_addr = 0;
-    uint64_t size_bytes = 0;
-};
-
 /**
  * A wrapper class used by the SST integration to drive the Vortex GPU
  * one cycle at a time.  It encapsulates the architecture definition,
@@ -32,8 +27,8 @@ class VortexSimulator {
      */
     bool init(const std::string& kernelPath);
 
-    // changes to substitute for run-time wrt memory setup
-    const KernelImageInfo& kernelImage() const { return kernel_image_; }
+    // changes to substitute for run-time wrt memory setup - required when using launch descriptor and SST memory
+/*     const KernelImageInfo& kernelImage() const { return kernel_image_; }
     bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out);
     bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable);
     void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable);
@@ -42,7 +37,7 @@ class VortexSimulator {
     RAM& ram() { return ram_; }
     const RAM& ram() const { return ram_; }
 
-    void setStartupArg(uint64_t arg_addr);
+    void setStartupArg(uint64_t arg_addr); */
 
     /**
      * Advances the simulation by one cycle.  Returns false once the
@@ -54,7 +49,8 @@ class VortexSimulator {
     bool isHalted() const;
 
 private:
-    static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull;
+    // required when using launch descriptor and SST memory
+    /* static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull;
     static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR;
     static constexpr uint64_t kDefaultAlignment = 64ull;
 
@@ -62,13 +58,13 @@ class VortexSimulator {
     static uint64_t alignDown(uint64_t value, uint64_t alignment);
     static uint64_t normalizeAlignment(uint64_t alignment);
 
-    std::optional<KernelImageInfo> loadKernelImage(const std::string& path);
+    std::optional<KernelImageInfo> loadKernelImage(const std::string& path); 
+    KernelImageInfo kernel_image_;
+    uint64_t next_alloc_addr_;*/
 
     Arch arch_;
     RAM ram_;
     std::unique_ptr<Processor> proc_;
-    KernelImageInfo kernel_image_;
-    uint64_t next_alloc_addr_;
     bool halted_;
 };
 

From 85b137407ee3729f38376ac48ea62969442af8fb Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 12 Nov 2025 10:22:25 -0500
Subject: [PATCH 05/15] revert the logging and remove the unnecessary files
 added

---
 .gitignore                 |   2 ++
 sim/common/mem.cpp         |  13 ++++---------
 sim/simx/cluster.cpp       |   2 --
 sim/simx/core.cpp          |   2 --
 sim/simx/emulator.cpp      |   1 -
 sim/simx/obj/common/util.o | Bin 18136 -> 0 bytes
 sim/simx/simx_config.stamp |   1 -
 sim/simx/socket.cpp        |   2 --
 8 files changed, 6 insertions(+), 17 deletions(-)
 delete mode 100644 sim/simx/obj/common/util.o
 delete mode 100644 sim/simx/simx_config.stamp

diff --git a/.gitignore b/.gitignore
index 43388e9cb5..41d5fd961a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,5 @@
 /.vscode
 *.cache
 *.code-workspace
+sim/simx/simx_config.stamp
+sim/simx/obj/common/*.o
diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp
index 64a294b2eb..96b08ff8a2 100644
--- a/sim/common/mem.cpp
+++ b/sim/common/mem.cpp
@@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) {
   if ((addr & (wordSize_-1))
    || (addr_end & (wordSize_-1))
    || (addr_end > contents_.size())) {
-    std::cout << "RamMemDevice::read lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
+    std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
     throw BadAddress();
   }
 
@@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) {
   if ((addr & (wordSize_-1))
    || (addr_end & (wordSize_-1))
    || (addr_end > contents_.size())) {
-    std::cout << "RamMemDevice::write lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
+    std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n";
     throw BadAddress();
   }
 
@@ -108,16 +108,14 @@ bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor
 
 void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) {
   assert(end >= start);
-  //std::cout << "ADecoder: map() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and md 0x" << &md << std::endl;
   entry_t entry{&md, start, end};
   entries_.emplace_back(entry);
 }
 
 void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
   mem_accessor_t ma;
-  //std::cout << "MemoryUnit::ADecoder::read init lookup of 0x" << std::hex << addr << std::dec << ".\n";
   if (!this->lookup(addr, size, &ma)) {
-    std::cout << "MemoryUnit::ADecoder::read lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
+    std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
     throw BadAddress();
   }
   ma.md->read(data, ma.addr, size);
@@ -126,7 +124,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) {
 void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) {
   mem_accessor_t ma;
   if (!this->lookup(addr, size, &ma)) {
-    std::cout << "MemoryUnit::ADecoder::write lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
+    std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n";
     throw BadAddress();
   }
   ma.md->write(data, ma.addr, size);
@@ -156,7 +154,6 @@ MemoryUnit::MemoryUnit(uint64_t pageSize)
 #endif
 
 void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) {
-  //std::cout << "MemoryUnit: attach() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and m 0x" << &m << std::endl;
   decoder_.map(start, end, m);
 }
 
@@ -513,7 +510,6 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
   std::ifstream ifs(filename);
   if (!ifs) {
     std::cerr << "Error: " << filename << " not found" << std::endl;
-    //std::cout << "loadBinImage Error: " << filename << " not found" << std::endl;
     std::abort();
   }
 
@@ -525,7 +521,6 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) {
 
   this->clear();
   this->write(content.data(), destination, size);
-  //std::cout << "Loaded binary image: " << filename << ", size: " << size << " bytes, destination: 0x" << std::hex << destination << std::dec << std::endl;
 }
 
 void RAM::loadHexImage(const char* filename) {
diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp
index 84cba9f134..bab67233b8 100644
--- a/sim/simx/cluster.cpp
+++ b/sim/simx/cluster.cpp
@@ -103,12 +103,10 @@ void Cluster::set_satp(uint64_t satp) {
 #endif
 
 bool Cluster::running() const {
-  //std::cout << "Cluster: running()" << std::endl;
   for (auto& socket : sockets_) {
     if (socket->running())
       return true;
   }
-  std::cout << "Cluster: running() returns false" << std::endl;
   return false;
 }
 
diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp
index 770c86701a..55791907a3 100644
--- a/sim/simx/core.cpp
+++ b/sim/simx/core.cpp
@@ -440,7 +440,6 @@ int Core::get_exitcode() const {
 
 bool Core::running() const {
   if (emulator_.running() || !pending_instrs_.empty()) {
-    //std::cout << "Core::running() emulator running: " << emulator_.running() << ", pending_instrs size: " << pending_instrs_.size() << std::endl;
   #ifndef NDEBUG
     for (auto& trace : pending_instrs_) {
       DT(5, "pipeline-pending: " << *trace);
@@ -448,7 +447,6 @@ bool Core::running() const {
   #endif
     return true;
   }
-  std::cout << "Core::running() returns false" << std::endl;
   return false;
 }
 
diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp
index 1028aba435..3eb62f9c76 100644
--- a/sim/simx/emulator.cpp
+++ b/sim/simx/emulator.cpp
@@ -130,7 +130,6 @@ void Emulator::reset() {
 
 void Emulator::attach_ram(RAM* ram) {
   // bind RAM to memory unit
-  //std::cout << "Emulator: attach_ram()" << std::endl;
 #if (XLEN == 64)
   mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39
 #else
diff --git a/sim/simx/obj/common/util.o b/sim/simx/obj/common/util.o
deleted file mode 100644
index 6e8785424bf8ab0f48eb0b6ff4ced8860499c1df..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 18136
zcmeHOeQ;aVm4C9VL`jIG@U?dM5+I5LRF(2+!%}3&N}h-UZtQ@!5EMn0KNj2a%5vgB
zD517PR7H^8$!^)<V;D+z*e$TM6G~wyb(b_rDdnp$OSkP3I$ekh<#P#v0^Z-bAJXx&
z9)z^}$Ik4XdAj%g?z!ijd+s^s-uIq;eKfwJ%Hc3{aTs4V%6A$yjP^*mEt?TzzELY^
zXQ690t~t1l!A0YJxaQ$%Y>VZeUu)*yYc=zKG)G?db(n?fv+B@e=9>fV@VGhlTyB;*
zcE~%r6<}<^>+yUb5F7c(yQ$eMT!4mSXp#5(QDghecp(q~6wjyJO?#`)Hp~JTbeQK#
zc0(=Z$R6Jn>l&|o+<RKYSUvW;+zYRrywl9TWaeK9KX(w!A<}8)_d$VxnP2XU=YMPF
z-w+*&^=scUix(210L9O{-cvtpeJm&j|4}j-8_&%%8y?RcV~&hFu6VrDds;JC7hN<E
zx1B-nX`|FONDmq>r(e8kruqJFOvg*Gf?IOR|C{M}(LDX2nQu7^?cG<bdz|Ra!u5wI
zBm*(N@$$yY8rPB~KU&*r-thW8FmOH>I3`})9oPvvv)CK($Mf$q(~bsi0a18d$|(rl
z!iUU#qk(R<y3`(K?X9{!GVU|;qk?gGyc4Yf^q7(b4ce{<<ArXwS!nkWwo_rT!gUAZ
zd1zxg-Zb;4aq02mmcV**UvnTtGowa5HRAb6YDcIId)AOw_)F571E%NzMT??{Xyq7D
zyXu4D&4IcpQuFU)Y&Ur$i5b>}dq)FXMVkhgMR`Y~1&aZ*NN|2HAvJ>EBUK<bi%aC7
zH<b+NTEIe>`8Ul%%R$re`$ALA$bkn(_Pg^<a$F3Ye2shYW5n|CJNRr%44SwFqp@-C
z=!a;<0Y}V2JYdEf#=SQ_4Jfu?fB3oZp?LmPJ|DBNB``3}P9x*ftRhyCCUL=*fDis!
zP*W@pI5<Btza|xUtPqte2!As3S5}wSLQr9)p_*bKeXJaSE6f}AkZZ>C2Ph01*EL?%
zm_RUY#Sp_RO(&L@UOBN~a&mH~S<D3n^du)v1SGgq7{u~>L==dS$p6Hq{kXspgfixc
zxEwDk^W5<IEi<4(eoMejF*D$VlEp^@16WfNfmCCSo$a6wkH<kDiKE}oEf)T1_)uql
zN1&w|gW>unYB51&AYxu}&Cx)+?gr_4-7d7dX=r!pJ|d!MK;P$oEM2j%9I4?prRR=n
zp)bY_lP{u}=#M)NBc*W#<HdUeTR{<rQ;L8>1k*%S1S?-5HN~iS{tZlDF&Hu)af302
zDqxoydkq9ln&hr$BA0{@iS9#i2e5H@M;*|dqG&y(3dlb|+9A`H!ZBVDc~vP9ey%gW
zD-eNJvBIT3a==(&Qw^D<UQZ2qM=&ci9u!vAIA~f}yPdVy>o!(LDzLj+)EFc5C8Y|R
zVbWZn1<8<76e3P^#R@C^fFSiPp?Wi-tPnY979Xu90NqFjGig4<n%)iFLxijslwj3u
z=66Vm<!mW7Lt+Uf+$u}zWjR22nbfDdL0Y5RBjezzZ2{9uPJZ1D(mLH%D=Q|TxKhSZ
zetDRNVu?Z^S4-0_rlK@hjYQ@KyL!cLHSM;Ru`OaOGB>Fd!3{z^?szLV3%jj%^o&^q
z><-)lNwJYnyxtopp)gtw@0iPpqk#AO)q(9;KjMOfS5L$yaVg#%AoUPh&tYc|p?9}p
z|Ekic2Hi^AErGgt{uOzH`8<FgWOJ$nqdQTPdRtX}C)MC`4{9o}ohbL~!Y%>f_~9M9
z9-i~adFTXV<Z>m*XBO5RJk;#-yK>K(g+~K-g224s(ZE*JGNY1E(~?n%l1x)+kt-}_
zg+=2=NA|9lvV_&jvS=&W5@_cuq|_D;NzRD=I_a9)D}`6b1H&ahCl`m$N|$L4n6w48
zQ11bnnd+78lJ5yI)UEVfkI5?0$ggH`CAQF+w7xZFA%tzwZ93jL_}+VylgiQJ1+|IN
z6a?4L&Q!7D3UI}uVxgogr0;{Jn*vw?_yGW|Rhg2KC`{omWzxcxw9DTn(`h;UN?BFS
ztCS&W@1z}}w9|RV7SeWtDY~)c(bh73zbkC=k33Z^be(wq!^ug#Kdr_|cWeUF77;M<
z!AI29Ot-s*gVFx971@;br=3*i-;d|vbCh6V$NT;pcKiW$Nh%ka4bTid-cg#yBUlKy
zqp)r{rDGj%Bi5l1QnuR>ct()6zz#bC+o_Fuoa%zxY<Si?dOMl2gqea)j47Dzy%C@2
z+b_l;N|bv0B5oi?ykSy$EU90P4utAfnzq~(lqQ9_G=f%!CMXZ6?Xagn5wzX9U2KG$
zIIWvxBTuI5#WG-hK{}vG(H>^)t-1|A7gj_+*-VHitirzbwwZrg8b+!^XQE;wPNEPc
zrqCGCVinnzKq#L7MBVHbhEw~dJiVAmE@Ehb_RaUjzL~=6l=sa$VG8V<R{_*PAj3i)
z$jZW%f2uS=7Sypo=9MbIRSuVKR4nW#&6OvYDMc&@GfIn7V1y_GNNvF#qqu<-2a>C%
z`>6C2<iYJA_Kxm>b5n#!k5Z@0iTj1iH-j7enKlH9PfClWCo#E!kUL<opn$z8$;I>U
z==>f>P>3#&(BO$DKA4=8&s*<$GZCM6fs{Lu{#fHY?sk1bs>LZO!F}=$k~B`bTmTmk
z$~Tdl3kumQCXpv!8x8CyO6sE}flcV1ippQubBG$w+vp~8Vn}W>PIYP3{?9C=4B07*
z`2TxeDIN}DQK{&ND(!g_4Z<TcGNXFu#e5Uv@J0R_fnuzF;tN0p7v3eGY~&M+Z0}=F
zIUGNdw%rpF$2ouAJ`(;%P6JDANDlU-5`(#HPk-0>=O<PtQkj0-4W)Aa{-NGpe}5+D
z-<a&}=`f7Yu!TA~Fp%!=Fup_<7iN*+B;O#>+X);mt~br%@^-UfUv4fQgWAjcj0(Nt
zwe@nboL2~w5-%!@A70CRENK+KsOE=8`J^SCpm?3)7gdQc|K?-e;I~e2#{+CE|7*Fv
z>inpmX=(Y@s{s9I&7Z4jZ`NrV?542LIa7j^*uiW1kuO%<<Sw6DrMe)j(|k+LO2Z>c
z!b_qq|EOu`{Y!C?+jAj;4EGhe_tLf^&V{=w?4~8zAuYSSaTMjoAtaV;qii*?0jC*#
zWO?o7QtRK#52D!Q^^@?F^Be8<Na9p<*2OIST7LNur$kw1Y`N8nP23~4j!!)iL$VkR
z$rEEBH;l<iN9_+CuA65VBTm=I4C9;51@F6zk?P+&Zk<7$)h9+sPh;}dT@J@L9ETjP
z;(X)YD%TD3jfbk1yjN>{uiEwUeB-y(u0PB-el^qe!hGZ4OxI868++WYAI>-aVUFtq
z^v-ep<9x?Wb6ueS&$)zu!{eH$HSX}Zep72a<a0d>+~@j9t?`<#;k)yVH|Dt>u65jX
zoa???W83jAAP=43`u2QBv6e>NQ%g9uR;kxK$E<Z&#HXw9qd?hZ7%P`AKi^-scEeDA
zZpa@F1(yUvUkne4+wj*GhJvBR3luCyz)uVxE^>*Lm#j6gM&pFxIZ%}2RqeQDjo}#f
zIZirerh6wuhJc@}`o^hl^7wXCHG68dIh#HHk?NJ6P}NnlJ^n^dO{2%x<Z(ym2>KQ^
zCQNnI;~S~Ea<<34a*pY78v#HAs-O5%|2E<|;=IJ;zf5py1f36EbljjKJRfxpu6qeU
zG3h5xyf=Yb_U$JO<udv~-?pl%D0S*NIId}liy6+ml_*Zq^CW#qFDhrlC}bOBg~xYu
zRg<UYCTEk!U#xEO)a`IJdqUf0G<xbsX0G%ca5`_Tn(e6v*667N4QOE>Siz)s@AL#q
zrLs(n9+|mq#tv7p`X=Yi6f?92i)jD~=`VCZO}dTaqA~_Ue7bJUQnzR3I<J@k-5QTd
zw`b;lif;GAFSKq_tLeYwt<}cch{EZ`S@OE6eNN$>Q!%=88tR$B(&aQ}S`hi&ql)n#
zXB19@ykvTD8ndQ@K%C+W%JeR0$s4EkISubr0C&T<su;cSOJKJvT;G#Tb#V&M*LUUG
zX&hI+VJ)Zd90sVS(>P%oNN=}|3U-qVHQhedLNB*9`xvtnUTcxyQMlDVjuW`}v5X))
zA3F^rz&TB-By&>svJ`q!p?qO2oi_Nt*x=u_!Jo3hUjlwCG#yuhMZQD(V;g!GR3~~p
zwU8QjCh$u7pQGqQ;73L0!4)?2tAL-u{MnSljO{k`du;G0ZSW6l@Y$GBCAljFPT5cv
z?eC+&$qo@UFUq-8?nZrjRoN2SBgcW4QAta`zp%kSu)!PPn3ec^%?5wW27lTHKLs0X
zCH_lo@at{xzq7%=V>7Q8ZRk%%>a3*CIvczfIIYV7=0}D08!CHj=-;xzPey94B==$)
z{3aXx2^)MC9HSC{(+0oE2LI3oKMPwzCH|M&;8`2|AshS!8~i03+=CDLmGtr3;2|6Q
zDjVDw%JuXHQv(A=;;Wam=9VQAso~*pIJ}TQIL1<~xo|kuoy;b3*<?>{FqVq8Ch-#u
zFXWGuu@*oZ+Y+tI60Ny~q0UTpQ!?9;=*gwC$y^45lU>1{!E7=ZB56xEX0o~Ta5$99
zB)ZeX^pM)LL`aYRbug9b?chYqvgyH0@5XeZv!^$m7)a*2qia?jg~CzjCYcB0%R>f!
z!t}F{ewNVBVv8Y`a6@bu>a@2-^OERBBe61mX;Wi7ap{T`t<knbTVqo^ny|1uLfbZw
zX|w5ME*)Ld*Jt#l`%(j&!3t~9kVxdZvzbka-b@!B@6*|ACfl+qWh7w4Wa{cfs{86h
zXR@c4TW}FsE;^h`_Yd}D`lCb+i)T~aSx&VW?*h8|(;bPvWN&XK6(zx<#fcVbqRwnO
z9c|fYP)!8Pfj)=b7Lwh*+IHE|4Ub}MM{mo<KEVUVDN9Z#`)pQ>20`3XOflJ;ZVn3#
zj!yocHaH|De_AXU&6fwW>CRvx(KS4r7)WOaGyPbA+~&ka5l5}L`ku^S0)Coau1(pR
zYmk16o=P4|^KWb86IS#Y`Kk!NFkYpRGVYcAK10ajp8lR(w59TzgcCxEDXV@|N~lF#
zE*BBPmL?K34NM@>(?6Kb<|@r2x(1Ysq>&VL4DlLDL|d08T2{3#N{Hxb%}^LBHhocy
z%CsG2JcmQzluZxxCR1s|6#|f47Hd#izGk_6fIe>?CMA=Dg9u%u#n#1tiiRCMg9Dkt
z^t6chd}|}4Vr~DXp8k%+n)KjMUz#SX{ru0F&V?&uF&8V3@3ePb>2XI+i<QtmLfO+U
znJCvgLh8jCyjG?BOUx57gzlvP`Z68DLZ2DAOH#e*WHySy3&mhC$YGC|7SQ$597i6{
zQxov#9?reCoPzP%*~V%zkt~Db4sPyGMO#wXYG`v?)|Jln_VlMK1+5&zBDY7awTj$j
zi8Wk}sT(cAqIF544S6RjBc$!JBU@f_+#{pJ+OW{U0#R1R22IX`<5GpKVycR^=PVK5
z;&nVZjx^N5Q0Wwq?(fQVvtt1C?I<UcBZ>^E8%}@8#D;WNPk$nn%%!?<q96pH0rdTm
znxev7KbK31@%Z~n`4}SJJJGv5e5V)XE{31N@b4)ckE?i|DbqisaC*EsmEn&v{4|C?
z#qiS^{+hyxKfRUF`shCtBK~JGe5-oSC7eE_X!;u!eh#n@!=GR{y;IQipD;cP8BXsN
zs1Tn;xHNse4Ze=y3mAP);iM<Mi`0CsW%Oq=oZcT$A$qRQGYY4Q-fL<;FEV=i%Lt8s
z#PFpIZ^rX372>}Pm!@B(aO-^6+u(x?=lbtr{OcJ1TNypK&%KPEWNUr)FnYd@FEILg
zMt>qr2*o<D1q^RMzvjP);oLv>GMxL>&lPU<EBeC(DuiEzOY`r-`vWS}&DXJC;Y1(7
zrRjIr;I}hAjg0;thI9T8GrWn>)B6l6q|b6(TCNlC8>kS@=Q~^Bq-QfOO+Sy}d|kf9
z_;7uGVS^uLe4>p1nRq`zh2(Pl5*vIo!?|DGp>UcPkGpR(`m@oe=M~3$3o1mv0++`B
zis5`-4>Eiuqko0rbqxQ5!maai&jJyO6+g)a4>FwVzeeGtKbPCfaFU_t^#tR?*YQ<G
z&*O{Umrx-d3vg*ZCotS(IK4-qLi7<_ntrJb-l}lYKgQ_SGyGzPU&rus7=D++3FGtn
z5u@kxnq)YSi$V2@mC`7^_1F4et8mhD6~k|~!K;q}5sGzPY8lSs&aZH*{`W9??uY-%
z=p&G$^?aJq^StqEhI2k|Go0rQpL$<O`g8l7rEt=d%RP_b+#i}4&gm13KVSD=hF=UB
zdR}>klWdLO&-n9o{~4p_>;4YIdAuCw0aFy3FFzMGFr1%@HZh#r=LLn6j@%B186H8O
z)_;x{Hz-zn)+wCy;r3a=aBiP94CnUQ#Q1aj>|*rXJ`XXR+vh)Q@Ru2Xp0_`;!M#2)
z-D!F642JXkwnE|7I9kteKHtX}pAbZ8JO7H&^Z9<DaEkX68T}`WKF;vD^v_=?qz^wo
z9ItTVb1W_`x0ccKb8-Vaf1btYFCxY$#Ai0HnRJ7)%7XuegpJEB_?Hx(vfx^uvzVU5
zKcVQu7JRqDFSOu4Q+UjR>wmRdYr*w*siXzJSk1rBg10Grvjx9W;bRv328I8v1^*|7
z-)_MlRQNwz@W<7>9<<=+D*7K=@I4CuxdmUN`25m>k171WEcmmE&)XLKE=8}`pM0Fh
z#d)fqmYK&XJ+n|DdcI#b3sm|o^=W_j-{!CFVbxR1CHw-Wk4hUNzW$Qo+TV%(LWZA?
zHWk9p#ii#(&puRWt~?K1EKo_$^8nYA^VjiV)pIlB!|kcpndn#J()#RB^j3ZJIukwD
z=Uzom_$9bBA03y3b3Gqs^p`UF#~9wia2?0Q=PL|<PSKN|{JG|rHn{%ZAM3nMReo)q
zmyRdG`MNJ+^c=r{;j~8D4)p&uRIGf~*x)*USm(Q4(OdPugYn_(qVo#z;r9Oyqv!U2
zfZ^Q!;-4Cn{+#|tHuO(2ocp=XTeM%$rO#D5&uLtrvvuCmxISlKs3;`neATAPuiBbk
z-5PBcd|ch<EV!ZczRiN`zr%Qd;cnHY>Pd#rV)(lZ4>DZ)q1IFHJM)x$!sjshE{1bH
zI~YEj(f>WeiO(sj?{0>Z4E??TK86!L<!CBSB0`~x%dKNL$<=?~5oS2?8OJ3`aBy>9
zF1f)7;*G7WyLBs@?o9^qB{*jUJCeDi5!^61Xautvx*_s#uzu<-KG~MW1jWnQ()W@h
zfAcKfkN!FM&?h{`XQ;oo=jwFtW+T{@!l&8cRC*wXFW(z_(~3$=3;GAe$H(BOeci*C
z>b|~oKMaGv(@qER2b^CNA9&S$SN~9OAdAll*&M_Plcc*7o!KNlwgkJfnV|t~{iCk`
z27aNo`YAjj>h5?|(|$)6<rylvwi~bG@fb-i#*%;Oak};cBiB@~VbGFgy{g}{DCbkS
zu1-ROcttdz>Y8(9;{o-2MY=n2>6orjbYDPC@~MxPwa4<ia$AK_2{;ce;(4^=!x&3E
z7vkcjUCF0?fETU5)+2(>O5?w-#*=+`(c^SY>qPAtDy}u{ooc+P0aaV=PkV*1zh(S>
z+N4m{i(lzGlsC2h+WwS>E6IOL$=7*Dv6=R_GT%f;CHZyqk`-lGR>J=%Us>h<4w#Uy
z=ybJ$UjCz0Tjr(hCVLIzC8EI<(SWM&RPyN`x2cf-kt<r|)BZs`En<wg`V-IR(Z>tP
z;$=1l(|Dc7b)TOe9#OPBc{lyug~65V7ox)iie4wupVnUU)A&*)KSac=)G7HCAH0yR
zRCIlj8ZXiEZ<`ulMr2Em)A*$T=<IH_U%MLb*MzFp`fHf>UE*mSKdi><?<|_$I(`EN
zS6Y8l+3yx&jzadMCDr4#{VK^O`Ue!je)V8Nw-n3P@f1_m+ylrM_S3In+`$hGxL4|k
VKKh~bT50@&FH4eP%K*!*@jp&36=DDY

diff --git a/sim/simx/simx_config.stamp b/sim/simx/simx_config.stamp
deleted file mode 100644
index 1eb1208835..0000000000
--- a/sim/simx/simx_config.stamp
+++ /dev/null
@@ -1 +0,0 @@
--std=c++17 -Wall -Wextra -Wfatal-errors -fPIC -Wno-maybe-uninitialized -I/nethome/jsubburayan3/vortex/sim/simx -I/nethome/jsubburayan3/vortex/sim/common -I/export/nethomes/jsubburayan3/vortex/hw -I/nethome/jsubburayan3/vortex/third_party/softfloat/source/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/spdlog/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/yaml-cpp/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/src -DXLEN_64  -O2 -DNDEBUG
diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp
index a729ad3b76..b8ba8939f0 100644
--- a/sim/simx/socket.cpp
+++ b/sim/simx/socket.cpp
@@ -140,12 +140,10 @@ void Socket::set_satp(uint64_t satp) {
 #endif
 
 bool Socket::running() const {
-  //std::cout << "Socket: running()" << std::endl;
   for (auto& core : cores_) {
     if (core->running())
       return true;
   }
-  std::cout << "Socket: running() returns false" << std::endl;
   return false;
 }
 

From 32653581c96d8759dca8e581c1e02d84d7abd80a Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 12 Nov 2025 16:19:28 -0500
Subject: [PATCH 06/15] revert all memory based changes + makefile update

---
 .gitignore                    |   2 -
 sim/simx/Makefile             |  34 ++++--
 sim/simx/VortexGPGPU.cpp      |  96 +---------------
 sim/simx/VortexGPGPU.h        |  24 +---
 sim/simx/processor.cpp        |   6 +-
 sim/simx/vortex_simulator.cpp | 204 ----------------------------------
 sim/simx/vortex_simulator.h   |  24 ----
 7 files changed, 36 insertions(+), 354 deletions(-)

diff --git a/.gitignore b/.gitignore
index 41d5fd961a..43388e9cb5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,5 +2,3 @@
 /.vscode
 *.cache
 *.code-workspace
-sim/simx/simx_config.stamp
-sim/simx/obj/common/*.o
diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 22032d7444..e2a7f65721 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -1,6 +1,8 @@
 include ../common.mk
 
 DESTDIR ?= $(CURDIR)
+USE_SST ?= 0
+SST_PKG ?= SST-14.1 # default SST package name
 
 OBJ_DIR = $(DESTDIR)/obj
 CONFIG_FILE = $(DESTDIR)/simx_config.stamp
@@ -27,13 +29,13 @@ SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp
 SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp
 SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
 
+ifeq ($(USE_SST), 1)
 VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp
 VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp
-
-# SST flags
-SST_CFLAGS := $(shell pkg-config --cflags SST-14.1)
-SST_CFLAGS += -I../../../sst/sst/sst-core/include/
-SST_LFLAGS := $(shell pkg-config --libs SST-14.1)
+SST_CFLAGS := $(shell pkg-config --cflags $(SST_PKG))
+SST_LFLAGS := $(shell pkg-config --libs $(SST_PKG))
+CXXFLAGS += $(SST_CFLAGS) -DUSE_SST
+endif
 
 # Add V extension sources
 ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),)
@@ -65,11 +67,16 @@ COMMON_SRCS := $(filter $(SW_COMMON_DIR)/%.cpp,$(SRCS))
 SRC_SRCS    := $(filter $(SRC_DIR)/%.cpp,$(SRCS))
 COMMON_OBJS := $(patsubst $(SW_COMMON_DIR)/%.cpp,$(OBJ_DIR)/common/%.o,$(COMMON_SRCS))
 SRC_OBJS    := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC_SRCS))
-VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS))
 OBJS        := $(COMMON_OBJS) $(SRC_OBJS)
 MAIN_OBJ    := $(OBJ_DIR)/main.o
 
-DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) $(VORTEX_SST_OBJS:.o=.d)
+DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d)
+
+ifeq ($(USE_SST), 1)
+VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS))
+DEPS += $(VORTEX_SST_OBJS:.o=.d)
+endif
+
 
 # optional: pipe through ccache if you have it
 CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX))
@@ -79,11 +86,11 @@ VORTEX_LIB := libvortex.so
 
 .PHONY: all force clean clean-lib clean-exe clean-obj libvortex clean-libvortex
 
-#ifdef USE_SST
+ifeq ($(USE_SST), 1)
 all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/$(VORTEX_LIB)
-#else
-#all: $(DESTDIR)/$(PROJECT)
-#endif
+else
+all: $(DESTDIR)/$(PROJECT)
+endif
 
 # build common object files
 $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE)
@@ -96,9 +103,11 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 # build SST-specific source object files
+ifeq ($(USE_SST), 1)
 $(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
 	@mkdir -p $(@D)
 	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@
+endif
 
 # build main object file
 $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE)
@@ -113,6 +122,8 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ)
 $(DESTDIR)/lib$(PROJECT).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
 
+# Vortex SST simulator component shared library
+ifeq ($(USE_SST), 1)
 libvortex: $(DESTDIR)/$(VORTEX_LIB)
 
 $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS)
@@ -121,6 +132,7 @@ $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS)
 	$(OBJS) $(VORTEX_SST_SRCS) \
 	-shared -o $@ \
 	$(LDFLAGS) $(SST_LFLAGS)
+endif
 
 # updates the timestamp when flags changed.
 $(CONFIG_FILE): force
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
index 536e0e1583..84583ca6ce 100644
--- a/sim/simx/VortexGPGPU.cpp
+++ b/sim/simx/VortexGPGPU.cpp
@@ -1,8 +1,5 @@
 #include <sst/core/sst_config.h>
 #include "VortexGPGPU.h"
-#ifdef USE_SST_MEM
-#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete
-#endif
 #include <cstdlib>
 #include <vector>
 #include <utility>
@@ -10,19 +7,12 @@
 
 using namespace SST;
 using namespace SST::Vortex;
-#ifdef USE_SST_MEM
-using SST::Interfaces::StandardMem;
-#endif
-
-#ifdef USE_SST_MEM
-VortexGPGPU *VortexGPGPU::instance_ = nullptr;
-#endif
 
 VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     : Component(id),
       sim_(std::make_unique<vortex::VortexSimulator>()) {
 
-    std::cout << "VortexGPGPU: initializing Vortex GPGPU simulator\n";
+    std::cout << "VortexGPGPU Component: Initializing Vortex GPGPU simulator\n";
 
     // Parameter: clock frequency (default 1GHz)
     std::string clockfreq = params.find<std::string>("clock", "1GHz");
@@ -32,53 +22,10 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
 
     //const uint32_t launch_bytes = params.find<uint32_t>("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used
 
-#ifdef USE_SST_MEM
-    // Create StandardMem interface; auto-bind to port name "memIface"
-    memIface_ = loadUserSubComponent<StandardMem>(
-        "memIface", ComponentInfo::SHARE_NONE,
-        registerClock(clockfreq,
-                      new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick)),
-        new StandardMem::Handler<VortexGPGPU>(this, &VortexGPGPU::handleMemResp));
 
-    if (!memIface_) {
-        SST::Output out;
-        out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n");
-    }
-#else
-    // No SST memory: just register our clock handler
+    // Register our clock handler with SST
     registerClock(clockfreq,
                   new SST::Clock::Handler<VortexGPGPU>(this, &VortexGPGPU::clockTick));
-#endif
-
-#ifdef USE_SST_MEM
-    // Register callback so SimX can submit memory to SST
-    instance_ = this;
-    // Track app-specific tags by StandardMem request-id
-        // (e.g., inside your instance_ type)
-
-        vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) {
-
-        StandardMem::Request* req = nullptr;
-
-        if (write) {
-            std::vector<uint8_t> zeros(static_cast<size_t>(size), 0);
-            // posted=false so we get a WriteResp
-            req = new StandardMem::Write(static_cast<StandardMem::Addr>(addr),
-                                static_cast<uint64_t>(size),
-                                std::move(zeros),
-                                /*posted=*/false);
-        } else {
-            req = new StandardMem::Read(static_cast<StandardMem::Addr>(addr),
-                            static_cast<uint64_t>(size));
-        }
-
-        // Use the StandardMem-assigned ID to correlate responses
-        const auto id = req->getID();
-        instance_->tag_by_id.emplace(id, tag);
-
-        instance_->memIface_->send(req);
-    });
-#endif
 
     // Load the kernel image
     if (!sim_->init(kernel)) {
@@ -86,22 +33,9 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
         out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n");
     }
     else{
-        std::cout << "VortexGPGPU: loaded kernel: " << kernel << std::endl;
+        std::cout << "VortexGPGPU Component: loaded kernel: " << kernel << std::endl;
     }
 
-    // needed when launch descriptor is used
-    /*
-    if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) {
-        SST::Output out;
-        out.fatal(CALL_INFO, -1,
-                  "VortexGPGPU: unable to allocate launch descriptor (%u bytes)\n",
-                  launch_bytes);
-    }
-    std::vector<uint8_t> launch_payload(launch_bytes, 0);
-    sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size());
-    sim_->setStartupArg(launch_desc_addr_);
-    */
-
     registerAsPrimaryComponent();
     primaryComponentDoNotEndSim();
 }
@@ -111,33 +45,13 @@ VortexGPGPU::~VortexGPGPU() = default;
 void VortexGPGPU::setup() {}
 void VortexGPGPU::finish() {}
 
+// Advance the GPU execution one cycle based on SST clock handler callback
 bool VortexGPGPU::clockTick(SST::Cycle_t cycle) {
-    // Advance the GPU one cycle
-    //std::cout << "VortexGPGPU: clockTick came from SST " << std::endl;
     bool running = sim_->cycle();
-    //std::cout << "VortexGPGPU cycle returned: " << running << std::endl;
     if (!running) {
         primaryComponentOKToEndSim();
-        std::cout << "VortexGPGPU: simulation finished\n";
+        std::cout << "VortexGPGPU Component: simulation finished\n";
         return true;
     }
-    //std::cout << "VortexGPGPU clockTick returns false " << std::endl;
     return false;
 }
-
-#ifdef USE_SST_MEM
-void VortexGPGPU::handleMemResp(StandardMem::Request *req) {
-    // Inform SimX that this request has completed
-    const auto id = req->getID();
-    const auto it = tag_by_id.find(id);
-    if (it == tag_by_id.end()) {
-        SST::Output out;
-        out.fatal(CALL_INFO, -1, "VortexGPGPU: received response with unknown ID %lu\n", id);
-    }
-    else{
-        vx_on_mem_complete(it->second);
-        tag_by_id.erase(it);
-    }
-    delete req;
-}
-#endif
diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h
index 61acbaa40f..1421bd0c10 100644
--- a/sim/simx/VortexGPGPU.h
+++ b/sim/simx/VortexGPGPU.h
@@ -1,7 +1,6 @@
 // VortexGPGPU.h
 #pragma once
 #include <sst/core/component.h>
-//#include <sst/core/interfaces/stdMem.h>
 #include <memory>
 #include <string>
 #include "vortex_simulator.h"  // wrapper around SimX
@@ -21,18 +20,19 @@ class VortexGPGPU : public SST::Component {
     // Register with SST
     SST_ELI_REGISTER_COMPONENT(
         VortexGPGPU,
-        "vortex",           // element library name
-        "VortexGPGPU",      // component name
+        "vortex",           // Element library name
+        "VortexGPGPU",      // Component name
         SST_ELI_ELEMENT_VERSION(1,0,0),
         "Vortex GPGPU Simulator",
         COMPONENT_CATEGORY_PROCESSOR
     )
+
     SST_ELI_DOCUMENT_PARAMS(
         {"clock", "Clock frequency", "1GHz"},
-        {"program", "Path to the kernel or ELF to load (defaults to built-in test image)", ""},
-        {"launch_bytes", "Size in bytes of the default launch descriptor", "64"}
+        {"program", "Path to the kernel or ELF to load", ""},
     )
 
+    // for future usage with SST memory
     SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS(
         {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"}
     )
@@ -42,20 +42,6 @@ class VortexGPGPU : public SST::Component {
     bool clockTick(SST::Cycle_t cycle);
 
     std::unique_ptr<vortex::VortexSimulator> sim_;
-
-    //uint64_t launch_desc_addr_ = 0; // required only when launch descriptor is required
-
-    #ifdef USE_SST_MEM
-    void handleMemResp(SST::Interfaces::StandardMem::Request* req);
-    
-    // static pointer used by lambda in vx_register_submit()
-    static VortexGPGPU* instance_;
-
-    SST::Interfaces::StandardMem* memIface_;
-    std::unordered_map<SST::Interfaces::StandardMem::Request::id_t, uint64_t> tag_by_id;
-    //#else
-    SST::Interfaces::StandardMem* memIface_ = nullptr;
-    #endif
 };
 
 } // namespace Vortex
diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp
index acded6dd65..114b275d03 100644
--- a/sim/simx/processor.cpp
+++ b/sim/simx/processor.cpp
@@ -159,6 +159,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const {
   return perf;
 }
 
+// Advance the simulation by one cycle for SST - code adapted from run() method
 bool ProcessorImpl::cycle() {
   if (!is_cycle_initialized_) {
     std::cout << "ProcessorImpl: Initializing cycle()\n";
@@ -166,7 +167,7 @@ bool ProcessorImpl::cycle() {
     this->reset();
     is_cycle_initialized_ = true;
   }
-  //std::cout << "ProcessorImpl: cycle()" << std::endl;
+
   SimPlatform::instance().tick();
   bool anyRunning = false;
   for (auto cluster : clusters_) {
@@ -176,7 +177,6 @@ bool ProcessorImpl::cycle() {
     }
   }
   perf_mem_latency_ += perf_mem_pending_reads_;
-  //std::cout << "ProcessorImpl: cycle() - returns: " << anyRunning << std::endl;
   return anyRunning;
 }
 
@@ -217,9 +217,9 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) {
   return impl_->dcr_write(addr, value);
 }
 
+// advance the simulation by one cycle for SST
 bool Processor::cycle() {
   try {
-    //std::cout << "Processor: cycle()" << std::endl;
     return impl_->cycle();
   } catch (...) {
     return false;
diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp
index 5ace3cede5..f339f434d8 100644
--- a/sim/simx/vortex_simulator.cpp
+++ b/sim/simx/vortex_simulator.cpp
@@ -18,33 +18,11 @@ VortexSimulator::VortexSimulator()
 : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES)
 , ram_(0, MEM_PAGE_SIZE)
 , proc_(std::make_unique<Processor>(arch_))
-// , kernel_image_{}
-// , next_alloc_addr_(kAllocBaseAddr)
 , halted_(true) {}
 
 bool VortexSimulator::init(const std::string& kernelPath) {
     proc_->attach_ram(&ram_);
 
-    // kernel_image_ = {};
-    // next_alloc_addr_ = kAllocBaseAddr;
-    // ram_.clear();
-    // ram_.set_acl(0, kGlobalMemSize, 0);
-
-    // can be used when launch descriptor is required
-    /* bool has_kernel = false;
-    if (!kernelPath.empty()) {
-        auto image_info = this->loadKernelImage(kernelPath);
-        if (!image_info)
-            return false;
-        kernel_image_ = *image_info;
-        has_kernel = true;
-    } */
-
-    // Program base DCRs - align startup to loaded kernel when provided
-    /* uint64_t startup = STARTUP_ADDR;
-    if (has_kernel)
-        startup = kernel_image_.base_addr; */
-
     // setup base DCRs
     const uint64_t startup_addr(STARTUP_ADDR);
     proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff);
@@ -74,11 +52,9 @@ bool VortexSimulator::init(const std::string& kernelPath) {
 
 bool VortexSimulator::cycle() {
 if (halted_) return false;
-//std::cout << "VortexSimulator: cycle()" << std::endl;
 // Advance one cycle through the processor interface
 bool running = proc_->cycle(); 
 halted_ = !running;
-//std::cout << "VortexSimulator: cycle() returns " << running << std::endl;
 return running;
 }
 
@@ -86,184 +62,4 @@ bool VortexSimulator::isHalted() const {
     return halted_;
 }
 
-// Required when using launch descriptor and SST memory
-/* bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) {
-    if (addr_out == nullptr || size == 0)
-        return false;
-
-    alignment = normalizeAlignment(alignment);
-    uint64_t base = alignUp(next_alloc_addr_, alignment);
-    uint64_t end = base + size;
-    if (end > kGlobalMemSize)
-        return false;
-
-    uint64_t acl_start = alignDown(base, RAM_PAGE_SIZE);
-    uint64_t acl_end = alignUp(end, RAM_PAGE_SIZE);
-    if (acl_end > kGlobalMemSize)
-        return false;
-
-    int flags = 0;
-    if (readable) flags |= 0x1;
-    if (writable) flags |= 0x2;
-    if (flags != 0)
-        ram_.set_acl(acl_start, acl_end - acl_start, flags);
-
-    *addr_out = base;
-    next_alloc_addr_ = std::max(next_alloc_addr_, acl_end);
-    return true;
-}
-
-bool VortexSimulator::reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable) {
-    if (size == 0)
-        return false;
-
-    uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE);
-    uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE);
-    if (acl_end > kGlobalMemSize)
-        return false;
-
-    int flags = 0;
-    if (readable) flags |= 0x1;
-    if (writable) flags |= 0x2;
-    ram_.set_acl(acl_start, acl_end - acl_start, flags);
-
-    if (acl_end > next_alloc_addr_)
-        next_alloc_addr_ = acl_end;
-    return true;
-}
-
-void VortexSimulator::setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable) {
-    if (size == 0)
-        return;
-    uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE);
-    uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE);
-    int flags = 0;
-    if (readable) flags |= 0x1;
-    if (writable) flags |= 0x2;
-    ram_.set_acl(acl_start, acl_end - acl_start, flags);
-}
-
-void VortexSimulator::writeMemory(uint64_t addr, const void* data, uint64_t size) {
-    if (data == nullptr || size == 0)
-        return;
-    ram_.write(data, addr, size);
-}
-
-void VortexSimulator::setStartupArg(uint64_t arg_addr) {
-    proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG0, static_cast<uint32_t>(arg_addr & 0xffffffffu));
-#if (XLEN == 64)
-    proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG1, static_cast<uint32_t>(arg_addr >> 32));
-#endif
-}
-
-std::optional<KernelImageInfo> VortexSimulator::loadKernelImage(const std::string& path) {
-    KernelImageInfo info{};
-
-    if (path.empty())
-        return info;
-
-    const auto ext = getFileExt(path);
-    if (ext == "bin") {
-        std::ifstream ifs(path, std::ios::binary);
-        if (!ifs)
-            return std::nullopt;
-
-        ifs.seekg(0, std::ios::end);
-        const uint64_t size = static_cast<uint64_t>(ifs.tellg());
-        ifs.seekg(0, std::ios::beg);
-        std::vector<uint8_t> payload(size);
-        if (size && !ifs.read(reinterpret_cast<char*>(payload.data()), size))
-            return std::nullopt;
-
-        if (!reserveMemory(STARTUP_ADDR, size, true, true))
-            return std::nullopt;
-        writeMemory(STARTUP_ADDR, payload.data(), size);
-        setMemoryPermissions(STARTUP_ADDR, size, true, false);
-
-        info.base_addr = STARTUP_ADDR;
-        info.size_bytes = size;
-        return info;
-    }
-
-    if (ext == "hex") {
-        ram_.loadHexImage(path.c_str());
-        info.base_addr = STARTUP_ADDR;
-        info.size_bytes = 0;
-        return info;
-    }
-
-    if (ext == "vxbin") {
-        std::ifstream ifs(path, std::ios::binary);
-        if (!ifs)
-            return std::nullopt;
-
-        uint64_t min_vma = 0;
-        uint64_t max_vma = 0;
-
-        ifs.read(reinterpret_cast<char*>(&min_vma), sizeof(uint64_t));
-        ifs.read(reinterpret_cast<char*>(&max_vma), sizeof(uint64_t));
-        if (!ifs || max_vma < min_vma)
-            return std::nullopt;
-
-        constexpr size_t header_bytes = sizeof(uint64_t) * 2;
-        ifs.seekg(0, std::ios::end);
-        const size_t file_size = static_cast<size_t>(ifs.tellg());
-        if (file_size < header_bytes)
-            return std::nullopt;
-
-        const uint64_t payload_size = static_cast<uint64_t>(file_size - header_bytes);
-        const uint64_t image_span   = max_vma - min_vma;
-        if (image_span == 0)
-            return std::nullopt;
-        ifs.seekg(header_bytes, std::ios::beg);
-
-        std::vector<uint8_t> payload(payload_size);
-        if (payload_size && !ifs.read(reinterpret_cast<char*>(payload.data()), payload_size))
-            return std::nullopt;
-
-        if (!reserveMemory(min_vma, image_span, true, true))
-            return std::nullopt;
-        if (payload_size)
-            writeMemory(min_vma, payload.data(), payload_size);
-        if (image_span > payload_size) {
-            std::vector<uint8_t> zeros(static_cast<size_t>(image_span - payload_size), 0);
-            writeMemory(min_vma + payload_size, zeros.data(), zeros.size());
-        }
-        setMemoryPermissions(min_vma, image_span, true, false);
-
-        info.base_addr = min_vma;
-        info.size_bytes = image_span;
-        return info;
-    }
-
-    return std::nullopt;
-}
-
-uint64_t VortexSimulator::alignUp(uint64_t value, uint64_t alignment) {
-    return (value + alignment - 1) & ~(alignment - 1);
-}
-
-uint64_t VortexSimulator::alignDown(uint64_t value, uint64_t alignment) {
-    return value & ~(alignment - 1);
-}
-
-uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) {
-    if (alignment == 0)
-        alignment = kDefaultAlignment;
-    if (alignment < kDefaultAlignment)
-        alignment = kDefaultAlignment;
-    if ((alignment & (alignment - 1)) == 0)
-        return alignment;
-
-    alignment--;
-    alignment |= alignment >> 1;
-    alignment |= alignment >> 2;
-    alignment |= alignment >> 4;
-    alignment |= alignment >> 8;
-    alignment |= alignment >> 16;
-    alignment |= alignment >> 32;
-    alignment++;
-    return alignment;
-} */
-
 } // namespace vortex
diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h
index b0c890f593..028b2cc795 100644
--- a/sim/simx/vortex_simulator.h
+++ b/sim/simx/vortex_simulator.h
@@ -27,18 +27,6 @@ class VortexSimulator {
      */
     bool init(const std::string& kernelPath);
 
-    // changes to substitute for run-time wrt memory setup - required when using launch descriptor and SST memory
-/*     const KernelImageInfo& kernelImage() const { return kernel_image_; }
-    bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out);
-    bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable);
-    void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable);
-    void writeMemory(uint64_t addr, const void* data, uint64_t size);
-
-    RAM& ram() { return ram_; }
-    const RAM& ram() const { return ram_; }
-
-    void setStartupArg(uint64_t arg_addr); */
-
     /**
      * Advances the simulation by one cycle.  Returns false once the
      * simulation has completed (i.e. all clusters are halted).
@@ -49,18 +37,6 @@ class VortexSimulator {
     bool isHalted() const;
 
 private:
-    // required when using launch descriptor and SST memory
-    /* static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull;
-    static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR;
-    static constexpr uint64_t kDefaultAlignment = 64ull;
-
-    static uint64_t alignUp(uint64_t value, uint64_t alignment);
-    static uint64_t alignDown(uint64_t value, uint64_t alignment);
-    static uint64_t normalizeAlignment(uint64_t alignment);
-
-    std::optional<KernelImageInfo> loadKernelImage(const std::string& path); 
-    KernelImageInfo kernel_image_;
-    uint64_t next_alloc_addr_;*/
 
     Arch arch_;
     RAM ram_;

From edf3c0870f23a2a3eec29a52c8243b1d1b7e6806 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Fri, 14 Nov 2025 08:18:53 -0500
Subject: [PATCH 07/15] addressing Saurabh's review comments

---
 sim/simx/Makefile        | 7 +++----
 sim/simx/VortexGPGPU.cpp | 5 +----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index e2a7f65721..480d14745e 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -72,6 +72,9 @@ MAIN_OBJ    := $(OBJ_DIR)/main.o
 
 DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d)
 
+# generate .d files alongside .o files
+CXXFLAGS += -MMD -MP -MF $(@:.o=.d)
+
 ifeq ($(USE_SST), 1)
 VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS))
 DEPS += $(VORTEX_SST_OBJS:.o=.d)
@@ -103,11 +106,9 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 # build SST-specific source object files
-ifeq ($(USE_SST), 1)
 $(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE)
 	@mkdir -p $(@D)
 	$(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@
-endif
 
 # build main object file
 $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE)
@@ -123,7 +124,6 @@ $(DESTDIR)/lib$(PROJECT).so: $(OBJS)
 	$(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@
 
 # Vortex SST simulator component shared library
-ifeq ($(USE_SST), 1)
 libvortex: $(DESTDIR)/$(VORTEX_LIB)
 
 $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS)
@@ -132,7 +132,6 @@ $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS)
 	$(OBJS) $(VORTEX_SST_SRCS) \
 	-shared -o $@ \
 	$(LDFLAGS) $(SST_LFLAGS)
-endif
 
 # updates the timestamp when flags changed.
 $(CONFIG_FILE): force
diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp
index 84583ca6ce..eff4af33d8 100644
--- a/sim/simx/VortexGPGPU.cpp
+++ b/sim/simx/VortexGPGPU.cpp
@@ -18,10 +18,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params &params)
     std::string clockfreq = params.find<std::string>("clock", "1GHz");
 
     // Parameter: program path
-    std::string kernel = params.find<std::string>("program", "/nethome/jsubburayan3/vortex/build/tests/kernel/hello/hello.bin");
-
-    //const uint32_t launch_bytes = params.find<uint32_t>("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used
-
+    std::string kernel = params.find<std::string>("program", "");
 
     // Register our clock handler with SST
     registerClock(clockfreq,

From a538f61e32dec120b032e67e23b40b08eda34cc4 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 3 Dec 2025 03:40:45 -0500
Subject: [PATCH 08/15] CI changes

---
 .github/workflows/ci.yml        |  2 +
 ci/regression.sh.in             | 17 +++++++
 ci/sst_install.sh.in            | 85 +++++++++++++++++++++++++++++++++
 ci/sst_test_vortex_conform.py   |  7 +++
 ci/sst_test_vortex_fibonacci.py |  7 +++
 ci/sst_test_vortex_hello.py     |  7 +++
 ci/sst_test_vortex_vecadd.py    |  7 +++
 sim/simx/Makefile               |  6 +--
 8 files changed, 135 insertions(+), 3 deletions(-)
 create mode 100755 ci/sst_install.sh.in
 create mode 100644 ci/sst_test_vortex_conform.py
 create mode 100644 ci/sst_test_vortex_fibonacci.py
 create mode 100644 ci/sst_test_vortex_hello.py
 create mode 100644 ci/sst_test_vortex_vecadd.py

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d4ba58a14b..3033e5113d 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -59,6 +59,7 @@ jobs:
           cd build
           ../configure --tooldir=$TOOLDIR
           ci/toolchain_install.sh --all
+          ci/sst_install.sh
 
       - name: Setup Third Party
         if: steps.cache-thirdparty.outputs.cache-hit != 'true'
@@ -167,6 +168,7 @@ jobs:
             ./ci/regression.sh --isa
             ./ci/regression.sh --kernel
             ./ci/regression.sh --regression
+            ./ci/regression.sh --sst
           else
             ./ci/regression.sh --${{ matrix.name }}
           fi
diff --git a/ci/regression.sh.in b/ci/regression.sh.in
index 90a46b83d1..4d28f8516d 100755
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@@ -78,6 +78,23 @@ kernel()
     echo "kernel tests done!"
 }
 
+sst()
+{
+    echo "begin sst tests..."
+
+    make -C sim/simx USE_SST=1
+    make -C tests/kernel
+
+    cp sim/simx/libvortex.so $SST_ELEMENTS_HOME/lib/sst-elements-library/   # alternatively - $ sst --add-lib-path `pwd` myConfig.py
+
+    sst test_vortex_hello.py
+    sst test_vortex_fibonacci.py
+    sst test_vortex_vecadd.py
+    sst test_vortex.py
+
+    echo "sst tests done!"
+}
+
 regression()
 {
     echo "begin regression tests..."
diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in
new file mode 100755
index 0000000000..f6c581d0f4
--- /dev/null
+++ b/ci/sst_install.sh.in
@@ -0,0 +1,85 @@
+#!/bin/bash
+
+# Copyright © 2019-2023
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# exit when any command fails
+set -e
+
+OPENMPI_416=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz  # version dependency
+SST_CORE_1510=https://github.com/sstsimulator/sst-core/releases/download/v15.1.0_Final/sstcore-15.1.0.tar.gz
+SST_ELEMENTS_1510=https://github.com/sstsimulator/sst-elements/releases/download/v15.1.0_Final/sstelements-15.1.0.tar.gz
+TOOLDIR=${TOOLDIR:=@TOOLDIR@}
+
+DEBIAN_FRONTEND=noninteractive sudo apt install openmpi-bin openmpi-common libtool libtool-bin autoconf python3 python3-dev automake build-essential git
+wget $OPENMPI_416
+tar -xvzf openmpi-4.1.6.tar.gz
+mkdir -p $TOOLDIR && rm -rf $TOOLDIR/openmpi-4.1.6 && mv openmpi-4.1.6 $TOOLDIR
+rm -rf openmpi-4.1.6.tar.gz
+
+mkdir -p $TOOLDIR/openmpi_install
+cd $TOOLDIR/openmpi-4.1.6
+
+export MPIHOME=$TOOLDIR/openmpi_install
+./configure --prefix=$MPIHOME
+make all install
+
+export PATH=$MPIHOME/bin:$PATH
+export MPICC=mpicc
+export MPICXX=mpicxx
+
+echo 'export PATH='"$MPIHOME"'/bin:$PATH' >> ~/.bashrc
+echo 'export MPICC=mpicc' >> ~/.bashrc
+echo 'export MPICXX=mpicxx' >> ~/.bashrc
+
+cd $TOOLDIR
+wget $SST_CORE_1510
+tar -xvzf sstcore-15.1.0.tar.gz
+rm sstcore-15.1.0.tar.gz
+cd sst-core
+
+mkdir -p $TOOLDIR/sst-install/sst-core
+export SST_CORE_HOME=$TOOLDIR/sst-install/sst-core
+export SST_CORE_ROOT=$TOOLDIR/sst-core
+echo 'export SST_CORE_HOME='"$SST_CORE_HOME" >> ~/.bashrc
+echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc
+
+autoreconf -fi
+./configure --prefix=$SST_CORE_HOME
+make -j$(nproc) all
+make install
+
+export PATH=$SST_CORE_HOME/bin:$PATH
+echo 'export PATH='"$SST_CORE_HOME"'/bin:$PATH' >> ~/.bashrc
+
+cd $TOOLDIR
+wget $SST_ELEMENTS_1510
+tar -xvzf sstelements-15.1.0.tar.gz
+rm sstelements-15.1.0.tar.gz
+cd sst-elements
+
+mkdir -p $TOOLDIR/sst-install/sst-elements
+export SST_ELEMENTS_HOME=$TOOLDIR/sst-install/sst-elements
+export SST_ELEMENTS_ROOT=$TOOLDIR/sst-elements
+echo 'export SST_ELEMENTS_HOME='"$SST_ELEMENTS_HOME" >> ~/.bashrc
+echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc
+
+./configure --prefix=$SST_ELEMENTS_HOME --with-sst-core=$SST_CORE_HOME
+make -j2 all
+make install
+
+export PATH=$SST_ELEMENTS_HOME/bin:$PATH
+echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc
+
+
+
diff --git a/ci/sst_test_vortex_conform.py b/ci/sst_test_vortex_conform.py
new file mode 100644
index 0000000000..bc341530dd
--- /dev/null
+++ b/ci/sst_test_vortex_conform.py
@@ -0,0 +1,7 @@
+import sst
+
+gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
+gpu.addParams({
+    "clock": "1GHz",
+    "program": "../build/tests/kernel/conform/conform.bin"
+})
diff --git a/ci/sst_test_vortex_fibonacci.py b/ci/sst_test_vortex_fibonacci.py
new file mode 100644
index 0000000000..53da409fca
--- /dev/null
+++ b/ci/sst_test_vortex_fibonacci.py
@@ -0,0 +1,7 @@
+import sst
+
+gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
+gpu.addParams({
+    "clock": "1GHz",
+    "program": "../build/tests/kernel/fibonacci/fibonacci.bin"
+})
diff --git a/ci/sst_test_vortex_hello.py b/ci/sst_test_vortex_hello.py
new file mode 100644
index 0000000000..e1c8fcea81
--- /dev/null
+++ b/ci/sst_test_vortex_hello.py
@@ -0,0 +1,7 @@
+import sst
+
+gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
+gpu.addParams({
+    "clock": "1GHz",
+    "program": "../build/tests/kernel/hello/hello.bin"
+})
diff --git a/ci/sst_test_vortex_vecadd.py b/ci/sst_test_vortex_vecadd.py
new file mode 100644
index 0000000000..32d8536749
--- /dev/null
+++ b/ci/sst_test_vortex_vecadd.py
@@ -0,0 +1,7 @@
+import sst
+
+gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
+gpu.addParams({
+    "clock": "1GHz",
+    "program": "../build/tests/kernel/vecadd/vecadd.bin"
+})
diff --git a/sim/simx/Makefile b/sim/simx/Makefile
index 480d14745e..6ed58b9aa0 100644
--- a/sim/simx/Makefile
+++ b/sim/simx/Makefile
@@ -2,7 +2,7 @@ include ../common.mk
 
 DESTDIR ?= $(CURDIR)
 USE_SST ?= 0
-SST_PKG ?= SST-14.1 # default SST package name
+#SST_PKG ?= SST-14.1 # default SST package name
 
 OBJ_DIR = $(DESTDIR)/obj
 CONFIG_FILE = $(DESTDIR)/simx_config.stamp
@@ -32,8 +32,8 @@ SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp
 ifeq ($(USE_SST), 1)
 VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp
 VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp
-SST_CFLAGS := $(shell pkg-config --cflags $(SST_PKG))
-SST_LFLAGS := $(shell pkg-config --libs $(SST_PKG))
+SST_CFLAGS := $(shell sst-config --ELEMENT_CXXFLAGS)
+SST_LFLAGS := $(shell sst-config --ELEMENT_LDFLAGS)
 CXXFLAGS += $(SST_CFLAGS) -DUSE_SST
 endif
 

From c50ff1a9c3f7ce5983887e870f4e21ee728c149c Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 3 Dec 2025 12:39:29 -0500
Subject: [PATCH 09/15] regression update for sst run

---
 ci/regression.sh.in | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/ci/regression.sh.in b/ci/regression.sh.in
index 4d28f8516d..9965acb5f3 100755
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@@ -522,7 +522,7 @@ cupbop() {
 show_usage()
 {
     echo "Vortex Regression Test"
-    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--all] [--h|--help]"
+    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst] [--all] [--h|--help]"
 }
 
 declare -a tests=()
@@ -581,6 +581,9 @@ while [ "$1" != "" ]; do
         --cupbop )
                 tests+=("cupbop")
                 ;;
+        --sst )
+                tests+=("sst")
+                ;;
         --all )
                 tests=()
                 tests+=("unittest")

From c754ec958e710ba65d9570675b27e3b2c68bb161 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 3 Dec 2025 12:53:37 -0500
Subject: [PATCH 10/15] SST test update

---
 .github/workflows/ci.yml | 2 +-
 ci/regression.sh.in      | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 3033e5113d..d29cb67482 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -168,7 +168,7 @@ jobs:
             ./ci/regression.sh --isa
             ./ci/regression.sh --kernel
             ./ci/regression.sh --regression
-            ./ci/regression.sh --sst
+            ./ci/regression.sh --sst_tests
           else
             ./ci/regression.sh --${{ matrix.name }}
           fi
diff --git a/ci/regression.sh.in b/ci/regression.sh.in
index 9965acb5f3..ebfc48f559 100755
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@@ -78,7 +78,7 @@ kernel()
     echo "kernel tests done!"
 }
 
-sst()
+sst_tests()
 {
     echo "begin sst tests..."
 
@@ -522,7 +522,7 @@ cupbop() {
 show_usage()
 {
     echo "Vortex Regression Test"
-    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst] [--all] [--h|--help]"
+    echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst_tests] [--all] [--h|--help]"
 }
 
 declare -a tests=()
@@ -581,8 +581,8 @@ while [ "$1" != "" ]; do
         --cupbop )
                 tests+=("cupbop")
                 ;;
-        --sst )
-                tests+=("sst")
+        --sst_tests )
+                tests+=("sst_tests")
                 ;;
         --all )
                 tests=()

From 0a450aba608e38006b4f2b0956b7edcf01f24e9c Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Wed, 3 Dec 2025 13:17:54 -0500
Subject: [PATCH 11/15] sst config path update

---
 ci/regression.sh.in | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/regression.sh.in b/ci/regression.sh.in
index ebfc48f559..00366435ed 100755
--- a/ci/regression.sh.in
+++ b/ci/regression.sh.in
@@ -87,10 +87,10 @@ sst_tests()
 
     cp sim/simx/libvortex.so $SST_ELEMENTS_HOME/lib/sst-elements-library/   # alternatively - $ sst --add-lib-path `pwd` myConfig.py
 
-    sst test_vortex_hello.py
-    sst test_vortex_fibonacci.py
-    sst test_vortex_vecadd.py
-    sst test_vortex.py
+    sst ci/sst_test_vortex_hello.py
+    sst ci/sst_test_vortex_fibonacci.py
+    sst ci/sst_test_vortex_vecadd.py
+    sst ci/sst_test_vortex_conform.py
 
     echo "sst tests done!"
 }

From cdfcd0b64345b0961ebf4d683f11e0d3798b1516 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Mon, 8 Dec 2025 17:04:20 -0500
Subject: [PATCH 12/15] Remove Open MPI, SST core & elements sources after
 installation to save space

---
 ci/sst_install.sh.in | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in
index f6c581d0f4..f3c3c9c040 100755
--- a/ci/sst_install.sh.in
+++ b/ci/sst_install.sh.in
@@ -42,6 +42,8 @@ echo 'export PATH='"$MPIHOME"'/bin:$PATH' >> ~/.bashrc
 echo 'export MPICC=mpicc' >> ~/.bashrc
 echo 'export MPICXX=mpicxx' >> ~/.bashrc
 
+rm -r $TOOLDIR/openmpi-4.1.6
+
 cd $TOOLDIR
 wget $SST_CORE_1510
 tar -xvzf sstcore-15.1.0.tar.gz
@@ -52,13 +54,15 @@ mkdir -p $TOOLDIR/sst-install/sst-core
 export SST_CORE_HOME=$TOOLDIR/sst-install/sst-core
 export SST_CORE_ROOT=$TOOLDIR/sst-core
 echo 'export SST_CORE_HOME='"$SST_CORE_HOME" >> ~/.bashrc
-echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc
+#echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc
 
 autoreconf -fi
 ./configure --prefix=$SST_CORE_HOME
 make -j$(nproc) all
 make install
 
+rm -r $SST_CORE_ROOT
+
 export PATH=$SST_CORE_HOME/bin:$PATH
 echo 'export PATH='"$SST_CORE_HOME"'/bin:$PATH' >> ~/.bashrc
 
@@ -72,12 +76,14 @@ mkdir -p $TOOLDIR/sst-install/sst-elements
 export SST_ELEMENTS_HOME=$TOOLDIR/sst-install/sst-elements
 export SST_ELEMENTS_ROOT=$TOOLDIR/sst-elements
 echo 'export SST_ELEMENTS_HOME='"$SST_ELEMENTS_HOME" >> ~/.bashrc
-echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc
+#echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc
 
 ./configure --prefix=$SST_ELEMENTS_HOME --with-sst-core=$SST_CORE_HOME
 make -j2 all
 make install
 
+rm -r $SST_ELEMENTS_ROOT
+
 export PATH=$SST_ELEMENTS_HOME/bin:$PATH
 echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc
 

From a31c1115e71b2394435aa2d3617c09c876b194fd Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Mon, 8 Dec 2025 20:34:34 -0500
Subject: [PATCH 13/15] CI fix: update GITHUB_PATH

---
 ci/sst_install.sh.in | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in
index f3c3c9c040..8c001a94bd 100755
--- a/ci/sst_install.sh.in
+++ b/ci/sst_install.sh.in
@@ -87,5 +87,11 @@ rm -r $SST_ELEMENTS_ROOT
 export PATH=$SST_ELEMENTS_HOME/bin:$PATH
 echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc
 
+if [ -n "$GITHUB_PATH" ]; then
+  echo "$SST_CORE_HOME/bin" >> "$GITHUB_PATH"
+  echo "$SST_ELEMENTS_HOME/bin" >> "$GITHUB_PATH"
+fi
+
+
 
 

From 1511b8a95657e48ae9e0dd9fd10f40e5e4419c5a Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Tue, 9 Dec 2025 14:58:40 -0500
Subject: [PATCH 14/15] Explicitly export SST paths in each CI job

---
 .github/workflows/ci.yml | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index d29cb67482..c8c348b4ba 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -66,6 +66,13 @@ jobs:
         run: |
           make -C third_party > /dev/null
 
+      - name: Export SST paths
+        run: |
+          echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH
+          echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH
+          echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV
+          echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV
+
   build:
     needs: setup
     strategy:
@@ -100,6 +107,13 @@ jobs:
           restore-keys: |
             ${{ matrix.os }}-thirdparty-
 
+      - name: Export SST paths
+        run: |
+          echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH
+          echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH
+          echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV
+          echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV
+
       - name: Run Build
         run: |
           TOOLDIR=$PWD/tools
@@ -158,6 +172,13 @@ jobs:
           name: build-${{ matrix.os }}-${{ matrix.xlen }}
           path: build${{ matrix.xlen }}
 
+      - name: Export SST paths
+        run: |
+          echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH
+          echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH
+          echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV
+          echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV
+
       - name: Run tests
         run: |
           cd build${{ matrix.xlen }}

From 053df0d0e06ff3e48902cabb2e0dc72ad44441d7 Mon Sep 17 00:00:00 2001
From: Jagadheesvaran <jagadheesvaran.t.s@gmail.com>
Date: Tue, 9 Dec 2025 20:34:26 -0500
Subject: [PATCH 15/15] test scripts' relative path fix

---
 ci/sst_test_vortex_conform.py   | 2 +-
 ci/sst_test_vortex_fibonacci.py | 2 +-
 ci/sst_test_vortex_hello.py     | 2 +-
 ci/sst_test_vortex_vecadd.py    | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ci/sst_test_vortex_conform.py b/ci/sst_test_vortex_conform.py
index bc341530dd..2f2a86535f 100644
--- a/ci/sst_test_vortex_conform.py
+++ b/ci/sst_test_vortex_conform.py
@@ -3,5 +3,5 @@
 gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
 gpu.addParams({
     "clock": "1GHz",
-    "program": "../build/tests/kernel/conform/conform.bin"
+    "program": "tests/kernel/conform/conform.bin"
 })
diff --git a/ci/sst_test_vortex_fibonacci.py b/ci/sst_test_vortex_fibonacci.py
index 53da409fca..a066765c9e 100644
--- a/ci/sst_test_vortex_fibonacci.py
+++ b/ci/sst_test_vortex_fibonacci.py
@@ -3,5 +3,5 @@
 gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
 gpu.addParams({
     "clock": "1GHz",
-    "program": "../build/tests/kernel/fibonacci/fibonacci.bin"
+    "program": "tests/kernel/fibonacci/fibonacci.bin"
 })
diff --git a/ci/sst_test_vortex_hello.py b/ci/sst_test_vortex_hello.py
index e1c8fcea81..21c956639e 100644
--- a/ci/sst_test_vortex_hello.py
+++ b/ci/sst_test_vortex_hello.py
@@ -3,5 +3,5 @@
 gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
 gpu.addParams({
     "clock": "1GHz",
-    "program": "../build/tests/kernel/hello/hello.bin"
+    "program": "tests/kernel/hello/hello.bin"
 })
diff --git a/ci/sst_test_vortex_vecadd.py b/ci/sst_test_vortex_vecadd.py
index 32d8536749..1a50958056 100644
--- a/ci/sst_test_vortex_vecadd.py
+++ b/ci/sst_test_vortex_vecadd.py
@@ -3,5 +3,5 @@
 gpu = sst.Component("gpu0", "vortex.VortexGPGPU")
 gpu.addParams({
     "clock": "1GHz",
-    "program": "../build/tests/kernel/vecadd/vecadd.bin"
+    "program": "tests/kernel/vecadd/vecadd.bin"
 })