From 240d09490d86f85afd981a89ec68b1765f2c4282 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Fri, 5 Sep 2025 10:13:39 -0400 Subject: [PATCH 01/15] initial changes of sst-simx integ. redes. --- sim/simx/Makefile | 26 ++++- sim/simx/VortexGPGPU.cpp | 89 ++++++++++++++++ sim/simx/VortexGPGPU.h | 32 ++++++ sim/simx/mem_backend.h | 13 +++ sim/simx/mem_backend_dram.cpp | 75 ++++++++++++++ sim/simx/mem_backend_dram.h | 51 ++++++++++ sim/simx/mem_backend_sst.cpp | 75 ++++++++++++++ sim/simx/mem_backend_sst.h | 46 +++++++++ sim/simx/mem_sim.cpp | 186 ++++++++++++++++------------------ sim/simx/vortex_simulator.cpp | 102 +++++++++++++++++++ sim/simx/vortex_simulator.h | 43 ++++++++ 11 files changed, 638 insertions(+), 100 deletions(-) create mode 100644 sim/simx/VortexGPGPU.cpp create mode 100644 sim/simx/VortexGPGPU.h create mode 100644 sim/simx/mem_backend.h create mode 100644 sim/simx/mem_backend_dram.cpp create mode 100644 sim/simx/mem_backend_dram.h create mode 100644 sim/simx/mem_backend_sst.cpp create mode 100644 sim/simx/mem_backend_sst.h create mode 100644 sim/simx/vortex_simulator.cpp create mode 100644 sim/simx/vortex_simulator.h diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 1eca622701..90e18e285d 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -27,6 +27,18 @@ SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp +# SST flags +SST_CFLAGS := $(shell pkg-config --cflags sst-core) +SST_CFLAGS += -I../../../sst/sst/sst-core/include +SST_LFLAGS := $(shell pkg-config --libs sst-core) + +VORTEX_SST_SRCS := \ + $(SRC_DIR)/mem_backend.cpp \ + $(SRC_DIR)/mem_backend_dram.cpp \ + $(SRC_DIR)/mem_backend_sst.cpp \ + $(SRC_DIR)/vortex_simulator.cpp \ + $(SRC_DIR)/VortexGPGPU.cpp + # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) SRCS += $(SRC_DIR)/voperands.cpp @@ 
-72,7 +84,14 @@ PROJECT := simx .PHONY: all force clean clean-lib clean-exe clean-obj -all: $(DESTDIR)/$(PROJECT) +all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so + +$(DESTDIR)/libvortex.so: + $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \ + -I./sim/simx \ + $(SRCS) $(VORTEX_SST_SRCS) \ + -shared -o $@ \ + $(LDFLAGS) $(SST_LFLAGS) # build common object files $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE) @@ -113,10 +132,13 @@ $(CONFIG_FILE): force clean-lib: rm -f $(DESTDIR)/lib$(PROJECT).so +clean-libvortex: + rm -f $(DESTDIR)/libvortex.so + clean-exe: rm -f $(DESTDIR)/$(PROJECT) clean-obj: rm -rf $(OBJ_DIR) -clean: clean-lib clean-exe clean-obj \ No newline at end of file +clean: clean-lib clean-exe clean-obj clean-libvortex \ No newline at end of file diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp new file mode 100644 index 0000000000..882d2f98ce --- /dev/null +++ b/sim/simx/VortexGPGPU.cpp @@ -0,0 +1,89 @@ +#include +#include "VortexGPGPU.h" +#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete + +using namespace SST; +using namespace SST::Vortex; +using SST::Interfaces::StandardMem; + +VortexGPGPU *VortexGPGPU::instance_ = nullptr; + +VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) + : Component(id), + sim_(std::make_unique()), + memIface_(nullptr) { + + // Parameter: clock frequency (default 1GHz) + std::string clockfreq = params.find("clock", "1GHz"); + // Parameter: program path + std::string kernel = params.find("program", ""); + + // Create StandardMem interface; auto-bind to port name "memIface" + memIface_ = loadUserSubComponent( + "memIface", ComponentInfo::SHARE_NONE, + registerClock(clockfreq, + new SST::Clock::Handler(this, &VortexGPGPU::clockTick)), + new StandardMem::Handler(this, &VortexGPGPU::handleMemResp)); + + // Register callback so SimX can submit memory to SST + instance_ = this; + vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t 
tag) { + if (write) { + std::vector zero(size, 0); + auto *req = new StandardMem::Write(addr, zero); + req->setDst(tag); + instance_->memIface_->send(req); + } else { + auto *req = new StandardMem::Read(addr, size); + req->setDst(tag); + instance_->memIface_->send(req); + } + }); + + // Load the kernel or ELF + if (!sim_->init(kernel)) { + SST::Output out; + out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n"); + } + + registerAsPrimaryComponent(); + primaryComponentDoNotEndSim(); +} + +VortexGPGPU::~VortexGPGPU() = default; + +void VortexGPGPU::setup() {} +void VortexGPGPU::finish() {} + +bool VortexGPGPU::clockTick(SST::Cycle_t) { + // Advance the GPU one cycle + bool running = sim_->cycle(); + if (!running) { + primaryComponentOKToEndSim(); + return false; + } + return true; +} + +void VortexGPGPU::handleMemResp(StandardMem::Request *req) { + // Inform SimX that this request has completed + vx_on_mem_complete(req->getDst()); + delete req; +} + +// Register with SST +SST_ELI_REGISTER_COMPONENT( + VortexGPGPU, + "vortex", // element library name + "VortexGPGPU", // component name + SST_ELI_ELEMENT_VERSION(1,0,0), + "Headless Vortex GPGPU Simulator", + COMPONENT_CATEGORY_PROCESSOR +) +SST_ELI_DOCUMENT_PARAMS( + {"clock", "Clock frequency", "1GHz"}, + {"program", "Path to the kernel or ELF to load", ""} +) +SST_ELI_DOCUMENT_PORTS( + {"memIface", "StandardMem port to connect to the SST memory hierarchy", {}} +) diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h new file mode 100644 index 0000000000..feeb538829 --- /dev/null +++ b/sim/simx/VortexGPGPU.h @@ -0,0 +1,32 @@ +// VortexGPGPU.h +#pragma once +#include +#include +#include +#include +#include "vortex_simulator.h" // wrapper around SimX + +namespace SST { +namespace Vortex { + +class VortexGPGPU : public SST::Component { +public: + VortexGPGPU(SST::ComponentId_t id, SST::Params& params); + ~VortexGPGPU() override; + + void setup() override; + void finish() override; + +private: + bool 
clockTick(SST::Cycle_t cycle); + void handleMemResp(SST::Interfaces::StandardMem::Request* req); + + // static pointer used by lambda in vx_register_submit() + static VortexGPGPU* instance_; + + std::unique_ptr sim_; + SST::Interfaces::StandardMem* memIface_; +}; + +} // namespace Vortex +} // namespace SST diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h new file mode 100644 index 0000000000..73f63b2ad0 --- /dev/null +++ b/sim/simx/mem_backend.h @@ -0,0 +1,13 @@ +#pragma once +#include + +namespace vortex { +struct IMemBackend { + virtual ~IMemBackend() = default; + virtual void reset() = 0; + virtual void tick() = 0; + virtual void send_request(uint64_t addr, bool write, + uint32_t size, uint32_t tag, + uint32_t cid, uint64_t uuid) = 0; +}; +} // namespace vortex diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp new file mode 100644 index 0000000000..8d83a75d2e --- /dev/null +++ b/sim/simx/mem_backend_dram.cpp @@ -0,0 +1,75 @@ +// mem_backend_dram.cpp +#include "mem_backend_dram.h" + +using namespace vortex; + +namespace { +struct CallbackData { + MemBackendDram* backend; + uint64_t tag; +}; +} // anonymous namespace + +MemBackendDram* MemBackendDram::inst_ = nullptr; + +MemBackendDram::MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio) + : num_banks_(num_banks) + , block_size_(block_size) + , lg2_block_size_(0) + , dram_sim_(num_banks, block_size, clock_ratio) +{ + // Compute log2(block_size_) once; block_size_ is assumed to be a power of two. 
+ uint32_t tmp = block_size_; + while (tmp > 1) { + ++lg2_block_size_; + tmp >>= 1; + } + inst_ = this; +} + +void MemBackendDram::reset() { + inflight_.clear(); + dram_sim_.reset(); +} + +void MemBackendDram::tick() { + // Retire pending transactions in DramSim + dram_sim_.tick(); +} + +void MemBackendDram::dram_complete(void* arg) { + auto* data = static_cast(arg); + MemBackendDram* backend = data->backend; + uint64_t tag = data->tag; + auto it = backend->inflight_.find(tag); + if (it != backend->inflight_.end()) { + const Info& info = it->second; + if (!info.write) { + // Form a MemRsp for reads only + MemRsp rsp{tag, info.cid, info.uuid}; + // Route the response to the recorded bank + uint32_t bank = info.bank; + if (backend->mem_xbar_rsp_cb_) + backend->mem_xbar_rsp_cb_(bank, rsp); + } + backend->inflight_.erase(it); + } + delete data; +} + +void MemBackendDram::send_request(uint64_t addr, bool write, + uint32_t size, uint32_t tag, + uint32_t cid, uint64_t uuid) { + // Compute bank index: (addr >> lg2(block_size)) mod num_banks + uint32_t bank_idx = 0; + if (num_banks_ > 0) + bank_idx = static_cast((addr >> lg2_block_size_) & (num_banks_ - 1)); + inflight_.emplace(tag, Info{cid, uuid, write, bank_idx}); + auto* cb_data = new CallbackData{this, tag}; + // The size is ignored by DramSim because it is configured with block_size_. 
+ dram_sim_.send_request(addr, write, &MemBackendDram::dram_complete, cb_data); +} + +void MemBackendDram::complete(uint64_t tag) { + // Not used; dram_complete() handles completions +} \ No newline at end of file diff --git a/sim/simx/mem_backend_dram.h b/sim/simx/mem_backend_dram.h new file mode 100644 index 0000000000..5a6f6f3d11 --- /dev/null +++ b/sim/simx/mem_backend_dram.h @@ -0,0 +1,51 @@ +// mem_backend_dram.h +#pragma once +#include "mem_backend.h" +#include "dram_sim.h" +#include +#include +#include "types.h" + +namespace vortex { + +class MemBackendDram : public IMemBackend { +public: + static MemBackendDram* instance() { return inst_; } + + // Construct with the same parameters as MemSim::Config: number of banks, + // block size in bytes, and clock ratio. These values are passed to + // the underlying DramSim so that the external memory model matches. + MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio); + + void reset() override; + void tick() override; + void send_request(uint64_t addr, bool write, + uint32_t size, uint32_t tag, + uint32_t cid, uint64_t uuid) override; + + // Not used directly; completions are handled by dram_complete(). + void complete(uint64_t tag); + + // Set by MemSim to push completed responses back to the correct + // bank in the crossbar. 
+ std::function mem_xbar_rsp_cb_; + +private: + struct Info { + uint32_t cid; + uint64_t uuid; + bool write; + uint32_t bank; // bank index computed from the address + }; + std::unordered_map inflight_; + uint32_t num_banks_; + uint32_t block_size_; + uint32_t lg2_block_size_; + static MemBackendDram* inst_; + DramSim dram_sim_; + + // Static callback invoked by DramSim when a request completes + static void dram_complete(void* arg); +}; + +} // namespace vortex \ No newline at end of file diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp new file mode 100644 index 0000000000..3cc9da2588 --- /dev/null +++ b/sim/simx/mem_backend_sst.cpp @@ -0,0 +1,75 @@ +// mem_backend_sst.cpp +// Implementation of the SST-backed memory backend. This backend forwards +// all off-chip memory requests to the SST StandardMem interface via a +// registered callback (vx_submit_fn). It maintains a table of inflight +// transactions keyed by the original request tag so that completions can +// be correlated back to the correct cluster and request. When a read +// completion is observed via vx_on_mem_complete(), the backend pushes a +// MemRsp back into the crossbar using the stored cid/uuid. Writes +// complete silently. Bank routing is currently fixed to bank 0; this +// preserves correctness but may underutilize bank-level parallelism. + +#include "mem_backend_sst.h" + +extern "C" { + +// Register a submit function provided by the SST component. The +// MemBackendSST stores it in a static member so that calls to +// send_request() can forward requests into SST. +void vx_register_submit(vx_submit_fn fn) { + vortex::MemBackendSST::set_vx_submit_fn(fn); +} + +// Notify MemBackendSST that the SST memory system has completed a +// request identified by 'tag'. The backend will produce a MemRsp for +// reads and erase the entry from its inflight table. 
+void vx_on_mem_complete(uint64_t tag) { + if (auto inst = vortex::MemBackendSST::instance()) + inst->complete(tag); +} + +} // extern "C" + +using namespace vortex; + +// Initialise static pointers +MemBackendSST* MemBackendSST::inst_ = nullptr; +vx_submit_fn MemBackendSST::submit_fn_ = nullptr; + +MemBackendSST::MemBackendSST() { + // Record this instance so the C wrapper can find us + inst_ = this; +} + +void MemBackendSST::reset() { + // Drop all inflight transactions; pending responses are ignored + inflight_.clear(); +} + +void MemBackendSST::send_request(uint64_t addr, bool write, + uint32_t size, uint32_t tag, + uint32_t cid, uint64_t uuid) { + // Save request metadata so we can form a response on completion + inflight_.emplace(tag, Info{cid, uuid, write}); + // Forward the request into SST. The SST wrapper will create a + // StandardMem::Read or ::Write using this address, size and tag. + if (submit_fn_) { + submit_fn_(addr, write, size, tag); + } +} + +void MemBackendSST::complete(uint64_t tag) { + auto it = inflight_.find(tag); + if (it == inflight_.end()) + return; + const Info &info = it->second; + // Only produce a MemRsp for reads; writes complete silently + if (!info.write) { + MemRsp rsp{tag, info.cid, info.uuid}; + // Always route completions to bank 0; adjust if you need per-bank + // completion routing in the future. 
+ if (mem_xbar_rsp_cb_) + mem_xbar_rsp_cb_(0, rsp); + } + inflight_.erase(it); +} diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h new file mode 100644 index 0000000000..808a52aefd --- /dev/null +++ b/sim/simx/mem_backend_sst.h @@ -0,0 +1,46 @@ +// mem_backend_sst.h +#pragma once +#include "mem_backend.h" +#include +#include +#include "types.h" + +extern "C" { + // Function pointer type for SST to call + typedef void (*vx_submit_fn)(uint64_t addr, bool write, uint32_t size, uint64_t tag); + // SST calls this to register its submit function + void vx_register_submit(vx_submit_fn fn); + // SST calls this when a memory response completes + void vx_on_mem_complete(uint64_t tag); +} + +namespace vortex { + +class MemBackendSST : public IMemBackend { +public: + static MemBackendSST* instance() { return inst_; } + static vx_submit_fn get_vx_submit_fn() { return submit_fn_; } + static void set_vx_submit_fn(vx_submit_fn fn) { submit_fn_ = fn; } + + MemBackendSST(); + void reset() override; + void tick() override {} + void send_request(uint64_t addr, bool write, + uint32_t size, uint32_t tag, + uint32_t cid, uint64_t uuid) override; + + // Called from vx_on_mem_complete + void complete(uint64_t tag); + + // Set by MemSim to push MemRsp back to crossbar + std::function mem_xbar_rsp_cb_; + + +private: + struct Info { uint32_t cid; uint64_t uuid; bool write; }; + std::unordered_map inflight_; + static MemBackendSST* inst_; + static vx_submit_fn submit_fn_; +}; + +} // namespace vortex diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index 740ee008b6..61bf174a86 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -1,127 +1,117 @@ -// Copyright © 2019-2023 -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - +// mem_sim.cpp #include "mem_sim.h" #include -#include #include -#include +#include #include "constants.h" #include "types.h" #include "debug.h" +#include "mem_backend.h" +#include "mem_backend_sst.h" +#include "mem_backend_dram.h" using namespace vortex; class MemSim::Impl { private: - MemSim* simobject_; - Config config_; - MemCrossBar::Ptr mem_xbar_; - DramSim dram_sim_; - mutable PerfStats perf_stats_; - struct DramCallbackArgs { - MemSim::Impl* memsim; - MemReq request; - uint32_t bank_id; - }; + MemSim* simobject_; + Config config_; + MemCrossBar::Ptr mem_xbar_; + std::unique_ptr backend_; + mutable PerfStats perf_stats_; public: - Impl(MemSim* simobject, const Config& config) - : simobject_(simobject) - , config_(config) - , dram_sim_(config.num_banks, config.block_size, config.clock_ratio) - { - char sname[100]; - snprintf(sname, 100, "%s-xbar", simobject->name().c_str()); - mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks, - [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) { - // Custom logic to calculate the output index using bank interleaving - return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1)); - }); - for (uint32_t i = 0; i < config.num_ports; ++i) { - simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i)); - mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i)); - } - } - - ~Impl() { - //-- - } - - const PerfStats& perf_stats() const { - perf_stats_.bank_stalls = mem_xbar_->collisions(); - return 
perf_stats_; - } - - void reset() { - dram_sim_.reset(); - } - - void tick() { - dram_sim_.tick(); - - for (uint32_t i = 0; i < config_.num_banks; ++i) { - if (mem_xbar_->ReqOut.at(i).empty()) - continue; - - auto& mem_req = mem_xbar_->ReqOut.at(i).front(); - - // enqueue the request to the memory system - auto req_args = new DramCallbackArgs{this, mem_req, i}; - dram_sim_.send_request( - mem_req.addr, - mem_req.write, - [](void* arg) { - auto rsp_args = reinterpret_cast(arg); - if (!rsp_args->request.write) { - // only send a response for read requests - MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; - rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1); - DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp" << rsp_args->bank_id << ": " << mem_rsp); - } - delete rsp_args; - }, - req_args - ); - - DT(3, simobject_->name() << "-mem-req" << i << ": " << mem_req); - mem_xbar_->ReqOut.at(i).pop(); - } - } + Impl(MemSim* simobject, const Config& config) + : simobject_(simobject) + , config_(config) + { + char sname[100]; + snprintf(sname, 100, "%s-xbar", simobject->name().c_str()); + mem_xbar_ = MemCrossBar::Create( + sname, + ArbiterType::RoundRobin, + config.num_ports, + config.num_banks, + [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) { + // Bank interleaving: choose the output index based on address bits + return static_cast((req.addr >> lg2_block_size) & (num_banks - 1)); + }); + + for (uint32_t i = 0; i < config.num_ports; ++i) { + simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i)); + mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i)); + } + + #ifdef USE_SST_MEM_BACKEND + backend_ = std::make_unique(); + #else + backend_ = std::make_unique(config.num_banks, config.block_size, config.clock_ratio); + #endif + + if (backend_) { + backend_->mem_xbar_rsp_cb_ = [this](uint32_t bank, const MemRsp& rsp) { + // Push the response 
into the appropriate crossbar output queue + if (bank < mem_xbar_->RspOut.size()) + mem_xbar_->RspOut.at(bank).push(rsp, 1); + }; + } + } + + const PerfStats& perf_stats() const { + perf_stats_.bank_stalls = mem_xbar_->collisions(); + return perf_stats_; + } + + void reset() { + if (backend_) + backend_->reset(); + } + + void tick() { + // Advance the selected memory backend + if (backend_) + backend_->tick(); + + // Drain requests from each bank and send to the backend + for (uint32_t bank = 0; bank < config_.num_banks; ++bank) { + if (mem_xbar_->ReqOut.at(bank).empty()) + continue; + auto& mem_req = mem_xbar_->ReqOut.at(bank).front(); + if (backend_) { + backend_->send_request( + mem_req.addr, + mem_req.write, + config_.block_size, + mem_req.tag, + mem_req.cid, + mem_req.uuid); + } + DT(3, simobject_->name() << "-mem-req" << bank << ": " << mem_req); + mem_xbar_->ReqOut.at(bank).pop(); + } + } }; -/////////////////////////////////////////////////////////////////////////////// - MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) - : SimObject(ctx, name) - , MemReqPorts(config.num_ports, this) - , MemRspPorts(config.num_ports, this) - , impl_(new Impl(this, config)) + : SimObject(ctx, name) + , MemReqPorts(config.num_ports, this) + , MemRspPorts(config.num_ports, this) + , impl_(new Impl(this, config)) {} MemSim::~MemSim() { - delete impl_; + delete impl_; } void MemSim::reset() { - impl_->reset(); + impl_->reset(); } void MemSim::tick() { - impl_->tick(); + impl_->tick(); } -const MemSim::PerfStats &MemSim::perf_stats() const { - return impl_->perf_stats(); +const MemSim::PerfStats& MemSim::perf_stats() const { + return impl_->perf_stats(); } \ No newline at end of file diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp new file mode 100644 index 0000000000..82b211e82c --- /dev/null +++ b/sim/simx/vortex_simulator.cpp @@ -0,0 +1,102 @@ +#include "vortex_simulator.h" +#include +#include +#include +#include 
"simobject.h" +#include "dcrs.h" + +namespace vortex { + +// Fallback macro definitions in case they are not provided by VX_config.h +#ifndef NUM_CLUSTERS +#define NUM_CLUSTERS 1 +#endif +#ifndef NUM_CORES +#define NUM_CORES 1 +#endif +#ifndef NUM_WARPS +#define NUM_WARPS 1 +#endif +#ifndef NUM_THREADS +#define NUM_THREADS 1 +#endif +#ifndef RAM_PAGE_SIZE +#define RAM_PAGE_SIZE 4096 +#endif +#ifndef STARTUP_ADDR +#define STARTUP_ADDR 0x0 +#endif + +static std::string getFileExt(const std::string& filename) { + auto pos = filename.find_last_of('.'); + if (pos == std::string::npos) return ""; + return filename.substr(pos + 1); +} + +VortexSimulator::VortexSimulator() : halted_(true) {} + +bool VortexSimulator::init(const std::string& kernelPath) { + // Initialize the architecture from macros or fallbacks + arch_.num_clusters = NUM_CLUSTERS; + arch_.num_cores = NUM_CORES; + arch_.num_warps = NUM_WARPS; + arch_.num_threads = NUM_THREADS; + arch_.global_mem_size = 1ULL << 30; // 1 GiB of global memory + + ram_ = RAM(arch_.global_mem_size, RAM_PAGE_SIZE); + proc_ = std::make_unique(arch_); + proc_->attach_ram(&ram_); + + // Load a kernel binary if provided + if (!kernelPath.empty()) { + std::string ext = getFileExt(kernelPath); + if (ext == "bin") { + std::ifstream in(kernelPath, std::ios::binary); + if (!in.good()) return false; + std::vector data((std::istreambuf_iterator(in)), + std::istreambuf_iterator()); + ram_.loadBinImage(data.data(), data.size(), 0x0); + } else if (ext == "hex") { + std::ifstream in(kernelPath); + if (!in.good()) return false; + std::vector bytes; + std::string byteStr; + while (in >> byteStr) { + uint8_t val = static_cast(std::stoul(byteStr, nullptr, 16)); + bytes.push_back(val); + } + ram_.loadBinImage(bytes.data(), bytes.size(), 0x0); + } else { + return false; + } + } + + // Write start address to DCRs for each cluster + for (uint32_t cid = 0; cid < arch_.num_clusters; ++cid) { + proc_->impl_->dcr_write(cid, DCR_LSU_BASE, STARTUP_ADDR); + 
proc_->impl_->dcr_write(cid, DCR_HALT, 0); + } + + halted_ = false; + return true; +} + +bool VortexSimulator::cycle() { + if (halted_) return false; + SimPlatform::instance().tick(); + bool anyRunning = false; + for (auto cluster : proc_->impl_->clusters_) { + if (cluster->running()) { + anyRunning = true; + break; + } + } + halted_ = !anyRunning; + return !halted_; +} + +bool VortexSimulator::isHalted() const { + return halted_; +} + +} // namespace vortex \ No newline at end of file diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h new file mode 100644 index 0000000000..eef604aca1 --- /dev/null +++ b/sim/simx/vortex_simulator.h @@ -0,0 +1,43 @@ +// vortex_simulator.h +#pragma once + +#include "processor.h" // for Processor, RAM +#include "arch.h" // for Arch +#include +#include + +namespace vortex { + +/** + * A wrapper class used by the SST integration to drive the Vortex GPU + * one cycle at a time. It encapsulates the architecture definition, + * memory subsystem, and processor instance. + */ +class VortexSimulator { +public: + VortexSimulator(); + + /** + * Initializes the simulator. If @p kernelPath is non-empty, the + * kernel image at the given path will be loaded into memory. + * Returns false if the image format is not supported. + */ + bool init(const std::string& kernelPath); + + /** + * Advances the simulation by one cycle. Returns false once the + * simulation has completed (i.e. all clusters are halted). + */ + bool cycle(); + + /** Returns true if the simulation has finished. 
*/ + bool isHalted() const; + +private: + Arch arch_; + RAM ram_; + std::unique_ptr proc_; + bool halted_; +}; + +} // namespace vortex From 251be55b23c4d1a9597b779be30d93a7474ff7f6 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Fri, 19 Sep 2025 00:34:36 -0400 Subject: [PATCH 02/15] simx changes to compile libvortex.so file --- sim/simx/Makefile | 30 +++++------ sim/simx/VortexGPGPU.cpp | 66 ++++++++++++++---------- sim/simx/VortexGPGPU.h | 19 +++++++ sim/simx/mem_backend.h | 3 ++ sim/simx/mem_backend_sst.cpp | 17 +++---- sim/simx/mem_backend_sst.h | 5 +- sim/simx/obj/common/util.o | Bin 0 -> 18136 bytes sim/simx/processor.cpp | 21 ++++++++ sim/simx/processor.h | 2 + sim/simx/processor_impl.h | 2 + sim/simx/simx_config.stamp | 1 + sim/simx/vortex_simulator.cpp | 92 +++++++++++----------------------- 12 files changed, 138 insertions(+), 120 deletions(-) create mode 100644 sim/simx/obj/common/util.o create mode 100644 sim/simx/simx_config.stamp diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 90e18e285d..6ffd0a2cd9 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -1,5 +1,5 @@ include ../common.mk - +# now you see me 2 DESTDIR ?= $(CURDIR) OBJ_DIR = $(DESTDIR)/obj @@ -25,17 +25,16 @@ SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $( SRCS += $(SRC_DIR)/decode.cpp $(SRC_DIR)/opc_unit.cpp $(SRC_DIR)/dispatcher.cpp SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp +SRCS += $(SRC_DIR)/mem_backend_sst.cpp \ +SRCS += $(SRC_DIR)/mem_backend_dram.cpp \ SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp # SST flags -SST_CFLAGS := $(shell pkg-config --cflags sst-core) -SST_CFLAGS += -I../../../sst/sst/sst-core/include -SST_LFLAGS := $(shell pkg-config --libs sst-core) +SST_CFLAGS := $(shell pkg-config --cflags SST-14.1) +SST_CFLAGS += -I../../../sst/sst/sst-core/include/ +SST_LFLAGS := $(shell pkg-config --libs 
SST-14.1) VORTEX_SST_SRCS := \ - $(SRC_DIR)/mem_backend.cpp \ - $(SRC_DIR)/mem_backend_dram.cpp \ - $(SRC_DIR)/mem_backend_sst.cpp \ $(SRC_DIR)/vortex_simulator.cpp \ $(SRC_DIR)/VortexGPGPU.cpp @@ -74,9 +73,6 @@ MAIN_OBJ := $(OBJ_DIR)/main.o DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) -# generate .d files alongside .o files -CXXFLAGS += -MMD -MP -MF $(@:.o=.d) - # optional: pipe through ccache if you have it CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX)) @@ -86,13 +82,6 @@ PROJECT := simx all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so -$(DESTDIR)/libvortex.so: - $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \ - -I./sim/simx \ - $(SRCS) $(VORTEX_SST_SRCS) \ - -shared -o $@ \ - $(LDFLAGS) $(SST_LFLAGS) - # build common object files $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE) @mkdir -p $(@D) @@ -116,6 +105,13 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ) $(DESTDIR)/lib$(PROJECT).so: $(OBJS) $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ +$(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS) + $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \ + -I./sim/simx \ + $(OBJS) $(VORTEX_SST_SRCS) \ + -shared -o $@ \ + $(LDFLAGS) $(SST_LFLAGS) + # updates the timestamp when flags changed. 
$(CONFIG_FILE): force @mkdir -p $(@D) diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp index 882d2f98ce..11c221f1c0 100644 --- a/sim/simx/VortexGPGPU.cpp +++ b/sim/simx/VortexGPGPU.cpp @@ -1,6 +1,9 @@ #include #include "VortexGPGPU.h" #include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete +#include +#include +#include using namespace SST; using namespace SST::Vortex; @@ -25,21 +28,40 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) new SST::Clock::Handler(this, &VortexGPGPU::clockTick)), new StandardMem::Handler(this, &VortexGPGPU::handleMemResp)); + if (!memIface_) { + SST::Output out; + out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n"); + } + // Register callback so SimX can submit memory to SST instance_ = this; - vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) { + // Track app-specific tags by StandardMem request-id + // (e.g., inside your instance_ type) + + vx_register_submit(+[](uint64_t addr, bool write, uint32_t size, uint64_t tag) { + + StandardMem::Request* req = nullptr; + if (write) { - std::vector zero(size, 0); - auto *req = new StandardMem::Write(addr, zero); - req->setDst(tag); - instance_->memIface_->send(req); + std::vector zeros(static_cast(size), 0); + // posted=false so we get a WriteResp + req = new StandardMem::Write(static_cast(addr), + static_cast(size), + std::move(zeros), + /*posted=*/false); } else { - auto *req = new StandardMem::Read(addr, size); - req->setDst(tag); - instance_->memIface_->send(req); + req = new StandardMem::Read(static_cast(addr), + static_cast(size)); } + + // Use the StandardMem-assigned ID to correlate responses + const auto id = req->getID(); + instance_->tag_by_id.emplace(id, tag); + + instance_->memIface_->send(req); }); + // Load the kernel or ELF if (!sim_->init(kernel)) { SST::Output out; @@ -67,23 +89,15 @@ bool VortexGPGPU::clockTick(SST::Cycle_t) { void 
VortexGPGPU::handleMemResp(StandardMem::Request *req) { // Inform SimX that this request has completed - vx_on_mem_complete(req->getDst()); + const auto id = req->getID(); + const auto it = tag_by_id.find(id); + if (it == tag_by_id.end()) { + SST::Output out; + out.fatal(CALL_INFO, -1, "VortexGPGPU: received response with unknown ID %lu\n", id); + } + else{ + vx_on_mem_complete(it->second); + tag_by_id.erase(it); + } delete req; } - -// Register with SST -SST_ELI_REGISTER_COMPONENT( - VortexGPGPU, - "vortex", // element library name - "VortexGPGPU", // component name - SST_ELI_ELEMENT_VERSION(1,0,0), - "Headless Vortex GPGPU Simulator", - COMPONENT_CATEGORY_PROCESSOR -) -SST_ELI_DOCUMENT_PARAMS( - {"clock", "Clock frequency", "1GHz"}, - {"program", "Path to the kernel or ELF to load", ""} -) -SST_ELI_DOCUMENT_PORTS( - {"memIface", "StandardMem port to connect to the SST memory hierarchy", {}} -) diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h index feeb538829..664124fc5e 100644 --- a/sim/simx/VortexGPGPU.h +++ b/sim/simx/VortexGPGPU.h @@ -5,6 +5,7 @@ #include #include #include "vortex_simulator.h" // wrapper around SimX +#include namespace SST { namespace Vortex { @@ -17,6 +18,23 @@ class VortexGPGPU : public SST::Component { void setup() override; void finish() override; + // Register with SST + SST_ELI_REGISTER_COMPONENT( + VortexGPGPU, + "vortex", // element library name + "VortexGPGPU", // component name + SST_ELI_ELEMENT_VERSION(1,0,0), + "Headless Vortex GPGPU Simulator", + COMPONENT_CATEGORY_PROCESSOR + ) + SST_ELI_DOCUMENT_PARAMS( + {"clock", "Clock frequency", "1GHz"}, + {"program", "Path to the kernel or ELF to load", ""} + ) + SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( + {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"} + ) + private: bool clockTick(SST::Cycle_t cycle); void handleMemResp(SST::Interfaces::StandardMem::Request* req); @@ -26,6 +44,7 @@ class VortexGPGPU : public SST::Component { 
std::unique_ptr sim_; SST::Interfaces::StandardMem* memIface_; + std::unordered_map tag_by_id; }; } // namespace Vortex diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h index 73f63b2ad0..d93e39d503 100644 --- a/sim/simx/mem_backend.h +++ b/sim/simx/mem_backend.h @@ -1,11 +1,14 @@ #pragma once #include +#include +#include "types.h" namespace vortex { struct IMemBackend { virtual ~IMemBackend() = default; virtual void reset() = 0; virtual void tick() = 0; + std::function mem_xbar_rsp_cb_; virtual void send_request(uint64_t addr, bool write, uint32_t size, uint32_t tag, uint32_t cid, uint64_t uuid) = 0; diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp index 3cc9da2588..a06014aab1 100644 --- a/sim/simx/mem_backend_sst.cpp +++ b/sim/simx/mem_backend_sst.cpp @@ -6,10 +6,10 @@ // be correlated back to the correct cluster and request. When a read // completion is observed via vx_on_mem_complete(), the backend pushes a // MemRsp back into the crossbar using the stored cid/uuid. Writes -// complete silently. Bank routing is currently fixed to bank 0; this -// preserves correctness but may underutilize bank-level parallelism. +// complete silently. #include "mem_backend_sst.h" +#include extern "C" { @@ -49,10 +49,9 @@ void MemBackendSST::reset() { void MemBackendSST::send_request(uint64_t addr, bool write, uint32_t size, uint32_t tag, uint32_t cid, uint64_t uuid) { - // Save request metadata so we can form a response on completion - inflight_.emplace(tag, Info{cid, uuid, write}); - // Forward the request into SST. The SST wrapper will create a - // StandardMem::Read or ::Write using this address, size and tag. 
+ uint32_t lg2_block = log2ceil(size); + uint32_t bank = (addr >> lg2_block) & (PLATFORM_MEMORY_NUM_BANKS - 1); + inflight_.emplace(tag, Info{cid, uuid, write, bank}); if (submit_fn_) { submit_fn_(addr, write, size, tag); } @@ -66,10 +65,8 @@ void MemBackendSST::complete(uint64_t tag) { // Only produce a MemRsp for reads; writes complete silently if (!info.write) { MemRsp rsp{tag, info.cid, info.uuid}; - // Always route completions to bank 0; adjust if you need per-bank - // completion routing in the future. if (mem_xbar_rsp_cb_) - mem_xbar_rsp_cb_(0, rsp); - } + mem_xbar_rsp_cb_(info.bank, rsp); + } inflight_.erase(it); } diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h index 808a52aefd..678dec0b81 100644 --- a/sim/simx/mem_backend_sst.h +++ b/sim/simx/mem_backend_sst.h @@ -32,12 +32,9 @@ class MemBackendSST : public IMemBackend { // Called from vx_on_mem_complete void complete(uint64_t tag); - // Set by MemSim to push MemRsp back to crossbar - std::function mem_xbar_rsp_cb_; - private: - struct Info { uint32_t cid; uint64_t uuid; bool write; }; + struct Info { uint32_t cid; uint64_t uuid; bool write; uint32_t bank;}; std::unordered_map inflight_; static MemBackendSST* inst_; static vx_submit_fn submit_fn_; diff --git a/sim/simx/obj/common/util.o b/sim/simx/obj/common/util.o new file mode 100644 index 0000000000000000000000000000000000000000..6e8785424bf8ab0f48eb0b6ff4ced8860499c1df GIT binary patch literal 18136 zcmeHOeQ;aVm4C9VL`jIG@U?dM5+I5LRF(2+!%}3&N}h-UZtQ@!5EMn0KNj2a%5vgB zD517PR7H^8$!^)VZeUu)*yYc=zKG)G?db(n?fv+B@e=9>fV@VGhlTyB;* zcE~%r6<}<^>+yUb5F7c(yQ$eMT!4mSXp#5(QDghecp(q~6wjyJO?#`)Hp~JTbeQK# zc0(=Z$R6Jn>l&|o+r(e8kruqJFOvg*Gf?IOR|C{M}(LDX2nQu7^?cGI3`})9oPvvv)CK($Mf$q(~bsi0a18d$|(rl z!iUU#qk(R}dq)FXMVkhgMR`Y~1&aZ*NN|2HAvJ>EBUK`8Ul%%R$re`$ALA$bkn(_Pg^C2Ph01*EL?% zm_RUY#Sp_RO(&L@UOBN~a&mH~SL==dS$p6Hq{kXspgfixc zxEwDk^W5unYB51&AYxu}&Cx)+?gr_4-7d7dX=r!pJ|d!MK;P$oEM2j%9I4?prRR=n zp)bY_lP{u}=#M)NBc*W#1k*%S1S?-5HN~iS{tZlDF&Hu)af302 
zDqxoydkq9ln&hr$BA0{@iS9#i2e5H@M;*|dqG&y(3dlb|+9A`H!ZBVDc~vP9ey%gW zD-eNJvBIT3a==(&Qw^Do!(LDzLj+)EFc5C8Y|R zVbWZn1<8<76e3P^#R@C^fFSiPp?Wi-tPnY979Xu90NqFjGig4Fd!3{z^?szLV3%jj%^o&^q z><-)lNwJYnyxtopp)gtw@0iPpqk#AO)q(9;KjMOfS5L$yaVg#%AoUPh&tYc|p?9}p z|Ekic2Hi^AErGgt{uOzH`8f_~9M9 z9-i~adFTXVm*XBO5RJk;#-yK>K(g+~K-g224s(ZE*JGNY1E(~?n%l1x)+kt-}_ zg+=2=NA|9lvV_&jvS=&W5@_cuq|_D;NzRD=I_a9)D}`6b1H&ahCl`m$N|$L4n6w48 zQ11bnnd+78lJ5yI)UEVfkI5?0$ggH`CAQF+w7xZFA%tzwZ93jL_}+VylgiQJ1+|IN z6a?4L&Q!7D3UI}uVxgogr0;{Jn*vw?_yGW|Rhg2KC`{omWzxcxw9DTn(`h;UN?BFS ztCS&W@1z}}w9|RV7SeWtDY~)c(bh73zbkC=k33Z^be(wq!^ug#Kdr_|cWeUF77;M< z!AI29Ot-s*gVFx971@;br=3*i-;d|vbCh6V$NT;pcKiW$Nh%ka4bTid-cg#yBUlKy zqp)r{rDGj%Bi5l1QnuR>ct()6zz#bC+o_Fuoa%zxY^)t-1|A7gj_+*-VHitirzbwwZrg8b+!^XQE;wPNEPc zrqCGCVinnzKq#L7MBVHbhEw~dJiVAmE@Ehb_RaUjzL~=6l=sa$VG8VC% z`>6C2b5n#!k5Z@0iTj1iH-j7enKlH9PfClWCo#E!kULU z==>f>P>3#&(BO$DKA4=8&s*<$GZCM6fs{Lu{#fHY?sk1bs>LZO!F}=$k~B`bTmTmk z$~Tdl3kumQCXpv!8x8CyO6sE}flcV1ippQubBG$w+vp~8Vn}W>PIYP3{?9C=4B07* z`2TxeDIN}DQK{&ND(!g_4Z=O+X);mt~br%@^-UfUv4fQgWAjcj0(Nt zwe@nboL2~w5-%!@A70CRENK+KsOE=8`J^SCpm?3)7gdQc|K?-e;I~e2#{+CE|7*Fv z>inpmX=(Y@s{s9I&7Z4jZ`NrV?542LIa7j^*uiW1kuO%<c z!b_qq|EOu`{Y!C?+jAj;4EGhe_tLf^&V{=w?4~8zAuYSSaTMjoAtaV;qii*?0jC*# zWO?o7QtRK#52D!Q^^@?F^Be8{uiEwUeB-y(u0PB-el^qe!hGZ4OxI868++WYAI>-aVUFtq z^v-ep<9x?Wb6ueS&$)zu!{eH$HSX}Zep72a+~@j9t?`<#;k)yVH|Dt>u65jX zoa???W83jAAP=43`u2QBv6e>NQ%g9uR;kxK$E*Lm#j6gM&pFxIZ%}2RqeQDjo}#f zIZirerh6wuhJc@}`o^hl^7wXCHG68dIh#HHk?NJ6P}NnlJ^n^dO{2%xKQ^ zCQNnI;~S~Ea<<34a*pY78v#HAs-O5%|2E<|;=IJ;zf5py1f36EbljjKJRfxpu6qeU zG3h5xyf=Yb_U$JOSiz)s@AL#q zrLs(n9+|mq#tv7p`X=Yi6f?92i)jD~=`VCZO}dTaqA~_Ue7bJUQnzR3IQ!%=88tR$B(&aQ}S`hi&ql)n# zXB19@ykvTD8ndQ@K%C+W%JeR0$s4EkISubr0C&TP%oNN=}|3U-qVHQhedLNB*9`xvtnUTcxyQMlDVjuW`}v5X)) zA3F^rz&TB-By&>svJ`q!p?qO2oi_Nt*x=u_!Jo3hUjlwCG#yuhMZQD(V;g!GR3~~p zwU8QjCh$u7pQGqQ;73L0!4)?2tAL-u{MnSljO{k`du;G0ZSW6l@Y$GBCAljFPT5cv z?eC+&$qo@UFUq-8?nZrjRoN2SBgcW4QAta`zp%kSu)!PPn3ec^%?5wW27lTHKLs0X 
zCH_lo@at{xzq7%=V>7Q8ZRk%%>a3*CIvczfIIYV7=0}D08!CHj=-;xzPey94B==$) z{3aXx2^)MC9HSC{(+0oE2LI3oKMPwzCH|M&;8`2|AshS!8~i03+=CDLmGtr3;2|6Q zDjVDw%JuXHQv(A=;;Wam=9VQAso~*pIJ}TQIL1<~xo|kuoy;b3*{FqVq8Ch-#u zFXWGuu@*oZ+Y+tI60Ny~q0UTpQ!?9;=*gwC$y^45lU>1{!E7=ZB56xEX0o~Ta5$99 zB)ZeX^pM)LL`aYRbug9b?chYqvgyH0@5XeZv!^$m7)a*2qia?jg~CzjCYcB0%R>f! z!t}F{ewNVBVv8Y`a6@bu>a@2-^OERBBe61mX;Wi7ap{T`t20`3XOflJ;ZVn3# zj!yocHaH|De_AXU&6fwW>CRvx(KS4r7)WOaGyPbA+~&ka5l5}L`ku^S0)Coau1(pR zYmk16o=P4|^KWb86IS#Y`Kk!NFkYpRGVYcAK10ajp8lR(w59TzgcCxEDXV@|N~lF# zE*BBPmL?K34NM@>(?6Kb<|@r2x(1Ysq>&VL4DlLDL|d08T2{3#N{Hxb%}^LBHhocy z%CsG2JcmQzluZxxCR1s|6#|f47Hd#izGk_6fIe>?CMA=Dg9u%u#n#1tiiRCMg9Dkt z^t6chd}|}4Vr~DXp8k%+n)KjMUz#SX{ru0F&V?&uF&8V3@3ePb>2XI+i^E8%}@8#D;WNPk$nn%%!?}Pm!@B(aO-^6+u(x?=lbtr{OcJ1TNypK&%KPEWNUr)FnYd@FEILg zMt>qr2*oGMxL>&lPU)B6l6q|b6(TCNlC8>kS@=Q~^Bq-QfOO+Sy}d|kf9 z_;7uGVS^uLe4>p1nRq`zh2(Pl5*vIo!?|DGp>UcPkGpR(`m@oe=M~3$3o1mv0++`B zis5`-4>Eiuqko0rbqxQ5!maai&jJyO6+g)a4>FwVzeeGtKbPCfaFU_t^#tR?*YQklWdLO&-n9o{~4p_>;4YIdAuCw0aFy3FFzMGFr1%@HZh#r=LLn6j@%B186H8O z)_;x{Hz-zn)+wCy;r3a=aBiP94CnUQ#Q1aj>|*rXJ`XXR+vh)Q@Ru2Xp0_`;!M#2) z-D!F642JXkwnE|7I9kteKHtX}pAbZ8JO7H&^Z9wmRdYr*w*siXzJSk1rBg10Grvjx9W;bRv328I8v1^*|7 z-)_MlRQNwz@W<7>9<<=+D*7K=@I4CuxdmUN`25m>k171WEcmmE&)XLKE=8}`pM0Fh z#d)fqmYK&XJ+n|DdcI#b3sm|o^=W_j-{!CFVbxR1CHw-Wk4hUNzW$Qo+TV%(LWZA? 
zHWk9p#ii#(&puRWt~?K1EKo_$^8nYA^VjiV)pIlB!|kcpndn#J()#RB^j3ZJIukwD z=Uzom_$9bBA03y3b3Gqs^p`UF#~9wia2?0Q=PL|Pd#rV)(lZ4>DZ)q1IFHJM)x$!sjshE{1bH zI~YEj(f>WeiO(sj?{0>Z4E??TK86!L9 zF1f)7;*G7WyLBs@?o9^qB{*jUJCeDi5!^61Xautvx*_s#uzu<-KG~MW1jWnQ()W@h zfAcKfkN!FM&?h{`XQ;oo=jwFtW+T{@!l&8cRC*wXFW(z_(~3$=3;GAe$H(BOeci*C z>b|~oKMaGv(@qER2b^CNA9&S$SN~9OAdAll*&M_Plcc*7o!KNlwgkJfnV|t~{iCk` z27aNo`YAjj>h5?|(|$)6Y8(9;{o-2MY=n2>6orjbYDPC@~MxPwa4O5?w-#*=+`(c^SY>qPAtDy}u{ooc+P0aaV=PkV*1zh(S> z+N4m{i(lzGlsC2h+WwS>E6IOL$=7*Dv6=R_GT%f;CHZyqk`-lGR>J=%Us>h<4w#Uy z=ybJ$UjCz0Tjr(hCVLIzC8EI<(SWM&RPyN`x2cf-kttP z;$=1l(|Dc7b)TOe9#OPBc{lyug~65V7ox)iie4wupVnUU)A&*)KSac=)G7HCAH0yR zRCIlj8ZXiEZ<`ulMr2Em)A*$T=UE*mSKdi>running()) { + anyRunning = true; + break; + } + } + perf_mem_latency_ += perf_mem_pending_reads_; + return anyRunning; +} + /////////////////////////////////////////////////////////////////////////////// Processor::Processor(const Arch& arch) @@ -196,6 +209,14 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { return impl_->dcr_write(addr, value); } +bool Processor::cycle() { + try { + return impl_->cycle(); + } catch (...) 
{ + return false; + } +} + #ifdef VM_ENABLE int16_t Processor::set_satp_by_addr(uint64_t base_addr) { uint16_t asid = 0; diff --git a/sim/simx/processor.h b/sim/simx/processor.h index 741b04f57d..4bb3f23fc6 100644 --- a/sim/simx/processor.h +++ b/sim/simx/processor.h @@ -35,6 +35,8 @@ class Processor { int run(); + bool cycle(); + void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE bool is_satp_unset(); diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index 952b28222f..7b4537677e 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -38,6 +38,8 @@ class ProcessorImpl { int run(); + bool cycle(); + void dcr_write(uint32_t addr, uint32_t value); #ifdef VM_ENABLE diff --git a/sim/simx/simx_config.stamp b/sim/simx/simx_config.stamp new file mode 100644 index 0000000000..1eb1208835 --- /dev/null +++ b/sim/simx/simx_config.stamp @@ -0,0 +1 @@ +-std=c++17 -Wall -Wextra -Wfatal-errors -fPIC -Wno-maybe-uninitialized -I/nethome/jsubburayan3/vortex/sim/simx -I/nethome/jsubburayan3/vortex/sim/common -I/export/nethomes/jsubburayan3/vortex/hw -I/nethome/jsubburayan3/vortex/third_party/softfloat/source/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/spdlog/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/ext/yaml-cpp/include -I/nethome/jsubburayan3/vortex/third_party/ramulator/src -DXLEN_64 -O2 -DNDEBUG diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp index 82b211e82c..2e64d5215e 100644 --- a/sim/simx/vortex_simulator.cpp +++ b/sim/simx/vortex_simulator.cpp @@ -4,99 +4,65 @@ #include #include "simobject.h" #include "dcrs.h" +#include +#include namespace vortex { -// Fallback macro definitions in case they are not provided by VX_config.h -#ifndef NUM_CLUSTERS -#define NUM_CLUSTERS 1 -#endif -#ifndef NUM_CORES -#define NUM_CORES 1 -#endif -#ifndef NUM_WARPS -#define NUM_WARPS 1 -#endif -#ifndef NUM_THREADS -#define NUM_THREADS 1 -#endif -#ifndef RAM_PAGE_SIZE -#define 
RAM_PAGE_SIZE 4096 -#endif -#ifndef STARTUP_ADDR -#define STARTUP_ADDR 0x0 -#endif - +// Utility to extract file extension static std::string getFileExt(const std::string& filename) { auto pos = filename.find_last_of('.'); if (pos == std::string::npos) return ""; return filename.substr(pos + 1); } -VortexSimulator::VortexSimulator() : halted_(true) {} +VortexSimulator::VortexSimulator() +: arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) +, ram_(0, MEM_PAGE_SIZE) +, proc_(std::make_unique(arch_)) +, halted_(true) {} bool VortexSimulator::init(const std::string& kernelPath) { - // Initialize the architecture from macros or fallbacks - arch_.num_clusters = NUM_CLUSTERS; - arch_.num_cores = NUM_CORES; - arch_.num_warps = NUM_WARPS; - arch_.num_threads = NUM_THREADS; - arch_.global_mem_size = 1ULL << 30; // 1 GiB of global memory - - ram_ = RAM(arch_.global_mem_size, RAM_PAGE_SIZE); - proc_ = std::make_unique(arch_); proc_->attach_ram(&ram_); - // Load a kernel binary if provided + // Load the kernel image if provided + // Load the kernel image if provided if (!kernelPath.empty()) { std::string ext = getFileExt(kernelPath); if (ext == "bin") { - std::ifstream in(kernelPath, std::ios::binary); - if (!in.good()) return false; - std::vector data((std::istreambuf_iterator(in)), - std::istreambuf_iterator()); - ram_.loadBinImage(data.data(), data.size(), 0x0); + // Load raw binary at STARTUP_ADDR + ram_.loadBinImage(kernelPath.c_str(), STARTUP_ADDR); } else if (ext == "hex") { - std::ifstream in(kernelPath); - if (!in.good()) return false; - std::vector bytes; - std::string byteStr; - while (in >> byteStr) { - uint8_t val = static_cast(std::stoul(byteStr, nullptr, 16)); - bytes.push_back(val); - } - ram_.loadBinImage(bytes.data(), bytes.size(), 0x0); + // Load Intel-hex + ram_.loadHexImage(kernelPath.c_str()); } else { - return false; + return false; // unsupported format } } - // Write start address to DCRs for each cluster - for (uint32_t cid = 0; cid < arch_.num_clusters; ++cid) 
{ - proc_->impl_->dcr_write(cid, DCR_LSU_BASE, STARTUP_ADDR); - proc_->impl_->dcr_write(cid, DCR_HALT, 0); - } + // Program base DCRs (match main.cpp behavior) + const uint64_t startup = STARTUP_ADDR; + proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu); + + #if (XLEN == 64) + proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup >> 32); + #endif + proc_->dcr_write(VX_DCR_BASE_MPM_CLASS, 0); halted_ = false; return true; } bool VortexSimulator::cycle() { - if (halted_) return false; - SimPlatform::instance().tick(); - bool anyRunning = false; - for (auto cluster : proc_->impl_->clusters_) { - if (cluster->running()) { - anyRunning = true; - break; - } - } - halted_ = !anyRunning; - return !halted_; +if (halted_) return false; +// Advance one cycle through the processor interface +bool running = proc_->cycle(); +halted_ = !running; +return running; } bool VortexSimulator::isHalted() const { return halted_; } -} // namespace vortex \ No newline at end of file +} // namespace vortex From 258e3e7cf13e056ffebc22be746994841756469c Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Fri, 24 Oct 2025 00:24:22 -0400 Subject: [PATCH 03/15] latest changes - not working --- sim/simx/Makefile | 21 +++- sim/simx/VortexGPGPU.cpp | 45 ++++++- sim/simx/VortexGPGPU.h | 12 +- sim/simx/emulator.cpp | 3 +- sim/simx/mem_backend_dram.cpp | 1 + sim/simx/mem_backend_sst.cpp | 7 ++ sim/simx/vortex_simulator.cpp | 217 +++++++++++++++++++++++++++++++--- sim/simx/vortex_simulator.h | 32 +++++ 8 files changed, 315 insertions(+), 23 deletions(-) diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 6ffd0a2cd9..4aa769f379 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -6,6 +6,9 @@ OBJ_DIR = $(DESTDIR)/obj CONFIG_FILE = $(DESTDIR)/simx_config.stamp SRC_DIR = $(VORTEX_HOME)/sim/simx +# SST StandardMem bridge (default off) +SST_USE_STDMEM ?= 0 + CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(SRC_DIR) 
-I$(SW_COMMON_DIR) -I$(ROOT_DIR)/hw @@ -34,9 +37,19 @@ SST_CFLAGS := $(shell pkg-config --cflags SST-14.1) SST_CFLAGS += -I../../../sst/sst/sst-core/include/ SST_LFLAGS := $(shell pkg-config --libs SST-14.1) -VORTEX_SST_SRCS := \ - $(SRC_DIR)/vortex_simulator.cpp \ - $(SRC_DIR)/VortexGPGPU.cpp +ifeq ($(SST_USE_STDMEM),1) + CXXFLAGS += -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM + LIBVORTEX_SST_DEFS = -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM + VORTEX_SST_SRCS := \ + $(SRC_DIR)/mem_backend_sst.cpp \ + $(SRC_DIR)/vortex_simulator.cpp \ + $(SRC_DIR)/VortexGPGPU.cpp +else + LIBVORTEX_SST_DEFS = + VORTEX_SST_SRCS := \ + $(SRC_DIR)/vortex_simulator.cpp \ + $(SRC_DIR)/VortexGPGPU.cpp +endif # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) @@ -106,7 +119,7 @@ $(DESTDIR)/lib$(PROJECT).so: $(OBJS) $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ $(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS) - $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -DUSE_SST_MEM_BACKEND \ + $(CXX) $(CXXFLAGS) $(SST_CFLAGS) $(LIBVORTEX_SST_DEFS) \ -I./sim/simx \ $(OBJS) $(VORTEX_SST_SRCS) \ -shared -o $@ \ diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp index 11c221f1c0..75336e4fd6 100644 --- a/sim/simx/VortexGPGPU.cpp +++ b/sim/simx/VortexGPGPU.cpp @@ -1,6 +1,9 @@ #include #include "VortexGPGPU.h" -#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete +#ifdef VORTEX_SST_ENABLE_STDMEM +#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete +#endif +#include #include #include #include @@ -9,7 +12,14 @@ using namespace SST; using namespace SST::Vortex; using SST::Interfaces::StandardMem; +namespace { +constexpr const char* kDefaultKernelPath = "/nethome/jsubburayan3/vortex/kernel.vxbin"; +constexpr uint32_t kDefaultLaunchBytes = 64; +} + +#ifdef VORTEX_SST_ENABLE_STDMEM VortexGPGPU *VortexGPGPU::instance_ = nullptr; +#endif VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) : Component(id), @@ -18,9 
+28,19 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) // Parameter: clock frequency (default 1GHz) std::string clockfreq = params.find("clock", "1GHz"); + // Parameter: program path std::string kernel = params.find("program", ""); + if (kernel.empty()) { + if (const char* env = std::getenv("VORTEX_DEFAULT_KERNEL")) + kernel = env; + else + kernel = kDefaultKernelPath; + } + const uint32_t launch_bytes = params.find("launch_bytes", kDefaultLaunchBytes); + +#ifdef VORTEX_SST_ENABLE_STDMEM // Create StandardMem interface; auto-bind to port name "memIface" memIface_ = loadUserSubComponent( "memIface", ComponentInfo::SHARE_NONE, @@ -32,7 +52,13 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) SST::Output out; out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n"); } +#else + // No SST memory: just register our clock handler + registerClock(clockfreq, + new SST::Clock::Handler(this, &VortexGPGPU::clockTick)); +#endif +#ifdef VORTEX_SST_ENABLE_STDMEM // Register callback so SimX can submit memory to SST instance_ = this; // Track app-specific tags by StandardMem request-id @@ -60,7 +86,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) instance_->memIface_->send(req); }); - +#endif // Load the kernel or ELF if (!sim_->init(kernel)) { @@ -68,6 +94,17 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n"); } + // Set up a default launch descriptor if the caller did not supply one + if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) { + SST::Output out; + out.fatal(CALL_INFO, -1, + "VortexGPGPU: unable to allocate launch descriptor (%u bytes)\n", + launch_bytes); + } + std::vector launch_payload(launch_bytes, 0); + sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size()); + sim_->setStartupArg(launch_desc_addr_); + registerAsPrimaryComponent(); primaryComponentDoNotEndSim(); } @@ -88,6 +125,7 @@ bool 
VortexGPGPU::clockTick(SST::Cycle_t) { } void VortexGPGPU::handleMemResp(StandardMem::Request *req) { + #ifdef VORTEX_SST_ENABLE_STDMEM // Inform SimX that this request has completed const auto id = req->getID(); const auto it = tag_by_id.find(id); @@ -100,4 +138,7 @@ void VortexGPGPU::handleMemResp(StandardMem::Request *req) { tag_by_id.erase(it); } delete req; + #else + delete req; // should never be called without StandardMem + #endif } diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h index 664124fc5e..d35eda898d 100644 --- a/sim/simx/VortexGPGPU.h +++ b/sim/simx/VortexGPGPU.h @@ -29,8 +29,10 @@ class VortexGPGPU : public SST::Component { ) SST_ELI_DOCUMENT_PARAMS( {"clock", "Clock frequency", "1GHz"}, - {"program", "Path to the kernel or ELF to load", ""} + {"program", "Path to the kernel or ELF to load (defaults to built-in test image)", ""}, + {"launch_bytes", "Size in bytes of the default launch descriptor", "64"} ) + SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"} ) @@ -39,12 +41,20 @@ class VortexGPGPU : public SST::Component { bool clockTick(SST::Cycle_t cycle); void handleMemResp(SST::Interfaces::StandardMem::Request* req); + #ifdef VORTEX_SST_ENABLE_STDMEM // static pointer used by lambda in vx_register_submit() static VortexGPGPU* instance_; + #endif std::unique_ptr sim_; + #ifdef VORTEX_SST_ENABLE_STDMEM SST::Interfaces::StandardMem* memIface_; std::unordered_map tag_by_id; + #else + SST::Interfaces::StandardMem* memIface_ = nullptr; + #endif + + uint64_t launch_desc_addr_ = 0; }; } // namespace Vortex diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 3eb62f9c76..7371a553d4 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "emulator.h" @@ -131,7 +132,7 @@ void Emulator::reset() { void Emulator::attach_ram(RAM* ram) { // bind RAM to memory unit #if (XLEN == 64) - 
mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39 + mmu_.attach(*ram, 0, std::numeric_limits::max()); #else mmu_.attach(*ram, 0, 0xFFFFFFFF); #endif diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp index 8d83a75d2e..f1cecae661 100644 --- a/sim/simx/mem_backend_dram.cpp +++ b/sim/simx/mem_backend_dram.cpp @@ -51,6 +51,7 @@ void MemBackendDram::dram_complete(void* arg) { uint32_t bank = info.bank; if (backend->mem_xbar_rsp_cb_) backend->mem_xbar_rsp_cb_(bank, rsp); + } backend->inflight_.erase(it); } diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp index a06014aab1..3be8227ffd 100644 --- a/sim/simx/mem_backend_sst.cpp +++ b/sim/simx/mem_backend_sst.cpp @@ -12,6 +12,7 @@ #include extern "C" { +#ifdef VORTEX_SST_ENABLE_STDMEM // Register a submit function provided by the SST component. The // MemBackendSST stores it in a static member so that calls to @@ -27,6 +28,12 @@ void vx_on_mem_complete(uint64_t tag) { if (auto inst = vortex::MemBackendSST::instance()) inst->complete(tag); } +#else + +void vx_register_submit(vx_submit_fn) {} +void vx_on_mem_complete(uint64_t) {} + +#endif } // extern "C" diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp index 2e64d5215e..928a029d0f 100644 --- a/sim/simx/vortex_simulator.cpp +++ b/sim/simx/vortex_simulator.cpp @@ -1,7 +1,11 @@ #include "vortex_simulator.h" +#include #include -#include +#include +#include #include +#include +#include #include "simobject.h" #include "dcrs.h" #include @@ -20,28 +24,32 @@ VortexSimulator::VortexSimulator() : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) , ram_(0, MEM_PAGE_SIZE) , proc_(std::make_unique(arch_)) +, kernel_image_{} +, next_alloc_addr_(kAllocBaseAddr) , halted_(true) {} bool VortexSimulator::init(const std::string& kernelPath) { proc_->attach_ram(&ram_); - // Load the kernel image if provided - // Load the kernel image if provided + kernel_image_ = {}; + next_alloc_addr_ = kAllocBaseAddr; + ram_.clear(); + 
ram_.set_acl(0, kGlobalMemSize, 0); + + bool has_kernel = false; if (!kernelPath.empty()) { - std::string ext = getFileExt(kernelPath); - if (ext == "bin") { - // Load raw binary at STARTUP_ADDR - ram_.loadBinImage(kernelPath.c_str(), STARTUP_ADDR); - } else if (ext == "hex") { - // Load Intel-hex - ram_.loadHexImage(kernelPath.c_str()); - } else { - return false; // unsupported format - } + auto image_info = this->loadKernelImage(kernelPath); + if (!image_info) + return false; + kernel_image_ = *image_info; + has_kernel = true; } - // Program base DCRs (match main.cpp behavior) - const uint64_t startup = STARTUP_ADDR; + // Program base DCRs - align startup to loaded kernel when provided + uint64_t startup = STARTUP_ADDR; + if (has_kernel) + startup = kernel_image_.base_addr; + proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu); #if (XLEN == 64) @@ -61,6 +69,185 @@ halted_ = !running; return running; } +bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) { + if (addr_out == nullptr || size == 0) + return false; + + alignment = normalizeAlignment(alignment); + uint64_t base = alignUp(next_alloc_addr_, alignment); + uint64_t end = base + size; + if (end > kGlobalMemSize) + return false; + + uint64_t acl_start = alignDown(base, RAM_PAGE_SIZE); + uint64_t acl_end = alignUp(end, RAM_PAGE_SIZE); + if (acl_end > kGlobalMemSize) + return false; + + int flags = 0; + if (readable) flags |= 0x1; + if (writable) flags |= 0x2; + if (flags != 0) + ram_.set_acl(acl_start, acl_end - acl_start, flags); + + *addr_out = base; + next_alloc_addr_ = std::max(next_alloc_addr_, acl_end); + return true; +} + +bool VortexSimulator::reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable) { + if (size == 0) + return false; + + uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE); + uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE); + if (acl_end > kGlobalMemSize) + return false; + + int 
flags = 0; + if (readable) flags |= 0x1; + if (writable) flags |= 0x2; + ram_.set_acl(acl_start, acl_end - acl_start, flags); + + if (acl_end > next_alloc_addr_) + next_alloc_addr_ = acl_end; + return true; +} + +void VortexSimulator::setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable) { + if (size == 0) + return; + uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE); + uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE); + int flags = 0; + if (readable) flags |= 0x1; + if (writable) flags |= 0x2; + ram_.set_acl(acl_start, acl_end - acl_start, flags); +} + +void VortexSimulator::writeMemory(uint64_t addr, const void* data, uint64_t size) { + if (data == nullptr || size == 0) + return; + ram_.write(data, addr, size); +} + +void VortexSimulator::setStartupArg(uint64_t arg_addr) { + proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG0, static_cast(arg_addr & 0xffffffffu)); +#if (XLEN == 64) + proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG1, static_cast(arg_addr >> 32)); +#endif +} + +std::optional VortexSimulator::loadKernelImage(const std::string& path) { + KernelImageInfo info{}; + + if (path.empty()) + return info; + + const auto ext = getFileExt(path); + if (ext == "bin") { + std::ifstream ifs(path, std::ios::binary); + if (!ifs) + return std::nullopt; + + ifs.seekg(0, std::ios::end); + const uint64_t size = static_cast(ifs.tellg()); + ifs.seekg(0, std::ios::beg); + std::vector payload(size); + if (size && !ifs.read(reinterpret_cast(payload.data()), size)) + return std::nullopt; + + if (!reserveMemory(STARTUP_ADDR, size, true, true)) + return std::nullopt; + writeMemory(STARTUP_ADDR, payload.data(), size); + setMemoryPermissions(STARTUP_ADDR, size, true, false); + + info.base_addr = STARTUP_ADDR; + info.size_bytes = size; + return info; + } + + if (ext == "hex") { + ram_.loadHexImage(path.c_str()); + info.base_addr = STARTUP_ADDR; + info.size_bytes = 0; + return info; + } + + if (ext == "vxbin") { + std::ifstream ifs(path, std::ios::binary); + if 
(!ifs) + return std::nullopt; + + uint64_t min_vma = 0; + uint64_t max_vma = 0; + + ifs.read(reinterpret_cast(&min_vma), sizeof(uint64_t)); + ifs.read(reinterpret_cast(&max_vma), sizeof(uint64_t)); + if (!ifs || max_vma < min_vma) + return std::nullopt; + + constexpr size_t header_bytes = sizeof(uint64_t) * 2; + ifs.seekg(0, std::ios::end); + const size_t file_size = static_cast(ifs.tellg()); + if (file_size < header_bytes) + return std::nullopt; + + const uint64_t payload_size = static_cast(file_size - header_bytes); + const uint64_t image_span = max_vma - min_vma; + if (image_span == 0) + return std::nullopt; + ifs.seekg(header_bytes, std::ios::beg); + + std::vector payload(payload_size); + if (payload_size && !ifs.read(reinterpret_cast(payload.data()), payload_size)) + return std::nullopt; + + if (!reserveMemory(min_vma, image_span, true, true)) + return std::nullopt; + if (payload_size) + writeMemory(min_vma, payload.data(), payload_size); + if (image_span > payload_size) { + std::vector zeros(static_cast(image_span - payload_size), 0); + writeMemory(min_vma + payload_size, zeros.data(), zeros.size()); + } + setMemoryPermissions(min_vma, image_span, true, false); + + info.base_addr = min_vma; + info.size_bytes = image_span; + return info; + } + + return std::nullopt; +} + +uint64_t VortexSimulator::alignUp(uint64_t value, uint64_t alignment) { + return (value + alignment - 1) & ~(alignment - 1); +} + +uint64_t VortexSimulator::alignDown(uint64_t value, uint64_t alignment) { + return value & ~(alignment - 1); +} + +uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) { + if (alignment == 0) + alignment = kDefaultAlignment; + if (alignment < kDefaultAlignment) + alignment = kDefaultAlignment; + if ((alignment & (alignment - 1)) == 0) + return alignment; + + alignment--; + alignment |= alignment >> 1; + alignment |= alignment >> 2; + alignment |= alignment >> 4; + alignment |= alignment >> 8; + alignment |= alignment >> 16; + alignment |= alignment >> 
32; + alignment++; + return alignment; +} + bool VortexSimulator::isHalted() const { return halted_; } diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h index eef604aca1..d710b8de65 100644 --- a/sim/simx/vortex_simulator.h +++ b/sim/simx/vortex_simulator.h @@ -3,11 +3,19 @@ #include "processor.h" // for Processor, RAM #include "arch.h" // for Arch +#include "constants.h" +#include #include +#include #include namespace vortex { +struct KernelImageInfo { + uint64_t base_addr = 0; + uint64_t size_bytes = 0; +}; + /** * A wrapper class used by the SST integration to drive the Vortex GPU * one cycle at a time. It encapsulates the architecture definition, @@ -24,6 +32,18 @@ class VortexSimulator { */ bool init(const std::string& kernelPath); + // changes to substitute for run-time wrt memory setup + const KernelImageInfo& kernelImage() const { return kernel_image_; } + bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out); + bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable); + void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable); + void writeMemory(uint64_t addr, const void* data, uint64_t size); + + RAM& ram() { return ram_; } + const RAM& ram() const { return ram_; } + + void setStartupArg(uint64_t arg_addr); + /** * Advances the simulation by one cycle. Returns false once the * simulation has completed (i.e. all clusters are halted). @@ -34,9 +54,21 @@ class VortexSimulator { bool isHalted() const; private: + static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 
0x200000000ull : 0x100000000ull; + static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR; + static constexpr uint64_t kDefaultAlignment = 64ull; + + static uint64_t alignUp(uint64_t value, uint64_t alignment); + static uint64_t alignDown(uint64_t value, uint64_t alignment); + static uint64_t normalizeAlignment(uint64_t alignment); + + std::optional loadKernelImage(const std::string& path); + Arch arch_; RAM ram_; std::unique_ptr proc_; + KernelImageInfo kernel_image_; + uint64_t next_alloc_addr_; bool halted_; }; From 0d49477fe2afa04977762821d062d5cdfd2f140b Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Tue, 28 Oct 2025 21:55:19 -0400 Subject: [PATCH 04/15] first success - sst now runs simx with hello program --- sim/common/mem.cpp | 13 ++- sim/simx/Makefile | 47 ++++----- sim/simx/VortexGPGPU.cpp | 57 +++++------ sim/simx/VortexGPGPU.h | 21 ++-- sim/simx/cluster.cpp | 2 + sim/simx/core.cpp | 2 + sim/simx/emulator.cpp | 4 +- sim/simx/mem_backend.h | 16 --- sim/simx/mem_backend_dram.cpp | 76 -------------- sim/simx/mem_backend_dram.h | 51 ---------- sim/simx/mem_backend_sst.cpp | 79 --------------- sim/simx/mem_backend_sst.h | 43 -------- sim/simx/mem_sim.cpp | 186 ++++++++++++++++++---------------- sim/simx/processor.cpp | 11 +- sim/simx/processor_impl.h | 1 + sim/simx/socket.cpp | 2 + sim/simx/vortex_simulator.cpp | 66 +++++++----- sim/simx/vortex_simulator.h | 20 ++-- 18 files changed, 235 insertions(+), 462 deletions(-) delete mode 100644 sim/simx/mem_backend.h delete mode 100644 sim/simx/mem_backend_dram.cpp delete mode 100644 sim/simx/mem_backend_dram.h delete mode 100644 sim/simx/mem_backend_sst.cpp delete mode 100644 sim/simx/mem_backend_sst.h diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index 96b08ff8a2..64a294b2eb 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) { if ((addr & (wordSize_-1)) || (addr_end & (wordSize_-1)) || (addr_end > 
contents_.size())) { - std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; + std::cout << "RamMemDevice::read lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; throw BadAddress(); } @@ -74,7 +74,7 @@ void RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) { if ((addr & (wordSize_-1)) || (addr_end & (wordSize_-1)) || (addr_end > contents_.size())) { - std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; + std::cout << "RamMemDevice::write lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; throw BadAddress(); } @@ -108,14 +108,16 @@ bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { assert(end >= start); + //std::cout << "ADecoder: map() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and md 0x" << &md << std::endl; entry_t entry{&md, start, end}; entries_.emplace_back(entry); } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; + //std::cout << "MemoryUnit::ADecoder::read init lookup of 0x" << std::hex << addr << std::dec << ".\n"; if (!this->lookup(addr, size, &ma)) { - std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; + std::cout << "MemoryUnit::ADecoder::read lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; throw BadAddress(); } ma.md->read(data, ma.addr, size); @@ -124,7 +126,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { - std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; + std::cout << "MemoryUnit::ADecoder::write lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; throw BadAddress(); } ma.md->write(data, ma.addr, 
size); @@ -154,6 +156,7 @@ MemoryUnit::MemoryUnit(uint64_t pageSize) #endif void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { + //std::cout << "MemoryUnit: attach() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and m 0x" << &m << std::endl; decoder_.map(start, end, m); } @@ -510,6 +513,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) { std::ifstream ifs(filename); if (!ifs) { std::cerr << "Error: " << filename << " not found" << std::endl; + //std::cout << "loadBinImage Error: " << filename << " not found" << std::endl; std::abort(); } @@ -521,6 +525,7 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) { this->clear(); this->write(content.data(), destination, size); + //std::cout << "Loaded binary image: " << filename << ", size: " << size << " bytes, destination: 0x" << std::hex << destination << std::dec << std::endl; } void RAM::loadHexImage(const char* filename) { diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 4aa769f379..22032d7444 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -1,14 +1,11 @@ include ../common.mk -# now you see me 2 + DESTDIR ?= $(CURDIR) OBJ_DIR = $(DESTDIR)/obj CONFIG_FILE = $(DESTDIR)/simx_config.stamp SRC_DIR = $(VORTEX_HOME)/sim/simx -# SST StandardMem bridge (default off) -SST_USE_STDMEM ?= 0 - CXXFLAGS += -std=c++17 -Wall -Wextra -Wfatal-errors CXXFLAGS += -fPIC -Wno-maybe-uninitialized CXXFLAGS += -I$(SRC_DIR) -I$(SW_COMMON_DIR) -I$(ROOT_DIR)/hw @@ -28,29 +25,16 @@ SRCS += $(SRC_DIR)/processor.cpp $(SRC_DIR)/cluster.cpp $(SRC_DIR)/socket.cpp $( SRCS += $(SRC_DIR)/decode.cpp $(SRC_DIR)/opc_unit.cpp $(SRC_DIR)/dispatcher.cpp SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp -SRCS += $(SRC_DIR)/mem_backend_sst.cpp \ -SRCS += $(SRC_DIR)/mem_backend_dram.cpp \ SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp 
+VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp +VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp + # SST flags SST_CFLAGS := $(shell pkg-config --cflags SST-14.1) SST_CFLAGS += -I../../../sst/sst/sst-core/include/ SST_LFLAGS := $(shell pkg-config --libs SST-14.1) -ifeq ($(SST_USE_STDMEM),1) - CXXFLAGS += -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM - LIBVORTEX_SST_DEFS = -DUSE_SST_MEM_BACKEND -DVORTEX_SST_ENABLE_STDMEM - VORTEX_SST_SRCS := \ - $(SRC_DIR)/mem_backend_sst.cpp \ - $(SRC_DIR)/vortex_simulator.cpp \ - $(SRC_DIR)/VortexGPGPU.cpp -else - LIBVORTEX_SST_DEFS = - VORTEX_SST_SRCS := \ - $(SRC_DIR)/vortex_simulator.cpp \ - $(SRC_DIR)/VortexGPGPU.cpp -endif - # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) SRCS += $(SRC_DIR)/voperands.cpp @@ -81,19 +65,25 @@ COMMON_SRCS := $(filter $(SW_COMMON_DIR)/%.cpp,$(SRCS)) SRC_SRCS := $(filter $(SRC_DIR)/%.cpp,$(SRCS)) COMMON_OBJS := $(patsubst $(SW_COMMON_DIR)/%.cpp,$(OBJ_DIR)/common/%.o,$(COMMON_SRCS)) SRC_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC_SRCS)) +VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS)) OBJS := $(COMMON_OBJS) $(SRC_OBJS) MAIN_OBJ := $(OBJ_DIR)/main.o -DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) +DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) $(VORTEX_SST_OBJS:.o=.d) # optional: pipe through ccache if you have it CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX)) PROJECT := simx +VORTEX_LIB := libvortex.so -.PHONY: all force clean clean-lib clean-exe clean-obj +.PHONY: all force clean clean-lib clean-exe clean-obj libvortex clean-libvortex -all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/libvortex.so +#ifdef USE_SST +all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/$(VORTEX_LIB) +#else +#all: $(DESTDIR)/$(PROJECT) +#endif # build common object files $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE) @@ -105,6 +95,11 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) @mkdir -p $(@D) $(CXX) $(CXXFLAGS) -c $< -o $@ +# build SST-specific 
source object files +$(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) + @mkdir -p $(@D) + $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@ + # build main object file $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE) @mkdir -p $(@D) @@ -118,8 +113,10 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ) $(DESTDIR)/lib$(PROJECT).so: $(OBJS) $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ -$(DESTDIR)/libvortex.so: $(OBJS) $(SST_OBJS) - $(CXX) $(CXXFLAGS) $(SST_CFLAGS) $(LIBVORTEX_SST_DEFS) \ +libvortex: $(DESTDIR)/$(VORTEX_LIB) + +$(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS) + $(CXX) $(CXXFLAGS) $(SST_CFLAGS) \ -I./sim/simx \ $(OBJS) $(VORTEX_SST_SRCS) \ -shared -o $@ \ diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp index 75336e4fd6..536e0e1583 100644 --- a/sim/simx/VortexGPGPU.cpp +++ b/sim/simx/VortexGPGPU.cpp @@ -1,8 +1,8 @@ #include #include "VortexGPGPU.h" -#ifdef VORTEX_SST_ENABLE_STDMEM +#ifdef USE_SST_MEM #include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete -#endif +#endif #include #include #include @@ -10,37 +10,29 @@ using namespace SST; using namespace SST::Vortex; +#ifdef USE_SST_MEM using SST::Interfaces::StandardMem; +#endif -namespace { -constexpr const char* kDefaultKernelPath = "/nethome/jsubburayan3/vortex/kernel.vxbin"; -constexpr uint32_t kDefaultLaunchBytes = 64; -} - -#ifdef VORTEX_SST_ENABLE_STDMEM +#ifdef USE_SST_MEM VortexGPGPU *VortexGPGPU::instance_ = nullptr; #endif VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) : Component(id), - sim_(std::make_unique()), - memIface_(nullptr) { + sim_(std::make_unique()) { + + std::cout << "VortexGPGPU: initializing Vortex GPGPU simulator\n"; // Parameter: clock frequency (default 1GHz) std::string clockfreq = params.find("clock", "1GHz"); // Parameter: program path - std::string kernel = params.find("program", ""); - if (kernel.empty()) { - if (const char* env = std::getenv("VORTEX_DEFAULT_KERNEL")) - kernel = env; - else - kernel = 
kDefaultKernelPath; - } - const uint32_t launch_bytes = params.find("launch_bytes", kDefaultLaunchBytes); + std::string kernel = params.find("program", "/nethome/jsubburayan3/vortex/build/tests/kernel/hello/hello.bin"); + //const uint32_t launch_bytes = params.find("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used -#ifdef VORTEX_SST_ENABLE_STDMEM +#ifdef USE_SST_MEM // Create StandardMem interface; auto-bind to port name "memIface" memIface_ = loadUserSubComponent( "memIface", ComponentInfo::SHARE_NONE, @@ -58,7 +50,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) new SST::Clock::Handler(this, &VortexGPGPU::clockTick)); #endif -#ifdef VORTEX_SST_ENABLE_STDMEM +#ifdef USE_SST_MEM // Register callback so SimX can submit memory to SST instance_ = this; // Track app-specific tags by StandardMem request-id @@ -88,13 +80,17 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) }); #endif - // Load the kernel or ELF + // Load the kernel image if (!sim_->init(kernel)) { SST::Output out; out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n"); } + else{ + std::cout << "VortexGPGPU: loaded kernel: " << kernel << std::endl; + } - // Set up a default launch descriptor if the caller did not supply one + // needed when launch descriptor is used + /* if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) { SST::Output out; out.fatal(CALL_INFO, -1, @@ -104,6 +100,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) std::vector launch_payload(launch_bytes, 0); sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size()); sim_->setStartupArg(launch_desc_addr_); + */ registerAsPrimaryComponent(); primaryComponentDoNotEndSim(); @@ -114,18 +111,22 @@ VortexGPGPU::~VortexGPGPU() = default; void VortexGPGPU::setup() {} void VortexGPGPU::finish() {} -bool VortexGPGPU::clockTick(SST::Cycle_t) { +bool VortexGPGPU::clockTick(SST::Cycle_t cycle) { // Advance the GPU one cycle + //std::cout << 
"VortexGPGPU: clockTick came from SST " << std::endl; bool running = sim_->cycle(); + //std::cout << "VortexGPGPU cycle returned: " << running << std::endl; if (!running) { primaryComponentOKToEndSim(); - return false; + std::cout << "VortexGPGPU: simulation finished\n"; + return true; } - return true; + //std::cout << "VortexGPGPU clockTick returns false " << std::endl; + return false; } +#ifdef USE_SST_MEM void VortexGPGPU::handleMemResp(StandardMem::Request *req) { - #ifdef VORTEX_SST_ENABLE_STDMEM // Inform SimX that this request has completed const auto id = req->getID(); const auto it = tag_by_id.find(id); @@ -138,7 +139,5 @@ void VortexGPGPU::handleMemResp(StandardMem::Request *req) { tag_by_id.erase(it); } delete req; - #else - delete req; // should never be called without StandardMem - #endif } +#endif diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h index d35eda898d..61acbaa40f 100644 --- a/sim/simx/VortexGPGPU.h +++ b/sim/simx/VortexGPGPU.h @@ -1,7 +1,7 @@ // VortexGPGPU.h #pragma once #include -#include +//#include #include #include #include "vortex_simulator.h" // wrapper around SimX @@ -24,7 +24,7 @@ class VortexGPGPU : public SST::Component { "vortex", // element library name "VortexGPGPU", // component name SST_ELI_ELEMENT_VERSION(1,0,0), - "Headless Vortex GPGPU Simulator", + "Vortex GPGPU Simulator", COMPONENT_CATEGORY_PROCESSOR ) SST_ELI_DOCUMENT_PARAMS( @@ -38,23 +38,24 @@ class VortexGPGPU : public SST::Component { ) private: + bool clockTick(SST::Cycle_t cycle); - void handleMemResp(SST::Interfaces::StandardMem::Request* req); - #ifdef VORTEX_SST_ENABLE_STDMEM + std::unique_ptr sim_; + + //uint64_t launch_desc_addr_ = 0; // required only when launch descriptor is required + + #ifdef USE_SST_MEM + void handleMemResp(SST::Interfaces::StandardMem::Request* req); + // static pointer used by lambda in vx_register_submit() static VortexGPGPU* instance_; - #endif - std::unique_ptr sim_; - #ifdef VORTEX_SST_ENABLE_STDMEM 
SST::Interfaces::StandardMem* memIface_; std::unordered_map tag_by_id; - #else + //#else SST::Interfaces::StandardMem* memIface_ = nullptr; #endif - - uint64_t launch_desc_addr_ = 0; }; } // namespace Vortex diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index bab67233b8..84cba9f134 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -103,10 +103,12 @@ void Cluster::set_satp(uint64_t satp) { #endif bool Cluster::running() const { + //std::cout << "Cluster: running()" << std::endl; for (auto& socket : sockets_) { if (socket->running()) return true; } + std::cout << "Cluster: running() returns false" << std::endl; return false; } diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 55791907a3..770c86701a 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -440,6 +440,7 @@ int Core::get_exitcode() const { bool Core::running() const { if (emulator_.running() || !pending_instrs_.empty()) { + //std::cout << "Core::running() emulator running: " << emulator_.running() << ", pending_instrs size: " << pending_instrs_.size() << std::endl; #ifndef NDEBUG for (auto& trace : pending_instrs_) { DT(5, "pipeline-pending: " << *trace); @@ -447,6 +448,7 @@ bool Core::running() const { #endif return true; } + std::cout << "Core::running() returns false" << std::endl; return false; } diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 7371a553d4..1028aba435 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -16,7 +16,6 @@ #include #include #include -#include #include #include "emulator.h" @@ -131,8 +130,9 @@ void Emulator::reset() { void Emulator::attach_ram(RAM* ram) { // bind RAM to memory unit + //std::cout << "Emulator: attach_ram()" << std::endl; #if (XLEN == 64) - mmu_.attach(*ram, 0, std::numeric_limits::max()); + mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39 #else mmu_.attach(*ram, 0, 0xFFFFFFFF); #endif diff --git a/sim/simx/mem_backend.h b/sim/simx/mem_backend.h deleted file mode 100644 index 
d93e39d503..0000000000 --- a/sim/simx/mem_backend.h +++ /dev/null @@ -1,16 +0,0 @@ -#pragma once -#include -#include -#include "types.h" - -namespace vortex { -struct IMemBackend { - virtual ~IMemBackend() = default; - virtual void reset() = 0; - virtual void tick() = 0; - std::function mem_xbar_rsp_cb_; - virtual void send_request(uint64_t addr, bool write, - uint32_t size, uint32_t tag, - uint32_t cid, uint64_t uuid) = 0; -}; -} // namespace vortex diff --git a/sim/simx/mem_backend_dram.cpp b/sim/simx/mem_backend_dram.cpp deleted file mode 100644 index f1cecae661..0000000000 --- a/sim/simx/mem_backend_dram.cpp +++ /dev/null @@ -1,76 +0,0 @@ -// mem_backend_dram.cpp -#include "mem_backend_dram.h" - -using namespace vortex; - -namespace { -struct CallbackData { - MemBackendDram* backend; - uint64_t tag; -}; -} // anonymous namespace - -MemBackendDram* MemBackendDram::inst_ = nullptr; - -MemBackendDram::MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio) - : num_banks_(num_banks) - , block_size_(block_size) - , lg2_block_size_(0) - , dram_sim_(num_banks, block_size, clock_ratio) -{ - // Compute log2(block_size_) once; block_size_ is assumed to be a power of two. 
- uint32_t tmp = block_size_; - while (tmp > 1) { - ++lg2_block_size_; - tmp >>= 1; - } - inst_ = this; -} - -void MemBackendDram::reset() { - inflight_.clear(); - dram_sim_.reset(); -} - -void MemBackendDram::tick() { - // Retire pending transactions in DramSim - dram_sim_.tick(); -} - -void MemBackendDram::dram_complete(void* arg) { - auto* data = static_cast(arg); - MemBackendDram* backend = data->backend; - uint64_t tag = data->tag; - auto it = backend->inflight_.find(tag); - if (it != backend->inflight_.end()) { - const Info& info = it->second; - if (!info.write) { - // Form a MemRsp for reads only - MemRsp rsp{tag, info.cid, info.uuid}; - // Route the response to the recorded bank - uint32_t bank = info.bank; - if (backend->mem_xbar_rsp_cb_) - backend->mem_xbar_rsp_cb_(bank, rsp); - - } - backend->inflight_.erase(it); - } - delete data; -} - -void MemBackendDram::send_request(uint64_t addr, bool write, - uint32_t size, uint32_t tag, - uint32_t cid, uint64_t uuid) { - // Compute bank index: (addr >> lg2(block_size)) mod num_banks - uint32_t bank_idx = 0; - if (num_banks_ > 0) - bank_idx = static_cast((addr >> lg2_block_size_) & (num_banks_ - 1)); - inflight_.emplace(tag, Info{cid, uuid, write, bank_idx}); - auto* cb_data = new CallbackData{this, tag}; - // The size is ignored by DramSim because it is configured with block_size_. 
- dram_sim_.send_request(addr, write, &MemBackendDram::dram_complete, cb_data); -} - -void MemBackendDram::complete(uint64_t tag) { - // Not used; dram_complete() handles completions -} \ No newline at end of file diff --git a/sim/simx/mem_backend_dram.h b/sim/simx/mem_backend_dram.h deleted file mode 100644 index 5a6f6f3d11..0000000000 --- a/sim/simx/mem_backend_dram.h +++ /dev/null @@ -1,51 +0,0 @@ -// mem_backend_dram.h -#pragma once -#include "mem_backend.h" -#include "dram_sim.h" -#include -#include -#include "types.h" - -namespace vortex { - -class MemBackendDram : public IMemBackend { -public: - static MemBackendDram* instance() { return inst_; } - - // Construct with the same parameters as MemSim::Config: number of banks, - // block size in bytes, and clock ratio. These values are passed to - // the underlying DramSim so that the external memory model matches. - MemBackendDram(uint32_t num_banks, uint32_t block_size, float clock_ratio); - - void reset() override; - void tick() override; - void send_request(uint64_t addr, bool write, - uint32_t size, uint32_t tag, - uint32_t cid, uint64_t uuid) override; - - // Not used directly; completions are handled by dram_complete(). - void complete(uint64_t tag); - - // Set by MemSim to push completed responses back to the correct - // bank in the crossbar. 
- std::function mem_xbar_rsp_cb_; - -private: - struct Info { - uint32_t cid; - uint64_t uuid; - bool write; - uint32_t bank; // bank index computed from the address - }; - std::unordered_map inflight_; - uint32_t num_banks_; - uint32_t block_size_; - uint32_t lg2_block_size_; - static MemBackendDram* inst_; - DramSim dram_sim_; - - // Static callback invoked by DramSim when a request completes - static void dram_complete(void* arg); -}; - -} // namespace vortex \ No newline at end of file diff --git a/sim/simx/mem_backend_sst.cpp b/sim/simx/mem_backend_sst.cpp deleted file mode 100644 index 3be8227ffd..0000000000 --- a/sim/simx/mem_backend_sst.cpp +++ /dev/null @@ -1,79 +0,0 @@ -// mem_backend_sst.cpp -// Implementation of the SST-backed memory backend. This backend forwards -// all off-chip memory requests to the SST StandardMem interface via a -// registered callback (vx_submit_fn). It maintains a table of inflight -// transactions keyed by the original request tag so that completions can -// be correlated back to the correct cluster and request. When a read -// completion is observed via vx_on_mem_complete(), the backend pushes a -// MemRsp back into the crossbar using the stored cid/uuid. Writes -// complete silently. - -#include "mem_backend_sst.h" -#include - -extern "C" { -#ifdef VORTEX_SST_ENABLE_STDMEM - -// Register a submit function provided by the SST component. The -// MemBackendSST stores it in a static member so that calls to -// send_request() can forward requests into SST. -void vx_register_submit(vx_submit_fn fn) { - vortex::MemBackendSST::set_vx_submit_fn(fn); -} - -// Notify MemBackendSST that the SST memory system has completed a -// request identified by 'tag'. The backend will produce a MemRsp for -// reads and erase the entry from its inflight table. 
-void vx_on_mem_complete(uint64_t tag) { - if (auto inst = vortex::MemBackendSST::instance()) - inst->complete(tag); -} -#else - -void vx_register_submit(vx_submit_fn) {} -void vx_on_mem_complete(uint64_t) {} - -#endif - -} // extern "C" - -using namespace vortex; - -// Initialise static pointers -MemBackendSST* MemBackendSST::inst_ = nullptr; -vx_submit_fn MemBackendSST::submit_fn_ = nullptr; - -MemBackendSST::MemBackendSST() { - // Record this instance so the C wrapper can find us - inst_ = this; -} - -void MemBackendSST::reset() { - // Drop all inflight transactions; pending responses are ignored - inflight_.clear(); -} - -void MemBackendSST::send_request(uint64_t addr, bool write, - uint32_t size, uint32_t tag, - uint32_t cid, uint64_t uuid) { - uint32_t lg2_block = log2ceil(size); - uint32_t bank = (addr >> lg2_block) & (PLATFORM_MEMORY_NUM_BANKS - 1); - inflight_.emplace(tag, Info{cid, uuid, write, bank}); - if (submit_fn_) { - submit_fn_(addr, write, size, tag); - } -} - -void MemBackendSST::complete(uint64_t tag) { - auto it = inflight_.find(tag); - if (it == inflight_.end()) - return; - const Info &info = it->second; - // Only produce a MemRsp for reads; writes complete silently - if (!info.write) { - MemRsp rsp{tag, info.cid, info.uuid}; - if (mem_xbar_rsp_cb_) - mem_xbar_rsp_cb_(info.bank, rsp); - } - inflight_.erase(it); -} diff --git a/sim/simx/mem_backend_sst.h b/sim/simx/mem_backend_sst.h deleted file mode 100644 index 678dec0b81..0000000000 --- a/sim/simx/mem_backend_sst.h +++ /dev/null @@ -1,43 +0,0 @@ -// mem_backend_sst.h -#pragma once -#include "mem_backend.h" -#include -#include -#include "types.h" - -extern "C" { - // Function pointer type for SST to call - typedef void (*vx_submit_fn)(uint64_t addr, bool write, uint32_t size, uint64_t tag); - // SST calls this to register its submit function - void vx_register_submit(vx_submit_fn fn); - // SST calls this when a memory response completes - void vx_on_mem_complete(uint64_t tag); -} - -namespace 
vortex { - -class MemBackendSST : public IMemBackend { -public: - static MemBackendSST* instance() { return inst_; } - static vx_submit_fn get_vx_submit_fn() { return submit_fn_; } - static void set_vx_submit_fn(vx_submit_fn fn) { submit_fn_ = fn; } - - MemBackendSST(); - void reset() override; - void tick() override {} - void send_request(uint64_t addr, bool write, - uint32_t size, uint32_t tag, - uint32_t cid, uint64_t uuid) override; - - // Called from vx_on_mem_complete - void complete(uint64_t tag); - - -private: - struct Info { uint32_t cid; uint64_t uuid; bool write; uint32_t bank;}; - std::unordered_map inflight_; - static MemBackendSST* inst_; - static vx_submit_fn submit_fn_; -}; - -} // namespace vortex diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index 61bf174a86..740ee008b6 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -1,117 +1,127 @@ -// mem_sim.cpp +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ #include "mem_sim.h" #include +#include #include -#include +#include #include "constants.h" #include "types.h" #include "debug.h" -#include "mem_backend.h" -#include "mem_backend_sst.h" -#include "mem_backend_dram.h" using namespace vortex; class MemSim::Impl { private: - MemSim* simobject_; - Config config_; - MemCrossBar::Ptr mem_xbar_; - std::unique_ptr backend_; - mutable PerfStats perf_stats_; + MemSim* simobject_; + Config config_; + MemCrossBar::Ptr mem_xbar_; + DramSim dram_sim_; + mutable PerfStats perf_stats_; + struct DramCallbackArgs { + MemSim::Impl* memsim; + MemReq request; + uint32_t bank_id; + }; public: - Impl(MemSim* simobject, const Config& config) - : simobject_(simobject) - , config_(config) - { - char sname[100]; - snprintf(sname, 100, "%s-xbar", simobject->name().c_str()); - mem_xbar_ = MemCrossBar::Create( - sname, - ArbiterType::RoundRobin, - config.num_ports, - config.num_banks, - [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) { - // Bank interleaving: choose the output index based on address bits - return static_cast((req.addr >> lg2_block_size) & (num_banks - 1)); - }); - - for (uint32_t i = 0; i < config.num_ports; ++i) { - simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i)); - mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i)); - } - - #ifdef USE_SST_MEM_BACKEND - backend_ = std::make_unique(); - #else - backend_ = std::make_unique(config.num_banks, config.block_size, config.clock_ratio); - #endif - - if (backend_) { - backend_->mem_xbar_rsp_cb_ = [this](uint32_t bank, const MemRsp& rsp) { - // Push the response into the appropriate crossbar output queue - if (bank < mem_xbar_->RspOut.size()) - mem_xbar_->RspOut.at(bank).push(rsp, 1); - }; - } - } - - const PerfStats& perf_stats() const { - perf_stats_.bank_stalls = mem_xbar_->collisions(); - return perf_stats_; - } - - void reset() { - if (backend_) - backend_->reset(); - } - - void tick() { - // Advance 
the selected memory backend - if (backend_) - backend_->tick(); - - // Drain requests from each bank and send to the backend - for (uint32_t bank = 0; bank < config_.num_banks; ++bank) { - if (mem_xbar_->ReqOut.at(bank).empty()) - continue; - auto& mem_req = mem_xbar_->ReqOut.at(bank).front(); - if (backend_) { - backend_->send_request( - mem_req.addr, - mem_req.write, - config_.block_size, - mem_req.tag, - mem_req.cid, - mem_req.uuid); - } - DT(3, simobject_->name() << "-mem-req" << bank << ": " << mem_req); - mem_xbar_->ReqOut.at(bank).pop(); - } - } + Impl(MemSim* simobject, const Config& config) + : simobject_(simobject) + , config_(config) + , dram_sim_(config.num_banks, config.block_size, config.clock_ratio) + { + char sname[100]; + snprintf(sname, 100, "%s-xbar", simobject->name().c_str()); + mem_xbar_ = MemCrossBar::Create(sname, ArbiterType::RoundRobin, config.num_ports, config.num_banks, + [lg2_block_size = log2ceil(config.block_size), num_banks = config.num_banks](const MemCrossBar::ReqType& req) { + // Custom logic to calculate the output index using bank interleaving + return (uint32_t)((req.addr >> lg2_block_size) & (num_banks-1)); + }); + for (uint32_t i = 0; i < config.num_ports; ++i) { + simobject->MemReqPorts.at(i).bind(&mem_xbar_->ReqIn.at(i)); + mem_xbar_->RspIn.at(i).bind(&simobject->MemRspPorts.at(i)); + } + } + + ~Impl() { + //-- + } + + const PerfStats& perf_stats() const { + perf_stats_.bank_stalls = mem_xbar_->collisions(); + return perf_stats_; + } + + void reset() { + dram_sim_.reset(); + } + + void tick() { + dram_sim_.tick(); + + for (uint32_t i = 0; i < config_.num_banks; ++i) { + if (mem_xbar_->ReqOut.at(i).empty()) + continue; + + auto& mem_req = mem_xbar_->ReqOut.at(i).front(); + + // enqueue the request to the memory system + auto req_args = new DramCallbackArgs{this, mem_req, i}; + dram_sim_.send_request( + mem_req.addr, + mem_req.write, + [](void* arg) { + auto rsp_args = reinterpret_cast(arg); + if (!rsp_args->request.write) { 
+ // only send a response for read requests + MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; + rsp_args->memsim->mem_xbar_->RspOut.at(rsp_args->bank_id).push(mem_rsp, 1); + DT(3, rsp_args->memsim->simobject_->name() << "-mem-rsp" << rsp_args->bank_id << ": " << mem_rsp); + } + delete rsp_args; + }, + req_args + ); + + DT(3, simobject_->name() << "-mem-req" << i << ": " << mem_req); + mem_xbar_->ReqOut.at(i).pop(); + } + } }; +/////////////////////////////////////////////////////////////////////////////// + MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) - : SimObject(ctx, name) - , MemReqPorts(config.num_ports, this) - , MemRspPorts(config.num_ports, this) - , impl_(new Impl(this, config)) + : SimObject(ctx, name) + , MemReqPorts(config.num_ports, this) + , MemRspPorts(config.num_ports, this) + , impl_(new Impl(this, config)) {} MemSim::~MemSim() { - delete impl_; + delete impl_; } void MemSim::reset() { - impl_->reset(); + impl_->reset(); } void MemSim::tick() { - impl_->tick(); + impl_->tick(); } -const MemSim::PerfStats& MemSim::perf_stats() const { - return impl_->perf_stats(); +const MemSim::PerfStats &MemSim::perf_stats() const { + return impl_->perf_stats(); } \ No newline at end of file diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 0cb00de010..acded6dd65 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -160,15 +160,23 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { } bool ProcessorImpl::cycle() { + if (!is_cycle_initialized_) { + std::cout << "ProcessorImpl: Initializing cycle()\n"; + SimPlatform::instance().reset(); + this->reset(); + is_cycle_initialized_ = true; + } + //std::cout << "ProcessorImpl: cycle()" << std::endl; SimPlatform::instance().tick(); bool anyRunning = false; - for (auto& cluster : clusters_) { + for (auto cluster : clusters_) { if (cluster->running()) { anyRunning = true; break; } } perf_mem_latency_ += 
perf_mem_pending_reads_; + //std::cout << "ProcessorImpl: cycle() - returns: " << anyRunning << std::endl; return anyRunning; } @@ -211,6 +219,7 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { bool Processor::cycle() { try { + //std::cout << "Processor: cycle()" << std::endl; return impl_->cycle(); } catch (...) { return false; diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index 7b4537677e..031ce14afa 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -52,6 +52,7 @@ class ProcessorImpl { void reset(); + bool is_cycle_initialized_ = false; const Arch& arch_; std::vector> clusters_; DCRS dcrs_; diff --git a/sim/simx/socket.cpp b/sim/simx/socket.cpp index b8ba8939f0..a729ad3b76 100644 --- a/sim/simx/socket.cpp +++ b/sim/simx/socket.cpp @@ -140,10 +140,12 @@ void Socket::set_satp(uint64_t satp) { #endif bool Socket::running() const { + //std::cout << "Socket: running()" << std::endl; for (auto& core : cores_) { if (core->running()) return true; } + std::cout << "Socket: running() returns false" << std::endl; return false; } diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp index 928a029d0f..5ace3cede5 100644 --- a/sim/simx/vortex_simulator.cpp +++ b/sim/simx/vortex_simulator.cpp @@ -10,66 +10,84 @@ #include "dcrs.h" #include #include +#include "util.h" namespace vortex { -// Utility to extract file extension -static std::string getFileExt(const std::string& filename) { - auto pos = filename.find_last_of('.'); - if (pos == std::string::npos) return ""; - return filename.substr(pos + 1); -} - VortexSimulator::VortexSimulator() : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) , ram_(0, MEM_PAGE_SIZE) , proc_(std::make_unique(arch_)) -, kernel_image_{} -, next_alloc_addr_(kAllocBaseAddr) +// , kernel_image_{} +// , next_alloc_addr_(kAllocBaseAddr) , halted_(true) {} bool VortexSimulator::init(const std::string& kernelPath) { proc_->attach_ram(&ram_); - kernel_image_ = {}; - next_alloc_addr_ = 
kAllocBaseAddr; - ram_.clear(); - ram_.set_acl(0, kGlobalMemSize, 0); + // kernel_image_ = {}; + // next_alloc_addr_ = kAllocBaseAddr; + // ram_.clear(); + // ram_.set_acl(0, kGlobalMemSize, 0); - bool has_kernel = false; + // can be used when launch descriptor is required + /* bool has_kernel = false; if (!kernelPath.empty()) { auto image_info = this->loadKernelImage(kernelPath); if (!image_info) return false; kernel_image_ = *image_info; has_kernel = true; - } + } */ // Program base DCRs - align startup to loaded kernel when provided - uint64_t startup = STARTUP_ADDR; + /* uint64_t startup = STARTUP_ADDR; if (has_kernel) - startup = kernel_image_.base_addr; - - proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup & 0xffffffffu); + startup = kernel_image_.base_addr; */ + // setup base DCRs + const uint64_t startup_addr(STARTUP_ADDR); + proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff); #if (XLEN == 64) - proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup >> 32); + proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR1, startup_addr >> 32); #endif proc_->dcr_write(VX_DCR_BASE_MPM_CLASS, 0); + // load program/kernel + { + std::string program_ext(fileExtension(kernelPath.c_str())); + if (program_ext == "bin") { + std::cout << "vortex_simulator: Loading binary image: " << kernelPath << " with startup address: 0x" << std::hex << startup_addr << std::dec << std::endl; + ram_.loadBinImage(kernelPath.c_str(), startup_addr); + } else if (program_ext == "hex") { + std::cout << "vortex_simulator: Loading hex image: " << kernelPath << std::endl; + ram_.loadHexImage(kernelPath.c_str()); + } else { + std::cerr << "Error: only *.bin or *.hex images supported." 
<< std::endl; + return -1; + } + } + halted_ = false; return true; } bool VortexSimulator::cycle() { if (halted_) return false; +//std::cout << "VortexSimulator: cycle()" << std::endl; // Advance one cycle through the processor interface bool running = proc_->cycle(); halted_ = !running; +//std::cout << "VortexSimulator: cycle() returns " << running << std::endl; return running; } -bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) { +bool VortexSimulator::isHalted() const { + return halted_; +} + +// Required when using launch descriptor and SST memory +/* bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) { if (addr_out == nullptr || size == 0) return false; @@ -246,10 +264,6 @@ uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) { alignment |= alignment >> 32; alignment++; return alignment; -} - -bool VortexSimulator::isHalted() const { - return halted_; -} +} */ } // namespace vortex diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h index d710b8de65..b0c890f593 100644 --- a/sim/simx/vortex_simulator.h +++ b/sim/simx/vortex_simulator.h @@ -11,11 +11,6 @@ namespace vortex { -struct KernelImageInfo { - uint64_t base_addr = 0; - uint64_t size_bytes = 0; -}; - /** * A wrapper class used by the SST integration to drive the Vortex GPU * one cycle at a time. 
It encapsulates the architecture definition, @@ -32,8 +27,8 @@ class VortexSimulator { */ bool init(const std::string& kernelPath); - // changes to substitute for run-time wrt memory setup - const KernelImageInfo& kernelImage() const { return kernel_image_; } + // changes to substitute for run-time wrt memory setup - required when using launch descriptor and SST memory +/* const KernelImageInfo& kernelImage() const { return kernel_image_; } bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out); bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable); void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable); @@ -42,7 +37,7 @@ class VortexSimulator { RAM& ram() { return ram_; } const RAM& ram() const { return ram_; } - void setStartupArg(uint64_t arg_addr); + void setStartupArg(uint64_t arg_addr); */ /** * Advances the simulation by one cycle. Returns false once the @@ -54,7 +49,8 @@ class VortexSimulator { bool isHalted() const; private: - static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull; + // required when using launch descriptor and SST memory + /* static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 
0x200000000ull : 0x100000000ull; static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR; static constexpr uint64_t kDefaultAlignment = 64ull; @@ -62,13 +58,13 @@ class VortexSimulator { static uint64_t alignDown(uint64_t value, uint64_t alignment); static uint64_t normalizeAlignment(uint64_t alignment); - std::optional loadKernelImage(const std::string& path); + std::optional loadKernelImage(const std::string& path); + KernelImageInfo kernel_image_; + uint64_t next_alloc_addr_;*/ Arch arch_; RAM ram_; std::unique_ptr proc_; - KernelImageInfo kernel_image_; - uint64_t next_alloc_addr_; bool halted_; }; From 85b137407ee3729f38376ac48ea62969442af8fb Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 12 Nov 2025 10:22:25 -0500 Subject: [PATCH 05/15] revert the logging and removing the unnecessary files added --- .gitignore | 2 ++ sim/common/mem.cpp | 13 ++++--------- sim/simx/cluster.cpp | 2 -- sim/simx/core.cpp | 2 -- sim/simx/emulator.cpp | 1 - sim/simx/obj/common/util.o | Bin 18136 -> 0 bytes sim/simx/simx_config.stamp | 1 - sim/simx/socket.cpp | 2 -- 8 files changed, 6 insertions(+), 17 deletions(-) delete mode 100644 sim/simx/obj/common/util.o delete mode 100644 sim/simx/simx_config.stamp diff --git a/.gitignore b/.gitignore index 43388e9cb5..41d5fd961a 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ /.vscode *.cache *.code-workspace +sim/simx/simx_config.stamp +sim/simx/obj/common/*.o diff --git a/sim/common/mem.cpp b/sim/common/mem.cpp index 64a294b2eb..96b08ff8a2 100644 --- a/sim/common/mem.cpp +++ b/sim/common/mem.cpp @@ -59,7 +59,7 @@ void RamMemDevice::read(void* data, uint64_t addr, uint64_t size) { if ((addr & (wordSize_-1)) || (addr_end & (wordSize_-1)) || (addr_end > contents_.size())) { - std::cout << "RamMemDevice::read lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; + std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; throw BadAddress(); } @@ -74,7 +74,7 @@ void 
RamMemDevice::write(const void* data, uint64_t addr, uint64_t size) { if ((addr & (wordSize_-1)) || (addr_end & (wordSize_-1)) || (addr_end > contents_.size())) { - std::cout << "RamMemDevice::write lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; + std::cout << "lookup of 0x" << std::hex << (addr_end-1) << std::dec << " failed.\n"; throw BadAddress(); } @@ -108,16 +108,14 @@ bool MemoryUnit::ADecoder::lookup(uint64_t addr, uint32_t wordSize, mem_accessor void MemoryUnit::ADecoder::map(uint64_t start, uint64_t end, MemDevice &md) { assert(end >= start); - //std::cout << "ADecoder: map() with start 0x" << std::hex << start << " and end 0x" << end << std::dec << " and md 0x" << &md << std::endl; entry_t entry{&md, start, end}; entries_.emplace_back(entry); } void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; - //std::cout << "MemoryUnit::ADecoder::read init lookup of 0x" << std::hex << addr << std::dec << ".\n"; if (!this->lookup(addr, size, &ma)) { - std::cout << "MemoryUnit::ADecoder::read lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; + std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; throw BadAddress(); } ma.md->read(data, ma.addr, size); @@ -126,7 +124,7 @@ void MemoryUnit::ADecoder::read(void* data, uint64_t addr, uint64_t size) { void MemoryUnit::ADecoder::write(const void* data, uint64_t addr, uint64_t size) { mem_accessor_t ma; if (!this->lookup(addr, size, &ma)) { - std::cout << "MemoryUnit::ADecoder::write lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; + std::cout << "lookup of 0x" << std::hex << addr << std::dec << " failed.\n"; throw BadAddress(); } ma.md->write(data, ma.addr, size); @@ -156,7 +154,6 @@ MemoryUnit::MemoryUnit(uint64_t pageSize) #endif void MemoryUnit::attach(MemDevice &m, uint64_t start, uint64_t end) { - //std::cout << "MemoryUnit: attach() with start 0x" << std::hex << start << " and end 0x" << end << std::dec 
<< " and m 0x" << &m << std::endl; decoder_.map(start, end, m); } @@ -513,7 +510,6 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) { std::ifstream ifs(filename); if (!ifs) { std::cerr << "Error: " << filename << " not found" << std::endl; - //std::cout << "loadBinImage Error: " << filename << " not found" << std::endl; std::abort(); } @@ -525,7 +521,6 @@ void RAM::loadBinImage(const char* filename, uint64_t destination) { this->clear(); this->write(content.data(), destination, size); - //std::cout << "Loaded binary image: " << filename << ", size: " << size << " bytes, destination: 0x" << std::hex << destination << std::dec << std::endl; } void RAM::loadHexImage(const char* filename) { diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index 84cba9f134..bab67233b8 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -103,12 +103,10 @@ void Cluster::set_satp(uint64_t satp) { #endif bool Cluster::running() const { - //std::cout << "Cluster: running()" << std::endl; for (auto& socket : sockets_) { if (socket->running()) return true; } - std::cout << "Cluster: running() returns false" << std::endl; return false; } diff --git a/sim/simx/core.cpp b/sim/simx/core.cpp index 770c86701a..55791907a3 100644 --- a/sim/simx/core.cpp +++ b/sim/simx/core.cpp @@ -440,7 +440,6 @@ int Core::get_exitcode() const { bool Core::running() const { if (emulator_.running() || !pending_instrs_.empty()) { - //std::cout << "Core::running() emulator running: " << emulator_.running() << ", pending_instrs size: " << pending_instrs_.size() << std::endl; #ifndef NDEBUG for (auto& trace : pending_instrs_) { DT(5, "pipeline-pending: " << *trace); @@ -448,7 +447,6 @@ bool Core::running() const { #endif return true; } - std::cout << "Core::running() returns false" << std::endl; return false; } diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 1028aba435..3eb62f9c76 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -130,7 +130,6 @@ 
void Emulator::reset() { void Emulator::attach_ram(RAM* ram) { // bind RAM to memory unit - //std::cout << "Emulator: attach_ram()" << std::endl; #if (XLEN == 64) mmu_.attach(*ram, 0, 0x7FFFFFFFFF); //39bit SV39 #else diff --git a/sim/simx/obj/common/util.o b/sim/simx/obj/common/util.o deleted file mode 100644 index 6e8785424bf8ab0f48eb0b6ff4ced8860499c1df..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 18136 zcmeHOeQ;aVm4C9VL`jIG@U?dM5+I5LRF(2+!%}3&N}h-UZtQ@!5EMn0KNj2a%5vgB zD517PR7H^8$!^)VZeUu)*yYc=zKG)G?db(n?fv+B@e=9>fV@VGhlTyB;* zcE~%r6<}<^>+yUb5F7c(yQ$eMT!4mSXp#5(QDghecp(q~6wjyJO?#`)Hp~JTbeQK# zc0(=Z$R6Jn>l&|o+r(e8kruqJFOvg*Gf?IOR|C{M}(LDX2nQu7^?cGI3`})9oPvvv)CK($Mf$q(~bsi0a18d$|(rl z!iUU#qk(R}dq)FXMVkhgMR`Y~1&aZ*NN|2HAvJ>EBUK`8Ul%%R$re`$ALA$bkn(_Pg^C2Ph01*EL?% zm_RUY#Sp_RO(&L@UOBN~a&mH~SL==dS$p6Hq{kXspgfixc zxEwDk^W5unYB51&AYxu}&Cx)+?gr_4-7d7dX=r!pJ|d!MK;P$oEM2j%9I4?prRR=n zp)bY_lP{u}=#M)NBc*W#1k*%S1S?-5HN~iS{tZlDF&Hu)af302 zDqxoydkq9ln&hr$BA0{@iS9#i2e5H@M;*|dqG&y(3dlb|+9A`H!ZBVDc~vP9ey%gW zD-eNJvBIT3a==(&Qw^Do!(LDzLj+)EFc5C8Y|R zVbWZn1<8<76e3P^#R@C^fFSiPp?Wi-tPnY979Xu90NqFjGig4Fd!3{z^?szLV3%jj%^o&^q z><-)lNwJYnyxtopp)gtw@0iPpqk#AO)q(9;KjMOfS5L$yaVg#%AoUPh&tYc|p?9}p z|Ekic2Hi^AErGgt{uOzH`8f_~9M9 z9-i~adFTXVm*XBO5RJk;#-yK>K(g+~K-g224s(ZE*JGNY1E(~?n%l1x)+kt-}_ zg+=2=NA|9lvV_&jvS=&W5@_cuq|_D;NzRD=I_a9)D}`6b1H&ahCl`m$N|$L4n6w48 zQ11bnnd+78lJ5yI)UEVfkI5?0$ggH`CAQF+w7xZFA%tzwZ93jL_}+VylgiQJ1+|IN z6a?4L&Q!7D3UI}uVxgogr0;{Jn*vw?_yGW|Rhg2KC`{omWzxcxw9DTn(`h;UN?BFS ztCS&W@1z}}w9|RV7SeWtDY~)c(bh73zbkC=k33Z^be(wq!^ug#Kdr_|cWeUF77;M< z!AI29Ot-s*gVFx971@;br=3*i-;d|vbCh6V$NT;pcKiW$Nh%ka4bTid-cg#yBUlKy zqp)r{rDGj%Bi5l1QnuR>ct()6zz#bC+o_Fuoa%zxY^)t-1|A7gj_+*-VHitirzbwwZrg8b+!^XQE;wPNEPc zrqCGCVinnzKq#L7MBVHbhEw~dJiVAmE@Ehb_RaUjzL~=6l=sa$VG8VC% z`>6C2b5n#!k5Z@0iTj1iH-j7enKlH9PfClWCo#E!kULU z==>f>P>3#&(BO$DKA4=8&s*<$GZCM6fs{Lu{#fHY?sk1bs>LZO!F}=$k~B`bTmTmk z$~Tdl3kumQCXpv!8x8CyO6sE}flcV1ippQubBG$w+vp~8Vn}W>PIYP3{?9C=4B07* 
z`2TxeDIN}DQK{&ND(!g_4Z=O+X);mt~br%@^-UfUv4fQgWAjcj0(Nt zwe@nboL2~w5-%!@A70CRENK+KsOE=8`J^SCpm?3)7gdQc|K?-e;I~e2#{+CE|7*Fv z>inpmX=(Y@s{s9I&7Z4jZ`NrV?542LIa7j^*uiW1kuO%<c z!b_qq|EOu`{Y!C?+jAj;4EGhe_tLf^&V{=w?4~8zAuYSSaTMjoAtaV;qii*?0jC*# zWO?o7QtRK#52D!Q^^@?F^Be8{uiEwUeB-y(u0PB-el^qe!hGZ4OxI868++WYAI>-aVUFtq z^v-ep<9x?Wb6ueS&$)zu!{eH$HSX}Zep72a+~@j9t?`<#;k)yVH|Dt>u65jX zoa???W83jAAP=43`u2QBv6e>NQ%g9uR;kxK$E*Lm#j6gM&pFxIZ%}2RqeQDjo}#f zIZirerh6wuhJc@}`o^hl^7wXCHG68dIh#HHk?NJ6P}NnlJ^n^dO{2%xKQ^ zCQNnI;~S~Ea<<34a*pY78v#HAs-O5%|2E<|;=IJ;zf5py1f36EbljjKJRfxpu6qeU zG3h5xyf=Yb_U$JOSiz)s@AL#q zrLs(n9+|mq#tv7p`X=Yi6f?92i)jD~=`VCZO}dTaqA~_Ue7bJUQnzR3IQ!%=88tR$B(&aQ}S`hi&ql)n# zXB19@ykvTD8ndQ@K%C+W%JeR0$s4EkISubr0C&TP%oNN=}|3U-qVHQhedLNB*9`xvtnUTcxyQMlDVjuW`}v5X)) zA3F^rz&TB-By&>svJ`q!p?qO2oi_Nt*x=u_!Jo3hUjlwCG#yuhMZQD(V;g!GR3~~p zwU8QjCh$u7pQGqQ;73L0!4)?2tAL-u{MnSljO{k`du;G0ZSW6l@Y$GBCAljFPT5cv z?eC+&$qo@UFUq-8?nZrjRoN2SBgcW4QAta`zp%kSu)!PPn3ec^%?5wW27lTHKLs0X zCH_lo@at{xzq7%=V>7Q8ZRk%%>a3*CIvczfIIYV7=0}D08!CHj=-;xzPey94B==$) z{3aXx2^)MC9HSC{(+0oE2LI3oKMPwzCH|M&;8`2|AshS!8~i03+=CDLmGtr3;2|6Q zDjVDw%JuXHQv(A=;;Wam=9VQAso~*pIJ}TQIL1<~xo|kuoy;b3*{FqVq8Ch-#u zFXWGuu@*oZ+Y+tI60Ny~q0UTpQ!?9;=*gwC$y^45lU>1{!E7=ZB56xEX0o~Ta5$99 zB)ZeX^pM)LL`aYRbug9b?chYqvgyH0@5XeZv!^$m7)a*2qia?jg~CzjCYcB0%R>f! 
z!t}F{ewNVBVv8Y`a6@bu>a@2-^OERBBe61mX;Wi7ap{T`t20`3XOflJ;ZVn3# zj!yocHaH|De_AXU&6fwW>CRvx(KS4r7)WOaGyPbA+~&ka5l5}L`ku^S0)Coau1(pR zYmk16o=P4|^KWb86IS#Y`Kk!NFkYpRGVYcAK10ajp8lR(w59TzgcCxEDXV@|N~lF# zE*BBPmL?K34NM@>(?6Kb<|@r2x(1Ysq>&VL4DlLDL|d08T2{3#N{Hxb%}^LBHhocy z%CsG2JcmQzluZxxCR1s|6#|f47Hd#izGk_6fIe>?CMA=Dg9u%u#n#1tiiRCMg9Dkt z^t6chd}|}4Vr~DXp8k%+n)KjMUz#SX{ru0F&V?&uF&8V3@3ePb>2XI+i^E8%}@8#D;WNPk$nn%%!?}Pm!@B(aO-^6+u(x?=lbtr{OcJ1TNypK&%KPEWNUr)FnYd@FEILg zMt>qr2*oGMxL>&lPU)B6l6q|b6(TCNlC8>kS@=Q~^Bq-QfOO+Sy}d|kf9 z_;7uGVS^uLe4>p1nRq`zh2(Pl5*vIo!?|DGp>UcPkGpR(`m@oe=M~3$3o1mv0++`B zis5`-4>Eiuqko0rbqxQ5!maai&jJyO6+g)a4>FwVzeeGtKbPCfaFU_t^#tR?*YQklWdLO&-n9o{~4p_>;4YIdAuCw0aFy3FFzMGFr1%@HZh#r=LLn6j@%B186H8O z)_;x{Hz-zn)+wCy;r3a=aBiP94CnUQ#Q1aj>|*rXJ`XXR+vh)Q@Ru2Xp0_`;!M#2) z-D!F642JXkwnE|7I9kteKHtX}pAbZ8JO7H&^Z9wmRdYr*w*siXzJSk1rBg10Grvjx9W;bRv328I8v1^*|7 z-)_MlRQNwz@W<7>9<<=+D*7K=@I4CuxdmUN`25m>k171WEcmmE&)XLKE=8}`pM0Fh z#d)fqmYK&XJ+n|DdcI#b3sm|o^=W_j-{!CFVbxR1CHw-Wk4hUNzW$Qo+TV%(LWZA? zHWk9p#ii#(&puRWt~?K1EKo_$^8nYA^VjiV)pIlB!|kcpndn#J()#RB^j3ZJIukwD z=Uzom_$9bBA03y3b3Gqs^p`UF#~9wia2?0Q=PL|Pd#rV)(lZ4>DZ)q1IFHJM)x$!sjshE{1bH zI~YEj(f>WeiO(sj?{0>Z4E??TK86!L9 zF1f)7;*G7WyLBs@?o9^qB{*jUJCeDi5!^61Xautvx*_s#uzu<-KG~MW1jWnQ()W@h zfAcKfkN!FM&?h{`XQ;oo=jwFtW+T{@!l&8cRC*wXFW(z_(~3$=3;GAe$H(BOeci*C z>b|~oKMaGv(@qER2b^CNA9&S$SN~9OAdAll*&M_Plcc*7o!KNlwgkJfnV|t~{iCk` z27aNo`YAjj>h5?|(|$)6Y8(9;{o-2MY=n2>6orjbYDPC@~MxPwa4O5?w-#*=+`(c^SY>qPAtDy}u{ooc+P0aaV=PkV*1zh(S> z+N4m{i(lzGlsC2h+WwS>E6IOL$=7*Dv6=R_GT%f;CHZyqk`-lGR>J=%Us>h<4w#Uy z=ybJ$UjCz0Tjr(hCVLIzC8EI<(SWM&RPyN`x2cf-kttP z;$=1l(|Dc7b)TOe9#OPBc{lyug~65V7ox)iie4wupVnUU)A&*)KSac=)G7HCAH0yR zRCIlj8ZXiEZ<`ulMr2Em)A*$T=UE*mSKdi>running()) return true; } - std::cout << "Socket: running() returns false" << std::endl; return false; } From 32653581c96d8759dca8e581c1e02d84d7abd80a Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 12 Nov 2025 16:19:28 -0500 Subject: [PATCH 06/15] revert all memory based changes + makefile update 
--- .gitignore | 2 - sim/simx/Makefile | 34 ++++-- sim/simx/VortexGPGPU.cpp | 96 +--------------- sim/simx/VortexGPGPU.h | 24 +--- sim/simx/processor.cpp | 6 +- sim/simx/vortex_simulator.cpp | 204 ---------------------------------- sim/simx/vortex_simulator.h | 24 ---- 7 files changed, 36 insertions(+), 354 deletions(-) diff --git a/.gitignore b/.gitignore index 41d5fd961a..43388e9cb5 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,3 @@ /.vscode *.cache *.code-workspace -sim/simx/simx_config.stamp -sim/simx/obj/common/*.o diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 22032d7444..e2a7f65721 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -1,6 +1,8 @@ include ../common.mk DESTDIR ?= $(CURDIR) +USE_SST ?= 0 +SST_PKG ?= SST-14.1 # default SST package name OBJ_DIR = $(DESTDIR)/obj CONFIG_FILE = $(DESTDIR)/simx_config.stamp @@ -27,13 +29,13 @@ SRCS += $(SRC_DIR)/execute.cpp $(SRC_DIR)/func_unit.cpp SRCS += $(SRC_DIR)/cache_sim.cpp $(SRC_DIR)/mem_sim.cpp $(SRC_DIR)/local_mem.cpp $(SRC_DIR)/mem_coalescer.cpp SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp +ifeq ($(USE_SST), 1) VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp - -# SST flags -SST_CFLAGS := $(shell pkg-config --cflags SST-14.1) -SST_CFLAGS += -I../../../sst/sst/sst-core/include/ -SST_LFLAGS := $(shell pkg-config --libs SST-14.1) +SST_CFLAGS := $(shell pkg-config --cflags $(SST_PKG)) +SST_LFLAGS := $(shell pkg-config --libs $(SST_PKG)) +CXXFLAGS += $(SST_CFLAGS) -DUSE_SST +endif # Add V extension sources ifneq ($(findstring -DEXT_V_ENABLE, $(CONFIGS)),) @@ -65,11 +67,16 @@ COMMON_SRCS := $(filter $(SW_COMMON_DIR)/%.cpp,$(SRCS)) SRC_SRCS := $(filter $(SRC_DIR)/%.cpp,$(SRCS)) COMMON_OBJS := $(patsubst $(SW_COMMON_DIR)/%.cpp,$(OBJ_DIR)/common/%.o,$(COMMON_SRCS)) SRC_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(SRC_SRCS)) -VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS)) OBJS := $(COMMON_OBJS) 
$(SRC_OBJS) MAIN_OBJ := $(OBJ_DIR)/main.o -DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) $(VORTEX_SST_OBJS:.o=.d) +DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) + +ifeq ($(USE_SST), 1) +VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS)) +DEPS += $(VORTEX_SST_OBJS:.o=.d) +endif + # optional: pipe through ccache if you have it CXX := $(if $(shell which ccache),ccache $(CXX),$(CXX)) @@ -79,11 +86,11 @@ VORTEX_LIB := libvortex.so .PHONY: all force clean clean-lib clean-exe clean-obj libvortex clean-libvortex -#ifdef USE_SST +ifeq ($(USE_SST), 1) all: $(DESTDIR)/$(PROJECT) $(DESTDIR)/$(VORTEX_LIB) -#else -#all: $(DESTDIR)/$(PROJECT) -#endif +else +all: $(DESTDIR)/$(PROJECT) +endif # build common object files $(OBJ_DIR)/common/%.o: $(SW_COMMON_DIR)/%.cpp $(CONFIG_FILE) @@ -96,9 +103,11 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) $(CXX) $(CXXFLAGS) -c $< -o $@ # build SST-specific source object files +ifeq ($(USE_SST), 1) $(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) @mkdir -p $(@D) $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@ +endif # build main object file $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE) @@ -113,6 +122,8 @@ $(DESTDIR)/$(PROJECT): $(OBJS) $(MAIN_OBJ) $(DESTDIR)/lib$(PROJECT).so: $(OBJS) $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ +# Vortex SST simulator component shared library +ifeq ($(USE_SST), 1) libvortex: $(DESTDIR)/$(VORTEX_LIB) $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS) @@ -121,6 +132,7 @@ $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS) $(OBJS) $(VORTEX_SST_SRCS) \ -shared -o $@ \ $(LDFLAGS) $(SST_LFLAGS) +endif # updates the timestamp when flags changed. 
$(CONFIG_FILE): force diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp index 536e0e1583..84583ca6ce 100644 --- a/sim/simx/VortexGPGPU.cpp +++ b/sim/simx/VortexGPGPU.cpp @@ -1,8 +1,5 @@ #include #include "VortexGPGPU.h" -#ifdef USE_SST_MEM -#include "mem_backend_sst.h" // needed for vx_register_submit and vx_on_mem_complete -#endif #include #include #include @@ -10,19 +7,12 @@ using namespace SST; using namespace SST::Vortex; -#ifdef USE_SST_MEM -using SST::Interfaces::StandardMem; -#endif - -#ifdef USE_SST_MEM -VortexGPGPU *VortexGPGPU::instance_ = nullptr; -#endif VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) : Component(id), sim_(std::make_unique()) { - std::cout << "VortexGPGPU: initializing Vortex GPGPU simulator\n"; + std::cout << "VortexGPGPU Component: Initializing Vortex GPGPU simulator\n"; // Parameter: clock frequency (default 1GHz) std::string clockfreq = params.find("clock", "1GHz"); @@ -32,53 +22,10 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) //const uint32_t launch_bytes = params.find("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used -#ifdef USE_SST_MEM - // Create StandardMem interface; auto-bind to port name "memIface" - memIface_ = loadUserSubComponent( - "memIface", ComponentInfo::SHARE_NONE, - registerClock(clockfreq, - new SST::Clock::Handler(this, &VortexGPGPU::clockTick)), - new StandardMem::Handler(this, &VortexGPGPU::handleMemResp)); - if (!memIface_) { - SST::Output out; - out.fatal(CALL_INFO, -1, "VortexGPGPU: failed to load memIface StandardMem port\n"); - } -#else - // No SST memory: just register our clock handler + // Register our clock handler with SST registerClock(clockfreq, new SST::Clock::Handler(this, &VortexGPGPU::clockTick)); -#endif - -#ifdef USE_SST_MEM - // Register callback so SimX can submit memory to SST - instance_ = this; - // Track app-specific tags by StandardMem request-id - // (e.g., inside your instance_ type) - - vx_register_submit(+[](uint64_t 
addr, bool write, uint32_t size, uint64_t tag) { - - StandardMem::Request* req = nullptr; - - if (write) { - std::vector zeros(static_cast(size), 0); - // posted=false so we get a WriteResp - req = new StandardMem::Write(static_cast(addr), - static_cast(size), - std::move(zeros), - /*posted=*/false); - } else { - req = new StandardMem::Read(static_cast(addr), - static_cast(size)); - } - - // Use the StandardMem-assigned ID to correlate responses - const auto id = req->getID(); - instance_->tag_by_id.emplace(id, tag); - - instance_->memIface_->send(req); - }); -#endif // Load the kernel image if (!sim_->init(kernel)) { @@ -86,22 +33,9 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) out.fatal(CALL_INFO, -1, "VortexSimulator init failed\n"); } else{ - std::cout << "VortexGPGPU: loaded kernel: " << kernel << std::endl; + std::cout << "VortexGPGPU Component: loaded kernel: " << kernel << std::endl; } - // needed when launch descriptor is used - /* - if (!sim_->allocateMemory(launch_bytes, 64, true, true, &launch_desc_addr_)) { - SST::Output out; - out.fatal(CALL_INFO, -1, - "VortexGPGPU: unable to allocate launch descriptor (%u bytes)\n", - launch_bytes); - } - std::vector launch_payload(launch_bytes, 0); - sim_->writeMemory(launch_desc_addr_, launch_payload.data(), launch_payload.size()); - sim_->setStartupArg(launch_desc_addr_); - */ - registerAsPrimaryComponent(); primaryComponentDoNotEndSim(); } @@ -111,33 +45,13 @@ VortexGPGPU::~VortexGPGPU() = default; void VortexGPGPU::setup() {} void VortexGPGPU::finish() {} +// Advance the GPU execution one cycle based on SST clock handler callback bool VortexGPGPU::clockTick(SST::Cycle_t cycle) { - // Advance the GPU one cycle - //std::cout << "VortexGPGPU: clockTick came from SST " << std::endl; bool running = sim_->cycle(); - //std::cout << "VortexGPGPU cycle returned: " << running << std::endl; if (!running) { primaryComponentOKToEndSim(); - std::cout << "VortexGPGPU: simulation finished\n"; + std::cout << 
"VortexGPGPU Component: simulation finished\n"; return true; } - //std::cout << "VortexGPGPU clockTick returns false " << std::endl; return false; } - -#ifdef USE_SST_MEM -void VortexGPGPU::handleMemResp(StandardMem::Request *req) { - // Inform SimX that this request has completed - const auto id = req->getID(); - const auto it = tag_by_id.find(id); - if (it == tag_by_id.end()) { - SST::Output out; - out.fatal(CALL_INFO, -1, "VortexGPGPU: received response with unknown ID %lu\n", id); - } - else{ - vx_on_mem_complete(it->second); - tag_by_id.erase(it); - } - delete req; -} -#endif diff --git a/sim/simx/VortexGPGPU.h b/sim/simx/VortexGPGPU.h index 61acbaa40f..1421bd0c10 100644 --- a/sim/simx/VortexGPGPU.h +++ b/sim/simx/VortexGPGPU.h @@ -1,7 +1,6 @@ // VortexGPGPU.h #pragma once #include -//#include #include #include #include "vortex_simulator.h" // wrapper around SimX @@ -21,18 +20,19 @@ class VortexGPGPU : public SST::Component { // Register with SST SST_ELI_REGISTER_COMPONENT( VortexGPGPU, - "vortex", // element library name - "VortexGPGPU", // component name + "vortex", // Element library name + "VortexGPGPU", // Component name SST_ELI_ELEMENT_VERSION(1,0,0), "Vortex GPGPU Simulator", COMPONENT_CATEGORY_PROCESSOR ) + SST_ELI_DOCUMENT_PARAMS( {"clock", "Clock frequency", "1GHz"}, - {"program", "Path to the kernel or ELF to load (defaults to built-in test image)", ""}, - {"launch_bytes", "Size in bytes of the default launch descriptor", "64"} + {"program", "Path to the kernel or ELF to load", ""}, ) + // for future usage with SST memory SST_ELI_DOCUMENT_SUBCOMPONENT_SLOTS( {"memIface", "StandardMem interface to memory hierarchy", "SST::Interfaces::StandardMem"} ) @@ -42,20 +42,6 @@ class VortexGPGPU : public SST::Component { bool clockTick(SST::Cycle_t cycle); std::unique_ptr sim_; - - //uint64_t launch_desc_addr_ = 0; // required only when launch descriptor is required - - #ifdef USE_SST_MEM - void handleMemResp(SST::Interfaces::StandardMem::Request* req); - - // 
static pointer used by lambda in vx_register_submit() - static VortexGPGPU* instance_; - - SST::Interfaces::StandardMem* memIface_; - std::unordered_map tag_by_id; - //#else - SST::Interfaces::StandardMem* memIface_ = nullptr; - #endif }; } // namespace Vortex diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index acded6dd65..114b275d03 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -159,6 +159,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { return perf; } +// Advance the simulation by one cycle for SST - code adapted from run() method bool ProcessorImpl::cycle() { if (!is_cycle_initialized_) { std::cout << "ProcessorImpl: Initializing cycle()\n"; @@ -166,7 +167,7 @@ bool ProcessorImpl::cycle() { this->reset(); is_cycle_initialized_ = true; } - //std::cout << "ProcessorImpl: cycle()" << std::endl; + SimPlatform::instance().tick(); bool anyRunning = false; for (auto cluster : clusters_) { @@ -176,7 +177,6 @@ bool ProcessorImpl::cycle() { } } perf_mem_latency_ += perf_mem_pending_reads_; - //std::cout << "ProcessorImpl: cycle() - returns: " << anyRunning << std::endl; return anyRunning; } @@ -217,9 +217,9 @@ void Processor::dcr_write(uint32_t addr, uint32_t value) { return impl_->dcr_write(addr, value); } +// advance the simulation by one cycle for SST bool Processor::cycle() { try { - //std::cout << "Processor: cycle()" << std::endl; return impl_->cycle(); } catch (...) 
{ return false; diff --git a/sim/simx/vortex_simulator.cpp b/sim/simx/vortex_simulator.cpp index 5ace3cede5..f339f434d8 100644 --- a/sim/simx/vortex_simulator.cpp +++ b/sim/simx/vortex_simulator.cpp @@ -18,33 +18,11 @@ VortexSimulator::VortexSimulator() : arch_(NUM_THREADS, NUM_WARPS, NUM_CORES) , ram_(0, MEM_PAGE_SIZE) , proc_(std::make_unique(arch_)) -// , kernel_image_{} -// , next_alloc_addr_(kAllocBaseAddr) , halted_(true) {} bool VortexSimulator::init(const std::string& kernelPath) { proc_->attach_ram(&ram_); - // kernel_image_ = {}; - // next_alloc_addr_ = kAllocBaseAddr; - // ram_.clear(); - // ram_.set_acl(0, kGlobalMemSize, 0); - - // can be used when launch descriptor is required - /* bool has_kernel = false; - if (!kernelPath.empty()) { - auto image_info = this->loadKernelImage(kernelPath); - if (!image_info) - return false; - kernel_image_ = *image_info; - has_kernel = true; - } */ - - // Program base DCRs - align startup to loaded kernel when provided - /* uint64_t startup = STARTUP_ADDR; - if (has_kernel) - startup = kernel_image_.base_addr; */ - // setup base DCRs const uint64_t startup_addr(STARTUP_ADDR); proc_->dcr_write(VX_DCR_BASE_STARTUP_ADDR0, startup_addr & 0xffffffff); @@ -74,11 +52,9 @@ bool VortexSimulator::init(const std::string& kernelPath) { bool VortexSimulator::cycle() { if (halted_) return false; -//std::cout << "VortexSimulator: cycle()" << std::endl; // Advance one cycle through the processor interface bool running = proc_->cycle(); halted_ = !running; -//std::cout << "VortexSimulator: cycle() returns " << running << std::endl; return running; } @@ -86,184 +62,4 @@ bool VortexSimulator::isHalted() const { return halted_; } -// Required when using launch descriptor and SST memory -/* bool VortexSimulator::allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out) { - if (addr_out == nullptr || size == 0) - return false; - - alignment = normalizeAlignment(alignment); - uint64_t base = 
alignUp(next_alloc_addr_, alignment); - uint64_t end = base + size; - if (end > kGlobalMemSize) - return false; - - uint64_t acl_start = alignDown(base, RAM_PAGE_SIZE); - uint64_t acl_end = alignUp(end, RAM_PAGE_SIZE); - if (acl_end > kGlobalMemSize) - return false; - - int flags = 0; - if (readable) flags |= 0x1; - if (writable) flags |= 0x2; - if (flags != 0) - ram_.set_acl(acl_start, acl_end - acl_start, flags); - - *addr_out = base; - next_alloc_addr_ = std::max(next_alloc_addr_, acl_end); - return true; -} - -bool VortexSimulator::reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable) { - if (size == 0) - return false; - - uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE); - uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE); - if (acl_end > kGlobalMemSize) - return false; - - int flags = 0; - if (readable) flags |= 0x1; - if (writable) flags |= 0x2; - ram_.set_acl(acl_start, acl_end - acl_start, flags); - - if (acl_end > next_alloc_addr_) - next_alloc_addr_ = acl_end; - return true; -} - -void VortexSimulator::setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable) { - if (size == 0) - return; - uint64_t acl_start = alignDown(addr, RAM_PAGE_SIZE); - uint64_t acl_end = alignUp(addr + size, RAM_PAGE_SIZE); - int flags = 0; - if (readable) flags |= 0x1; - if (writable) flags |= 0x2; - ram_.set_acl(acl_start, acl_end - acl_start, flags); -} - -void VortexSimulator::writeMemory(uint64_t addr, const void* data, uint64_t size) { - if (data == nullptr || size == 0) - return; - ram_.write(data, addr, size); -} - -void VortexSimulator::setStartupArg(uint64_t arg_addr) { - proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG0, static_cast(arg_addr & 0xffffffffu)); -#if (XLEN == 64) - proc_->dcr_write(VX_DCR_BASE_STARTUP_ARG1, static_cast(arg_addr >> 32)); -#endif -} - -std::optional VortexSimulator::loadKernelImage(const std::string& path) { - KernelImageInfo info{}; - - if (path.empty()) - return info; - - const auto ext = 
getFileExt(path); - if (ext == "bin") { - std::ifstream ifs(path, std::ios::binary); - if (!ifs) - return std::nullopt; - - ifs.seekg(0, std::ios::end); - const uint64_t size = static_cast(ifs.tellg()); - ifs.seekg(0, std::ios::beg); - std::vector payload(size); - if (size && !ifs.read(reinterpret_cast(payload.data()), size)) - return std::nullopt; - - if (!reserveMemory(STARTUP_ADDR, size, true, true)) - return std::nullopt; - writeMemory(STARTUP_ADDR, payload.data(), size); - setMemoryPermissions(STARTUP_ADDR, size, true, false); - - info.base_addr = STARTUP_ADDR; - info.size_bytes = size; - return info; - } - - if (ext == "hex") { - ram_.loadHexImage(path.c_str()); - info.base_addr = STARTUP_ADDR; - info.size_bytes = 0; - return info; - } - - if (ext == "vxbin") { - std::ifstream ifs(path, std::ios::binary); - if (!ifs) - return std::nullopt; - - uint64_t min_vma = 0; - uint64_t max_vma = 0; - - ifs.read(reinterpret_cast(&min_vma), sizeof(uint64_t)); - ifs.read(reinterpret_cast(&max_vma), sizeof(uint64_t)); - if (!ifs || max_vma < min_vma) - return std::nullopt; - - constexpr size_t header_bytes = sizeof(uint64_t) * 2; - ifs.seekg(0, std::ios::end); - const size_t file_size = static_cast(ifs.tellg()); - if (file_size < header_bytes) - return std::nullopt; - - const uint64_t payload_size = static_cast(file_size - header_bytes); - const uint64_t image_span = max_vma - min_vma; - if (image_span == 0) - return std::nullopt; - ifs.seekg(header_bytes, std::ios::beg); - - std::vector payload(payload_size); - if (payload_size && !ifs.read(reinterpret_cast(payload.data()), payload_size)) - return std::nullopt; - - if (!reserveMemory(min_vma, image_span, true, true)) - return std::nullopt; - if (payload_size) - writeMemory(min_vma, payload.data(), payload_size); - if (image_span > payload_size) { - std::vector zeros(static_cast(image_span - payload_size), 0); - writeMemory(min_vma + payload_size, zeros.data(), zeros.size()); - } - setMemoryPermissions(min_vma, image_span, 
true, false); - - info.base_addr = min_vma; - info.size_bytes = image_span; - return info; - } - - return std::nullopt; -} - -uint64_t VortexSimulator::alignUp(uint64_t value, uint64_t alignment) { - return (value + alignment - 1) & ~(alignment - 1); -} - -uint64_t VortexSimulator::alignDown(uint64_t value, uint64_t alignment) { - return value & ~(alignment - 1); -} - -uint64_t VortexSimulator::normalizeAlignment(uint64_t alignment) { - if (alignment == 0) - alignment = kDefaultAlignment; - if (alignment < kDefaultAlignment) - alignment = kDefaultAlignment; - if ((alignment & (alignment - 1)) == 0) - return alignment; - - alignment--; - alignment |= alignment >> 1; - alignment |= alignment >> 2; - alignment |= alignment >> 4; - alignment |= alignment >> 8; - alignment |= alignment >> 16; - alignment |= alignment >> 32; - alignment++; - return alignment; -} */ - } // namespace vortex diff --git a/sim/simx/vortex_simulator.h b/sim/simx/vortex_simulator.h index b0c890f593..028b2cc795 100644 --- a/sim/simx/vortex_simulator.h +++ b/sim/simx/vortex_simulator.h @@ -27,18 +27,6 @@ class VortexSimulator { */ bool init(const std::string& kernelPath); - // changes to substitute for run-time wrt memory setup - required when using launch descriptor and SST memory -/* const KernelImageInfo& kernelImage() const { return kernel_image_; } - bool allocateMemory(uint64_t size, uint64_t alignment, bool readable, bool writable, uint64_t* addr_out); - bool reserveMemory(uint64_t addr, uint64_t size, bool readable, bool writable); - void setMemoryPermissions(uint64_t addr, uint64_t size, bool readable, bool writable); - void writeMemory(uint64_t addr, const void* data, uint64_t size); - - RAM& ram() { return ram_; } - const RAM& ram() const { return ram_; } - - void setStartupArg(uint64_t arg_addr); */ - /** * Advances the simulation by one cycle. Returns false once the * simulation has completed (i.e. all clusters are halted). 
@@ -49,18 +37,6 @@ class VortexSimulator { bool isHalted() const; private: - // required when using launch descriptor and SST memory - /* static constexpr uint64_t kGlobalMemSize = (XLEN == 64) ? 0x200000000ull : 0x100000000ull; - static constexpr uint64_t kAllocBaseAddr = USER_BASE_ADDR; - static constexpr uint64_t kDefaultAlignment = 64ull; - - static uint64_t alignUp(uint64_t value, uint64_t alignment); - static uint64_t alignDown(uint64_t value, uint64_t alignment); - static uint64_t normalizeAlignment(uint64_t alignment); - - std::optional loadKernelImage(const std::string& path); - KernelImageInfo kernel_image_; - uint64_t next_alloc_addr_;*/ Arch arch_; RAM ram_; From edf3c0870f23a2a3eec29a52c8243b1d1b7e6806 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Fri, 14 Nov 2025 08:18:53 -0500 Subject: [PATCH 07/15] addressing Saurabh's review comments --- sim/simx/Makefile | 7 +++---- sim/simx/VortexGPGPU.cpp | 5 +---- 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/sim/simx/Makefile b/sim/simx/Makefile index e2a7f65721..480d14745e 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -72,6 +72,9 @@ MAIN_OBJ := $(OBJ_DIR)/main.o DEPS := $(OBJS:.o=.d) $(MAIN_OBJ:.o=.d) +# generate .d files alongside .o files +CXXFLAGS += -MMD -MP -MF $(@:.o=.d) + ifeq ($(USE_SST), 1) VORTEX_SST_OBJS := $(patsubst $(SRC_DIR)/%.cpp,$(OBJ_DIR)/%.o,$(VORTEX_SST_SRCS)) DEPS += $(VORTEX_SST_OBJS:.o=.d) @@ -103,11 +106,9 @@ $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) $(CXX) $(CXXFLAGS) -c $< -o $@ # build SST-specific source object files -ifeq ($(USE_SST), 1) $(VORTEX_SST_OBJS): $(OBJ_DIR)/%.o: $(SRC_DIR)/%.cpp $(CONFIG_FILE) @mkdir -p $(@D) $(CXX) $(CXXFLAGS) $(SST_CFLAGS) -c $< -o $@ -endif # build main object file $(MAIN_OBJ): $(SRC_DIR)/main.cpp $(CONFIG_FILE) @@ -123,7 +124,6 @@ $(DESTDIR)/lib$(PROJECT).so: $(OBJS) $(CXX) $(CXXFLAGS) $^ -shared $(LDFLAGS) -o $@ # Vortex SST simulator component shared library -ifeq ($(USE_SST), 1) libvortex: 
$(DESTDIR)/$(VORTEX_LIB) $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS) @@ -132,7 +132,6 @@ $(DESTDIR)/$(VORTEX_LIB): $(OBJS) $(VORTEX_SST_OBJS) $(OBJS) $(VORTEX_SST_SRCS) \ -shared -o $@ \ $(LDFLAGS) $(SST_LFLAGS) -endif # updates the timestamp when flags changed. $(CONFIG_FILE): force diff --git a/sim/simx/VortexGPGPU.cpp b/sim/simx/VortexGPGPU.cpp index 84583ca6ce..eff4af33d8 100644 --- a/sim/simx/VortexGPGPU.cpp +++ b/sim/simx/VortexGPGPU.cpp @@ -18,10 +18,7 @@ VortexGPGPU::VortexGPGPU(ComponentId_t id, Params ¶ms) std::string clockfreq = params.find("clock", "1GHz"); // Parameter: program path - std::string kernel = params.find("program", "/nethome/jsubburayan3/vortex/build/tests/kernel/hello/hello.bin"); - - //const uint32_t launch_bytes = params.find("launch_bytes", kDefaultLaunchBytes); // required when launch descriptor is used - + std::string kernel = params.find("program", ""); // Register our clock handler with SST registerClock(clockfreq, From a538f61e32dec120b032e67e23b40b08eda34cc4 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 3 Dec 2025 03:40:45 -0500 Subject: [PATCH 08/15] CI changes --- .github/workflows/ci.yml | 2 + ci/regression.sh.in | 17 +++++++ ci/sst_install.sh.in | 85 +++++++++++++++++++++++++++++++++ ci/sst_test_vortex_conform.py | 7 +++ ci/sst_test_vortex_fibonacci.py | 7 +++ ci/sst_test_vortex_hello.py | 7 +++ ci/sst_test_vortex_vecadd.py | 7 +++ sim/simx/Makefile | 6 +-- 8 files changed, 135 insertions(+), 3 deletions(-) create mode 100755 ci/sst_install.sh.in create mode 100644 ci/sst_test_vortex_conform.py create mode 100644 ci/sst_test_vortex_fibonacci.py create mode 100644 ci/sst_test_vortex_hello.py create mode 100644 ci/sst_test_vortex_vecadd.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d4ba58a14b..3033e5113d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,6 +59,7 @@ jobs: cd build ../configure --tooldir=$TOOLDIR ci/toolchain_install.sh --all + 
ci/sst_install.sh - name: Setup Third Party if: steps.cache-thirdparty.outputs.cache-hit != 'true' @@ -167,6 +168,7 @@ jobs: ./ci/regression.sh --isa ./ci/regression.sh --kernel ./ci/regression.sh --regression + ./ci/regression.sh --sst else ./ci/regression.sh --${{ matrix.name }} fi diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 90a46b83d1..4d28f8516d 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -78,6 +78,23 @@ kernel() echo "kernel tests done!" } +sst() +{ + echo "begin sst tests..." + + make -C sim/simx USE_SST=1 + make -C tests/kernel + + cp sim/simx/libvortex.so $SST_ELEMENTS_HOME/lib/sst-elements-library/ # alternatively - $ sst --add-lib-path `pwd` myConfig.py + + sst test_vortex_hello.py + sst test_vortex_fibonacci.py + sst test_vortex_vecadd.py + sst test_vortex.py + + echo "sst tests done!" +} + regression() { echo "begin regression tests..." diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in new file mode 100755 index 0000000000..f6c581d0f4 --- /dev/null +++ b/ci/sst_install.sh.in @@ -0,0 +1,85 @@ +#!/bin/bash + +# Copyright © 2019-2023 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+# exit when any command fails
+set -e
+
+OPENMPI_416=https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.6.tar.gz # version dependency
+SST_CORE_1510=https://github.com/sstsimulator/sst-core/releases/download/v15.1.0_Final/sstcore-15.1.0.tar.gz
+SST_ELEMENTS_1510=https://github.com/sstsimulator/sst-elements/releases/download/v15.1.0_Final/sstelements-15.1.0.tar.gz
+TOOLDIR=${TOOLDIR:=@TOOLDIR@}
+
+sudo DEBIAN_FRONTEND=noninteractive apt-get install -y openmpi-bin openmpi-common libtool libtool-bin autoconf python3 python3-dev automake build-essential git # -y: no tty to answer the prompt in CI; variable placed after sudo so it reaches apt-get
+wget $OPENMPI_416
+tar -xvzf openmpi-4.1.6.tar.gz
+mkdir -p $TOOLDIR && rm -rf $TOOLDIR/openmpi-4.1.6 && mv openmpi-4.1.6 $TOOLDIR # drop any stale copy first so mv cannot nest into it
+rm -rf openmpi-4.1.6.tar.gz
+
+mkdir -p $TOOLDIR/openmpi_install
+cd $TOOLDIR/openmpi-4.1.6
+
+export MPIHOME=$TOOLDIR/openmpi_install
+./configure --prefix=$MPIHOME
+make all install
+
+export PATH=$MPIHOME/bin:$PATH
+export MPICC=mpicc
+export MPICXX=mpicxx
+
+echo 'export PATH='"$MPIHOME"'/bin:$PATH' >> ~/.bashrc
+echo 'export MPICC=mpicc' >> ~/.bashrc
+echo 'export MPICXX=mpicxx' >> ~/.bashrc
+
+cd $TOOLDIR
+wget $SST_CORE_1510
+tar -xvzf sstcore-15.1.0.tar.gz
+rm sstcore-15.1.0.tar.gz
+cd sst-core
+
+mkdir -p $TOOLDIR/sst-install/sst-core
+export SST_CORE_HOME=$TOOLDIR/sst-install/sst-core
+export SST_CORE_ROOT=$TOOLDIR/sst-core
+echo 'export SST_CORE_HOME='"$SST_CORE_HOME" >> ~/.bashrc
+echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc
+
+autoreconf -fi
+./configure --prefix=$SST_CORE_HOME
+make -j$(nproc) all
+make install
+
+export PATH=$SST_CORE_HOME/bin:$PATH
+echo 'export PATH='"$SST_CORE_HOME"'/bin:$PATH' >> ~/.bashrc
+
+cd $TOOLDIR
+wget $SST_ELEMENTS_1510
+tar -xvzf sstelements-15.1.0.tar.gz
+rm sstelements-15.1.0.tar.gz
+cd sst-elements
+
+mkdir -p $TOOLDIR/sst-install/sst-elements
+export SST_ELEMENTS_HOME=$TOOLDIR/sst-install/sst-elements
+export SST_ELEMENTS_ROOT=$TOOLDIR/sst-elements
+echo 'export SST_ELEMENTS_HOME='"$SST_ELEMENTS_HOME" >> ~/.bashrc
+echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc + +./configure --prefix=$SST_ELEMENTS_HOME --with-sst-core=$SST_CORE_HOME +make -j2 all +make install + +export PATH=$SST_ELEMENTS_HOME/bin:$PATH +echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc + + + diff --git a/ci/sst_test_vortex_conform.py b/ci/sst_test_vortex_conform.py new file mode 100644 index 0000000000..bc341530dd --- /dev/null +++ b/ci/sst_test_vortex_conform.py @@ -0,0 +1,7 @@ +import sst + +gpu = sst.Component("gpu0", "vortex.VortexGPGPU") +gpu.addParams({ + "clock": "1GHz", + "program": "../build/tests/kernel/conform/conform.bin" +}) diff --git a/ci/sst_test_vortex_fibonacci.py b/ci/sst_test_vortex_fibonacci.py new file mode 100644 index 0000000000..53da409fca --- /dev/null +++ b/ci/sst_test_vortex_fibonacci.py @@ -0,0 +1,7 @@ +import sst + +gpu = sst.Component("gpu0", "vortex.VortexGPGPU") +gpu.addParams({ + "clock": "1GHz", + "program": "../build/tests/kernel/fibonacci/fibonacci.bin" +}) diff --git a/ci/sst_test_vortex_hello.py b/ci/sst_test_vortex_hello.py new file mode 100644 index 0000000000..e1c8fcea81 --- /dev/null +++ b/ci/sst_test_vortex_hello.py @@ -0,0 +1,7 @@ +import sst + +gpu = sst.Component("gpu0", "vortex.VortexGPGPU") +gpu.addParams({ + "clock": "1GHz", + "program": "../build/tests/kernel/hello/hello.bin" +}) diff --git a/ci/sst_test_vortex_vecadd.py b/ci/sst_test_vortex_vecadd.py new file mode 100644 index 0000000000..32d8536749 --- /dev/null +++ b/ci/sst_test_vortex_vecadd.py @@ -0,0 +1,7 @@ +import sst + +gpu = sst.Component("gpu0", "vortex.VortexGPGPU") +gpu.addParams({ + "clock": "1GHz", + "program": "../build/tests/kernel/vecadd/vecadd.bin" +}) diff --git a/sim/simx/Makefile b/sim/simx/Makefile index 480d14745e..6ed58b9aa0 100644 --- a/sim/simx/Makefile +++ b/sim/simx/Makefile @@ -2,7 +2,7 @@ include ../common.mk DESTDIR ?= $(CURDIR) USE_SST ?= 0 -SST_PKG ?= SST-14.1 # default SST package name +#SST_PKG ?= SST-14.1 # default SST package name 
OBJ_DIR = $(DESTDIR)/obj CONFIG_FILE = $(DESTDIR)/simx_config.stamp @@ -32,8 +32,8 @@ SRCS += $(SRC_DIR)/dcrs.cpp $(SRC_DIR)/types.cpp ifeq ($(USE_SST), 1) VORTEX_SST_SRCS := $(SRC_DIR)/vortex_simulator.cpp VORTEX_SST_SRCS += $(SRC_DIR)/VortexGPGPU.cpp -SST_CFLAGS := $(shell pkg-config --cflags $(SST_PKG)) -SST_LFLAGS := $(shell pkg-config --libs $(SST_PKG)) +SST_CFLAGS := $(shell sst-config --ELEMENT_CXXFLAGS) +SST_LFLAGS := $(shell sst-config --ELEMENT_LDFLAGS) CXXFLAGS += $(SST_CFLAGS) -DUSE_SST endif From c50ff1a9c3f7ce5983887e870f4e21ee728c149c Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 3 Dec 2025 12:39:29 -0500 Subject: [PATCH 09/15] regression update for sst run --- ci/regression.sh.in | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 4d28f8516d..9965acb5f3 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -522,7 +522,7 @@ cupbop() { show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst] [--all] [--h|--help]" } declare -a tests=() @@ -581,6 +581,9 @@ while [ "$1" != "" ]; do --cupbop ) tests+=("cupbop") ;; + --sst ) + tests+=("sst") + ;; --all ) tests=() tests+=("unittest") From c754ec958e710ba65d9570675b27e3b2c68bb161 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 3 Dec 2025 12:53:37 -0500 Subject: [PATCH 10/15] SST test update --- .github/workflows/ci.yml | 2 +- ci/regression.sh.in | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3033e5113d..d29cb67482 100644 --- 
a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -168,7 +168,7 @@ jobs: ./ci/regression.sh --isa ./ci/regression.sh --kernel ./ci/regression.sh --regression - ./ci/regression.sh --sst + ./ci/regression.sh --sst_tests else ./ci/regression.sh --${{ matrix.name }} fi diff --git a/ci/regression.sh.in b/ci/regression.sh.in index 9965acb5f3..ebfc48f559 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -78,7 +78,7 @@ kernel() echo "kernel tests done!" } -sst() +sst_tests() { echo "begin sst tests..." @@ -522,7 +522,7 @@ cupbop() { show_usage() { echo "Vortex Regression Test" - echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst] [--all] [--h|--help]" + echo "Usage: $0 [--clean] [--unittest] [--isa] [--kernel] [--regression] [--opencl] [--cache] [--config1] [--config2] [--debug] [--scope] [--stress] [--synthesis] [--vector] [--tensor] [--cupbop] [--sst_tests] [--all] [--h|--help]" } declare -a tests=() @@ -581,8 +581,8 @@ while [ "$1" != "" ]; do --cupbop ) tests+=("cupbop") ;; - --sst ) - tests+=("sst") + --sst_tests ) + tests+=("sst_tests") ;; --all ) tests=() From 0a450aba608e38006b4f2b0956b7edcf01f24e9c Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Wed, 3 Dec 2025 13:17:54 -0500 Subject: [PATCH 11/15] sst config path update --- ci/regression.sh.in | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/regression.sh.in b/ci/regression.sh.in index ebfc48f559..00366435ed 100755 --- a/ci/regression.sh.in +++ b/ci/regression.sh.in @@ -87,10 +87,10 @@ sst_tests() cp sim/simx/libvortex.so $SST_ELEMENTS_HOME/lib/sst-elements-library/ # alternatively - $ sst --add-lib-path `pwd` myConfig.py - sst test_vortex_hello.py - sst test_vortex_fibonacci.py - sst test_vortex_vecadd.py - sst test_vortex.py + sst ci/sst_test_vortex_hello.py + sst ci/sst_test_vortex_fibonacci.py + sst 
ci/sst_test_vortex_vecadd.py + sst ci/sst_test_vortex_conform.py echo "sst tests done!" } From cdfcd0b64345b0961ebf4d683f11e0d3798b1516 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Mon, 8 Dec 2025 17:04:20 -0500 Subject: [PATCH 12/15] remove open mpi, sst core & elements sources after installation to save space --- ci/sst_install.sh.in | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in index f6c581d0f4..f3c3c9c040 100755 --- a/ci/sst_install.sh.in +++ b/ci/sst_install.sh.in @@ -42,6 +42,8 @@ echo 'export PATH='"$MPIHOME"'/bin:$PATH' >> ~/.bashrc echo 'export MPICC=mpicc' >> ~/.bashrc echo 'export MPICXX=mpicxx' >> ~/.bashrc +rm -r $TOOLDIR/openmpi-4.1.6 + cd $TOOLDIR wget $SST_CORE_1510 tar -xvzf sstcore-15.1.0.tar.gz @@ -52,13 +54,15 @@ mkdir -p $TOOLDIR/sst-install/sst-core export SST_CORE_HOME=$TOOLDIR/sst-install/sst-core export SST_CORE_ROOT=$TOOLDIR/sst-core echo 'export SST_CORE_HOME='"$SST_CORE_HOME" >> ~/.bashrc -echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc +#echo 'export SST_CORE_ROOT='"$SST_CORE_ROOT" >> ~/.bashrc autoreconf -fi ./configure --prefix=$SST_CORE_HOME make -j$(nproc) all make install +rm -r $SST_CORE_ROOT + export PATH=$SST_CORE_HOME/bin:$PATH echo 'export PATH='"$SST_CORE_HOME"'/bin:$PATH' >> ~/.bashrc @@ -72,12 +76,14 @@ mkdir -p $TOOLDIR/sst-install/sst-elements export SST_ELEMENTS_HOME=$TOOLDIR/sst-install/sst-elements export SST_ELEMENTS_ROOT=$TOOLDIR/sst-elements echo 'export SST_ELEMENTS_HOME='"$SST_ELEMENTS_HOME" >> ~/.bashrc -echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc +#echo 'export SST_ELEMENTS_ROOT='"$SST_ELEMENTS_ROOT" >> ~/.bashrc ./configure --prefix=$SST_ELEMENTS_HOME --with-sst-core=$SST_CORE_HOME make -j2 all make install +rm -r $SST_ELEMENTS_ROOT + export PATH=$SST_ELEMENTS_HOME/bin:$PATH echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc From a31c1115e71b2394435aa2d3617c09c876b194fd Mon Sep 17 
00:00:00 2001 From: Jagadheesvaran Date: Mon, 8 Dec 2025 20:34:34 -0500 Subject: [PATCH 13/15] CI fix: update GITHUB_PATH --- ci/sst_install.sh.in | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ci/sst_install.sh.in b/ci/sst_install.sh.in index f3c3c9c040..8c001a94bd 100755 --- a/ci/sst_install.sh.in +++ b/ci/sst_install.sh.in @@ -87,5 +87,11 @@ rm -r $SST_ELEMENTS_ROOT export PATH=$SST_ELEMENTS_HOME/bin:$PATH echo 'export PATH='"$SST_ELEMENTS_HOME"'/bin:$PATH' >> ~/.bashrc +if [ -n "$GITHUB_PATH" ]; then + echo "$SST_CORE_HOME/bin" >> "$GITHUB_PATH" + echo "$SST_ELEMENTS_HOME/bin" >> "$GITHUB_PATH" +fi + + From 1511b8a95657e48ae9e0dd9fd10f40e5e4419c5a Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Tue, 9 Dec 2025 14:58:40 -0500 Subject: [PATCH 14/15] Explicitly exporting SST paths in each step --- .github/workflows/ci.yml | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index d29cb67482..c8c348b4ba 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -66,6 +66,13 @@ jobs: run: | make -C third_party > /dev/null + - name: Export SST paths + run: | + echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH + echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH + echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV + echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV + build: needs: setup strategy: @@ -100,6 +107,13 @@ jobs: restore-keys: | ${{ matrix.os }}-thirdparty- + - name: Export SST paths + run: | + echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH + echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH + echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV + echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV + - name: Run Build run: | TOOLDIR=$PWD/tools @@ -158,6 +172,13 @@ jobs: name: build-${{ matrix.os }}-${{ matrix.xlen }} path: build${{ 
matrix.xlen }} + - name: Export SST paths + run: | + echo "$PWD/tools/sst-install/sst-core/bin" >> $GITHUB_PATH + echo "$PWD/tools/sst-install/sst-elements/bin" >> $GITHUB_PATH + echo "SST_CORE_HOME=$PWD/tools/sst-install/sst-core" >> $GITHUB_ENV + echo "SST_ELEMENTS_HOME=$PWD/tools/sst-install/sst-elements" >> $GITHUB_ENV + - name: Run tests run: | cd build${{ matrix.xlen }} From 053df0d0e06ff3e48902cabb2e0dc72ad44441d7 Mon Sep 17 00:00:00 2001 From: Jagadheesvaran Date: Tue, 9 Dec 2025 20:34:26 -0500 Subject: [PATCH 15/15] test scripts' relative path fix --- ci/sst_test_vortex_conform.py | 2 +- ci/sst_test_vortex_fibonacci.py | 2 +- ci/sst_test_vortex_hello.py | 2 +- ci/sst_test_vortex_vecadd.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ci/sst_test_vortex_conform.py b/ci/sst_test_vortex_conform.py index bc341530dd..2f2a86535f 100644 --- a/ci/sst_test_vortex_conform.py +++ b/ci/sst_test_vortex_conform.py @@ -3,5 +3,5 @@ gpu = sst.Component("gpu0", "vortex.VortexGPGPU") gpu.addParams({ "clock": "1GHz", - "program": "../build/tests/kernel/conform/conform.bin" + "program": "tests/kernel/conform/conform.bin" }) diff --git a/ci/sst_test_vortex_fibonacci.py b/ci/sst_test_vortex_fibonacci.py index 53da409fca..a066765c9e 100644 --- a/ci/sst_test_vortex_fibonacci.py +++ b/ci/sst_test_vortex_fibonacci.py @@ -3,5 +3,5 @@ gpu = sst.Component("gpu0", "vortex.VortexGPGPU") gpu.addParams({ "clock": "1GHz", - "program": "../build/tests/kernel/fibonacci/fibonacci.bin" + "program": "tests/kernel/fibonacci/fibonacci.bin" }) diff --git a/ci/sst_test_vortex_hello.py b/ci/sst_test_vortex_hello.py index e1c8fcea81..21c956639e 100644 --- a/ci/sst_test_vortex_hello.py +++ b/ci/sst_test_vortex_hello.py @@ -3,5 +3,5 @@ gpu = sst.Component("gpu0", "vortex.VortexGPGPU") gpu.addParams({ "clock": "1GHz", - "program": "../build/tests/kernel/hello/hello.bin" + "program": "tests/kernel/hello/hello.bin" }) diff --git a/ci/sst_test_vortex_vecadd.py 
b/ci/sst_test_vortex_vecadd.py index 32d8536749..1a50958056 100644 --- a/ci/sst_test_vortex_vecadd.py +++ b/ci/sst_test_vortex_vecadd.py @@ -3,5 +3,5 @@ gpu = sst.Component("gpu0", "vortex.VortexGPGPU") gpu.addParams({ "clock": "1GHz", - "program": "../build/tests/kernel/vecadd/vecadd.bin" + "program": "tests/kernel/vecadd/vecadd.bin" })