diff --git a/hw/rtl/VX_config.vh b/hw/rtl/VX_config.vh index 8d1c280fd..3ff9e3a54 100644 --- a/hw/rtl/VX_config.vh +++ b/hw/rtl/VX_config.vh @@ -617,7 +617,7 @@ // Number of Banks `ifndef L3_NUM_BANKS -`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS) +`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS) `endif // Core Response Queue Size @@ -650,6 +650,15 @@ `define L3_WRITEBACK 0 `endif +`ifndef MEMORY_BANKS +`define MEMORY_BANKS 8 +`endif + +// Number of Memory Ports from LLC +`ifndef NUM_MEM_PORTS +`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS) +`endif + // ISA Extensions ///////////////////////////////////////////////////////////// `ifdef EXT_A_ENABLE diff --git a/hw/rtl/VX_types.vh b/hw/rtl/VX_types.vh index 927ffae96..2eac22a5a 100644 --- a/hw/rtl/VX_types.vh +++ b/hw/rtl/VX_types.vh @@ -166,6 +166,10 @@ `define VX_CSR_MPM_MEM_WRITES_H 12'hB99 `define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency `define VX_CSR_MPM_MEM_LT_H 12'hB9A +`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests +`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E +`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks +`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F // PERF: lmem `define VX_CSR_MPM_LMEM_READS 12'hB1B // memory reads `define VX_CSR_MPM_LMEM_READS_H 12'hB9B diff --git a/runtime/include/vortex.h b/runtime/include/vortex.h index 8481002e1..853da5994 100644 --- a/runtime/include/vortex.h +++ b/runtime/include/vortex.h @@ -34,6 +34,7 @@ typedef void* vx_buffer_h; #define VX_CAPS_GLOBAL_MEM_SIZE 0x5 #define VX_CAPS_LOCAL_MEM_SIZE 0x6 #define VX_CAPS_ISA_FLAGS 0x7 +#define VX_CAPS_NUM_MEM_BANKS 0x8 // device isa flags #define VX_ISA_STD_A (1ull << ISA_STD_A) diff --git a/runtime/opae/vortex.cpp b/runtime/opae/vortex.cpp index 390d5acc4..06458fa1f 100755 --- a/runtime/opae/vortex.cpp +++ b/runtime/opae/vortex.cpp @@ -232,6 +232,9 @@ class vx_device { case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); diff --git a/runtime/rtlsim/vortex.cpp b/runtime/rtlsim/vortex.cpp index c75a6c12f..91df7f7e8 100644 --- a/runtime/rtlsim/vortex.cpp +++ b/runtime/rtlsim/vortex.cpp @@ -77,6 +77,9 @@ class vx_device { case VX_CAPS_ISA_FLAGS: _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/simx/vortex.cpp b/runtime/simx/vortex.cpp index 89856f3a0..70ceb7fc4 100644 --- a/runtime/simx/vortex.cpp +++ b/runtime/simx/vortex.cpp @@ -81,6 +81,9 @@ class vx_device { case VX_CAPS_ISA_FLAGS: _value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: std::cout << "invalid caps id: " << caps_id << std::endl; std::abort(); diff --git a/runtime/stub/utils.cpp b/runtime/stub/utils.cpp index 9826db711..c1f75f092 100644 --- a/runtime/stub/utils.cpp +++ b/runtime/stub/utils.cpp @@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { uint64_t mem_reads = 0; uint64_t mem_writes = 0; uint64_t mem_lat = 0; + uint64_t mem_req_counter = 0; + uint64_t mem_ticks = 0; uint64_t num_cores; CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), { @@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), { return err; }); + + uint64_t num_mem_bank_ports; + CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), { + return err; + }); bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE; bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE; @@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), { return err; }); + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), { + return err; + }); + CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), { + return err; + }); } } break; default: @@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads); int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes); int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls); - int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls); + int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls); fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads); fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes); fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio); @@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) { } int mem_avg_lat = caclAverage(mem_lat, mem_reads); + int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports)); fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes); fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat); + fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization); } break; default: break; diff --git a/runtime/xrt/vortex.cpp b/runtime/xrt/vortex.cpp index 408bf23ed..5f4e27ff2 100644 --- a/runtime/xrt/vortex.cpp +++ b/runtime/xrt/vortex.cpp @@ -404,6 +404,9 @@ class vx_device { case VX_CAPS_ISA_FLAGS: _value = isa_caps_; break; + case VX_CAPS_NUM_MEM_BANKS: + _value = MEMORY_BANKS; + break; default: fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id); std::abort(); diff --git a/sim/common/dram_sim.cpp b/sim/common/dram_sim.cpp index f7cfa8a32..684dd6f7d 100644 --- a/sim/common/dram_sim.cpp +++ b/sim/common/dram_sim.cpp @@ -41,11 +41,11 @@ class DramSim::Impl { dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2"; dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb"; dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192; + dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8; dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps"; dram_config["MemorySystem"]["Controller"]["impl"] = "Generic"; dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS"; dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; - dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank"; dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy"; { YAML::Node draw_plugin; @@ -66,7 +66,7 @@ class DramSim::Impl { auto original_buf = std::cout.rdbuf(); std::cout.rdbuf(nullstream.rdbuf()); ramulator_frontend_->finalize(); - ramulator_memorysystem_->finalize(); + ramulator_memorysystem_->finalize(); std::cout.rdbuf(original_buf); } diff --git a/sim/opaesim/opae_sim.cpp b/sim/opaesim/opae_sim.cpp index 9d43ea595..7a1bae3e4 100644 --- a/sim/opaesim/opae_sim.cpp +++ b/sim/opaesim/opae_sim.cpp @@ -35,13 +35,13 @@ #include #include -#ifndef MEMORY_BANKS +//#ifndef MEMORY_BANKS #ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS #define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS #else #define MEMORY_BANKS 2 #endif -#endif +//#endif #ifndef MEM_CLOCK_RATIO #define MEM_CLOCK_RATIO 1 diff --git a/sim/simx/cache_cluster.h b/sim/simx/cache_cluster.h index 63016577b..2ba26dc21 100644 --- a/sim/simx/cache_cluster.h +++ b/sim/simx/cache_cluster.h @@ -77,8 +77,8 @@ class CacheCluster : public SimObject { caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i)); } - caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i)); - cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort); + caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i)); + cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0)); } cache_arb->ReqOut.at(0).bind(&this->MemReqPort); diff --git a/sim/simx/cache_sim.cpp b/sim/simx/cache_sim.cpp index 65a8da70b..4f357f195 100644 --- a/sim/simx/cache_sim.cpp +++ b/sim/simx/cache_sim.cpp @@ -19,6 +19,7 @@ #include #include #include +#include using namespace vortex; @@ -315,27 +316,75 @@ class CacheSim::Impl { simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i)); bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i)); } - bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); - simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); return; } - bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); - bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort); - simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0)); - - if (config.B != 0) { - snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); - bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B)); - for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) { - mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); - bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + if (strcmp(simobject->name().c_str(), "l3cache")) { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); + + if (config.B != 0) { + snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); + bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B)); + for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) { + mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); + bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + } + bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); + } else { + mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); } - bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); - bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); } else { - mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); - bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); + // TODO: Change this into a crossbar + uint32_t max = MAX(2, config_.num_inputs); + //printf("%s connecting\n", simobject_->name().c_str()); + //3 + if (config.B != 0) { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max); + for (uint32_t i = 0; i < max; ++i) { + //printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i); + bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B))); + simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i)); + } + } else { + bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2); + bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0)); + simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0)); + } + + if (config.B != 0) + { + snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str()); + bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B)); + for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) + { + //1 + //printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i); + mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i)); + bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i)); + } + //2 + if (config_.num_inputs > 1) { + for (uint32_t i = 0; i < max; ++i) { + //printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i); + bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i)); + bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B))); + } + } else { + bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0)); + } + } + else + { + mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0)); + bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0)); + } } // calculate cache initialization cycles @@ -673,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config : SimObject(ctx, name) , CoreReqPorts(config.num_inputs, this) , CoreRspPorts(config.num_inputs, this) - , MemReqPort(this) - , MemRspPort(this) + , MemReqPorts(NUM_MEM_PORTS, this) + , MemRspPorts(NUM_MEM_PORTS, this) , impl_(new Impl(this, config)) {} diff --git a/sim/simx/cache_sim.h b/sim/simx/cache_sim.h index df62bf854..aad489546 100644 --- a/sim/simx/cache_sim.h +++ b/sim/simx/cache_sim.h @@ -75,8 +75,8 @@ class CacheSim : public SimObject { std::vector> CoreReqPorts; std::vector> CoreRspPorts; - SimPort MemReqPort; - SimPort MemRspPort; + std::vector> MemReqPorts; + std::vector> MemRspPorts; CacheSim(const SimContext& ctx, const char* name, const Config& config); ~CacheSim(); diff --git a/sim/simx/cluster.cpp b/sim/simx/cluster.cpp index ec5e3f2b6..e23df448b 100644 --- a/sim/simx/cluster.cpp +++ b/sim/simx/cluster.cpp @@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx, 2, // pipeline latency }); - l2cache_->MemReqPort.bind(&this->mem_req_port); - this->mem_rsp_port.bind(&l2cache_->MemRspPort); + l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port); + this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0)); icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0)); l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0)); diff --git a/sim/simx/constants.h b/sim/simx/constants.h index 09a509ce1..0c707b55c 100644 --- a/sim/simx/constants.h +++ b/sim/simx/constants.h @@ -21,10 +21,6 @@ #define MEM_CLOCK_RATIO 1 #endif -#ifndef MEMORY_BANKS -#define MEMORY_BANKS 2 -#endif - #define LSU_WORD_SIZE (XLEN / 8) #define LSU_CHANNELS NUM_LSU_LANES #define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS) diff --git a/sim/simx/emulator.cpp b/sim/simx/emulator.cpp index 7ed9a10f9..3dfdf420b 100644 --- a/sim/simx/emulator.cpp +++ b/sim/simx/emulator.cpp @@ -451,6 +451,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) { CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads); CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes); CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency); + CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter); + CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks); CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads); CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes); diff --git a/sim/simx/mem_sim.cpp b/sim/simx/mem_sim.cpp index a12713fea..a38f4c01c 100644 --- a/sim/simx/mem_sim.cpp +++ b/sim/simx/mem_sim.cpp @@ -33,6 +33,7 @@ class MemSim::Impl { struct DramCallbackArgs { MemSim* simobject; MemReq request; + uint32_t i; }; public: @@ -56,46 +57,49 @@ class MemSim::Impl { void tick() { dram_sim_.tick(); - - if (simobject_->MemReqPort.empty()) - return; - - auto& mem_req = simobject_->MemReqPort.front(); - - // try to enqueue the request to the memory system - auto req_args = new DramCallbackArgs{simobject_, mem_req}; - auto enqueue_success = dram_sim_.send_request( - mem_req.write, - mem_req.addr, - 0, - [](void* arg) { - auto rsp_args = reinterpret_cast(arg); - // only send a response for read requests - if (!rsp_args->request.write) { - MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; - rsp_args->simobject->MemRspPort.push(mem_rsp, 1); - DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp); - } - delete rsp_args; - }, - req_args - ); - - // check if the request was enqueued successfully - if (!enqueue_success) { - delete req_args; - return; + uint32_t counter = 0; + + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { + if (simobject_->MemReqPorts.at(i).empty()) + continue; + + auto& mem_req = simobject_->MemReqPorts.at(i).front(); + + // try to enqueue the request to the memory system + auto req_args = new DramCallbackArgs{simobject_, mem_req, i}; + auto enqueue_success = dram_sim_.send_request( + mem_req.write, + mem_req.addr, + 0, + [](void* arg) { + auto rsp_args = reinterpret_cast(arg); + // only send a response for read requests + if (!rsp_args->request.write) { + MemRsp mem_rsp{rsp_args->request.tag, rsp_args->request.cid, rsp_args->request.uuid}; + rsp_args->simobject->MemRspPorts.at(rsp_args->i).push(mem_rsp, 1); + DT(3, rsp_args->simobject->name() << " mem-rsp: " << mem_rsp << " bank: " << rsp_args->i); + } + delete rsp_args; + }, + req_args + ); + + // check if the request was enqueued successfully + if (!enqueue_success) { + delete req_args; + continue; + } + + DT(3, simobject_->name() << " mem-req: " << mem_req << " bank: " << i); + + simobject_->MemReqPorts.at(i).pop(); + counter++; } - if (mem_req.write) { - ++perf_stats_.writes; - } else { - ++perf_stats_.reads; + perf_stats_.counter += counter; + if (counter > 0) { + ++perf_stats_.ticks; } - - DT(3, simobject_->name() << " mem-req: " << mem_req); - - simobject_->MemReqPort.pop(); } }; @@ -103,8 +107,8 @@ class MemSim::Impl { MemSim::MemSim(const SimContext& ctx, const char* name, const Config& config) : SimObject(ctx, name) - , MemReqPort(this) - , MemRspPort(this) + , MemReqPorts(NUM_MEM_PORTS, this) + , MemRspPorts(NUM_MEM_PORTS, this) , impl_(new Impl(this, config)) {} @@ -118,4 +122,8 @@ void MemSim::reset() { void MemSim::tick() { impl_->tick(); +} + +const MemSim::PerfStats &MemSim::perf_stats() const { + return impl_->perf_stats(); } \ No newline at end of file diff --git a/sim/simx/mem_sim.h b/sim/simx/mem_sim.h index 3f4d9801e..2f4f96187 100644 --- a/sim/simx/mem_sim.h +++ b/sim/simx/mem_sim.h @@ -26,17 +26,23 @@ class MemSim : public SimObject{ }; struct PerfStats { - uint64_t reads; - uint64_t writes; + uint64_t counter; + uint64_t ticks; PerfStats() - : reads(0) - , writes(0) + : counter(0) + , ticks(0) {} + + PerfStats& operator+=(const PerfStats& rhs) { + this->counter += rhs.counter; + this->ticks += rhs.ticks; + return *this; + } }; - SimPort MemReqPort; - SimPort MemRspPort; + std::vector> MemReqPorts; + std::vector> MemRspPorts; MemSim(const SimContext& ctx, const char* name, const Config& config); ~MemSim(); diff --git a/sim/simx/processor.cpp b/sim/simx/processor.cpp index 3807fa5e8..58fabf14c 100644 --- a/sim/simx/processor.cpp +++ b/sim/simx/processor.cpp @@ -47,8 +47,10 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) ); // connect L3 memory ports - l3cache_->MemReqPort.bind(&memsim_->MemReqPort); - memsim_->MemRspPort.bind(&l3cache_->MemRspPort); + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { + l3cache_->MemReqPorts.at(i).bind(&memsim_->MemReqPorts.at(i)); + memsim_->MemRspPorts.at(i).bind(&l3cache_->MemRspPorts.at(i)); + } // create clusters for (uint32_t i = 0; i < arch.num_clusters(); ++i) { @@ -59,16 +61,18 @@ ProcessorImpl::ProcessorImpl(const Arch& arch) } // set up memory profiling - memsim_->MemReqPort.tx_callback([&](const MemReq& req, uint64_t cycle){ - __unused (cycle); - perf_mem_reads_ += !req.write; - perf_mem_writes_ += req.write; - perf_mem_pending_reads_ += !req.write; - }); - memsim_->MemRspPort.tx_callback([&](const MemRsp&, uint64_t cycle){ - __unused (cycle); - --perf_mem_pending_reads_; - }); + for (uint32_t i = 0; i < NUM_MEM_PORTS; ++i) { + memsim_->MemReqPorts.at(i).tx_callback([&](const MemReq& req, uint64_t cycle){ + __unused (cycle); + perf_mem_reads_ += !req.write; + perf_mem_writes_ += req.write; + perf_mem_pending_reads_ += !req.write; + }); + memsim_->MemRspPorts.at(i).tx_callback([&](const MemRsp&, uint64_t cycle){ + __unused (cycle); + --perf_mem_pending_reads_; + }); + } #ifndef NDEBUG // dump device configuration @@ -131,6 +135,7 @@ ProcessorImpl::PerfStats ProcessorImpl::perf_stats() const { perf.mem_writes = perf_mem_writes_; perf.mem_latency = perf_mem_latency_; perf.l3cache = l3cache_->perf_stats(); + perf.memsim = memsim_->perf_stats(); return perf; } diff --git a/sim/simx/processor_impl.h b/sim/simx/processor_impl.h index dcfba84d7..cffeffbfe 100644 --- a/sim/simx/processor_impl.h +++ b/sim/simx/processor_impl.h @@ -25,6 +25,7 @@ class ProcessorImpl { public: struct PerfStats { CacheSim::PerfStats l3cache; + MemSim::PerfStats memsim; uint64_t mem_reads; uint64_t mem_writes; uint64_t mem_latency;