Skip to content

Commit

Permalink
Merge pull request #169 from sij814/simx
Browse files Browse the repository at this point in the history
simx HBM initial implementation
  • Loading branch information
tinebp authored Aug 18, 2024
2 parents e23d569 + e34e4b7 commit 6c607d3
Show file tree
Hide file tree
Showing 20 changed files with 199 additions and 91 deletions.
11 changes: 10 additions & 1 deletion hw/rtl/VX_config.vh
Original file line number Diff line number Diff line change
Expand Up @@ -617,7 +617,7 @@

// Number of Banks
`ifndef L3_NUM_BANKS
`define L3_NUM_BANKS `MIN(4, `NUM_CLUSTERS)
`define L3_NUM_BANKS `MIN(8, `NUM_CLUSTERS)
`endif

// Core Response Queue Size
Expand Down Expand Up @@ -650,6 +650,15 @@
`define L3_WRITEBACK 0
`endif

// Number of memory banks (defaults to 8 unless MEMORY_BANKS is already defined)
`ifndef MEMORY_BANKS
`define MEMORY_BANKS 8
`endif

// Number of memory ports from the LLC.
// Defaults to MIN(MEMORY_BANKS, L3_NUM_BANKS) unless NUM_MEM_PORTS is already defined.
`ifndef NUM_MEM_PORTS
`define NUM_MEM_PORTS `MIN(`MEMORY_BANKS, `L3_NUM_BANKS)
`endif

// ISA Extensions /////////////////////////////////////////////////////////////

`ifdef EXT_A_ENABLE
Expand Down
4 changes: 4 additions & 0 deletions hw/rtl/VX_types.vh
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,10 @@
`define VX_CSR_MPM_MEM_WRITES_H 12'hB99
`define VX_CSR_MPM_MEM_LT 12'hB1A // memory latency
`define VX_CSR_MPM_MEM_LT_H 12'hB9A
`define VX_CSR_MPM_MEM_BANK_CNTR 12'hB1E // memory bank requests
`define VX_CSR_MPM_MEM_BANK_CNTR_H 12'hB9E
`define VX_CSR_MPM_MEM_BANK_TICK 12'hB1F // memory ticks
`define VX_CSR_MPM_MEM_BANK_TICK_H 12'hB9F
// PERF: lmem
`define VX_CSR_MPM_LMEM_READS 12'hB1B // local memory (lmem) reads
`define VX_CSR_MPM_LMEM_READS_H 12'hB9B
Expand Down
1 change: 1 addition & 0 deletions runtime/include/vortex.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ typedef void* vx_buffer_h;
#define VX_CAPS_GLOBAL_MEM_SIZE 0x5
#define VX_CAPS_LOCAL_MEM_SIZE 0x6
#define VX_CAPS_ISA_FLAGS 0x7
#define VX_CAPS_NUM_MEM_BANKS 0x8

// device isa flags
#define VX_ISA_STD_A (1ull << ISA_STD_A)
Expand Down
3 changes: 3 additions & 0 deletions runtime/opae/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,9 @@ class vx_device {
case VX_CAPS_ISA_FLAGS:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();
Expand Down
3 changes: 3 additions & 0 deletions runtime/rtlsim/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,9 @@ class vx_device {
case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
Expand Down
3 changes: 3 additions & 0 deletions runtime/simx/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ class vx_device {
case VX_CAPS_ISA_FLAGS:
_value = ((uint64_t(MISA_EXT))<<32) | ((log2floor(XLEN)-4) << 30) | MISA_STD;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
std::cout << "invalid caps id: " << caps_id << std::endl;
std::abort();
Expand Down
17 changes: 16 additions & 1 deletion runtime/stub/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,8 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
uint64_t mem_reads = 0;
uint64_t mem_writes = 0;
uint64_t mem_lat = 0;
uint64_t mem_req_counter = 0;
uint64_t mem_ticks = 0;

uint64_t num_cores;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_CORES, &num_cores), {
Expand All @@ -221,6 +223,11 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_ISA_FLAGS, &isa_flags), {
return err;
});

uint64_t num_mem_bank_ports;
CHECK_ERR(vx_dev_caps(hdevice, VX_CAPS_NUM_MEM_BANKS, &num_mem_bank_ports), {
return err;
});

bool icache_enable = isa_flags & VX_ISA_EXT_ICACHE;
bool dcache_enable = isa_flags & VX_ISA_EXT_DCACHE;
Expand Down Expand Up @@ -533,6 +540,12 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_LT, core_id, &mem_lat), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_CNTR, core_id, &mem_req_counter), {
return err;
});
CHECK_ERR(vx_mpm_query(hdevice, VX_CSR_MPM_MEM_BANK_TICK, core_id, &mem_ticks), {
return err;
});
}
} break;
default:
Expand Down Expand Up @@ -599,7 +612,7 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
int read_hit_ratio = calcRatio(l3cache_read_misses, l3cache_reads);
int write_hit_ratio = calcRatio(l3cache_write_misses, l3cache_writes);
int bank_utilization = calcAvgPercent(l3cache_reads + l3cache_writes, l3cache_reads + l3cache_writes + l3cache_bank_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
int mshr_utilization = calcAvgPercent(l3cache_read_misses + l3cache_write_misses, l3cache_read_misses + l3cache_write_misses + l3cache_mshr_stalls);
fprintf(stream, "PERF: l3cache reads=%ld\n", l3cache_reads);
fprintf(stream, "PERF: l3cache writes=%ld\n", l3cache_writes);
fprintf(stream, "PERF: l3cache read misses=%ld (hit ratio=%d%%)\n", l3cache_read_misses, read_hit_ratio);
Expand All @@ -609,8 +622,10 @@ extern int vx_dump_perf(vx_device_h hdevice, FILE* stream) {
}

int mem_avg_lat = caclAverage(mem_lat, mem_reads);
int memory_bank_port_utilization = calcAvgPercent(mem_req_counter, (mem_ticks * num_mem_bank_ports));
fprintf(stream, "PERF: memory requests=%ld (reads=%ld, writes=%ld)\n", (mem_reads + mem_writes), mem_reads, mem_writes);
fprintf(stream, "PERF: memory latency=%d cycles\n", mem_avg_lat);
fprintf(stream, "PERF: memory bank port utilization=%d%%\n", memory_bank_port_utilization);
} break;
default:
break;
Expand Down
3 changes: 3 additions & 0 deletions runtime/xrt/vortex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,9 @@ class vx_device {
case VX_CAPS_ISA_FLAGS:
_value = isa_caps_;
break;
case VX_CAPS_NUM_MEM_BANKS:
_value = MEMORY_BANKS;
break;
default:
fprintf(stderr, "[VXDRV] Error: invalid caps id: %d\n", caps_id);
std::abort();
Expand Down
4 changes: 2 additions & 2 deletions sim/common/dram_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,11 @@ class DramSim::Impl {
dram_config["MemorySystem"]["DRAM"]["impl"] = "HBM2";
dram_config["MemorySystem"]["DRAM"]["org"]["preset"] = "HBM2_8Gb";
dram_config["MemorySystem"]["DRAM"]["org"]["density"] = 8192;
dram_config["MemorySystem"]["DRAM"]["org"]["channel"] = 8;
dram_config["MemorySystem"]["DRAM"]["timing"]["preset"] = "HBM2_2Gbps";
dram_config["MemorySystem"]["Controller"]["impl"] = "Generic";
dram_config["MemorySystem"]["Controller"]["Scheduler"]["impl"] = "FRFCFS";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RefreshManager"]["impl"] = "AllBank";
dram_config["MemorySystem"]["Controller"]["RowPolicy"]["impl"] = "OpenRowPolicy";
{
YAML::Node draw_plugin;
Expand All @@ -66,7 +66,7 @@ class DramSim::Impl {
auto original_buf = std::cout.rdbuf();
std::cout.rdbuf(nullstream.rdbuf());
ramulator_frontend_->finalize();
ramulator_memorysystem_->finalize();
ramulator_memorysystem_->finalize();
std::cout.rdbuf(original_buf);
}

Expand Down
4 changes: 2 additions & 2 deletions sim/opaesim/opae_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,13 @@
#include <unordered_map>
#include <util.h>

#ifndef MEMORY_BANKS
//#ifndef MEMORY_BANKS
#ifdef PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#define MEMORY_BANKS PLATFORM_PARAM_LOCAL_MEMORY_BANKS
#else
#define MEMORY_BANKS 2
#endif
#endif
//#endif

#ifndef MEM_CLOCK_RATIO
#define MEM_CLOCK_RATIO 1
Expand Down
4 changes: 2 additions & 2 deletions sim/simx/cache_cluster.h
Original file line number Diff line number Diff line change
Expand Up @@ -77,8 +77,8 @@ class CacheCluster : public SimObject<CacheCluster> {
caches_.at(i)->CoreRspPorts.at(j).bind(&mem_arbs.at(j)->RspOut.at(i));
}

caches_.at(i)->MemReqPort.bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPort);
caches_.at(i)->MemReqPorts.at(0).bind(&cache_arb->ReqIn.at(i));
cache_arb->RspIn.at(i).bind(&caches_.at(i)->MemRspPorts.at(0));
}

cache_arb->ReqOut.at(0).bind(&this->MemReqPort);
Expand Down
85 changes: 67 additions & 18 deletions sim/simx/cache_sim.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <vector>
#include <list>
#include <queue>
#include <string.h>

using namespace vortex;

Expand Down Expand Up @@ -315,27 +316,75 @@ class CacheSim::Impl {
simobject->CoreReqPorts.at(i).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&simobject->CoreRspPorts.at(i));
}
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
return;
}

bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPort);
simobject->MemRspPort.bind(&bypass_switch_->RspOut.at(0));

if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
if (strcmp(simobject->name().c_str(), "l3cache")) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));

if (config.B != 0) {
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i) {
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
} else {
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
// TODO: Change this into a crossbar
uint32_t max = MAX(2, config_.num_inputs);
//printf("%s connecting\n", simobject_->name().c_str());
//3
if (config.B != 0) {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, max, max);
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting input=%d to MemPorts\n", simobject_->name().c_str(), i);
bypass_switch_->ReqOut.at(i).bind(&simobject->MemReqPorts.at(i % (1 << config.B)));
simobject->MemRspPorts.at(i % (1 << config.B)).bind(&bypass_switch_->RspOut.at(i));
}
} else {
bypass_switch_ = MemSwitch::Create(sname, ArbiterType::Priority, 2);
bypass_switch_->ReqOut.at(0).bind(&simobject->MemReqPorts.at(0));
simobject->MemRspPorts.at(0).bind(&bypass_switch_->RspOut.at(0));
}

if (config.B != 0)
{
snprintf(sname, 100, "%s-bank-arb", simobject->name().c_str());
bank_switch_ = MemSwitch::Create(sname, ArbiterType::RoundRobin, (1 << config.B), (1 << config.B));
for (uint32_t i = 0, n = (1 << config.B); i < n; ++i)
{
//1
//printf("%s Connecting memory ports to bank=%d\n", simobject_->name().c_str(), i);
mem_req_ports_.at(i).bind(&bank_switch_->ReqIn.at(i));
bank_switch_->RspIn.at(i).bind(&mem_rsp_ports_.at(i));
}
//2
if (config_.num_inputs > 1) {
for (uint32_t i = 0; i < max; ++i) {
//printf("%s connecting bank and bypass port=%d\n", simobject_->name().c_str(), i);
bank_switch_->ReqOut.at(i % (1 << config.B)).bind(&bypass_switch_->ReqIn.at(i));
bypass_switch_->RspIn.at(i).bind(&bank_switch_->RspOut.at(i % (1 << config.B)));
}
} else {
bank_switch_->ReqOut.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&bank_switch_->RspOut.at(0));
}
}
else
{
mem_req_ports_.at(0).bind(&bypass_switch_->ReqIn.at(0));
bypass_switch_->RspIn.at(0).bind(&mem_rsp_ports_.at(0));
}
}

// calculate cache initialization cycles
Expand Down Expand Up @@ -673,8 +722,8 @@ CacheSim::CacheSim(const SimContext& ctx, const char* name, const Config& config
: SimObject<CacheSim>(ctx, name)
, CoreReqPorts(config.num_inputs, this)
, CoreRspPorts(config.num_inputs, this)
, MemReqPort(this)
, MemRspPort(this)
, MemReqPorts(NUM_MEM_PORTS, this)
, MemRspPorts(NUM_MEM_PORTS, this)
, impl_(new Impl(this, config))
{}

Expand Down
4 changes: 2 additions & 2 deletions sim/simx/cache_sim.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,8 +75,8 @@ class CacheSim : public SimObject<CacheSim> {

std::vector<SimPort<MemReq>> CoreReqPorts;
std::vector<SimPort<MemRsp>> CoreRspPorts;
SimPort<MemReq> MemReqPort;
SimPort<MemRsp> MemRspPort;
std::vector<SimPort<MemReq>> MemReqPorts;
std::vector<SimPort<MemRsp>> MemRspPorts;

CacheSim(const SimContext& ctx, const char* name, const Config& config);
~CacheSim();
Expand Down
4 changes: 2 additions & 2 deletions sim/simx/cluster.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,8 @@ Cluster::Cluster(const SimContext& ctx,
2, // pipeline latency
});

l2cache_->MemReqPort.bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPort);
l2cache_->MemReqPorts.at(0).bind(&this->mem_req_port);
this->mem_rsp_port.bind(&l2cache_->MemRspPorts.at(0));

icache_switch->ReqOut.at(0).bind(&l2cache_->CoreReqPorts.at(0));
l2cache_->CoreRspPorts.at(0).bind(&icache_switch->RspOut.at(0));
Expand Down
4 changes: 0 additions & 4 deletions sim/simx/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@
#define MEM_CLOCK_RATIO 1
#endif

#ifndef MEMORY_BANKS
#define MEMORY_BANKS 2
#endif

#define LSU_WORD_SIZE (XLEN / 8)
#define LSU_CHANNELS NUM_LSU_LANES
#define LSU_NUM_REQS (NUM_LSU_BLOCKS * LSU_CHANNELS)
Expand Down
2 changes: 2 additions & 0 deletions sim/simx/emulator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,8 @@ Word Emulator::get_csr(uint32_t addr, uint32_t tid, uint32_t wid) {
CSR_READ_64(VX_CSR_MPM_MEM_READS, proc_perf.mem_reads);
CSR_READ_64(VX_CSR_MPM_MEM_WRITES, proc_perf.mem_writes);
CSR_READ_64(VX_CSR_MPM_MEM_LT, proc_perf.mem_latency);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_CNTR, proc_perf.memsim.counter);
CSR_READ_64(VX_CSR_MPM_MEM_BANK_TICK, proc_perf.memsim.ticks);

CSR_READ_64(VX_CSR_MPM_LMEM_READS, lmem_perf.reads);
CSR_READ_64(VX_CSR_MPM_LMEM_WRITES, lmem_perf.writes);
Expand Down
Loading

0 comments on commit 6c607d3

Please sign in to comment.