From 27c1b3452e85dcee11faf61e3341b1619d719675 Mon Sep 17 00:00:00 2001 From: "tkojima.am" Date: Sat, 4 Jul 2020 21:44:05 +0900 Subject: [PATCH] add snacc debug utility --- options.cc | 39 +++++++++++++++++++++++++++++++++++ options.h | 2 ++ optiontbl.h | 6 +++++- snacc.cc | 27 ++++++++++++++++++++++++ snacc.h | 2 ++ snacccore.cc | 28 ++++++++++++++++++++++++- snacccore.h | 38 ++++++++++++++++++++++++++++++++++ snaccmodules.cc | 55 +++++++++++++++++++++++++++++++++++++++++++------ snaccmodules.h | 8 +++++-- vmips.cc | 51 +++++++++++++++++++++++++++++++++++++++++++++ 10 files changed, 246 insertions(+), 10 deletions(-) diff --git a/options.cc b/options.cc index 394721e..b07a0dd 100644 --- a/options.cc +++ b/options.cc @@ -32,6 +32,7 @@ with VMIPS; if not, write to the Free Software Foundation, Inc., #include #include #include +#include #define OPTBUFSIZ 1024 @@ -424,6 +425,44 @@ Options::option(const char *name) return NULL; } +std::vector Options::get_tuple(const char *option, int len) +{ + std::vector v; + std::string str; + + std::regex format_re; + std::regex num_re; + std::smatch m; + + // create regex + str = "\\("; + for (int i = 0; i < len; i++) { + str += "\\s*[0-9]+\\s*,"; + } + str.erase(str.size()-1); + str += "\\)"; + + try { + format_re = std::regex(str); + num_re = std::regex("[0-9]+"); + } catch(std::regex_error& e) { + fatal_error("regex is not supported\nPlease rebuild with GCC 4.9 or higher\n"); + } + + str = std::string(option); + if (std::regex_match(str, format_re)) { + for (int i = 0; i < len; i++) { + std::regex_search(str, m, num_re); + v.push_back(std::stoi(m[0].str())); + str = m.suffix(); + } + } else { + fatal_error("Invalid tuple option: %s", option); + } + + return v; +} + void Options::print_config_info(void) { diff --git a/options.h b/options.h index ae539d5..32f7d6e 100644 --- a/options.h +++ b/options.h @@ -23,6 +23,7 @@ with VMIPS; if not, write to the Free Software Foundation, Inc., #include "types.h" #include #include +#include /* This defines the name of the system default configuration file. */ #define SYSTEM_CONFIG_FILE SYSCONFDIR"/vmipsrc" @@ -77,6 +78,7 @@ class Options { virtual ~Options () { } virtual void process_options(int argc, char **argv); union OptionValue *option(const char *name); + std::vector get_tuple(const char *option, int len); }; #endif /* _OPTIONS_H_ */ diff --git a/optiontbl.h b/optiontbl.h index 90be20a..54ef2fb 100644 --- a/optiontbl.h +++ b/optiontbl.h @@ -403,6 +403,9 @@ static Option nametable[] = { // SNACC options { "snacc_sram_latency", NUM }, + { "snacc_inst_dump", STR }, + { "snacc_mad_debug", STR }, + { NULL, 0 } }; @@ -428,7 +431,8 @@ static const char *defaults_table[] = { "icachebnum=64", "dcachebnum=64", "mem_bandwidth=1", "bus_latency=8", "exmem_latency=3", "vcbufsize=24", "noroutermsg", "accelerator0=none", "accelerator1=none", "accelerator2=none", - "snacc_sram_latency=1", + "snacc_sram_latency=1", "snacc_inst_dump=disabled", + "snacc_mad_debug=disabled", NULL }; diff --git a/snacc.cc b/snacc.cc index aadcf35..b37f19b 100644 --- a/snacc.cc +++ b/snacc.cc @@ -1,5 +1,8 @@ #include "snacc.h" #include "snaccmodules.h" +#include "error.h" + +#include using namespace SNACCComponents; @@ -8,6 +11,12 @@ SNACC::SNACC(uint32 node_ID, Router* upperRouter, int core_count_) SNACC_GLB_OUTOFRANGE, false), core_count(core_count_) { + // check debug option + // std::string opt_inst_dump = + // std::string(opt->option("snacc_inst_dump")->str); + // std::string opt_mad_debug = + // std::string(opt->option("snacc_mad_debug")->str); + cores = new SNACCCore*[core_count]; dmem_upper = new DoubleBuffer*[core_count]; dmem_lower = new DoubleBuffer*[core_count]; @@ -125,4 +134,22 @@ bool SNACC::isTriggered() uint32 SNACC::get_dbg_data() { return 0; +} + +void SNACC::enable_inst_dump(int core_id) +{ + if (core_id >= 0 && core_id < core_count) { + cores[core_id]->enable_inst_dump(); + } else { + warning("core ID %d for SNACC inst dump exceeds actual core count\n", core_id); + } +} + +void SNACC::enable_mad_debug(int core_id) +{ + if (core_id >= 0 && core_id < core_count) { + cores[core_id]->enable_mad_debug(); + } else { + warning("core ID %d for SNACC mad debug exceeds actual core count\n", core_id); + } } \ No newline at end of file diff --git a/snacc.h b/snacc.h index d1ed51f..195fbcd 100644 --- a/snacc.h +++ b/snacc.h @@ -48,6 +48,8 @@ class SNACC : public CubeAccelerator{ virtual void send_commnad(uint32 cmd, uint32 arg); virtual bool isTriggered(); virtual uint32 get_dbg_data(); + void enable_inst_dump(int core_id); + void enable_mad_debug(int core_id); }; diff --git a/snacccore.cc b/snacccore.cc index 891013c..e5620b1 100644 --- a/snacccore.cc +++ b/snacccore.cc @@ -17,7 +17,7 @@ SNACCCore::SNACCCore(int core_id_, WbufArb *wbuf_arb_) : core_id(core_id_), dmem_u(dmem_u_), dmem_l(dmem_l_), rbuf_u(rbuf_u_), rbuf_l(rbuf_l_), lut(lut_), imem(imem_), wbuf(wbuf_), - wbuf_arb(wbuf_arb_) + wbuf_arb(wbuf_arb_), inst_dump(false) { dbg_msg = machine->opt->option("excmsg")->flag; mad_unit = new MadUnit(machine->opt->option("snacc_sram_latency")->num, @@ -217,6 +217,32 @@ void SNACCCore::wb_stage() //reset status isBranch = false; reg_write = false; + + if (inst_dump) { + disassemble(); + } +} + +void SNACCCore::disassemble() +{ + fprintf(stderr, "%d:\tSNACC\t", machine->num_cycles); + switch(dec_opcode) { + case SNACC_CORE_OPCODE_RTYPE0: + fprintf(stderr, RType0InstrFormat[dec_func], dec_rd, dec_rs); + break; + case SNACC_CORE_OPCODE_RTYPE1: + fprintf(stderr, RType1InstrFormat[dec_func], dec_rd, dec_rs); + break; + case SNACC_CORE_OPCODE_RTYPE2: + fprintf(stderr, RType2InstrFormat[dec_func], dec_rd, dec_rs); + break; + case SNACC_CORE_OPCODE_JUMP: + fprintf(stderr, InstrFormat[dec_func], dec_imm); + break; + default: + fprintf(stderr, InstrFormat[dec_opcode], dec_rd, dec_imm); + } + fprintf(stderr, "\n"); } diff --git a/snacccore.h b/snacccore.h index 7be4697..218b49f 100644 --- a/snacccore.h +++ b/snacccore.h @@ -26,7 +26,9 @@ #define SNACC_CTRLREG_SIZE 16 //Opcode +#define SNACC_CORE_OPCODE_RTYPE0 0 #define SNACC_CORE_OPCODE_RTYPE1 1 +#define SNACC_CORE_OPCODE_RTYPE2 2 #define SNACC_CORE_OPCODE_BNEQ 4 #define SNACC_CORE_OPCODE_JUMP 5 @@ -106,6 +108,8 @@ class SNACCCore { int status; int stall_cause; + void disassemble(); + // SRAM modules uint32 access_address; DoubleBuffer *access_mem; @@ -145,6 +149,9 @@ class SNACCCore { static const MemberFuncPtr kRTypeMemoryTable[16]; static const MemberFuncPtr kRTypeSimdTable[16]; + // for debug + bool inst_dump; + public: SNACCCore(int core_id_, DoubleBuffer *dmem_u_, @@ -159,6 +166,8 @@ class SNACCCore { void step(); void reset(); bool isDone() { return done; }; + void enable_inst_dump() { inst_dump = true; }; + void enable_mad_debug() { mad_unit->enable_debug(); }; private: void Unknown(); @@ -204,6 +213,35 @@ class SNACCCore { }; +static const char* InstrFormat[16] = { + "", "", "", "Loadi r%d, 0x%X", + "Bneq r%d, 0x%X", "Jump 0x%X", + "Mad r%d, 0x%X", "Madlp r%d, 0x%X", + "Setcr r%d, 0x%X", "Addi r%d, 0x%X", + "Subi r%d, 0x%X", "Sll r%d, 0x%X", + "Srl r%d, 0x%X", "Sra r%d, 0x%X", + "Unknown", "Unknown" }; + + +static const char* RType0InstrFormat[16] = { + "Nop", "Mov r%d, r%d", "Add r%d, r%d", "Sub r%d, r%d", + "Mul r%d, r%d", "And r%d, r%d", "Or r%d, r%d","Xor r%d, r%d", + "Neg r%d, r%d", "Unknown", "Unknown", "Unknown", + "Unknown", "Unknown", "Unknown", "Unknown" }; + +static const char* RType1InstrFormat[16] = { + "Halt", "Loadw r%d, r%d", "Storew r%d, r%d", "Loadh r%d, r%d", + "Storeh r%d, r%d", "Unknown", "Unknown", "Readcr", + "Unknown", "Unknown", + "Dbchange %d, %d", "Dma r%d, r%d", + "Unknown", "Unknown", + "Unknown", "Unknown" }; + +static const char* RType2InstrFormat[16] = { + "Nop", "Loadv r%d, r%d", "Unknown", "Unknown", + "Unknown", "Unknown", "Unknown", "Unknown", + "Unknown", "Unknown", "Unknown", "Unknown", + "Unknown", "Unknown", "Unknown", "Unknown" }; #endif //_SNACCCORE_H_ diff --git a/snaccmodules.cc b/snaccmodules.cc index 0dfef40..09e5045 100644 --- a/snaccmodules.cc +++ b/snaccmodules.cc @@ -247,7 +247,7 @@ MadUnit::MadUnit(uint32 sram_latency_, DoubleBuffer *dmem_u_, sram_latency(sram_latency_), dmem_u(dmem_u_), dmem_l(dmem_l_), rbuf_u(rbuf_u_), rbuf_l(rbuf_l_), lut(lut_), - TR0(TR0_), TR1(TR1_), FR0(FR0_), FR1(FR1_) + TR0(TR0_), TR1(TR1_), FR0(FR0_), FR1(FR1_), debug_print(false) { } @@ -453,10 +453,14 @@ void MadUnit::loadData(Fixed16 *array) void MadUnit::doMad() { Fixed16 weight[SNACC_SIMD_LANE_SIZE/2]; + Fixed16 data[SNACC_SIMD_LANE_SIZE]; //load weight loadWeight(weight); + if (debug_print) { + fprintf(stderr, "MAD\nbefore TR0 0x%08X, TR1 0x%08X\n", + tr0_fp, tr1_fp); + } if (eight_bit_mode) { - Fixed16 data[SNACC_SIMD_LANE_SIZE]; loadData(data); for (int i = 0; i < SNACC_SIMD_LANE_SIZE/2; i++) { if (mask[i]) { @@ -466,11 +470,11 @@ void MadUnit::doMad() for (int i = SNACC_SIMD_LANE_SIZE/2; i < SNACC_SIMD_LANE_SIZE; i++) { if (mask[i]) { - tr1_fp = tr1_fp + weight[i] * data[i]; + tr1_fp = tr1_fp + + weight[i - SNACC_SIMD_LANE_SIZE/2] * data[i]; } } } else { - Fixed16 data[SNACC_SIMD_LANE_SIZE/2]; loadData(data); for (int i = 0; i < SNACC_SIMD_LANE_SIZE/2; i++) { if (mask[i]) { @@ -478,12 +482,29 @@ void MadUnit::doMad() } } } + if (debug_print) { + int half_lane = SNACC_SIMD_LANE_SIZE/ 2; + int max_lane = eight_bit_mode ? SNACC_SIMD_LANE_SIZE : half_lane; + for (int i = 0; i < max_lane; i++) { + if (mask[i]) { + fprintf(stderr, "mul%d: 0x%04X * 0x%04X = 0x%04X\n", i, + weight[i % half_lane], data[i], + weight[i % half_lane] * data[i]); + } else { + fprintf(stderr, "mul%d: masked\n", i); + } + } + fprintf(stderr, "after TR0 0x%08X, TR1 0x%08X\n", + tr0_fp, tr1_fp); + } } void MadUnit::doMaxPool() { + Fixed32 prev_tr0 = tr0_fp; + Fixed32 prev_tr1 = tr1_fp; + Fixed16 data[SNACC_SIMD_LANE_SIZE]; if (eight_bit_mode) { - Fixed16 data[SNACC_SIMD_LANE_SIZE]; loadData(data); for (int i = 0; i < SNACC_SIMD_LANE_SIZE/2; i++) { if (mask[i]) { @@ -499,7 +520,6 @@ void MadUnit::doMaxPool() } } } else { - Fixed16 data[SNACC_SIMD_LANE_SIZE/2]; loadData(data); for (int i = 0; i < SNACC_SIMD_LANE_SIZE/2; i++) { if (mask[i]) { @@ -508,6 +528,29 @@ void MadUnit::doMaxPool() } } } + if (debug_print) { + fprintf(stderr, "MAXPOOL\nmax(0x%08X, ", prev_tr0); + for (int i = 0; i < SNACC_SIMD_LANE_SIZE/2; i++) { + if (mask[i]) { + fprintf(stderr, "0x%08X, ", data[i].ToFixed32()); + } else { + fprintf(stderr, "masked, "); + } + } + fprintf(stderr, "\b\b) = 0x%08X\n", tr0_fp); + if (eight_bit_mode) { + fprintf(stderr, "max(0x%08X, ", prev_tr1); + for (int i = SNACC_SIMD_LANE_SIZE/2; + i < SNACC_SIMD_LANE_SIZE; i++) { + if (mask[i]) { + fprintf(stderr, "0x%08X, ", data[i].ToFixed32()); + } else { + fprintf(stderr, "masked, "); + } + } + fprintf(stderr, "\b\b) = 0x%08X\n", tr1_fp); + } + } } void MadUnit::doAvgPool() diff --git a/snaccmodules.h b/snaccmodules.h index f952c09..37f6362 100644 --- a/snaccmodules.h +++ b/snaccmodules.h @@ -6,6 +6,7 @@ #include "snaccAddressMap.h" #include +#include #define SNACC_WBUF_ARB_4CORE 0 #define SNACC_WBUF_ARB_2CORE 2 @@ -173,7 +174,7 @@ uint32 SignedClipMostSignificant4Bits(uint32 before); // <8.24> bits signed fixed point decimal number, which is // internal representation of multiply-and-add unit. - struct Fixed32 { + class Fixed32 { public: Fixed32() : num_(0) {} @@ -238,6 +239,9 @@ uint32 SignedClipMostSignificant4Bits(uint32 before); Fixed32 tr0_fp, tr1_fp, fr0_fp, fr1_fp; + // for debug + bool debug_print; + bool overDmemBoundary(); bool overRbufBoundary(); void updataAddress(); @@ -274,7 +278,7 @@ uint32 SignedClipMostSignificant4Bits(uint32 before); void step(); void reset(); bool running(); - + void enable_debug() { debug_print = true; }; }; } diff --git a/vmips.cc b/vmips.cc index 6e1836c..55b85b4 100644 --- a/vmips.cc +++ b/vmips.cc @@ -62,6 +62,7 @@ with VMIPS; if not, write to the Free Software Foundation, Inc., #include "snacc.h" #include "dmac.h" #include "debugutils.h" +#include vmips *machine; @@ -574,15 +575,38 @@ bool vmips::setup_dmac() bool vmips::setup_cube() { + std::vector snacc_inst_dump(2, -1), snacc_mad_debug(2, -1); + bool snacc_inst_dump_fail, snacc_mad_debug_fail; + snacc_inst_dump_fail = snacc_mad_debug_fail = false; std::string ac0_name = std::string(opt->option("accelerator0")->str); std::string ac1_name = std::string(opt->option("accelerator1")->str); std::string ac2_name = std::string(opt->option("accelerator2")->str); + //get snacc options + std::string opt_str = std::string(opt->option("snacc_inst_dump")->str); + if (opt_str != std::string("disabled")) { + snacc_inst_dump = opt->get_tuple(opt_str.c_str(), 2); + snacc_inst_dump_fail = true; + } + opt_str = std::string(opt->option("snacc_mad_debug")->str); + if (opt_str != std::string("disabled")) { + snacc_mad_debug = opt->get_tuple(opt_str.c_str(), 2); + snacc_mad_debug_fail = true; + } + //setup accelerator0 if (ac0_name == std::string("CMA")) { ac0 = new CMA(1, rtif->getRouter()); } else if (ac0_name == std::string("SNACC")) { ac0 = new SNACC(1, rtif->getRouter(), 4); + if (snacc_inst_dump[0] == 0) { + ((SNACC*)(ac0))->enable_inst_dump(snacc_inst_dump[1]); + snacc_inst_dump_fail = false; + } + if (snacc_mad_debug[0] == 0) { + ((SNACC*)(ac0))->enable_mad_debug(snacc_mad_debug[1]); + snacc_mad_debug_fail = false; + } } else if (ac0_name == std::string("RemoteRam")) { ac0 = new RemoteRam(1, rtif->getRouter(), 0x2048); //2KB } else if (ac0_name != std::string("none")) { @@ -608,6 +632,15 @@ vmips::setup_cube() ac1 = new CMA(2, ac0->getRouter()); } else if (ac1_name == std::string("SNACC")) { ac1 = new SNACC(2, ac0->getRouter(), 4); + ac0 = new SNACC(1, rtif->getRouter(), 4); + if (snacc_inst_dump[0] == 0) { + ((SNACC*)(ac1))->enable_inst_dump(snacc_inst_dump[1]); + snacc_inst_dump_fail = false; + } + if (snacc_mad_debug[0] == 0) { + ((SNACC*)(ac1))->enable_mad_debug(snacc_mad_debug[1]); + snacc_mad_debug_fail = false; + } } else if (ac1_name == std::string("RemoteRam")) { ac1 = new RemoteRam(2, ac0->getRouter(), 0x2048); //2KB } else { @@ -635,6 +668,15 @@ vmips::setup_cube() ac2 = new CMA(3, ac1->getRouter()); } else if (ac1_name == std::string("SNACC")) { ac2 = new SNACC(3, ac1->getRouter(), 4); + ac0 = new SNACC(1, rtif->getRouter(), 4); + if (snacc_inst_dump[0] == 0) { + ((SNACC*)(ac2))->enable_inst_dump(snacc_inst_dump[1]); + snacc_inst_dump_fail = false; + } + if (snacc_mad_debug[0] == 0) { + ((SNACC*)(ac2))->enable_mad_debug(snacc_mad_debug[1]); + snacc_mad_debug_fail = false; + } } else if (ac1_name == std::string("RemoteRam")) { ac2 = new RemoteRam(3, ac1->getRouter(), 0x2048); //2KB } else { @@ -653,6 +695,15 @@ vmips::setup_cube() } } + if (snacc_inst_dump_fail) { + warning("SNACC inst dump option for node %d is ignored\n", + snacc_inst_dump[0]); + } + if (snacc_mad_debug_fail) { + warning("SNACC mad debug option for node %d is ignored\n", + snacc_mad_debug[0]); + } + return true; }