diff --git a/c_code/comms_i.c b/c_code/comms_i.c index ed04a8c..ab2cb5a 100644 --- a/c_code/comms_i.c +++ b/c_code/comms_i.c @@ -78,7 +78,7 @@ void i_processQueue (uint unused0, uint unused1) uint pkt_type = key & SPINN_TYPE_MASK; - // check if data packet, + // process data packet, if (pkt_type == SPINN_DATA_KEY) { // check packet phase and process accordingly @@ -96,22 +96,20 @@ void i_processQueue (uint unused0, uint unused1) } } - // check if stop packet, + // or process stop packet, else if (pkt_type == SPINN_STOP_KEY) { - // stop packet received i_stop_packet (key); } - // check if network stop packet, + // or process network stop packet, else if (pkt_type == SPINN_STPN_KEY) { - // network stop packet received i_net_stop_packet (key); } #ifdef DEBUG - // report unknown packet type, + // or report unknown packet type, else { stage_done (SPINN_UNXPD_PKT, key); diff --git a/c_code/comms_s.c b/c_code/comms_s.c index a927679..7874d00 100644 --- a/c_code/comms_s.c +++ b/c_code/comms_s.c @@ -16,7 +16,7 @@ // ------------------------------------------------------------------------ // ------------------------------------------------------------------------ // enqueue received packet -// (FORWARD, BACKPROP, ldsa, ldst, stop and net_stop types) +// (FORWARD, BACKPROP, lds, stop and net_stop types) // ------------------------------------------------------------------------ void s_receivePacket (uint key, uint payload) { @@ -77,7 +77,7 @@ void s_processQueue (uint unused0, uint unused1) uint pkt_type = key & SPINN_TYPE_MASK; - // check if data packet, + // process data packet, if (pkt_type == SPINN_DATA_KEY) { // check packet phase and process accordingly @@ -95,36 +95,26 @@ void s_processQueue (uint unused0, uint unused1) } } - // check for an LDS "accumulation" packet, + // or process LDS packet, else if (pkt_type == SPINN_LDSA_KEY) { - // process LDS "accumulation" packet - s_ldsa_packet (payload); + s_lds_packet (payload); } - // check for LDS "total" packet, - else if (pkt_type == SPINN_LDST_KEY) - { - // process LDS "total" packet - s_ldst_packet (payload); - } - - // check if stop packet, + // or process stop packet, else if (pkt_type == SPINN_STOP_KEY) { - // stop packet received s_stop_packet (key); } - // check if network stop packet, + // or process network stop packet, else if (pkt_type == SPINN_STPN_KEY) { - // network stop packet received s_net_stop_packet (key); } #ifdef DEBUG - // report unknown packet type, + // or report unknown packet type, else { stage_done (SPINN_UNXPD_PKT, key); @@ -233,34 +223,34 @@ void s_net_stop_packet (uint key) // ------------------------------------------------------------------------ -// process LDSA packet: accumulate the received partial link delta sums +// process LDS packet: accumulate the received partial link delta sums // ------------------------------------------------------------------------ -void s_ldsa_packet (uint payload) +void s_lds_packet (uint payload) { #ifdef DEBUG - lda_recv++; + lds_recv++; #endif // add the received value to the total so far, s_lds_part += (lds_t) payload; // increment the count of partial link delta sums arrived, - s_ldsa_arrived++; + s_lds_arrived++; // check whether all the partial sums have arrived - if (s_ldsa_arrived == scfg.ldsa_expected) + if (s_lds_arrived == scfg.lds_expected) { - // send the result to the first s core - // to give a total across the whole network - if (scfg.is_first_group == 0) - { - while (!spin1_send_mc_packet (ldstKey, s_lds_part, WITH_PAYLOAD)); + // broadcast (first subgroup) or relay (all others) lds value + while (!spin1_send_mc_packet (ldsKey, s_lds_part, WITH_PAYLOAD)); #ifdef DEBUG - pkt_sent++; - ldt_sent++; + pkt_sent++; + lds_sent++; #endif - } + + // prepare for next epoch + s_lds_part = 0; + s_lds_arrived = 0; // access thread semaphore with interrupts disabled uint cpsr = spin1_int_disable (); @@ -293,62 +283,3 @@ void s_ldsa_packet (uint payload) } } // ------------------------------------------------------------------------ - - -// ------------------------------------------------------------------------ -// process LDST packet: accumulate the received link delta sum totals -// ------------------------------------------------------------------------ -void s_ldst_packet (uint payload) -{ -#ifdef DEBUG - ldt_recv++; -#endif - - // add the received value to the total so far, - s_lds_part += (lds_t) payload; - - // increment the count of link delta sums arrived, - s_ldst_arrived++; - - // check whether all the partial sums have arrived - if (s_ldst_arrived == scfg.ldst_expected) - { - // send the final value of s_lds_part back to the w cores - while (!spin1_send_mc_packet (ldsrKey, s_lds_part, WITH_PAYLOAD)); - -#ifdef DEBUG - pkt_sent++; - ldr_sent++; -#endif - - // access thread semaphore with interrupts disabled - uint cpsr = spin1_int_disable (); - -#if defined(DEBUG) && defined(DEBUG_THRDS) - if (!(sb_thrds_pend & SPINN_THRD_LDST)) - wrng_cth++; -#endif - - // check if all other threads done - if (sb_thrds_pend == SPINN_THRD_LDST) - { - // if done initialise semaphore - sb_thrds_pend = SPINN_SB_THRDS; - - // restore interrupts after semaphore access, - spin1_mode_restore (cpsr); - - // and advance tick - sb_advance_tick (); - } - else - { - // if not done report processing thread done, - sb_thrds_pend &= ~SPINN_THRD_LDST; - - // and restore interrupts after semaphore access - spin1_mode_restore (cpsr); - } - } -} -// ------------------------------------------------------------------------ diff --git a/c_code/comms_s.h b/c_code/comms_s.h index be02cc7..6360d1f 100644 --- a/c_code/comms_s.h +++ b/c_code/comms_s.h @@ -7,7 +7,6 @@ void s_processQueue (uint unused0, uint unused1); void s_stop_packet (uint key); void s_net_stop_packet (uint key); -void s_ldsa_packet (uint payload); -void s_ldst_packet (uint payload); +void s_lds_packet (uint payload); #endif diff --git a/c_code/comms_t.c b/c_code/comms_t.c index b97359e..7f9d4e8 100644 --- a/c_code/comms_t.c +++ b/c_code/comms_t.c @@ -121,19 +121,19 @@ void t_processFWDQueue (uint unused0, uint unused1) tf_process (key, payload); } - // process criterion packet, + // or process criterion packet, else if (pkt_type == SPINN_CRIT_KEY) { t_criterion_packet (key); } - // process tick stop packet, + // or process tick stop packet, else if (pkt_type == SPINN_STOP_KEY) { t_stop_packet (key); } - // process network stop packet, + // or process network stop packet, else if (pkt_type == SPINN_STPN_KEY) { t_net_stop_packet (key); @@ -170,38 +170,48 @@ void t_criterion_packet (uint key) #endif // partial criterion value arrived, - tf_crit_prev = key & SPINN_STPD_MASK; + tf_crit_prev = tf_crit_prev && (key & SPINN_STPD_MASK); - // access flag with interrupts disabled, - uint cpsr = spin1_int_disable (); + // update scoreboard, + tf_crit_arrived++; - // and check if updated criterion value can be forwarded - if (tf_crit_rdy) + // and check if all criterion packets arrived + if (tf_crit_arrived == tcfg.crit_expected) { - // initialise flag, - tf_crit_rdy = tf_init_crit; + // initialise scoreboard for next tick, + tf_crit_arrived = 0; - // restore interrupts after flag access, - spin1_mode_restore (cpsr); + // access flag with interrupts disabled, + uint cpsr = spin1_int_disable (); - // send stop packet, - tf_send_stop (); - - // and advance tick if last_output_group - //NOTE: last output group does not get a tick stop packet - // so it's ready to advance tick - if (tcfg.is_last_output_group) + // and check if updated criterion value can be forwarded + if (tf_crit_rdy) { - tf_advance_tick (); + // initialise flag, + tf_crit_rdy = 0; + + // restore interrupts after flag access, + spin1_mode_restore (cpsr); + + // send stop packet, + tf_send_stop (); + + // and advance tick if last_output_group + //NOTE: last output group does not get a tick stop packet + // so it's ready to advance tick + if (tcfg.is_last_output) + { + tf_advance_tick (); + } } - } - else - { - // flag ready to forward criterion, - tf_crit_rdy = 1; + else + { + // flag ready to forward criterion, + tf_crit_rdy = 1; - // and restore interrupts after flag access - spin1_mode_restore (cpsr); + // and restore interrupts after flag access + spin1_mode_restore (cpsr); + } } } // ------------------------------------------------------------------------ @@ -311,7 +321,7 @@ void t_backprop_packet (uint key, uint payload) // store received error, t_errors[tb_comms][inx] = (error_t) payload; - // and update scoreboard, + // update scoreboard, tb_arrived++; // if all expected errors have arrived may move to next tick @@ -362,7 +372,12 @@ void tf_send_stop (void) // "aggregate" criteria, tf_stop_crit = tf_stop_crit && tf_crit_prev; - if (tcfg.is_last_output_group) + // initialise previous value, + //TODO: should this be done in critical section? + tf_crit_prev = TRUE; + + // make stop decision, + if (tcfg.is_last_output) { tf_group_crit = tf_stop_crit; @@ -386,7 +401,7 @@ void tf_send_stop (void) #ifdef DEBUG pkt_sent++; - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { stp_sent++; } diff --git a/c_code/comms_w.c b/c_code/comms_w.c index 96b4168..4fe1f0d 100644 --- a/c_code/comms_w.c +++ b/c_code/comms_w.c @@ -17,7 +17,7 @@ // ------------------------------------------------------------------------ // ------------------------------------------------------------------------ // initial handling of received packets -// (FORWARD, BACKPROP, ldsr, stop, net_stop and sync types) +// (FORWARD, BACKPROP, lds, stop, net_stop and sync types) // ------------------------------------------------------------------------ void w_receivePacket (uint key, uint payload) { @@ -142,10 +142,10 @@ void w_processBKPQueue (uint unused0, uint unused1) wb_process (key, payload); } - // process LDS result packet, - else if (pkt_type == SPINN_LDSR_KEY) + // or process LDS result packet, + else if (pkt_type == SPINN_LDSA_KEY) { - w_ldsr_packet (payload); + w_lds_packet (payload); } #ifdef DEBUG @@ -178,13 +178,6 @@ void w_forward_packet (uint key, uint payload) recv_fwd++; if (phase == SPINN_BACKPROP) wrng_fph++; - - uint blk = (key & SPINN_BLOCK_MASK) >> SPINN_BLOCK_SHIFT; - if (blk != wcfg.row_blk) - { - pkt_fwbk++; - return; - } #endif // get output index: mask out phase, core and block data, @@ -361,10 +354,10 @@ void w_sync_packet (void) // ------------------------------------------------------------------------ // process an LDS result packet // ------------------------------------------------------------------------ -void w_ldsr_packet (uint payload) +void w_lds_packet (uint payload) { #ifdef DEBUG - ldr_recv++; + lds_recv++; #endif // the final link delta sum for the epoch arrived @@ -374,12 +367,12 @@ void w_ldsr_packet (uint payload) uint cpsr = spin1_int_disable (); #if defined(DEBUG) && defined(DEBUG_THRDS) - if (!(wb_thrds_pend & SPINN_THRD_LDSR)) + if (!(wb_thrds_pend & SPINN_THRD_LDSA)) wrng_cth++; #endif // check if all other threads done - if (wb_thrds_pend == SPINN_THRD_LDSR) + if (wb_thrds_pend == SPINN_THRD_LDSA) { // initialise semaphore (no link delta summation in next tick), wb_thrds_pend = SPINN_WB_THRDS; @@ -393,7 +386,7 @@ void w_ldsr_packet (uint payload) else { // if not done report processing thread done, - wb_thrds_pend &= ~SPINN_THRD_LDSR; + wb_thrds_pend &= ~SPINN_THRD_LDSA; // and restore interrupts after semaphore access spin1_mode_restore (cpsr); diff --git a/c_code/comms_w.h b/c_code/comms_w.h index cb41af1..c410cba 100644 --- a/c_code/comms_w.h +++ b/c_code/comms_w.h @@ -10,7 +10,7 @@ void w_stop_packet (uint key); void w_net_stop_packet (uint key); void w_sync_packet (void); -void w_ldsr_packet (uint payload); +void w_lds_packet (uint payload); void store_output (uint index); void restore_outputs (uint tick); diff --git a/c_code/init_i.c b/c_code/init_i.c index 2afea6b..9b0a69c 100644 --- a/c_code/init_i.c +++ b/c_code/init_i.c @@ -26,6 +26,11 @@ uint cfg_init (void) io_printf (IO_BUF, "input\n"); #endif +#ifdef PROFILE + // configure timer 2 for profiling + tc[T2_CONTROL] = SPINN_PROFILER_CFG; +#endif + // read the data specification header data_specification_metadata_t * data = data_specification_get_data_address(); @@ -148,14 +153,6 @@ uint mem_init (void) return (SPINN_MEM_UNAVAIL); } - // allocate memory for BACKPROP keys (one per partition) - if ((i_bkpKey = ((uint *) - spin1_malloc (icfg.partitions * sizeof (uint)))) == NULL - ) - { - return (SPINN_MEM_UNAVAIL); - } - // allocate memory for INPUT functions for (uint i = 0; i < icfg.num_in_procs; i++) { @@ -271,13 +268,9 @@ void var_init (uint reset_examples) i_pkt_queue.tail = 0; // initialise packet keys - //NOTE: colour is initialised to 0. + //NOTE: colour is implicitly initialised to 0 fwdKey = rt[FWD] | SPINN_PHASE_KEY(SPINN_FORWARD); - - for (uint p = 0; p < icfg.partitions; p++) - { - i_bkpKey[p] = rt[BKPI + p] | SPINN_PHASE_KEY (SPINN_BACKPROP); - } + bkpKey = rt[BKP] | SPINN_PHASE_KEY (SPINN_BACKPROP); // if the INPUT INTEGRATOR is used // reset the memory of the INTEGRATOR state variables @@ -316,6 +309,17 @@ void var_init (uint reset_examples) tot_tick = 0; // total number of ticks executed // ------------------------------------------------------------------------ #endif + +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +prf_fwd_min = SPINN_PROFILER_START; // minimum FORWARD processing time +prf_fwd_max = 0; // maximum FORWARD processing time +prf_bkp_min = SPINN_PROFILER_START; // minimum BACKPROP processing time +prf_bkp_max = 0; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif } // ------------------------------------------------------------------------ @@ -430,6 +434,17 @@ void stage_done (uint ec, uint key) if (wrng_sth) io_printf (IO_BUF, "wrong sth:%d\n", wrng_sth); #endif +#ifdef PROFILE + // report PROFILER values + io_printf (IO_BUF, "min fwd proc:%u\n", prf_fwd_min); + io_printf (IO_BUF, "max fwd proc:%u\n", prf_fwd_max); + if (xcfg.training) + { + io_printf (IO_BUF, "min bkp proc:%u\n", prf_bkp_min); + io_printf (IO_BUF, "max bkp proc:%u\n", prf_bkp_max); + } +#endif + #ifdef DEBUG // close log, io_printf (IO_BUF, "stopping stage %u\n", xcfg.stage_id); diff --git a/c_code/init_s.c b/c_code/init_s.c index 7ac9678..19f2ed5 100644 --- a/c_code/init_s.c +++ b/c_code/init_s.c @@ -25,6 +25,11 @@ uint cfg_init (void) io_printf (IO_BUF, "sum\n"); #endif +#ifdef PROFILE + // configure timer 2 for profiling + tc[T2_CONTROL] = SPINN_PROFILER_CFG; +#endif + // read the data specification header data_specification_metadata_t * data = data_specification_get_data_address(); @@ -89,8 +94,7 @@ uint cfg_init (void) io_printf (IO_BUF, "nu: %d\n", scfg.num_units); io_printf (IO_BUF, "fe: %d\n", scfg.fwd_expected); io_printf (IO_BUF, "be: %d\n", scfg.bkp_expected); - io_printf (IO_BUF, "ae: %d\n", scfg.ldsa_expected); - io_printf (IO_BUF, "te: %d\n", scfg.ldst_expected); + io_printf (IO_BUF, "le: %d\n", scfg.lds_expected); io_printf (IO_BUF, "uf: %d\n", xcfg.update_function); io_printf (IO_BUF, "fg: %d\n", scfg.is_first_group); io_printf (IO_BUF, "fk: 0x%08x\n", rt[FWD]); @@ -229,8 +233,6 @@ void var_init (uint reset_examples) } sf_done = 0; sb_done = 0; - s_ldsa_arrived = 0; - s_ldst_arrived = 0; // initialise thread semaphores sf_thrds_pend = SPINN_SF_THRDS; @@ -239,20 +241,20 @@ void var_init (uint reset_examples) // initialise processing thread flag s_active = FALSE; - // initialise partial lds + // initialise lds s_lds_part = 0; + s_lds_arrived = 0; // initialise packet queue s_pkt_queue.head = 0; s_pkt_queue.tail = 0; // initialise packet keys - //NOTE: colour is initialised to 0. - fwdKey = rt[FWD] | SPINN_PHASE_KEY (SPINN_FORWARD); - bkpKey = rt[BKP] | SPINN_PHASE_KEY (SPINN_BACKPROP); - ldstKey = rt[LDS] | SPINN_LDST_KEY | SPINN_PHASE_KEY (SPINN_BACKPROP); - ldsrKey = rt[LDS] | SPINN_LDSR_KEY | SPINN_PHASE_KEY (SPINN_BACKPROP); - fdsKey = rt[FDS] | SPINN_SYNC_KEY | SPINN_PHASE_KEY (SPINN_FORWARD); + //NOTE: colour is implicitly initialised to 0 + fwdKey = rt[FWD] | SPINN_PHASE_KEY (SPINN_FORWARD); + bkpKey = rt[BKP] | SPINN_PHASE_KEY (SPINN_BACKPROP); + ldsKey = rt[LDS] | SPINN_LDSA_KEY | SPINN_PHASE_KEY (SPINN_BACKPROP); + fdsKey = rt[FDS] | SPINN_SYNC_KEY | SPINN_PHASE_KEY (SPINN_FORWARD); #ifdef DEBUG // ------------------------------------------------------------------------ @@ -268,10 +270,8 @@ void var_init (uint reset_examples) stp_sent = 0; // stop packets sent stp_recv = 0; // stop packets received stn_recv = 0; // network_stop packets received - lda_recv = 0; // partial link_delta packets received - ldt_sent = 0; // total link_delta packets sent - ldt_recv = 0; // total link_delta packets received - ldr_sent = 0; // link_delta packets sent + lds_recv = 0; // link_delta packets received + lds_sent = 0; // link_delta packets sent wrng_phs = 0; // packets received in wrong phase wrng_pth = 0; // unexpected processing thread wrng_cth = 0; // unexpected comms thread @@ -279,6 +279,17 @@ void var_init (uint reset_examples) tot_tick = 0; // total number of ticks executed // ------------------------------------------------------------------------ #endif + +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +prf_fwd_min = SPINN_PROFILER_START; // minimum FORWARD processing time +prf_fwd_max = 0; // maximum FORWARD processing time +prf_bkp_min = SPINN_PROFILER_START; // minimum BACKPROP processing time +prf_bkp_max = 0; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif } // ------------------------------------------------------------------------ @@ -392,16 +403,8 @@ void stage_done (uint ec, uint key) io_printf (IO_BUF, "total sent:%d\n", pkt_sent); io_printf (IO_BUF, "recv: fwd:%d bkp:%d\n", recv_fwd, recv_bkp); io_printf (IO_BUF, "sent: fwd:%d bkp:%d\n", sent_fwd, sent_bkp); - io_printf (IO_BUF, "ldsa recv:%d\n", lda_recv); - if (scfg.is_first_group) - { - io_printf (IO_BUF, "ldst recv:%d\n", ldt_recv); - io_printf (IO_BUF, "ldsr sent:%d\n", ldr_sent); - } - else - { - io_printf (IO_BUF, "ldst sent:%d\n", ldt_sent); - } + io_printf (IO_BUF, "lds sent:%d\n", lds_sent); + io_printf (IO_BUF, "lds recv:%d\n", lds_recv); io_printf (IO_BUF, "stop recv:%d\n", stp_recv); io_printf (IO_BUF, "stpn recv:%d\n", stn_recv); io_printf (IO_BUF, "sync sent:%d\n", spk_sent); @@ -411,6 +414,17 @@ void stage_done (uint ec, uint key) if (wrng_sth) io_printf (IO_BUF, "wrong sth:%d\n", wrng_sth); #endif +#ifdef PROFILE + // report PROFILER values + io_printf (IO_BUF, "min fwd proc:%u\n", prf_fwd_min); + io_printf (IO_BUF, "max fwd proc:%u\n", prf_fwd_max); + if (xcfg.training) + { + io_printf (IO_BUF, "min bkp proc:%u\n", prf_bkp_min); + io_printf (IO_BUF, "max bkp proc:%u\n", prf_bkp_max); + } +#endif + #ifdef DEBUG // close log, io_printf (IO_BUF, "stopping stage %u\n", xcfg.stage_id); diff --git a/c_code/init_t.c b/c_code/init_t.c index 64720f0..a9ce178 100644 --- a/c_code/init_t.c +++ b/c_code/init_t.c @@ -27,6 +27,11 @@ uint cfg_init (void) io_printf (IO_BUF, "threshold\n"); #endif +#ifdef PROFILE + // configure timer 2 for profiling + tc[T2_CONTROL] = SPINN_PROFILER_CFG; +#endif + // read the data specification header data_specification_metadata_t * data = data_specification_get_data_address(); @@ -150,8 +155,8 @@ uint cfg_init (void) #ifdef DEBUG_CFG io_printf (IO_BUF, "og: %d\n", tcfg.output_grp); io_printf (IO_BUF, "ig: %d\n", tcfg.input_grp); + io_printf (IO_BUF, "ls: %d\n", tcfg.is_last_sgrp); io_printf (IO_BUF, "nu: %d\n", tcfg.num_units); - io_printf (IO_BUF, "wb: %d\n", tcfg.write_blk); io_printf (IO_BUF, "ie: %d\n", tcfg.out_integr_en); io_printf (IO_BUF, "dt: %f\n", tcfg.out_integr_dt); io_printf (IO_BUF, "np: %d\n", tcfg.num_out_procs); @@ -165,9 +170,10 @@ uint cfg_init (void) tcfg.initOutput, SPINN_ACTIV_SHIFT)); io_printf (IO_BUF, "gs: %k\n", tcfg.tst_group_criterion); io_printf (IO_BUF, "gt: %k\n", tcfg.trn_group_criterion); + io_printf (IO_BUF, "ce: %d\n", tcfg.crit_expected); io_printf (IO_BUF, "cf: %d\n", tcfg.criterion_function); - io_printf (IO_BUF, "fg: %d\n", tcfg.is_first_output_group); - io_printf (IO_BUF, "lg: %d\n", tcfg.is_last_output_group); + io_printf (IO_BUF, "fo: %d\n", tcfg.is_first_output); + io_printf (IO_BUF, "lo: %d\n", tcfg.is_last_output); io_printf (IO_BUF, "ef: %d\n", tcfg.error_function); io_printf (IO_BUF, "fk: 0x%08x\n", rt[FWD]); io_printf (IO_BUF, "bk: 0x%08x\n", rt[BKP]); @@ -239,14 +245,6 @@ uint mem_init (void) return (SPINN_MEM_UNAVAIL); } - // allocate memory for forward keys (one per partition) - if ((t_fwdKey = ((uint *) - spin1_malloc (tcfg.partitions * sizeof (uint)))) == NULL - ) - { - return (SPINN_MEM_UNAVAIL); - } - // allocate memory for OUTPUT functions for (uint i = 0; i < tcfg.num_out_procs; i++) { @@ -488,7 +486,7 @@ void var_init (uint reset_examples, uint reset_epochs_trained) net_stop = 0; // initialise max and min ticks - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { // get max number of ticks for first event if (ev[event_idx].max_time != SPINN_FP_NaN) @@ -535,19 +533,20 @@ void var_init (uint reset_examples, uint reset_epochs_trained) tb_procs = 0; tb_comms = 1; - // initialise received net and error scoreboards + // initialise received net, error and criterion scoreboards tf_arrived = 0; tb_arrived = 0; + tf_crit_arrived = 0; // initialise thread semaphores tf_thrds_pend = SPINN_TF_THRDS; tb_thrds_pend = SPINN_TB_THRDS; // initialise recording options - t_rec_results = xcfg.rec_results && tcfg.is_last_output_group && + t_rec_results = xcfg.rec_results && tcfg.is_last_output && !xcfg.training && (stage_rec_flags & (1 << SPINN_REC_RESULTS)); - t_rec_tick_data = xcfg.rec_outputs && tcfg.is_first_output_group && + t_rec_tick_data = xcfg.rec_outputs && tcfg.is_first_output && (stage_rec_flags & (1 << SPINN_REC_TICK_DATA)); t_rec_outputs = xcfg.rec_outputs && tcfg.output_grp && @@ -580,20 +579,22 @@ void var_init (uint reset_examples, uint reset_epochs_trained) - SPINN_SHORT_ACTIV_SHIFT); t_max_target = SPINN_SHORT_ACTIV_MIN_POS << (SPINN_ACTIV_SHIFT - SPINN_SHORT_ACTIV_SHIFT); + } - // no need to wait for previous value if first group - if (tcfg.is_first_output_group) - { - tf_init_crit = 1; - tf_crit_prev = TRUE; - } - else - { - tf_init_crit = 0; - } - tf_crit_rdy = tf_init_crit; + // check if expecting a previous criterion value + if (tcfg.crit_expected) + { + tf_crit_init = 0; + } + else + { + tf_crit_init = 1; } + // initialise flag and previous value + tf_crit_rdy = tf_crit_init; + tf_crit_prev = TRUE; + // initialise processing thread flag tf_active = FALSE; @@ -602,15 +603,11 @@ void var_init (uint reset_examples, uint reset_epochs_trained) t_pkt_queue.tail = 0; // initialise packet keys - //NOTE: colour is initialised to 0 - for (uint p = 0; p < tcfg.partitions; p++) - { - t_fwdKey[p] = rt[FWDT + p] | SPINN_PHASE_KEY (SPINN_FORWARD); - } - + //NOTE: colour is implicitly initialised to 0 + fwdKey = rt[FWD] | SPINN_PHASE_KEY (SPINN_FORWARD); bkpKey = rt[BKP] | SPINN_PHASE_KEY (SPINN_BACKPROP); - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { // tick stop key tf_stop_key = rt[STP] | SPINN_STOP_KEY | SPINN_PHASE_KEY (SPINN_FORWARD); @@ -642,14 +639,29 @@ void var_init (uint reset_examples, uint reset_epochs_trained) stn_recv = 0; // network_stop packets received wrng_phs = 0; // packets received in wrong phase tot_tick = 0; // total number of ticks executed + // ------------------------------------------------------------------------ #endif #if defined(DEBUG) && defined(DEBUG_THRDS) + // ------------------------------------------------------------------------ + // THREAD DEBUG variables + // ------------------------------------------------------------------------ wrng_pth = 0; // unexpected processing thread wrng_cth = 0; // unexpected comms thread wrng_sth = 0; // unexpected stop thread -#endif // ------------------------------------------------------------------------ +#endif + +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +prf_fwd_min = SPINN_PROFILER_START; // minimum FORWARD processing time +prf_fwd_max = 0; // maximum FORWARD processing time +prf_bkp_min = SPINN_PROFILER_START; // minimum BACKPROP processing time +prf_bkp_max = 0; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif } // ------------------------------------------------------------------------ @@ -777,22 +789,18 @@ void stage_done (uint ec, uint key) io_printf (IO_BUF, "total sent:%d\n", pkt_sent); io_printf (IO_BUF, "recv: fwd:%d bkp:%d\n", recv_fwd, recv_bkp); io_printf (IO_BUF, "sent: fwd:%d bkp:%d\n", sent_fwd, sent_bkp); - if (tcfg.is_first_output_group) + io_printf (IO_BUF, "crit sent:%d\n", crt_sent); + if (tcfg.is_last_sgrp) { - io_printf (IO_BUF, "criterion recv: first\n"); + io_printf (IO_BUF, "crit recv:%d\n", crt_recv); } - else - { - io_printf (IO_BUF, "criterion recv:%d\n", crt_recv); - } - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { io_printf (IO_BUF, "stop sent:%d\n", stp_sent); io_printf (IO_BUF, "stpn sent:%d\n", stn_sent); } else { - io_printf (IO_BUF, "criterion sent:%d\n", crt_sent); io_printf (IO_BUF, "stop recv:%d\n", stp_recv); io_printf (IO_BUF, "stpn recv:%d\n", stn_recv); } @@ -805,6 +813,17 @@ void stage_done (uint ec, uint key) if (wrng_sth) io_printf (IO_BUF, "wrong sth:%d\n", wrng_sth); #endif +#ifdef PROFILE + // report PROFILER values + io_printf (IO_BUF, "min fwd proc:%u\n", prf_fwd_min); + io_printf (IO_BUF, "max fwd proc:%u\n", prf_fwd_max); + if (xcfg.training) + { + io_printf (IO_BUF, "min bkp proc:%u\n", prf_bkp_min); + io_printf (IO_BUF, "max bkp proc:%u\n", prf_bkp_max); + } +#endif + #ifdef DEBUG // close log, io_printf (IO_BUF, "stopping stage %u\n", xcfg.stage_id); diff --git a/c_code/init_w.c b/c_code/init_w.c index c30f44c..c32638b 100644 --- a/c_code/init_w.c +++ b/c_code/init_w.c @@ -26,6 +26,11 @@ uint cfg_init (void) io_printf (IO_BUF, "weight\n"); #endif +#ifdef PROFILE + // configure timer 2 for profiling + tc[T2_CONTROL] = SPINN_PROFILER_CFG; +#endif + // read the data specification header data_specification_metadata_t * data = data_specification_get_data_address(); @@ -93,8 +98,6 @@ uint cfg_init (void) #ifdef DEBUG_CFG io_printf (IO_BUF, "nr: %d\n", wcfg.num_rows); io_printf (IO_BUF, "nc: %d\n", wcfg.num_cols); - io_printf (IO_BUF, "rb: %d\n", wcfg.row_blk); - io_printf (IO_BUF, "cb: %d\n", wcfg.col_blk); io_printf (IO_BUF, "lr: %k\n", wcfg.learningRate); io_printf (IO_BUF, "wd: %k\n", wcfg.weightDecay); io_printf (IO_BUF, "mm: %k\n", wcfg.momentum); @@ -324,12 +327,10 @@ void var_init (uint init_weights, uint reset_examples) wb_update_func = w_update_procs[xcfg.update_function]; // initialise packet keys - //NOTE: colour is initialised to 0. - fwdKey = rt[FWD] | SPINN_PHASE_KEY(SPINN_FORWARD) - | SPINN_BLOCK_KEY(wcfg.col_blk); - bkpKey = rt[BKP] | SPINN_PHASE_KEY(SPINN_BACKPROP) - | SPINN_BLOCK_KEY(wcfg.row_blk); - ldsaKey = rt[LDS] | SPINN_LDSA_KEY | SPINN_PHASE_KEY(SPINN_BACKPROP); + //NOTE: colour is implicitly initialised to 0 + fwdKey = rt[FWD] | SPINN_PHASE_KEY(SPINN_FORWARD); + bkpKey = rt[BKP] | SPINN_PHASE_KEY(SPINN_BACKPROP); + ldsKey = rt[LDS] | SPINN_LDSA_KEY | SPINN_PHASE_KEY(SPINN_BACKPROP); #ifdef DEBUG // ------------------------------------------------------------------------ @@ -347,8 +348,8 @@ void var_init (uint init_weights, uint reset_examples) stp_sent = 0; // stop packets sent stp_recv = 0; // stop packets received stn_recv = 0; // network_stop packets received - lda_sent = 0; // partial link_delta packets sent - ldr_recv = 0; // link_delta packets received + lds_sent = 0; // link_delta packets sent + lds_recv = 0; // link_delta packets received wrng_fph = 0; // FORWARD packets received in wrong phase wrng_bph = 0; // BACKPROP received in wrong phase wght_ups = 0; // number of weight updates done @@ -358,6 +359,17 @@ void var_init (uint init_weights, uint reset_examples) tot_tick = 0; // total number of ticks executed // ------------------------------------------------------------------------ #endif + +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +prf_fwd_min = SPINN_PROFILER_START; // minimum FORWARD processing time +prf_fwd_max = 0; // maximum FORWARD processing time +prf_bkp_min = SPINN_PROFILER_START; // minimum BACKPROP processing time +prf_bkp_max = 0; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif } // ------------------------------------------------------------------------ @@ -472,8 +484,8 @@ void stage_done (uint ec, uint key) io_printf (IO_BUF, "recv: fwd:%d bkp:%d\n", recv_fwd, recv_bkp); io_printf (IO_BUF, "sent: fwd:%d bkp:%d\n", sent_fwd, sent_bkp); io_printf (IO_BUF, "unused recv: fwd:%d bkp:%d\n", pkt_fwbk, pkt_bwbk); - io_printf (IO_BUF, "ldsa sent:%d\n", lda_sent); - io_printf (IO_BUF, "ldsr recv:%d\n", ldr_recv); + io_printf (IO_BUF, "lds sent:%d\n", lds_sent); + io_printf (IO_BUF, "lds recv:%d\n", lds_recv); io_printf (IO_BUF, "stop recv:%d\n", stp_recv); io_printf (IO_BUF, "stpn recv:%d\n", stn_recv); io_printf (IO_BUF, "sync recv:%d\n", spk_recv); @@ -486,6 +498,17 @@ void stage_done (uint ec, uint key) io_printf (IO_BUF, "weight updates:%d\n", wght_ups); #endif +#ifdef PROFILE + // report PROFILER values + io_printf (IO_BUF, "min fwd proc:%u\n", prf_fwd_min); + io_printf (IO_BUF, "max fwd proc:%u\n", prf_fwd_max); + if (xcfg.training) + { + io_printf (IO_BUF, "min bkp proc:%u\n", prf_bkp_min); + io_printf (IO_BUF, "max bkp proc:%u\n", prf_bkp_max); + } +#endif + #ifdef DEBUG // close log, io_printf (IO_BUF, "stopping stage %u\n", xcfg.stage_id); diff --git a/c_code/input.c b/c_code/input.c index 5b01d6f..b821ec6 100644 --- a/c_code/input.c +++ b/c_code/input.c @@ -116,7 +116,7 @@ long_delta_t * i_deltas; // deltas computed in current tick pkt_queue_t i_pkt_queue; // queue to hold received packets uchar i_active; // processing packets from queue? -long_net_t * i_last_integr_net; //last INTEGRATOR output value +long_net_t * i_last_integr_net; //last INTEGRATOR output value long_delta_t * i_last_integr_delta; //last INTEGRATOR delta value uint i_it_idx; // index into current inputs/targets @@ -131,10 +131,8 @@ uint if_thrds_pend; // thread semaphore long_delta_t * ib_init_delta; // initial delta value for every tick scoreboard_t ib_done; // current tick delta computation done -uint * i_bkpKey; // i cores have one bkpKey per partition - // history arrays -long_net_t * i_net_history; //sdram pointer where to store input history +long_net_t * i_net_history; //sdram pointer where to store input history // ------------------------------------------------------------------------ @@ -160,6 +158,18 @@ uint tot_tick; // total number of ticks executed #endif +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +uint prf_fwd_min; // minimum FORWARD processing time +uint prf_fwd_max; // maximum FORWARD processing time +uint prf_bkp_min; // minimum BACKPROP processing time +uint prf_bkp_max; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif + + // ------------------------------------------------------------------------ // timer callback: check that there has been progress in execution. // If no progress has been made terminate with SPINN_TIMEOUT_EXIT code. diff --git a/c_code/mlp_externs.h b/c_code/mlp_externs.h index e0edeb8..c58ff76 100644 --- a/c_code/mlp_externs.h +++ b/c_code/mlp_externs.h @@ -12,9 +12,7 @@ extern uint coreID; // 5-bit virtual core ID extern uint fwdKey; // packet ID for FORWARD-phase data extern uint bkpKey; // packet ID for BACKPROP-phase data -extern uint ldsaKey; // packet ID for link delta summation accumulators -extern uint ldstKey; // packet ID for link delta summation totals -extern uint ldsrKey; // packet ID for link delta summation reports +extern uint ldsKey; // packet ID for link delta summation extern uint fdsKey; // packet ID for FORWARD synchronisation extern uint32_t stage_step; // current stage step @@ -92,19 +90,18 @@ extern activation_t * w_output_history; // ------------------------------------------------------------------------ // sum core variables // ------------------------------------------------------------------------ -extern long_net_t * s_nets[2]; // unit nets computed in current tick -extern long_error_t * s_errors[2]; // errors computed in current tick -extern pkt_queue_t s_pkt_queue; // queue to hold received packets -extern uchar s_active; // processing packets from queue? -extern lds_t s_lds_part; // partial link delta sum -extern scoreboard_t * sf_arrived[2]; // keep count of expected net b-d-p -extern scoreboard_t sf_done; // current tick net computation done -extern uint sf_thrds_pend; // thread semaphore -extern scoreboard_t * sb_arrived[2]; // keep count of expected error b-d-p -extern scoreboard_t sb_done; // current tick error computation done -extern uint sb_thrds_pend; // thread semaphore -extern scoreboard_t s_ldsa_arrived; // keep count of the number of partial link delta sums -extern scoreboard_t s_ldst_arrived; // keep count of the number of link delta sum totals +extern long_net_t * s_nets[2]; // unit nets computed in current tick +extern long_error_t * s_errors[2]; // errors computed in current tick +extern pkt_queue_t s_pkt_queue; // queue to hold received packets +extern uchar s_active; // processing packets from queue? +extern lds_t s_lds_part; // partial link delta sum +extern scoreboard_t * sf_arrived[2]; // keep count of expected net b-d-p +extern scoreboard_t sf_done; // current tick net computation done +extern uint sf_thrds_pend; // thread semaphore +extern scoreboard_t * sb_arrived[2]; // keep count of expected error b-d-p +extern scoreboard_t sb_done; // current tick error computation done +extern uint sb_thrds_pend; // thread semaphore +extern scoreboard_t s_lds_arrived; // keep count of received link delta sums // ------------------------------------------------------------------------ // ------------------------------------------------------------------------ @@ -129,10 +126,8 @@ extern scoreboard_t ib_done; // current tick delta computation done extern long_net_t * i_last_integr_net; //last INTEGRATOR output value extern long_delta_t * i_last_integr_delta; //last INTEGRATOR delta value -extern uint * i_bkpKey; // i cores have one bkpKey per partition - // history arrays -extern long_net_t * i_net_history; //sdram pointer where to store input history +extern long_net_t * i_net_history; //sdram pointer where to store input history // ------------------------------------------------------------------------ // ------------------------------------------------------------------------ @@ -160,7 +155,8 @@ extern uchar tf_active; // processing FWD-phase packet queue? extern scoreboard_t tf_arrived; // keep count of expected nets extern uint tf_thrds_pend; // thread semaphore extern uchar tf_crit_prev; // criterion value received -extern uchar tf_init_crit; // criterion init value +extern scoreboard_t tf_crit_arrived; // keep count of expected crit pkts +extern uchar tf_crit_init; // criterion init value extern uchar tf_crit_rdy; // criterion can be forwarded extern uchar tf_stop_crit; // stop criterion met? extern uchar tf_group_crit; // stop criterion met for all groups? @@ -188,18 +184,16 @@ extern uchar t_rec_results; // record test results to SDRAM extern uchar t_rec_tick_data; // record tick data to SDRAM extern uchar t_rec_step_updt; // update recording step -extern uint * t_fwdKey; // t cores have one fwdKey per partition - // history arrays extern net_t * t_net_history; extern activation_t * t_output_history; extern long_deriv_t * t_output_deriv_history; // ------------------------------------------------------------------------ +#ifdef DEBUG // ------------------------------------------------------------------------ // DEBUG variables // ------------------------------------------------------------------------ -#ifdef DEBUG extern uint pkt_sent; // total packets sent extern uint sent_fwd; // packets sent in FORWARD phase extern uint sent_bkp; // packets sent in BACKPROP phase @@ -216,12 +210,8 @@ extern uint stp_sent; // stop packets sent extern uint stp_recv; // stop packets received extern uint stn_sent; // network_stop packets sent extern uint stn_recv; // network_stop packets received -extern uint lda_sent; // partial link_delta packets sent -extern uint lda_recv; // partial link_delta packets received -extern uint ldt_sent; // total link_delta packets sent -extern uint ldt_recv; // total link_delta packets received -extern uint ldr_sent; // link_delta packets sent -extern uint ldr_recv; // link_delta packets received +extern uint lds_sent; // link_delta packets sent +extern uint lds_recv; // link_delta packets received extern uint tot_tick; // total number of ticks executed extern uint wght_ups; // number of weight updates done extern uint wrng_phs; // packets received in wrong phase @@ -230,7 +220,20 @@ extern uint wrng_bph; // BACKPROP packets received in wrong phase extern uint wrng_pth; // unexpected processing thread extern uint wrng_cth; // unexpected comms thread extern uint wrng_sth; // unexpected stop thread +// ------------------------------------------------------------------------ #endif + + +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +extern uint prf_fwd_min; // minimum FORWARD processing time +extern uint prf_fwd_max; // maximum FORWARD processing time +extern uint prf_bkp_min; // minimum BACKPROP processing time +extern uint prf_bkp_max; // maximum BACKPROP processing time // ------------------------------------------------------------------------ +#endif + #endif diff --git a/c_code/mlp_params.h b/c_code/mlp_params.h index b1be49a..c08b327 100644 --- a/c_code/mlp_params.h +++ b/c_code/mlp_params.h @@ -12,6 +12,16 @@ #define SPINN_TIMER_TICK_PERIOD 1000000 #define SPINN_PRINT_SHIFT 16 + +// ------------------------------------------------------------------------ +// profiler constants +// ------------------------------------------------------------------------ +// configure timer2 for profiling: enabled, free running, +// interrupt disabled, no pre-scale and 32-bit one-shot mode +#define SPINN_PROFILER_CFG 0x83 +#define SPINN_PROFILER_START 0xffffffff + + // ------------------------------------------------------------------------ // neural net constants // ------------------------------------------------------------------------ @@ -89,9 +99,7 @@ // packet type keys #define SPINN_DATA_KEY 0x00000000 #define SPINN_SYNC_KEY 0x00001000 -#define SPINN_LDST_KEY 0x00002000 #define SPINN_LDSA_KEY 0x00003000 -#define SPINN_LDSR_KEY 0x00004000 #define SPINN_CRIT_KEY 0x00005000 #define SPINN_STPN_KEY 0x00006000 #define SPINN_STOP_KEY 0x00007000 @@ -125,17 +133,6 @@ // ------------------------------------------------------------------------ -// ------------------------------------------------------------------------ -// core function types -// ------------------------------------------------------------------------ -#define SPINN_WEIGHT_PROC 0x0 -#define SPINN_SUM_PROC 0x1 -#define SPINN_THRESHOLD_PROC 0x2 -#define SPINN_INPUT_PROC 0x3 -#define SPINN_UNUSED_PROC 0x4 -// ------------------------------------------------------------------------ - - // ------------------------------------------------------------------------ // implementation parameters // ------------------------------------------------------------------------ @@ -154,8 +151,6 @@ #define SPINN_THRD_COMS ((SPINN_THRD_PROC) << 1) #define SPINN_THRD_STOP ((SPINN_THRD_COMS) << 1) #define SPINN_THRD_LDSA ((SPINN_THRD_STOP) << 1) -#define SPINN_THRD_LDST ((SPINN_THRD_LDSA) << 1) -#define SPINN_THRD_LDSR (SPINN_THRD_LDSA) #define SPINN_WF_THRDS (SPINN_THRD_PROC | SPINN_THRD_COMS | SPINN_THRD_STOP) #define SPINN_WB_THRDS (SPINN_THRD_PROC) @@ -207,12 +202,12 @@ // ------------------------------------------------------------------------ // EXIT codes -- error // ------------------------------------------------------------------------ -#define SPINN_NO_ERROR 0 -#define SPINN_MEM_UNAVAIL 1 -#define SPINN_QUEUE_FULL 2 -#define SPINN_TIMEOUT_EXIT 3 -#define SPINN_UNXPD_PKT 4 -#define SPINN_CFG_UNAVAIL 5 +#define SPINN_NO_ERROR 0 +#define SPINN_MEM_UNAVAIL 1 +#define SPINN_QUEUE_FULL 2 +#define SPINN_TIMEOUT_EXIT 3 +#define SPINN_UNXPD_PKT 4 +#define SPINN_CFG_UNAVAIL 5 // ------------------------------------------------------------------------ #endif diff --git a/c_code/mlp_types.h b/c_code/mlp_types.h index 97fa723..37355c0 100644 --- a/c_code/mlp_types.h +++ b/c_code/mlp_types.h @@ -24,16 +24,12 @@ enum MLPRecordings { TICK_DATA = 2 }; -// t cores can have more than one FWD key (due to partitions) -// i cores can have more than one BKP key (due to partitions) enum MLPKeys { FWD = 0, BKP = 1, FDS = 2, STP = 3, - LDS = 4, - FWDT = 5, - BKPI = 5 + LDS = 4 }; @@ -223,7 +219,6 @@ typedef struct network_conf // MLP network configuration uchar net_type; // type of neural net uint ticks_per_int; // number of ticks per interval uint global_max_ticks; // max number of ticks across all the examples - uint num_write_blks; // number of groups that write outputs } network_conf_t; // ------------------------------------------------------------------------ @@ -232,8 +227,8 @@ typedef struct network_conf // MLP network configuration // weight core configuration // ------------------------------------------------------------------------ // The neural net is represented by a weight matrix. -// The matrix is divided into num_rblks x num_cblk weight blocks -// and every weight core computes for one of these blocks. +// The matrix is divided into a number of weight blocks and each +// weight core gets assigned one of these blocks for computation. // Each block is associated with a single projection, i.e., it contains // connection weights associated with a single origin group and a single // destination group (which can be the same in recurrent networks). @@ -245,8 +240,6 @@ typedef struct w_conf // weight core configuration { uint num_rows; // rows in this core's block uint num_cols; // columns in this core's block - uint row_blk; // this core's row block number - uint col_blk; // this core's column block number scoreboard_t sync_expected; // num of expected sync packets activation_t initOutput; // initial value for unit outputs short_fpreal learningRate; // network learning rate @@ -267,9 +260,9 @@ typedef struct s_conf // sum core configuration uint num_units; // this core's number of units scoreboard_t fwd_expected; // num of expected partial nets scoreboard_t bkp_expected; // num of expected partial errors - scoreboard_t ldsa_expected; // num of expected partial link delta sums - scoreboard_t ldst_expected; // num of expected link delta sum totals + scoreboard_t lds_expected; // num of expected partial link delta sums uchar is_first_group; // is this the first group in the network? + uchar is_tree_root; // is this the root of an s_core tree? } s_conf_t; // ------------------------------------------------------------------------ @@ -285,7 +278,6 @@ typedef struct i_conf // input core configuration uchar output_grp; // is this an OUTPUT group? uchar input_grp; // is this an INPUT group? uint num_units; // this core's number of units - uint partitions; // this groups's number of partitions uint num_in_procs; // number of input (net) comp procedures uint procs_list[SPINN_NUM_IN_PROCS]; uchar in_integr_en; // input INTEGRATOR in use @@ -310,9 +302,8 @@ typedef struct t_conf // threshold core configuration { uchar output_grp; // is this an OUTPUT group? uchar input_grp; // is this an INPUT group? + uchar is_last_sgrp; // is last subgroup of the group? uint num_units; // this core's number of units - uint partitions; // this group's number of partitions - uint write_blk; // this core's write block uchar hard_clamp_en; // HARD CLAMP in use uchar out_integr_en; // output INTEGRATOR in use fpreal out_integr_dt; // integration time const for input integr @@ -322,9 +313,10 @@ typedef struct t_conf // threshold core configuration activation_t initOutput; // initial value for unit outputs error_t tst_group_criterion; // test-mode convergence criterion value error_t trn_group_criterion; // train-mode convergence criterion value + uint crit_expected; // num of expected partial crit pkts uchar criterion_function; // function to eval convergence criterion - uchar is_first_output_group; // is this the first of the output groups - uchar is_last_output_group; // is this the last of the output groups + uchar is_first_output; // is this the first output subgroup + uchar is_last_output; // is this the last output subgroup uchar error_function; // error function used for BACKPROP } t_conf_t; // ------------------------------------------------------------------------ diff --git a/c_code/process_i.c b/c_code/process_i.c index 7a7ccbb..40a1535 100644 --- a/c_code/process_i.c +++ b/c_code/process_i.c @@ -27,6 +27,11 @@ void if_process (uint key, uint payload) wrng_phs++; #endif +#ifdef PROFILE + // start profiler, + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + // get net index: mask out block, phase and colour data, uint inx = key & SPINN_NET_MASK; @@ -64,6 +69,13 @@ void if_process (uint key, uint payload) // mark net as done, if_done++; +#ifdef PROFILE + // update profiler values, + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_fwd_min) prf_fwd_min = cnt; + if (cnt > prf_fwd_max) prf_fwd_max = cnt; +#endif + // and check if all nets done if (if_done == icfg.num_units) { @@ -114,6 +126,11 @@ void ib_process (uint key, uint payload) wrng_phs++; #endif +#ifdef PROFILE + // start profiler, + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + // get delta index: mask out block, phase and colour data, uint inx = key & SPINN_DELTA_MASK; @@ -145,13 +162,20 @@ void ib_process (uint key, uint payload) } // incorporate delta index to the packet key and send, - while (!spin1_send_mc_packet ((i_bkpKey[inx >> SPINN_BLOCK_SHIFT] | inx), delta, WITH_PAYLOAD)); + while (!spin1_send_mc_packet ((bkpKey | inx), delta, WITH_PAYLOAD)); #ifdef DEBUG pkt_sent++; sent_bkp++; #endif +#ifdef PROFILE + // update profiler values, + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_bkp_min) prf_bkp_min = cnt; + if (cnt > prf_bkp_max) prf_bkp_max = cnt; +#endif + // mark delta as done, ib_done++; diff --git a/c_code/process_s.c b/c_code/process_s.c index 6454c45..5255018 100644 --- a/c_code/process_s.c +++ b/c_code/process_s.c @@ -27,11 +27,17 @@ void sf_process (uint key, uint payload) wrng_phs++; #endif +#ifdef PROFILE + // start profiler, + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + // get net index: mask out block and phase data, uint inx = key & SPINN_NET_MASK; // get error colour: mask out block, phase and net index data, - uint clr = (key & SPINN_COLOUR_MASK) >> SPINN_COLOUR_SHIFT; + uint pkt_clr = key & SPINN_COLOUR_MASK; + uint clr = pkt_clr >> SPINN_COLOUR_SHIFT; // accumulate new net b-d-p, s_nets[clr][inx] += (long_net_t) ((net_t) payload); @@ -39,6 +45,13 @@ void sf_process (uint key, uint payload) // mark net b-d-p as arrived, sf_arrived[clr][inx]++; +#ifdef PROFILE + // update profiler values, + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_fwd_min) prf_fwd_min = cnt; + if (cnt > prf_fwd_max) prf_fwd_max = cnt; +#endif + // and check if dot product complete to compute net if (sf_arrived[clr][inx] == scfg.fwd_expected) { @@ -58,8 +71,9 @@ void sf_process (uint key, uint payload) net_tmp = (net_t) s_nets[clr][inx]; } - // incorporate net index to the packet key and send, - while (!spin1_send_mc_packet ((fwdKey | inx), net_tmp, WITH_PAYLOAD)); + // incorporate colour and net index to the packet key and send, + while (!spin1_send_mc_packet ((fwdKey | pkt_clr | inx), + net_tmp, WITH_PAYLOAD)); #ifdef DEBUG pkt_sent++; @@ -124,11 +138,17 @@ void sb_process (uint key, uint payload) wrng_phs++; #endif +#ifdef PROFILE + // start profiler, + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + // get error index: mask out block, phase and colour data, uint inx = key & SPINN_ERROR_MASK; // get error colour: mask out block, phase and net index data, - uint clr = (key & SPINN_COLOUR_MASK) >> SPINN_COLOUR_SHIFT; + uint pkt_clr = key & SPINN_COLOUR_MASK; + uint clr = pkt_clr >> SPINN_COLOUR_SHIFT; // accumulate new error b-d-p, s_errors[clr][inx] += (error_t) payload; @@ -136,6 +156,13 @@ void sb_process (uint key, uint payload) // mark error b-d-p as arrived, sb_arrived[clr][inx]++; +#ifdef PROFILE + // update profiler values, + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_bkp_min) prf_bkp_min = cnt; + if (cnt > prf_bkp_max) prf_bkp_max = cnt; +#endif + // and check if error complete to send to next stage if (sb_arrived[clr][inx] == scfg.bkp_expected) { @@ -160,8 +187,9 @@ void sb_process (uint key, uint payload) } */ - // incorporate error index to the packet key and send, - while (!spin1_send_mc_packet ((bkpKey | inx), error, WITH_PAYLOAD)); + // incorporate colour and error index to the packet key and send, + while (!spin1_send_mc_packet ((bkpKey | pkt_clr | inx), + error, WITH_PAYLOAD)); #ifdef DEBUG pkt_sent++; @@ -193,6 +221,8 @@ void sb_process (uint key, uint payload) if (sb_thrds_pend == SPINN_THRD_PROC) { // if done initialise semaphore: + sb_thrds_pend = SPINN_SB_THRDS; + // if we are using Doug's Momentum, and we have reached the end of the // epoch (i.e. we are on the last example, and are about to move on to // the last tick, we need have to wait for the partial link delta sums @@ -202,16 +232,7 @@ void sb_process (uint key, uint payload) && example_cnt == (xcfg.num_examples - 1) && tick == SPINN_SB_END_TICK + 1) { - // if this s core relates to the first group in the network, then we - // also need to wait for the link delta sum totals - if (scfg.is_first_group) - { - sb_thrds_pend = SPINN_SB_THRDS | SPINN_THRD_LDSA | SPINN_THRD_LDST; - } - else - { - sb_thrds_pend = SPINN_SB_THRDS | SPINN_THRD_LDSA; - } + sb_thrds_pend = SPINN_SB_THRDS | SPINN_THRD_LDSA; } // restore interrupts after flag access, @@ -385,14 +406,6 @@ void s_advance_example (void) // reset example count for next epoch, example_cnt = 0; - - // and reset the partial link delta sum - if (xcfg.training) - { - s_lds_part = 0; - s_ldsa_arrived = 0; - s_ldst_arrived = 0; - } } // start from first event for next example, @@ -400,11 +413,14 @@ void s_advance_example (void) num_events = ex[example_inx].num_events; // and send sync packet to allow next example to start - while (!spin1_send_mc_packet (fdsKey, 0, NO_PAYLOAD)); + if (scfg.is_tree_root) + { + while (!spin1_send_mc_packet (fdsKey, 0, NO_PAYLOAD)); #ifdef DEBUG - pkt_sent++; - spk_sent++; + pkt_sent++; + spk_sent++; #endif + } } // ------------------------------------------------------------------------ diff --git a/c_code/process_t.c b/c_code/process_t.c index a27c892..a98fdda 100644 --- a/c_code/process_t.c +++ b/c_code/process_t.c @@ -35,6 +35,11 @@ void tf_process (uint key, uint payload) wrng_phs++; #endif +#ifdef PROFILE + // start profiler + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + // get net index: mask out block, phase and colour data, uint inx = (key & SPINN_NET_MASK); @@ -58,9 +63,8 @@ void tf_process (uint key, uint payload) } // send newly computed output to w cores, - while (!spin1_send_mc_packet ((t_fwdKey[inx >> SPINN_BLOCK_SHIFT] | inx), - (uint) t_outputs[inx], - WITH_PAYLOAD + while (!spin1_send_mc_packet ((fwdKey | inx), (uint) t_outputs[inx], + WITH_PAYLOAD ) ); @@ -76,6 +80,13 @@ void tf_process (uint key, uint payload) // mark net as arrived, tf_arrived++; +#ifdef PROFILE + // update profiler values + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_fwd_min) prf_fwd_min = cnt; + if (cnt > prf_fwd_max) prf_fwd_max = cnt; +#endif + // and check if all nets arrived (i.e., all outputs done) if (tf_arrived == tcfg.num_units) { @@ -103,65 +114,37 @@ void tf_process (uint key, uint payload) // access thread semaphore and flags with interrupts disabled, uint cpsr = spin1_int_disable (); - // and check if all other threads done - if (tcfg.output_grp) - { - // report processing thread done, - //NOTE: tick stop decision cannot have arrived! - tf_thrds_pend &= ~SPINN_THRD_PROC; + // report processing thread done, + //NOTE: tick stop decision cannot have arrived! + tf_thrds_pend &= ~SPINN_THRD_PROC; - // check if criterion value can be forwarded - if (tf_crit_rdy) - { - // initialise semaphore, - tf_crit_rdy = tf_init_crit; + // check if criterion value can be forwarded + if (tf_crit_rdy) + { + // initialise flag, + tf_crit_rdy = tf_crit_init; - // restore interrupts after flag access, - spin1_mode_restore (cpsr); + // restore interrupts after flag access, + spin1_mode_restore (cpsr); - // send (criterion/tick stop) packet, - tf_send_stop (); + // send (criterion/tick stop) packet, + tf_send_stop (); - // and advance tick if last group - //NOTE: last group does not get a stop decision - if (tcfg.is_last_output_group) - { - //TODO: check if need to schedule or can simply call - tf_advance_tick (); - } - } - else + // and advance tick if last group + //NOTE: last group does not get a stop decision + if (tcfg.is_last_output) { - // flag that local value is ready, - tf_crit_rdy = 1; - - // and restore interrupts after flag access - spin1_mode_restore (cpsr); + //TODO: check if need to schedule or can simply call + tf_advance_tick (); } } else { - // check if all other threads done - if (tf_thrds_pend == SPINN_THRD_PROC) - { - // initialise semaphore, - tf_thrds_pend = SPINN_TF_THRDS; - - // restore interrupts after flag access, - spin1_mode_restore (cpsr); - - // and advance tick - //TODO: check if need to schedule or can simply call - tf_advance_tick (); - } - else - { - // if not done report processing thread done, - tf_thrds_pend &= ~SPINN_THRD_PROC; + // flag that local value is ready, + tf_crit_rdy = 1; - // and restore interrupts after flag access - spin1_mode_restore (cpsr); - } + // and restore interrupts after flag access + spin1_mode_restore (cpsr); } } } @@ -185,6 +168,11 @@ void tb_process (uint unused0, uint unused1) //TODO: this needs checking! for (uint inx = 0; inx < tcfg.num_units; inx++) { +#ifdef PROFILE + // start profiler + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + if (tcfg.output_grp) { // output groups: @@ -220,6 +208,13 @@ void tb_process (uint unused0, uint unused1) pkt_sent++; sent_bkp++; #endif + +#ifdef PROFILE + // update profiler values + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_bkp_min) prf_bkp_min = cnt; + if (cnt > prf_bkp_max) prf_bkp_max = cnt; +#endif } // access thread semaphore with interrupts disabled @@ -272,7 +267,7 @@ void tf_advance_tick (void) if (tick_stop) { // update event criterion - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { tf_event_crit = tf_event_crit && tf_group_crit && (ev_tick >= min_ticks); max_evt = evt; @@ -324,7 +319,7 @@ void tb_advance_tick (uint unused0, uint unused1) t_switch_to_fw (); // update example criterion, - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { tf_example_crit = tf_example_crit && tf_event_crit && (max_evt >= num_events - 1); } @@ -409,7 +404,7 @@ void tf_advance_event (void) t_it_idx += tcfg.num_units; // and update number of ticks for new event - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { // maximum if (ev[event_idx + evt].max_time != SPINN_FP_NaN) @@ -469,7 +464,7 @@ void t_advance_example (void) epoch++; // check if stage done, - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { // report network stop decision, nsd = (!xcfg.training || (epoch >= xcfg.num_epochs)) ? 1 : tf_example_crit; @@ -561,7 +556,7 @@ void t_advance_example (void) t_init_outputs (); // and update next event data - if (tcfg.is_last_output_group) + if (tcfg.is_last_output) { // update number of ticks for new event, // maximum diff --git a/c_code/process_w.c b/c_code/process_w.c index 42fc8fa..3e46a7b 100644 --- a/c_code/process_w.c +++ b/c_code/process_w.c @@ -32,6 +32,11 @@ void wf_process (uint unused0, uint unused1) // compute all net block dot-products and send them for accumulation, for (uint j = 0; j < wcfg.num_cols; j++) { +#ifdef PROFILE + // start profiler + tc[T2_LOAD] = SPINN_PROFILER_START; +#endif + long_net_t net_part_tmp = 0; for (uint i = 0; i < wcfg.num_rows; i++) @@ -60,6 +65,13 @@ void wf_process (uint unused0, uint unused1) pkt_sent++; sent_fwd++; #endif + +#ifdef PROFILE + // update profiler values + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_fwd_min) prf_fwd_min = cnt; + if (cnt > prf_fwd_max) prf_fwd_max = cnt; +#endif } // access thread semaphore with interrupts disabled @@ -104,13 +116,11 @@ void wb_process (uint key, uint payload) recv_bkp++; if (phase == SPINN_FORWARD) wrng_bph++; +#endif - uint blk = (key & SPINN_BLOCK_MASK) >> SPINN_BLOCK_SHIFT; - if (blk != wcfg.col_blk) - { - pkt_bwbk++; - return; - } +#ifdef PROFILE + // start profiler + tc[T2_LOAD] = SPINN_PROFILER_START; #endif // get delta index: mask out phase and block data, @@ -208,14 +218,21 @@ void wb_process (uint key, uint payload) lds_to_send = (lds_t) link_delta_sum; // and send partial link delta sum - while (!spin1_send_mc_packet (ldsaKey, (uint) lds_to_send, WITH_PAYLOAD)); + while (!spin1_send_mc_packet (ldsKey, (uint) lds_to_send, WITH_PAYLOAD)); #ifdef DEBUG pkt_sent++; - lda_sent++; + lds_sent++; #endif } +#ifdef PROFILE + // update profiler values + uint cnt = SPINN_PROFILER_START - tc[T2_COUNT]; + if (cnt < prf_bkp_min) prf_bkp_min = cnt; + if (cnt > prf_bkp_max) prf_bkp_max = cnt; +#endif + // if done with all deltas advance tick if (wb_arrived == wcfg.num_cols) { @@ -242,7 +259,7 @@ void wb_process (uint key, uint payload) && example_cnt == (xcfg.num_examples - 1) && tick == SPINN_WB_END_TICK + 1) { - wb_thrds_pend = SPINN_WB_THRDS | SPINN_THRD_LDSR; + wb_thrds_pend = SPINN_WB_THRDS | SPINN_THRD_LDSA; } // restore interrupts after semaphore access, diff --git a/c_code/sum.c b/c_code/sum.c index b47a7c6..55a6602 100644 --- a/c_code/sum.c +++ b/c_code/sum.c @@ -26,8 +26,7 @@ uint coreID; // 5-bit virtual core ID uint fwdKey; // packet ID for FORWARD-phase data uint bkpKey; // packet ID for BACKPROP-phase data -uint ldstKey; // packet ID for link delta summation totals -uint ldsrKey; // packet ID for link delta summation reports +uint ldsKey; // packet ID for link delta summation uint fdsKey; // packet ID for FORWARD synchronisation uint32_t stage_step; // current stage step @@ -95,8 +94,7 @@ uint sf_thrds_pend; // thread semaphore scoreboard_t * sb_arrived[2]; // keep count of expected error b-d-p scoreboard_t sb_done; // current tick error computation done uint sb_thrds_pend; // thread semaphore -scoreboard_t s_ldsa_arrived; // keep count of the number of partial link delta sums -scoreboard_t s_ldst_arrived; // keep count of the number of link delta sum totals +scoreboard_t s_lds_arrived; // keep count of the number of partial link delta sums // ------------------------------------------------------------------------ @@ -114,10 +112,8 @@ uint spk_sent; // sync packets sent uint stp_sent; // stop packets sent uint stp_recv; // stop packets received uint stn_recv; // network_stop packets received -uint lda_recv; // partial link_delta packets received -uint ldt_sent; // total link_delta packets sent -uint ldt_recv; // total link_delta packets received -uint ldr_sent; // link_delta packets sent +uint lds_sent; // link_delta packets sent +uint lds_recv; // link_delta packets received uint wrng_phs; // packets received in wrong phase uint wrng_pth; // unexpected processing thread uint wrng_cth; // unexpected comms thread @@ -127,6 +123,18 @@ uint tot_tick; // total number of ticks executed #endif +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +uint prf_fwd_min; // minimum FORWARD processing time +uint prf_fwd_max; // maximum FORWARD processing time +uint prf_bkp_min; // minimum BACKPROP processing time +uint prf_bkp_max; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif + + // ------------------------------------------------------------------------ // timer callback: check that there has been progress in execution. // If no progress has been made terminate with SPINN_TIMEOUT_EXIT code. diff --git a/c_code/threshold.c b/c_code/threshold.c index c68ee61..b46e7e9 100644 --- a/c_code/threshold.c +++ b/c_code/threshold.c @@ -153,7 +153,8 @@ uchar tf_active; // processing FWD-phase packet queue? scoreboard_t tf_arrived; // keep count of expected nets uint tf_thrds_pend; // thread semaphore uchar tf_crit_prev; // criterion value received -uchar tf_init_crit; // criterion init value +scoreboard_t tf_crit_arrived; // keep count of expected crit pkts +uchar tf_crit_init; // criterion init value uchar tf_crit_rdy; // criterion can be forwarded uchar tf_stop_crit; // stop criterion met? uchar tf_group_crit; // stop criterion met for all groups? @@ -186,8 +187,6 @@ uchar t_rec_results; // record test results to SDRAM uchar t_rec_tick_data; // record tick data to SDRAM uchar t_rec_step_updt; // update recording step -uint * t_fwdKey; // t cores have one fwdKey per partition - // history arrays net_t * t_net_history; activation_t * t_output_history; @@ -220,6 +219,18 @@ uint tot_tick; // total number of ticks executed #endif +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +uint prf_fwd_min; // minimum FORWARD processing time +uint prf_fwd_max; // maximum FORWARD processing time +uint prf_bkp_min; // minimum BACKPROP processing time +uint prf_bkp_max; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif + + // ------------------------------------------------------------------------ // timer callback: check that there has been progress in execution. // If no progress has been made terminate with SPINN_TIMEOUT_EXIT code. diff --git a/c_code/weight.c b/c_code/weight.c index fc16a2d..3aecf12 100644 --- a/c_code/weight.c +++ b/c_code/weight.c @@ -40,7 +40,7 @@ uint coreID; // 5-bit virtual core ID uint fwdKey; // packet ID for FORWARD-phase data uint bkpKey; // packet ID for BACKPROP-phase data -uint ldsaKey; // packet ID for link delta summation +uint ldsKey; // packet ID for link delta summation uint32_t stage_step; // current stage step uint32_t stage_num_steps; // current stage number of steps @@ -142,8 +142,8 @@ uint spk_recv; // sync packets received uint stp_sent; // stop packets sent uint stp_recv; // stop packets received uint stn_recv; // network_stop packets received -uint lda_sent; // partial link_delta packets sent -uint ldr_recv; // link_delta packets received +uint lds_sent; // link_delta packets sent +uint lds_recv; // link_delta packets received uint wrng_fph; // FORWARD packets received in wrong phase uint wrng_bph; // BACKPROP packets received in wrong phase uint wght_ups; // number of weight updates done @@ -155,6 +155,18 @@ uint tot_tick; // total number of ticks executed #endif +#ifdef PROFILE +// ------------------------------------------------------------------------ +// PROFILER variables +// ------------------------------------------------------------------------ +uint prf_fwd_min; // minimum FORWARD processing time +uint prf_fwd_max; // maximum FORWARD processing time +uint prf_bkp_min; // minimum BACKPROP processing time +uint prf_bkp_max; // maximum BACKPROP processing time +// ------------------------------------------------------------------------ +#endif + + // ------------------------------------------------------------------------ // timer callback: check that there has been progress in execution. // If no progress has been made terminate with SPINN_TIMEOUT_EXIT code. diff --git a/spinn_pdp2/input_vertex.py b/spinn_pdp2/input_vertex.py index 315210a..a73c7b4 100644 --- a/spinn_pdp2/input_vertex.py +++ b/spinn_pdp2/input_vertex.py @@ -37,92 +37,94 @@ class InputVertex( def __init__(self, network, - group + group, + subgroup ): + self._network = network + self._group = group + self._subgroup = subgroup + super(InputVertex, self).__init__( - label = "i_core{}".format (group.id), + label = f"i_core{self.group.id}/{self.subgroup}", binary_name = "input.aplx", constraints = None) self._stage = 0 # application-level data - self._network = network - self._group = group - self._set_cfg = network._ex_set.set_config - self._ex_cfg = network._ex_set.example_config - self._ev_cfg = network._ex_set.event_config + self._set_cfg = self.network.ex_set.set_config + self._ex_cfg = self.network.ex_set.example_config + self._ev_cfg = self.network.ex_set.event_config # application parameters - self._in_integr_dt = 1.0 / network.ticks_per_int - - # forward and backprop link partition names - self._fwd_link = "fwd_i{}".format (self.group.id) - self._bkp_link = [] - for p in range (self._group.partitions): - self._bkp_link.append ("bkp_i{}_{}".format (self.group.id, p)) + self._in_integr_dt = 1.0 / self.network.ticks_per_int - # reserve key space for every link - self._n_keys = MLPConstants.KEY_SPACE_SIZE + # forward and backprop link names + self._fwd_link = f"fwd_i{self.group.id}/{self.subgroup}" + self._bkp_link = f"bkp_i{self.group.id}/{self.subgroup}" - # configuration and data files - # find out the size of an integer! - _data_int = DataType.INT32 + # input core-specific parameters + self._units = self.group.subunits[self.subgroup] + # configuration and data sizes # network configuration structure - self._N_NETWORK_CONFIGURATION_BYTES = \ - len (self._network.network_config) + self._NETWORK_CONFIGURATION_BYTES = len (self.network.network_config) # core configuration structure - self._N_CORE_CONFIGURATION_BYTES = \ - len (self.config) + self._CORE_CONFIGURATION_BYTES = len (self.config) # set configuration structure - self._N_EXAMPLE_SET_BYTES = \ - len (self._set_cfg) + self._EXAMPLE_SET_BYTES = len (self._set_cfg) # list of example configurations - self._N_EXAMPLES_BYTES = \ - len (self._ex_cfg) * len (self._ex_cfg[0]) + self._EXAMPLES_BYTES = len (self._ex_cfg) * len (self._ex_cfg[0]) # list of event configurations - self._N_EVENTS_BYTES = \ - len (self._ev_cfg) * len (self._ev_cfg[0]) + self._EVENTS_BYTES = len (self._ev_cfg) * len (self._ev_cfg[0]) - # list of group inputs (empty if not an INPUT group) - self._N_INPUTS_BYTES = \ - len (self._group.inputs) * _data_int.size + # list of subgroup inputs (empty if not an INPUT group) + if self.group.input_grp: + self._INPUTS_BYTES = ((len (self.group.inputs) // self.group.units) * + self._units * DataType.INT32.size) + else: + self._INPUTS_BYTES = 0 - # keys are integers - # i cores require a different key for every group partition - self._N_KEYS_BYTES = _data_int.size * \ - (MLPConstants.NUM_KEYS_REQ + self.group.partitions) + # list of routing keys + self._KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * DataType.INT32.size # stage configuration structure - self._N_STAGE_CONFIGURATION_BYTES = len (self._network.stage_config) + self._STAGE_CONFIGURATION_BYTES = len (self.network.stage_config) # reserve SDRAM space used to store historic data - self._NET_HISTORY_BYTES = (MLPConstants.LONG_NET_SIZE // 8) * \ - self.group.units * self._network.global_max_ticks + self._NET_HISTORY_BYTES = ((MLPConstants.LONG_NET_SIZE // 8) * + self._units * self.network.global_max_ticks) self._sdram_usage = ( - self._N_NETWORK_CONFIGURATION_BYTES + \ - self._N_CORE_CONFIGURATION_BYTES + \ - self._N_EXAMPLE_SET_BYTES + \ - self._N_EXAMPLES_BYTES + \ - self._N_EVENTS_BYTES + \ - self._N_INPUTS_BYTES + \ - self._N_KEYS_BYTES + \ - self._N_STAGE_CONFIGURATION_BYTES + \ + self._NETWORK_CONFIGURATION_BYTES + + self._CORE_CONFIGURATION_BYTES + + self._EXAMPLE_SET_BYTES + + self._EXAMPLES_BYTES + + self._EVENTS_BYTES + + self._INPUTS_BYTES + + self._KEYS_BYTES + + self._STAGE_CONFIGURATION_BYTES + self._NET_HISTORY_BYTES ) + @property + def network (self): + return self._network + @property def group (self): return self._group + @property + def subgroup (self): + return self._subgroup + @property def fwd_link (self): return self._fwd_link @@ -141,7 +143,6 @@ def config (self): uchar output_grp; uchar input_grp; uint num_units; - uint partitions; uint num_in_procs; uint procs_list[SPINN_NUM_IN_PROCS]; uchar in_integr_en; @@ -155,21 +156,21 @@ def config (self): explicit padding """ # integration dt is an MLP fixed-point fpreal - in_integr_dt = int (self._in_integr_dt * (1 << MLPConstants.FPREAL_SHIFT)) + in_integr_dt = int (self._in_integr_dt * + (1 << MLPConstants.FPREAL_SHIFT)) # soft_clamp_strength is an MLP fixed-point fpreal - soft_clamp_strength = int (self.group.soft_clamp_strength *\ - (1 << MLPConstants.FPREAL_SHIFT)) + soft_clamp_strength = int (self.group.soft_clamp_strength * + (1 << MLPConstants.FPREAL_SHIFT)) # init output is an MLP fixed-point activation_t - init_output = int (self.group.init_output *\ + init_output = int (self.group.init_output * (1 << MLPConstants.ACTIV_SHIFT)) - return struct.pack ("<2B2x5IB3x4i", + return struct.pack ("<2B2x4IB3x4i", self.group.output_grp, self.group.input_grp, - self.group.units, - self.group.partitions, + self._units, self.group.num_in_procs, self.group.in_procs_list[0].value, self.group.in_procs_list[1].value, @@ -191,7 +192,7 @@ def resources_required (self): @overrides (AbstractProvidesNKeysForPartition.get_n_keys_for_partition) def get_n_keys_for_partition (self, partition, graph_mapper): - return self._n_keys + return MLPConstants.KEY_SPACE_SIZE @overrides(MachineDataSpecableVertex.generate_machine_data_specification) @@ -204,17 +205,17 @@ def generate_machine_data_specification( # Reserve and write the network configuration region spec.reserve_memory_region (MLPRegions.NETWORK.value, - self._N_NETWORK_CONFIGURATION_BYTES) + self._NETWORK_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.NETWORK.value) # write the network configuration into spec - for c in self._network.network_config: + for c in self.network.network_config: spec.write_value (c, data_type = DataType.UINT8) # Reserve and write the core configuration region spec.reserve_memory_region (MLPRegions.CORE.value, - self._N_CORE_CONFIGURATION_BYTES) + self._CORE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.CORE.value) @@ -224,7 +225,7 @@ def generate_machine_data_specification( # Reserve and write the example set region spec.reserve_memory_region (MLPRegions.EXAMPLE_SET.value, - self._N_EXAMPLE_SET_BYTES) + self._EXAMPLE_SET_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLE_SET.value) @@ -234,7 +235,7 @@ def generate_machine_data_specification( # Reserve and write the examples region spec.reserve_memory_region (MLPRegions.EXAMPLES.value, - self._N_EXAMPLES_BYTES) + self._EXAMPLES_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLES.value) @@ -245,7 +246,7 @@ def generate_machine_data_specification( # Reserve and write the events region spec.reserve_memory_region (MLPRegions.EVENTS.value, - self._N_EVENTS_BYTES) + self._EVENTS_BYTES) spec.switch_write_focus (MLPRegions.EVENTS.value) @@ -255,25 +256,28 @@ def generate_machine_data_specification( spec.write_value (c, data_type = DataType.UINT8) # Reserve and write the input data region (if INPUT group) - if self._N_INPUTS_BYTES != 0: + if self.group.input_grp: spec.reserve_memory_region (MLPRegions.INPUTS.value, - self._N_INPUTS_BYTES) + self._INPUTS_BYTES) spec.switch_write_focus (MLPRegions.INPUTS.value) # write inputs to spec - for _i in self._group.inputs: - # inputs are MLP fixed-point activation_t - #NOTE: check for absent or NaN - if (_i is None) or (_i != _i): - _inp = MLPConstants.ACTIV_NaN - else: - _inp = int (_i * (1 << MLPConstants.ACTIV_SHIFT)) - spec.write_value (_inp, data_type = DataType.UINT32) + us = self.subgroup * MLPConstants.MAX_SUBGROUP_UNITS + for _ in range (len (self.group.inputs) // self.group.units): + for i in self.group.inputs[us : us + self._units]: + # inputs are fixed-point activation_t + #NOTE: check for absent or NaN + if (i is None) or (i != i): + inp = MLPConstants.ACTIV_NaN + else: + inp = int (i * (1 << MLPConstants.ACTIV_SHIFT)) + spec.write_value (inp, data_type = DataType.UINT32) + us += self.group.units # Reserve and write the routing region spec.reserve_memory_region (MLPRegions.ROUTING.value, - self._N_KEYS_BYTES) + self._KEYS_BYTES) spec.switch_write_focus (MLPRegions.ROUTING.value) @@ -281,8 +285,9 @@ def generate_machine_data_specification( spec.write_value (routing_info.get_first_key_from_pre_vertex ( self, self.fwd_link), data_type = DataType.UINT32) - # write link keys: bkp (padding - keys written below) - spec.write_value (0, data_type = DataType.UINT32) + # write link keys: bkp + spec.write_value (routing_info.get_first_key_from_pre_vertex ( + self, self.bkp_link), data_type = DataType.UINT32) # write link keys: fds (padding) spec.write_value (0, data_type = DataType.UINT32) @@ -293,19 +298,14 @@ def generate_machine_data_specification( # write link keys: lds (padding) spec.write_value (0, data_type = DataType.UINT32) - # write link keys: bkpi - for p in range (self.group.partitions): - spec.write_value (routing_info.get_first_key_from_pre_vertex ( - self, self.bkp_link[p]), data_type = DataType.UINT32) - # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification () @@ -315,12 +315,12 @@ def generate_machine_data_specification( def regenerate_data_specification(self, spec, placement): # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification() diff --git a/spinn_pdp2/mlp_examples.py b/spinn_pdp2/mlp_examples.py index 941744b..c670cbe 100644 --- a/spinn_pdp2/mlp_examples.py +++ b/spinn_pdp2/mlp_examples.py @@ -556,8 +556,11 @@ def read_Lens_examples_file (self, # clean up ef.close () - # report examples read - print (f"{examples_file} contains {len (self.examples)} examples") + # report total number of examples read + s = '' if len (self.examples) == 1 else 's' + print (f"file {examples_file} contains " + f"{len (self.examples)} example{s}" + ) # mark examples file as loaded self.examples_loaded = True diff --git a/spinn_pdp2/mlp_group.py b/spinn_pdp2/mlp_group.py index 67cc2cc..3996039 100644 --- a/spinn_pdp2/mlp_group.py +++ b/spinn_pdp2/mlp_group.py @@ -25,19 +25,20 @@ def __init__(self, self.write_blk = write_blk self.is_first_out = is_first_out self.label = label - self.VERBOSE = VERBOSE - # number of partitions required for this group - self.partitions = (self.units + MLPConstants.MAX_BLK_UNITS - 1)\ - // MLPConstants.MAX_BLK_UNITS + # number of subgroups required for this group + self.subgroups = (self.units + MLPConstants.MAX_SUBGROUP_UNITS - 1)\ + // MLPConstants.MAX_SUBGROUP_UNITS if VERBOSE: - if self.partitions == 1: - print (f"creating group {self.label} with 1 partition") - else: - print (f"creating group {self.label} with " - f"{self.partitions} partitions" - ) + s = '' if self.subgroups == 1 else 's' + print (f"creating group {self.label} with " + f"{self.subgroups} subgroup{s}" + ) + + # number of units per subgroup + self.subunits = [MLPConstants.MAX_SUBGROUP_UNITS] * (self.subgroups - 1) + self.subunits.append (self.units - sum (self.subunits)) # keep track of associated incoming links self.links_from = [] @@ -53,9 +54,9 @@ def __init__(self, # keep track of associated vertices self.w_vertices = [] - self.s_vertex = None - self.i_vertex = None - self.t_vertex = None + self.s_vertex = [] + self.i_vertex = [] + self.t_vertex = [] # group function parameters self.output_grp = (MLPGroupTypes.OUTPUT in self.type) diff --git a/spinn_pdp2/mlp_link.py b/spinn_pdp2/mlp_link.py index 4715258..869b797 100644 --- a/spinn_pdp2/mlp_link.py +++ b/spinn_pdp2/mlp_link.py @@ -13,10 +13,8 @@ def __init__(self, self.pre_link_group = pre_link_group self.post_link_group = post_link_group self.label = label - self.VERBOSE = VERBOSE - if VERBOSE: - print (f"creating link {self.label}") + if VERBOSE: print (f"creating link {self.label}") # update list of incoming links in the post_link_group self.post_link_group.links_from.append (self.pre_link_group) diff --git a/spinn_pdp2/mlp_network.py b/spinn_pdp2/mlp_network.py index 3262a13..7dc74fc 100644 --- a/spinn_pdp2/mlp_network.py +++ b/spinn_pdp2/mlp_network.py @@ -6,7 +6,7 @@ from pacman.model.graphs.machine import MachineEdge from spinn_pdp2.input_vertex import InputVertex -from spinn_pdp2.sum_vertex import SumVertex +from spinn_pdp2.sum_vertex import SumVertexTree from spinn_pdp2.threshold_vertex import ThresholdVertex from spinn_pdp2.weight_vertex import WeightVertex from spinn_pdp2.mlp_types import MLPGroupTypes, MLPConstants, \ @@ -88,11 +88,8 @@ def __init__(self, # initialise machine graph parameters self._graph_rdy = False - # keep track of the number of vertices in the graph - self._num_vertices = 0 - - # keep track of the number of partitions - self.partitions = 0 + # keep track of the number of subgroups + self.subgroups = 0 # keep track of the current execution stage self._stage_id = 0 @@ -109,6 +106,10 @@ def net_type (self): def training (self): return self._training + @property + def ex_set (self): + return self._ex_set + @property def num_epochs (self): return self._num_epochs @@ -125,6 +126,30 @@ def ticks_per_int (self): def global_max_ticks (self): return self._global_max_ticks + @property + def train_group_crit (self): + return self._train_group_crit + + @property + def test_group_crit (self): + return self._test_group_crit + + @property + def learning_rate (self): + return self._learning_rate + + @property + def weight_decay (self): + return self._weight_decay + + @property + def momentum (self): + return self._momentum + + @property + def update_function (self): + return self._update_function + @property def rec_test_results (self): return self._rec_test_results @@ -141,10 +166,6 @@ def rec_outputs (self): def rec_example_last_tick_only (self): return self._rec_example_last_tick_only - @property - def num_write_blocks (self): - return self._num_write_blks - @property def output_chain (self): return self._output_chain @@ -163,17 +184,15 @@ def network_config (self): uchar net_type; uint ticks_per_int; uint global_max_ticks; - uint num_write_blks; } network_conf_t; pack: standard sizes, little-endian byte order, explicit padding """ - return struct.pack(" self.global_max_ticks: ticks_per_example = self.global_max_ticks + # prepare to retrieve recorded data + TICK_DATA_FORMAT = "<4I" + TICK_DATA_SIZE = struct.calcsize(TICK_DATA_FORMAT) + + TOTAL_TICKS = len (rec_tick_data) // TICK_DATA_SIZE + # print recorded data in correct order current_epoch = -1 for tk in range (TOTAL_TICKS): @@ -709,27 +729,28 @@ def write_Lens_output_file (self, f.write (f"{tick} {event}\n") for g in self.output_chain: - # get group tick outputs - outputs = struct.unpack_from( - OUT_DATA_FORMATS[self.output_chain.index(g)], - rec_outputs[g.write_blk], - tk * OUT_DATA_SIZES[self.output_chain.index(g)] - ) + outputs = [] + # get tick outputs for each subgroup + for sg, rec_outs in enumerate (rec_outputs[g.write_blk]): + outputs += struct.unpack_from ( + f"<{g.subunits[sg]}H", + rec_outs, + tk * struct.calcsize(f"<{g.subunits[sg]}H") + ) # print outputs - if len (rec_outputs[g.write_blk]): - f.write (f"{g.units} 1\n") - tinx = tgt_inx * g.units - for u in range (g.units): - # outputs are s16.15 fixed-point numbers - out = (1.0 * outputs[u]) / (1.0 * (1 << 15)) - t = g.targets[tinx + u] - #NOTE: check for absent or NaN - if (t is None) or (t != t): - tgt = "-" - else: - tgt = int(t) - f.write ("{:8.6f} {}\n".format (out, tgt)) + f.write (f"{g.units} 1\n") + tinx = tgt_inx * g.units + for u in range (g.units): + # outputs are s16.15 fixed-point numbers + out = (1.0 * outputs[u]) / (1.0 * (1 << 15)) + t = g.targets[tinx + u] + #NOTE: check for absent or NaN + if (t is None) or (t != t): + tgt = "-" + else: + tgt = int (t) + f.write ("{:8.6f} {}\n".format (out, tgt)) # recorded data no longer available self._rec_data_rdy = False @@ -760,11 +781,12 @@ def show_test_results (self): TEST_RESULTS_FORMAT = "<4I" TEST_RESULTS_SIZE = struct.calcsize(TEST_RESULTS_FORMAT) - # retrieve recorded tick_data from last output group + # retrieve recorded test results from last output subgroup g = self.out_grps[-1] + ltv = g.t_vertex[g.subgroups - 1] try: - rec_test_results = g.t_vertex.read ( - gfe.placements().get_placement_of_vertex (g.t_vertex), + rec_test_results = ltv.read ( + gfe.placements().get_placement_of_vertex (ltv), gfe.buffer_manager(), MLPConstSizeRecordings.TEST_RESULTS.value ) except Exception as err: @@ -800,150 +822,242 @@ def generate_machine_graph (self): # path to binary files binaries_path = os.path.join(os.path.dirname(__file__), "..", "binaries") - # setup the machine graph - gfe.setup (model_binary_folder = binaries_path) + # estimate number of SpiNNaker boards required + # number of subgroups + for grp in self.groups: + self.subgroups += grp.subgroups - # set the number of write blocks before generating vertices - self._num_write_blks = len (self.output_chain) + # number of required cores + w_cores = self.subgroups * self.subgroups + s_cores = self.subgroups * (((self.subgroups - 2) // + (MLPConstants.MAX_S_CORE_LINKS - 1)) + 1) + i_cores = self.subgroups + t_cores = self.subgroups + cores = w_cores + s_cores + i_cores + t_cores - # compute number of partitions - for grp in self.groups: - self.partitions = self.partitions + grp.partitions + s = '' if cores == 1 else 's' + print (f"need {cores} SpiNNaker core{s}") + + # number of required chips + chips = ((cores - 1) // MLPConstants.DEF_SPINN_CORES_PER_CHIP) + 1 + + s = '' if chips == 1 else 's' + print (f"estimating {chips} SpiNNaker chip{s}") + + # number of required boards + boards = ((chips - 1) // MLPConstants.DEF_SPINN_CHIPS_PER_BOARD) + 1 + + s = '' if boards == 1 else 's' + print (f"requesting {boards} SpiNNaker board{s}") - # create associated weight, sum, input and threshold - # machine vertices for every network group + # request a SpiNNaker machine and setup the machine graph + try: + gfe.setup (model_binary_folder = binaries_path, + n_boards_required = boards + ) + except Exception as err: + print ("\n--------------------------------------------------") + print (f"error: {err}") + print ("--------------------------------------------------\n") + return False + + # create weight, sum, input and threshold + # machine vertices associated with every subgroup for grp in self.groups: - # create one weight core per partition - # of every (from_group, group) pair - # NOTE: all-zero cores can be optimised out - for from_grp in self.groups: - for _tp in range (grp.partitions): - for _fp in range (from_grp.partitions): - wv = WeightVertex (self, grp, from_grp, _tp, _fp) - grp.w_vertices.append (wv) + for sgrp in range (grp.subgroups): + # create one weight core for every + # (from_group/from_subgroup, group/subgroup) pair + #TODO: all-zero cores can be optimised out + wvs = [] + for from_grp in self.groups: + for from_sgrp in range (from_grp.subgroups): + wv = WeightVertex (self, grp, sgrp, + from_grp, from_sgrp) gfe.add_machine_vertex_instance (wv) - self._num_vertices += 1 - - # create one sum core per group - sv = SumVertex (self, grp) - grp.s_vertex = sv - gfe.add_machine_vertex_instance (sv) - self._num_vertices += 1 - - # create one input core per group - iv = InputVertex (self, grp) - grp.i_vertex = iv - gfe.add_machine_vertex_instance (iv) - self._num_vertices += 1 - - # create one threshold core per group - tv = ThresholdVertex (self, grp) - grp.t_vertex = tv - gfe.add_machine_vertex_instance (tv) - self._num_vertices += 1 + wvs.append (wv) + grp.w_vertices.append (wvs) + + # create a sum core tree per subgroup + #NOTE: sum vertices are added during tree building + svt = SumVertexTree (self, grp, sgrp) + grp.s_vertex.append (svt) + + # create one input core per subgroup + iv = InputVertex (self, grp, sgrp) + grp.i_vertex.append (iv) + gfe.add_machine_vertex_instance (iv) + + # create one threshold core per subgroup + tv = ThresholdVertex (self, grp, sgrp) + grp.t_vertex.append (tv) + gfe.add_machine_vertex_instance (tv) + + # groups and subgroups with special functions + first_lds_grp = self.groups[0] + first_subgroup_svt = first_lds_grp.s_vertex[0] + + last_out_grp = self.output_chain[-1] + last_out_subgroup_t_vertex = ( + last_out_grp.t_vertex[last_out_grp.subgroups - 1] + ) # create associated forward, backprop, link delta summation, - # synchronisation and stop machine edges for every network group - first = self.groups[0] + # criterion, stop and sync machine edges for every subgroup for grp in self.groups: - for w in grp.w_vertices: - _frmg = w.from_group - - # create forward w to s links - gfe.add_machine_edge_instance (MachineEdge (w, grp.s_vertex), - w.fwd_link) - - # create forward t to w (multicast) links - gfe.add_machine_edge_instance (MachineEdge (_frmg.t_vertex, w), - _frmg.t_vertex.fwd_link[w.row_blk]) - - # create backprop w to s links - gfe.add_machine_edge_instance (MachineEdge (w, _frmg.s_vertex), - w.bkp_link) - - # create backprop i to w (multicast) links - gfe.add_machine_edge_instance (MachineEdge (grp.i_vertex, w), - grp.i_vertex.bkp_link[w.col_blk]) - - # create link delta summation w to s links - gfe.add_machine_edge_instance (MachineEdge (w, grp.s_vertex), - w.lds_link) - - # create link delta summation result s (first) to w links - gfe.add_machine_edge_instance (MachineEdge (first.s_vertex, w), - first.s_vertex.lds_link) - - # create example synchronisation s to w (multicast) links - gfe.add_machine_edge_instance (MachineEdge (grp.s_vertex, w), - grp.s_vertex.fds_link) - - if grp != _frmg: - gfe.add_machine_edge_instance (MachineEdge (_frmg.s_vertex, w), - _frmg.s_vertex.fds_link) - - # create forward s to i link - gfe.add_machine_edge_instance (MachineEdge (grp.s_vertex, - grp.i_vertex), - grp.s_vertex.fwd_link) - - # create backprop s to t link - gfe.add_machine_edge_instance (MachineEdge (grp.s_vertex, - grp.t_vertex), - grp.s_vertex.bkp_link) - - # create forward i to t link - gfe.add_machine_edge_instance (MachineEdge (grp.i_vertex, - grp.t_vertex), - grp.i_vertex.fwd_link) - - # create backprop t to i link - gfe.add_machine_edge_instance (MachineEdge (grp.t_vertex, - grp.i_vertex), - grp.t_vertex.bkp_link) - - # create link delta summation s to s links - all s cores - # (except the first) send to the first s core - if grp != first: - gfe.add_machine_edge_instance (MachineEdge (grp.s_vertex, - first.s_vertex), - grp.s_vertex.lds_link) - - # create stop links, if OUTPUT group - if grp in self.output_chain: - # if last OUTPUT group broadcast stop decision - if grp == self.output_chain[-1]: - for stpg in self.groups: - # create stop links to all w cores - for w in stpg.w_vertices: - gfe.add_machine_edge_instance\ - (MachineEdge (grp.t_vertex, w), - grp.t_vertex.stp_link) - - # create stop links to all s cores - gfe.add_machine_edge_instance\ - (MachineEdge (grp.t_vertex, stpg.s_vertex),\ - grp.t_vertex.stp_link) - - # create stop links to all i cores - gfe.add_machine_edge_instance\ - (MachineEdge (grp.t_vertex, stpg.i_vertex),\ - grp.t_vertex.stp_link) - - # create stop links to t cores (no link to itself!) - if stpg != grp: - gfe.add_machine_edge_instance\ - (MachineEdge (grp.t_vertex, stpg.t_vertex),\ - grp.t_vertex.stp_link) - else: - # create stop link to next OUTPUT group in chain - _inx = self.output_chain.index (grp) - _stpg = self.output_chain[_inx + 1] - gfe.add_machine_edge_instance (MachineEdge (grp.t_vertex, - _stpg.t_vertex), - grp.t_vertex.stp_link) + for sgrp in range (grp.subgroups): + svt = grp.s_vertex[sgrp] + iv = grp.i_vertex[sgrp] + tv = grp.t_vertex[sgrp] + + for wv in grp.w_vertices[sgrp]: + from_grp = wv.from_group + from_sgrp = wv.from_subgroup + + from_svt = from_grp.s_vertex[from_sgrp] + from_tv = from_grp.t_vertex[from_sgrp] + + # sum tree leaf to connect to depends on group/subgroup + svt_leaf = svt.leaf (from_grp, from_sgrp) + from_svt_leaf = from_svt.leaf (grp, sgrp) + + # forward w to s link + gfe.add_machine_edge_instance ( + MachineEdge (wv, svt_leaf), + wv.fwd_link + ) + + # forward t to w (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (from_tv, wv), + from_tv.fwd_link + ) + + # backprop w to s link + gfe.add_machine_edge_instance ( + MachineEdge (wv, from_svt_leaf), + wv.bkp_link + ) + + # backprop i to w (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (iv, wv), + iv.bkp_link + ) + + # link delta summation w to s link + gfe.add_machine_edge_instance ( + MachineEdge (wv, svt_leaf), + wv.lds_link + ) + + # link delta result (first group) s to w (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (first_subgroup_svt.root, wv), + first_subgroup_svt.root.lds_link + ) + + # stop (last output group/subgroup) t to w (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (last_out_subgroup_t_vertex, wv), + last_out_subgroup_t_vertex.stp_link + ) + + # intra-subgroup sync s to w (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (svt.root, wv), + svt.root.fds_link + ) + + # inter-subgroup sync s to w (multicast) link + #NOTE: avoid duplicates + if grp != from_grp or sgrp != from_sgrp: + gfe.add_machine_edge_instance ( + MachineEdge (from_svt.root, wv), + from_svt.root.fds_link + ) + + # forward s to i link + gfe.add_machine_edge_instance ( + MachineEdge (svt.root, iv), + svt.root.fwd_link + ) + + # forward i to t link + gfe.add_machine_edge_instance ( + MachineEdge (iv, tv), + iv.fwd_link + ) + + # backprop t to i link + gfe.add_machine_edge_instance ( + MachineEdge (tv, iv), + tv.bkp_link + ) + + # backprop s to t link + gfe.add_machine_edge_instance ( + MachineEdge (svt.root, tv), + svt.root.bkp_link + ) + + # link delta summation s to s link + if sgrp != 0: + # first subgroup collects from all other subgroups + gfe.add_machine_edge_instance ( + MachineEdge ( + svt.root, + grp.s_vertex[0].root + ), + svt.root.lds_link + ) + elif grp != first_lds_grp: + # first group collects from all other groups + gfe.add_machine_edge_instance ( + MachineEdge ( + svt.root, + first_subgroup_svt.root + ), + svt.root.lds_link + ) + + # t to t criterion link + # intra-group criterion link to last subgroup t + if sgrp < (grp.subgroups - 1): + gfe.add_machine_edge_instance ( + MachineEdge (tv, grp.t_vertex[grp.subgroups - 1]), + tv.stp_link + ) + elif grp != last_out_grp: + # inter-group criterion link to last output subgroup + gfe.add_machine_edge_instance ( + MachineEdge (tv, last_out_subgroup_t_vertex), + tv.stp_link + ) + + # stop (last output group/subgroup) t to s (multicast) link + for s in svt.vertices: + gfe.add_machine_edge_instance ( + MachineEdge (last_out_subgroup_t_vertex, s), + last_out_subgroup_t_vertex.stp_link + ) + + # stop (last output group/subgroup) t to i (multicast) link + gfe.add_machine_edge_instance ( + MachineEdge (last_out_subgroup_t_vertex, iv), + last_out_subgroup_t_vertex.stp_link + ) + + # stop (last output group/subgroup) t to t (multicast) link + if tv != last_out_subgroup_t_vertex: + gfe.add_machine_edge_instance ( + MachineEdge (last_out_subgroup_t_vertex, tv), + last_out_subgroup_t_vertex.stp_link + ) self._graph_rdy = True + return True + def train (self, update_function = None, @@ -966,6 +1080,16 @@ def train (self, self._stg_reset = True self._training = 1 + + if self._stg_epochs == None: + updates = "default" + else: + updates = self._stg_epochs + + print ("\n--------------------------------------------------") + print (f"stage {self._stage_id} train (updates: {updates})") + print ("--------------------------------------------------\n") + self.stage_run () @@ -989,6 +1113,16 @@ def test (self, self._stg_reset = reset_examples self._training = 0 + + if self._stg_examples == None: + examples = "default" + else: + examples = self._stg_examples + + print ("\n--------------------------------------------------") + print (f"stage {self._stage_id} test (examples: {examples})") + print ("--------------------------------------------------\n") + self.stage_run () @@ -997,14 +1131,6 @@ def stage_run (self): """ self._aborted = False - # check that no group is too big - for grp in self.groups: - if grp.units > MLPConstants.MAX_GRP_UNITS: - print (f"run aborted: group {grp.label} has more than " - f"{MLPConstants.MAX_GRP_UNITS} units.") - self._aborted = True - return - # cannot run unless weights file exists if self._weights_file is None: print ("run aborted: weights file not given") @@ -1040,7 +1166,10 @@ def stage_run (self): # generate machine graph - if needed if not self._graph_rdy: - self.generate_machine_graph () + if not self.generate_machine_graph (): + print ("run aborted: error generating machine graph") + self._aborted = True + return # initialise recorded data flag self._rec_data_rdy = False @@ -1067,7 +1196,9 @@ def pause (self): """ pause execution to allow debugging """ # pause until a key is pressed + print ("\n--------------------------------------------------") input ("network paused: press enter to continue") + print ("--------------------------------------------------\n") def end (self): diff --git a/spinn_pdp2/mlp_types.py b/spinn_pdp2/mlp_types.py index 583c7bf..53d0a92 100644 --- a/spinn_pdp2/mlp_types.py +++ b/spinn_pdp2/mlp_types.py @@ -12,6 +12,12 @@ class MLPUpdateFuncs (Enum): class MLPConstants (): """ MLP network constants """ + # SpiNNaker machine DEFAULT values + #NOTE: leave room for monitor, 2 system-level and 1 blacklisted cores + DEF_SPINN_CORES_PER_CHIP = 14 + #NOTE: leave room for 1 blacklisted chip + DEF_SPINN_CHIPS_PER_BOARD = 47 + # network parameter CONSTANTS or DEFAULT values DEF_LEARNING_RATE = 0.1 DEF_WEIGHT_DECAY = 0 @@ -27,8 +33,8 @@ class MLPConstants (): MAX_IN_PROCS = 2 DEF_IN_PROCS = 0 - MAX_GRP_UNITS = 128 - MAX_BLK_UNITS = 32 + MAX_SUBGROUP_UNITS = 32 + MAX_S_CORE_LINKS = 8 MAX_OUT_PROCS = 5 DEF_OUT_PROCS = 2 diff --git a/spinn_pdp2/sum_vertex.py b/spinn_pdp2/sum_vertex.py index cd6265b..044255d 100644 --- a/spinn_pdp2/sum_vertex.py +++ b/spinn_pdp2/sum_vertex.py @@ -1,7 +1,10 @@ import struct +import spinnaker_graph_front_end as gfe + from data_specification.enums.data_type import DataType +from pacman.model.graphs.machine import MachineEdge from pacman.model.graphs.machine.machine_vertex import MachineVertex from pacman.model.resources.resource_container \ import ResourceContainer, ConstantSDRAM @@ -37,87 +40,81 @@ class SumVertex( def __init__(self, network, - group + group, + subgroup, + index = 0 ): + self._network = network + self._group = group + self._subgroup = subgroup + self._index = index + super(SumVertex, self).__init__( - label = "s_core{}".format (group.id), + label = f"s_core{self.group.id}/{self.subgroup}/{self.index}", binary_name = "sum.aplx", constraints = None) self._stage = 0 # application-level data - self._network = network - self._group = group - self._set_cfg = network._ex_set.set_config - self._ex_cfg = network._ex_set.example_config - - # check if first group in the network - if self.group.id == network.groups[0].id: - self._is_first_group = 1 - else: - self._is_first_group = 0 + self._set_cfg = self.network.ex_set.set_config + self._ex_cfg = self.network.ex_set.example_config - # forward, backprop, and link delta summation link partition names - self._fwd_link = "fwd_s{}".format (self.group.id) - self._bkp_link = "bkp_s{}".format (self.group.id) - self._lds_link = "lds_s{}".format (self.group.id) - self._fds_link = "fds_s{}".format (self.group.id) + # forward, backprop, link delta summation and sync link names + self._fwd_link = f"fwd_s{self.group.id}/{self.subgroup}" + self._bkp_link = f"bkp_s{self.group.id}/{self.subgroup}" + self._lds_link = f"lds_s{self.group.id}/{self.subgroup}" + self._fds_link = f"fds_s{self.group.id}/{self.subgroup}" # sum core-specific parameters # NOTE: if all-zero w cores are optimised out these need reviewing - self._fwd_expect = network.partitions - self._bkp_expect = network.partitions - self._ldsa_expect = network.partitions * self.group.units - self._ldst_expect = len (network.groups) - 1 - - # weight update function - self.update_function = network._update_function - - # reserve key space for every link - self._n_keys = MLPConstants.KEY_SPACE_SIZE - - # configuration and data files - # find out the size of an integer! - _data_int = DataType.INT32 + self._units = self.group.subunits[self.subgroup] + # configuration and data sizes # network configuration structure - self._N_NETWORK_CONFIGURATION_BYTES = \ - len (self._network.network_config) + self._NETWORK_CONFIGURATION_BYTES = len (self.network.network_config) # core configuration structure - self._N_CORE_CONFIGURATION_BYTES = \ - len (self.config) + self._CORE_CONFIGURATION_BYTES = len (self.config) # set configuration structure - self._N_EXAMPLE_SET_BYTES = \ - len (self._set_cfg) + self._EXAMPLE_SET_BYTES = len (self._set_cfg) # list of example configurations - self._N_EXAMPLES_BYTES = \ - len (self._ex_cfg) * len (self._ex_cfg[0]) + self._EXAMPLES_BYTES = len (self._ex_cfg) * len (self._ex_cfg[0]) - # keys are integers - self._N_KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * _data_int.size + # list of routing keys + self._KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * (DataType.INT32).size # stage configuration structure - self._N_STAGE_CONFIGURATION_BYTES = \ - len (self._network.stage_config) + self._STAGE_CONFIGURATION_BYTES = len (self.network.stage_config) self._sdram_usage = ( - self._N_NETWORK_CONFIGURATION_BYTES + \ - self._N_CORE_CONFIGURATION_BYTES + \ - self._N_EXAMPLE_SET_BYTES + \ - self._N_EXAMPLES_BYTES + \ - self._N_KEYS_BYTES + \ - self._N_STAGE_CONFIGURATION_BYTES + self._NETWORK_CONFIGURATION_BYTES + + self._CORE_CONFIGURATION_BYTES + + self._EXAMPLE_SET_BYTES + + self._EXAMPLES_BYTES + + self._KEYS_BYTES + + self._STAGE_CONFIGURATION_BYTES ) + @property + def network (self): + return self._network + @property def group (self): return self._group + @property + def subgroup (self): + return self._subgroup + + @property + def index (self): + return self._index + @property def fwd_link (self): return self._fwd_link @@ -144,22 +141,70 @@ def config (self): uint num_units; scoreboard_t fwd_expect; scoreboard_t bkp_expect; - scoreboard_t ldsa_expect; - scoreboard_t ldst_expect; + scoreboard_t lds_expect; uchar is_first_group; + uchar is_tree_root; } s_conf_t; pack: standard sizes, little-endian byte order, explicit padding """ + # check if first group in the network + if self.group == self.network.groups[0]: + is_first_group = 1 + else: + is_first_group = 0 + + # number of vertices in this SumVertex tree + num_vrt = ((self.network.subgroups - 2) // + (MLPConstants.MAX_S_CORE_LINKS - 1)) + 1 - return struct.pack ("<5IB3x", - self.group.units, - self._fwd_expect, - self._bkp_expect, - self._ldsa_expect, - self._ldst_expect, - self._is_first_group + lvs = ((num_vrt - 1) * (MLPConstants.MAX_S_CORE_LINKS - 1)) + + # number of expected packets + if self.index == (num_vrt - 1): + # the last vertex in the tree may expect fewer packets + #NOTE: this could be the root in a single-vertex tree + expected = self.network.subgroups - lvs + else: + expected = MLPConstants.MAX_S_CORE_LINKS + + # keep track of these on a unit-by-unit basis + fwd_expect = expected + bkp_expect = expected + + # keep track of the total, not unit-by-unit, count of lds packets + k = lvs // MLPConstants.MAX_S_CORE_LINKS + if self.index > (num_vrt - 2 - k): + # lds packets from w cores only + lds_expect = expected * self._units + elif self.index == (num_vrt - 2 - k): + # lds packets from w cores and other s cores + wp = lvs % MLPConstants.MAX_S_CORE_LINKS + sp = MLPConstants.MAX_S_CORE_LINKS - wp + lds_expect = wp * self._units + sp + else: + # lds packets from other s cores only + lds_expect = MLPConstants.MAX_S_CORE_LINKS + + # first subgroup expects a partial lds from every other subgroup + if self.index == 0 and self.subgroup == 0: + lds_expect += self.group.subgroups - 1 + + # first group expects a partial lds from every other group + if is_first_group: + lds_expect += len (self.network.groups) - 1 + + # is this the root of a SumVertex tree? + is_tree_root = self.index == 0 + + return struct.pack ("<4I2B2x", + self._units, + fwd_expect, + bkp_expect, + lds_expect, + is_first_group, + is_tree_root ) @property @@ -173,7 +218,7 @@ def resources_required (self): @overrides (AbstractProvidesNKeysForPartition.get_n_keys_for_partition) def get_n_keys_for_partition (self, partition, graph_mapper): - return self._n_keys + return MLPConstants.KEY_SPACE_SIZE @overrides(MachineDataSpecableVertex.generate_machine_data_specification) @@ -186,17 +231,17 @@ def generate_machine_data_specification( # Reserve and write the network configuration region spec.reserve_memory_region (MLPRegions.NETWORK.value, - self._N_NETWORK_CONFIGURATION_BYTES) + self._NETWORK_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.NETWORK.value) # write the network configuration into spec - for c in self._network.network_config: + for c in self.network.network_config: spec.write_value (c, data_type = DataType.UINT8) # Reserve and write the core configuration region spec.reserve_memory_region (MLPRegions.CORE.value, - self._N_CORE_CONFIGURATION_BYTES) + self._CORE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.CORE.value) @@ -206,7 +251,7 @@ def generate_machine_data_specification( # Reserve and write the example set region spec.reserve_memory_region (MLPRegions.EXAMPLE_SET.value, - self._N_EXAMPLE_SET_BYTES) + self._EXAMPLE_SET_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLE_SET.value) @@ -216,7 +261,7 @@ def generate_machine_data_specification( # Reserve and write the examples region spec.reserve_memory_region (MLPRegions.EXAMPLES.value, - self._N_EXAMPLES_BYTES) + self._EXAMPLES_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLES.value) @@ -227,7 +272,7 @@ def generate_machine_data_specification( # Reserve and write the routing region spec.reserve_memory_region (MLPRegions.ROUTING.value, - self._N_KEYS_BYTES) + self._KEYS_BYTES) spec.switch_write_focus (MLPRegions.ROUTING.value) @@ -239,9 +284,12 @@ def generate_machine_data_specification( spec.write_value (routing_info.get_first_key_from_pre_vertex ( self, self.bkp_link), data_type = DataType.UINT32) - # write link keys: fds - spec.write_value (routing_info.get_first_key_from_pre_vertex ( - self, self.fds_link), data_type = DataType.UINT32) + # write link keys: fds (padding if not SumVertex tree root) + if (self.index == 0): + spec.write_value (routing_info.get_first_key_from_pre_vertex ( + self, self.fds_link), data_type = DataType.UINT32) + else: + spec.write_value (0, data_type = DataType.UINT32) # write link keys: stp (padding) spec.write_value (0, data_type = DataType.UINT32) @@ -252,12 +300,12 @@ def generate_machine_data_specification( # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification () @@ -267,12 +315,12 @@ def generate_machine_data_specification( def regenerate_data_specification(self, spec, placement): # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification() @@ -290,3 +338,104 @@ def mark_regions_reloaded(self): """ # prepare for next stage self._stage += 1 + + +#--------------------------------------------------------------------- +class SumVertexTree( + ): + + """ implements a tree of sum vertices + """ + + def __init__(self, + network, + group, + subgroup + ): + + max_links = MLPConstants.MAX_S_CORE_LINKS + + # total number of Sum Vertices needed to build the tree + num_vrt = ((network.subgroups - 2) // (max_links - 1)) + 1 + + # the root vertex is used as pre-vertex for outgoing links + self._root = SumVertex (network, group, subgroup, 0) + + # add the root to the graph + gfe.add_machine_vertex_instance (self.root) + + # and to the list of all tree vertices + self._vertices = [self.root] + + # create the SumVertex tree + free_links = max_links + to_vrt = 0 + for vrt in range (1, num_vrt): + # create a SumVertex + vt = SumVertex (network, group, subgroup, vrt) + + # add it to the list of vertices + self._vertices.append (vt) + + # add it to the graph + gfe.add_machine_vertex_instance (vt) + + # add all SumVertex links towards the tree root + gfe.add_machine_edge_instance ( + MachineEdge (vt, self.vertices[to_vrt]), vt.fwd_link + ) + + gfe.add_machine_edge_instance ( + MachineEdge (vt, self.vertices[to_vrt]), vt.bkp_link + ) + + gfe.add_machine_edge_instance ( + MachineEdge (vt, self.vertices[to_vrt]), vt.lds_link + ) + + # take away one free link from vertex to_vrt + free_links -= 1 + + # if out of free links use next available vertex + if free_links == 0: + free_links = max_links + to_vrt += 1 + + # finally, map every pre-vertex to an available tree vertex + self._leaf_map = {} + for grp in network.groups: + for sgrp in range (grp.subgroups): + # assign available leaf vertex + self._leaf_map[(grp.id, sgrp)] = self.vertices[to_vrt] + + # take away one free link from vertex to_vrt + free_links -= 1 + + # if out of free links use next available vertex + if free_links == 0: + free_links = max_links + to_vrt += 1 + + + def leaf (self, group, subgroup): + """ returns the leaf SumVertex to link to + from a pre-vertex in group/subgroup + + :param group: pre-vertex group + :param subgroup: pre-vertex subgroup number + + :type group: MLPGroup + :type subgroup: integer + + :return: a SumVertex + """ + return self._leaf_map[(group.id, subgroup)] + + + @property + def root (self): + return self._root + + @property + def vertices (self): + return self._vertices diff --git a/spinn_pdp2/threshold_vertex.py b/spinn_pdp2/threshold_vertex.py index ea01205..15999f2 100644 --- a/spinn_pdp2/threshold_vertex.py +++ b/spinn_pdp2/threshold_vertex.py @@ -47,130 +47,121 @@ class ThresholdVertex( def __init__(self, network, - group + group, + subgroup ): - # place OUTPUT groups "close" to the host - if group.output_grp: - constraints = [ChipAndCoreConstraint (x = 0, y = 0)] - else: - constraints = None + self._network = network + self._group = group + self._subgroup = subgroup super(ThresholdVertex, self).__init__( - label = "t_core{}".format (group.id), + label = f"t_core{self.group.id}/{self.subgroup}", binary_name = "threshold.aplx", - constraints = constraints) + constraints = None) self._stage = 0 # application-level data - self._network = network - self._group = group - self._set_cfg = network._ex_set.set_config - self._ex_cfg = network._ex_set.example_config - self._ev_cfg = network._ex_set.event_config + self._set_cfg = self.network.ex_set.set_config + self._ex_cfg = self.network.ex_set.example_config + self._ev_cfg = self.network.ex_set.event_config # application parameters - self._out_integr_dt = 1.0 / network.ticks_per_int + self._out_integr_dt = 1.0 / self.network.ticks_per_int - # choose appropriate group criteria if self.group.test_group_crit is not None: self._tst_group_criterion = self.group.test_group_crit - elif network._test_group_crit is not None: - self._tst_group_criterion = network._test_group_crit + elif self.network.test_group_crit is not None: + self._tst_group_criterion = self.network.test_group_crit else: self._tst_group_criterion = MLPConstants.DEF_GRP_CRIT if self.group.train_group_crit is not None: self._trn_group_criterion = self.group.train_group_crit - elif network._train_group_crit is not None: - self._trn_group_criterion = network._train_group_crit + elif self.network.train_group_crit is not None: + self._trn_group_criterion = self.network.train_group_crit else: self._trn_group_criterion = MLPConstants.DEF_GRP_CRIT - # check if last output group in daisy chain - if self.group == network.output_chain[-1]: - self._is_last_output_group = 1 - else: - self._is_last_output_group = 0 + # forward, backprop and stop link names + self._fwd_link = f"fwd_t{self.group.id}/{self.subgroup}" + self._bkp_link = f"bkp_t{self.group.id}/{self.subgroup}" + self._stp_link = f"stp_t{self.group.id}/{self.subgroup}" - # forward, backprop and stop link partition names - self._fwd_link = [] - for p in range (self._group.partitions): - self._fwd_link.append ("fwd_t{}_{}".format (self.group.id, p)) - self._bkp_link = "bkp_t{}".format (self.group.id) - self._stp_link = "stp_t{}".format (self.group.id) + # threshold core-specific parameters + self._units = self.group.subunits[self.subgroup] - # reserve key space for every link - self._n_keys = MLPConstants.KEY_SPACE_SIZE + # first output subgroup has special functions + self._is_first_out = self.group.is_first_out and (self.subgroup == 0) - # configuration and data files - # find out the size of an integer! - _data_int = DataType.INT32 + # last output subgroup has special functions + self._is_last_out = ((self.group == self.network.output_chain[-1]) and + (self.subgroup == (self.group.subgroups - 1))) + # configuration and data sizes # network configuration structure - self._N_NETWORK_CONFIGURATION_BYTES = \ - len (self.network.network_config) + self._NETWORK_CONFIGURATION_BYTES = len (self.network.network_config) # core configuration structure - self._N_CORE_CONFIGURATION_BYTES = \ - len (self.config) + self._CORE_CONFIGURATION_BYTES = len (self.config) # set configuration structure - self._N_EXAMPLE_SET_BYTES = \ - len (self._set_cfg) + self._EXAMPLE_SET_BYTES = len (self._set_cfg) # list of example configurations - self._N_EXAMPLES_BYTES = \ - len (self._ex_cfg) * len (self._ex_cfg[0]) + self._EXAMPLES_BYTES = len (self._ex_cfg) * len (self._ex_cfg[0]) # list of event configurations - self._N_EVENTS_BYTES = \ - len (self._ev_cfg) * len (self._ev_cfg[0]) + self._EVENTS_BYTES = len (self._ev_cfg) * len (self._ev_cfg[0]) - # list of group inputs (empty if not an INPUT group) - self._N_INPUTS_BYTES = \ - len (self._group.inputs) * _data_int.size + # list of subgroup inputs (empty if not an INPUT group) + if self.group.input_grp: + self._INPUTS_BYTES = ((len (self.group.inputs) // self.group.units) * + self._units * DataType.INT32.size) + else: + self._INPUTS_BYTES = 0 - # list of group targets (empty if not an OUTPUT group) - self._N_TARGETS_BYTES = \ - len (self._group.targets) * _data_int.size + # list of subgroup targets (empty if not an OUTPUT group) + if self.group.output_grp: + self._TARGETS_BYTES = ((len (self.group.targets) // self.group.units) * + self._units * DataType.INT32.size) + else: + self._TARGETS_BYTES = 0 - # keys are integers - # t cores require a different key for every group partition - self._N_KEYS_BYTES = _data_int.size * \ - (MLPConstants.NUM_KEYS_REQ + self._group.partitions) + # list of routing keys + self._KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * DataType.INT32.size # stage configuration structure - self._N_STAGE_CONFIGURATION_BYTES = \ - len (self.network.stage_config) + self._STAGE_CONFIGURATION_BYTES = len (self.network.stage_config) # reserve SDRAM space used to store historic data - self._TARGET_HISTORY_BYTES = (MLPConstants.ACTIV_SIZE // 8) * \ - self.group.units * self.network.global_max_ticks + #NOTE: MLPConstants sizes are in bits + self._TARGET_HISTORY_BYTES = ((MLPConstants.ACTIV_SIZE // 8) * + self._units * self.network.global_max_ticks) - self._OUT_DERIV_HISTORY_BYTES = (MLPConstants.LONG_DERIV_SIZE // 8) * \ - self.group.units * self.network.global_max_ticks + self._OUT_DERIV_HISTORY_BYTES = ((MLPConstants.LONG_DERIV_SIZE // 8) * + self._units * self.network.global_max_ticks) - self._NET_HISTORY_BYTES = (MLPConstants.NET_SIZE // 8) * \ - self.group.units * self.network.global_max_ticks + self._NET_HISTORY_BYTES = ((MLPConstants.NET_SIZE // 8) * + self._units * self.network.global_max_ticks) - self._OUTPUT_HISTORY_BYTES = (MLPConstants.ACTIV_SIZE // 8) * \ - self.group.units * self.network.global_max_ticks + self._OUTPUT_HISTORY_BYTES = ((MLPConstants.ACTIV_SIZE // 8) * + self._units * self.network.global_max_ticks) # recording info region size if self.group.output_grp: # number of recording channels - NUM_REC_CHANNS = len(MLPVarSizeRecordings) + \ - len(MLPConstSizeRecordings) + NUM_REC_CHANNS = (len(MLPVarSizeRecordings) + + len(MLPConstSizeRecordings)) - # first output group has extra recording channels - if self.group.is_first_out: + # first output group/subgroup has extra recording channels + if self._is_first_out: # number of extra recording channels NUM_REC_CHANNS += len(MLPExtraRecordings) - self._REC_INFO_BYTES = \ - recording_utilities.get_recording_header_size(NUM_REC_CHANNS) + self._REC_INFO_BYTES = ( + recording_utilities.get_recording_header_size(NUM_REC_CHANNS)) else: self._REC_INFO_BYTES = 0 @@ -178,7 +169,7 @@ def __init__(self, if self.group.output_grp: # list of variable-size recording channel sizes self.VAR_CHANNEL_SIZES = [ - self.group.units * (BYTES_PER_WORD // 2) # OUTPUTS + self._units * (BYTES_PER_WORD // 2) # OUTPUTS ] # list of constant-size recording channel sizes @@ -187,7 +178,7 @@ def __init__(self, ] # list of extra recording channel sizes - if self.group.is_first_out: + if self._is_first_out: # list of extra recording channel sizes self.EXTRA_CHANNEL_SIZES = [ 4 * BYTES_PER_WORD # TICK_DATA @@ -206,15 +197,15 @@ def __init__(self, # configuration data plus application core SDRAM usage self._sdram_fixed = ( SYSTEM_BYTES_REQUIREMENT + - self._N_NETWORK_CONFIGURATION_BYTES + - self._N_CORE_CONFIGURATION_BYTES + - self._N_EXAMPLE_SET_BYTES + - self._N_EXAMPLES_BYTES + - self._N_EVENTS_BYTES + - self._N_INPUTS_BYTES + - self._N_TARGETS_BYTES + - self._N_KEYS_BYTES + - self._N_STAGE_CONFIGURATION_BYTES + + self._NETWORK_CONFIGURATION_BYTES + + self._CORE_CONFIGURATION_BYTES + + self._EXAMPLE_SET_BYTES + + self._EXAMPLES_BYTES + + self._EVENTS_BYTES + + self._INPUTS_BYTES + + self._TARGETS_BYTES + + self._KEYS_BYTES + + self._STAGE_CONFIGURATION_BYTES + self._TARGET_HISTORY_BYTES + self._OUT_DERIV_HISTORY_BYTES + self._NET_HISTORY_BYTES + @@ -236,6 +227,10 @@ def network (self): def group (self): return self._group + @property + def subgroup (self): + return self._subgroup + @property def fwd_link (self): return self._fwd_link @@ -257,9 +252,8 @@ def config (self): { uchar output_grp; uchar input_grp; + uchar is_last_sgrp; uint num_units; - uint partitions; - uint write_blk; uchar hard_clamp_en; uchar out_integr_en; fpreal out_integr_dt; @@ -269,39 +263,53 @@ def config (self): activation_t initOutput; error_t tst_group_criterion; error_t trn_group_criterion; + uint crit_expected; uchar criterion_function; - uchar is_first_output_group; - uchar is_last_output_group; + uchar is_first_output; + uchar is_last_output; uchar error_function; } t_conf_t; pack: standard sizes, little-endian byte order, explicit padding """ + # is this the last subgroup in its group + last_sgrp = (self.subgroup == (self.group.subgroups - 1)) + # integration dt is an MLP fixed-point fpreal - out_integr_dt = int (self._out_integr_dt *\ + out_integr_dt = int (self._out_integr_dt * (1 << MLPConstants.FPREAL_SHIFT)) # weak_clamp_strength is an MLP fixed-point fpreal - weak_clamp_strength = int (self.group.weak_clamp_strength *\ + weak_clamp_strength = int (self.group.weak_clamp_strength * (1 << MLPConstants.FPREAL_SHIFT)) # init output is an MLP fixed-point activation_t - init_output = int (self.group.init_output *\ + init_output = int (self.group.init_output * (1 << MLPConstants.ACTIV_SHIFT)) # group criteria are MLP fixed-point error_t - tst_group_criterion = int (self._tst_group_criterion *\ + tst_group_criterion = int (self._tst_group_criterion * (1 << MLPConstants.ERROR_SHIFT)) - trn_group_criterion = int (self._trn_group_criterion *\ + trn_group_criterion = int (self._trn_group_criterion * (1 << MLPConstants.ERROR_SHIFT)) - return struct.pack ("<2B2x3I2B2xi6I4i4B", + # criterion packets to be expected + if last_sgrp: + # expect from every other subgroup + crit_expected = self.group.subgroups - 1 + + # last group also expects from every other group + if self._is_last_out: + crit_expected += len (self.network.groups) - 1 + else: + crit_expected = 0 + + return struct.pack ("<3BxI2B2xi6I4iI4B", self.group.output_grp, self.group.input_grp, - self.group.units, - self.group.partitions, - self.group.write_blk, + last_sgrp, + self._units, self.group.hard_clamp_en, self.group.out_integr_en, out_integr_dt, @@ -315,9 +323,10 @@ def config (self): init_output, tst_group_criterion, trn_group_criterion, + crit_expected, self.group.criterion_function.value, - self.group.is_first_out, - self._is_last_output_group, + self._is_first_out, + self._is_last_out, self.group.error_function.value ) @@ -337,7 +346,7 @@ def resources_required (self): @overrides (AbstractProvidesNKeysForPartition.get_n_keys_for_partition) def get_n_keys_for_partition (self, partition, graph_mapper): - return self._n_keys + return MLPConstants.KEY_SPACE_SIZE def read(self, placement, buffer_manager, channel): @@ -372,7 +381,7 @@ def generate_machine_data_specification( # reserve and write the network configuration region spec.reserve_memory_region (MLPRegions.NETWORK.value, - self._N_NETWORK_CONFIGURATION_BYTES) + self._NETWORK_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.NETWORK.value) @@ -382,7 +391,7 @@ def generate_machine_data_specification( # reserve and write the core configuration region spec.reserve_memory_region (MLPRegions.CORE.value, - self._N_CORE_CONFIGURATION_BYTES) + self._CORE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.CORE.value) @@ -392,7 +401,7 @@ def generate_machine_data_specification( # reserve and write the example set region spec.reserve_memory_region (MLPRegions.EXAMPLE_SET.value, - self._N_EXAMPLE_SET_BYTES) + self._EXAMPLE_SET_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLE_SET.value) @@ -402,7 +411,7 @@ def generate_machine_data_specification( # reserve and write the examples region spec.reserve_memory_region (MLPRegions.EXAMPLES.value, - self._N_EXAMPLES_BYTES) + self._EXAMPLES_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLES.value) @@ -413,7 +422,7 @@ def generate_machine_data_specification( # reserve and write the events region spec.reserve_memory_region (MLPRegions.EVENTS.value, - self._N_EVENTS_BYTES) + self._EVENTS_BYTES) spec.switch_write_focus (MLPRegions.EVENTS.value) @@ -423,47 +432,54 @@ def generate_machine_data_specification( spec.write_value (c, data_type = DataType.UINT8) # reserve and write the input data region (if INPUT group) - if self._N_INPUTS_BYTES != 0: + if self.group.input_grp: spec.reserve_memory_region (MLPRegions.INPUTS.value, - self._N_INPUTS_BYTES) + self._INPUTS_BYTES) spec.switch_write_focus (MLPRegions.INPUTS.value) # write inputs to spec - for _i in self._group.inputs: - # inputs are MLP fixed-point activation_t - #NOTE: check for absent or NaN - if (_i is None) or (_i != _i): - _inp = MLPConstants.ACTIV_NaN - else: - _inp = int (_i * (1 << MLPConstants.ACTIV_SHIFT)) - spec.write_value (_inp, data_type = DataType.UINT32) + us = self.subgroup * MLPConstants.MAX_SUBGROUP_UNITS + for _ in range (len (self.group.inputs) // self.group.units): + for i in self.group.inputs[us : us + self._units]: + # inputs are fixed-point activation_t + #NOTE: check for absent or NaN + if (i is None) or (i != i): + inp = MLPConstants.ACTIV_NaN + else: + inp = int (i * (1 << MLPConstants.ACTIV_SHIFT)) + spec.write_value (inp, data_type = DataType.UINT32) + us += self.group.units # reserve and write the target data region - if self._N_TARGETS_BYTES != 0: + if self.group.output_grp: spec.reserve_memory_region (MLPRegions.TARGETS.value, - self._N_TARGETS_BYTES) + self._TARGETS_BYTES) spec.switch_write_focus (MLPRegions.TARGETS.value) # write targets to spec - for _t in self._group.targets: - # targets are MLP fixed-point activation_t - #NOTE: check for absent or NaN - if (_t is None) or (_t != _t): - _tgt = MLPConstants.ACTIV_NaN - else: - _tgt = int (_t * (1 << MLPConstants.ACTIV_SHIFT)) - spec.write_value (_tgt, data_type = DataType.UINT32) + us = self.subgroup * MLPConstants.MAX_SUBGROUP_UNITS + for _ in range (len (self.group.targets) // self.group.units): + for t in self.group.targets[us : us + self._units]: + # inputs are fixed-point activation_t + #NOTE: check for absent or NaN + if (t is None) or (t != t): + tgt = MLPConstants.ACTIV_NaN + else: + tgt = int (t * (1 << MLPConstants.ACTIV_SHIFT)) + spec.write_value (tgt, data_type = DataType.UINT32) + us += self.group.units # reserve and write the routing region spec.reserve_memory_region (MLPRegions.ROUTING.value, - self._N_KEYS_BYTES) + self._KEYS_BYTES) spec.switch_write_focus (MLPRegions.ROUTING.value) - # write link keys: fwd (padding - keys written below) - spec.write_value (0, data_type = DataType.UINT32) + # write link keys: fwd + spec.write_value (routing_info.get_first_key_from_pre_vertex ( + self, self.fwd_link), data_type = DataType.UINT32) # write link keys: bkp spec.write_value (routing_info.get_first_key_from_pre_vertex ( @@ -473,24 +489,15 @@ def generate_machine_data_specification( spec.write_value (0, data_type = DataType.UINT32) # write link keys: stp - # stop key for OUTPUT groups only - if self.group.output_grp: - spec.write_value (routing_info.get_first_key_from_pre_vertex ( - self, self.stp_link), data_type = DataType.UINT32) - else: - spec.write_value (0, data_type = DataType.UINT32) + spec.write_value (routing_info.get_first_key_from_pre_vertex ( + self, self.stp_link), data_type = DataType.UINT32) # write link keys: lds (padding) spec.write_value (0, data_type = DataType.UINT32) - # write link keys: fwdt - for p in range (self.group.partitions): - spec.write_value (routing_info.get_first_key_from_pre_vertex ( - self, self.fwd_link[p]), data_type = DataType.UINT32) - # reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) @@ -504,11 +511,11 @@ def generate_machine_data_specification( region = MLPRegions.REC_INFO.value, size = self._REC_INFO_BYTES ) - + # write the actual recording channel sizes for a stage _sizes = [data_n_steps * sz for sz in self.VAR_CHANNEL_SIZES] _sizes.extend([sz for sz in self.CONST_CHANNEL_SIZES]) - if self.group.is_first_out: + if self._is_first_out: _sizes.extend( [data_n_steps * sz for sz in self.EXTRA_CHANNEL_SIZES] ) @@ -525,7 +532,7 @@ def generate_machine_data_specification( def regenerate_data_specification(self, spec, placement): # reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) @@ -557,7 +564,7 @@ def get_recorded_region_ids(self): ids.extend([ch.value for ch in MLPConstSizeRecordings]) # first output group has additional recording channels - if self.group.is_first_out: + if self._is_first_out: ids.extend([ch.value for ch in MLPExtraRecordings]) return ids diff --git a/spinn_pdp2/weight_vertex.py b/spinn_pdp2/weight_vertex.py index b0b9aed..aa60daf 100644 --- a/spinn_pdp2/weight_vertex.py +++ b/spinn_pdp2/weight_vertex.py @@ -38,130 +38,106 @@ class WeightVertex( def __init__(self, network, group, + subgroup, from_group, - col_blk, - row_blk + from_subgroup ): + self._network = network + self._group = group + self._from_group = from_group + self._subgroup = subgroup + self._from_subgroup = from_subgroup + super(WeightVertex, self).__init__( - label = f"w_core{group.id}_{from_group.id}_{row_blk}_{col_blk}", + label = (f"w_core{self.group.id}/{self.subgroup}" + f"_{self.from_group.id}/{self.from_subgroup}"), binary_name = "weight.aplx", constraints = None) self._stage = 0 # application-level data - self._network = network - self._group = group - self._from_group = from_group - self._col_blk = col_blk - self._row_blk = row_blk - self._set_cfg = network._ex_set.set_config - self._ex_cfg = network._ex_set.example_config - - # compute number of rows and columns - if self._row_blk != (self.from_group.partitions - 1): - self._num_rows = MLPConstants.MAX_BLK_UNITS - else: - _r = self.from_group.units % MLPConstants.MAX_BLK_UNITS - if _r == 0: - self._num_rows = MLPConstants.MAX_BLK_UNITS - else: - self._num_rows = _r - - if self._col_blk != (self.group.partitions - 1): - self._num_cols = MLPConstants.MAX_BLK_UNITS - else: - _r = self.group.units % MLPConstants.MAX_BLK_UNITS - if _r == 0: - self._num_cols = MLPConstants.MAX_BLK_UNITS - else: - self._num_cols = _r - - # forward, backprop and link delta summation link partition names - self._fwd_link = "fwd_w{}_{}".format (self.group.id, - self.from_group.id) - self._bkp_link = "bkp_w{}_{}".format (self.group.id, - self.from_group.id) - self._lds_link = "lds_w{}_{}".format (self.group.id, - self.from_group.id) - - # reserve key space for every link - self._n_keys = MLPConstants.KEY_SPACE_SIZE + self._set_cfg = self.network.ex_set.set_config + self._ex_cfg = self.network.ex_set.example_config - # choose weight core-specific parameters + # application parameters if len (self.group.weights[self.from_group]): if self.group.learning_rate is not None: - self.learning_rate = self.group.learning_rate - elif network._learning_rate is not None: - self.learning_rate = network._learning_rate + self._learning_rate = self.group.learning_rate + elif network.learning_rate is not None: + self._learning_rate = network.learning_rate else: - self.learning_rate = MLPConstants.DEF_LEARNING_RATE + self._learning_rate = MLPConstants.DEF_LEARNING_RATE if self.group.weight_decay is not None: - self.weight_decay = self.group.weight_decay - elif network._weight_decay is not None: - self.weight_decay = network._weight_decay + self._weight_decay = self.group.weight_decay + elif network.weight_decay is not None: + self._weight_decay = network.weight_decay else: - self.weight_decay = MLPConstants.DEF_WEIGHT_DECAY + self._weight_decay = MLPConstants.DEF_WEIGHT_DECAY if self.group.momentum is not None: - self.momentum = self.group.momentum - elif network._momentum is not None: - self.momentum = network._momentum + self._momentum = self.group.momentum + elif network.momentum is not None: + self._momentum = network.momentum else: - self.momentum = MLPConstants.DEF_MOMENTUM + self._momentum = MLPConstants.DEF_MOMENTUM else: - self.learning_rate = 0 - self.weight_decay = 0 - self.momentum = 0 + self._learning_rate = 0 + self._weight_decay = 0 + self._momentum = 0 - # weight update function - self.update_function = network._update_function + # forward, backprop and link delta summation link names + self._fwd_link = (f"fwd_w{self.group.id}/{self.subgroup}" + f"_{self.from_group.id}/{self.from_subgroup}") - # configuration and data files - # find out the size of an integer! - _data_int = DataType.INT32 + self._bkp_link = (f"bkp_w{self.group.id}/{self.subgroup}" + f"_{self.from_group.id}/{self.from_subgroup}") + self._lds_link = (f"lds_w{self.group.id}/{self.subgroup}" + f"_{self.from_group.id}/{self.from_subgroup}") + + # weight core-specific parameters + # weight matrix parameters + self._num_rows = self.from_group.subunits[self.from_subgroup] + self._num_cols = self.group.subunits[self.subgroup] + + # configuration and data sizes # network configuration structure - self._N_NETWORK_CONFIGURATION_BYTES = \ - len (self._network.network_config) + self._NETWORK_CONFIGURATION_BYTES = len (self.network.network_config) # core configuration structure - self._N_CORE_CONFIGURATION_BYTES = \ - len (self.config) + self._CORE_CONFIGURATION_BYTES = len (self.config) # set configuration structure - self._N_EXAMPLE_SET_BYTES = \ - len (self._set_cfg) + self._EXAMPLE_SET_BYTES = len (self._set_cfg) # list of example configurations - self._N_EXAMPLES_BYTES = \ - len (self._ex_cfg) * len (self._ex_cfg[0]) + self._EXAMPLES_BYTES = len (self._ex_cfg) * len (self._ex_cfg[0]) # each weight is an integer - self._N_WEIGHTS_BYTES = \ - self.group.units * self.from_group.units * _data_int.size + self._WEIGHTS_BYTES = (self._num_rows * + self._num_cols * DataType.INT32.size) - # keys are integers - self._N_KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * _data_int.size + # list of routing keys + self._KEYS_BYTES = MLPConstants.NUM_KEYS_REQ * DataType.INT32.size # stage configuration structure - self._N_STAGE_CONFIGURATION_BYTES = \ - len (self._network.stage_config) + self._STAGE_CONFIGURATION_BYTES = len (self.network.stage_config) # reserve SDRAM space used to store historic data - self._OUTPUT_HISTORY_BYTES = (MLPConstants.ACTIV_SIZE // 8) * \ - self.group.units * self._network.global_max_ticks + self._OUTPUT_HISTORY_BYTES = ((MLPConstants.ACTIV_SIZE // 8) * + self.group.units * self.network.global_max_ticks) self._sdram_usage = ( - self._N_NETWORK_CONFIGURATION_BYTES + \ - self._N_CORE_CONFIGURATION_BYTES + \ - self._N_EXAMPLE_SET_BYTES + \ - self._N_EXAMPLES_BYTES + \ - self._N_WEIGHTS_BYTES + \ - self._N_KEYS_BYTES + \ - self._N_STAGE_CONFIGURATION_BYTES + \ + self._NETWORK_CONFIGURATION_BYTES + + self._CORE_CONFIGURATION_BYTES + + self._EXAMPLE_SET_BYTES + + self._EXAMPLES_BYTES + + self._WEIGHTS_BYTES + + self._KEYS_BYTES + + self._STAGE_CONFIGURATION_BYTES + self._OUTPUT_HISTORY_BYTES ) @@ -189,21 +165,25 @@ def cast_float_to_weight (self, # return an MLP fixed-point weight_t return (int (wtemp * (1 << MLPConstants.WEIGHT_SHIFT))) + @property + def network (self): + return self._network + @property def group (self): return self._group @property - def from_group (self): - return self._from_group + def subgroup (self): + return self._subgroup @property - def row_blk (self): - return self._row_blk + def from_group (self): + return self._from_group @property - def col_blk (self): - return self._col_blk + def from_subgroup (self): + return self._from_subgroup @property def fwd_link (self): @@ -226,8 +206,6 @@ def config (self): { uint num_rows; uint num_cols; - uint row_blk; - uint col_blk; scoreboard_t sync_expected; activation_t initOutput; short_fpreal_t learningRate; @@ -239,32 +217,30 @@ def config (self): explicit padding """ # expect one sync packet from 'group' and one from 'from_group' - if self._group == self._from_group: + if self.group == self.from_group and self.subgroup == self.from_subgroup: sync_expected = 1 else: sync_expected = 2 # init output is an MLP fixed-point activation_t - init_output = int (self._from_group.init_output *\ + init_output = int (self.from_group.init_output *\ (1 << MLPConstants.ACTIV_SHIFT)) # learning_rate is an MLP short fixed-point fpreal - learning_rate = int (self.learning_rate *\ + learning_rate = int (self._learning_rate *\ (1 << MLPConstants.SHORT_FPREAL_SHIFT)) # weight_decay is an MLP short fixed-point fpreal - weight_decay = int (self.weight_decay *\ + weight_decay = int (self._weight_decay *\ (1 << MLPConstants.SHORT_FPREAL_SHIFT)) # momentum is an MLP short fixed-point fpreal - momentum = int (self.momentum *\ + momentum = int (self._momentum *\ (1 << MLPConstants.SHORT_FPREAL_SHIFT)) - return struct.pack ("<5Ii3h2x", + return struct.pack ("<3Ii3h2x", self._num_rows, self._num_cols, - self._row_blk, - self._col_blk, sync_expected, init_output, learning_rate, @@ -283,7 +259,7 @@ def resources_required (self): @overrides (AbstractProvidesNKeysForPartition.get_n_keys_for_partition) def get_n_keys_for_partition (self, partition, graph_mapper): - return self._n_keys + return MLPConstants.KEY_SPACE_SIZE @overrides(MachineDataSpecableVertex.generate_machine_data_specification) @@ -296,17 +272,17 @@ def generate_machine_data_specification( # Reserve and write the network configuration region spec.reserve_memory_region (MLPRegions.NETWORK.value, - self._N_NETWORK_CONFIGURATION_BYTES) + self._NETWORK_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.NETWORK.value) # write the network configuration into spec - for c in self._network.network_config: + for c in self.network.network_config: spec.write_value (c, data_type = DataType.UINT8) # Reserve and write the core configuration region spec.reserve_memory_region (MLPRegions.CORE.value, - self._N_CORE_CONFIGURATION_BYTES) + self._CORE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.CORE.value) @@ -316,7 +292,7 @@ def generate_machine_data_specification( # Reserve and write the example set region spec.reserve_memory_region (MLPRegions.EXAMPLE_SET.value, - self._N_EXAMPLE_SET_BYTES) + self._EXAMPLE_SET_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLE_SET.value) @@ -326,7 +302,7 @@ def generate_machine_data_specification( # Reserve and write the examples region spec.reserve_memory_region (MLPRegions.EXAMPLES.value, - self._N_EXAMPLES_BYTES) + self._EXAMPLES_BYTES) spec.switch_write_focus (MLPRegions.EXAMPLES.value) @@ -337,31 +313,29 @@ def generate_machine_data_specification( # Reserve and write the weights region spec.reserve_memory_region (MLPRegions.WEIGHTS.value, - self._N_WEIGHTS_BYTES) + self._WEIGHTS_BYTES) spec.switch_write_focus (MLPRegions.WEIGHTS.value) # weight matrix is kept in column-major order # and has to be written out in row-major order - _wts = self.group.weights[self.from_group] - _nrows = self.from_group.units - _nr = self._num_rows - _nc = self._num_cols - _rb = self._row_blk * MLPConstants.MAX_BLK_UNITS - _cb = self._col_blk * MLPConstants.MAX_BLK_UNITS - if len (_wts): - for _r in range (_nr): - for _c in range (_nc): - _wt = self.cast_float_to_weight ( - _wts[(_cb + _c) * _nrows + (_rb + _r)]) - spec.write_value (_wt, data_type = DataType.INT32) + wts = self.group.weights[self.from_group] + rows_per_col = self.from_group.units + rb = self.from_subgroup * MLPConstants.MAX_SUBGROUP_UNITS + cb = self.subgroup * MLPConstants.MAX_SUBGROUP_UNITS + if len (wts): + for r in range (self._num_rows): + for c in range (self._num_cols): + wt = self.cast_float_to_weight ( + wts[(cb + c) * rows_per_col + (rb + r)]) + spec.write_value (wt, data_type = DataType.INT32) else: - for _ in range (_nr * _nc): + for _ in range (self._num_rows * self._num_cols): spec.write_value (0, data_type = DataType.INT32) # Reserve and write the routing region spec.reserve_memory_region (MLPRegions.ROUTING.value, - self._N_KEYS_BYTES) + self._KEYS_BYTES) spec.switch_write_focus (MLPRegions.ROUTING.value) @@ -385,12 +359,12 @@ def generate_machine_data_specification( # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification () @@ -400,12 +374,12 @@ def generate_machine_data_specification( def regenerate_data_specification(self, spec, placement): # Reserve and write the stage configuration region spec.reserve_memory_region (MLPRegions.STAGE.value, - self._N_STAGE_CONFIGURATION_BYTES) + self._STAGE_CONFIGURATION_BYTES) spec.switch_write_focus (MLPRegions.STAGE.value) # write the stage configuration into spec - for c in self._network.stage_config: + for c in self.network.stage_config: spec.write_value (c, data_type = DataType.UINT8) spec.end_specification()