From d979cf277fef9ad3f19ec7ff296a290e5b422070 Mon Sep 17 00:00:00 2001 From: Blaise Tine Date: Sun, 1 Sep 2024 04:00:57 -0700 Subject: [PATCH] decoder logic specialization --- hw/rtl/afu/opae/vortex_afu.sv | 2 +- hw/rtl/cache/VX_bank_flush.sv | 8 +++- hw/rtl/cache/VX_cache_bypass.sv | 9 ++-- hw/rtl/cache/VX_cache_data.sv | 2 +- hw/rtl/cache/VX_cache_mshr.sv | 2 +- hw/rtl/libs/VX_cyclic_arbiter.sv | 12 ++++- hw/rtl/libs/VX_decoder.sv | 46 +++++++++++++++++++ .../{VX_onehot_encoder.sv => VX_encoder.sv} | 2 +- hw/rtl/libs/VX_matrix_arbiter.sv | 2 +- hw/rtl/libs/VX_mem_adapter.sv | 20 +++++++- hw/rtl/libs/VX_rr_arbiter.sv | 15 ++++-- hw/rtl/libs/VX_stream_xbar.sv | 18 ++++++-- 12 files changed, 116 insertions(+), 22 deletions(-) create mode 100644 hw/rtl/libs/VX_decoder.sv rename hw/rtl/libs/{VX_onehot_encoder.sv => VX_encoder.sv} (99%) diff --git a/hw/rtl/afu/opae/vortex_afu.sv b/hw/rtl/afu/opae/vortex_afu.sv index cb5725e78..61465103e 100644 --- a/hw/rtl/afu/opae/vortex_afu.sv +++ b/hw/rtl/afu/opae/vortex_afu.sv @@ -963,7 +963,7 @@ module vortex_afu import ccip_if_pkg::*; import local_mem_cfg_pkg::*; import VX_ wire [COUT_TID_WIDTH-1:0] cout_tid; - VX_onehot_encoder #( + VX_encoder #( .N (`VX_MEM_BYTEEN_WIDTH) ) cout_tid_enc ( .data_in (vx_mem_req_byteen), diff --git a/hw/rtl/cache/VX_bank_flush.sv b/hw/rtl/cache/VX_bank_flush.sv index 2d62e354c..608eefa7d 100644 --- a/hw/rtl/cache/VX_bank_flush.sv +++ b/hw/rtl/cache/VX_bank_flush.sv @@ -114,7 +114,13 @@ module VX_bank_flush #( assign flush_line = counter_r[`CS_LINE_SEL_BITS-1:0]; if (WRITEBACK && `CS_WAY_SEL_BITS > 0) begin - assign flush_way = NUM_WAYS'(1) << counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]; + VX_decoder #( + .N (`CS_WAY_SEL_BITS) + ) ctr_decoder ( + .shift_in (counter_r[`CS_LINE_SEL_BITS +: `CS_WAY_SEL_BITS]), + .data_in (1'b1), + .data_out (flush_way) + ); end else begin assign flush_way = {NUM_WAYS{1'b1}}; end diff --git a/hw/rtl/cache/VX_cache_bypass.sv b/hw/rtl/cache/VX_cache_bypass.sv index 
7992ec9e8..a3d872d7f 100644 --- a/hw/rtl/cache/VX_cache_bypass.sv +++ b/hw/rtl/cache/VX_cache_bypass.sv @@ -56,6 +56,7 @@ module VX_cache_bypass #( localparam DIRECT_PASSTHRU = PASSTHRU && (`CS_WORD_SEL_BITS == 0) && (NUM_REQS == 1); localparam REQ_SEL_BITS = `CLOG2(NUM_REQS); + localparam REQ_SEL_WIDTH = `UP(REQ_SEL_BITS); localparam MUX_DATAW = 1 + WORD_SIZE + CORE_ADDR_WIDTH + `MEM_REQ_FLAGS_WIDTH + CORE_DATA_WIDTH + CORE_TAG_WIDTH; localparam WORDS_PER_LINE = LINE_SIZE / WORD_SIZE; @@ -72,7 +73,7 @@ module VX_cache_bypass #( wire core_req_nc_valid; wire [NUM_REQS-1:0] core_req_nc_valids; wire [NUM_REQS-1:0] core_req_nc_idxs; - wire [`UP(REQ_SEL_BITS)-1:0] core_req_nc_idx; + wire [REQ_SEL_WIDTH-1:0] core_req_nc_idx; wire [NUM_REQS-1:0] core_req_nc_sel; wire core_req_nc_ready; @@ -261,17 +262,15 @@ module VX_cache_bypass #( .data_out (mem_rsp_tag_id_nc) ); - wire [`UP(REQ_SEL_BITS)-1:0] rsp_idx; + wire [REQ_SEL_WIDTH-1:0] rsp_idx; if (NUM_REQS > 1) begin assign rsp_idx = mem_rsp_tag_id_nc[(CORE_TAG_ID_BITS + WSEL_BITS) +: REQ_SEL_BITS]; end else begin assign rsp_idx = 1'b0; end - wire [NUM_REQS-1:0] rsp_nc_valid = NUM_REQS'(is_mem_rsp_nc) << rsp_idx; - for (genvar i = 0; i < NUM_REQS; ++i) begin - assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || rsp_nc_valid[i]; + assign core_rsp_in_valid[i] = core_bus_out_if[i].rsp_valid || (is_mem_rsp_nc && rsp_idx == REQ_SEL_WIDTH'(i)); assign core_bus_out_if[i].rsp_ready = core_rsp_in_ready[i]; end diff --git a/hw/rtl/cache/VX_cache_data.sv b/hw/rtl/cache/VX_cache_data.sv index 318463f76..18d44b6db 100644 --- a/hw/rtl/cache/VX_cache_data.sv +++ b/hw/rtl/cache/VX_cache_data.sv @@ -140,7 +140,7 @@ module VX_cache_data #( assign line_wren = fill; end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_WAYS) ) way_enc ( .data_in (way_sel), diff --git a/hw/rtl/cache/VX_cache_mshr.sv b/hw/rtl/cache/VX_cache_mshr.sv index 4f8163269..0ca67d159 100644 --- a/hw/rtl/cache/VX_cache_mshr.sv +++ b/hw/rtl/cache/VX_cache_mshr.sv @@ 
-148,7 +148,7 @@ module VX_cache_mshr #( .valid_out (allocate_rdy_n) ); - VX_onehot_encoder #( + VX_encoder #( .N (MSHR_SIZE) ) prev_sel ( .data_in (addr_matches & ~next_table_x), diff --git a/hw/rtl/libs/VX_cyclic_arbiter.sv b/hw/rtl/libs/VX_cyclic_arbiter.sv index a6673c8b7..592b7a03b 100644 --- a/hw/rtl/libs/VX_cyclic_arbiter.sv +++ b/hw/rtl/libs/VX_cyclic_arbiter.sv @@ -41,7 +41,7 @@ module VX_cyclic_arbiter #( localparam IS_POW2 = (1 << LOG_NUM_REQS) == NUM_REQS; wire [LOG_NUM_REQS-1:0] grant_index_um; - wire [NUM_REQS-1:0] grant_onehot_um; + wire [NUM_REQS-1:0] grant_onehot_w, grant_onehot_um; reg [LOG_NUM_REQS-1:0] grant_index_r; always @(posedge clk) begin @@ -65,10 +65,18 @@ module VX_cyclic_arbiter #( .valid_out (grant_valid) ); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (1'b1), + .data_out (grant_onehot_w) + ); + wire is_hit = requests[grant_index_r]; assign grant_index = is_hit ? grant_index_r : grant_index_um; - assign grant_onehot = is_hit ? (NUM_REQS'(1) << grant_index) : grant_onehot_um; + assign grant_onehot = is_hit ? grant_onehot_w : grant_onehot_um; end diff --git a/hw/rtl/libs/VX_decoder.sv b/hw/rtl/libs/VX_decoder.sv new file mode 100644 index 000000000..34a378e71 --- /dev/null +++ b/hw/rtl/libs/VX_decoder.sv @@ -0,0 +1,46 @@ +// Copyright © 2019-2023 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +`include "VX_platform.vh" + +// Decoder: writes data_in into the M-bit slot of data_out selected by shift_in; all other slots are zero +// Adapted from BaseJump STL: http://bjump.org/data_out.html + +`TRACING_OFF +module VX_decoder #( + parameter N = 1, + parameter M = 1, +`ifdef VIVADO + parameter MODEL = 1, +`else + parameter MODEL = 0, +`endif + parameter D = 1 << N +) ( + input wire [N-1:0] shift_in, + input wire [M-1:0] data_in, + output wire [D-1:0][M-1:0] data_out +); + if (MODEL == 1) begin + reg [D-1:0][M-1:0] data_out_w; + always @(*) begin + data_out_w = '0; + data_out_w[shift_in] = data_in; + end + assign data_out = data_out_w; + end else begin + assign data_out = (D*M)'(data_in) << (shift_in * M); + end + +endmodule +`TRACING_ON diff --git a/hw/rtl/libs/VX_onehot_encoder.sv b/hw/rtl/libs/VX_encoder.sv similarity index 99% rename from hw/rtl/libs/VX_onehot_encoder.sv rename to hw/rtl/libs/VX_encoder.sv index 6246a673c..85d72ce52 100644 --- a/hw/rtl/libs/VX_onehot_encoder.sv +++ b/hw/rtl/libs/VX_encoder.sv @@ -17,7 +17,7 @@ // Adapted from BaseJump STL: http://bjump.org/data_out.html `TRACING_OFF -module VX_onehot_encoder #( +module VX_encoder #( parameter N = 1, parameter REVERSE = 0, parameter MODEL = 1, diff --git a/hw/rtl/libs/VX_matrix_arbiter.sv b/hw/rtl/libs/VX_matrix_arbiter.sv index 9f0ead356..eff4eb7e1 100644 --- a/hw/rtl/libs/VX_matrix_arbiter.sv +++ b/hw/rtl/libs/VX_matrix_arbiter.sv @@ -74,7 +74,7 @@ module VX_matrix_arbiter #( assign grant_onehot = grant; - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) encoder ( .data_in (grant_onehot), diff --git a/hw/rtl/libs/VX_mem_adapter.sv b/hw/rtl/libs/VX_mem_adapter.sv index 068628be2..3e84a6292 100644 --- a/hw/rtl/libs/VX_mem_adapter.sv +++ b/hw/rtl/libs/VX_mem_adapter.sv @@ -97,10 +97,26 @@ module VX_mem_adapter #( assign mem_req_addr_out_w = mem_req_addr_in_qual; end + VX_decoder #( + .N (D), + .M (SRC_DATA_WIDTH/8) + ) req_be_dec ( + .shift_in (req_idx), + .data_in (mem_req_byteen_in), + .data_out (mem_req_byteen_out_w) + ); + + VX_decoder
#( + .N (D), + .M (SRC_DATA_WIDTH) + ) req_data_dec ( + .shift_in (req_idx), + .data_in (mem_req_data_in), + .data_out (mem_req_data_out_w) + ); + assign mem_req_valid_out_w = mem_req_valid_in; assign mem_req_rw_out_w = mem_req_rw_in; - assign mem_req_byteen_out_w = DST_DATA_SIZE'(mem_req_byteen_in) << ((DST_LDATAW-3)'(req_idx) << (SRC_LDATAW-3)); - assign mem_req_data_out_w = DST_DATA_WIDTH'(mem_req_data_in) << ((DST_LDATAW'(req_idx)) << SRC_LDATAW); assign mem_req_tag_out_w = DST_TAG_WIDTH'({mem_req_tag_in, req_idx}); assign mem_req_ready_in = mem_req_ready_out_w; diff --git a/hw/rtl/libs/VX_rr_arbiter.sv b/hw/rtl/libs/VX_rr_arbiter.sv index 5c279989b..4b22a4004 100644 --- a/hw/rtl/libs/VX_rr_arbiter.sv +++ b/hw/rtl/libs/VX_rr_arbiter.sv @@ -448,7 +448,7 @@ module VX_rr_arbiter #( end end - VX_onehot_encoder #( + VX_encoder #( .N (NUM_REQS) ) onehot_encoder ( .data_in (grant_onehot), @@ -480,9 +480,16 @@ module VX_rr_arbiter #( end end - assign grant_index = grant_table[state]; - assign grant_onehot = NUM_REQS'(grant_valid) << grant_index; - assign grant_valid = (| requests); + VX_decoder #( + .N (LOG_NUM_REQS) + ) grant_decoder ( + .shift_in (grant_index), + .data_in (grant_valid), + .data_out (grant_onehot) + ); + + assign grant_index = grant_table[state]; + assign grant_valid = (| requests); end diff --git a/hw/rtl/libs/VX_stream_xbar.sv b/hw/rtl/libs/VX_stream_xbar.sv index 3dd30bc86..5a3b129ea 100644 --- a/hw/rtl/libs/VX_stream_xbar.sv +++ b/hw/rtl/libs/VX_stream_xbar.sv @@ -72,12 +72,17 @@ module VX_stream_xbar #( ); for (genvar i = 0; i < NUM_INPUTS; ++i) begin - assign per_output_valid_in[i] = NUM_OUTPUTS'(valid_in[i]) << sel_in[i]; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[i]), + .data_in (valid_in[i]), + .data_out (per_output_valid_in[i]) + ); assign ready_in[i] = | per_output_ready_in_w[i]; end for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin - VX_stream_arb #( .NUM_INPUTS (NUM_INPUTS), .NUM_OUTPUTS (1), @@ -131,8 +136,15 
@@ module VX_stream_xbar #( wire [NUM_OUTPUTS-1:0] valid_out_w, ready_out_w; wire [NUM_OUTPUTS-1:0][DATAW-1:0] data_out_w; + VX_decoder #( + .N (OUT_WIDTH) + ) sel_in_decoder ( + .shift_in (sel_in[0]), + .data_in (valid_in[0]), + .data_out (valid_out_w) + ); + assign ready_in[0] = ready_out_w[sel_in[0]]; - assign valid_out_w = NUM_OUTPUTS'(valid_in[0]) << sel_in[0]; assign data_out_w = {NUM_OUTPUTS{data_in[0]}}; for (genvar i = 0; i < NUM_OUTPUTS; ++i) begin