newStep.v

This commit is contained in:
2025-11-27 04:28:54 +03:00
parent a84b8fcfde
commit 6e38a6c1af
85 changed files with 25646 additions and 6801 deletions

456
RTL/Attic/mini_femtorv32.v Normal file
View File

@@ -0,0 +1,456 @@
// femtorv32, a minimalistic RISC-V RV32I core
// (minus SYSTEM and FENCE that are not implemented)
// Bruno Levy, May-June 2020
//
// drop-in replacement of femtorv32,
// does 3 CPIs (cycles per instruction) in linear execution flow
// (to be compared with 2 CPIs with femtorv32.v),
// saves 20-50 LUTs
// in femtosoc.v, replace `include "femtorv32.v"
// with `include "mini_femtorv32.v"
//
// NOTE: the structure of the decoder has changed, *** NEEDS TO BE ADAPTED ***
/*******************************************************************/
`include "utils.v" // Utilities, macros for debugging
`include "register_file.v" // The 31 general-purpose registers
`include "small_alu.v" // Used on IceStick, RV32I
`include "large_alu.v" // For larger FPGAs, RV32IM
`include "branch_predicates.v" // Tests for branch instructions
`include "decoder.v" // The instruction decoder
`include "aligned_memory_access.v" // Read/write bytes, hwords and words from memory
`include "CSR_file.v" // (Optional) Control and Status registers
/********************* Nrv processor *******************************/
// FemtoRV32 (mini variant): a multi-cycle RV32I core built around a small
// state machine. All control signals are produced combinatorially by the
// decoder from the latched instruction; the state machine sequences
// instruction fetch, register fetch, execute, and the optional wait states
// for multi-cycle ALU operations and loads.
//
// Parameters:
//   RV32M      - 1 selects NrvLargeALU (mul/div/rem), 0 selects NrvSmallALU.
//   ADDR_WIDTH - number of address bits actually stored (addressReg and PC).
//
// Reset is active low (the state machine resets when `reset` is 0).
module FemtoRV32 #(
    parameter [0:0] RV32M      = 0,  // Set to 1 to support mul/div/rem instructions
    parameter       ADDR_WIDTH = 16  // width of the address bus
) (
    input              clk,
    // Memory interface: using the same protocol as Claire Wolf's PicoRV32
    // (WIP: add mem_valid / mem_ready protocol)
    output [31:0]      mem_addr,   // address bus, only ADDR_WIDTH bits are used
    output wire [31:0] mem_wdata,  // data to be written
    output wire [3:0]  mem_wmask,  // write mask for individual bytes (1 means write byte)
    input  [31:0]      mem_rdata,  // input lines for both data and instr
    output wire        mem_rstrb,  // active to initiate memory read
    input  wire        mem_rbusy,  // asserted if memory is busy reading value (not used in this module)
    input  wire        mem_wbusy,  // asserted if memory is busy writing value (not used in this module)
    input  wire        reset,      // set to 0 to reset the processor
    output wire        error       // 1 if current instruction could not be decoded
);

   // The internal register that stores the current address,
   // directly wired to the address bus (zero-extended to 32 bits).
   reg [ADDR_WIDTH-1:0] addressReg;

   // The program counter (not storing the two LSBs, always aligned)
   reg [ADDR_WIDTH-3:0] PC;

   assign mem_addr = addressReg;

   reg [31:0] instr;     // Latched instruction.
   reg [31:0] nextInstr; // Prefetched instruction.

   // Next program counter in normal operation: advance one word.
   // I do not use the ALU, I create an additional adder for that.
   // (note that the two LSBs are not stored, always aligned, hence the
   // "+ 1" that advances the byte address by 4).
   wire [ADDR_WIDTH-3:0] PCplus4 = PC + 1;

   /**************************************************************************************************/
   // Instruction decoding.
   // Internal signals, all generated by the decoder from the current instruction.
   wire [4:0]  writeBackRegId; // The register to be written back
   wire        writeBackEn;    // Needs to be asserted for writing back
   wire [3:0]  writeBackSel;   // 0001: ALU 0010: PC+4 0100: RAM 1000: CSR
   wire [4:0]  regId1;         // Register output 1
   wire [4:0]  regId2;         // Register output 2
   wire        aluInSel1;      // 0: register 1: pc
   wire        aluInSel2;      // 0: register 1: imm
   wire        aluSel;         // 0: force aluOp,aluQual to zero (ADD) 1: use aluOp,aluQual from instr field
   wire [2:0]  aluOp;          // one of the 8 operations done by the ALU
   wire        aluQual;        // 'qualifier' used by some operations (+/-, logical/arith shifts)
   wire        aluM;           // asserted if instr is RV32M.
   wire [31:0] imm;            // immediate value decoded from the instruction
   wire        needWaitALU;    // asserted if instruction uses at least one additional phase in ALU
   wire        isLoad;         // instruction is a load
   wire        isStore;        // instruction is a store
   wire        isJump;         // instruction is a jump
   wire        isBranch;       // instruction is a conditional branch
   wire        decoderError;   // true if instr does not correspond to any known instr

   // The instruction decoder, that reads the current instruction
   // and generates all the signals from it. It is in fact just a
   // big combinatorial function.
   NrvDecoder decoder(
      .instr(instr),
      .writeBackRegId(writeBackRegId),
      .writeBackEn(writeBackEn),
      .writeBackSel(writeBackSel),
      .inRegId1(regId1),
      .inRegId2(regId2),
      .aluInSel1(aluInSel1),
      .aluInSel2(aluInSel2),
      .aluSel(aluSel),
      .aluOp(aluOp),
      .aluQual(aluQual),
      .aluM(aluM),
      .needWaitALU(needWaitALU),
      .isLoad(isLoad),
      .isStore(isStore),
      .isJump(isJump),
      .isBranch(isBranch),
      .imm(imm),
      .error(decoderError)
   );

   /**************************************************************************************************/
   // Maybe not necessary, but I'd rather latch this one,
   // if this one glitches, then it will break everything...
   // Initialized (like `state`) and cleared on reset so that the `error`
   // output is well defined before the first instruction is decoded and
   // does not stay asserted across a reset that follows an ERROR state.
   reg error_latched = 1'b0;
   assign error = error_latched;

   /**************************************************************************************************/
   // The register file. At each cycle, it can read two
   // registers (available at next cycle) and write one.
   wire        writeBack;
   reg  [31:0] writeBackData;
   wire [31:0] regOut1;
   wire [31:0] regOut2;

   NrvRegisterFile regs(
      .clk(clk),
      .in(writeBackData),
      .inEn(writeBack),
      .inRegId(writeBackRegId),
      .outRegId1(regId1),
      .outRegId2(regId2),
      .out1(regOut1),
      .out2(regOut2)
   );

   /**************************************************************************************************/
   // The ALU, partly combinatorial, partly state (for shifts).
   wire [31:0] aluOut;
   wire        aluBusy;
   wire        alu_wenable;
   wire [31:0] aluIn1 = aluInSel1 ? {PC, 2'b00} : regOut1;
   wire [31:0] aluIn2 = aluInSel2 ? imm : regOut2;

   // Select the ALU based on RV32M (use large ALU) or plain RV32I (use small ALU).
   // When aluSel is 0, op and opqual are forced to zero (ADD), which is how
   // address and PC-relative computations reuse the ALU.
   generate
      if(RV32M) begin
         NrvLargeALU alu(
            .clk(clk),
            .in1(aluIn1),
            .in2(aluIn2),
            .op(aluOp & {3{aluSel}}),
            .opqual(aluQual & aluSel),
            .opM(aluM),
            .out(aluOut),
            .wr(alu_wenable),
            .busy(aluBusy)
         );
      end else begin
         NrvSmallALU #(
`ifdef NRV_TWOSTAGE_SHIFTER
            .TWOSTAGE_SHIFTER(1)
`else
            .TWOSTAGE_SHIFTER(0)
`endif
         ) alu(
            .clk(clk),
            .in1(aluIn1),
            .in2(aluIn2),
            .op(aluOp & {3{aluSel}}),
            .opqual(aluQual & aluSel),
            .out(aluOut),
            .wr(alu_wenable),
            .busy(aluBusy)
         );
      end
   endgenerate

   /****************************************************************************/
   // Memory only does 32-bit aligned accesses. Internally we have two small
   // circuits (one for LOAD and one for STORE) that shift and adapt data
   // according to data type (byte, halfword, word) and memory alignment (addr[1:0]).
   // In addition, it does sign-expansion (when loading a signed byte to a word for
   // instance).

   // LOAD: a small combinatorial circuit that realigns
   // and sign-expands mem_rdata based
   // on width (aluOp[1:0]), signed/unsigned flag (aluOp[2])
   // and the two LSBs of the address.
   wire [31:0] LOAD_mem_rdata_aligned;
   NrvLoadFromMemory load_from_mem(
      .mem_rdata(mem_rdata),        // Raw data read from mem
      .addr_LSBs(mem_addr[1:0]),    // The two LSBs of the address
      .width(aluOp[1:0]),           // Data width: 00:byte 01:hword 10:word
      .is_unsigned(aluOp[2]),       // signed/unsigned flag
      .data(LOAD_mem_rdata_aligned) // Data ready to be sent to register
   );

   // STORE: a small combinatorial circuit that realigns
   // data to be written based on width and the two LSBs
   // of the address.
   // When a STORE instruction is executed, the data to be stored to
   // mem is available from the second register (regOut2) and the
   // address where to store it is the output of the ALU (aluOut).
   wire mem_wenable;
   NrvStoreToMemory store_to_mem(
      .data(regOut2),          // Data to be sent, out of register
      .addr_LSBs(aluOut[1:0]), // The two LSBs of the address
      .width(aluOp[1:0]),      // Data width: 00:byte 01:hword 10:word
      .mem_wdata(mem_wdata),   // Shifted data to be sent to memory
      .mem_wmask(mem_wmask),   // Write mask for the 4 bytes
      .wr_enable(mem_wenable)  // Write enable ('anded' with write mask)
   );

   /*************************************************************************/
   // Control and status registers
`ifdef NRV_CSR
   wire [31:0] CSR_rdata;
   wire        instr_retired;
   NrvControlStatusRegisterFile CSR(
      .clk(clk),                 // for counting cycles
      .instr_cnt(instr_retired), // for counting retired instructions
      .reset(reset),             // reset all CSRs to default value
      .CSRid(instr[31:20]),      // CSR Id, extracted from instr
      .rdata(CSR_rdata)          // Read CSR value
      // TODO: test for errors (.error)
   );
`endif
   // Note: writing to CSRs not implemented yet

   /*************************************************************************/
   // The value written back to the register file, selected by the
   // writeBackSel bits (see the writeBackSel declaration for the encoding).
   always @(*) begin
      (* parallel_case, full_case *)
      case(1'b1)
        writeBackSel[0]: writeBackData = aluOut;
        writeBackSel[1]: writeBackData = {PCplus4, 2'b00};
        writeBackSel[2]: writeBackData = LOAD_mem_rdata_aligned;
`ifdef NRV_CSR
        writeBackSel[3]: writeBackData = CSR_rdata;
`endif
      endcase
   end

   /*************************************************************************/
   // The predicate for conditional branches.
   wire predOut;
   NrvPredicate pred(
      .in1(regOut1),
      .in2(regOut2),
      .op(aluOp),
      .out(predOut)
   );

   /*************************************************************************/
   // And, last but not least, the state machine.
   /*************************************************************************/

   // The states, using 1-hot encoding (reduces
   // both LUT count and critical path).
   // INITIAL is the all-zeros value; every other state sets exactly one bit.
   localparam INITIAL              = 8'b00000000;
   localparam WAIT_INSTR           = 8'b00000001;
   localparam FETCH_INSTR          = 8'b00000010;
   localparam USE_PREFETCHED_INSTR = 8'b00000100;
   localparam FETCH_REGS           = 8'b00001000;
   localparam EXECUTE              = 8'b00010000;
   localparam WAIT_ALU_OR_DATA     = 8'b00100000;
   localparam LOAD                 = 8'b01000000;
   localparam ERROR                = 8'b10000000;

   // Index of the bit that is set in each of the states above.
   localparam WAIT_INSTR_bit           = 0;
   localparam FETCH_INSTR_bit          = 1;
   localparam USE_PREFETCHED_INSTR_bit = 2;
   localparam FETCH_REGS_bit           = 3;
   localparam EXECUTE_bit              = 4;
   localparam WAIT_ALU_OR_DATA_bit     = 5;
   localparam LOAD_bit                 = 6;
   localparam ERROR_bit                = 7;

   reg [7:0] state = INITIAL;

   // the internal signals that are determined combinatorially from
   // state and other signals.

   // The internal signal that enables register write-back.
   // Note that it is unconditionally asserted in WAIT_ALU_OR_DATA
   // (writeBackEn is only taken into account in EXECUTE).
   assign writeBack = (state[EXECUTE_bit] && writeBackEn) || state[WAIT_ALU_OR_DATA_bit];

   // The memory-read signal. It is only needed for IO, hence it is only enabled
   // right before the LOAD state. To allow execution from IO-mapped devices, it
   // will be necessary to also enable it before instruction fetch.
   assign mem_rstrb = (state[EXECUTE_bit] && isLoad);

   // NOTE: memory writes are done during the USE_PREFETCHED_INSTR state,
   // Can't be done during EXECUTE (would be better), because mem_addr
   // (needed) is updated at the end of EXECUTE.
   // See also how load_from_mem and store_to_mem are wired.
   assign mem_wenable = (state[USE_PREFETCHED_INSTR_bit] && isStore);

   // alu_wenable starts computation in the ALU (for functions that
   // require several cycles).
   assign alu_wenable = (state[EXECUTE_bit]);

   // instr_retired is asserted during one cycle for each
   // retired instruction (here: during FETCH_REGS). It is used to update
   // the instruction counter 'instret' in the control and status registers
`ifdef NRV_CSR
   assign instr_retired = state[FETCH_REGS_bit];
`endif

   // And now the state machine
`define show_state(state) `verbose($display(" %s",state))

   always @(posedge clk) begin
      if(!reset) begin
         state         <= INITIAL;
         addressReg    <= 0;
         PC            <= 0;
         error_latched <= 1'b0; // do not keep reporting a stale decode error
      end else
        case(1'b1)

          (state == 0): begin
             `show_state("initial");
             state <= WAIT_INSTR;
          end

          state[WAIT_INSTR_bit]: begin
             `show_state("wait_instr");
             // this state to give enough time to fetch the
             // instruction. Used for jumps and taken branches (and
             // when fetching the first instruction).
             state <= FETCH_INSTR;
          end

          state[FETCH_INSTR_bit]: begin
             `show_state("fetch_instr");
             instr <= mem_rdata;
             // update instr address so that next instr is fetched during
             // decode (and ready if there was no jump or branch)
             addressReg <= {PCplus4, 2'b00};
             state <= FETCH_REGS;
          end

          state[USE_PREFETCHED_INSTR_bit]: begin
             `show_state("use_prefetched_instr");
             // for linear execution flow, the prefetched instr (nextInstr)
             // can be used.
             instr <= nextInstr;
             // update instr address so that next instr is fetched during
             // decode (and ready if there was no jump or branch)
             addressReg <= {PCplus4, 2'b00};
             // In addition, STORE instructions write to memory here
             // (see the mem_wenable assignment and the store_to_mem instance).
             state <= FETCH_REGS;
          end

          state[FETCH_REGS_bit]: begin
             `show_state("fetch_regs");
             // instr was just updated -> input register ids also
             // input registers available at next cycle
             state <= EXECUTE;
             error_latched <= decoderError;
          end

          state[EXECUTE_bit]: begin
             `show_state("execute");
             // input registers are read, aluOut is up to date
             // Looked-ahead instr.
             nextInstr <= mem_rdata;
             // Needed for LOAD,STORE,jump,branch
             // (in other cases it will be ignored).
             // Only the ADDR_WIDTH low bits of the ALU result are kept.
             addressReg <= aluOut[ADDR_WIDTH-1:0];
             if(error_latched) begin
                state <= ERROR;
             end else if(isLoad) begin
                state <= LOAD;
                PC <= PCplus4;
             end else begin
                (* parallel_case, full_case *)
                case(1'b1)
                  isJump: begin
                     // PC does not store the two LSBs of the target.
                     PC <= aluOut[ADDR_WIDTH-1:2];
                     state <= WAIT_INSTR;
                  end
                  isBranch: begin
                     if(predOut) begin
                        // taken branch: the prefetched instr is discarded
                        PC <= aluOut[ADDR_WIDTH-1:2];
                        state <= WAIT_INSTR;
                     end else begin
                        PC <= PCplus4;
                        state <= USE_PREFETCHED_INSTR;
                     end
                  end
                  default: begin // linear execution flow
                     PC <= PCplus4;
                     state <= needWaitALU ? WAIT_ALU_OR_DATA : USE_PREFETCHED_INSTR;
                  end
                endcase
             end
          end

          state[LOAD_bit]: begin
             `show_state("load");
             // data address (aluOut) was just updated
             // data ready at next cycle
             // we go to WAIT_ALU_OR_DATA to write back read data
             state <= WAIT_ALU_OR_DATA;
          end

          state[WAIT_ALU_OR_DATA_bit]: begin
             `show_state("wait_alu_or_data");
             // - If ALU is still busy, continue to wait.
             // - register writeback is active
             state <= aluBusy ? WAIT_ALU_OR_DATA : USE_PREFETCHED_INSTR;
          end

          state[ERROR_bit]: begin
             // ERROR is only left through reset.
             `bench($display("ERROR"));
             state <= ERROR;
          end

          default: begin
             `bench($display("UNKNOWN STATE"));
             state <= ERROR;
          end

        endcase
   end

   /*********************************************************************/
   // Debug trace (BENCH only): displays the PC and the class of the current
   // instruction, once per instruction, during the FETCH_REGS state.
`define show_opcode(opcode) `verbose($display("%x: %s",{PC,2'b00},opcode))

`ifdef BENCH
   always @(posedge clk) begin
      if(state[FETCH_REGS_bit]) begin
         case(instr[6:0])
           7'b0110111: `show_opcode("LUI");
           7'b0010111: `show_opcode("AUIPC");
           7'b1101111: `show_opcode("JAL");
           7'b1100111: `show_opcode("JALR");
           7'b1100011: `show_opcode("BRANCH");
           7'b0010011: `show_opcode("ALU reg imm");
           7'b0110011: `show_opcode("ALU reg reg");
           7'b0000011: `show_opcode("LOAD");
           7'b0100011: `show_opcode("STORE");
           7'b0001111: `show_opcode("FENCE");
           7'b1110011: `show_opcode("SYSTEM");
         endcase // case (instr[6:0])
      end // if (state[FETCH_REGS_bit])
   end
`endif

endmodule