7.2 Adding New hardware
7.2.1 Hardware Modification
See the "generic_tariq" folder.
To add another hardware block, you need to fully decode its address.
In yacc2.v, "my_reg_file_access" is the decoded
access signal for the additional hardware block.
The address mapping is described in "define.h".
//Jun.30.2004 blez bgtz bug fix
//Jul.7.2004 int bug fix
//Jul.11.2004 bgezQ,bltzQ
//Apr.2.2005 Change Port Address, change uart interface port
//Apr.3.2005 bgtz bug fix
//Apr.13.2005 Add stratix2 workaround
//Aug.1.2005 Separate Hardware/Ram Blocks
`include "define.h"
// yacc : top-level module.
// Instantiates the decoder (d1), pc_module (pc1), Pipelined_RegFile (pipe),
// uart_read (uread) and uart_write (uwite), and adds a small user register
// file (my_reg_file) that is multiplexed with the memory output MOUT.
//
// Ports (both variants):
//   clock       - clock for every register in this module
//   Async_Reset - active-low reset input; sync_reset is 1 while it is low
//   RXD         - connected to uart_read.rxd
//   TXD         - driven by uart_write.txd
// Extra ports when RTL_SIMULATION is defined:
//   MemoryWData       - registered copy of memory_wdata
//   MWriteFF          - registered copy of MWriteD2
//   data_port_address - low 16 bits of DAddrD
`ifdef RTL_SIMULATION
module yacc(clock,Async_Reset,MemoryWData,MWriteFF,data_port_address,
RXD,TXD);
input clock;
input Async_Reset;
output [31:0] MemoryWData;
output [15:0] data_port_address;
output MWriteFF;
input RXD;
output TXD;
`else
module yacc(clock,Async_Reset, RXD,TXD);
input clock;
input Async_Reset;
input RXD;
output TXD;
`endif
wire [31:0] MOUT,IRD1;
// Data returned to the register file: Hard_OUT or MOUT (see the multiplexer below).
wire [31:0] Hard_or_Memory_output;//Aug.1.2005
reg [31:0] Hard_OUT;//Aug.1.2005
wire hardware_output_true;//Aug.1.2005
reg hardware_output_trueD;//Aug.1.2005
wire RegWriteD2;
wire [1:0] A_Right_SELD1;
wire [1:0] RF_inputD2;
wire M_signD1,M_signD2;
wire [1:0] M_access_modeD1,M_access_modeD2;
wire [3:0] ALU_FuncD2;
wire [1:0] Shift_FuncD2;
wire [25:0] IMMD2,IMMD1;
wire [4:0] source_addrD2,target_addrD2;
wire [4:0] source_addrD1,target_addrD1,Shift_amountD2;
wire [4:0] RF_input_addr;
wire [2:0] PC_commandD1;
wire [7:0] uread_port;
wire takenD2;//
wire [25:0] PC;
wire [25:0] DAddress;//
reg [25:0] int_address;//interim
reg [25:0] PCD1,PCD2;
reg [25:0] IRD2;
reg [25:0] DAddrD;
wire [25:0] PCCDD;
wire [31:0] memory_indata;//
wire memory_sign;
wire [1:0] memory_access_mode;
wire [31:0] ea_reg_out;
wire [31:0] regfile_indata,regfile_indata_temp;//
wire reg_compare;
wire beqQ,bneQ,blezQ,bgtzQ;
wire [25:0] IMM;
wire clear_int;
wire jumpQ,branchQQ;
wire [31:0] memory_wdata;
wire [31:0] alu_source,alu_target;
wire [1:0] RRegSelD1;
wire A_Left_SELD1;
wire [1:0] mul_alu_selD2;
wire [3:0] mul_div_funcD2;
//registers
reg sync_reset;
wire [31:0] forward_source_reg,forward_target_reg;
// reg [31:0] MOUT_ff;
reg takenD3;
reg int_req;
reg beqQQ,bneQQ,blezQQ,bgtzQQ;
reg bgezQQ,bltzQQ;
reg MWriteFF;
wire MWriteD2,MWriteD1;
reg [31:0] MemoryWData;
wire NOP_Signal;
wire [7:0] control_state;
// DAddrD is 26 bits wide; only its low 16 bits reach data_port_address.
wire [15:0] data_port_address=DAddrD;
wire [3:0] mult_func;
wire pause_out;
wire Shift_Amount_selD2;
wire source_zero;//Jun.30.2004
wire int_req_uport;
wire uart_write_req;
wire uart_write_done,uart_write_busy;
wire int_stateD1;
wire bgezQ,bltzQ;
// Decoder instance. Its hard_access port is tied to hardware_output_true.
decoder d1(.clock(clock),.sync_reset(sync_reset),.MWriteD1(MWriteD1),
.RegWriteD2(RegWriteD2),.A_Right_SELD1(A_Right_SELD1),.RF_inputD2(RF_inputD2),
.RF_input_addr(RF_input_addr),.M_signD1( M_signD1),.M_signD2(M_signD2),
.M_access_modeD1(M_access_modeD1),.M_access_modeD2(M_access_modeD2),
.ALU_FuncD2(ALU_FuncD2),.Shift_FuncD2(Shift_FuncD2),
.source_addrD1(source_addrD1),.target_addrD1(target_addrD1),.IMMD2(IMMD2),
.source_addrD2(source_addrD2),.target_addrD2(target_addrD2),
.Shift_amountD2(Shift_amountD2),.PC_commandD1(PC_commandD1),.IMMD1(IMMD1),.IRD1(IRD1),.takenD3(takenD3),.takenD2(takenD2),.beqQ(beqQ),.bneQ(bneQ),.blezQ(blezQ),.bgtzQ(bgtzQ),
.DAddress(DAddress),.PC(PC),.memory_indata(memory_indata),.MOUT(MOUT),.IMM(IMM),
.branchQQ(branchQQ),.jumpQ(jumpQ),.int_req(int_req),.clear_int(clear_int),
.int_address(int_address),.A_Left_SELD1(A_Left_SELD1),.RRegSelD1(RRegSelD1),
.MWriteD2(MWriteD2),.NOP_Signal(NOP_Signal),.mul_alu_selD2(mul_alu_selD2),
.mul_div_funcD2(mul_div_funcD2),.pause_out(pause_out),.control_state(control_state),
.Shift_Amount_selD2(Shift_Amount_selD2),
.int_stateD1(int_stateD1),.bgezQ(bgezQ),.bltzQ(bltzQ),.hard_access(hardware_output_true));//Aug.1.2005
// Program counter instance; alu_source is fed to its ea_reg_source port.
pc_module pc1(.clock(clock),.sync_reset(sync_reset),.pc_commandD1(PC_commandD1),.PCC(PC),
.imm(IMM),.ea_reg_source(alu_source),.takenD2(takenD2),.takenD3(takenD3),
.branchQQ(branchQQ),.jumpQ(jumpQ),.NOP_Signal(NOP_Signal),
.control_state(control_state),.IMMD1(IMMD1),.PCCDD(PCCDD));
// Register file instance. Its source/target addresses come from IMM[25:21] and
// IMM[20:16], and its MOUT input is the hardware/RAM multiplexer output.
Pipelined_RegFile pipe(.clock(clock),.sync_reset(sync_reset),
.dest_addrD2(RF_input_addr),.source_addr(IMM[25:21]),.target_addr(IMM[20:16]),
.wren(RegWriteD2),.memory_wdata(memory_wdata),
.A_Right_SELD1(A_Right_SELD1),.A_Left_SELD1(A_Left_SELD1),.PCD1(PCD1),
.IMMD1(IMMD1[15:0]),.ALU_FuncD2(ALU_FuncD2),.Shift_FuncD2(Shift_FuncD2),
.Shift_amountD2(Shift_amountD2),.RRegSelD1(RRegSelD1),.MOUT(Hard_or_Memory_output),//Aug.1.2005
.RF_inputD2(RF_inputD2),.alu_source(alu_source),.alu_target(alu_target),
.MWriteD2(MWriteD2),.MWriteD1(MWriteD1),.mul_alu_selD2(mul_alu_selD2),
.mul_div_funcD2(mul_div_funcD2),.pause_out(pause_out),
.Shift_Amount_selD2(Shift_Amount_selD2),.int_stateD1(int_stateD1),.PCCDD(PCCDD));
//sync_reset
// Stratix2 build: sync_reset is simply the inverted Async_Reset sampled on the clock.
// Other builds: sync_reset is set asynchronously while Async_Reset is low and
// returns to 0 on the first rising clock edge after Async_Reset goes high.
`ifdef Stratix2
always @(posedge clock ) begin//Workaround for stratix2
sync_reset <=!Async_Reset;
end
`else
always @(posedge clock , negedge Async_Reset) begin
if (!Async_Reset) sync_reset <=1'b1;
else sync_reset <=!Async_Reset;
end
`endif
//PCD1,PCD2
// PCD1 registers PC+4; PCD2 is PCD1 delayed by one more clock.
always @(posedge clock) begin
PCD1 <=PC+4;
end
always @(posedge clock) begin
PCD2 <=PCD1;
end
//
// IRD2 is 26 bits wide, so only the low 26 bits of the 32-bit IRD1 are kept.
always @(posedge clock) begin
IRD2 <=IRD1;
end
// MWriteFF: MWriteD2 delayed by one clock, cleared by sync_reset.
always @(posedge clock) begin
if (sync_reset) MWriteFF<=1'b0;
else MWriteFF <=MWriteD2;
end
assign memory_access_mode=M_access_modeD1;
assign memory_sign=M_signD1;
//Apr.14.2005 assign DAddress=alu_source[25:0]+{ {6{IRD2[15]}},IRD2[15:0]};
// Data address = alu_source[25:0] + IRD2[15:0] sign-extended to 26 bits.
assign DAddress=alu_source[25:0]+{ {10{IRD2[15]}},IRD2[15:0]};
//
// DAddrD: DAddress delayed by one clock; used by the port decodes below.
always @(posedge clock) begin
DAddrD <=DAddress;
end
//
// MemoryWData: memory_wdata delayed by one clock.
always @(posedge clock) begin
MemoryWData <=memory_wdata;
end
assign memory_indata=memory_wdata;
assign reg_compare=( alu_source==alu_target);
// Capture the branch-type flags from the decoder; they hold their value while NOP_Signal is high.
always @(posedge clock) begin
if (!NOP_Signal) begin//Jun.29.2004
beqQQ<=beqQ;
bneQQ<=bneQ;
bgtzQQ<=bgtzQ;
blezQQ<=blezQ;
bgezQQ<=bgezQ;//Jul.11.2004
bltzQQ<=bltzQ;//Jul.11.2004
end
end
// Combinational branch decision from the registered flags, the equality
// compare of alu_source/alu_target, and the sign bit alu_source[31].
always @( beqQQ ,bneQQ,bgtzQQ,blezQQ,bgezQQ,bltzQQ,reg_compare,alu_source) begin//Jul.11.2004
takenD3= ( beqQQ && reg_compare) ||
( bneQQ && !reg_compare) ||
( bgtzQQ && !alu_source[31] && !reg_compare) || //Apr.3.2005 bug fix $s >0 Jun.30.2004
( blezQQ && (alu_source[31] || reg_compare )) ||
( bgezQQ && (!alu_source[31] || reg_compare )) || //Jul.11.2004
( bltzQQ && (alu_source[31] )); //Jul.11.2004//$s <0=Jun.30.2004
end
//Hardware Blocks Aug.1.2005
// User register file: 32-bit words with valid indices 4..1023, addressed by DAddrD[12:2].
reg [31:0] my_reg_file[4:1024-1];//Your Hardware Register File definition
reg hardware_output_trueDD;
uart_read uread( .sync_reset(sync_reset),.clk(clock), .rxd(RXD),
.buffer_reg(uread_port),.int_req(int_req_uport));
// The UART transmitter takes its data byte from MemoryWData[7:0].
uart_write uwite( .sync_reset(sync_reset), .clk(clock), .txd(TXD),.data_in(MemoryWData[7:0]) ,
.write_request(uart_write_req),.write_done(uart_write_done),.write_busy(uart_write_busy));
// Range decode on the undelayed address DAddress: true inside
// [`HARD_BLOCK_START_ADDRESS, `HARD_BLOCK_END_ADDRESS].
assign hardware_output_true=`HARD_BLOCK_START_ADDRESS<=DAddress
&& `HARD_BLOCK_END_ADDRESS>=DAddress;
// Per-block decodes on the delayed address DAddrD.
wire my_reg_file_access=`REG_PORT_START_ADDRESS<=DAddrD
&& `REG_PORT_END_ADDRESS>=DAddrD;
wire uread_access=`UART_PORT_ADDRESS==DAddrD;
// hardware_output_true is delayed by two clocks (…D, …DD) before it drives the
// output multiplexer; Hard_OUT is likewise registered from the one-clock-delayed DAddrD.
always @(posedge clock) begin
if (sync_reset) hardware_output_trueD<=0;
else hardware_output_trueD<=hardware_output_true;
end
always @(posedge clock) begin
if (sync_reset) hardware_output_trueDD<=0;
else hardware_output_trueDD<=hardware_output_trueD;
end
// Hardware read data register. The UART port has priority over the register file;
// when neither is addressed Hard_OUT keeps its previous value.
// UART word layout: [31:9]=0, [8]=uart_write_busy, [7:0]=uread_port.
always @(posedge clock) begin
if (sync_reset) Hard_OUT<=0;
else if(uread_access)
Hard_OUT <={23'h00_0000,uart_write_busy,uread_port};
else if (my_reg_file_access) begin
Hard_OUT <=my_reg_file[DAddrD[12:2]];
end
end
assign Hard_or_Memory_output=hardware_output_trueDD ? Hard_OUT: MOUT;//Hardware/RAM multiplexer
// NOTE(review): within this module uart_write_req, int_address and the
// my_reg_file write path are driven only when RAM16K is defined - confirm that
// define.h always defines it for this configuration.
`ifdef RAM16K
// A write (MWriteFF) to the UART port address requests a UART transmit.
assign uart_write_req= DAddrD==`UART_PORT_ADDRESS && MWriteFF ;//`UART_WRITE_PORT_ADDRESS ;
// Write decode: a write to `INTERUPPT_ADDRESS loads int_address (low 26 bits of
// MemoryWData); otherwise a write inside the register-port range stores
// MemoryWData into my_reg_file. sync_reset clears int_address only.
always @ (posedge clock) begin
if (sync_reset) int_address<=0;
else if (DAddrD==`INTERUPPT_ADDRESS & MWriteFF) int_address<=MemoryWData;
else if (my_reg_file_access & MWriteFF) my_reg_file[DAddrD[12:2]]<=MemoryWData;
end
`endif
//state machine
//latch with one shot pulse
//clear by clear_int
// int_req is set by int_req_uport and cleared by sync_reset or clear_int;
// clearing takes priority over setting.
always @(posedge clock) begin
if (sync_reset) int_req <=1'b0;
else if (clear_int) int_req <=1'b0;// assume one shot(1clk) pulse
else if ( int_req_uport) int_req<=1'b1;//
end
endmodule
7.2.2 C-Test Bench
See "\yacc\bench\c_src\bubble\bubble_tariq.c":
//Tariq Version of bubble sort Demo
//Aug.27.2005 Tak.Sugawara
//
//1) Calculate bubble sort
//2) Copy the calculation result to another hardware block
//3) Print the calculation result to Debug Port
//4) Send the calculation result to UART
//5) Halt CPU
// Memory-mapped port addresses used by this test program.
// print_port: print() and print_char() write bytes here; print() finishes a
// string by writing 0x00.
#define print_port 0x8000
// print_char_port / print_int_port: not referenced by the code shown here.
#define print_char_port 0x8001
#define print_int_port 0x8002
// print_long_port: print_long() writes an unsigned value here.
#define print_long_port 0x8004
// uart_port: read for status/receive data and written to transmit a byte
// (the same address is used for both directions).
#define uart_port 0x800c //for 16KRAM
#define uart_wport uart_port
#define uart_rport uart_port
// int_set_address: not referenced by the code shown here.
#define int_set_address 0x08008 //for 16KRAM
// First address of the user register file that main() copies the result into.
#define REG_PORT_START_ADDRESS 0x8010 //User Register File
// Number of elements sorted and copied.
#define ARRAY_SIZE 20
// Input data; main() sorts this array in place.
int iarray [ARRAY_SIZE] =
{23,55,17,12,12,32,0,948,48,383,832,21,459,58,21,56,38,980,75,68};
// Receives a copy of the sorted iarray.
int output [ARRAY_SIZE];
// Write one unsigned value to the print_long debug port (Verilog test bench use).
void print_long(unsigned val)
{
    volatile unsigned *const long_port = (volatile unsigned *)print_long_port;

    *long_port = val;
}
// Mask applied to the word read from uart_port; a write is issued only once
// this bit reads back as 0. Also used by putc_uart().
#define WRITE_BUSY 0x0100
// Send a NUL-terminated string to the UART write port, one byte at a time.
// Before each byte the UART port is polled until WRITE_BUSY is clear.
// The terminating NUL itself is not transmitted.
void print_uart(unsigned char* ptr)
{
    volatile unsigned *const status = (volatile unsigned *)uart_port;
    volatile unsigned char *const tx = (volatile unsigned char *)uart_wport;

    for (; *ptr; ptr++) {
        // Busy-wait until the transmitter reports it can take another byte.
        while (*status & WRITE_BUSY) {
        }
        *tx = *ptr;
    }
}
// Send a single byte to the UART write port after polling the UART port
// until the WRITE_BUSY bit is clear.
void putc_uart(unsigned char c)
{
    // Busy-wait while the transmitter is still busy.
    while ((*(volatile unsigned *)uart_port) & WRITE_BUSY) {
    }
    *(volatile unsigned char *)uart_wport = c;
}
// Read the UART port once and return the low byte of the word that was read
// (Verilog test bench use).
unsigned char read_uart()
{
    const unsigned word = *(volatile unsigned *)uart_rport;

    return (unsigned char)word;
}
// Write a NUL-terminated string to the debug print port, byte by byte, and
// then write a final 0x00 to mark the end of the string (Verilog test bench use).
void print(unsigned char* ptr)
{
    volatile unsigned char *const port = (volatile unsigned char *)print_port;

    for (; *ptr != 0; ++ptr) {
        *port = *ptr;
    }
    *port = 0x00;//Write Done
}
// Write a single byte to the debug print port.
// NOTE(review): this writes to print_port, not print_char_port - confirm that
// this is what the test bench expects.
void print_char(unsigned char val)
{
    volatile unsigned char *const port = (volatile unsigned char *)print_port;

    *port = val;
}
// Convert num to a fixed-width, zero-padded, 10-digit decimal string.
// Only the ten least-significant decimal digits are produced.
// The result lives in a static buffer that is overwritten by the next call.
char *itoa(unsigned long num)
{
    static char buf[12];
    char *cursor = &buf[10];

    *cursor = 0;
    // Fill the ten digit positions from least to most significant.
    while (cursor != buf) {
        --cursor;
        *cursor = (char)('0' + (num % 10));
        num /= 10;
    }
    return buf;
}
// Bubble-sort demo:
//   1) sort iarray in place,
//   2) copy it to output and print it on the debug port,
//   3) copy output into the user register file at REG_PORT_START_ADDRESS,
//   4) read the register file back and dump it with print_long(),
//   5) send the register file contents to the UART,
//   6) spin forever.
// The hardware register file is accessed through a volatile pointer so that
// every store and load really reaches the hardware block, and all
// integer<->pointer conversions are written as explicit casts.
void main()
{
    int x, y,i,holder;
    volatile unsigned *ptr;

    //Bubble Sort
    print("Bubble Sort Check Program :\n");
    print("Output Address=");
    print(itoa((unsigned long)output));
    print("Physical memory Address is 2bit >> ,so pma=");
    print(itoa((unsigned)output >>2));

    for(x = 0; x < ARRAY_SIZE; x++){
        for(y = 0; y < ARRAY_SIZE-1; y++) {
            if(iarray[y] > iarray[y+1])
            {
                holder = iarray[y+1];
                iarray[y+1] = iarray[y];
                iarray[y] = holder;
            }
        }
    }

    print("\nHere are output memory results..\n");
    for(i = 0; i< ARRAY_SIZE; i++)
        output[i] = iarray[i];

    //Check Result by debugger
    print("Sort Results :\n");
    for(i = 0; i< ARRAY_SIZE; i++){
        print(itoa(output[i]));
        print(" ");
    }
    print("\nDone\n");

    // Copy the sorted data into the user register file.
    print("Copy the result to Another H/W Block to ");
    ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
    print_long((unsigned)(ptr)); print("\n");
    for (i=0;i<(ARRAY_SIZE);i=i+1) *(ptr++)= output[i];
    print("\nCopy Done.\n");

    // Read the register file back from the hardware and dump it.
    print("Dumping copied result on another H/W Block.\n");
    ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
    for (i=0;i< (ARRAY_SIZE);i++) print_long(*(ptr++));

    // Send the register file contents to the UART, one number per line.
    print("\nOutput to uart port. It takes long time:\n");
    ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
    for(i = 0; i< ARRAY_SIZE; i++){
        print_uart(itoa(*(ptr++)));
        print_uart("\n");//Flash buffer of RTL test bench.
    }
    print_uart("\nDone\n");
    print("Uart output Done. Though CPU Enters perpetual loop,");
    print("FIFO Logic is now doing popback to UART. So please be patient for a few minutes.\n");
    while (1){//Perpetual Loop
    } ;
}
7.2.3 RTL-Simulation
Compile the C source with "yacc\bench\c_src\bubble\compile_tariq.bat".
Run the Veritak project "yacc\bench\verilog\generic_ram_rtl_tariq.vtakprj".
The results are:
Bubble Sort Check Program :
Output Address=0000001648Physical memory Address is 2bit >> ,so pma=0000000412
Here are output memory results..
Sort Results :
0000000000 0000000012 0000000012 0000000017 0000000021 0000000021 0000000023 0000000032 0000000038 0000000048 0000000055 0000000056 0000000058 0000000068 0000000075 0000000383 0000000459 0000000832 0000000948 0000000980
Done
Copy the result to Another H/W Block to 00008010
Copy Done.
Dumping copied result on another H/W Block.
00000000 0000000c 0000000c 00000011 00000015 00000015 00000017 00000020 00000026 00000030 00000037 00000038 0000003a 00000044 0000004b 0000017f 000001cb 00000340 000003b4 000003d4
Output to uart port. It takes long time:
Uart output Done. Though CPU Enters perpetual loop,FIFO Logic is now doing popback to UART. So please be patient for a few minutes.
0000000000 : time= 1369110000
0000000012 : time= 2372090000
0000000012 : time= 3375070000
0000000017 : time= 4378050000
0000000021 : time= 5381030000
0000000021 : time= 6384010000
0000000023 : time= 7386990000
0000000032 : time= 8389970000
0000000038 : time= 9392950000
0000000048 : time= 10395930000
0000000055 : time= 11398910000
0000000056 : time= 12401890000
0000000058 : time= 13404870000
0000000068 : time= 14407850000
0000000075 : time= 15410830000
0000000383 : time= 16413810000
0000000459 : time= 17416790000
0000000832 : time= 18419770000
0000000948 : time= 19422750000
0000000980 : time= 20425730000
: time= 20516910000
Done : time= 20972810000