7.2 Adding New hardware

7.2.1 Hardware Modification

See the "generic_tariq" folder.

To add another hardware block, you need to fully decode its address.
In yacc2.v, "my_reg_file_access" is the decoded access signal for the additional hardware block you will use.
The address mapping is described in "define.h".

//Jun.30.2004 blez bgtz bug fix
//Jul.7.2004 int bug fix
//Jul.11.2004 bgezQ,bltzQ
//Apr.2.2005 Change Port Address, change uart interface port
//Apr.3.2005 bgtz bug fix
//Apr.13.2005 Add stratix2 workaround
//Aug.1.2005 Separate Hardware/Ram Blocks

// Top-level module "yacc" (Aug.1.2005 revision: hardware blocks separated from RAM).
// It instantiates the instruction decoder, the program-counter module and the
// pipelined register file, and it contains the user hardware blocks (UART receiver,
// UART transmitter and the "my_reg_file" register file) together with the
// multiplexer that returns either hardware read data or MOUT to the register file.
`include "define.h"
`ifdef RTL_SIMULATION
// Simulation build: the write data, write strobe and data address are additionally
// brought out as ports.
module yacc(clock,Async_Reset,MemoryWData,MWriteFF,data_port_address,
           RXD,TXD);
        input clock;
        input Async_Reset;
        output [31:0] MemoryWData;
        output [15:0] data_port_address;




        output MWriteFF;
        input RXD;
        output TXD;
`else

// Non-simulation build: only clock, reset and the UART pins are ports.
module yacc(clock,Async_Reset,     RXD,TXD);
        input clock;
        input Async_Reset;
        input RXD;
        output TXD;

`endif

        wire [31:0] MOUT,IRD1;
        wire [31:0] Hard_or_Memory_output;//Aug.1.2005 read data after the Hardware/RAM multiplexer
        reg [31:0] Hard_OUT;//Aug.1.2005 registered read data of the hardware blocks
        wire hardware_output_true;//Aug.1.2005 DAddress lies inside the hardware block address range
        reg hardware_output_trueD;//Aug.1.2005 hardware_output_true delayed by one clock

        wire RegWriteD2;
        wire [1:0] A_Right_SELD1;
        wire [1:0] RF_inputD2;
        wire M_signD1,M_signD2;
        wire [1:0] M_access_modeD1,M_access_modeD2;
        wire [3:0] ALU_FuncD2;
        wire [1:0] Shift_FuncD2;
        wire [25:0] IMMD2,IMMD1;
        wire [4:0] source_addrD2,target_addrD2;
        wire [4:0] source_addrD1,target_addrD1,Shift_amountD2;
        wire [4:0] RF_input_addr;
        wire [2:0] PC_commandD1;
        wire [7:0] uread_port;// byte delivered by the UART receiver (buffer_reg)
        wire takenD2;//
        

        wire [25:0] PC;
        wire [25:0] DAddress;// data address, computed combinationally below
        reg [25:0] int_address;//interim
        reg [25:0] PCD1,PCD2;
        reg [25:0] IRD2;
        reg [25:0] DAddrD;// DAddress delayed by one clock
        wire [25:0] PCCDD;

        wire [31:0] memory_indata;//
        wire memory_sign;
        wire [1:0] memory_access_mode;

        wire [31:0] ea_reg_out;
        wire [31:0] regfile_indata,regfile_indata_temp;//
        wire reg_compare;
        wire beqQ,bneQ,blezQ,bgtzQ;
        wire [25:0] IMM;
        wire clear_int;
        wire jumpQ,branchQQ;
        wire [31:0] memory_wdata;
        wire [31:0] alu_source,alu_target;
        wire [1:0] RRegSelD1;
        wire A_Left_SELD1;
        wire [1:0] mul_alu_selD2;
        wire [3:0] mul_div_funcD2;
//registers
        reg sync_reset;
        wire [31:0] forward_source_reg,forward_target_reg;
//      reg [31:0] MOUT_ff;
        reg takenD3;
        reg int_req;
        

        reg beqQQ,bneQQ,blezQQ,bgtzQQ;
        reg bgezQQ,bltzQQ;
        reg MWriteFF;
        wire MWriteD2,MWriteD1;
        reg [31:0] MemoryWData;
        wire NOP_Signal;
        
        wire [7:0] control_state;
        // Only the low 16 bits of the 26-bit DAddrD are carried on data_port_address.
        wire [15:0] data_port_address=DAddrD;
        wire [3:0] mult_func;
        wire pause_out;
        wire Shift_Amount_selD2;
        wire source_zero;//Jun.30.2004
        wire int_req_uport;
        wire uart_write_req;
        wire uart_write_done,uart_write_busy;
        wire int_stateD1;
        wire bgezQ,bltzQ;

// Instruction decoder. Its hard_access port is tied to hardware_output_true (Aug.1.2005).
decoder d1(.clock(clock),.sync_reset(sync_reset),.MWriteD1(MWriteD1),
            .RegWriteD2(RegWriteD2),.A_Right_SELD1(A_Right_SELD1),.RF_inputD2(RF_inputD2),
            .RF_input_addr(RF_input_addr),.M_signD1( M_signD1),.M_signD2(M_signD2),
            .M_access_modeD1(M_access_modeD1),.M_access_modeD2(M_access_modeD2),
            .ALU_FuncD2(ALU_FuncD2),.Shift_FuncD2(Shift_FuncD2),
            .source_addrD1(source_addrD1),.target_addrD1(target_addrD1),.IMMD2(IMMD2),
            .source_addrD2(source_addrD2),.target_addrD2(target_addrD2),
            .Shift_amountD2(Shift_amountD2),.PC_commandD1(PC_commandD1),.IMMD1(IMMD1),.IRD1(IRD1),.takenD3(takenD3),.takenD2(takenD2),.beqQ(beqQ),.bneQ(bneQ),.blezQ(blezQ),.bgtzQ(bgtzQ),
            .DAddress(DAddress),.PC(PC),.memory_indata(memory_indata),.MOUT(MOUT),.IMM(IMM),
            .branchQQ(branchQQ),.jumpQ(jumpQ),.int_req(int_req),.clear_int(clear_int),
            .int_address(int_address),.A_Left_SELD1(A_Left_SELD1),.RRegSelD1(RRegSelD1),
            .MWriteD2(MWriteD2),.NOP_Signal(NOP_Signal),.mul_alu_selD2(mul_alu_selD2),
            .mul_div_funcD2(mul_div_funcD2),.pause_out(pause_out),.control_state(control_state),
            .Shift_Amount_selD2(Shift_Amount_selD2),
           .int_stateD1(int_stateD1),.bgezQ(bgezQ),.bltzQ(bltzQ),.hard_access(hardware_output_true));//Aug.1.2005



// Program counter module; alu_source is connected as ea_reg_source.
pc_module pc1(.clock(clock),.sync_reset(sync_reset),.pc_commandD1(PC_commandD1),.PCC(PC),
               .imm(IMM),.ea_reg_source(alu_source),.takenD2(takenD2),.takenD3(takenD3),
               .branchQQ(branchQQ),.jumpQ(jumpQ),.NOP_Signal(NOP_Signal),
               .control_state(control_state),.IMMD1(IMMD1),.PCCDD(PCCDD));



// Pipelined register file. Its MOUT input is fed from the Hardware/RAM multiplexer
// output (Hard_or_Memory_output), not directly from MOUT.
Pipelined_RegFile pipe(.clock(clock),.sync_reset(sync_reset),
        .dest_addrD2(RF_input_addr),.source_addr(IMM[25:21]),.target_addr(IMM[20:16]),
        .wren(RegWriteD2),.memory_wdata(memory_wdata),
        .A_Right_SELD1(A_Right_SELD1),.A_Left_SELD1(A_Left_SELD1),.PCD1(PCD1),
        .IMMD1(IMMD1[15:0]),.ALU_FuncD2(ALU_FuncD2),.Shift_FuncD2(Shift_FuncD2),
        .Shift_amountD2(Shift_amountD2),.RRegSelD1(RRegSelD1),.MOUT(Hard_or_Memory_output),//Aug.1.2005
        .RF_inputD2(RF_inputD2),.alu_source(alu_source),.alu_target(alu_target),
        .MWriteD2(MWriteD2),.MWriteD1(MWriteD1),.mul_alu_selD2(mul_alu_selD2),
        .mul_div_funcD2(mul_div_funcD2),.pause_out(pause_out),
        .Shift_Amount_selD2(Shift_Amount_selD2),.int_stateD1(int_stateD1),.PCCDD(PCCDD));

//sync_reset
// sync_reset is the registered inverse of the active-low Async_Reset.
// Without Stratix2 defined it is additionally set asynchronously on the falling
// edge of Async_Reset.
`ifdef Stratix2
        always @(posedge clock ) begin//Workaround for stratix2 
                  sync_reset <=!Async_Reset;
        end
`else
        always @(posedge clock , negedge Async_Reset) begin
                if (!Async_Reset) sync_reset <=1'b1;
                else  sync_reset <=!Async_Reset;
        end
`endif

//PCD1,PCD2
// PCD1 registers PC+4; PCD2 is PCD1 delayed by one further clock.
        always @(posedge clock) begin
                PCD1 <=PC+4;
        end

        always @(posedge clock) begin
                PCD2 <=PCD1;
        end

//
// IRD2 registers IRD1; IRD1 is 32 bits wide and IRD2 is 26 bits, so only the
// low 26 bits are kept.
        always @(posedge clock) begin
        IRD2 <=IRD1;
        end
        
// MWriteFF is MWriteD2 delayed by one clock, cleared by sync_reset.
        always @(posedge clock) begin
                if (sync_reset) MWriteFF<=1'b0;
                else    MWriteFF <=MWriteD2;
        end

        assign memory_access_mode=M_access_modeD1;
        assign memory_sign=M_signD1;
        
// Data address = alu_source[25:0] + IRD2[15:0] sign-extended to 26 bits
// (10 copies of bit 15 followed by the 16-bit field).
//Apr.14.2005   assign DAddress=alu_source[25:0]+{ {6{IRD2[15]}},IRD2[15:0]};
        assign DAddress=alu_source[25:0]+{ {10{IRD2[15]}},IRD2[15:0]};

//
// DAddrD is DAddress delayed by one clock; the hardware block decodes below use it.
        always @(posedge clock) begin
        DAddrD <=DAddress;
        end
//
// MemoryWData is memory_wdata delayed by one clock.
always @(posedge clock) begin
        MemoryWData <=memory_wdata;
        end

        assign memory_indata=memory_wdata;


     

// reg_compare is high when both ALU operands are equal.
        assign reg_compare=( alu_source==alu_target);
        


// Register the branch-type flags from the decoder; they hold their value while
// NOP_Signal is asserted.
        always @(posedge clock) begin
           if (!NOP_Signal) begin//Jun.29.2004
                              beqQQ<=beqQ;
                        bneQQ<=bneQ;
                        bgtzQQ<=bgtzQ;
                        blezQQ<=blezQ;
                              bgezQQ<=bgezQ;//Jul.11.2004
                        bltzQQ<=bltzQ;//Jul.11.2004
                end
        end

// Combinational branch-taken decision from the registered branch flags,
// reg_compare and the sign bit of alu_source.
        always @( beqQQ ,bneQQ,bgtzQQ,blezQQ,bgezQQ,bltzQQ,reg_compare,alu_source) begin//Jul.11.2004
                takenD3=  ( beqQQ   && reg_compare) ||
                          ( bneQQ   && !reg_compare) ||
                          ( bgtzQQ  && !alu_source[31] && !reg_compare) || //Apr.3.2005 bug fix $s >0 Jun.30.2004
                          ( blezQQ  && (alu_source[31]  || reg_compare )) ||
                          ( bgezQQ  && (!alu_source[31] || reg_compare )) || //Jul.11.2004 
                          ( bltzQQ  && (alu_source[31]  )); //Jul.11.2004//$s <0=Jun.30.2004
        end
         
//Hardware Blocks Aug.1.2005


        
        
        // NOTE(review): the array is declared with indices 4..1023 but is indexed with
        // the 11-bit DAddrD[12:2] (0..2047); whether out-of-range indices can occur
        // depends on REG_PORT_START_ADDRESS/REG_PORT_END_ADDRESS in define.h - confirm.
        reg [31:0] my_reg_file[4:1024-1];//Your Hardware Register File definition
        reg hardware_output_trueDD;// hardware_output_true delayed by two clocks

        // UART receiver: delivers the received byte on uread_port and raises int_req_uport.
        uart_read  uread( .sync_reset(sync_reset),.clk(clock), .rxd(RXD),
        .buffer_reg(uread_port),.int_req(int_req_uport));

        // UART transmitter: takes the low byte of MemoryWData when uart_write_req is asserted.
        uart_write uwite( .sync_reset(sync_reset), .clk(clock), .txd(TXD),.data_in(MemoryWData[7:0]) ,
        .write_request(uart_write_req),.write_done(uart_write_done),.write_busy(uart_write_busy));

        // Address decodes. hardware_output_true uses the un-registered DAddress;
        // my_reg_file_access and uread_access use the registered DAddrD.
        assign hardware_output_true=`HARD_BLOCK_START_ADDRESS<=DAddress
                                                         && `HARD_BLOCK_END_ADDRESS>=DAddress;
        wire my_reg_file_access=`REG_PORT_START_ADDRESS<=DAddrD
                                                         && `REG_PORT_END_ADDRESS>=DAddrD;

        wire uread_access=`UART_PORT_ADDRESS==DAddrD;

        // Two-stage delay of hardware_output_true; the second stage drives the
        // Hardware/RAM multiplexer select.
        always @(posedge clock) begin
                if (sync_reset) hardware_output_trueD<=0;
                else hardware_output_trueD<=hardware_output_true;
        end

        always @(posedge clock) begin
                if (sync_reset) hardware_output_trueDD<=0;
                else hardware_output_trueDD<=hardware_output_trueD;
        end


        // Registered hardware read data.
        // UART port read: bits [31:9]=0, bit 8=uart_write_busy, bits [7:0]=received byte.
        // Register file read: word selected by DAddrD[12:2].
        // Hard_OUT keeps its previous value when neither block is addressed.
        always @(posedge clock) begin
                if (sync_reset) Hard_OUT<=0;
                else if(uread_access) 
                                Hard_OUT <={23'h00_0000,uart_write_busy,uread_port};
                else if (my_reg_file_access) begin
                                Hard_OUT <=my_reg_file[DAddrD[12:2]];
                end             
        end
        assign Hard_or_Memory_output=hardware_output_trueDD ? Hard_OUT: MOUT;//Hardware/RAM multiplexer

// NOTE(review): uart_write_req is driven, and int_address / my_reg_file are written,
// only when RAM16K is defined; no alternative is visible in this module.
// int_address is 26 bits wide, so only the low 26 bits of MemoryWData are stored.
`ifdef RAM16K
        assign uart_write_req= DAddrD==`UART_PORT_ADDRESS && MWriteFF  ;//`UART_WRITE_PORT_ADDRESS ;
                always @ (posedge clock) begin
                        if (sync_reset) int_address<=0;
                        else if (DAddrD==`INTERUPPT_ADDRESS & MWriteFF) int_address<=MemoryWData;
                        else if (my_reg_file_access & MWriteFF) my_reg_file[DAddrD[12:2]]<=MemoryWData;
        end
`endif



//state machine
//latch with one shot pulse 
//clear by clear_int
// int_req is set by int_req_uport and cleared by sync_reset or clear_int;
// the clear conditions take priority over a simultaneous set.
        always @(posedge clock) begin
                if (sync_reset) int_req <=1'b0;
                else if (clear_int) int_req <=1'b0;// assume one shot(1clk) pulse
                else if ( int_req_uport) int_req<=1'b1;//                       
        end

endmodule


7.2.2 C-Test Bench

See "\yacc\bench\c_src\bubble\bubble_tariq.c"

//Tariq Version of bubble sort Demo
//Aug.27.2005 Tak.Sugawara
//
//1) Calculate bubble sort
//2) Copy the calculation result to another hardware block 
//3) Print the calculation result to Debug Port
//4) Send the calculation result to UART
//5) Halt CPU




//Debug output ports written by print(), print_char() and print_long().
//print_char_port and print_int_port are not referenced by the code in this file.
#define print_port 0x8000
#define print_char_port 0x8001
#define print_int_port 0x8002
#define print_long_port 0x8004





//UART port: the same address is used for reading (status / received byte) and writing (transmit byte).
#define uart_port               0x800c //for 16KRAM
#define uart_wport uart_port
#define uart_rport uart_port
//int_set_address is not referenced by the code in this file.
#define int_set_address 0x08008 //for 16KRAM
//Base address of the additional hardware block that main() copies the sorted result to.
#define REG_PORT_START_ADDRESS  0x8010 //User Register File




//Number of elements to sort.
#define ARRAY_SIZE 20
//Unsorted input data; main() sorts this array in place.
int iarray [ARRAY_SIZE] =  
{23,55,17,12,12,32,0,948,48,383,832,21,459,58,21,56,38,980,75,68};
//Copy of the sorted result, filled by main().
int output [ARRAY_SIZE];
 
//Writes one unsigned word to the debug port print_long_port (Verilog test bench use).
void print_long(unsigned val)
{
        volatile unsigned *long_port = (volatile unsigned *)print_long_port;

        *long_port = val;
}

//Sends a NUL-terminated string to the UART, one byte at a time.
//Before every byte the UART port is polled until the WRITE_BUSY bit reads 0.
//The terminating NUL itself is not transmitted.
void print_uart(unsigned char* ptr)
{
        #define WRITE_BUSY 0x0100 //busy bit in the word read from uart_port; putc_uart uses it too
        volatile unsigned *status_reg = (volatile unsigned *)uart_port;
        volatile unsigned char *tx_reg = (volatile unsigned char *)uart_wport;

        for (; *ptr; ptr++) {
                while (*status_reg & WRITE_BUSY)
                        ;       //spin while the transmitter reports busy
                *tx_reg = *ptr;
        }
}       


//Sends a single byte to the UART after polling uart_port until the
//WRITE_BUSY bit reads 0.
void putc_uart(unsigned char c)
{
        volatile unsigned *status_reg = (volatile unsigned *)uart_port;

        while (*status_reg & WRITE_BUSY)
                ;       //spin while the transmitter reports busy
        *(volatile unsigned char *)uart_wport = c;
}       

//Reads one word from the UART port and returns its low 8 bits.
unsigned char read_uart()
{
        return (unsigned char)(*(volatile unsigned *)uart_rport);
}       

//Writes a NUL-terminated string byte by byte to the debug port print_port,
//followed by a 0x00 byte that marks the end of the write (Verilog test bench use).
void print(unsigned char* ptr)
{
        volatile unsigned char *debug_port = (volatile unsigned char *)print_port;

        for (; *ptr; ptr++)
                *debug_port = *ptr;

        *debug_port = 0x00;     //Write Done
}
//Writes a single byte to the debug port print_port.
//Note: the store goes to print_port, not to print_char_port.
void print_char(unsigned char val)
{
        volatile unsigned char *debug_port = (volatile unsigned char *)print_port;

        *debug_port = val;
}





//Converts num to a fixed-width, zero-padded, 10-digit decimal string.
//Only the 10 least significant decimal digits are produced.
//The result lives in a static buffer that is overwritten by the next call.
char *itoa(unsigned long num)
{
   static char buf[12];
   char *cursor = buf + 10;

   *cursor = 0;                 //terminator after the 10 digits
   while (cursor != buf) {      //fill digits from least to most significant
      *--cursor = (char)('0' + (num % 10));
      num /= 10;
   }
   return buf;
}

//Bubble sort demo:
// 1) sorts iarray in place and copies it to output,
// 2) prints the result to the debug port,
// 3) copies the result to the hardware block at REG_PORT_START_ADDRESS and dumps it back,
// 4) sends the values read from the hardware block to the UART,
// 5) spins forever.
//Pointer/integer conversions are written as explicit casts, and the hardware
//block is accessed through a volatile pointer so that every store and load
//really reaches the memory-mapped registers.
void main()
{
        
 int x, y,i,holder;
 volatile unsigned *ptr;//memory-mapped hardware register file

//Bubble Sort
  print("Bubble Sort Check Program :\n");
  print("Output Address=");
  print(itoa((unsigned long)output));
  print("Physical memory Address is 2bit >> ,so pma=");
  print(itoa((unsigned)output >>2));
  
  for(x = 0; x < ARRAY_SIZE; x++){
        for(y = 0; y < ARRAY_SIZE-1; y++) {
                if(iarray[y] > iarray[y+1])
                {
                         //swap neighbours that are out of order
                         holder = iarray[y+1];
                         iarray[y+1] = iarray[y];
                         iarray[y] = holder;
                }
        }
  }
  
  print("\nHere are output memory results..\n");
  for(i = 0; i< ARRAY_SIZE; i++)
                output[i] = iarray[i];
 //Check Result by debugger
  print("Sort Results :\n");
  for(i = 0; i< ARRAY_SIZE; i++){
        print(itoa(output[i]));
        print(" ");             
  }
  print("\nDone\n");    
        
        
  print("Copy the result to Another H/W Block to ");
  ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
  print_long((unsigned)(ptr));  print("\n");
  for (i=0;i<(ARRAY_SIZE);i=i+1) *(ptr++)= output[i];
  print("\nCopy Done.\n");      

  print("Dumping copied result on another H/W Block.\n");
  ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
  for (i=0;i< (ARRAY_SIZE);i++) print_long(*(ptr++));
                
        
  print("\nOutput to uart port. It takes long time:\n");
  ptr=(volatile unsigned *)REG_PORT_START_ADDRESS;
  for(i = 0; i< ARRAY_SIZE; i++){
        print_uart(itoa(*(ptr++)));
        print_uart("\n");//Flush buffer of RTL test bench.
  }
  print_uart("\nDone\n");       
  print("Uart output Done. Though CPU Enters perpetual loop,");
  print("FIFO Logic is now doing popback to UART. So please be patient for a few minutes.\n");
  
  while (1){//Perpetual Loop
        
  }     ;       
}

7.2.3 RTL-Simulation
Compile the C source with "yacc\bench\c_src\bubble\compile_tariq.bat".
Run the Veritak project "yacc\bench\verilog\generic_ram_rtl_tariq.vtakprj".

The results are:

Bubble Sort Check Program :
Output Address=0000001648Physical memory Address is 2bit >> ,so pma=0000000412
Here are output memory results..
Sort Results :
0000000000 0000000012 0000000012 0000000017 0000000021 0000000021 0000000023 0000000032 0000000038 0000000048 0000000055 0000000056 0000000058 0000000068 0000000075 0000000383 0000000459 0000000832 0000000948 0000000980 
Done
Copy the result to Another H/W Block to 00008010 

Copy Done.
Dumping copied result on another H/W Block.
00000000 0000000c 0000000c 00000011 00000015 00000015 00000017 00000020 00000026 00000030 00000037 00000038 0000003a 00000044 0000004b 0000017f 000001cb 00000340 000003b4 000003d4 
Output to uart port. It takes long time:
Uart output Done. Though CPU Enters perpetual loop,FIFO Logic is now doing popback to UART. So please be patient for a few minutes.
0000000000     : time=          1369110000
0000000012     : time=          2372090000
0000000012     : time=          3375070000
0000000017     : time=          4378050000
0000000021     : time=          5381030000
0000000021     : time=          6384010000
0000000023     : time=          7386990000
0000000032     : time=          8389970000
0000000038     : time=          9392950000
0000000048     : time=         10395930000
0000000055     : time=         11398910000
0000000056     : time=         12401890000
0000000058     : time=         13404870000
0000000068     : time=         14407850000
0000000075     : time=         15410830000
0000000383     : time=         16413810000
0000000459     : time=         17416790000
0000000832     : time=         18419770000
0000000948     : time=         19422750000
0000000980     : time=         20425730000
     : time=         20516910000
Done     : time=         20972810000