From 21617bcfc635d08ee1563cb0870327dfbc7f758b Mon Sep 17 00:00:00 2001 From: Nitsirks Date: Thu, 21 Nov 2024 08:33:03 -0800 Subject: [PATCH 01/10] added masked pwm intt opcode using pwm intt masked opcode in sequencer --- src/mldsa_top/rtl/mldsa_ctrl.sv | 11 +- src/mldsa_top/rtl/mldsa_ctrl_pkg.sv | 17 ++- src/mldsa_top/rtl/mldsa_seq_prim.sv | 46 +++---- src/mldsa_top/rtl/mldsa_seq_sec.sv | 201 ++++++++++++---------------- src/mldsa_top/rtl/mldsa_top.sv | 6 +- 5 files changed, 138 insertions(+), 143 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_ctrl.sv b/src/mldsa_top/rtl/mldsa_ctrl.sv index a85d1d2..f1bb33c 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl.sv @@ -305,6 +305,8 @@ always_comb mldsa_privkey_lock = '0; //shared aux functions logic [1:0] normcheck_enable; + + logic [1:0][MLDSA_MEM_ADDR_WIDTH-1:0] ntt_temp_address; //Interrupts logic mldsa_status_done_d, mldsa_status_done_p; @@ -1159,9 +1161,12 @@ always_comb mldsa_privkey_lock = '0; always_comb sampler_src_offset = {4'b0, msg_cnt}; //fixme + //passing a bit on the immediate field to mux between temp address locations + always_comb ntt_temp_address[0] = prim_instr.imm[0] ? MLDSA_TEMP3_BASE : MLDSA_TEMP0_BASE; + //FIXME one interface here? always_comb ntt_mem_base_addr_o[0] = '{src_base_addr:prim_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], - interim_base_addr:prim_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], + interim_base_addr:ntt_temp_address[0], dest_base_addr:prim_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; always_comb pwo_mem_base_addr_o[0] = '{pw_base_addr_b:prim_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src @@ -1456,10 +1461,12 @@ mldsa_seq_prim mldsa_seq_prim_inst ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; end end + //passing a bit on the immediate field to mux between temp address locations + always_comb ntt_temp_address[1] = sec_instr.imm[0] ? MLDSA_TEMP3_BASE : MLDSA_TEMP0_BASE; //FIXME one interface here? 
always_comb ntt_mem_base_addr_o[1] = '{src_base_addr:sec_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], - interim_base_addr:sec_instr.operand2[MLDSA_MEM_ADDR_WIDTH-1:0], + interim_base_addr:ntt_temp_address[1], dest_base_addr:sec_instr.operand3[MLDSA_MEM_ADDR_WIDTH-1:0]}; always_comb pwo_mem_base_addr_o[1] = '{pw_base_addr_b:sec_instr.operand1[MLDSA_MEM_ADDR_WIDTH-1:0], //FIXME PWO src diff --git a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv index 6eaa399..b61a643 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl_pkg.sv @@ -141,7 +141,8 @@ package mldsa_ctrl_pkg; MLDSA_PWM_SMPL, MLDSA_PWM_ACCUM_SMPL, MLDSA_PWA, - MLDSA_PWS + MLDSA_PWS, + MLDSA_PWM_INTT } mldsa_ntt_mode_e; typedef enum logic[3:0] { @@ -201,6 +202,12 @@ package mldsa_ctrl_pkg; localparam mldsa_opcode_t MLDSA_UOP_PWM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM, masking_en:1'b0, shuffling_en:1'b1}; localparam mldsa_opcode_t MLDSA_UOP_PWA = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWA, masking_en:1'b0, shuffling_en:1'b1}; localparam mldsa_opcode_t MLDSA_UOP_PWS = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWS, masking_en:1'b0, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_MASKED_NTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_NTT, masking_en:1'b1, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_MASKED_INTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_INTT, masking_en:1'b1, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_MASKED_PWM = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM, masking_en:1'b1, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_MASKED_PWA = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWA, masking_en:1'b1, shuffling_en:1'b1}; + localparam mldsa_opcode_t 
MLDSA_UOP_MASKED_PWS = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWS, masking_en:1'b1, shuffling_en:1'b1}; + localparam mldsa_opcode_t MLDSA_UOP_MASKED_PWM_INTT = '{keccak_en: 1'b0, sampler_en:1'b0, ntt_en:1'b1, aux_en: 1'b0, mode:MLDSA_PWM_INTT,masking_en:1'b1, shuffling_en:1'b1}; //Load Keccak with data but don't run it yet localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE256 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE256, masking_en:1'b0, shuffling_en:1'b0}; localparam mldsa_opcode_t MLDSA_UOP_LD_SHAKE128 = '{keccak_en: 1'b1, sampler_en:1'b0, ntt_en:1'b0, aux_en: 1'b0, mode:MLDSA_SHAKE128, masking_en:1'b0, shuffling_en:1'b0}; @@ -434,10 +441,10 @@ package mldsa_ctrl_pkg; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CHECK_C_VLD = MLDSA_SIGN_INIT_S + 24; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_VALID_S = MLDSA_SIGN_CHECK_C_VLD + 1; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CHECK_Y_VLD = MLDSA_SIGN_VALID_S + 1; - localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_Y = MLDSA_SIGN_VALID_S + 37; - localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CHECK_W0_VLD = MLDSA_SIGN_VALID_S + 54; - localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_W0 = MLDSA_SIGN_VALID_S + 103; - localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_S = MLDSA_SIGN_VALID_S + 105; + localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_Y = MLDSA_SIGN_VALID_S + 30; + localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CHECK_W0_VLD = MLDSA_SIGN_VALID_S + 39; + localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_W0 = MLDSA_SIGN_VALID_S + 80; + localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_S = MLDSA_SIGN_VALID_S + 82; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_CLEAR_C = MLDSA_SIGN_GEN_S + 1; localparam [MLDSA_PROG_ADDR_W-1 : 0] MLDSA_SIGN_GEN_E = MLDSA_SIGN_GEN_S + 2; diff --git a/src/mldsa_top/rtl/mldsa_seq_prim.sv b/src/mldsa_top/rtl/mldsa_seq_prim.sv index 63ba99d..ddd3d2d 100644 --- 
a/src/mldsa_top/rtl/mldsa_seq_prim.sv +++ b/src/mldsa_top/rtl/mldsa_seq_prim.sv @@ -93,7 +93,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 31 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0005, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 32 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0006, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 33 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 33 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 34 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_0_BASE, operand3:MLDSA_T0_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -105,7 +105,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 40 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0105, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 41 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0106, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 42 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 42 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 43 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_1_BASE, operand3:MLDSA_T1_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -117,7 +117,7 @@ module mldsa_seq_prim 
MLDSA_KG_S+ 49 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0205, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 50 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0206, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 51 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 51 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 52 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_2_BASE, operand3:MLDSA_T2_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -129,7 +129,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 58 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0305, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 59 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0306, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 60 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 60 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 61 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_3_BASE, operand3:MLDSA_T3_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -141,7 +141,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 67 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0405, length:'d34, operand1:MLDSA_RHO_ID, 
operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 68 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0406, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 69 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 69 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 70 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_4_BASE, operand3:MLDSA_T4_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -153,7 +153,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 76 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0505, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 77 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0506, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 78 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 78 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 79 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_5_BASE, operand3:MLDSA_T5_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -165,7 +165,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 85 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0605, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 86 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0606, 
length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 87 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 87 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 88 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_6_BASE, operand3:MLDSA_T6_BASE}; //ExpandA(ρ) AND Aˆ NTT(s1) @@ -177,7 +177,7 @@ module mldsa_seq_prim MLDSA_KG_S+ 94 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0705, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_5_NTT_BASE, operand3:MLDSA_AS0_BASE}; MLDSA_KG_S+ 95 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0706, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_S1_6_NTT_BASE, operand3:MLDSA_AS0_BASE}; //NTT−1(Aˆ ◦NTT(s1)) - MLDSA_KG_S+ 96 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d00, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; + MLDSA_KG_S+ 96 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'d01, length:'d00, operand1:MLDSA_AS0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_AS0_INTT_BASE}; //t ←NTT−1(Aˆ ◦NTT(s1))+s2 MLDSA_KG_S+ 97 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'d00, length:'d00, operand1:MLDSA_AS0_INTT_BASE, operand2:MLDSA_S2_7_BASE, operand3:MLDSA_T7_BASE}; //(t1,t0)←Power2Round(t,d) AND pk ←pkEncode(ρ,t1) @@ -217,13 +217,13 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_Y_S+ 5 : data_o_rom <= '{opcode:MLDSA_UOP_EXP_MASK, imm:'h0005, length:'d66, operand1:MLDSA_RHO_P_KAPPA_ID, operand2:MLDSA_NOP, operand3:MLDSA_Y_5_BASE}; MLDSA_SIGN_MAKE_Y_S+ 6 : data_o_rom <= '{opcode:MLDSA_UOP_EXP_MASK, imm:'h0006, length:'d66, operand1:MLDSA_RHO_P_KAPPA_ID, operand2:MLDSA_NOP, operand3:MLDSA_Y_6_BASE}; 
//NTT(Y) - MLDSA_SIGN_MAKE_Y_S+ 7 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_0_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 8 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_1_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_1_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 9 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_2_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_2_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 10 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_3_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_3_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 11 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_4_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_4_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 12 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_5_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_5_NTT_BASE}; - MLDSA_SIGN_MAKE_Y_S+ 13 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0000, length:'d00, operand1:MLDSA_Y_6_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_6_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 7 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_0_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 8 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_1_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_1_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 9 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_2_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_2_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 10 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_3_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_3_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 11 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, 
operand1:MLDSA_Y_4_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_4_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 12 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_5_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_5_NTT_BASE}; + MLDSA_SIGN_MAKE_Y_S+ 13 : data_o_rom <= '{opcode:MLDSA_UOP_NTT, imm:'h0001, length:'d00, operand1:MLDSA_Y_6_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_Y_6_NTT_BASE}; //Check W0 clear MLDSA_SIGN_CHECK_W0_CLR : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; //Aˆ ←ExpandA(ρ) AND Aˆ ◦NTT(y) @@ -235,7 +235,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 5 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0005, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 6 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0006, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 7 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_0_BASE}; + MLDSA_SIGN_MAKE_W_S+ 7 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_0_BASE}; MLDSA_SIGN_MAKE_W_S+ 8 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0100, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 9 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0101, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -245,7 +245,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 13 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0105, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 14 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0106, 
length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 15 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_1_BASE}; + MLDSA_SIGN_MAKE_W_S+ 15 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_1_BASE}; MLDSA_SIGN_MAKE_W_S+ 16 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0200, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 17 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0201, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -255,7 +255,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 21 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0205, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 22 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0206, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 23 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_2_BASE}; + MLDSA_SIGN_MAKE_W_S+ 23 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_2_BASE}; MLDSA_SIGN_MAKE_W_S+ 24 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0300, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 25 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0301, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -265,7 +265,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 29 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0305, length:'d34, 
operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 30 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0306, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 31 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_3_BASE}; + MLDSA_SIGN_MAKE_W_S+ 31 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_3_BASE}; MLDSA_SIGN_MAKE_W_S+ 32 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0400, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 33 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0401, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -275,7 +275,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 37 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0405, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 38 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0406, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 39 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_4_BASE}; + MLDSA_SIGN_MAKE_W_S+ 39 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_4_BASE}; MLDSA_SIGN_MAKE_W_S+ 40 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0500, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 41 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0501, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, 
operand3:MLDSA_AY0_BASE}; @@ -285,7 +285,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 45 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0505, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 46 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0506, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 47 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_5_BASE}; + MLDSA_SIGN_MAKE_W_S+ 47 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_5_BASE}; MLDSA_SIGN_MAKE_W_S+ 48 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0600, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 49 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0601, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -295,7 +295,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 53 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0605, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 54 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0606, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 55 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_6_BASE}; + MLDSA_SIGN_MAKE_W_S+ 55 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_6_BASE}; MLDSA_SIGN_MAKE_W_S+ 56 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWM, imm:'h0700, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_0_NTT_BASE, 
operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 57 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0701, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_1_NTT_BASE, operand3:MLDSA_AY0_BASE}; @@ -305,7 +305,7 @@ module mldsa_seq_prim MLDSA_SIGN_MAKE_W_S+ 61 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0705, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_5_NTT_BASE, operand3:MLDSA_AY0_BASE}; MLDSA_SIGN_MAKE_W_S+ 62 : data_o_rom <= '{opcode:MLDSA_UOP_REJS_PWMA, imm:'h0706, length:'d34, operand1:MLDSA_RHO_ID, operand2:MLDSA_Y_6_NTT_BASE, operand3:MLDSA_AY0_BASE}; - MLDSA_SIGN_MAKE_W_S+ 63 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_7_BASE}; + MLDSA_SIGN_MAKE_W_S+ 63 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0001, length:'d00, operand1:MLDSA_AY0_BASE, operand2:MLDSA_TEMP3_BASE, operand3:MLDSA_W0_7_BASE}; //Set Y valid //FIXME this can move before MAKE_W if we opt accumulator to not have to read dest MLDSA_SIGN_SET_Y : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_UOP_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; diff --git a/src/mldsa_top/rtl/mldsa_seq_sec.sv b/src/mldsa_top/rtl/mldsa_seq_sec.sv index f38d8ee..cabb40e 100644 --- a/src/mldsa_top/rtl/mldsa_seq_sec.sv +++ b/src/mldsa_top/rtl/mldsa_seq_sec.sv @@ -86,129 +86,106 @@ module mldsa_seq_sec //Compute Z and perform norm check MLDSA_SIGN_CHECK_Y_VLD : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+2 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+3 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+4 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, 
imm:'h0000, length:'d00, operand1:MLDSA_Y_0_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+5 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+6 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h000}; - - MLDSA_SIGN_VALID_S+7 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_1_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+8 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+9 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_1_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+10 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+11 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h040}; - - MLDSA_SIGN_VALID_S+12 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_2_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+13 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+14 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_2_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+15 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+16 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, 
length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h080}; - - MLDSA_SIGN_VALID_S+17 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_3_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+18 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+19 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_3_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+2 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_0_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+3 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_0_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+4 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+5 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h000}; + + MLDSA_SIGN_VALID_S+6 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_1_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+7 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_1_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+8 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+9 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h040}; + + MLDSA_SIGN_VALID_S+10 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, 
length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_2_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+11 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_2_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+12 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+13 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h080}; + + MLDSA_SIGN_VALID_S+14 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_3_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+15 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_3_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+16 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+17 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h0C0}; + + MLDSA_SIGN_VALID_S+18 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_4_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+19 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_4_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; MLDSA_SIGN_VALID_S+20 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+21 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h0C0}; - - MLDSA_SIGN_VALID_S+22 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, 
length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_4_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+23 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+24 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_4_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+25 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+26 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h100}; - - MLDSA_SIGN_VALID_S+27 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_5_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+28 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+29 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_5_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+30 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+31 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h140}; - - MLDSA_SIGN_VALID_S+32 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_6_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+33 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS1_BASE}; - MLDSA_SIGN_VALID_S+34 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, 
operand1:MLDSA_Y_6_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; - MLDSA_SIGN_VALID_S+35 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+36 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h180}; + MLDSA_SIGN_VALID_S+21 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h100}; + + MLDSA_SIGN_VALID_S+22 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_5_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+23 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_5_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+24 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+25 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h140}; + + MLDSA_SIGN_VALID_S+26 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S1_6_BASE, operand3:MLDSA_CS1_BASE}; + MLDSA_SIGN_VALID_S+27 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_Y_6_BASE, operand2:MLDSA_CS1_BASE, operand3:MLDSA_Z_BASE}; + MLDSA_SIGN_VALID_S+28 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_Z, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+29 : data_o_rom <= '{opcode:MLDSA_UOP_SIGENCODE, imm:'h0000, length:'d00, operand1:MLDSA_Z_BASE, operand2:MLDSA_NOP, operand3:15'h180}; MLDSA_SIGN_CLEAR_Y : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, 
operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+38 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T0_BASE, operand3:MLDSA_CT_0_BASE}; - MLDSA_SIGN_VALID_S+39 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T1_BASE, operand3:MLDSA_CT_1_BASE}; - MLDSA_SIGN_VALID_S+40 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T2_BASE, operand3:MLDSA_CT_2_BASE}; - MLDSA_SIGN_VALID_S+41 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T3_BASE, operand3:MLDSA_CT_3_BASE}; - MLDSA_SIGN_VALID_S+42 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T4_BASE, operand3:MLDSA_CT_4_BASE}; - MLDSA_SIGN_VALID_S+43 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T5_BASE, operand3:MLDSA_CT_5_BASE}; - MLDSA_SIGN_VALID_S+44 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T6_BASE, operand3:MLDSA_CT_6_BASE}; - MLDSA_SIGN_VALID_S+45 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T7_BASE, operand3:MLDSA_CT_7_BASE}; - MLDSA_SIGN_VALID_S+46 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_0_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_0_BASE}; - MLDSA_SIGN_VALID_S+47 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_1_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_1_BASE}; - MLDSA_SIGN_VALID_S+48 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_2_BASE}; - MLDSA_SIGN_VALID_S+49 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, 
operand1:MLDSA_CT_3_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_3_BASE}; - MLDSA_SIGN_VALID_S+50 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_4_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_4_BASE}; - MLDSA_SIGN_VALID_S+51 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_5_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_5_BASE}; - MLDSA_SIGN_VALID_S+52 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_6_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_6_BASE}; - MLDSA_SIGN_VALID_S+53 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CT_7_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CT_7_BASE}; + MLDSA_SIGN_VALID_S+31 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T0_BASE, operand3:MLDSA_CT_0_BASE}; + MLDSA_SIGN_VALID_S+32 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T1_BASE, operand3:MLDSA_CT_1_BASE}; + MLDSA_SIGN_VALID_S+33 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T2_BASE, operand3:MLDSA_CT_2_BASE}; + MLDSA_SIGN_VALID_S+34 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T3_BASE, operand3:MLDSA_CT_3_BASE}; + MLDSA_SIGN_VALID_S+35 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T4_BASE, operand3:MLDSA_CT_4_BASE}; + MLDSA_SIGN_VALID_S+36 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T5_BASE, operand3:MLDSA_CT_5_BASE}; + MLDSA_SIGN_VALID_S+37 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T6_BASE, 
operand3:MLDSA_CT_6_BASE}; + MLDSA_SIGN_VALID_S+38 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_T7_BASE, operand3:MLDSA_CT_7_BASE}; MLDSA_SIGN_CHECK_W0_VLD : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; //Make R0, CT0 and Hint_r - MLDSA_SIGN_VALID_S+55 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+56 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+57 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_0_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+58 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+59 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+60 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_0_BASE, operand3:MLDSA_HINT_R_0_BASE}; - - MLDSA_SIGN_VALID_S+61 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_1_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+62 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+63 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_1_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+64 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, 
operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+65 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_1_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+66 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_1_BASE, operand3:MLDSA_HINT_R_1_BASE}; - - MLDSA_SIGN_VALID_S+67 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_2_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+68 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+69 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_2_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+70 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+71 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_2_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+72 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_2_BASE, operand3:MLDSA_HINT_R_2_BASE}; - - MLDSA_SIGN_VALID_S+73 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_3_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+74 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+75 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_3_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+76 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, 
imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+77 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_3_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+78 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_3_BASE, operand3:MLDSA_HINT_R_3_BASE}; - - MLDSA_SIGN_VALID_S+79 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_4_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+80 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+81 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_4_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+82 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+83 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_4_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+84 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_4_BASE, operand3:MLDSA_HINT_R_4_BASE}; - - MLDSA_SIGN_VALID_S+85 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_5_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+86 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+87 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_5_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+88 : data_o_rom <= 
'{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+89 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_5_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+90 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_5_BASE, operand3:MLDSA_HINT_R_5_BASE}; - - MLDSA_SIGN_VALID_S+91 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_6_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+92 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+93 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_6_BASE, operand3:MLDSA_R0_BASE}; - MLDSA_SIGN_VALID_S+94 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+95 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_6_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+96 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_6_BASE, operand3:MLDSA_HINT_R_6_BASE}; - - MLDSA_SIGN_VALID_S+97 : data_o_rom <= '{opcode:MLDSA_UOP_PWM, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_7_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+98 : data_o_rom <= '{opcode:MLDSA_UOP_INTT, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_TEMP0_BASE, operand3:MLDSA_CS2_BASE}; - MLDSA_SIGN_VALID_S+99 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_7_BASE, operand3:MLDSA_R0_BASE}; - 
MLDSA_SIGN_VALID_S+100 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+101 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_7_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+102 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_7_BASE, operand3:MLDSA_HINT_R_7_BASE}; + MLDSA_SIGN_VALID_S+40 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_0_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+41 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_0_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+42 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+43 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+44 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_0_BASE, operand3:MLDSA_HINT_R_0_BASE}; + + MLDSA_SIGN_VALID_S+45 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_1_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+46 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_1_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+47 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+48 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, 
operand1:MLDSA_CT_1_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+49 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_1_BASE, operand3:MLDSA_HINT_R_1_BASE}; + + MLDSA_SIGN_VALID_S+50 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_2_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+51 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_2_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+52 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+53 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_2_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+54 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_2_BASE, operand3:MLDSA_HINT_R_2_BASE}; + + MLDSA_SIGN_VALID_S+55 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_3_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+56 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_3_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+57 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+58 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_3_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+59 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_3_BASE, operand3:MLDSA_HINT_R_3_BASE}; + + MLDSA_SIGN_VALID_S+60 : data_o_rom <= 
'{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_4_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+61 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_4_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+62 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+63 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_4_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+64 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_4_BASE, operand3:MLDSA_HINT_R_4_BASE}; + + MLDSA_SIGN_VALID_S+65 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_5_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+66 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_5_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+67 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+68 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_5_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+69 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_5_BASE, operand3:MLDSA_HINT_R_5_BASE}; + + MLDSA_SIGN_VALID_S+70 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_6_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+71 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_6_BASE, 
operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+72 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+73 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_6_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+74 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_6_BASE, operand3:MLDSA_HINT_R_6_BASE}; + + MLDSA_SIGN_VALID_S+75 : data_o_rom <= '{opcode:MLDSA_UOP_MASKED_PWM_INTT, imm:'h0000, length:'d00, operand1:MLDSA_C_NTT_BASE, operand2:MLDSA_S2_7_BASE, operand3:MLDSA_CS2_BASE}; + MLDSA_SIGN_VALID_S+76 : data_o_rom <= '{opcode:MLDSA_UOP_PWS, imm:'h0000, length:'d00, operand1:MLDSA_CS2_BASE, operand2:MLDSA_W0_7_BASE, operand3:MLDSA_R0_BASE}; + MLDSA_SIGN_VALID_S+77 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_R0, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+78 : data_o_rom <= '{opcode:MLDSA_UOP_NORMCHK, imm:MLDSA_NORMCHK_CT0, length:'d00, operand1:MLDSA_CT_7_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+79 : data_o_rom <= '{opcode:MLDSA_UOP_PWA, imm:'h0000, length:'d00, operand1:MLDSA_R0_BASE, operand2:MLDSA_CT_7_BASE, operand3:MLDSA_HINT_R_7_BASE}; MLDSA_SIGN_CLEAR_W0 : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; - MLDSA_SIGN_VALID_S+104 : data_o_rom <= '{opcode:MLDSA_UOP_MAKEHINT, imm:'h0000, length:'d00, operand1:MLDSA_HINT_R_0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; + MLDSA_SIGN_VALID_S+81 : data_o_rom <= '{opcode:MLDSA_UOP_MAKEHINT, imm:'h0000, length:'d00, operand1:MLDSA_HINT_R_0_BASE, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; MLDSA_SIGN_GEN_S : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, 
operand3:MLDSA_NOP}; MLDSA_SIGN_CLEAR_C : data_o_rom <= '{opcode:MLDSA_UOP_NOP, imm:'h0000, length:'d00, operand1:MLDSA_NOP, operand2:MLDSA_NOP, operand3:MLDSA_NOP}; diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index 2f1e142..cd61a3e 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -500,6 +500,10 @@ generate mode[g_inst] = pws; sampler_valid[g_inst] = 1; end + MLDSA_PWM_INTT: begin + mode[g_inst] = pwm_intt; + ntt_random_en[g_inst] = 1; + end default: begin end endcase @@ -526,7 +530,7 @@ generate .sampler_valid(sampler_valid[g_inst]), .shuffle_en(ntt_shuffling_en[g_inst]), .random(rand_bits[5:0]), - .masking_en(1'b0), + .masking_en(ntt_masking_en[g_inst]), .rnd_i(ntt_random_en[g_inst] ? ntt_rand_bits : (RND_W-6)'(0)), //(ntt_rand_bits & {(RND_W-6){ntt_random_en[g_inst]}}), //NTT mem IF .mem_wr_req(ntt_mem_wr_req[g_inst]), From f88fb7589571df04c00be3f2475ac11f53e2a26c Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Mon, 2 Dec 2024 12:56:19 -0800 Subject: [PATCH 02/10] Enable masking for pwm_intt op --- src/mldsa_top/rtl/mldsa_ctrl.sv | 4 +- src/mldsa_top/rtl/mldsa_top.sv | 1 + src/ntt_top/config/ntt_top_tb.vf | 4 +- src/ntt_top/rtl/ntt_ctrl.sv | 151 +++-- src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 59 +- src/ntt_top/rtl/ntt_masked_BFU_mult.sv | 4 +- src/ntt_top/rtl/ntt_masked_butterfly1x2.sv | 12 +- src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 6 +- src/ntt_top/rtl/ntt_masked_mult_redux46.sv | 5 +- src/ntt_top/rtl/ntt_masked_pwm.sv | 10 +- src/ntt_top/rtl/ntt_top.sv | 30 +- src/ntt_top/rtl/ntt_twiddle_lookup.sv | 2 +- .../directed/ntt_masking_normal_test.yml | 21 + src/ntt_top/tb/ntt_top_masking_tb.sv | 578 ++++++++++++++++++ src/ntt_top/tb/ntt_top_tb.sv | 351 ++++++----- src/ntt_top/tb/ntt_wrapper.sv | 4 +- 16 files changed, 946 insertions(+), 296 deletions(-) create mode 100644 src/ntt_top/stimulus/tests/directed/ntt_masking_normal_test.yml create mode 100644 
src/ntt_top/tb/ntt_top_masking_tb.sv diff --git a/src/mldsa_top/rtl/mldsa_ctrl.sv b/src/mldsa_top/rtl/mldsa_ctrl.sv index f1bb33c..beefbfb 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl.sv @@ -1155,7 +1155,7 @@ always_comb mldsa_privkey_lock = '0; if (prim_instr.opcode.ntt_en) begin ntt_mode_o[0] = prim_instr.opcode.mode.ntt_mode; ntt_masking_en_o[0] = prim_instr.opcode.masking_en; - ntt_shuffling_en_o[0] = prim_instr.opcode.shuffling_en; + ntt_shuffling_en_o[0] = prim_instr.opcode.shuffling_en; //1'b0; end end @@ -1458,7 +1458,7 @@ mldsa_seq_prim mldsa_seq_prim_inst if (sec_instr.opcode.ntt_en) begin ntt_mode_o[1] = sec_instr.opcode.mode.ntt_mode; ntt_masking_en_o[1] = sec_instr.opcode.masking_en; - ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; + ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; //1'b0; end end //passing a bit on the immediate field to mux between temp address locations diff --git a/src/mldsa_top/rtl/mldsa_top.sv b/src/mldsa_top/rtl/mldsa_top.sv index cd61a3e..7639053 100644 --- a/src/mldsa_top/rtl/mldsa_top.sv +++ b/src/mldsa_top/rtl/mldsa_top.sv @@ -503,6 +503,7 @@ generate MLDSA_PWM_INTT: begin mode[g_inst] = pwm_intt; ntt_random_en[g_inst] = 1; + sampler_valid[g_inst] = 1; end default: begin end diff --git a/src/ntt_top/config/ntt_top_tb.vf b/src/ntt_top/config/ntt_top_tb.vf index e149b99..addea94 100644 --- a/src/ntt_top/config/ntt_top_tb.vf +++ b/src/ntt_top/config/ntt_top_tb.vf @@ -52,6 +52,4 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_top_tb.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_top_tb.sv \ No newline at end of file 
diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index a808233..2d00c7e 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -20,6 +20,7 @@ // 1. Keeps track of stages of bf2x2 operation in ct, gs, pwo modes // 2. Controls wr/rd addr of NTT mem // 3. Controls rd addr of twiddle ROM +// 4. Performs shuffling of wr/rd addr //====================================================================== module ntt_ctrl @@ -53,6 +54,8 @@ module ntt_ctrl input wire [5:0] random, //4+2 bits output logic bf_enable, + output logic [2:0] opcode, + output logic masking_en_ctrl, output logic buf_wren, output logic buf_rden, output logic [1:0] buf_wrptr, @@ -87,9 +90,11 @@ localparam INTT_WRITE_ADDR_STEP = 16; localparam PWO_READ_ADDR_STEP = 1; localparam PWO_WRITE_ADDR_STEP = 1; localparam PWM_LATENCY = 5; +localparam MASKED_BF_STAGE1_LATENCY = 260; localparam [MEM_ADDR_WIDTH-1:0] MEM_LAST_ADDR = 63; localparam INTT_WRBUF_LATENCY = 13; //includes BF latency + mem latency for shuffled reads to begin +localparam MASKED_INTT_WRBUF_LATENCY = 481; //TODO check //FSM states ntt_read_state_t read_fsm_state_ps, read_fsm_state_ns; ntt_write_state_t write_fsm_state_ps, write_fsm_state_ns; @@ -109,8 +114,9 @@ logic [1:0] index_rand_offset, index_count, mem_rd_index_ofst; logic [1:0] buf_rdptr_int; logic [1:0] buf_rdptr_f; logic [BF_LATENCY:0][1:0] buf_rdptr_reg; -logic [INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; -logic [BF_LATENCY:0][3:0] chunk_count_reg; +//logic [INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; +logic [MASKED_INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; +logic [MASKED_BF_STAGE1_LATENCY:0][3:0] chunk_count_reg; logic latch_chunk_rand_offset, latch_index_rand_offset; logic last_rd_addr, last_wr_addr; logic mem_wr_en_fsm, mem_wr_en_reg; @@ -217,12 +223,37 @@ logic arc_WR_STAGE_WR_MEM_OPT; //------------------------------------------ always_comb begin ct_mode = (ntt_mode == ct); - gs_mode = (ntt_mode == gs); + gs_mode = (ntt_mode == gs) | 
((ntt_mode == pwm_intt) & (rounds_count > 'h0)); pwo_mode = ntt_mode inside {pwm, pwa, pws}; pwm_mode = (ntt_mode == pwm); pwa_mode = (ntt_mode == pwa); pws_mode = (ntt_mode == pws); - pwm_intt_mode = (ntt_mode == pwm_intt); + pwm_intt_mode = (ntt_mode == pwm_intt) & (rounds_count == 'h0); +end + +always_ff @(posedge clk or negedge reset_n) begin + if (!reset_n) begin + opcode <= ct; //default + masking_en_ctrl <= 'b0; + end + else if (zeroize) begin + opcode <= ct; + masking_en_ctrl <= 'b0; + end + else begin + if (pwm_intt_mode) begin //1st round + opcode <= pwm_intt; + masking_en_ctrl <= 'b1; + end + else if (ntt_mode == pwm_intt) begin //subseq rounds + opcode <= gs; + masking_en_ctrl <= 'b0; + end + else begin + opcode <= ntt_mode; //all others + masking_en_ctrl <= 'b0; + end + end end //------------------------------------------ @@ -240,7 +271,7 @@ always_ff @(posedge clk or negedge reset_n) begin else if (rounds_count == num_rounds) rounds_count <= 'h0; end -assign num_rounds = (ntt_mode inside {ct, gs}) ? NTT_NUM_ROUNDS : PWO_NUM_ROUNDS; +assign num_rounds = (ntt_mode inside {ct, gs, pwm_intt}) ? NTT_NUM_ROUNDS : PWO_NUM_ROUNDS; //------------------------------------------ //Done flags @@ -257,13 +288,13 @@ assign done = ntt_done | intt_done | pwo_done; //------------------------------------------ //NTT mem addr always_comb begin - src_base_addr = (ct_mode | gs_mode) ? ntt_mem_base_addr.src_base_addr : 'h0; - interim_base_addr = (ct_mode | gs_mode) ? ntt_mem_base_addr.interim_base_addr : 'h0; - dest_base_addr = (ct_mode | gs_mode) ? ntt_mem_base_addr.dest_base_addr : 'h0; + src_base_addr = (ct_mode | gs_mode) ? ntt_mem_base_addr.src_base_addr : 'h0; //not used in pwm_intt + interim_base_addr = (ct_mode | gs_mode | pwm_intt_mode) ? ntt_mem_base_addr.interim_base_addr : 'h0; + dest_base_addr = (ct_mode | gs_mode | pwm_intt_mode) ? ntt_mem_base_addr.dest_base_addr : 'h0; - pw_base_addr_a = pwo_mode ? 
pwo_mem_base_addr.pw_base_addr_a : 'h0; - pw_base_addr_b = pwo_mode ? pwo_mem_base_addr.pw_base_addr_b : 'h0; - pw_base_addr_c = pwo_mode ? pwo_mem_base_addr.pw_base_addr_c : 'h0; + pw_base_addr_a = (pwo_mode | pwm_intt_mode) ? pwo_mem_base_addr.pw_base_addr_a : 'h0; + pw_base_addr_b = (pwo_mode | pwm_intt_mode) ? pwo_mem_base_addr.pw_base_addr_b : 'h0; + pw_base_addr_c = pwo_mode ? pwo_mem_base_addr.pw_base_addr_c : 'h0; //not used in pwm_intt. TODO check when PWMA is enabled in masking end //Wraparound - indicates if we need to start at next addr (Eg. 0, 16, 32, 48, 1, 17, 33, 49, 2, ...) //Wraparound allows addr to transition from 48 to 1, 49 to 2, etc instead of overflowing @@ -273,7 +304,7 @@ always_comb begin if (shuffle_en) begin mem_rd_addr_nxt = (gs_mode | pwo_mode) ? (4*chunk_count) + (rd_addr_step*mem_rd_index_ofst) + mem_rd_base_addr : mem_rd_addr + rd_addr_step; - mem_wr_addr_nxt = ct_mode ? (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[0]) + (wr_addr_step*buf_rdptr_reg[0]) + mem_wr_base_addr) : gs_mode ? mem_wr_addr + wr_addr_step : (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[4]) + (wr_addr_step*buf_rdptr_reg[4])); + mem_wr_addr_nxt = ct_mode ? (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[0]) + (wr_addr_step*buf_rdptr_reg[0]) + mem_wr_base_addr) : (gs_mode | pwm_intt_mode) ? mem_wr_addr + wr_addr_step : (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[4]) + (wr_addr_step*buf_rdptr_reg[4])); //TODO check pwm_intt latency with shuffling end else begin mem_rd_addr_nxt = mem_rd_addr + rd_addr_step; @@ -316,13 +347,13 @@ always_ff @(posedge clk or negedge reset_n) begin end else if (rst_wr_addr) begin if (shuffle_en) - mem_wr_addr <= (ct_mode | pwo_mode) ? mem_wr_base_addr + (4*chunk_rand_offset) : gs_mode ? mem_wr_base_addr + chunk_rand_offset : mem_wr_base_addr; + mem_wr_addr <= (ct_mode | pwo_mode) ? mem_wr_base_addr + (4*chunk_rand_offset) : (gs_mode | pwm_intt_mode) ? 
mem_wr_base_addr + chunk_rand_offset : mem_wr_base_addr; else mem_wr_addr <= mem_wr_base_addr; end else if (incr_mem_wr_addr) begin if (shuffle_en) - mem_wr_addr <= (gs_mode & last_wr_addr) ? mem_wr_base_addr : wr_addr_wraparound ? MEM_ADDR_WIDTH'(mem_wr_addr_nxt - MEM_LAST_ADDR) : mem_wr_addr_nxt[MEM_ADDR_WIDTH-1:0]; + mem_wr_addr <= ((gs_mode | pwm_intt_mode) & last_wr_addr) ? mem_wr_base_addr : wr_addr_wraparound ? MEM_ADDR_WIDTH'(mem_wr_addr_nxt - MEM_LAST_ADDR) : mem_wr_addr_nxt[MEM_ADDR_WIDTH-1:0]; else mem_wr_addr <= wr_addr_wraparound ? MEM_ADDR_WIDTH'(mem_wr_addr_nxt - MEM_LAST_ADDR) : mem_wr_addr_nxt[MEM_ADDR_WIDTH-1:0]; end @@ -385,17 +416,17 @@ always_comb begin 'h0: begin twiddle_end_addr = ct_mode ? 'd0 : 'd63; twiddle_offset = 'h0; - twiddle_rand_offset = ct_mode ? 'h0 : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 'h0 : pwm_intt_mode ? 7'((4*chunk_count_reg[MASKED_BF_STAGE1_LATENCY]) + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) /*TODO: check buf_wrptr*/ : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h1: begin twiddle_end_addr = ct_mode ? 'd3 : 'd15; twiddle_offset = ct_mode ? 'd1 : 'd64; - twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : pwm_intt_mode ? 7'((chunk_count_reg[MASKED_BF_STAGE1_LATENCY] % 4)*4 + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) /*TODO: check for masking*/ : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h2: begin twiddle_end_addr = ct_mode ? 'd15 : 'd3; twiddle_offset = ct_mode ? 'd5 : 'd80; - twiddle_rand_offset = ct_mode ? 7'((chunk_count % 'd4)*'d4 + buf_rdptr_int) : 7'(buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 7'((chunk_count % 'd4)*'d4 + buf_rdptr_int) : pwm_intt_mode ? 
7'(buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'(buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h3: begin twiddle_end_addr = ct_mode ? 'd63 : 'd0; @@ -447,12 +478,12 @@ assign twiddle_addr_int = (~shuffle_en | ct_mode) ? twiddle_addr_reg + twiddle_o //------------------------------------------ assign busy = ntt_busy | pwo_busy; assign ntt_busy = (read_fsm_state_ps != RD_IDLE) && (write_fsm_state_ps != WR_IDLE) && (ct_mode | gs_mode); -assign pwo_busy = (read_fsm_state_ps != RD_IDLE) && (write_fsm_state_ps != WR_IDLE) && pwo_mode; +assign pwo_busy = (read_fsm_state_ps != RD_IDLE) && (write_fsm_state_ps != WR_IDLE) && (pwo_mode | pwm_intt_mode); //------------------------------------------ //Valid count - to check that all 64 addr have been processed - check writes to mem //------------------------------------------ -always_comb wr_data_valid = gs_mode ? buf0_valid : butterfly_ready; //ct or pwo mode - look for bf_ready +always_comb wr_data_valid = (gs_mode | pwm_intt_mode) ? buf0_valid : butterfly_ready; //ct or pwo mode - look for bf_ready always_comb rd_data_valid = ct_mode ? buf0_valid : gs_mode ? bf_enable_fsm : sampler_valid; always_ff @(posedge clk or negedge reset_n) begin @@ -463,7 +494,7 @@ always_ff @(posedge clk or negedge reset_n) begin else if (rst_wr_valid_count) wr_valid_count <= 'h0; else if (wr_data_valid) - wr_valid_count <= gs_mode ? (wr_valid_count > 'h40) ? 'h0 : wr_valid_count + 'h4 + wr_valid_count <= (gs_mode | pwm_intt_mode) ? (wr_valid_count > 'h40) ? 
'h0 : wr_valid_count + 'h4 : wr_valid_count + 'h1; end @@ -514,7 +545,8 @@ always_ff @(posedge clk or negedge reset_n) begin chunk_rand_offset <= random[5:2]; chunk_count <= random[5:2]; end - else if ((ct_mode & (buf_count == 'h3)) | ((gs_mode | (pwo_mode & incr_pw_rd_addr)) & (index_count == 'h3))) begin //update chunk after every 4 cycles - TODO: stop chunk counting when there's no incr_rd_addr in ntt/intt modes + //TODO: PWM+INTT mode + else if ((ct_mode & (buf_count == 'h3)) | (((gs_mode | pwm_intt_mode) | (pwo_mode & incr_pw_rd_addr)) & (index_count == 'h3))) begin //update chunk after every 4 cycles - TODO: stop chunk counting when there's no incr_rd_addr in ntt/intt modes chunk_count <= (chunk_count == 'hf) ? 'h0 : chunk_count + 'h1; end end @@ -546,12 +578,15 @@ always_ff @(posedge clk or negedge reset_n) begin else if (ct_mode & (buf_rden_ntt | butterfly_ready)) begin buf_rdptr_reg <= {buf_rdptr_int, buf_rdptr_reg[BF_LATENCY:1]}; end - else if (gs_mode & (incr_mem_rd_addr | butterfly_ready)) begin - buf_wrptr_reg <= {mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; + else if ((gs_mode & (incr_mem_rd_addr | butterfly_ready))) begin //TODO check latency in pwm_intt mode + buf_wrptr_reg <= {{468{2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; end else if (pwo_mode & (incr_pw_rd_addr | butterfly_ready)) begin buf_rdptr_reg <= {mem_rd_index_ofst, buf_rdptr_reg[BF_LATENCY:1]}; //TODO: create new reg with apt name for PWO end + else if ((pwm_intt_mode)) begin // & (incr_pw_rd_addr | butterfly_ready))) begin + buf_wrptr_reg <= {mem_rd_index_ofst, buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1:1]}; + end else begin buf_rdptr_reg <= 'h0; buf_wrptr_reg <= 'h0; @@ -577,7 +612,7 @@ always_ff @(posedge clk or negedge reset_n) begin else if (zeroize) begin index_count <= 'h0; end - else if ((gs_mode & (incr_mem_rd_addr)) | (pwo_mode & incr_pw_rd_addr)) begin + else if ((gs_mode & incr_mem_rd_addr) | ((pwo_mode | pwm_intt_mode) & incr_pw_rd_addr)) 
begin index_count <= index_count + 'h1; end end @@ -589,8 +624,12 @@ always_ff @(posedge clk or negedge reset_n) begin else if (zeroize) begin chunk_count_reg <= 'h0; end + //chunk update can't use incr_mem_rd_addr in pwm_intt mode. + else if (pwm_intt_mode & incr_pw_rd_addr) begin + chunk_count_reg <= {chunk_count, chunk_count_reg[MASKED_BF_STAGE1_LATENCY:1]}; + end else if (buf_rden_ntt | butterfly_ready | (gs_mode & incr_mem_rd_addr) | (pwo_mode & incr_pw_rd_addr)) begin //TODO: replace gs condition with an fsm generated flag perhaps? - chunk_count_reg <= {chunk_count, chunk_count_reg[BF_LATENCY:1]}; + chunk_count_reg <= {{251{4'h0}}, chunk_count, chunk_count_reg[BF_LATENCY:1]}; end end @@ -604,7 +643,7 @@ always_ff @(posedge clk or negedge reset_n) begin else if (buf_wren & (ct_mode | ~shuffle_en)) begin //ct mode - buf writes are in order buf_wrptr <= (buf_wrptr == 'h3) ? 'h0 : buf_wrptr + 'h1; end - else if (buf_wren_intt & gs_mode & shuffle_en) begin // gs mode + else if (buf_wren_intt & (gs_mode | pwm_intt_mode) & shuffle_en) begin // gs mode buf_wrptr <= buf_wrptr_reg[0]; end end @@ -615,8 +654,8 @@ always_comb begin buf_rdptr_int = (shuffle_en & ct_mode) ? index_rand_offset + buf_count : buf_count; //TODO: flop? buf_rdptr = (shuffle_en & ct_mode) ? buf_rdptr_f : buf_count; latch_chunk_rand_offset = arc_IDLE_WR_STAGE | arc_WR_MEM_WR_STAGE | arc_WR_WAIT_WR_STAGE; - latch_index_rand_offset = ct_mode ? (buf_wrptr == 'h3) : (gs_mode | (pwo_mode & incr_pw_rd_addr)) & (arc_RD_STAGE_RD_EXEC | (index_count == 'h3)); - mem_rd_index_ofst = (pwo_mode | gs_mode) ? (index_count + index_rand_offset) : 'h0; + latch_index_rand_offset = ct_mode ? (buf_wrptr == 'h3) : ((gs_mode | pwm_intt_mode) | (pwo_mode & incr_pw_rd_addr)) & (arc_RD_STAGE_RD_EXEC | (index_count == 'h3)); + mem_rd_index_ofst = (pwo_mode | gs_mode | pwm_intt_mode) ? 
(index_count + index_rand_offset) : 'h0; end @@ -646,7 +685,7 @@ always_comb begin //Check to make sure all writes from prev round have finished before moving onto next round in read fsm arc_RD_STAGE_RD_BUF = (read_fsm_state_ps == RD_STAGE) && (write_fsm_state_ps == WR_STAGE) && (ct_mode && !ntt_done); - arc_RD_STAGE_RD_EXEC = (read_fsm_state_ps == RD_STAGE) && (write_fsm_state_ps == WR_STAGE) && ((gs_mode && !intt_done) || (pwo_mode && (!pwo_done /*|| ntt_enable*/))); + arc_RD_STAGE_RD_EXEC = (read_fsm_state_ps == RD_STAGE) && (write_fsm_state_ps == WR_STAGE) && (((gs_mode | pwm_intt_mode) && !intt_done) || (pwo_mode && (!pwo_done /*|| ntt_enable*/))); //Don't wait for writes to complete before transitioning to next round. (See above TODO) arc_RD_STAGE_RD_EXEC_OPT= (read_fsm_state_ps == RD_STAGE) && /*(write_fsm_state_ps == WR_STAGE) &&*/ ((gs_mode && !intt_done) || (pwo_mode && (!pwo_done /*|| ntt_enable*/))); @@ -660,7 +699,7 @@ always_comb begin arc_RD_EXEC_RD_BUF = (read_fsm_state_ps == RD_EXEC ) && ct_mode && (!buf0_valid && (buf_count == 0)) && (rd_valid_count < 'h40); //This arc is only for gs mode. Execution is done when all 63 addr locations have been read. Since there's no input buffer, valid_count ends at 63. - arc_RD_EXEC_RD_STAGE = (read_fsm_state_ps == RD_EXEC ) && ((gs_mode || pwo_mode) && (rd_valid_count == 'h3f)); + arc_RD_EXEC_RD_STAGE = (read_fsm_state_ps == RD_EXEC ) && ((gs_mode || pwo_mode || pwm_intt_mode) && (rd_valid_count == 'h3f)); //All rounds of NTT or INTT are done. Go to IDLE and wait for next command. In PWO mode, if ntt_enable is given, start next op arc_RD_STAGE_IDLE = (read_fsm_state_ps == RD_STAGE) && (ntt_done || intt_done || (pwo_done && !ntt_enable)); @@ -727,16 +766,16 @@ always_comb begin arc_RD_EXEC_EXEC_WAIT ? 
EXEC_WAIT : RD_EXEC; buf_wren_ntt = ct_mode; buf_rden_ntt = ct_mode; - incr_mem_rd_addr = (ntt_mode inside {ct, gs}); + incr_mem_rd_addr = (ntt_mode inside {ct, gs, pwm_intt}) & !pwm_intt_mode; if (shuffle_en) - mem_rd_en_fsm = (ntt_mode inside {ct, gs}) ? (mem_rd_addr <= MEM_LAST_ADDR + mem_rd_base_addr) & ~arc_RD_EXEC_EXEC_WAIT : 1'b0; + mem_rd_en_fsm = ((ntt_mode inside {ct, gs, pwm_intt}) & !pwm_intt_mode) ? (mem_rd_addr <= MEM_LAST_ADDR + mem_rd_base_addr) & ~arc_RD_EXEC_EXEC_WAIT : 1'b0; else - mem_rd_en_fsm = (ntt_mode inside {ct, gs}) ? (mem_rd_addr <= MEM_LAST_ADDR + mem_rd_base_addr) : 1'b0; + mem_rd_en_fsm = ((ntt_mode inside {ct, gs, pwm_intt}) & !pwm_intt_mode) ? (mem_rd_addr <= MEM_LAST_ADDR + mem_rd_base_addr) : 1'b0; bf_enable_fsm = pwo_mode ? sampler_valid : 1'b1; incr_twiddle_addr_fsm = ntt_mode inside {ct, gs, pwm_intt}; rd_addr_step = ct_mode ? NTT_READ_ADDR_STEP : INTT_READ_ADDR_STEP; - incr_pw_rd_addr = sampler_valid & pwo_mode; - pw_rden_fsm = sampler_valid & pwo_mode; + incr_pw_rd_addr = sampler_valid & (pwo_mode | pwm_intt_mode); + pw_rden_fsm = sampler_valid & (pwo_mode | pwm_intt_mode); end EXEC_WAIT: begin read_fsm_state_ns = arc_EXEC_WAIT_RD_STAGE ? RD_STAGE : arc_EXEC_WAIT_RD_EXEC ? RD_EXEC : EXEC_WAIT; @@ -747,8 +786,8 @@ always_comb begin bf_enable_fsm = pwo_mode ? 
sampler_valid : (buf_count <= 3); incr_twiddle_addr_fsm = (ct_mode | gs_mode | pwm_intt_mode); rd_addr_step = NTT_READ_ADDR_STEP; - incr_pw_rd_addr = (pwo_mode & sampler_valid); - pw_rden_fsm = (pwo_mode & sampler_valid); + incr_pw_rd_addr = ((pwo_mode | pwm_intt_mode) & sampler_valid); + pw_rden_fsm = ((pwo_mode | pwm_intt_mode) & sampler_valid); end default: begin read_fsm_state_ns = RD_IDLE; @@ -792,30 +831,30 @@ always_comb begin arc_WR_STAGE_WR_MEM_OPT = (write_fsm_state_ps == WR_STAGE) && (read_fsm_state_ps == RD_EXEC) && (pwo_mode && pwo_busy); //This arc is only for gs mode - arc_WR_STAGE_WR_BUF = (write_fsm_state_ps == WR_STAGE) && gs_mode && !intt_done; + arc_WR_STAGE_WR_BUF = (write_fsm_state_ps == WR_STAGE) && (gs_mode || pwm_intt_mode) && !intt_done; //pwm arc. If in WR STAGE, transition directly to wait arc_WR_STAGE_WR_WAIT = (write_fsm_state_ps == WR_STAGE) && (pwo_mode && !pwo_done); //This arc is only for gs mode. Start writing to memory when buf0_valid is asserted - arc_WR_BUF_WR_MEM = (write_fsm_state_ps == WR_BUF) && (gs_mode && buf0_valid); + arc_WR_BUF_WR_MEM = (write_fsm_state_ps == WR_BUF) && ((gs_mode || pwm_intt_mode) && buf0_valid); //This arc is only for gs mode. If there's no buf0_valid, all 4 buffers have been emptied and total valid_count is < 64, go back to buf state and wait for it to fill up //Indicates that buf0, 1, 2 3 have finished executing and there's no valid input, so wait for buf to fill up again //Since there's an output buffer, valid_count is counted in steps of 4, so it ends at 64 - arc_WR_MEM_WR_BUF = (write_fsm_state_ps == WR_MEM) && (gs_mode && (!buf0_valid && (buf_count == 0)) && (wr_valid_count < 'h40)); + arc_WR_MEM_WR_BUF = (write_fsm_state_ps == WR_MEM) && ((gs_mode || pwm_intt_mode) && (!buf0_valid && (buf_count == 0)) && (wr_valid_count < 'h40)); //Move to WR_WAIT state when the last outputs from bf2x2 have been captured in the buffers. 
They still need to be shifted out of the buffers and into memory, so keep buf_wren 1 here //Assumption - no bubbles in NTT or INTT. If bubbles, need to consider sampler_valid //TODO: can WR_WAIT state be removed? fsm can finish all 64 addr in WR_MEM state? - arc_WR_MEM_WR_WAIT = shuffle_en ? (write_fsm_state_ps == WR_MEM) && ((gs_mode && (buf0_valid && (wr_valid_count == 'h3c))) || (pwo_mode && butterfly_ready && (wr_valid_count == 'h3f))) - : (write_fsm_state_ps == WR_MEM) && ((gs_mode && (buf0_valid && (wr_valid_count == 'h3c))) || (pwo_mode && !butterfly_ready && (wr_valid_count < 'h3f))); // || (ct_mode && (wr_valid_count == 'h3f))); + arc_WR_MEM_WR_WAIT = shuffle_en ? (write_fsm_state_ps == WR_MEM) && (((gs_mode || pwm_intt_mode) && (buf0_valid && (wr_valid_count == 'h3c))) || (pwo_mode && butterfly_ready && (wr_valid_count == 'h3f))) + : (write_fsm_state_ps == WR_MEM) && (((gs_mode || pwm_intt_mode) && (buf0_valid && (wr_valid_count == 'h3c))) || (pwo_mode && !butterfly_ready && (wr_valid_count < 'h3f))); // || (ct_mode && (wr_valid_count == 'h3f))); //This arc is only for pwo mode. Move back from wait to write state when there's a valid BFU output arc_WR_WAIT_WR_MEM = (write_fsm_state_ps == WR_WAIT) && (pwo_mode && butterfly_ready); //When valid_count is 64 and buf_count is 3 (meaning all 4 buffers have been used), move to WR_STAGE indicating that round is done - arc_WR_WAIT_WR_STAGE = shuffle_en ? (write_fsm_state_ps == WR_WAIT) && ((gs_mode && (buf_count == 'h3)) || ct_mode || pwo_mode) + arc_WR_WAIT_WR_STAGE = shuffle_en ? (write_fsm_state_ps == WR_WAIT) && (((gs_mode || pwm_intt_mode) && (buf_count == 'h3)) || ct_mode || pwo_mode) : (write_fsm_state_ps == WR_WAIT) && (!pwo_mode && (buf_count == 'h3)); end @@ -869,10 +908,10 @@ always_comb begin write_fsm_state_ns = arc_WR_MEM_WR_BUF ? WR_BUF : arc_WR_MEM_WR_STAGE ? WR_STAGE : arc_WR_MEM_WR_WAIT ? WR_WAIT : WR_MEM; - buf_wren_intt = gs_mode ; - buf_rden_intt = gs_mode ; - incr_mem_wr_addr = ct_mode ? 
butterfly_ready : gs_mode ? 1'b1 : 1'b0; - mem_wr_en_fsm = ct_mode ? butterfly_ready : gs_mode ? 1'b1 : 1'b0; + buf_wren_intt = gs_mode | pwm_intt_mode ; + buf_rden_intt = gs_mode | pwm_intt_mode ; + incr_mem_wr_addr = ct_mode ? butterfly_ready : (gs_mode | pwm_intt_mode) ? 1'b1 : 1'b0; + mem_wr_en_fsm = ct_mode ? butterfly_ready : (gs_mode | pwm_intt_mode) ? 1'b1 : 1'b0; wr_addr_step = ct_mode ? NTT_WRITE_ADDR_STEP : INTT_WRITE_ADDR_STEP; incr_pw_wr_addr = pwo_mode & butterfly_ready; pw_wren_fsm = pwo_mode & butterfly_ready; @@ -880,16 +919,16 @@ always_comb begin WR_WAIT: begin if (shuffle_en) begin write_fsm_state_ns = arc_WR_WAIT_WR_STAGE ? WR_STAGE : WR_WAIT; - wr_addr_step = gs_mode ? INTT_WRITE_ADDR_STEP : NTT_WRITE_ADDR_STEP; + wr_addr_step = (gs_mode | pwm_intt_mode) ? INTT_WRITE_ADDR_STEP : NTT_WRITE_ADDR_STEP; end else begin write_fsm_state_ns = arc_WR_WAIT_WR_STAGE ? WR_STAGE : arc_WR_WAIT_WR_MEM ? WR_MEM : WR_WAIT; wr_addr_step = INTT_WRITE_ADDR_STEP; end buf_wren_intt = shuffle_en ? 'b0 : (buf_count <= 'h3); - buf_rden_intt = shuffle_en ? gs_mode : 'b1; - incr_mem_wr_addr = (ct_mode | gs_mode); - mem_wr_en_fsm = shuffle_en ? gs_mode : (ct_mode | gs_mode); + buf_rden_intt = shuffle_en ? (gs_mode | pwm_intt_mode) : 'b1; + incr_mem_wr_addr = (ct_mode | gs_mode | pwm_intt_mode); + mem_wr_en_fsm = shuffle_en ? (gs_mode | pwm_intt_mode) : (ct_mode | gs_mode | pwm_intt_mode); //TODO: clean up with (!shuffle_en & ct) | gs | pwm_intt incr_pw_wr_addr = shuffle_en ? pwo_mode & arc_WR_WAIT_WR_STAGE : arc_WR_WAIT_WR_MEM; pw_wren_fsm = shuffle_en ? 'b0 : arc_WR_WAIT_WR_MEM; @@ -906,17 +945,17 @@ always_comb begin if (shuffle_en) begin buf_wren = pwo_mode ? 1'b0 : buf_wren_ntt_reg | buf_wren_intt_reg; buf_rden = pwo_mode ? 1'b0 : ct_mode ? buf_rden_ntt_reg : buf_rden_intt; - bf_enable = (gs_mode || pwo_mode) ? 
bf_enable_reg_d2 : bf_enable_reg; //bf_enable_fsm; //In gs mode, memory is directly feeding bf2x2, so we need to enable it one cycle later - mem_wr_en = gs_mode ? mem_wr_en_fsm : mem_wr_en_reg; - mem_rd_en = (gs_mode | pwo_mode) ? mem_rd_en_reg : mem_rd_en_fsm; - twiddle_addr = gs_mode ? twiddle_addr_reg_d3 : twiddle_addr_int; + bf_enable = (gs_mode | pwm_intt_mode | pwo_mode) ? bf_enable_reg_d2 : bf_enable_reg; //bf_enable_fsm; //In gs mode, memory is directly feeding bf2x2, so we need to enable it one cycle later + mem_wr_en = (gs_mode | pwm_intt_mode) ? mem_wr_en_fsm : mem_wr_en_reg; + mem_rd_en = (gs_mode | pwm_intt_mode | pwo_mode) ? mem_rd_en_reg : mem_rd_en_fsm; + twiddle_addr = (gs_mode | pwm_intt_mode) ? twiddle_addr_reg_d3 : twiddle_addr_int; //TODO check latency in pwm_intt mode pw_rden = pw_rden_reg; pw_wren = pwm_mode ? pw_wren_reg : pw_wren_reg; end else begin buf_wren = pwo_mode ? 1'b0 : buf_wren_ntt_reg | buf_wren_intt; buf_rden = pwo_mode ? 1'b0 : buf_rden_ntt | buf_rden_intt; - bf_enable = (gs_mode | pwo_mode) ? bf_enable_reg : bf_enable_fsm; //In gs mode, memory is directly feeding bf2x2, so we need to enable it one cycle later + bf_enable = (gs_mode | pwm_intt_mode | pwo_mode) ? 
bf_enable_reg : bf_enable_fsm; //In gs mode, memory is directly feeding bf2x2, so we need to enable it one cycle later mem_wr_en = mem_wr_en_fsm; mem_rd_en = mem_rd_en_fsm; twiddle_addr = twiddle_addr_int; diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index e7cc4db..50ef19c 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -32,10 +32,11 @@ module ntt_hybrid_butterfly_2x2 parameter UNMASKED_PWA_LATENCY = 1, //latency of modular addition parameter UNMASKED_PWS_LATENCY = 1, //latency of modular subtraction parameter UNMASKED_BF_STAGE1_LATENCY = UNMASKED_BF_LATENCY/2, - parameter MASKED_BF_STAGE1_LATENCY = 260, //For 1 masked butterfly - parameter MASKED_PWM_LATENCY = 207, //For 1 masked pwm operation + parameter MASKED_BF_STAGE1_LATENCY = 264, //For 1 masked butterfly + parameter MASKED_PWM_LATENCY = 209, //For 1 masked pwm operation + parameter MASKED_PWM_MASKED_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_BF_STAGE1_LATENCY, parameter MASKED_INTT_LATENCY = MASKED_BF_STAGE1_LATENCY + UNMASKED_BF_STAGE1_LATENCY, - parameter MASKED_PWM_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_INTT_LATENCY + parameter MASKED_PWM_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_INTT_LATENCY + 1 //TODO: adjust for PWMA case. 
Adding 1 cyc as a placeholder for it ) ( input wire clk, @@ -68,11 +69,12 @@ logic [HALF_WIDTH-1:0] u10_int, u11_int, v10_int, v11_int; logic [HALF_WIDTH-1:0] u10, u11, v10, v11; //Outputs of 2nd stage // logic [HALF_WIDTH-1:0] u20, u21, v20, v21; +logic masking_en_reg; //Other internal wires logic [UNMASKED_BF_STAGE1_LATENCY-1:0][HALF_WIDTH-1:0] w10_reg, w11_reg; //Shift w10 by 5 cycles to match 1st stage BF latency logic [MASKED_PWM_LATENCY-1:0][HALF_WIDTH-1:0] masked_w00_reg, masked_w01_reg; -logic [MASKED_BF_STAGE1_LATENCY-1:0][HALF_WIDTH-1:0] masked_w10_reg, masked_w11_reg; +logic [MASKED_PWM_MASKED_INTT_LATENCY-1:0][HALF_WIDTH-1:0] masked_w10_reg, masked_w11_reg; logic pwo_mode, pwm_intt_mode; // logic [UNMASKED_BF_LATENCY-1:0] ready_reg; logic [MASKED_PWM_INTT_LATENCY-1:0] masked_ready_reg; @@ -81,7 +83,7 @@ logic [MASKED_PWM_INTT_LATENCY-1:0] masked_ready_reg; logic [1:0][WIDTH-1:0] u00_share, u01_share, v00_share, v01_share, u10_share, v10_share, u11_share, v11_share; logic [1:0][WIDTH-1:0] w00_share, w01_share, w10_share, w11_share; //, w10_reg_share, w11_reg_share; logic [1:0][WIDTH-1:0] uv00_share, uv01_share, uv10_share, uv11_share; -logic [1:0][WIDTH-1:0] twiddle_w00_share, twiddle_w01_share, twiddle_w10_share, twiddle_w11_share; +logic [1:0][WIDTH-1:0] twiddle_w00_share, twiddle_w01_share; bf_uvo_t masked_gs_stage1_uvo; //w delay flops @@ -90,14 +92,17 @@ always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin w10_reg <= 'h0; w11_reg <= 'h0; + masking_en_reg <= 'b0; end else if (zeroize) begin w10_reg <= 'h0; w11_reg <= 'h0; + masking_en_reg <= 'b0; end else begin w10_reg <= {uvw_i.w10_i, w10_reg[UNMASKED_BF_STAGE1_LATENCY-1:1]}; w11_reg <= {uvw_i.w11_i, w11_reg[UNMASKED_BF_STAGE1_LATENCY-1:1]}; + masking_en_reg <= masking_en; end end @@ -115,10 +120,10 @@ always_ff @(posedge clk or negedge reset_n) begin masked_w11_reg <= 'h0; end else begin - masked_w00_reg <= {hybrid_pw_uvw_i.twiddle_w0_i, masked_w00_reg[MASKED_PWM_LATENCY-1:1]}; + 
masked_w00_reg <= {hybrid_pw_uvw_i.twiddle_w0_i, masked_w00_reg[MASKED_PWM_LATENCY-1:1]}; //TODO add PWMA latency when Ay countermeasure is added masked_w01_reg <= {hybrid_pw_uvw_i.twiddle_w1_i, masked_w01_reg[MASKED_PWM_LATENCY-1:1]}; - masked_w10_reg <= {hybrid_pw_uvw_i.twiddle_w2_i, masked_w10_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; - masked_w11_reg <= {hybrid_pw_uvw_i.twiddle_w3_i, masked_w11_reg[MASKED_BF_STAGE1_LATENCY-1:1]}; + masked_w10_reg <= {hybrid_pw_uvw_i.twiddle_w2_i, masked_w10_reg[MASKED_PWM_MASKED_INTT_LATENCY-1:1]}; + masked_w11_reg <= {hybrid_pw_uvw_i.twiddle_w3_i, masked_w11_reg[MASKED_PWM_MASKED_INTT_LATENCY-1:1]}; end end @@ -280,7 +285,7 @@ always_comb begin end //---------------------------------------------------- -//Masked PWMs - Used in masked PWM+INTT mode only - 207 clks +//Masked PWMs - Used in masked PWM+INTT mode only - 209 clks //---------------------------------------------------- ntt_masked_pwm #( .WIDTH(WIDTH) @@ -288,6 +293,7 @@ ntt_masked_pwm #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .accumulate(accumulate), .u(u00_share), .v(v00_share), .w(w00_share), @@ -301,6 +307,7 @@ ntt_masked_pwm #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .accumulate(accumulate), .u(u01_share), .v(v01_share), .w(w01_share), @@ -314,6 +321,7 @@ ntt_masked_pwm #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .accumulate(accumulate), .u(u10_share), .v(v10_share), .w(w10_share), @@ -327,6 +335,7 @@ ntt_masked_pwm #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), + .accumulate(accumulate), .u(u11_share), .v(v11_share), .w(w11_share), @@ -335,7 +344,7 @@ ntt_masked_pwm #( ); //---------------------------------------------------- -//Masked BFU stage 1 - Used in masked PWM+INTT mode only - 260 clks +//Masked BFU stage 1 - Used in masked PWM+INTT mode only - 264 clks //PWM outputs: uv00[1:0], uv01[1:0], uv10[1:0], uv11[1:0] //---------------------------------------------------- ntt_masked_butterfly1x2 #( @@ -344,7 +353,7 @@ 
ntt_masked_butterfly1x2 #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .uvw_i({uv00_share, uv01_share, uv10_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), //TODO check connection + .uvw_i({uv00_share, uv10_share, uv01_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), //TODO check connection .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) ); @@ -359,9 +368,9 @@ ntt_butterfly #( .reset_n(reset_n), .zeroize(zeroize), .mode(mode), - .opu_i(masking_en ? HALF_WIDTH'(0) : u00), - .opv_i(masking_en ? HALF_WIDTH'(0) : v00), - .opw_i(masking_en ? HALF_WIDTH'(0) : w00), + .opu_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : u00), + .opv_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : v00), + .opw_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : w00), .accumulate(accumulate), .u_o(u10_int), .v_o(u11_int), @@ -375,9 +384,9 @@ ntt_butterfly #( .reset_n(reset_n), .zeroize(zeroize), .mode(mode), - .opu_i(masking_en ? HALF_WIDTH'(0) : u01), - .opv_i(masking_en ? HALF_WIDTH'(0) : v01), - .opw_i(masking_en ? HALF_WIDTH'(0) : w01), + .opu_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : u01), + .opv_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : v01), + .opw_i((masking_en & pwm_intt_mode) ? HALF_WIDTH'(0) : w01), .accumulate(accumulate), .u_o(v10_int), .v_o(v11_int), @@ -393,10 +402,10 @@ ntt_butterfly #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .mode(mode), - .opu_i(masking_en ? masked_gs_stage1_uvo.u20_o : u10), - .opv_i(masking_en ? masked_gs_stage1_uvo.v20_o : v10), - .opw_i(masking_en ? masked_w10_reg[0] : pwo_mode ? w10 : w10_reg[0]), + .mode((masking_en & pwm_intt_mode) ? gs : mode), + .opu_i((masking_en & pwm_intt_mode) ? masked_gs_stage1_uvo.u20_o : u10), + .opv_i((masking_en & pwm_intt_mode) ? masked_gs_stage1_uvo.v20_o : v10), + .opw_i((masking_en & pwm_intt_mode) ? masked_w10_reg[0] : pwo_mode ? 
w10 : w10_reg[0]), .accumulate(accumulate), .u_o(uv_o.u20_o), .v_o(uv_o.v20_o), @@ -409,10 +418,10 @@ ntt_butterfly #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .mode(mode), - .opu_i(masking_en ? masked_gs_stage1_uvo.u21_o : u11), - .opv_i(masking_en ? masked_gs_stage1_uvo.v21_o : v11), - .opw_i(masking_en ? masked_w11_reg[0] : pwo_mode ? w11 : w11_reg[0]), + .mode((masking_en & pwm_intt_mode) ? gs : mode), + .opu_i((masking_en & pwm_intt_mode) ? masked_gs_stage1_uvo.u21_o : u11), + .opv_i((masking_en & pwm_intt_mode) ? masked_gs_stage1_uvo.v21_o : v11), + .opw_i((masking_en & pwm_intt_mode) ? masked_w11_reg[0] : pwo_mode ? w11 : w11_reg[0]), .accumulate(accumulate), .u_o(uv_o.u21_o), .v_o(uv_o.v21_o), diff --git a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv index db84050..4d99f2d 100644 --- a/src/ntt_top/rtl/ntt_masked_BFU_mult.sv +++ b/src/ntt_top/rtl/ntt_masked_BFU_mult.sv @@ -15,7 +15,7 @@ //====================================================================== // // ntt_masked_BFU_mult -// Performs two share multiplication and reduction - total latency = 207 clks +// Performs two share multiplication and reduction - total latency = 209 clks //====================================================================== module ntt_masked_BFU_mult @@ -88,7 +88,7 @@ module ntt_masked_BFU_mult .s(mul_res_bool) ); - //Mult reduction46 - 156 clks + //Mult reduction46 - 157 clks ntt_masked_mult_redux46 #( .WIDTH(WIDTH) ) mult_redux46_inst ( diff --git a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv index d04c926..a04de0c 100644 --- a/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv +++ b/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv @@ -18,15 +18,14 @@ // 1. Performs 1st stage of masked INTT operation // 2. Combines output shares // 3. 
Performs div2 on combined outputs (unmasked) -// Total latency = 261 clks +// Total latency = 264 clks module ntt_masked_butterfly1x2 import mldsa_params_pkg::*; import ntt_defines_pkg::*; #( parameter WIDTH = 46, - parameter HALF_WIDTH = WIDTH/2, - parameter MASKED_BF_STAGE1_LATENCY = 260 + parameter HALF_WIDTH = WIDTH/2 ) ( input wire clk, @@ -59,6 +58,7 @@ module ntt_masked_butterfly1x2 w01 = uvw_i.w01_i; end + //263 ntt_masked_gs_butterfly #( .WIDTH(WIDTH) ) masked_bf_inst00 ( @@ -146,9 +146,9 @@ module ntt_masked_butterfly1x2 end else begin uv_o.u20_o <= u10_div2; - uv_o.u21_o <= u11_div2; //Check connection TODO - uv_o.v20_o <= v10_div2; - uv_o.v21_o <= v11_div2; //Check connection TODO + uv_o.u21_o <= v10_div2; //u11_div2; + uv_o.v20_o <= u11_div2; //v10_div2; + uv_o.v21_o <= v11_div2; end end diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index a9aa629..32c23f8 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -16,7 +16,7 @@ // ntt_masked_gs_butterfly.sv // -------- // Only performs gs (INTT) mode of operation. 
All blocks are masked -// Latency = 260 clks +// Latency = 262 clks module ntt_masked_gs_butterfly import mldsa_params_pkg::*; @@ -38,7 +38,7 @@ module ntt_masked_gs_butterfly output logic [1:0] v_o [WIDTH-1:0] ); - localparam MASKED_MULT_LATENCY = 207; + localparam MASKED_MULT_LATENCY = 209; logic [52:0][1:0][WIDTH-1:0] w_reg; //TODO parameterize logic [1:0] add_res [WIDTH-1:0]; logic [1:0] sub_res [WIDTH-1:0]; @@ -124,7 +124,7 @@ module ntt_masked_gs_butterfly end end - //207 clks + //209 clks (was 207) ntt_masked_BFU_mult #( .WIDTH(WIDTH) ) mult_inst_0 ( diff --git a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv index dcabec6..5d47446 100644 --- a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv +++ b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv @@ -16,7 +16,7 @@ // // ntt_masked_mult_redux46 // Performs masked multiplication reduction for MLDSA -// It has 156 Cycle Latency +// It has 157 Cycle Latency //====================================================================== module ntt_masked_mult_redux46 @@ -243,6 +243,7 @@ module ntt_masked_mult_redux46 .delayed_reg(z_12_0_delayed) ); + //28 cycles? ntt_masked_special_adder add_with_conc_d10_and_z12( .clk(clk), .rst_n(rst_n), @@ -316,6 +317,7 @@ module ntt_masked_mult_redux46 .delayed_reg(z_45_23_delayed) ); + //54 cycles abr_masked_add_sub_mod_Boolean mod_adder_z_45_23_and_f13_0 ( .clk(clk), .rst_n(rst_n), @@ -339,6 +341,7 @@ module ntt_masked_mult_redux46 .delayed_reg(d22_0_delayed) ); + //54 cycles abr_masked_add_sub_mod_Boolean mod_adder_d22_0_and_e22_0 ( .clk(clk), .rst_n(rst_n), diff --git a/src/ntt_top/rtl/ntt_masked_pwm.sv b/src/ntt_top/rtl/ntt_masked_pwm.sv index edfe994..f48b73d 100644 --- a/src/ntt_top/rtl/ntt_masked_pwm.sv +++ b/src/ntt_top/rtl/ntt_masked_pwm.sv @@ -19,18 +19,21 @@ // This module performs masked pwm operation with or without accumulate // on input shares. 
Always performs (u*v)+w (top level needs to drive 0 // to the w input if not in accumulate mode) +// Latency: 209 clks if PWM, 262 clks if PWMA +// (previously 207 and 260 clks) module ntt_masked_pwm import mldsa_params_pkg::*; import ntt_defines_pkg::*; #( parameter WIDTH = 46, - parameter MASKED_MULT_LATENCY = 207 + parameter MASKED_MULT_LATENCY = 209 ) ( input wire clk, input wire reset_n, input wire zeroize, + input wire accumulate, input wire [1:0][WIDTH-1:0] u, input wire [1:0][WIDTH-1:0] v, input wire [1:0][WIDTH-1:0] w, @@ -57,6 +60,7 @@ module ntt_masked_pwm end end + //209 clks (207) ntt_masked_BFU_mult #( .WIDTH(WIDTH) ) mult_inst0 ( @@ -102,8 +106,8 @@ module ntt_masked_pwm always_comb begin for (int i = 0; i < WIDTH; i++) begin - res[0][i] = res_unpacked[i][0]; - res[1][i] = res_unpacked[i][1]; + res[0][i] = accumulate ? res_unpacked[i][0] : mul_res[i][0]; + res[1][i] = accumulate ? res_unpacked[i][1] : mul_res[i][1]; end end diff --git a/src/ntt_top/rtl/ntt_top.sv b/src/ntt_top/rtl/ntt_top.sv index 1a5c848..d2ddfdc 100644 --- a/src/ntt_top/rtl/ntt_top.sv +++ b/src/ntt_top/rtl/ntt_top.sv @@ -147,6 +147,8 @@ module ntt_top logic pwo_mode; logic pwm_mode, pwa_mode, pws_mode; logic pwm_intt_mode; + mode_t opcode; + logic masking_en_ctrl; assign ct_mode = (mode == ct); assign gs_mode = (mode == gs); @@ -168,25 +170,25 @@ module ntt_top : mem_wr_data_int; //mem rd - NTT/INTT mode, read ntt data. PWM mode, read accumulate data from c mem. PWA/S mode, unused - assign mem_rd_req.rd_wr_en = (ct_mode || gs_mode) ? (mem_rden ? RW_READ : RW_IDLE) : pwm_mode ? (pw_rden_dest_mem ? RW_READ : RW_IDLE) : RW_IDLE; - assign mem_rd_req.addr = (ct_mode || gs_mode) ? mem_rd_addr : pwm_mode ? pw_mem_rd_addr_c : 'h0; - assign pwm_rd_data_c = (pwm_mode && accumulate) ? mem_rd_data : 'h0; //TODO: check if this is supposed to be mem_rd_data_reg + assign mem_rd_req.rd_wr_en = (ct_mode | gs_mode | pwm_intt_mode) ? (mem_rden ? RW_READ : RW_IDLE) : pwm_mode ? (pw_rden_dest_mem ? 
RW_READ : RW_IDLE) : RW_IDLE; + assign mem_rd_req.addr = (ct_mode | gs_mode | pwm_intt_mode) ? mem_rd_addr : pwm_mode ? pw_mem_rd_addr_c : 'h0; + assign pwm_rd_data_c = (pwm_mode && accumulate) ? mem_rd_data : 'h0; //TODO: masked pwm (Ay) mode //pwm rd a - PWO mode - read a operand from mem. NTT/INTT mode, not used - assign pwm_a_rd_req.rd_wr_en = pwo_mode ? (pw_rden ? RW_READ : RW_IDLE) : RW_IDLE; - assign pwm_a_rd_req.addr = pwo_mode ? pw_mem_rd_addr_a : 'h0; - assign pwm_rd_data_a = pwo_mode ? pwm_a_rd_data : 'h0; //TODO: clean up mux. Just connect input directly to logic + assign pwm_a_rd_req.rd_wr_en = (pwo_mode | pwm_intt_mode) ? (pw_rden ? RW_READ : RW_IDLE) : RW_IDLE; + assign pwm_a_rd_req.addr = (pwo_mode | pwm_intt_mode) ? pw_mem_rd_addr_a : 'h0; + assign pwm_rd_data_a = (pwo_mode | pwm_intt_mode) ? pwm_a_rd_data : 'h0; //TODO: clean up mux. Just connect input directly to logic //pwm rd b - PWO mode - read b operand from mem. Or operand b can also be connected directly to sampler, so in that case, addr/rden are not used always_comb begin if (shuffle_en) begin - pwm_b_rd_req.rd_wr_en = sampler_valid_reg & pwo_mode ? (pw_rden ? RW_READ : RW_IDLE) : RW_IDLE; //pw_rden is delayed a clk due to shuffling, so use delayed sampler_valid to line it up - pwm_b_rd_req.addr = sampler_valid_reg & pwo_mode ? pw_mem_rd_addr_b : 'h0; + pwm_b_rd_req.rd_wr_en = sampler_valid_reg & (pwo_mode | pwm_intt_mode) ? (pw_rden ? RW_READ : RW_IDLE) : RW_IDLE; //pw_rden is delayed a clk due to shuffling, so use delayed sampler_valid to line it up + pwm_b_rd_req.addr = sampler_valid_reg & (pwo_mode | pwm_intt_mode) ? pw_mem_rd_addr_b : 'h0; pwm_rd_data_b = pwm_b_rd_data_reg; end else begin - pwm_b_rd_req.rd_wr_en = sampler_valid & pwo_mode ? (pw_rden ? RW_READ : RW_IDLE) : RW_IDLE; - pwm_b_rd_req.addr = sampler_valid & pwo_mode ? pw_mem_rd_addr_b : 'h0; + pwm_b_rd_req.rd_wr_en = sampler_valid & (pwo_mode | pwm_intt_mode) ? (pw_rden ? 
RW_READ : RW_IDLE) : RW_IDLE; + pwm_b_rd_req.addr = sampler_valid & (pwo_mode | pwm_intt_mode) ? pw_mem_rd_addr_b : 'h0; pwm_rd_data_b = pwm_b_rd_data; end end @@ -212,6 +214,8 @@ module ntt_top .accumulate(accumulate), .bf_enable(bf_enable), + .opcode(opcode), + .masking_en_ctrl(masking_en_ctrl), .buf_wren(buf_wren), .buf_rden(buf_rden), .buf_wrptr(buf_wrptr), @@ -304,7 +308,7 @@ module ntt_top .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .mode(mode), + .mode(opcode), .enable(bf_enable_mux), .masking_en(masking_en), .uvw_i(uvw_i), @@ -397,7 +401,7 @@ module ntt_top : {1'b0, uv_o.v21_o, 1'b0, uv_o.v20_o, 1'b0, uv_o.u21_o, 1'b0, uv_o.u20_o}; always_comb begin - unique case(mode) + unique case(opcode) ct: begin uvw_i.u00_i = buf_data_o[REG_SIZE-2:0] ; uvw_i.u01_i = buf_data_o[(2*REG_SIZE)-2:REG_SIZE] ; @@ -414,7 +418,7 @@ module ntt_top pw_uvw_i = 'h0; end - pwm: begin + pwm, pwm_intt: begin uvw_i.u00_i = 'h0; uvw_i.u01_i = 'h0; uvw_i.v00_i = 'h0; diff --git a/src/ntt_top/rtl/ntt_twiddle_lookup.sv b/src/ntt_top/rtl/ntt_twiddle_lookup.sv index 65ced97..bcfadfb 100644 --- a/src/ntt_top/rtl/ntt_twiddle_lookup.sv +++ b/src/ntt_top/rtl/ntt_twiddle_lookup.sv @@ -38,7 +38,7 @@ reg [(3*DATA_WIDTH)-1:0] ntt_twiddle_mem [84:0]; reg [(3*DATA_WIDTH)-1:0] intt_twiddle_mem [84:0]; always_comb begin - rdata = (mode == ct) ? ntt_twiddle_mem[raddr] : (mode == gs) ? intt_twiddle_mem[raddr] : 'h0; + rdata = (mode == ct) ? ntt_twiddle_mem[raddr] : (mode inside {gs, pwm_intt}) ? 
intt_twiddle_mem[raddr] : 'h0; end logic [255 : 0][(DATA_WIDTH)-1:0] zeta; diff --git a/src/ntt_top/stimulus/tests/directed/ntt_masking_normal_test.yml b/src/ntt_top/stimulus/tests/directed/ntt_masking_normal_test.yml new file mode 100644 index 0000000..28eb238 --- /dev/null +++ b/src/ntt_top/stimulus/tests/directed/ntt_masking_normal_test.yml @@ -0,0 +1,21 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +--- +# plusargs: +# - +ECC_TEST_VECTOR_FILE='${CALIPTRA_ROOT}/src/ecc/tb/test_vectors/ecc_drbg_mbedtls.hex' +# - +ECC_TEST='ECC_normal_test' + +testname: ntt_masking_normal_test +seed: 1 diff --git a/src/ntt_top/tb/ntt_top_masking_tb.sv b/src/ntt_top/tb/ntt_top_masking_tb.sv new file mode 100644 index 0000000..7df1d65 --- /dev/null +++ b/src/ntt_top/tb/ntt_top_masking_tb.sv @@ -0,0 +1,578 @@ +// SPDX-License-Identifier: Apache-2.0 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+// +//====================================================================== +// +// ntt_top_masking_tb.sv +// -------- +//====================================================================== + +`default_nettype none + +module ntt_top_masking_tb + + import ntt_defines_pkg::*; + import mldsa_params_pkg::*; + +#( + parameter TEST_VECTOR_NUM = 10, + parameter PRIME = 23'd8380417, + parameter REG_SIZE = 23, + parameter MEM_DEPTH = 32768, //32 KB + parameter MEM_ADDR_WIDTH = $clog2(MEM_DEPTH) +) +(); + +parameter CLK_HALF_PERIOD = 5; +parameter CLK_PERIOD = 2 * CLK_HALF_PERIOD; + +//---------------------------------------------------------------- +// Register and Wire declarations. +//---------------------------------------------------------------- +reg [31 : 0] cycle_ctr; +reg [31 : 0] error_ctr; +reg [31 : 0] tc_ctr; + +reg clk_tb; +reg reset_n_tb; +reg cptra_pwrgood_tb; +reg zeroize_tb; +reg enable_tb; +reg bf_ready_tb; + +mode_t mode_tb; + +reg [23:0] zeta [255:0]; +reg [23:0] zeta_inv [255:0]; + +string operation; + +logic sub; +logic [45:0] actual_u, actual_v, actual_w; +logic [1:0][45:0] u; +logic [1:0][45:0] v; +logic [1:0][45:0] w; +logic [45:0] rnd0, rnd1, rnd2, rnd3; +logic wren_tb, rden_tb; +logic [1:0] wrptr_tb, rdptr_tb; +logic [5:0] random_tb; +bf_uvwi_t uvw_i_tb; +pwo_uvwi_t pw_uvw_i_tb; +hybrid_bf_uvwi_t hybrid_uvw_i_tb; + +//---------------------------------------------------------------- +// Device Under Test. 
+//---------------------------------------------------------------- + +// ntt_masked_BFU_add_sub dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .sub(sub), +// .u(u), +// .v(v), +// .rnd0(rnd0), +// .rnd1(rnd1), +// .rnd2(rnd2), +// .rnd3(rnd3), +// .res() +// ); + +// ntt_masked_BFU_mult dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .u(u), +// .v(v), +// .rnd0(rnd0), +// .rnd1(rnd1), +// .rnd2(rnd2), +// .rnd3(rnd3), +// .rnd4(rnd0+rnd1), +// .res() +// ); + +// ntt_masked_pwm dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .u(u), +// .v(v), +// .w(w), +// .rnd({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .res() +// ); + +// ntt_masked_butterfly1x2 dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .uvw_i(uvw_i_tb), +// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .uv_o() +// ); + +// ntt_masked_gs_butterfly dut ( +// .clk(clk_tb), +// .reset_n(reset_n_tb), +// .zeroize(zeroize_tb), +// .opu_i(u), +// .opv_i(v), +// .opw_i(w), +// .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), +// .u_o(), +// .v_o() +// ); + +ntt_hybrid_butterfly_2x2 dut ( + .clk(clk_tb), + .reset_n(reset_n_tb), + .zeroize(zeroize_tb), + .mode(mode_tb), + .enable(enable_tb), + .masking_en(1'b1), //masking is always enabled in this testbench + .uvw_i(uvw_i_tb), + .pw_uvw_i(pw_uvw_i_tb), + .hybrid_pw_uvw_i(), //NOTE(review): left open although hybrid_uvw_i_tb is declared and zeroed in init_sim - confirm whether it should be connected here + .rnd_i({rnd0+rnd1, rnd3, rnd2, rnd1, rnd0}), //five mask words; the fifth is derived as rnd0+rnd1 + .accumulate(1'b0), + .uv_o(), //outputs are not observed by this testbench + .pwo_uv_o(), + .ready_o() +); + +//---------------------------------------------------------------- +// clk_gen +// +// Always running clock generator process. 
+//---------------------------------------------------------------- +always +begin : clk_gen + #CLK_HALF_PERIOD; + clk_tb = !clk_tb; + rnd0 = 46'({$urandom(), $urandom()}); //two 32-bit draws so all 46 mask bits are random ($random alone sign-extends bit 31 into bits 45:32) + rnd1 = 46'({$urandom(), $urandom()}); + rnd2 = 46'({$urandom(), $urandom()}); + rnd3 = 46'({$urandom(), $urandom()}); +end // clk_gen + +//---------------------------------------------------------------- +// sys_monitor() +// +// An always running process that increments the cycle counter +// once per clock period. It does not print anything. +//---------------------------------------------------------------- +always +begin : sys_monitor + #(CLK_PERIOD); + cycle_ctr = cycle_ctr + 1; +end + +//---------------------------------------------------------------- +// reset_dut() +// +// Toggle reset to put the DUT into a well known state. +//---------------------------------------------------------------- +task reset_dut; + begin + $display("*** Toggle reset."); + // cptra_pwrgood_tb = '0; + reset_n_tb = 0; + + // #(2 * CLK_PERIOD); + // cptra_pwrgood_tb = 1; + + #(2 * CLK_PERIOD); + reset_n_tb = 1; + + $display("End of reset"); + end +endtask // reset_dut + +//---------------------------------------------------------------- +// init_sim() +// +// Initialize all counters and testbed functionality as well +// as setting the DUT inputs to defined values. 
+//---------------------------------------------------------------- +task init_sim; + begin + $display("Start of init\n"); + cycle_ctr = 32'h00000000; + error_ctr = 32'h00000000; + tc_ctr = 32'h00000000; + + clk_tb = 0; + reset_n_tb = 0; + cptra_pwrgood_tb = 0; + + zeroize_tb = 'b0; + enable_tb = 'b0; + wren_tb = 'b0; rden_tb = 'b0; + wrptr_tb = 'h0; rdptr_tb = 'h0; + + mode_tb = ct; + + //NTT ctrl + bf_ready_tb = 1'b0; + random_tb = 'h0; + + //Masking + u = '0; //u, v and w each pack two 46-bit shares ([1:0][45:0]); clear both shares at once + v = '0; + w = '0; + actual_u = 'h0; + actual_v = 'h0; + actual_w = 'h0; + sub = 'h0; + + rnd0 = 'h0; + rnd1 = 'h0; + rnd2 = 'h0; + rnd3 = 'h0; + + uvw_i_tb.u00_i = 'h0; + uvw_i_tb.u01_i = 'h0; + uvw_i_tb.v00_i = 'h0; + uvw_i_tb.v01_i = 'h0; + uvw_i_tb.w00_i = 'h0; + uvw_i_tb.w01_i = 'h0; + uvw_i_tb.w10_i = 'h0; + uvw_i_tb.w11_i = 'h0; + + pw_uvw_i_tb.u0_i = 'h0; + pw_uvw_i_tb.v0_i = 'h0; + pw_uvw_i_tb.w0_i = 'h0; + + pw_uvw_i_tb.u1_i = 'h0; + pw_uvw_i_tb.v1_i = 'h0; + pw_uvw_i_tb.w1_i = 'h0; + + pw_uvw_i_tb.u2_i = 'h0; + pw_uvw_i_tb.v2_i = 'h0; + pw_uvw_i_tb.w2_i = 'h0; + + pw_uvw_i_tb.u3_i = 'h0; + pw_uvw_i_tb.v3_i = 'h0; + pw_uvw_i_tb.w3_i = 'h0; + + hybrid_uvw_i_tb.u0_i = 'h0; + hybrid_uvw_i_tb.u1_i = 'h0; + hybrid_uvw_i_tb.u2_i = 'h0; + hybrid_uvw_i_tb.u3_i = 'h0; + + hybrid_uvw_i_tb.v0_i = 'h0; + hybrid_uvw_i_tb.v1_i = 'h0; + hybrid_uvw_i_tb.v2_i = 'h0; + hybrid_uvw_i_tb.v3_i = 'h0; + + hybrid_uvw_i_tb.w0_i = 'h0; + hybrid_uvw_i_tb.w1_i = 'h0; + hybrid_uvw_i_tb.w2_i = 'h0; + hybrid_uvw_i_tb.w3_i = 'h0; + + hybrid_uvw_i_tb.twiddle_w0_i = 'h0; + hybrid_uvw_i_tb.twiddle_w1_i = 'h0; + hybrid_uvw_i_tb.twiddle_w2_i = 'h0; + hybrid_uvw_i_tb.twiddle_w3_i = 'h0; + + $display("End of init\n"); + end +endtask + +/* +task masked_BFU_adder_test(); + logic [45:0] u_array, v_array; + logic [45:0] rand0, rand1; + sub = 1; + for (int i = 0; i < 1000; i++) begin + @(posedge clk_tb); + fork + begin + actual_u = $random()%PRIME; + 
actual_v = $random()%PRIME; + u_array = actual_u; + v_array = actual_v; + rand0 = 
$random(); + rand1 = $random(); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + begin + repeat(54) @(posedge clk_tb); + if (!sub) begin + if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array + v_array)%PRIME)) begin + $error("Addition Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + v_array)%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); + end + end + else begin + if ((dut.add_res_reduced[1] + dut.add_res_reduced[0]) != ((u_array - v_array + PRIME)%PRIME)) begin + $error("Subtraction Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array + PRIME + (~v_array+'h1))%PRIME, dut.add_res_reduced[0], dut.add_res_reduced[1], dut.add_res_reduced[0] + dut.add_res_reduced[1]); + end + end + end + join + end +endtask + + +task masked_BFU_mult_test(); + logic [45:0] u_array, v_array; + logic [45:0] rand0, rand1; + + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + u_array = actual_u; + v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + begin + repeat(210) @(posedge clk_tb); + if ((dut.final_res[1] + dut.final_res[0]) != ((u_array * v_array)%PRIME)) begin + $error("Multiplication Mismatch: exp_output = %h output shares = %h %h actual output = %h", (u_array * v_array)%PRIME, dut.final_res[0], dut.final_res[1], dut.final_res[0] + dut.final_res[1]); + end + end + join + end +endtask +*/ + + +// task masked_gs_butterfly_test(); +// logic [45:0] rand0, rand1, rand2; +// logic [45:0] actual_u_normalized; +// for 
(int i = 0; i < 10; i++) begin +// @(posedge clk_tb); +// fork +// begin +// actual_u = $random()%PRIME; +// actual_v = $random()%PRIME; +// actual_w = 'h2; +// if (actual_u < actual_v) +// actual_u_normalized = actual_u + PRIME; +// else +// actual_u_normalized = actual_u; +// // u_array = actual_u; +// // v_array = actual_v; +// rand0 = $random(); +// rand1 = $random(); +// rand2 = $random(); + +// // $display("actual u = %h, actual v = %h", actual_u, actual_v); + +// u[0] = actual_u-rand0; +// u[1] = rand0; +// v[0] = actual_v-rand1; +// v[1] = rand1; +// w[0] = actual_w-rand2; +// w[1] = rand2; +// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); +// end +// begin +// repeat(264) @(posedge clk_tb); +// if ((dut.u_o_0 + dut.u_o_1) != ((actual_u_normalized + actual_v)%PRIME)) begin +// $error("U = u+v Mismatch: exp_output = %h output shares = %h %h actual output = %h", (actual_u_normalized + actual_v)%PRIME, dut.u_o_0, dut.u_o_1, dut.u_o_0 + dut.u_o_1); +// end +// if ((dut.v_o_0 + dut.v_o_1) != (((actual_u_normalized - actual_v)*actual_w)%PRIME)) begin +// $error("V = (u-v)w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((actual_u_normalized - actual_v)*actual_w)%PRIME, dut.v_o_0, dut.v_o_1, dut.v_o_0 + dut.v_o_1); +// end +// end +// join +// end +// endtask + +/* +task masked_pwm_test(); + logic [45:0] rand0, rand1, rand2; + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + actual_w = 'h2; + + // u_array = actual_u; + // v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + rand2 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + w[0] = actual_w-rand2; + w[1] = rand2; + // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + begin + repeat(264) @(posedge clk_tb); + if 
((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin + $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); + end + end + join + end +endtask +*/ + +// task masked_bfu_1x2_test(); +// logic [45:0] rand0, rand1, rand2; +// for (int i = 0; i < 10; i++) begin +// @(posedge clk_tb); +// fork +// begin +// actual_u = $random()%PRIME; +// actual_v = $random()%PRIME; +// actual_w = 'h2; + +// // u_array = actual_u; +// // v_array = actual_v; +// rand0 = $random(); +// rand1 = $random(); +// rand2 = $random(); + +// // $display("actual u = %h, actual v = %h", actual_u, actual_v); + +// u[0] = actual_u-rand0; +// u[1] = rand0; +// v[0] = actual_v-rand1; +// v[1] = rand1; +// w[0] = actual_w-rand2; +// w[1] = rand2; + +// uvw_i_tb.u00_i = u; +// uvw_i_tb.u01_i = u; +// uvw_i_tb.v00_i = v; +// uvw_i_tb.v01_i = v; +// uvw_i_tb.w00_i = w; +// uvw_i_tb.w01_i = w; +// // $display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); +// end +// // begin +// // repeat(264) @(posedge clk_tb); +// // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin +// // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); +// // end +// // end +// join +// end +// endtask + +task masked_hybrid_bf_2x2_test(); + logic [45:0] rand0, rand1, rand2; + for (int j = 0; j < 6; j++) begin + mode_tb = mode_t'(j); //explicit cast: enums are strongly typed, an int cannot be assigned directly. NOTE(review): assumes mode_t has at least 6 encodings - confirm against ntt_defines_pkg + for (int i = 0; i < 10; i++) begin + @(posedge clk_tb); + enable_tb = 1'b1; + fork + begin + actual_u = $random()%PRIME; + actual_v = $random()%PRIME; + actual_w = 'h2; + + // u_array = actual_u; + // v_array = actual_v; + rand0 = $random(); + rand1 = $random(); + rand2 = $random(); + + // $display("actual u = %h, actual v = %h", actual_u, actual_v); + + 
u[0] = actual_u-rand0; + u[1] = rand0; + v[0] = actual_v-rand1; + v[1] = rand1; + w[0] = actual_w-rand2; + w[1] = rand2; + + uvw_i_tb.u00_i = actual_u; + uvw_i_tb.u01_i = actual_u; + uvw_i_tb.v00_i = actual_v; + uvw_i_tb.v01_i = actual_v; + uvw_i_tb.w00_i = actual_w; + uvw_i_tb.w01_i = actual_w; + uvw_i_tb.w10_i = actual_w; + uvw_i_tb.w11_i = actual_w; + + pw_uvw_i_tb.u0_i = actual_u; + pw_uvw_i_tb.v0_i = actual_v; + pw_uvw_i_tb.w0_i = actual_w; + + pw_uvw_i_tb.u1_i = actual_u; + pw_uvw_i_tb.v1_i = actual_v; + pw_uvw_i_tb.w1_i = actual_w; + + pw_uvw_i_tb.u2_i = actual_u; + pw_uvw_i_tb.v2_i = actual_v; + pw_uvw_i_tb.w2_i = actual_w; + + pw_uvw_i_tb.u3_i = actual_u; + pw_uvw_i_tb.v3_i = actual_v; + pw_uvw_i_tb.w3_i = actual_w; + //$display("u0 = %h, u1 = %h, v0 = %h, v1 = %h", u[0], u[1], v[0], v[1]); + end + // begin //TODO + // repeat(470) @(posedge clk_tb); //467 clks + // if ((dut.res[0] + dut.res[1]) != ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME)) begin + // $error("U = u*v+w Mismatch: exp_output = %h output shares = %h %h actual output = %h", ((((actual_u * actual_v)%PRIME)+actual_w) % PRIME), dut.res[0], dut.res[1], dut.res[0] + dut.res[1]); + // end + // end + join + end + enable_tb = 1'b0; + @(posedge clk_tb); + end +endtask + + +initial begin + init_sim(); + reset_dut(); + + @(posedge clk_tb); + $display("Starting masked ntt test\n"); + + // masked_BFU_adder_test(); + // masked_BFU_mult_test(); + // masked_gs_butterfly_test(); + // masked_pwm_test(); + masked_hybrid_bf_2x2_test(); + + repeat(1000) @(posedge clk_tb); + $finish; +end + +endmodule \ No newline at end of file diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index c78779f..246c802 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -86,6 +86,7 @@ logic [1:0] wrptr_tb, rdptr_tb; logic [5:0] random_tb; bf_uvwi_t uvw_i_tb; pwo_uvwi_t pw_uvw_i_tb; +logic masking_en_tb; //---------------------------------------------------------------- // 
Device Under Test. @@ -152,9 +153,9 @@ ntt_wrapper dut ( .ntt_enable(enable_tb), .load_tb_values(load_tb_values), .load_tb_addr(load_tb_addr), - .shuffle_en(1'b0), + .shuffle_en(1'b1), .random(random_tb), - .masking_en(1'b0), + .masking_en(masking_en_tb), .rnd_i(230'h0), .ntt_mem_base_addr(ntt_mem_base_addr_tb), .pwo_mem_base_addr(pwo_mem_base_addr_tb), @@ -308,6 +309,8 @@ task init_sim; pw_uvw_i_tb.v3_i = 'h0; pw_uvw_i_tb.w3_i = 'h0; + masking_en_tb = 'b0; + $display("End of init\n"); end endtask @@ -382,77 +385,51 @@ task ntt_top_test(); end end begin - $display("NTT operation\n"); - operation = "NTT"; - mode_tb = ct; - enable_tb = 1; - ntt_mem_base_addr_tb.src_base_addr = 8'd0; - ntt_mem_base_addr_tb.interim_base_addr = 8'd64; - ntt_mem_base_addr_tb.dest_base_addr = 8'd128; - acc_tb = 1'b0; - svalid_tb = 1'b1; - @(posedge clk_tb); - enable_tb = 1'b0; - - // while(dut.ntt_top_inst0.ntt_ctrl_inst0.rounds_count == 'h0) + // $display("NTT operation\n"); + // operation = "NTT"; + // mode_tb = ct; + // enable_tb = 1; + // ntt_mem_base_addr_tb.src_base_addr = 8'd0; + // ntt_mem_base_addr_tb.interim_base_addr = 8'd64; + // ntt_mem_base_addr_tb.dest_base_addr = 8'd128; + // acc_tb = 1'b0; + // svalid_tb = 1'b1; + // @(posedge clk_tb); + // enable_tb = 1'b0; + + // $display("Waiting for ntt_done\n"); + // while(ntt_done_tb == 1'b0) // @(posedge clk_tb); - // random_tb = {4'h9, 2'h3}; + // $display("Received ntt_done\n"); - // while(dut.ntt_top_inst0.ntt_ctrl_inst0.rounds_count == 'h1) - // @(posedge clk_tb); - // random_tb = {4'h0, 2'h2}; - - // while(dut.ntt_top_inst0.ntt_ctrl_inst0.rounds_count == 'h2) - // @(posedge clk_tb); - // random_tb = {4'hf, 2'h0}; - - $display("Waiting for ntt_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received ntt_done\n"); - - // for (int i = 0; i < 64; i++) begin - // if (dut.ntt_mem.mem[i+dest_base_addr] != ntt_mem_tb[i]) - // $display("Error: NTT data mismatch at index %0d (dest_base addr = %0d). 
Actual data = %h, expected data = %h", i, dest_base_addr, dut.ntt_mem.mem[i+dest_base_addr], ntt_mem_tb[i]); + + // $display("INTT operation\n"); + // operation = "INTT"; + // mode_tb = gs; + // enable_tb = 1; + // ntt_mem_base_addr_tb.src_base_addr = 8'd128; //read from addr where ntt stored its results + // ntt_mem_base_addr_tb.interim_base_addr = 8'd64; + // ntt_mem_base_addr_tb.dest_base_addr = 8'd128; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for intt_done\n"); + // while(ntt_done_tb == 1'b0) // @(posedge clk_tb); - // end - // end - // join - // fork - // begin - // while(ntt_done_tb == 1'b0) begin - // random_tb = $urandom(); - // @(posedge clk_tb); - // end - // end - // begin - $display("INTT operation\n"); - operation = "INTT"; - mode_tb = gs; - enable_tb = 1; - ntt_mem_base_addr_tb.src_base_addr = 8'd128; //read from addr where ntt stored its results - ntt_mem_base_addr_tb.interim_base_addr = 8'd64; - ntt_mem_base_addr_tb.dest_base_addr = 8'd128; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for intt_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received intt_done\n"); + // $display("Received intt_done\n"); - $display("PWM operation 1\n"); - operation = "PWM 1 no acc"; - // $readmemh("pwm_iter1.hex", ntt_mem_tb); - mode_tb = pwm; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + // $display("PWM operation 1\n"); + // operation = "PWM 1 no acc"; + // // $readmemh("pwm_iter1.hex", ntt_mem_tb); + // mode_tb = pwm; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); // for (int i = 0; i < 64; i++) begin // if (dut.pwm_mem_c.mem[i+0] != 
ntt_mem_tb[i]) @@ -461,30 +438,30 @@ task ntt_top_test(); // end - $display("PWM operation 2\n"); - operation = "PWM 2 no acc"; - mode_tb = pwm; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + // $display("PWM operation 2\n"); + // operation = "PWM 2 no acc"; + // mode_tb = pwm; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); - $display("PWM operation 3\n"); - operation = "PWM 3 acc"; - mode_tb = pwm; - enable_tb = 1; - acc_tb = 1'b1; - $readmemh("pwm_iter2.hex", ntt_mem_tb); - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + // $display("PWM operation 3\n"); + // operation = "PWM 3 acc"; + // mode_tb = pwm; + // enable_tb = 1; + // acc_tb = 1'b1; + // $readmemh("pwm_iter2.hex", ntt_mem_tb); + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); // $readmemh("pwm_iter2.hex", ntt_mem_tb); // for (int i = 0; i < 64; i++) begin @@ -493,71 +470,114 @@ task ntt_top_test(); // @(posedge clk_tb); // end - $display("PWA operation 1\n"); - operation = "PWA 1"; - mode_tb = pwa; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); - - $display("PWA operation 2\n"); - operation = "PWA 2"; - mode_tb = pwa; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - 
$display("Received pwo_done\n"); - - $display("PWA operation 3\n"); - operation = "PWA 3"; - mode_tb = pwa; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + // $display("PWA operation 1\n"); + // operation = "PWA 1"; + // mode_tb = pwa; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + + // $display("PWA operation 2\n"); + // operation = "PWA 2"; + // mode_tb = pwa; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + + // $display("PWA operation 3\n"); + // operation = "PWA 3"; + // mode_tb = pwa; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + + // $display("PWS operation 1\n"); + // operation = "PWS 1"; + // mode_tb = pws; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + + // $display("PWS operation 2\n"); + // operation = "PWS 2"; + // mode_tb = pws; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + + // $display("PWS operation 3\n"); + // operation = "PWS 3"; + // mode_tb = pws; + // enable_tb = 1; + // acc_tb = 1'b0; + // @(posedge clk_tb); + // enable_tb = 
1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); + // svalid_tb = 1'b0; + // @(posedge clk_tb); - $display("PWS operation 1\n"); - operation = "PWS 1"; - mode_tb = pws; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + - $display("PWS operation 2\n"); - operation = "PWS 2"; - mode_tb = pws; - enable_tb = 1; - acc_tb = 1'b0; - @(posedge clk_tb); - enable_tb = 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); + // $display("PWM + sampler operation 1\n"); + // operation = "PWM sampler"; + // mode_tb = pwm; + // enable_tb = 1; + // acc_tb = 1'b0; + // sampler_mode_tb = 1'b1; + // repeat(2) @(posedge clk_tb); + // svalid_tb <= 1'b1; + // @(posedge clk_tb); + // enable_tb = 1'b0; + // repeat(10) @(posedge clk_tb); + // svalid_tb <= 1'b0; + // repeat(10) @(posedge clk_tb); + // svalid_tb <= 1'b1; + // repeat(10) @(posedge clk_tb); + // svalid_tb <= 1'b0; + // repeat(10) @(posedge clk_tb); + // svalid_tb <= 1'b1; + // repeat(45) @(posedge clk_tb); + // svalid_tb <= 1'b0; + // $display("Waiting for pwo_done\n"); + // while(ntt_done_tb == 1'b0) + // @(posedge clk_tb); + // $display("Received pwo_done\n"); - $display("PWS operation 3\n"); - operation = "PWS 3"; - mode_tb = pws; + $display("PWM+INTT operation\n"); + operation = "PWM INTT"; + mode_tb = pwm_intt; enable_tb = 1; acc_tb = 1'b0; + svalid_tb = 1'b1; + masking_en_tb = 1'b1; @(posedge clk_tb); enable_tb = 1'b0; $display("Waiting for pwo_done\n"); @@ -566,33 +586,6 @@ task ntt_top_test(); $display("Received pwo_done\n"); svalid_tb = 1'b0; @(posedge clk_tb); - - - - $display("PWM + sampler operation 1\n"); - operation = "PWM sampler"; - mode_tb = pwm; - enable_tb = 1; - acc_tb = 1'b0; - sampler_mode_tb = 
1'b1; - repeat(2) @(posedge clk_tb); - svalid_tb <= 1'b1; - @(posedge clk_tb); - enable_tb = 1'b0; - repeat(10) @(posedge clk_tb); - svalid_tb <= 1'b0; - repeat(10) @(posedge clk_tb); - svalid_tb <= 1'b1; - repeat(10) @(posedge clk_tb); - svalid_tb <= 1'b0; - repeat(10) @(posedge clk_tb); - svalid_tb <= 1'b1; - repeat(45) @(posedge clk_tb); - svalid_tb <= 1'b0; - $display("Waiting for pwo_done\n"); - while(ntt_done_tb == 1'b0) - @(posedge clk_tb); - $display("Received pwo_done\n"); end join_any diff --git a/src/ntt_top/tb/ntt_wrapper.sv b/src/ntt_top/tb/ntt_wrapper.sv index a1d30c2..8a38573 100644 --- a/src/ntt_top/tb/ntt_wrapper.sv +++ b/src/ntt_top/tb/ntt_wrapper.sv @@ -183,9 +183,9 @@ module ntt_wrapper .accumulate(accumulate), .sampler_valid(sampler_valid), .shuffle_en(shuffle_en), - .masking_en(1'b0), + .masking_en(masking_en), .random(random), - .rnd_i(230'h0), + .rnd_i(rnd_i), //NTT mem IF .mem_wr_req(mem_wr_req), .mem_rd_req(mem_rd_req), From 3ac04f59e28467e25a312f4c9d2e890db4cada6e Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Mon, 2 Dec 2024 13:17:46 -0800 Subject: [PATCH 03/10] Clean up --- src/mldsa_top/rtl/mldsa_ctrl.sv | 4 +-- src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv | 28 ++++++++++++--------- src/ntt_top/rtl/ntt_masked_gs_butterfly.sv | 11 ++++---- src/ntt_top/rtl/ntt_masked_mult_redux46.sv | 2 +- src/ntt_top/rtl/ntt_masked_pwm.sv | 5 ++-- 5 files changed, 27 insertions(+), 23 deletions(-) diff --git a/src/mldsa_top/rtl/mldsa_ctrl.sv b/src/mldsa_top/rtl/mldsa_ctrl.sv index beefbfb..f1bb33c 100644 --- a/src/mldsa_top/rtl/mldsa_ctrl.sv +++ b/src/mldsa_top/rtl/mldsa_ctrl.sv @@ -1155,7 +1155,7 @@ always_comb mldsa_privkey_lock = '0; if (prim_instr.opcode.ntt_en) begin ntt_mode_o[0] = prim_instr.opcode.mode.ntt_mode; ntt_masking_en_o[0] = prim_instr.opcode.masking_en; - ntt_shuffling_en_o[0] = prim_instr.opcode.shuffling_en; //1'b0; + ntt_shuffling_en_o[0] = prim_instr.opcode.shuffling_en; end end @@ -1458,7 +1458,7 @@ mldsa_seq_prim 
mldsa_seq_prim_inst if (sec_instr.opcode.ntt_en) begin ntt_mode_o[1] = sec_instr.opcode.mode.ntt_mode; ntt_masking_en_o[1] = sec_instr.opcode.masking_en; - ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; //1'b0; + ntt_shuffling_en_o[1] = sec_instr.opcode.shuffling_en; end end //passing a bit on the immediate field to mux between temp address locations diff --git a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv index 50ef19c..27408d8 100644 --- a/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv +++ b/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv @@ -26,17 +26,7 @@ module ntt_hybrid_butterfly_2x2 import ntt_defines_pkg::*; #( parameter WIDTH = 46, - parameter HALF_WIDTH = WIDTH/2, - parameter UNMASKED_BF_LATENCY = 10, //5 cycles per butterfly * 2 instances in serial = 10 clks - parameter UNMASKED_PWM_LATENCY = 5, //latency of modular multiplier + modular addition to perform accumulation - parameter UNMASKED_PWA_LATENCY = 1, //latency of modular addition - parameter UNMASKED_PWS_LATENCY = 1, //latency of modular subtraction - parameter UNMASKED_BF_STAGE1_LATENCY = UNMASKED_BF_LATENCY/2, - parameter MASKED_BF_STAGE1_LATENCY = 264, //For 1 masked butterfly - parameter MASKED_PWM_LATENCY = 209, //For 1 masked pwm operation - parameter MASKED_PWM_MASKED_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_BF_STAGE1_LATENCY, - parameter MASKED_INTT_LATENCY = MASKED_BF_STAGE1_LATENCY + UNMASKED_BF_STAGE1_LATENCY, - parameter MASKED_PWM_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_INTT_LATENCY + 1 //TODO: adjust for PWMA case. 
Adding 1 cyc as a placeholder for it + parameter HALF_WIDTH = WIDTH/2 ) ( input wire clk, @@ -57,6 +47,20 @@ module ntt_hybrid_butterfly_2x2 output logic ready_o ); +//---------------------- +//Latency params +//---------------------- +localparam UNMASKED_BF_LATENCY = 10; //5 cycles per butterfly * 2 instances in serial = 10 clks +localparam UNMASKED_PWM_LATENCY = 5; //latency of modular multiplier + modular addition to perform accumulation +localparam UNMASKED_PWA_LATENCY = 1; //latency of modular addition +localparam UNMASKED_PWS_LATENCY = 1; //latency of modular subtraction +localparam UNMASKED_BF_STAGE1_LATENCY = UNMASKED_BF_LATENCY/2; +localparam MASKED_BF_STAGE1_LATENCY = 264; //For 1 masked butterfly +localparam MASKED_PWM_LATENCY = 209; //For 1 masked pwm operation +localparam MASKED_PWM_MASKED_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_BF_STAGE1_LATENCY; +localparam MASKED_INTT_LATENCY = MASKED_BF_STAGE1_LATENCY + UNMASKED_BF_STAGE1_LATENCY; +localparam MASKED_PWM_INTT_LATENCY = MASKED_PWM_LATENCY + MASKED_INTT_LATENCY + 1; //TODO: adjust for PWMA case. 
Adding 1 cyc as a placeholder for it + //---------------------- //Unmasked wires //---------------------- @@ -353,7 +357,7 @@ ntt_masked_butterfly1x2 #( .clk(clk), .reset_n(reset_n), .zeroize(zeroize), - .uvw_i({uv00_share, uv10_share, uv01_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), //TODO check connection + .uvw_i({uv00_share, uv10_share, uv01_share, uv11_share, twiddle_w00_share, twiddle_w01_share}), .rnd_i({rnd_i[4], rnd_i[3], rnd_i[2], rnd_i[1], rnd_i[0]}), .uv_o(masked_gs_stage1_uvo) ); diff --git a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv index 32c23f8..0a8034f 100644 --- a/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv +++ b/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv @@ -39,7 +39,8 @@ module ntt_masked_gs_butterfly ); localparam MASKED_MULT_LATENCY = 209; - logic [52:0][1:0][WIDTH-1:0] w_reg; //TODO parameterize + localparam MASKED_ADD_SUB_LATENCY = 53; + logic [MASKED_ADD_SUB_LATENCY-1:0][1:0][WIDTH-1:0] w_reg; logic [1:0] add_res [WIDTH-1:0]; logic [1:0] sub_res [WIDTH-1:0]; logic [1:0] mul_res [WIDTH-1:0]; @@ -110,21 +111,21 @@ module ntt_masked_gs_butterfly //w delay flops always_ff @(posedge clk or negedge reset_n) begin if (!reset_n) begin - for (int i = 0; i < 53; i++) begin + for (int i = 0; i < MASKED_ADD_SUB_LATENCY; i++) begin w_reg[i] <= 'h0; end end else if (zeroize) begin - for (int i = 0; i < 53; i++) begin + for (int i = 0; i < MASKED_ADD_SUB_LATENCY; i++) begin w_reg[i] <= 'h0; end end else begin - w_reg <= {opw_i, w_reg[52:1]}; + w_reg <= {opw_i, w_reg[MASKED_ADD_SUB_LATENCY-1:1]}; end end - //207 clks - 209 + //209 clks ntt_masked_BFU_mult #( .WIDTH(WIDTH) ) mult_inst_0 ( diff --git a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv index 5d47446..6027599 100644 --- a/src/ntt_top/rtl/ntt_masked_mult_redux46.sv +++ b/src/ntt_top/rtl/ntt_masked_mult_redux46.sv @@ -243,7 +243,7 @@ module ntt_masked_mult_redux46 .delayed_reg(z_12_0_delayed) ); - 
//28 cycles? + //28 cycles ntt_masked_special_adder add_with_conc_d10_and_z12( .clk(clk), .rst_n(rst_n), diff --git a/src/ntt_top/rtl/ntt_masked_pwm.sv b/src/ntt_top/rtl/ntt_masked_pwm.sv index f48b73d..a162180 100644 --- a/src/ntt_top/rtl/ntt_masked_pwm.sv +++ b/src/ntt_top/rtl/ntt_masked_pwm.sv @@ -19,8 +19,7 @@ // This module performs masked pwm operation with or without accumulate // on input shares. Always performs (u*v)+w (top level needs to drive 0 // to the w input if not in accumulate mode) -// 207 clks if PWM, 260 clks if PWMA -// 209, 262 +// 209 clks if PWM, 262 clks if PWMA module ntt_masked_pwm import mldsa_params_pkg::*; @@ -60,7 +59,7 @@ module ntt_masked_pwm end end - //209 clks (207) + //209 clks ntt_masked_BFU_mult #( .WIDTH(WIDTH) ) mult_inst0 ( From f75356d2bc67149c12afb9a807a0fed187d00300 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Mon, 2 Dec 2024 16:17:00 -0800 Subject: [PATCH 04/10] Revert vf change --- src/ntt_top/config/ntt_top_tb.vf | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/ntt_top/config/ntt_top_tb.vf b/src/ntt_top/config/ntt_top_tb.vf index addea94..e149b99 100644 --- a/src/ntt_top/config/ntt_top_tb.vf +++ b/src/ntt_top/config/ntt_top_tb.vf @@ -52,4 +52,6 @@ ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_gs_butterfly.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_pwm.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_masked_butterfly1x2.sv ${ADAMSBRIDGE_ROOT}/src/ntt_top/rtl/ntt_hybrid_butterfly_2x2.sv -${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_top_tb.sv \ No newline at end of file +${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_top_tb.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_ram_tdp_file.sv +${ADAMSBRIDGE_ROOT}/src/ntt_top/tb/ntt_wrapper.sv \ No newline at end of file From f8909687b41d56b01dc5a2e3c2445e46f821981c Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Mon, 2 Dec 2024 16:28:33 -0800 Subject: [PATCH 05/10] Clean up --- src/ntt_top/rtl/ntt_ctrl.sv | 15 +- src/ntt_top/tb/ntt_top_tb.sv | 324 
+++++++++++++++++------------------ 2 files changed, 169 insertions(+), 170 deletions(-) diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index 2d00c7e..9c199fc 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -90,11 +90,11 @@ localparam INTT_WRITE_ADDR_STEP = 16; localparam PWO_READ_ADDR_STEP = 1; localparam PWO_WRITE_ADDR_STEP = 1; localparam PWM_LATENCY = 5; -localparam MASKED_BF_STAGE1_LATENCY = 260; +localparam MASKED_BF_STAGE1_LATENCY = 260; //TODO check localparam [MEM_ADDR_WIDTH-1:0] MEM_LAST_ADDR = 63; localparam INTT_WRBUF_LATENCY = 13; //includes BF latency + mem latency for shuffled reads to begin -localparam MASKED_INTT_WRBUF_LATENCY = 481; //TODO check +localparam MASKED_INTT_WRBUF_LATENCY = 481; //masked PWM+INTT latency + mem latency for shuffled reads to begin //FSM states ntt_read_state_t read_fsm_state_ps, read_fsm_state_ns; ntt_write_state_t write_fsm_state_ps, write_fsm_state_ns; @@ -304,7 +304,7 @@ always_comb begin if (shuffle_en) begin mem_rd_addr_nxt = (gs_mode | pwo_mode) ? (4*chunk_count) + (rd_addr_step*mem_rd_index_ofst) + mem_rd_base_addr : mem_rd_addr + rd_addr_step; - mem_wr_addr_nxt = ct_mode ? (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[0]) + (wr_addr_step*buf_rdptr_reg[0]) + mem_wr_base_addr) : (gs_mode | pwm_intt_mode) ? mem_wr_addr + wr_addr_step : (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[4]) + (wr_addr_step*buf_rdptr_reg[4])); //TODO check pwm_intt latency with shuffling + mem_wr_addr_nxt = ct_mode ? (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[0]) + (wr_addr_step*buf_rdptr_reg[0]) + mem_wr_base_addr) : (gs_mode | pwm_intt_mode) ? mem_wr_addr + wr_addr_step : (MEM_ADDR_WIDTH+1)'((4*chunk_count_reg[4]) + (wr_addr_step*buf_rdptr_reg[4])); end else begin mem_rd_addr_nxt = mem_rd_addr + rd_addr_step; @@ -416,12 +416,12 @@ always_comb begin 'h0: begin twiddle_end_addr = ct_mode ? 'd0 : 'd63; twiddle_offset = 'h0; - twiddle_rand_offset = ct_mode ? 'h0 : pwm_intt_mode ? 
7'((4*chunk_count_reg[MASKED_BF_STAGE1_LATENCY]) + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) /*TODO: check buf_wrptr*/ : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 'h0 : pwm_intt_mode ? 7'((4*chunk_count_reg[MASKED_BF_STAGE1_LATENCY]) + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h1: begin twiddle_end_addr = ct_mode ? 'd3 : 'd15; twiddle_offset = ct_mode ? 'd1 : 'd64; - twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : pwm_intt_mode ? 7'((chunk_count_reg[MASKED_BF_STAGE1_LATENCY] % 4)*4 + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) /*TODO: check for masking*/ : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : pwm_intt_mode ? 7'((chunk_count_reg[MASKED_BF_STAGE1_LATENCY] % 4)*4 + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h2: begin twiddle_end_addr = ct_mode ? 'd15 : 'd3; @@ -545,7 +545,6 @@ always_ff @(posedge clk or negedge reset_n) begin chunk_rand_offset <= random[5:2]; chunk_count <= random[5:2]; end - //TODO: PWM+INTT mode else if ((ct_mode & (buf_count == 'h3)) | (((gs_mode | pwm_intt_mode) | (pwo_mode & incr_pw_rd_addr)) & (index_count == 'h3))) begin //update chunk after every 4 cycles - TODO: stop chunk counting when there's no incr_rd_addr in ntt/intt modes chunk_count <= (chunk_count == 'hf) ? 
'h0 : chunk_count + 'h1; end @@ -578,13 +577,13 @@ always_ff @(posedge clk or negedge reset_n) begin else if (ct_mode & (buf_rden_ntt | butterfly_ready)) begin buf_rdptr_reg <= {buf_rdptr_int, buf_rdptr_reg[BF_LATENCY:1]}; end - else if ((gs_mode & (incr_mem_rd_addr | butterfly_ready))) begin //TODO check latency in pwm_intt mode + else if ((gs_mode & (incr_mem_rd_addr | butterfly_ready))) begin buf_wrptr_reg <= {{468{2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; end else if (pwo_mode & (incr_pw_rd_addr | butterfly_ready)) begin buf_rdptr_reg <= {mem_rd_index_ofst, buf_rdptr_reg[BF_LATENCY:1]}; //TODO: create new reg with apt name for PWO end - else if ((pwm_intt_mode)) begin // & (incr_pw_rd_addr | butterfly_ready))) begin + else if ((pwm_intt_mode)) begin buf_wrptr_reg <= {mem_rd_index_ofst, buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1:1]}; end else begin diff --git a/src/ntt_top/tb/ntt_top_tb.sv b/src/ntt_top/tb/ntt_top_tb.sv index 246c802..572b2f7 100644 --- a/src/ntt_top/tb/ntt_top_tb.sv +++ b/src/ntt_top/tb/ntt_top_tb.sv @@ -385,51 +385,51 @@ task ntt_top_test(); end end begin - // $display("NTT operation\n"); - // operation = "NTT"; - // mode_tb = ct; - // enable_tb = 1; - // ntt_mem_base_addr_tb.src_base_addr = 8'd0; - // ntt_mem_base_addr_tb.interim_base_addr = 8'd64; - // ntt_mem_base_addr_tb.dest_base_addr = 8'd128; - // acc_tb = 1'b0; - // svalid_tb = 1'b1; - // @(posedge clk_tb); - // enable_tb = 1'b0; - - // $display("Waiting for ntt_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received ntt_done\n"); + $display("NTT operation\n"); + operation = "NTT"; + mode_tb = ct; + enable_tb = 1; + ntt_mem_base_addr_tb.src_base_addr = 8'd0; + ntt_mem_base_addr_tb.interim_base_addr = 8'd64; + ntt_mem_base_addr_tb.dest_base_addr = 8'd128; + acc_tb = 1'b0; + svalid_tb = 1'b1; + @(posedge clk_tb); + enable_tb = 1'b0; + + $display("Waiting for ntt_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + 
$display("Received ntt_done\n"); - // $display("INTT operation\n"); - // operation = "INTT"; - // mode_tb = gs; - // enable_tb = 1; - // ntt_mem_base_addr_tb.src_base_addr = 8'd128; //read from addr where ntt stored its results - // ntt_mem_base_addr_tb.interim_base_addr = 8'd64; - // ntt_mem_base_addr_tb.dest_base_addr = 8'd128; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for intt_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received intt_done\n"); + $display("INTT operation\n"); + operation = "INTT"; + mode_tb = gs; + enable_tb = 1; + ntt_mem_base_addr_tb.src_base_addr = 8'd128; //read from addr where ntt stored its results + ntt_mem_base_addr_tb.interim_base_addr = 8'd64; + ntt_mem_base_addr_tb.dest_base_addr = 8'd128; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for intt_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received intt_done\n"); - // $display("PWM operation 1\n"); - // operation = "PWM 1 no acc"; - // // $readmemh("pwm_iter1.hex", ntt_mem_tb); - // mode_tb = pwm; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); + $display("PWM operation 1\n"); + operation = "PWM 1 no acc"; + // $readmemh("pwm_iter1.hex", ntt_mem_tb); + mode_tb = pwm; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); // for (int i = 0; i < 64; i++) begin // if (dut.pwm_mem_c.mem[i+0] != ntt_mem_tb[i]) @@ -438,30 +438,30 @@ task ntt_top_test(); // end - // $display("PWM operation 2\n"); - // operation = "PWM 2 no acc"; - // mode_tb = pwm; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // 
$display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); + $display("PWM operation 2\n"); + operation = "PWM 2 no acc"; + mode_tb = pwm; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); - // $display("PWM operation 3\n"); - // operation = "PWM 3 acc"; - // mode_tb = pwm; - // enable_tb = 1; - // acc_tb = 1'b1; - // $readmemh("pwm_iter2.hex", ntt_mem_tb); - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); + $display("PWM operation 3\n"); + operation = "PWM 3 acc"; + mode_tb = pwm; + enable_tb = 1; + acc_tb = 1'b1; + $readmemh("pwm_iter2.hex", ntt_mem_tb); + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); // $readmemh("pwm_iter2.hex", ntt_mem_tb); // for (int i = 0; i < 64; i++) begin @@ -470,106 +470,106 @@ task ntt_top_test(); // @(posedge clk_tb); // end - // $display("PWA operation 1\n"); - // operation = "PWA 1"; - // mode_tb = pwa; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - - // $display("PWA operation 2\n"); - // operation = "PWA 2"; - // mode_tb = pwa; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - - // $display("PWA operation 3\n"); - // operation = "PWA 3"; - // mode_tb = pwa; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // 
enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - - // $display("PWS operation 1\n"); - // operation = "PWS 1"; - // mode_tb = pws; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - - // $display("PWS operation 2\n"); - // operation = "PWS 2"; - // mode_tb = pws; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - - // $display("PWS operation 3\n"); - // operation = "PWS 3"; - // mode_tb = pws; - // enable_tb = 1; - // acc_tb = 1'b0; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); - // svalid_tb = 1'b0; - // @(posedge clk_tb); + $display("PWA operation 1\n"); + operation = "PWA 1"; + mode_tb = pwa; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + + $display("PWA operation 2\n"); + operation = "PWA 2"; + mode_tb = pwa; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + + $display("PWA operation 3\n"); + operation = "PWA 3"; + mode_tb = pwa; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + + $display("PWS operation 1\n"); + operation = "PWS 1"; + 
mode_tb = pws; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + + $display("PWS operation 2\n"); + operation = "PWS 2"; + mode_tb = pws; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + + $display("PWS operation 3\n"); + operation = "PWS 3"; + mode_tb = pws; + enable_tb = 1; + acc_tb = 1'b0; + @(posedge clk_tb); + enable_tb = 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); + svalid_tb = 1'b0; + @(posedge clk_tb); - // $display("PWM + sampler operation 1\n"); - // operation = "PWM sampler"; - // mode_tb = pwm; - // enable_tb = 1; - // acc_tb = 1'b0; - // sampler_mode_tb = 1'b1; - // repeat(2) @(posedge clk_tb); - // svalid_tb <= 1'b1; - // @(posedge clk_tb); - // enable_tb = 1'b0; - // repeat(10) @(posedge clk_tb); - // svalid_tb <= 1'b0; - // repeat(10) @(posedge clk_tb); - // svalid_tb <= 1'b1; - // repeat(10) @(posedge clk_tb); - // svalid_tb <= 1'b0; - // repeat(10) @(posedge clk_tb); - // svalid_tb <= 1'b1; - // repeat(45) @(posedge clk_tb); - // svalid_tb <= 1'b0; - // $display("Waiting for pwo_done\n"); - // while(ntt_done_tb == 1'b0) - // @(posedge clk_tb); - // $display("Received pwo_done\n"); + $display("PWM + sampler operation 1\n"); + operation = "PWM sampler"; + mode_tb = pwm; + enable_tb = 1; + acc_tb = 1'b0; + sampler_mode_tb = 1'b1; + repeat(2) @(posedge clk_tb); + svalid_tb <= 1'b1; + @(posedge clk_tb); + enable_tb = 1'b0; + repeat(10) @(posedge clk_tb); + svalid_tb <= 1'b0; + repeat(10) @(posedge clk_tb); + svalid_tb <= 1'b1; + repeat(10) @(posedge clk_tb); + svalid_tb <= 1'b0; + repeat(10) @(posedge clk_tb); + svalid_tb <= 1'b1; + repeat(45) @(posedge clk_tb); + svalid_tb 
<= 1'b0; + $display("Waiting for pwo_done\n"); + while(ntt_done_tb == 1'b0) + @(posedge clk_tb); + $display("Received pwo_done\n"); $display("PWM+INTT operation\n"); operation = "PWM INTT"; From cd1dbd0aa60869e62275d522a71c579097f791e3 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 3 Dec 2024 06:40:43 +0000 Subject: [PATCH 06/10] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/kupadhyayula/masking_fixes' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index 7d53355..5939cb4 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -ba20835b337f8b0f7b0b5b05412ea3a86284471103155009a5a86ef41d4be1f8eda2f1087224a0adafb950fac5d09b68 \ No newline at end of file +6d4fc694427afebffe85da83395252eeaa368a0ff60b37229ae7a5cbbbac366efe94b545836e71808571a5a49d9c5a40 \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index 89ba20b..72469cc 100644 --- a/.github/workflow_metadata/pr_timestamp +++ b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1732586578 \ No newline at end of file +1733208041 \ No newline at end of file From 6e36eac67ef5428371e04eeb78b3620e174d22bf Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 3 Dec 2024 15:52:08 -0800 Subject: [PATCH 07/10] PR suggestions --- src/ntt_top/rtl/ntt_ctrl.sv | 7 +++--- src/ntt_top/tb/ntt_top_masking_tb.sv | 32 ++++++++++++++-------------- 2 files changed, 20 insertions(+), 19 deletions(-) diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index 9c199fc..aecf6e1 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -21,6 +21,7 @@ // 2. Controls wr/rd addr of NTT mem // 3. Controls rd addr of twiddle ROM // 4. 
Performs shuffling of wr/rd addr +// Note: Latency changes in BFU must be reflected in the latency params here and in bf2x2 for correct pipeline operation //====================================================================== module ntt_ctrl @@ -90,7 +91,7 @@ localparam INTT_WRITE_ADDR_STEP = 16; localparam PWO_READ_ADDR_STEP = 1; localparam PWO_WRITE_ADDR_STEP = 1; localparam PWM_LATENCY = 5; -localparam MASKED_BF_STAGE1_LATENCY = 260; //TODO check +localparam MASKED_BF_STAGE1_LATENCY = 264; //TODO check localparam [MEM_ADDR_WIDTH-1:0] MEM_LAST_ADDR = 63; localparam INTT_WRBUF_LATENCY = 13; //includes BF latency + mem latency for shuffled reads to begin @@ -578,7 +579,7 @@ always_ff @(posedge clk or negedge reset_n) begin buf_rdptr_reg <= {buf_rdptr_int, buf_rdptr_reg[BF_LATENCY:1]}; end else if ((gs_mode & (incr_mem_rd_addr | butterfly_ready))) begin - buf_wrptr_reg <= {{468{2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; + buf_wrptr_reg <= {{(MASKED_INTT_WRBUF_LATENCY-INTT_WRBUF_LATENCY){2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; end else if (pwo_mode & (incr_pw_rd_addr | butterfly_ready)) begin buf_rdptr_reg <= {mem_rd_index_ofst, buf_rdptr_reg[BF_LATENCY:1]}; //TODO: create new reg with apt name for PWO @@ -628,7 +629,7 @@ always_ff @(posedge clk or negedge reset_n) begin chunk_count_reg <= {chunk_count, chunk_count_reg[MASKED_BF_STAGE1_LATENCY:1]}; end else if (buf_rden_ntt | butterfly_ready | (gs_mode & incr_mem_rd_addr) | (pwo_mode & incr_pw_rd_addr)) begin //TODO: replace gs condition with an fsm generated flag perhaps? 
- chunk_count_reg <= {{251{4'h0}}, chunk_count, chunk_count_reg[BF_LATENCY:1]}; + chunk_count_reg <= {{(MASKED_BF_STAGE1_LATENCY+1-BF_LATENCY){4'h0}}, chunk_count, chunk_count_reg[BF_LATENCY:1]}; end end diff --git a/src/ntt_top/tb/ntt_top_masking_tb.sv b/src/ntt_top/tb/ntt_top_masking_tb.sv index 7df1d65..cad3e0d 100644 --- a/src/ntt_top/tb/ntt_top_masking_tb.sv +++ b/src/ntt_top/tb/ntt_top_masking_tb.sv @@ -304,8 +304,8 @@ task masked_BFU_adder_test(); actual_v = $random()%PRIME; u_array = actual_u; v_array = actual_v; - rand0 = $random(); - rand1 = $random(); + rand0 = $random()%PRIME; + rand1 = $random()%PRIME; u[0] = actual_u-rand0; u[1] = rand0; @@ -343,8 +343,8 @@ task masked_BFU_mult_test(); actual_v = $random()%PRIME; u_array = actual_u; v_array = actual_v; - rand0 = $random(); - rand1 = $random(); + rand0 = $random()%PRIME; + rand1 = $random()%PRIME; // $display("actual u = %h, actual v = %h", actual_u, actual_v); @@ -382,9 +382,9 @@ endtask // actual_u_normalized = actual_u; // // u_array = actual_u; // // v_array = actual_v; -// rand0 = $random(); -// rand1 = $random(); -// rand2 = $random(); +// rand0 = $random()%PRIME; +// rand1 = $random()%PRIME; +// rand2 = $random()%PRIME; // // $display("actual u = %h, actual v = %h", actual_u, actual_v); @@ -422,9 +422,9 @@ task masked_pwm_test(); // u_array = actual_u; // v_array = actual_v; - rand0 = $random(); - rand1 = $random(); - rand2 = $random(); + rand0 = $random()%PRIME; + rand1 = $random()%PRIME; + rand2 = $random()%PRIME; // $display("actual u = %h, actual v = %h", actual_u, actual_v); @@ -459,9 +459,9 @@ endtask // // u_array = actual_u; // // v_array = actual_v; -// rand0 = $random(); -// rand1 = $random(); -// rand2 = $random(); +// rand0 = $random()%PRIME; +// rand1 = $random()%PRIME; +// rand2 = $random()%PRIME; // // $display("actual u = %h, actual v = %h", actual_u, actual_v); @@ -505,9 +505,9 @@ task masked_hybrid_bf_2x2_test(); // u_array = actual_u; // v_array = actual_v; - rand0 = 
$random(); - rand1 = $random(); - rand2 = $random(); + rand0 = $random()%PRIME; + rand1 = $random()%PRIME; + rand2 = $random()%PRIME; // $display("actual u = %h, actual v = %h", actual_u, actual_v); From ee836d0d19330b7c435db017bfe405a6176a19e4 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Tue, 3 Dec 2024 22:54:08 -0800 Subject: [PATCH 08/10] Rename param for clarity --- src/ntt_top/rtl/ntt_ctrl.sv | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/src/ntt_top/rtl/ntt_ctrl.sv b/src/ntt_top/rtl/ntt_ctrl.sv index aecf6e1..447125b 100644 --- a/src/ntt_top/rtl/ntt_ctrl.sv +++ b/src/ntt_top/rtl/ntt_ctrl.sv @@ -92,10 +92,11 @@ localparam PWO_READ_ADDR_STEP = 1; localparam PWO_WRITE_ADDR_STEP = 1; localparam PWM_LATENCY = 5; localparam MASKED_BF_STAGE1_LATENCY = 264; //TODO check +localparam MASKED_PWM_LATENCY = 209; //For 1 masked pwm operation localparam [MEM_ADDR_WIDTH-1:0] MEM_LAST_ADDR = 63; localparam INTT_WRBUF_LATENCY = 13; //includes BF latency + mem latency for shuffled reads to begin -localparam MASKED_INTT_WRBUF_LATENCY = 481; //masked PWM+INTT latency + mem latency for shuffled reads to begin +localparam MASKED_PWM_INTT_WRBUF_LATENCY = 481; //masked PWM+INTT latency + mem latency for shuffled reads to begin //FSM states ntt_read_state_t read_fsm_state_ps, read_fsm_state_ns; ntt_write_state_t write_fsm_state_ps, write_fsm_state_ns; @@ -116,8 +117,9 @@ logic [1:0] buf_rdptr_int; logic [1:0] buf_rdptr_f; logic [BF_LATENCY:0][1:0] buf_rdptr_reg; //logic [INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; -logic [MASKED_INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; +logic [MASKED_PWM_INTT_WRBUF_LATENCY-1:0][1:0] buf_wrptr_reg; logic [MASKED_BF_STAGE1_LATENCY:0][3:0] chunk_count_reg; +// logic [MASKED_PWM_INTT_WRBUF_LATENCY:0] chunk_count_reg; logic latch_chunk_rand_offset, latch_index_rand_offset; logic last_rd_addr, last_wr_addr; logic mem_wr_en_fsm, mem_wr_en_reg; @@ -417,17 +419,17 @@ always_comb begin 'h0: begin twiddle_end_addr 
= ct_mode ? 'd0 : 'd63; twiddle_offset = 'h0; - twiddle_rand_offset = ct_mode ? 'h0 : pwm_intt_mode ? 7'((4*chunk_count_reg[MASKED_BF_STAGE1_LATENCY]) + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 'h0 : pwm_intt_mode ? 7'((4*chunk_count_reg[MASKED_BF_STAGE1_LATENCY]) + buf_wrptr_reg[MASKED_PWM_INTT_WRBUF_LATENCY-1]) : 7'((4*chunk_count_reg[BF_LATENCY]) + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h1: begin twiddle_end_addr = ct_mode ? 'd3 : 'd15; twiddle_offset = ct_mode ? 'd1 : 'd64; - twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : pwm_intt_mode ? 7'((chunk_count_reg[MASKED_BF_STAGE1_LATENCY] % 4)*4 + buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 7'(buf_rdptr_int) : pwm_intt_mode ? 7'((chunk_count_reg[MASKED_BF_STAGE1_LATENCY] % 4)*4 + buf_wrptr_reg[MASKED_PWM_INTT_WRBUF_LATENCY-1]) : 7'((chunk_count_reg[BF_LATENCY] % 4)*4 + buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h2: begin twiddle_end_addr = ct_mode ? 'd15 : 'd3; twiddle_offset = ct_mode ? 'd5 : 'd80; - twiddle_rand_offset = ct_mode ? 7'((chunk_count % 'd4)*'d4 + buf_rdptr_int) : pwm_intt_mode ? 7'(buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1]) : 7'(buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); + twiddle_rand_offset = ct_mode ? 7'((chunk_count % 'd4)*'d4 + buf_rdptr_int) : pwm_intt_mode ? 7'(buf_wrptr_reg[MASKED_PWM_INTT_WRBUF_LATENCY-1]) : 7'(buf_wrptr_reg[INTT_WRBUF_LATENCY-1]); end 'h3: begin twiddle_end_addr = ct_mode ? 
'd63 : 'd0; @@ -579,13 +581,13 @@ always_ff @(posedge clk or negedge reset_n) begin buf_rdptr_reg <= {buf_rdptr_int, buf_rdptr_reg[BF_LATENCY:1]}; end else if ((gs_mode & (incr_mem_rd_addr | butterfly_ready))) begin - buf_wrptr_reg <= {{(MASKED_INTT_WRBUF_LATENCY-INTT_WRBUF_LATENCY){2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; + buf_wrptr_reg <= {{(MASKED_PWM_INTT_WRBUF_LATENCY-INTT_WRBUF_LATENCY){2'h0}}, mem_rd_index_ofst, buf_wrptr_reg[INTT_WRBUF_LATENCY-1:1]}; end else if (pwo_mode & (incr_pw_rd_addr | butterfly_ready)) begin buf_rdptr_reg <= {mem_rd_index_ofst, buf_rdptr_reg[BF_LATENCY:1]}; //TODO: create new reg with apt name for PWO end else if ((pwm_intt_mode)) begin - buf_wrptr_reg <= {mem_rd_index_ofst, buf_wrptr_reg[MASKED_INTT_WRBUF_LATENCY-1:1]}; + buf_wrptr_reg <= {mem_rd_index_ofst, buf_wrptr_reg[MASKED_PWM_INTT_WRBUF_LATENCY-1:1]}; end else begin buf_rdptr_reg <= 'h0; From c27dcc1d7c4f5344eded1dbbed37c0e8c6a3e139 Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 4 Dec 2024 07:37:14 +0000 Subject: [PATCH 09/10] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/kupadhyayula/masking_fixes' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index 5939cb4..5cad677 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -6d4fc694427afebffe85da83395252eeaa368a0ff60b37229ae7a5cbbbac366efe94b545836e71808571a5a49d9c5a40 \ No newline at end of file +16bea5c5cf4185fe947b6acc974e630b16bad5a85320ad5a573a0373984205a280a677da47bfea9468305e1a1fbe5b76 \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index 72469cc..1efd179 100644 --- a/.github/workflow_metadata/pr_timestamp +++ 
b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1733208041 \ No newline at end of file +1733297831 \ No newline at end of file From 1d4abc85d1816f96340cccab87898fa4fd50499f Mon Sep 17 00:00:00 2001 From: Kiran Upadhyayula Date: Wed, 4 Dec 2024 19:19:07 +0000 Subject: [PATCH 10/10] MICROSOFT AUTOMATED PIPELINE: Stamp 'user/dev/kupadhyayula/masking_fixes' with updated timestamp and hash after successful run --- .github/workflow_metadata/pr_hash | 2 +- .github/workflow_metadata/pr_timestamp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflow_metadata/pr_hash b/.github/workflow_metadata/pr_hash index ec9b919..930c3c4 100644 --- a/.github/workflow_metadata/pr_hash +++ b/.github/workflow_metadata/pr_hash @@ -1 +1 @@ -bfab9f4c3a310bcbb8961b11e0776fa1072ee2a0020b992aea2202c0064cb25d472c215728ec88edb81798012624560a +148a6d381422de56ae26bc8c4288130b67b86f624ee2adb675b36c18e09bc5319f1cc53b9c3268c98892d594e9a28b44 \ No newline at end of file diff --git a/.github/workflow_metadata/pr_timestamp b/.github/workflow_metadata/pr_timestamp index 963848c..69e59bc 100644 --- a/.github/workflow_metadata/pr_timestamp +++ b/.github/workflow_metadata/pr_timestamp @@ -1 +1 @@ -1733249926 +1733339945 \ No newline at end of file