1
0
mirror of synced 2026-03-09 11:56:16 +00:00

option to disable upper lane in AES/GCM instructions; disable them in the code

This commit is contained in:
Romain Dolbeau
2021-09-05 09:56:14 -04:00
parent 13f7dc48d2
commit e710b6b2ff
3 changed files with 76 additions and 67 deletions

View File

@@ -173,9 +173,9 @@ static const uint32_t program_aes[16] = {0x0001f003,0x0005e012,0x0001d052,0x0005
static const uint32_t program_gcm_pfx[30] = {0x01400411,0x00080840,0x00040800,0x0001f043,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00811052,0x03800089,0x003c0000,0x01400411,0x0042b405,0x01400411,0x00080800,0x00040400,0xf4800809,0x00380000,0x01bc03d1,0x003cf3d1,0x00340800 };
static const uint32_t program_gcm_ad[29] = {0x0d800309,0x000000d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xf2800809,0x0000000a };
static const uint32_t program_gcm_ad[29] = {0x0d800309,0x000000d3,0x01800011,0x00000011,0x0000d003,0x000f00c5,0x00321306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xf2800809,0x0000000a };
static const uint32_t program_gcm_aes[50] = {0x18000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x000ec0c5,0x002ec2c5,0x0032d306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xe8000809,0x0000000a };
static const uint32_t program_gcm_aes[50] = {0x18000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x000f00c5,0x002f02c5,0x00321306,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x00800309,0xe8000809,0x0000000a };
static const uint32_t program_gcm_finish[71] = {0x16000309,0x01400411,0x0042b405,0x01400411,0x0001f403,0x0005e012,0x0001d052,0x0005c012,0x0001b052,0x0005a012,0x00019052,0x00058012,0x00017052,0x00056012,0x00015052,0x00054012,0x00013052,0x00052012,0x00851052,0x0004a054,0x000000d3,0x00001003,0x00ac02d3,0x01800011,0x00000011,0x0000d003,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01a40251,0x00249251,0x0000d243,0x0010f00d,0x0094f00d,0x0118f00d,0x019cf00d,0x00186143,0x00160191,0x00186811,0x001c61c3,0x00105103,0x008441ce,0x0082010e,0x00080010,0x009a008f,0x0112008f,0x0396008f,0x00086083,0x00105103,0x00084083,0x00341083,0x01b40351,0x0034d351,0x0020e343,0x0000000a };
@@ -532,25 +532,20 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
/* for (i = 0 ; i < 8 ; i ++) { */
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(4,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); */
/* } */
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i&3) == 0) ? ((uint32_t)job->len) : 0);
for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0);
}
/* data */
for (i = 0 ; i < 8 ; i ++) {
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(16,i), job->data[i]);
}
for (reg = 31 ; reg > 16 ; reg--) {
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[(i&3)+4*(31-reg)]);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+4*(31-reg)]);
}
}
@@ -597,13 +592,12 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0);
}
err = start_job(sc);
@@ -650,17 +644,16 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), (i == 0) ? ((uint32_t)wr_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0);
}
err = start_job(sc);
@@ -707,22 +700,20 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), (i == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0);
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), (i == 0) ? ((uint32_t)wr_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
for (i = 0 ; i < 8 ; i ++) { // all the way to 8 to make sure we have zero in every bit checked by BRZ
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), (i == 0) ? ((uint32_t)job->len) : 0);
}
/* final block */
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i), job->data[i]);
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i+4), job->data[i]);
}
/* create and generate MMASK */
for (i = 0 ; i < 4 ; i ++) {
@@ -736,7 +727,6 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
mask = 0xFFFFFFFF >> (8*(4-(job->len%4)));
}
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,i), mask);
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,(i+4)), mask);
}
@@ -749,7 +739,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
return err;
/* final accum */
for (i = 0 ; i < 8 ; i ++) {
for (i = 0 ; i < 4 ; i ++) {
job->data[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(8,i));
}

View File

@@ -25,14 +25,17 @@ opcodes = { # mnemonic : [bit coding, docstring]
"FIN" : [10, "halt execution and assert interrupt to host CPU that microcode execution is done"],
"SHL" : [11, "Wd $\gets$ Ra << 1 // shift Ra left by one and store in Wd"],
"XBT" : [12, "Wd[0] $\gets$ Ra[254] // extract the 255th bit of Ra and put it into the 0th bit of Wd"],
"AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"],
# for CLMUL, bit #31 indicates both lanes are needed; currently same speed
"CLMUL": [13, "carry-less multiplication; reg-reg only; per 128-bits block"], # basically 256-bits form of vpclmulqdq
"GCM_SHLMI": [14, "Shift A left by imm, insert B MSB as dest LSB; reg-reg or reg-imm; per 128-bits block"], # make SHL redundant: SHL %rd, %ra == GCM_SHLMI %rd, %ra, #0, #1
"GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], #
"GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific
"GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], #
# for AESESMI, bit #31 indicates both lanes are needed; currently same speed
"AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0] is 1 for aesesi (shared opcode)" ],
# for MEM, bit #31 indicates both lanes are needed; b[31] == 0 faster as the second access is not done
"MEM" : [19, "MEM ; imm[0] == 0 for LOAD, imm[0] == 1 for STORE (beware, store copy the address in the output reg)" ],
"AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"],
"MAX" : [21, "Maximum opcode number (for bounds checking)"],
}
@@ -239,7 +242,10 @@ class Curve25519Const(Module, AutoDoc):
10: [254, "two hundred fifty four", "The number 254 (iteration count)"],
11: [0x00000001_00000000_00000000_00000000_00000001_00000000_00000000_00000000, "increment for GCM counter (LE)", "increment for GCM counter (LE)"],
12: [0x00000000_00000000_00000000_00000010_00000000_00000000_00000000_00000010, "sixteen (twice)", "The number 16 (for block-size address increment)"],
13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"]
13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"],
# 14
# 15
16: [16, "sixteen", "The number 16"],
}
self.adr = Signal(5)
self.const = Signal(256)
@@ -1462,7 +1468,6 @@ class ExecClmul(ExecUnit, AutoDoc):
#self.q_valid.eq(self.start),
self.instruction_out.eq(self.instruction_in),
]
self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE"))
seq.act("IDLE",
@@ -1494,9 +1499,12 @@ class ExecClmul(ExecUnit, AutoDoc):
}))
seq.act("OUT",
self.q_valid.eq(1),
self.q.eq(clmul_buf),
NextState("IDLE"),
);
If(self.instruction.immediate[8:9],
self.q.eq(clmul_buf),
).Else(
self.q.eq(Cat(clmul_buf[0:128], Signal(128, reset = 0)))
),
NextState("IDLE"));
class ExecGCMShifts(ExecUnit, AutoDoc):
@@ -1753,7 +1761,11 @@ class ExecAES(ExecUnit, AutoDoc):
self.sync.mul_clk += [
If(seq.ongoing("AES_EVEN1") | seq.ongoing("AES_EVEN2"),
self.q_valid.eq(1),
self.q.eq(aes_buf),
If(self.instruction.immediate[8:9],
self.q.eq(aes_buf),
).Else(
self.q.eq(Cat(aes_buf[0:128], Signal(128, reset = 0))),
)
).Else(
self.q_valid.eq(0),
)
@@ -1824,15 +1836,24 @@ class ExecLS(ExecUnit, AutoDoc):
lsseq.act("MEMl2",
NextValue(cpar, cpar ^ 1),
If(~interface.ack,
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, self.a[132:160]),
NextValue(interface.we, self.instruction.immediate[0]),
NextValue(timeout, 2047),
If(self.instruction.immediate[0],
NextValue(interface.dat_w, self.b[128:256])),
NextState("MEMh")
If(self.instruction.immediate[8:9],
NextValue(interface.cyc, 1),
NextValue(interface.stb, 1),
NextValue(interface.sel, 2**len(interface.sel)-1),
NextValue(interface.adr, self.a[132:160]),
NextValue(interface.we, self.instruction.immediate[0]),
NextValue(timeout, 2047),
If(self.instruction.immediate[0],
NextValue(interface.dat_w, self.b[128:256])),
NextState("MEMh")
).Else(
NextValue(lbuf[128:256], 0),
If(cpar, ## checkme
NextState("MEM_ODD")
).Else(
NextState("MEM_EVEN1")
)
)
))
lsseq.act("MEMh",
NextValue(cpar, cpar ^ 1),

View File

@@ -748,9 +748,8 @@ fn main() -> std::io::Result<()> {
gcm_swap64 %0, %0, %0
xor %0, %0, %13
add %3, %3, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
sub %12, %12, #13
add %3, %3, #16
sub %12, %12, #1
// // poly mult accum = ((accum^ad) * H)
// C
@@ -865,10 +864,10 @@ fn main() -> std::io::Result<()> {
gcm_swap64 %0, %0, %0
xor %0, %0, %13
add %3, %3, #12 // #12 is 16 in both 128 bits halves
add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
sub %12, %12, #13
add %3, %3, #16
add %11, %11, #16
sub %12, %12, #1
// // poly mult accum = ((accum^ad) * H)
// C
@@ -986,10 +985,10 @@ fn main() -> std::io::Result<()> {
gcm_swap64 %0, %0, %0
xor %0, %0, %13
//add %3, %3, #12 // #12 is 16 in both 128 bits halves
//add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
//sub %12, %12, #13
//add %3, %3, #16
//add %11, %11, #16
//sub %12, %12, #1
// // poly mult accum = ((accum^ad) * H)
// C
@@ -1039,10 +1038,9 @@ fn main() -> std::io::Result<()> {
gcm_brev64 %9, %9
gcm_swap64 %9, %9, %9
xor %0, %9, %13
//add %3, %3, #12 // #12 is 16 in both 128 bits halves
//add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
//sub %12, %12, #13
//add %3, %3, #16
//add %11, %11, #16
//sub %12, %12, #1
// // poly mult accum = ((accum^ad) * H)
// C