1
0
mirror of synced 2026-03-09 20:01:42 +00:00

preliminary work on AES256-GCM in the Engine

This commit is contained in:
Romain Dolbeau
2021-09-04 05:52:11 -04:00
parent 55298ec5b7
commit e57cf9d9a8
5 changed files with 1259 additions and 215 deletions

View File

@@ -43,6 +43,9 @@ __KERNEL_RCSID(0, "$NetBSD$");
#include <sys/ioccom.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <uvm/uvm_extern.h>
#include <sys/kmem.h>
#include <dev/sbus/sbusvar.h>
@@ -97,7 +100,7 @@ struct sbusfpga_curve25519engine_aesjob {
static int init_programs(struct sbusfpga_curve25519engine_softc *sc);
static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window);
static int start_job(struct sbusfpga_curve25519engine_softc *sc);
static int wait_job(struct sbusfpga_curve25519engine_softc *sc);
static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param);
static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusfpga_curve25519engine_montgomeryjob *job, const int window);
static int dma_init(struct sbusfpga_curve25519engine_softc *sc);
@@ -107,7 +110,17 @@ static int power_off(struct sbusfpga_curve25519engine_softc *sc);
int
sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l)
{
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev));
int unit = minor(dev) & (MAX_SESSION - 1);
int driver = unit & ~(MAX_SESSION - 1);
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver);
if (sc == NULL)
return ENODEV;
if ((unit != 0) && ((sc->active_sessions & (1 << unit)) == 0)) {
return ENODEV;
}
/* first we need to turn the engine power on ... */
power_on(sc);
@@ -117,7 +130,18 @@ sbusfpga_curve25519engine_open(dev_t dev, int flags, int mode, struct lwp *l)
int
sbusfpga_curve25519engine_close(dev_t dev, int flags, int mode, struct lwp *l)
{
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev));
int unit = minor(dev) & (MAX_SESSION - 1);
int driver = unit & ~(MAX_SESSION - 1);
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver);
if (sc == NULL)
return ENODEV;
if ((unit != 0) && (sc->active_sessions & (1 << unit))) {
device_printf(sc->sc_dev, "warning: close() on active session\n");
sc->active_sessions &= ~(1 << unit);
sc->mapped_sessions &= ~(1 << unit);
}
if (sc->active_sessions == 0)
power_off(sc);
@@ -147,11 +171,18 @@ static const uint32_t program_gcm[20] = {0x0010100d, 0x0094100d, 0x0118100d, 0x0
static const uint32_t program_aes[58] = {0x0001f003, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x0000000a };
static const uint32_t program_gcm_ad[70] = {0x00400800, 0x00080840, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03000089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0xe0000809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x0000000a };
static const uint32_t program_gcm_pfx[72] = {0x01400411, 0x00080840, 0x00040800, 0x0001f043, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03800052, 0x03800089, 0x003c0000, 0x01400411, 0x0042b405, 0x01400411, 0x00080800, 0x00040400, 0xdf800809, 0x00380000, 0x01bc03d1, 0x003cf3d1, 0x00340800 };
static const uint32_t* programs[5] = { program_ec25519, program_gcm, program_aes, program_gcm_ad, NULL };
static const uint32_t program_len[5] = { 134, 20, 58, 70, 0 };
static uint32_t program_offset[4];
static const uint32_t program_gcm_ad[29] = {0x0d800309, 0x000000d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xf2800809, 0x0000000a };
static const uint32_t program_gcm_aes[92] = {0x2d000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x000ec0c5, 0x002ec2c5, 0x0032d306, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x00800309, 0xd3000809, 0x0000000a };
static const uint32_t program_gcm_finish[113] = {0x2b000309, 0x01400411, 0x0042b405, 0x01400411, 0x0001f403, 0x0005e012, 0x00841012, 0x01041012, 0x01841012, 0x0001d052, 0x00800052, 0x01000052, 0x01800052, 0x0005c012, 0x00841012, 0x01041012, 0x01841012, 0x0001b052, 0x00800052, 0x01000052, 0x01800052, 0x0005a012, 0x00841012, 0x01041012, 0x01841012, 0x00019052, 0x00800052, 0x01000052, 0x01800052, 0x00058012, 0x00841012, 0x01041012, 0x01841012, 0x00017052, 0x00800052, 0x01000052, 0x01800052, 0x00056012, 0x00841012, 0x01041012, 0x01841012, 0x00015052, 0x00800052, 0x01000052, 0x01800052, 0x00054012, 0x00841012, 0x01041012, 0x01841012, 0x00013052, 0x00800052, 0x01000052, 0x01800052, 0x00052012, 0x00841012, 0x01041012, 0x01841012, 0x02011052, 0x02800052, 0x03000052, 0x03840052, 0x0004a054, 0x000000d3, 0x00001003, 0x00ac02d3, 0x01800011, 0x00000011, 0x0000d003, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01a40251, 0x00249251, 0x0000d243, 0x0010f00d, 0x0094f00d, 0x0118f00d, 0x019cf00d, 0x00186143, 0x00160191, 0x00186811, 0x001c61c3, 0x00105103, 0x008441ce, 0x0082010e, 0x00080010, 0x009a008f, 0x0112008f, 0x0396008f, 0x00086083, 0x00105103, 0x00084083, 0x00341083, 0x01b40351, 0x0034d351, 0x0020e343, 0x0000000a };
// second and third are for testing and shall be removed
static const uint32_t* programs[8] = { program_ec25519, program_gcm, program_aes, program_gcm_pfx, program_gcm_ad, program_gcm_aes, program_gcm_finish, NULL };
static const uint32_t program_len[8] = { 134, 20, 58, 72, 29, 92, 113, 0 };
static uint32_t program_offset[8];
/*
* Attach all the sub-devices we can find
@@ -303,18 +334,55 @@ struct sbusfpga_curve25519engine_session {
uint32_t session;
uint32_t cookie;
};
/*
 * ioctl argument for the per-session requests that carry only a length:
 * SBUSFPGA_EC25519_GCMAD and SBUSFPGA_EC25519_GCMAES.
 * The layout is part of the ioctl ABI (its size is encoded by _IOW).
 */
struct sbusfpga_curve25519engine_session_len {
/* session index; CHECKSESSION requires it to equal the unit derived from the minor number */
uint32_t session;
/* must match the cookie the driver stored for this session (sc->sessions_cookies[session]) */
uint32_t cookie;
/*
 * length handed to the engine (written to register 12 of the session window);
 * the handlers reject values above 128.
 * NOTE(review): the unit (bytes vs. blocks) is not visible here -- confirm.
 */
uint32_t len;
};
/*
 * ioctl argument for SBUSFPGA_EC25519_GCMPFX: session identification,
 * a length, one 256-bit data register and the key material.
 * The layout is part of the ioctl ABI (its size is encoded by _IOW).
 */
struct sbusfpga_curve25519engine_session_len_data {
/* session index; CHECKSESSION requires it to equal the unit derived from the minor number */
uint32_t session;
/* must match the cookie the driver stored for this session (sc->sessions_cookies[session]) */
uint32_t cookie;
/* length written to register 12; the GCMPFX handler rejects values above 128 */
uint32_t len;
/* eight 32-bit words copied by the GCMPFX handler into registers 0 and 16 */
uint32_t data[8];
/*
 * key words loaded into registers 31 down to 17, four words per register
 * (index (i&3)+4*(31-reg), i.e. 0..59).
 * NOTE(review): presumably the 15 AES-256 round keys -- confirm.
 */
uint32_t keys[60];
};
/*
 * ioctl argument for SBUSFPGA_EC25519_GCMFINISH (an _IOWR request:
 * data is both an input and an output).
 * The layout is part of the ioctl ABI (its size is encoded by _IOWR).
 */
struct sbusfpga_curve25519engine_session_len_final {
/* session index; CHECKSESSION requires it to equal the unit derived from the minor number */
uint32_t session;
/* must match the cookie the driver stored for this session (sc->sessions_cookies[session]) */
uint32_t cookie;
/* length of the final partial block; the handler rejects values above 15 and derives a mask from it */
uint32_t len;
/*
 * in:  data[0..3] is the final block, written to both halves of register 9;
 * out: data[0..7] is overwritten with the contents of register 8 once the job completes.
 */
uint32_t data[8];
};
/*
 * CHECKSESSION(ses) -- validate a session-bearing ioctl argument.
 *
 * `ses` is a pointer to any structure with `session` and `cookie` members.
 * The macro makes the *enclosing function* return EINVAL when:
 *   - the session index is out of range,
 *   - no cookie is recorded for that session (stored cookie is 0),
 *   - the supplied cookie does not match the stored one,
 *   - the session index differs from `unit`,
 *   - the session is not marked active.
 * Otherwise it falls through.
 *
 * Relies on two names being in scope at the expansion site: `sc` (the softc,
 * providing sessions_cookies[] and active_sessions) and `unit`.
 * `ses` is evaluated several times, so it must not have side effects; it is
 * parenthesized so that expressions such as `&arg` expand correctly.
 */
#define CHECKSESSION(ses) \
	do { \
		if (((ses)->session >= MAX_ACTIVE_SESSION) || ((ses)->session >= MAX_SESSION)) \
			return EINVAL; \
		if (sc->sessions_cookies[(ses)->session] == 0) \
			return EINVAL; \
		if (sc->sessions_cookies[(ses)->session] != (ses)->cookie) \
			return EINVAL; \
		if ((ses)->session != unit) \
			return EINVAL; \
		if ((sc->active_sessions & (1 << (ses)->session)) == 0) \
			return EINVAL; \
	} while (0)
#define SBUSFPGA_DO_MONTGOMERYJOB _IOWR(0, 0, struct sbusfpga_curve25519engine_montgomeryjob)
#define SBUSFPGA_EC25519_CHECKGCM _IOW(0, 1, struct sbusfpga_curve25519engine_montgomeryjob)
#define SBUSFPGA_EC25519_CHECKAES _IOW(0, 2, struct sbusfpga_curve25519engine_aesjob)
#define SBUSFPGA_EC25519_GCMAD _IOW(0, 3, struct sbusfpga_curve25519engine_aesjob)
#define SBUSFPGA_EC25519_OPENSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session)
#define SBUSFPGA_EC25519_CLOSESESSION _IOR(1, 1, struct sbusfpga_curve25519engine_session)
#define SBUSFPGA_EC25519_GETSESSION _IOR(1, 0, struct sbusfpga_curve25519engine_session)
#define SBUSFPGA_EC25519_OPENSESSION _IOW(1, 1, struct sbusfpga_curve25519engine_session)
#define SBUSFPGA_EC25519_CLOSESESSION _IOW(1, 2, struct sbusfpga_curve25519engine_session)
#define SBUSFPGA_EC25519_GCMPFX _IOW(1, 3, struct sbusfpga_curve25519engine_session_len_data)
#define SBUSFPGA_EC25519_GCMAD _IOW(1, 4, struct sbusfpga_curve25519engine_session_len)
#define SBUSFPGA_EC25519_GCMAES _IOW(1, 5, struct sbusfpga_curve25519engine_session_len)
#define SBUSFPGA_EC25519_GCMFINISH _IOWR(1, 6, struct sbusfpga_curve25519engine_session_len_final)
static int get_session(struct sbusfpga_curve25519engine_softc *sc) {
int i;
/* don't use 0, we use it for testing */
/* also minor 0 is used to request session, 1-7 to open/close/map using session # */
for (i = 1 ; (i < MAX_ACTIVE_SESSION) && (i < MAX_SESSION) ; i++) {
if (((sc->active_sessions & (1<<i)) == 0) && ((sc->mapped_sessions & (1<<i)) == 0)) {
sc->active_sessions |= (1<<i);
@@ -327,9 +395,15 @@ static int get_session(struct sbusfpga_curve25519engine_softc *sc) {
int
sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, struct lwp *l)
{
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev));
int unit = minor(dev) & (MAX_SESSION - 1);
int driver = unit & ~(MAX_SESSION - 1);
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver);
int err = 0;
if (sc == NULL) {
return ENODEV;
}
if (!sc->initialized) {
if (init_programs(sc)) {
return ENXIO;
@@ -339,6 +413,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
}
switch (cmd) {
case SBUSFPGA_DO_MONTGOMERYJOB: {
if (unit != 0)
return ENOTTY;
struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data;
curve25519engine_mpstart_write(sc, program_offset[0]); /* EC25519 */
curve25519engine_mplen_write(sc, program_len[0]); /* EC25519 */
@@ -350,7 +427,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
if (err)
return err;
delay(1);
err = wait_job(sc);
err = wait_job(sc, 1);
if (err)
return err;
err = read_outputs(sc, job, 0);
@@ -359,6 +436,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
}
break;
case SBUSFPGA_EC25519_CHECKGCM: {
if (unit != 0)
return ENOTTY;
const uint32_t base = 0;
struct sbusfpga_curve25519engine_montgomeryjob* job = (struct sbusfpga_curve25519engine_montgomeryjob*)data;
int reg, i;
@@ -376,7 +456,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
if (err)
return err;
delay(1);
err = wait_job(sc);
err = wait_job(sc, 1);
/* if (err) */
/* return err; */
@@ -391,6 +471,9 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
}
break;
case SBUSFPGA_EC25519_CHECKAES: {
if (unit != 0)
return ENOTTY;
const uint32_t base = 0;
struct sbusfpga_curve25519engine_aesjob* job = (struct sbusfpga_curve25519engine_aesjob*)data;
int reg, i;
@@ -410,7 +493,7 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
if (err)
return err;
delay(1);
err = wait_job(sc);
err = wait_job(sc, 1);
/* if (err) */
/* return err; */
@@ -424,19 +507,50 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
}
}
break;
case SBUSFPGA_EC25519_GCMAD: {
const uint32_t base = 0;
struct sbusfpga_curve25519engine_aesjob* job = (struct sbusfpga_curve25519engine_aesjob*)data;
int reg, i;
curve25519engine_mpstart_write(sc, program_offset[3]); /* GCM_AD */
curve25519engine_mplen_write(sc, program_len[3]); /* GCM_AD */
case SBUSFPGA_EC25519_GCMPFX: {
if (unit == 0)
return ENOTTY;
/* FIXME: need a lock!!! */
const uint32_t base = unit * 0x400;
struct sbusfpga_curve25519engine_session_len_data* job = (struct sbusfpga_curve25519engine_session_len_data*)data;
int reg, i;
void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) );
//void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048);
CHECKSESSION(job);
if (job->len > 128) {
device_printf(sc->sc_dev, "job->len too big: %u", job->len);
return EINVAL;
}
curve25519engine_mpstart_write(sc, program_offset[3]); /* GCM_PFX */
curve25519engine_mplen_write(sc, program_len[3] + program_len[4]); /* GCM_PFX + GCM_AD */
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,i), job->data[i]);
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
/* for (i = 0 ; i < 8 ; i ++) { */
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(4,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0); */
/* } */
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i&3) == 0) ? ((uint32_t)job->len) : 0);
}
/* data */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(16,i), job->data[i]);
}
for (reg = 31 ; reg > 16 ; reg--) {
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[i+8*(31-reg)]);
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i), job->keys[(i&3)+4*(31-reg)]);
}
}
@@ -444,10 +558,64 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
if (err)
return err;
delay(1);
err = wait_job(sc);
/* if (err) */
/* return err; */
err = wait_job(sc, job->len);
if (err)
return err;
#if 0
for (reg = 0 ; reg < 32 ; reg++) {
uint32_t buf[8];
for (i = 0 ; i < 8 ; i ++) {
buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i));
}
device_printf(sc->sc_dev, "GCM_PFX %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg,
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]);
}
#endif
}
break;
case SBUSFPGA_EC25519_GCMAD: {
if (unit == 0)
return ENOTTY;
/* FIXME: need a lock!!! */
const uint32_t base = unit * 0x400;
struct sbusfpga_curve25519engine_session_len* job = (struct sbusfpga_curve25519engine_session_len*)data;
int i;
void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) );
//void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048);
CHECKSESSION(job);
if (job->len > 128)
return EINVAL;
curve25519engine_mpstart_write(sc, program_offset[4]); /* GCM_AES */
curve25519engine_mplen_write(sc, program_len[4]); /* GCM_AES */
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
}
err = start_job(sc);
if (err)
return err;
delay(1);
err = wait_job(sc, job->len);
if (err)
return err;
#if 0
int reg;
for (reg = 0 ; reg < 32 ; reg++) {
uint32_t buf[8];
for (i = 0 ; i < 8 ; i ++) {
@@ -456,9 +624,153 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
device_printf(sc->sc_dev, "GCM_AD %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg,
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]);
}
#endif
}
break;
case SBUSFPGA_EC25519_OPENSESSION:{
case SBUSFPGA_EC25519_GCMAES: {
if (unit == 0)
return ENOTTY;
/* FIXME: need a lock!!! */
const uint32_t base = unit * 0x400;
struct sbusfpga_curve25519engine_session_len* job = (struct sbusfpga_curve25519engine_session_len*)data;
int i;
void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) );
void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048);
CHECKSESSION(job);
if (job->len > 128)
return EINVAL;
curve25519engine_mpstart_write(sc, program_offset[5]); /* GCM_AES */
curve25519engine_mplen_write(sc, program_len[5]); /* GCM_AES */
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
}
err = start_job(sc);
if (err)
return err;
delay(1);
err = wait_job(sc, job->len);
if (err)
return err;
#if 0
int reg;
for (reg = 0 ; reg < 32 ; reg++) {
uint32_t buf[8];
for (i = 0 ; i < 8 ; i ++) {
buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i));
}
device_printf(sc->sc_dev, "GCM_AES %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg,
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]);
}
#endif
}
break;
case SBUSFPGA_EC25519_GCMFINISH: {
if (unit == 0)
return ENOTTY;
/* FIXME: need a lock!!! */
const uint32_t base = unit * 0x400;
struct sbusfpga_curve25519engine_session_len_final* job = (struct sbusfpga_curve25519engine_session_len_final*)data;
int i;
void* rd_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) );
void* wr_ptr = (void*)(((vaddr_t)sc->sc_dmamap->dm_segs[0].ds_addr) + (unit * 4096) + 2048);
CHECKSESSION(job);
if (job->len > 15)
return EINVAL;
curve25519engine_mpstart_write(sc, program_offset[6]); /* GCM_FINISH */
curve25519engine_mplen_write(sc, program_len[6]); /* GCM_FINISH */
curve25519engine_window_write(sc, unit); /* to each session its own register file */
/* read_addr */
for (i = 0 ; i < 8 ; i ++) {
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0); */
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), ((i & 3) == 0) ? ((uint32_t)rd_ptr) : 0);
}
/* write_addr */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(11,i), ((i & 3) == 0) ? ((uint32_t)wr_ptr) : 0);
}
/* write_len */
for (i = 0 ; i < 8 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(12,i), ((i & 3) == 0) ? ((uint32_t)job->len) : 0);
}
/* final block */
for (i = 0 ; i < 4 ; i ++) {
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i), job->data[i]);
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(9,i+4), job->data[i]);
}
/* create and generate MMASK */
for (i = 0 ; i < 4 ; i ++) {
uint32_t mask;
int idx = i;
if (job->len <= (idx*4)) {
mask = 0;
} else if (job->len >= (idx+1)*4) {
mask = 0xFFFFFFFF;
} else {
mask = 0xFFFFFFFF >> (8*(4-(job->len%4)));
}
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,i), mask);
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(10,(i+4)), mask);
}
err = start_job(sc);
if (err)
return err;
delay(1);
err = wait_job(sc, job->len);
if (err)
return err;
/* final accum */
for (i = 0 ; i < 8 ; i ++) {
job->data[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(8,i));
}
#if 0
int reg;
for (reg = 0 ; reg < 32 ; reg++) {
uint32_t buf[8];
for (i = 0 ; i < 8 ; i ++) {
buf[i] = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(reg,i));
}
device_printf(sc->sc_dev, "GCM_FINISH %d: 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x \n", reg,
buf[0], buf[1], buf[2], buf[3], buf[4], buf[5], buf[6], buf[7]);
}
#endif
}
break;
case SBUSFPGA_EC25519_GETSESSION:{
if (unit != 0)
return ENOTTY;
struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data;
int s = get_session(sc);
if (s < 0)
@@ -468,16 +780,29 @@ sbusfpga_curve25519engine_ioctl (dev_t dev, u_long cmd, void *data, int flag, st
ses->cookie = sc->sessions_cookies[s];
}
break;
case SBUSFPGA_EC25519_CLOSESESSION:{
case SBUSFPGA_EC25519_OPENSESSION:{
if (unit == 0)
return ENOTTY;
struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data;
if ((ses->session >= MAX_ACTIVE_SESSION) || (ses->session >= MAX_SESSION))
return EINVAL;
if (sc->sessions_cookies[ses->session] != ses->cookie)
return EINVAL;
CHECKSESSION(ses);
if ((sc->mapped_sessions & (1 << ses->session)) != 0)
return EBUSY;
return EINVAL;
}
break;
case SBUSFPGA_EC25519_CLOSESESSION:{
if (unit == 0)
return ENOTTY;
struct sbusfpga_curve25519engine_session* ses = (struct sbusfpga_curve25519engine_session*)data;
CHECKSESSION(ses);
/* if ((sc->mapped_sessions & (1 << ses->session)) != 0) */
/* return EBUSY; */
sc->sessions_cookies[ses->session] = 0;
sc->active_sessions &= ~(1 << ses->session);
sc->mapped_sessions &= ~(1 << ses->session); // FIXME
}
break;
@@ -570,7 +895,7 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf
int i;
uint32_t status = curve25519engine_status_read(sc);
int err = 0;
if (status & 1) {
if (status & (1<<CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET)) {
aprint_error_dev(sc->sc_dev, "WRITE - Curve25519Engine status: 0x%08x, still running?\n", status);
return -ENXIO;
}
@@ -603,31 +928,49 @@ static int write_inputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf
static int start_job(struct sbusfpga_curve25519engine_softc *sc) {
uint32_t status = curve25519engine_status_read(sc);
if (status & 1) {
if (status & (1<<CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET)) {
aprint_error_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x, still running?\n", status);
return -ENXIO;
}
curve25519engine_control_write(sc, 1);
aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc));
//aprint_normal_dev(sc->sc_dev, "START - Curve25519Engine status: 0x%08x\n", curve25519engine_status_read(sc));
return 0;
}
static int wait_job(struct sbusfpga_curve25519engine_softc *sc) {
static int wait_job(struct sbusfpga_curve25519engine_softc *sc, uint32_t param) {
uint32_t status = curve25519engine_status_read(sc);
int count = 0;
while ((status & 1) && (count < 50)) {
aprint_normal_dev(sc->sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d]\n", status, count);
int max_count = 50;
int del = 1;
const int max_del = 32;
static int max_del_seen = 1;
while ((status & (1<<CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET)) && (count < max_count)) {
//uint32_t ls_status = curve25519engine_ls_status_read(sc);
//aprint_normal_dev(sc->sc_dev, "WAIT - ongoing, Curve25519Engine status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, ls_status);
count ++;
delay(1);
delay(del);
del = del < max_del ? 2*del : del;
status = curve25519engine_status_read(sc);
}
if (del > max_del_seen) {
max_del_seen = del;
aprint_normal_dev(sc->sc_dev, "WAIT - new max delay %d after %d count (param was %u)\n", max_del_seen, count, param);
}
//curve25519engine_control_write(sc, 0);
if (status & 1) {
aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? [0x%08x]\n", status, curve25519engine_instruction_read(sc));
if (status & (1<<CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET)) {
aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, did not finish in time? [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc));
return -ENXIO;
} else if (status & (1<<CSR_CURVE25519ENGINE_STATUS_SIGILL_OFFSET)) {
aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, sigill [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc));
return -ENXIO;
} else if (status & (1<<CSR_CURVE25519ENGINE_STATUS_ABORT_OFFSET)) {
aprint_error_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x, aborted [inst: 0x%08x ls_status: 0x%08x]\n", status, curve25519engine_instruction_read(sc), curve25519engine_ls_status_read(sc));
return -ENXIO;
} else {
aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d, 0x%08x]\n", status, count, curve25519engine_instruction_read(sc));
//aprint_normal_dev(sc->sc_dev, "WAIT - Curve25519Engine status: 0x%08x [%d] ls_status: 0x%08x\n", status, count, curve25519engine_ls_status_read(sc));
}
return 0;
@@ -637,7 +980,7 @@ static int read_outputs(struct sbusfpga_curve25519engine_softc *sc, struct sbusf
const uint32_t base = window * 0x400;
int i;
uint32_t status = curve25519engine_status_read(sc);
if (status & 1) {
if (status & (1<<CSR_CURVE25519ENGINE_STATUS_RUNNING_OFFSET)) {
aprint_error_dev(sc->sc_dev, "READ - Curve25519Engine status: 0x%08x, still running?\n", status);
return -ENXIO;
}
@@ -690,31 +1033,42 @@ dma_init(struct sbusfpga_curve25519engine_softc *sc) {
return 0;
}
aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr);
aprint_normal_dev(sc->sc_dev, "DMA: SW -> kernel address is %p, dvma address is 0x%08llx, seg %llx / %ld\n", sc->sc_dma_kva, sc->sc_dmamap->dm_segs[0].ds_addr, sc->sc_segs.ds_addr, sc->sc_segs.ds_len);
return 1;
}
paddr_t sbusfpga_curve25519engine_mmap(dev_t dev, off_t offset, int prot) {
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, minor(dev));
int unit = minor(dev) & (MAX_SESSION - 1);
int driver = unit & ~(MAX_SESSION - 1);
struct sbusfpga_curve25519engine_softc *sc = device_lookup_private(&sbusfpga_c29e_cd, driver);
paddr_t addr = -1;
int ses = offset / 4096;
device_printf(sc->sc_dev, "%s:%d: %lld %d for %d / %d\n", __PRETTY_FUNCTION__, __LINE__, offset, prot, driver, unit);
if (offset % 4096)
if (offset != 0)
return -1;
if (prot & PROT_EXEC)
return -1;
if (sc->mapped_sessions & (1 << ses))
/* if (sc->mapped_sessions & (1 << unit)) */
/* return -1; */
if ((sc->active_sessions & (1 << unit)) == 0)
return -1;
if ((sc->active_sessions & (1 << ses)) == 0)
if (unit >= MAX_ACTIVE_SESSION)
return -1;
if (unit <= 0)
return -1;
addr = bus_dmamem_mmap(sc->sc_dmatag, &sc->sc_segs, 1, offset, prot, BUS_DMA_NOWAIT);
device_printf(sc->sc_dev, "mapped page %d\n", ses);
// addr = bus_dmamem_mmap(sc->sc_dmatag, sc->sc_dmamap->dm_segs, 1, (off_t)(4096*unit), prot, BUS_DMA_NOWAIT);
if (pmap_extract(pmap_kernel(), ((vaddr_t)sc->sc_dma_kva) + (unit * 4096), &addr)) {
device_printf(sc->sc_dev, "mapped page %d to 0x%08lx [0x%08lx], kernel is %p\n", unit, addr, atop(addr), (void*)(((vaddr_t)sc->sc_dma_kva) + (unit * 4096)));
if (addr != -1)
sc->mapped_sessions |= (1 << ses);
((uint32_t*)(((vaddr_t)sc->sc_dma_kva) + (unit * 4096)))[0] = 0xDEADBEEF;
sc->mapped_sessions |= (1 << unit);
return addr;
}
return addr;
return -1;
}

View File

@@ -30,8 +30,10 @@ opcodes = { # mnemonic : [bit coding, docstring]
"GCM_SHRMI": [15, "Shift A right by imm, insert B LSB as dest MSB; reg-reg or reg-imm; per 128-bits block"], #
"GCM_CMPD": [16, "Compute D:X0 from X1:X0; reg ; per 128-bits block"], # specific
"GCM_SWAP64": [17, "Swap doubleword (64 bits) ; reg-reg or imm-reg or reg-imm; per 128-bits block ; imm != 0 -> BYTEREV*"], #
"AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ],
"MAX" : [19, "Maximum opcode number (for bounds checking)"],
"AESESMI" : [18, "AES ; reg-reg ; per 128-bits block; imm[0:2] indicates sub-round (as in rv32's aes32esmi) ; imm[2] is 1 for aesesi (shared opcode)" ],
"MEM" : [19, "MEM ; imm[0] == 0 for LOAD, imm[0] == 1 for STORE (beware, store copy the address in the output reg)" ],
"AND" : [20, "Wd $\gets$ Ra & Rb // bitwise AND"],
"MAX" : [21, "Maximum opcode number (for bounds checking)"],
}
num_registers = 32
@@ -236,6 +238,8 @@ class Curve25519Const(Module, AutoDoc):
9: [100, "one hundred", "The number 100 (for pow22501)"],
10: [254, "two hundred fifty four", "The number 254 (iteration count)"],
11: [0x00000001_00000000_00000000_00000000_00000001_00000000_00000000_00000000, "increment for GCM counter (LE)", "increment for GCM counter (LE)"],
12: [0x00000000_00000000_00000000_00000010_00000000_00000000_00000000_00000010, "sixteen (twice)", "The number 16 (for block-size address increment)"],
13: [0x00000000_00000000_00000000_00000001_00000000_00000000_00000000_00000001, "decrement for GCM dual-loops (LE)", "decrement for GCM dual-loops"]
}
self.adr = Signal(5)
self.const = Signal(256)
@@ -316,7 +320,7 @@ Here is an example of how to swap the contents of `ra` and `rb` based on the val
class ExecLogic(ExecUnit):
def __init__(self, width=256):
ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "PSB", "XBT", "SHL"])
ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "PSB", "XBT", "SHL", "AND"])
self.intro = ModuleDoc(title="Logic ExecUnit Subclass", body=f"""
This execution unit implements bit-wise logic operations: XOR, NOT, and
passthrough.
@@ -327,6 +331,7 @@ passthrough.
* PSB returns the value of B
* SHL returns A << 1
* XBT returns the 255th bit of A, reported in the 0th bit of the result
* AND returns the result of A&B
""")
@@ -348,6 +353,8 @@ passthrough.
self.q.eq(Cat(self.a[254], zeros))
).Elif(self.instruction.opcode == opcodes["SHL"][0],
self.q.eq(Cat(0, self.a[:255])),
).Elif(self.instruction.opcode == opcodes["AND"][0],
self.q.eq(self.a & self.b),
),
]
@@ -1442,7 +1449,7 @@ class ExecClmul(ExecUnit, AutoDoc):
clmul64_out = Signal(64)
clmul64h_out = Signal(64)
nlane = width // 128
clmul_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128...
clmul_buf = Signal(nlane * 128) ## width must be a multiple of 128...
lanec = Signal(log2_int(nlane, False))
assert(nlane == 2) ## fixme
@@ -1460,30 +1467,37 @@ class ExecClmul(ExecUnit, AutoDoc):
self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE"))
seq.act("IDLE",
If(self.start,
NextValue(lanec, 0),
Case(self.instruction.immediate[0:2], {
0x0: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 0: 64]) ],
0x1: [ clmul64x_in1.eq(self.a[ 0: 64]), clmul64x_in2.eq(self.b[ 64:128]) ],
0x2: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 0: 64]) ],
0x3: [ clmul64x_in1.eq(self.a[ 64:128]), clmul64x_in2.eq(self.b[ 64:128]) ],
0x0: [ NextValue(clmul64x_in1, self.a[ 0: 64]), NextValue(clmul64x_in2, self.b[ 0: 64]) ],
0x1: [ NextValue(clmul64x_in1, self.a[ 0: 64]), NextValue(clmul64x_in2, self.b[ 64:128]) ],
0x2: [ NextValue(clmul64x_in1, self.a[ 64:128]), NextValue(clmul64x_in2, self.b[ 0: 64]) ],
0x3: [ NextValue(clmul64x_in1, self.a[ 64:128]), NextValue(clmul64x_in2, self.b[ 64:128]) ],
}),
NextState("NEXT")))
seq.act("NEXT",
Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(clmul64x_in1, self.a[128:192]), NextValue(clmul64x_in2, self.b[128:192]) ],
0x1: [ NextValue(clmul64x_in1, self.a[128:192]), NextValue(clmul64x_in2, self.b[192:256]) ],
0x2: [ NextValue(clmul64x_in1, self.a[192:256]), NextValue(clmul64x_in2, self.b[128:192]) ],
0x3: [ NextValue(clmul64x_in1, self.a[192:256]), NextValue(clmul64x_in2, self.b[192:256]) ],
}),
NextState("WRITE"))
seq.act("WRITE",
Case(lanec, {
0: [ NextValue(clmul_buf[0:128], Cat(clmul64_out, clmul64h_out)),
Case(self.instruction.immediate[0:2], {
0x0: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[128:192]) ],
0x1: [ clmul64x_in1.eq(self.a[128:192]), clmul64x_in2.eq(self.b[192:256]) ],
0x2: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[128:192]) ],
0x3: [ clmul64x_in1.eq(self.a[192:256]), clmul64x_in2.eq(self.b[192:256]) ],
}),
NextValue(lanec, 1),
],
1: [ self.q_valid.eq(1),
self.q.eq(Cat(clmul_buf, clmul64_out, clmul64h_out)),
NextValue(lanec, 0),
NextState("IDLE")
1: [ NextValue(clmul_buf[128:256], Cat(clmul64_out, clmul64h_out)),
NextState("OUT"),
],
}))
seq.act("OUT",
self.q_valid.eq(1),
self.q.eq(clmul_buf),
NextState("IDLE"),
);
class ExecGCMShifts(ExecUnit, AutoDoc):
def __init__(self, width=256):
@@ -1505,13 +1519,13 @@ class ExecGCMShifts(ExecUnit, AutoDoc):
).Elif(self.instruction.opcode == opcodes["GCM_SHRMI"][0],
Case(self.instruction.immediate[0:3], {
0x0: self.q.eq(self.a),
0x1: self.q.eq(Cat(self.a[1:128], self.b[0:1], self.a[129:256], self.b[0:1])),
0x2: self.q.eq(Cat(self.a[2:128], self.b[0:2], self.a[130:256], self.b[0:2])),
0x3: self.q.eq(Cat(self.a[3:128], self.b[0:3], self.a[131:256], self.b[0:3])),
0x4: self.q.eq(Cat(self.a[4:128], self.b[0:4], self.a[132:256], self.b[0:4])),
0x5: self.q.eq(Cat(self.a[5:128], self.b[0:5], self.a[133:256], self.b[0:5])),
0x6: self.q.eq(Cat(self.a[6:128], self.b[0:6], self.a[134:256], self.b[0:6])),
0x7: self.q.eq(Cat(self.a[7:128], self.b[0:7], self.a[135:256], self.b[0:7])),
0x1: self.q.eq(Cat(self.a[1:128], self.b[0:1], self.a[129:256], self.b[128:129])),
0x2: self.q.eq(Cat(self.a[2:128], self.b[0:2], self.a[130:256], self.b[128:130])),
0x3: self.q.eq(Cat(self.a[3:128], self.b[0:3], self.a[131:256], self.b[128:131])),
0x4: self.q.eq(Cat(self.a[4:128], self.b[0:4], self.a[132:256], self.b[128:132])),
0x5: self.q.eq(Cat(self.a[5:128], self.b[0:5], self.a[133:256], self.b[128:133])),
0x6: self.q.eq(Cat(self.a[6:128], self.b[0:6], self.a[134:256], self.b[128:134])),
0x7: self.q.eq(Cat(self.a[7:128], self.b[0:7], self.a[135:256], self.b[128:135])),
})
).Elif(self.instruction.opcode == opcodes["GCM_SHLMI"][0],
Case(self.instruction.immediate[0:3], {
@@ -1525,12 +1539,33 @@ class ExecGCMShifts(ExecUnit, AutoDoc):
0x7: self.q.eq(Cat(self.b[121:128], self.a[0:121], self.b[249:256], self.a[128:249])),
})
).Elif(self.instruction.opcode == opcodes["GCM_SWAP64"][0],
# also gcm_brev*
# also gcm_brev*, gcm_swap32
Case(self.instruction.immediate[0:2], {
0: self.q.eq(Cat(self.b[64:128], self.a[0:64], self.b[192:256], self.a[128:192])),
1: self.q.eq(Cat(self.a[8:16], self.a[0:8], self.a[24:32], self.a[16:24], self.a[40:48], self.a[32:40], self.a[56:64], self.a[48:56], self.a[72:80], self.a[64:72], self.a[88:96], self.a[80:88], self.a[104:112], self.a[96:104], self.a[120:128], self.a[112:120], self.a[136:144], self.a[128:136], self.a[152:160], self.a[144:152], self.a[168:176], self.a[160:168], self.a[184:192], self.a[176:184], self.a[200:208], self.a[192:200], self.a[216:224], self.a[208:216], self.a[232:240], self.a[224:232], self.a[248:256], self.a[240:248])),
2: self.q.eq(Cat(self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[88:96], self.a[80:88], self.a[72:80], self.a[64:72], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232])),
3: self.q.eq(Cat(self.a[56:64], self.a[48:56], self.a[40:48], self.a[32:40], self.a[24:32], self.a[16:24], self.a[8:16], self.a[0:8], self.a[120:128], self.a[112:120], self.a[104:112], self.a[96:104], self.a[88:96], self.a[80:88], self.a[72:80], self.a[64:72], self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136], self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200])),
# SWAP64
0: self.q.eq(Cat(self.b[ 64:128], self.a[ 0: 64],
self.b[192:256], self.a[128:192])),
# SWAP32
4: self.q.eq(Cat(self.b[ 32: 64], self.a[ 0: 32], self.b[ 96:128], self.a[ 64: 96],
self.b[160:192], self.a[128:160], self.b[224:256], self.a[192:224])),
# BREV16
1: self.q.eq(Cat(self.a[ 8: 16], self.a[ 0: 8], self.a[ 24: 32], self.a[ 16: 24], self.a[ 40: 48], self.a[ 32: 40], self.a[ 56: 64], self.a[ 48: 56],
self.a[ 72: 80], self.a[ 64: 72], self.a[ 88: 96], self.a[ 80: 88], self.a[104:112], self.a[ 96:104], self.a[120:128], self.a[112:120],
self.a[136:144], self.a[128:136], self.a[152:160], self.a[144:152], self.a[168:176], self.a[160:168], self.a[184:192], self.a[176:184],
self.a[200:208], self.a[192:200], self.a[216:224], self.a[208:216], self.a[232:240], self.a[224:232], self.a[248:256], self.a[240:248])),
# BREV32
2: self.q.eq(Cat(self.a[ 24: 32], self.a[ 16: 24], self.a[ 8: 16], self.a[ 0: 8],
self.a[ 56: 64], self.a[ 48: 56], self.a[ 40: 48], self.a[ 32: 40],
self.a[ 88: 96], self.a[ 80: 88], self.a[ 72: 80], self.a[ 64: 72],
self.a[120:128], self.a[112:120], self.a[104:112], self.a[ 96:104],
self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136],
self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168],
self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200],
self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232])),
# BREV64
3: self.q.eq(Cat(self.a[ 56: 64], self.a[ 48: 56], self.a[ 40: 48], self.a[ 32: 40], self.a[ 24: 32], self.a[ 16: 24], self.a[ 8: 16], self.a[ 0: 8],
self.a[120:128], self.a[112:120], self.a[104:112], self.a[ 96:104], self.a[ 88: 96], self.a[ 80: 88], self.a[ 72: 80], self.a[ 64: 72],
self.a[184:192], self.a[176:184], self.a[168:176], self.a[160:168], self.a[152:160], self.a[144:152], self.a[136:144], self.a[128:136],
self.a[248:256], self.a[240:248], self.a[232:240], self.a[224:232], self.a[216:224], self.a[208:216], self.a[200:208], self.a[192:200])),
})
)
]
@@ -1543,7 +1578,7 @@ class ExecAES(ExecUnit, AutoDoc):
assert(width == 256) # fixme
nlane = width // 128
aes_buf = Signal((nlane-1) * 128) ## width must be a multiple of 128...
aes_buf = Signal(nlane * 128) ## width must be a multiple of 128...
lanec = Signal(log2_int(nlane, False))
assert(nlane == 2) ## fixme
@@ -1560,120 +1595,278 @@ class ExecAES(ExecUnit, AutoDoc):
self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE"))
seq.act("IDLE",
If(self.start,
NextValue(lanec, 0),
Case(self.instruction.immediate[0:2], {
0x0: [ aes_in[0].eq(self.a[ 0: 8]), aes_in[1].eq(self.a[ 32: 40]), aes_in[2].eq(self.a[ 64: 72]), aes_in[3].eq(self.a[ 96:104]) ],
0x1: [ aes_in[3].eq(self.a[ 8: 16]), aes_in[0].eq(self.a[ 40: 48]), aes_in[1].eq(self.a[ 72: 80]), aes_in[2].eq(self.a[104:112]) ],
0x2: [ aes_in[2].eq(self.a[ 16: 24]), aes_in[3].eq(self.a[ 48: 56]), aes_in[0].eq(self.a[ 80: 88]), aes_in[1].eq(self.a[112:120]) ],
0x3: [ aes_in[1].eq(self.a[ 24: 32]), aes_in[2].eq(self.a[ 56: 64]), aes_in[3].eq(self.a[ 88: 96]), aes_in[0].eq(self.a[120:128]) ],
0x0: [ NextValue(aes_in[0], self.a[ 0: 8]), NextValue(aes_in[1], self.a[ 32: 40]), NextValue(aes_in[2], self.a[ 64: 72]), NextValue(aes_in[3], self.a[ 96:104]) ],
0x1: [ NextValue(aes_in[3], self.a[ 8: 16]), NextValue(aes_in[0], self.a[ 40: 48]), NextValue(aes_in[1], self.a[ 72: 80]), NextValue(aes_in[2], self.a[104:112]) ],
0x2: [ NextValue(aes_in[2], self.a[ 16: 24]), NextValue(aes_in[3], self.a[ 48: 56]), NextValue(aes_in[0], self.a[ 80: 88]), NextValue(aes_in[1], self.a[112:120]) ],
0x3: [ NextValue(aes_in[1], self.a[ 24: 32]), NextValue(aes_in[2], self.a[ 56: 64]), NextValue(aes_in[3], self.a[ 88: 96]), NextValue(aes_in[0], self.a[120:128]) ],
}),
NextState("NEXT")))
seq.act("NEXT",
Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_in[0], self.a[128:136]), NextValue(aes_in[1], self.a[160:168]), NextValue(aes_in[2], self.a[192:200]), NextValue(aes_in[3], self.a[224:232]) ],
0x1: [ NextValue(aes_in[3], self.a[136:144]), NextValue(aes_in[0], self.a[168:176]), NextValue(aes_in[1], self.a[200:208]), NextValue(aes_in[2], self.a[232:240]) ],
0x2: [ NextValue(aes_in[2], self.a[144:152]), NextValue(aes_in[3], self.a[176:184]), NextValue(aes_in[0], self.a[208:216]), NextValue(aes_in[1], self.a[240:248]) ],
0x3: [ NextValue(aes_in[1], self.a[152:160]), NextValue(aes_in[2], self.a[184:192]), NextValue(aes_in[3], self.a[216:224]), NextValue(aes_in[0], self.a[248:256]) ],
}),
NextState("WRITE"))
seq.act("WRITE",
Case(lanec, {
0: [ Case(self.instruction.immediate[0:2], {
0x0: [ aes_in[0].eq(self.a[128:136]), aes_in[1].eq(self.a[160:168]), aes_in[2].eq(self.a[192:200]), aes_in[3].eq(self.a[224:232]) ],
0x1: [ aes_in[3].eq(self.a[136:144]), aes_in[0].eq(self.a[168:176]), aes_in[1].eq(self.a[200:208]), aes_in[2].eq(self.a[232:240]) ],
0x2: [ aes_in[2].eq(self.a[144:152]), aes_in[3].eq(self.a[176:184]), aes_in[0].eq(self.a[208:216]), aes_in[1].eq(self.a[240:248]) ],
0x3: [ aes_in[1].eq(self.a[152:160]), aes_in[2].eq(self.a[184:192]), aes_in[3].eq(self.a[216:224]), aes_in[0].eq(self.a[248:256]) ],
}),
Case(self.instruction.immediate[2:3], {
0: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24],
aes_out[1][ 0:16], aes_out[1][ 8:24],
aes_out[2][ 0:16], aes_out[2][ 8:24],
aes_out[3][ 0:16], aes_out[3][ 8:24])),
],
0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16],
aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16],
aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16],
aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])),
],
0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16],
aes_out[1][ 8:24], aes_out[1][ 0:16],
aes_out[2][ 8:24], aes_out[2][ 0:16],
aes_out[3][ 8:24], aes_out[3][ 0:16])),
],
0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8],
aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8],
aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8],
aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])),
],
}),
1: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0),
aes_out[1][ 8:16], Signal(24, reset = 0),
aes_out[2][ 8:16], Signal(24, reset = 0),
aes_out[3][ 8:16], Signal(24, reset = 0))),
],
0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))),
],
0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))),
],
0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 0), aes_out[0][ 8:16],
Signal(24, reset = 0), aes_out[1][ 8:16],
Signal(24, reset = 0), aes_out[2][ 8:16],
Signal(24, reset = 0), aes_out[3][ 8:16])),
],
}),
}),
NextValue(lanec, 1),
],
1: [ self.q_valid.eq(1),
Case(self.instruction.immediate[2:3], {
0: Case(self.instruction.immediate[0:2], {
0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 0:16], aes_out[0][ 8:24],
aes_out[1][ 0:16], aes_out[1][ 8:24],
aes_out[2][ 0:16], aes_out[2][ 8:24],
aes_out[3][ 0:16], aes_out[3][ 8:24])),
],
0x1: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16],
aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16],
aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16],
aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])),
],
0x2: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:24], aes_out[0][ 0:16],
aes_out[1][ 8:24], aes_out[1][ 0:16],
aes_out[2][ 8:24], aes_out[2][ 0:16],
aes_out[3][ 8:24], aes_out[3][ 0:16])),
],
0x3: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8],
aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8],
aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8],
aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])),
],
}),
1: Case(self.instruction.immediate[0:2], {
0x0: [ self.q.eq(self.b ^ Cat(aes_buf, aes_out[0][ 8:16], Signal(24, reset = 0),
aes_out[1][ 8:16], Signal(24, reset = 0),
aes_out[2][ 8:16], Signal(24, reset = 0),
aes_out[3][ 8:16], Signal(24, reset = 0))),
],
0x1: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))),
],
0x2: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))),
],
0x3: [ self.q.eq(self.b ^ Cat(aes_buf, Signal(24, reset = 0), aes_out[0][ 8:16],
Signal(24, reset = 0), aes_out[1][ 8:16],
Signal(24, reset = 0), aes_out[2][ 8:16],
Signal(24, reset = 0), aes_out[3][ 8:16])),
],
}),
}),
NextValue(lanec, 0),
NextState("IDLE")
0: [ Case(self.instruction.immediate[2:3], {
0: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24],
aes_out[1][ 0:16], aes_out[1][ 8:24],
aes_out[2][ 0:16], aes_out[2][ 8:24],
aes_out[3][ 0:16], aes_out[3][ 8:24])),
],
0x1: [ NextValue(aes_buf[0:128], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16],
aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16],
aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16],
aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])),
],
0x2: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16],
aes_out[1][ 8:24], aes_out[1][ 0:16],
aes_out[2][ 8:24], aes_out[2][ 0:16],
aes_out[3][ 8:24], aes_out[3][ 0:16])),
],
0x3: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8],
aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8],
aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8],
aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])),
],
}),
1: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[0:128], Cat(aes_out[0][ 8:16], Signal(24, reset = 0),
aes_out[1][ 8:16], Signal(24, reset = 0),
aes_out[2][ 8:16], Signal(24, reset = 0),
aes_out[3][ 8:16], Signal(24, reset = 0))),
],
0x1: [ NextValue(aes_buf[0:128], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))),
],
0x2: [ NextValue(aes_buf[0:128], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))),
],
0x3: [ NextValue(aes_buf[0:128], Cat(Signal(24, reset = 0), aes_out[0][ 8:16],
Signal(24, reset = 0), aes_out[1][ 8:16],
Signal(24, reset = 0), aes_out[2][ 8:16],
Signal(24, reset = 0), aes_out[3][ 8:16])),
],
}),
}),
NextValue(lanec, 1)],
1: [ Case(self.instruction.immediate[2:3], {
0: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 0:16], aes_out[0][ 8:24],
aes_out[1][ 0:16], aes_out[1][ 8:24],
aes_out[2][ 0:16], aes_out[2][ 8:24],
aes_out[3][ 0:16], aes_out[3][ 8:24])),
],
0x1: [ NextValue(aes_buf[128:256], Cat(aes_out[0][16:24], aes_out[0][ 0:16], aes_out[0][ 8:16],
aes_out[1][16:24], aes_out[1][ 0:16], aes_out[1][ 8:16],
aes_out[2][16:24], aes_out[2][ 0:16], aes_out[2][ 8:16],
aes_out[3][16:24], aes_out[3][ 0:16], aes_out[3][ 8:16])),
],
0x2: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:24], aes_out[0][ 0:16],
aes_out[1][ 8:24], aes_out[1][ 0:16],
aes_out[2][ 8:24], aes_out[2][ 0:16],
aes_out[3][ 8:24], aes_out[3][ 0:16])),
],
0x3: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], aes_out[0][ 8:24], aes_out[0][ 0: 8],
aes_out[1][ 8:16], aes_out[1][ 8:24], aes_out[1][ 0: 8],
aes_out[2][ 8:16], aes_out[2][ 8:24], aes_out[2][ 0: 8],
aes_out[3][ 8:16], aes_out[3][ 8:24], aes_out[3][ 0: 8])),
],
}),
1: Case(self.instruction.immediate[0:2], {
0x0: [ NextValue(aes_buf[128:256], Cat(aes_out[0][ 8:16], Signal(24, reset = 0),
aes_out[1][ 8:16], Signal(24, reset = 0),
aes_out[2][ 8:16], Signal(24, reset = 0),
aes_out[3][ 8:16], Signal(24, reset = 0))),
],
0x1: [ NextValue(aes_buf[128:256], Cat(Signal(8, reset = 0), aes_out[0][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[1][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[2][ 8:16], Signal(16, reset = 0),
Signal(8, reset = 0), aes_out[3][ 8:16], Signal(16, reset = 0))),
],
0x2: [ NextValue(aes_buf[128:256], Cat(Signal(16, reset = 0), aes_out[0][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[1][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[2][ 8:16], Signal(8, reset = 0),
Signal(16, reset = 0), aes_out[3][ 8:16], Signal(8, reset = 0))),
],
0x3: [ NextValue(aes_buf[128:256], Cat(Signal(24, reset = 0), aes_out[0][ 8:16],
Signal(24, reset = 0), aes_out[1][ 8:16],
Signal(24, reset = 0), aes_out[2][ 8:16],
Signal(24, reset = 0), aes_out[3][ 8:16])),
],
}),
}),
NextState("OUT")
],
}))
seq.act("OUT",
self.q_valid.eq(1),
self.q.eq(self.b ^ aes_buf),
NextState("IDLE"))
# Load/Store execution unit for the "MEM" opcode.
#
# One MEM instruction performs two back-to-back 128-bit Wishbone accesses on
# `interface`, one per 128-bit half of the 256-bit operands:
#   * low  half: address taken from a[4:32],    store data from b[0:128]
#   * high half: address taken from a[132:160], store data from b[128:256]
# immediate[0] selects the direction (0 = load, 1 = store).  A load returns the
# two 128-bit words read; a store returns operand `a` unchanged.
#
# The bus sequencer runs in the "mul_clk" domain.  Each access is guarded by an
# 11-bit down-counter reloaded with 2047; a bus error or an expired counter
# sets the matching bit (0 = low access, 1 = high access) of `has_failure` /
# `has_timeout` and ends the instruction through the ERR state.
#
# Attributes created here:
#   has_failure -- Signal(2), bus error seen on the low/high access
#   has_timeout -- Signal(2), timeout seen on the low/high access
#   state       -- Signal(32), debug view: bits 0-8 mirror the FSM state,
#                  bits 28:30 / 30:32 are sticky copies of has_timeout /
#                  has_failure that are cleared when a new access starts
class ExecLS(ExecUnit, AutoDoc):
    def __init__(self, width=256, interface=None):
        ExecUnit.__init__(self, width, ["MEM"])
        self.notes = ModuleDoc(title=f"Load/Store ExecUnit Subclass", body=f"""
""")

        self.sync.eng_clk += [ # pipeline the instruction
            self.instruction_out.eq(self.instruction_in),
        ]

        assert(width == 256) # fixme
        assert(len(interface.sel) == 16) # 128 bits Wishbone
        start_pipe = Signal()
        self.sync.mul_clk += start_pipe.eq(self.start) # break critical path of instruction decode -> SETUP_A state muxes

        self.submodules.lsseq = lsseq = ClockDomainsRenamer("mul_clk")(FSM(reset_state="IDLE"))
        cpar = Signal() # to keep track of the odd-ness of our cycle, so we can align 2 mul_clk cycles of output on 1 eng_clk cycle
        lbuf = Signal(width) # load buffer, filled one 128-bit half per access
        timeout = Signal(11)
        #tries = Signal()
        self.has_failure = Signal(2)
        self.has_timeout = Signal(2)
        # free-running countdown; the FSM reloads it whenever it issues a request
        self.sync.mul_clk += If(timeout > 0, timeout.eq(timeout - 1))

        # IDLE: wait for a start, then issue the low-half access
        lsseq.act("IDLE",
            If(start_pipe,
                #NextValue(lbuf, 0xF00FF00F_0FF00FF0_F00FF00F_0FF00FF0_F00FF00F_0FF00FF0_F00FF00F_0FF00FF0),
                NextValue(cpar, 0),
                NextValue(self.has_timeout, 0),
                NextValue(self.has_failure, 0),
                NextValue(interface.cyc, 1),
                NextValue(interface.stb, 1),
                NextValue(interface.sel, 2**len(interface.sel)-1),
                NextValue(interface.adr, self.a[4:32]),
                NextValue(interface.we, self.instruction.immediate[0]),
                NextValue(timeout, 2047),
                If(self.instruction.immediate[0], # do we need those tests or could we always update dat_w/dat_r ?
                    NextValue(interface.dat_w, self.b[0:128])),
                NextState("MEMl") # MEMl
            )
        )
        # MEMl: wait for the low-half access to complete (ack), fail (err) or time out
        lsseq.act("MEMl",
            NextValue(cpar, cpar ^ 1),
            If(interface.ack,
                If(~self.instruction.immediate[0],
                    NextValue(lbuf[0:128], interface.dat_r)),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("MEMl2")
            ).Elif(interface.err,
                NextValue(self.has_failure[0], 1),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("ERR"),
            ).Elif(timeout == 0,
                NextValue(self.has_timeout[0], 1),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("ERR"),
            ))
        # MEMl2: once ack has dropped, issue the high-half access
        lsseq.act("MEMl2",
            NextValue(cpar, cpar ^ 1),
            If(~interface.ack,
                NextValue(interface.cyc, 1),
                NextValue(interface.stb, 1),
                NextValue(interface.sel, 2**len(interface.sel)-1),
                NextValue(interface.adr, self.a[132:160]),
                NextValue(interface.we, self.instruction.immediate[0]),
                NextValue(timeout, 2047),
                If(self.instruction.immediate[0],
                    NextValue(interface.dat_w, self.b[128:256])),
                NextState("MEMh")
            ))
        # MEMh: wait for the high-half access to complete, fail or time out
        lsseq.act("MEMh",
            NextValue(cpar, cpar ^ 1),
            If(interface.ack,
                If(~self.instruction.immediate[0],
                    NextValue(lbuf[128:256], interface.dat_r)),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("MEMh2")
            ).Elif(interface.err,
                NextValue(self.has_failure[1], 1),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("ERR"),
            ).Elif(timeout == 0,
                NextValue(self.has_timeout[1], 1),
                NextValue(interface.cyc, 0),
                NextValue(interface.stb, 0),
                NextState("ERR"),
            ))
        # MEMh2: once ack has dropped, realign on the cycle parity before the output phase
        lsseq.act("MEMh2",
            NextValue(cpar, cpar ^ 1),
            If(~interface.ack,
                #NextValue(tries, 0),
                If(cpar, ## checkme
                    NextState("MEM_ODD")
                ).Else(
                    NextState("MEM_EVEN1")
                )
            ))
        lsseq.act("MEM_ODD", # clock alignement cycle
            NextState("MEM_EVEN1"))
        lsseq.act("MEM_EVEN1",
            NextState("MEM_EVEN2"))
        # MEM_EVEN2: last output cycle; clear the per-access flags and go idle
        lsseq.act("MEM_EVEN2",
            NextValue(cpar, 0),
            NextValue(self.has_failure, 0),
            NextValue(self.has_timeout, 0),
            NextState("IDLE"))
        # ERR: an access failed or timed out; still run the output phase so the
        # instruction terminates (the failure flags stay set until MEM_EVEN2)
        lsseq.act("ERR",
            #If(~tries, # second attempt
            #   NextValue(cpar, 0),
            #   NextValue(tries, 1),
            #   NextState("IDLE")
            #).Else(NextValue(tries, 0), # no third attempt, give up
            If(cpar, ## checkme
                NextState("MEM_ODD")
            ).Else(
                NextState("MEM_EVEN1")
            )
            #)
        )

        # Result: flagged valid while the FSM is in MEM_EVEN1/MEM_EVEN2.
        # Loads return the buffered data, stores return operand a.
        self.sync.mul_clk += [
            If(lsseq.ongoing("MEM_EVEN1") | lsseq.ongoing("MEM_EVEN2"),
                self.q_valid.eq(1),
                If(~self.instruction.immediate[0],
                    self.q.eq(lbuf),
                ).Else(
                    # self.q.eq(Cat((self.a[0:32] + 16)[0:32], self.a[32:128],
                    #               (self.a[128:160] + 16)[0:32], self.a[160:256])),
                    self.q.eq(self.a),
                ),
            ).Else(
                self.q_valid.eq(0),
            )
        ]

        # Debug/status word (one bit per FSM state, plus sticky error flags)
        self.state = Signal(32)
        self.sync.mul_clk += self.state[0].eq(lsseq.ongoing("IDLE"))
        self.sync.mul_clk += self.state[1].eq(lsseq.ongoing("MEMl"))
        self.sync.mul_clk += self.state[2].eq(lsseq.ongoing("MEMl2"))
        self.sync.mul_clk += self.state[3].eq(lsseq.ongoing("MEMh"))
        self.sync.mul_clk += self.state[4].eq(lsseq.ongoing("MEMh2"))
        self.sync.mul_clk += self.state[5].eq(lsseq.ongoing("MEM_ODD"))
        self.sync.mul_clk += self.state[6].eq(lsseq.ongoing("MEM_EVEN1"))
        self.sync.mul_clk += self.state[7].eq(lsseq.ongoing("MEM_EVEN2"))
        # The error state is named "ERR"; asking for any other name would make
        # ongoing() register a new, unreachable state and this bit would never set.
        self.sync.mul_clk += self.state[8].eq(lsseq.ongoing("ERR"))
        # sticky until the next start: which access timed out / failed
        self.sync.mul_clk += self.state[28:30].eq((self.state[28:30] & Replicate(~start_pipe, 2)) | self.has_timeout)
        self.sync.mul_clk += self.state[30:32].eq((self.state[30:32] & Replicate(~start_pipe, 2)) | self.has_failure)
class Engine(Module, AutoCSR, AutoDoc):
@@ -1764,6 +1957,7 @@ Here are the currently implemented opcodes for The Engine:
instruction = Record(instruction_layout) # current instruction to execute
illegal_opcode = Signal()
abort = Signal();
### register file
rf_depth_raw = 512
@@ -1824,6 +2018,7 @@ Here are the currently implemented opcodes for The Engine:
CSRField("mpc", size=log2_int(microcode_depth), description="Current location of the microcode program counter. Mostly for debug."),
CSRField("pause_gnt", size=1, description="When set, the engine execution has been paused, and the RF & microcode ROM can be read out for suspend/resume"),
CSRField("sigill", size=1, description="Illegal Instruction"),
CSRField("abort", size=1, description="Abort from failure"),
CSRField("finished", size=1, description="Finished"),
])
pause_gnt = Signal()
@@ -1834,6 +2029,7 @@ Here are the currently implemented opcodes for The Engine:
self.status.fields.pause_gnt.eq(pause_gnt),
self.status.fields.mpc.eq(mpc),
self.status.fields.sigill.eq(illegal_opcode),
self.status.fields.abort.eq(abort),
self.status.fields.finished.eq(((~running & running_r) | self.status.fields.finished) & (~(running & ~running_r))),
]
@@ -1874,6 +2070,8 @@ Here are the currently implemented opcodes for The Engine:
self.instruction.status.eq(micro_runport.dat_r)
]
self.ls_status = CSRStatus(32, description="Status of the L/S unit")
### wishbone bus interface: decode the two address spaces and dispatch accordingly
self.bus = bus = wishbone.Interface()
wdata = Signal(32)
@@ -2120,7 +2318,10 @@ Here are the currently implemented opcodes for The Engine:
NextValue(running, 0),
)
).Else(
If(mpc < mpc_stop,
If(abort,
NextState("IDLE"),
NextValue(running, 0),
).Elif(mpc < mpc_stop,
NextState("FETCH"),
NextValue(mpc, mpc + 1),
).Else(
@@ -2136,6 +2337,7 @@ Here are the currently implemented opcodes for The Engine:
)
)
self.busls = wishbone.Interface(data_width = 128, adr_width = 28)
exec_units = {
"exec_mask" : ExecMask(width=rf_width_raw),
"exec_logic" : ExecLogic(width=rf_width_raw),
@@ -2145,6 +2347,7 @@ Here are the currently implemented opcodes for The Engine:
"exec_clmul" : ExecClmul(width=rf_width_raw),
"exec_gcmshifts" : ExecGCMShifts(width=rf_width_raw),
"exec_aes" : ExecAES(width=rf_width_raw),
"exec_ls" : ExecLS(width=rf_width_raw,interface=self.busls)
}
index = 0
for name, unit in exec_units.items():
@@ -2190,6 +2393,9 @@ Here are the currently implemented opcodes for The Engine:
self.comb += [
rf_write.eq(done),
]
self.sync += abort.eq((abort & ~engine_go) | (self.exec_ls.has_failure[0] | self.exec_ls.has_failure[1] | self.exec_ls.has_timeout[0] | self.exec_ls.has_timeout[1]))
self.comb += self.ls_status.status.eq(self.exec_ls.state)
##### TIMING CONSTRAINTS -- you want these. Trust me.

View File

@@ -693,19 +693,25 @@ fn main() -> std::io::Result<()> {
fin
);
let gcm_ad_code = assemble_engine25519!(
let gcm_pfx_code = assemble_engine25519!(
start:
// Input: rkeys in %31-%17 (backward)
// Input: rkeys in %31-%17 (backward, LE)
// pub in %16 (0-11, 12-15 are ctr so 0, LE)
// RD_PTR in %3
// ADLEN in %12 (in 16-byte-blocks)
// Transient:
// %0, %1, %2 are tmp
// init counter in %16
// H will go in %15
// T will go in %14
psa %16, #0
// Output:
// all inputs preserved
// H will go in %15 (byte-reverted)
// T will go in %14
// accum (0) will go in %13
gcm_brev32 %16, %16
// use %2 as a flag
psa %2, #1
psa %1, #0
genht:
xor %0, %16, %31
xor %0, %1, %31
aesesmi %1, %0, %30, #0
aesesmi %1, %0, %1, #1
@@ -788,23 +794,479 @@ fn main() -> std::io::Result<()> {
gcm_brev32 %16, %16
// clear flag & go encrypt t
psa %2, #0
psa %1, %16
brz genht, #0
afterht:
// store T in %14
psa %14, %0
// fully byte-revert H (first byte-in-dword, then dword-in-128bit)
// fully byte-revert H (first byte-in-dword, then dword-in-64bit)
gcm_brev64 %15, %15
gcm_swap64 %15, %15, %15
psa %13, #0
fin
// no fin; we fall directly into the AD code
//fin
);
// Engine microcode: fold the additional data (AD) into the GCM accumulator.
// Each iteration loads a block through RD_PTR, byte-reverses it, XORs it into
// the accumulator (%13), multiplies by H (%15) with four clmul partial
// products, reduces, and writes the result back to %13.  RD_PTR advances by 16
// and ADLEN decreases by 1 in both 128-bit halves per iteration.
let gcm_ad_code = assemble_engine25519!(
// Input: rkeys in %31-%17 (backward, LE)
// pub in %16 (0-11, 12-15 are ctr so 0, LE)
// RD_PTR in %3
// ADLEN in %12 (in 16-byte-blocks)
// H in %15 (byte-reverted)
// T in %14
// accum in %13
// Transient:
// %0, %1, %2, %4, %5, %6, %7 are tmp (%2 is written by gcm_cmpd below)
// Output:
// all inputs preserved except ADLEN (%12) & RD_PTR (%3)
// Updated accum is in %13
// if no ad, finish
brz done, %12
// do one block, repeat
do_ad: load %0, %3
// fully byte-revert the loaded block (bytes within each 64-bit word, then swap the two words)
gcm_brev64 %0, %0
gcm_swap64 %0, %0, %0
xor %0, %0, %13
add %3, %3, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
sub %12, %12, #13
// // poly mult accum = ((accum^ad) * H)
// C
clmul %4, %0, %15, #0
// E
clmul %5, %0, %15, #1
// F
clmul %6, %0, %15, #2
// D
clmul %7, %0, %15, #3
// E ^ F
xor %6, %5, %6
// put low64 of E^F in high64
gcm_swap64 %5, %6, #0
// put high64 of E^F in low64
gcm_swap64 %6, #0, %6
// D xor low
xor %7, %7, %6
// C xor high
xor %4, %4, %5
// // reduction
// X1:X0 in %4
// X3:X2 in %7
// shift everybody by 1 to the left
// high shifting in 1 bit from low
gcm_shlmi %1, %7, %4, #1
// low
gcm_shlmi %0, %4, #0, #1
// post-shift
// X1:X0 in %0
// X3:X2 in %1
// compute D
gcm_cmpd %2, %0
// compute E, F, G
gcm_shrmi %6, %2, #0, #1
gcm_shrmi %4, %2, #0, #2
gcm_shrmi %5, %2, #0, #7
// XOR everybody
xor %2, %2, %6
xor %4, %4, %5
xor %2, %2, %4
xor %13, %2, %1
// leave when the block count reaches zero ...
brz done, %12
// ... otherwise loop back (branch-if-zero on constant #0; presumably always taken -- confirm #0 is zero)
brz do_ad, #0
done:
fin
);
// Engine microcode: process the complete 16-byte message blocks.
// Each iteration increments the counter in %16, runs it through the AES rounds
// using the round keys in %31-%17, XORs the result with the block loaded
// through RD_PTR, stores that through WR_PTR, then folds the stored block into
// the accumulator (%13) exactly as the AD loop does (multiply by H, reduce).
let gcm_aes_code = assemble_engine25519!(
// Input: rkeys in %31-%17 (read below; %31 is XORed in first, %17 is used by the final aesesi round)
// pub in %16 (0-11, 12-15 are ctr so 0, LE)
// RD_PTR in %3
// WR_PTR in %11
// MLEN in %12 (in *complete* 16-byte-blocks)
// H in %15 (byte-reverted)
// T in %14
// accum in %13
// Transient:
// %0, %1, %2, %4, %5, %6, %7 are tmp (%2 is written by gcm_cmpd below)
// Output:
// all inputs preserved except RD_PTR (%3), WR_PTR (%11), MLEN (%12)
// (the counter in %16 is also incremented once per iteration)
// accum is in %13
// if no msg, finish
brz done, %12
// do one block, repeat
do_msg:
// increment counter (byte-swap each 32-bit word, add constant #11, swap back)
gcm_brev32 %16, %16
add %16, %16, #11
gcm_brev32 %16, %16
// initial round-key XOR
xor %0, %16, %31
// 13 full rounds (aesesmi, four sub-steps each), alternating between %0 and %1
aesesmi %1, %0, %30, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %29, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %28, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %27, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %26, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %25, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %24, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %23, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %22, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %21, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %20, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %19, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %18, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
// final round (aesesi); the last sub-step leaves the result in %1
aesesi %0, %1, %17, #0
aesesi %0, %1, %0, #1
aesesi %0, %1, %0, #2
aesesi %1, %1, %0, #3
//gcm_brev64 %1, %0
//gcm_swap64 %1, %1, %1
// input block XOR AES output, written out through WR_PTR
load %0, %3
xor %0, %0, %1
// store: the destination register (%11) receives the address back, so WR_PTR is unchanged here
store %11, %11, %0
// fully byte-revert the block that was just stored before accumulating it
gcm_brev64 %0, %0
gcm_swap64 %0, %0, %0
xor %0, %0, %13
add %3, %3, #12 // #12 is 16 in both 128 bits halves
add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
sub %12, %12, #13
// // poly mult accum = ((accum^block) * H), where block is the byte-reverted output block stored above
// NOTE(review): accumulating the *output* block matches GCM encryption; decryption would
// need the input block here instead -- confirm how this program is used
// C
clmul %4, %0, %15, #0
// E
clmul %5, %0, %15, #1
// F
clmul %6, %0, %15, #2
// D
clmul %7, %0, %15, #3
// E ^ F
xor %6, %5, %6
// put low64 of E^F in high64
gcm_swap64 %5, %6, #0
// put high64 of E^F in low64
gcm_swap64 %6, #0, %6
// D xor low
xor %7, %7, %6
// C xor high
xor %4, %4, %5
// // reduction
// X1:X0 in %4
// X3:X2 in %7
// shift everybody by 1 to the left
// high shifting in 1 bit from low
gcm_shlmi %1, %7, %4, #1
// low
gcm_shlmi %0, %4, #0, #1
// post-shift
// X1:X0 in %0
// X3:X2 in %1
// compute D
gcm_cmpd %2, %0
// compute E, F, G
gcm_shrmi %6, %2, #0, #1
gcm_shrmi %4, %2, #0, #2
gcm_shrmi %5, %2, #0, #7
// XOR everybody
xor %2, %2, %6
xor %4, %4, %5
xor %2, %2, %4
xor %13, %2, %1
// leave when the block count reaches zero ...
brz done, %12
// ... otherwise loop back (branch-if-zero on constant #0; presumably always taken -- confirm #0 is zero)
brz do_msg, #0
done:
fin
);
// Assemble the engine program for the last step of a GCM operation: an
// optional trailing partial block, then the final block and the XOR with T.
let gcm_finish_code = assemble_engine25519!(
// Inputs:
// pub in %16 (0-11, 12-15 are ctr so 0, LE)
// RD_PTR in %3
// WR_PTR in %11
// MLEN in %12 (only tested against zero: non-zero means one trailing *partial* 16-byte block is processed)
// MMASK in %10 (could be computed from MLEN%16 but we don't have an instruction for it yet)
// finalblock in %9 (could be computed but we'd need to know the exact value of adlen)
// H in %15 (byte-reverted)
// T in %14
// accum in %13
// %17 .. %31 are read as key operands by the AES sequence below
// (presumably the expanded AES-256 key schedule -- TODO confirm against the setup code)
// Transient:
// %0, %1, %2, %4, %5, %6, %7 are tmp
// Output:
// accum is in %13 (after the closing gcm_brev64/gcm_swap64)
// accum ^ T is in %8
// Side effects on inputs:
// the counter in %16 is incremented if MLEN is non-zero
// finalblock (%9) is overwritten in place by gcm_brev64/gcm_swap64
// RD_PTR (%3), WR_PTR (%11) and MLEN (%12) are left untouched (their updates are commented out below)
// no trailing partial block: skip directly to the final block
brz last, %12
finish_mlen:
// increment counter
gcm_brev32 %16, %16
add %16, %16, #11
gcm_brev32 %16, %16
// encrypt the counter block: XOR with %31, then 13 groups of four aesesmi
// (keys %30 down to %18) and one group of four aesesi (key %17);
// %0 and %1 alternate as source/destination and the result ends up in %1
xor %0, %16, %31
aesesmi %1, %0, %30, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %29, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %28, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %27, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %26, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %25, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %24, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %23, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %22, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %21, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %20, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesmi %0, %1, %19, #0
aesesmi %0, %1, %0, #1
aesesmi %0, %1, %0, #2
aesesmi %0, %1, %0, #3
aesesmi %1, %0, %18, #0
aesesmi %1, %0, %1, #1
aesesmi %1, %0, %1, #2
aesesmi %1, %0, %1, #3
aesesi %0, %1, %17, #0
aesesi %0, %1, %0, #1
aesesi %0, %1, %0, #2
aesesi %1, %1, %0, #3
//gcm_brev64 %1, %0
//gcm_swap64 %1, %1, %1
// restrict the AES output in %1 with MMASK before it is applied to the partial block
and %1, %1, %10
// read the block at RD_PTR, XOR it with the masked AES output, write the result at WR_PTR
// NOTE(review): positions cleared in MMASK pass through unchanged and are also
// fed to the accumulator below, so presumably the caller zero-pads the partial
// block -- confirm against the driver
load %0, %3
xor %0, %0, %1
store %11, %11, %0
// gcm_brev64 + gcm_swap64 on the block just written (same pair as applied to
// finalblock below), then XOR it into the accumulator
gcm_brev64 %0, %0
gcm_swap64 %0, %0, %0
xor %0, %0, %13
// pointer/length updates are disabled here: this is the last data block
//add %3, %3, #12 // #12 is 16 in both 128 bits halves
//add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
//sub %12, %12, #13
// // poly mult accum = ((accum^block) * H), with accum^block in %0
// C
clmul %4, %0, %15, #0
// E
clmul %5, %0, %15, #1
// F
clmul %6, %0, %15, #2
// D
clmul %7, %0, %15, #3
// E ^ F
xor %6, %5, %6
// put low64 of E^F in high64
gcm_swap64 %5, %6, #0
// put high64 of E^F in low64
gcm_swap64 %6, #0, %6
// D xor low
xor %7, %7, %6
// C xor high
xor %4, %4, %5
// // reduction
// X1:X0 in %4
// X3:X2 in %7
// shift everybody by 1 to the left
// high shifting in 1 bit from low
gcm_shlmi %1, %7, %4, #1
// low
gcm_shlmi %0, %4, #0, #1
// post-shift
// X1:X0 in %0
// X3:X2 in %1
// compute D
gcm_cmpd %2, %0
// compute E, F, G
gcm_shrmi %6, %2, #0, #1
gcm_shrmi %4, %2, #0, #2
gcm_shrmi %5, %2, #0, #7
// XOR everybody
xor %2, %2, %6
xor %4, %4, %5
xor %2, %2, %4
// new accumulator value
xor %13, %2, %1
last:
// addmul of finalblock
// finalblock (%9) is converted in place, so %9 does not keep its input value
gcm_brev64 %9, %9
gcm_swap64 %9, %9, %9
xor %0, %9, %13
//add %3, %3, #12 // #12 is 16 in both 128 bits halves
//add %11, %11, #12 // #12 is 16 in both 128 bits halves
// #13 is 1 in both 128 bits halves
//sub %12, %12, #13
// // poly mult accum = ((accum^finalblock) * H), with accum^finalblock in %0
// C
clmul %4, %0, %15, #0
// E
clmul %5, %0, %15, #1
// F
clmul %6, %0, %15, #2
// D
clmul %7, %0, %15, #3
// E ^ F
xor %6, %5, %6
// put low64 of E^F in high64
gcm_swap64 %5, %6, #0
// put high64 of E^F in low64
gcm_swap64 %6, #0, %6
// D xor low
xor %7, %7, %6
// C xor high
xor %4, %4, %5
// // reduction
// X1:X0 in %4
// X3:X2 in %7
// shift everybody by 1 to the left
// high shifting in 1 bit from low
gcm_shlmi %1, %7, %4, #1
// low
gcm_shlmi %0, %4, #0, #1
// post-shift
// X1:X0 in %0
// X3:X2 in %1
// compute D
gcm_cmpd %2, %0
// compute E, F, G
gcm_shrmi %6, %2, #0, #1
gcm_shrmi %4, %2, #0, #2
gcm_shrmi %5, %2, #0, #7
// XOR everybody
xor %2, %2, %6
xor %4, %4, %5
xor %2, %2, %4
// new accumulator value
xor %13, %2, %1
// apply the gcm_brev64/gcm_swap64 pair to the accumulator once more, then
// XOR with T (%14) to produce the result in %8
gcm_brev64 %13, %13
gcm_swap64 %13, %13, %13
xor %8, %13, %14
fin
);
let mut pos = 0;
while pos < gcm_ad_code.len() {
println!("0x{:08x},", gcm_ad_code[pos]);
while pos < gcm_finish_code.len() {
println!("0x{:08x},", gcm_finish_code[pos]);
pos = pos + 1;
}
Ok(())

View File

@@ -1,5 +1,5 @@
//--------------------------------------------------------------------------------
// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-08-22 07:40:46
// Auto-generated by Migen (3ffd64c) & LiteX (8a644c90) on 2021-09-03 09:40:05
//--------------------------------------------------------------------------------
#ifndef __GENERATED_CSR_H
#define __GENERATED_CSR_H
@@ -237,11 +237,21 @@ static inline uint32_t curve25519engine_status_sigill_read(struct sbusfpga_curve
uint32_t word = curve25519engine_status_read(sc);
return curve25519engine_status_sigill_extract(sc, word);
}
/* STATUS.abort: 1-bit field at bit 13 of the status word.
 * (A stale FINISHED_OFFSET definition of 13 used to sit here; FINISHED is
 * defined once, further down, next to its own accessors.) */
#define CSR_CURVE25519ENGINE_STATUS_ABORT_OFFSET 13
#define CSR_CURVE25519ENGINE_STATUS_ABORT_SIZE 1
/* Extract the 1-bit ABORT flag (bit 13) from an already-read STATUS word.
 * `sc` is accepted for signature uniformity with the other accessors and is
 * not used. Returns 0 or 1. */
static inline uint32_t curve25519engine_status_abort_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) {
	const uint32_t shifted = oldword >> 13;
	return shifted & ((1 << 1) - 1);
}
/* Read the STATUS register through curve25519engine_status_read() and return
 * only its ABORT flag, as extracted by curve25519engine_status_abort_extract(). */
static inline uint32_t curve25519engine_status_abort_read(struct sbusfpga_curve25519engine_softc *sc) {
	return curve25519engine_status_abort_extract(sc, curve25519engine_status_read(sc));
}
#define CSR_CURVE25519ENGINE_STATUS_FINISHED_OFFSET 14
#define CSR_CURVE25519ENGINE_STATUS_FINISHED_SIZE 1
/* Extract the 1-bit FINISHED flag (bit 14) from an already-read STATUS word.
 * `sc` is not used. Returns 0 or 1.
 * Bit 13 is the ABORT flag; the leftover `>> 13` return that shadowed the
 * correct one has been removed. */
static inline uint32_t curve25519engine_status_finished_extract(struct sbusfpga_curve25519engine_softc *sc, uint32_t oldword) {
	uint32_t mask = ((1 << 1)-1);
	return ( (oldword >> 14) & mask );
}
static inline uint32_t curve25519engine_status_finished_read(struct sbusfpga_curve25519engine_softc *sc) {
uint32_t word = curve25519engine_status_read(sc);
@@ -439,6 +449,11 @@ static inline uint32_t curve25519engine_instruction_immediate_read(struct sbusfp
uint32_t word = curve25519engine_instruction_read(sc);
return curve25519engine_instruction_immediate_extract(sc, word);
}
#define CSR_CURVE25519ENGINE_LS_STATUS_ADDR (CSR_CURVE25519ENGINE_BASE + 0x2cL)
#define CSR_CURVE25519ENGINE_LS_STATUS_SIZE 1
/* Read the LS_STATUS register: one 32-bit bus_space read at offset 0x2c of
 * the engine's register handle. */
static inline uint32_t curve25519engine_ls_status_read(struct sbusfpga_curve25519engine_softc *sc) {
	const uint32_t ls_status = bus_space_read_4(sc->sc_bustag, sc->sc_bhregs_curve25519engine, 0x2cL);
	return ls_status;
}
#endif // CSR_CURVE25519ENGINE_BASE
/* ddrphy */

View File

@@ -139,9 +139,9 @@ class _CRG(Module):
self.submodules.idelayctrl = S7IDELAYCTRL(self.cd_idelay)
class SBusFPGA(SoCCore):
def __init__(self, **kwargs):
self.version = "V1.0";
def __init__(self, version, **kwargs):
print(f"Building SBusFPGA for board version {version}")
kwargs["cpu_type"] = "None"
kwargs["integrated_sram_size"] = 0
kwargs["with_uart"] = False
@@ -149,9 +149,9 @@ class SBusFPGA(SoCCore):
self.sys_clk_freq = sys_clk_freq = 100e6 ## 25e6
self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a", version = self.version)
self.platform = platform = ztex213_sbus.Platform(variant="ztex2.13a", version = version)
if (self.version == "V1.0"):
if (version == "V1.0"):
self.platform.add_extension(ztex213_sbus._usb_io_v1_0)
SoCCore.__init__(self,
@@ -183,7 +183,7 @@ class SBusFPGA(SoCCore):
self.submodules.crg = _CRG(platform=platform, sys_clk_freq=sys_clk_freq)
self.platform.add_period_constraint(self.platform.lookup_request("SBUS_3V3_CLK", loose=True), 1e9/25e6) # SBus max
if (self.version == "V1.0"):
if (version == "V1.0"):
self.submodules.leds = LedChaser(
pads = platform.request("SBUS_DATA_OE_LED_2"), #platform.request("user_led", 7),
sys_clk_freq = sys_clk_freq)
@@ -299,6 +299,7 @@ class SBusFPGA(SoCCore):
#self.submodules.curve25519engine_wishbone_cdc = wishbone.WishboneDomainCrossingMaster(platform=self.platform, slave=self.curve25519engine.bus, cd_master="sys", cd_slave="clk100")
#self.bus.add_slave("curve25519engine", self.curve25519engine_wishbone_cdc, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False))
self.bus.add_slave("curve25519engine", self.curve25519engine.bus, SoCRegion(origin=self.mem_map.get("curve25519engine", None), size=0x20000, cached=False))
self.bus.add_master(name="curve25519engineLS", master=self.curve25519engine.busls)
#self.submodules.curve25519_on_sync = BusSynchronizer(width = 1, idomain = "clk100", odomain = "sys")
#self.comb += self.curve25519_on_sync.i.eq(self.curve25519engine.power.fields.on)
#self.comb += self.crg.curve25519_on.eq(self.curve25519_on_sync.o)
@@ -307,17 +308,20 @@ class SBusFPGA(SoCCore):
def main():
parser = argparse.ArgumentParser(description="SbusFPGA")
parser.add_argument("--build", action="store_true", help="Build bitstream")
parser.add_argument("--version", default="V1.0", help="SBusFPGA board version (default V1.0)")
builder_args(parser)
vivado_build_args(parser)
args = parser.parse_args()
soc = SBusFPGA(**soc_core_argdict(args))
soc = SBusFPGA(**soc_core_argdict(args),
version=args.version)
#soc.add_uart(name="uart", baudrate=115200, fifo_depth=16)
builder = Builder(soc, **builder_argdict(args))
builder.build(**vivado_build_argdict(args), run=args.build)
# Generate modified CSR registers definitions/access functions to netbsd_csr.h.
# should be split per-device (and without base) to still work if we have identical devices in different configurations on multiple boards
csr_contents = sbus_to_fpga_export.get_csr_header(
regions = soc.csr_regions,
constants = soc.constants,
@@ -325,6 +329,9 @@ def main():
write_to_file(os.path.join("netbsd_csr.h"), csr_contents)
# tells the prom where to find what
# just one, as that is board-specific
# BEWARE! then need to run 'forth_to_migen_rom.sh' *and* regenerate the bitstream with the proper PROM built-in!
# (there's surely a better way...)
csr_forth_contents = sbus_to_fpga_export.get_csr_forth_header(
csr_regions = soc.csr_regions,
mem_regions = soc.mem_regions,