more jareth-with-goblin
This commit is contained in:
parent
936736d35a
commit
be6101d39d
@ -117,9 +117,12 @@ struct scrolltest {
|
||||
int x0;
|
||||
int w;
|
||||
int n;
|
||||
int pm;
|
||||
int rop;
|
||||
};
|
||||
#define GOBLIN_SCROLL _IOW('X', 0, struct scrolltest)
|
||||
#define GOBLIN_FILL _IOW('X', 1, struct scrolltest)
|
||||
#define GOBLIN_FILLROP _IOW('X', 2, struct scrolltest)
|
||||
|
||||
static int goblin_ioctl(void *, void *, u_long, void *, int, struct lwp *);
|
||||
static paddr_t goblin_mmap(void *, void *, off_t, int);
|
||||
@ -140,12 +143,30 @@ static int power_on(struct goblin_softc *sc);
|
||||
static int power_off(struct goblin_softc *sc);
|
||||
static int jareth_scroll(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int y1, int x0, int w, int n);
|
||||
static int jareth_fill(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n);
|
||||
static const uint32_t program_scroll128[12] = { 0x407c0012,0x00140080,0x201c0013,0x60fc7013,0x00170146,0xfe000148,0x000e10c6,0x010000c9,0x00004005,0xfb000809,0x0000000a,0x0000000a };
|
||||
static const uint32_t program_fill128[12] = { 0x407c0012,0x00140080,0x607c1013,0x00170146,0xfe800148,0x000e10c6,0x010000c9,0x00004005,0xfb800809,0x0000000a,0x0000000a,0x0000000a };
|
||||
static int jareth_fillrop(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n, int pm, int rop);
|
||||
static const uint32_t program_scroll128[12] = { 0x407c0012,0x00140080,0x201c0013,0x60fc7013,0x00170146,0xfe000148,0x000e10c6,0x010000c9,
|
||||
0x00004005,0xfb000809,0x0000000a,0x0000000a };
|
||||
static const uint32_t program_fill128[11] = { 0x407c0012,0x00140080,0x607c1013,0x00170146,0xfe800148,0x000e10c6,0x010000c9,0x00004005,
|
||||
0xfb800809,0x0000000a,0x0000000a };
|
||||
static const uint32_t program_fill256[14] = { 0x01bc0014,0x001a6087,0x013c6814,0x403c0012,0x00146086,0xe03c1013,0x00165146,0xfe800148,
|
||||
0x000e10c6,0x010000c9,0x00004005,0xfb800809,0x0000000a,0x0000000a };
|
||||
|
||||
static const uint32_t* programs[3] = { program_scroll128, program_fill128, NULL };
|
||||
static const uint32_t program_len[3] = { 12, 12, 0 };
|
||||
static uint32_t program_offset[3];
|
||||
static const uint32_t program_fill[38] = { 0x11800089,0x110000c9,0x01bc0014,0x0800000d,0x013c2014,0x001400c0,0x00180000,0x403c0192,
|
||||
0x801c0013,0x001c0060,0xc03c7013,0x00184185,0x00161146,0xfc800148,0x00166007,0x00145946,
|
||||
0x0014214f,0x00005005,0x00085086,0x08000089,0x001a6087,0x013c6814,0x403c0012,0x00146086,
|
||||
0x01800149,0xe03c1013,0x00165146,0xfe800148,0x0180018d,0x801c0013,0x001c0060,0xc03c7013,
|
||||
0x000e10c6,0x010000c9,0x00004005,0xf9000809,0x0000000a,0x0000000a };
|
||||
|
||||
static const uint32_t program_fillrop[41] = { 0x13000089,0x128000c9,0x01bc0014,0x003c014c,0x0800000d,0x013c2014,0x002000c0,0x00180000,
|
||||
0x403c0192,0x801c0013,0x001c11e2,0xc03c7013,0x00184185,0x00221206,0xfc800208,0x00226007,
|
||||
0x00208946,0x0020220f,0x00008005,0x00088086,0x09000089,0x001a6087,0x013c6814,0x403c0012,
|
||||
0x00206086,0x02800209,0x801c0013,0x001c11c2,0xe03c7013,0x00225206,0xfd800208,0x0180018d,
|
||||
0x801c0013,0x001c11e2,0xc03c7013,0x000e10c6,0x010000c9,0x00004005,0xf8000809,0x0000000a,
|
||||
0x0000000a};
|
||||
|
||||
static const uint32_t* programs[6] = { program_scroll128, program_fill128, program_fill256, program_fill, program_fillrop, NULL };
|
||||
static const uint32_t program_len[6] = { 12, 11, 14, 38, 41, 0 };
|
||||
static uint32_t program_offset[6];
|
||||
|
||||
static void goblin_set_depth(struct goblin_softc *, int);
|
||||
|
||||
@ -376,6 +397,12 @@ goblinioctl(dev_t dev, u_long cmd, void *data, int flags, struct lwp *l)
|
||||
}
|
||||
break;
|
||||
|
||||
case GOBLIN_FILLROP: {
|
||||
struct scrolltest *st = (struct scrolltest *)data;
|
||||
jareth_fillrop(sc, jareth_verbose, st->y0, st->y1, st->x0, st->w, st->n, st->pm, st->rop);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
return (ENOTTY);
|
||||
}
|
||||
@ -447,11 +474,17 @@ goblinloadcmap(struct goblin_softc *sc, int start, int ncolors)
|
||||
* offset, allowing for the given protection, or return -1 for error.
|
||||
* 'inspired' by the cg6 code
|
||||
*/
|
||||
#define GOBLIN_USER_FBC 0x70000000
|
||||
#define GOBLIN_USER_RAM 0x70016000
|
||||
#define GOBLIN_USER_FBC 0x70000000
|
||||
#define JARETH_USER_REG 0x70001000
|
||||
#define JARETH_USER_MICROCODE 0x70002000
|
||||
#define JARETH_USER_REGFILE 0x70003000
|
||||
#define GOBLIN_USER_RAM 0x70016000
|
||||
typedef enum {
|
||||
goblin_bank_fbc,
|
||||
goblin_bank_fb
|
||||
goblin_bank_fb,
|
||||
jareth_bank_reg,
|
||||
jareth_bank_microcode,
|
||||
jareth_bank_regfile
|
||||
} gobo_reg_bank;
|
||||
struct mmo {
|
||||
u_long mo_uaddr; /* user (virtual) address */
|
||||
@ -466,8 +499,11 @@ goblinmmap(dev_t dev, off_t off, int prot)
|
||||
struct mmo *mo;
|
||||
u_int u, sz, flags;
|
||||
static struct mmo mmo[] = {
|
||||
{ GOBLIN_USER_RAM, 0, goblin_bank_fb },
|
||||
{ GOBLIN_USER_FBC, 1, goblin_bank_fbc },
|
||||
{ GOBLIN_USER_RAM, 0, goblin_bank_fb },
|
||||
{ GOBLIN_USER_FBC, 1, goblin_bank_fbc },
|
||||
{ JARETH_USER_REG, 1, jareth_bank_reg },
|
||||
{ JARETH_USER_MICROCODE, 4096, jareth_bank_microcode },
|
||||
{ JARETH_USER_REGFILE, 1024, jareth_bank_regfile },
|
||||
};
|
||||
|
||||
/* device_printf(sc->sc_dev, "requiesting %llx with %d\n", off, prot); */
|
||||
@ -506,6 +542,18 @@ goblinmmap(dev_t dev, off_t off, int prot)
|
||||
return (bus_space_mmap(sc->sc_bustag,
|
||||
sc->sc_reg_fbc_paddr, u,
|
||||
prot, flags));
|
||||
case jareth_bank_reg:
|
||||
return (bus_space_mmap(sc->sc_bustag,
|
||||
sc->sc_jareth_reg_paddr, u,
|
||||
prot, flags));
|
||||
case jareth_bank_microcode:
|
||||
return (bus_space_mmap(sc->sc_bustag,
|
||||
sc->sc_jareth_microcode_paddr, u,
|
||||
prot, flags));
|
||||
case jareth_bank_regfile:
|
||||
return (bus_space_mmap(sc->sc_bustag,
|
||||
sc->sc_jareth_regfile_paddr, u,
|
||||
prot, flags));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -773,7 +821,7 @@ static int jareth_scroll(struct goblin_softc *sc, enum jareth_verbosity verbose,
|
||||
|
||||
static int jareth_fill(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n) {
|
||||
const uint32_t base = 0;
|
||||
const int pidx = 1;
|
||||
const int pidx = 3; // fill
|
||||
int i;
|
||||
|
||||
power_on(sc);
|
||||
@ -801,6 +849,38 @@ static int jareth_fill(struct goblin_softc *sc, enum jareth_verbosity verbose, i
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int jareth_fillrop(struct goblin_softc *sc, enum jareth_verbosity verbose, int y0, int pat, int x0, int w, int n, int pm, int rop) {
|
||||
const uint32_t base = 0;
|
||||
const int pidx = 4; // fillrop
|
||||
int i;
|
||||
|
||||
power_on(sc);
|
||||
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(0,0), (sc->sc_internal_adr + y0 * sc->sc_stride + x0));
|
||||
for (i = 0 ; i < 8 ; i++) {
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(1,i), pat);
|
||||
}
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(2,0), (w));
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,0), (n));
|
||||
/* for (i = 1 ; i < 8 ; i++) { */
|
||||
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(2,i), 0); */
|
||||
/* bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(3,i), 0); */
|
||||
/* } */
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(4,0), (sc->sc_stride));
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(5,0), (pm));
|
||||
bus_space_write_4(sc->sc_bustag, sc->sc_bhregs_regfile,SUBREG_ADDR(5,1), (rop));
|
||||
jareth_mpstart_write(sc, program_offset[pidx]);
|
||||
jareth_mplen_write(sc, program_len[pidx]);
|
||||
|
||||
(void)start_job(sc, verbose);
|
||||
delay(1);
|
||||
(void)wait_job(sc, 1, verbose);
|
||||
|
||||
power_off(sc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
jareth_copyrows(void *cookie, int src, int dst, int n)
|
||||
{
|
||||
@ -886,7 +966,7 @@ static int start_job(struct goblin_softc *sc, enum jareth_verbosity verbose) {
|
||||
static int wait_job(struct goblin_softc *sc, uint32_t param, enum jareth_verbosity verbose) {
|
||||
uint32_t status = jareth_status_read(sc);
|
||||
int count = 0;
|
||||
int max_count = 3000;
|
||||
int max_count = 5000;
|
||||
int del = 1;
|
||||
const int max_del = 64;
|
||||
static int max_del_seen = 1;
|
||||
|
||||
@ -157,6 +157,7 @@ goblinattach_sbus(device_t parent, device_t self, void *args)
|
||||
|
||||
sc->sc_has_jareth = prom_getpropint(node, "goblin-has-jareth", 0);
|
||||
sc->sc_internal_adr = prom_getpropint(node, "goblin-internal-fb", 0x8f000000);
|
||||
aprint_normal_dev(self, "Goblin framebuffer internally @ %p\n", (void*)sc->sc_internal_adr);
|
||||
|
||||
if (sc->sc_has_jareth) {
|
||||
if (sa->sa_nreg < 5) {
|
||||
@ -173,6 +174,7 @@ goblinattach_sbus(device_t parent, device_t self, void *args)
|
||||
aprint_error(": cannot map Jareth registers\n");
|
||||
sc->sc_has_jareth = 0;
|
||||
} else {
|
||||
sc->sc_jareth_reg_paddr = sbus_bus_addr(sa->sa_bustag, sa->sa_reg[2].oa_space, sa->sa_reg[2].oa_base);
|
||||
aprint_normal_dev(self, "Jareth registers @ %p\n", (void*)sc->sc_bhregs_jareth);
|
||||
/* map microcode */
|
||||
if (sbus_bus_map(sc->sc_bustag,
|
||||
@ -184,6 +186,7 @@ goblinattach_sbus(device_t parent, device_t self, void *args)
|
||||
aprint_error(": cannot map Jareth microcode\n");
|
||||
sc->sc_has_jareth = 0;
|
||||
} else {
|
||||
sc->sc_jareth_microcode_paddr = sbus_bus_addr(sa->sa_bustag, sa->sa_reg[3].oa_space, sa->sa_reg[3].oa_base);
|
||||
aprint_normal_dev(self, "Jareth microcode @ %p\n", (void*)sc->sc_bhregs_microcode);
|
||||
/* map register file */
|
||||
if (sbus_bus_map(sc->sc_bustag,
|
||||
@ -195,6 +198,7 @@ goblinattach_sbus(device_t parent, device_t self, void *args)
|
||||
aprint_error(": cannot map Jareth regfile\n");
|
||||
sc->sc_has_jareth = 0;
|
||||
} else {
|
||||
sc->sc_jareth_regfile_paddr = sbus_bus_addr(sa->sa_bustag, sa->sa_reg[4].oa_space, sa->sa_reg[4].oa_base);
|
||||
aprint_normal_dev(self, "Jareth regfile @ %p\n", (void*)sc->sc_bhregs_regfile);
|
||||
}
|
||||
}
|
||||
|
||||
@ -50,6 +50,9 @@ struct goblin_softc {
|
||||
bus_space_tag_t sc_bustag;
|
||||
bus_addr_t sc_reg_fbc_paddr; /* phys address for device mmap() */
|
||||
bus_addr_t sc_fb_paddr; /* phys address for device mmap() */
|
||||
bus_addr_t sc_jareth_reg_paddr; /* phys address for device mmap() */
|
||||
bus_addr_t sc_jareth_microcode_paddr; /* phys address for device mmap() */
|
||||
bus_addr_t sc_jareth_regfile_paddr; /* phys address for device mmap() */
|
||||
uint32_t sc_size; /* full memory size */
|
||||
int sc_opens; /* number of open() to track 8/24 bits */
|
||||
int sc_has_jareth; /* whether we have a Jareth vector engine available */
|
||||
|
||||
@ -36,8 +36,11 @@
|
||||
#include "exa.h"
|
||||
|
||||
/* Various offsets in virtual (ie. mmap()) spaces Linux and Solaris support. */
|
||||
#define GOBLIN_FBC_VOFF 0x70000000
|
||||
#define GOBLIN_RAM_VOFF 0x70016000
|
||||
#define GOBLIN_FBC_VOFF 0x70000000
|
||||
#define JARETH_REG_VOFF 0x70001000
|
||||
#define JARETH_MICROCODE_VOFF 0x70002000
|
||||
#define JARETH_REGFILE_VOFF 0x70003000
|
||||
#define GOBLIN_RAM_VOFF 0x70016000
|
||||
|
||||
typedef struct {
|
||||
unsigned int fg, bg; /* FG/BG colors for stipple */
|
||||
@ -54,20 +57,25 @@ typedef struct {
|
||||
typedef struct {
|
||||
unsigned char *fb;
|
||||
GoblinFbcPtr fbc;
|
||||
int vclipmax;
|
||||
JarethRegPtr jreg;
|
||||
JarethMicrocodePtr jmicrocode;
|
||||
JarethRegfilePtr jregfile;
|
||||
int width;
|
||||
int height;
|
||||
int maxheight;
|
||||
int vidmem;
|
||||
|
||||
sbusDevicePtr psdp;
|
||||
Bool NoAccel;
|
||||
CloseScreenProcPtr CloseScreen;
|
||||
OptionInfoPtr Options;
|
||||
Bool has_accel;
|
||||
|
||||
int clipxa, clipxe;
|
||||
ExaDriverPtr pExa;
|
||||
int srcoff, fg;
|
||||
uint32_t last_mask;
|
||||
uint32_t last_rop;
|
||||
uint32_t fg;
|
||||
int xdir, ydir;
|
||||
uint32_t srcoff, srcpitch;
|
||||
} GoblinRec, *GoblinPtr;
|
||||
|
||||
extern int GoblinScreenPrivateIndex;
|
||||
|
||||
@ -31,14 +31,34 @@
|
||||
#include "goblin_regs.h"
|
||||
#include "dgaproc.h"
|
||||
|
||||
#include <unistd.h>
|
||||
|
||||
/* DGA stuff */
|
||||
|
||||
#define DEBUG_GOBLIN 1
|
||||
|
||||
#ifdef DEBUG_GOBLIN
|
||||
#define ENTER xf86Msg(X_ERROR, "%s>\n", __func__);
|
||||
#define DPRINTF xf86Msg
|
||||
#else
|
||||
#define ENTER
|
||||
#define DPRINTF while (0) xf86Msg
|
||||
#endif
|
||||
|
||||
static Bool Goblin_OpenFramebuffer(ScrnInfoPtr pScrn, char **, unsigned char **mem,
|
||||
int *, int *, int *);
|
||||
static Bool Goblin_SetMode(ScrnInfoPtr, DGAModePtr);
|
||||
static void Goblin_SetViewport(ScrnInfoPtr, int, int, int);
|
||||
static int Goblin_GetViewport(ScrnInfoPtr);
|
||||
|
||||
static void GoblinWaitMarker(ScreenPtr pScreen, int Marker);
|
||||
static Bool GoblinUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch);
|
||||
static Bool GoblinDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch);
|
||||
static Bool GoblinPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg);
|
||||
static void GoblinSolid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2);
|
||||
static void GoblinDone(PixmapPtr pDstPixmap);
|
||||
static Bool GoblinPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir, int alu, Pixel planemask);
|
||||
static void GoblinCopy(PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int w, int h);
|
||||
static void GoblinSync(ScrnInfoPtr);
|
||||
|
||||
static DGAFunctionRec Goblin_DGAFuncs = {
|
||||
@ -153,3 +173,316 @@ Goblin_GetViewport(ScrnInfoPtr pScrn)
|
||||
/* No viewports, none pending... */
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
GOBLINEXAInit(ScreenPtr pScreen)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
ExaDriverPtr pExa;
|
||||
|
||||
pExa = exaDriverAlloc();
|
||||
if (!pExa)
|
||||
return FALSE;
|
||||
|
||||
pGoblin->pExa = pExa;
|
||||
|
||||
pExa->exa_major = EXA_VERSION_MAJOR;
|
||||
pExa->exa_minor = EXA_VERSION_MINOR;
|
||||
|
||||
pExa->memoryBase = pGoblin->fb;
|
||||
|
||||
pExa->memorySize = pGoblin->vidmem - 32;
|
||||
pExa->offScreenBase = pGoblin->width * pGoblin->height * 4; // 32-bits
|
||||
|
||||
/*
|
||||
* Jareth has 128-bits memory access
|
||||
*/
|
||||
pExa->pixmapOffsetAlign = 16;
|
||||
pExa->pixmapPitchAlign = 16;
|
||||
|
||||
pExa->flags = EXA_OFFSCREEN_PIXMAPS;/* | EXA_MIXED_PIXMAPS; */ /* | EXA_SUPPORTS_OFFSCREEN_OVERLAPS; */
|
||||
|
||||
/*
|
||||
* these limits are bogus
|
||||
* Jareth doesn't deal with coordinates at all, so there is no limit but
|
||||
* we have to put something here
|
||||
*/
|
||||
pExa->maxX = 4096;
|
||||
pExa->maxY = 4096;
|
||||
|
||||
pExa->WaitMarker = GoblinWaitMarker;
|
||||
|
||||
pExa->PrepareSolid = GoblinPrepareSolid;
|
||||
pExa->Solid = GoblinSolid;
|
||||
pExa->DoneSolid = GoblinDone;
|
||||
|
||||
pExa->PrepareCopy = GoblinPrepareCopy;
|
||||
pExa->Copy = GoblinCopy;
|
||||
pExa->DoneCopy = GoblinDone;
|
||||
|
||||
pExa->UploadToScreen = GoblinUploadToScreen;
|
||||
pExa->DownloadFromScreen = GoblinDownloadFromScreen;
|
||||
|
||||
return exaDriverInit(pScreen, pExa);;
|
||||
}
|
||||
|
||||
static inline void
|
||||
GoblinWait(GoblinPtr pGoblin)
|
||||
{
|
||||
uint32_t status = pGoblin->jreg->status;
|
||||
int count = 0;
|
||||
int max_count = 1000;
|
||||
int del = 1;
|
||||
const int param = 1;
|
||||
const int max_del = 32;
|
||||
|
||||
ENTER;
|
||||
|
||||
while ((status & 1) && (count < max_count)) {
|
||||
count ++;
|
||||
usleep(del * param);
|
||||
del = del < max_del ? 2*del : del;
|
||||
status = pGoblin->jreg->status;
|
||||
}
|
||||
|
||||
if (status & 1) {
|
||||
xf86Msg(X_ERROR, "Jareth wait for idle timed out %08x %08x\n", status);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
GoblinWaitMarker(ScreenPtr pScreen, int Marker)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
|
||||
GoblinPtr p = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
|
||||
GoblinWait(p);
|
||||
}
|
||||
|
||||
/*
|
||||
* Memcpy-based UTS.
|
||||
*/
|
||||
static Bool
|
||||
GoblinUploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
char *dst = pGoblin->fb + exaGetPixmapOffset(pDst);
|
||||
int dst_pitch = exaGetPixmapPitch(pDst);
|
||||
|
||||
int bpp = pDst->drawable.bitsPerPixel;
|
||||
int cpp = (bpp + 7) >> 3;
|
||||
int wBytes = w * cpp;
|
||||
|
||||
ENTER;
|
||||
DPRINTF(X_ERROR, "%s depth %d\n", __func__, bpp);
|
||||
dst += (x * cpp) + (y * dst_pitch);
|
||||
|
||||
GoblinWait(pGoblin);
|
||||
|
||||
while (h--) {
|
||||
memcpy(dst, src, wBytes);
|
||||
src += src_pitch;
|
||||
dst += dst_pitch;
|
||||
}
|
||||
__asm("stbar;");
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Memcpy-based DFS.
|
||||
*/
|
||||
static Bool
|
||||
GoblinDownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pSrc->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
char *src = pGoblin->fb + exaGetPixmapOffset(pSrc);
|
||||
int src_pitch = exaGetPixmapPitch(pSrc);
|
||||
|
||||
ENTER;
|
||||
int bpp = pSrc->drawable.bitsPerPixel;
|
||||
int cpp = (bpp + 7) >> 3;
|
||||
int wBytes = w * cpp;
|
||||
|
||||
src += (x * cpp) + (y * src_pitch);
|
||||
|
||||
GoblinWait(pGoblin);
|
||||
|
||||
while (h--) {
|
||||
memcpy(dst, src, wBytes);
|
||||
src += src_pitch;
|
||||
dst += dst_pitch;
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static Bool
|
||||
GoblinPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
int i;
|
||||
|
||||
ENTER;
|
||||
DPRINTF(X_ERROR, "bits per pixel: %d\n",
|
||||
pPixmap->drawable.bitsPerPixel);
|
||||
|
||||
if ((pGoblin->jreg->power & 1) != 1)
|
||||
pGoblin->jreg->power = 1;
|
||||
|
||||
GoblinWait(pGoblin);
|
||||
|
||||
pGoblin->fg = fg;
|
||||
for (i = 0 ; i < 8; i++)
|
||||
pGoblin->jregfile->reg[1][i] = fg;
|
||||
|
||||
pGoblin->jregfile->reg[5][0] = planemask;
|
||||
pGoblin->jregfile->reg[5][1] = alu;
|
||||
|
||||
pGoblin->last_mask = planemask;
|
||||
pGoblin->last_rop = alu;
|
||||
|
||||
if ((alu == 0x3) && // GCcopy
|
||||
(planemask == 0xFFFFFFFF)) {
|
||||
// fill
|
||||
pGoblin->jreg->mpstart = 37;
|
||||
pGoblin->jreg->mplen = 38;
|
||||
} else {
|
||||
// fillrop
|
||||
pGoblin->jreg->mpstart = 75;
|
||||
pGoblin->jreg->mplen = 41;
|
||||
}
|
||||
|
||||
DPRINTF(X_ERROR, "%s: %x; %x\n", __func__, alu, planemask);
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
GoblinSolid(PixmapPtr pPixmap, int x1, int y1, int x2, int y2)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pPixmap->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
int w = x2 - x1, h = y2 - y1, dstoff, dstpitch;
|
||||
int start, depth;
|
||||
uint32_t ptr;
|
||||
ENTER;
|
||||
|
||||
if (pGoblin->last_rop == 5) // GXnoop
|
||||
return;
|
||||
|
||||
dstpitch = exaGetPixmapPitch(pPixmap);
|
||||
dstoff = exaGetPixmapOffset(pPixmap);
|
||||
|
||||
depth = pPixmap->drawable.bitsPerPixel;
|
||||
switch (depth) {
|
||||
case 32:
|
||||
start = dstoff + (y1 * dstpitch) + (x1 << 2);
|
||||
/* we work in bytes not pixels */
|
||||
w = w * 4;
|
||||
break;
|
||||
case 8:
|
||||
start = dstoff + (y1 * dstpitch) + x1;
|
||||
break;
|
||||
}
|
||||
|
||||
ptr = 0x8f000000; // fixme
|
||||
ptr += start;
|
||||
|
||||
GoblinWait(pGoblin);
|
||||
|
||||
pGoblin->jregfile->reg[0][0] = ptr;
|
||||
pGoblin->jregfile->reg[2][0] = w;
|
||||
pGoblin->jregfile->reg[3][0] = h;
|
||||
pGoblin->jregfile->reg[4][0] = dstpitch;
|
||||
|
||||
DPRINTF(X_ERROR, "Solid %d %d %d %d [%d %d], %d %d -> %d (%p: %p)\n", x1, y1, x2, y2,
|
||||
w, h, dstpitch, dstoff, start, (void*)start, ptr);
|
||||
|
||||
pGoblin->jreg->control = 1; // start
|
||||
|
||||
exaMarkSync(pPixmap->drawable.pScreen);
|
||||
}
|
||||
|
||||
/*
 * EXA DoneSolid/DoneCopy hook.  Nothing to tear down: it is a no-op.
 */
static void
GoblinDone(PixmapPtr pDstPixmap)
{
    (void)pDstPixmap;
}
|
||||
|
||||
|
||||
static Bool
|
||||
GoblinPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap,
|
||||
int xdir, int ydir, int alu, Pixel planemask)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
ENTER;
|
||||
|
||||
pGoblin->srcpitch = exaGetPixmapPitch(pSrcPixmap);
|
||||
pGoblin->srcoff = exaGetPixmapOffset(pSrcPixmap);
|
||||
pGoblin->xdir = xdir;
|
||||
pGoblin->ydir = ydir;
|
||||
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
static void
|
||||
GoblinCopy(PixmapPtr pDstPixmap,
|
||||
int srcX, int srcY, int dstX, int dstY, int w, int h)
|
||||
{
|
||||
ScrnInfoPtr pScrn = xf86Screens[pDstPixmap->drawable.pScreen->myNum];
|
||||
GoblinPtr pGoblin = GET_GOBLIN_FROM_SCRN(pScrn);
|
||||
int dstoff = exaGetPixmapOffset(pDstPixmap);
|
||||
int dstpitch = exaGetPixmapPitch(pDstPixmap);
|
||||
int srcstart, dststart;
|
||||
char *src, *dst;
|
||||
int i, j;
|
||||
ENTER;
|
||||
|
||||
DPRINTF(X_ERROR, "Copy %d %d -> %d %d [%d x %d]\n", srcX, srcY, dstX, dstY, w, h);
|
||||
|
||||
srcstart = (srcX << 2) + (pGoblin->srcpitch * srcY) + pGoblin->srcoff;
|
||||
dststart = (dstX << 2) + ( dstpitch * dstY) + dstoff;
|
||||
|
||||
src = pGoblin->fb + srcstart;
|
||||
dst = pGoblin->fb + dststart;
|
||||
|
||||
if (ydir > 0 && xdir > 0) {
|
||||
for (j = 0 ; j < h ; j++) {
|
||||
for (i = 0 ; i < w; i ++) {
|
||||
*(src+i) = *(dst+i);
|
||||
}
|
||||
src += srcpitch;
|
||||
dst += dstpitch;
|
||||
}
|
||||
} else if (ydir > 0 && xdir < 0) {
|
||||
for (j = 0 ; j < h ; j++) {
|
||||
for (i = w - 1 ; i >= 0 ; i --) {
|
||||
*(src+i) = *(dst+i);
|
||||
}
|
||||
src += srcpitch;
|
||||
dst += dstpitch;
|
||||
}
|
||||
} else if (ydir < 0 && xdir > 0) {
|
||||
src += srcpitch * h;
|
||||
dst += dstpitch * h;
|
||||
for (j = 0 ; j < h ; j++) {
|
||||
src -= srcpitch;
|
||||
dst -= dstpitch;
|
||||
for (i = 0 ; i < w; i ++) {
|
||||
*(src+i) = *(dst+i);
|
||||
}
|
||||
}
|
||||
} else if (ydir < 0 && xdir < 0) {
|
||||
src += srcpitch * h;
|
||||
dst += dstpitch * h;
|
||||
for (j = 0 ; j < h ; j++) {
|
||||
src -= srcpitch;
|
||||
dst -= dstpitch;
|
||||
for (i = w - 1 ; i >= 0 ; i --) {
|
||||
*(src+i) = *(dst+i);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -40,9 +40,11 @@
|
||||
|
||||
#include "compat-api.h"
|
||||
|
||||
#ifndef SBUS_DEVICE_GOBLIN
|
||||
#define SBUS_DEVICE_GOBLIN 0x0010
|
||||
#endif
|
||||
|
||||
/*
|
||||
|
||||
|
||||
0011 src
|
||||
0101 dst
|
||||
GXclear 0x0 0 0000
|
||||
@ -309,8 +311,8 @@ GOBLINPreInit(ScrnInfoPtr pScrn, int flags)
|
||||
{
|
||||
GoblinPtr pGoblin;
|
||||
sbusDevicePtr psdp;
|
||||
MessageType from;
|
||||
int i;
|
||||
int i, prom, len;
|
||||
char *ptr;
|
||||
|
||||
if (flags & PROBE_DETECT) return FALSE;
|
||||
|
||||
@ -350,6 +352,24 @@ GOBLINPreInit(ScrnInfoPtr pScrn, int flags)
|
||||
} else
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
prom = sparcPromInit();
|
||||
len = 4;
|
||||
if ((ptr = sparcPromGetProperty(&psdp->node, "goblin-has-jareth", &len))) {
|
||||
if (len >= 1) {
|
||||
/* if (ptr[0]) */
|
||||
/* pGoblin->has_accel = TRUE; */
|
||||
/* else */
|
||||
/* pGoblin->has_accel = FALSE; */
|
||||
pGoblin->has_accel = TRUE;
|
||||
} else {
|
||||
pGoblin->has_accel = FALSE;
|
||||
}
|
||||
}
|
||||
if (pGoblin->has_accel)
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Jareth found\n");
|
||||
else
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "no Jareth (%p)\n", ptr);
|
||||
|
||||
/*********************
|
||||
deal with depth
|
||||
@ -432,12 +452,9 @@ GOBLINPreInit(ScrnInfoPtr pScrn, int flags)
|
||||
}
|
||||
}
|
||||
|
||||
/* Set the bits per RGB for 8bpp mode */
|
||||
from = X_DEFAULT;
|
||||
|
||||
if (xf86ReturnOptValBool(pGoblin->Options, OPTION_NOACCEL, FALSE)) {
|
||||
pGoblin->NoAccel = TRUE;
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Acceleration disabled\n");
|
||||
pGoblin->NoAccel = TRUE;
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_CONFIG, "Acceleration disabled\n");
|
||||
}
|
||||
|
||||
char *optstr;
|
||||
@ -526,6 +543,21 @@ GOBLINScreenInit(SCREEN_INIT_ARGS_DECL)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
if (pGoblin->has_accel && !pGoblin->NoAccel) {
|
||||
// map Jareth registers
|
||||
pGoblin->jreg = xf86MapSbusMem(psdp, JARETH_REG_VOFF, sizeof(JarethReg));
|
||||
pGoblin->jmicrocode = xf86MapSbusMem(psdp, JARETH_MICROCODE_VOFF, sizeof(JarethReg));
|
||||
pGoblin->jregfile = xf86MapSbusMem(psdp, JARETH_REGFILE_VOFF, sizeof(JarethMicrocode));
|
||||
if ((pGoblin->jreg == NULL) ||
|
||||
(pGoblin->jmicrocode == NULL) ||
|
||||
(pGoblin->jregfile == NULL)) {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "xf86MapSbusMem failed for Jareth\n");
|
||||
pGoblin->has_accel = FALSE;
|
||||
} else {
|
||||
xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Jareth successfully mapped\n");
|
||||
}
|
||||
}
|
||||
|
||||
/* Darken the screen for aesthetic reasons and set the viewport */
|
||||
GOBLINSaveScreen(pScreen, SCREEN_SAVER_ON);
|
||||
|
||||
@ -595,26 +627,24 @@ GOBLINScreenInit(SCREEN_INIT_ARGS_DECL)
|
||||
}
|
||||
}
|
||||
|
||||
if (!pGoblin->NoAccel) {
|
||||
#if 0
|
||||
{
|
||||
/* EXA */
|
||||
XF86ModReqInfo req;
|
||||
int errmaj, errmin;
|
||||
|
||||
memset(&req, 0, sizeof(XF86ModReqInfo));
|
||||
req.majorversion = EXA_VERSION_MAJOR;
|
||||
req.minorversion = EXA_VERSION_MINOR;
|
||||
if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, &req,
|
||||
&errmaj, &errmin)) {
|
||||
LoaderErrorMsg(NULL, "exa", errmaj, errmin);
|
||||
return FALSE;
|
||||
}
|
||||
if (!GOBLINEXAInit(pScreen))
|
||||
return FALSE;
|
||||
xf86Msg(X_INFO, "%s: Using EXA acceleration\n", pGoblin->psdp->device);
|
||||
}
|
||||
#endif
|
||||
if (!pGoblin->NoAccel && pGoblin->has_accel) {
|
||||
{
|
||||
/* EXA */
|
||||
XF86ModReqInfo req;
|
||||
int errmaj, errmin;
|
||||
|
||||
memset(&req, 0, sizeof(XF86ModReqInfo));
|
||||
req.majorversion = EXA_VERSION_MAJOR;
|
||||
req.minorversion = EXA_VERSION_MINOR;
|
||||
if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, &req,
|
||||
&errmaj, &errmin)) {
|
||||
LoaderErrorMsg(NULL, "exa", errmaj, errmin);
|
||||
return FALSE;
|
||||
}
|
||||
if (!GOBLINEXAInit(pScreen))
|
||||
return FALSE;
|
||||
xf86Msg(X_INFO, "%s: Using EXA acceleration\n", pGoblin->psdp->device);
|
||||
}
|
||||
}
|
||||
|
||||
/* setup DGA */
|
||||
|
||||
@ -50,13 +50,36 @@
|
||||
#define GOBOFB_INTR_CLEAR_CLEAR 0x0
|
||||
|
||||
typedef struct goblin_fbc {
|
||||
uint32_t mode;
|
||||
uint32_t vbl_mask;
|
||||
uint32_t videoctrl;
|
||||
uint32_t intr_clear;
|
||||
uint32_t reset;
|
||||
uint32_t lut_addr;
|
||||
uint32_t lut;
|
||||
volatile uint32_t mode;
|
||||
volatile uint32_t vbl_mask;
|
||||
volatile uint32_t videoctrl;
|
||||
volatile uint32_t intr_clear;
|
||||
volatile uint32_t reset;
|
||||
volatile uint32_t lut_addr;
|
||||
volatile uint32_t lut;
|
||||
} GoblinFbc, *GoblinFbcPtr;
|
||||
|
||||
/*
 * Layout of the Jareth control registers, one 32-bit word each.
 * All fields are volatile because they are device registers.
 * Only the uses visible in the accelerator code are documented here.
 */
typedef struct jareth_reg {
|
||||
volatile uint32_t window;
|
||||
volatile uint32_t mpstart; /* start of the microcode program to run */
|
||||
volatile uint32_t mplen; /* length of the microcode program to run */
|
||||
volatile uint32_t control; /* writing 1 starts the selected program */
|
||||
volatile uint32_t mpresume;
|
||||
volatile uint32_t power; /* bit 0 is checked, and set to 1, before the engine is used */
|
||||
volatile uint32_t status; /* bit 0 is polled until clear when waiting for idle */
|
||||
volatile uint32_t ev_status;
|
||||
volatile uint32_t ev_prending; /* NOTE(review): looks like a typo for "ev_pending" - renaming would change the interface */
|
||||
volatile uint32_t ev_enable;
|
||||
volatile uint32_t instruction;
|
||||
volatile uint32_t ls_status;
|
||||
} JarethReg, *JarethRegPtr;
|
||||
|
||||
/*
 * Jareth microcode memory as an array of 1024 32-bit words.
 * NOTE(review): presumably one instruction per word - confirm against the
 * hardware description.
 */
typedef struct jareth_microcode {
|
||||
volatile uint32_t mc[1024];
|
||||
} JarethMicrocode, *JarethMicrocodePtr;
|
||||
|
||||
/*
 * Jareth register file: 32 registers of eight 32-bit words each.
 * The accelerator code indexes it as reg[register][sub-register], e.g.
 * reg[1][0..7] holds the fill colour and reg[5][0] / reg[5][1] hold the
 * planemask and the raster op.
 */
typedef struct jareth_regfile {
|
||||
volatile uint32_t reg[32][8];
|
||||
} JarethRegfile, *JarethRegfilePtr;
|
||||
|
||||
#endif /* GOBLIN_REGS_H */
|
||||
|
||||
@ -14,17 +14,20 @@ opcodes = { # mnemonic : [bit coding, docstring] ; if bit 6 (0x20) is set, shif
|
||||
"UDF" : [-1, "Placeholder for undefined opcodes"],
|
||||
"PSA" : [0, "Wd $\gets$ Ra // pass A"],
|
||||
"PSB" : [1, "Wd $\gets$ Rb // pass B"], # for star version mostly
|
||||
# 2 MSK
|
||||
"ROP32V" : [2, "Wd $\gets$ ((Rb ROP Ra) & planemask) | (Ra & ~planemask)" ], # replace MSK
|
||||
"XOR" : [3, "Wd $\gets$ Ra ^ Rb // bitwise XOR"],
|
||||
"NOT" : [4, "Wd $\gets$ ~Ra // binary invert"],
|
||||
"ADD32V" : [5, "Wd[x..x+32] $\gets$ Ra[x..x+32] + Rb[x..x+32] // vector 32-bit binary add"],
|
||||
"SUB32V" : [6, "Wd[x..x+32] $\gets$ Ra[x..x+32] - Rb[x..x+32] // vector 32-bit binary add"],
|
||||
"SUB32V" : [6, "Wd[x..x+32] $\gets$ Ra[x..x+32] - Rb[x..x+32] // vector 32-bit binary sub"],
|
||||
"AND" : [7, "Wd $\gets$ Ra & Rb // bitwise AND"], # replace MUL
|
||||
"BRNZ32" : [8, "If Ra[0:32] != 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if non-zero"], # replace TRD
|
||||
"BRZ32" : [9, "If Ra[0:32] == 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if zero"],
|
||||
"BRNZ32" : [8, "If Ra[0:32] != 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if non-zero (32-bits)"], # replace TRD
|
||||
"BRZ32" : [9, "If Ra[0:32] == 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if zero (32-bits)"],
|
||||
"FIN" : [10, "halt execution and assert interrupt to host CPU that microcode execution is done"],
|
||||
"SHL" : [11, "Wd $\gets$ Ra << 1 // shift Ra left by one and store in Wd"],
|
||||
# 12 XBT
|
||||
"SROP" : [12, "set planemask & rop from Ra[0:32] and Ra[32:36]" ], # was XBT
|
||||
"BRZ4" : [13, "If Ra[0:4] == 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if zero (4-bits)"],
|
||||
"BRZ5" : [14, "If Ra[0:5] == 0 then mpc[9:0] $\gets$ mpc[9:0] + immediate[9:0] + 1, else mpc $\gets$ mpc + 1 // Branch if zero (5-bits)"],
|
||||
"MIN32V" : [15, "Wd[x..x+32] $\gets$ umin(Ra[x..x+32], Rb[x..x+32]) // vector 32-bit umin"],
|
||||
# for MEM, bit #31 (imm[8]) indicates both lanes are needed; imm[31] == 0 faster as the second access is not done ;
|
||||
"GETM": [17, "GETM: getmask" ],
|
||||
"ADR": [18, "ADR: set or recover addresses, Wd $\gets$ ADR (for GETADR) or Wd $\gets$ 0 (for SETADR)" ],
|
||||
@ -230,12 +233,13 @@ class JarethConst(Module, AutoDoc):
|
||||
2: [2, "two", "The number two"],
|
||||
#3: [3, "three", "The number three"],
|
||||
#4: [4, "four", "The number four"],
|
||||
#5: [5, "five", "The number five"],
|
||||
###5: [5, "five", "The number five"],
|
||||
5: [32, "thirty-two", "The number thirty-two"],
|
||||
#6: [6, "six", "The number six"],
|
||||
###6: [6, "six", "The number six"],
|
||||
6: [31, "thirty-one", "The number thirty-one"],
|
||||
#7: [7, "seven", "The number seven"],
|
||||
#8: [8, "eight", "The number eight"],
|
||||
15: [15, "sixteen", "The number fifteen"],
|
||||
15: [15, "fifteen", "The number fifteen"],
|
||||
16: [16, "sixteen", "The number sixteen"],
|
||||
}
|
||||
self.adr = Signal(5)
|
||||
@ -293,16 +297,16 @@ class ExecUnit(Module, AutoDoc):
|
||||
|
||||
class ExecLogic(ExecUnit):
|
||||
def __init__(self, width=256):
|
||||
ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "SHL", "AND"])
|
||||
ExecUnit.__init__(self, width, ["XOR", "NOT", "PSA", "SHL", "AND" ])
|
||||
self.intro = ModuleDoc(title="Logic ExecUnit Subclass", body=f"""
|
||||
This execution unit implements bit-wise logic operations: XOR, NOT, and
|
||||
passthrough.
|
||||
|
||||
* XOR returns the result of A^sB
|
||||
* XOR returns the result of A^B
|
||||
* NOT returns the result of !A
|
||||
* PSA returns the value of A
|
||||
* SHL returns A << 1
|
||||
* AND returns the result of A&sB
|
||||
* AND returns the result of A&B
|
||||
|
||||
""")
|
||||
|
||||
@ -329,7 +333,7 @@ passthrough.
|
||||
|
||||
class ExecAddSub(ExecUnit, AutoDoc):
|
||||
def __init__(self, width=256):
|
||||
ExecUnit.__init__(self, width, ["ADD32V", "SUB32V"])
|
||||
ExecUnit.__init__(self, width, ["ADD32V", "SUB32V", "MIN32V" ])
|
||||
self.notes = ModuleDoc(title="Add/Sub ExecUnit Subclass", body=f"""
|
||||
""")
|
||||
|
||||
@ -342,8 +346,70 @@ class ExecAddSub(ExecUnit, AutoDoc):
|
||||
[ self.q[x*32:(x+1)*32].eq(self.a[x*32:(x+1)*32] + self.b[x*32:(x+1)*32]) for x in range(0, width//32) ],
|
||||
).Elif(self.instruction.opcode == opcodes["SUB32V"][0],
|
||||
[ self.q[x*32:(x+1)*32].eq(self.a[x*32:(x+1)*32] - self.b[x*32:(x+1)*32]) for x in range(0, width//32) ],
|
||||
),
|
||||
).Elif(self.instruction.opcode == opcodes["MIN32V"][0],
|
||||
# Per-lane unsigned minimum. Migen's If() takes every positional argument after the
# condition as a "true" statement, so the alternative has to go through .Else();
# each branch writes only its own 32-bit lane of q.
[ If(self.a[x*32:(x+1)*32] <= self.b[x*32:(x+1)*32], self.q[x*32:(x+1)*32].eq(self.a[x*32:(x+1)*32])).Else(self.q[x*32:(x+1)*32].eq(self.b[x*32:(x+1)*32])) for x in range(0, width//32) ],
|
||||
)
|
||||
]
|
||||
|
||||
# Raster-operation execution unit, handling the ROP32V and SROP opcodes.
#
# SROP latches a four-entry truth table from Ra[32:36] and a 32-bit planemask
# from Ra[0:32], and returns 0.
# ROP32V combines operands A and B bit by bit through that truth table, one
# `lanewidth`-bit lane at a time; wherever the planemask bit is clear the A bit
# is passed through unchanged.
class ExecRop(ExecUnit, AutoDoc):
    def __init__(self, width=256):
        ExecUnit.__init__(self, width, ["ROP32V", "SROP"])
        self.notes = ModuleDoc(title="Rop ExecUnit Subclass", body=f"""
""")

        # truth table: rop[n] is the result bit for the 2-bit input n = {b, a}
        rop = Array(Signal() for x in range(4))
        planemask = Signal(32)

        lanewidth = 128
        nlane = width // lanewidth
        assert(nlane == 2) ## fixme (the FSM below sequences exactly two lanes)
        # per-bit input pair for the lane being processed: bit 0 from A, bit 1 from B
        rop_in = Array(Signal(2) for x in range(lanewidth))
        rop_out = Signal(lanewidth)
        rop_buf = Signal(nlane * lanewidth)
        lanec = Signal(log2_int(nlane, False))

        # rop_out is registered, so it trails rop_in by one eng_clk cycle
        self.sync.eng_clk += [ rop_out[x].eq((rop[(rop_in[x])] & planemask[x%32]) | (rop_in[x][0] & ~planemask[x%32])) for x in range(lanewidth) ]

        self.sync.eng_clk += [
            #self.q_valid.eq(self.start),
            self.instruction_out.eq(self.instruction_in),
        ]

        self.submodules.seq = seq = ClockDomainsRenamer("eng_clk")(FSM(reset_state="IDLE"))
        seq.act("IDLE",
            If(self.start,
                If(self.instruction.opcode == opcodes["ROP32V"][0],
                    # feed the low lane
                    NextValue(lanec, 0),
                    [ NextValue(rop_in[x][0], self.a[x]) for x in range(0, lanewidth) ],
                    [ NextValue(rop_in[x][1], self.b[x]) for x in range(0, lanewidth) ],
                    NextState("NEXT")
                ).Elif(self.instruction.opcode == opcodes["SROP"][0],
                    # latch truth table and planemask; the instruction itself yields 0
                    NextValue(rop_buf, 0),
                    NextValue(rop[0], self.a[35]),
                    NextValue(rop[1], self.a[34]),
                    NextValue(rop[2], self.a[33]),
                    NextValue(rop[3], self.a[32]),
                    NextValue(planemask, self.a[0:32]),
                    NextState("OUT"),
                )
            ))
        seq.act("NEXT",
            # feed the high lane
            [ NextValue(rop_in[x][0], self.a[lanewidth+x]) for x in range(0, lanewidth) ],
            [ NextValue(rop_in[x][1], self.b[lanewidth+x]) for x in range(0, lanewidth) ],
            NextState("WRITE"))
        seq.act("WRITE",
            # collect rop_out for lane 0, then for lane 1 on the following cycle
            Case(lanec, {
                0: [ NextValue(rop_buf[0:lanewidth], rop_out),
                     NextValue(lanec, 1),
                ],
                1: [ NextValue(rop_buf[lanewidth:2*lanewidth], rop_out),
                     NextState("OUT"),
                ],
            }))
        seq.act("OUT",
            self.q_valid.eq(1),
            self.q.eq(rop_buf),
            NextState("IDLE"))
|
||||
|
||||
class ExecLS(ExecUnit, AutoDoc):
|
||||
def __init__(self, width=256, interface=None, memoryport=None, r_dat_f=None, r_dat_m=None, granule=0):
|
||||
@ -439,6 +505,7 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
)
|
||||
)
|
||||
)
|
||||
# having this extra stage and a registered 'address' help with timings, apparently
|
||||
lsseq.act("DOMEM",
|
||||
NextValue(cpar, cpar ^ 1),
|
||||
If(self.instruction.opcode == opcodes["MEM"][0],
|
||||
@ -706,7 +773,6 @@ class ExecLS(ExecUnit, AutoDoc):
|
||||
self.sync.mul_clk += self.state[28:30].eq((self.state[28:30] & Replicate(~start_pipe, 2)) | self.has_timeout)
|
||||
self.sync.mul_clk += self.state[30:32].eq((self.state[30:32] & Replicate(~start_pipe, 2)) | self.has_failure)
|
||||
|
||||
|
||||
class Jareth(Module, AutoCSR, AutoDoc):
|
||||
def __init__(self, platform, prefix, memoryport, sim=False, build_prefix=""):
|
||||
opdoc = "\n"
|
||||
@ -1148,6 +1214,10 @@ Here are the currently implemented opcodes for The Engine:
|
||||
NextState("DO_BRZ32"),
|
||||
).Elif(instruction.opcode == opcodes["BRNZ32"][0],
|
||||
NextState("DO_BRNZ32"),
|
||||
).Elif(instruction.opcode == opcodes["BRZ4"][0],
|
||||
NextState("DO_BRZ4"),
|
||||
).Elif(instruction.opcode == opcodes["BRZ5"][0],
|
||||
NextState("DO_BRZ5"),
|
||||
).Elif(instruction.opcode == opcodes["FIN"][0],
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
@ -1218,6 +1288,50 @@ Here are the currently implemented opcodes for The Engine:
|
||||
)
|
||||
),
|
||||
)
|
||||
seq.act("DO_BRZ4",
|
||||
If(ra_dat[0:4] == 0,
|
||||
If( (sext_immediate + mpc + 1 < mpc_stop) & (sext_immediate + mpc + 1 >= self.mpstart.fields.mpstart), # validate new PC is in range
|
||||
NextState("FETCH"),
|
||||
NextValue(mpc, sext_immediate + mpc + 1),
|
||||
).Else(
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
)
|
||||
).Else(
|
||||
If(abort,
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
).Elif(mpc < mpc_stop,
|
||||
NextState("FETCH"),
|
||||
NextValue(mpc, mpc + 1),
|
||||
).Else(
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
)
|
||||
),
|
||||
)
|
||||
seq.act("DO_BRZ5",
|
||||
If(ra_dat[0:5] == 0,
|
||||
If( (sext_immediate + mpc + 1 < mpc_stop) & (sext_immediate + mpc + 1 >= self.mpstart.fields.mpstart), # validate new PC is in range
|
||||
NextState("FETCH"),
|
||||
NextValue(mpc, sext_immediate + mpc + 1),
|
||||
).Else(
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
)
|
||||
).Else(
|
||||
If(abort,
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
).Elif(mpc < mpc_stop,
|
||||
NextState("FETCH"),
|
||||
NextValue(mpc, mpc + 1),
|
||||
).Else(
|
||||
NextState("IDLE"),
|
||||
NextValue(running, 0),
|
||||
)
|
||||
),
|
||||
)
|
||||
seq.act("PAUSED",
|
||||
If(~pause_req,
|
||||
NextValue(pause_gnt, 0),
|
||||
@ -1232,11 +1346,13 @@ Here are the currently implemented opcodes for The Engine:
|
||||
exec_units = {
|
||||
"exec_logic" : ExecLogic(width=rf_width_raw),
|
||||
"exec_addsub" : ExecAddSub(width=rf_width_raw),
|
||||
"exec_rop" : ExecRop(width=rf_width_raw),
|
||||
"exec_ls" : ExecLS(width=rf_width_raw, interface=self.busls, memoryport=memoryport, r_dat_f=r_dat_f, r_dat_m=r_dat_m, granule=granule),
|
||||
}
|
||||
exec_units_shift = {
|
||||
"exec_logic": True,
|
||||
"exec_addsub": False,
|
||||
"exec_rop": True,
|
||||
"exec_ls": False,
|
||||
}
|
||||
exec_unit_shift_num = { }
|
||||
|
||||
@ -12,36 +12,36 @@ fn main() -> std::io::Result<()> {
|
||||
// -----
|
||||
// size & 7 in %5
|
||||
// size rounded down in %6
|
||||
// input in %16
|
||||
// output in %17
|
||||
// 0 in %31
|
||||
// input in %7
|
||||
// output in %8
|
||||
// 0 in %15
|
||||
start:
|
||||
resm %31
|
||||
setadr %31, %0
|
||||
load256inc %16, %0
|
||||
load256inc %17, %1
|
||||
resm %15
|
||||
setadr %15, %0
|
||||
load256inc %7, ^0
|
||||
load256inc %8, ^1
|
||||
// slow
|
||||
setma %31, %0, #16
|
||||
setma %15, %0, #16
|
||||
// slow
|
||||
setmq %31, %1, #16
|
||||
setmq %15, %1, #16
|
||||
and %5, %2, #15
|
||||
sub32v %6, %2, %5
|
||||
brz32 done, %6
|
||||
loop:
|
||||
psa %18, %16
|
||||
psa %19, %17
|
||||
psa* %17, %16
|
||||
psa %20, %17
|
||||
store128inc %31, %2, %17
|
||||
psa %18, %7
|
||||
psa %19, %8
|
||||
psa* %8, %7
|
||||
psa %20, %8
|
||||
store128inc %15, ^2, %8
|
||||
sub32v %6, %6, #16
|
||||
brz32 last, %6
|
||||
loadh128inc %16, %0, %16
|
||||
loadh128inc %17, %1, %17
|
||||
loadh128inc %7, ^0, %7
|
||||
loadh128inc %8, ^1, %8
|
||||
brz32 loop, #0
|
||||
last:
|
||||
// FIXME: not if Q is aligned
|
||||
loadh128inc %17, %1, %17
|
||||
store128inc %31, %2, %17
|
||||
loadh128inc %8, ^1, %8
|
||||
store128inc %15, ^2, %8
|
||||
done:
|
||||
getadr %3
|
||||
getm %2
|
||||
@ -58,19 +58,19 @@ fn main() -> std::io::Result<()> {
|
||||
// live X count in %5
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// 0/scrap in %31
|
||||
// 0/scrap in %15
|
||||
start:
|
||||
// reset masks (probably not necessary with the starred-instruction)
|
||||
// resm %31
|
||||
// resm %15
|
||||
loop_y:
|
||||
// set source and destination addresses for current Y, X=first
|
||||
setadr %31, %0
|
||||
setadr %15, %0
|
||||
psa %5, %2
|
||||
loop_x:
|
||||
// load from SRC w/ post-increment
|
||||
load256inc %7, %0
|
||||
load256inc %7, ^0
|
||||
// store to DST w/ post-increment
|
||||
store256inc %31, %1, %7
|
||||
store256inc %15, ^1, %7
|
||||
// sub 32 (#5 is 32...) from live X count
|
||||
sub32v %5, %5, #5
|
||||
// if X count is not 0, keep looping
|
||||
@ -98,19 +98,19 @@ fn main() -> std::io::Result<()> {
|
||||
// live X count in %5
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// 0/scrap in %31
|
||||
// 0/scrap in %15
|
||||
start:
|
||||
// reset masks (probably not necessary with the starred-instruction)
|
||||
// resm %31
|
||||
// resm %15
|
||||
loop_y:
|
||||
// set source and destination addresses for current Y, X=first
|
||||
setadr %31, %0
|
||||
setadr %15, %0
|
||||
psa %5, %2
|
||||
loop_x:
|
||||
// load from SRC w/ post-increment
|
||||
load128inc %7, %0
|
||||
load128inc %7, ^0
|
||||
// store to DST w/ post-increment
|
||||
store128inc %31, %1, %7
|
||||
store128inc %15, ^1, %7
|
||||
// sub 16 (#16 is 16) from live X count
|
||||
sub32v %5, %5, #16
|
||||
// if X count is not 0, keep looping
|
||||
@ -139,17 +139,17 @@ fn main() -> std::io::Result<()> {
|
||||
// live X count in %5
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// 0/scrap in %31
|
||||
// 0/scrap in %15
|
||||
start:
|
||||
// reset masks (probably not necessary with the starred-instruction)
|
||||
// resm %31
|
||||
// resm %15
|
||||
loop_y:
|
||||
// set source and destination addresses for current Y, X=first
|
||||
setadr %31, %0
|
||||
setadr %15, %0
|
||||
psa %5, %2
|
||||
loop_x:
|
||||
// store to DST w/ post-increment
|
||||
store128inc %31, %0, %1
|
||||
store128inc %15, ^0, %1
|
||||
// sub 16 (#16 is 16) from live X count
|
||||
sub32v %5, %5, #16
|
||||
// if X count is not 0, keep looping
|
||||
@ -167,6 +167,291 @@ fn main() -> std::io::Result<()> {
|
||||
fin
|
||||
fin
|
||||
);
|
||||
|
||||
let mcode_fill256 = assemble_jareth!(
|
||||
// x..x / $DST in %0, aligned on 128 bits
|
||||
// 128-bits pattern in %1
|
||||
// x..x / X size in %2, multiple of 128 bits (16 bytes)
|
||||
// x..x / Y size in %3, arbitrary
|
||||
// x..x / dst_stride in %4 (screen width)
|
||||
// -----
|
||||
// live X count in %5
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// 0/scrap in %15
|
||||
start:
|
||||
// reset masks (probably not necessary with the starred-instruction)
|
||||
resm %15
|
||||
// compute X leftovers (modulo 32 -> #6 is 31)
|
||||
and %6, %2, #6
|
||||
// set the leftovers mask (offset is 0 as we are aligned)
|
||||
setmq %15, #0, %6
|
||||
loop_y:
|
||||
// set source and destination addresses for current Y, X=first
|
||||
setadr %15, %0
|
||||
// then the rounded value in X
|
||||
sub32v %5, %2, %6
|
||||
loop_x:
|
||||
// store to DST w/ post-increment
|
||||
store256inc %15, ^0, %1
|
||||
// sub 32 (#5 is 32) from live X count
|
||||
sub32v %5, %5, #5
|
||||
// if X count is not 0, keep looping
|
||||
brnz32 loop_x, %5
|
||||
|
||||
// decrement Y count
|
||||
sub32v %3, %3, #1
|
||||
// if 0, finished
|
||||
brz32 done, %3
|
||||
// add strides to initial addresses
|
||||
add32v %0, %0, %4
|
||||
// loop to do next line
|
||||
brz32 loop_y, #0
|
||||
done:
|
||||
fin
|
||||
fin
|
||||
);
|
||||
|
||||
let mcode_fill = assemble_jareth!(
|
||||
// x..x / $DST in %0, 128 bits
|
||||
// 128-bit pattern in %1 [assumed to be alignment-homogeneous]
|
||||
// x..x / X size in %2
|
||||
// x..x / Y size in %3,
|
||||
// x..x / dst_stride in %4 (screen width?)
|
||||
// -----
|
||||
// main loop:
|
||||
// live X count in %5
|
||||
// leftover X in %6
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// masked data in %7
|
||||
// 0/scrap in %15
|
||||
// -----
|
||||
// header loop:
|
||||
// live Y count in %5
|
||||
// $DST in %6
|
||||
// data in %7
|
||||
// 0/scrap in %15
|
||||
|
||||
|
||||
start:
|
||||
// if the number of lines or the number of elements per line is 0, exit early
|
||||
brz32 done256, %2
|
||||
brz32 done256, %3
|
||||
// reset masks
|
||||
resm %15
|
||||
// if $DST is aligned on 128 bits, jump to aligned loop
|
||||
brz4 start256, %0
|
||||
|
||||
// do the first column
|
||||
startX:
|
||||
// set alignment; we shift by the addr offset, and we mask whatever data is needed in the first 32 bytes
|
||||
setmq %15, %0, %2
|
||||
// copy Y
|
||||
psa %5, %3
|
||||
// copy $DST
|
||||
psa %6, %0
|
||||
loopX_y:
|
||||
// setadr
|
||||
setadr %15, %6
|
||||
// load old data
|
||||
load256 %7, ^0
|
||||
// insert pattern
|
||||
psa* %7, %1
|
||||
// rewrite data
|
||||
store256 %15, ^0, %7
|
||||
// increment copied $DST by stride
|
||||
add32v %6, %6, %4
|
||||
// decrement copied Y count
|
||||
sub32v %5, %5, #1
|
||||
// if not zero, continue
|
||||
brnz32 loopX_y, %5
|
||||
|
||||
loopX_done:
|
||||
// how much did we do (#6 is 31, #5 is 32)
|
||||
and %5, %0, #6
|
||||
// compute 32-(x&31)
|
||||
sub32v %5, #5, %5
|
||||
// compute the proper value
|
||||
min32v %5, %5, %2
|
||||
// add that to the address, which will now be aligned
|
||||
add32v %0, %0, %5
|
||||
// remove from X, as we have done it
|
||||
sub32v %2, %2, %5
|
||||
// fall through to the aligned loop if not 0, otherwise done
|
||||
brz32 done256, %2
|
||||
|
||||
start256:
|
||||
// compute X leftovers (modulo 32 -> #6 is 31)
|
||||
and %6, %2, #6
|
||||
// set the leftovers mask (offset is 0 as we are aligned)
|
||||
setmq %15, #0, %6
|
||||
|
||||
loop256_y:
|
||||
// set source and destination addresses for current Y
|
||||
setadr %15, %0
|
||||
// then the rounded value in X
|
||||
sub32v %5, %2, %6
|
||||
// already 0, bypass aligned stuff
|
||||
brz32 loop256_x_end, %5
|
||||
|
||||
loop256_x:
|
||||
// store to DST w/ post-increment
|
||||
store256inc %15, ^0, %1
|
||||
// sub 32 (#5 is 32) from live rounded X count
|
||||
sub32v %5, %5, #5
|
||||
// if X count is not 0, keep looping
|
||||
brnz32 loop256_x, %5
|
||||
// check for line leftovers
|
||||
loop256_x_end:
|
||||
brz4 done256_x, %6
|
||||
|
||||
// load old data
|
||||
load256 %7, ^0
|
||||
// insert pattern
|
||||
psa* %7, %1
|
||||
// rewrite data
|
||||
store256 %15, ^0, %7
|
||||
|
||||
done256_x:
|
||||
// decrement Y count
|
||||
sub32v %3, %3, #1
|
||||
// if 0, finished
|
||||
brz32 done256, %3
|
||||
|
||||
// add strides to initial addresses
|
||||
add32v %0, %0, %4
|
||||
// loop256 to do next line
|
||||
brz32 loop256_y, #0
|
||||
|
||||
done256:
|
||||
fin
|
||||
fin
|
||||
);
|
||||
|
||||
let mcode_fillrop = assemble_jareth!(
|
||||
// x..x / $DST in %0, 128 bits
|
||||
// 128-bit pattern in %1 [assumed to be alignment-homogeneous]
|
||||
// x..x / X size in %2
|
||||
// x..x / Y size in %3,
|
||||
// x..x / dst_stride in %4 (screen width?)
|
||||
// x..x / rop / planemask in %5 [assumed to be alignment-homogeneous]
|
||||
// -----
|
||||
// main loop:
|
||||
// live X count in %8
|
||||
// leftover X in %6
|
||||
// // live Y count in %3
|
||||
// data in %7
|
||||
// masked data in %7
|
||||
// 0/scrap in %15
|
||||
// -----
|
||||
// header loop:
|
||||
// live Y count in %8
|
||||
// $DST in %6
|
||||
// data in %7
|
||||
// 0/scrap in %15
|
||||
|
||||
|
||||
start:
|
||||
// if the number of lines or the number of elements per line is 0, exit early
|
||||
brz32 done256, %2
|
||||
brz32 done256, %3
|
||||
// reset masks
|
||||
resm %15
|
||||
// set planemask / rop
|
||||
srop %15, %5
|
||||
// if $DST is aligned on 128 bits, jump to aligned loop
|
||||
brz4 start256, %0
|
||||
|
||||
// do the first column(s)
|
||||
startX:
|
||||
// set alignment; we shift by the addr offset, and we mask whatever data is needed in the first 32 bytes
|
||||
setmq %15, %0, %2
|
||||
// copy Y
|
||||
psa %8, %3
|
||||
// copy $DST
|
||||
psa %6, %0
|
||||
loopX_y:
|
||||
// setadr
|
||||
setadr %15, %6
|
||||
// load old data
|
||||
load256 %7, ^0
|
||||
// rop & insert
|
||||
rop32v* %7, %7, %1
|
||||
// rewrite data
|
||||
store256 %15, ^0, %7
|
||||
// increment copied $DST by stride
|
||||
add32v %6, %6, %4
|
||||
// decrement copied Y count
|
||||
sub32v %8, %8, #1
|
||||
// if not zero, continue
|
||||
brnz32 loopX_y, %8
|
||||
|
||||
loopX_done:
|
||||
// how much did we do (#6 is 31, #5 is 32)
|
||||
and %8, %0, #6
|
||||
// compute 32-(x&31) - upper bound
|
||||
sub32v %8, #5, %8
|
||||
// compute the proper value
|
||||
min32v %8, %8, %2
|
||||
// add that to the address, which will now be aligned if there's stuff left to do
|
||||
add32v %0, %0, %8
|
||||
// remove from X, as we have done it
|
||||
sub32v %2, %2, %8
|
||||
// fall through the aligned loop if not 0, otherwise done
|
||||
brz32 done256, %2
|
||||
|
||||
start256:
|
||||
// compute X leftovers (modulo 32 -> #6 is 31)
|
||||
and %6, %2, #6
|
||||
// set the leftovers mask (offset is 0 as we are aligned)
|
||||
setmq %15, #0, %6
|
||||
|
||||
loop256_y:
|
||||
// set source and destination addresses for current Y
|
||||
setadr %15, %0
|
||||
// then the rounded value in X
|
||||
sub32v %8, %2, %6
|
||||
// already 0, bypass aligned stuff
|
||||
brz32 loop256_x_end, %8
|
||||
|
||||
loop256_x:
|
||||
// load data
|
||||
load256 %7, ^0
|
||||
// rop
|
||||
rop32v %7, %7, %1
|
||||
// store to DST w/ post-increment
|
||||
store256inc %15, ^0, %7
|
||||
// sub 32 (#5 is 32) from live rounded X count
|
||||
sub32v %8, %8, #5
|
||||
// if X count is not 0, keep looping
|
||||
brnz32 loop256_x, %8
|
||||
// check for line leftovers
|
||||
loop256_x_end:
|
||||
brz4 done256_x, %6
|
||||
|
||||
// load old data
|
||||
load256 %7, ^0
|
||||
// rop & insert
|
||||
rop32v* %7, %7, %1
|
||||
// rewrite data
|
||||
store256 %15, ^0, %7
|
||||
|
||||
done256_x:
|
||||
// decrement Y count
|
||||
sub32v %3, %3, #1
|
||||
// if 0, finished
|
||||
brz32 done256, %3
|
||||
|
||||
// add strides to initial addresses
|
||||
add32v %0, %0, %4
|
||||
// loop256 to do next line
|
||||
brz32 loop256_y, #0
|
||||
|
||||
done256:
|
||||
fin
|
||||
fin
|
||||
);
|
||||
|
||||
let mut pos;
|
||||
|
||||
@ -206,5 +491,32 @@ fn main() -> std::io::Result<()> {
|
||||
println!("");
|
||||
println!("-> {}", mcode_fill128.len());
|
||||
|
||||
pos = 0;
|
||||
println!("fill256:");
|
||||
while pos < mcode_fill256.len() {
|
||||
print!("0x{:08x},", mcode_fill256[pos]);
|
||||
pos = pos + 1;
|
||||
}
|
||||
println!("");
|
||||
println!("-> {}", mcode_fill256.len());
|
||||
|
||||
pos = 0;
|
||||
println!("fill:");
|
||||
while pos < mcode_fill.len() {
|
||||
print!("0x{:08x},", mcode_fill[pos]);
|
||||
pos = pos + 1;
|
||||
}
|
||||
println!("");
|
||||
println!("-> {}", mcode_fill.len());
|
||||
|
||||
pos = 0;
|
||||
println!("fillrop:");
|
||||
while pos < mcode_fillrop.len() {
|
||||
print!("0x{:08x},", mcode_fillrop[pos]);
|
||||
pos = pos + 1;
|
||||
}
|
||||
println!("");
|
||||
println!("-> {}", mcode_fillrop.len());
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user