From fa0387fe96fb97e54e309a727bf39a2f44a682de Mon Sep 17 00:00:00 2001 From: Romain Dolbeau Date: Sun, 24 Oct 2021 20:02:34 +0200 Subject: [PATCH] trying to improve the cg accel stuff --- sbus-to-ztex-gateware-migen/blit.c | 862 ++++++++++-------- sbus-to-ztex-gateware-migen/blit.sh | 20 +- sbus-to-ztex-gateware-migen/cg6_accel.py | 74 +- sbus-to-ztex-gateware-migen/cg6_fb.py | 4 +- .../sbus_to_fpga_soc.py | 2 +- 5 files changed, 522 insertions(+), 440 deletions(-) diff --git a/sbus-to-ztex-gateware-migen/blit.c b/sbus-to-ztex-gateware-migen/blit.c index 9267d47..1f12615 100644 --- a/sbus-to-ztex-gateware-migen/blit.c +++ b/sbus-to-ztex-gateware-migen/blit.c @@ -311,7 +311,7 @@ struct control_blitter { /* the FBC, mapped at BASE_FDC */ struct cg6_fbc { u_int32_t fbc_config; /* r/o CONFIG register */ - u_int32_t fbc_mode; /* mode setting */ + volatile u_int32_t fbc_mode; /* mode setting */ u_int32_t fbc_clip; /* TEC clip check */ u_int32_t fbc_pad2[1]; u_int32_t fbc_s; /* global status */ @@ -345,8 +345,9 @@ struct cg6_fbc { u_int32_t fbc_pad10[2]; u_int32_t fbc_fg; /* fg value for rop */ u_int32_t fbc_bg; - u_int32_t fbc_alu; /* operation to be performed */ - u_int32_t fbc_pad12[509]; + volatile u_int32_t fbc_alu; /* operation to be performed */ + u_int32_t fbc_pm; /* planemask */ + u_int32_t fbc_pad12[508]; u_int32_t fbc_arectx; /* rectangle drawing, x coord */ u_int32_t fbc_arecty; /* rectangle drawing, y coord */ /* actually much more, but nothing more we need */ @@ -373,24 +374,45 @@ static inline void flush_cache(void) { typedef unsigned int unsigned_param_type; -void rectfill(unsigned_param_type xd, - unsigned_param_type yd, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type color +static void rectfill(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color ); -void invert(unsigned_param_type xd, - unsigned_param_type yd, - unsigned_param_type wi, - unsigned_param_type re +static void rectfill_pm(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color, + const unsigned char pm + ); +static void xorrectfill(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color + ); +static void xorrectfill_pm(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color, + const unsigned char pm + ); +static void invert(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re ); -void bitblit(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd, - unsigned char gxop +static void bitblit(const unsigned_param_type xs, + const unsigned_param_type ys, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned char pm, + const unsigned char gxop ); asm(".global _start\n" @@ -402,7 +424,7 @@ asm(".global _start\n" //"slli a0,a0,12\n" // 0x00001000, BASE_RAM_SIZE //"add sp,sp,a0\n" // SP at the end of the SRAM "nop\n" - "li sp, 0x00420ffc\n" // SP at the end of the SRAM - normally unused + "li sp, 0x00420ffc\n" // SP at the end of the SRAM //"li a0, 0x00700968\n" // @ of r5_cmd //"li a1, 0x00C0FFEE\n" //"sw a1, 0(a0)\n" @@ -434,38 +456,43 @@ asm(".global _start\n" void from_reset(void) { struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; unsigned int cmd = fbc->fbc_r5_cmd; - - // we don't want stale data from the previous run - // (write are write-through so we should be OK on that aspect) - // maybe should be at the end to save time ? - // moved to _start - // flush_cache(); + unsigned int alu = fbc->fbc_alu; + unsigned int mode = fbc->fbc_mode; + unsigned int use_planemask = ((alu & GX_PLANE_MASK) == GX_PLANE_MASK) ? 1 : 0; switch (cmd & 0xF) { case FUN_DRAW: { - switch (fbc->fbc_alu) { + switch (alu) { case CG6_ALU_FILL: // ____ff00 console case CG6_ALU_COPY: // ____cccc equivalent to fill if patterns == 1 (... which is the case with GX_PATTERN_ONES) - case ROP_FILL(GX_ROP_CLEAR, GX_ROP_SET): // ____ff00 Draw/GXcopy in X11 FIXME: planemask ? - // //case ROP_FILL(GX_ROP_SET, GX_ROP_SET): // ____ffff Draw/GXset in X11 FIXME: planemask ? - case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // ____cccc Blit/GXcopy in X11 FIXME: planemask ? + case ROP_FILL(GX_ROP_CLEAR, GX_ROP_SET): // ____ff00 Draw/GXcopy in X11 + // //case ROP_FILL(GX_ROP_SET, GX_ROP_SET): // ____ffff Draw/GXset in X11 + case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // ____cccc Blit/GXcopy in X11 { - switch (fbc->fbc_mode) { + switch (mode) { case (GX_BLIT_SRC | GX_MODE_COLOR8): // console: rectfill & clearscreen - case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): // X11 FIXME:planemask? - rectfill(fbc->fbc_arectx_prev, - fbc->fbc_arecty_prev, - 1 + fbc->fbc_arectx - fbc->fbc_arectx_prev, - 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev, - fbc->fbc_fg); + case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): + if (use_planemask) + rectfill_pm(fbc->fbc_arectx_prev, + fbc->fbc_arecty_prev, + 1 + fbc->fbc_arectx - fbc->fbc_arectx_prev, + 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev, + fbc->fbc_fg, + fbc->fbc_pm); + else + rectfill(fbc->fbc_arectx_prev, + fbc->fbc_arecty_prev, + 1 + fbc->fbc_arectx - fbc->fbc_arectx_prev, + 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev, + fbc->fbc_fg); break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; - case CG6_ALU_FLIP: { // console - switch (fbc->fbc_mode) + case CG6_ALU_FLIP: { // ____5555 console + switch (mode) { case (GX_BLIT_SRC | GX_MODE_COLOR8): // invert invert(fbc->fbc_arectx_prev, @@ -474,109 +501,133 @@ void from_reset(void) { 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev); break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); + break; + } + } break; + case ROP_FILL(GX_ROP_NOOP, GX_ROP_INVERT): { // ____55aa Draw/GXxor in X11 + switch (mode) + { + case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): + if (use_planemask) + xorrectfill_pm(fbc->fbc_arectx_prev, + fbc->fbc_arecty_prev, + 1 + fbc->fbc_arectx - fbc->fbc_arectx_prev, + 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev, + fbc->fbc_fg, + fbc->fbc_pm); + else + xorrectfill(fbc->fbc_arectx_prev, + fbc->fbc_arecty_prev, + 1 + fbc->fbc_arectx - fbc->fbc_arectx_prev, + 1 + fbc->fbc_arecty - fbc->fbc_arecty_prev, + fbc->fbc_fg); + + break; + default: + SHOW_PC_2VAL(alu, mode); break; } } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; case FUN_BLIT: { - switch (fbc->fbc_alu) + switch (alu) { - case CG6_ALU_COPY: // console - case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // Blit/GXcopy in X11 - case ROP_BLIT(GX_ROP_SET, GX_ROP_SET): // Blit/GXset in X11 FIXME: correct or not ? + case CG6_ALU_COPY: // ____cccc console + case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // ____ff00 Blit/GXcopy in X11 { - switch (fbc->fbc_mode) { + switch (mode) { case (GX_BLIT_SRC | GX_MODE_COLOR8): // console case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): // X11 FIXME:planemask? { - unsigned_param_type xs = fbc->fbc_x0; - unsigned_param_type ys = fbc->fbc_y0; - unsigned_param_type wi = fbc->fbc_x1 - xs + 1; - unsigned_param_type re = fbc->fbc_y1 - ys + 1; - unsigned_param_type xd = fbc->fbc_x2; - unsigned_param_type yd = fbc->fbc_y2; - unsigned_param_type wi_dup = fbc->fbc_x3 - xd + 1; - unsigned_param_type re_dup = fbc->fbc_y3 - yd + 1; -#if 0 - int do_blit = 1; - if (wi_dup != wi) - do_blit = 0; - if (re_dup != re) - do_blit = 0; - - if (xs > HRES) - do_blit = 0; - if (ys > VRES) - do_blit = 0; - if (xd > HRES) - do_blit = 0; - if (yd > VRES) - do_blit = 0; - - if (wi > HRES) - do_blit = 0; - if (re > VRES) - do_blit = 0; - - if ((xs+wi) > HRES) - do_blit = 0; - if ((ys+re) > VRES) - do_blit = 0; - if ((xd+wi) > HRES) - do_blit = 0; - if ((yd+re) > VRES) - do_blit = 0; - if (do_blit) -#endif - bitblit(xs, ys, wi, re, xd, yd, 0x3); // GXcopy + const unsigned_param_type xs = fbc->fbc_x0; + const unsigned_param_type ys = fbc->fbc_y0; + const unsigned_param_type wi = fbc->fbc_x1 - xs + 1; + const unsigned_param_type re = fbc->fbc_y1 - ys + 1; + const unsigned_param_type xd = fbc->fbc_x2; + const unsigned_param_type yd = fbc->fbc_y2; + const unsigned_param_type wi_dup = fbc->fbc_x3 - xd + 1; + const unsigned_param_type re_dup = fbc->fbc_y3 - yd + 1; + if (use_planemask) + bitblit(xs, ys, wi, re, xd, yd, fbc->fbc_pm, 0x3); // GXcopy + else + bitblit(xs, ys, wi, re, xd, yd, 0xFF, 0x3); // GXcopy } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; - case ROP_BLIT(GX_ROP_NOOP, GX_ROP_INVERT): // Blit/GXxor in X11 + case ROP_BLIT(GX_ROP_SET, GX_ROP_SET): // ____ffff Blit/GXset in X11 { - switch (fbc->fbc_mode) { + switch (mode) { + case (GX_BLIT_SRC | GX_MODE_COLOR8): // console + case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): + { + const unsigned_param_type xs = fbc->fbc_x0; + const unsigned_param_type ys = fbc->fbc_y0; + const unsigned_param_type wi = fbc->fbc_x1 - xs + 1; + const unsigned_param_type re = fbc->fbc_y1 - ys + 1; + const unsigned_param_type xd = fbc->fbc_x2; + const unsigned_param_type yd = fbc->fbc_y2; + const unsigned_param_type wi_dup = fbc->fbc_x3 - xd + 1; + const unsigned_param_type re_dup = fbc->fbc_y3 - yd + 1; + if (use_planemask) + rectfill_pm(xd, yd, wi, re, 0xff, fbc->fbc_pm); // GXset doesn't need the source ??? + else + rectfill(xd, yd, wi, re, 0xff); // GXset doesn't need the source ??? + } + break; + default: + SHOW_PC_2VAL(alu, mode); + break; + } + } break; + case ROP_BLIT(GX_ROP_NOOP, GX_ROP_INVERT): // ____6666 Blit/GXxor in X11 + { + switch (mode) { case (GX_BLIT_SRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): // X11 FIXME:planemask? { - unsigned_param_type xs = fbc->fbc_x0; - unsigned_param_type ys = fbc->fbc_y0; - unsigned_param_type wi = fbc->fbc_x1 - xs + 1; - unsigned_param_type re = fbc->fbc_y1 - ys + 1; - unsigned_param_type xd = fbc->fbc_x2; - unsigned_param_type yd = fbc->fbc_y2; - unsigned_param_type wi_dup = fbc->fbc_x3 - xd + 1; - unsigned_param_type re_dup = fbc->fbc_y3 - yd + 1; - bitblit(xs, ys, wi, re, xd, yd, 0x6); // GXor + const unsigned_param_type xs = fbc->fbc_x0; + const unsigned_param_type ys = fbc->fbc_y0; + const unsigned_param_type wi = fbc->fbc_x1 - xs + 1; + const unsigned_param_type re = fbc->fbc_y1 - ys + 1; + const unsigned_param_type xd = fbc->fbc_x2; + const unsigned_param_type yd = fbc->fbc_y2; + const unsigned_param_type wi_dup = fbc->fbc_x3 - xd + 1; + const unsigned_param_type re_dup = fbc->fbc_y3 - yd + 1; + if (use_planemask) + bitblit(xs, ys, wi, re, xd, yd, fbc->fbc_pm, 0x6); // GXor + else + bitblit(xs, ys, wi, re, xd, yd, 0xFF, 0x6); // GXor } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; case FUN_FONT: { - switch (fbc->fbc_alu) + switch (alu) { - case CG6_ALU_COPY: { // console - case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // Blit/GXcopy in X11 - switch (fbc->fbc_mode) { + case CG6_ALU_COPY: // console + case ROP_BLIT(GX_ROP_CLEAR, GX_ROP_SET): // Blit/GXcopy in X11 + { + switch (mode) { case (GX_BLIT_NOSRC | GX_MODE_COLOR8): // console - case (GX_BLIT_NOSRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): // X11 FIXME:planemask? + case (GX_BLIT_NOSRC | GX_MODE_COLOR8 | GX_DRAW_RENDER | GX_BWRITE0_ENABLE | GX_BWRITE1_DISABLE | GX_BREAD_0 | GX_BDISP_0): //case (GX_BLIT_SRC | GX_MODE_COLOR8): // what is SRC then? { // cgsix_putchar_aa @@ -598,12 +649,19 @@ void from_reset(void) { const unsigned int xoff = xds - xdsr; if ((xde >= xds) && (xofffbc_next_font; +#if 1 + unsigned int rbits; + asm("rev8 %0, %1\n" : "=r"(rbits) : "r"(bits)); +#endif unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xdsr); for (unsigned i = xoff ; i < we ; i++) { +#if 0 unsigned char data = (bits >> (((we-1)-i) * 8)) & 0xFF; - //if (((dptr + i) >= ((unsigned char *)BASE_FB)) && - // ((dptr + i) < ((1048576*2)+(unsigned char *)BASE_FB))) dptr[i] = data; +#else + dptr[i] = (unsigned char)rbits; + rbits >>= 8; +#endif } } cmd = (FUN_FONT_NEXT_REQ | FUN_FONT); @@ -612,13 +670,13 @@ void from_reset(void) { } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } - } break; + } break; case (GX_PATTERN_ONES | ROP_OSTP(GX_ROP_CLEAR, GX_ROP_SET)): // console, also X11 OpaqueStipple/GXcopy FIXME:planemask? { - switch (fbc->fbc_mode) { + switch (mode) { case (GX_BLIT_NOSRC | GX_MODE_COLOR1): { const unsigned int xdsm = fbc->fbc_clipminx; @@ -643,7 +701,13 @@ void from_reset(void) { unsigned int bits = fbc->fbc_next_font << xoff; unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xdsr); for (unsigned i = xoff ; i < we ; i++) { +#if 0 if (bits & 0x80000000) dptr[i] = fg8; else dptr[i] = bg8; +#else + unsigned char data; + asm("cmov %0, %1, %2, %3\n" : "=r"(data) : "r"(bits&0x80000000), "r"(fg8), "r"(bg8)); + dptr[i] = data; +#endif bits <<= 1; } } @@ -653,13 +717,13 @@ void from_reset(void) { } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; case (GX_PATTERN_ONES | ROP_STIP(GX_ROP_CLEAR, GX_ROP_SET)): // X11 Stipple/GXcopy (not used in console) FIXME:planemask? { - switch (fbc->fbc_mode) + switch (mode) { case (GX_BLIT_NOSRC | GX_MODE_COLOR1): { @@ -695,12 +759,12 @@ void from_reset(void) { } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; default: - SHOW_PC_2VAL(fbc->fbc_alu, fbc->fbc_mode); + SHOW_PC_2VAL(alu, mode); break; } } break; @@ -719,68 +783,77 @@ void from_reset(void) { goto done; } -#define bitblit_proto_int(a, b, suf) \ - void bitblit##a##b##suf(unsigned_param_type xs, \ - unsigned_param_type ys, \ - unsigned_param_type wi, \ - unsigned_param_type re, \ - unsigned_param_type xd, \ - unsigned_param_type yd \ - ); -#define bitblit_proto(suf) \ - bitblit_proto_int(_fwd, _fwd, suf) \ - bitblit_proto_int(_bwd, _fwd, suf) \ - bitblit_proto_int(_fwd, _bwd, suf) \ - bitblit_proto_int(_bwd, _bwd, suf) +#define bitblit_proto_int(a, b, suf) \ + static void bitblit##a##b##suf(const unsigned_param_type xs, \ + const unsigned_param_type ys, \ + const unsigned_param_type wi, \ + const unsigned_param_type re, \ + const unsigned_param_type xd, \ + const unsigned_param_type yd, \ + const unsigned char pm \ + ) +#define bitblit_proto(suf) \ + bitblit_proto_int(_fwd, _fwd, suf); \ + bitblit_proto_int(_bwd, _fwd, suf); \ + bitblit_proto_int(_fwd, _bwd, suf) +// bitblit_proto_int(_bwd, _bwd, suf); -bitblit_proto(_copy) -bitblit_proto(_xor) +bitblit_proto(_copy); +bitblit_proto(_xor); +bitblit_proto(_copy_pm); +bitblit_proto(_xor_pm); -void bitblit(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd, - unsigned char gxop + +#define ROUTE_BITBLIT_PM(pm, bb) \ + if (pm == 0xFF) bb(xs, ys, wi, re, xd, yd, pm); \ + else bb##_pm(xs, ys, wi, re, xd, yd, pm) + +static void bitblit(const unsigned_param_type xs, + const unsigned_param_type ys, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned char pm, + const unsigned char gxop ) { struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; if (ys > yd) { switch(gxop) { case 0x3: // GXcopy - bitblit_fwd_fwd_copy(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_copy); break; case 0x6: // GXxor - bitblit_fwd_fwd_xor(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_xor); break; } } else if (ys < yd) { switch(gxop) { case 0x3: // GXcopy - bitblit_bwd_fwd_copy(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_bwd_fwd_copy); break; case 0x6: // GXxor - bitblit_bwd_fwd_xor(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_bwd_fwd_xor); break; } } else { // ys == yd if (xs > xd) { switch(gxop) { case 0x3: // GXcopy - bitblit_fwd_fwd_copy(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_copy); break; case 0x6: // GXxor - bitblit_fwd_fwd_xor(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_fwd_xor); break; } } else if (xs < xd) { switch(gxop) { case 0x3: // GXcopy - bitblit_fwd_bwd_copy(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_bwd_copy); break; case 0x6: // GXxor - bitblit_fwd_bwd_xor(xs, ys, wi, re, xd, yd); + ROUTE_BITBLIT_PM(pm, bitblit_fwd_bwd_xor); break; } } else { // xs == xd @@ -789,7 +862,7 @@ void bitblit(unsigned_param_type xs, /* don't bother */ break; case 0x6: // GXxor - rectfill(xd, yd, wi, re, 0); + rectfill(xd, yd, wi, re, 0); // FIXME: pixelmask break; } } @@ -797,11 +870,11 @@ void bitblit(unsigned_param_type xs, } -void rectfill(unsigned_param_type xd, - unsigned_param_type yd, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type color +static void rectfill(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color ) { struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; unsigned int i, j; @@ -827,10 +900,104 @@ void rectfill(unsigned_param_type xd, } } -void invert(unsigned_param_type xd, - unsigned_param_type yd, - unsigned_param_type wi, - unsigned_param_type re +static void rectfill_pm(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color, + const unsigned char pm + ) { + struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; + unsigned int i, j; + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); + unsigned char *dptr_line = dptr; + unsigned char u8color = color; + + for (j = 0 ; j < re ; j++) { + unsigned char *dptr_elt = dptr_line; + i = 0; + if ((xd & 0x3) == 0) { + unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24; + unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; + for ( ; i < (wi&(~3)) ; i+=4) { + *(unsigned int*)dptr_elt = (u32color & u32pm) | (*(unsigned int*)dptr_elt & ~u32pm); + dptr_elt +=4; + } + } + for ( ; i < wi ; i++) { + *dptr_elt = (u8color & pm) | (*dptr_elt & ~pm); + dptr_elt ++; + } + dptr_line += HRES; + } +} + + +static void xorrectfill(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color + ) { + struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; + unsigned int i, j; + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); + unsigned char *dptr_line = dptr; + unsigned char u8color = color & 0xFF; + + for (j = 0 ; j < re ; j++) { + unsigned char *dptr_elt = dptr_line; + i = 0; + if ((xd & 0x3) == 0) { + unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24; + for ( ; i < (wi&(~3)) ; i+=4) { + *(unsigned int*)dptr_elt ^= u32color; + dptr_elt +=4; + } + } + for ( ; i < wi ; i++) { + *dptr_elt ^= u8color; + dptr_elt ++; + } + dptr_line += HRES; + } +} +static void xorrectfill_pm(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type color, + const unsigned char pm + ) { + struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; + unsigned int i, j; + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); + unsigned char *dptr_line = dptr; + unsigned char u8color = color; + + for (j = 0 ; j < re ; j++) { + unsigned char *dptr_elt = dptr_line; + i = 0; + if ((xd & 0x3) == 0) { + unsigned int u32color = (unsigned int)u8color | ((unsigned int)u8color)<<8 | ((unsigned int)u8color)<<16 | ((unsigned int)u8color)<<24; + unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; + for ( ; i < (wi&(~3)) ; i+=4) { + *(unsigned int*)dptr_elt ^= (u32color & u32pm); + dptr_elt +=4; + } + } + for ( ; i < wi ; i++) { + *dptr_elt ^= (u8color & pm); + dptr_elt ++; + } + dptr_line += HRES; + } +} + +static void invert(const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned_param_type wi, + const unsigned_param_type re ) { struct cg6_fbc* fbc = (struct cg6_fbc*)BASE_FBC; unsigned int i, j; @@ -855,247 +1022,155 @@ void invert(unsigned_param_type xd, } -void bitblit_fwd_fwd_copy(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr; - unsigned char *dptr_line = dptr; +// NOT using npm enables the use of 'cmix' in more cases +#define COPY(d,s,pm,npm) (d) = (s) +//#define COPY_PM(d,s,pm,npm) (d) = (((s) & (pm)) | ((d) & (npm))) +#define COPY_PM(d,s,pm,npm) (d) = (((s) & (pm)) | ((d) & (~pm))) +#define XOR(d,s,pm,npm) (d) = ((s) ^ (d)) +//#define XOR_PM(d,s,pm,npm) (d) = ((((s) ^ (d)) & (pm)) | ((d) & (npm))) +#define XOR_PM(d,s,pm,npm) (d) = ((((s) ^ (d)) & (pm)) | ((d) & (~pm))) - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - i = 0; - /* this case is the console case */ - if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { - for ( ; i < (wi&(~0xf)) ; i+= 16) { - ((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0]; - ((unsigned int*)dptr_elt)[1] = ((unsigned int*)sptr_elt)[1]; - ((unsigned int*)dptr_elt)[2] = ((unsigned int*)sptr_elt)[2]; - ((unsigned int*)dptr_elt)[3] = ((unsigned int*)sptr_elt)[3]; - dptr_elt += 16; - sptr_elt += 16; - } - } - if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { - for ( ; i < (wi&(~3)) ; i+= 4) { - ((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0]; - dptr_elt += 4; - sptr_elt += 4; - } - } - for ( ; i < wi ; i++) { - *dptr_elt = *sptr_elt; - dptr_elt ++; - sptr_elt ++; - } - sptr_line += HRES; - dptr_line += HRES; +#define BLIT_FWD_FWD(NAME, OP) \ + static void bitblit_fwd_fwd_##NAME(const unsigned_param_type xs, \ + const unsigned_param_type ys, \ + const unsigned_param_type wi, \ + const unsigned_param_type re, \ + const unsigned_param_type xd, \ + const unsigned_param_type yd, \ + const unsigned char pm) { \ + unsigned int i, j; \ + unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); \ + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); \ + unsigned char *sptr_line = sptr; \ + unsigned char *dptr_line = dptr; \ + /*const unsigned char npm = ~pm;*/ \ + \ + for (j = 0 ; j < re ; j++) { \ + unsigned char *sptr_elt = sptr_line; \ + unsigned char *dptr_elt = dptr_line; \ + i = 0; \ + if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { \ + const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; \ + /*const unsigned int u32npm = (unsigned int)npm | ((unsigned int)npm)<<8 | ((unsigned int)npm)<<16 | ((unsigned int)npm)<<24;*/ \ + for ( ; i < (wi&(~0xf)) ; i+= 16) { \ + OP(((unsigned int*)dptr_elt)[0], ((unsigned int*)sptr_elt)[0], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[1], ((unsigned int*)sptr_elt)[1], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[2], ((unsigned int*)sptr_elt)[2], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[3], ((unsigned int*)sptr_elt)[3], u32pm, u32npm); \ + dptr_elt += 16; \ + sptr_elt += 16; \ + } \ + } \ + if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { \ + const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; \ + /*const unsigned int u32npm = (unsigned int)npm | ((unsigned int)npm)<<8 | ((unsigned int)npm)<<16 | ((unsigned int)npm)<<24;*/ \ + for ( ; i < (wi&(~3)) ; i+= 4) { \ + OP(((unsigned int*)dptr_elt)[0], ((unsigned int*)sptr_elt)[0], u32pm, u32npm); \ + dptr_elt += 4; \ + sptr_elt += 4; \ + } \ + } \ + for ( ; i < wi ; i++) { \ + OP(*dptr_elt, *sptr_elt, pm, npm); \ + dptr_elt ++; \ + sptr_elt ++; \ + } \ + sptr_line += HRES; \ + dptr_line += HRES; \ + } \ } -} -void bitblit_fwd_bwd_copy(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr + wi - 1; - unsigned char *dptr_line = dptr + wi - 1; - - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - for (i = 0 ; i < wi ; i++) { - *dptr_elt = *sptr_elt; - dptr_elt --; - sptr_elt --; - } - sptr_line += HRES; - dptr_line += HRES; +#define BLIT_FWD_BWD(NAME, OP) \ + static void bitblit_fwd_bwd_##NAME(const unsigned_param_type xs, \ + const unsigned_param_type ys, \ + const unsigned_param_type wi, \ + const unsigned_param_type re, \ + const unsigned_param_type xd, \ + const unsigned_param_type yd, \ + const unsigned char pm \ + ) { \ + unsigned int i, j; \ + unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); \ + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); \ + unsigned char *sptr_line = sptr + wi - 1; \ + unsigned char *dptr_line = dptr + wi - 1; \ + const unsigned char npm = ~pm; \ + \ + for (j = 0 ; j < re ; j++) { \ + unsigned char *sptr_elt = sptr_line; \ + unsigned char *dptr_elt = dptr_line; \ + for (i = 0 ; i < wi ; i++) { \ + OP(*dptr_elt, *sptr_elt, pm, npm); \ + dptr_elt --; \ + sptr_elt --; \ + } \ + sptr_line += HRES; \ + dptr_line += HRES; \ + } \ } -} -void bitblit_bwd_fwd_copy(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr + mul_HRES((re-1)); - unsigned char *dptr_line = dptr + mul_HRES((re-1)); - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - i = 0; - if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { - for ( ; i < (wi&(~0xf)) ; i+= 16) { - ((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0]; - ((unsigned int*)dptr_elt)[1] = ((unsigned int*)sptr_elt)[1]; - ((unsigned int*)dptr_elt)[2] = ((unsigned int*)sptr_elt)[2]; - ((unsigned int*)dptr_elt)[3] = ((unsigned int*)sptr_elt)[3]; - dptr_elt += 16; - sptr_elt += 16; - } - } - if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { - for ( ; i < (wi&(~3)) ; i+= 4) { - ((unsigned int*)dptr_elt)[0] = ((unsigned int*)sptr_elt)[0]; - dptr_elt += 4; - sptr_elt += 4; - } - } - for ( ; i < wi ; i++) { - *dptr_elt = *sptr_elt; - dptr_elt ++; - sptr_elt ++; - } - sptr_line -= HRES; - dptr_line -= HRES; +#define BLIT_BWD_FWD(NAME, OP) \ + static void bitblit_bwd_fwd_##NAME(const unsigned_param_type xs, \ + const unsigned_param_type ys, \ + const unsigned_param_type wi, \ + const unsigned_param_type re, \ + const unsigned_param_type xd, \ + const unsigned_param_type yd, \ + const unsigned char pm \ + ) { \ + unsigned int i, j; \ + unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); \ + unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); \ + unsigned char *sptr_line = sptr + mul_HRES((re-1)); \ + unsigned char *dptr_line = dptr + mul_HRES((re-1)); \ + const unsigned char npm = ~pm; \ + \ + for (j = 0 ; j < re ; j++) { \ + unsigned char *sptr_elt = sptr_line; \ + unsigned char *dptr_elt = dptr_line; \ + i = 0; \ + if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { \ + for ( ; i < (wi&(~0xf)) ; i+= 16) { \ + const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; \ + /*const unsigned int u32npm = (unsigned int)npm | ((unsigned int)npm)<<8 | ((unsigned int)npm)<<16 | ((unsigned int)npm)<<24;*/ \ + OP(((unsigned int*)dptr_elt)[0], ((unsigned int*)sptr_elt)[0], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[1], ((unsigned int*)sptr_elt)[1], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[2], ((unsigned int*)sptr_elt)[2], u32pm, u32npm); \ + OP(((unsigned int*)dptr_elt)[3], ((unsigned int*)sptr_elt)[3], u32pm, u32npm); \ + dptr_elt += 16; \ + sptr_elt += 16; \ + } \ + } \ + if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { \ + for ( ; i < (wi&(~3)) ; i+= 4) { \ + const unsigned int u32pm = (unsigned int)pm | ((unsigned int)pm)<<8 | ((unsigned int)pm)<<16 | ((unsigned int)pm)<<24; \ + /*const unsigned int u32npm = (unsigned int)npm | ((unsigned int)npm)<<8 | ((unsigned int)npm)<<16 | ((unsigned int)npm)<<24;*/ \ + OP(((unsigned int*)dptr_elt)[0], ((unsigned int*)sptr_elt)[0], u32pm, u32npm); \ + dptr_elt += 4; \ + sptr_elt += 4; \ + } \ + } \ + for ( ; i < wi ; i++) { \ + OP(*dptr_elt, *sptr_elt, pm, npm); \ + dptr_elt ++; \ + sptr_elt ++; \ + } \ + sptr_line -= HRES; \ + dptr_line -= HRES; \ + } \ } -} +#define BLIT_ALLDIR(NAME, OP) \ + BLIT_FWD_FWD(NAME, OP) \ + BLIT_FWD_BWD(NAME, OP) \ + BLIT_BWD_FWD(NAME, OP) \ + -void bitblit_fwd_fwd_xor(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr; - unsigned char *dptr_line = dptr; - - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - i = 0; - /* this case is the console case */ - if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { - for ( ; i < (wi&(~0xf)) ; i+= 16) { - ((unsigned int*)dptr_elt)[0] ^= ((unsigned int*)sptr_elt)[0]; - ((unsigned int*)dptr_elt)[1] ^= ((unsigned int*)sptr_elt)[1]; - ((unsigned int*)dptr_elt)[2] ^= ((unsigned int*)sptr_elt)[2]; - ((unsigned int*)dptr_elt)[3] ^= ((unsigned int*)sptr_elt)[3]; - dptr_elt += 16; - sptr_elt += 16; - } - } - if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { - for ( ; i < (wi&(~3)) ; i+= 4) { - ((unsigned int*)dptr_elt)[0] ^= ((unsigned int*)sptr_elt)[0]; - dptr_elt += 4; - sptr_elt += 4; - } - } - for ( ; i < wi ; i++) { - *dptr_elt ^= *sptr_elt; - dptr_elt ++; - sptr_elt ++; - } - sptr_line += HRES; - dptr_line += HRES; - } -} - -void bitblit_fwd_bwd_xor(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr + wi - 1; - unsigned char *dptr_line = dptr + wi - 1; - - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - for (i = 0 ; i < wi ; i++) { - *dptr_elt ^= *sptr_elt; - dptr_elt --; - sptr_elt --; - } - sptr_line += HRES; - dptr_line += HRES; - } -} -void bitblit_bwd_fwd_xor(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd - ) { - unsigned int i, j; - unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); - unsigned char *dptr = (((unsigned char *)BASE_FB) + mul_HRES(yd) + xd); - unsigned char *sptr_line = sptr + mul_HRES((re-1)); - unsigned char *dptr_line = dptr + mul_HRES((re-1)); - - // flush_cache(); // handled in boot() - - for (j = 0 ; j < re ; j++) { - unsigned char *sptr_elt = sptr_line; - unsigned char *dptr_elt = dptr_line; - i = 0; - if (((xs & 0xf) == 0) && ((xd & 0xf) == 0)) { - for ( ; i < (wi&(~0xf)) ; i+= 16) { - ((unsigned int*)dptr_elt)[0] ^= ((unsigned int*)sptr_elt)[0]; - ((unsigned int*)dptr_elt)[1] ^= ((unsigned int*)sptr_elt)[1]; - ((unsigned int*)dptr_elt)[2] ^= ((unsigned int*)sptr_elt)[2]; - ((unsigned int*)dptr_elt)[3] ^= ((unsigned int*)sptr_elt)[3]; - dptr_elt += 16; - sptr_elt += 16; - } - } - if (((xs & 0x3) == 0) && ((xd & 0x3) == 0)) { - for ( ; i < (wi&(~3)) ; i+= 4) { - ((unsigned int*)dptr_elt)[0] ^= ((unsigned int*)sptr_elt)[0]; - dptr_elt += 4; - sptr_elt += 4; - } - } - for ( ; i < wi ; i++) { - *dptr_elt ^= *sptr_elt; - dptr_elt ++; - sptr_elt ++; - } - sptr_line -= HRES; - dptr_line -= HRES; - } -} +BLIT_ALLDIR(copy, COPY) +BLIT_ALLDIR(xor, XOR) +BLIT_ALLDIR(copy_pm, COPY_PM) +BLIT_ALLDIR(xor_pm, XOR_PM) #if 0 else if ((xd & 0xf) == 0) { @@ -1133,12 +1208,13 @@ void bitblit_bwd_fwd_xor(unsigned_param_type xs, #endif #if 0 -void bitblit_bwd_bwd_copy(unsigned_param_type xs, - unsigned_param_type ys, - unsigned_param_type wi, - unsigned_param_type re, - unsigned_param_type xd, - unsigned_param_type yd +static void bitblit_bwd_bwd_copy(const unsigned_param_type xs, + const unsigned_param_type ys, + const unsigned_param_type wi, + const unsigned_param_type re, + const unsigned_param_type xd, + const unsigned_param_type yd, + const unsigned char pm ) { unsigned int i, j; unsigned char *sptr = (((unsigned char *)BASE_FB) + mul_HRES(ys) + xs); diff --git a/sbus-to-ztex-gateware-migen/blit.sh b/sbus-to-ztex-gateware-migen/blit.sh index c9cc29c..6fdc02c 100755 --- a/sbus-to-ztex-gateware-migen/blit.sh +++ b/sbus-to-ztex-gateware-migen/blit.sh @@ -2,21 +2,23 @@ GCCDIR=~/LITEX/riscv64-unknown-elf-gcc-10.1.0-2020.08.2-x86_64-linux-ubuntu14 GCCPFX=riscv64-unknown-elf- +GCCLINK=${GCCDIR}/bin/${GCCPFX}gcc #GCCDIR=/opt/rv32bk #GCCPFX=riscv32-buildroot-linux-gnu- -#GCCDIR=~dolbeau/LITEX/buildroot-32SF/output/host -#GCCPFX=riscv32-buildroot-linux-gnu- +GCCDIR=~dolbeau2/LITEX/buildroot-rv32/output/host +GCCPFX=riscv32-buildroot-linux-gnu- GCC=${GCCDIR}/bin/${GCCPFX}gcc OBJCOPY=${GCCDIR}/bin/${GCCPFX}objcopy -if test "x$1" == "xASM"; then - $GCC -Os -o blit -march=rv32ib -mabi=ilp32 -T blit.lds -nostartfiles blit.s && - $OBJCOPY -O binary -j .text blit blit.raw -else -$GCC -Os -S blit.c -march=rv32ib -mabi=ilp32 -mstrict-align -fno-builtin-memset -nostdlib -ffreestanding -nostartfiles && - $GCC -Os -o blit -march=rv32ib -mabi=ilp32 -T blit.lds -nostartfiles blit.s && - $OBJCOPY -O binary -j .text blit blit.raw +OPT=-Os #-fno-inline +ARCH=rv32i_zba_zbb_zbt + +if test "x$1" != "xASM"; then + $GCC $OPT -S -o blit.s -march=$ARCH -mabi=ilp32 -mstrict-align -fno-builtin-memset -nostdlib -ffreestanding -nostartfiles blit.c fi +$GCC $OPT -c -o blit.o -march=$ARCH -mabi=ilp32 -mstrict-align -fno-builtin-memset -nostdlib -ffreestanding -nostartfiles blit.s && +$GCCLINK $OPT -o blit -march=$ARCH -mabi=ilp32 -T blit.lds -nostartfiles blit.o && +$OBJCOPY -O binary -j .text blit blit.raw diff --git a/sbus-to-ztex-gateware-migen/cg6_accel.py b/sbus-to-ztex-gateware-migen/cg6_accel.py index e315e32..5498d06 100644 --- a/sbus-to-ztex-gateware-migen/cg6_accel.py +++ b/sbus-to-ztex-gateware-migen/cg6_accel.py @@ -11,46 +11,47 @@ class CG6Accel(Module): # AutoCSR ? # for FBC and TEC - where we just ignore TEC self.bus = bus = wishbone.Interface() - + COORD_BITS=12 fbc_config = Signal(32, reset = (0x60000000)) # bit 11-12 are for resolution, see the GX manual fbc_mode = Signal(32) fbc_clip = Signal(32) fbc_s = Signal(32) #fbc_font = Signal(32) - fbc_x = Array(Signal(32) for a in range(0, 4)) - fbc_y = Array(Signal(32) for a in range(0, 4)) - fbc_offx = Signal(32) - fbc_offy = Signal(32) - fbc_incx = Signal(32) - fbc_incy = Signal(32) - fbc_clipminx = Signal(32) - fbc_clipminy = Signal(32) - fbc_clipmaxx = Signal(32) - fbc_clipmaxy = Signal(32) - fbc_fg = Signal(32) - fbc_bg = Signal(32) + fbc_x = Array(Signal(COORD_BITS) for a in range(0, 4)) + fbc_y = Array(Signal(COORD_BITS) for a in range(0, 4)) + fbc_offx = Signal(COORD_BITS) + fbc_offy = Signal(COORD_BITS) + fbc_incx = Signal(COORD_BITS) + fbc_incy = Signal(COORD_BITS) + fbc_clipminx = Signal(COORD_BITS) + fbc_clipminy = Signal(COORD_BITS) + fbc_clipmaxx = Signal(COORD_BITS) + fbc_clipmaxy = Signal(COORD_BITS) + fbc_fg = Signal(8) + fbc_bg = Signal(8) fbc_alu = Signal(32) - fbc_arectx = Signal(32) - fbc_arecty = Signal(32) + fbc_pm = Signal(8) + fbc_arectx = Signal(COORD_BITS) + fbc_arecty = Signal(COORD_BITS) # extra stuff for compatibility - fbc_arectx_prev = Signal(32) # after fbc_arecty (600) - R/O - fbc_arecty_prev = Signal(32) # after fbc_arectx_prev (601) - R/O + fbc_arectx_prev = Signal(COORD_BITS) # after fbc_arecty (600) - R/O + fbc_arecty_prev = Signal(COORD_BITS) # after fbc_arectx_prev (601) - R/O fbc_r5_cmd = Signal(32) # to communicate with Vex (602) fbc_r5_status = Array(Signal(32) for a in range(0, 4)) fbc_next_font = Signal(32) - fbc_next_x0 = Signal(12) - fbc_next_x1 = Signal(12) - fbc_next_y0 = Signal(12) + fbc_next_x0 = Signal(COORD_BITS) + fbc_next_x1 = Signal(COORD_BITS) + fbc_next_y0 = Signal(COORD_BITS) fbc_do_draw = Signal() fbc_do_blit = Signal() font_layout = [ ("font", 32), - ("x0", 12), - ("x1", 12), - ("y0", 12), + ("x0", COORD_BITS), + ("x1", COORD_BITS), + ("y0", COORD_BITS), ] # depth is because the current 'font' is a bit slow, so we need to buffer a lot... self.submodules.fbc_fifo_font = SyncFIFOBuffered(width=layout_len(font_layout),depth=1024) @@ -78,9 +79,9 @@ class CG6Accel(Module): # AutoCSR ? # 6: fbc_blit R/O 7: [ self.fbc_fifo_font.we.eq(1), fbc_fifo_font_in.font.eq(bus.dat_w), - fbc_fifo_font_in.x0.eq(fbc_x[0][0:12]), - fbc_fifo_font_in.x1.eq(fbc_x[1][0:12]), - fbc_fifo_font_in.y0.eq(fbc_y[0][0:12]), + fbc_fifo_font_in.x0.eq(fbc_x[0]), + fbc_fifo_font_in.x1.eq(fbc_x[1]), + fbc_fifo_font_in.y0.eq(fbc_y[0]), NextValue(fbc_x[0], fbc_x[0] + fbc_incx), NextValue(fbc_x[1], fbc_x[1] + fbc_incx), NextValue(fbc_y[0], fbc_y[0] + fbc_incy), @@ -113,7 +114,7 @@ class CG6Accel(Module): # AutoCSR ? 64: [ NextValue(fbc_fg, bus.dat_w) ], 65: [ NextValue(fbc_bg, bus.dat_w) ], 66: [ NextValue(fbc_alu, bus.dat_w) ], - # 67: planemask reg + 67: [ NextValue(fbc_pm, bus.dat_w) ], # 67: planemask reg # 68: pixelmask reg # 69-70: # 71: pattalign reg @@ -131,10 +132,10 @@ class CG6Accel(Module): # AutoCSR ? # 579: # 580-582: fbc_relrect[xyz] -> update absolute 580: [ NextValue(fbc_arectx_prev, fbc_arectx), - NextValue(fbc_arectx, fbc_arectx + bus.dat_w), + NextValue(fbc_arectx, fbc_arectx + bus.dat_w[0:COORD_BITS]), ], 581: [ NextValue(fbc_arecty_prev, fbc_arecty), - NextValue(fbc_arecty, fbc_arecty + bus.dat_w), + NextValue(fbc_arecty, fbc_arecty + bus.dat_w[0:COORD_BITS]), ], # 600-601: fbc_arect[xy]next, not directly writable 602: [ NextValue(fbc_r5_cmd, bus.dat_w) ], @@ -185,7 +186,8 @@ class CG6Accel(Module): # AutoCSR ? # 62-63: pad10 64: [ NextValue(bus.dat_r, fbc_fg) ], # 0x100 65: [ NextValue(bus.dat_r, fbc_bg) ], # 0x104 - 66: [ NextValue(bus.dat_r, fbc_alu) ], + 66: [ NextValue(bus.dat_r, fbc_alu) ], # 0x108 + 67: [ NextValue(bus.dat_r, fbc_pm) ], # 0x10c 576: [ NextValue(bus.dat_r, fbc_arectx), ], 577: [ NextValue(bus.dat_r, fbc_arecty), @@ -207,11 +209,11 @@ class CG6Accel(Module): # AutoCSR ? ], 608: [ NextValue(bus.dat_r, fbc_next_font), ], - 609: [ NextValue(bus.dat_r, Cat(fbc_next_x0, Signal(20, reset = 0))), + 609: [ NextValue(bus.dat_r, fbc_next_x0), ], - 610: [ NextValue(bus.dat_r, Cat(fbc_next_x1, Signal(20, reset = 0))), + 610: [ NextValue(bus.dat_r, fbc_next_x1), ], - 611: [ NextValue(bus.dat_r, Cat(fbc_next_y0, Signal(20, reset = 0))), + 611: [ NextValue(bus.dat_r, fbc_next_y0), ], }), NextValue(bus.ack, 1), @@ -240,8 +242,10 @@ class CG6Accel(Module): # AutoCSR ? #timeout_rst = 0xFFFFFFF #timeout = Signal(28, reset = timeout_rst) - #pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") - #self.comb += pad_SBUS_DATA_OE_LED.eq(~local_reset); + pad_SBUS_DATA_OE_LED = platform.request("SBUS_DATA_OE_LED") + self.comb += pad_SBUS_DATA_OE_LED.eq(~local_reset); + #self.comb += pad_SBUS_DATA_OE_LED.eq(fbc_r5_cmd[1]); # blitting + #self.comb += pad_SBUS_DATA_OE_LED.eq(fbc_pm != 0); # planemasking self.sync += [ self.fbc_fifo_font.re.eq(0), diff --git a/sbus-to-ztex-gateware-migen/cg6_fb.py b/sbus-to-ztex-gateware-migen/cg6_fb.py index bba533c..329189a 100644 --- a/sbus-to-ztex-gateware-migen/cg6_fb.py +++ b/sbus-to-ztex-gateware-migen/cg6_fb.py @@ -152,8 +152,8 @@ class cg6(Module, AutoCSR): self.comb += vtg.hwcursor_x.eq(hwcursor_x) self.comb += vtg.hwcursor_y.eq(hwcursor_y) - pad_SBUS_DATA_OE_LED = soc.platform.request("SBUS_DATA_OE_LED") - self.comb += pad_SBUS_DATA_OE_LED.eq((hwcursor_x < 1280) & (hwcursor_y < 1024)); + #pad_SBUS_DATA_OE_LED = soc.platform.request("SBUS_DATA_OE_LED") + #self.comb += pad_SBUS_DATA_OE_LED.eq((hwcursor_x < 1280) & (hwcursor_y < 1024)); self.bus2 = bus2 = wishbone.Interface() self.submodules.wishbone_fsm2 = wishbone_fsm2 = FSM(reset_state = "Reset") diff --git a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py index 8f82e7f..d801b49 100644 --- a/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py +++ b/sbus-to-ztex-gateware-migen/sbus_to_fpga_soc.py @@ -460,7 +460,7 @@ class SBusFPGA(SoCCore): self.bus.add_master(name="cg6_accel_r5_d", master=self.cg6_accel.dbus) cg6_rom_file = "blit.raw" cg6_rom_data = soc_core.get_mem_data(cg6_rom_file, "little") - self.add_ram("cg6_accel_rom", origin=self.mem_map["cg6_accel_rom"], size=2**12, contents=cg6_rom_data, mode="r") + self.add_ram("cg6_accel_rom", origin=self.mem_map["cg6_accel_rom"], size=2**13, contents=cg6_rom_data, mode="r") self.add_ram("cg6_accel_ram", origin=self.mem_map["cg6_accel_ram"], size=2**12, mode="rw") print("IRQ to Device map:\n")