diff --git a/blit_goblin.c b/blit_goblin.c index 874d8cd..db30556 100644 --- a/blit_goblin.c +++ b/blit_goblin.c @@ -812,7 +812,7 @@ static void invert(const unsigned_param_type xd, for ( ; i < (wi-3) ; i+=4) { \ unsigned int src1 = ((unsigned int*)sptr_elt_al)[1]; \ unsigned int val; \ - asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); \ + val = fsr(src0, src1, fsr_cst); \ OP(*(unsigned int*)dptr_elt, val, u32pm, u32npm); \ src0 = src1; \ dptr_elt += 4; \ @@ -918,7 +918,7 @@ static void invert(const unsigned_param_type xd, for ( ; i < (wi-3) ; i+=4) { \ unsigned int src1 = ((unsigned int*)sptr_elt_al)[1]; \ unsigned int val; \ - asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); \ + val = fsr(src0, src1, fsr_cst); \ OP(*(unsigned int*)dptr_elt, val, u32pm, u32npm); \ src0 = src1; \ dptr_elt += 4; \ @@ -1015,7 +1015,7 @@ static void bitblit_fwd_fwd_copy(const unsigned_param_type xs, for ( ; (dptr_elt < (dptr_elt_last-3)) ; ) { unsigned int src1 = ((unsigned int*)sptr_elt_al)[1]; unsigned int val; - asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); + val = fsr(src0, src1, fsr_cst); ((unsigned int*)dptr_elt)[0] = val; src0 = src1; dptr_elt += 4; @@ -1153,7 +1153,7 @@ static void patternrectfill(const unsigned_param_type xd, for ( ; i < (wi-3) ; i+=4) { unsigned int src1 = ((unsigned int*)pat_ptr_line)[((i+io+4) & pat_xmask) >> 2]; unsigned int val; - asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); + val = fsr(src0, src1, fsr_cst); ((unsigned int*)dptr_elt)[0] = val; src0 = src1; dptr_elt += 4; @@ -1318,7 +1318,7 @@ static inline uint32_t pixelswap(const uint32_t p) { /* uint32_t r = __builtin_bswap32(p); */ /* asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(r), "r"(r), "r"(8)); */ uint32_t r; - asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(p), "r"(p), "r"(8)); + r = fsr(p, p, 8); return __builtin_bswap32(r); } diff --git a/blit_goblin_nubus.sh 
b/blit_goblin_nubus.sh index ad8d0e5..799bdfa 100755 --- a/blit_goblin_nubus.sh +++ b/blit_goblin_nubus.sh @@ -15,8 +15,12 @@ GCCPFX=riscv32-buildroot-linux-gnu- GCC=${GCCDIR}/bin/${GCCPFX}gcc OBJCOPY=${GCCDIR}/bin/${GCCPFX}objcopy -OPT=-O3 #-fno-inline -ARCH=rv32im_zba_zbb_zbt +## for compilers that support Zbt (not yet in mainline GCC) +#OPT="-O3 -DCOMPILER_SUPPORT_FSR" #-fno-inline +#ARCH=rv32im_zba_zbb_zbt +## for a mainline GCC recent enough to support Zba/Zbb (older versions are unsupported) +OPT="-O3" #-fno-inline +ARCH=rv32im_zba_zbb PARAM="-DBASE_FB=${BASE_FB} -DGOBLIN_NUBUS" diff --git a/ldsdsupport.h b/ldsdsupport.h index 8c4c3ba..b40b862 100644 --- a/ldsdsupport.h +++ b/ldsdsupport.h @@ -121,3 +121,24 @@ static inline unsigned int ufma8vlv(const unsigned int a, const unsigned int b, _ufma8vlv(r, a, b); return r; } + +#ifdef COMPILER_SUPPORT_FSR +static inline unsigned int fsr(const unsigned int a, const unsigned int b, const unsigned int c) { + unsigned int r; + asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(a), "r"(b), "r"(c)); + return r; +} +#else +#define opcode_zbt(opcode, func3, func2, rd, rs1, rs2, rs3) \ + asm volatile(".word ((" #opcode ") | (regnum_%0 << 7) | (regnum_%1 << 15) | (regnum_%2 << 20) | ((" #func3 ") << 12) | ((" #func2 ") << 25) | (regnum_%3 << 27));" \ + : "=r" (rd) \ + : "r" (rs1), "r" (rs2), "r" (rs3) \ + ); +#define _fsr(rd, rs1, rs2, rs3) opcode_zbt(0x00000033, 0x05, 0x02, rd, rs1, rs2, rs3) + +static inline unsigned int fsr(const unsigned int a, const unsigned int b, const unsigned int c) { + unsigned int r; + _fsr(r, a, c, b); // b and c are swapped on purpose: _fsr takes (rd, rs1, rs2, rs3), so c is encoded as rs2 and b as rs3 (presumably mirroring the operand order of the fsr mnemonic) + return r; +} +#endif