diff --git a/blit_goblin.c b/blit_goblin.c
index 874d8cd..db30556 100644
--- a/blit_goblin.c
+++ b/blit_goblin.c
@@ -812,7 +812,7 @@ static void invert(const unsigned_param_type xd,
 				for ( ; i < (wi-3) ; i+=4) {							\
 					unsigned int src1 = ((unsigned int*)sptr_elt_al)[1]; \
 					unsigned int val;									\
-					asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); \
+					val = fsr(src0, src1, fsr_cst);	\
 					OP(*(unsigned int*)dptr_elt, val, u32pm, u32npm);	\
 					src0 = src1;										\
 					dptr_elt += 4;										\
@@ -918,7 +918,7 @@ static void invert(const unsigned_param_type xd,
 				for ( ; i < (wi-3) ; i+=4) {							\
 					unsigned int src1 = ((unsigned int*)sptr_elt_al)[1]; \
 					unsigned int val;									\
-					asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst)); \
+					val = fsr(src0, src1, fsr_cst);	\
 					OP(*(unsigned int*)dptr_elt, val, u32pm, u32npm);	\
 					src0 = src1;										\
 					dptr_elt += 4;										\
@@ -1015,7 +1015,7 @@ static void bitblit_fwd_fwd_copy(const unsigned_param_type xs,
 				for ( ; (dptr_elt < (dptr_elt_last-3)) ; ) {
 					unsigned int src1 = ((unsigned int*)sptr_elt_al)[1];
 					unsigned int val;
-					asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst));
+					val = fsr(src0, src1, fsr_cst);
 					((unsigned int*)dptr_elt)[0] = val;
 					src0 = src1;
 					dptr_elt += 4;
@@ -1153,7 +1153,7 @@ static void patternrectfill(const unsigned_param_type xd,
 			for ( ; i < (wi-3) ; i+=4) {
 				unsigned int src1 = ((unsigned int*)pat_ptr_line)[((i+io+4) & pat_xmask) >> 2];
 				unsigned int val;
-				asm("fsr %0, %1, %2, %3\n" : "=r"(val) : "r"(src0), "r"(src1), "r"(fsr_cst));
+				val = fsr(src0, src1, fsr_cst);
 				((unsigned int*)dptr_elt)[0] = val;
 				src0 = src1;
 				dptr_elt += 4;
@@ -1318,7 +1318,7 @@ static inline uint32_t pixelswap(const uint32_t p) {
 	/* uint32_t r = __builtin_bswap32(p); */
 	/* asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(r), "r"(r), "r"(8)); */
 	uint32_t r;
-	asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(p), "r"(p), "r"(8));
+	r = fsr(p, p, 8);
 	return __builtin_bswap32(r);
 }
 
diff --git a/blit_goblin_nubus.sh b/blit_goblin_nubus.sh
index ad8d0e5..799bdfa 100755
--- a/blit_goblin_nubus.sh
+++ b/blit_goblin_nubus.sh
@@ -15,8 +15,12 @@ GCCPFX=riscv32-buildroot-linux-gnu-
 GCC=${GCCDIR}/bin/${GCCPFX}gcc
 OBJCOPY=${GCCDIR}/bin/${GCCPFX}objcopy
 
-OPT=-O3 #-fno-inline
-ARCH=rv32im_zba_zbb_zbt
+## for compilers that support Zbt (not yet in mainline GCC)
+#OPT="-O3 -DCOMPILER_SUPPORT_FSR" #-fno-inline
+#ARCH=rv32im_zba_zbb_zbt
+## for mainline GCC recent enough to have Zba/Zbb support (older versions are unsupported)
+OPT="-O3" #-fno-inline
+ARCH=rv32im_zba_zbb
 
 PARAM="-DBASE_FB=${BASE_FB} -DGOBLIN_NUBUS"
 
diff --git a/ldsdsupport.h b/ldsdsupport.h
index 8c4c3ba..b40b862 100644
--- a/ldsdsupport.h
+++ b/ldsdsupport.h
@@ -121,3 +121,24 @@ static inline unsigned int ufma8vlv(const unsigned int a, const unsigned int b,
 	_ufma8vlv(r, a, b);
 	return r;
 }
+
+#ifdef COMPILER_SUPPORT_FSR /* expected from the build flags (e.g. -DCOMPILER_SUPPORT_FSR) when the toolchain accepts the "fsr" mnemonic */
+static inline unsigned int fsr(const unsigned int a, const unsigned int b, const unsigned int c) { /* issues one "fsr" instruction on (a, b, c) and returns its result; presumably the Zbt funnel-shift-right with c as the shift amount -- confirm against the ISA draft */
+  unsigned int r;
+  asm("fsr %0, %1, %2, %3\n" : "=r"(r) : "r"(a), "r"(b), "r"(c)); /* operands in mnemonic order: result, a, b, c -- all passed in general-purpose registers */
+  return r;
+}
+#else
+#define opcode_zbt(opcode, func3, func2, rd, rs1, rs2, rs3)		\
+  asm volatile(".word ((" #opcode ") | (regnum_%0 << 7) | (regnum_%1 << 15) | (regnum_%2 << 20) | ((" #func3 ") << 12) | ((" #func2 ") << 25) | (regnum_%3 << 27));" \
+	       : "=r" (rd)						\
+	       : "r" (rs1), "r" (rs2), "r" (rs3)			\
+	       ); /* emits a raw 32-bit instruction word: opcode | rd<<7 | func3<<12 | rs1<<15 | rs2<<20 | func2<<25 | rs3<<27; relies on assembler symbols named regnum_<register>, which are not defined in this hunk (presumably provided elsewhere -- confirm); the expansion already ends in ';'; NOTE(review): the mnemonic-based fsr uses plain asm, so 'volatile' may be unnecessary here */
+#define _fsr(rd, rs1, rs2, rs3) opcode_zbt(0x00000033, 0x05, 0x02, rd, rs1, rs2, rs3) /* hand-encoded fsr: opcode 0x33, func3 0x05, func2 0x02; note the parameter order is (rd, rs1, rs2, rs3), i.e. encoding-field order */
+
+static inline unsigned int fsr(const unsigned int a, const unsigned int b, const unsigned int c) { /* same interface as the mnemonic-based fsr, used when COMPILER_SUPPORT_FSR is not defined; the instruction is emitted as a raw .word through _fsr */
+  unsigned int r;
+  _fsr(r, a, c, b); /* deliberate swap: _fsr takes (rd, rs1, rs2, rs3), so c fills the rs2 field and b the rs3 field, mirroring the mnemonic form "fsr rd, a, b, c" (assembly order rd, rs1, rs3, rs2 per the Zbt draft -- confirm) */
+  return r;
+}
+#endif